The attached file Q8.txt contains 100,000 integers between 1 and 100,000 (each row has a single integer). The order of these integers is random and no integer is repeated.
- Write a program to implement the Sort-and-Count algorithm in your favorite language, and find the number of inversions in the given file.
- In the lecture, we counted the number of inversions in O(n log n) time using the Merge-Sort idea. Is it possible to use the Quick-Sort idea instead?
If possible, implement the algorithm in your favourite language, run it over the given file, and compare its running time with the one above. If not, give an explanation.
1. 归并法
最终逆序对的个数是:2500572073
归并法的核心是将原问题划分成一个个小的子问题,分别求解后再合并。首先对整个数组不断对半划分,直到每个子数组只剩1个元素,然后开始排序并合并,如上图所示。
具体算法如下。从第74行开始(即 MergeAndCount 函数中的合并循环)计算将要合并的两个子数组之间的逆序对数:如果左边数组中的某个值大于右边数组中当前的数,则说明左边数组中这个值后面的所有数字也都大于右边数组的这个数(因为在合并时,左右两边的数组都已经有序),因此直接加 (mid - left + 1 - i)
就可以了。然后一层一层地向上累加,即得到总的逆序对数。
因为老师要求对比归并法和快排法的效率,因此代码中加入了计时。
#include<string.h>
#include<sys/time.h>
#include<cstdio>
#include<cstdlib>
#include<fstream>
#include<iostream>
#include<vector>
// Capacity of the global num[] buffer (one more than the 100,000 integers
// described in the task statement).
#define MAXN 100001
using namespace std;
// Forward declarations; both functions are defined after main().
double SortAndCount(int left, int right);
double MergeAndCount(int left, int mid, int mid1, int right);
// Commented-out small sample inputs (presumably for manual testing):
//int num[] = {2, 4, 1, 3, 5};
//int num[] = {4, 7, 6, 3, 1, 2, 9, 8, 5};
// Global array filled by main() from the input file; MergeAndCount reads it
// and writes the merged (sorted) ranges back into it.
int num[MAXN];
// Entry point of the merge-sort variant.
//
// Reads integers from "Q8.txt" (one per line) into num[], counts the
// inversions among the values actually read with SortAndCount, and prints the
// start/end timestamps, the elapsed wall-clock time in milliseconds and the
// inversion count. Returns 0 in every case, including when the file cannot be
// opened.
int main()
{
    char line[15];
    int count = 0;  // number of values stored in num[] so far
    struct timeval t_start, t_end;
    //open the file
    std::ifstream in("Q8.txt", std::ios::in);
    if (!in.is_open()) {
        printf("Can not open this file!");
        return 0;
    }
    // Let the read itself drive the loop (instead of testing eof() before
    // reading) and never write past the end of num[]. A line that parses to 0
    // (empty line, non-numeric text) ends the input, as before.
    while (count < MAXN && in.getline(line, sizeof(line))) {
        const int value = atoi(line);
        if (value == 0)
            break;
        num[count++] = value;
    }
    //get the start time
    gettimeofday(&t_start, NULL);
    long start = ((long)t_start.tv_sec) * 1000 + (long)t_start.tv_usec / 1000;
    printf("Start time: %ld ms\n", start);
    // Count over exactly the values that were read; a hard-coded upper bound
    // would treat unread (zero) slots as data when the file is shorter.
    const double ans = SortAndCount(0, count - 1);
    //get the end time
    gettimeofday(&t_end, NULL);
    long end = ((long)t_end.tv_sec) * 1000 + (long)t_end.tv_usec / 1000;
    printf("End time: %ld ms\n", end);
    long cost_time = end - start;
    printf("Cost time: %ld ms\n", cost_time);
    printf("The number of invertions is %.lf\n", ans);
    return 0;
}
// Returns the number of inversions in num[left..right] (both bounds
// inclusive). The range is split in half, each half is processed recursively,
// and MergeAndCount then merges the halves and reports the inversions that
// span them.
double SortAndCount(int left, int right) {
    // A range with fewer than two elements contains no inversions.
    if (left >= right) {
        return 0;
    }
    // Midpoint computed as an offset from left so the sum cannot overflow.
    const int middle = left + (right - left) / 2;
    const double in_left_half = SortAndCount(left, middle);
    const double in_right_half = SortAndCount(middle + 1, right);
    const double across_halves = MergeAndCount(left, middle, middle + 1, right);
    return in_left_half + in_right_half + across_halves;
}
// Merges the two adjacent sorted runs num[left..mid] and num[mid1..right]
// into one ascending run written back to num[left..right].
//
// Parameters: all four bounds are inclusive indices into the global num[];
// the caller passes mid1 == mid + 1.
// Returns: the number of pairs (a, b) with a taken from the left run, b taken
// from the right run and a > b, i.e. the inversions that span the two runs.
// An empty range (right < left) yields 0 and leaves num[] untouched.
double MergeAndCount(int left, int mid, int mid1, int right) {
    const int len = right - left + 1;
    if (len <= 0)
        return 0;
    // Heap-backed scratch space: a variable-length array is not standard C++
    // and would place the whole merge buffer on the stack.
    std::vector<int> merged(len);
    double cnt = 0;
    int i = left;   // next unread element of the left run
    int j = mid1;   // next unread element of the right run
    int k = 0;      // next free slot in merged
    while (i <= mid && j <= right) {
        if (num[i] > num[j]) {
            // Both runs are sorted, so every element from i to mid is also
            // greater than num[j]: that is (mid - i + 1) inversions at once.
            cnt += mid - i + 1;
            merged[k++] = num[j++];
        }
        else {
            merged[k++] = num[i++];
        }
    }
    // Exactly one run still has elements; append whatever is left of it.
    while (i <= mid)
        merged[k++] = num[i++];
    while (j <= right)
        merged[k++] = num[j++];
    //copy the ordered sequence back into num
    memcpy(num + left, merged.data(), sizeof(int) * len);
    return cnt;
}
2. 快排法
快排的思想是:从一组数中任意选取一个数作为轴(pivot),将其余的数与这个轴进行比较,把比轴小的数放在pivot的左边,把比轴大的数放在pivot的右边。这样就通过pivot把整个数组分成了两部分,一部分比pivot小,一部分比pivot大,然后再对两部分分别继续划分下去。
代码如下所示。从第59行开始(即 Invertions_number_Qsort 函数中的划分循环)是用快排思想解决这个问题的核心点:首先选取最左边的数字作为pivot,让其余数字依次与其进行比较。若某个数比pivot小,则将它放在左边;这次移动会越过此前已经放到右边的数(它们都比pivot大,因而也都比该数大),这些逆序对在之后的递归中无法再被统计,会有逆序对的损失(这个是关键),所以需要立即加上当前放在右边的数的个数。比如4,7,6,3,1,2,以4为pivot,快排时将数字3放在了左边,但其实前面的4、7和6都比3大;此时只加2(即已放在右边的7和6)。而位于该数之前、小于pivot并大于该数的数,和该数放在了同一边,可以在下一次递归时进行计算。在循环之后,因为所有比pivot小的数字都放在了左边,而它们原本都位于pivot之后,各自与pivot构成一个逆序对(例如上面的4和3),因此再直接加上左边数组的长度就可以。
接下来继续递归求左半边和右半边的逆序数对.
#include<cstdio>
#include<cstdlib>
#include<sys/time.h>
#include<string.h>
#include<iostream>
#include<fstream>
// Capacity of the global buffers below (one more than the 100,000 integers
// described in the task statement).
#define MAXN 100001
using namespace std;
// Forward declaration; the function is defined after main().
double Invertions_number_Qsort(int left, int right);
// Commented-out small sample inputs (presumably for manual testing):
//int num[] = {2, 4, 1, 3, 5};
//int num[] = {4, 7, 6, 3, 1, 2, 9, 8, 5};
// Global array filled by main() from the input file; Invertions_number_Qsort
// rewrites its ranges in partitioned order.
int num[MAXN];
// Scratch buffers used by Invertions_number_Qsort during partitioning:
// l collects the values smaller than the pivot (followed by the pivot itself),
// r collects the remaining values.
int l[MAXN], r[MAXN];
// Entry point of the quick-sort variant.
//
// Reads integers from "Q8.txt" (one per line) into num[], counts the
// inversions among the values actually read with Invertions_number_Qsort, and
// prints the start/end timestamps, the elapsed wall-clock time in
// milliseconds and the inversion count. Returns 0 in every case, including
// when the file cannot be opened.
int main()
{
    char line[15];
    int count = 0;  // number of values stored in num[] so far
    struct timeval t_start, t_end;
    //open the file
    std::ifstream in("Q8.txt", std::ios::in);
    if (!in.is_open()) {
        printf("Can not open this file!");
        return 0;
    }
    // Let the read itself drive the loop (instead of testing eof() before
    // reading) and never write past the end of num[]. A line that parses to 0
    // (empty line, non-numeric text) ends the input, as before.
    while (count < MAXN && in.getline(line, sizeof(line))) {
        const int value = atoi(line);
        if (value == 0)
            break;
        num[count++] = value;
    }
    //get the start time
    gettimeofday(&t_start, NULL);
    long start = ((long)t_start.tv_sec) * 1000 + (long)t_start.tv_usec / 1000;
    printf("Start time: %ld ms\n", start);
    // Count over exactly the values that were read; a hard-coded upper bound
    // would treat unread (zero) slots as data when the file is shorter.
    const double ans = Invertions_number_Qsort(0, count - 1);
    //get the end time
    gettimeofday(&t_end, NULL);
    long end = ((long)t_end.tv_sec) * 1000 + (long)t_end.tv_usec / 1000;
    printf("End time: %ld ms\n", end);
    long cost_time = end - start;
    printf("Cost time: %ld ms\n\n", cost_time);
    printf("The number of invertions is %.lf\n", ans);
    return 0;
}
double Invertions_number_Qsort(int left, int right) {
int len = right - left + 1;
if (len <= 1)
return 0;
//int l[len], r[len];
int k, ll = 0, rr = 0;
double cnt = 0;
for (k = 1; k < len; k++) {
if (num[left] > num[left+k]) {
l[ll++] = num[left+k];
cnt += rr;
}
else {
r[rr++] = num[left+k];
}
}
l[ll] = num[left];
//the number put in the l all lower than pivot, so just add ll
cnt += ll;
//copy the ordered sequence in the num
memcpy(num+left, l, sizeof(int)*(ll+1));
memcpy(num+left+ll+1, r, sizeof(int)*(rr));
//calculate the left one and right one
double cnt_l = Invertions_number_Qsort(left, left+ll-1);
double cnt_r = Invertions_number_Qsort(left+ll+1, right);
return cnt + cnt_l + cnt_r;
}