#include <assert.h>
#include <limits.h>
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>


// Global constants
// Rank that performs the file I/O and acts as root of the collectives
static const int ROOT = 0;

// Global variables
// Rank of this process and number of processes in MPI_COMM_WORLD; both stay
// at -1 until main() has queried MPI
int rank = -1;
int size = -1;
// Whole data set; allocated by read_input(), which only ROOT calls. Used as
// the send buffer of the scatter and the receive buffer of the gather
int* data = NULL;
// Elements currently owned by this process (first numLocalElements entries)
int* localData = NULL;
// Scratch: the part of localData this process keeps during one sort step
int* keptData = NULL;
// Scratch: the elements received from the partner during one sort step
int* exchangedData = NULL;
// Per-process element counts and offsets into data; allocated on ROOT only
int* localSizes = NULL;
int* displs = NULL;
// Total number of elements, read from the input file and broadcast by ROOT
int numElements = 0;
// Number of valid entries in localData, keptData and exchangedData
int numLocalElements = 0;
int numKeptElements = 0;
int numExchangedElements = 0;


// Compare the value of two integers
//
// qsort()-style comparator: returns a negative value, zero or a positive
// value when *a is respectively less than, equal to or greater than *b.
// The result is built from two comparisons rather than a subtraction, because
// `*a - *b` overflows (undefined behavior, wrong sign in practice) when the
// operands are far apart, e.g. INT_MIN and 1.
int compare (const void * a, const void * b) {
   const int lhs = *(const int*)a;
   const int rhs = *(const int*)b;
   return (lhs > rhs) - (lhs < rhs);
}


// Release every dynamically allocated global buffer and reset its pointer,
// so that calling this function more than once is harmless
void free_buffers() {
   // Same buffers, in the same order, as they are declared at file scope
   int** const owned[] = {
      &data, &localData, &keptData, &exchangedData, &localSizes, &displs
   };
   const size_t count = sizeof(owned) / sizeof(owned[0]);
   size_t k;
   for (k = 0; k < count; k++) {
      // free() ignores NULL, so buffers that were never allocated are fine
      free(*owned[k]);
      *owned[k] = NULL;
   }
}


// Close libraries and clean dynamic variables
//
// Frees the global buffers, flushes stdout, shuts MPI down and finally ends
// the process with `code` as its exit status. This function does not return.
void finalize(int code) {
   free_buffers();
   // Push out any buffered messages before the process goes away
   fflush(stdout);
   MPI_Finalize();
   exit(code);
}


// Checks that the first numElements entries of output are in non-decreasing
// order and prints the verdict on stdout. Arrays with fewer than two elements
// are reported as sorted.
void validate(int* output, int numElements) {
   int next = 1;
   assert(output != NULL);
   // Walk forward while every element is <= its successor
   while ((next < numElements) && (output[next-1] <= output[next])) {
      next++;
   }
   // Stopping before the end means an out-of-order pair was found
   if (next < numElements) {
      printf("************* NOT sorted *************\n");
   } else {
      printf("============= SORTED ===========\n");
   }
}


// Reads input data from file
//
// "input.txt" is expected to contain the element count followed by that many
// integers. On return `data` points to a freshly allocated array and
// `numElements` is the number of values actually stored in it: when the file
// holds fewer parsable values than announced, a warning is printed and the
// count is reduced instead of sorting zero padding.
//
// A missing file, an unreadable or negative count, or an allocation failure
// is reported on stderr and ends the process with a failure status through
// finalize().
// NOTE(review): main() calls this on ROOT only, while the other ranks go
// straight to the broadcast in distribute_input(). Whether they are torn down
// when ROOT leaves through finalize() depends on the MPI implementation and
// launcher - confirm, or switch these error paths to MPI_Abort.
void read_input() {
   int count = 0;
   FILE* fin = NULL;
   // Open file
   if ((fin = fopen("input.txt", "r")) == NULL) {
      fprintf(stderr, "Error opening input file\n");
      finalize(EXIT_FAILURE);
   }
   // The first value announces how many elements follow
   if ((fscanf(fin, "%d", &numElements) != 1) || (numElements < 0)) {
      fprintf(stderr, "Invalid element count in input file\n");
      fclose(fin);
      finalize(EXIT_FAILURE);
   }
   // Create a memory to store the data. At least one element is requested
   // because calloc(0, ...) may legitimately return NULL
   data = (int *)calloc((numElements > 0) ? (size_t)numElements : 1, sizeof(int));
   if (data == NULL) {
      fprintf(stderr, "Memory error\n");
      fclose(fin);
      finalize(EXIT_FAILURE);
   }
   // Read data, stopping at end of file or at the first unparsable token
   while ((count < numElements) && (fscanf(fin, "%d", &data[count]) == 1)) {
      count++;
   }
   fclose(fin);
   if (count < numElements) {
      fprintf(stderr, "Warning: expected %d elements but read only %d\n",
              numElements, count);
      numElements = count;
   }
}


// Send input to the other processors
//
// Collective: every process must call it. ROOT broadcasts numElements, each
// process allocates its working buffers, and the data is scattered so that
// the first (numElements % size) ranks receive one element more than the
// others. On return localData holds numLocalElements values.
//
// An allocation failure is reported on stderr and aborts the whole job with
// MPI_Abort: the failure may hit a single rank while the others are already
// inside the scatter, so a local shutdown could leave them waiting.
void distribute_input() {
   int i = 0;
   int div;
   int res;
   size_t capacity;
   // Send the size
   MPI_Bcast(&numElements, 1, MPI_INT, ROOT, MPI_COMM_WORLD);
   div = numElements/size;
   res = numElements%size;
   numLocalElements = (rank < res) ? (div + 1) : (div);
   // Create the local arrays. Each one has room for the whole input plus one
   // spare slot, since the exchanges in sort() can concentrate elements on a
   // single rank. The size is computed in size_t so that the +1 cannot
   // overflow an int
   capacity = (size_t)numElements + 1;
   localData = (int *)calloc(capacity, sizeof(int));
   keptData = (int *)calloc(capacity, sizeof(int));
   exchangedData = (int *)calloc(capacity, sizeof(int));
   if ((localData == NULL) || (keptData == NULL) || (exchangedData == NULL)) {
      fprintf(stderr, "Memory error\n");
      MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
   }
   // Send corresponding data
   if (rank == ROOT) {
      // Counts and offsets are only significant on the root of the scatter
      localSizes = (int *)calloc(size, sizeof(int));
      displs = (int *)calloc(size, sizeof(int));
      if ((localSizes == NULL) || (displs == NULL)) {
         fprintf(stderr, "Memory error\n");
         MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
      }
      for (i = 0; i < res; i++) {
         localSizes[i] = div + 1;
      }
      for (i = res; i < size; i++) {
         localSizes[i] = div;
      }
      displs[0] = 0;
      for (i = 1; i < size; i++) {
         displs[i] = displs[i-1] + localSizes[i-1];
      }
   }
   MPI_Scatterv(data, localSizes, displs, MPI_INT, 
           localData, numLocalElements, MPI_INT,
           ROOT, MPI_COMM_WORLD);
}


// Gather the sorted pieces from all processes into `data` on ROOT
//
// Collective: every process must call it. The pieces no longer have the sizes
// used for the initial scatter, so each process first reports its current
// element count to ROOT, which rebuilds the count and offset tables before
// the gather.
void collect_sorted() {
   int src;
   MPI_Status recvStatus;
   if (rank == ROOT) {
      // ROOT knows its own count; every other rank sends one (tag 2)
      localSizes[0] = numLocalElements;
      displs[0] = 0;
      for (src = 1; src < size; src++) {
         MPI_Recv(&localSizes[src], 1, MPI_INT, src, 2, MPI_COMM_WORLD, &recvStatus);
         // The previous rank's count is known by now, so its piece's end
         // gives the offset of this one
         displs[src] = displs[src-1] + localSizes[src-1];
      }
   } else {
      MPI_Send(&numLocalElements, 1, MPI_INT, ROOT, 2, MPI_COMM_WORLD);
   }
   // Collect data
   MPI_Gatherv(localData, numLocalElements, MPI_INT,
           data, localSizes, displs, MPI_INT,
           ROOT, MPI_COMM_WORLD);
}


// Write sorted data into file
//
// Writes numElements on the first line of "output.txt", followed by the
// first numElements entries of `data`, one per line. If the file cannot be
// opened or any write (including the final flush performed by fclose) fails,
// an error is printed on stderr and the process ends with a failure status
// through finalize().
void write_output() {
   int i = 0;
   int failed = 0;
   FILE* fout = NULL;
   // Open file
   if ((fout = fopen("output.txt", "w")) == NULL) {
      fprintf(stderr, "Error opening output file\n");
      finalize(EXIT_FAILURE);
   }
   // Write data, giving up at the first failed write
   if (fprintf(fout, "%d\n", numElements) < 0) {
      failed = 1;
   }
   for (i = 0; !failed && (i < numElements); i++) {
      if (fprintf(fout, "%d\n", data[i]) < 0) {
         failed = 1;
      }
   }
   // Buffered output is only flushed here, so a full disk may show up late
   if (fclose(fout) != 0) {
      failed = 1;
   }
   if (failed) {
      fprintf(stderr, "Error writing output file\n");
      finalize(EXIT_FAILURE);
   }
}


// Merge the kept data with the received one
//
// Inputs:  keptData[0 .. numKeptElements) and
//          exchangedData[0 .. numExchangedElements), each already sorted.
// Outputs: localData receives the merged, sorted sequence and
//          numLocalElements is set to its length.
//
// Both runs are bounded by their lengths instead of an INT_MAX sentinel:
// with a sentinel, an input that really contains INT_MAX ties with it and
// makes the cursor run past the end of the exhausted run into stale data.
void merge() {
   int out = 0;
   int posKept = 0;
   int posExchanged = 0;
   numLocalElements = numKeptElements + numExchangedElements;
   // Take the smaller head while both runs still have elements; on a tie the
   // exchanged element goes first
   while ((posKept < numKeptElements) && (posExchanged < numExchangedElements)) {
      if (keptData[posKept] < exchangedData[posExchanged]) {
         localData[out++] = keptData[posKept++];
      } else {
         localData[out++] = exchangedData[posExchanged++];
      }
   }
   // At most one of the runs has elements left; append them
   while (posKept < numKeptElements) {
      localData[out++] = keptData[posKept++];
   }
   while (posExchanged < numExchangedElements) {
      localData[out++] = exchangedData[posExchanged++];
   }
}


// Sort using MPI
//
// Collective: every process must call it. Each process sorts its own piece
// and then takes part in one exchange step per bit of the rank, from the
// highest bit down. In a step the current group agrees on a pivot, the group
// is halved, and each process swaps with the partner whose rank differs in
// that bit: the lower rank keeps the elements below the pivot, the higher
// rank keeps the rest, and both merge what they kept with what they received.
// Afterwards localData holds numLocalElements sorted values on each process.
//
// The partner is computed as rank ^ selBit, which is only guaranteed to be an
// existing rank when the number of processes is a power of two.
//
// Every communicator created here is released again with MPI_Comm_free, both
// the one replaced at each halving and the last one after the loop.
void sort() {
   int color = 0;
   int partRank = -1;
   int partSize = -1;
   int pair = 0;
   int pivot = 0;
   int pivotPos = 0;
   int selBit = 0;
   int keptLength = 0;
   int exchangeLength = 0;
   MPI_Status status;
   MPI_Comm newComm;
   MPI_Comm halfComm;
   // Create a communicator with all processes
   MPI_Comm_split(MPI_COMM_WORLD, 0, rank, &newComm);
   // Locally sort data
   qsort(localData, numLocalElements, sizeof(int), compare);
   // Sort by exchanging data between processes in groups
   for (selBit = size >> 1; selBit > 0; selBit>>= 1) {
      // Select and send pivot: the group's first process offers its median.
      // NOTE(review): if that process currently holds no elements, the pivot
      // is whatever value sits in localData[0]
      pivot = localData[numLocalElements/2];
      MPI_Bcast(&pivot, 1, MPI_INT, 0, newComm);
      // Split processes into two groups
      MPI_Comm_rank(newComm, &partRank);
      MPI_Comm_size(newComm, &partSize);
      color = (partRank < partSize/2) ? (0) : (1);
      MPI_Comm_split(newComm, color, partRank, &halfComm);
      // The parent group is not needed any more; release it before adopting
      // the half this process now belongs to
      MPI_Comm_free(&newComm);
      newComm = halfComm;
      // Partition around the pivot; keep one side and interchange the other
      pair = rank ^ (selBit);
      // pivotPos becomes the index of the first element that is >= pivot
      for (pivotPos = 0; (pivotPos < numLocalElements) && (pivot > localData[pivotPos]); pivotPos++) {
      }
      keptLength = (rank < pair) ? (pivotPos): (numLocalElements -pivotPos);
      exchangeLength = numLocalElements -keptLength;
      // Tell the partner how many elements to expect (tag 0), then swap the
      // elements themselves (tag 1)
      MPI_Sendrecv(&exchangeLength, 1, MPI_INT, pair, 0,
              &numExchangedElements, 1, MPI_INT, pair, 0, MPI_COMM_WORLD, &status);
      if (rank < pair) {
         // Lower rank: send the upper part, keep the lower part
         MPI_Sendrecv(&localData[pivotPos], exchangeLength, MPI_INT, pair, 1,
                 exchangedData, numExchangedElements, MPI_INT, pair, 1, MPI_COMM_WORLD, &status);
         memcpy(keptData, localData, sizeof(localData[0])*keptLength);
      } else {
         // Higher rank: send the lower part, keep the upper part
         MPI_Sendrecv(localData, exchangeLength, MPI_INT, pair, 1,
                 exchangedData, numExchangedElements, MPI_INT, pair, 1, MPI_COMM_WORLD, &status);
         memcpy(keptData, &localData[pivotPos], sizeof(localData[0])*keptLength);
      }
      numKeptElements = keptLength;
      // Merge the kept data with the one received
      merge();
   }
   // Release the communicator left over from the last step
   MPI_Comm_free(&newComm);
}


// Returns the wall-clock time reported by gettimeofday() as seconds,
// including the microsecond part as a fraction
double getTime() {
   struct timeval now;
   double wholeSeconds;
   double fraction;
   gettimeofday(&now, NULL);
   wholeSeconds = now.tv_sec;
   fraction = now.tv_usec/1000000.0;
   return wholeSeconds + fraction;
}


// Main function
//
// Runs the whole pipeline: ROOT reads the input, the data is distributed,
// all processes sort cooperatively (the sort alone is timed between two
// barriers), and ROOT gathers, validates and writes the result. The process
// always leaves through finalize(), so the trailing return is never reached.
int main(int argc, char **argv) {
   double startTime;
   double endTime;
   // Initialize
   MPI_Init(&argc, &argv);
   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   MPI_Comm_size(MPI_COMM_WORLD, &size);
   // sort() pairs each rank with (rank ^ bit), which only names an existing
   // process when the process count is a power of two. Refuse anything else
   // up front; every rank sees the same size, so all of them leave together
   if ((size & (size - 1)) != 0) {
      if (rank == ROOT) {
         fprintf(stderr, "Number of processes (%d) must be a power of two\n", size);
      }
      finalize(EXIT_FAILURE);
   }
   // 1. Read and distribute elements by root.
   if (rank == ROOT) {
      read_input();
   }
   distribute_input();
   // 2. Sort elements.
   // The barriers line the processes up so the interval covers the slowest one
   MPI_Barrier(MPI_COMM_WORLD);
   startTime = getTime();
   sort();
   MPI_Barrier(MPI_COMM_WORLD);
   endTime = getTime();
   if (rank == ROOT) {
      printf("Time: %lf\n", endTime -startTime);
   }
   // 3. Gather and write sorted elements by root.
   collect_sorted();
   if (rank == ROOT) {
      validate(data, numElements);
      write_output();
   }
   // Finalize
   finalize(0);
   return 0;
}
