/*************************************************************************** * * * README README README README README README README README * * * * The uncompiled sofware in this file (PROGRAM) comes with NO warranty! * * * * If you use the PROGRAM in any way, you do so at your own risk! * * * * The use of the PROGRAM and/or its source code is permitted for * * non-commercial use ONLY! * * * * July/September 2003, (c) Henning Meyerhenke * * * **************************************************************************/ /*************************************************************************** * parPrimes.C * * Author: Henning Meyerhenke * Lecture: Cluster computing, German summer term (Sommersemester) 2003 * * Programming contest, TASK: * Find all primes from 2 up to N within 5 minutes on the chair's cluster * computer with 8 Pentium 4 processors and Gigabit Ethernet; * do not store any primes other than 2 or 3 in your program, * all the others must be computed by your program * * My algorithm of choice: Parallel sieve of Eratosthenes * * Compiler: GNU GCC * Library: LAM/MPI * **************************************************************************/ // C #include #include #include // C++ #include using namespace std; // MPI #include typedef long long T_num; // global constants const bool DEBUG = false; const int SEQ_SIEVE_TO = 500000; // upper bound of sequential sieve const int ARR_SIZE = SEQ_SIEVE_TO / 2; // size of 1st bit array const int BUFFER_SIZE = 40 * 1024; // size of buffer sent for output const int INTERVAL_SIZE = (int) (BUFFER_SIZE * 1.7); // interv. for paral. sieving const int ARR_SIZE_INTERVAL = INTERVAL_SIZE / 2; // size of 2nd bit array const float FINISH_TIME = 295.5; // time after which the program terminates (not strict) int main(int argc, char ** argv) { // *** declaration and init where necessary ****************************** register int i = 1; // index of inner loop (delete multiples) register int j; // index of outer loop (do for each prime) register int prime; // current prime register int primeBy3; // current prime * 3 register int startIndex; // startIndex of current prime // sieve until sqrt(SEQ_SIEVE_TO) is reached = endLoop int endLoop = ((T_num) floor(sqrt((double) SEQ_SIEVE_TO))) >> 1; bitset < ARR_SIZE > numberArray; // bit array of odd numbers FILE * outFile; // output file char name[2]; // name of host executing the process int myRank, // rank of processors within COMM_WORL clusterSize, // number of processors within COMM_WORLD computeClusterSize, // number of processors actually computing fileDes; // file descriptor of outFile // *** ****************************************************************** // *** init ************************************************************** // MPI MPI_Init(& argc, & argv); double startTime = MPI_Wtime(); MPI_Comm_size(MPI_COMM_WORLD, & clusterSize); // number of procs MPI_Comm_rank(MPI_COMM_WORLD, & myRank); // rank of this proc computeClusterSize = clusterSize - 1; // number of procs for actual computing if (myRank == 0) { // * test if process 0 is working on the computer with the harddisk **** gethostname(name, 2); if (((name[0] != 'a') || (name[1] != 'n')) && ((name[0] != 'h') || (name[1] != 'm'))) { printf("Wrong host %s for process 0 instead of anne. Abnormal exit!", name); MPI_Finalize(); exit(1); } // * ******************************************************************* // init output file and adjust bit array outFile = fopen("parPrimeList.txt", "w"); fileDes = fileno(outFile); // 2 and 3 are prime => output and count fprintf(outFile, "2\n3\n"); } numberArray.set(); numberArray.reset(0); // *** ******************************************************************* // *** sieve all divisors of 3 ******************************************* for (j = 4; j < ARR_SIZE; j += 3) { numberArray.reset(j); } // *********************************************************************** // *** sieve starting from 5 ********************************************* for (j = 2; j <= endLoop; ++j) { if (numberArray.test(j)) { prime = (j << 1) + 1; // = 2 * j + 1 primeBy3 = (prime << 1) + prime; // primeBy3 = 3 * prime // startIndex = index of prime * prime = (((2j+1)^2)-1)/2 = 2j^2+2j startIndex = ((j * j) << 1) + (j << 1); for (i = startIndex; i < ARR_SIZE; i += primeBy3) { numberArray.reset(i); } // ** save 1/3 of the work ******************************* if (((prime + 1) % 3) == 0) { // 3 divides prime + 1 => 3 | p(p+1) = p^2+p // hence: start with p(p+2) and cross out every 6th multiple of 3 for (i = startIndex + prime; i < ARR_SIZE; i += primeBy3) { numberArray.reset(i); } } else { if (((prime + 2) % 3) == 0) { // 3 divides prime + 2 => 3 | p(p+2) // hence: start with p(p+4) and cross out every 6th multiple of 3 for (i = startIndex + (2 * prime); i < ARR_SIZE; i += primeBy3) { numberArray.reset(i); } } } // ** **************************************************** } } // *** ********************************************************************* // *** output process ****************************************************** if (myRank == 0) { // ** write primes to output file ****************************** for (j = 2; j < ARR_SIZE; ++j) { if (numberArray.test(j)) { // prime with index j is number 2 * j + 1 prime = (j << 1) + 1; fprintf(outFile, "%i\n", prime); //if (COUNT_PRIMES) ++countPrimes; } } fflush(outFile); // ** ********************************************************** // ** receive and write primes to output file ****************** // receive buffer for primes in char representation (+ 4 due to entries code) unsigned char * recvArr = new unsigned char[BUFFER_SIZE + 4]; int numEntries; // number of actual entries sent by other procs int p; // processors in loop MPI_Status status; // status variable for MPI_Recv while (true) { // for each other processor for (p = 1; p < clusterSize; ++p) { MPI_Recv(recvArr, BUFFER_SIZE + 4, MPI_CHAR, p, 1, MPI_COMM_WORLD, & status); // compute number of actual entries in recvArr numEntries = (int) (recvArr[0] << 12) + (int) (recvArr[1] << 8) + (int) (recvArr[2] << 4) + (int) (recvArr[3]); // if a processor signals time line by sending 0: leave loops if (numEntries == 0) goto finishLoops; write(fileDes, & recvArr[4], numEntries); } } // while // ** *********************************************************** finishLoops: delete[] recvArr; } // endif outputProcess // *** ********************************************************************* // *** other processors do the actual computing work: sieve higher primes ** else { // ** mask old int variables with new T_num ones ******************* T_num prime; // current prime T_num startIndex; // startIndex of current prime T_num endLoop; // = index of sqrt(myEnd) T_num i = 1; // index of inner loop (delete multiples) T_num j; // index of outer loop (do for each prime) T_num myStart = SEQ_SIEVE_TO + 1 + (myRank - 1) * INTERVAL_SIZE; T_num myEnd = SEQ_SIEVE_TO + myRank * INTERVAL_SIZE; // ** ************************************************************* // ** other variables: declaration and init *********************** // bit array for parallel sieve bitset bitArray; // prime char array which is sent to processor 0 unsigned char * primesArray = new unsigned char[BUFFER_SIZE + 4]; int iteration = 1; // counter for iterations int bufferIndex; // index within primesArray double endTime; // in order to know when to finish // compute length of character representation of future primes int charLength = 0; int temp = myStart; while (temp > 0) { ++charLength; temp /= 10; } // ** ************************************************************* // ** sieve in your interval and send the converted results to 0 ** while (true) { // init bit array (everything is prime until it's marked as multiple) bitArray.set(); // every 256th iteration if ((iteration & 0xff) == 0) { endTime = MPI_Wtime(); if ((endTime - startTime) > FINISH_TIME) { // code number of entries into the first 4 bytes primesArray[0] = 0x0; primesArray[1] = 0x0; primesArray[2] = 0x0; primesArray[3] = 0x0; // send data MPI_Send(primesArray, BUFFER_SIZE + 4, MPI_CHAR, 0, 1, MPI_COMM_WORLD); break; } } // ** sieve all divisors of 3 ***************************************** // * set j to the index of the first odd multiple of 3 within interval* j = myStart; while (j % 3 != 0) { j += 2; } // index j = smallest multiple of 3 minus myStart, divided by 2 j = (j - myStart) >> 1; // * ****************************************************************** for ( ; j < ARR_SIZE_INTERVAL; j += 3) { bitArray.reset(j); } // ** ***************************************************************** // ** sieve starting from 5 ******************************************* // stop iterating at index of sqrt(end of current interval) endLoop = ((T_num) floor(sqrt((long double) myEnd))) >> 1; for (j = 2; j <= endLoop; ++j) { if (numberArray.test(j)) { prime = (j << 1) + 1; // = 2 * j + 1 // startIndex = index of (prime * prime - myStart) // = (p^2 - myStart) / 2 if (myStart % prime == 0) { startIndex = 0; } else { // * compute proper index where to start sieving ***************** // start sieving at max(p^2, first multiple of p within interval) startIndex = max(prime * prime, (myStart / prime) * prime + prime); if (startIndex % 2 == 0) { startIndex += prime; } // number startIndex -> index startIndex: divide by 2 startIndex = (startIndex - myStart) >> 1; // * ************************************************************* } // without saving 1/3 of the work yet (easier to handle) for (i = startIndex; i < ARR_SIZE_INTERVAL; i += prime) { bitArray.reset(i); } } } // ** ******************************************************************* // ** send newly discovered primes to output process ********************* bufferIndex = 3; for (j = 0; j < ARR_SIZE_INTERVAL; ++j) { // for each prime if (bitArray.test(j)) { prime = (j << 1) + myStart; // compute char conversion for (int k = charLength; k > 0; --k) { primesArray[bufferIndex + k] = (char) (prime % 10) + '0'; prime /= 10; } // test if guess concerning the prime's length was accurate while (prime > 0) { ++charLength; prime = (j << 1) + myStart; for (int k = charLength; k > 0; --k) { primesArray[bufferIndex + k] = (char) (prime % 10) + '0'; prime /= 10; } } bufferIndex += (charLength + 1); primesArray[bufferIndex] = '\n'; } } // decrease bufferIndex by 3 because of empty slots at the beginning bufferIndex -= 3; // code number of entries into the first 4 bytes primesArray[0] = (bufferIndex >> 12); primesArray[1] = (bufferIndex >> 8) & 0xf; primesArray[2] = (bufferIndex >> 4) & 0xf; primesArray[3] = bufferIndex & 0xf; // send data MPI_Send(primesArray, BUFFER_SIZE + 4, MPI_CHAR, 0, 1, MPI_COMM_WORLD); // ** ********************************************************************* // compute new interval boundaries myStart += (computeClusterSize * INTERVAL_SIZE); myEnd += (computeClusterSize * INTERVAL_SIZE); ++iteration; } // while // ** ********************************************************************* delete[] primesArray; } // *** ********************************************************************* // *** DEBUG timing ********************************************************* if (DEBUG) { if (myRank == 0) { double endTime = MPI_Wtime(); printf("%i: Time needed: %lf\n", myRank, (endTime - startTime)); } } // *** ********************************************************************** // close output file to clean buffer if (myRank == 0) { fclose(outFile); } // clean up MPI MPI_Finalize(); return 0; }