/*----------------------------------------------------*/ /* Saleh Elmohamed */ /* SOR implementation. */ /* 1996 */ /*----------------------------------------------------*/ #include #include #include #include #include #define DEBUGLEVEL 0 /* ********************************************************************* * Red/Black SOR solution to Laplaces' Equation. * MPI Implementation. ********************************************************************* */ int Procs; /* Number of processors */ int Sprocs; /* Square root of number of processors */ time_t start, stop; double Xmin; /* X min boundary of square */ double Xmax; /* X max boundary of square */ double Ymin; /* Y min boundary of square */ double Ymax; /* Y max boundary of square */ double h; /* grid spacing */ double omega; /* overrelaxation parameter */ double tolerance; /* convergence test value */ int NGridH; /* Number of points in the horizontal grid */ int NGridV; /* Number of points in the vertical grid */ int NGridH_Int; /* Number of interior points in the horizontal grid */ int NGridV_Int; /* Number of interior points in the vertical grid */ int BlockLenH; /* length Horizontal block (local grid) */ int BlockLenV; /* length Vertical block (local grid) */ int RowNumber; /* Number of Rows (local grid with guard ring) */ int ColNumber; /* Number of Colums (local grid with guard ring) */ struct nodestruct { /* Information regarding the processor */ int rank; /* Processor id 0..Procs-1 */ /* My position in the processors grid */ int row; /* row 0..Sprocs-1 */ int col; /* col 0 ..Sprocs-1*/ /* My neighbours 0..Procs-1, -1 if no such neighbour*/ int north; /* North neighbour */ int south; /* South neighbour */ int west; /* West neighbour */ int east; /* Eats neighbour */ }; struct nodestruct mynode; /* Derived data types used to shift matrix data */ MPI_Datatype MPI_InteriorPointsRow; MPI_Datatype MPI_InteriorPointsCol; /* Local procedures forward defined */ void PrintParameters(); int InitGlobalData(int myId); int InitBoundaryPhi(double Phi[],double *Average); void InitInteriorPhi(double Phi[],double Average); void RedBlackSORIteration(double Phi[], double omega); int IterationConvergency(double Phi[],double PhiOld[],double *globaldiff); int ErrorToTheoretical(double Phi[],double *maxerror, double *averageerror); double U(int i, int j,double h); /* ********************************************************************* * The main application: perform the initializations, perform the * iteration and test for convergence ********************************************************************* */ void main(int argc, char *argv[]) { int ierr; /* MPI calls return */ int myid; /* My rank in the global comunicator */ double BoundaryAverage; /* The average of boundary values */ double *Phi; /* The local matrix in this processor the size is determined from the parameters and the matrix is dynamically allocated */ double *PhiOld; /* A copy of the matrix used to test convergence */ int iterations; /* Counter of iterations */ double maxiterations; /* in case of infinite loops */ double globaldiff; /* The global convergence test value */ int done; /* loop control */ double maxerror; /* Max difference to theoretical result U */ double averageerror; /* Average difference to U */ #if (DEBUGLEVEL>0) printf("DEBUGLEVEL = %d\n",DEBUGLEVEL); #endif Xmin = 0.0; Xmax = 2.0; Ymin = 0.0; Ymax = 1.0; h = (double)0.02; tolerance = (double)2e-6; ierr = MPI_Init(&argc,&argv); ierr |= MPI_Comm_size(MPI_COMM_WORLD,&Procs); ierr |= MPI_Comm_rank(MPI_COMM_WORLD,&myid); if (ierr != MPI_SUCCESS) { printf("Error initializing MPI ....\n"); } else if (InitGlobalData(myid) != 0) { /* Processor 0, Displays all the configuration */ if (mynode.rank ==0) { PrintParameters(); } #if (DEBUGLEVEL>=1) printf("I am processor %d <%d,%d> and my neighbours are :\n", mynode.rank,mynode.row,mynode.col); printf("North %d South %d West %d East %d \n", mynode.north,mynode.south,mynode.west,mynode.east); #endif /* Allocate the local matrices */ Phi = (double *) malloc(sizeof(double)*RowNumber*ColNumber); PhiOld = (double *) malloc(sizeof(double)*RowNumber*ColNumber); #if (DEBUGLEVEL>=1) /* When debugging just do one value of omega */ omega = (double)1.7; #else for(omega=(double)1.0;omega+h<(double)2.0;omega+=h){ #endif start = time(NULL); /* Perform the initialization of the "global" matrix.*/ InitBoundaryPhi(Phi,&BoundaryAverage); InitInteriorPhi(Phi,BoundaryAverage); /* Perform SOR iteration */ iterations = 0; done = 0; maxiterations = pow((double)NGridH,(double)2.0); while (done == 0) { iterations ++; memcpy(PhiOld,Phi,sizeof(double)*RowNumber*ColNumber); RedBlackSORIteration (Phi,omega); /* Check convergency */ IterationConvergency(Phi,PhiOld,&globaldiff); if (globaldiff < tolerance) done = 1; /* avoid infinite loop in the algorithm diverges */ if (iterations > maxiterations) done = 1; } /* Processor 0, Displays the results */ ErrorToTheoretical(Phi,&maxerror,&averageerror); stop = time(NULL); if (mynode.rank ==0) { printf("********************************************\n"); printf("Overrelaxation parameter omega = %lf\n",omega); printf(".... Iterations = %d in %4.2f seconds ....\n", iterations, difftime(stop, start)); printf("Maximal error to theoretical result %lf\n", maxerror); printf("Average error to theoretical result %lf\n", averageerror); } } MPI_Type_free(&MPI_InteriorPointsRow); MPI_Type_free(&MPI_InteriorPointsCol); } MPI_Finalize(); } /* ********************************************************************* * Prints all the parameters of the configuration ********************************************************************* */ void PrintParameters() { printf("***************************************************\n"); printf("*** Red/Black SOR solution of Laplaces'equation ***\n"); printf("Number of processors %dx%d=%d \n",Sprocs,Sprocs,Procs); printf("Boundary Rectangle X %lf-%lf\n",Xmin,Xmax); printf("Boundary Rectangle Y %lf-%lf\n",Ymin,Ymax); printf("Grid spacing h= %lf\n",h); printf("Tolerance (convergence criteria) %lf\n",tolerance); printf("Grid size %dx%d Interior Points %dx%d\n", NGridV,NGridH,NGridV_Int,NGridH_Int); printf("Local grid to each processor %dx%d(interior %dx%d)\n", RowNumber,ColNumber,BlockLenV,BlockLenH); } /* ********************************************************************* * Initializes all the global variables. Check the parameters to see * wether it is posible to do a (block,block) map of the interior * points of the grid to the SProcsxSprocs grid of processor ( The * mapped regions must have the same size in all processors to make * programming easier) ********************************************************************* * The relationship between the different parameters can be * visualized in the following diagram: * * * NGridH * ------------------------------ * ColNumber * --------------- * BlockLenH Proc 1<0,1> * [Xmin,Ymin] ------------- [Xmax,Ymin] * ------------------------------ | | * Proc 0<0,0> |(1,1) (1,2) | (1,1) (1,2) | | | | * |(2,1) (2,2) | (2,1) (2,2) | | | | * |--------------------------- | BlockLenV | | * Proc 2<1,0> |(1,1) (1,2) | (1,1) (1,2) | RowNumber | * |(2,1) (2,2) | (2,1) (2,2) | | * ------------------------------ | * NGridV * [Xmin,Ymax] Proc 3<1,1> [Xmin,Ymax] * * The conventions are: * horizontal = span cols = X-axis * vertical = span rows = Y-axis * * [..] Problem coordinates(non-discrete) NORTH * <..> Processor coordinates WEST EAST * (..) Interior points coordinates SOUTH * - Boundary value/ Guard ring * | Boundary value/ Guard Ring */ int InitGlobalData(int myId) { int ierr; Sprocs = (int)sqrt((double)Procs); if (Procs != (Sprocs*Sprocs)) { printf("Processors must arrange in a square grid\n"); printf("Invalid number of processors %d (%d)\n",Procs,Sprocs); return 0; } if (h==0) { printf("Choose an spacing value for the grid h=%lf\n",h); return 0; } NGridH = ((Xmax-Xmin)+(0.2*h))/h; NGridV = ((Ymax-Ymin)+(0.2*h))/h; if ((NGridH<=2)||(NGridV<=2)) { printf("Data grid must be at least 3 points in any direction\n"); printf("The region is [%lf-%lf][%lf-%lf] spacing %lf\n", Xmin,Xmax,Ymin,Ymax,h); printf("The correspondent grid is %dx%d\n",NGridH,NGridV); return 0; } NGridH_Int = NGridH-2; NGridV_Int = NGridV-2; if( ((NGridH_Int % Sprocs) != 0) || ((NGridV_Int % Sprocs) != 0) ) { printf("The interior points grid is %dx%d\n", NGridH_Int,NGridV_Int); printf("and cannot be exactly mapped to the processor grid %dx%d\n", Sprocs,Sprocs); return 0; } else { BlockLenH = (NGridH_Int/Sprocs); BlockLenV = (NGridV_Int/Sprocs); ColNumber = BlockLenH + 2; RowNumber = BlockLenV + 2; } ierr = MPI_Type_contiguous(BlockLenH, MPI_DOUBLE, &MPI_InteriorPointsRow); ierr |= MPI_Type_vector(BlockLenV,1,ColNumber,MPI_DOUBLE, &MPI_InteriorPointsCol); ierr |= MPI_Type_commit(&MPI_InteriorPointsRow); ierr |= MPI_Type_commit(&MPI_InteriorPointsCol); if (ierr != MPI_SUCCESS) { printf("Error creating derived data types %d\n",ierr); return 0; } { int r = myId / Sprocs; int c = myId % Sprocs; mynode.rank = myId; mynode.row = r; mynode.col = c; mynode.north = (r==0) ? -1 : ((r-1)*Sprocs)+c; mynode.south = (r==Sprocs-1) ? -1 : ((r+1)*Sprocs)+c; mynode.west = (c==0) ? -1 : (r*Sprocs)+(c-1); mynode.east = (c==Sprocs-1) ? -1 : (r*Sprocs)+(c+1); } return 1; } /* ********************************************************************* * Initializes the boundary values in matrix PHI. This can be done in * parallel by corner and edge processors. We also calculate in parallel * the average of the boundary values (This implies a reduction * operation) ********************************************************************* */ int InitBoundaryPhi(double Phi[], double *Average) { int ierr; double localsum = (double)0.0; double globalsum = (double)0.0; int i,j; /* Initializes the top boundary */ if (mynode.north == -1) { i = 0; #if (DEBUGLEVEL>=1) printf("I am <%d,%d> and will init row %d\n", mynode.row,mynode.col,i); #endif for (j=0;j=1) printf("I am <%d,%d> and will init row %d\n", mynode.row,mynode.col,i); #endif for (j=0;j=1) printf("I am <%d,%d> and will init col %d\n", mynode.row,mynode.col,j); #endif for (i=1;i=1) printf("I am <%d,%d> and will init col %d\n", mynode.row,mynode.col,j); #endif for (i=1;i=1) printf("I am <%d,%d> init-boundary local %lf global %lf average %lf\n", mynode.row,mynode.col,localsum,globalsum,*Average); #endif return MPI_SUCCESS; } /* ********************************************************************* * Initializes the internal values and the guard rings in matrix PHI. * This can be done in parallel by all processors. We have to be * aware that some of the edges of the Guard Ring in boundary * processors have already been initialized. ********************************************************************* */ void InitInteriorPhi(double Phi[], double Average) { int i,j; /* Initializes the top guard */ if (mynode.north != -1) { i = 0; #if (DEBUGLEVEL>=1) printf("I am <%d,%d> and will average-init row %d\n", mynode.row,mynode.col,i); #endif for (j=0;j=1) printf("I am <%d,%d> and will average-init row %d\n", mynode.row,mynode.col,i); #endif for (j=0;j=1) printf("I am <%d,%d> and will average-init col %d\n", mynode.row,mynode.col,j); #endif for (i=1;i=1) printf("I am <%d,%d> and will average-init col %d\n", mynode.row,mynode.col,j); #endif for (i=1;i=1) printf("I am <%d,%d> and will average-init (%lf) internal points\n", mynode.row,mynode.col,Average); #endif /* Initializes internal points */ for (i=1;i=2) printf("I am <%d,%d> and sent col (%d,%d) to %d\n", mynode.row,mynode.col,i_send,j_send,mynode.west); #endif } else if (mynode.west == -1) { ierr = MPI_Recv(&Phi[(i_recv*ColNumber)+j_recv], 1,MPI_InteriorPointsCol, mynode.east,tag, MPI_COMM_WORLD, &status); #if (DEBUGLEVEL>=2) printf("I am <%d,%d> and received col (%d,%d) from %d\n", mynode.row,mynode.col,i_recv,j_recv,mynode.east); #endif } else { ierr = MPI_Sendrecv(&Phi[(i_send*ColNumber)+j_send], 1, MPI_InteriorPointsCol, mynode.west,tag, &Phi[(i_recv*ColNumber)+j_recv], 1,MPI_InteriorPointsCol, mynode.east,tag, MPI_COMM_WORLD, &status); #if (DEBUGLEVEL>=2) printf("I am <%d,%d> and sent col (%d,%d) to %d and received (%d,%d) from %d\n", mynode.row,mynode.col,i_send,j_send,mynode.west, i_recv,j_recv,mynode.east); #endif } if (ierr != MPI_SUCCESS) { printf("Error in left shift communication %d \n",ierr); } /* Right shift : I send East Interior Column I receive West Guard Column */ tag = RIGHT_SHIFT; i_send = 1; j_send = ColNumber-2; i_recv = 1; j_recv = 0; if (mynode.west == -1) { ierr = MPI_Send(&Phi[(i_send*ColNumber)+j_send], 1, MPI_InteriorPointsCol, mynode.east,tag, MPI_COMM_WORLD); #if (DEBUGLEVEL>=2) printf("I am <%d,%d> and sent col (%d,%d) to %d\n", mynode.row,mynode.col,i_send,j_send,mynode.east); #endif } else if (mynode.east == -1) { ierr = MPI_Recv(&Phi[(i_recv*ColNumber)+j_recv], 1,MPI_InteriorPointsCol, mynode.west,tag, MPI_COMM_WORLD, &status); #if (DEBUGLEVEL>=2) printf("I am <%d,%d> and received col (%d,%d) from %d\n", mynode.row,mynode.col,i_recv,j_recv,mynode.west); #endif } else { ierr = MPI_Sendrecv(&Phi[(i_send*ColNumber)+j_send], 1, MPI_InteriorPointsCol, mynode.east,tag, &Phi[(i_recv*ColNumber)+j_recv], 1,MPI_InteriorPointsCol, mynode.west,tag, MPI_COMM_WORLD, &status); #if (DEBUGLEVEL>=2) printf("I am <%d,%d> and sent col (%d,%d) to %d and received (%d,%d) from %d\n", mynode.row,mynode.col,i_send,j_send,mynode.east, i_recv,j_recv,mynode.west); #endif } if (ierr != MPI_SUCCESS) { printf("Error in right shift communication %d \n",ierr); } /* Up shift : I send North Interior Column I receive South Guard Column */ tag = UP_SHIFT; i_send = 1; j_send = 1; i_recv = RowNumber-1; j_recv = 1; if (mynode.south == -1) { ierr = MPI_Send(&Phi[(i_send*ColNumber)+j_send], 1, MPI_InteriorPointsRow, mynode.north,tag, MPI_COMM_WORLD); #if (DEBUGLEVEL>=2) printf("I am <%d,%d> and sent row (%d,%d) to %d\n", mynode.row,mynode.col,i_send,j_send,mynode.north); #endif } else if (mynode.north == -1) { ierr = MPI_Recv(&Phi[(i_recv*ColNumber)+j_recv], 1,MPI_InteriorPointsRow, mynode.south,tag, MPI_COMM_WORLD, &status); #if (DEBUGLEVEL>=2) printf("I am <%d,%d> and received row (%d,%d) from %d\n", mynode.row,mynode.col,i_recv,j_recv,mynode.south); #endif } else { ierr = MPI_Sendrecv(&Phi[(i_send*ColNumber)+j_send], 1, MPI_InteriorPointsRow, mynode.north,tag, &Phi[(i_recv*ColNumber)+j_recv], 1,MPI_InteriorPointsRow, mynode.south,tag, MPI_COMM_WORLD, &status); #if (DEBUGLEVEL>=2) printf("I am <%d,%d> and sent row (%d,%d) to %d and received (%d,%d) from %d\n", mynode.row,mynode.col,i_send,j_send,mynode.north, i_recv,j_recv,mynode.south); #endif } if (ierr != MPI_SUCCESS) { printf("Error in up shift communication %d \n",ierr); } /* Down shift : I send South Interior Column I receive North Guard Column */ tag = DOWN_SHIFT; i_send = RowNumber-2; j_send = 1; i_recv = 0 ; j_recv = 1; if (mynode.north == -1) { ierr = MPI_Send(&Phi[(i_send*ColNumber)+j_send], 1, MPI_InteriorPointsRow, mynode.south,tag, MPI_COMM_WORLD); #if (DEBUGLEVEL>=2) printf("I am <%d,%d> and sent row (%d,%d) to %d\n", mynode.row,mynode.col,i_send,j_send,mynode.south); #endif } else if (mynode.south == -1) { ierr = MPI_Recv(&Phi[(i_recv*ColNumber)+j_recv], 1,MPI_InteriorPointsRow, mynode.north,tag, MPI_COMM_WORLD, &status); #if (DEBUGLEVEL>=2) printf("I am <%d,%d> and received row (%d,%d) from %d\n", mynode.row,mynode.col,i_recv,j_recv,mynode.north); #endif } else { ierr = MPI_Sendrecv(&Phi[(i_send*ColNumber)+j_send], 1, MPI_InteriorPointsRow, mynode.south,tag, &Phi[(i_recv*ColNumber)+j_recv], 1,MPI_InteriorPointsRow, mynode.north,tag, MPI_COMM_WORLD, &status); #if (DEBUGLEVEL>=2) printf("I am <%d,%d> and sent row (%d,%d) to %d and received (%d,%d) from %d\n", mynode.row,mynode.col,i_send,j_send,mynode.south, i_recv,j_recv,mynode.north); #endif } if (ierr != MPI_SUCCESS) { printf("Error in down shift communication %d \n",ierr); } return ierr; } /* ********************************************************************* * Perform a cycle of the Red/Black SOR on matrix Phi. * Updated values are overwritten in the matrix. The caller procedure * must keep a "before image" of the matrix for testing convergence. * This procedure implements the 2 phases (red and black) of each * iteration. * * The algorithm implemented is the one described in * "Solving Problems on Concurrent Processors, Vol 1" G. Fox et al. * Prentice Hall, 1988 * * In this algorithm the rectangular region mapped to each processor * is in turn subdivided into four regions of two kinds (Red and Black). * A parallel SOR update is first realized updating the Red regions and * then Black points. ********************************************************************* */ void RedBlackSORIteration(double Phi[], double omega) { /* Interior points middle row-zero relative,beware of guard ring */ int middlerow = (BlockLenV/2); /* Interior points middle col-zero relative,beware of guard ring */ int middlecol = (BlockLenH/2); int i,j; /* Update Red UpperLeft region */ for (i=1;i<=middlerow;i++) for (j=1;j<=middlecol;j++) Phi[(i*ColNumber)+j] = (((double)1.0 - omega)*(Phi[(i*ColNumber)+j])) + ( omega * ((double)0.25) * ( Phi[((i-1)*ColNumber)+j] + Phi[(i*ColNumber)+(j-1)] + Phi[((i+1)*ColNumber)+j] + Phi[(i*ColNumber)+(j+1)] )) ; /* Update Red LowerRight region */ for (i=middlerow+1;i localdiff) ? diff : localdiff; } /* computes global maximum difference, from max in all processors This is a reduction operation. */ ierr = MPI_Allreduce(&localdiff,globaldiff, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); if (ierr != MPI_SUCCESS) printf("Error in reduction to calculate convergence %d\n",ierr); #if (DEBUGLEVEL>=2) printf("I am <%d,%d> and testing convergency local %lf global %lf\n", mynode.row,mynode.col,localdiff,*globaldiff); #endif return ierr; } /* ********************************************************************* * Calculate the max and average difference between the result of the * algorithm and the theoretical result. ********************************************************************* */ int ErrorToTheoretical(double Phi[], double *maxerror, double *averageerror) { int ierr; double localmax = 0.0; double localsum = 0.0; double globalsum = 0.0; double diff; int i,j; /* computes the local sum and max */ for (i=1;i localmax) ? diff : localmax; localsum += diff; } /* computes the local sum and max from the values in all processors This is a reduction operation */ ierr = MPI_Allreduce(&localmax,maxerror, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); ierr |= MPI_Allreduce(&localsum,&globalsum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); *averageerror = globalsum / (double) (NGridH_Int*NGridV_Int); if (ierr != MPI_SUCCESS) printf("Error in reduction to compare to U %d\n",ierr); #if (DEBUGLEVEL>=1) printf("I am <%d,%d> error lmax %lf gmax %lf lsum %lf gsum %lf aver %lf\n", mynode.row,mynode.col,localmax,*maxerror, localsum,globalsum,*averageerror); #endif return ierr; } /* ********************************************************************* * The Steady-state theoretical heat distribution ********************************************************************* */ double U(int i, int j,double h) { double x = i * h; double y = j * h; double Uxy; Uxy = (double)(-4.7) + ((double)0.2 * x) - ((double)0.3 * y) + ((double)0.09 * (pow(x,(double)2.0) - pow(y,(double)2.0))) + (log(sqrt( pow((x+(double)0.2),(double)2.0)+ pow((y+(double)0.2),(double)2.0) ) ) ); #if (DEBUGLEVEL>=3) printf("I am <%d,%d> calculating U(%d,%d,%lf)=%lf\n", mynode.row,mynode.col,i,j,h,Uxy); #endif return Uxy; }