Sophie

Sophie

distrib > Mandriva > 2007.0 > i586 > media > contrib-release > by-pkgid > 6afa3d7151a9e166f78b433b84dc16ac > files > 14

python-pypar-1.9.2-1mdk.i586.rpm

/*
  Estimate bandwidth and latency of a parallel computer using MPI.
  Ole Moller Nielsen - 1998
*/
	
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>


#define MAXI  10             /* Number of message sizes (blocks) measured */
#define MAXM  500000         /* Capacity of the message buffer, in doubles */
/* Increment in message length (in doubles) between consecutive measurements.
   Parenthesized so the macro expands safely inside any expression,
   e.g. x / BLOCK or x % BLOCK. */
#define BLOCK (MAXM/MAXI)


/*
  Least-squares fit of the straight line y = a*x + b to N data points.

  x, y : input vectors of length N (read only)
  N    : number of data points
  a, b : outputs - receive the fitted slope and intercept

  Returns a quality measure for the fit: the sum of squared residuals
  divided by the sum of x[i]^2 and by (N-2).
*/
double linfit(double* x, double* y, int N, double* a, double* b) 
{
  double sum_x = 0, sum_y = 0;     /* plain sums of the two vectors      */
  double mean_x = 0;               /* average of x                       */
  double ss_dev = 0;               /* sum of squared deviations of x     */
  double resid_ss = 0, x_norm = 0; /* residual and x sums of squares     */
  int k;

  /* Pass 1: accumulate the sums of x and y */
  k = 0;
  while (k < N) {
    sum_x += x[k];
    sum_y += y[k];
    k++;
  }
  mean_x = sum_x/N;

  /* Pass 2: slope = sum((x - mean_x)*y) / sum((x - mean_x)^2),
     accumulated directly in the output slot */
  *a = 0.0;
  for (k = 0; k < N; k++) {
    double dev = x[k] - mean_x;

    ss_dev += dev*dev;
    *a += dev*y[k];
  }
  *a /= ss_dev;

  /* The intercept follows from the sums: b = (sum_y - a*sum_x)/N */
  *b = (sum_y - sum_x*(*a))/N;

  /* Pass 3: residuals of the fitted line and the squared norm of x */
  for (k = 0; k < N; k++) {
    double r = y[k] - (*a)*x[k] - (*b);

    x_norm += x[k]*x[k];
    resid_ss += r*r;
  }

  /* Variance estimate sum_i r_i^2/(N-2), scaled by the squared norm of x */
  return resid_ss/x_norm/(N-2);
}

/*
  Benchmark driver.

  Messages of MAXI different lengths are passed once around a ring of all
  MPI processes (0 -> 1 -> ... -> procs-1 -> 0).  Process 0 times every
  round trip, subtracts the measured timer overhead and divides by the
  number of processes to get a per-hop time.  Each measurement is repeated
  `repeats` times; process 0 then prints min/avg/max timings per message
  size and fits a straight line to the best timings with linfit().

  Requires at least two processes; otherwise the job is aborted.
  Returns 0 on normal completion.
*/
int main(int argc, char **argv) 
{
   int repeats = 10, msgid = 0;
   int myid, procs;
   int i,j,k,m;

   double t1, t2;
   double cpuOH = 0.0;    /* Timer overhead; only measured and used on process 0 */
   double Tbw, Tlat;
   double varest;
      
   int noelem[MAXI];      /* Message length (in doubles) per measurement */
   double bytes[MAXI];    /* Message length in bytes                     */
   double mintime[MAXI];  /* Best per-hop time                           */
   double maxtime[MAXI];  /* Worst per-hop time                          */
   double avgtime[MAXI];  /* Sum, later average, of per-hop times        */

   /* Message buffer.  Static storage: MAXM doubles (about 4 MB with 8-byte
      doubles) is too large to place safely on the stack. */
   static double A[MAXM]; 

   int  namelen;
   char processor_name[MPI_MAX_PROCESSOR_NAME];
   
   MPI_Status stat;
  
   
   /* Initialize */

   MPI_Init(&argc,&argv);
   MPI_Comm_size(MPI_COMM_WORLD,&procs);
   MPI_Comm_rank(MPI_COMM_WORLD,&myid);
   MPI_Get_processor_name(processor_name,&namelen);
   
   if (myid==0)
   {
     printf("MAXM = %d, number of processors = %d\n",MAXM,procs);         
     printf("Measurements are repeated %d times for reliability\n",repeats);
   } 
   
   /* The ring needs a distinct neighbour to send to */
   if (procs < 2) {
     printf("Program needs at least two processors - aborting\n");
     MPI_Abort(MPI_COMM_WORLD,999);
   }
   
   MPI_Barrier(MPI_COMM_WORLD); /* Synchronize */   
   printf("I am process %d on %s\n",myid,processor_name);   
      
   /* Fill the message buffer with arbitrary data */
   for (j=0; j<MAXM; j++) 
   {
      A[j]=rand();
   }   

   /* Reset statistics; min/max start at sentinels far outside the range
      of any plausible timing (in seconds) */
   for (i=0; i<MAXI; i++) 
   {
      avgtime[i] =  0;         
      mintime[i] =  1000000;      
      maxtime[i] = -1000000;            
   }
   
   /* Determine timer overhead: smallest gap between two consecutive
      MPI_Wtime() calls */
   if (myid == 0) {   
     cpuOH = 1.0;
     for (k=0; k<repeats; k++)   /* Repeat to get reliable timings */
     { 
       t1 = MPI_Wtime();
       t2 = MPI_Wtime();
       if (t2-t1 < cpuOH) cpuOH = t2-t1;
     }  
     printf("Timing overhead is %f seconds\n\n", cpuOH);              
   }

   
        
   /* Pass msg circularly */
     
   for (k=0; k<repeats; k++) {
     if (myid == 0) {  
       printf("Run %d of %d\n", k+1, repeats);
     }  

     for (i=0; i<MAXI; i++) {
       /* Message lengths are 1, BLOCK+1, 2*BLOCK+1, ... doubles */
       m=BLOCK*i+1;       
      
       noelem[i] = m;
      
       MPI_Barrier(MPI_COMM_WORLD); /* Synchronize */
      
       if (myid == 0) {
         /* Process 0 starts the ring and times the complete round trip */
         t1=MPI_Wtime();
         MPI_Send(&A[0],m,MPI_DOUBLE,1,msgid,MPI_COMM_WORLD);
         MPI_Recv(&A[0],m,MPI_DOUBLE,procs-1,msgid,MPI_COMM_WORLD,&stat);
         t2=MPI_Wtime() - t1 - cpuOH;
         t2 = t2/procs;               /* Time per hop (procs hops per round) */
         avgtime[i] = avgtime[i] + t2;
         if (t2 < mintime[i]) mintime[i] = t2;
         if (t2 > maxtime[i]) maxtime[i] = t2;	 
       } else {
         /* Everyone else forwards the message to the next process */
         MPI_Recv(&A[0],m,MPI_DOUBLE,myid-1,msgid,MPI_COMM_WORLD,&stat);
         MPI_Send(&A[0],m,MPI_DOUBLE,(myid+1)%procs,msgid,MPI_COMM_WORLD);
       }
     } 
   }

   /* Only process 0 holds the timings, so only it reports */
   if (myid == 0) {
     printf("Bytes transferred   time (micro seconds)\n");
     printf("                    min        avg        max \n");     
     printf("----------------------------------------------\n");      
        
     for (i=0; i<MAXI; i++) {
       avgtime[i] = avgtime[i]/repeats*1.0e6; /*Average micro seconds*/
       mintime[i] = mintime[i]*1.0e6;         /*Min micro seconds*/       
       maxtime[i] = maxtime[i]*1.0e6;         /*Max micro seconds*/              
             
       bytes[i] = (double) 8*noelem[i];       /* 8 bytes per element */
	 
       printf("%10d    %10d %10d %10d\n",
	 (int) bytes[i], (int) mintime[i], (int) avgtime[i], (int)maxtime[i]); 
     }		 
   
     /* Fit mintime = Tbw*bytes + Tlat; timings are in micro seconds, so
        Tbw is in micro seconds per byte */
     varest=linfit(bytes, mintime, MAXI, &Tbw, &Tlat);
     printf("\nLinear regression on best timings (t = t_l + t_b * bytes):\n");

     printf("  t_b = %f\n  t_l = %f\n", Tbw, Tlat);
     printf("  Estimated relative variance = %.9f\n\n",varest);     

     /* 1/Tbw is bytes per micro second */
     printf("Estimated bandwith (1/t_b):  %.3f Mb/s\n", (1.0/Tbw));         

     /* Latency estimate: best time for the smallest message minus its
        estimated transfer time */
     printf("Estimated latency:           %d micro s\n", 
             (int) (mintime[0] - (float) bytes[0]* (float)Tbw));         
     
   }

   MPI_Finalize();
   return 0;
}