/*
 * ADC data-set support code: parameter handling, small-integer
 * factorization tables, view-size estimation and binary tuple output.
 *
 * NOTE(review): this copy is collapsed onto a few very long lines and
 * appears to have lost every span that sat between a '<' and a later '>'
 * (header names after #include, most loop conditions, the tail of
 * ListFirstPrimes, the head of the LCM routine, the middle of GetNextTuple
 * and the head of GenerateADC).  The notes below describe only what is
 * still visible; restore the file from a clean copy before changing code.
 *
 * Byte-order helpers
 *   swap4(num) / swap8(num)
 *       Reverse, in place, the 4 (resp. 8) bytes starting at num.
 *
 * Parameters (ADC_PAR and adcKeyword[] are not defined in this file;
 * presumably they come from "adc.h" -- confirm)
 *   initADCpar(par)
 *       Defaults: ndid=0, dim=5, mnum=1, tuplenum=100, inverse_endian=0,
 *       filename="ADC" (a string literal), clss='U'.
 *   ParseParFile(parfname, par)
 *       Reads par->ndid from the text after the second '.' in parfname,
 *       then scans the file line by line (at most BlockSize chars each).
 *       A line containing '#' anywhere is ignored.  Otherwise the first
 *       adcKeyword[i] found in the line selects the field, and the value
 *       is read from line+strlen(keyword)+1:
 *         0 dim, 1 mnum, 2 tuplenum, 3 (ignored), 4 inverse_endian,
 *         5 filename (newly malloc'ed), 6 clss.
 *       The keyword scan stops at a null entry, so adcKeyword[] must be
 *       null-terminated.  After parsing, class S/W/A/B overwrites
 *       dim/mnum/tuplenum with 5/1/1000, 10/1/100000, 15/1/1000000 and
 *       20/1/10000000.  Returns 1, or 0 if the file cannot be opened.
 *       NOTE(review): the filename buffer is strlen(pos) bytes with no
 *       explicit room for the terminator; it fits only when pos carries a
 *       trailing newline or other whitespace -- confirm.
 *   WriteADCPar(par, fname)
 *       Writes attrNum, measuresNum, tuplesNum, class, INVERSE_ENDIAN and
 *       fileName as key=value lines.  Returns 1, or 0 if the file cannot
 *       be opened.  NOTE(review): lname is calloc'ed and never freed on
 *       either path.
 *   ShowADCPar(par)
 *       Prints the parameter set to stdout.
 *
 * Factorization tables
 *   Factorization holds parallel arrays mlt[] (prime factors) and exp[]
 *   (their exponents) of length dim.  Unused slots hold mlt=1, exp=0, and
 *   the first mlt==1 is treated as the end of the list.
 *   ShowFactorization(nmbfct)
 *       Prints the factors to stdout separated by '*', as "p" when the
 *       exponent is 1 and "p^e" otherwise; prints "prime." when the very
 *       first slot is already 1; always ends with a newline.
 *   ListFirstPrimes(mpr, prlist)
 *       Seeds prlist with 2, 3, 5, 7 and continues from 8; the rest of
 *       the body is missing from this copy.
 *   (routine whose head is missing)
 *       Walks the bits of mask; for each set bit i it takes
 *       pr=adcprime[i] and genexp=adcexpons[i], compares the
 *       factorization of pr-1 with that of genexp and updates expons[].
 *       It then multiplies LCM by i once per remaining exponent.
 *       NOTE(review): the early "return LCM" taken when LCM exceeds
 *       LARGE_NUM/maxprmfctr skips free(expons).
 *   ExtendFactors(nmb, firstdiv, nmbfct, fctlist)
 *       Derives nmbfct from fctlist[nmb/firstdiv].  When firstdiv is
 *       already a factor its exponent is incremented; factors met while
 *       firstdiv is still unused are stored one slot later; if the
 *       terminator is reached with firstdiv unused it is stored at index
 *       multnum with exponent 1.
 *   GetFactorization(prnum, prlist, fctlist)
 *       Builds fctlist[2] and fctlist[3] by hand.  For each later i it
 *       allocates a Factorization and looks for the first prlist[j] with
 *       prlist[j]*prlist[j] <= i that divides i.  A perfect square of
 *       that prime is stored directly with exponent 2, any other
 *       composite goes through ExtendFactors, and a number with no such
 *       divisor is stored as itself with exponent 1.
 *       NOTE(review): each node is allocated as 2*sizeof(Factorization)
 *       although only one struct is used.
 *
 * View sizes
 *   DC_view pairs a view size (vsize) with its index (vidx).
 *   CompareSizesByValue(sz0, sz1)
 *       qsort-style comparator on long long values: returns 1, -1 or 0
 *       from the sign of the difference.
 *   CompareViewsBySize(vw0, vw1)
 *       qsort-style comparator on DC_view: by vsize, then by vidx.
 *   CalculateVeiwSizes(par)
 *       Builds the prime and factorization tables (UpPrimeLim entries),
 *       fills one DC_view per view with its size capped at
 *       par->tuplenum, frees the tables, sorts the views and writes them
 *       to "<filename>.view.sz.<ndid>" followed by byte and tuple totals
 *       (each view contributes (8+4*nViewDims)*vsize bytes).  Returns 1,
 *       or 0 if the output file cannot be opened.
 *       NOTE(review): the failure path returns without freeing dcview or
 *       vszefname, and the unsigned long long totals are printed with
 *       %lld.
 *
 * Tuple generation
 *   adcprime[], adcgen[], adcexp[], adcexpS/W/A/B[] are constant tables
 *   indexed by attribute; GetNextTuple selects the exponent table by
 *   class (adcexp when the class is none of S/W/A/B).
 *   GetNextTuple(dcdim, measnum, attr, meas, clss)
 *       Keeps static state (tuplenum, seed[20]) across calls.  Prints an
 *       error and returns 0 when dcdim > 20 or measnum > 31415.  The
 *       generation logic itself is missing from this copy.
 *   GenerateADC(par)
 *       Opens "<filename>.dat.<ndid>" in "wb+" mode and, per tuple,
 *       writes each measure as 8 bytes and each attribute as 4 bytes,
 *       byte-swapped first when par->inverse_endian==1.  It then closes
 *       the file, frees its buffers and calls CalculateVeiwSizes(par).
 *       Returns 1, or 0 when the file cannot be opened or GetNextTuple
 *       fails.  NOTE(review): both failure paths return without
 *       releasing the buffers allocated before them.
 */
#include #include #include #include "adc.h" #define BlockSize 1024 void swap4(void * num){ char t, *p; p = (char *) num; t = *p; *p = *(p + 3); *(p + 3) = t; t = *(p + 1); *(p + 1) = *(p + 2); *(p + 2) = t; } void swap8(void * num){ char t, *p; p = (char *) num; t = *p; *p = *(p + 7); *(p + 7) = t; t = *(p + 1); *(p + 1) = *(p + 6); *(p + 6) = t; t = *(p + 2); *(p + 2) = *(p + 5); *(p + 5) = t; t = *(p + 3); *(p + 3) = *(p + 4); *(p + 4) = t; } void initADCpar(ADC_PAR *par){ par->ndid=0; par->dim=5; par->mnum=1; par->tuplenum=100; /* par->isascii=1; */ par->inverse_endian=0; par->filename="ADC"; par->clss='U'; } int ParseParFile(char* parfname,ADC_PAR *par); int GenerateADC(ADC_PAR *par); typedef struct Factorization{ long int *mlt; long int *exp; long int dim; } Factorization; void ShowFactorization(Factorization *nmbfct){ int i=0; for(i=0;idim;i++){ if(nmbfct->mlt[i]==1){ if(i==0) fprintf(stdout,"prime."); break; } if(i>0) fprintf(stdout,"*"); if(nmbfct->exp[i]==1) fprintf(stdout,"%ld",nmbfct->mlt[i]); else fprintf(stdout,"%ld^%ld",nmbfct->mlt[i], nmbfct->exp[i]); } fprintf(stdout,"\n"); } long int adcprime[]={ 421,601,631,701,883, 419,443,647,21737,31769, 1427,18353,22817,34337,98717, 3527,8693,9677,11093,18233}; long int ListFirstPrimes(long int mpr,long int *prlist){ /* fprintf(stdout,"ListFirstPrimes: listing primes less than %ld...\n", mpr); */ long int prnum=0; int composed=0; long int nmb=0,j=0; prlist[prnum++]=2; prlist[prnum++]=3; prlist[prnum++]=5; prlist[prnum++]=7; for(nmb=8;nmb0){ if(mask==2*(mask/2)){ mask=mask>>1; i++; continue; } pr=adcprime[i]; genexp=adcexpons[i]; /* fprintf(stdout,"[%ld,%ld]\n",pr,genexp); ShowFactorization(fctlist[genexp]); */ for(j=0;jdim;j++){ fct=fctlist[pr-1]->mlt[j]; lexp=fctlist[pr-1]->exp[j]; for(k=0;kdim;k++){ if(fctlist[genexp]->mlt[k]==1) break; if(fct!=fctlist[genexp]->mlt[k]) continue; lexp-=fctlist[genexp]->exp[k]; break; } if(expons[fct]>1; i++; } /* for(i=0;i0) fprintf(stdout,"*%ld^%ld",i,expons[i]); } 
fprintf(stdout,"\n"); */ for(i=0;i<=maxprmfctr;i++){ while(expons[i]>0){ LCM*=i; if(LCM>LARGE_NUM/maxprmfctr) return LCM; expons[i]--; } } /* fprintf(stdout,"==== %lld\n",LCM); */ free(expons); return LCM; } void ExtendFactors(long int nmb,long int firstdiv, Factorization *nmbfct,Factorization **fctlist){ Factorization *divfct=fctlist[nmb/firstdiv]; int fdivused=0; int multnum=0; int i=0; /* fprintf(stdout,"==== %lld %ld %ld\n",divfct->dim,nmb,firstdiv); */ for(i=0;idim;i++){ if(divfct->mlt[i]==1){ if(fdivused==0){ nmbfct->mlt[multnum]=firstdiv; nmbfct->exp[multnum]=1; } break; } if(divfct->mlt[i]mlt[i]=divfct->mlt[i]; nmbfct->exp[i]=divfct->exp[i]; multnum++; }else if(divfct->mlt[i]==firstdiv){ nmbfct->mlt[i]=divfct->mlt[i]; nmbfct->exp[i]=divfct->exp[i]+1; fdivused=1; }else{ int j=i; if(fdivused==0) j=i+1; nmbfct->mlt[j]=divfct->mlt[i]; nmbfct->exp[j]=divfct->exp[i]; } } } void GetFactorization(long int prnum,long int *prlist, Factorization **fctlist){ /*fprintf(stdout,"GetFactorization: factorizing first %ld numbers.\n", prnum);*/ long int i=0,j=0; Factorization *fct=(Factorization*)malloc(2*sizeof(Factorization)); long int len=0,isft=0,div=1,firstdiv=1; fct->dim=2; fct->mlt=(long int*)malloc(2*sizeof(long int)); fct->exp=(long int*)malloc(2*sizeof(long int)); for(i=0;idim;i++){ fct->mlt[i]=1; fct->exp[i]=0; } fct->mlt[0]=2; fct->exp[0]=1; fctlist[2]=fct; fct=(Factorization*)malloc(2*sizeof(Factorization)); fct->dim=2; fct->mlt=(long int*)malloc(2*sizeof(long int)); fct->exp=(long int*)malloc(2*sizeof(long int)); for(i=0;idim;i++){ fct->mlt[i]=1; fct->exp[i]=0; } fct->mlt[0]=3; fct->exp[0]=1; fctlist[3]=fct; for(i=0;i0){ len++; isft=isft>>1; } fct=(Factorization*)malloc(2*sizeof(Factorization)); fct->dim=len; if (len==0) len=1; fct->mlt=(long int*)malloc(len*sizeof(long int)); fct->exp=(long int*)malloc(len*sizeof(long int)); for(j=0;jdim;j++){ fct->mlt[j]=1; fct->exp[j]=0; } div=1; for(j=0;prlist[j]*prlist[j]<=i;j++){ firstdiv=prlist[j]; if(i-firstdiv*((long 
int)i/firstdiv)==0){ div=firstdiv; if(firstdiv*firstdiv==i){ fct->mlt[0]=firstdiv; fct->exp[0]=2; }else{ ExtendFactors(i,firstdiv,fct,fctlist); } break; } } if(div==1){ fct->mlt[0]=i; fct->exp[0]=1; } fctlist[i]=fct; /* ShowFactorization(fct); */ } /* fprintf(stdout,"GetFactorization: Done.\n"); */ } long int adcexp[]={ 11,13,17,19,23, 23,29,31,37,41, 41,43,47,53,59, 3,5,7,11,13}; long int adcexpS[]={ 11,13,17,19,23}; long int adcexpW[]={ 2*2,2*2*2*5,2*3,2*2*5,2*3*7, 23,29,31,2*2,2*2*19}; long int adcexpA[]={ 2*2,2*2*2*5,2*3,2*2*5,2*3*7, 2*19,2*13,2*19,2*2*2*13*19,2*2*2*19*19, 2*23,2*2*2*2,2*2*2*2*2*23,2*2*2*2*2,2*2*23}; long int adcexpB[]={ 2*2*7,2*2*2*5,2*3*7,2*2*5*7,2*3*7*7, 2*19,2*13,2*19,2*2*2*13*19,2*2*2*19*19, 2*31,2*2*2*2*31,2*2*2*2*2*31,2*2*2*2*2*29,2*2*29, 2*43,2*2,2*2,2*2*47,2*2*2*43}; long int UpPrimeLim=100000; typedef struct dc_view{ long long int vsize; long int vidx; } DC_view; int CompareSizesByValue( const void* sz0, const void* sz1) { long long int *size0=(long long int*)sz0, *size1=(long long int*)sz1; int res=0; if(*size0-*size1>0) res=1; else if(*size0-*size1<0) res=-1; return res; } int CompareViewsBySize( const void* vw0, const void* vw1) { DC_view *lvw0=(DC_view *)vw0, *lvw1=(DC_view *)vw1; int res=0; if(lvw0->vsize>lvw1->vsize) res=1; else if(lvw0->vsizevsize) res=-1; else if(lvw0->vidx>lvw1->vidx) res=1; else if(lvw0->vidxvidx) res=-1; return res; } int CalculateVeiwSizes(ADC_PAR *par){ unsigned long long totalInBytes = 0; unsigned long long nViewDims, nCubeTuples = 0; const char *adcfname=par->filename; int NDID=par->ndid; char clss=par->clss; int dcdim=par->dim; long long int tnum=par->tuplenum; long long int i=0,j=0; Factorization **fctlist=(Factorization **) calloc(UpPrimeLim,sizeof(Factorization *)); long int *prlist=(long int *) calloc(UpPrimeLim,sizeof(long int)); int prnum=ListFirstPrimes(UpPrimeLim,prlist); DC_view *dcview=(DC_view *)calloc((1<tnum) LCM=tnum; dcview[i].vsize=LCM; dcview[i].vidx=i; } for(i=0;imlt) 
free(fctlist[i]->mlt); if(fctlist[i]->exp) free(fctlist[i]->exp); free(fctlist[i]); } free(fctlist); free(prlist); vszefname0="view.sz"; vszefname=(char*)calloc(BlockSize,sizeof(char)); sprintf(vszefname,"%s.%s.%d",adcfname,vszefname0,NDID); if(!(view = fopen(vszefname, "w+")) ) { fprintf(stderr,"CalculateVeiwSizes: Can't open file: %s\n",vszefname); return 0; } qsort( dcview, (1<>j)&0x1==1) { fprintf(view," %lld",j+1); nViewDims++;} fprintf(view,"\nView Size: %lld\n",dcview[i].vsize); totalInBytes += (8+4*nViewDims)*dcview[i].vsize; nCubeTuples += dcview[i].vsize; } fprintf(view,"\nTotal in bytes: %lld Number of tuples: %lld\n", totalInBytes, nCubeTuples); fclose(view); free(dcview); fprintf(stdout,"View sizes are written into %s\n",vszefname); free(vszefname); return 1; } int ParseParFile(char* parfname,ADC_PAR *par){ char line[BlockSize]; FILE* parfile=NULL; char* pos=strchr(parfname,'.'); int linenum=0,i=0; const char *kwd; if(!(parfile = fopen(parfname, "r")) ) { fprintf(stderr,"ParseParFile: Can't open file: %s\n",parfname); return 0; } if(pos) pos=strchr(pos+1,'.'); if(pos) sscanf(pos+1,"%d",&(par->ndid)); linenum=0; while(fgets(&line[0],BlockSize,parfile)){ i=0; kwd=adcKeyword[i]; while(kwd){ if(strstr(line,"#")) { ;/*comment line, do nothing*/ }else if(strstr(line,kwd)){ char *pos=line+strlen(kwd)+1; switch(i){ case 0: sscanf(pos,"%d",&(par->dim)); break; case 1: sscanf(pos,"%d",&(par->mnum)); break; case 2: sscanf(pos,"%lld",&(par->tuplenum)); break; case 3: /* sscanf(pos,"%d",&(par->isascii));*/ break; case 4: sscanf(pos,"%d",&(par->inverse_endian)); break; case 5: par->filename=(char*) malloc(strlen(pos)*sizeof(char)); sscanf(pos,"%s",par->filename); break; case 6: sscanf(pos,"%c",&(par->clss)); break; } break; } i++; kwd=adcKeyword[i]; } linenum++; } fclose(parfile); switch(par->clss){/* overwriting parameters according the class */ case 'S': par->dim=5; par->mnum=1; par->tuplenum=1000; break; case 'W': par->dim=10; par->mnum=1; par->tuplenum=100000; 
break; case 'A': par->dim=15; par->mnum=1; par->tuplenum=1000000; break; case 'B': par->dim=20; par->mnum=1; par->tuplenum=10000000; break; } return 1; } int WriteADCPar(ADC_PAR *par,char* fname){ char *lname=(char*) calloc(BlockSize,sizeof(char)); FILE *parfile=NULL; sprintf(lname,"%s",fname); parfile=fopen(lname,"w"); if(!parfile){ fprintf(stderr,"WriteADCPar: can't open file %s\n",lname); return 0; } fprintf(parfile,"attrNum=%d\n",par->dim); fprintf(parfile,"measuresNum=%d\n",par->mnum); fprintf(parfile,"tuplesNum=%lld\n",par->tuplenum); fprintf(parfile,"class=%c\n",par->clss); /* fprintf(parfile,"isASCII=%d\n",par->isascii); */ fprintf(parfile,"INVERSE_ENDIAN=%d\n",par->inverse_endian); fprintf(parfile,"fileName=%s\n",par->filename); fclose(parfile); return 1; } void ShowADCPar(ADC_PAR *par){ fprintf(stdout,"********************* ADC paramters\n"); fprintf(stdout," id %d\n",par->ndid); fprintf(stdout," attributes %d\n",par->dim); fprintf(stdout," measures %d\n",par->mnum); fprintf(stdout," tuples %lld\n",par->tuplenum); fprintf(stdout," class \t%c\n",par->clss); fprintf(stdout," filename %s\n",par->filename); fprintf(stdout,"***********************************\n"); } long int adcgen[]={ 2,7,3,2,2, 2,2,5,31,7, 2,3,3,3,2, 5,2,2,2,3}; int GetNextTuple(int dcdim, int measnum, long long int* attr,long long int* meas, char clss){ static int tuplenum=0; static const int maxdim=20; static int measbound=31415; int i=0,j=0; int maxattr=0; static long int seed[20]; long int *locexp=NULL; if(dcdim>maxdim){ fprintf(stderr,"GetNextTuple: number of dcdim is too large:%d", dcdim); return 0; } if(measnum>measbound){ fprintf(stderr,"GetNextTuple: number of mes is too large:%d", measnum); return 0; } locexp=adcexp; switch(clss){ case 'S': locexp=adcexpS; break; case 'W': locexp=adcexpW; break; case 'A': locexp=adcexpA; break; case 'B': locexp=adcexpB; break; } if(tuplenum==0){ for(i=0;imaxattr) maxattr=seed[i]; } for(i=0;idim, mesnum=par->mnum, tplnum=par->tuplenum; char 
*adcfname=(char*)calloc(BlockSize,sizeof(char)); FILE *adc; int i=0,j=0; long long int* attr=NULL,*mes=NULL; /* if(par->isascii==1){ sprintf(adcfname,"%s.tpl.%d",par->filename,par->ndid); if(!(adc = fopen(adcfname, "w+"))) { fprintf(stderr,"GenerateADC: Can't open file: %s\n",adcfname); return 0; } }else{ */ sprintf(adcfname,"%s.dat.%d",par->filename,par->ndid); if(!(adc = fopen(adcfname, "wb+"))){ fprintf(stderr,"GenerateADC: Can't open file: %s\n",adcfname); return 0; } /* } */ attr=(long long int *)malloc(dcdim*sizeof(long long int)); mes=(long long int *)malloc(mesnum*sizeof(long long int)); fprintf(stdout,"\nGenerateADC: writing %d tuples of %d attributes and %d measures to %s\n", tplnum,dcdim,mesnum,adcfname); for(i=0;iclss)) return 0; /* if(par->isascii==1){ for(int j=0;jinverse_endian==1) swap8(&mv); fwrite(&mv, 8, 1, adc); } for(j=0;jinverse_endian==1) swap4(&av); fwrite(&av, 4, 1, adc); } } /* } */ fclose(adc); fprintf(stdout,"Binary ADC file %s ",adcfname); fprintf(stdout,"have been generated.\n"); free(attr); free(mes); free(adcfname); CalculateVeiwSizes(par); return 1; }