/*CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C                                                                             C
C  COMPUTE MERTENS FUNCTION (sum of M(x/i)^2 where x is highly composite)     C
C  01/27/16 (DKC)							      C
C                                                                             C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC*/
#include <stdio.h>
#include <math.h>
#include <omp.h>
#include "table.h"
extern char *malloc();
// compute partial sums of Mertens function
long long newmert(unsigned long long s, unsigned long long x, int *M) {
unsigned long long t;
long long i;
long long sum;
t=(unsigned long long)sqrt((double)x);
t=t+2;
if (s>(x/t))
   return(0x7fffffffffffffff);
sum=0;
#pragma omp parallel for reduction (+:sum)
for (i=(long long)s; i<=(long long)(x/t); i++)
   sum=sum+M[x/i-1];
#pragma omp parallel for reduction (+:sum)
for (i=1; i<(long long)t; i++) {
   sum=sum+(long long)M[i-1]*(x/(unsigned long long)i-x/(unsigned long long)(i+1));
   }
return(sum);
}
// compute Mobius function
int newmobl(unsigned long long a, unsigned long long b, long long *out, unsigned int *table,
	    unsigned int tsize) {
unsigned int i,p,count;
unsigned long long beg,ps,rem;
long long t;
count=(unsigned int)(b-a);
for (i=0; i<count; i++)
   out[i]=1;
for (i=0; i<tsize; i++) {
   p=table[i];
   ps=(unsigned long long)p*(unsigned long long)p;
   if (ps>b)
      goto askip;
   rem=a-(a/ps)*ps;
   if (rem!=0)
      beg=ps-rem;
   else
      beg=0;
   while (beg<count) {
      out[beg]=0;
      beg=beg+ps;
      }
   rem=a-(a/p)*p;
   if (rem!=0)
      beg=p-rem;
   else
      beg=0;
   while (beg<count) {
      out[beg]=-out[beg]*p;
      beg=beg+p;
      }
   }
return(p);
askip:
for (i=0; i<count; i++) {
   if (out[i]==0)
      continue;
   t=out[i];
   if (t<0)
      t=-t;
   if ((unsigned long long)t<(i+a))
      out[i]=-out[i];
   }
for (i=0; i<count; i++) {
   if (out[i]==0)
      continue;
   if (out[i]>0)
      out[i]=1;
   else
      out[i]=-1;
   }
return(1);
}
// compute primes
unsigned int primed(unsigned int *out, unsigned int tsize,
		    unsigned int *table,unsigned int limit) {
unsigned int d;
unsigned int i,j,k,l,flag,count;
count=tsize;
for (i=0; i<tsize; i++)
   out[i]=table[i];
j=table[tsize-1]+1;
for (d=j; d<=limit; d++) {
   if(d==(d/2)*2) continue;
   if(d==(d/3)*3) continue;
   if(d==(d/5)*5) continue;
   if(d==(d/7)*7) continue;
   if(d==(d/11)*11) continue;
   l=(unsigned int)(10.0+sqrt((double)d));
   k=0;
   if (l>table[tsize-1])
      return(0);
   else {
      for (i=0; i<tsize; i++) {
	 if (table[i]<=l)
	    k=i;
	 else
	    break;
	 }
      }
   flag=1;
   l=k;
   for (i=0; i<=l; i++) {
      k=table[i];
      if ((d/k)*k==d) {
	 flag=0;
	 break;
	 }
      }
   if (flag==1)
      out[count]=d;
   count=count+flag;
   }
return(count);
}

unsigned long long Msize=15000000000;	// for 64 GB of RAM, 64-bit OS
unsigned int tsize=1230;	  // prime look-up table size
unsigned int tmpsiz=10000;	  // used to compute Mobius function
unsigned int t2size=200000;	  // prime look-up table size
unsigned int Tsize=100000000;	  // used to compute partial sums of Mobius function
//
unsigned int incnt=156;
unsigned long long in[167*2]={  // highly composite numbers and their number of divisors
		   2,		2,
		   4,		3,
		   6,		4,
		  12,		6,
		  24,		8,
		  36,		9,
		  48,	       10,
		  60,	       12,
		 120,	       16,
		 180,	       18,
		 240,	       20,
		 360,	       24,
		 720,	       30,
		 840,	       32,
		1260,	       36,
		1680,	       40,
		2520,	       48,
		5040,	       60,
		7560,	       64,
	       10080,	       72,
	       15120,	       80,
	       20160,	       84,
	       25200,	       90,
	       27720,	       96,
	       45360,	      100,
	       50400,	      108,
	       55440,	      120,
	       83160,	      128,
	      110880,	      144,
	      166320,	      160,
	      221760,	      168,
	      277200,	      180,
	      332640,	      192,
	      498960,	      200,
	      554400,	      216,
	      665280,	      224,
	      720720,	      240,
	     1081080,	      256,
	     1441440,	      288,
	     2162160,	      320,
	     2882880,	      336,
	     3603600,	      360,
	     4324320,	      384,
	     6486480,	      400,
	     7207200,	      432,
	     8648640,	      448,
	    10810800,	      480,
	    14414400,	      504,
	    17297280,	      512,
	    21621600,	      576,
	    32432400,	      600,
	    36756720,	      640,
	    43243200,	      672,
	    61261200,	      720,
	    73513440,	      768,
	   110270160,	      800,
	   122522400,	      864,
	   147026880,	      896,
	   183783600,	      960,
	   245044800,	     1008,
	   294053760,	     1024,
	   367567200,	     1152,
	   551350800,	     1200,
	   698377680,	     1280,
	   735134400,	     1344,
	  1102701600,	     1440,
	  1396755360,	     1536,
	  2095133040,	     1600,
	  2205403200,	     1680,
	  2327925600,	     1728,
	  2793510720,	     1792,
	  3491888400,	     1920,
	  4655851200,	     2016,
	  5587021440,	     2048,
	  6983776800,	     2304,
	 10475665200,	     2400,
	 13967553600,	     2688,
	 20951330400,	     2880,
	 27935107200,	     3072,
	 41902660800,	     3360,
	 48886437600,	     3456,
	 64250746560,	     3584,
	 73329656400,	     3600,
	 80313433200,	     3840,
	 97772875200,	     4032,
	128501493120,	     4096,
	146659312800,	     4320,
	160626866400,	     4608,
	240940299600,	     4800,
	293318625600,	     5040,
	321253732800,	     5376,
	481880599200,	     5760,
	642507465600,	     6144,
	963761198400,	     6720,
       1124388064800,	     6912, 
       1606268664000,	     7168, 
       1686582097200,	     7200,
       1927522396800,	     7680,
       2248776129600,	     8064, 
       3212537328000,	     8192,
       3373164194400,	     8640,
       4497552259200,	     9216,
       6746328388800,	    10080,
       8995104518400,	    10368,
       9316358251200,	    10752,
      13492656777600,	    11520,
      18632716502400,	    12288,
      26985313555200,	    12960,
      27949074753600,	    13440,
      32607253879200,	    13824,
      46581791256000,	    14336,
      48910880818800,	    14400,
      55898149507200,	    15360,
      65214507758400,	    16128,
      93163582512000,	    16384,
      97821761637600,	    17280,
     130429015516800,	    18432,
     195643523275200,	    20160,
     260858031033600,	    20736,
     288807105787200,	    21504,
     391287046550400,	    23040,
     577614211574400,	    24576,
     782574093100800,	    25920,
     866421317361600,	    26880,
    1010824870255200,	    27648,
    1444035528936000,	    28672, 
    1516237305382800,	    28800,
    1732842634723200,	    30720,
    2021649740510400,	    32256,
    2888071057872000,	    32768,
    3032474610765600,	    34560, 
    4043299481020800,	    36864,
    6064949221531200,	    40320, 
    8086598962041600,	    41472,
   10108248702552000,	    43008, 
   12129898443062400,	    46080,
   18194847664593600,	    48384,
   20216497405104000,	    49152,
   24259796886124800,	    51840, 
   30324746107656000,	    53760, 
   36389695329187200,	    55296,
   48519593772249600,	    57600,
   60649492215312000,	    61440,
   72779390658374400,	    62208,
   74801040398884800,	    64512,
  106858629141264000,	    65536, 
  112201560598327200,	    69120,
  149602080797769600,	    73728,
  224403121196654400,	    80640,
  299204161595539200,	    82944,
  374005201994424000,	    86016,
  448806242393308800,	    92160,
  673209363589963200,	    96768,
  748010403988848000,	    98304,
  897612484786617600,	   103680,
 1122015605983272000,	   107520,
 1346418727179926400,	   110592,
 1795224969573235200,	   115200,
 2244031211966544000,	   122880,
 2692837454359852800,	   124416,
 3066842656354276800,	   129024,
 4381203794791824000,	   131072,
 4488062423933088000,	   138240,
 6133685312708553600,	   147456,
 8976124847866176000,	   153600,
 9200527969062830400,	   161280,
 12267370625417107200,	   165888};
//
void main() {
long long *temp,sum,*T,ltemp,c;
int *M;
unsigned int *ntable;
unsigned int ntsize;
unsigned int g,L;
unsigned long long index,ta,tb,i,j,k,N,mcount,start;
int savet,t,ID,d,e;
FILE *Outfp;
Outfp = fopen("out1asr.dat","w");
omp_set_dynamic(0);
omp_set_num_threads(8);
#pragma omp parallel  
{
   ID=omp_get_thread_num();
   printf(" ID=%d \n",ID);
}
ntable=(unsigned int*) malloc((t2size+1)*4);
if (ntable==NULL) {
   printf("not enough memory \n");
   goto zskip;
   }
temp=(long long*) malloc((tmpsiz+1)*8);
if (temp==NULL) {
   printf("not enough memory \n");
   return;
   }
M=(int*) malloc((Msize+1)*4);
if (M==NULL) {
   printf("not enough memory \n");
   return;
   }
T=(long long*)malloc((Tsize+1)*8);
if (T==NULL) {
   printf("not enough memory \n");
   return;
   }
ntsize=primed(ntable,tsize,table,2000000);
printf("prime look-up table size=%d, largest prime=%d \n",ntsize,ntable[ntsize-1]);
printf("computing Mobius function \n");
index=0;
ta=1;
tb=(unsigned long long)(tmpsiz+1);
mcount=Msize/(unsigned long long)tmpsiz;
for (i=0; i<mcount; i++) {
   t=newmobl(ta,tb,temp,ntable,ntsize);
   if (t!=1) {
      printf("error \n");
      goto zskip;
      }
   ta=tb;
   tb=tb+(unsigned long long)tmpsiz;
   for (j=0; j<tmpsiz; j++)
      M[j+index]=(int)temp[j];
   index=index+(unsigned long long)tmpsiz;
   }
//
// compute Mertens function
//
printf("computing Mertens function \n");
for (i=1; i<=Msize; i++) {
    M[i]=M[i-1]+M[i];
   }
//
printf("computing sum \n");
for (g=0; g<incnt; g++) {
   N=in[2*g];
   L=0;
   if (N>Msize) {
      L=(unsigned int)(N/(unsigned long long)Msize);
      if (L>(unsigned long long)Tsize) {
	 printf("error: not enough memory \n");
	 goto zskip;
	 }
      for (e=1; e<=(int)L; e++) {
	 start=(N/(unsigned long long)e)/(unsigned long long)Msize+1;
	 ltemp=newmert(start,N/e,M);
	 if (ltemp==0x7fffffffffffffff) {
	    printf("error: s>(x/t) \n");
	    goto zskip;
	    }
	 T[e-1]=1-ltemp;
	 }
      for (e=(int)(L/2); e>=1; e--) {
	 sum=0;
#pragma omp parallel for reduction (+:sum)
	 for (d=1; d<=(int)(L/(unsigned int)e-1); d++)
	    sum=sum+T[(d+1)*e-1];
	 T[e-1]=T[e-1]-sum;
	 }
      sum=0;
      savet=(int)T[0];
#pragma omp parallel for reduction (+:sum)
      for (e=1; e<=(int)L; e++)
	 sum=sum+T[e-1]*T[e-1];
      k=(unsigned long long)sqrt((double)N);
      k=k+2;
      if ((L+1)>(N/(long long)k)) {
	 printf("error: L+1>N/k \n");
	 goto zskip;
	 }
#pragma omp parallel for reduction (+:sum)
      for (c=(long long)(L+1); c<=(long long)(N/(long long)k); c++)
	 sum=sum+(long long)M[N/c-1]*(long long)M[N/c-1];
#pragma omp parallel for reduction (+:sum)
      for (c=1; c<(long long)k; c++)
	 sum=sum+(long long)M[c-1]*(long long)M[c-1]*(N/(unsigned long long)c-N/(unsigned long long)(c+1));
      }
   else {
      sum=0;
      savet=M[N-1];
      k=(unsigned long long)sqrt((double)N);
      k=k+2;
#pragma omp parallel for reduction (+:sum)
      for (c=1; c<=(long long)(N/(long long)k); c++)
	 sum=sum+(long long)M[N/c-1]*(long long)M[N/c-1];
#pragma omp parallel for reduction (+:sum)
      for (c=1; c<(long long)k; c++)
	 sum=sum+(long long)M[c-1]*(long long)M[c-1]*(N/(unsigned long long)c-N/(unsigned long long)(c+1));
      }
   printf(" %I64x %I64x %d %d %d \n",N,sum,in[2*g+1],savet,L);
   fprintf(Outfp," %I64x, %I64x, %d, %d, \n",N,sum,in[2*g+1],savet);
   }
zskip:
fclose(Outfp);
return;
}