/*
 
      Testexample for GIANT_GBIT: Artificial test problem.
 
 *  Written by        L. Weimann 
 *  Purpose           Testexample for code GIANT_GBIT
 *  Version           1.0
 *  Revision          June 2006
 *  Latest Change     June 2006
 *  Library           NewtonLib
 *  Code              C, Double Precision
 *  Environment       Standard C environment on PC's,
                      workstations and hosts.
 
*/

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "giant.h"

#define MAX_NPDE 2

struct PDE_DATA
{
  int nx, ny, npde;
  double *xgri, *ygri;
};

struct JACOBIAN_DATA
{
  int n, nzmax, nfill;
  int *row, *col;
  double *v, *vp;
};

struct PROBLEM_PARAMETER
{
  int ipar;
  double rpar1, rpar2;
};

static struct PDE_DATA          *pdedata = NULL;
static struct JACOBIAN_DATA     *jacdata = NULL;
static struct PROBLEM_PARAMETER *propar  = NULL;

void fun(int n, double *u, double *fu, int nfcn, int *fail);
void jac(int n, double *u, double *xscale, double *fu, int njac, int *fail);
void muljac(int n, double *y, double *z);
void precon(int n, double *z, double *w);
void funs(int npde, double x, double y, double *u, double *ux,
          double *uy, double *dx, double *dy, double *cxy, double *g);
void dfuns(int npde, double x, double y, double *u, double *ux,
	       double *uy, double *dx, double *dy, double *dcdu, double *dcdxu,
	       double *dcdyu, double *dgdu);

int main()
{
  int ix, iy, j, n, nx, ny, npde=1, nzmax;
  double *u, *xscale, *xgrid, *ygrid;
  double xmin, xmax, ymin, ymax, dx, dy, x, y, q, rpar1, rpar2;
  float rho_in, r1, r2;
  struct GIANT_FUN *problem = malloc( sizeof(struct GIANT_FUN) );
  struct GIANT_OPT  *opt  = malloc( sizeof(struct GIANT_OPT) );
  struct GIANT_INFO *info = malloc( sizeof(struct GIANT_INFO) );
  pdedata = malloc( sizeof(struct PDE_DATA) );
  jacdata = malloc( sizeof(struct JACOBIAN_DATA) );
  propar  = malloc( sizeof(struct PROBLEM_PARAMETER) );
  if ( !pdedata || ! jacdata || !propar || !problem || !opt || !info )
    { fprintf(stdout," Memory allocation failed (1)\n");  exit(-2); };
  problem->fun    = &fun;     problem->jac     = &jac;  
  problem->muljac = &muljac;  problem->preconl = &precon;
  fprintf(stdout," nx=?(>=3),  ny=?(>=3)  rho=?(>0,<=1.0)\n");
  fscanf(stdin,"%i %i %g",&nx,&ny,&rho_in);
  opt->rho = rho_in;
  if ( nx<3 || ny<3 )
    { fprintf(stdout," nx or ny is too small\n"); exit (-1); };
  fprintf(stdout," rpar1=?(>0.0)  rpar2=?(>0.0)  ipar=?(1,2,3)\n");
  fscanf(stdin,"%g %g %i",&r1,&r2,&(propar->ipar));
  propar->rpar1 = r1;   propar->rpar2 = r2;
  pdedata->nx   = nx;
  pdedata->ny   = ny;
  pdedata->npde = npde;
  pdedata->xgri = malloc( nx*sizeof(double) );
  pdedata->ygri = malloc( ny*sizeof(double) );
  n = npde*nx*ny;
  u = malloc( n*sizeof(double) );   xscale = malloc( n*sizeof(double) );
  if ( !pdedata->xgri || !pdedata->ygri || !u || !xscale )
    { fprintf(stdout," Memory allocation failed (2)\n");  exit(-3); };
  nzmax = 5*n;
  jacdata->nzmax = nzmax;
  jacdata->row = malloc( nzmax*sizeof(int) );
  jacdata->col = malloc( nzmax*sizeof(int) );
  jacdata->v   = malloc( nzmax*sizeof(double) );
  jacdata->vp  = malloc( n*sizeof(double) );
  if ( !jacdata->row || ! jacdata->col || !jacdata->v || !jacdata->vp )
    { fprintf(stdout," Memory allocation failed (3)\n");  exit(-4); };

  xmin = -3.0;  xmax=3.0;   ymin = -3.0;  ymax=3.0;

  rpar1 = propar->rpar1;    rpar2 = propar->rpar2;
  dx = (xmax-xmin)/(nx-1);   dy = (ymax-ymin)/(ny-1);
  xgrid = pdedata->xgri;   for (ix=0;ix<nx;ix++) xgrid[ix]=xmin+ix*dx;
  ygrid = pdedata->ygri;   for (iy=0;iy<ny;iy++) ygrid[iy]=ymin+iy*dy;
  fprintf(stdout,"xgrid: ");
  for (ix=0;ix<nx;ix++) 
    {fprintf(stdout," %6.3f",xgrid[ix]);
     if (ix%10 == 9) fprintf(stdout,"\n       ");};
  fprintf(stdout,"\n");
  fprintf(stdout,"ygrid: ");
  for (iy=0;iy<ny;iy++) 
    {fprintf(stdout," %6.3f",ygrid[iy]); 
     if (iy%10 == 9) fprintf(stdout,"\n       ");};
  fprintf(stdout,"\n");
  /* start vector for Newton iteration: */
  for (iy=0;iy<ny;iy++)
    { y = ygrid[iy];
      for (ix=0;ix<nx;ix++)
        { x = xgrid[ix];
          q = x*x/(rpar1*rpar1)+y*y/(rpar2*rpar2);
          for (j=0;j<npde;j++)  u[j+npde*ix+npde*nx*iy]=0.2*exp(-q);
        }
    };
  for (j=0;j<n;j++) xscale[j]=1.0;  
 /* ----------------------------- */
  opt->tol = 1.0e-5;
  opt->maxiter = 50;
  opt->lin_maxiter = 1000;
  opt->i_max =10;
  opt->errorlevel = Verbose;
  opt->monitorlevel = Verbose;
  opt->datalevel = Minimum;
  opt->errorfile = stdout;
  opt->monitorfile = opt->errorfile;
  opt->datafile = fopen("test_giantgbit.dat","w");
  if (!opt->datafile) fprintf(stdout,
     "\n open of test_giantgbit.dat failed!\n");
  opt->iterfile = NULL;
  opt->resfile  = NULL;
  opt->miscfile = NULL;
  opt->linmonlevel  = Verbose;
  opt->lindatalevel = None;
  opt->linmonfile   = fopen("test_giantgbit_lin.dat","w");;
  opt->lindatafile  = NULL;
  opt->nonlin = Highly_Nonlinear;
  opt->restricted = False;
  opt->scaleopt = StandardScale;
  opt->scale = xscale;
  giant_gbit(*problem, n, u, opt, info);
  if ( info->rcode == 0 )
    fprintf(stdout,"\n precision=%e\n",info->precision);
  else
     fprintf(stdout,"\n GIANT_GBIT failed - no solution found\n");
  fprintf(stdout," iter    = %i\n",info->iter);
  fprintf(stdout," rcode   = %i\n",info->rcode);
  fprintf(stdout," subcode = %i\n",info->subcode);
  fprintf(stdout," nfun    = %i\n",info->nofunevals);
  fprintf(stdout," njac    = %i\n",info->nojacevals);
  fprintf(stdout," nmuljac = %i\n",info->nomuljac);
  fprintf(stdout," nprecon = %i\n",info->noprecon);
  return 0;
}

void fun(int n, double *u, double *f, int nfcn, int *fail)
{
  int nx=pdedata->nx, ny=pdedata->ny,  npde=pdedata->npde;
  double *xgri = pdedata->xgri, *ygri = pdedata->ygri;
  int iy, ix, j;
  double yl, ym, yr, dyl, dyr, dyhh, xl, xm, xr, dxl, dxr, dxhh;
  double *uxx = malloc( npde*sizeof(double) ),
         *ux  = malloc( npde*sizeof(double) ), 
         *uyy = malloc( npde*sizeof(double) ),
         *uy  = malloc( npde*sizeof(double) ),
         *dx  = malloc( npde*sizeof(double) ),
         *dy  = malloc( npde*sizeof(double) ),
         *cxy = malloc( npde*sizeof(double) ),
         *g   = malloc( npde*sizeof(double) );
  
  /* interior nodes  */ 
  for (iy=1;iy<=ny-2;iy++)
    {
      yl=ygri[iy-1];  ym=ygri[iy];  yr=ygri[iy+1];
      dyl=ym-yl;      dyr=yr-ym;    dyhh=2.0/((dyl+dyr)*dyl*dyr);

      for (ix=1;ix<=nx-2;ix++)
        {
         xl=xgri[ix-1];  xm=xgri[ix];  xr=xgri[ix+1];
		 dxl=xm-xl;      dxr=xr-xm;    dxhh=2.0/((dxl+dxr)*dxl*dxr);
	     /* approx. uxx and ux */
		 for (j=0;j<npde;j++)
		   {
		     uxx[j]=(dxl * u[j+npde*(ix+1)+npde*nx*iy]
		            - (dxl+dxr) * u[j+npde*ix+npde*nx*iy]
	                + dxr * u[j+npde*(ix-1)+npde*nx*iy])*dxhh;
		     ux[j] =(dxl*(u[j+npde*(ix+1)+npde*nx*iy]
		            -u[j+npde*ix+npde*nx*iy])/dxr
	                +dxr*(u[j+npde*ix+npde*nx*iy]
	                -u[j+npde*(ix-1)+npde*nx*iy])/dxl)/(dxl+dxr);
           }
	/* approx. uyy and uy */
		 for (j=0;j<npde;j++)
		   {
		     uyy[j]=(dyl*u[j+npde*ix+npde*nx*(iy+1)]
		            -(dyl+dyr)*u[j+npde*ix+npde*nx*iy]
	                +dyr*u[j+npde*ix+npde*nx*(iy-1)])*dyhh;
		     uy[j]=(dyl*(u[j+npde*ix+npde*nx*(iy+1)]
		           -u[j+npde*ix+npde*nx*iy])/dyr
	               +dyr*(u[j+npde*ix+npde*nx*iy]
	               -u[j+npde*ix+npde*nx*(iy-1)])/dyl)/(dyl+dyr);
           }
	/* get terms */
		 funs(npde,xm,ym,&u[npde*ix+npde*nx*iy],ux,uy,dx,dy,cxy,g);
	/* set up rhs at actual node */
		 for (j=0;j<npde;j++)
		   f[j+npde*ix+npde*nx*iy]=dx[j]*uxx[j]+dy[j]*uyy[j]+cxy[j]+g[j];
        }
    }
   /* boundary nodes 
      homogeneous dirichlet boundary conditions
      x=xmin  */
  for (iy=0;iy<ny;iy++)
      for (j=0;j<npde;j++)  f[j+npde*nx*iy]=u[j+npde*nx*iy];
   /* x=xmax  */
  for (iy=0;iy<ny;iy++)
      for (j=0;j<npde;j++)  
        f[j+npde*(nx-1)+npde*nx*iy]=u[j+npde*(nx-1)+npde*nx*iy];
   /* y=ymin  */
  for (ix=0;ix<nx;ix++)
      for (j=0;j<npde;j++)  f[j+npde*ix]=u[j+npde*ix];
   /* y=ymax  */
  for (ix=0;ix<nx;ix++)
      for (j=0;j<npde;j++)  
        f[j+npde*ix+npde*nx*(ny-1)]=u[j+npde*ix+npde*nx*(ny-1)];

   free(uxx);  free(ux);  free(uyy);  free(uy);
   free(dx);   free(dy);  free(cxy);  free(g);
   return;
/*  end routine fun */
}

void jac(int n, double *u, double *xscale, double *fu, int njac, int *fail)
{
  int nx=pdedata->nx, ny=pdedata->ny,  npde=pdedata->npde,
      nzmax = jacdata->nzmax;
  int *row=jacdata->row, *col=jacdata->col;
  double *xgri = pdedata->xgri, *ygri = pdedata->ygri,
         *a    =jacdata->v,     *pr   = jacdata->vp;
  int iy, ix, ixa, iya, j, k, ih, nfill = -1;
  double yl, ym, yr, dyl, dyr, dyhh, uyyfac, uyfac, uyylfc, uyyrfc, uylfac,
         uyrfac,
         xl, xm, xr, dxl, dxr, dxhh, uxxfac, uxfac, uxxlfc, uxxrfc, uxlfac,
         uxrfac;
  double *ux     = malloc( npde*sizeof(double) ), 
         *uy     = malloc( npde*sizeof(double) ),
         *dx     = malloc( npde*sizeof(double) ),
         *dy     = malloc( npde*sizeof(double) ),
         *dcdu   = malloc( npde*npde*sizeof(double) ),
         *dcdxu  = malloc( npde*npde*sizeof(double) ),
         *dcdyu  = malloc( npde*npde*sizeof(double) ),
         *dgdu   = malloc( npde*npde*sizeof(double) );
  
  *fail = 0;

 /*  interior nodes */
  for (iy=1;iy<=ny-2;iy++)
    {
     yl=ygri[iy-1];  ym=ygri[iy];   yr=ygri[iy+1];
     dyl=ym-yl;   dyr=yr-ym;        dyhh=2.0/((dyl+dyr)*dyl*dyr);
     uyyfac=-2.0/(dyl*dyr);       uyfac=(dyr-dyl)/(dyl*dyr);
     uyylfc=2.0/((dyl+dyr)*dyl);  uyyrfc=2.0/((dyl+dyr)*dyr);
     uylfac=-(dyr/dyl)/(dyl+dyr);   uyrfac=(dyl/dyr)/(dyl+dyr);
     for (ix=1;ix<=nx-2;ix++)
       {
        xl=xgri[ix-1];  xm=xgri[ix];   xr=xgri[ix+1];
        dxl=xm-xl;   dxr=xr-xm;        dxhh=2.0/((dxl+dxr)*dxl*dxr);
        uxxfac=-2.0/(dxl*dxr);       uxfac=(dxr-dxl)/(dxl*dxr);
        uxxlfc=2.0/((dxl+dxr)*dxl);  uxxrfc=2.0/((dxl+dxr)*dxr);
        uxlfac=-(dxr/dxl)/(dxl+dxr);   uxrfac=(dxl/dxr)/(dxl+dxr);

		if ( nfill >= nzmax-1 ) {*fail = -1;  jacdata->nfill = nfill; return; };
	    /*  approx. ux  */;
		for (j=0;j<npde;j++)
		  ux[j]= (dxl*(u[j+npde*(ix+1)+npde*nx*iy]
		        -u[j+npde*ix+npde*nx*iy])/dxr
	            +dxr*(u[j+npde*ix+npde*nx*iy]
	            -u[j+npde*(ix-1)+npde*nx*iy])/dxl)/(dxl+dxr);
	    /*  approx. uy */;
		for (j=0;j<npde;j++)
		  uy[j]= (dyl*(u[j+npde*ix+npde*nx*(iy+1)]
		         -u[j+npde*ix+npde*nx*iy])/dyr
	             +dyr*(u[j+npde*ix+npde*nx*iy]
	             -u[j+npde*ix+npde*nx*(iy-1)])/dyl)/(dyl+dyr);
	    /*  get terms */;
	    dfuns(npde,xm,ym,&u[npde*ix+npde*nx*iy],ux,uy,
	          dx,dy,dcdu,dcdxu,dcdyu,dgdu);
	/*  set up Jacobian elements at current node */;
		ih=(iy*nx+ix)*npde;
        for (j=0;j<npde;j++)
          {
		   nfill++;  a[nfill]  = dcdyu[j+npde*j]*uylfac + dy[j]*uyylfc;
		   row[nfill]  = ih+j;
		   col[nfill]  = ih+j - nx*npde;
 
		   nfill++;  a[nfill]  = dcdxu[j+npde*j]*uxlfac + dx[j]*uxxlfc;
		   row[nfill]  = ih+j;
		   col[nfill]  = ih+j - npde;
 
		   for (k=0;k<npde;k++)
		     {
			  nfill++;
			  a[nfill]  = dcdu[j+npde*k] + dcdxu[j+npde*k]*uxfac
						  + dcdyu[j+npde*k]*uyfac + dgdu[j+npde*k];
			  if ( k==j ) 
			    {a[nfill]  = a[nfill]  + dx[j]*uxxfac+dy[j]*uyyfac;
			     pr[ih+j] = 1.0/a[nfill];
			    };
			  row[nfill]  = ih+j;
			  col[nfill]  = ih+k;
             }

		   nfill++;  a[nfill]  = dcdxu[j+npde*j]*uxrfac + dx[j]*uxxrfc;
		   row[nfill]  = ih+j;
		   col[nfill]  = ih+j + npde;

		   nfill++;  a[nfill]  = dcdyu[j+npde*j]*uyrfac + dy[j]*uyyrfc;
		   row[nfill]  = ih+j;
		   col[nfill]  = ih+j + nx*npde;
          };
       };
    };
/*     boundary nodes;
       homogeneous dirichlet boundary conditions;
       y=ymin */
   for (ix=0;ix<nx;ix++)
     {
	  ixa = ix*npde;
	  for (j=0;j<npde;j++)
	    {
		 nfill++;     a[nfill]   = 1.0;  pr[ixa+j] = 1.0;
		 row[nfill]  = ixa+j;  col[nfill]  = ixa+j;
		}
     };
/*  x=xmin  */;
   for (iy=1;iy<ny-1;iy++)
     {
	  iya = iy*nx*npde;
	  for (j=0;j<npde;j++)
	    {
		nfill++;      a[nfill]   = 1.0;  pr[iya+j] = 1.0;
		row[nfill]  = iya+j;   col[nfill]  = iya+j;
		}
     };
/*  x=xmax  */;
   for (iy=1;iy<ny-1;iy++)
     {
      iya = ((iy+1)*nx-1)*npde;
	  for (j=0;j<npde;j++)
	    {
         nfill++;      a[nfill]  = 1.0;  pr[iya+j] = 1.0;
         row[nfill]  = iya+j;   col[nfill]  = iya+j;
		}
     };
/*  y=ymax  */;
   for (ix=0;ix<nx;ix++)
     {
      ixa = (ix+(ny-1)*nx)*npde;
	  for (j=0;j<npde;j++)
	    {
         nfill++;      a[nfill]  = 1.0;  pr[ixa+j] = 1.0;
         row[nfill]  = ixa+j;   col[nfill]  = ixa+j;
		}
     };

   jacdata->nfill = nfill;
   free(ux);    free(uy);     free(dx);     free(dy);
   free(dcdu);  free(dcdxu);  free(dcdyu);  free(dgdu); 
   return;
}

void muljac(int n, double *y, double *z)
{
  int nz, nfill = jacdata->nfill, j;
  int *row = jacdata->row, *col = jacdata->col;
  double *a = jacdata->v;
  for (j=0;j<n;j++) z[j] = 0.0;
  for (nz=0;nz<=nfill;nz++) z[row[nz]] += a[nz]*y[col[nz]];
}

void precon(int n, double *z, double *w)
{
  int j;
  double *pr = jacdata->vp;
  for (j=0;j<n;j++) w[j] = pr[j]*z[j];
}

void funs(int npde, double x, double y, double *u, double *ux,
          double *uy, double *dx, double *dy, double *cxy, double *g)
{
   double rpar1 = propar->rpar1, rpar2 = propar->rpar2;
   int    ipar  = propar->ipar;
   double uh[MAX_NPDE], uxxh, uxh, uyyh, uyh, q;
   q=x*x/(rpar1*rpar1) + y*y/(rpar2*rpar2);
   uh[0]=0.9*exp(-q)+0.1*u[0];
   uxxh=uh[0]*(4.0*x*x/(rpar1*rpar1*rpar1*rpar1) - 2.0/(rpar1*rpar1));
   uxh=-uh[0]*2.0*x/(rpar1*rpar1) ;
   uyyh=uh[0]*(4.0*y*y/(rpar2*rpar2*rpar2*rpar2) - 2.0/(rpar2*rpar2));
   uyh=-uh[0]*2.0*y/(rpar2*rpar2);
/*   version 1: */
   if ( ipar==1 ) g[0]=-uxxh-uyyh+exp(u[0])-exp(exp(-q));
/*   version 2: */
   if ( ipar==2 ) g[0]=-uxxh-uyyh-exp(u[0])+exp(exp(-q));
/*   version 3: */
   if ( ipar==3 ) g[0]=-uxxh-uyyh;

   dx[0]=1.0;
   dy[0]=1.0;

   cxy[0]=0.0;
}

void dfuns(int npde, double x, double y, double *u, double *ux,
	       double *uy, double *dx, double *dy, double *dcdu, double *dcdxu,
	       double *dcdyu, double *dgdu)
{
   double rpar1 = propar->rpar1, rpar2 = propar->rpar2;
   int    ipar  = propar->ipar;
   double duh[MAX_NPDE], duxxh, duxh, duyyh, duyh, q;
   q=x*x/(rpar1*rpar1) + y*y/(rpar2*rpar2);
   duh[0]=0.1;
   duxxh=duh[0]*(4.0*x*x/(rpar1*rpar1*rpar1*rpar1) - 2.0/(rpar1*rpar1));
   duxh=-duh[0]*2.0*x/(rpar1*rpar1) ;
   duyyh=duh[0]*(4.0*y*y/(rpar2*rpar2*rpar2*rpar2)- 2.0/(rpar2*rpar2));
   duyh=-duh[0]*2.0*y/(rpar2*rpar2);
/*   version 1: */
   if ( ipar==1 ) dgdu[0]=-duxxh-duyyh+exp(u[0]);
/*   version 2: */
   if ( ipar==2 ) dgdu[0]=-duxxh-duyyh-exp(u[0]);
/*   version 3: */
   if ( ipar==3 ) dgdu[0]=-duxxh-duyyh;

   dx[0] = 1.0;
   dy[0] = 1.0;

   dcdu [0] = 0.0;
   dcdxu[0] = 0.0;
   dcdyu[0] = 0.0;
}

