#include "mex.h" 


/* ------------------------------------------------------------------*/
/*            calculate X'*K*X                                       */
/*            X is a 3x3 block                                       */
/* ------------------------------------------------------------------*/
/*
 * Block-wise transform result = X' * y * X using one 3x3 block X.
 *
 *   x       the 3x3 block, read as x[0..8]
 *   y       YM-by-YN input matrix (column-major: entry (r,c) is y[r+c*YM])
 *   result  YM-by-YN output matrix, same layout
 *   offset  optional rigid-link coefficients (may be NULL); three values
 *           are consumed after every second triple of columns / rows
 *   type    2 : left multiplication only
 *           1 : right multiplication only
 *           any other value : both
 *
 * Only complete triples of columns (right pass) or rows (left pass) are
 * processed.  Trailing columns of the right pass stay at the zero value
 * of the scratch buffer; trailing rows of the left pass are not written.
 */
void x_k_x (const double* x, const double* y, double* result, 
  double* offset, int type, int YM,int YN) { 

  int     row, col, done;
  double  *work, *link;

  work = mxCalloc (YM*YN, sizeof(double));   /* zero-filled scratch matrix */

  /* ---------------- right multiplication: work = y * X ---------------- */
  if (type==2) {
    memcpy(work,y,YM*YN*sizeof(double));
  } else {
    done=0; link=offset;
    for (col=0; col+2<YN; col+=3) {
      for (row=0; row<YM; row++) {
        const double a=y[row+col*YM];
        const double b=y[row+(col+1)*YM];
        const double c=y[row+(col+2)*YM];

        work[row+col*YM]    =a*x[0]+b*x[1]+c*x[2];
        work[row+(col+1)*YM]=a*x[3]+b*x[4]+c*x[5];
        work[row+(col+2)*YM]=a*x[6]+b*x[7]+c*x[8];
      }
      done++;
      if (done==2 && offset!=NULL) {
        /* rigid link: couple this triple with the previous one */
        for (row=0; row<YM; row++) {
          const double p0=work[row+(col-3)*YM];
          const double p1=work[row+(col-2)*YM];
          const double p2=work[row+(col-1)*YM];

          work[row+col*YM]    -=link[2]*p1;
          work[row+col*YM]    +=link[1]*p2;
          work[row+(col+1)*YM]+=link[2]*p0;
          work[row+(col+1)*YM]-=link[0]*p2;
          work[row+(col+2)*YM]-=link[1]*p0;
          work[row+(col+2)*YM]+=link[0]*p1;
        }
        done=0; link+=3;
      }
    }
  }

  /* ---------------- left multiplication: result = X' * work ----------- */
  if (type==1) {
    memcpy(result,work,YM*YN*sizeof(double));
  } else {
    done=0; link=offset;
    for (row=0; row+2<YM; row+=3) {
      for (col=0; col<YN; col++) {
        const double a=work[row+col*YM];
        const double b=work[row+1+col*YM];
        const double c=work[row+2+col*YM];

        result[row+col*YM]  =a*x[0]+b*x[1]+c*x[2];
        result[row+1+col*YM]=a*x[3]+b*x[4]+c*x[5];
        result[row+2+col*YM]=a*x[6]+b*x[7]+c*x[8];
      }
      done++;
      if (done==2 && offset!=NULL) {
        /* rigid link: couple this row triple with the previous one */
        for (col=0; col<YN; col++) {
          const double p0=result[row-3+col*YM];
          const double p1=result[row-2+col*YM];
          const double p2=result[row-1+col*YM];

          result[row+col*YM]  -=link[2]*p1;
          result[row+col*YM]  +=link[1]*p2;
          result[row+1+col*YM]+=link[2]*p0;
          result[row+1+col*YM]-=link[0]*p2;
          result[row+2+col*YM]-=link[1]*p0;
          result[row+2+col*YM]+=link[0]*p1;
        }
        done=0; link+=3;
      }
    }
  }

  mxFree (work);                             /* release the scratch matrix */
}
/*-----------------------------------------------------------------------*/
/* inverse of 3x3 matrix */
/*
 * Invert the 3x3 matrix stored in C[0..8] and write the inverse to
 * CI[0..8].  The adjugate is formed first, then scaled by 1/det.
 * Raises a MATLAB error ("Singular matrix") when 1/det is not finite;
 * CI already holds the unscaled adjugate at that point.
 */
void Inv3(double *C, double *CI) {

 double scale;
 int    k = 0;

 /* adjugate (transposed cofactors) */
 CI[0] = C[4]*C[8] - C[5]*C[7];
 CI[3] = C[5]*C[6] - C[3]*C[8];
 CI[6] = C[3]*C[7] - C[4]*C[6];

 CI[1] = C[7]*C[2] - C[8]*C[1];
 CI[4] = C[8]*C[0] - C[6]*C[2];
 CI[7] = C[6]*C[1] - C[7]*C[0];

 CI[2] = C[1]*C[5] - C[2]*C[4];
 CI[5] = C[2]*C[3] - C[0]*C[5];
 CI[8] = C[0]*C[4] - C[1]*C[3];

 /* determinant, expanded with the cofactors just computed */
 scale = C[0]*CI[0] + C[1]*CI[3] + C[2]*CI[6];
 scale = 1./scale;
 if (!mxIsFinite(scale)) mexErrMsgTxt("Singular matrix"); 

 while (k<9) { CI[k]*= scale; k++; }

}
/*-----------------------------------------------------------------------*/

/* Cross product of two 3-vectors: w = u x v (components written in order). */
void cross(double *u, double *v, double *w){

  int i;

  for (i=0;i<3;i++) {
    const int a = (i+1)%3;   /* next axis, cyclic */
    const int b = (i+2)%3;   /* previous axis, cyclic */

    w[i] = u[a]*v[b]-u[b]*v[a];
  }

}

/*-----------------------------------------------------------------------*/
EnHeart(int *integ,double *constit,double *I,double *dWdI,double *d2WdI2) {
  /* %C1=0.3MPa, C2=0.2MPa, K=0.3MPa
constit=[.3e6 .2e6 .3e6]/1e6;  % Cenerg : C1 C2 K

dWdI(1) = constit(1)*I(3)^(-1./3.);
dWdI(2) = constit(2)*I(3)^(-2./3.);
dWdI(3) = - 1./3.* constit(1)*I(1)*I(3)^(-4./3.) ...
          - 2./3.* constit(2)*I(2)*I(3)^(-5./3.) ...
              +   constit(3)*(1-I(3)^(-1/2)) ;

d2WdI2=[0 0 -1./3.*constit(1)*I(3)^(-4./3.) ;
        0 0  -2./3.*constit(2)*I(3)^(-5./3.);
       -1./3.*constit(1)*I(3)^(-4./3.)  -2./3.*constit(2)*I(3)^(-5./3.) ...
        (4/9*constit(1)*I(1)*I(3)^(-7./3.) ...
              + 10./9.* constit(2)*I(2)*I(3)^(-8./3.) ...
              + 1/2 * constit(3)*I(3)^(-3/2))]; */
  /* constit[0]=.3e6/1.e6; 
  constit[1]=.2e6/1.e6; 
   constit[2]=.3e6/1.e6; */
  dWdI[0] = constit[0]*pow(I[2],-1./3.);
  dWdI[1] = constit[1]*pow(I[2],-2./3.);
  dWdI[2] = -1./3.*constit[0]*I[0]*pow(I[2],-4./3.) 
            -2./3.*constit[1]*I[1]*pow(I[2],-5./3.) 
                 + constit[2]*(1.-pow(I[2],-.5));
  d2WdI2[0]=0.; d2WdI2[3]=0.; d2WdI2[6]=-1./3.*constit[0]*pow(I[2],-4./3.); 
  d2WdI2[1]=0.; d2WdI2[4]=0.; d2WdI2[7]=-2./3.*constit[1]*pow(I[2],-5./3.);
  d2WdI2[2]= -1./3.*constit[0]*pow(I[2],-4./3.);
  d2WdI2[5]= -2./3.*constit[1]*pow(I[2],-5./3.);
  d2WdI2[8]=  4./9.*constit[0]*I[0]*pow(I[2],-7./3.)
            + 10./9.*constit[1]*I[1]*pow(I[2],-8./3.)
    + .5*constit[2]*pow(I[2],-3./2.);

  /* mexPrintf("%10.5g %10.5g %10.5g \n%10.5g %10.5g %10.5g \n%10.5g %10.5g %10.5g \n",d2WdI2[0],d2WdI2[1],d2WdI2[2],d2WdI2[3],d2WdI2[4],d2WdI2[5],d2WdI2[6],d2WdI2[7],d2WdI2[8]); */
}
/*-----------------------------------------------------------------------*/
/*-----------------------------------------------------------------------*/
/* Returns a newly created MATLAB string holding the CVS revision/date tag. */
mxArray* pre_cvs2 () {
 return mxCreateString("$Revision: 1.24 $  $Date: 2006/04/06 15:32:49 $");
}

/* Fallback definition of NULL, used only when no earlier header has
   already provided one. */
#ifndef NULL
#define NULL 0
#endif

/*-----------------------------------------------------------------------*/
/*
 * Locate entry (row, col) in a compressed-column sparse pattern.
 *
 *   jc  column pointers: column `col` occupies positions
 *       jc[col] .. jc[col+1]-1
 *   ir  row index stored at each position
 *
 * Returns the position of the first matching entry, or -1 when the column
 * holds no entry for `row`.  The column is scanned linearly, so the row
 * indices do not need to be sorted.
 */
int findPosInSparse( int row, int col, int *jc, int *ir )
{
  int pos = jc[col];
  const int stop = jc[col+1];

  while (pos < stop) {
    if (ir[pos] == row) return( pos );
    pos++;
  }

  return( -1 );
}

/*-----------------------------------------------------------------------*/
/* Cache of matrix entries that had no slot in the sparse pattern:
     nMiss        number of cached entries
     nMissAlloc   capacity (in entries) of the two buffers below
     AllowMissing non-zero enables caching instead of raising an error
     mij          (row, col) pairs, two ints per entry
     mv           value of each entry                                  */
int nMiss, nMissAlloc, AllowMissing = 0;
int *mij;
double *mv;


/*
 * Append the entry (gr, gc, val) to the missing-entry cache, growing the
 * buffers when they are full.  Always returns 0.
 *
 * Growth allocates fresh buffers and copies by hand; the original author
 * noted that mxRealloc did not work here.  Only the entries that actually
 * exist in the old buffers are copied: the previous code copied the NEW
 * size from the OLD buffers and so read past their end.  A capacity of
 * zero now grows to one entry instead of staying at zero.
 */
int addMissing( int gr, int gc, double val)
{
  /* If caching for all elements, we do not know size in advance... */
  if ((nMiss >= nMissAlloc)) {
    int    *oldIJ = mij;
    double *oldV = mv;
    /* number of entries the old buffers can hold */
    int     keep = (nMissAlloc > 0) ? nMissAlloc : 0;

    nMissAlloc = 2 * keep;
    if (nMissAlloc <= nMiss) nMissAlloc = nMiss + 1;

    mij = mxCalloc( 2 * nMissAlloc, sizeof( int ) );
    mv = mxCalloc( nMissAlloc, sizeof( double ) );
    if (keep > 0) {
      memcpy( mij, oldIJ, 2 * keep * sizeof( int ) );
      memcpy( mv, oldV, keep * sizeof( double ) );
    }
    mxFree( oldIJ );
    mxFree( oldV );
    /* keep the buffers alive after the MEX call returns */
    mexMakeMemoryPersistent( mij );
    mexMakeMemoryPersistent( mv );
    mexPrintf( "new alloc: %d\n", nMissAlloc );
  }
  mij[2*nMiss+0] = gr;
  mij[2*nMiss+1] = gc;
  mv[nMiss] = val;
  nMiss++;

  return( 0 );
}


/* ------------------------------------------------------------------*/
/*      Starting information for meshGraph DOFGraph addMissing ...   */
/* ------------------------------------------------------------------*/

/* int32 is defined here as plain int, except when OSTYPEmexrs6 is set
   (presumably that platform already provides int32 -- TODO confirm). */
#ifdef OSTYPEmexrs6
#else
typedef int int32;
#endif
typedef double float64;
/* Allocation helpers built on the MATLAB allocator.
   allocMem( Type, num ) requests room for `num` objects of `Type` through
   mxCalloc; freeMem( p ) releases p and resets it to 0. */
#define allocMem( Type, num ) \
 (Type *) mxCalloc( 1, (num) * sizeof( Type ) )
#define freeMem( p ) do {\
  mxFree( p ); p = 0; } while (0)
/* Status codes returned by the mesh graph routines. */
typedef enum ReturnStatus {
  RET_OK,
  RET_Fail
} ReturnStatus;
/* Max(a,b): larger of two values; each argument may be evaluated twice. */
#if !defined(Max)
#define Max(a,b) (((a) > (b)) ? (a) : (b))
#endif

/* Buffers of the missing-entry cache filled by addMissing:
   (row, col) pairs and the matching values. */
int *mij;
double *mv;

/* Function headers. */
/* Three-way comparison of two int32 values, used with qsort. */
int32 compareI32( const void *a, const void *b );
/* Node-to-node graph of the mesh in compressed-row form. */
int32 mesh_meshGraph( int32 *p_nnz, int32 **p_prow, int32 **p_icol,
		      int32 nNod, int32 nGr, int32 nEPMax,
		      int32 *ptrGr, int32 *nEl,
		      int32 *nEP, int32 *conn );
/* Equation-to-equation graph derived from a node graph. */
int32 mesh_dofGraph( int32 *p_dnnz, int32 **p_dprow, int32 **p_dicol,
		     int32 nNod, int32 *nprow, int32 *nicol,
		     int32 nEq, int32 *dpn, int32 *dofOffset, int32 *eq );
/* Appends (gr, gc, val) to the missing-entry cache. */
int addMissing( int gr, int gc, double val );



int32 compareI32( const void *a, const void *b )
{
  int32 i1, i2;

  i1 = *((int32 *) a);
  i2 = *((int32 *) b);

  return( i1 - i2 );
}

#undef __FUNC__
#define __FUNC__ "mesh_meshGraph"
/*!
  @par Revision history:
  - 23.05.2003, c
  - 26.05.2003
  - 27.05.2003
  - 28.05.2003
*/
/*
 * Build the node-to-node graph of a mesh in compressed-row form: row
 * `in` lists every node that shares at least one element with node `in`
 * (the node itself included).
 *
 * Inputs
 *   nNod    number of nodes
 *   nGr     number of element groups
 *   nEPMax  stride of `conn`: element iel starts at conn + nEPMax*iel
 *   ptrGr   ptrGr[ig] = index of the first element of group ig
 *   nEl     nEl[ig]   = number of elements in group ig
 *   nEP     nEP[ig]   = number of connectivity slots used per element
 *   conn    node numbers (0-based); negative slots are ignored
 *
 * Outputs (allocated here with allocMem and NOT freed by this routine)
 *   *p_nnz   total number of stored entries
 *   *p_prow  nNod+1 row pointers
 *   *p_icol  column (node) numbers, sorted within each row
 *
 * Returns RET_OK.  Raises a MATLAB error if a row would receive more
 * distinct columns than were counted for it.
 */
int32 mesh_meshGraph( int32 *p_nnz, int32 **p_prow, int32 **p_icol,
		      int32 nNod, int32 nGr, int32 nEPMax,
		      int32 *ptrGr, int32 *nEl,
		      int32 *nEP, int32 *conn )
{
  int32 in, ii, ip, ig, iel, iep, ir, ic, nn, np, pr,
    niecMax, nUnique, iir, iic, found;
  int32 *niec, *pconn, *eonlist, *nir, *nods, *icol;

  /* Step 1: niec[1+node] = number of element slots that reference node.
     A slot holding -1 lands in niec[0], which is discarded just below. */
  niec = allocMem( int32, nNod + 1 );
  memset( niec, 0, (nNod + 1) * sizeof( int32 ) );

  nn = 0;
  for (ig = 0; ig < nGr; ig++) {
    nn += nEP[ig] * nEl[ig];

    for (iel = ptrGr[ig]; iel < (ptrGr[ig] + nEl[ig]); iel++) {
      pconn = conn + nEPMax * iel;
      for (iep = 0; iep < nEP[ig]; iep++) {
        niec[1+pconn[iep]]++;
      }
    }
  }

  /* Largest number of elements around any node.  The counts live in
     niec[1..nNod]; the former loop scanned niec[0..nNod-1], skipped the
     last node, and could leave `nods` too small for it. */
  niec[0] = 0;
  niecMax = 0;
  for (in = 1; in <= nNod; in++) {
    niecMax = Max( niecMax, niec[in] );
  }

  /* Counts -> start offsets into eonlist. */
  for (in = 0; in < nNod; in++) {
    niec[in+1] += niec[in];
  }

  /* Step 2: for every node, the list of (element, group) pairs around it. */
  eonlist = allocMem( int32, 2 * nn );

  nir = allocMem( int32, nNod + 1 );
  memset( nir, 0, (nNod + 1) * sizeof( int32 ) );

  for (ig = 0; ig < nGr; ig++) {
    for (iel = ptrGr[ig]; iel < (ptrGr[ig] + nEl[ig]); iel++) {
      pconn = conn + nEPMax * iel;
      for (iep = 0; iep < nEP[ig]; iep++) {
        np = pconn[iep];
        if (np >= 0) {
          eonlist[2*(niec[np]+nir[np])+0] = iel;
          eonlist[2*(niec[np]+nir[np])+1] = ig;
          nir[np]++;
        }
      }
    }
  }

  /* Step 3: number of distinct neighbours of every node. */
  memset( nir, 0, (nNod) * sizeof( int32 ) );

  nods = allocMem( int32, niecMax * nEPMax );

  nn = 0;
  for (in = 0; in < nNod; in++) {
    /* gather the nodes of all elements around `in` */
    ii = 0;
    for (ip = niec[in]; ip < niec[in+1]; ip++) {
      iel = eonlist[2*(ip)+0];
      ig = eonlist[2*(ip)+1];
      for (iep = 0; iep < nEP[ig]; iep++) {
        np = conn[nEPMax*iel+iep];
        if (np >= 0) {
          nods[ii] = conn[nEPMax*iel+iep];
          ii++;
        }
      }
    }
    qsort( nods, ii, sizeof( int32 ), &compareI32 );

    /* NOTE(review): a node that belongs to no element (ii == 0) is still
       counted as having one entry; that slot is never filled and keeps
       the zero written by the allocator.  Behaviour kept as is. */
    nUnique = 1;
    for (ir = 0; ir < (ii - 1); ir++) {
      if (nods[ir] != nods[ir+1]) {
        nUnique++;
      }
    }
    nn += nUnique;

    nir[in] = nUnique;
  }

  /* Step 4: publish the sizes and turn niec into the final row pointers. */
  *p_nnz = nn;
  *p_prow = niec;
  icol = *p_icol = allocMem( int32, nn );

  niec[0] = 0;
  for (in = 0; in < nNod; in++) {
    niec[in+1] = niec[in] + nir[in];
  }

  /* nir now counts the columns already stored in each row */
  memset( nir, 0, (nNod + 1) * sizeof( int32 ) );

  /* Step 5: fill the column numbers, element by element. */
  for (ig = 0; ig < nGr; ig++) {
    for (iel = ptrGr[ig]; iel < (ptrGr[ig] + nEl[ig]); iel++) {
      pconn = conn + nEPMax * iel;
      for (ir = 0; ir < nEP[ig]; ir++) {
        iir = pconn[ir];
        if (iir < 0) continue;
        pr = niec[iir];
        for (ic = 0; ic < nEP[ig]; ic++) {
          iic = pconn[ic];
          if (iic < 0) continue;

          /* already present in this row? */
          found = 0;
          for (ii = pr; ii < (pr + nir[iir]); ii++) {
            if (icol[ii] == iic) {
              found = 1;
              break;
            }
          }
          if (!found) {
            if (nir[iir] < (niec[iir+1] - pr)) {
              icol[pr+nir[iir]] = iic;
              nir[iir]++;
            } else {
              /* more distinct columns than counted in step 3 */
              mexPrintf( "  %d %d\n", nir[iir], niec[iir+1] - pr );
              mexErrMsgTxt( "ERR_VerificationFail\n" );
            }
          }
        }
        qsort( icol + pr, nir[iir], sizeof( int32 ), &compareI32 );
      }
    }
  }

  freeMem( nods );
  freeMem( nir );
  freeMem( eonlist );

  return( RET_OK );
}
/*
  mex_mesh( 'mesh_meshGraph', size( m.Node, 1 ), m.eli.nGr, size( m.Elt, 2 ), int( m.eli.ptrGr ), int( m.eli.nEl ), int( m.eli.nEP ), int( m.Elt' - 1 ) )
*/

#undef __FUNC__
#define __FUNC__ "mesh_dofGraph"
/*!
  @par Revision history:
  - 26.05.2003, c
  - 27.05.2003
*/
/*
 * Expand a node graph into an equation (DOF) graph in compressed-row form.
 *
 * Inputs
 *   nNod          number of nodes
 *   nprow, nicol  node graph: row pointers and column (node) numbers
 *   nEq           number of equations
 *   dpn           dpn[node] = number of DOFs carried by the node
 *   dofOffset     dofOffset[node] = index of the node's first DOF in `eq`
 *   eq            equation number of each DOF; negative values are skipped
 *
 * Outputs (allocated here with allocMem and NOT freed by this routine)
 *   *p_dnnz   number of stored entries
 *   *p_dprow  nEq+1 row pointers
 *   *p_dicol  column (equation) numbers, sorted within each row
 *
 * Returns RET_OK.  Raises a MATLAB error when the accumulated entry count
 * disagrees with the last row pointer.
 */
int32 mesh_dofGraph( int32 *p_dnnz, int32 **p_dprow, int32 **p_dicol,
                     int32 nNod, int32 *nprow, int32 *nicol,
                     int32 nEq, int32 *dpn, int32 *dofOffset, int32 *eq )
{
  int32 node, nbr, slot, rowDof, colDof, rowEq, colEq;
  int32 total;
  int32 *used, *rowPtr, *cols;

  rowPtr = allocMem( int32, nEq + 1 );
  *p_dprow = rowPtr;
  memset( rowPtr, 0, (nEq + 1) * sizeof( int32 ) );

  total = 0;

  /* Pass 1: per-equation entry counts (stored one slot ahead) and total. */
  for (node = 0; node < nNod; node++) {
    for (rowDof = 0; rowDof < dpn[node]; rowDof++) {
      rowEq = eq[dofOffset[node]+rowDof];
      if (rowEq < 0) continue;

      for (slot = nprow[node]; slot < nprow[node+1]; slot++) {
        nbr = nicol[slot];
        for (colDof = 0; colDof < dpn[nbr]; colDof++) {
          colEq = eq[dofOffset[nbr]+colDof];
          if (colEq >= 0) rowPtr[rowEq+1]++;
        }
      }
      total += rowPtr[rowEq+1];
    }
  }

  /* Counts -> row pointers. */
  for (rowEq = 0; rowEq < nEq; rowEq++) {
    rowPtr[rowEq+1] += rowPtr[rowEq];
  }

  if (total != rowPtr[nEq]) {
    mexPrintf( "(%d == %d)\n", total, rowPtr[nEq] );
    mexErrMsgTxt( "ERR_VerificationFail!" );
  }

  *p_dnnz = total;
  cols = allocMem( int32, total );
  *p_dicol = cols;

  /* used[e] = number of columns already written in row e */
  used = allocMem( int32, nEq + 1 );
  memset( used, 0, (nEq + 1) * sizeof( int32 ) );
  
  /* Pass 2: write the column numbers and sort each row. */
  for (node = 0; node < nNod; node++) {
    for (rowDof = 0; rowDof < dpn[node]; rowDof++) {
      rowEq = eq[dofOffset[node]+rowDof];
      if (rowEq < 0) continue;

      for (slot = nprow[node]; slot < nprow[node+1]; slot++) {
        nbr = nicol[slot];
        for (colDof = 0; colDof < dpn[nbr]; colDof++) {
          colEq = eq[dofOffset[nbr]+colDof];
          if (colEq < 0) continue;
          cols[rowPtr[rowEq]+used[rowEq]] = colEq;
          used[rowEq]++;
        }
      }
      qsort( cols + rowPtr[rowEq], used[rowEq], sizeof( int32 ), &compareI32 );
    }
  }

#ifdef DEBUG_MESH
  /* every row must be filled exactly */
  for (rowEq = 0; rowEq < nEq; rowEq++) {
    if (used[rowEq] != (rowPtr[rowEq+1] - rowPtr[rowEq])) {
      mexPrintf( "(%d == %d)\n", used[rowEq], rowPtr[rowEq+1] - rowPtr[rowEq] );
      mexErrMsgTxt( "ERR_VerificationFail!" );
    }
  }
#endif

  freeMem( used );

  return( RET_OK );
}




/*-----------------------------------------------------------------------*/
/*
 * Add one element matrix into a global sparse matrix.
 *
 *   ir, jc, pr  global matrix: row indices, column pointers and values
 *               (compressed-column storage)
 *   NDDL        number of element DOFs
 *   IsSymVal    non-zero if the element matrix uses symmetric storage;
 *               rejected when the element matrix is sparse
 *   IsSymK      non-zero: skip entries with global row < global column
 *   keind       global index of each element DOF; -1 means "skip"
 *   elmap       full element matrix only: 1-based position in `val` of
 *               entry (lr, lc), read as elmap[NDDL*lc+lr]
 *   vir, vjc    pattern of a sparse element matrix; vir == NULL selects
 *               the full element matrix form
 *   val         element matrix values
 *
 * An entry without a slot in the global pattern is
 *   - ignored when its value is exactly zero (full form only),
 *   - cached through addMissing when AllowMissing is set,
 *   - otherwise reported as a MATLAB error.
 *
 * Always returns 0 (the return type used to be implicit int).
 */
int AssembleSparse(int* ir, int* jc, double* pr, 
		   int NDDL, int IsSymVal, int IsSymK, int* keind, int* elmap, 
	           int* vir, int* vjc, double* val)

{
    int         ie, lr, lc, gr, gc, ii, jj; 
    const int   sparseVal = (vir!=NULL);

    if (sparseVal && IsSymVal) {
      mexErrMsgTxt( "Symmetric sparse storage of element matrix"
                    " not supported!" );
    }

    for (lc = 0; lc < NDDL; lc++) {
      gc = keind[lc];
      if (gc == -1) continue;
      for (lr = 0; lr < NDDL; lr++) {
        gr = keind[lr];
        if (gr == -1) continue;
        if ((IsSymK) && (gr < gc)) continue;
        ii = findPosInSparse( gr, gc, jc, ir );

        if (!sparseVal) { /* full element matrix */
          ie = elmap[NDDL*lc+lr] - 1;
          if (ii >= 0) { pr[ii] += val[ie]; }
          else if (val[ie]==0) { /* nothing to add, no slot needed */ }
          else if (AllowMissing) { addMissing( gr, gc, val[ie] ); }
          else {
            mexPrintf("%i (%i,%i) %f",ie+1,gr,gc,val[ie]);
            mexErrMsgTxt( "Non-existent matrix item!" );
          }
        } else { /* sparse element matrix */
          if (ii >= 0) {
            jj = findPosInSparse( lr, lc, vjc, vir );
            if (jj >= 0) pr[ii] += val[jj];
          } else if (AllowMissing) {
            jj = findPosInSparse( lr, lc, vjc, vir );
            /* the element matrix may have no entry here; the previous
               code then read val[-1] */
            if (jj >= 0) addMissing( gr, gc, val[jj] );
          } else mexErrMsgTxt( "Non-existent matrix item!" );
        }
      }
    }

    return 0;
}

/*-----------------------------------------------------------------------*/

BuildNDNS3d(double* w,double* N,double* Nr,double* Ns,
           double* nodeE,double* jdet,double* NDN, int Nw,
           int Nnode, int Nshape, double* bas, double* J)
{
  /*

nw=size(opt.N,1); % number fo integration points
Nnode=size(opt.N,2);

%n0=node;
%node=n0+[rand(4,1) zeros(4,2)]/5;
%opt2=opt;opt2.jdet=opt2.jdet*0;of_mk('buildndn2d',opt2,node);opt.NDN=opt.NDN*0;
opt.NDN(:,1:size(opt.N,1))=opt.N';

for jw=1:nw

  % local basis on the surface
  % [dx_e;dy_e]=J[dr;ds]
  x=opt.Nr(jw,:)*node;xr=norm(x); x=x/xr; xs=0;
  y=opt.Ns(jw,:)*node;  a=-x*y'; y=y+a*x;
  ys=norm(y);yr=-a; y=y/ys;
  z=-[y(2)*x(3)-y(3)*x(2);y(3)*x(1)-y(1)*x(3);y(1)*x(2)-y(2)*x(1)];

  opt.jdet(jw)=xr*ys;
  opt.NDN(:,nw+jw)=opt.Nr(jw,:)'/xr; % N,x(jw)
  opt.NDN(:,2*nw+jw)=opt.Ns(jw,:)'/ys-opt.Nr(jw,:)'*(yr/xr/ys); % N,x(jw)
  opt.bas(:,jw)=[x';y';z];
end
%norm(opt.NDN-opt2.NDN)

   */
  double        x[3], y[3], z[3], a, xr, xs, ys, yr, *J1;
  int           j1, j2, jw;

  /* opt.NDN(:,1:size(opt.N,1))=opt.N'; */
  for (j1=0;j1<Nshape;j1++) for (jw=0;jw<Nw;jw++) NDN[j1+Nshape*jw]=N[jw+Nw*j1];
  for (jw=0;jw<Nw;jw++) { /* loop on integration points */

    for (j1=0;j1<3;j1++) x[j1] = 0.; 
    for (j1=0;j1<3;j1++) for (j2=0;j2<Nnode;j2++) x[j1] += Nr[jw+j2*Nw]*nodeE[j2+j1*Nnode];
    xr = sqrt(x[0]*x[0]+x[1]*x[1]+x[2]*x[2]);
    for (j1=0;j1<3;j1++) x[j1] /=xr;    
    xs = 0.;
    
    for (j1=0;j1<3;j1++) y[j1] = 0.; 
    for (j1=0;j1<3;j1++) for (j2=0;j2<Nnode;j2++) {y[j1] += Ns[jw+j2*Nw]*nodeE[j2+j1*Nnode];}

    a = 0.;    for (j1=0;j1<3;j1++) a -= x[j1] * y[j1];
    for (j1=0;j1<3;j1++) y[j1] += a * x[j1];
    ys = sqrt(y[0]*y[0]+y[1]*y[1]+y[2]*y[2]);  yr=-a;
    for (j1=0;j1<3;j1++) y[j1] /=ys;

    /*    z=-[y(2)*x(3)-y(3)*x(2);y(3)*x(1)-y(1)*x(3);y(1)*x(2)-y(2)*x(1)]    */
    z[0] = -y[1]*x[2] + y[2]*x[1];
    z[1] = -y[2]*x[0] + y[0]*x[2];
    z[2] = -y[0]*x[1] + y[1]*x[0];

    jdet[jw] = xr * ys;

    /* opt.NDN(:,nw+jw)=opt.Nr(jw,:)'/xr; % N,x(jw)
       opt.NDN(:,2*nw+jw)=opt.Ns(jw,:)'/ys-opt.Nr(jw,:)'*(yr/xr/ys); % N,x(jw) */    
    for (j1=0;j1<Nshape;j1++)    NDN[ j1+Nshape*(Nw+jw) ] = Nr[jw+Nw*j1]/xr;
    for (j1=0;j1<Nshape;j1++) {
      NDN[ j1+Nshape*(2*Nw+jw) ] = Ns[jw+Nw*j1]/ys -Nr[jw+Nw*j1]*(yr/xr/ys);
    }
    /* opt.bas(:,jw)=[x';y';z]; */
    if (J!=NULL) { 
      J1=J+4*jw; 
                J1[0]= ys/jdet[jw]; J1[2]=-xs/jdet[jw];
                J1[1]=-yr/jdet[jw]; J1[3]= xr/jdet[jw]; 
    }
    if (bas!=NULL) { 
      for (j1=0;j1<3;j1++) bas[j1+9*jw]   = x[j1];  
      for (j1=0;j1<3;j1++) bas[j1+9*jw+3] = y[j1];  
      for (j1=0;j1<3;j1++) bas[j1+9*jw+6] = z[j1];  
    }
  } /* for jW */


}
/*-----------------------------------------------------------------------*/

/*
 * Shape functions and their x, y, z derivatives for a volume element,
 * evaluated at every integration point.
 *
 *   w           not used by this routine
 *   N           shape function values, read as N[jw+j*Nw]
 *   Nr, Ns, Nt  derivatives with respect to the reference coordinates,
 *               same layout
 *   nodeE       node coordinates, coordinate c of node j at
 *               nodeE[j+c*Nnode]
 *   jdet        output, Nw values: determinant of the Jacobian
 *   NDN         output, Nshape x 4*Nw, column-major; column blocks of
 *               width Nw hold N, N,x, N,y and N,z in that order
 *
 * No check is made for a zero determinant.
 * Always returns 0 (the return type used to be implicit int).
 */
int BuildNDN3d(double* w,double* N,double* Nr,double* Ns,double* Nt,
           double* nodeE,double* jdet,double* NDN, int Nw,
           int Nnode, int Nshape)
{
  double        xr, yr, zr, xs, ys, zs, xt, yt, zt,
                cof11, cof12, cof13, cof21, cof22, cof23, 
                cof31, cof32, cof33;
  int           jw, j2;

  (void) w;

  for (jw=0;jw<Nw;jw++) { /* loop on integration points */

    /* Jacobian entries: derivatives of x, y, z along r, s, t */
    xr=0.;xs=0.;xt=0.;yr=0.;ys=0.;yt=0.;zr=0.;zs=0.;zt=0.;
    for (j2=0;j2<Nnode;j2++) {
      xr += Nr[j2*Nw+jw] * nodeE[j2]; 
      xs += Ns[j2*Nw+jw] * nodeE[j2]; 
      xt += Nt[j2*Nw+jw] * nodeE[j2]; 
      yr += Nr[j2*Nw+jw] * nodeE[j2+Nnode]; 
      ys += Ns[j2*Nw+jw] * nodeE[j2+Nnode]; 
      yt += Nt[j2*Nw+jw] * nodeE[j2+Nnode]; 
      zr += Nr[j2*Nw+jw] * nodeE[j2+2*Nnode]; 
      zs += Ns[j2*Nw+jw] * nodeE[j2+2*Nnode]; 
      zt += Nt[j2*Nw+jw] * nodeE[j2+2*Nnode]; 
    } /* j2 */

    /* cofactors of the Jacobian and its determinant */
    cof11 = ys*zt - yt*zs; cof12 = yt*zr - yr*zt; cof13 = yr*zs - ys*zr;
    cof21 = zs*xt - zt*xs; cof22 = zt*xr - zr*xt; cof23 = zr*xs - zs*xr;
    cof31 = xs*yt - xt*ys; cof32 = xt*yr - xr*yt; cof33 = xr*ys - xs*yr;
    jdet[jw] = xr*cof11+xs*cof12+xt*cof13; 

    for (j2=0;j2<Nshape;j2++) {
      NDN[j2+jw*Nshape]=N[jw+j2*Nw];                             /* N  */
      NDN[j2+(jw+Nw)*Nshape] = ( cof11*Nr[jw+j2*Nw]
                               + cof12*Ns[jw+j2*Nw]
                               + cof13*Nt[jw+j2*Nw] )/jdet[jw];  /* Nx */
      NDN[j2+(jw+2*Nw)*Nshape] = ( cof21*Nr[jw+j2*Nw]
                                 + cof22*Ns[jw+j2*Nw]
                                 + cof23*Nt[jw+j2*Nw]) /jdet[jw]; /* Ny */ 
      NDN[j2+(jw+3*Nw)*Nshape] = ( cof31*Nr[jw+j2*Nw]
                                 + cof32*Ns[jw+j2*Nw]
                                 + cof33*Nt[jw+j2*Nw]) /jdet[jw]; /* Nz */
    } /* j2 */
  } /* jw */

  return 0;
}
/*-----------------------------------------------------------------------*/
/*
 * Same quantities as BuildNDN3d, with two differences visible here:
 * the Jacobian entries are obtained through of_ddot, and the output is
 * interleaved per shape function.
 *
 *   w           not used by this routine
 *   N           shape function values, read as N[jw+j*Nw]
 *   Nr, Ns, Nt  derivatives with respect to the reference coordinates
 *   nodeE       node coordinates, coordinate c of node j at
 *               nodeE[j+c*Nnode]
 *   jdet        output, Nw values: determinant of the Jacobian
 *   NDN         output: for point jw and shape function j the four values
 *               N, N,x, N,y, N,z are stored at NDN[4*j + 4*Nshape*jw + 0..3]
 *
 * of_ddot is declared elsewhere; the calls assume the BLAS ddot argument
 * order (n, x, incx, y, incy) -- TODO confirm.
 * Always returns 0 (the return type used to be implicit int).
 */
int BuildNDN3dW(double* w,double* N,double* Nr,double* Ns,double* Nt,
           double* nodeE,double* jdet,double* NDN, int Nw,
           int Nnode, int Nshape)
{
  double        xr, yr, zr, xs, ys, zs, xt, yt, zt,
                cof11, cof12, cof13, cof21, cof22, cof23, 
                cof31, cof32, cof33;
  int           jw, j2, unit=1;

  (void) w;

  for (jw=0;jw<Nw;jw++) { /* loop on integration points */

    /* Jacobian entries: row jw of Nr/Ns/Nt (stride Nw) against one
       coordinate column of nodeE (stride 1) */
    xr=of_ddot(&Nnode,Nr+jw,&Nw,nodeE,&unit);
    xs=of_ddot(&Nnode,Ns+jw,&Nw,nodeE,&unit);
    xt=of_ddot(&Nnode,Nt+jw,&Nw,nodeE,&unit);
    yr=of_ddot(&Nnode,Nr+jw,&Nw,nodeE+Nnode,&unit);
    ys=of_ddot(&Nnode,Ns+jw,&Nw,nodeE+Nnode,&unit);
    yt=of_ddot(&Nnode,Nt+jw,&Nw,nodeE+Nnode,&unit);
    zr=of_ddot(&Nnode,Nr+jw,&Nw,nodeE+2*Nnode,&unit);
    zs=of_ddot(&Nnode,Ns+jw,&Nw,nodeE+2*Nnode,&unit);
    zt=of_ddot(&Nnode,Nt+jw,&Nw,nodeE+2*Nnode,&unit);

    /* cofactors of the Jacobian and its determinant */
    cof11 = ys*zt - yt*zs; cof12 = yt*zr - yr*zt; cof13 = yr*zs - ys*zr;
    cof21 = zs*xt - zt*xs; cof22 = zt*xr - zr*xt; cof23 = zr*xs - zs*xr;
    cof31 = xs*yt - xt*ys; cof32 = xt*yr - xr*yt; cof33 = xr*ys - xs*yr;
    jdet[jw] = xr*cof11+xs*cof12+xt*cof13; 

    for (j2=0;j2<Nshape;j2++) {
      NDN[j2*4+jw*Nshape*4]   =N[jw+j2*Nw];                       /* N  */
      NDN[j2*4+jw*Nshape*4+1] = ( cof11*Nr[jw+j2*Nw]
                                + cof12*Ns[jw+j2*Nw]
                                + cof13*Nt[jw+j2*Nw] )/jdet[jw];  /* Nx */
      NDN[j2*4+jw*Nshape*4+2] = ( cof21*Nr[jw+j2*Nw]
                                + cof22*Ns[jw+j2*Nw]
                                + cof23*Nt[jw+j2*Nw]) /jdet[jw];  /* Ny */ 
      NDN[j2*4+jw*Nshape*4+3] = ( cof31*Nr[jw+j2*Nw]
                                + cof32*Ns[jw+j2*Nw]
                                + cof33*Nt[jw+j2*Nw]) /jdet[jw];  /* Nz */
    } /* j2 */
  } /* jw */

  return 0;
}
/*-----------------------------------------------------------------------*/
/*
 * Shape functions and their x, y derivatives for a planar element,
 * evaluated at every integration point.
 *
 *   w       not used by this routine
 *   N       shape function values, read as N[jw+j*Nw]
 *   Nr, Ns  derivatives with respect to the reference coordinates,
 *           same layout
 *   nodeE   node coordinates: x of node j at nodeE[j], y at nodeE[j+Nnode]
 *   jdet    output, Nw values: xr*ys - xs*yr
 *   NDN     output, Nshape x 3*Nw, column-major; column blocks of width
 *           Nw hold N, N,x and N,y in that order
 *
 * No check is made for a zero determinant.
 * Always returns 0 (the return type used to be implicit int).
 */
int BuildNDN2d(double* w,double* N,double* Nr,double* Ns,
           double* nodeE,double* jdet,double* NDN, int Nw,
           int Nnode, int Nshape)

{
  double        xr, yr, xs, ys; 
  int           jw, j2;

  (void) w;

  for (jw=0;jw<Nw;jw++) { /* loop on integration points */

    /* Jacobian entries */
    xr=0.;xs=0.;yr=0.;ys=0.;
    for (j2=0;j2<Nnode;j2++) {
      xr += Nr[j2*Nw+jw] * nodeE[j2]; 
      xs += Ns[j2*Nw+jw] * nodeE[j2]; 
      yr += Nr[j2*Nw+jw] * nodeE[j2+Nnode]; 
      ys += Ns[j2*Nw+jw] * nodeE[j2+Nnode]; 
    } /* j2 */

    jdet[jw] = xr*ys-xs*yr; 

    for (j2=0;j2<Nshape;j2++) {   
      NDN[j2+jw*Nshape]        = N[jw+j2*Nw];                    /* N  */
      NDN[j2+(jw+Nw)*Nshape]   = ( ys*Nr[jw+j2*Nw]
                                 - yr*Ns[jw+j2*Nw] )/jdet[jw];   /* Nx */
      NDN[j2+(jw+2*Nw)*Nshape] = (- xs*Nr[jw+j2*Nw]
                                  + xr*Ns[jw+j2*Nw] )/jdet[jw];  /* Ny */
    } /* j2 */
  } /* jw */

  return 0;
}

/*-----------------------------------------------------------------------*/

BuildNDN1d(double* w,double* N,double* Nr,
           double* nodeE,double* jdet,
           double* NDN, int Nw,
           int Nnode, int Nshape)
{
        double        xr, yr, zr;
        int           jw, jNode;

        for (jw=0;jw<Nw;jw++) { /* loop on integration points */

          xr=0.;yr=0.;zr=0.;

          for (jNode=0;jNode<Nnode;jNode++) {
            xr += Nr[jNode*Nw+jw] * nodeE[jNode]; 
            yr += Nr[jNode*Nw+jw] * nodeE[jNode+Nnode]; 
            zr += Nr[jNode*Nw+jw] * nodeE[jNode+2*Nnode]; 
	  } /* jNode */

         jdet[jw] = sqrt(xr*xr+yr*yr+zr*zr); 

        } /* jw Nw */

}


/*-----------------------------------------------------------------------
function Mecha3DInteg(k,DD,w,jdet,NDN,Nnode,Ndof,Nw,jW)
   DD=zeros(9); 
   for jn=0:2; for jj=0:2; for jm=0:2; for jl=0:2; 
     for ji=0:2; for jk=0:2
      DD(jn+3*jj+9*jm+27*jl+1)=DD(jn+3*jj+9*jm+27*jl+1)+ ...
       F_ij(jn+ji*3+1)* ...
       d2wde2(ind_ts_eg(ji+3*jj+1)+6*(ind_ts_eg(jk+3*jl+1)-1))* ...
       F_ij(jm+jk*3+1);
     end;end
     if jn==jm; 
       DD(jn+3*jj+9*jm+27*jl+1)=DD(jn+3*jj+9*jm+27*jl+1)+ ...
        Sigma(jj+3*jl+1);
     end
   end;end;end;end

   for ji=0:2;for jj=0:2;for jk=0:2;for jl=0:2;
    coef=DD(ji+jj*3+jk*9+jl*27+1)*jdet(jW+1)*w(jW+1);
    of_mk('k<-k+a*x*y',k,NDN,NDN,coef, ...
     int32([Nnode*ji+Ndof*Nnode*jk ... % block in stiffness matrix
            Nnode*Nw*[1+jj 1+jl]   ... % columns in NDN
            Ndof Nnode Nnode 1 1]));
   end;end;end;end
*/

/*
 * Add the contribution of integration point jw to the element matrix k.
 *
 *   k        element matrix, updated in place (leading dimension Ndof)
 *   F        3x3 array, read as F[a+3*b]
 *   d2wde2   6x6 array, read as d2wde2[p+6*q]
 *   Sigma    3x3 array, read as Sigma[a+3*b]
 *   w, jdet  integration weights and Jacobian determinants; only entry
 *            jw of each is read
 *   NDN      shape function data; the vectors used start at
 *            NDN + Nnode*(Nw*(d+1)+jw) for d = 0..2
 *   Nnode, Ndof, Nw   sizes used in the addressing above
 *
 * Step 1 builds the 3x3x3x3 array
 *   DD[n,j,m,l] = sum_{i,k} F[n,i] * d2wde2[e(i,j), e(k,l)] * F[m,k]
 *                 + (n == m ? Sigma[j,l] : 0)
 * where e(.,.) maps an index pair to a position 0..5 through ind_ts_eg.
 * Step 2 issues one of_dger call per entry of DD, with coefficient
 * DD * jdet[jw] * w[jw].
 *
 * of_dger is declared elsewhere; the call assumes the BLAS dger argument
 * order (m, n, alpha, x, incx, y, incy, A, lda) -- TODO confirm.
 * Always returns 0 (the return type used to be implicit int).
 */
int Mecha3DInteg(double* k,double* F, double* d2wde2, double* Sigma, 
             double* w,double* jdet, double* NDN,
             int Nnode,int Ndof,int Nw, int jw)
{
  double coef[1], DD[81];
  int    ji,jj,jk,jl,jm,jn,unit=1,ind_ts_eg[9]={0,5,4,5,1,3,4,3,2};

  for (ji=0;ji<81;ji++) DD[ji]=0;

  /* Step 1: material part plus stress part on the n == m entries */
  for (jn=0;jn<3;jn++) {
  for (jj=0;jj<3;jj++) {
  for (jm=0;jm<3;jm++) {
  for (jl=0;jl<3;jl++) {
    for (ji=0;ji<3;ji++) {
    for (jk=0;jk<3;jk++) {
      DD[jn+3*jj+9*jm+27*jl]+=
       F[jn+ji*3]*
       d2wde2[ind_ts_eg[ji+3*jj]+6*ind_ts_eg[jk+3*jl]]*
       F[jm+jk*3];
    }}
    if (jn==jm) {
      DD[jn+3*jj+9*jm+27*jl]+=Sigma[jj+3*jl];
    }
  }}}}

  /* Step 2: one rank-one update of a block of k per entry of DD */
  for (ji=0;ji<3;ji++) {
  for (jj=0;jj<3;jj++) {
  for (jk=0;jk<3;jk++) {
  for (jl=0;jl<3;jl++) {
    coef[0]=DD[ji+jj*3+jk*9+jl*27]*jdet[jw]*w[jw];

    of_dger(&Nnode,&Nnode,coef,NDN+Nnode*(Nw*(jj+1)+jw),&unit,
      NDN+Nnode*(Nw*(jl+1)+jw),&unit,
      k+Nnode*ji+Ndof*Nnode*jk,&Ndof);

  }}}}

  return 0;
}

/* -----------------------------------------------------------------------*/
/*
 * Add the contribution of integration point jw to the element matrix k
 * and to the vector Be, using matrix products (of_dgemm / of_daxpy).
 *
 *   k        element matrix, updated in place (leading dimension Ndof)
 *   Be       vector of Ndof values, updated in place
 *   F        9 values, placed into the 6x9 matrix D below
 *   d2wde2   6x6 array, read as d2wde2[6*q+p]
 *   Sigma    6 values: Sigma[0..2] go on the diagonal, Sigma[3..5] on the
 *            off-diagonal positions (index 6-i-j)
 *   w, jdet  integration weights and Jacobian determinants; only entry
 *            jw of each is read
 *   NDN      shape function data with four values per shape function; the
 *            derivative data used starts at NDN + Nnode*4*jw + 1
 *   Nnode, Ndof, Nw   sizes; Nw is not used here
 *
 * Steps: build D from F, form DD = D' * d2wde2 * D (9x9), add the Sigma
 * terms to the three diagonal 3x3 blocks of DD, then accumulate into k
 * block by block and accumulate Be from B = D * Dp.
 *
 * of_dgemm and of_daxpy are declared elsewhere; the calls assume the BLAS
 * dgemm / daxpy argument order -- TODO confirm.  Under that assumption
 * the scratch array TEMP (81 doubles) receives 3*Nnode values, so Nnode
 * must not exceed 27.
 * Always returns 0 (the return type used to be implicit int).
 */
int Mecha3DIntegH(double* k, double *Be, double* F, double* d2wde2, double* Sigma, double* w,
                 double* jdet, double* NDN, int Nnode,int Ndof,int Nw, int jw)
{
  double DD[9][9] ,D[6][9], AUX[6][9], TEMP[81],*B, alpha, beta,one=1.,zero=0. ;
  int    ji,jj,jn;
  int    unit=1, three=3 ,four=4, six=6, nine=9;
  char   norm = 'N', trans= 'T';

  (void) Nw;

  B=mxCalloc(18*Nnode,sizeof(double));

  for (ji=0;ji<6;ji++) {for (jj=0;jj<9;jj++) D[ji][jj]=0;}

  D[0][0]=F[0];  D[0][3]=F[1];  D[0][6]=F[2]; 
  D[1][1]=F[3];  D[1][4]=F[4];  D[1][7]=F[5]; 
  D[2][2]=F[6];  D[2][5]=F[7];  D[2][8]=F[8];
 
  D[3][1]=F[6];  D[3][2]=F[3];  D[3][4]=F[7]; 
  D[3][5]=F[4];  D[3][7]=F[8];  D[3][8]=F[5];

  D[4][0]=F[6];  D[4][2]=F[0];  D[4][3]=F[7]; 
  D[4][5]=F[1];  D[4][6]=F[8];  D[4][8]=F[2];

  D[5][0]=F[3];  D[5][1]=F[0];  D[5][3]=F[4]; 
  D[5][4]=F[1];  D[5][6]=F[5];  D[5][7]=F[2];

  /* AUX = d2wde2 * D */
  for (ji=0;ji<6;ji++) {
  for (jj=0;jj<9;jj++) {
    AUX[ji][jj] =0;
    for (jn=0;jn<6;jn++) {
      AUX[ji][jj] +=  d2wde2[6*jn+ji]*D[jn][jj];
    }
  }}

  /* DD = D' * AUX */
  for (ji=0;ji<9;ji++) {
  for (jj=0;jj<9;jj++) {
    DD[ji][jj] =0;
    for (jn=0;jn<6;jn++) {
      DD[ji][jj] +=  D[jn][ji]*AUX[jn][jj];
    }
  }}

  /* add k_nl: the Sigma terms on each diagonal 3x3 block of DD */
  for (jj=0;jj<3;jj++) {
    DD[jj][jj]     += Sigma[jj];
    DD[jj+3][jj+3] += Sigma[jj];
    DD[jj+6][jj+6] += Sigma[jj];
    for (ji=0;ji<3;ji++) {
      if(ji!=jj) {
        DD[ji][jj]     += Sigma[6-jj-ji];
        DD[ji+3][jj+3] += Sigma[6-jj-ji];
        DD[ji+6][jj+6] += Sigma[6-jj-ji];
      }
    }
  }

  alpha = jdet[jw]*w[jw]; 

  /*              Block multiplication

             K_ij        = DP^T  DD_ij      DP 
        (Nnode x Nnode)        (3 x 3)  (3 x Nnode)      */

  for (ji=0;ji<3;ji++) {
    for (jj=0;jj<3;jj++) { 

      /* TEMP = DD_ij * Dp */
      of_dgemm(&trans,&norm,&three,&Nnode,&three,&one,DD[3*ji]+3*jj,&nine,
               NDN+Nnode*4*jw+1,&four,&zero,TEMP,&three); 

      /* K_ij = K_ij + jdet[jw]*w[jw] * Dp^T * TEMP */
      of_dgemm(&trans,&norm,&Nnode,&Nnode,&three,&alpha, NDN+Nnode*4*jw+1,
               &four,TEMP,&three,&one, k+Nnode*ji+Ndof*Nnode*jj,&Ndof);
    }

    /* B = D * Dp (right-hand side) */
    of_dgemm(&trans,&norm,&six,&Nnode,&three,&one,D[0]+3*ji,&nine,
             NDN+Nnode*4*jw+1,&four,&zero,B+6*ji*Nnode,&six); 
  }  

  /* Be = Be + alpha * Sigma_i * B[i] (right-hand side) */
  for (ji=0;ji<6;ji++) {
    beta =alpha*Sigma[ji];
    of_daxpy(&Ndof,&beta,B+ji,&six,Be,&unit);
  }

  mxFree(B); 

  return 0;
}
