#include <comp.hpp>
#include <multigrid.hpp>

namespace ngcomp
{
  using namespace ngcomp;

  // dummy function header
  void CalcEigenSystem (FlatMatrix<Complex> & elmat, 
			FlatVector<> & lami, 
			FlatMatrix<> & evecs)
  { ; }



  BilinearForm :: 
  BilinearForm (const FESpace & afespace,
		const string & aname,
		const Flags & flags)
    : NGS_Object(afespace.GetMeshAccess(), aname), fespace(afespace)
  {
    fespace2 = NULL;
    diagonal = 0;
    multilevel = 1;
    galerkin = 0;
    symmetric = 1;
    hermitean = 0;
    nonassemble = 0;
    SetEpsRegularization (0);
    SetUnusedDiag (1);
    low_order_bilinear_form = 0;
    linearform = 0;
    timing = 0;
    print = 0;
    printelmat = 0;
    elmat_ev = 1;
    eliminate_internal = 0;
  }


  BilinearForm :: 
  BilinearForm (const FESpace & afespace,
		const FESpace & afespace2, const string & aname,
		const Flags & flags)
    : NGS_Object(afespace.GetMeshAccess(), aname), fespace(afespace), fespace2(&afespace2)
  {
    diagonal = 0;
    multilevel = 1;
    galerkin = 0;
    symmetric = 1;
    hermitean = 0;
    nonassemble = 0;
    SetEpsRegularization (0);
    SetUnusedDiag (0);
    low_order_bilinear_form = 0;
    linearform = 0;
    timing = 0;
    print = 0;
    printelmat = 0;
    elmat_ev = 1;
    eliminate_internal = 0;
  }



  BilinearForm :: ~BilinearForm ()
  {
    if (!low_order_bilinear_form)
      for (int i = 0; i < parts.Size(); i++)
	delete parts[i];
    delete low_order_bilinear_form;
  }

  void BilinearForm :: SetPrint (bool ap)
  { 
    print = ap; 
    if (low_order_bilinear_form)
      low_order_bilinear_form -> SetPrint (ap);
  }

  void BilinearForm :: SetPrintElmat (bool ap)
  { 
    printelmat = ap; 
    if (low_order_bilinear_form)
      low_order_bilinear_form -> SetPrintElmat (ap);
  }



  void BilinearForm ::  SetElmatEigenValues (bool ee)
  { 
    elmat_ev = ee;
    if (low_order_bilinear_form)
      low_order_bilinear_form -> SetElmatEigenValues (ee);
  }




  void BilinearForm :: Assemble (LocalHeap & lh)
  {
    if (mats.Size() == ma.GetNLevels())
      return;

    if (nonassemble)
      {
	mats.Append (new BilinearFormApplication (this)); 
      
	if (timing)
	  {
	    clock_t starttime;
	    double time;
	    starttime = clock();
	  
	    BaseVector & vecf = *mats.Last()->CreateVector();
	    BaseVector & vecu = *mats.Last()->CreateVector();
	  
	    vecu = 1;
	    int steps = 0;
	    do
	      {
		vecf = (*mats.Last()) * vecu;
		steps++;
		time = double(clock() - starttime) / CLOCKS_PER_SEC;
	      }
	    while (time < 2.0);
	  
	    cout << " 1 application takes "
		 << time / steps
		 << " seconds" << endl;
	  }

	return;
      }

    if (low_order_bilinear_form)
      low_order_bilinear_form->Assemble(lh);


    try
      {
	AllocateMatrix ();
      }
    catch (exception & e)
      {
	throw Exception (e.what() + 
			 string ("\nthrown by allocate matrix ") +
			 string (GetName()));
      }
    catch (Exception & e)
      {
	e.Append (string ("\nthrown by allocate matrix ") +
		  string (GetName()));
	throw;
      }



    DoAssemble(lh);


    if (timing)
      {
	clock_t starttime;
	double time;
	starttime = clock();
      
	BaseVector & vecf = *mats.Last()->CreateVector();
	BaseVector & vecu = *mats.Last()->CreateVector();
      
	vecu = 1;
	int steps = 0;
	do
	  {
	    vecf = (*mats.Last()) * vecu;
	    steps++;
	    time = double(clock() - starttime) / CLOCKS_PER_SEC;
	  }
	while (time < 1.0);
	  
	cout << " 1 application takes "
	     << time / steps
	     << " seconds" << endl;



	VarBlockSparseMatrix<double> * bsm = 
	  VarBlockSparseMatrix<double>::Create (dynamic_cast<const SparseMatrix<double>&> (*mats.Last()));

	steps = 0;
	starttime = clock();
	do
	  {
	    vecf = (*bsm) * vecu;
	    steps++;
	    time = double(clock() - starttime) / CLOCKS_PER_SEC;
	  }
	while (time < 1.0);
	  
	cout << " block matrix, 1 application takes "
	     << time / steps
	     << " seconds" << endl;


      }



    if (galerkin)
      GalerkinProjection();
  }


  void BilinearForm :: ReAssemble (LocalHeap & lh, bool reallocate)
  {

    if (nonassemble) return;

    if (low_order_bilinear_form)
      low_order_bilinear_form->ReAssemble(lh);

    if (mats.Size() < ma.GetNLevels())
      {
	Assemble(lh);
	return;
      }


    if (reallocate)
      {
	delete mats.Last();
	// delete graphs.Last();
	mats.DeleteLast();
	// graphs.DeleteLast();

	Assemble(lh);
	return;
      }

    GetMatrix() = 0.0;
    DoAssemble(lh);

    if (galerkin)
      GalerkinProjection();
  }


  void BilinearForm :: PrintReport (ostream & ost)
  {
    ost << "on space " << GetFESpace().GetName() << endl
	<< "symmetric   = " << symmetric << endl
	<< "multilevel  = " << multilevel << endl
	<< "nonassemble = " << nonassemble << endl
	<< "printelmat = " << printelmat << endl
	<< "eliminate_internal = " << eliminate_internal << endl
	<< "integrators: " << endl;
  
    for (int i = 0; i < parts.Size(); i++)
      ost << "  " << parts[i]->Name() << endl;
  }


  void BilinearForm :: MemoryUsage (ARRAY<MemoryUsageStruct*> & mu) const
  {
    if (low_order_bilinear_form)
      low_order_bilinear_form -> MemoryUsage (mu);
  
    int olds = mu.Size();

    for (int i = 0; i < mats.Size(); i++)
      if (mats[i]) mats[i]->MemoryUsage (mu);
    //  for (i = 0; i < graphs.Size(); i++)
    //    if (graphs[i]) graphs[i]->MemoryUsage (mu);

    for (int i = olds; i < mu.Size(); i++)
      mu[i]->AddName (string(" bf ")+GetName());
  }


  template <class SCAL>
  void S_BilinearForm<SCAL> :: DoAssemble (LocalHeap & lh)
  {
    try
      {
	if (!MixedSpaces())
	
	  {
	    ma.PushStatus ("Assemble Matrix");

	    ARRAY<int> dnums, idofs;

	    ElementTransformation eltrans;
	  
	    int ndof = fespace.GetNDof();
	    BitArray useddof(ndof);
	    useddof.Clear();

	    int ne = ma.GetNE();
	    int nse = ma.GetNSE();

	    BaseMatrix & mat = GetMatrix();
	    mat = 0.0;

	    bool hasbound = 0;
	    bool hasinner = 0;

	    for (int j = 0; j < NumIntegrators(); j++)
	      {
		if (parts[j] -> BoundaryForm())
		  hasbound = 1;
		else
		  hasinner = 1;
	      }

	    clock_t prevtime = clock();
	    if (hasinner)
	      {
		void * heapp = lh.GetPointer();
		for (int i = 0; i < ne; i++)
		  {
		    if (clock()-prevtime > 0.1 * CLOCKS_PER_SEC)
		      {
			cout << "\rassemble element " << i << "/" << ne << flush;
			ma.SetThreadPercentage ( 100.0*i / (ne+nse) );
			prevtime = clock();
		      }
		
		    lh.CleanUp(heapp);
		  
		    if (!fespace.DefinedOn (ma.GetElIndex (i))) continue;
		  
		    const FiniteElement & fel = fespace.GetFE (i, lh);
		  
		    ma.GetElementTransformation (i, eltrans, lh);
		    fespace.GetDofNrs (i, dnums);
		  
		  
		    FlatMatrix<SCAL> sum_elmat(dnums.Size()*fespace.GetDimension(), lh);
		    sum_elmat = 0;
		    for (int j = 0; j < NumIntegrators(); j++)
		      {
			const BilinearFormIntegrator & bfi = *parts[j];
		      
			if (bfi.BoundaryForm()) continue;
			if (!bfi.DefinedOn (ma.GetElIndex (i))) continue;
		      
			FlatMatrix<SCAL> elmat;
			try
			  {
			    clock_t starttime;
			    double time;
			    starttime = clock();
			  
			    bfi.AssembleElementMatrix (fel, eltrans, elmat, lh);
			  
			    time = double(clock() - starttime) / CLOCKS_PER_SEC;
			    // cout << "time = " << time << endl;
			  
			    if (printelmat) //  || fel.ElementType() == ET_TET)
			      {
				testout->precision(8);
				(*testout) << "elnum= " << i << endl;
				(*testout) << "integrator " << bfi.Name() << endl;
				(*testout) << "dnums = " << endl << dnums << endl;
				(*testout) << "elmat = " << endl << elmat << endl;
			      
				// Matrix<SCAL> invelmat (elmat.Height());
				// CalcInverse (elmat, invelmat);
				// (*testout) << "inv_elmat = " << endl << invelmat << endl;
			      }
			  
			    /*
			      int nz = 0;
			      for (int hi = 0; hi < elmat.Height(); hi++)
			      for (int hj = 0; hj < elmat.Width(); hj++)
			      if (abs (elmat(hi,hj)) < 1e-14) nz++;
			      cout << "elmat has " << double(nz) / (elmat.Height()*elmat.Width()) * 100 << " % zero entries" << endl;
			    */
			  
			    if (elmat_ev)
			      {
				(*testout) << "elind = " << eltrans.GetElementIndex() << endl;
			      
				Vector<> lami(elmat.Height());
				Matrix<> evecs(elmat.Height());
			      
				CalcEigenSystem (elmat, lami, evecs);
				(*testout) << "lami = " << endl << lami << endl
					   << "evecs = " << endl << evecs << endl;
			      
				(*testout) << "ev * elmat * ev^T = " << evecs * elmat * Trans (evecs) << endl;
			      } 
			  
			  }
			catch (Exception & e)
			  {
			    e.Append (string("in Assemble Element Matrix, bfi = ") + 
				      bfi.Name() + string("\n"));
			    throw;
			  }
			catch (exception & e)
			  {
			    throw (Exception (string(e.what()) +
					      string("in Assemble Element Matrix, bfi = ") + 
					      bfi.Name() + string("\n")));
			  }
		      
			sum_elmat += elmat;
		      }

		    fespace.TransformMat (i, false, sum_elmat, TRANSFORM_MAT_LEFT_RIGHT);

		    if (eliminate_internal)
		      {
			fel.GetInternalDofs(idofs);
			if (idofs.Size())
			  {
			    (*testout) << idofs.Size() << " idofs out of " << dnums.Size() << endl;
			    // (*testout) << "idofs = " << endl << idofs << endl;
			    // (*testout) << "elmat, before = " << endl << sum_elmat << endl;

			    int dim = sum_elmat.Height() / dnums.Size();
			
			    FlatVector<SCAL> elvec (sum_elmat.Height(), lh);
			    if (linearform)
			      linearform->GetVector().GetIndirect (dnums, elvec);

			    for (int jj = 0; jj < idofs.Size(); jj++)
			      for (int j = dim * idofs[jj]; j < dim*(idofs[jj]+1); j++)
				{
				  SCAL val = sum_elmat(j,j);
				  if (val != 0.0) val = 1.0/val;
				  else val = 0.0;

				  /*
				    for (int k = 0; k < sum_elmat.Height(); k++) if (k != j)
				    for (int l = 0; l < sum_elmat.Width(); l++) if (l != j)
				    sum_elmat(k,l) -= val * sum_elmat(k,j) * sum_elmat(j,l);
				  */
			      
				  SCAL fval = val * elvec(j);
				  for (int k = 0; k < sum_elmat.Height(); k++)
				    elvec(k) -= sum_elmat(k,j) * fval;

				  for (int k = 0; k < sum_elmat.Height(); k++) if (k != j)
				    {
				      SCAL hval = val * sum_elmat(k,j);
				      SCAL * pk = &sum_elmat(k,0);
				      SCAL * pj = &sum_elmat(j,0);
				  
				      for (int l = 0; l < j; l++) 
					pk[l] -= hval * pj[l];
				      for (int l = j+1; l < sum_elmat.Width(); l++) 
					pk[l] -= hval * pj[l];
				    }

				  for (int k = 0; k < sum_elmat.Height(); k++)
				    sum_elmat(k,j) = sum_elmat(j,k) = 0;
			      
				  sum_elmat(j,j) = 1;
				  dnums[idofs[jj]] = -1;
				}
			    // (*testout) << "elmat, after = " << endl << sum_elmat << endl;

			    if (linearform)
			      linearform->GetVector().SetIndirect (dnums, elvec);
			  }
		      }

		    AddElementMatrix (dnums, dnums, sum_elmat, lh);

		    for (int j = 0; j < dnums.Size(); j++)
		      if (dnums[j] != -1)
			useddof.Set (dnums[j]);
		  }
		lh.CleanUp(heapp);
		cout << "\rassemble element " << ne << "/" << ne << endl;
	      }


	    if (hasbound)
	      {

		for (int i = 0; i < nse; i++)
		  {
		    if (i % 100 == 0)
		      cout << "\rassemble surface element " << i << "/" << nse << flush;
		    ma.SetThreadPercentage ( 100.0*(ne+i) / (ne+nse) );

		    lh.CleanUp();
		  
		    if (!fespace.DefinedOnBoundary (ma.GetSElIndex (i))) continue;
		
		    const FiniteElement & fel = fespace.GetSFE (i, lh);
		
		    ma.GetSurfaceElementTransformation (i, eltrans, lh);
		    fespace.GetSDofNrs (i, dnums);

		    for (int j = 0; j < dnums.Size(); j++)
		      if (dnums[j] != -1)
			useddof.Set (dnums[j]);

		    for (int j = 0; j < NumIntegrators(); j++)
		      {
			const BilinearFormIntegrator & bfi = *parts[j];
		    
			if (!bfi.BoundaryForm()) continue;
			if (!bfi.DefinedOn (ma.GetSElIndex (i))) continue;		    

			FlatMatrix<SCAL> elmat;
			bfi.AssembleElementMatrix (fel, eltrans, elmat, lh);

			fespace.TransformMat (i, true, elmat, TRANSFORM_MAT_LEFT_RIGHT);

			if (printelmat)
			  {
			    testout->precision(8);

			    (*testout) << "surface-elnum= " << i << endl;
			    (*testout) << "integrator " << bfi.Name() << endl;
			    (*testout) << "dnums = " << endl << dnums << endl;
			    (*testout) << "element-index = " << eltrans.GetElementIndex() << endl;
			    (*testout) << "elmat = " << endl << elmat << endl;
			  }

		    
			if (elmat_ev)
			  {
			    testout->precision(8);

			    (*testout) << "elind = " << eltrans.GetElementIndex() << endl;
			    Vector<> lami(elmat.Height());
			    Matrix<> evecs(elmat.Height());

			    CalcEigenSystem (elmat, lami, evecs);
			    (*testout) << "lami = " << endl << lami << endl
				       << "evecs = " << endl << evecs << endl;
			  }
		    
			AddElementMatrix (dnums, dnums, elmat, lh);
		      }
		  }
		cout << "\rassemble surface element " << nse << "/" << nse << endl;	  
	      }

	    ma.SetThreadPercentage ( 100.0 );


	    if (fespace.specialelements.Size())
	      cout << "special elements: " << fespace.specialelements.Size() << endl;
	    for (int i = 0; i < fespace.specialelements.Size(); i++)
	      {
		lh.CleanUp();

		const SpecialElement & el = *fespace.specialelements[i];
		el.GetDofNrs (dnums);
		
		for (int j = 0; j < dnums.Size(); j++)
		  if (dnums[j] != -1)
		    useddof.Set (dnums[j]);
	      
		FlatMatrix<SCAL> elmat;
		el.Assemble (elmat, lh);
	      
		AddElementMatrix (dnums, dnums, elmat, lh);
	      }

	  
	    // add eps to avoid empty lines
	    FlatMatrix<SCAL> elmat (fespace.GetDimension(), lh);
	    elmat = 0;
	    dnums.SetSize(1);

	    void * p = lh.GetPointer();
	    if (eps_regularization != 0)
	      {
		for (int i = 0; i < elmat.Height(); i++)
		  elmat(i, i) = eps_regularization;
		for (int i = 0; i < ndof; i++)
		  {
		    lh.CleanUp (p);
		    dnums[0] = i; 
		    AddElementMatrix (dnums, dnums, elmat, lh);
		  }
	      }
	    if (unuseddiag != 0)
	      {
		for (int i = 0; i < elmat.Height(); i++)
		  elmat(i, i) = unuseddiag;
		for (int i = 0; i < ndof; i++)
		  if (!useddof.Test (i))
		    {
		      // (*testout) << "unused: " << i << endl;
		      lh.CleanUp (p);
		      dnums[0] = i;
		      AddElementMatrix (dnums, dnums, elmat, lh);
		    }
	      }


	    if (print)
	      (*testout) << "mat = " << endl << GetMatrix() << endl;

	    int cntused = 0;
	    for (int i = 0; i < useddof.Size(); i++)
	      if (useddof.Test(i))
		cntused++;
	    cout << "used " << cntused
		 << ", unused = " << useddof.Size()-cntused
		 << ", total = " << useddof.Size() << endl;

	    ma.PopStatus ();
	  }

	else
	

	  {
	    cout << "assemble mixed not implemented" << endl;
#ifdef ABC
	    // mixed spaces
      
	    ARRAY<int> dnums1;
	    ARRAY<int> dnums2;

	    //      DenseMatrix elmat;
	    ElementTransformation eltrans;
      
	    int ne = ma.GetNE();
      
	    BaseMatrix & mat = GetMatrix();
	    //      (*mat) = 0;
      
	    cout << "assemble" << endl;
	    // LocalHeap lh (5000000);

	    int hasbound = 0;
	    int hasinner = 0;

	    for (int j = 1; j <= NumIntegrators(); j++)
	      {
		const BilinearFormIntegrator & bfi = *GetIntegrator(j);
		if (bfi.BoundaryForm())
		  hasbound = 1;
		else
		  hasinner = 1;
	      }

	    if (hasinner)
	      for (int i = 1; i <= ne; i++)
		{
		  if (i % 100 == 0)
		    cout << "." << flush;
		  lh.CleanUp();
	  
		  const FiniteElement & fel1 = fespace.GetFE (i, lh);
		  const FiniteElement & fel2 = fespace2->GetFE (i, lh);

		  ma.GetElementTransformation (i, eltrans, lh);
		  fespace.GetDofNrs (i, dnums1);
		  fespace2->GetDofNrs (i, dnums2);
	  
		  for (int j = 0; j < NumIntegrators(); j++)
		    {
		      const BilinearFormIntegrator & bfi = *GetIntegrator(j);
		      if (bfi.BoundaryForm()) continue;
	      
		      /*
		      // elmat = 
		      bfi.AssembleMixedElementMatrix (fel1, fel2, eltrans, 
		      lh);
		      */
		      //	      (*testout) << "mixed mat = " << elmat << endl;

		      //	      fespace.TransformMatrix (i, elmat);

		      /*
			dynamic_cast<SparseSystemMatrixRectangle<SysVector1d, SysVector2d > *> (mat)
			-> AddMixedElementMatrix (dnums2, dnums1, elmat);
		      */
		    }
		}
      

	    cout << " boundary terms ";

	    int nse = ma.GetNSE();
	    if (hasbound)
	      for (int i = 1; i <= nse; i++)
		{
		  const FiniteElement & fel1 = fespace.GetSFE (i, lh);
		  const FiniteElement & fel2 = fespace2->GetSFE (i, lh);
	  
		  ma.GetSurfaceElementTransformation (i, eltrans, lh);
		  fespace.GetSDofNrs (i, dnums1);
		  fespace2->GetSDofNrs (i, dnums2);
	  
		  for (int j = 0; j < NumIntegrators(); j++)
		    {
		      const BilinearFormIntegrator & bfi = *parts[j];

		      if (!bfi.BoundaryForm()) continue;
	      
		      //  elmat = 
		      //	      bfi.AssembleMixedElementMatrix (fel1, fel2, eltrans, lh);
		      /*
			fespace.Transform (i, true, elmat, TRANSFORM_MAT_RIGHT);
			fespace2.Transform (i, true, elmat, TRANFORM_MAT_LEFT);
		      */
		      /*
			dynamic_cast<SparseSystemMatrixRectangle<SysVector1d, SysVector1d> *> (mat)
			-> AddMixedElementMatrix (dnums2, dnums1, elmat);
		      */
		    }
		}

	    // if (print)
	    //	(*testout) << "mat = " << (*mat) << endl;

	    cout << endl;
#endif
	  }
	//  cout << "done" << endl;
	//  WriteMatrix (*testout);
      
      }
    catch (Exception & e)
      {
	e.Append (string ("in Assemble BilinearForm '") + 
		  GetName() + string("'\n"));
	throw;
      }
    catch (exception & e)
      {
	throw (Exception (string(e.what()) +
			  string("\n in Assemble BilinearForm '" + GetName() + "'\n")));
      }
  }




  template <class SCAL>
  void S_BilinearForm<SCAL> :: ComputeInternal (BaseVector & u, LocalHeap & lh) const
  {
    if (!eliminate_internal) return;
    if (!linearform)
      throw Exception ("ComputeInternal needs a linear-form");


    try
      {
	ma.PushStatus ("Compute Internal");

	ARRAY<int> dnums, idofs;
	ElementTransformation eltrans;
      
	int ne = ma.GetNE();

	bool hasinner = 0;

	for (int j = 0; j < NumIntegrators(); j++)
	  {
	    if (!parts[j] -> BoundaryForm())
	      hasinner = 1;
	  }	  
	 
	clock_t prevtime = clock();
	if (hasinner)
	  {
	    for (int i = 0; i < ne; i++)
	      {
		if (clock()-prevtime > 0.1 * CLOCKS_PER_SEC)
		  {
		    cout << "\rcompute internal element " << i << "/" << ne << flush;
		    ma.SetThreadPercentage ( 100.0*i / ne );
		    prevtime = clock();
		  }
	      
		lh.CleanUp();
	      
		if (!fespace.DefinedOn (ma.GetElIndex (i))) continue;
		  
		const FiniteElement & fel = fespace.GetFE (i, lh);
	      
		ma.GetElementTransformation (i, eltrans, lh);
		fespace.GetDofNrs (i, dnums);
	      
	      
		FlatMatrix<SCAL> sum_elmat(dnums.Size()*fespace.GetDimension(), lh);
		sum_elmat = 0;
		for (int j = 0; j < NumIntegrators(); j++)
		  {
		    const BilinearFormIntegrator & bfi = *parts[j];
		  
		    if (bfi.BoundaryForm()) continue;
		    if (!bfi.DefinedOn (ma.GetElIndex (i))) continue;
		  
		    FlatMatrix<SCAL> elmat;
		    bfi.AssembleElementMatrix (fel, eltrans, elmat, lh);
		  
		    sum_elmat += elmat;
		  }
	      
		fespace.TransformMat (i, false, sum_elmat, TRANSFORM_MAT_LEFT_RIGHT);
	      
		fel.GetInternalDofs(idofs);

		if (idofs.Size())
		  {
		    int dim = sum_elmat.Height() / dnums.Size();
		  
		    FlatVector<SCAL> elf (sum_elmat.Height(), lh);
		    FlatVector<SCAL> elu (sum_elmat.Height(), lh);
		    FlatMatrix<SCAL> ai(dim*idofs.Size(), lh);
		    FlatVector<SCAL> resi(dim*idofs.Size(), lh);
		    FlatVector<SCAL> wi(dim*idofs.Size(), lh);
		  
		    linearform->GetVector().GetIndirect (dnums, elf);
		    u.GetIndirect (dnums, elu);
		  

		    // compute residuum
		    elf -= sum_elmat * elu;

		    for (int jj = 0; jj < idofs.Size(); jj++)
		      for (int j = 0; j < dim; j++)
			{
			  for (int kk = 0; kk < idofs.Size(); kk++)
			    for (int k = 0; k < dim; k++)
			      {
				ai(jj*dim+j, kk*dim+k) =
				  sum_elmat(idofs[jj]*dim+j, idofs[kk]*dim+k);
			      }
			  resi(jj*dim+j) = elf(idofs[jj]*dim+j);
			}
		  
		    CholeskyFactors<SCAL> inv_ai(ai);
		    inv_ai.Mult (resi, wi);

		    for (int jj = 0; jj < idofs.Size(); jj++)
		      for (int j = 0; j < dim; j++)
			elu(idofs[jj]*dim+j) += wi(jj*dim+j);
		  
		    u.SetIndirect (dnums, elu);
		  }
	      }
	    cout << "\rcompute internal element " << ne << "/" << ne << endl;
	  }
	  
	ma.SetThreadPercentage ( 100.0 );
      
	ma.PopStatus ();
      }
    catch (Exception & e)
      {
	stringstream ost;
	ost << "in ComputeInternal" << endl;
	e.Append (ost.str());
	throw;
      }
    catch (exception & e)
      {
	throw (Exception (string(e.what()) +
			  string("\n in ComputeInternal\n")));
      }
  }










  template <class SCAL>
  void S_BilinearForm<SCAL> :: AssembleLinearization (const BaseVector & lin,
						      LocalHeap & lh, 
						      bool reallocate)
  {
    try
      {
	ARRAY<int> dnums;
	ElementTransformation eltrans;
      
	int ndof = fespace.GetNDof();
	BitArray useddof(ndof);
	useddof.Clear();
      
	int ne = ma.GetNE();
      
	BaseMatrix & mat = GetMatrix();
	mat = 0.0;
      
	cout << "Assemble linearization" << endl;
      
      
	//	  LocalHeap lh (5000000);
      
	bool hasbound = 0;
	bool hasinner = 0;
      
	for (int j = 0; j < NumIntegrators(); j++)
	  {
	    if (parts[j] -> BoundaryForm())
	      hasbound = 1;
	    else
	      hasinner = 1;
	  }
      
	if (hasinner)
	  {
	    for (int i = 0; i < ne; i++)
	      {
		if (i % 10 == 0)
		  cout << "\rassemble element " << i << "/" << ne << flush;
		lh.CleanUp();
	      
		if (!fespace.DefinedOn (ma.GetElIndex (i))) continue;
	      
		const FiniteElement & fel = fespace.GetFE (i, lh);
		ma.GetElementTransformation (i, eltrans, lh);
		fespace.GetDofNrs (i, dnums);
	      
		for (int j = 0; j < dnums.Size(); j++)
		  if (dnums[j] != -1)
		    useddof.Set (dnums[j]);
	      
		FlatMatrix<SCAL> sum_elmat(dnums.Size()*fespace.GetDimension(), lh);
		sum_elmat = 0;

		FlatVector<SCAL> elveclin (dnums.Size()*fespace.GetDimension(), lh);
		lin.GetIndirect (dnums, elveclin);
		fespace.TransformVec (i, false, elveclin, TRANSFORM_SOL);

		for (int j = 0; j < NumIntegrators(); j++)
		  {
		    const BilinearFormIntegrator & bfi = *parts[j];
		  
		    if (bfi.BoundaryForm()) continue;
		    if (!bfi.DefinedOn (ma.GetElIndex (i))) continue;
		  
		    FlatMatrix<SCAL> elmat;
		    try
		      {
			bfi.AssembleLinearizedElementMatrix (fel, eltrans, elveclin, elmat, lh);
			//////////////////////////////////////////////////
			/*		      
			//		      cout << " material: " << ma.GetElMaterial(i) << endl;
		      			
			cout << " assemble linearization, elmat: " << endl;
			cout << elmat << endl;

			FlatMatrix<SCAL> new_elmat(dnums.Size()*fespace.GetDimension(), lh);
			FlatVector<SCAL> e(dnums.Size()*fespace.GetDimension(), lh);
			FlatVector<SCAL> temp(dnums.Size()*fespace.GetDimension(), lh);
			FlatVector<SCAL> y1(dnums.Size()*fespace.GetDimension(), lh);
			FlatVector<SCAL> y2(dnums.Size()*fespace.GetDimension(), lh);

			double eps = 1e-5;
			for ( int ii=0; ii<e.Size(); ii++ )
			{
			e = 0;
			e(ii) = 1;
			temp = elveclin + eps*e;
			bfi.ApplyElementMatrix(fel,eltrans,temp,y1,lh);
			bfi.ApplyElementMatrix(fel,eltrans,elveclin,y2,lh);
			temp = y1-y2;
			for ( int jj=0; jj<new_elmat.Width(); jj++ ) new_elmat(jj,ii) = temp(jj)/eps;
			}

			cout << " elmat by num. diff:" << endl << new_elmat << endl;
			*/
			//////////////////////////////////////////////////

		      }
		    catch (Exception & e)
		      {
			e.Append (string("in Assemble Element Mat, bfi = ") + 
				  bfi.Name() + string("\n"));
			throw;
		      }
		    catch (exception & e)
		      {
			throw (Exception (string(e.what()) +
					  string("in Assemble Element Mat, bfi = ") + 
					  bfi.Name() + string("\n")));
		      }
		  
		    sum_elmat += elmat;
		  }
	      
		fespace.TransformMat (i, false, sum_elmat, TRANSFORM_MAT_LEFT_RIGHT);
		AddElementMatrix (dnums, dnums, sum_elmat, lh);
	      }
	    cout << "\rassemble element " << ne << "/" << ne << endl;
	  }
      
	// (*testout) << "Assemble Mat, vol, mat = " << endl << GetMatrix() << endl;
      
	int nse = ma.GetNSE();
	if (hasbound)
	  {
	    for (int i = 0; i < nse; i++)
	      {
		if (i % 100 == 0)
		  cout << "\rassemble surface element " << i << "/" << nse << flush;
		lh.CleanUp();
	      
		if (!fespace.DefinedOnBoundary (ma.GetSElIndex (i))) continue;
	      
		const FiniteElement & fel = fespace.GetSFE (i, lh);
	      
		ma.GetSurfaceElementTransformation (i, eltrans, lh);
		fespace.GetSDofNrs (i, dnums);
	      
		for (int j = 0; j < dnums.Size(); j++)
		  if (dnums[j] != -1)
		    useddof.Set (dnums[j]);

		FlatVector<SCAL> elveclin (dnums.Size()*fespace.GetDimension(), lh);
		lin.GetIndirect (dnums, elveclin);
		fespace.TransformVec (i, true, elveclin, TRANSFORM_SOL);
	      
		for (int j = 0; j < NumIntegrators(); j++)
		  {
		    const BilinearFormIntegrator & bfi = *parts[j];
		  
		    if (!bfi.BoundaryForm()) continue;

		    FlatMatrix<SCAL> elmat;
		  
		    bfi.AssembleLinearizedElementMatrix (fel, eltrans, elveclin, elmat, lh);
		  	  
		    fespace.TransformMat (i, true, elmat, TRANSFORM_MAT_LEFT_RIGHT);
		    AddElementMatrix (dnums, dnums, elmat, lh);
		  }
	      }
	    cout << "\rassemble surface element " << nse << "/" << nse << endl;	  
	  }
      
      
	if (fespace.specialelements.Size())
	  cout << "special elements: " << fespace.specialelements.Size() << endl;
	for (int i = 0; i < fespace.specialelements.Size(); i++)
	  {
	    lh.CleanUp();
	  
	    const SpecialElement & el = *fespace.specialelements[i];
	    el.GetDofNrs (dnums);
	  
	    for (int j = 0; j < dnums.Size(); j++)
	      if (dnums[j] != -1)
		useddof.Set (dnums[j]);
	  
	    FlatMatrix<SCAL> elmat;
	    el.Assemble (elmat, lh);
	  
	    AddElementMatrix (dnums, dnums, elmat, lh);
	  }
      
      
	//	  fespace.LockSomeDofs (GetMatrix());
      
      
	// add eps to avoid empty lines
	FlatMatrix<SCAL> elmat (fespace.GetDimension(), lh);
	elmat = 0;
	dnums.SetSize(1);
      
	void * p = lh.GetPointer();
	if (eps_regularization != 0)
	  {
	    for (int i = 0; i < elmat.Height(); i++)
	      elmat(i, i) = eps_regularization;
	    for (int i = 0; i < ndof; i++)
	      {
		lh.CleanUp (p);
		dnums[0] = i; 
		AddElementMatrix (dnums, dnums, elmat, lh);
	      }
	  }
	if (unuseddiag != 0)
	  {
	    for (int i = 0; i < elmat.Height(); i++)
	      elmat(i, i) = unuseddiag;
	    for (int i = 0; i < ndof; i++)
	      if (!useddof.Test (i))
		{
		  // (*testout) << "unused: " << i << endl;
		  lh.CleanUp (p);
		  dnums[0] = i;
		  AddElementMatrix (dnums, dnums, elmat, lh);
		}
	  }
      
	//	  (*testout) << "mat = " << endl << GetMatrix() << endl;
	/*
	  if (mat.Height() < 100)
	  mat.Print (cout);
	*/
	int cntused = 0;
	for (int i = 0; i < useddof.Size(); i++)
	  if (useddof.Test(i))
	    cntused++;
	cout << "used " << cntused
	     << ", unused = " << useddof.Size()-cntused
	     << ", total = " << useddof.Size() << endl;
  
      }
    catch (Exception & e)
      {
	stringstream ost;
	ost << "in Assemble BilinearForm" << endl;
	e.Append (ost.str());
	throw;
      }
    catch (exception & e)
      {
	throw (Exception (string(e.what()) +
			  string("\n in Assemble BilinearForm\n")));
      }
  }









  template <class SCAL>
  void S_BilinearForm<SCAL> :: AddMatrix1 (SCAL val,
					   const BaseVector & x,
					   BaseVector & y) const
  {
    if (!MixedSpaces())

      {
	ARRAY<int> dnums;
	ElementTransformation eltrans;
      
	int ne = ma.GetNE();
      
	LocalHeap lh (20000000);

	int hasbound = 0;
	int hasinner = 0;

	for (int j = 0; j < NumIntegrators(); j++)
	  {
	    const BilinearFormIntegrator & bfi = *GetIntegrator(j);
	    if (bfi.BoundaryForm())
	      hasbound = 1;
	    else
	      hasinner = 1;
	  }


	int cnt1 = 0, cnt2 = 0;
	if (hasinner)
	  for (int i = 0; i < ne; i++)
	    {
	      lh.CleanUp();

	      if (!fespace.DefinedOn (ma.GetElIndex (i))) continue;
	  
	      const FiniteElement & fel = fespace.GetFE (i, lh);
	      ma.GetElementTransformation (i, eltrans, lh);
	      fespace.GetDofNrs (i, dnums);
	  
	      FlatVector<SCAL> elvecx (dnums.Size() * GetFESpace().GetDimension(), 
				       lh);
	      FlatVector<SCAL> elvecy (dnums.Size() * GetFESpace().GetDimension(), 
				       lh);

	      x.GetIndirect (dnums, elvecx);
	      fespace.TransformVec (i, false, elvecx, TRANSFORM_SOL);

	      for (int j = 0; j < NumIntegrators(); j++)
		{
		  const BilinearFormIntegrator & bfi = *parts[j];

		  if (bfi.BoundaryForm()) continue;
		  cnt1++;
		  if (!bfi.DefinedOn (ma.GetElIndex (i))) continue;
		  cnt2++;

		  bfi.ApplyElementMatrix (fel, eltrans, elvecx, elvecy, lh);

		  fespace.TransformVec (i, false, elvecy, TRANSFORM_RHS);

		  elvecy *= val;
		  y.AddIndirect (dnums, elvecy);
		}
	    }
	// cout << "apply on " << cnt2 << "/" << cnt1 << endl;

	int nse = ma.GetNSE();
	if (hasbound)
	  for (int i = 0; i < nse; i++)
	    {
	      lh.CleanUp();
	      const FiniteElement & fel = fespace.GetSFE (i, lh);
	    
	      ma.GetSurfaceElementTransformation (i, eltrans, lh);
	      fespace.GetSDofNrs (i, dnums);
	    
	      FlatVector<SCAL> elvecx (dnums.Size() * GetFESpace().GetDimension(), lh);
	      FlatVector<SCAL> elvecy (dnums.Size() * GetFESpace().GetDimension(), lh);
	    
	      x.GetIndirect (dnums, elvecx);
	    
	      fespace.TransformVec (i, true, elvecx, TRANSFORM_SOL);
	  
	      for (int j = 0; j < NumIntegrators(); j++)
		{
		  const BilinearFormIntegrator & bfi = *parts[j];
		
		  if (!bfi.BoundaryForm()) continue;
		  //		if (!bfi.DefinedOn (eltrans.GetElementIndex())) continue;
	      
		  bfi.ApplyElementMatrix (fel, eltrans, elvecx, elvecy, lh);
		  fespace.TransformVec (i, true, elvecy, TRANSFORM_RHS);

		  elvecy *= val;
		  y.AddIndirect (dnums, elvecy);
		}
	    }

	for (int i = 0; i < fespace.specialelements.Size(); i++)
	  {
	    lh.CleanUp();
	    const SpecialElement & el = *fespace.specialelements[i];
	    el.GetDofNrs (dnums);
	  
	    FlatVector<SCAL> elvecx (dnums.Size() * GetFESpace().GetDimension(), lh);
	    FlatVector<SCAL> elvecy (dnums.Size() * GetFESpace().GetDimension(), lh);

	    x.GetIndirect (dnums, elvecx);
	    el.Apply (elvecx, elvecy, lh);
	    elvecy *= val;
	    y.AddIndirect (dnums, elvecy);
	  }



      }
    else
      { 
	cout << "apply not implemented for mixed" << endl;
      }
  }






  template <class SCAL>
  void S_BilinearForm<SCAL> :: ApplyLinearizedMatrixAdd1 (SCAL val,
							  const BaseVector & lin,
							  const BaseVector & x,
							  BaseVector & y) const
  {
    if (!MixedSpaces())

      {
	/*
	  (*testout) << "val = " << val << endl;
	  (*testout) << "applylin, lin = " << endl << lin << endl;
	  (*testout) << "global x = " << endl << x << endl;
	  (*testout) << "global y,in = " << endl << y << endl;
	*/
	ARRAY<int> dnums;
	ElementTransformation eltrans;
      
	int ne = ma.GetNE();
	int dim = GetFESpace().GetDimension(); 
	LocalHeap lh (2000000);

	int hasbound = 0;
	int hasinner = 0;

	for (int j = 0; j < NumIntegrators(); j++)
	  {
	    const BilinearFormIntegrator & bfi = *GetIntegrator(j);
	    if (bfi.BoundaryForm())
	      hasbound = 1;
	    else
	      hasinner = 1;
	  }


	if (hasinner)
	  for (int i = 0; i < ne; i++)
	    {
	      lh.CleanUp();
	  
	      const FiniteElement & fel = fespace.GetFE (i, lh);
	      ma.GetElementTransformation (i, eltrans, lh);
	      fespace.GetDofNrs (i, dnums);
	  
	      FlatVector<SCAL> elveclin (dnums.Size() * dim, lh);
	      FlatVector<SCAL> elvecx (dnums.Size() * dim, lh);
	      FlatVector<SCAL> elvecy (dnums.Size() * dim, lh);

	      lin.GetIndirect (dnums, elveclin);
	      fespace.TransformVec (i, false, elveclin, TRANSFORM_SOL);

	      x.GetIndirect (dnums, elvecx);
	      fespace.TransformVec (i, false, elvecx, TRANSFORM_SOL);

	      for (int j = 0; j < NumIntegrators(); j++)
		{
		  const BilinearFormIntegrator & bfi = *parts[j];

		  if (bfi.BoundaryForm()) continue;
		  if (!bfi.DefinedOn (ma.GetElIndex (i))) continue;


		  bfi.ApplyLinearizedElementMatrix (fel, eltrans, elveclin, elvecx, elvecy, lh);

		  fespace.TransformVec (i, false, elvecy, TRANSFORM_RHS);

		  elvecy *= val;

		  y.AddIndirect (dnums, elvecy);
		}
	    }

	int nse = ma.GetNSE();
	if (hasbound)
	  for (int i = 0; i < nse; i++)
	    {
	      lh.CleanUp();
	      const FiniteElement & fel = fespace.GetSFE (i, lh);
	    
	      ma.GetSurfaceElementTransformation (i, eltrans, lh);
	      fespace.GetSDofNrs (i, dnums);
	    
	      FlatVector<SCAL> elveclin (dnums.Size() * dim, lh);
	      FlatVector<SCAL> elvecx (dnums.Size() * dim, lh);
	      FlatVector<SCAL> elvecy (dnums.Size() * dim, lh);
	    
	      lin.GetIndirect (dnums, elveclin);
	      fespace.TransformVec (i, true, elveclin, TRANSFORM_SOL);
	      x.GetIndirect (dnums, elvecx);
	      fespace.TransformVec (i, true, elvecx, TRANSFORM_SOL);
	  
	      for (int j = 0; j < NumIntegrators(); j++)
		{
		  const BilinearFormIntegrator & bfi = *parts[j];
		
		  if (!bfi.BoundaryForm()) continue;
		  if (!bfi.DefinedOn (eltrans.GetElementIndex())) continue;
	      
		  bfi.ApplyLinearizedElementMatrix (fel, eltrans, elveclin, elvecx, elvecy, lh);
		  fespace.TransformVec (i, true, elvecy, TRANSFORM_RHS);
		  elvecy *= val;
		  y.AddIndirect (dnums, elvecy);
		}
	    }

	for (int i = 0; i < fespace.specialelements.Size(); i++)
	  {
	    lh.CleanUp();
	    const SpecialElement & el = *fespace.specialelements[i];
	    el.GetDofNrs (dnums);
	  
	    FlatVector<SCAL> elvecx (dnums.Size() * dim, lh);
	    FlatVector<SCAL> elvecy (dnums.Size() * dim, lh);

	    x.GetIndirect (dnums, elvecx);
	    el.Apply (elvecx, elvecy, lh);

	    elvecy *= val;
	    y.AddIndirect (dnums, elvecy);
	  }
      }
    else
      { 
	cout << "apply not implemented for mixed" << endl;
      }
  }







  template <class SCAL>
  double S_BilinearForm<SCAL> :: Energy (const BaseVector & x) const
  {
    double energy = 0;

    if (!MixedSpaces())
      {
	ARRAY<int> dnums;
	ElementTransformation eltrans;

	int ne = ma.GetNE();
      
	LocalHeap lh (2000000);

	int hasbound = 0;
	int hasinner = 0;

	for (int j = 0; j < NumIntegrators(); j++)
	  {
	    const BilinearFormIntegrator & bfi = *GetIntegrator(j);
	    if (bfi.BoundaryForm())
	      hasbound = 1;
	    else
	      hasinner = 1;
	  }

	if (hasinner)
	  for (int i = 0; i < ne; i++)
	    {
	      lh.CleanUp();
	  
	      const FiniteElement & fel = fespace.GetFE (i, lh);
	      ma.GetElementTransformation (i, eltrans, lh);
	      fespace.GetDofNrs (i, dnums);
	  
	      FlatVector<SCAL> elvecx (dnums.Size() * GetFESpace().GetDimension(), 
				       lh);
	      FlatVector<SCAL> elvecy (dnums.Size() * GetFESpace().GetDimension(), 
				       lh);

	      x.GetIndirect (dnums, elvecx);
	      fespace.TransformVec (i, false, elvecx, TRANSFORM_SOL);

	      for (int j = 0; j < NumIntegrators(); j++)
		{
		  const BilinearFormIntegrator & bfi = *parts[j];

		  if (bfi.BoundaryForm()) continue;
		  energy += bfi.Energy (fel, eltrans, elvecx, lh);
		}
	    }

	int nse = ma.GetNSE();
	if (hasbound)
	  for (int i = 0; i < nse; i++)
	    {
	      lh.CleanUp();
	      const FiniteElement & fel = fespace.GetSFE (i, lh);
	    
	      ma.GetSurfaceElementTransformation (i, eltrans, lh);
	      fespace.GetSDofNrs (i, dnums);
	    
	      FlatVector<SCAL> elvecx (dnums.Size() * GetFESpace().GetDimension(), lh);
	      x.GetIndirect (dnums, elvecx);
	      fespace.TransformVec (i, true, elvecx, TRANSFORM_SOL);
	  
	      for (int j = 0; j < NumIntegrators(); j++)
		{
		  const BilinearFormIntegrator & bfi = *parts[j];
		
		  if (!bfi.BoundaryForm()) continue;
		  energy += bfi.Energy (fel, eltrans, elvecx, lh);
		}
	    }

	for (int i = 0; i < fespace.specialelements.Size(); i++)
	  {
	    lh.CleanUp();

	    const SpecialElement & el = *fespace.specialelements[i];
	    el.GetDofNrs (dnums);

	    FlatVector<SCAL> elvecx (dnums.Size() * GetFESpace().GetDimension(), lh);
	    x.GetIndirect (dnums, elvecx);
	  
	    energy += el.Energy (elvecx, lh);
	  }
      }
    return energy;
  }



  template class S_BilinearForm<double>;
  template class S_BilinearForm<Complex>;

  template <class TM>
  T_BilinearForm<TM>::
  T_BilinearForm (const FESpace & afespace, const string & aname, const Flags & flags)
    : S_BilinearForm<TSCAL> (afespace, aname, flags)
  { 
    if (&this->fespace.LowOrderFESpace())
      this->low_order_bilinear_form = 
	new T_BilinearForm<TM> (this->fespace.LowOrderFESpace(),
				aname+string(" low-order"), flags);
  }

  template <class TM>
  T_BilinearForm<TM>::
  T_BilinearForm (const FESpace & afespace, 
		  const FESpace & afespace2,
		  const string & aname,
		  const Flags & flags)
    : S_BilinearForm<TSCAL> (afespace, afespace2, aname, flags) 
  {
    ;
  }

  template <class TM>
  T_BilinearForm<TM>::
  ~T_BilinearForm ()
  {
    for (int i = 0; i < this->mats.Size(); i++)
      delete this->mats[i];
  }



  template <class TM>
  void T_BilinearForm<TM>::
  AllocateMatrix ()
  {
    if (this->mats.Size() == this->ma.GetNLevels())
      return;

    MatrixGraph * graph =
      const_cast<FESpace&>(this->fespace).GetGraph (this->ma.GetNLevels()-1, false);

    //  graphs.Append (graph);
    this->mats.Append (new SparseMatrix<TM> (*graph));
    delete graph;

    if (!this->multilevel || this->low_order_bilinear_form)
      for (int i = 0; i < this->mats.Size()-1; i++)
	{
	  delete this->mats[i];
	  this->mats[i] = 0;
	  //	delete graphs[i];
	  //	graphs[i] = 0;
	}
  }



  template <class TM>
  BaseVector * T_BilinearForm<TM>::
  CreateVector() const
  {
    return new VVector<TV_COL> (this->fespace.GetNDof());
  }

  
  ///
  template <class TM>
  void T_BilinearForm<TM>::
  AddElementMatrix (const ARRAY<int> & dnums1,
		    const ARRAY<int> & dnums2,
		    const FlatMatrix<TSCAL> & elmat,
		    LocalHeap & lh) 
  {
    TMATRIX & mat = dynamic_cast<TMATRIX&> (*this->mats.Last());
    //FlatArray<int> colpos(dnums2.Size(), lh);

    for (int i = 0; i < dnums1.Size(); i++)
      for (int j = 0; j < dnums2.Size(); j++)
	if (dnums1[i] != -1 && dnums2[j] != -1)
	  {
	    TM & mij = mat(dnums1[i], dnums2[j]);
	  
	    int hi = Height(mij);
	    int wi = Width(mij);
	  
	    for (int k = 0; k < hi; k++)
	      for (int l = 0; l < wi; l++)
		mij(k,l) += elmat(i*hi+k, j*wi+l);
	  }
  }




  ///
  template <> void T_BilinearForm<double>::
  AddElementMatrix (const ARRAY<int> & dnums1,
		    const ARRAY<int> & dnums2,
		    const FlatMatrix<double> & elmat,
		    LocalHeap & lh) 
  {
    TMATRIX & mat = dynamic_cast<TMATRIX&>(*this->mats.Last());
    
    /*
    int n2 = dnums2.Size();
    if (n2 < 10)
      {
	void * heap = lh.GetPointer();
      
	FlatArray<int> colpos(n2, lh);
	FlatArray<int> colpos2(n2, lh);
	FlatArray<int> map(n2, lh);
      
	for (int j = 0; j < n2; j++)
	  {
	    colpos[j] = dnums2[j];
	    map[j] = j;
	  }
	for (int i = 0; i < n2; i++)
	  for (int j = 1; j < n2-i; j++)
	    if (colpos[j-1] > colpos[j])
	      {
		Swap (colpos[j-1], colpos[j]);
		Swap (map[j-1], map[j]);
	      }
      
	for (int i = 0; i < dnums1.Size(); i++)
	  {
	    for (int j = 0; j < n2; j++)
	      colpos2[j] = colpos[j];

	    mat.GetPositionsSorted (dnums1[i], n2,
				    &colpos2[0]);
	  
	    for (int j = 0; j < n2; j++)
	      mat[colpos2[j]] += elmat(i, map[j]);
	  }
      
	lh.CleanUp (heap);
      }
    else
    */
      {
	for (int i = 0; i < dnums1.Size(); i++)
	  for (int j = 0; j < dnums2.Size(); j++)
	    if (dnums1[i] != -1 && dnums2[j] != -1)
	      mat(dnums1[i], dnums2[j]) += elmat(i, j);
      }
  }

  template <> void T_BilinearForm<Complex>::
  AddElementMatrix (const ARRAY<int> & dnums1,
		    const ARRAY<int> & dnums2,
		    const FlatMatrix<Complex> & elmat,
		    LocalHeap & lh)
  {
    TMATRIX & mat = dynamic_cast<TMATRIX&> (*this->mats.Last());

    for (int i = 0; i < dnums1.Size(); i++)
      for (int j = 0; j < dnums2.Size(); j++)
	if (dnums1[i] != -1 && dnums2[j] != -1)
	  mat(dnums1[i], dnums2[j]) += elmat(i, j);

  }








  template <class TM>
  T_BilinearFormSymmetric<TM> :: 
  T_BilinearFormSymmetric (const FESpace & afespace, const string & aname,
			   const Flags & flags)
    : S_BilinearForm<TSCAL> (afespace, aname, flags) 
  { 
    if (&this->fespace.LowOrderFESpace())
      this->low_order_bilinear_form = 
	new T_BilinearFormSymmetric<TM> (this->fespace.LowOrderFESpace(),
					 aname+string(" low-order"), flags);
  }

  template <class TM>
  T_BilinearFormSymmetric<TM> :: 
  ~T_BilinearFormSymmetric ()
  {
    for (int i = 0; i < this->mats.Size(); i++)
      delete this->mats[i];
  }

  ///
  template <class TM>
  void T_BilinearFormSymmetric<TM> :: 
  AllocateMatrix ()
  {
    if (this->mats.Size() == this->ma.GetNLevels())
      return;

    MatrixGraph * graph = 
      const_cast<FESpace&>(this->fespace).GetGraph (this->ma.GetNLevels()-1, true);


    // graphs.Append (graph);
    this->mats.Append (new SparseMatrixSymmetric<TM> (*graph));
    delete graph;

    if (!this->multilevel || this->low_order_bilinear_form)
      for (int i = 0; i < this->mats.Size()-1; i++)
	{
	  delete this->mats[i];
	  this->mats[i] = 0;
	  // delete graphs[i];
	  // graphs[i] = 0;
	}
  }

  template <class TM>
  BaseVector * T_BilinearFormSymmetric<TM> :: 
  CreateVector() const
  {
    return new VVector<TV_COL> (this->fespace.GetNDof());
  }

  ///
  template <class TM>
  void T_BilinearFormSymmetric<TM> :: 
  AddElementMatrix (const ARRAY<int> & dnums1,
		    const ARRAY<int> & dnums2,
		    const FlatMatrix<TSCAL> & elmat,
		    LocalHeap & lh) 
  {
    TMATRIX & mat = dynamic_cast<TMATRIX&> (*this->mats.Last());
  
    for (int i = 0; i < dnums1.Size(); i++)
      for (int j = 0; j < dnums2.Size(); j++)
	if (dnums1[i] != -1 && dnums2[j] != -1 &&
	    dnums1[i] >= dnums2[j])
	  {
	    TM & mij = mat(dnums1[i], dnums2[j]);
	    int hi = Height (mij);
	    int wi = Width (mij);
	  
	    for (int k = 0; k < hi; k++)
	      for (int l = 0; l < wi; l++)
		mij(k,l) += elmat(i*hi+k, j*wi+l);
	  }
  }




  ///
  template <> void T_BilinearFormSymmetric<double>::
  AddElementMatrix (const ARRAY<int> & dnums1,
		    const ARRAY<int> & dnums2,
		    const FlatMatrix<double> & elmat,
		    LocalHeap & lh) 
  {
    TMATRIX & mat = dynamic_cast<TMATRIX&> (GetMatrix()); // *mats.Last());

    for (int i = 0; i < dnums1.Size(); i++)
      for (int j = 0; j < dnums2.Size(); j++)
	if (dnums1[i] != -1 && dnums2[j] != -1 && 
	    dnums1[i] >= dnums2[j])
	  mat(dnums1[i], dnums2[j]) += elmat(i, j);
  }

  template <> void T_BilinearFormSymmetric<Complex>::
  AddElementMatrix (const ARRAY<int> & dnums1,
		    const ARRAY<int> & dnums2,
		    const FlatMatrix<Complex> & elmat,
		    LocalHeap & lh) 
  {
    TMATRIX & mat = dynamic_cast<TMATRIX&> (*mats.Last());
  
    for (int i = 0; i < dnums1.Size(); i++)
      for (int j = 0; j < dnums2.Size(); j++)
	if (dnums1[i] != -1 && dnums2[j] != -1 && 
	    dnums1[i] >= dnums2[j])
	  mat(dnums1[i], dnums2[j]) += elmat(i, j);
  }
















  BilinearForm * CreateBilinearForm (const FESpace * space,
				     const string & name,
				     const Flags & flags)
  {
    BilinearForm * bf = 0;

    if (flags.GetDefineFlag ("symmetric"))
      {
	CreateSymMatObject3 (bf, T_BilinearFormSymmetric, 
			     space->GetDimension(), space->IsComplex(),   
			     *space, name, flags);
      }
    else
      {
	CreateSymMatObject3 (bf, T_BilinearForm, 
			     space->GetDimension(), space->IsComplex(),   
			     *space, name, flags);
      }

    bf -> SetNonAssemble (flags.GetDefineFlag ("nonassemble"));
    bf -> SetDiagonal (flags.GetDefineFlag ("diagonal"));
    if (flags.GetDefineFlag ("nonsym")) bf -> SetSymmetric (0);
    if (flags.GetDefineFlag ("nonmultilevel")) bf -> SetMultiLevel (0);
    bf -> SetHermitean (flags.GetDefineFlag ("hermitean"));
    bf-> SetUnusedDiag (flags.GetNumFlag ("unuseddiag",1));
  
    bf -> SetPrint (flags.GetDefineFlag ("print"));
    bf -> SetPrintElmat (flags.GetDefineFlag ("printelmat"));
    bf -> SetElmatEigenValues (flags.GetDefineFlag ("elmatev"));

    if (flags.GetDefineFlag ("timing")) bf->SetTiming (1);
    if (flags.GetDefineFlag ("eliminate_internal")) bf->SetEliminateInternal (1);

    return bf;
  }







  /*

  void BilinearForm :: AllocateMatrix ()
  {
  //  cout << "graphs.s = " << graphs.Size() << ", levels = " << ma.GetNLevels() << endl;
  
  if (graphs.Size() == ma.GetNLevels())
  return;

  int ndof = fespace.GetNDof();
  int ne = ma.GetNE();
  int nse = ma.GetNSE();

  MatrixGraph * graph;


  // new version
  graph = const_cast<FESpace&>(fespace).GetGraph (ma.GetNLevels());
  graphs.Append (graph);

  mats.Append (new SparseMatrix<Mat<1,1> > (*graph));

  if (diagonal)
  {
  BaseMatrix * mat;
  switch (fespace.GetDimension())
  {
  case 1:
  mat = new DiagonalSystemMatrix<SysMatrix1d, SysVector1d> (ndof);
  break;
  case 2:
  mat = new DiagonalSystemMatrix<SysMatrix2d, SysVector2d> (ndof);
  break;
  case 3:
  mat = new DiagonalSystemMatrix<SysMatrix3d, SysVector3d> (ndof);
  break;
  case 4:
  mat = new DiagonalSystemMatrix<SysMatrix4d, SysVector4d> (ndof);
  break;
  } 
  mats.Append (mat);
  return;
  }

  if (MixedSpaces())
  {
  int ndof2 = fespace2->GetNDof();

  IntTable connecteddofs(ndof2);
  ARRAY<int> linesize (ndof2);
  ARRAY<int> dnums1, dnums2;
      
  for (int i = 1; i <= ne; i++)
  {
  fespace.GetDofNrs (i, dnums1);
  fespace2->GetDofNrs (i, dnums2);

  for (int j = 1; j <= dnums2.Size(); j++)
  for (int k = 1; k <= dnums1.Size(); k++)
  connecteddofs.AddUnique (dnums2.Get(j), dnums1.Get(k));
  }
  for (int i = 1; i <= nse; i++)
  {
  fespace.GetSDofNrs (i, dnums1);
  fespace2->GetSDofNrs (i, dnums2);


  // 	  (*testout) << "dnums1 = ";
  // 	  for (int j = 1; j <= dnums1.Size(); j++)
  // 	    (*testout) << dnums1.Get(j) << " ";
  // 	  (*testout) << endl;
  // 	  (*testout) << "dnums2 = ";
  // 	  for (int j = 1; j <= dnums2.Size(); j++)
  // 	    (*testout) << dnums2.Get(j) << " ";
  // 	  (*testout) << endl;

  for (int j = 1; j <= dnums2.Size(); j++)
  for (int k = 1; k <= dnums1.Size(); k++)
  connecteddofs.AddUnique (dnums2.Get(j), dnums1.Get(k));
  }
      

  for (int i = 1; i <= ndof2; i++)
  connecteddofs.AddUnique (i, i);
      
  for (int i = 1; i <= ndof2; i++)
  linesize.Elem(i) = connecteddofs.EntrySize(i);
      
      
  graph = new MatrixGraph (linesize);
  graphs.Append (graph);
      
  for (int i = 1; i <= ne; i++)
  {
  fespace.GetDofNrs (i, dnums1);
  fespace2->GetDofNrs (i, dnums2);
	  
  for (int j = 1; j <= dnums2.Size(); j++)
  for (int k = 1; k <= dnums1.Size(); k++)
  graph->CreatePosition (dnums2.Get(j), dnums1.Get(k));
  }
  for (int i = 1; i <= nse; i++)
  {
  fespace.GetSDofNrs (i, dnums1);
  fespace2->GetSDofNrs (i, dnums2);
	  
  for (int j = 1; j <= dnums2.Size(); j++)
  for (int k = 1; k <= dnums1.Size(); k++)
  graph->CreatePosition (dnums2.Get(j), dnums1.Get(k));
  }

  //      for (int i = 1; i <= min2(ndof2, ndof); i++)
  //	graph->CreatePosition (i, i);


  }
  else
  {
  graph = const_cast<FESpace&>(fespace).GetGraph (ma.GetNLevels());
  graphs.Append (graph);
  }
      
  BaseMatrix * mat;
  cout << "complex = " << fespace.IsComplex() << endl;
  cout << "mixed = " << MixedSpaces() << endl;

  if (!MixedSpaces())
  {
  int sym = symmetric || hermitean;
  if (!fespace.IsComplex())
  {
  switch (fespace.GetDimension())
  {
  case 1:
  mat = new SparseSystemMatrix<SysMatrix1d, SysVector1d> (*graph, sym);
  break;
  case 2:
  mat = new SparseSystemMatrix<SysMatrix2d, SysVector2d> (*graph, sym);
  break;
  case 3:
  mat = new SparseSystemMatrix<SysMatrix3d, SysVector3d> (*graph, sym);
  break;
  case 4:
  mat = new SparseSystemMatrix<SysMatrix4d, SysVector4d> (*graph, sym);
  break;
  }
  }
  else
  {
  switch (fespace.GetDimension())
  {
  case 1:
  cout << "allocate 1d complex" << endl;
  mat = new ComplexSparseSystemMatrix<SysMatrixC1d, SysVectorC1d> (*graph, sym);
  break;
  case 2:
  mat = new ComplexSparseSystemMatrix<SysMatrixC2d, SysVectorC2d> (*graph, sym);
  break;
  case 4:
  mat = new ComplexSparseSystemMatrix<SysMatrixC4d, SysVectorC4d> (*graph, sym);
  break;
  }
  }
  }
  else
  {
  switch (fespace.GetDimension())
  {
  case 1:
  mat = new SparseSystemMatrixRectangle<SysVector1d, SysVector1d> 
  (fespace2->GetNDof(), fespace.GetNDof(), *graph);
  break;
  case 2:
  mat = new SparseSystemMatrixRectangle<SysVector1d, SysVector2d> 
  (fespace2->GetNDof(), fespace.GetNDof(), *graph);
  break;	  
  }
  //      (*testout) << "mat = " << (*mat) << endl;
  }

  mat -> SetSymmetric (symmetric);
  mat -> SetHermitean (hermitean);
  mats.Append (mat);
  }
  */



  void BilinearForm::WriteMatrix (ostream & ost) const
  {
    // Stefan's Pebbles format

    /*
      ofstream matfile ("helmholtz.re");
      ofstream matfileim ("helmholtz.im");

      SparseSystemMatrix<SysMatrixC1d,SysVectorC1d> & mata = 
      (SparseSystemMatrix<SysMatrixC1d,SysVectorC1d> &)
      *mats.Last();


      const MatrixGraph & graph = mata.GetGraph();
      int n = mata.Height();
      int row, col;
      int nne = 0;
  
      matfile << n << endl;
      matfileim << n << endl;

      for (row = 1; row <= n; row++)
      for (int j = 1; j <= graph.ElementsInLine(row); j++)
      {
      col = graph.GetColIndex (row, j);
      nne++;
      }
  
      matfile << nne << endl;
      matfile << 1 << endl;
      matfileim << nne << endl;
      matfileim << 1 << endl;
  
      for (row = 1; row <= n; row++)
      for (int j = 1; j <= graph.ElementsInLine(row); j++)
      {
      col = graph.GetColIndex (row, j);
      matfile << row << " " << col << " ";
      matfileim << row << " " << col << " ";

      if (row >= col)
      {
      matfile << mata.Elem (row, col).Elem(1) << endl;
      matfileim << mata.Elem (row, col).Elem(2) << endl;
      }
      else
      {
      matfile << mata.Elem (col, row).Elem(1) << endl;
      matfileim << mata.Elem (col, row).Elem(2) << endl;
      }
      }
    */
  }






  class ApplyFineMatrix : public BaseMatrix
  {
    const BaseMatrix & finemat;
    const ngmg::Prolongation & prol;
    int level;
    
  public:
    ApplyFineMatrix (const BaseMatrix & afinemat,
		     const ngmg::Prolongation & aprol,
		     int alevel);
  
    virtual void Mult (const BaseVector & x, BaseVector & y) const;
    virtual BaseVector * CreateVector () const;
  };

  ApplyFineMatrix :: 
  ApplyFineMatrix (const BaseMatrix & afinemat,
		   const ngmg::Prolongation & aprol,
		   int alevel)
    : finemat(afinemat), prol(aprol), level(alevel)
  {
    ;
  }

  void ApplyFineMatrix :: 
  Mult (const BaseVector & x, BaseVector & y) const
  {
    /*
      BaseVector & fx = *finemat.CreateVector();
      BaseVector & fy = *finemat.CreateVector();
  
      fx.SetScalar (0);
      fx.AddPart (1, 1, x);
      //  prol.ProlongateInline (level, fx);
      finemat.Mult (fx, fy);
      //  prol.RestrictInline (level, fy);
      fy.GetPart (1, y);

      delete &fx;
      delete &fy;
    */
    cout << "Apply Matrix currently not implemented" << endl;
  }

  BaseVector * ApplyFineMatrix :: CreateVector () const
  {
    cerr << "ApplyFineMatrix::CreateVector:  Need Help !!!" << endl;
    return NULL;
  }





  void BilinearForm :: GalerkinProjection ()
  {
    for (int i = GetNLevels()-1; i >= 1; i--)
      {
	ApplyFineMatrix afm (GetMatrix(i+1),
			     *GetFESpace().GetProlongation(),
			     i+1);
			   
	//      const_cast<BaseMatrix&> (GetMatrix(i)).MakeMatrixFromOperator (afm);
      }
  }



  BilinearFormApplication :: 
  BilinearFormApplication (const BilinearForm * abf)
    : bf (abf)
  {
    ;
  }

  void BilinearFormApplication :: 
  Mult (const BaseVector & v, BaseVector & prod) const
  {
    prod = 0;
    bf -> AddMatrix (1, v, prod);
  }

  void BilinearFormApplication :: 
  MultAdd (double val, const BaseVector & v, BaseVector & prod) const
  {
    bf -> AddMatrix (val, v, prod);
  }

  void BilinearFormApplication :: 
  MultAdd (Complex val, const BaseVector & v, BaseVector & prod) const
  {
    bf -> AddMatrix (val, v, prod);
  }

  /*
    void BilinearFormApplication :: 
    MultTransAdd (double val, const BaseVector & v, BaseVector & prod) const
    {
    bf -> AddMatrix (val, v, prod);
    }
  */


  BaseVector * BilinearFormApplication :: 
  CreateVector () const
  {
    return bf -> CreateVector();
  }




  LinearizedBilinearFormApplication ::
  LinearizedBilinearFormApplication (const BilinearForm * abf,
				     const BaseVector * aveclin)
    : BilinearFormApplication (abf), veclin(aveclin)
  {
    ;
  }

  void  LinearizedBilinearFormApplication :: 
  Mult (const BaseVector & v, BaseVector & prod) const
  {
    prod = 0;
    bf->ApplyLinearizedMatrixAdd (1, *veclin, v, prod);
  }

  void LinearizedBilinearFormApplication :: 
  MultAdd (double val, const BaseVector & v, BaseVector & prod) const
  {
    bf->ApplyLinearizedMatrixAdd (val, *veclin, v, prod);
  }

  void LinearizedBilinearFormApplication :: 
  MultAdd (Complex val, const BaseVector & v, BaseVector & prod) const
  {
    bf->ApplyLinearizedMatrixAdd (val, *veclin, v, prod);
  }


}
