LemmaSoftware
/
EMSchur3D


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942
							// ===========================================================================
//
//       Filename:  bicgstab.h
//
//    Description:
//
//        Version:  0.0
//        Created:  10/27/2009 03:15:06 PM
//       Revision:  none
//       Compiler:  g++ (c++)
//
//         Author:  Trevor Irons (ti)
//
//   Organisation:  Colorado School of Mines (CSM)
//                  United States Geological Survey (USGS)
//
//          Email:  tirons@mines.edu, tirons@usgs.gov
//
//  This program is free software: you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation, either version 3 of the License, or
//  (at your option) any later version.
//
//  This program is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with this program.  If not, see <http://www.gnu.org/licenses/>.
//
// ===========================================================================
#pragma once

#ifndef  BICGSTAB_INC
#define  BICGSTAB_INC

#include <Eigen/Core>
#include <Eigen/Sparse>

#ifdef CHOLMODPRECONDITION
#include <Eigen/CholmodSupport>
#endif // CHOLMODPRECONDITION

//#include "unsupported/Eigen/IterativeSolvers"
//#include <unsupported/Eigen/SuperLUSupport>

#include <iostream>
#include <string>
#include <complex>
#include <fstream>
#include <LemmaCore>
#include "timer.h"

using namespace Eigen;
using namespace Lemma;

//typedef Eigen::VectorXcd VectorXcr;
typedef Eigen::SparseMatrix<Complex> SparseMat;


// On Input
// A = Matrix
// B = Right hand side
// X = initial guess, and solution
// maxit = maximum Number of iterations
// tol = error tolerance
// On Output
// X real solution vector
// errorn = Real error norm
int bicgstab(const SparseMat &A, const SparseMat &M, const VectorXcr &b, VectorXcr &x,
                int &max_it, Real &tol, Real &errorn, int &iter_done,
                const bool& banner = true) {

    Complex omega, rho, rho_1, alpha, beta;
    Real bnrm2, eps, errmin;
    int n, iter; //, istat;

    // Determine size of system and init vectors
    n = x.size();
    VectorXcr r(n);
    VectorXcr r_tld(n);
    VectorXcr p(n);
    VectorXcr v(n);
    VectorXcr p_hat(n);
    VectorXcr s(n);
    VectorXcr s_hat(n);
    VectorXcr t(n);
    VectorXcr xmin(n);

    if (banner) {
        std::cout << "Start BiCGStab, memory needed: "
                  <<  (sizeof(Complex)*(9+2)*n/(1024.*1024*1024)) << " [Gb]\n";
    }

    // Initialise
    iter_done = 0;
    v.setConstant(0.); // not necessary I don't think
    t.setConstant(0.);
    eps = 1e-100;

    bnrm2 = b.norm();
    if (bnrm2 == 0) {
        x.setConstant(0.0);
        errorn = 0;
        std::cerr << "Trivial case of Ax = b, where b is 0\n";
        return (0);
    }

    // If there is an initial guess
    if ( x.norm() ) {
        r = b - A.selfadjointView<Eigen::Upper>()*x;
        //r = b - A*x;
    } else {
        r = b;
    }

    errorn = r.norm() / bnrm2;
    omega = 1.;
    r_tld = r;
    errmin = 1e30;

    // Get down to business
    for (iter=0; iter<max_it; ++iter) {

        rho = r_tld.dot(r);
        if ( abs(rho) < eps) return (0);

        if (iter > 0) {
            beta = (rho/rho_1) * (alpha/omega);
            p = r.array() + beta*(p.array()-omega*v.array()).array();
        } else {
            p = r;
        }

        // Use pseudo inverse to get approximate answer
        //#pragma omp sections
        p_hat = M*p;
        //v = A*p_hat; // TODO double check
        v = A.selfadjointView<Eigen::Upper>()*p_hat; // TODO double check

        alpha = rho / r_tld.dot(v);
        s = r.array() - alpha*v.array();
        errorn = s.norm()/bnrm2;

        if (errorn < tol && iter > 1) {
            x.array() += alpha*p_hat.array();
            return (0);
        }

        s_hat = M*s;
        t = A.selfadjointView<Eigen::Upper>()*s_hat;
        //t = A*s_hat;

        omega = t.dot(s)  / t.dot(t);
        x.array() += alpha*p_hat.array() + omega*s_hat.array();
        r = s.array() - omega*t.array();
        errorn = r.norm() / bnrm2;
        iter_done = iter;

        if (errorn < errmin) {
            // remember the model with the smallest norm
            errmin = errorn;
            xmin = x;
        }

        if ( errorn <= tol ) return (0);
        if ( abs(omega) < eps ) return (0);
        rho_1 = rho;

    }
    return (0);
}

template <typename Preconditioner>
bool preconditionedBiCGStab(const SparseMat &A, const Preconditioner &M,
        const Ref< VectorXcr const > b,
        Ref <VectorXcr > x,
        const int &max_it, const Real &tol,
        Real &errorn, int &iter_done) {

    Complex omega, rho, rho_1, alpha, beta;
    Real bnrm2, eps;
    int n, iter;
    Real tol2 = tol*tol;

    // Determine size of system and init vectors
    n = x.size();

    VectorXcd r(n);
    VectorXcd r_tld(n);
    VectorXcd p(n);
    VectorXcd s(n);
    VectorXcd s_hat(n);
    VectorXcd p_hat(n);
    VectorXcd v = VectorXcr::Zero(n);
    VectorXcd t = VectorXcr::Zero(n);

    //std::cout << "Start BiCGStab, memory needed: "
    //          <<  (sizeof(Complex)*(8+2)*n/(1024.*1024)) / (1024.) << " [Gb]\n";

    // Initialise
    iter_done = 0;
    eps = 1e-100;

    bnrm2 = b.squaredNorm();
    if (bnrm2 == 0) {
        x.setConstant(0.0);
        errorn = 0;
        std::cerr << "Trivial case of Ax = b, where b is 0\n";
        return (false);
    }

    // If there is an initial guess
    if ( x.squaredNorm() ) {
        r = b - A.selfadjointView<Eigen::Upper>()*x;
    } else {
        r = b;
    }

    errorn = r.squaredNorm() / bnrm2;
    omega = 1.;
    r_tld = r;

    // Get down to business
    for (iter=0; iter<max_it; ++iter) {

        rho = r_tld.dot(r);
        if (abs(rho) < eps) {
            std::cerr << "arbitrary orthogonality issue in bicgstab\n";
            std::cerr << "consider eigen restarting\n";
            return (false);
        }

        if (iter > 0) {
            beta = (rho/rho_1) * (alpha/omega);
            p = r + beta*(p-omega*v);
        } else {
            p = r;
        }

        p_hat = M.solve(p);
        v.noalias() = A.selfadjointView<Eigen::Upper>()*p_hat;

        alpha = rho / r_tld.dot(v);
        s = r - alpha*v;
        errorn = s.squaredNorm()/bnrm2;

        if (errorn < tol2 && iter > 1) {
            x = x + alpha*p_hat;
            errorn = std::sqrt(errorn);
            return (true);
        }

        s_hat = M.solve(s);
        t.noalias() = A.selfadjointView<Eigen::Upper>()*s_hat;

        omega = t.dot(s)  / t.dot(t);
        x += alpha*p_hat + omega*s_hat;
        r = s - omega*t;
        errorn = r.squaredNorm() / bnrm2;
        iter_done = iter;

        if ( errorn <= tol2 || abs(omega) < eps) {
            errorn = std::sqrt(errorn);
            return (true);
        }

        rho_1 = rho;
    }
    return (false);
}

template <typename Preconditioner>
bool preconditionedSCBiCG(const SparseMat &A, const Preconditioner &M,
        const Ref< VectorXcr const > b,
        Ref <VectorXcr > x,
        const int &max_iter, const Real &tol,
        Real &errorn, int &iter_done) {

    Real resid;
    VectorXcr p, z, q;
    Complex alpha, beta, rho, rho_1;

    Real normb = b.norm( );
    VectorXcr r = b - A*x;

    if (normb == 0.0) normb = 1;

    if ((resid = r.norm( ) / normb) <= tol) {
        errorn = resid;
        iter_done = 0;
        return 0;
    }

    for (int i = 1; i <= max_iter; i++) {
        z = M.solve(r);
        rho = r.dot(z);

        if (i == 1)  p = z;
        else {
            beta = rho / rho_1;
            p = z + beta * p;
        }

        q = A*p;
        alpha = rho / p.dot(q);

        x += alpha * p;
        r -= alpha * q;
        std::cout << "resid\t" << resid << std::endl;
        if ((resid = r.norm( ) / normb) <= tol) {
            errorn = resid;
            iter_done = i;
            return 0;
        }

        rho_1 = rho;
    }

    errorn = resid;

    return (false);
}


/** \internal Low-level conjugate gradient algorithm
  * \param mat The matrix A
  * \param rhs The right hand side vector b5
  * \param x On input and initial solution, on output the computed solution.
  * \param precond A preconditioner being able to efficiently solve for an
  *                approximation of Ax=b (regardless of b)
  * \param iters On input the max number of iteration, on output the number of performed iterations.
  * \param tol_error On input the tolerance error, on output an estimation of the relative error.
  */
template<typename Rhs, typename Dest, typename Preconditioner>
EIGEN_DONT_INLINE
void conjugateGradient(const SparseMat& mat, const Rhs& rhs, Dest& x,
                        const Preconditioner& precond, int& iters,
                        typename Dest::RealScalar& tol_error)
{
  using std::sqrt;
  using std::abs;
  typedef typename Dest::RealScalar RealScalar;
  typedef typename Dest::Scalar Scalar;
  typedef Matrix<Scalar,Dynamic,1> VectorType;

  RealScalar tol = tol_error;
  int maxIters = iters;

  int n = mat.cols();

  VectorType residual = rhs - mat.selfadjointView<Eigen::Upper>() * x; //initial residual

  RealScalar rhsNorm2 = rhs.squaredNorm();
  if(rhsNorm2 == 0)
  {
    x.setZero();
    iters = 0;
    tol_error = 0;
    return;
  }
  RealScalar threshold = tol*tol*rhsNorm2;
  RealScalar residualNorm2 = residual.squaredNorm();
  if (residualNorm2 < threshold)
  {
    iters = 0;
    tol_error = sqrt(residualNorm2 / rhsNorm2);
    return;
  }

  VectorType p(n);
  p = precond.solve(residual);      //initial search direction

  VectorType z(n), tmp(n);
  RealScalar absNew = numext::real(residual.dot(p));  // the square of the absolute value of r scaled by invM
  int i = 0;
  while(i < maxIters)
  {
    tmp.noalias() = mat.selfadjointView<Eigen::Upper>() * p;              // the bottleneck of the algorithm

    Scalar alpha = absNew / p.dot(tmp);   // the amount we travel on dir
    x += alpha * p;                       // update solution
    residual -= alpha * tmp;              // update residue

    residualNorm2 = residual.squaredNorm();
    if(residualNorm2 < threshold)
      break;

    z = precond.solve(residual);          // approximately solve for "A z = residual"

    RealScalar absOld = absNew;
    absNew = numext::real(residual.dot(z));     // update the absolute value of r
    RealScalar beta = absNew / absOld;            // calculate the Gram-Schmidt value used to create the new search direction
    p = z + beta * p;                             // update search direction
    i++;
  }
  tol_error = sqrt(residualNorm2 / rhsNorm2);
  iters = i;
}

// // Computes implicit
// VectorXcr implicitDCInvBPhi (const SparseMat& D, const SparseMat& C,
//                         const SparseMat& B, const SparseMat& MC,
//                         const VectorXcr& Phi, Real& tol,
//                         int& max_it) {
//     int iter_done(0);
//     Real errorn(0);
//     VectorXcr b = B*Phi;
//     VectorXcr y = VectorXcr::Zero(C.rows()) ; // = C^1*b;
//     bicgstab(C, MC, b, y, max_it, tol, errorn, iter_done, false);
//     //std::cout << "Temp " << errorn << std::endl;
//     return  D*y;
// }

// Computes implicit
VectorXcr implicitDCInvBPhi (const SparseMat& D, const SparseMat& C,
                        const VectorXcr& ioms, const SparseMat& MC,
                        const VectorXcr& Phi, Real& tol,
                        int& max_it) {
    int iter_done(0);
    Real errorn(0);
    VectorXcr b = (ioms).asDiagonal() * (D.transpose()*Phi);
    VectorXcr y = VectorXcr::Zero(C.rows()) ; // = C^1*b;
    bicgstab(C, MC, b, y, max_it, tol, errorn, iter_done, false);
    //std::cout << "Temp " << errorn << std::endl;
    max_it = iter_done;
    return  D*y;
}

// Computes implicit
template <typename Preconditioner>
VectorXcr implicitDCInvBPhi2 (const SparseMat& D, const SparseMat& C,
                        const Ref<VectorXcr const> ioms, const Preconditioner& solver,
                        const Ref<VectorXcr const> Phi, Real& tol,
                        int& max_it) {

    VectorXcr b = (ioms).asDiagonal() * (D.transpose()*Phi);
    VectorXcr y = VectorXcr::Zero(C.rows()) ; // = C^1*b;

    // Home Made
    //int iter_done(0);
    //Real errorn(0);
    //preconditionedBiCGStab(C, solver, b, y, max_it, tol, errorn, iter_done); //, false); // Jacobi M
    //max_it = iter_done;

    // Eigen BiCGStab
    Eigen::BiCGSTAB<SparseMatrix<Complex> > BiCG;
    BiCG.compute( C ); // TODO move this out of this loop!
    y = BiCG.solve(b);
    max_it = BiCG.iterations();
    tol = BiCG.error();

    // Direct
/*
    std::cout << "Computing LLT" << std::endl;
    Eigen::SimplicialLLT<SparseMatrix<Complex>, Eigen::Upper, Eigen::AMDOrdering<int> >  LLT;
    LLT.compute(C);
    max_it = 1;
    std::cout << "Computed LLT" << std::endl;
    y = LLT.solve(b);
*/

    return  D*y;
}

// Computes implicit
//template <typename Solver>
template < typename Solver >
inline VectorXcr implicitDCInvBPhi3 (const SparseMat& D, const Solver& solver,
                        const Ref<VectorXcr const> ioms,
                        const Ref<VectorXcr const> Phi, Real& tol,
                        int& max_it) {
    VectorXcr b = (ioms).asDiagonal() * (D.transpose()*Phi);
    VectorXcr y = solver.solve(b);
    //max_it = 0;
    max_it = solver.iterations();
    //errorn = solver.error();
    return  D*y;
}


// // Simple extraction of indices in idx into reduceed array x1
// void vmap( const Ref<VectorXcr const> x0, Ref<VectorXcr> x1, const std::vector<int>& idx ) {
//     for (unsigned int ii=0; ii<idx.size(); ++ii) {
//         x1(ii) = x0(idx[ii]);
//     }
// }

// Simple extraction of indices in idx into reduceed array x1
VectorXcr vmap( const Ref<VectorXcr const> x0, const std::vector<int>& idx ) {
    VectorXcr x1 = VectorXcr::Zero( idx.size() );
    for (unsigned int ii=0; ii<idx.size(); ++ii) {
        x1(ii) = x0(idx[ii]);
    }
    return x1;
}

// reverse of above
void ivmap( Ref<VectorXcr > x0, const Ref<VectorXcr const> x1, const std::vector<int>& idx ) {
    for (unsigned int ii=0; ii<idx.size(); ++ii) {
        x0(idx[ii]) = x1(ii);
    }
}


// On Input
// A = Matrix
// B = Right hand side
// X = initial guess, and solution
// maxit = maximum Number of iterations
// tol = error tolerance
// On Output
// X real solution vector
// errorn = Real error norm
template < typename CSolver >
int implicitbicgstab(//const SparseMat& D,
                     //const SparseMat& C,
                     const Ref< Eigen::SparseMatrix<Complex> const > D,
                     const std::vector<int>& idx,
                     const Ref< VectorXcr const > ioms,
                     const Ref< VectorXcr const > rhs,
                     Ref <VectorXcr> phi,
                     CSolver& solver,
                     int &max_it, const Real &tol, Real &errorn, int &iter_done, ofstream& logio) {

    logio << "using the preconditioned implicit solver" << std::endl;

    Complex omega, rho, rho_1, alpha, beta;
    Real    tol2;
    int     iter, max_it2, max_it1;

    // Look at reduced problem
    VectorXcr rhs2 = vmap(rhs, idx);
    VectorXcr phi2 = vmap(phi, idx);

    // Determine size of system and init vectors
    int n = idx.size();        // was phi.size();
    VectorXcr r(n);
    VectorXcr r_tld(n);
    VectorXcr p(n);
    VectorXcr s(n);
    VectorXcr v = VectorXcr::Zero(n);
    VectorXcr t = VectorXcr::Zero(n);

//     TODO, refigure for implicit large system
//     std::cout << "Start BiCGStab, memory needed: "
//               <<  (sizeof(Complex)*(9+2)*n/(1024.*1024*1024)) << " [Gb]\n";

    // Initialise
    iter_done = 0;
    Real eps = 1e-100;

    Real bnrm2 = rhs.norm();
    if (bnrm2 == 0) {
        phi.setConstant(0.0);
        errorn = 0;
        std::cerr << "Trivial case of Ax = b, where b is 0\n";
        return (0);
    }

    // If there is an initial guess
    if ( phi.norm() ) {
        tol2 = tol;
        max_it2 = 50000;
        //r = rhs - implicitDCInvBPhi3(D, solver, ioms, phi, tol2, max_it2);
        //r = rhs - implicitDCInvBPhi3(D, solver, ioms, phi, tol2, max_it2);
        r = rhs2 - vmap( implicitDCInvBPhi3(D, solver, ioms, phi, tol2, max_it2), idx );
    } else {
        r = vmap(rhs, idx);
    }

    jsw_timer timer;

    errorn = r.norm() / bnrm2;
    omega = 1.;
    r_tld = r;
    Real errornold = 1e14;
    // Get down to business
    for (iter=0; iter<max_it; ++iter) {

        timer.begin();

        rho = r_tld.dot(r);
        if (abs(rho) < eps) {
            ivmap( phi, phi2, idx );
            return (0);
        }

        if (iter > 0) {
            beta = (rho/rho_1) * (alpha/omega);
            p = r.array() + beta*(p.array()-omega*v.array()).array();
        } else {
            p = r;
        }

        tol2 = tol;

        max_it2 = 500000;
        //v = implicitDCInvBPhi2(D, C, ioms, solver, p, tol2, max_it2);
        ivmap(phi, p, idx);
        v = vmap(implicitDCInvBPhi3(D, solver, ioms, phi, tol2, max_it2), idx);

        alpha = rho / r_tld.dot(v);
        s = r.array() - alpha*v.array();
        errorn = s.norm()/bnrm2;

        if (errorn < tol && iter > 1) {
            phi2.array() += alpha*p.array();
            ivmap( phi, phi2, idx );
            return (0);
        }

        tol2 = tol;

        max_it1 = 500000;
        //t = implicitDCInvBPhi2(D, C, ioms, solver, s, tol2, max_it1);
        //t = implicitDCInvBPhi3(D, solver, ioms, s, tol2, max_it1);
        ivmap(phi, s, idx);
        t = vmap(implicitDCInvBPhi3(D, solver, ioms, phi, tol2, max_it1), idx);
        omega = t.dot(s)  / t.dot(t);

        r = s.array() - omega*t.array();
        errorn = r.norm() / bnrm2;
        iter_done = iter;

        if (errorn <= tol) {
            ivmap( phi, phi2, idx );
            return (0);
        }

        if (abs(omega) < eps) {
            ivmap( phi, phi2, idx );
            return (0);
        }

        rho_1 = rho;

        logio << "iteration " << std::setw(3) << iter
              << "  errorn " << std::setw(6) << std::setprecision(4) << std::scientific << errorn
              //<< "\timplicit iterations " << std::setw(5) << max_it1+max_it2
              << "  time " << std::setw(6) << std::fixed << std::setprecision(2) << timer.end()/60. << " [m]  "
              << max_it2+max_it2 << " iterations " << std::endl;

        // Check to see how progress is going

        if (errornold - errorn < 0) {
            logio << "Irregular non-monotonic (negative) convergence. Recommend restart. \n";
            ivmap( phi, phi2, idx );
            return (2);
        }

        /*
        if (errornold - errorn < 1e-14) {
            logio << "not making any progress. Giving up\n";
            return (1);
        }
        */

        //std::cout << "|| p-s ||" << (alpha*p - omega*s).norm() << std::endl;

        // only update phi if good things are happening
        phi2.array() += alpha*p.array() + omega*s.array();
        errornold = errorn;

    }
    ivmap( phi, phi2, idx );
    return (0);
}

// On Input
// A = Matrix
// B = Right hand side
// X = initial guess, and solution
// maxit = maximum Number of iterations
// tol = error tolerance
// On Output
// X real solution vector
// errorn = Real error norm
template < typename Solver >
int implicitbicgstab_ei(const SparseMat&  D,
                        const Ref< VectorXcr const > ioms,
                        const Ref< VectorXcr const > rhs,
                        Ref <VectorXcr> phi,
                        Solver& solver,
                        int &max_it, const Real &tol, Real &errorn, int &iter_done, ofstream& logio) {

    logio << "using the preconditioned Eigen implicit solver" << std::endl;

    Complex omega, rho, rho_1, alpha, beta;
    Real tol2;
    int  iter, max_it2,max_it1;

    // Determine size of system and init vectors
    int n = phi.size();
    VectorXcr r(n);
    VectorXcr r_tld(n);
    VectorXcr p(n);
    VectorXcr v(n);
    VectorXcr s(n);
    VectorXcr t(n);

    // Initialise
    iter_done = 0;
    Real eps = 1e-100;

    Real bnrm2 = rhs.norm();
    if (bnrm2 == 0) {
        phi.setConstant(0.0);
        errorn = 0;
        std::cerr << "Trivial case of Ax = b, where b is 0\n";
        return (0);
    }

    // If there is an initial guess
    if ( phi.norm() ) {
        tol2 = tol;
        max_it2 = 50000;
        r = rhs - implicitDCInvBPhi3(D, solver, ioms, phi, tol2, max_it2);
    } else {
        r = rhs;
    }

    jsw_timer timer;

    errorn = r.norm() / bnrm2;
    omega = 1.;
    r_tld = r;
    Real errornold = 1e14;

    // Get down to business
    for (iter=0; iter<max_it; ++iter) {

        timer.begin();

        rho = r_tld.dot(r);
        if (abs(rho) < eps) return (0);

        if (iter > 0) {
            beta = (rho/rho_1) * (alpha/omega);
            p = r.array() + beta*(p.array()-omega*v.array()).array();
        } else {
            p = r;
        }

        tol2 = tol;
        max_it2 = 500000;
        v = implicitDCInvBPhi3(D, solver, ioms, p, tol2, max_it2);
        max_it2 = solver.iterations();

        alpha = rho / r_tld.dot(v);
        s = r.array() - alpha*v.array();
        errorn = s.norm()/bnrm2;

        if (errorn < tol && iter > 1) {
            phi.array() += alpha*p.array();
            return (0);
        }

        tol2 = tol;
        max_it1 = 500000;
        t = implicitDCInvBPhi3(D, solver, ioms, s, tol2, max_it1);
        max_it1 = solver.iterations();
        omega = t.dot(s)  / t.dot(t);

        r = s.array() - omega*t.array();
        errorn = r.norm() / bnrm2;
        iter_done = iter;

        if (errorn <= tol ) return (0);
        if (abs(omega) < eps) return (0);
        rho_1 = rho;

        logio << "iteration " << std::setw(4) << iter
              << "\terrorn " << std::setw(6) << std::setprecision(4) << std::scientific << errorn
              << "\timplicit iterations " << std::setw(5) << max_it1+max_it2
              << "\ttime " << std::setw(10) << std::fixed << std::setprecision(2) << timer.end() << std::endl;

        // Check to see how progress is going
        if (errornold - errorn < 0) {
            logio << "irregular (negative) convergence. Try again? \n";
            return (2);
        }

        // only update phi if good things are happening
        phi.array() += alpha*p.array() + omega*s.array();
        errornold = errorn;

    }
    return (0);
}


// On Input
// A = Matrix
// B = Right hand side
// X = initial guess, and solution
// maxit = maximum Number of iterations
// tol = error tolerance
// On Output
// X real solution vector
// errorn = Real error norm
int implicitbicgstabnt(const SparseMat& D,
                     const SparseMat& C,
                     const VectorXcr& ioms,
                     const SparseMat& MC,
                     Eigen::Ref< VectorXcr > rhs,
                     VectorXcr& phi,
                     int &max_it, Real &tol, Real &errorn, int &iter_done) {

    Complex omega, rho, rho_1, alpha, beta;
    Real errmin, tol2;
    int  iter, max_it2;

//     // Cholesky decomp
//     SparseLLT<SparseMatrix<Complex>, Cholmod>
//         CholC(SparseMatrix<Complex> (C.real()) );
//     if(!CholC.succeeded()) {
//         std::cerr << "decomposiiton failed\n";
//         return EXIT_FAILURE;
//     }

    // Determine size of system and init vectors
    int n = phi.size();
    VectorXcr r(n);
    VectorXcr r_tld(n);
    VectorXcr p(n);
    VectorXcr v(n);
    //VectorXcr p_hat(n);
    VectorXcr s(n);
    //VectorXcr s_hat(n);
    VectorXcr t(n);
    VectorXcr xmin(n);

//     TODO, refigure for implicit large system
//     std::cout << "Start BiCGStab, memory needed: "
//               <<  (sizeof(Complex)*(9+2)*n/(1024.*1024*1024)) << " [Gb]\n";

    // Initialise
    iter_done = 0;
    v.setConstant(0.); // not necessary I don't think
    t.setConstant(0.);
    Real eps = 1e-100;

    Real bnrm2 = rhs.norm();
    if (bnrm2 == 0) {
        phi.setConstant(0.0);
        errorn = 0;
        std::cerr << "Trivial case of Ax = b, where b is 0\n";
        return (0);
    }

    // If there is an initial guess
    if ( phi.norm() ) {
        //r = rhs - A*phi;
        tol2 = tol;
        max_it2 = 50000;
        std::cout << "Initial guess " << std::endl;
        r = rhs - implicitDCInvBPhi(D, C, ioms, MC, phi, tol2, max_it2);
        //r = rhs - implicitDCInvBPhi (D, C, B, CholC, phi, tol2, max_it2);
    } else {
        r = rhs;
    }


    errorn = r.norm() / bnrm2;
    //std::cout << "Initial |r|  " << r.norm() << "\t" << errorn<< std::endl;
    omega = 1.;
    r_tld = r;
    errmin = 1e30;
    Real errornold = 1e6;
    // Get down to business
    for (iter=0; iter<max_it; ++iter) {

        rho = r_tld.dot(r);
        if (abs(rho) < eps) return (0);

        if (iter > 0) {
            beta = (rho/rho_1) * (alpha/omega);
            p = r.array() + beta*(p.array()-omega*v.array()).array();
        } else {
            p = r;
        }

        // Use pseudo inverse to get approximate answer
        //p_hat = p;
        tol2  = std::max(1e-4*errorn, tol);
        tol2 = tol;
        max_it2 = 500000;
        //v = A*p_hat;
        v = implicitDCInvBPhi(D, C, ioms, MC, p, tol2, max_it2);
        //v = implicitDCInvBPhi(D, C, B, CholC, p, tol2, max_it2);

        alpha = rho / r_tld.dot(v);
        s = r.array() - alpha*v.array();
        errorn = s.norm()/bnrm2;

        if (errorn < tol && iter > 1) {
            phi.array() += alpha*p.array();
            return (0);
        }

        // s_hat = M*s;
        //tol2 = tol;
        tol2  = std::max(1e-4*errorn, tol);
        tol2 = tol;
        max_it2 = 50000;
        // t = A*s_hat;
        t = implicitDCInvBPhi(D, C, ioms, MC, s, tol2, max_it2);
        //t = implicitDCInvBPhi(D, C, B, CholC, s, tol2, max_it2);
        omega = t.dot(s)  / t.dot(t);
        phi.array() += alpha*p.array() + omega*s.array();
        r = s.array() - omega*t.array();
        errorn = r.norm() / bnrm2;
        iter_done = iter;
        if (errorn < errmin) {
            // remember the model with the smallest norm
            errmin = errorn;
            xmin = phi;
        }

        if (errorn <= tol ) return (0);
        if (abs(omega) < eps) return (0);
        rho_1 = rho;

        std::cout << "iteration " << std::setw(4) << iter << "\terrorn "  << std::setw(6) << std::scientific << errorn
                  << "\timplicit iterations " << std::setw(5) << max_it2 << std::endl;
        if (errornold - errorn < 1e-14) {
            std::cout << "not making any progress. Giving up\n";
            return (2);
        }
        errornold = errorn;

    }
    return (0);
}

#endif   // ----- #ifndef BICGSTAB_INC  -----

//int bicgstab(const SparseMat &A, Eigen::SparseLU< Eigen::SparseMatrix<Complex, RowMajor> ,