Procházet zdrojové kódy

SuperLU support added

master
Trevor Irons před 6 roky
rodič
revize
c46ba57d88

+ 8
- 1
CMakeLists.txt Zobrazit soubor

@@ -7,6 +7,13 @@ set(EMSCHUR3D_VERSION_NOQUOTES "${EMSCHUR3D_VERSION_MAJOR}.${EMSCHUR3D_VERSION_M
7 7
 
8 8
 option ( LEMMA_MODULE_EMSCHUR3D TRUE )
9 9
 
10
+find_package( SuperLU )
11
+if (SUPERLU_FOUND)
12
+	message( STATUS "SuperLU was found" )
13
+	add_compile_options(-DHAVE_SUPERLU)
14
+	INCLUDE_DIRECTORIES(${SUPERLU_INCLUDES})
15
+endif()
16
+
10 17
 if ( LEMMA_VTK6_SUPPORT OR LEMMA_VTK7_SUPPORT OR LEMMA_VTK8_SUPPORT AND LEMMA_MODULE_EMSCHUR3D ) 
11 18
 
12 19
 	configure_file (
@@ -32,7 +39,7 @@ if ( LEMMA_VTK6_SUPPORT OR LEMMA_VTK7_SUPPORT OR LEMMA_VTK8_SUPPORT AND LEMMA_MO
32 39
 
33 40
 	# Linking
34 41
 	target_link_libraries(emschur3d ${VTK_LIBRARIES})
35
-
42
+	target_link_libraries(emschur3d ${SUPERLU_LIBRARIES})
36 43
 
37 44
 	# Testing
38 45
 	if (LEMMA_ENABLE_TESTING)

+ 2
- 2
examples/EMSchur3D-vtk.cpp Zobrazit soubor

@@ -114,8 +114,8 @@ int main( int argc, char** argv ) {
114 114
     // And solve
115 115
 
116 116
     // Use BiCGSTAB Diagonal preconditioner
117
-    auto EM3D = EMSchur3D< Eigen::BiCGSTAB<Eigen::SparseMatrix<Complex, Eigen::ColMajor>, Eigen::IncompleteLUT<Complex> > >::NewSP();
118
-    //auto EM3D = EMSchur3D< Eigen::BiCGSTAB<Eigen::SparseMatrix<Complex, Eigen::ColMajor> > >::NewSP();
117
+    //auto EM3D = EMSchur3D< Eigen::BiCGSTAB<Eigen::SparseMatrix<Complex, Eigen::ColMajor>, Eigen::IncompleteLUT<Complex> > >::NewSP();
118
+    auto EM3D = EMSchur3D< Eigen::BiCGSTAB<Eigen::SparseMatrix<Complex, Eigen::ColMajor> > >::NewSP();
119 119
 
120 120
     // LS CG
121 121
     //auto EM3D = EMSchur3D< Eigen::LeastSquaresConjugateGradient<Eigen::SparseMatrix<Complex, Eigen::ColMajor> > >::NewSP();

+ 5
- 1
examples/EMSchur3D.cpp Zobrazit soubor

@@ -29,7 +29,11 @@ using namespace Lemma;
29 29
 int main( int argc, char** argv ) {
30 30
 
31 31
     // BiCGSTAB Diagonal preconditioner
32
-    auto EM3D = EMSchur3D< Eigen::BiCGSTAB<Eigen::SparseMatrix<Complex, Eigen::ColMajor>, Eigen::IncompleteLUT<Complex> > >::NewSP();
32
+    //auto EM3D = EMSchur3D< Eigen::BiCGSTAB<Eigen::SparseMatrix<Complex, Eigen::ColMajor>, Eigen::IncompleteLUT<Complex> > >::NewSP();
33
+    //auto EM3D = EMSchur3D< Eigen::BiCGSTAB<Eigen::SparseMatrix<Complex, Eigen::ColMajor> > >::NewSP();
34
+
35
+    // SUPERLU
36
+    auto EM3D = EMSchur3D< Eigen::SuperLU<Eigen::SparseMatrix<Complex, Eigen::ColMajor> > >::NewSP();
33 37
 
34 38
     if (argc < 3) {
35 39
         std::cout << "EMSchur3D  <rgrid>  <1dmod> <3dmod>  <aemsurvey> " << std::endl;

+ 29
- 22
include/EMSchur3D.h Zobrazit soubor

@@ -24,6 +24,10 @@
24 24
 
25 25
 #include "EMSchur3DBase.h"
26 26
 #include "bicgstab.h"
27
+
28
+#ifdef HAVE_SUPERLU
29
+#include "Eigen/SuperLUSupport"
30
+#endif
27 31
 //#include "CSymSimplicialCholesky.h"
28 32
 
29 33
 namespace Lemma {
@@ -54,7 +58,6 @@ namespace Lemma {
54 58
          */
55 59
         static std::shared_ptr< EMSchur3D > NewSP() {
56 60
             return std::make_shared< EMSchur3D<Solver> >( ctor_key() );
57
-            //return std::make_shared< EMSchur3D< Eigen::BiCGSTAB<Eigen::SparseMatrix<Complex, Eigen::ColMajor> > > >( ctor_key() ) ;
58 61
         }
59 62
 
60 63
         /** Default protected constructor, use New */
@@ -205,7 +208,7 @@ namespace Lemma {
205 208
         logio << "solving RHS for source " << isource << std::endl;
206 209
 
207 210
         // TODO, this is stupid, try and get rid of this copy!
208
-        Eigen::SparseMatrix<Complex>  Cc  = Cvec[iw];
211
+        //Eigen::SparseMatrix<Complex>  Cc  = Cvec[iw];
209 212
 
210 213
         jsw_timer timer;
211 214
         jsw_timer timer2;
@@ -232,12 +235,13 @@ namespace Lemma {
232 235
 //         /* END EXPERIMENTAL */
233 236
 
234 237
         VectorXcr ADiv = D*A;  // ADiv == RHS == D C^I Se
235
-        VectorXcr Error = ((Cc.selfadjointView<Eigen::Lower>()*A).array() - Se.array());
238
+        //VectorXcr Error = ((Cc.selfadjointView<Eigen::Lower>()*A).array() - Se.array());
239
+        VectorXcr Error = ((Cvec[iw].selfadjointView<Eigen::Lower>()*A).array() - Se.array());
236 240
         logio << "|| Div(A) || = " << ADiv.norm()
237 241
               << "\tInital solution error="<<   Error.norm()  // Iteritive info
238
-              << "\tSolver reported error="<<   CSolver[iw].error()  // Iteritive info
242
+//              << "\tSolver reported error="<<   CSolver[iw].error()  // Iteritive info
239 243
               << "\ttime " << timer.end() / 60. << " [m]   "
240
-              <<  CSolver[iw].iterations() << "  iterations"
244
+//              <<  CSolver[iw].iterations() << "  iterations"
241 245
               << std::endl;
242 246
 
243 247
         //VectorXcr ADivMAC = ADiv.array() * MAC.array().cast<Complex>();
@@ -247,14 +251,13 @@ namespace Lemma {
247 251
         // Solve for Phi
248 252
         logio << "Solving for Phi " << std::flush;
249 253
         timer.begin();
250
-        tol = 1e-18;
254
+        tol = 1e-20;
251 255
         int success(2);
252 256
 
253 257
         success = implicitbicgstab(D, idx, ms, ADiv, Phi, CSolver[iw], max_it, tol, errorn, iter_done, logio);
254 258
         //Phi.array() *= MAC.array().cast<Complex>(); // remove phi from air regions
255 259
 
256 260
         /* Restart if necessary */
257
-/*
258 261
         int nrestart(1);
259 262
         // TODO send MAC to implicitbicgstab?
260 263
         while (success == 2 && nrestart < 18 && iter_done > 1) {
@@ -262,7 +265,6 @@ namespace Lemma {
262 265
             //Phi.array() *= MAC.array().cast<Complex>(); // remove phi from air regions
263 266
             nrestart += 1;
264 267
         }
265
-*/
266 268
 
267 269
         logio << "Implicit BiCGStab solution in " << iter_done << " iterations."
268 270
                 << " with error " << std::setprecision(8) << std::scientific << errorn << std::endl;
@@ -292,20 +294,23 @@ namespace Lemma {
292 294
               //<<  " with error " << errorn << "\t";
293 295
 
294 296
         // Report error of solutions
295
-        Error = ((Cc.selfadjointView<Eigen::Lower>()*A).array() + E.array() - Se.array());
296
-        //logio << "\tsolution error " << Error.norm()
297
-        //      << std::fixed << std::setprecision(2) << "\ttime " << timer.end()/60. << "\ttotal time " << timer2.end()/60. << std::endl;
297
+        //Error = ((Cc.selfadjointView<Eigen::Lower>()*A).array() + E.array() - Se.array());
298
+        Error = ((Cvec[iw].selfadjointView<Eigen::Lower>()*A).array() + E.array() - Se.array());
299
+
298 300
         //      << "\tSolver reported error="<<   CSolver[iw].error()  // Iteritive info
299 301
         //      << "\ttime " << timer.end() / 60. << " [m]   "
300 302
         //      <<  CSolver[iw].iterations() << "  iterations"
301 303
 
302 304
 
303 305
         logio << "|| Div(A) || = " << ADiv2.norm()
304
-              << "\tInital solution error="<<   Error.norm()  // Iteritive info
305
-              << "\tSolver reported error="<<   CSolver[iw].error()  // Iteritive info
306
+              << "\tSolution error="<<   Error.norm()  // Iteritive info
307
+//              << "\tSolver reported error="<<   CSolver[iw].error()  // Iteritive info
306 308
               << "\ttime " << timer.end() / 60. << " [m]   "
307
-              <<  CSolver[iw].iterations() << "  iterations"
308
-              << std::endl;
309
+//              <<  CSolver[iw].iterations() << "  iterations"
310
+              << std::endl << std::endl;
311
+
312
+        logio << std::fixed << std::setprecision(2) << "\ttime " << timer.end()/60. << "\ttotal time " << timer2.end()/60. << std::endl;
313
+
309 314
         logio.close();
310 315
 
311 316
         //////////////////////////////////////
@@ -356,7 +361,7 @@ namespace Lemma {
356 361
         }
357 362
     }
358 363
 
359
-    #ifdef HAVE_SUPERLUMT
364
+    #ifdef HAVE_SUPERLU
360 365
     template<>
361 366
     void EMSchur3D< Eigen::SuperLU<Eigen::SparseMatrix<Complex, Eigen::ColMajor> > >::BuildCDirectSolver() {
362 367
 
@@ -367,8 +372,8 @@ namespace Lemma {
367 372
             timer.begin();
368 373
 
369 374
             /* SuperLU */
370
-            //CSolver[iw].options().DiagPivotThresh = 0.01;
371
-            //CSolver[iw].options().SymmetricMode = YES;
375
+            CSolver[iw].options().DiagPivotThresh = 0.0;
376
+            CSolver[iw].options().SymmetricMode = YES;
372 377
             //CSolver[iw].options().ColPerm = MMD_AT_PLUS_A;
373 378
             //CSolver[iw].options().Trans = NOTRANS;
374 379
             //CSolver[iw].options().ConditionNumber = NO;
@@ -381,14 +386,14 @@ namespace Lemma {
381 386
             //std::cout << "\tCondition Number: " << CSolver[iw].options().ConditionNumber << std::endl;
382 387
 
383 388
             /*  Complex system */
384
-            std::cout << "SuperLU_MT pattern analyzing C_" << iw << ",";
389
+            std::cout << "SuperLU pattern analyzing C_" << iw << ",";
385 390
             std::cout.flush();
386 391
             CSolver[iw].analyzePattern( Cvec[iw].selfadjointView< Eigen::Lower>() );
387 392
             std::cout << " done in " << timer.end() / 60. << " [m]" << std::endl;
388 393
 
389 394
             // factorize
390 395
             timer.begin();
391
-            std::cout << "SuperLU_MT factorising C_" << iw << ", ";
396
+            std::cout << "SuperLU factorising C_" << iw << ", ";
392 397
             std::cout.flush();
393 398
             CSolver[iw].factorize( Cvec[iw].selfadjointView< Eigen::Lower>() );
394 399
             std::cout << " done in " << timer.end() / 60. << " [m]" << std::endl;
@@ -514,20 +519,22 @@ namespace Lemma {
514 519
         CSolver = new Eigen::BiCGSTAB<Eigen::SparseMatrix<Complex, Eigen::ColMajor>, Eigen::IncompleteLUT<Complex> > [Omegas.size()];
515 520
         for (int iw=0; iw<Omegas.size(); ++iw) {
516 521
             Csym = Cvec[iw].selfadjointView<Eigen::Lower>();
517
-            CSolver[iw].preconditioner().setDroptol(1e-5);      // 1e-12
518
-            //CSolver[iw].preconditioner().setFillfactor(5e1);     // 1e2
522
+            CSolver[iw].preconditioner().setDroptol(1e-6);       //1e-5);      // 1e-12
523
+            CSolver[iw].preconditioner().setFillfactor(5e1);     // 1e2
519 524
             jsw_timer timer;
520 525
             timer.begin();
521 526
             /*  Complex system */
522 527
             std::cout << "BiCGSTAB(ILU) pattern analyzing C_" << iw << ",";
523 528
             std::cout.flush();
524 529
             CSolver[iw].analyzePattern( Csym );
530
+            //CSolver[iw].analyzePattern( Cvec[iw]);
525 531
             std::cout << " done in " << timer.end() / 60. << " [m]" << std::endl;
526 532
             /* factorize */
527 533
             timer.begin();
528 534
             std::cout << "BiCGSTAB(ILU) factorising C_" << iw << ", ";
529 535
             std::cout.flush();
530 536
             CSolver[iw].factorize( Csym );
537
+            //CSolver[iw].factorize( Cvec[iw] );
531 538
             std::cout << " done in " << timer.end() / 60. << " [m]" << std::endl;
532 539
         }
533 540
     }

+ 19
- 687
include/bicgstab.h Zobrazit soubor

@@ -55,428 +55,25 @@
55 55
 using namespace Eigen;
56 56
 using namespace Lemma;
57 57
 
58
-//typedef Eigen::VectorXcd VectorXcr;
59 58
 typedef Eigen::SparseMatrix<Complex> SparseMat;
60 59
 
61 60
 
62
-// On Input
63
-// A = Matrix
64
-// B = Right hand side
65
-// X = initial guess, and solution
66
-// maxit = maximum Number of iterations
67
-// tol = error tolerance
68
-// On Output
69
-// X real solution vector
70
-// errorn = Real error norm
71
-int bicgstab(const SparseMat &A, const SparseMat &M, const VectorXcr &b, VectorXcr &x,
72
-                int &max_it, Real &tol, Real &errorn, int &iter_done,
73
-                const bool& banner = true) {
74
-
75
-    Complex omega, rho, rho_1, alpha, beta;
76
-    Real bnrm2, eps, errmin;
77
-    int n, iter; //, istat;
78
-
79
-    // Determine size of system and init vectors
80
-    n = x.size();
81
-    VectorXcr r(n);
82
-    VectorXcr r_tld(n);
83
-    VectorXcr p(n);
84
-    VectorXcr v(n);
85
-    VectorXcr p_hat(n);
86
-    VectorXcr s(n);
87
-    VectorXcr s_hat(n);
88
-    VectorXcr t(n);
89
-    VectorXcr xmin(n);
90
-
91
-    if (banner) {
92
-        std::cout << "Start BiCGStab, memory needed: "
93
-                  <<  (sizeof(Complex)*(9+2)*n/(1024.*1024*1024)) << " [Gb]\n";
94
-    }
95
-
96
-    // Initialise
97
-    iter_done = 0;
98
-    v.setConstant(0.); // not necessary I don't think
99
-    t.setConstant(0.);
100
-    eps = 1e-100;
101
-
102
-    bnrm2 = b.norm();
103
-    if (bnrm2 == 0) {
104
-        x.setConstant(0.0);
105
-        errorn = 0;
106
-        std::cerr << "Trivial case of Ax = b, where b is 0\n";
107
-        return (0);
108
-    }
109
-
110
-    // If there is an initial guess
111
-    if ( x.norm() ) {
112
-        r = b - A.selfadjointView<Eigen::Upper>()*x;
113
-        //r = b - A*x;
114
-    } else {
115
-        r = b;
116
-    }
117
-
118
-    errorn = r.norm() / bnrm2;
119
-    omega = 1.;
120
-    r_tld = r;
121
-    errmin = 1e30;
122
-
123
-    // Get down to business
124
-    for (iter=0; iter<max_it; ++iter) {
125
-
126
-        rho = r_tld.dot(r);
127
-        if ( abs(rho) < eps) return (0);
128
-
129
-        if (iter > 0) {
130
-            beta = (rho/rho_1) * (alpha/omega);
131
-            p = r.array() + beta*(p.array()-omega*v.array()).array();
132
-        } else {
133
-            p = r;
134
-        }
135
-
136
-        // Use pseudo inverse to get approximate answer
137
-        //#pragma omp sections
138
-        p_hat = M*p;
139
-        //v = A*p_hat; // TODO double check
140
-        v = A.selfadjointView<Eigen::Upper>()*p_hat; // TODO double check
141
-
142
-        alpha = rho / r_tld.dot(v);
143
-        s = r.array() - alpha*v.array();
144
-        errorn = s.norm()/bnrm2;
145
-
146
-        if (errorn < tol && iter > 1) {
147
-            x.array() += alpha*p_hat.array();
148
-            return (0);
149
-        }
150
-
151
-        s_hat = M*s;
152
-        t = A.selfadjointView<Eigen::Upper>()*s_hat;
153
-        //t = A*s_hat;
154
-
155
-        omega = t.dot(s)  / t.dot(t);
156
-        x.array() += alpha*p_hat.array() + omega*s_hat.array();
157
-        r = s.array() - omega*t.array();
158
-        errorn = r.norm() / bnrm2;
159
-        iter_done = iter;
160
-
161
-        if (errorn < errmin) {
162
-            // remember the model with the smallest norm
163
-            errmin = errorn;
164
-            xmin = x;
165
-        }
166
-
167
-        if ( errorn <= tol ) return (0);
168
-        if ( abs(omega) < eps ) return (0);
169
-        rho_1 = rho;
170
-
171
-    }
172
-    return (0);
173
-}
174
-
175
-template <typename Preconditioner>
176
-bool preconditionedBiCGStab(const SparseMat &A, const Preconditioner &M,
177
-        const Ref< VectorXcr const > b,
178
-        Ref <VectorXcr > x,
179
-        const int &max_it, const Real &tol,
180
-        Real &errorn, int &iter_done) {
181
-
182
-    Complex omega, rho, rho_1, alpha, beta;
183
-    Real bnrm2, eps;
184
-    int n, iter;
185
-    Real tol2 = tol*tol;
186
-
187
-    // Determine size of system and init vectors
188
-    n = x.size();
189
-
190
-    VectorXcd r(n);
191
-    VectorXcd r_tld(n);
192
-    VectorXcd p(n);
193
-    VectorXcd s(n);
194
-    VectorXcd s_hat(n);
195
-    VectorXcd p_hat(n);
196
-    VectorXcd v = VectorXcr::Zero(n);
197
-    VectorXcd t = VectorXcr::Zero(n);
198
-
199
-    //std::cout << "Start BiCGStab, memory needed: "
200
-    //          <<  (sizeof(Complex)*(8+2)*n/(1024.*1024)) / (1024.) << " [Gb]\n";
201
-
202
-    // Initialise
203
-    iter_done = 0;
204
-    eps = 1e-100;
205
-
206
-    bnrm2 = b.squaredNorm();
207
-    if (bnrm2 == 0) {
208
-        x.setConstant(0.0);
209
-        errorn = 0;
210
-        std::cerr << "Trivial case of Ax = b, where b is 0\n";
211
-        return (false);
212
-    }
213 61
 
214
-    // If there is an initial guess
215
-    if ( x.squaredNorm() ) {
216
-        r = b - A.selfadjointView<Eigen::Upper>()*x;
217
-    } else {
218
-        r = b;
219
-    }
220
-
221
-    errorn = r.squaredNorm() / bnrm2;
222
-    omega = 1.;
223
-    r_tld = r;
224
-
225
-    // Get down to business
226
-    for (iter=0; iter<max_it; ++iter) {
227
-
228
-        rho = r_tld.dot(r);
229
-        if (abs(rho) < eps) {
230
-            std::cerr << "arbitrary orthogonality issue in bicgstab\n";
231
-            std::cerr << "consider eigen restarting\n";
232
-            return (false);
233
-        }
234
-
235
-        if (iter > 0) {
236
-            beta = (rho/rho_1) * (alpha/omega);
237
-            p = r + beta*(p-omega*v);
238
-        } else {
239
-            p = r;
240
-        }
241
-
242
-        p_hat = M.solve(p);
243
-        v.noalias() = A.selfadjointView<Eigen::Upper>()*p_hat;
244
-
245
-        alpha = rho / r_tld.dot(v);
246
-        s = r - alpha*v;
247
-        errorn = s.squaredNorm()/bnrm2;
248
-
249
-        if (errorn < tol2 && iter > 1) {
250
-            x = x + alpha*p_hat;
251
-            errorn = std::sqrt(errorn);
252
-            return (true);
253
-        }
254
-
255
-        s_hat = M.solve(s);
256
-        t.noalias() = A.selfadjointView<Eigen::Upper>()*s_hat;
257
-
258
-        omega = t.dot(s)  / t.dot(t);
259
-        x += alpha*p_hat + omega*s_hat;
260
-        r = s - omega*t;
261
-        errorn = r.squaredNorm() / bnrm2;
262
-        iter_done = iter;
263
-
264
-        if ( errorn <= tol2 || abs(omega) < eps) {
265
-            errorn = std::sqrt(errorn);
266
-            return (true);
267
-        }
268
-
269
-        rho_1 = rho;
270
-    }
271
-    return (false);
272
-}
273
-
274
-template <typename Preconditioner>
275
-bool preconditionedSCBiCG(const SparseMat &A, const Preconditioner &M,
276
-        const Ref< VectorXcr const > b,
277
-        Ref <VectorXcr > x,
278
-        const int &max_iter, const Real &tol,
279
-        Real &errorn, int &iter_done) {
280
-
281
-    Real resid;
282
-    VectorXcr p, z, q;
283
-    Complex alpha, beta, rho, rho_1;
284
-
285
-    Real normb = b.norm( );
286
-    VectorXcr r = b - A*x;
287
-
288
-    if (normb == 0.0) normb = 1;
289
-
290
-    if ((resid = r.norm( ) / normb) <= tol) {
291
-        errorn = resid;
292
-        iter_done = 0;
293
-        return 0;
294
-    }
295
-
296
-    for (int i = 1; i <= max_iter; i++) {
297
-        z = M.solve(r);
298
-        rho = r.dot(z);
299
-
300
-        if (i == 1)  p = z;
301
-        else {
302
-            beta = rho / rho_1;
303
-            p = z + beta * p;
304
-        }
305
-
306
-        q = A*p;
307
-        alpha = rho / p.dot(q);
308
-
309
-        x += alpha * p;
310
-        r -= alpha * q;
311
-        std::cout << "resid\t" << resid << std::endl;
312
-        if ((resid = r.norm( ) / normb) <= tol) {
313
-            errorn = resid;
314
-            iter_done = i;
315
-            return 0;
316
-        }
317
-
318
-        rho_1 = rho;
319
-    }
320
-
321
-    errorn = resid;
322
-
323
-    return (false);
324
-}
325
-
326
-
327
-/** \internal Low-level conjugate gradient algorithm
328
-  * \param mat The matrix A
329
-  * \param rhs The right hand side vector b5
330
-  * \param x On input and initial solution, on output the computed solution.
331
-  * \param precond A preconditioner being able to efficiently solve for an
332
-  *                approximation of Ax=b (regardless of b)
333
-  * \param iters On input the max number of iteration, on output the number of performed iterations.
334
-  * \param tol_error On input the tolerance error, on output an estimation of the relative error.
335
-  */
336
-template<typename Rhs, typename Dest, typename Preconditioner>
337
-EIGEN_DONT_INLINE
338
-void conjugateGradient(const SparseMat& mat, const Rhs& rhs, Dest& x,
339
-                        const Preconditioner& precond, int& iters,
340
-                        typename Dest::RealScalar& tol_error)
341
-{
342
-  using std::sqrt;
343
-  using std::abs;
344
-  typedef typename Dest::RealScalar RealScalar;
345
-  typedef typename Dest::Scalar Scalar;
346
-  typedef Matrix<Scalar,Dynamic,1> VectorType;
347
-
348
-  RealScalar tol = tol_error;
349
-  int maxIters = iters;
350
-
351
-  int n = mat.cols();
352
-
353
-  VectorType residual = rhs - mat.selfadjointView<Eigen::Upper>() * x; //initial residual
354
-
355
-  RealScalar rhsNorm2 = rhs.squaredNorm();
356
-  if(rhsNorm2 == 0)
357
-  {
358
-    x.setZero();
359
-    iters = 0;
360
-    tol_error = 0;
361
-    return;
362
-  }
363
-  RealScalar threshold = tol*tol*rhsNorm2;
364
-  RealScalar residualNorm2 = residual.squaredNorm();
365
-  if (residualNorm2 < threshold)
366
-  {
367
-    iters = 0;
368
-    tol_error = sqrt(residualNorm2 / rhsNorm2);
369
-    return;
370
-  }
371
-
372
-  VectorType p(n);
373
-  p = precond.solve(residual);      //initial search direction
374
-
375
-  VectorType z(n), tmp(n);
376
-  RealScalar absNew = numext::real(residual.dot(p));  // the square of the absolute value of r scaled by invM
377
-  int i = 0;
378
-  while(i < maxIters)
379
-  {
380
-    tmp.noalias() = mat.selfadjointView<Eigen::Upper>() * p;              // the bottleneck of the algorithm
381
-
382
-    Scalar alpha = absNew / p.dot(tmp);   // the amount we travel on dir
383
-    x += alpha * p;                       // update solution
384
-    residual -= alpha * tmp;              // update residue
385
-
386
-    residualNorm2 = residual.squaredNorm();
387
-    if(residualNorm2 < threshold)
388
-      break;
389
-
390
-    z = precond.solve(residual);          // approximately solve for "A z = residual"
391
-
392
-    RealScalar absOld = absNew;
393
-    absNew = numext::real(residual.dot(z));     // update the absolute value of r
394
-    RealScalar beta = absNew / absOld;            // calculate the Gram-Schmidt value used to create the new search direction
395
-    p = z + beta * p;                             // update search direction
396
-    i++;
397
-  }
398
-  tol_error = sqrt(residualNorm2 / rhsNorm2);
399
-  iters = i;
400
-}
401
-
402
-// // Computes implicit
403
-// VectorXcr implicitDCInvBPhi (const SparseMat& D, const SparseMat& C,
404
-//                         const SparseMat& B, const SparseMat& MC,
405
-//                         const VectorXcr& Phi, Real& tol,
406
-//                         int& max_it) {
407
-//     int iter_done(0);
408
-//     Real errorn(0);
409
-//     VectorXcr b = B*Phi;
410
-//     VectorXcr y = VectorXcr::Zero(C.rows()) ; // = C^1*b;
411
-//     bicgstab(C, MC, b, y, max_it, tol, errorn, iter_done, false);
412
-//     //std::cout << "Temp " << errorn << std::endl;
413
-//     return  D*y;
414
-// }
415
-
416
-// Computes implicit
417
-VectorXcr implicitDCInvBPhi (const SparseMat& D, const SparseMat& C,
418
-                        const VectorXcr& ioms, const SparseMat& MC,
419
-                        const VectorXcr& Phi, Real& tol,
420
-                        int& max_it) {
421
-    int iter_done(0);
422
-    Real errorn(0);
423
-    VectorXcr b = (ioms).asDiagonal() * (D.transpose()*Phi);
424
-    VectorXcr y = VectorXcr::Zero(C.rows()) ; // = C^1*b;
425
-    bicgstab(C, MC, b, y, max_it, tol, errorn, iter_done, false);
426
-    //std::cout << "Temp " << errorn << std::endl;
427
-    max_it = iter_done;
428
-    return  D*y;
429
-}
430
-
431
-// Computes implicit
432
-template <typename Preconditioner>
433
-VectorXcr implicitDCInvBPhi2 (const SparseMat& D, const SparseMat& C,
434
-                        const Ref<VectorXcr const> ioms, const Preconditioner& solver,
435
-                        const Ref<VectorXcr const> Phi, Real& tol,
436
-                        int& max_it) {
437
-
438
-    VectorXcr b = (ioms).asDiagonal() * (D.transpose()*Phi);
439
-    VectorXcr y = VectorXcr::Zero(C.rows()) ; // = C^1*b;
440
-
441
-    // Home Made
442
-    //int iter_done(0);
443
-    //Real errorn(0);
444
-    //preconditionedBiCGStab(C, solver, b, y, max_it, tol, errorn, iter_done); //, false); // Jacobi M
445
-    //max_it = iter_done;
446
-
447
-    // Eigen BiCGStab
448
-    Eigen::BiCGSTAB<SparseMatrix<Complex> > BiCG;
449
-    BiCG.compute( C ); // TODO move this out of this loop!
450
-    y = BiCG.solve(b);
451
-    max_it = BiCG.iterations();
452
-    tol = BiCG.error();
453
-
454
-    // Direct
455
-/*
456
-    std::cout << "Computing LLT" << std::endl;
457
-    Eigen::SimplicialLLT<SparseMatrix<Complex>, Eigen::Upper, Eigen::AMDOrdering<int> >  LLT;
458
-    LLT.compute(C);
459
-    max_it = 1;
460
-    std::cout << "Computed LLT" << std::endl;
461
-    y = LLT.solve(b);
462
-*/
463
-
464
-    return  D*y;
465
-}
466
-
467
-// Computes implicit
468
-//template <typename Solver>
62
+// Computes implicit calculation
469 63
 template < typename Solver >
470
-inline VectorXcr implicitDCInvBPhi3 (const SparseMat& D, const Solver& solver,
64
+inline VectorXcr implicitDCInvBPhi3 (
65
+                        const SparseMat& D,
66
+                        const Solver& solver,
471 67
                         const Ref<VectorXcr const> ioms,
472
-                        const Ref<VectorXcr const> Phi, Real& tol,
473
-                        int& max_it) {
68
+                        const Ref<VectorXcr const> Phi,
69
+                        Real& tol,   // not used
70
+                        int& max_it  // not used
71
+                ) {
474 72
     VectorXcr b = (ioms).asDiagonal() * (D.transpose()*Phi);
475 73
     VectorXcr y = solver.solve(b);
476
-    //max_it = 0;
477
-    max_it = solver.iterations();
478
-    //errorn = solver.error();
74
+    //max_it = solver.iterations(); // actualy no need to pass this
479 75
     return  D*y;
76
+    //return  y;
480 77
 }
481 78
 
482 79
 
@@ -536,12 +133,19 @@ int implicitbicgstab(//const SparseMat& D,
536 133
 
537 134
     // Determine size of system and init vectors
538 135
     int n = idx.size();        // was phi.size();
136
+
137
+    std::cout << "BiCGStab SIZES  " << n << "\t" << phi.size() << "\t" << ioms.size() << std::endl;
138
+
539 139
     VectorXcr r(n);
540 140
     VectorXcr r_tld(n);
541 141
     VectorXcr p(n);
542 142
     VectorXcr s(n);
543
-    VectorXcr v = VectorXcr::Zero(n);
544
-    VectorXcr t = VectorXcr::Zero(n);
143
+
144
+    VectorXcr v = VectorXcr::Zero(ioms.size());
145
+    VectorXcr t = VectorXcr::Zero(ioms.size());
146
+
147
+//    VectorXcr vm1 = VectorXcr::Zero(ioms.size());
148
+//    VectorXcr tm1 = VectorXcr::Zero(ioms.size());
545 149
 
546 150
 //     TODO, refigure for implicit large system
547 151
 //     std::cout << "Start BiCGStab, memory needed: "
@@ -597,7 +201,6 @@ int implicitbicgstab(//const SparseMat& D,
597 201
         tol2 = tol;
598 202
 
599 203
         max_it2 = 500000;
600
-        //v = implicitDCInvBPhi2(D, C, ioms, solver, p, tol2, max_it2);
601 204
         ivmap(phi, p, idx);
602 205
         v = vmap(implicitDCInvBPhi3(D, solver, ioms, phi, tol2, max_it2), idx);
603 206
 
@@ -643,7 +246,6 @@ int implicitbicgstab(//const SparseMat& D,
643 246
               << max_it2+max_it2 << " iterations " << std::endl;
644 247
 
645 248
         // Check to see how progress is going
646
-
647 249
         if (errornold - errorn < 0) {
648 250
             logio << "Irregular non-monotonic (negative) convergence. Recommend restart. \n";
649 251
             ivmap( phi, phi2, idx );
@@ -668,275 +270,5 @@ int implicitbicgstab(//const SparseMat& D,
668 270
     return (0);
669 271
 }
670 272
 
671
-// On Input
672
-// A = Matrix
673
-// B = Right hand side
674
-// X = initial guess, and solution
675
-// maxit = maximum Number of iterations
676
-// tol = error tolerance
677
-// On Output
678
-// X real solution vector
679
-// errorn = Real error norm
680
-template < typename Solver >
681
-int implicitbicgstab_ei(const SparseMat&  D,
682
-                        const Ref< VectorXcr const > ioms,
683
-                        const Ref< VectorXcr const > rhs,
684
-                        Ref <VectorXcr> phi,
685
-                        Solver& solver,
686
-                        int &max_it, const Real &tol, Real &errorn, int &iter_done, ofstream& logio) {
687
-
688
-    logio << "using the preconditioned Eigen implicit solver" << std::endl;
689
-
690
-    Complex omega, rho, rho_1, alpha, beta;
691
-    Real tol2;
692
-    int  iter, max_it2,max_it1;
693
-
694
-    // Determine size of system and init vectors
695
-    int n = phi.size();
696
-    VectorXcr r(n);
697
-    VectorXcr r_tld(n);
698
-    VectorXcr p(n);
699
-    VectorXcr v(n);
700
-    VectorXcr s(n);
701
-    VectorXcr t(n);
702
-
703
-    // Initialise
704
-    iter_done = 0;
705
-    Real eps = 1e-100;
706
-
707
-    Real bnrm2 = rhs.norm();
708
-    if (bnrm2 == 0) {
709
-        phi.setConstant(0.0);
710
-        errorn = 0;
711
-        std::cerr << "Trivial case of Ax = b, where b is 0\n";
712
-        return (0);
713
-    }
714
-
715
-    // If there is an initial guess
716
-    if ( phi.norm() ) {
717
-        tol2 = tol;
718
-        max_it2 = 50000;
719
-        r = rhs - implicitDCInvBPhi3(D, solver, ioms, phi, tol2, max_it2);
720
-    } else {
721
-        r = rhs;
722
-    }
723
-
724
-    jsw_timer timer;
725
-
726
-    errorn = r.norm() / bnrm2;
727
-    omega = 1.;
728
-    r_tld = r;
729
-    Real errornold = 1e14;
730
-
731
-    // Get down to business
732
-    for (iter=0; iter<max_it; ++iter) {
733
-
734
-        timer.begin();
735
-
736
-        rho = r_tld.dot(r);
737
-        if (abs(rho) < eps) return (0);
738
-
739
-        if (iter > 0) {
740
-            beta = (rho/rho_1) * (alpha/omega);
741
-            p = r.array() + beta*(p.array()-omega*v.array()).array();
742
-        } else {
743
-            p = r;
744
-        }
745
-
746
-        tol2 = tol;
747
-        max_it2 = 500000;
748
-        v = implicitDCInvBPhi3(D, solver, ioms, p, tol2, max_it2);
749
-        max_it2 = solver.iterations();
750
-
751
-        alpha = rho / r_tld.dot(v);
752
-        s = r.array() - alpha*v.array();
753
-        errorn = s.norm()/bnrm2;
754
-
755
-        if (errorn < tol && iter > 1) {
756
-            phi.array() += alpha*p.array();
757
-            return (0);
758
-        }
759
-
760
-        tol2 = tol;
761
-        max_it1 = 500000;
762
-        t = implicitDCInvBPhi3(D, solver, ioms, s, tol2, max_it1);
763
-        max_it1 = solver.iterations();
764
-        omega = t.dot(s)  / t.dot(t);
765
-
766
-        r = s.array() - omega*t.array();
767
-        errorn = r.norm() / bnrm2;
768
-        iter_done = iter;
769
-
770
-        if (errorn <= tol ) return (0);
771
-        if (abs(omega) < eps) return (0);
772
-        rho_1 = rho;
773
-
774
-        logio << "iteration " << std::setw(4) << iter
775
-              << "\terrorn " << std::setw(6) << std::setprecision(4) << std::scientific << errorn
776
-              << "\timplicit iterations " << std::setw(5) << max_it1+max_it2
777
-              << "\ttime " << std::setw(10) << std::fixed << std::setprecision(2) << timer.end() << std::endl;
778
-
779
-        // Check to see how progress is going
780
-        if (errornold - errorn < 0) {
781
-            logio << "irregular (negative) convergence. Try again? \n";
782
-            return (2);
783
-        }
784
-
785
-        // only update phi if good things are happening
786
-        phi.array() += alpha*p.array() + omega*s.array();
787
-        errornold = errorn;
788
-
789
-    }
790
-    return (0);
791
-}
792
-
793
-
794
-// On Input
795
-// A = Matrix
796
-// B = Right hand side
797
-// X = initial guess, and solution
798
-// maxit = maximum Number of iterations
799
-// tol = error tolerance
800
-// On Output
801
-// X real solution vector
802
-// errorn = Real error norm
803
-int implicitbicgstabnt(const SparseMat& D,
804
-                     const SparseMat& C,
805
-                     const VectorXcr& ioms,
806
-                     const SparseMat& MC,
807
-                     Eigen::Ref< VectorXcr > rhs,
808
-                     VectorXcr& phi,
809
-                     int &max_it, Real &tol, Real &errorn, int &iter_done) {
810
-
811
-    Complex omega, rho, rho_1, alpha, beta;
812
-    Real errmin, tol2;
813
-    int  iter, max_it2;
814
-
815
-//     // Cholesky decomp
816
-//     SparseLLT<SparseMatrix<Complex>, Cholmod>
817
-//         CholC(SparseMatrix<Complex> (C.real()) );
818
-//     if(!CholC.succeeded()) {
819
-//         std::cerr << "decomposiiton failed\n";
820
-//         return EXIT_FAILURE;
821
-//     }
822
-
823
-    // Determine size of system and init vectors
824
-    int n = phi.size();
825
-    VectorXcr r(n);
826
-    VectorXcr r_tld(n);
827
-    VectorXcr p(n);
828
-    VectorXcr v(n);
829
-    //VectorXcr p_hat(n);
830
-    VectorXcr s(n);
831
-    //VectorXcr s_hat(n);
832
-    VectorXcr t(n);
833
-    VectorXcr xmin(n);
834
-
835
-//     TODO, refigure for implicit large system
836
-//     std::cout << "Start BiCGStab, memory needed: "
837
-//               <<  (sizeof(Complex)*(9+2)*n/(1024.*1024*1024)) << " [Gb]\n";
838
-
839
-    // Initialise
840
-    iter_done = 0;
841
-    v.setConstant(0.); // not necessary I don't think
842
-    t.setConstant(0.);
843
-    Real eps = 1e-100;
844
-
845
-    Real bnrm2 = rhs.norm();
846
-    if (bnrm2 == 0) {
847
-        phi.setConstant(0.0);
848
-        errorn = 0;
849
-        std::cerr << "Trivial case of Ax = b, where b is 0\n";
850
-        return (0);
851
-    }
852
-
853
-    // If there is an initial guess
854
-    if ( phi.norm() ) {
855
-        //r = rhs - A*phi;
856
-        tol2 = tol;
857
-        max_it2 = 50000;
858
-        std::cout << "Initial guess " << std::endl;
859
-        r = rhs - implicitDCInvBPhi(D, C, ioms, MC, phi, tol2, max_it2);
860
-        //r = rhs - implicitDCInvBPhi (D, C, B, CholC, phi, tol2, max_it2);
861
-    } else {
862
-        r = rhs;
863
-    }
864
-
865
-
866
-    errorn = r.norm() / bnrm2;
867
-    //std::cout << "Initial |r|  " << r.norm() << "\t" << errorn<< std::endl;
868
-    omega = 1.;
869
-    r_tld = r;
870
-    errmin = 1e30;
871
-    Real errornold = 1e6;
872
-    // Get down to business
873
-    for (iter=0; iter<max_it; ++iter) {
874
-
875
-        rho = r_tld.dot(r);
876
-        if (abs(rho) < eps) return (0);
877
-
878
-        if (iter > 0) {
879
-            beta = (rho/rho_1) * (alpha/omega);
880
-            p = r.array() + beta*(p.array()-omega*v.array()).array();
881
-        } else {
882
-            p = r;
883
-        }
884
-
885
-        // Use pseudo inverse to get approximate answer
886
-        //p_hat = p;
887
-        tol2  = std::max(1e-4*errorn, tol);
888
-        tol2 = tol;
889
-        max_it2 = 500000;
890
-        //v = A*p_hat;
891
-        v = implicitDCInvBPhi(D, C, ioms, MC, p, tol2, max_it2);
892
-        //v = implicitDCInvBPhi(D, C, B, CholC, p, tol2, max_it2);
893
-
894
-        alpha = rho / r_tld.dot(v);
895
-        s = r.array() - alpha*v.array();
896
-        errorn = s.norm()/bnrm2;
897
-
898
-        if (errorn < tol && iter > 1) {
899
-            phi.array() += alpha*p.array();
900
-            return (0);
901
-        }
902
-
903
-        // s_hat = M*s;
904
-        //tol2 = tol;
905
-        tol2  = std::max(1e-4*errorn, tol);
906
-        tol2 = tol;
907
-        max_it2 = 50000;
908
-        // t = A*s_hat;
909
-        t = implicitDCInvBPhi(D, C, ioms, MC, s, tol2, max_it2);
910
-        //t = implicitDCInvBPhi(D, C, B, CholC, s, tol2, max_it2);
911
-        omega = t.dot(s)  / t.dot(t);
912
-        phi.array() += alpha*p.array() + omega*s.array();
913
-        r = s.array() - omega*t.array();
914
-        errorn = r.norm() / bnrm2;
915
-        iter_done = iter;
916
-        if (errorn < errmin) {
917
-            // remember the model with the smallest norm
918
-            errmin = errorn;
919
-            xmin = phi;
920
-        }
921
-
922
-        if (errorn <= tol ) return (0);
923
-        if (abs(omega) < eps) return (0);
924
-        rho_1 = rho;
925
-
926
-        std::cout << "iteration " << std::setw(4) << iter << "\terrorn "  << std::setw(6) << std::scientific << errorn
927
-                  << "\timplicit iterations " << std::setw(5) << max_it2 << std::endl;
928
-        if (errornold - errorn < 1e-14) {
929
-            std::cout << "not making any progress. Giving up\n";
930
-            return (2);
931
-        }
932
-        errornold = errorn;
933
-
934
-    }
935
-    return (0);
936
-}
937
-
938 273
 #endif   // ----- #ifndef BICGSTAB_INC  -----
939 274
 
940
-//int bicgstab(const SparseMat &A, Eigen::SparseLU< Eigen::SparseMatrix<Complex, RowMajor> ,
941
-
942
-

+ 7
- 3
src/EMSchur3DBase.cpp Zobrazit soubor

@@ -26,8 +26,8 @@
26 26
 typedef Eigen::Triplet<Lemma::Complex> Tc;
27 27
 typedef Eigen::Triplet<Lemma::Real> Tr;
28 28
 
29
-#define UPPER 0
30
-#define LOWER 1 // 1=true, 0=false
29
+#define UPPER 0  // LOWER WAS 0
30
+#define LOWER 1  // 1=true, 0=false
31 31
 
32 32
 namespace Lemma {
33 33
 
@@ -107,8 +107,12 @@ namespace Lemma {
107 107
     void EMSchur3DBase::BuildC ( Real*** sigmax, Real*** sigmay, Real*** sigmaz, const int& iw) {
108 108
 
109 109
         Cvec[iw].resize( unx+uny+unz , unx+uny+unz );
110
+
111
+#if LOWER && UPPER
112
+        Cvec[iw].reserve(Eigen::VectorXi::Constant(unx+uny+unz, 7));   // Whole
113
+#else
110 114
         Cvec[iw].reserve(Eigen::VectorXi::Constant(unx+uny+unz, 4)); // Upper/Lower
111
-        //Cvec[iw].reserve(Eigen::VectorXi::Constant(unx+uny+unz, 7));   // Whole
115
+#endif
112 116
 
113 117
         //Cvec_s.resize( idx. )
114 118
         //CMMvec[iw].resize( unx+uny+unz , unx+uny+unz );

Načítá se…
Zrušit
Uložit