Albany: a Trilinos-based PDE code: QCAD_GreensFunctionTunneling.cpp Source File

Go to the documentation of this file.
00001 /********************************************************************\
00002 *            Albany, Copyright (2010) Sandia Corporation             *
00003 *                                                                    *
00004 * Notice: This computer software was prepared by Sandia Corporation, *
00005 * hereinafter the Contractor, under Contract DE-AC04-94AL85000 with  *
00006 * the Department of Energy (DOE). All rights in the computer software*
00007 * are reserved by DOE on behalf of the United States Government and  *
00008 * the Contractor as provided in the Contract. You are authorized to  *
00009 * use this computer software for Governmental purposes but it is not *
00010 * to be released or distributed to the public. NEITHER THE GOVERNMENT*
00011 * NOR THE CONTRACTOR MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR      *
00012 * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. This notice    *
00013 * including this sentence must appear on any copies of this software.*
00014 *    Questions to Andy Salinger, agsalin@sandia.gov                  *
00015 \********************************************************************/
00016 
00017 
00018 
00019 
00020 #include "AnasaziConfigDefs.hpp"
00021 #include "AnasaziBasicEigenproblem.hpp"
00022 #include "AnasaziBlockDavidsonSolMgr.hpp"
00023 #include "Epetra_CrsMatrix.h"
00024 
00025 #include "Teuchos_TestForException.hpp"
00026 #include "Teuchos_CommHelpers.hpp"
00027 
00028 #include "QCAD_GreensFunctionTunneling.hpp"
00029 #include <fstream>
00030 
00031 // Assume EcValues are in units of eV
00032 // Assume effMass is in units of m_0 (electron rest mass)
00033 // Assume ptSpacing is in units of microns (um)
00034 // Assume pathLen is in unit of microns (um)
00035 
00036 QCAD::GreensFunctionTunnelingSolver::
00037 GreensFunctionTunnelingSolver(const Teuchos::RCP<std::vector<double> >& EcValues_,
00038             const Teuchos::RCP<std::vector<double> >& pathLen_, int nGFPts_,
00039             double ptSpacing_, double effMass_, const Teuchos::RCP<const Epetra_Comm>& Comm_,
00040             const std::string& outputFilename, bool bNeumannBC_)
00041 {  
00042   Comm = Comm_;
00043   ptSpacing = ptSpacing_;
00044   nGFPts = nGFPts_; 
00045   effMass = effMass_;
00046   bNeumannBC = bNeumannBC_;
00047 
00048   // Retrieve Ec and pathLen for spline interpolation
00049   std::vector<double>  oldEcValues = (*EcValues_);
00050   std::vector<double>  oldPathLen = (*pathLen_);
00051   
00052   // Get size of oldEcValues
00053   int nOldPts = oldEcValues.size(); 
00054   
00055   // Call preSplineInterp() to obtain y2 
00056   std::vector<double> y2(nOldPts,0.0);
00057   prepSplineInterp(oldPathLen, oldEcValues, y2, nOldPts);  
00058   
00059   // Initialize the pointer, otherwise, seg. fault
00060   EcValues = Teuchos::rcp( new std::vector<double>(nGFPts) );
00061   
00062   // Keep the beginning and ending values the same
00063   (*EcValues)[0] = oldEcValues[0];
00064   (*EcValues)[nGFPts-1] = oldEcValues[nOldPts-1];
00065   
00066   // Splinely interpolate Ec values to the equally-spaced spatial grid defined by ptSpacing and nGFPts
00067   for (int i = 1; i < (nGFPts-1); i++)
00068   {
00069     double pathLength = double(i) * ptSpacing;
00070     (*EcValues)[i] = execSplineInterp(oldPathLen, oldEcValues, y2, nOldPts, pathLength);
00071   }
00072 
00073   // Output the spline interpolated Ec values
00074   if( outputFilename.length() > 0) 
00075   {
00076     std::cout << std::endl << "Append the spline interpolated Ec values to Output Filename..." << std::endl; 
00077     std::fstream out; 
00078     out.open(outputFilename.c_str(), std::fstream::out | std::fstream::app);
00079     out << std::endl << std::endl << "% Splinely interpolated Ec values on the equally-spaced spatial grid" << std::endl; 
00080     out << "% index  pathLength  Splined-Ec" << std::endl;
00081     for (int i = 0; i < nGFPts; i++)
00082     {
00083       double pathLength = double(i) * ptSpacing;
00084       out << i << " " << pathLength << " " << (*EcValues)[i] << std::endl; 
00085     }
00086     out.close();
00087   }
00088   
00089 /* temporarily keep it
00090 
00091   matlabEvals.resize(nGFPts); 
00092   std::cout << "read Matlab-generated eigenvalues ..." << std::endl; 
00093   std::string outputFilename = "matlab_evals.dat";
00094   if( outputFilename.length() > 0) 
00095   {
00096     std::fstream out; 
00097     out.open(outputFilename.c_str(), std::fstream::in);
00098 
00099     for (int i = 0; i < (nGFPts-1); i++)
00100     { 
00101       out >> matlabEvals[i];
00102       // std::cout << "i=" << i << ", matlabEvals[i]=" << matlabEvals[i] << std::endl; 
00103     }
00104     out.close();
00105   }   
00106   std::cout << "finish reading from file ... " << std::endl;  */
00107 
00108 }
00109 
00110 
00111 QCAD::GreensFunctionTunnelingSolver::
00112 ~GreensFunctionTunnelingSolver()
00113 {
00114 }
00115 
00116 
00117 double QCAD::GreensFunctionTunnelingSolver::
00118 computeCurrent(double Vds, double kbT, double Ecutoff_offset_from_Emax, bool bUseAnasazi)
00119 {
00120   const double hbar_1 = 6.582119e-16; // eV * s
00121   const double hbar_2 = 1.054572e-34; // J * s = kg * m^2 / s
00122   const double m_0 = 9.109382e-31;    // kg
00123   const double um = 1e-6;     // m
00124   const double min_a0 = 1e-4; // um, so == .1nm  HARDCODED MIN PT SPACING
00125 
00126   std::vector<double> evals;
00127   std::vector<double> evecBeginEls;
00128   std::vector<double> evecEndEls;
00129 
00130   // chemical potential energies of the left and right leads
00131   double muL = 0.; 
00132   double muR = -Vds;   // Vds can be >= 0 or < 0
00133 
00134   // Setup Energy bins for integration:  muL = 0, muR = -Vds, so set
00135   // Emin = min(Ec[0], Ec[nPts-1]-Vds)
00136   // Emax = max(muL, muR) + 20*kbT 
00137   // dE ~ kbT / 10
00138   int nPts    = EcValues->size();
00139   double Emax = std::max(muL, muR) + 20.*kbT;  // fermi dist. ~ exp(-40)=2.06e-9 small enough
00140   
00141   // Emin = min(Ec[0], Ec[nPts-1]-Vds), should work for arbitrary 1D Ec profile and Vds >= 0 or <0
00142   double Emin = std::min((*EcValues)[0], (*EcValues)[nPts-1]+muR); 
00143   if (Emin > Emax)
00144   {
00145     double tmp = Emin; 
00146     Emin = Emax; 
00147     Emax = tmp; 
00148   }
00149 
00150   // set up uniform energy spacing
00151   double dE = kbT / 10;
00152   int nEPts = int((Emax - Emin) / dE) + 1;
00153   dE = (Emax - Emin)/(nEPts-1);  // recalculate dE for given nEPts
00154   double a0 = ptSpacing;
00155   bool ret;
00156 
00157   double Ecutoff = Emax + Ecutoff_offset_from_Emax; // maximum eigenvalue needed
00158   int nEvecs;  // number of converged eigenvectors
00159 
00160   Teuchos::RCP<std::vector<double> > pEc = Teuchos::null;
00161   Teuchos::RCP<std::vector<double> > pLastEc = Teuchos::null;
00162 
00163   Map = Teuchos::rcp(new Epetra_Map(nPts, 0, *Comm));
00164   t0  = hbar_1 * hbar_2 /(2*effMass*m_0* pow(a0*um,2) ); // gives t0 in units of eV 
00165   std::cout << "Emin=" << Emin << ", Emax=" << Emax << ", Ecutoff=" << Ecutoff <<", t0=" << t0 << std::endl;    
00166 
00167   std::cout << "Doing Initial H-mx diagonalization for Vds = " << Vds << std::endl;
00168 
00169   if(bUseAnasazi) {
00170     Teuchos::RCP<Epetra_MultiVector> evecs;
00171 
00172     ret = doMatrixDiag_Anasazi(Vds, *EcValues, Ecutoff, evals, evecs);
00173     pEc = EcValues;
00174     std::cout << "  Diag w/ a0 = " << a0 << ", nPts = " << nPts << " give "
00175           << evals.size() << " eigenvalues, with Max Eval = " << evals[evals.size()-1] << std::endl; 
00176           // << "(need >= "<< Ecutoff << ")" << std::endl;
00177     
00178     // The following block is not called when a0 is small enough (<= 0.5 nm)
00179     while(ret == false && a0 > min_a0) // whether to perform auto-refinement -- add " && false" to disable
00180     {
00181       a0 /= 2.; nPts *= 2;  
00182       t0 = hbar_1 * hbar_2 /(2.*effMass*m_0* pow(a0*um,2.) );
00183       Map = Teuchos::rcp(new Epetra_Map(nPts, 0, *Comm));
00184     
00185       // Interpolate pLastEc onto pEc
00186       pLastEc = pEc;
00187       pEc = Teuchos::rcp(new std::vector<double>(nPts));
00188       for(int i = 0; i < nPts; i++) 
00189       {
00190         if(i%2 && i/2+1<nPts/2) (*pEc)[i] = ((*pLastEc)[i/2] + (*pLastEc)[i/2+1])/2.0;
00191         else (*pEc)[i] = (*pLastEc)[i/2];
00192       }
00193     
00194       // Setup and diagonalize H matrix
00195       ret = doMatrixDiag_Anasazi(Vds, *pEc, Ecutoff, evals, evecs);
00196       std::cout << "  Diag w/ a0 = " << a0 << ", nPts = " << nPts << ", t0 = " << t0 << " gives "
00197             << "Max Eval = " << evals[evals.size()-1]
00198             << "(need >= "<< Ecutoff << ")" << std::endl;
00199     }
00200     std::cout << "Done H-mx diagonalization, now broadcasting results" << std::endl;
00201     
00202     
00203     // Since all we need are the eigenvalues at beginning and end (index [0] and [nPts-1] ?)
00204     // then broadcast these values to all processors
00205     nEvecs = evecs->NumVectors();
00206 
00207     int GIDlist[2];
00208     GIDlist[0] = 0; // "beginning" index
00209     GIDlist[1] = nPts-1; // "ending" index
00210     
00211     //Get the IDs (rank) of the processors holding beginning and ending index
00212     std::vector<int> PIDlist(2), LIDlist(2);
00213     Map->RemoteIDList(2, GIDlist, &PIDlist[0], &LIDlist[0]);
00214     
00215     // Broadcast the beginning and ending elements of each eigenvector to all processors
00216     evecBeginEls.resize(nEvecs);
00217     evecEndEls.resize(nEvecs);
00218     
00219     if(Comm->MyPID() == PIDlist[0]) { // this proc owns beginning point
00220       for(int i=0; i<nEvecs; i++)
00221         evecBeginEls[i] = (*evecs)[i][LIDlist[0]]; // check that this is correct: Epetra_Vector [] operator takes *local* index?
00222     }
00223     Comm->Broadcast( &evecBeginEls[0], nEvecs, PIDlist[0] );
00224     
00225     if(Comm->MyPID() == PIDlist[1]) { // this proc owns ending point
00226       for(int i=0; i<nEvecs; i++)
00227         evecEndEls[i] = (*evecs)[i][LIDlist[1]]; // check that this is correct: Epetra_Vector [] operator takes *local* index?
00228     }
00229     Comm->Broadcast( &evecEndEls[0], nEvecs, PIDlist[1] );
00230   }
00231 
00232   else {  // use TQL2 tridiagonal routine.  Not parallel, all procs compute and store all evectors & evals
00233 
00234     std::vector<double> evecs;
00235     ret = doMatrixDiag_tql2(Vds, *EcValues, Ecutoff, evals, evecs);
00236     pEc = EcValues;
00237     std::cout << "  Diag w/ a0 = " << a0 << ", nPts = " << nPts << " give "
00238           << evals.size() << " eigenvalues, with Max Eval = " << evals[evals.size()-1] << std::endl; 
00239           // << "(need >= "<< Ecutoff << ")" << std::endl;
00240     
00241     // The following block is not called when a0 is small enough (<= 0.5 nm)
00242     while(ret == false && a0 > min_a0) // whether to perform auto-refinement -- add " && false" to disable
00243     {
00244       a0 /= 2.; nPts *= 2;  
00245       t0 = hbar_1 * hbar_2 /(2.*effMass*m_0* pow(a0*um,2.) );
00246     
00247       // Interpolate pLastEc onto pEc
00248       pLastEc = pEc;
00249       pEc = Teuchos::rcp(new std::vector<double>(nPts));
00250       for(int i = 0; i < nPts; i++) 
00251       {
00252         if(i%2 && i/2+1<nPts/2) (*pEc)[i] = ((*pLastEc)[i/2] + (*pLastEc)[i/2+1])/2.0;
00253         else (*pEc)[i] = (*pLastEc)[i/2];
00254       }
00255     
00256       // Setup and diagonalize H matrix
00257       ret = doMatrixDiag_tql2(Vds, *pEc, Ecutoff, evals, evecs);
00258       std::cout << "  Diag w/ a0 = " << a0 << ", nPts = " << nPts << ", t0 = " << t0 << " gives "
00259             << "Max Eval = " << evals[evals.size()-1] 
00260             << "(need >= "<< Ecutoff << ")" << std::endl;
00261     }
00262     std::cout << "Done H-mx diagonalization" << std::endl;
00263 
00264     // nEvecs = nPts;
00265     nEvecs = evals.size();  // include the case where only part of EVs are found, not equal to nPts
00266       
00267     evecBeginEls.resize(nEvecs);
00268     evecEndEls.resize(nEvecs);
00269     for(int i = 0; i < nEvecs; i++) { // assume evecs are in *columns* of evecs
00270       evecBeginEls[i] = evecs[i];
00271       evecEndEls[i] = evecs[nPts*(nPts-1) + i];
00272     }
00273   }
00274 
00275   // Potential energies, assumed to be constant, in each lead
00276   double VL = (*pEc)[0];
00277   double VR = (*pEc)[nPts-1] - Vds;
00278 
00279   std::cout << nEPts << " Energy Pts: " << Emin << " to " << Emax << " eV in steps of " << dE << std::endl;
00280 
00281   // Spread energy points evenly across processors
00282   Epetra_Map EnergyMap(nEPts, 0, *Comm);
00283   
00284   // Number of energy points on current processor. 
00285   int NumMyEnergyPts = EnergyMap.NumMyElements();
00286 
00287   // Put list of global elements on this processor into the user-provided array. 
00288   std::vector<int> MyGlobalEnergyPts(NumMyEnergyPts);
00289   EnergyMap.MyGlobalElements(&MyGlobalEnergyPts[0]);
00290 
00291   double I, Iloc = 0., x, y;
00292   double E, Gamma11, GammaNN, G011, G01N, G0NN, Tm;
00293   std::complex<double> Sigma11, SigmaNN, GR1N, p11, pNN;
00294   
00295   // add up contributions to energy integral from current processor
00296   // eigenvectors are assumed to be real
00297   for (int i = 0; i < NumMyEnergyPts; i++) {
00298     E = Emin + MyGlobalEnergyPts[i]*dE;
00299 
00300     x = (E-VL)*(t0-(E-VL)/4.);
00301     y = bNeumannBC ? 0. : t0;
00302     if( x >= 0. )
00303       Sigma11 = std::complex<double>( (E-VL)/2. - y, -sqrt(x) );
00304     else
00305       Sigma11 = std::complex<double>( (E-VL)/2. - y + sqrt(-x), 0.);
00306 
00307     x = (E-VR)*(t0-(E-VR)/4.);
00308     if( x >= 0. )
00309       SigmaNN = std::complex<double>( (E-VR)/2. - y, -sqrt(x) );
00310     else
00311       SigmaNN = std::complex<double>( (E-VR)/2. - y + sqrt(-x ), 0.);
00312 
00313     Gamma11 = -2.*Sigma11.imag();
00314     GammaNN = -2.*SigmaNN.imag();
00315 
00316     G011 = G01N = G0NN = 0.;
00317     for(int j = 0; j < nEvecs; j++) {
00318       G011 += evecBeginEls[j] * evecBeginEls[j] / (E - evals[j]);
00319       G01N += evecBeginEls[j] * evecEndEls[j] / (E - evals[j]);
00320       G0NN += evecEndEls[j] * evecEndEls[j] / (E - evals[j]);
00321     }
00322 
00323     p11 = Sigma11*G011; pNN = SigmaNN*G0NN; 
00324     GR1N = G01N / ((p11-1.0)*(pNN-1.0) - Sigma11*SigmaNN*G01N*G01N);
00325     Tm = Gamma11 * GammaNN * std::norm(GR1N);
00326     //std::cout << "DEBUG: brkdown = " << Gamma11 << " , " << GammaNN << " , " << 
00327     // std::norm(GR1N) << " , " << E << " , " << VL << " , " << VR << " , " << t0 << " , " << x << std::endl;
00328 
00329     Iloc += Tm * ( f0((E - muL)/kbT) - f0((E - muR)/kbT) ) * dE;
00330   }
00331 
00332   std::cout << "Energy Integral contrib from proc " << Comm->MyPID() << " = " << Iloc << " (" << NumMyEnergyPts << " energy pts)"<< std::endl;
00333 
00334   // add contributions from all processors
00335   Comm->SumAll(&Iloc, &I, 1);
00336 
00337   std::cout << "Total Energy Integral = " << I << " eV" << std::endl;
00338   
00339   const double h = 4.135668e-15;  // eV * s
00340   double q = 1.602177e-19;        // C
00341   
00342   // single-mode 1D ballistic current in [A]
00343   I = 2*q / h * I;  // since I was in units of eV, now I is in units of Amps (C/s)
00344 
00345   return I;
00346 }
00347 
00348 
00349 double QCAD::GreensFunctionTunnelingSolver::f0(double x) const
00350 {
00351   return 1.0 / (1. + exp(x));
00352 }
00353 
00354 
00355 bool QCAD::GreensFunctionTunnelingSolver::
00356 doMatrixDiag_tql2(double Vds, std::vector<double>& Ec, double Ecutoff,
00357       std::vector<double>& evals, 
00358       std::vector<double>& evecs)
00359 {
00360   bool bPrintResults = false;
00361   int ierr;
00362   int nPts = Ec.size();
00363   std::vector<double> offDiag(nPts);  //off diagonal of symmetric tri-diagonal matrix (last nPts-1 els)
00364 
00365   /* We are building a matrix of block structure (left = DBC, right = NBC):
00366   
00367       | Ec+2t0  -t0                  |          | Ec+t0   -t0                  |
00368       | -t0    Ec+2t0  -t0           |          | -t0    Ec+2t0  -t0           |
00369       |         -t0   ...            |   OR     |         -t0   ...            |
00370       |                    ..    -t0 |          |                    ..    -t0 |
00371       |                   -t0  Ec+2t0|          |                   -t0  Ec+t0 |
00372 
00373    where the matrix has nPts rows and nPts columns
00374   */
00375 
00376 
00377   // Initialize diagonal of matrix in evals (since these will be the eigenvalues upon exit from tql2)
00378   //  and the off diagonal elements in the offDiag (tql2 only looks at the last nPts-1 els)
00379   evals.resize(nPts);
00380   for (int i = 0; i < nPts; i++) {
00381     double Ulin = -Vds * ((double)i) / (nPts-1);
00382     evals[i] = 2*t0 + Ec[i] + Ulin;
00383     offDiag[i] = -t0;
00384   }  
00385   if(bNeumannBC) {
00386     evals[0] -= t0;
00387     evals[nPts-1] -= t0;
00388   }
00389   
00390   // initialize evecs to n x n identity mx
00391   evecs.resize(nPts*nPts);
00392   for(int i=0; i<nPts; i++) {
00393     for(int j=0; j<nPts; j++) {
00394       evecs[nPts*i + j] = (i==j) ? 1.0 : 0.0;  
00395     } 
00396   }
00397 
00398   /*DEBUG
00399   std::cout << "Doing diag with: " << std::endl;
00400   std::cout << "Diag = "; printVector(evals, 1, evals.size());
00401   std::cout << "OffDiag = "; printVector(offDiag, 1, offDiag.size());
00402   std::cout << "Z = " << std::endl;  printVector(evecs,nPts,nPts);
00403   */
00404   
00405   // Diagonalize the matrix
00406   int nEvecs, max_iter = 1000;
00407   tql2(nPts, max_iter, evals, offDiag, evecs, ierr);
00408 
00409   /*DEBUG
00410   std::cout << "Results: (ret = " << ierr << ")" << std::endl;
00411   std::cout << "Evals = "; printVector(evals, 1, evals.size());
00412   std::cout << "Evecs = " << std::endl;  printVector(evecs,nPts,nPts);
00413   */
00414 
00415 
00416   // Truncate evals to the number of converged eigenvalues
00417   //  (note that they aren't necessarily ordered when ierr != 0)
00418   if(ierr != 0) 
00419   {
00420     nEvecs = ierr-1;    // because the ierr-th eigenvalue is not determined
00421     evals.resize(nEvecs);
00422     std::cout << "Only " << nEvecs << "eigenvalues out of total " << nPts << " are found !" << std::endl;
00423     
00424     // sort evals and evecs (note: evals.size() < nPts)
00425     sortTql2PartialResults(nPts, evals, evecs);
00426   }
00427   else nEvecs = nPts;  // find all evals and evecs
00428 
00429   // Print the results
00430   if(bPrintResults) {
00431     std::ostringstream os;
00432     os.setf(std::ios_base::right, std::ios_base::adjustfield);
00433     os<<"TQL2 solver returned " << ierr << std::endl;
00434     os<<std::endl;
00435     os<<"------------------------------------------------------"<<std::endl;
00436     os<<std::setw(16)<<"Eigenvalue"<<std::endl;
00437     os<<"------------------------------------------------------"<<std::endl;
00438     for (int i=0; i<nEvecs; i++) {
00439       os<<std::setw(16)<<evals[i]<<std::endl;
00440     }
00441     os<<"------------------------------------------------------"<<std::endl;
00442     std::cout << os.str() << std::endl; 
00443 /*
00444     std::ostringstream os;
00445     os.setf(std::ios_base::right, std::ios_base::adjustfield);
00446     os << "Evals difference between Matlab and tql2 " << std::endl;
00447     os << std::endl;
00448     os << "------------------------------------------------------" << std::endl;
00449     os << "Index" << std::setw(16) << "  Eigenvalue difference" << std::endl;
00450     os << "------------------------------------------------------" << std::endl;
00451     
00452     for (int i = 0; i < nEvecs; i++)
00453     {
00454       if (std::abs(matlabEvals[i]-evals[i]) > 1e-3)
00455         os << i << std::setw(16) << matlabEvals[i]-evals[i] << std::endl;
00456     }
00457     os << "------------------------------------------------------" << std::endl;
00458     std::cout << os.str() << std::endl;  */
00459 
00460   }
00461 
00462   // double maxEigenvalue = evals[nPts-1];
00463   double maxEigenvalue = evals[nEvecs-1];  // include the (ierr!=0) case
00464   
00465   return (maxEigenvalue > Ecutoff);
00466 }
00467 
00468 
00469 bool QCAD::GreensFunctionTunnelingSolver::
00470 doMatrixDiag_Anasazi(double Vds, std::vector<double>& Ec, double Ecutoff,
00471        std::vector<double>& evals, 
00472        Teuchos::RCP<Epetra_MultiVector>& evecs)
00473 {
00474   typedef Epetra_MultiVector MV;
00475   typedef Epetra_Operator OP;
00476   typedef Anasazi::MultiVecTraits<double, Epetra_MultiVector> MVT;
00477 
00478   bool bPrintResults = false;
00479   std::vector<Anasazi::Value<double> > anasazi_evals;
00480 
00481   // Get number of local pts for this proc from newly created Map.
00482   int nPts = Ec.size();
00483   int nLocalEls = Map->NumMyElements();
00484 
00485   std::vector<int> myGlobalElements(nLocalEls);
00486   Map->MyGlobalElements(&myGlobalElements[0]);
00487 
00488   // Create an integer vector NumNz that is used to build the Petra Matrix.
00489   // NumNz[i] is the Number of OFF-DIAGONAL term for the ith H-Mx row
00490   // on this processor
00491   std::vector<int> NumNz(nLocalEls);
00492 
00493   /* We are building a matrix of block structure (left = DBC, right = NBC):
00494   
00495       | Ec+2t0  -t0                  |          | Ec+t0   -t0                  |
00496       | -t0    Ec+2t0  -t0           |          | -t0    Ec+2t0  -t0           |
00497       |         -t0   ...            |   OR     |         -t0   ...            |
00498       |                    ..    -t0 |          |                    ..    -t0 |
00499       |                   -t0  Ec+2t0|          |                   -t0  Ec+t0 |
00500 
00501    where the matrix has nPts rows and nPts columns
00502   */
00503   for (int i=0; i<nLocalEls; i++) {
00504     if (myGlobalElements[i] == 0 || myGlobalElements[i] == nPts-1)
00505       NumNz[i] = 2;
00506     else
00507       NumNz[i] = 3;
00508   }
00509 
00510   // Create an Epetra_Matrix
00511   Teuchos::RCP<Epetra_CrsMatrix> A = Teuchos::rcp( new Epetra_CrsMatrix(Copy, *Map, &NumNz[0]) );
00512 
00513   // Fill Hamiltonian Matrix
00514   std::vector<double> Values(3), endValues(3);
00515   std::vector<int> Indices(3);
00516   int NumEntries;
00517 
00518   Values[0] = -t0; Values[1] = 2.*t0; Values[2] = -t0;
00519   if(bNeumannBC) {
00520     endValues[0] = -t0; endValues[1] =   t0; endValues[2] = -t0;  }
00521   else {
00522     endValues[0] = -t0; endValues[1] = 2.*t0; endValues[2] = -t0;  }
00523 
00524   
00525   for (int i = 0; i < nLocalEls; i++) {
00526     double Ulin = -Vds * ((double)myGlobalElements[i]) / (nPts-1);
00527     Values[1] = 2.*t0 + Ec[myGlobalElements[i]] + Ulin;
00528     
00529     if (bNeumannBC)  // NBC
00530       endValues[1] = t0 + Ec[myGlobalElements[i]] + Ulin;
00531     else             // DBC
00532       endValues[1] = 2.*t0 + Ec[myGlobalElements[i]] + Ulin;
00533 
00534     if (myGlobalElements[i]==0) {
00535       Indices[0] = 0;
00536       Indices[1] = 1;
00537       NumEntries = 2;
00538       int info = A->InsertGlobalValues(myGlobalElements[i], NumEntries, &endValues[1], &Indices[0]);
00539       TEUCHOS_TEST_FOR_EXCEPTION( info != 0, std::runtime_error, "Failure in InsertGlobalValues()" );
00540     }
00541     else if (myGlobalElements[i] == nPts-1) {
00542       Indices[0] = nPts-2;
00543       Indices[1] = nPts-1;
00544       NumEntries = 2;
00545       int info = A->InsertGlobalValues(myGlobalElements[i], NumEntries, &endValues[0], &Indices[0]);
00546       TEUCHOS_TEST_FOR_EXCEPTION( info != 0, std::runtime_error, "Failure in InsertGlobalValues()" );
00547     }
00548     else {
00549       Indices[0] = myGlobalElements[i]-1;
00550       Indices[1] = myGlobalElements[i];
00551       Indices[2] = myGlobalElements[i]+1;
00552       NumEntries = 3;
00553       int info = A->InsertGlobalValues(myGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
00554       TEUCHOS_TEST_FOR_EXCEPTION( info != 0, std::runtime_error, "Failure in InsertGlobalValues()" );
00555     }
00556   }
00557 
00558   // Finish up
00559   int info = A->FillComplete();
00560   assert( info == 0 );
00561   A->SetTracebackMode(1); // Shutdown Epetra Warning tracebacks
00562 
00563   
00564   //************************************
00565   // Call the Block Davidson solver manager
00566   //***********************************
00567   //
00568   //  Variables used for the Block Davidson Method
00569   //
00570   std::string  which("SR");
00571   const int    nev         = nPts/2; //get half the eigenvalues (most we can with Block Davidson?)
00572   const int    blockSize   = nPts/2;
00573   const int    numBlocks   = 2;
00574   const int    maxRestarts = 100;
00575   const double tol         = 1.0e-8;
00576 
00577   // Create an Epetra_MultiVector for an initial vector to start the solver.
00578   // Note:  This needs to have the same number of columns as the blocksize.
00579   Teuchos::RCP<Epetra_MultiVector> ivec = Teuchos::rcp( new Epetra_MultiVector(*Map, blockSize) );
00580   ivec->Random();
00581 
00582   // Create the eigenproblem.
00583   Teuchos::RCP<Anasazi::BasicEigenproblem<double, MV, OP> > eigenProblem =
00584     Teuchos::rcp( new Anasazi::BasicEigenproblem<double, MV, OP>(A, ivec) );
00585 
00586   // Inform the eigenproblem that the operator A is symmetric
00587   eigenProblem->setHermitian(true);
00588 
00589   // Set the number of eigenvalues requested
00590   eigenProblem->setNEV( nev );
00591 
00592   // Inform the eigenproblem that you are finishing passing it information
00593   bool boolret = eigenProblem->setProblem();
00594   TEUCHOS_TEST_FOR_EXCEPTION( boolret != true, std::runtime_error, 
00595             "Anasazi::BasicEigenproblem::setProblem() returned an error.\n");
00596   
00597   // Create parameter list to pass into the solver manager
00598   Teuchos::ParameterList smPL;
00599   smPL.set( "Which", which );
00600   smPL.set( "Block Size", blockSize );
00601   smPL.set( "Num Blocks", numBlocks );
00602   smPL.set( "Maximum Restarts", maxRestarts );
00603   smPL.set( "Convergence Tolerance", tol );
00604 
00605   // Create the solver manager
00606   Anasazi::BlockDavidsonSolMgr<double, MV, OP> solverMan(eigenProblem, smPL);
00607 
00608   // Solve the problem
00609   Anasazi::ReturnType returnCode = solverMan.solve();
00610 
00611   // Get the eigenvalues and eigenvectors from the eigenproblem
00612   Anasazi::Eigensolution<double,MV> sol = eigenProblem->getSolution();
00613   anasazi_evals = sol.Evals;
00614   evecs = sol.Evecs;
00615   evals.resize(sol.numVecs);
00616 
00617   // Compute residuals.
00618   std::vector<double> normR(sol.numVecs);
00619   if (sol.numVecs > 0) {
00620     Teuchos::SerialDenseMatrix<int,double> T(sol.numVecs, sol.numVecs);
00621     Epetra_MultiVector tempAevec( *Map, sol.numVecs );
00622     T.putScalar(0.0); 
00623     for (int i=0; i<sol.numVecs; i++) {
00624       T(i,i) = anasazi_evals[i].realpart;
00625       evals[i] = anasazi_evals[i].realpart;
00626     }
00627     A->Apply( *evecs, tempAevec );
00628     MVT::MvTimesMatAddMv( -1.0, *evecs, T, 1.0, tempAevec );
00629     MVT::MvNorm( tempAevec, normR );
00630   }
00631 
00632   std::ostringstream os;
00633   os.setf(std::ios_base::right, std::ios_base::adjustfield);
00634   os<<"Solver manager returned " << (returnCode == Anasazi::Converged ? "converged." : "unconverged.") << std::endl;
00635 
00636   // Print the results
00637   if(bPrintResults) {
00638 
00639     os<<std::endl;
00640     os<<"------------------------------------------------------"<<std::endl;
00641     os<<std::setw(16)<<"Eigenvalue"
00642       <<std::setw(18)<<"Direct Residual"
00643       <<std::endl;
00644     os<<"------------------------------------------------------"<<std::endl;
00645     for (int i=0; i<sol.numVecs; i++) 
00646     {
00647       os<<std::setw(16)<<anasazi_evals[i].realpart
00648         <<std::setw(18)<<normR[i]/anasazi_evals[i].realpart
00649         <<std::endl;
00650     }
00651     os<<"------------------------------------------------------"<<std::endl;
00652     std::cout << os.str() << std::endl;  
00653 
00654 /*
00655     std::ostringstream os;
00656     os.setf(std::ios_base::right, std::ios_base::adjustfield);
00657     os << "Evals difference between Matlab and Anasazi " << std::endl;
00658     os << std::endl;
00659     os << "------------------------------------------------------" << std::endl;
00660     os << "Index" << std::setw(16) << "Eigenvalue difference" << std::endl;
00661     os << "------------------------------------------------------" << std::endl;
00662     
00663     for (int i = 0; i < sol.numVecs; i++)
00664     {
00665       if (std::abs(matlabEvals[i] - anasazi_evals[i].realpart) > 1e-3)
00666         os << i << std::setw(16) << matlabEvals[i] - anasazi_evals[i].realpart << std::endl;
00667     }
00668     os << "------------------------------------------------------" << std::endl; 
00669     std::cout << os.str() << std::endl;  */
00670 
00671   }
00672 
00673   double maxEigenvalue = anasazi_evals[sol.numVecs-1].realpart;
00674   return (maxEigenvalue > Ecutoff);
00675 }
00676 
00677     
00678 void QCAD::GreensFunctionTunnelingSolver::
00679 computeCurrentRange(const std::vector<double> Vds, double kbT, 
00680   double Ecutoff_offset_from_Emax, std::vector<double>& resultingCurrent, bool bUseAnasazi)
00681 {
00682   for(std::size_t i = 0; i < Vds.size(); i++) {
00683     resultingCurrent[i] = computeCurrent(Vds[i], kbT, Ecutoff_offset_from_Emax, bUseAnasazi);
00684   }
00685 }
00686 
00687 
00688 void QCAD::GreensFunctionTunnelingSolver::prepSplineInterp
00689   (const std::vector<double>& x, const std::vector<double>& y,  
00690    std::vector<double>& y2, const int& n)
00691 {
00692   /* Given arrays x(0:n-1) and y(0:n-1) containing a tabulated function, i.e., yi = f(xi),
00693   with x0 < x1 < ... < x(n-1), this routine returns an array y2(0:n-1) of length n which 
00694   contains the second derivatives of the interpolating function at the tabulated
00695   points xi. Assume zero second derivatives at points 1 and n, that is, natural 
00696   spline boundary conditions. y2(0:n-1) is used in the execSplineInterp() function.
00697   */
00698   
00699   std::vector<double> u;
00700   u.resize(n-1);  
00701 
00702   // set the lower BC to be natural (2nd deriv. = 0)
00703   y2[0] = 0.;
00704   u[0] = 0.; 
00705   
00706   // decomposition loop of the tridiagonal algorithm, y2 and u are used for 
00707   // temporary storage of the decomposed factors
00708   double sig = 0.0, p = 0.0; 
00709   for (int i = 1; i < (n-1); ++i)
00710   {
00711     sig = (x[i]-x[i-1]) / (x[i+1]-x[i-1]);
00712     p = sig *y2[i-1] + 2.; 
00713     y2[i] = (sig -1.)/p; 
00714     u[i] = (6. *( (y[i+1]-y[i]) / (x[i+1]-x[i]) - (y[i]-y[i-1]) / (x[i]-x[i-1]) )
00715                /(x[i+1]-x[i-1]) - sig*u[i-1]) / p; 
00716   }
00717   
00718   // set the upper BC to be natural; 
00719   double qn = 0., un = 0.; 
00720   
00721   // backsubstition of the tridiagonal algorithm to obtain y2
00722   y2[n-1] = (un-qn*u[n-2]) / (qn*y2[n-2]+1.);   
00723   for (int i = (n-2); i >=0; i--)
00724   {
00725     y2[i] = y2[i]*y2[i+1] + u[i];
00726   }
00727   
00728   return; 
00729 } 
00730 
00731 
00732 double QCAD::GreensFunctionTunnelingSolver::execSplineInterp
00733   (const std::vector<double>& xa, const std::vector<double>& ya,  
00734    const std::vector<double>& y2a, const int& n, const double& x)
00735 {
00736   /* Given the arrays xa(0:n-1) and ya(0:n-1) of length n, which tabulate a function 
00737   with the xai's in order, and given array y2a(0:n-1), which is the output from 
00738   prepSplineInterp() above, and given a value of x, this routine returns a 
00739   cubic-spline interpolated value y. 
00740   */
00741 
00742   int klo = 0; 
00743   int khi = n-1;
00744   
00745   // bisection searching
00746   while ( (khi-klo) > 1 )
00747   {
00748     int k = (khi+klo)/2;
00749     if (xa[k] > x) 
00750       khi = k;
00751     else
00752       klo = k; 
00753   }
00754   
00755   // klo and khi now bracket the input value of x
00756   
00757   double h = xa[khi] - xa[klo]; 
00758   if ( h < 1.e-100 ) 
00759     TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, 
00760        "Bad xa input in execSplineInterp. The xa's must be distinct ! \n");
00761 
00762   // perform spline interpolation
00763   double a = (xa[khi] - x) / h; 
00764   double b = (x - xa[klo]) / h; 
00765   double c = (pow(a, 3.)-a) * pow(h, 2.) / 6.;
00766   double d = (pow(b, 3.)-b) * pow(h, 2.) / 6.;
00767   double y = a*ya[klo] + b*ya[khi] + c*y2a[klo] + d*y2a[khi];  
00768   
00769   return y; 
00770 } 
00771 
00772 
00773 //Utility routine for printing a stl vector of doubles.  this
00774 // should probably be moved to a utility source file or replaced.
00775 void printVector(const std::vector<double>& v, int m, int n)
00776 {
00777   assert((int)v.size() == m*n);
00778   for(int i=0; i<m; i++) {
00779     for(int j=0; j<n; j++) {
00780       std::cout << v[n*i + j] << "  ";
00781     }
00782     std::cout << std::endl;
00783   }
00784 }
00785 
00786 
00787 
00788 
00789 // Helper function for tql2
00790 double pythag(double a, double b)
00791 {
00792   return sqrt(a*a + b*b);
00793 }
00794 
00795 // Helper function for tql2:
00796 // returns the value of a with the sign of b
00797 double sign(double a, double b)
00798 {
00799   return (b >= 0) ? fabs(a) : -fabs(a);
00800 }
00801 
00802 
00803 // Shamelessly taken from http://www.netlib.org/seispack/tql2.f (converted to C)
00804 int QCAD::GreensFunctionTunnelingSolver::tql2(int n, int max_iter,
00805                 std::vector<double>& d, 
00806                 std::vector<double>& e, 
00807                 std::vector<double>& z,
00808                 int& ierr)
00809 {
00810   int i,j,k,l,m,ii,l1,l2;
00811   double c,c2,c3,dl1,el1,f,g,h,p,r,s,s2,tst1,tst2;
00812 
00813   /*
00814    *     this subroutine is a translation of the algol procedure tql2,
00815    *     num. math. 11, 293-306(1968) by bowdler, martin, reinsch, and
00816    *     wilkinson handbook for auto. comp., vol.ii-linear algebra, 227-240(1971).
00817    *
00818    *     this subroutine finds the eigenvalues and eigenvectors
00819    *     of a symmetric tridiagonal matrix by the ql method.
00820    *     the eigenvectors of a full symmetric matrix can also
00821    *     be found if  tred2  has been used to reduce this
00822    *     full matrix to tridiagonal form.
00823    *
00824    *     on input
00825    *
00826    *        n is the order of the matrix.
00827    *
00828    *        d contains the diagonal elements of the input matrix.
00829    *
00830    *        e contains the subdiagonal elements of the input matrix
00831    *     c          in its last n-1 positions.  e(1) is arbitrary.
00832    *
00833    *        z contains the transformation matrix produced in the
00834    *          reduction by  tred2, if performed.  if the eigenvectors
00835    *     c          of the tridiagonal matrix are desired, z must contain
00836    *          the identity matrix.
00837    *
00838    *      on output
00839    *
00840    *        d contains the eigenvalues in ascending order.  if an
00841    *     c          error exit is made, the eigenvalues are correct but
00842    *     c          unordered for indices 1,2,...,ierr-1.
00843    *
00844    *        e has been destroyed.
00845    *
00846    *        z contains orthonormal eigenvectors of the symmetric
00847    *     c          tridiagonal (or full) matrix.  if an error exit is made,
00848    *          z contains the eigenvectors associated with the stored
00849    *          eigenvalues.
00850    *
00851    *        ierr is set to
00852    *     c          zero       for normal return,
00853    *          j          if the j-th eigenvalue has not been
00854    *                     determined after max_iter iterations.
00855    *
00856    *     c     calls pythag for  sqrt(a*a + b*b) .
00857    *
00858    *     c     questions and comments should be directed to burton s. garbow,
00859    *     c     mathematics and computer science div, argonne national laboratory
00860    *
00861    *     this version dated august 1983.
00862    *
00863    *     ------------------------------------------------------------------
00864    */
00865 
00866   ierr = 0;
00867   if (n == 1) return ierr;
00868 
00869   for(i=1; i<n; i++)
00870     e[i-1] = e[i];
00871 
00872   f = 0.0;
00873   tst1 = 0.0;
00874   e[n-1] = 0.0;
00875 
00876   for(l=0; l<n; l++) {
00877     j = 0;
00878     h = fabs(d[l]) + fabs(e[l]);
00879     if(tst1 < h) tst1 = h;  
00880     //  .......... look for small sub-diagonal element ..........
00881 
00882     for(m=l; m<n; m++) { 
00883       tst2 = tst1 + fabs(e[m]);
00884       if(tst2 == tst1) break;
00885       // .......... e[n-1] is always zero, so there is no exit
00886       //              through the bottom of the loop ..........
00887     }
00888 
00889     if(m != l) {
00890       do {
00891         if(j == max_iter) {
00892     //     .......... set error -- no convergence to an
00893     //                eigenvalue after maximum allowed iterations ..........
00894     ierr = l; return ierr;
00895         }
00896         
00897         j = j + 1;
00898         //   .......... form shift ..........
00899         l1 = l + 1;
00900         l2 = l1 + 1;
00901         g = d[l];
00902         p = (d[l1] - g) / (2.0 * e[l]);
00903         r = pythag(p,1.0);
00904         d[l] = e[l] / (p + sign(r,p));
00905         d[l1] = e[l] * (p + sign(r,p));
00906         dl1 = d[l1];
00907         h = g - d[l];
00908         if (l2 <= n) {
00909           for(i=l2; i<n; i++) 
00910       d[i] = d[i] - h;
00911         }
00912         f = f + h;
00913         
00914         //.......... ql transformation ..........
00915         p = d[m];
00916         c = 1.0;
00917         c2 = c;
00918         el1 = e[l1];
00919         s = 0.0;
00920         
00921         // .......... for i=m-1 step -1 until l do -- ..........
00922   for(i=m-1; i>=l; i--) {
00923           c3 = c2;
00924           c2 = c;
00925           s2 = s;
00926           g = c * e[i];
00927           h = c * p;
00928           r = pythag(p,e[i]);
00929           e[i+1] = s * r;
00930           s = e[i] / r;
00931           c = p / r;
00932           p = c * d[i] - s * g;
00933           d[i+1] = h + s * (c * g + s * d[i]);
00934         
00935           // .......... form vector ..........
00936           for(k=0; k<n; k++) {
00937       h = z[n*k + (i+1)];
00938       z[n*k + (i+1)] = s * z[n*k + i] + c * h;
00939       z[n*k + i] = c * z[n*k + i] - s * h;
00940     }
00941         }
00942         
00943         p = -s * s2 * c3 * el1 * e[l] / dl1;
00944         e[l] = s * p;
00945         d[l] = c * p;
00946         tst2 = tst1 + fabs(e[l]);
00947       } while(tst2 > tst1);
00948     }
00949     
00950     d[l] = d[l] + f;
00951   }
00952 
00953   // .......... order eigenvalues and eigenvectors ..........
00954   for(ii=1; ii<n; ii++) { 
00955     i = ii - 1;
00956     k = i;
00957     p = d[i];
00958 
00959     for(j=ii; j<n; j++) {
00960       if(d[j] >= p) continue;
00961       k = j;
00962       p = d[j];
00963     }
00964 
00965     if(k == i) continue;
00966     d[k] = d[i];
00967     d[i] = p;
00968 
00969     for(j=0; j<n; j++) {
00970       p = z[n*j + i];
00971       z[n*j + i] = z[n*j + k];
00972       z[n*j + k] = p;
00973     }
00974   }
00975 
00976   return ierr;
00977 }
00978 
00979 
00980 // Sort eigenvalues in ascending order and corresponding eigenvectors when tql2() 
00981 // finds only part of evals and evecs, taken from tql2()
00982 void QCAD::GreensFunctionTunnelingSolver::sortTql2PartialResults 
00983   (int n, std::vector<double>& d, std::vector<double>& z)
00984 {
00985   int nEVs = d.size(); 
00986   int ii, i, k, j;
00987   double p; 
00988   
00989   // .......... order eigenvalues and eigenvectors ..........
00990   for(ii = 1; ii < nEVs; ii++) 
00991   { 
00992     i = ii - 1;
00993     k = i;
00994     p = d[i];
00995 
00996     for(j = ii; j < nEVs; j++) 
00997     {
00998       if(d[j] >= p) continue;
00999       k = j;
01000       p = d[j];
01001     }
01002 
01003     if(k == i) continue;
01004     d[k] = d[i];
01005     d[i] = p;
01006 
01007     // rearrange eigenvectors
01008     for(j = 0; j < n; j++) 
01009     {
01010       p = z[n*j + i];
01011       z[n*j + i] = z[n*j + k];
01012       z[n*j + k] = p;
01013     }
01014   }
01015 
01016   return; 
01017 }