From c9da891d8097edfce4b7c80ca3f7eb2734efa08c Mon Sep 17 00:00:00 2001 From: "saru@southpole" Date: Fri, 6 Dec 2024 15:32:32 -0800 Subject: [PATCH] major modifications to make c_Nanostructure class polymorphic --- .../PointChargeContainer.cpp | 10 +- .../Solver/Transport/Broyden_First_Serial.cpp | 171 ------ .../Transport/Broyden_Parallel_General.cpp | 55 +- Source/Solver/Transport/Make.package | 1 - Source/Solver/Transport/NEGF/NEGF_Common.H | 56 +- Source/Solver/Transport/NEGF/NEGF_Common.cpp | 332 +++++++++-- Source/Solver/Transport/Nanostructure.H | 129 ++++- Source/Solver/Transport/Nanostructure.cpp | 7 +- Source/Solver/Transport/Transport.H | 106 +--- Source/Solver/Transport/Transport.cpp | 544 ++++-------------- 10 files changed, 600 insertions(+), 811 deletions(-) delete mode 100644 Source/Solver/Transport/Broyden_First_Serial.cpp diff --git a/Source/Input/MacroscopicProperties/PointChargeContainer.cpp b/Source/Input/MacroscopicProperties/PointChargeContainer.cpp index 3dd90ad..8b54d7c 100644 --- a/Source/Input/MacroscopicProperties/PointChargeContainer.cpp +++ b/Source/Input/MacroscopicProperties/PointChargeContainer.cpp @@ -78,9 +78,13 @@ void c_PointChargeContainer::Write_OutputFile() outfile_pc_step << std::setw(10) << step; #ifdef USE_TRANSPORT auto &rTransport = rCode.get_TransportSolver(); - outfile_pc_step << std::setw(12) << rTransport.get_Vds() - << std::setw(12) << rTransport.get_Vgs() - << std::setw(10) << rTransport.get_Broyden_Step() - 1; + const auto& biases = rTransport.Get_Vec_Biases(); + if(!biases.empty()) { + auto [Vds, Vgs] = biases[0]; //here biases on all nanostructures are equal + outfile_pc_step << std::setw(12) << Vds + << std::setw(12) << Vgs; + } + outfile_pc_step << std::setw(10) << rTransport.get_Broyden_Step() - 1; #endif outfile_pc_step << std::setw(15) << Get_total_charge() << std::setw(15) << Get_total_charge() / static_cast( diff --git a/Source/Solver/Transport/Broyden_First_Serial.cpp b/Source/Solver/Transport/Broyden_First_Serial.cpp deleted file mode 100644 index aab845f..0000000 --- a/Source/Solver/Transport/Broyden_First_Serial.cpp +++ /dev/null @@ -1,171 +0,0 @@ -#include "Transport.H" - -using namespace amrex; - -#ifndef BROYDEN_PARALLEL -void c_TransportSolver::Execute_Broyden_First_Algorithm() -{ - if (ParallelDescriptor::IOProcessor()) - { - amrex::Print() << "\nBroydenStep: " << Broyden_Step - << ", fraction: " << Broyden_fraction - << ", scalar: " << Broyden_Scalar << "\n"; - - auto const &n_curr_in = h_n_curr_in_data.table(); - auto const &n_curr_out = h_n_curr_out_data.table(); - auto const &n_prev_in = h_n_prev_in_data.table(); - auto const &F_curr = h_F_curr_data.table(); - auto const &delta_F_curr = h_delta_F_curr_data.table(); - auto const &delta_n_curr = h_delta_n_curr_data.table(); - auto const &Jinv_curr = h_Jinv_curr_data.table(); - auto const &Norm = h_Norm_data.table(); - - RealTable1D h_sum_Fcurr_data({0}, {num_field_sites_all_NS}, - The_Pinned_Arena()); - RealTable1D h_sum_deltaFcurr_data({0}, {num_field_sites_all_NS}, - The_Pinned_Arena()); - RealTable1D h_delta_n_Jinv_data({0}, {num_field_sites_all_NS}, - The_Pinned_Arena()); - - auto const &sum_Fcurr = h_sum_Fcurr_data.table(); - auto const &sum_deltaFcurr = h_sum_deltaFcurr_data.table(); - auto const &delta_n_Jinv = h_delta_n_Jinv_data.table(); - - SetVal_RealTable1D(h_Norm_data, 0.); - Broyden_NormSum_Curr = 0.; - - for (int l = 0; l < num_field_sites_all_NS; ++l) - { - sum_Fcurr(l) = 0; - sum_deltaFcurr(l) = 0; - delta_n_Jinv(l) = 0.; - } - - switch (map_NormType[Broyden_Norm_Type]) - { - case s_Norm::Type::Absolute: - { - for (int l = 0; l < num_field_sites_all_NS; ++l) - { - amrex::Real Fcurr = n_curr_in(l) - n_curr_out(l); - Norm(l) = fabs(Fcurr); - Broyden_NormSum_Curr += pow(Fcurr, 2); - } - break; - } - case s_Norm::Type::Relative: - { - for (int l = 0; l < num_field_sites_all_NS; ++l) - { - amrex::Real Fcurr = n_curr_in(l) - n_curr_out(l); - Norm(l) = fabs(Fcurr / (n_curr_in(l) + n_curr_out(l))); - Broyden_NormSum_Curr += pow(Norm(l), 2); - } - break; - } - default: - { - amrex::Abort("Norm Type " + Broyden_Norm_Type + - " is not yet defined."); - } - } - Broyden_NormSum_Curr = sqrt(Broyden_NormSum_Curr); - - Broyden_Norm = Norm(0); - int norm_index = 0; - for (int l = 1; l < num_field_sites_all_NS; ++l) - { - if (Broyden_Norm < Norm(l)) - { - Broyden_Norm = Norm(l); - norm_index = l; - } - } - amrex::Print() << "\nBroyden_NormSum_Curr: " << std::setw(20) - << Broyden_NormSum_Curr << "\n"; - amrex::Print() << "Broyden_NormSum_Prev: " << std::setw(20) - << Broyden_NormSum_Prev << ", Difference: " - << (Broyden_NormSum_Curr - Broyden_NormSum_Prev) << "\n"; - amrex::Print() << "Broyden max norm: " << Broyden_Norm - << " at location: " << norm_index << "\n\n"; - - Broyden_NormSum_Prev = Broyden_NormSum_Curr; - - for (int l = 0; l < num_field_sites_all_NS; ++l) - { - amrex::Real Fcurr = n_curr_in(l) - n_curr_out(l); - delta_F_curr(l) = Fcurr - F_curr(l); - F_curr(l) = Fcurr; - delta_n_curr(l) = n_curr_in(l) - n_prev_in(l); - } - - amrex::Print() << "n_curr_in, n_prev_in: " << n_curr_in(0) << " " - << n_prev_in(0) << "\n"; - - int m = Broyden_Step - 1; - if (m > 0) - { - for (int a = 0; a < num_field_sites_all_NS; ++a) - { - amrex::Real sum = 0.; - for (int b = 0; b < num_field_sites_all_NS; ++b) - { - sum += Jinv_curr(a, b) * delta_F_curr(b); - } - sum_deltaFcurr(a) = sum; - } - - amrex::Real denom = 0.; - for (int l = 0; l < num_field_sites_all_NS; ++l) - { - denom += delta_n_curr(l) * sum_deltaFcurr(l); - } - - for (int b = 0; b < num_field_sites_all_NS; ++b) - { - amrex::Real sum = 0.; - for (int a = 0; a < num_field_sites_all_NS; ++a) - { - sum += delta_n_curr(a) * Jinv_curr(a, b); - } - delta_n_Jinv(b) = sum; - } - - for (int a = 0; a < num_field_sites_all_NS; ++a) - { - for (int b = 0; b < num_field_sites_all_NS; ++b) - { - Jinv_curr(a, b) += (delta_n_curr(a) - sum_deltaFcurr(a)) * - delta_n_Jinv(b) / denom; - } - } - } - for (int a = 0; a < num_field_sites_all_NS; ++a) - { - amrex::Real sum = 0.; - for (int b = 0; b < num_field_sites_all_NS; ++b) - { - sum += Jinv_curr(a, b) * F_curr(b); - } - sum_Fcurr(a) = sum; - } - - for (int l = 0; l < num_field_sites_all_NS; ++l) - { - n_prev_in(l) = n_curr_in(l); - n_curr_in(l) = n_prev_in(l) - sum_Fcurr(l); - } - amrex::Print() << "n_new_in: " << n_curr_in(0) << "\n"; - - h_sum_Fcurr_data.clear(); - h_sum_deltaFcurr_data.clear(); - h_delta_n_Jinv_data.clear(); - - Broyden_Step += 1; - } - - MPI_Bcast(&Broyden_Norm, 1, MPI_DOUBLE, - ParallelDescriptor::IOProcessorNumber(), - ParallelDescriptor::Communicator()); -} -#endif diff --git a/Source/Solver/Transport/Broyden_Parallel_General.cpp b/Source/Solver/Transport/Broyden_Parallel_General.cpp index 69e3022..116f7f1 100644 --- a/Source/Solver/Transport/Broyden_Parallel_General.cpp +++ b/Source/Solver/Transport/Broyden_Parallel_General.cpp @@ -6,7 +6,6 @@ using namespace amrex; enum class s_Algorithm_Type : int { - broyden_first, broyden_second, simple_mixing }; @@ -45,39 +44,26 @@ void c_TransportSolver::Define_Broyden_Partition() /* Each process executes this function. * * Create the following: - * num_field_sites_all_NS * my_rank * total_proc - * site_size_loc + * site_size_loc_all_NS + * site_size_loc_cumulative vector of size equal to all nanostructure. */ - - num_field_sites_all_NS = 0; total_proc = amrex::ParallelDescriptor::NProcs(); my_rank = amrex::ParallelDescriptor::MyProc(); - // MPI_recv_count.resize(total_proc); - // MPI_recv_disp.resize(total_proc); - // num_procs_with_sites=total_proc; - - site_size_loc_cumulative.resize(vp_CNT.size() + 1); + site_size_loc_cumulative.resize(vp_NS.size() + 1); site_size_loc_cumulative[0] = 0; - for (int c = 0; c < vp_CNT.size(); ++c) + for (int c = 0; c < vp_NS.size(); ++c) { - vp_CNT[c]->set_site_size_loc_offset(site_size_loc_cumulative[c]); + vp_NS[c]->Set_NumFieldSites_Local_NSOffset(site_size_loc_cumulative[c]); + site_size_loc_cumulative[c + 1] = - site_size_loc_cumulative[c] + vp_CNT[c]->MPI_recv_count[my_rank]; + site_size_loc_cumulative[c] + vp_NS[c]->Get_NumFieldSites_Local(); } - site_size_loc_all_NS = site_size_loc_cumulative[vp_CNT.size()]; - - // if (ParallelDescriptor::IOProcessor()) - //{ - // amrex::Print() << "recv_count/recv_disp: \n"; - // for(int i=0; i < total_proc; ++i) { - // amrex::Print() << i << " " << MPI_recv_count[i] << " "<< - // MPI_recv_disp[i] << "\n"; - // } - // } + site_size_loc_all_NS = site_size_loc_cumulative[vp_NS.size()]; + } void c_TransportSolver::Set_Broyden_Parallel() @@ -99,16 +85,15 @@ void c_TransportSolver::Set_Broyden_Parallel() auto const &h_n_curr_in = h_n_curr_in_data.table(); /*Need generalization for multiple CNTs*/ - for (int c = 0; c < vp_CNT.size(); ++c) + for (int c = 0; c < vp_NS.size(); ++c) { int NS_offset = site_size_loc_cumulative[c]; - vp_CNT[c]->Fetch_InputLocalCharge_FromNanostructure( - h_n_curr_in_data, NS_offset, vp_CNT[c]->MPI_recv_disp[my_rank], - vp_CNT[c]->MPI_recv_count[my_rank]); + vp_NS[c]->Copy_ForBroydenInput_LocalChargeFromNanostructure( + h_n_curr_in_data, NS_offset); // amrex::Print() << "Fetching h_n_curr_in for NS_id: " << - // vp_CNT[c]->NS_Id << "\n"; for(int i=NS_offset; i< NS_offset + - // vp_CNT[c]->MPI_recv_count[my_rank]; ++i) { + // vp_NS[c]->NS_Id << "\n"; for(int i=NS_offset; i< NS_offset + + // vp_NS[c]->MPI_recv_count[my_rank]; ++i) { // amrex::Print() << i << " " << h_n_curr_in(i) << "\n"; // } } @@ -155,14 +140,6 @@ void c_TransportSolver::Set_Broyden_Parallel() switch (c_TransportSolver::map_AlgorithmType.at(Algorithm_Type)) { - case s_Algorithm_Type::broyden_first: - { - amrex::Abort( - "Algorithm, broyden_first is not parallelized. Compile " - "with preprocessor directive, BROYDEN_PARALLEL=False, or " - "use broyden_second algorithm."); - break; - } case s_Algorithm_Type::broyden_second: { h_intermed_vector_data.resize({0}, {Broyden_Threshold_MaxStep}, @@ -314,10 +291,6 @@ void c_TransportSolver::Reset_Broyden_Parallel() switch (c_TransportSolver::map_AlgorithmType.at(Algorithm_Type)) { - case s_Algorithm_Type::broyden_first: - { - break; - } case s_Algorithm_Type::broyden_second: { #ifdef BROYDEN_SKIP_GPU_OPTIMIZATION diff --git a/Source/Solver/Transport/Make.package b/Source/Solver/Transport/Make.package index 5241479..b9f40ed 100644 --- a/Source/Solver/Transport/Make.package +++ b/Source/Solver/Transport/Make.package @@ -4,7 +4,6 @@ CEXE_sources += Broyden_Serial_General.cpp CEXE_sources += Broyden_Parallel_General.cpp CEXE_sources += Simple_Mixing_Serial.cpp -CEXE_sources += Broyden_First_Serial.cpp CEXE_sources += Broyden_Second_Serial.cpp CEXE_sources += Broyden_Second_Parallel.cpp diff --git a/Source/Solver/Transport/NEGF/NEGF_Common.H b/Source/Solver/Transport/NEGF/NEGF_Common.H index fa009f7..910ee70 100644 --- a/Source/Solver/Transport/NEGF/NEGF_Common.H +++ b/Source/Solver/Transport/NEGF/NEGF_Common.H @@ -28,6 +28,12 @@ enum class s_Norm_Type : int Relative }; +enum class Gate_Terminal_Type : int +{ + EB, + Boundary +}; + struct s_Position3D { amrex::Array dir = {AMREX_D_DECL(0., 0., 0.)}; @@ -173,6 +179,7 @@ class c_NEGF_Common : protected MatrixBlock, private c_IntegrationPath void Read_TerminalParams(amrex::ParmParse &); void Read_PotentialProfileParams(amrex::ParmParse &); void Read_EqContourIntgPts(amrex::ParmParse &); + void Read_DOSParams(amrex::ParmParse &); void Read_FlatbandDOSParams(amrex::ParmParse &); void Read_NonEqPathParams(amrex::ParmParse &); void Read_AdaptiveIntegrationParams(amrex::ParmParse &); @@ -196,6 +203,7 @@ class c_NEGF_Common : protected MatrixBlock, private c_IntegrationPath void Print_TerminalParams(); void Print_PotentialProfileParams(); void Print_EqContourIntgPts(); + void Print_DOSParams(); void Print_FlatbandDOSParams(); void Print_NonEqPathParams(); void Print_IntegrationParams(); @@ -324,7 +332,7 @@ class c_NEGF_Common : protected MatrixBlock, private c_IntegrationPath int my_rank = 0; int num_proc = 1; int NS_Id = 0; - int site_size_loc_offset = 0; + int num_field_sites_loc_NS_offset = 0; int min_local_site_id = 0; MPI_Datatype MPI_BlkType; @@ -397,6 +405,12 @@ class c_NEGF_Common : protected MatrixBlock, private c_IntegrationPath amrex::Vector noneq_integration_pts_density; amrex::Vector noneq_percent_intercuts; + /*DOS/LDOS params*/ + bool flag_compute_DOS = false; + bool flag_write_LDOS = false; + bool flag_write_LDOS_iter = false; + int write_LDOS_iter_period = 1e6; + bool flag_noneq_integration_pts_density = false; bool flag_compute_flatband_dos = false; int total_noneq_integration_pts = 0; @@ -410,8 +424,12 @@ class c_NEGF_Common : protected MatrixBlock, private c_IntegrationPath amrex::Vector ContourPath_DOS; /*For contacts*/ - std::string Gate_String = "Gate"; bool flag_EC_potential_updated = false; + amrex::Real Vds = 0; + amrex::Real Vgs = 0; + std::string Gate_String = "Gate"; + std::string gate_terminal_type_str = "EB"; + Gate_Terminal_Type gate_terminal_type; int flag_contact_mu_specified = 1; amrex::Real E_f = 0.; @@ -471,7 +489,9 @@ class c_NEGF_Common : protected MatrixBlock, private c_IntegrationPath void Initialize_GPUArraysForGreensAndSpectralFunctionToZero(); void Initialize_GPUArraysForChargeComputationToZero(); - void Solve_NEGF(RealTable1D &n_curr_out_data, const int iter); + void Set_TerminalBiasesAndContactPotential(); + void Solve_NEGF(RealTable1D &n_curr_out_data, const int iter, + const bool flag_update_terminal_bias); void Compute_InducedCharge(RealTable1D &n_curr_out_data); void Compute_Rho0(); @@ -482,14 +502,19 @@ class c_NEGF_Common : protected MatrixBlock, private c_IntegrationPath void Compute_Current(); void Compute_DensityOfStates(std::string DOS_foldername, bool flag_write_LDOS); + void Compute_DensityOfStates_General(bool isIter, int iter_or_step); + + void Copy_LocalChargeFromNanostructure(RealTable1D &container_data, + const int NS_offset); - void Fetch_InputLocalCharge_FromNanostructure(RealTable1D &container_data, - const int NS_offset, - const int disp, - const int data_size); + void Copy_BroydenPredictedCharge(const RealTable1D& h_n_curr_in_data); - void Scatterv_BroydenComputed_GlobalCharge(RealTable1D &n_curr_in_glo_data); - // void Gatherv_NEGFComputed_LocalCharge(RealTable1D& n_curr_out_glo_data); + void Write_Data_General(const bool isIter, + const int iter_or_step, + const int total_iterations_in_step, + const amrex::Real avg_intg_pts, + RealTable1D const& h_n_curr_out_data, + RealTable1D const& h_Norm_data); void Write_Data(const std::string filename_prefix, const RealTable1D &n_curr_out_data, @@ -498,20 +523,23 @@ class c_NEGF_Common : protected MatrixBlock, private c_IntegrationPath void Write_InputInducedCharge(const std::string filename_prefix, const RealTable1D &n_curr_in_data); - void Write_Current(const int step, const amrex::Real Vds, - const amrex::Real Vgs, const int Broyden_Step, - const int max_iter, const amrex::Real Broyden_fraction, - const int Broyden_Scalar); + void Write_Current(const int step, + const int total_iterations_in_step, + const amrex::Real avg_intg_pts); // setters/getters for MPI params - void set_site_size_loc_offset(int val) { site_size_loc_offset = val; } + void set_num_field_sites_loc_NS_offset(int val) + { num_field_sites_loc_NS_offset = val; } // setters/getters for nanostructure params int get_NS_Id() const { return NS_Id; } int get_num_field_sites() const { return num_field_sites; } + int get_num_field_sites_loc() const { return blkCol_size_loc; } amrex::Real get_Fermi_level() const { return E_f; } // setters/getters for contact params + int get_Vds() const { return Vds; } + int get_Vgs() const { return Vgs; } std::string get_Gate_String() const { return Gate_String; } bool is_contact_mu_specified() const { return flag_contact_mu_specified; } void set_Contact_Electrochemical_Potential( diff --git a/Source/Solver/Transport/NEGF/NEGF_Common.cpp b/Source/Solver/Transport/NEGF/NEGF_Common.cpp index c8f3225..2897438 100644 --- a/Source/Solver/Transport/NEGF/NEGF_Common.cpp +++ b/Source/Solver/Transport/NEGF/NEGF_Common.cpp @@ -1,5 +1,9 @@ #include "NEGF_Common.H" +#include "../../../Code.H" +#include "../../../Input/GeometryProperties/GeometryProperties.H" +#include "../../../Input/MacroscopicProperties/MacroscopicProperties.H" +#include "../../../Input/BoundaryConditions/BoundaryConditions.H" #include "../../Utils/CodeUtils/CodeUtil.H" #include "../../Utils/SelectWarpXUtils/TextMsg.H" #include "../../Utils/SelectWarpXUtils/WarpXConst.H" @@ -21,6 +25,13 @@ const std::map map_strToAxisType = { {"x", AxisType::X}, {"X", AxisType::X}, {"y", AxisType::Y}, {"Y", AxisType::Y}, {"z", AxisType::Z}, {"Z", AxisType::Z}}; +const std::map + map_S_GateTerminalType = { + {"eb", Gate_Terminal_Type::EB}, + {"EB", Gate_Terminal_Type::EB}, + {"boundary", Gate_Terminal_Type::Boundary}, + {"Boundary", Gate_Terminal_Type::Boundary}}; + template void c_NEGF_Common::Set_KeyParams(const std::string &name_str, const int &id_counter, @@ -365,6 +376,18 @@ void c_NEGF_Common::Read_TerminalParams(amrex::ParmParse &pp_ns) { pp_ns.query("gate_string", Gate_String); + pp_ns.query("gate_terminal_type", gate_terminal_type_str); + + if (map_S_GateTerminalType.find(gate_terminal_type_str) != map_S_GateTerminalType.end()) + { + gate_terminal_type = map_S_GateTerminalType.at( + gate_terminal_type_str); + } + else + { + amrex::Abort("gate_terminal_type: " + gate_terminal_type_str + + " is invalid. Valid type: EB or Boundary."); + } queryWithParser(pp_ns, "contact_Fermi_level", E_f); pp_ns.query("contact_mu_specified", flag_contact_mu_specified); @@ -425,6 +448,16 @@ void c_NEGF_Common::Read_EqContourIntgPts(amrex::ParmParse &pp_ns) eq_integration_pts, 0, 3); } +template +void c_NEGF_Common::Read_DOSParams(amrex::ParmParse &pp_ns) +{ + pp_ns.query("flag_compute_DOS", flag_compute_DOS); + pp_ns.query("flag_write_LDOS", flag_write_LDOS); + pp_ns.query("flag_write_LDOS_iter", flag_write_LDOS_iter); + pp_ns.query("write_LDOS_iter_period", write_LDOS_iter_period); +} + + template void c_NEGF_Common::Read_FlatbandDOSParams(amrex::ParmParse &pp_ns) { @@ -636,6 +669,7 @@ void c_NEGF_Common::Read_CommonData(amrex::ParmParse &pp) Read_TerminalParams(pp); Read_PotentialProfileParams(pp); Read_EqContourIntgPts(pp); + Read_DOSParams(pp); Read_FlatbandDOSParams(pp); Read_NonEqPathParams(pp); Read_IntegrationParams(pp); @@ -728,6 +762,7 @@ void c_NEGF_Common::Print_ReadData() Print_TerminalParams(); Print_PotentialProfileParams(); Print_EqContourIntgPts(); + Print_DOSParams(); Print_FlatbandDOSParams(); Print_NonEqPathParams(); Print_IntegrationParams(); @@ -805,6 +840,9 @@ template void c_NEGF_Common::Print_TerminalParams() { amrex::Print() << "##### gate_string: " << Gate_String << "\n"; + + amrex::Print() << "##### gate_terminal_type: " << gate_terminal_type_str + << "\n"; amrex::Print() << "##### contact_Fermi_level, E_f / [eV]: " << E_f << "\n"; @@ -859,6 +897,21 @@ void c_NEGF_Common::Print_EqContourIntgPts() amrex::Print() << "\n"; } + +template +void c_NEGF_Common::Print_DOSParams() +{ + amrex::Print() << "##### flag_compute_DOS: " << flag_compute_DOS << "\n"; + + amrex::Print() << "##### flag_write_LDOS: " << flag_write_LDOS << "\n"; + + amrex::Print() << "##### flag_write_LDOS_iter: " << flag_write_LDOS_iter + << "\n"; + + amrex::Print() << "##### write_LDOS_iter_period: " << write_LDOS_iter_period + << "\n"; +} + template void c_NEGF_Common::Print_FlatbandDOSParams() { @@ -1238,8 +1291,11 @@ void c_NEGF_Common::Initialize_NEGF(const std::string common_foldername_str, } template -void c_NEGF_Common::Solve_NEGF(RealTable1D &n_curr_out_data, const int iter) +void c_NEGF_Common::Solve_NEGF(RealTable1D &n_curr_out_data, const int iter, + const bool flag_update_terminal_bias) { + if(flag_update_terminal_bias) Set_TerminalBiasesAndContactPotential(); + Add_PotentialToHamiltonian(); if (flag_adaptive_integration_limits and @@ -1268,6 +1324,68 @@ void c_NEGF_Common::Solve_NEGF(RealTable1D &n_curr_out_data, const int iter) Compute_InducedCharge(n_curr_out_data); } +template +void c_NEGF_Common:: Write_Data_General(const bool isIter, + const int iter_or_step, + const int total_iterations_in_step, + const amrex::Real avg_intg_pts, + RealTable1D const& h_n_curr_out_data, + RealTable1D const& h_Norm_data) +{ + // Note: n_curr_out_glo is output from negf and input to broyden. + // n_curr_in is the broyden predicted charge for next iteration. + // NEGF->n_curr_out -> Broyden->n_curr_in -> Electrostatics -> NEGF. + std::string write_filename; + bool to_write = false; + + if(isIter) { + write_filename = get_iter_filename(); + if(get_flag_write_at_iter()) { + to_write = true; + } + } + else { + write_filename = get_step_filename(); + to_write = true; + } + + if(to_write) { + + auto const &h_n_curr_out = h_n_curr_out_data.table(); + auto const &h_Norm = h_Norm_data.table(); + + RealTable1D n_curr_out_glo_data; + RealTable1D Norm_glo_data; + + if (ParallelDescriptor::IOProcessor()) + { + const int Hsize = get_num_field_sites(); + n_curr_out_glo_data.resize({0}, {Hsize}, The_Pinned_Arena()); + Norm_glo_data.resize({0}, {Hsize}, The_Pinned_Arena()); + } + auto const &n_curr_out_glo = n_curr_out_glo_data.table(); + auto const &Norm_glo = Norm_glo_data.table(); + + MPI_Gatherv(&h_n_curr_out(0), MPI_recv_count[my_rank], MPI_DOUBLE, + &n_curr_out_glo(0), MPI_recv_count.data(), + MPI_recv_disp.data(), MPI_DOUBLE, + ParallelDescriptor::IOProcessorNumber(), + ParallelDescriptor::Communicator()); + + MPI_Gatherv(&h_Norm(0), MPI_recv_count[my_rank], MPI_DOUBLE, + &Norm_glo(0), MPI_recv_count.data(), + MPI_recv_disp.data(), MPI_DOUBLE, + ParallelDescriptor::IOProcessorNumber(), + ParallelDescriptor::Communicator()); + + Write_Data(write_filename, n_curr_out_glo_data, Norm_glo_data); + } + + if(!isIter) { + Write_Current(iter_or_step, total_iterations_in_step, avg_intg_pts); + } +} + template void c_NEGF_Common::Write_Data(const std::string filename_prefix, const RealTable1D &n_curr_out_data, @@ -2153,6 +2271,40 @@ void c_NEGF_Common::Deallocate_TemporaryArraysForGFComputation() #endif } + +template +void c_NEGF_Common::Compute_DensityOfStates_General(bool flag_isIter, int iter_or_step) +{ + if(flag_isIter) { + if (flag_write_LDOS_iter and + (iter_or_step + 1) % write_LDOS_iter_period == 0) + { + std::string iter_dos_foldername_str = + amrex::Concatenate(iter_foldername_str + + "/DOS_iter", + iter_or_step, negf_plt_name_digits); + + CreateDirectory(iter_dos_foldername_str); + // e.g.: /negf/cnt/step0001_iter/LDOS_iter0002/ + + Compute_DensityOfStates(iter_dos_foldername_str, + flag_write_LDOS_iter); + } + } + else if(flag_compute_DOS) { + std::string dos_step_foldername_str = + amrex::Concatenate(step_foldername_str + + "/DOS_step", + iter_or_step, negf_plt_name_digits); + + CreateDirectory(dos_step_foldername_str); + // e.g.: /negf/cnt/DOS_step0001/ + + Compute_DensityOfStates(dos_step_foldername_str, + flag_write_LDOS); + } +} + template void c_NEGF_Common::Compute_DensityOfStates(std::string dos_foldername, bool flag_write_LDOS) @@ -2656,11 +2808,11 @@ void c_NEGF_Common::Compute_InducedCharge(RealTable1D &n_curr_out_data) #endif auto const &RhoInduced_loc = n_curr_out_data.table(); - int SSL_offset = site_size_loc_offset; /*changes for multiple nanotube*/ + int field_sites_NS_offset = num_field_sites_loc_NS_offset; amrex::ParallelFor(blkCol_size_loc, [=] AMREX_GPU_DEVICE(int n) noexcept { - RhoInduced_loc(n + SSL_offset) = + RhoInduced_loc(n + field_sites_NS_offset) = (RhoEq_loc(n).DiagSum().imag() + RhoNonEq_loc(n).DiagSum().real() - Rho0_loc(n).DiagSum().imag()); @@ -2790,12 +2942,15 @@ void c_NEGF_Common::Write_ChargeComponents( } template -void c_NEGF_Common::Fetch_InputLocalCharge_FromNanostructure( - RealTable1D &container_data, const int NS_offset, const int disp, - const int data_size) +void c_NEGF_Common::Copy_LocalChargeFromNanostructure( + RealTable1D &container_data, const int NS_offset) { auto const &h_n_curr_in_glo = h_n_curr_in_glo_data.table(); auto const &container = container_data.table(); + + int disp = MPI_recv_disp[my_rank]; + int data_size = MPI_recv_count[my_rank]; + for (int i = 0; i < data_size; ++i) { int gid = disp + i; @@ -2804,31 +2959,6 @@ void c_NEGF_Common::Fetch_InputLocalCharge_FromNanostructure( h_n_curr_in_glo_data.clear(); } -template -void c_NEGF_Common::Scatterv_BroydenComputed_GlobalCharge( - RealTable1D &n_curr_in_glo_data) -{ - auto const &n_curr_in_glo = n_curr_in_glo_data.table(); - auto const &h_n_curr_in_loc = h_n_curr_in_loc_data.table(); - - // amrex::Print() << "In Scatterv: num_local_field_sites, " << - // num_local_field_sites << "\n"; - - MPI_Scatterv(&n_curr_in_glo(0), MPI_send_count.data(), MPI_send_disp.data(), - MPI_DOUBLE, &h_n_curr_in_loc(0), num_local_field_sites, - MPI_DOUBLE, ParallelDescriptor::IOProcessorNumber(), - ParallelDescriptor::Communicator()); - - // amrex::Print() << "h_n_curr_in_loc in CopyToNS: \n"; - // amrex::Print() << "MPI_send_count/disp: " << MPI_send_count[0] << " " << - // MPI_send_disp[0] << "\n"; if (ParallelDescriptor::IOProcessor()) - //{ - // for(int n=0; n < 5; ++n) { - // amrex::Print() << n << " " << h_n_curr_in_loc(n) << "\n"; - // } - // } - // MPI_Barrier(ParallelDescriptor::Communicator()); -} template void c_NEGF_Common::Write_PotentialAtSites(const std::string filename_prefix) @@ -4542,11 +4672,9 @@ void c_NEGF_Common::Compute_Current() } template -void c_NEGF_Common::Write_Current(const int step, const amrex::Real Vds, - const amrex::Real Vgs, - const int avg_intg_pts, const int max_iter, - const amrex::Real Broyden_fraction, - const int Broyden_Scalar) +void c_NEGF_Common::Write_Current(const int step, + const int total_iterations_in_step, + const amrex::Real avg_intg_pts) { if (ParallelDescriptor::IOProcessor()) { @@ -4561,35 +4689,115 @@ void c_NEGF_Common::Write_Current(const int step, const amrex::Real Vds, { outfile_I << std::setw(20) << h_Current_loc(k); } - outfile_I << std::setw(10) << avg_intg_pts << std::setw(10) << max_iter - << std::setw(10) << Broyden_fraction << std::setw(10) - << Broyden_Scalar << std::setw(20) << total_conductance + outfile_I << std::setw(10) << avg_intg_pts << std::setw(10) + << total_iterations_in_step + << std::setw(20) << total_conductance << "\n"; outfile_I.close(); } } -// template -// void -// c_NEGF_Common::DeallocateArrays () -//{ -// -// } -// -// -// template -// void -// c_NEGF_Common::ComputeChargeDensity () -//{ -// -// } -// -// template -// AMREX_GPU_HOST_DEVICE -// ComplexType -// c_NEGF_Common::conjugate(ComplexType a) -//{ -// ComplexType a_conj(a.real(), -1.*a.imag()); -// return a_conj; -//} +template +void c_NEGF_Common:: Set_TerminalBiasesAndContactPotential() +{ + auto &rCode = c_Code::GetInstance(); + auto &rGprop = rCode.get_GeometryProperties(); + auto &rBC = rCode.get_BoundaryConditions(); + + if (!is_contact_mu_specified()) + { + amrex::Real V_contact[NUM_CONTACTS] = {0., 0.}; + amrex::Vector ep(NUM_CONTACTS, 0); + + const auto *ps = get_Contact_Parser_String(); + for (int k = 0; k < NUM_CONTACTS; ++k) + { + V_contact[k] = rGprop.pEB->Read_SurfSoln(ps[k]); + + amrex::Print() << "\n Updated terminal voltage: " << k << " " + << V_contact[k] << " V\n"; + + ep[k] = get_Fermi_level() - V_contact[k]; + } + set_Contact_Electrochemical_Potential(ep); + + Vds = V_contact[1] - V_contact[0]; + + if (gate_terminal_type == Gate_Terminal_Type::EB) + { + Vgs = + rGprop.pEB->Read_SurfSoln(get_Gate_String()) - V_contact[0]; + } + else if (gate_terminal_type == Gate_Terminal_Type::Boundary) + { + Vgs = rBC.Read_BoundarySoln(get_Gate_String()) - V_contact[0]; + } + } + else + { + amrex::Real ep_s = get_Source_Electrochemical_Potential(); + amrex::Real ep_d = get_Drain_Electrochemical_Potential(); + + Vds = ep_s - ep_d; + + amrex::Real GV = 0.; + if (gate_terminal_type == Gate_Terminal_Type::EB) + { + GV = rGprop.pEB->Read_SurfSoln(get_Gate_String()); + } + else if (gate_terminal_type == Gate_Terminal_Type::Boundary) + { + GV = rBC.Read_BoundarySoln(get_Gate_String()); + } + + amrex::Print() << "\n Updated gate voltage: " << GV << " V\n"; + Vgs = GV - (get_Fermi_level() - ep_s); + } + amrex::Print() + << " Vds: " << Vds << " V, Vgs: " << Vgs << " V\n"; +} + + +template +void c_NEGF_Common:: Copy_BroydenPredictedCharge(const RealTable1D& h_n_curr_in_data) +{ + auto const &h_n_curr_in = h_n_curr_in_data.table(); + + RealTable1D n_curr_in_glo_data; + + if (ParallelDescriptor::IOProcessor()) + { + n_curr_in_glo_data.resize({0}, {num_field_sites}, The_Pinned_Arena()); + } + auto const &n_curr_in_glo = n_curr_in_glo_data.table(); + + MPI_Gatherv(&h_n_curr_in(0), MPI_recv_count[my_rank], MPI_DOUBLE, + &n_curr_in_glo(0), MPI_recv_count.data(), + MPI_recv_disp.data(), MPI_DOUBLE, + ParallelDescriptor::IOProcessorNumber(), + ParallelDescriptor::Communicator()); + + auto const &h_n_curr_in_loc = h_n_curr_in_loc_data.table(); + + MPI_Scatterv(&n_curr_in_glo(0), MPI_send_count.data(), MPI_send_disp.data(), + MPI_DOUBLE, &h_n_curr_in_loc(0), num_local_field_sites, + MPI_DOUBLE, ParallelDescriptor::IOProcessorNumber(), + ParallelDescriptor::Communicator()); + // amrex::Print() << "h_n_curr_in_loc in CopyToNS: \n"; + // amrex::Print() << "MPI_send_count/disp: " << MPI_send_count[0] << " " << + // MPI_send_disp[0] << "\n"; if (ParallelDescriptor::IOProcessor()) + //{ + // for(int n=0; n < 5; ++n) { + // amrex::Print() << n << " " << h_n_curr_in_loc(n) << "\n"; + // } + // } + // MPI_Barrier(ParallelDescriptor::Communicator()); + + //write out charge + if (write_at_iter) + { + Write_InputInducedCharge( + iter_filename_str, n_curr_in_glo_data); + } +} diff --git a/Source/Solver/Transport/Nanostructure.H b/Source/Solver/Transport/Nanostructure.H index 8a631cb..35454b8 100644 --- a/Source/Solver/Transport/Nanostructure.H +++ b/Source/Solver/Transport/Nanostructure.H @@ -12,13 +12,48 @@ #include "NEGF/Graphene.H" #include "Nanostructure_fwd.H" +class c_Nanostructure_Base { + + public: + using RealTable1D = TableData; + + virtual ~c_Nanostructure_Base() = default; + + virtual void Set_StepFilenames(int step) = 0; + virtual void Set_IterationFilenames(int iter) = 0; + virtual int Get_NanostructureID() = 0; + virtual int Get_NumFieldSites() = 0; + virtual int Get_NumFieldSites_Local() = 0; + virtual void Set_NumFieldSites_Local_NSOffset(int val) = 0; + virtual int Get_Flag_WriteAtIter() = 0; + virtual std::pair Get_Biases() = 0; + + virtual void Gather_PotentialAtAtoms() = 0; + virtual int Solve_NEGF(RealTable1D& n_curr_out_data, + int iter, + bool flag_update_terminal_bias) = 0; + virtual void Copy_ForBroydenInput_LocalChargeFromNanostructure( + RealTable1D &container_data, + const int NS_offset) = 0; + virtual void Copy_BroydenPredictedChargeToNanostructure(const RealTable1D&) = 0; + virtual void Deposit_ChargeDensityToMesh() = 0; + virtual void PostProcess(bool isIter, int iter_or_step) = 0; + virtual void Write_Output(const bool isIter, + const int iter_or_step, + const int total_iterations_in_step, + const amrex::Real avg_intg_pts, + RealTable1D const& h_n_curr_out_data, + RealTable1D const& h_Norm_data) = 0; + +}; + template class c_Nanostructure - : private amrex::ParticleContainer, public NSType { - using RealTable1D = TableData; int _use_electrostatic = 0; int _use_negf = 0; @@ -31,10 +66,16 @@ class c_Nanostructure void Fill_AtomLocations(); void Read_AtomLocations(); - void Set_GatherAndDepositMultiFabs(); amrex::Real Compute_CellVolume(); + protected: + + void Evaluate_LocalFieldSites(); + void Deposit_ZeroToMesh(); + void Obtain_PotentialAtSites(); + void Mark_CellsWithAtoms(); + public: c_Nanostructure(const amrex::Geometry &geom, const amrex::DistributionMapping &dm, @@ -44,11 +85,81 @@ class c_Nanostructure const amrex::Real NS_initial_deposit_value, const int use_negf, const std::string negf_foldername_str); - void Evaluate_LocalFieldSites(); - void Gather_MeshAttributeAtAtoms(); - void Deposit_AtomAttributeToMesh(); - void Deposit_ZeroToMesh(); - void Obtain_PotentialAtSites(); - void Mark_CellsWithAtoms(); + void Gather_PotentialAtAtoms() override; + + void Deposit_ChargeDensityToMesh() override; + + void Set_StepFilenames(int step) override { + NSType::Set_StepFilenameString(step); + } + + void Set_IterationFilenames(int iter) override { + NSType::Set_IterationFilenameString(iter); + } + + int Get_NanostructureID() override { + return NSType::get_NS_Id(); + } + + int Get_NumFieldSites() override { + return NSType::get_num_field_sites(); + } + + int Get_NumFieldSites_Local() override { + return NSType::get_num_field_sites_loc(); + } + + void Set_NumFieldSites_Local_NSOffset(int val) override { + return NSType::set_num_field_sites_loc_NS_offset(val); + } + + int Get_Flag_WriteAtIter() override { + return NSType::get_flag_write_at_iter(); + } + + std::pair Get_Biases() override { + return std::make_pair(NSType::get_Vds(), NSType::get_Vgs()); + } + + virtual int Solve_NEGF(RealTable1D &n_curr_out_data, const int iter, + bool flag_update_terminal_bias) override + { + NSType::Solve_NEGF(n_curr_out_data, iter, flag_update_terminal_bias); + return NSType::get_Total_Integration_Pts(); + } + + virtual void Copy_ForBroydenInput_LocalChargeFromNanostructure( + RealTable1D &container_data, + const int NS_offset) override + { + NSType::Copy_LocalChargeFromNanostructure(container_data, NS_offset); + } + + virtual void Copy_BroydenPredictedChargeToNanostructure( + const RealTable1D& h_n_curr_in_data) override + { + NSType::Copy_BroydenPredictedCharge(h_n_curr_in_data); + } + + virtual void PostProcess(bool isIter, int iter_or_step) override + { + NSType::Compute_DensityOfStates_General(isIter, iter_or_step); + if(!isIter) NSType::Compute_Current(); + } + + void Write_Output(const bool isIter, + const int iter_or_step, + const int total_iterations_in_step, + const amrex::Real avg_intg_pts, + RealTable1D const& h_n_curr_out_data, + RealTable1D const& h_Norm_data) override + { + NSType::Write_Data_General(isIter, + iter_or_step, + total_iterations_in_step, + avg_intg_pts, + h_n_curr_out_data, h_Norm_data); + } + }; #endif diff --git a/Source/Solver/Transport/Nanostructure.cpp b/Source/Solver/Transport/Nanostructure.cpp index bf8fea8..d56e9eb 100644 --- a/Source/Solver/Transport/Nanostructure.cpp +++ b/Source/Solver/Transport/Nanostructure.cpp @@ -17,7 +17,6 @@ template class c_Nanostructure; template class c_Nanostructure; -// template class c_Nanostructure; template c_Nanostructure::c_Nanostructure( @@ -64,7 +63,7 @@ c_Nanostructure::c_Nanostructure( NSType::Initialize_ChargeAtFieldSites(); - Deposit_AtomAttributeToMesh(); + Deposit_ChargeDensityToMesh(); } if (_use_negf) @@ -284,7 +283,7 @@ void c_Nanostructure::Mark_CellsWithAtoms() } template -void c_Nanostructure::Gather_MeshAttributeAtAtoms() +void c_Nanostructure::Gather_PotentialAtAtoms() { const auto &plo = _geom->ProbLoArray(); const auto dx = _geom->CellSizeArray(); @@ -342,7 +341,7 @@ void c_Nanostructure::Deposit_ZeroToMesh() } template -void c_Nanostructure::Deposit_AtomAttributeToMesh() +void c_Nanostructure::Deposit_ChargeDensityToMesh() { const auto &plo = _geom->ProbLoArray(); const auto dx = _geom->CellSizeArray(); diff --git a/Source/Solver/Transport/Transport.H b/Source/Solver/Transport/Transport.H index e506ea4..b77b9cc 100644 --- a/Source/Solver/Transport/Transport.H +++ b/Source/Solver/Transport/Transport.H @@ -12,66 +12,39 @@ enum class s_NS_Type : int; enum class s_Algorithm_Type : int; -enum class Gate_Terminal_Type : int; class c_TransportSolver { - public: - c_TransportSolver(); - void InitData(); - void Solve(const int step, const amrex::Real time); - void Cleanup(); - -/* The following methods are public because - * __device__ lambda cannot have private or protected access within its class. - * Need refactoring. */ -#ifdef BROYDEN_PARALLEL - void Set_Broyden_Parallel(); - void Reset_Broyden_Parallel(); - void Execute_Broyden_Modified_Second_Algorithm_Parallel(); - void Execute_Broyden_Modified_Second_Algorithm_Parallel_SkipGPU(); -#endif - - amrex::Real get_Vgs() { return Vgs; } - amrex::Real get_Vds() { return Vds; } - amrex::Real get_Broyden_Step() { return Broyden_Step; } - private: using RealTable1D = TableData; using RealTable2D = TableData; - bool flag_compute_DOS = false; - bool flag_write_LDOS = false; - bool flag_write_LDOS_iter = false; bool flag_isDefined_InitialDepositValue = false; bool flag_isDefined_NS_type_default = false; int NS_num = 0; int m_step = 0; + int m_iter = 1; int total_iter = 0; - int max_iter = 1; - int total_intg_pts_in_all_iter = 0; int use_selfconsistent_potential = 0; int use_negf = 0; const int negf_plt_name_digits = 4; - int write_LDOS_iter_period = 1e6; int flag_reset_with_previous_charge_distribution = 0; - int flag_initialize_inverse_jacobian = 0; int total_proc; int my_rank; int num_procs_with_sites; int site_size_loc_all_NS = 0; + + std::vector> vec_biases; + /*Broyden*/ int num_field_sites_all_NS = 0; int Broyden_Step = 1; int Broyden_Threshold_MaxStep = 200; - amrex::Real Vds = 0; - amrex::Real Vgs = 0; amrex::Real NS_initial_deposit_value = 0.; amrex::Real total_max_time_across_all_steps[7] = {0., 0., 0., 0., 0., 0., 0.}; - /*Broyden*/ amrex::Real Broyden_fraction = 0.1; amrex::Real Broyden_max_norm = 1.e-5; amrex::Real Broyden_Original_Fraction = 0.1; @@ -86,15 +59,10 @@ class c_TransportSolver std::string negf_foldername_str = "output/negf"; std::string common_foldername_str = "output/negf/transport_common"; std::string common_step_folder_str; - std::string inverse_jacobian_filename; - std::string gate_terminal_type_str = "EB"; - /*Broyden*/ std::string Algorithm_Type = "broyden_second"; std::string Broyden_Norm_Type = "relative"; static const std::map map_NSType_enum; - static const std::map - map_S_GateTerminalType; static const std::map map_AlgorithmType; std::map map_NormType = { {"absolute", s_Norm_Type::Absolute}, @@ -106,13 +74,11 @@ class c_TransportSolver amrex::Geometry const *_geom = nullptr; amrex::BoxArray const *_ba = nullptr; amrex::DistributionMapping const *_dm = nullptr; - Gate_Terminal_Type gate_terminal_type; amrex::Vector vec_NS_names; amrex::Vector site_size_loc_cumulative; - amrex::Vector>> vp_CNT; - amrex::Vector>> vp_Graphene; + amrex::Vector> vp_NS; /*Tables for Broyden*/ RealTable1D h_n_curr_in_data; @@ -124,8 +90,6 @@ class c_TransportSolver #ifdef BROYDEN_PARALLEL RealTable1D n_curr_in_glo_data; RealTable1D h_intermed_vector_data; - RealTable1D n_curr_out_glo_data; - RealTable1D Norm_glo_data; amrex::Gpu::HostVector h_Intermed_values_vec = {0., 0., 0.}; /*stores Broyden_Norm, Broyden_NormSum_Curr, Broyden_Denom*/ #ifdef BROYDEN_SKIP_GPU_OPTIMIZATION @@ -147,12 +111,6 @@ class c_TransportSolver amrex::Gpu::DeviceVector d_Intermed_values_vec = {0., 0., 0.}; /*stores Broyden_Norm, Broyden_NormSum_Curr, Broyden_Denom*/ #endif -#else - RealTable2D h_Jinv_curr_data; - RealTable1D h_n_start_in_data; - RealTable1D h_delta_n_curr_data; - amrex::Vector W_Broyden; - amrex::Vector V_Broyden; #endif /***Methods***/ @@ -162,40 +120,25 @@ class c_TransportSolver void Read_ControlFlags(amrex::ParmParse &pp); void Read_GatherAndDepositFields(amrex::ParmParse &pp); void Read_SelfConsistencyInput(amrex::ParmParse &pp); - void Read_InverseJacobianFilename(amrex::ParmParse &pp); - void Read_DOSInput(amrex::ParmParse &pp); - void Read_GateTerminalType(amrex::ParmParse &pp); void Set_NEGFFolderDirectories(); void Create_NEGFFolderDirectories(); void Assert_GatherAndDepositFields(); void Define_InitialDepositValue(); + template + void Create_Nanostructure(const std::string& name, + const int NS_id_counter); int Instantiate_Materials(); std::string Get_NS_type_str(const std::string &name); - void Set_gate_terminal_type(const std::string); - void Sum_ChargeDepositedByAllNS(); + void Group_ChargeDepositedByAllNS(); void Set_CommonStepFolder(const int step); - static bool doesKeyExist(std::string key) - { - return map_S_GateTerminalType.find(key) != map_S_GateTerminalType.end(); - } - template - void Set_TerminalBiasesAndContactPotential(NSType const &NS); - template - void CopyFromNS_ChargeComputedFromNEGF(NSType const &NS); - template - void CopyToNS_ChargeComputedUsingSelfConsistencyAlgorithm(NSType const &NS); - template - void Write_MoreDataAndComputeCurrent(NSType const &NS, - std::string const &write_filename, - bool const compute_current_flag); void Perform_SelfConsistencyAlgorithm(); + void Copy_BroydenPredictedChargeToHost(int NS_id); + void Copy_DataToBeWrittenToHost(int NS_id); void Reset_ForNextBiasStep(); void Obtain_maximum_time(amrex::Real const *total_time_counter_diff); #ifdef BROYDEN_PARALLEL - template - void Create_Global_Output_Data(NSType const &NS); void Deallocate_Broyden_Parallel(); void Define_Broyden_Partition(); void Define_MPI_Vector_Type_and_MPI_Vector_Sum(); @@ -223,13 +166,6 @@ class c_TransportSolver B[i] += A[i]; } } -#else - void Set_Broyden(); - void Reset_Broyden(); - void Execute_Broyden_First_Algorithm(); - void Execute_Broyden_Modified_Second_Algorithm(); - void Execute_Simple_Mixing_Algorithm(); - void Deallocate_Broyden_Serial(); #endif void SetVal_RealTable1D(RealTable1D &Tab1D_data, amrex::Real val); @@ -251,6 +187,26 @@ class c_TransportSolver template void Write_Table2D(const TableData &Arr_data, std::string filename, std::string header); + + public: + c_TransportSolver(); + void InitData(); + const std::vector>& Get_Vec_Biases() const { return vec_biases; } + void Solve(const int step, const amrex::Real time); + void Cleanup(); + +/* The following methods are public because + * __device__ lambda cannot have private or protected access within its class. + * Need refactoring. */ +#ifdef BROYDEN_PARALLEL + void Set_Broyden_Parallel(); + void Reset_Broyden_Parallel(); + void Execute_Broyden_Modified_Second_Algorithm_Parallel(); + void Execute_Broyden_Modified_Second_Algorithm_Parallel_SkipGPU(); +#endif + + amrex::Real get_Broyden_Step() { return Broyden_Step; } + }; #endif diff --git a/Source/Solver/Transport/Transport.cpp b/Source/Solver/Transport/Transport.cpp index 3787f98..538c903 100644 --- a/Source/Solver/Transport/Transport.cpp +++ b/Source/Solver/Transport/Transport.cpp @@ -25,15 +25,9 @@ enum class s_NS_Type : int }; enum class s_Algorithm_Type : int { - broyden_first, broyden_second, simple_mixing }; -enum class Gate_Terminal_Type : int -{ - EB, - Boundary -}; const std::map c_TransportSolver::map_NSType_enum = { {"carbon nanotube", s_NS_Type::CNT}, @@ -46,19 +40,9 @@ const std::map c_TransportSolver::map_NSType_enum = { const std::map c_TransportSolver::map_AlgorithmType = { - {"broyden_first", s_Algorithm_Type::broyden_first}, - {"Broyden_first", s_Algorithm_Type::broyden_first}, {"broyden_second", s_Algorithm_Type::broyden_second}, {"Broyden_second", s_Algorithm_Type::broyden_second}, - {"simple_mixing", s_Algorithm_Type::simple_mixing}, -}; - -const std::map - c_TransportSolver::map_S_GateTerminalType = { - {"eb", Gate_Terminal_Type::EB}, - {"EB", Gate_Terminal_Type::EB}, - {"boundary", Gate_Terminal_Type::Boundary}, - {"Boundary", Gate_Terminal_Type::Boundary}}; + {"simple_mixing", s_Algorithm_Type::simple_mixing}}; c_TransportSolver::c_TransportSolver() { ReadData(); } @@ -83,9 +67,6 @@ void c_TransportSolver::ReadData() Read_SelfConsistencyInput(pp_transport); - Read_DOSInput(pp_transport); - - Read_GateTerminalType(pp_transport); } void c_TransportSolver::Read_NSNames(amrex::ParmParse &pp) @@ -183,11 +164,10 @@ void c_TransportSolver::InitData() if (!flag_isDefined_InitialDepositValue) Define_InitialDepositValue(); - Set_gate_terminal_type(gate_terminal_type_str); } num_field_sites_all_NS = Instantiate_Materials(); - if (rCode.use_electrostatic) Sum_ChargeDepositedByAllNS(); + if (rCode.use_electrostatic) Group_ChargeDepositedByAllNS(); Set_Broyden_Parallel(); } @@ -282,85 +262,25 @@ void c_TransportSolver::Read_SelfConsistencyInput(amrex::ParmParse &pp) amrex::Print() << "##### reset_with_previous_charge_distribution: " << flag_reset_with_previous_charge_distribution << "\n"; - flag_initialize_inverse_jacobian = 0; - pp.query("initialize_inverse_jacobian", flag_initialize_inverse_jacobian); - amrex::Print() << "##### flag_initialize_inverse_jacobian: " - << flag_initialize_inverse_jacobian << "\n"; - - if (flag_initialize_inverse_jacobian) Read_InverseJacobianFilename(pp); -} - -void c_TransportSolver::Read_InverseJacobianFilename(amrex::ParmParse &pp) -{ - amrex::ParmParse pp_default; - int flag_restart = 0; - pp_default.query("restart", flag_restart); - - if (flag_restart) - { - int restart_step = 0; - getWithParser(pp_default, "restart_step", restart_step); - - std::string restart_folder_str = - amrex::Concatenate(common_foldername_str + "/step", - restart_step - 1, negf_plt_name_digits); - /*eg. output/negf/transport_common/step0001 for step 1*/ - - inverse_jacobian_filename = restart_folder_str + "/Jinv.dat"; - pp.query("inverse_jacobian_filename", inverse_jacobian_filename); - } - else - { - pp.get("inverse_jacobian_filename", inverse_jacobian_filename); - } - amrex::Print() << "##### inverse_jacobian_filename: " - << inverse_jacobian_filename << "\n"; } -void c_TransportSolver::Read_DOSInput(amrex::ParmParse &pp) -{ - pp.query("flag_compute_DOS", flag_compute_DOS); - amrex::Print() << "##### flag_compute_DOS: " << flag_compute_DOS << "\n"; - pp.query("flag_write_LDOS", flag_write_LDOS); - amrex::Print() << "##### flag_write_LDOS: " << flag_write_LDOS << "\n"; - - pp.query("flag_write_LDOS_iter", flag_write_LDOS_iter); - amrex::Print() << "##### flag_write_LDOS_iter: " << flag_write_LDOS_iter - << "\n"; - - pp.query("write_LDOS_iter_period", write_LDOS_iter_period); - amrex::Print() << "##### write_LDOS_iter_period: " << write_LDOS_iter_period - << "\n"; -} - -void c_TransportSolver::Read_GateTerminalType(amrex::ParmParse &pp) +std::string c_TransportSolver::Get_NS_type_str(const std::string &name) { - pp.query("gate_terminal_type", gate_terminal_type_str); + return flag_isDefined_NS_type_default ? NS_type_default + : map_NSNameToTypeStr.at(name); } -void c_TransportSolver::Set_gate_terminal_type( - const std::string gate_terminal_type_str) +template +void c_TransportSolver::Create_Nanostructure(const std::string& name, + const int NS_id_counter) { - if (doesKeyExist(gate_terminal_type_str)) - { - amrex::Print() << "##### gate_terminal_type: " << gate_terminal_type_str - << "\n"; - gate_terminal_type = c_TransportSolver::map_S_GateTerminalType.at( - gate_terminal_type_str); - } - else - { - amrex::Abort("gate_terminal_type: " + Algorithm_Type + - " is invalid. Valid type: EB or Boundary."); - } + vp_NS.push_back(std::make_unique>( + *_geom, *_dm, *_ba, name, NS_id_counter, + NS_gather_field_str, NS_deposit_field_str, + NS_initial_deposit_value, use_negf, negf_foldername_str)); } -std::string c_TransportSolver::Get_NS_type_str(const std::string &name) -{ - return flag_isDefined_NS_type_default ? NS_type_default - : map_NSNameToTypeStr.at(name); -} int c_TransportSolver::Instantiate_Materials() { @@ -376,23 +296,12 @@ int c_TransportSolver::Instantiate_Materials() { case s_NS_Type::CNT: { - using T = c_CNT; - vp_CNT.push_back(std::make_unique>( - *_geom, *_dm, *_ba, name, NS_id_counter, - NS_gather_field_str, NS_deposit_field_str, - NS_initial_deposit_value, use_negf, negf_foldername_str)); - field_sites = vp_CNT.back()->get_num_field_sites(); + Create_Nanostructure(name, NS_id_counter); break; } case s_NS_Type::Graphene: { - using T = c_Graphene; - - vp_Graphene.push_back(std::make_unique>( - *_geom, *_dm, *_ba, name, NS_id_counter, - NS_gather_field_str, NS_deposit_field_str, - NS_initial_deposit_value, use_negf, negf_foldername_str)); - field_sites = vp_Graphene.back()->get_num_field_sites(); + Create_Nanostructure(name, NS_id_counter); amrex::Abort("NS_type graphene is not yet defined."); break; } @@ -406,11 +315,15 @@ int c_TransportSolver::Instantiate_Materials() " is not supported."); } } + + field_sites = vp_NS.back()->Get_NumFieldSites(); + int cumulative_sites = NS_field_sites_cumulative.back() + field_sites; NS_field_sites_cumulative.push_back(cumulative_sites); NS_id_counter++; } + vec_biases.resize(NS_id_counter, std::make_pair(0, 0)); amrex::Print() << "NS_field_sites_cumulative: \n"; for (auto offset : NS_field_sites_cumulative) @@ -421,14 +334,13 @@ int c_TransportSolver::Instantiate_Materials() return NS_field_sites_cumulative.back(); } -void c_TransportSolver::Sum_ChargeDepositedByAllNS() +void c_TransportSolver::Group_ChargeDepositedByAllNS() { auto &rCode = c_Code::GetInstance(); auto &rGprop = rCode.get_GeometryProperties(); auto &rMprop = rCode.get_MacroscopicProperties(); amrex::MultiFab *p_mf_deposit = rMprop.get_p_mf(NS_deposit_field_str); - // p_mf_deposit->SumBoundary(rGprop.geom.periodicity()); p_mf_deposit->FillBoundary(rGprop.geom.periodicity()); } @@ -444,6 +356,7 @@ void c_TransportSolver::Set_CommonStepFolder(const int step) /*eg. output/negf/common/step0001 for step 1*/ } + void c_TransportSolver::Solve(const int step, const amrex::Real time) { auto &rCode = c_Code::GetInstance(); @@ -452,14 +365,14 @@ void c_TransportSolver::Solve(const int step, const amrex::Real time) auto &rOutput = rCode.get_Output(); auto &rPostPro = rCode.get_PostProcessor(); - max_iter = 0; - total_intg_pts_in_all_iter = 0; + m_iter = 0; + long long total_intg_pts_in_all_iter = 0; m_step = step; - for (int c = 0; c < vp_CNT.size(); ++c) + for (int c = 0; c < vp_NS.size(); ++c) { - vp_CNT[c]->Set_StepFilenameString(step); - Set_CommonStepFolder(step); + vp_NS[c]->Set_StepFilenames(m_step); + Set_CommonStepFolder(m_step); } amrex::Real time_counter[7] = {0., 0., 0., 0., 0., 0., 0.}; @@ -469,11 +382,11 @@ void c_TransportSolver::Solve(const int step, const amrex::Real time) { BL_PROFILE_VAR("Part1_to_6_sum", part1_to_6_sum_counter); - bool update_surface_soln_flag = true; + bool flag_update_terminal_bias = true; do { amrex::Print() << "\n\n##### Self-Consistent Iteration: " - << max_iter << " #####\n"; + << m_iter << " #####\n"; if (Broyden_Step > Broyden_Threshold_MaxStep) { amrex::Abort( @@ -484,46 +397,34 @@ void c_TransportSolver::Solve(const int step, const amrex::Real time) time_counter[0] = amrex::second(); rMprop.ReInitializeMacroparam(NS_gather_field_str); - rMLMG.UpdateBoundaryConditions(update_surface_soln_flag); + rMLMG.UpdateBoundaryConditions(flag_update_terminal_bias); auto mlmg_solve_time = rMLMG.Solve_PoissonEqn(); - rPostPro.Compute(); - // rOutput.WriteOutput(max_iter+100, time); + // rOutput.WriteOutput(m_iter+100, time); time_counter[1] = amrex::second(); // Part 2: Gather - for (int c = 0; c < vp_CNT.size(); ++c) + for (int c = 0; c < vp_NS.size(); ++c) { - if (vp_CNT[c]->get_flag_write_at_iter()) - { - vp_CNT[c]->Set_IterationFilenameString(max_iter); - } - - vp_CNT[c]->Gather_MeshAttributeAtAtoms(); + vp_NS[c]->Set_IterationFilenames(m_iter); + vp_NS[c]->Gather_PotentialAtAtoms(); } time_counter[2] = amrex::second(); // Part 3: NEGF int total_intg_pts_in_this_iter = 0; - for (int c = 0; c < vp_CNT.size(); ++c) + for (int c = 0; c < vp_NS.size(); ++c) { - if (update_surface_soln_flag) - { - Set_TerminalBiasesAndContactPotential(vp_CNT[c]); - } - amrex::Print() - << " Vds: " << Vds << " V, Vgs: " << Vgs << " V\n"; - #ifdef AMREX_USE_GPU - vp_CNT[c]->Solve_NEGF(d_n_curr_out_data, max_iter); + total_intg_pts_in_this_iter += vp_NS[c]->Solve_NEGF(d_n_curr_out_data, + m_iter, flag_update_terminal_bias); #else - vp_CNT[c]->Solve_NEGF(h_n_curr_out_data, max_iter); + total_intg_pts_in_this_iter += vp_NS[c]->Solve_NEGF(h_n_curr_out_data, + m_iter, flag_update_terminal_bias); #endif - total_intg_pts_in_this_iter += - vp_CNT[c]->get_Total_Integration_Pts(); - // CopyFromNS_ChargeComputedFromNEGF(vp_CNT[c]); + if(flag_update_terminal_bias) vec_biases[c] = vp_NS[c]->Get_Biases(); } total_intg_pts_in_all_iter += total_intg_pts_in_this_iter; time_counter[3] = amrex::second(); @@ -537,56 +438,35 @@ void c_TransportSolver::Solve(const int step, const amrex::Real time) rMprop.ReInitializeMacroparam(NS_deposit_field_str); rMprop.Deposit_AllExternalChargeDensitySources(); - for (int c = 0; c < vp_CNT.size(); ++c) + for (int c = 0; c < vp_NS.size(); ++c) { - CopyToNS_ChargeComputedUsingSelfConsistencyAlgorithm(vp_CNT[c]); - - vp_CNT[c]->Deposit_AtomAttributeToMesh(); - - if (vp_CNT[c]->get_flag_write_at_iter()) - { - vp_CNT[c]->Write_InputInducedCharge( - vp_CNT[c]->get_iter_filename(), n_curr_in_glo_data); - } - if (ParallelDescriptor::IOProcessor()) - { - n_curr_in_glo_data.clear(); - } + Copy_BroydenPredictedChargeToHost(vp_NS[c]->Get_NanostructureID()); + + vp_NS[c]->Copy_BroydenPredictedChargeToNanostructure(h_n_curr_in_data); + + vp_NS[c]->Deposit_ChargeDensityToMesh(); } - Sum_ChargeDepositedByAllNS(); + Group_ChargeDepositedByAllNS(); time_counter[5] = amrex::second(); - // Part 6: Write data - for (int c = 0; c < vp_CNT.size(); ++c) + // Part 6: PostProcess: compute/write DOS, current, other data + for (int c = 0; c < vp_NS.size(); ++c) { - if (vp_CNT[c]->get_flag_write_at_iter()) - { - bool compute_current = false; - Write_MoreDataAndComputeCurrent( - vp_CNT[c], vp_CNT[c]->get_iter_filename(), - compute_current); - } - - if (flag_write_LDOS_iter and - (max_iter + 1) % write_LDOS_iter_period == 0) - { - std::string iter_dos_foldername_str = - amrex::Concatenate(vp_CNT[c]->get_iter_foldername() + - "/DOS_iter", - max_iter, negf_plt_name_digits); - - CreateDirectory(iter_dos_foldername_str); - // e.g.: /negf/cnt/step0001_iter/LDOS_iter0002/ - - vp_CNT[c]->Compute_DensityOfStates(iter_dos_foldername_str, - flag_write_LDOS_iter); - } + bool isIter = true; + vp_NS[c]->PostProcess(isIter, m_iter); + + if(vp_NS[c]->Get_Flag_WriteAtIter()) + Copy_DataToBeWrittenToHost(vp_NS[c]->Get_NanostructureID()); + + vp_NS[c]->Write_Output(isIter, m_iter, m_iter, + static_cast(total_intg_pts_in_this_iter), + h_n_curr_out_data, h_Norm_data); } time_counter[6] = amrex::second(); - update_surface_soln_flag = false; - max_iter += 1; + flag_update_terminal_bias = false; + m_iter += 1; total_time_counter_diff[0] += time_counter[1] - time_counter[0]; total_time_counter_diff[1] += time_counter[2] - time_counter[1]; @@ -623,293 +503,97 @@ void c_TransportSolver::Solve(const int step, const amrex::Real time) Obtain_maximum_time(total_time_counter_diff); - /* LDOS computation is before current computation because - * if total conductance is calculated during LDOS computation, - * and it is written in the same file used for writing current - * after current computation.*/ - if (flag_compute_DOS) - { - for (int c = 0; c < vp_CNT.size(); ++c) - { - std::string dos_step_foldername_str = - amrex::Concatenate(vp_CNT[c]->get_step_foldername() + - "/DOS_step", - step, negf_plt_name_digits); - - CreateDirectory(dos_step_foldername_str); - // e.g.: /negf/cnt/DOS_step0001/ + // Part 7: Postprocess + amrex::Real time_for_current = amrex::second(); + for (int c = 0; c < vp_NS.size(); ++c) + { + bool isIter = false; + vp_NS[c]->PostProcess(isIter, m_step); - vp_CNT[c]->Compute_DensityOfStates(dos_step_foldername_str, - flag_write_LDOS); - } - } + Copy_DataToBeWrittenToHost(vp_NS[c]->Get_NanostructureID()); - // Part 7: current computation & writing data - amrex::Real time_for_current = amrex::second(); - for (int c = 0; c < vp_CNT.size(); ++c) - { - bool compute_current = true; - Write_MoreDataAndComputeCurrent(vp_CNT[c], - vp_CNT[c]->get_step_filename(), - compute_current); + vp_NS[c]->Write_Output(isIter, m_step, m_iter, + static_cast(total_intg_pts_in_all_iter)/m_iter, + h_n_curr_out_data, h_Norm_data); } - - amrex::Print() << "Time for current computation & writing data: " + amrex::Print() << "Time for postprocessing dos/current/writing data: " << amrex::second() - time_for_current << "\n"; Reset_ForNextBiasStep(); - - } // if use electrostatics - else + } + else //if not using electrostatics { - for (int c = 0; c < vp_CNT.size(); ++c) + for (int c = 0; c < vp_NS.size(); ++c) { RealTable1D RhoInduced; /*this is not correct but added just so Solve_NEGF compiles*/ - vp_CNT[c]->Solve_NEGF(RhoInduced, 0); - - bool compute_current = true; + vp_NS[c]->Solve_NEGF(RhoInduced, 0, false); - Write_MoreDataAndComputeCurrent(vp_CNT[c], - vp_CNT[c]->get_step_filename(), - compute_current); } } } -template -void c_TransportSolver::Set_TerminalBiasesAndContactPotential(NSType const &NS) -{ - auto &rCode = c_Code::GetInstance(); - auto &rGprop = rCode.get_GeometryProperties(); - auto &rBC = rCode.get_BoundaryConditions(); +void c_TransportSolver::Copy_BroydenPredictedChargeToHost(int NS_id) { - if (!NS->is_contact_mu_specified()) - { - amrex::Real V_contact[NUM_CONTACTS] = {0., 0.}; - - amrex::Vector ep(NUM_CONTACTS, 0); - - const auto *ps = NS->get_Contact_Parser_String(); - for (int k = 0; k < NUM_CONTACTS; ++k) - { - V_contact[k] = rGprop.pEB->Read_SurfSoln(ps[k]); - - amrex::Print() << "\n Updated terminal voltage: " << k << " " - << V_contact[k] << " V\n"; - - ep[k] = NS->get_Fermi_level() - V_contact[k]; - } - NS->set_Contact_Electrochemical_Potential(ep); - - Vds = V_contact[1] - V_contact[0]; - - if (gate_terminal_type == Gate_Terminal_Type::EB) - { - Vgs = - rGprop.pEB->Read_SurfSoln(NS->get_Gate_String()) - V_contact[0]; - } - else if (gate_terminal_type == Gate_Terminal_Type::Boundary) - { - Vgs = rBC.Read_BoundarySoln(NS->get_Gate_String()) - V_contact[0]; - } - } - else - { - amrex::Real ep_s = NS->get_Source_Electrochemical_Potential(); - amrex::Real ep_d = NS->get_Drain_Electrochemical_Potential(); - - Vds = ep_s - ep_d; - - amrex::Real GV = 0.; - if (gate_terminal_type == Gate_Terminal_Type::EB) - { - GV = rGprop.pEB->Read_SurfSoln(NS->get_Gate_String()); - } - else if (gate_terminal_type == Gate_Terminal_Type::Boundary) - { - GV = rBC.Read_BoundarySoln(NS->get_Gate_String()); - } - - amrex::Print() << "\n Updated gate voltage: " << GV << " V\n"; - Vgs = GV - (NS->get_Fermi_level() - ep_s); - } -} - -template -void c_TransportSolver::CopyFromNS_ChargeComputedFromNEGF(NSType const &NS) -{ -/* (?) Need a strategy to gather data for multiple CNTs*/ -/* (?) Future: if condition to check if proc was used for charge computation of - * this nanostructure*/ -/* (?) Future: for multiple nanotubes, then gather the data into a global array - * and then partition */ -#ifdef BROYDEN_SKIP_GPU_OPTIMIZATION - h_n_curr_out_data.copy(NS->h_RhoInduced_loc_data); -#else - d_n_curr_out_data.copy(NS->h_RhoInduced_loc_data); -#endif -} - -template -void c_TransportSolver::CopyToNS_ChargeComputedUsingSelfConsistencyAlgorithm( - NSType const &NS) -{ - int begin = site_size_loc_cumulative[NS->get_NS_Id()]; - int end = site_size_loc_cumulative[NS->get_NS_Id() + 1]; +#ifdef AMREX_USE_GPU + int begin = site_size_loc_cumulative[NS_id]; + int end = site_size_loc_cumulative[NS_id + 1]; int site_size_loc = end - begin; - auto const &h_n_curr_in = h_n_curr_in_data.table(); + h_n_curr_in_data.resize({0}, {site_size_loc}, The_Pinned_Arena()); -#ifdef AMREX_USE_GPU + auto const &h_n_curr_in = h_n_curr_in_data.table(); auto const &d_n_curr_in = d_n_curr_in_data.table(); - h_n_curr_in_data.resize({0}, {site_size_loc}, The_Pinned_Arena()); - amrex::Gpu::copyAsync(amrex::Gpu::deviceToHost, d_n_curr_in.p + begin, d_n_curr_in.p + end, h_n_curr_in.p); amrex::Gpu::streamSynchronize(); #endif - if (ParallelDescriptor::IOProcessor()) - { - const int Hsize = NS->get_num_field_sites(); - n_curr_in_glo_data.resize({0}, {Hsize}, The_Pinned_Arena()); - } - auto const &n_curr_in_glo = n_curr_in_glo_data.table(); - - /*offset necessary for multiple NS*/ - MPI_Gatherv(&h_n_curr_in(0), NS->MPI_recv_count[my_rank], MPI_DOUBLE, - &n_curr_in_glo(0), NS->MPI_recv_count.data(), - NS->MPI_recv_disp.data(), MPI_DOUBLE, - ParallelDescriptor::IOProcessorNumber(), - ParallelDescriptor::Communicator()); - - // amrex::Print() << "n_curr_in_glo in CopyToNS: \n"; - // if (ParallelDescriptor::IOProcessor()) - //{ - // for(int n=0; n < 5; ++n) { - // amrex::Print() << n << " " << n_curr_in_glo(n) << "\n"; - // } - // } - NS->Scatterv_BroydenComputed_GlobalCharge(n_curr_in_glo_data); -} - -template -void c_TransportSolver::Write_MoreDataAndComputeCurrent( - NSType const &NS, std::string const &write_filename, - bool const compute_current_flag) -{ - // Note: n_curr_out_glo was output from negf and input to broyden. - // n_curr_in is the broyden predicted charge for next iteration. - // NEGF->n_curr_out -> Broyden->n_curr_in -> Electrostatics -> NEGF. - Create_Global_Output_Data(NS); - NS->Write_Data(write_filename, n_curr_out_glo_data, Norm_glo_data); - - if (ParallelDescriptor::IOProcessor()) - { - n_curr_out_glo_data.clear(); - Norm_glo_data.clear(); - } - - if (compute_current_flag) - { - NS->Compute_Current(); - NS->Write_Current(m_step, Vds, Vgs, total_intg_pts_in_all_iter, - max_iter, Broyden_fraction, Broyden_Scalar); - } } -template -void c_TransportSolver::Create_Global_Output_Data(NSType const &NS) -{ -#ifdef BROYDEN_SKIP_GPU_OPTIMIZATION - auto const &h_n_curr_out = h_n_curr_out_data.table(); - auto const &h_Norm = h_Norm_data.table(); -#else - /*only select data need to be copied for multiple NS*/ +void c_TransportSolver::Copy_DataToBeWrittenToHost(int NS_id) { - int begin = site_size_loc_cumulative[NS->get_NS_Id()]; - int end = site_size_loc_cumulative[NS->get_NS_Id() + 1]; - int site_size_loc = end - begin; + #ifdef BROYDEN_SKIP_GPU_OPTIMIZATION + auto const &h_n_curr_out = h_n_curr_out_data.table(); + auto const &h_Norm = h_Norm_data.table(); + #else + /*only select data need to be copied for multiple NS*/ - amrex::Print() << "Create_Global: begin/end/site_size_loc: " << begin << " " - << end << " " << site_size_loc << "\n"; + int begin = site_size_loc_cumulative[NS_id]; + int end = site_size_loc_cumulative[NS_id + 1]; + int site_size_loc = end - begin; - h_n_curr_out_data.resize({0}, {site_size_loc}, The_Pinned_Arena()); - h_Norm_data.resize({0}, {site_size_loc}, The_Pinned_Arena()); + amrex::Print() << "Create_Global: begin/end/site_size_loc: " << begin << " " + << end << " " << site_size_loc << "\n"; - auto const &h_n_curr_out = h_n_curr_out_data.table(); - auto const &h_Norm = h_Norm_data.table(); + h_n_curr_out_data.resize({0}, {site_size_loc}, The_Pinned_Arena()); + h_Norm_data.resize({0}, {site_size_loc}, The_Pinned_Arena()); - auto const &d_n_curr_out = d_n_curr_out_data.const_table(); - auto const &d_Norm = d_Norm_data.const_table(); + auto const &h_n_curr_out = h_n_curr_out_data.table(); + auto const &h_Norm = h_Norm_data.table(); - amrex::Gpu::copyAsync(amrex::Gpu::deviceToHost, d_n_curr_out.p + begin, - d_n_curr_out.p + end, h_n_curr_out.p); + auto const &d_n_curr_out = d_n_curr_out_data.const_table(); + auto const &d_Norm = d_Norm_data.const_table(); - amrex::Gpu::copyAsync(amrex::Gpu::deviceToHost, d_Norm.p + begin, - d_Norm.p + end, h_Norm.p); + amrex::Gpu::copyAsync(amrex::Gpu::deviceToHost, d_n_curr_out.p + begin, + d_n_curr_out.p + end, h_n_curr_out.p); - amrex::Gpu::streamSynchronize(); + amrex::Gpu::copyAsync(amrex::Gpu::deviceToHost, d_Norm.p + begin, + d_Norm.p + end, h_Norm.p); - amrex::Print() << "h_n_curr_out(0): " << h_n_curr_out(0) << "\n"; - amrex::Print() << "h_Norm(0): " << h_Norm(0) << "\n"; -#endif + amrex::Gpu::streamSynchronize(); - if (ParallelDescriptor::IOProcessor()) - { - const int Hsize = NS->get_num_field_sites(); - n_curr_out_glo_data.resize({0}, {Hsize}, The_Pinned_Arena()); - Norm_glo_data.resize({0}, {Hsize}, The_Pinned_Arena()); - } + amrex::Print() << "h_n_curr_out(0): " << h_n_curr_out(0) << "\n"; + amrex::Print() << "h_Norm(0): " << h_Norm(0) << "\n"; + #endif - auto const &n_curr_out_glo = n_curr_out_glo_data.table(); - auto const &Norm_glo = Norm_glo_data.table(); - - /*offset necessary for multiple NS*/ - MPI_Gatherv(&h_n_curr_out(0), NS->MPI_recv_count[my_rank], MPI_DOUBLE, - &n_curr_out_glo(0), NS->MPI_recv_count.data(), - NS->MPI_recv_disp.data(), MPI_DOUBLE, - ParallelDescriptor::IOProcessorNumber(), - ParallelDescriptor::Communicator()); - - /*offset necessary for multiple NS*/ - MPI_Gatherv(&h_Norm(0), NS->MPI_recv_count[my_rank], MPI_DOUBLE, - &Norm_glo(0), NS->MPI_recv_count.data(), - NS->MPI_recv_disp.data(), MPI_DOUBLE, - ParallelDescriptor::IOProcessorNumber(), - ParallelDescriptor::Communicator()); - - // if (ParallelDescriptor::IOProcessor()) - //{ - // amrex::Print() << "\nPrinting n_curr_out_glo_data \n"; - // for (int n=0; n