diff --git a/src/parcsr_ls/HYPRE_parcsr_ls.h b/src/parcsr_ls/HYPRE_parcsr_ls.h index a507781ae7..7da26a8c07 100644 --- a/src/parcsr_ls/HYPRE_parcsr_ls.h +++ b/src/parcsr_ls/HYPRE_parcsr_ls.h @@ -4145,6 +4145,7 @@ HYPRE_MGRSetLevelFRelaxType(HYPRE_Solver solver, * Options for \e cg_method are: * * - 0 : Galerkin coarse grid computation using RAP. + * - 5 : Galerkin coarse grid computation using RAI (injective prolongation). * - 1 - 4 : Non-Galerkin coarse grid computation with dropping strategy. * - 1: inv(A_FF) approximated by its (block) diagonal inverse * - 2: CPR-like approximation with inv(A_FF) approximated by its diagonal inverse @@ -4178,6 +4179,7 @@ HYPRE_MGRSetLevelFRelaxNumFunctions(HYPRE_Solver solver, * - 5 : pAIR distance 2 * - 12 : Block Jacobi * - 13 : CPR-like restriction operator + * - 14 : (Block) Column-lumped restriction * - else : use classical modified interpolation * * The default is injection. diff --git a/src/parcsr_ls/_hypre_parcsr_ls.h b/src/parcsr_ls/_hypre_parcsr_ls.h index 220f272efa..70bcfb5d6c 100644 --- a/src/parcsr_ls/_hypre_parcsr_ls.h +++ b/src/parcsr_ls/_hypre_parcsr_ls.h @@ -3621,6 +3621,7 @@ HYPRE_Int hypre_MGRBlockRelaxSolve( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Int n_block, HYPRE_Int left_size, HYPRE_Int method, HYPRE_Real *diaginv, hypre_ParVector *Vtemp ); +HYPRE_Int hypre_BlockDiagInvLapack( HYPRE_Real *diag, HYPRE_Int N, HYPRE_Int blk_size ); HYPRE_Int hypre_MGRBlockRelaxSetup( hypre_ParCSRMatrix *A, HYPRE_Int blk_size, HYPRE_Real **diaginvptr ); //HYPRE_Int hypre_blockRelax(hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_ParVector *u, @@ -3647,12 +3648,14 @@ HYPRE_Int hypre_MGRAddVectorR( hypre_IntArray *CF_marker, HYPRE_Int point_type, hypre_ParVector **toVector ); HYPRE_Int hypre_MGRTruncateAcfCPRDevice( hypre_ParCSRMatrix *A_CF, hypre_ParCSRMatrix **A_CF_new_ptr ); -HYPRE_Int hypre_MGRTruncateAcfCPR( hypre_ParCSRMatrix *A_CF, hypre_ParCSRMatrix **A_CF_new_ptr ); -HYPRE_Int 
hypre_MGRComputeNonGalerkinCoarseGrid( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *P, - hypre_ParCSRMatrix *RT, HYPRE_Int bsize, - HYPRE_Int ordering, HYPRE_Int method, - HYPRE_Int Pmax, HYPRE_Int *CF_marker, - hypre_ParCSRMatrix **A_H_ptr ); +HYPRE_Int hypre_MGRComputeNonGalerkinCoarseGrid(hypre_ParCSRMatrix *A_FF, + hypre_ParCSRMatrix *A_FC, + hypre_ParCSRMatrix *A_CF, + hypre_ParCSRMatrix *A_CC, + hypre_ParCSRMatrix *Wp, hypre_ParCSRMatrix *Wr, + HYPRE_Int bsize, HYPRE_Int ordering, + HYPRE_Int method, HYPRE_Int max_elmts, + hypre_ParCSRMatrix **A_H_ptr); HYPRE_Int hypre_MGRSetAffSolverType( void *systg_vdata, HYPRE_Int *aff_solver_type ); HYPRE_Int hypre_MGRSetCoarseSolverType( void *systg_vdata, HYPRE_Int coarse_solver_type ); HYPRE_Int hypre_MGRSetCoarseSolverIter( void *systg_vdata, HYPRE_Int coarse_solver_iter ); @@ -3710,23 +3713,23 @@ HYPRE_Int hypre_MGRBuildInterp( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *A_FF, hypre_ParCSRMatrix **P_tr, HYPRE_Int method, HYPRE_Int num_sweeps_post ); HYPRE_Int hypre_MGRBuildRestrict( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *A_FF, - hypre_ParCSRMatrix *A_FC, HYPRE_Int *CF_marker, - HYPRE_BigInt *num_cpts_global, HYPRE_Real trunc_factor, - HYPRE_Int max_elmts, HYPRE_Real strong_threshold, - HYPRE_Real max_row_sum, HYPRE_Int blk_size, - hypre_ParCSRMatrix **RT, HYPRE_Int method ); + hypre_ParCSRMatrix *A_FC, hypre_ParCSRMatrix *A_CF, + hypre_IntArray *CF_marker, HYPRE_BigInt *num_cpts_global, + HYPRE_Real trunc_factor, HYPRE_Int max_elmts, + HYPRE_Real strong_threshold, HYPRE_Real max_row_sum, + HYPRE_Int blk_size, HYPRE_Int method, + hypre_ParCSRMatrix **W_ptr, hypre_ParCSRMatrix **R_ptr, + hypre_ParCSRMatrix **RT_ptr ); HYPRE_Int hypre_MGRBuildPFromWp( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *Wp, HYPRE_Int *CF_marker, hypre_ParCSRMatrix **P_ptr ); HYPRE_Int hypre_MGRBuildPFromWpHost( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *Wp, HYPRE_Int *CF_marker, hypre_ParCSRMatrix **P_ptr ); HYPRE_Int 
hypre_MGRBuildBlockJacobiWp( hypre_ParCSRMatrix *A_FF, hypre_ParCSRMatrix *A_FC, - HYPRE_Int blk_size, HYPRE_Int *CF_marker, - HYPRE_BigInt *cpts_starts_in, - hypre_ParCSRMatrix **Wp_ptr ); + HYPRE_Int blk_size, hypre_ParCSRMatrix **Wp_ptr ); HYPRE_Int hypre_MGRBuildPBlockJacobi( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *A_FF, hypre_ParCSRMatrix *A_FC, hypre_ParCSRMatrix *Wp, HYPRE_Int blk_size, HYPRE_Int *CF_marker, - HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix **P_ptr ); + hypre_ParCSRMatrix **P_ptr ); HYPRE_Int hypre_MGRBuildP( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *num_cpts_global, HYPRE_Int method, HYPRE_Int debug_flag, hypre_ParCSRMatrix **P_ptr ); @@ -3736,13 +3739,18 @@ HYPRE_Int hypre_MGRBuildPHost( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_Int hypre_MGRBuildInterpApproximateInverse( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *num_cpts_global, hypre_ParCSRMatrix **P_ptr ); -HYPRE_Int hypre_MGRGetAcfCPR( hypre_ParCSRMatrix *A, HYPRE_Int blk_size, - HYPRE_Int *c_marker, HYPRE_Int *f_marker, - hypre_ParCSRMatrix **A_CF_ptr ); HYPRE_Int hypre_MGRTruncateAcfCPR( hypre_ParCSRMatrix *A_CF, hypre_ParCSRMatrix **A_CF_new_ptr ); -HYPRE_Int hypre_MGRBuildRFromW( HYPRE_Int *C_map, HYPRE_Int *F_map, HYPRE_BigInt *row_starts_R, - HYPRE_BigInt *col_starts_R, hypre_ParCSRMatrix *W, - hypre_ParCSRMatrix **R_ptr ); +HYPRE_Int hypre_MGRBuildRFromW( HYPRE_Int *C_map, HYPRE_Int *F_map, + HYPRE_BigInt global_num_rows_R, HYPRE_BigInt global_num_cols_R, + HYPRE_BigInt *row_starts_R, HYPRE_BigInt *col_starts_R, + hypre_ParCSRMatrix *W, hypre_ParCSRMatrix **R_ptr ); +HYPRE_Int hypre_MGRBlockColLumpedRestrict( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *A_FF, + hypre_ParCSRMatrix *A_CF, hypre_IntArray *CF_marker, + HYPRE_Int block_dim, hypre_ParCSRMatrix **W_ptr, + hypre_ParCSRMatrix **R_ptr); +HYPRE_Int hypre_MGRColLumpedRestrict(hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *A_FF, + hypre_ParCSRMatrix *A_CF, hypre_IntArray *CF_marker, + 
hypre_ParCSRMatrix **W_ptr, hypre_ParCSRMatrix **R_ptr); /* par_mgr_coarsen.c */ HYPRE_Int hypre_MGRCoarseParms( MPI_Comm comm, HYPRE_Int num_rows, hypre_IntArray *CF_marker, @@ -3770,6 +3778,7 @@ HYPRE_Int hypre_ParCSRMatrixBlockDiagMatrixDevice( hypre_ParCSRMatrix *A, HYPRE_ hypre_ParCSRMatrix **B_ptr ); HYPRE_Int hypre_MGRComputeNonGalerkinCGDevice( hypre_ParCSRMatrix *A_FF, hypre_ParCSRMatrix *A_FC, hypre_ParCSRMatrix *A_CF, hypre_ParCSRMatrix *A_CC, + hypre_ParCSRMatrix *Wp, hypre_ParCSRMatrix *Wr, HYPRE_Int blk_size, HYPRE_Int method, HYPRE_Complex threshold, hypre_ParCSRMatrix **A_H_ptr ); diff --git a/src/parcsr_ls/par_mgr.c b/src/parcsr_ls/par_mgr.c index f4dce4a01c..85b446fd16 100644 --- a/src/parcsr_ls/par_mgr.c +++ b/src/parcsr_ls/par_mgr.c @@ -54,6 +54,7 @@ hypre_MGRCreate(void) (mgr_data -> P_FF_array) = NULL; #endif (mgr_data -> P_array) = NULL; + (mgr_data -> R_array) = NULL; (mgr_data -> RT_array) = NULL; (mgr_data -> RAP) = NULL; (mgr_data -> CF_marker_array) = NULL; @@ -236,7 +237,8 @@ hypre_MGRDestroy( void *data ) } /* linear system and cf marker array */ - if (mgr_data -> A_array || mgr_data -> P_array || mgr_data -> RT_array || + if (mgr_data -> A_array || mgr_data -> P_array || + mgr_data -> RT_array || mgr_data -> R_array || mgr_data -> CF_marker_array) { for (i = 1; i < num_coarse_levels + 1; i++) @@ -249,6 +251,11 @@ hypre_MGRDestroy( void *data ) hypre_ParCSRMatrixDestroy((mgr_data -> P_array)[i - 1]); } + if ((mgr_data -> R_array)[i - 1]) + { + hypre_ParCSRMatrixDestroy((mgr_data -> R_array)[i - 1]); + } + if ((mgr_data -> RT_array)[i - 1]) { hypre_ParCSRMatrixDestroy((mgr_data -> RT_array)[i - 1]); @@ -375,6 +382,7 @@ hypre_MGRDestroy( void *data ) hypre_TFree((mgr_data -> B_array), HYPRE_MEMORY_HOST); hypre_TFree((mgr_data -> B_FF_array), HYPRE_MEMORY_HOST); hypre_TFree((mgr_data -> P_array), HYPRE_MEMORY_HOST); + hypre_TFree((mgr_data -> R_array), HYPRE_MEMORY_HOST); hypre_TFree((mgr_data -> RT_array), HYPRE_MEMORY_HOST); 
hypre_TFree((mgr_data -> CF_marker_array), HYPRE_MEMORY_HOST); hypre_TFree((mgr_data -> reserved_Cpoint_local_indexes), HYPRE_MEMORY_HOST); @@ -989,10 +997,6 @@ hypre_MGRCoarsen(hypre_ParCSRMatrix *S, return hypre_error_flag; } - - - - /* Scale ParCSR matrix A = scalar * A * A: the target CSR matrix * vector: array of real numbers @@ -1031,216 +1035,190 @@ hypre_ParCSRMatrixLeftScale(HYPRE_Real *vector, /*-------------------------------------------------------------------------- * hypre_MGRComputeNonGalerkinCoarseGrid * - * Computes the level (grid) operator A_H = RAP, assuming that restriction - * equals to the injection operator: R = [0 I] + * Computes the level (grid) operator A_H = RAP. * * Available methods: * 1: inv(A_FF) approximated by its (block) diagonal inverse * 2: CPR-like approx. with inv(A_FF) approx. by its diagonal inverse * 3: CPR-like approx. with inv(A_FF) approx. by its block diagonal inverse * 4: inv(A_FF) approximated by sparse approximate inverse + * 5: Uses classical restriction R = [-Wr I] from input parameters list. + * + * Methods 1-4 assume that restriction is the injection operator. + * Method 5 assumes that interpolation is the injection operator. * * TODO (VPM): Can we have a single function that works for host and device? - * RT is not being used. 
*--------------------------------------------------------------------------*/ HYPRE_Int -hypre_MGRComputeNonGalerkinCoarseGrid(hypre_ParCSRMatrix *A, +hypre_MGRComputeNonGalerkinCoarseGrid(hypre_ParCSRMatrix *A_FF, + hypre_ParCSRMatrix *A_FC, + hypre_ParCSRMatrix *A_CF, + hypre_ParCSRMatrix *A_CC, hypre_ParCSRMatrix *Wp, - hypre_ParCSRMatrix *RT, + hypre_ParCSRMatrix *Wr, HYPRE_Int bsize, HYPRE_Int ordering, HYPRE_Int method, - HYPRE_Int Pmax, - HYPRE_Int *CF_marker, + HYPRE_Int max_elmts, hypre_ParCSRMatrix **A_H_ptr) { - HYPRE_UNUSED_VAR(RT); - - HYPRE_Int *c_marker, *f_marker; - HYPRE_Int n_local_fine_grid, i, i1, jj; - hypre_ParCSRMatrix *A_cc = NULL; - hypre_ParCSRMatrix *A_ff = NULL; - hypre_ParCSRMatrix *A_fc = NULL; - hypre_ParCSRMatrix *A_cf = NULL; - hypre_ParCSRMatrix *A_ff_inv = NULL; - hypre_ParCSRMatrix *A_H = NULL; - hypre_ParCSRMatrix *A_H_correction = NULL; - HYPRE_Int max_elmts = Pmax; - HYPRE_Real alpha = -1.0; - - HYPRE_BigInt coarse_pnts_global[2]; - HYPRE_BigInt fine_pnts_global[2]; - hypre_IntArray *marker_array = NULL; - // HYPRE_Real wall_time = 0.; - // HYPRE_Real wall_time_1 = 0.; + HYPRE_MemoryLocation memory_location = hypre_ParCSRMatrixMemoryLocation(A_FF); - HYPRE_Int my_id; - MPI_Comm comm = hypre_ParCSRMatrixComm(A); - hypre_MPI_Comm_rank(comm, &my_id); - HYPRE_MemoryLocation memory_location = hypre_ParCSRMatrixMemoryLocation(A); + hypre_ParCSRMatrix *A_H = NULL; + hypre_ParCSRMatrix *A_Hc = NULL; + hypre_ParCSRMatrix *Wp_tmp = NULL; + hypre_ParCSRMatrix *Wr_tmp = NULL; + hypre_ParCSRMatrix *A_CF_truncated = NULL; + hypre_ParCSRMatrix *A_FF_inv = NULL; + hypre_ParCSRMatrix *minus_Wp = NULL; - //wall_time = time_getWallclockSeconds(); - n_local_fine_grid = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A)); - c_marker = hypre_CTAlloc(HYPRE_Int, n_local_fine_grid, memory_location); - f_marker = hypre_CTAlloc(HYPRE_Int, n_local_fine_grid, memory_location); - - for (i = 0; i < n_local_fine_grid; i++) - { - HYPRE_Int point_type = 
CF_marker[i]; - //hypre_assert(point_type == 1 || point_type == -1); - c_marker[i] = point_type; - f_marker[i] = -point_type; - } - // get local range for C and F points - // Set IntArray pointers to obtain global row and col (start) ranges - marker_array = hypre_IntArrayCreate(n_local_fine_grid); - hypre_IntArrayMemoryLocation(marker_array) = memory_location; - hypre_IntArrayData(marker_array) = c_marker; - // get range for c_points - hypre_BoomerAMGCoarseParms(comm, n_local_fine_grid, 1, NULL, marker_array, NULL, - coarse_pnts_global); - // get range for f_points - hypre_IntArrayData(marker_array) = f_marker; - hypre_BoomerAMGCoarseParms(comm, n_local_fine_grid, 1, NULL, marker_array, NULL, fine_pnts_global); - - // Generate A_FF, A_FC, A_CC and A_CF submatrices. - // Note: Not all submatrices are needed for each method below. - // hypre_ParCSRMatrixGenerateFFFC computes A_FF and A_FC given the CF_marker and start locations for the global C-points. - // To compute A_CC and A_CF, we need to pass in equivalent information for F-points. (i.e. CF_marker marking F-points - // and start locations for the global F-points. - hypre_ParCSRMatrixGenerateFFFC(A, c_marker, coarse_pnts_global, NULL, &A_fc, &A_ff); - hypre_ParCSRMatrixGenerateFFFC(A, f_marker, fine_pnts_global, NULL, &A_cf, &A_cc); + HYPRE_Int i, i1, jj; + HYPRE_Int blk_inv_size; + HYPRE_Real neg_one = -1.0; + HYPRE_Real one = 1.0; if (method == 1) { if (Wp != NULL) { - A_H_correction = hypre_ParCSRMatMat(A_cf, Wp); + A_Hc = hypre_ParCSRMatMat(A_CF, Wp); } else { // Build block diagonal inverse for A_FF - hypre_ParCSRMatrixBlockDiagMatrix(A_ff, 1, -1, NULL, 1, &A_ff_inv); - // compute Wp = A_ff_inv * A_fc + hypre_ParCSRMatrixBlockDiagMatrix(A_FF, 1, -1, NULL, 1, &A_FF_inv); + + // compute Wp = A_FF_inv * A_FC // NOTE: Use hypre_ParMatmul here instead of hypre_ParCSRMatMat to avoid padding // zero entries at diagonals for the latter routine. 
Use MatMat once this padding // issue is resolved since it is more efficient. - // hypre_ParCSRMatrix *Wp_tmp = hypre_ParCSRMatMat(A_ff_inv, A_fc); - hypre_ParCSRMatrix *Wp_tmp = hypre_ParMatmul(A_ff_inv, A_fc); - // compute correction A_H_correction = A_cf * (A_ff_inv * A_fc); - // A_H_correction = hypre_ParMatmul(A_cf, Wp_tmp); - A_H_correction = hypre_ParCSRMatMat(A_cf, Wp_tmp); + // hypre_ParCSRMatrix *Wp_tmp = hypre_ParCSRMatMat(A_FF_inv, A_FC); + Wp_tmp = hypre_ParMatmul(A_FF_inv, A_FC); + + /* Compute correction A_Hc = A_CF * (A_FF_inv * A_FC); */ + A_Hc = hypre_ParCSRMatMat(A_CF, Wp_tmp); hypre_ParCSRMatrixDestroy(Wp_tmp); - hypre_ParCSRMatrixDestroy(A_ff_inv); + hypre_ParCSRMatrixDestroy(A_FF_inv); } } else if (method == 2 || method == 3) { - // extract the diagonal of A_cf - hypre_ParCSRMatrix *A_cf_truncated = NULL; - hypre_MGRGetAcfCPR(A, bsize, c_marker, f_marker, &A_cf_truncated); + /* Extract the diagonal of A_CF */ + hypre_MGRTruncateAcfCPR(A_CF, &A_CF_truncated); if (Wp != NULL) { - A_H_correction = hypre_ParCSRMatMat(A_cf_truncated, Wp); + A_Hc = hypre_ParCSRMatMat(A_CF_truncated, Wp); } else { - HYPRE_Int blk_inv_size = method == 2 ? 1 : bsize; - hypre_ParCSRMatrixBlockDiagMatrix(A_ff, blk_inv_size, -1, NULL, 1, &A_ff_inv); - hypre_ParCSRMatrix *Wr = NULL; - Wr = hypre_ParCSRMatMat(A_cf_truncated, A_ff_inv); - A_H_correction = hypre_ParCSRMatMat(Wr, A_fc); - hypre_ParCSRMatrixDestroy(Wr); - hypre_ParCSRMatrixDestroy(A_ff_inv); + blk_inv_size = method == 2 ? 
1 : bsize; + hypre_ParCSRMatrixBlockDiagMatrix(A_FF, blk_inv_size, -1, NULL, 1, &A_FF_inv); + + /* TODO (VPM): We shouldn't need to compute Wr_tmp since we are passing in Wr already */ + Wr_tmp = hypre_ParCSRMatMat(A_CF_truncated, A_FF_inv); + A_Hc = hypre_ParCSRMatMat(Wr_tmp, A_FC); + hypre_ParCSRMatrixDestroy(Wr_tmp); + hypre_ParCSRMatrixDestroy(A_FF_inv); } - hypre_ParCSRMatrixDestroy(A_cf_truncated); + hypre_ParCSRMatrixDestroy(A_CF_truncated); } else if (method == 4) { - // Approximate inverse for ideal interploation - hypre_ParCSRMatrix *A_ff_inv = NULL; - hypre_ParCSRMatrix *minus_Wp = NULL; - hypre_MGRApproximateInverse(A_ff, &A_ff_inv); - minus_Wp = hypre_ParCSRMatMat(A_ff_inv, A_fc); - A_H_correction = hypre_ParCSRMatMat(A_cf, minus_Wp); + /* Approximate inverse for ideal interploation */ + hypre_MGRApproximateInverse(A_FF, &A_FF_inv); + + minus_Wp = hypre_ParCSRMatMat(A_FF_inv, A_FC); + A_Hc = hypre_ParCSRMatMat(A_CF, minus_Wp); hypre_ParCSRMatrixDestroy(minus_Wp); } - // Free data - hypre_ParCSRMatrixDestroy(A_ff); - hypre_ParCSRMatrixDestroy(A_fc); - hypre_ParCSRMatrixDestroy(A_cf); + else if (method == 5) + { + if (!Wr) + { + hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Expected Wr matrix!"); + return hypre_error_flag; + } - // perform dropping for A_H_correction - // specific to multiphase poromechanics - // we only keep the diagonal of each block - HYPRE_Int n_local_cpoints = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A_H_correction)); + /* A_Hc = Wr * A_FC */ + A_Hc = hypre_ParCSRMatMat(Wr, A_FC); + } - hypre_CSRMatrix *A_H_correction_diag = hypre_ParCSRMatrixDiag(A_H_correction); - HYPRE_Real *A_H_correction_diag_data = hypre_CSRMatrixData(A_H_correction_diag); - HYPRE_Int *A_H_correction_diag_i = hypre_CSRMatrixI(A_H_correction_diag); - HYPRE_Int *A_H_correction_diag_j = hypre_CSRMatrixJ(A_H_correction_diag); - HYPRE_Int ncol_diag = hypre_CSRMatrixNumCols(A_H_correction_diag); + /* Drop small entries in the correction term A_Hc */ + if (max_elmts > 
0) + { + // perform dropping for A_Hc + // specific to multiphase poromechanics + // we only keep the diagonal of each block + HYPRE_Int n_local_cpoints = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A_Hc)); - hypre_CSRMatrix *A_H_correction_offd = hypre_ParCSRMatrixOffd(A_H_correction); - HYPRE_Real *A_H_correction_offd_data = hypre_CSRMatrixData(A_H_correction_offd); - HYPRE_Int *A_H_correction_offd_i = hypre_CSRMatrixI(A_H_correction_offd); - HYPRE_Int *A_H_correction_offd_j = hypre_CSRMatrixJ(A_H_correction_offd); + hypre_CSRMatrix *A_Hc_diag = hypre_ParCSRMatrixDiag(A_Hc); + HYPRE_Complex *A_Hc_diag_a = hypre_CSRMatrixData(A_Hc_diag); + HYPRE_Int *A_Hc_diag_i = hypre_CSRMatrixI(A_Hc_diag); + HYPRE_Int *A_Hc_diag_j = hypre_CSRMatrixJ(A_Hc_diag); + HYPRE_Int ncol_diag = hypre_CSRMatrixNumCols(A_Hc_diag); + + hypre_CSRMatrix *A_Hc_offd = hypre_ParCSRMatrixOffd(A_Hc); + HYPRE_Complex *A_Hc_offd_a = hypre_CSRMatrixData(A_Hc_offd); + HYPRE_Int *A_Hc_offd_i = hypre_CSRMatrixI(A_Hc_offd); + HYPRE_Int *A_Hc_offd_j = hypre_CSRMatrixJ(A_Hc_offd); - // drop small entries in the correction - if (Pmax > 0) - { if (ordering == 0) // interleaved ordering { - HYPRE_Int *A_H_correction_diag_i_new = hypre_CTAlloc(HYPRE_Int, n_local_cpoints + 1, - memory_location); - HYPRE_Int *A_H_correction_diag_j_new = hypre_CTAlloc(HYPRE_Int, - (bsize + max_elmts) * n_local_cpoints, memory_location); - HYPRE_Complex *A_H_correction_diag_data_new = hypre_CTAlloc(HYPRE_Complex, - (bsize + max_elmts) * n_local_cpoints, memory_location); - HYPRE_Int num_nonzeros_diag_new = 0; - - HYPRE_Int *A_H_correction_offd_i_new = hypre_CTAlloc(HYPRE_Int, n_local_cpoints + 1, - memory_location); - HYPRE_Int *A_H_correction_offd_j_new = hypre_CTAlloc(HYPRE_Int, max_elmts * n_local_cpoints, - memory_location); - HYPRE_Complex *A_H_correction_offd_data_new = hypre_CTAlloc(HYPRE_Complex, - max_elmts * n_local_cpoints, memory_location); - HYPRE_Int num_nonzeros_offd_new = 0; - + HYPRE_Int *A_Hc_diag_i_new, 
*A_Hc_diag_j_new; + HYPRE_Complex *A_Hc_diag_a_new; + HYPRE_Int num_nonzeros_diag_new = 0; + + HYPRE_Int *A_Hc_offd_i_new, *A_Hc_offd_j_new; + HYPRE_Complex *A_Hc_offd_a_new; + HYPRE_Int num_nonzeros_offd_new = 0; + + /* Allocate new memory */ + A_Hc_diag_i_new = hypre_CTAlloc(HYPRE_Int, n_local_cpoints + 1, memory_location); + A_Hc_diag_j_new = hypre_CTAlloc(HYPRE_Int, (bsize + max_elmts) * n_local_cpoints, + memory_location); + A_Hc_diag_a_new = hypre_CTAlloc(HYPRE_Complex, (bsize + max_elmts) * n_local_cpoints, + memory_location); + A_Hc_offd_i_new = hypre_CTAlloc(HYPRE_Int, n_local_cpoints + 1, memory_location); + A_Hc_offd_j_new = hypre_CTAlloc(HYPRE_Int, max_elmts * n_local_cpoints, + memory_location); + A_Hc_offd_a_new = hypre_CTAlloc(HYPRE_Complex, max_elmts * n_local_cpoints, + memory_location); for (i = 0; i < n_local_cpoints; i++) { - HYPRE_Int max_num_nonzeros = A_H_correction_diag_i[i + 1] - A_H_correction_diag_i[i] + - A_H_correction_offd_i[i + 1] - A_H_correction_offd_i[i]; - HYPRE_Int *aux_j = hypre_CTAlloc(HYPRE_Int, max_num_nonzeros, memory_location); - HYPRE_Real *aux_data = hypre_CTAlloc(HYPRE_Real, max_num_nonzeros, memory_location); - HYPRE_Int row_start = i - (i % bsize); - HYPRE_Int row_stop = row_start + bsize - 1; - HYPRE_Int cnt = 0; - for (jj = A_H_correction_offd_i[i]; jj < A_H_correction_offd_i[i + 1]; jj++) + HYPRE_Int max_num_nonzeros = A_Hc_diag_i[i + 1] - A_Hc_diag_i[i] + + A_Hc_offd_i[i + 1] - A_Hc_offd_i[i]; + HYPRE_Int *aux_j = hypre_CTAlloc(HYPRE_Int, max_num_nonzeros, memory_location); + HYPRE_Real *aux_data = hypre_CTAlloc(HYPRE_Real, max_num_nonzeros, memory_location); + HYPRE_Int row_start = i - (i % bsize); + HYPRE_Int row_stop = row_start + bsize - 1; + HYPRE_Int cnt = 0; + + for (jj = A_Hc_offd_i[i]; jj < A_Hc_offd_i[i + 1]; jj++) { - aux_j[cnt] = A_H_correction_offd_j[jj] + ncol_diag; - aux_data[cnt] = A_H_correction_offd_data[jj]; + aux_j[cnt] = A_Hc_offd_j[jj] + ncol_diag; + aux_data[cnt] = A_Hc_offd_a[jj]; cnt++; } - 
for (jj = A_H_correction_diag_i[i]; jj < A_H_correction_diag_i[i + 1]; jj++) + + for (jj = A_Hc_diag_i[i]; jj < A_Hc_diag_i[i + 1]; jj++) { - aux_j[cnt] = A_H_correction_diag_j[jj]; - aux_data[cnt] = A_H_correction_diag_data[jj]; + aux_j[cnt] = A_Hc_diag_j[jj]; + aux_data[cnt] = A_Hc_diag_a[jj]; cnt++; } hypre_qsort2_abs(aux_j, aux_data, 0, cnt - 1); - for (jj = A_H_correction_diag_i[i]; jj < A_H_correction_diag_i[i + 1]; jj++) + for (jj = A_Hc_diag_i[i]; jj < A_Hc_diag_i[i + 1]; jj++) { - i1 = A_H_correction_diag_j[jj]; + i1 = A_Hc_diag_j[jj]; if (i1 >= row_start && i1 <= row_stop) { // copy data to new arrays - A_H_correction_diag_j_new[num_nonzeros_diag_new] = i1; - A_H_correction_diag_data_new[num_nonzeros_diag_new] = A_H_correction_diag_data[jj]; + A_Hc_diag_j_new[num_nonzeros_diag_new] = i1; + A_Hc_diag_a_new[num_nonzeros_diag_new] = A_Hc_diag_a[jj]; ++num_nonzeros_diag_new; } else @@ -1253,64 +1231,57 @@ hypre_MGRComputeNonGalerkinCoarseGrid(hypre_ParCSRMatrix *A, { for (jj = 0; jj < hypre_min(max_elmts, cnt); jj++) { - HYPRE_Int col_idx = aux_j[jj]; + HYPRE_Int col_idx = aux_j[jj]; HYPRE_Real col_value = aux_data[jj]; if (col_idx < ncol_diag && (col_idx < row_start || col_idx > row_stop)) { - A_H_correction_diag_j_new[num_nonzeros_diag_new] = col_idx; - A_H_correction_diag_data_new[num_nonzeros_diag_new] = col_value; + A_Hc_diag_j_new[num_nonzeros_diag_new] = col_idx; + A_Hc_diag_a_new[num_nonzeros_diag_new] = col_value; ++num_nonzeros_diag_new; } else if (col_idx >= ncol_diag) { - A_H_correction_offd_j_new[num_nonzeros_offd_new] = col_idx - ncol_diag; - A_H_correction_offd_data_new[num_nonzeros_offd_new] = col_value; + A_Hc_offd_j_new[num_nonzeros_offd_new] = col_idx - ncol_diag; + A_Hc_offd_a_new[num_nonzeros_offd_new] = col_value; ++num_nonzeros_offd_new; } } } - A_H_correction_diag_i_new[i + 1] = num_nonzeros_diag_new; - A_H_correction_offd_i_new[i + 1] = num_nonzeros_offd_new; + A_Hc_diag_i_new[i + 1] = num_nonzeros_diag_new; + A_Hc_offd_i_new[i + 1] = 
num_nonzeros_offd_new; hypre_TFree(aux_j, memory_location); hypre_TFree(aux_data, memory_location); } - hypre_TFree(A_H_correction_diag_i, memory_location); - hypre_TFree(A_H_correction_diag_j, memory_location); - hypre_TFree(A_H_correction_diag_data, memory_location); - hypre_CSRMatrixI(A_H_correction_diag) = A_H_correction_diag_i_new; - hypre_CSRMatrixJ(A_H_correction_diag) = A_H_correction_diag_j_new; - hypre_CSRMatrixData(A_H_correction_diag) = A_H_correction_diag_data_new; - hypre_CSRMatrixNumNonzeros(A_H_correction_diag) = num_nonzeros_diag_new; - - if (A_H_correction_offd_i) { hypre_TFree(A_H_correction_offd_i, memory_location); } - if (A_H_correction_offd_j) { hypre_TFree(A_H_correction_offd_j, memory_location); } - if (A_H_correction_offd_data) { hypre_TFree(A_H_correction_offd_data, memory_location); } - hypre_CSRMatrixI(A_H_correction_offd) = A_H_correction_offd_i_new; - hypre_CSRMatrixJ(A_H_correction_offd) = A_H_correction_offd_j_new; - hypre_CSRMatrixData(A_H_correction_offd) = A_H_correction_offd_data_new; - hypre_CSRMatrixNumNonzeros(A_H_correction_offd) = num_nonzeros_offd_new; + hypre_TFree(A_Hc_diag_i, memory_location); + hypre_TFree(A_Hc_diag_j, memory_location); + hypre_TFree(A_Hc_diag_a, memory_location); + hypre_CSRMatrixI(A_Hc_diag) = A_Hc_diag_i_new; + hypre_CSRMatrixJ(A_Hc_diag) = A_Hc_diag_j_new; + hypre_CSRMatrixData(A_Hc_diag) = A_Hc_diag_a_new; + hypre_CSRMatrixNumNonzeros(A_Hc_diag) = num_nonzeros_diag_new; + + hypre_TFree(A_Hc_offd_i, memory_location); + hypre_TFree(A_Hc_offd_j, memory_location); + hypre_TFree(A_Hc_offd_a, memory_location); + hypre_CSRMatrixI(A_Hc_offd) = A_Hc_offd_i_new; + hypre_CSRMatrixJ(A_Hc_offd) = A_Hc_offd_j_new; + hypre_CSRMatrixData(A_Hc_offd) = A_Hc_offd_a_new; + hypre_CSRMatrixNumNonzeros(A_Hc_offd) = num_nonzeros_offd_new; } else { - // do nothing. Dropping not yet implemented for non-interleaved variable ordering options - // hypre_printf("Error!! 
Block ordering for non-Galerkin coarse grid is not currently supported\n"); - // exit(-1); + hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Non-interleaved dropping not implemented!"); + return hypre_error_flag; } } /* Coarse grid / Schur complement */ - alpha = -1.0; - hypre_ParCSRMatrixAdd(1.0, A_cc, alpha, A_H_correction, &A_H); + hypre_ParCSRMatrixAdd(one, A_CC, neg_one, A_Hc, &A_H); /* Free memory */ - hypre_ParCSRMatrixDestroy(A_cc); - hypre_ParCSRMatrixDestroy(A_H_correction); - hypre_TFree(c_marker, memory_location); - hypre_TFree(f_marker, memory_location); - // free IntArray. Note: IntArrayData was not initialized so need not be freed here (pointer is already freed elsewhere). - hypre_TFree(marker_array, memory_location); + hypre_ParCSRMatrixDestroy(A_Hc); /* Set output pointer */ *A_H_ptr = A_H; @@ -1471,35 +1442,58 @@ hypre_MGRApproximateInverse(hypre_ParCSRMatrix *A, return hypre_error_flag; } -/* TODO: move matrix inversion functions outside parcsr_ls (VPM) */ +/*-------------------------------------------------------------------------- + * hypre_blas_smat_inv_n2 + * + * TODO (VPM): move this function to seq_ls + *--------------------------------------------------------------------------*/ void hypre_blas_smat_inv_n2 (HYPRE_Real *a) { const HYPRE_Real a11 = a[0], a12 = a[1]; const HYPRE_Real a21 = a[2], a22 = a[3]; const HYPRE_Real det_inv = 1.0 / (a11 * a22 - a12 * a21); - a[0] = a22 * det_inv; a[1] = -a12 * det_inv; - a[2] = -a21 * det_inv; a[3] = a11 * det_inv; + + a[0] = a22 * det_inv; + a[1] = -a12 * det_inv; + a[2] = -a21 * det_inv; + a[3] = a11 * det_inv; } +/*-------------------------------------------------------------------------- + * hypre_blas_smat_inv_n3 + * + * TODO (VPM): move this function to seq_ls + *--------------------------------------------------------------------------*/ + void hypre_blas_smat_inv_n3 (HYPRE_Real *a) { const HYPRE_Real a11 = a[0], a12 = a[1], a13 = a[2]; const HYPRE_Real a21 = a[3], a22 = a[4], a23 = a[5]; const 
HYPRE_Real a31 = a[6], a32 = a[7], a33 = a[8]; - const HYPRE_Real det = a11 * a22 * a33 - a11 * a23 * a32 - a12 * a21 * a33 + a12 * a23 * a31 + a13 * - a21 * a32 - a13 * a22 * a31; + const HYPRE_Real det = a11 * a22 * a33 - a11 * a23 * a32 - + a12 * a21 * a33 + a12 * a23 * a31 + + a13 * a21 * a32 - a13 * a22 * a31; const HYPRE_Real det_inv = 1.0 / det; - a[0] = (a22 * a33 - a23 * a32) * det_inv; a[1] = (a13 * a32 - a12 * a33) * det_inv; + a[0] = (a22 * a33 - a23 * a32) * det_inv; + a[1] = (a13 * a32 - a12 * a33) * det_inv; a[2] = (a12 * a23 - a13 * a22) * det_inv; - a[3] = (a23 * a31 - a21 * a33) * det_inv; a[4] = (a11 * a33 - a13 * a31) * det_inv; + a[3] = (a23 * a31 - a21 * a33) * det_inv; + a[4] = (a11 * a33 - a13 * a31) * det_inv; a[5] = (a13 * a21 - a11 * a23) * det_inv; - a[6] = (a21 * a32 - a22 * a31) * det_inv; a[7] = (a12 * a31 - a11 * a32) * det_inv; + a[6] = (a21 * a32 - a22 * a31) * det_inv; + a[7] = (a12 * a31 - a11 * a32) * det_inv; a[8] = (a11 * a22 - a12 * a21) * det_inv; } +/*-------------------------------------------------------------------------- + * hypre_blas_smat_inv_n4 + * + * TODO (VPM): move this function to seq_ls + *--------------------------------------------------------------------------*/ + void hypre_blas_smat_inv_n4 (HYPRE_Real *a) { const HYPRE_Real a11 = a[0], a12 = a[1], a13 = a[2], a14 = a[3]; @@ -1507,41 +1501,72 @@ void hypre_blas_smat_inv_n4 (HYPRE_Real *a) const HYPRE_Real a31 = a[8], a32 = a[9], a33 = a[10], a34 = a[11]; const HYPRE_Real a41 = a[12], a42 = a[13], a43 = a[14], a44 = a[15]; - const HYPRE_Real M11 = a22 * a33 * a44 + a23 * a34 * a42 + a24 * a32 * a43 - a22 * a34 * a43 - a23 * - a32 * a44 - a24 * a33 * a42; - const HYPRE_Real M12 = a12 * a34 * a43 + a13 * a32 * a44 + a14 * a33 * a42 - a12 * a33 * a44 - a13 * - a34 * a42 - a14 * a32 * a43; - const HYPRE_Real M13 = a12 * a23 * a44 + a13 * a24 * a42 + a14 * a22 * a43 - a12 * a24 * a43 - a13 * - a22 * a44 - a14 * a23 * a42; - const HYPRE_Real M14 = a12 * a24 * a33 
+ a13 * a22 * a34 + a14 * a23 * a32 - a12 * a23 * a34 - a13 * - a24 * a32 - a14 * a22 * a33; - const HYPRE_Real M21 = a21 * a34 * a43 + a23 * a31 * a44 + a24 * a33 * a41 - a21 * a33 * a44 - a23 * - a34 * a41 - a24 * a31 * a43; - const HYPRE_Real M22 = a11 * a33 * a44 + a13 * a34 * a41 + a14 * a31 * a43 - a11 * a34 * a43 - a13 * - a31 * a44 - a14 * a33 * a41; - const HYPRE_Real M23 = a11 * a24 * a43 + a13 * a21 * a44 + a14 * a23 * a41 - a11 * a23 * a44 - a13 * - a24 * a41 - a14 * a21 * a43; - const HYPRE_Real M24 = a11 * a23 * a34 + a13 * a24 * a31 + a14 * a21 * a33 - a11 * a24 * a33 - a13 * - a21 * a34 - a14 * a23 * a31; - const HYPRE_Real M31 = a21 * a32 * a44 + a22 * a34 * a41 + a24 * a31 * a42 - a21 * a34 * a42 - a22 * - a31 * a44 - a24 * a32 * a41; - const HYPRE_Real M32 = a11 * a34 * a42 + a12 * a31 * a44 + a14 * a32 * a41 - a11 * a32 * a44 - a12 * - a34 * a41 - a14 * a31 * a42; - const HYPRE_Real M33 = a11 * a22 * a44 + a12 * a24 * a41 + a14 * a21 * a42 - a11 * a24 * a42 - a12 * - a21 * a44 - a14 * a22 * a41; - const HYPRE_Real M34 = a11 * a24 * a32 + a12 * a21 * a34 + a14 * a22 * a31 - a11 * a22 * a34 - a12 * - a24 * a31 - a14 * a21 * a32; - const HYPRE_Real M41 = a21 * a33 * a42 + a22 * a31 * a43 + a23 * a32 * a41 - a21 * a32 * a43 - a22 * - a33 * a41 - a23 * a31 * a42; - const HYPRE_Real M42 = a11 * a32 * a43 + a12 * a33 * a41 + a13 * a31 * a42 - a11 * a33 * a42 - a12 * - a31 * a43 - a13 * a32 * a41; - const HYPRE_Real M43 = a11 * a23 * a42 + a12 * a21 * a43 + a13 * a22 * a41 - a11 * a22 * a43 - a12 * - a23 * a41 - a13 * a21 * a42; - const HYPRE_Real M44 = a11 * a22 * a33 + a12 * a23 * a31 + a13 * a21 * a32 - a11 * a23 * a32 - a12 * - a21 * a33 - a13 * a22 * a31; + const HYPRE_Real M11 = a22 * a33 * a44 + a23 * a34 * a42 + + a24 * a32 * a43 - a22 * a34 * a43 - + a23 * a32 * a44 - a24 * a33 * a42; + + const HYPRE_Real M12 = a12 * a34 * a43 + a13 * a32 * a44 + + a14 * a33 * a42 - a12 * a33 * a44 - + a13 * a34 * a42 - a14 * a32 * a43; + + const HYPRE_Real M13 
= a12 * a23 * a44 + a13 * a24 * a42 + + a14 * a22 * a43 - a12 * a24 * a43 - + a13 * a22 * a44 - a14 * a23 * a42; + + const HYPRE_Real M14 = a12 * a24 * a33 + a13 * a22 * a34 + + a14 * a23 * a32 - a12 * a23 * a34 - + a13 * a24 * a32 - a14 * a22 * a33; + + const HYPRE_Real M21 = a21 * a34 * a43 + a23 * a31 * a44 + + a24 * a33 * a41 - a21 * a33 * a44 - + a23 * a34 * a41 - a24 * a31 * a43; + + const HYPRE_Real M22 = a11 * a33 * a44 + a13 * a34 * a41 + + a14 * a31 * a43 - a11 * a34 * a43 - + a13 * a31 * a44 - a14 * a33 * a41; + + const HYPRE_Real M23 = a11 * a24 * a43 + a13 * a21 * a44 + + a14 * a23 * a41 - a11 * a23 * a44 - + a13 * a24 * a41 - a14 * a21 * a43; + + const HYPRE_Real M24 = a11 * a23 * a34 + a13 * a24 * a31 + + a14 * a21 * a33 - a11 * a24 * a33 - + a13 * a21 * a34 - a14 * a23 * a31; + + const HYPRE_Real M31 = a21 * a32 * a44 + a22 * a34 * a41 + + a24 * a31 * a42 - a21 * a34 * a42 - + a22 * a31 * a44 - a24 * a32 * a41; + + const HYPRE_Real M32 = a11 * a34 * a42 + a12 * a31 * a44 + + a14 * a32 * a41 - a11 * a32 * a44 - + a12 * a34 * a41 - a14 * a31 * a42; + + const HYPRE_Real M33 = a11 * a22 * a44 + a12 * a24 * a41 + + a14 * a21 * a42 - a11 * a24 * a42 - + a12 * a21 * a44 - a14 * a22 * a41; + + const HYPRE_Real M34 = a11 * a24 * a32 + a12 * a21 * a34 + + a14 * a22 * a31 - a11 * a22 * a34 - + a12 * a24 * a31 - a14 * a21 * a32; + + const HYPRE_Real M41 = a21 * a33 * a42 + a22 * a31 * a43 + + a23 * a32 * a41 - a21 * a32 * a43 - + a22 * a33 * a41 - a23 * a31 * a42; + + const HYPRE_Real M42 = a11 * a32 * a43 + a12 * a33 * a41 + + a13 * a31 * a42 - a11 * a33 * a42 - + a12 * a31 * a43 - a13 * a32 * a41; + + const HYPRE_Real M43 = a11 * a23 * a42 + a12 * a21 * a43 + + a13 * a22 * a41 - a11 * a22 * a43 - + a12 * a23 * a41 - a13 * a21 * a42; + + const HYPRE_Real M44 = a11 * a22 * a33 + a12 * a23 * a31 + + a13 * a21 * a32 - a11 * a23 * a32 - + a12 * a21 * a33 - a13 * a22 * a31; const HYPRE_Real det = a11 * M11 + a12 * M21 + a13 * M31 + a14 * M41; - HYPRE_Real det_inv; 
+ HYPRE_Real det_inv = 1.0 / det; //if ( hypre_abs(det) < 1e-22 ) { //hypre_printf("### WARNING: Matrix is nearly singular! det = %e\n", det); @@ -1555,17 +1580,20 @@ void hypre_blas_smat_inv_n4 (HYPRE_Real *a) */ //} - det_inv = 1.0 / det; - - a[0] = M11 * det_inv; a[1] = M12 * det_inv; a[2] = M13 * det_inv; a[3] = M14 * det_inv; - a[4] = M21 * det_inv; a[5] = M22 * det_inv; a[6] = M23 * det_inv; a[7] = M24 * det_inv; - a[8] = M31 * det_inv; a[9] = M32 * det_inv; a[10] = M33 * det_inv; a[11] = M34 * det_inv; + a[0] = M11 * det_inv; a[1] = M12 * det_inv; a[2] = M13 * det_inv; a[3] = M14 * det_inv; + a[4] = M21 * det_inv; a[5] = M22 * det_inv; a[6] = M23 * det_inv; a[7] = M24 * det_inv; + a[8] = M31 * det_inv; a[9] = M32 * det_inv; a[10] = M33 * det_inv; a[11] = M34 * det_inv; a[12] = M41 * det_inv; a[13] = M42 * det_inv; a[14] = M43 * det_inv; a[15] = M44 * det_inv; - } +/*-------------------------------------------------------------------------- + * hypre_MGRSmallBlkInverse + * + * TODO (VPM): move this function to seq_ls + *--------------------------------------------------------------------------*/ + void hypre_MGRSmallBlkInverse(HYPRE_Real *mat, - HYPRE_Int blk_size) + HYPRE_Int blk_size) { if (blk_size == 2) { @@ -1581,6 +1609,12 @@ void hypre_MGRSmallBlkInverse(HYPRE_Real *mat, } } +/*-------------------------------------------------------------------------- + * hypre_MGRSmallBlkInverse + * + * TODO (VPM): move this function to seq_ls + *--------------------------------------------------------------------------*/ + void hypre_blas_mat_inv(HYPRE_Real *a, HYPRE_Int n) { @@ -2034,6 +2068,12 @@ hypre_MGRBlockRelaxSolve( hypre_ParCSRMatrix *A, return hypre_error_flag; } +/*-------------------------------------------------------------------------- + * hypre_BlockDiagInvLapack + * + * TODO (VPM): move this function to seq_ls + *--------------------------------------------------------------------------*/ + HYPRE_Int hypre_BlockDiagInvLapack(HYPRE_Real *diag, 
HYPRE_Int N, HYPRE_Int blk_size) { diff --git a/src/parcsr_ls/par_mgr.h b/src/parcsr_ls/par_mgr.h index 286178c979..517fc5b87a 100644 --- a/src/parcsr_ls/par_mgr.h +++ b/src/parcsr_ls/par_mgr.h @@ -53,6 +53,7 @@ typedef struct hypre_ParCSRMatrix **P_FF_array; #endif hypre_ParCSRMatrix **P_array; + hypre_ParCSRMatrix **R_array; hypre_ParCSRMatrix **RT_array; hypre_ParCSRMatrix *RAP; hypre_IntArray **CF_marker_array; diff --git a/src/parcsr_ls/par_mgr_device.c b/src/parcsr_ls/par_mgr_device.c index beca027c2c..7c168978d9 100644 --- a/src/parcsr_ls/par_mgr_device.c +++ b/src/parcsr_ls/par_mgr_device.c @@ -1004,11 +1004,7 @@ hypre_ParCSRMatrixBlockDiagMatrixDevice( hypre_ParCSRMatrix *A, /*-------------------------------------------------------------------------- * hypre_MGRComputeNonGalerkinCGDevice * - * Available methods: - * 1: inv(A_FF) approximated by its (block) diagonal inverse - * 2: CPR-like approx. with inv(A_FF) approx. by its diagonal inverse - * 3: CPR-like approx. with inv(A_FF) approx. by its block diagonal inverse - * 4: inv(A_FF) approximated by sparse approximate inverse + * See hypre_MGRComputeNonGalerkinCoarseGrid for available methods. * * TODO (VPM): Can we have a single function that works for host and device? * inv(A_FF)*A_FC might have been computed before. Reuse it! 
@@ -1019,6 +1015,8 @@ hypre_MGRComputeNonGalerkinCGDevice(hypre_ParCSRMatrix *A_FF, hypre_ParCSRMatrix *A_FC, hypre_ParCSRMatrix *A_CF, hypre_ParCSRMatrix *A_CC, + hypre_ParCSRMatrix *Wp, + hypre_ParCSRMatrix *Wr, HYPRE_Int blk_size, HYPRE_Int method, HYPRE_Complex threshold, @@ -1028,8 +1026,8 @@ hypre_MGRComputeNonGalerkinCGDevice(hypre_ParCSRMatrix *A_FF, hypre_ParCSRMatrix *A_H; hypre_ParCSRMatrix *A_Hc; hypre_ParCSRMatrix *A_CF_trunc; - hypre_ParCSRMatrix *Wp; - HYPRE_Complex alpha = -1.0; + hypre_ParCSRMatrix *Wp_tmp = Wp; + HYPRE_Complex alpha = -1.0; hypre_GpuProfilingPushRange("MGRComputeNonGalerkinCG"); @@ -1043,8 +1041,8 @@ hypre_MGRComputeNonGalerkinCGDevice(hypre_ParCSRMatrix *A_FF, A_CF_trunc = A_CF; } - /* Compute Wp */ - if (method == 1 || method == 2) + /* Compute Wp/Wr if not passed in */ + if (!Wp && (method == 1 || method == 2)) { hypre_Vector *D_FF_inv; HYPRE_Complex *data; @@ -1058,14 +1056,14 @@ hypre_MGRComputeNonGalerkinCGDevice(hypre_ParCSRMatrix *A_FF, hypre_CSRMatrixExtractDiagonalDevice(hypre_ParCSRMatrixDiag(A_FF), data, 2); /* Compute D_FF_inv*A_FC */ - Wp = hypre_ParCSRMatrixClone(A_FC, 1); - hypre_CSRMatrixDiagScaleDevice(hypre_ParCSRMatrixDiag(Wp), D_FF_inv, NULL); - hypre_CSRMatrixDiagScaleDevice(hypre_ParCSRMatrixOffd(Wp), D_FF_inv, NULL); + Wp_tmp = hypre_ParCSRMatrixClone(A_FC, 1); + hypre_CSRMatrixDiagScaleDevice(hypre_ParCSRMatrixDiag(Wp_tmp), D_FF_inv, NULL); + hypre_CSRMatrixDiagScaleDevice(hypre_ParCSRMatrixOffd(Wp_tmp), D_FF_inv, NULL); /* Free memory */ hypre_SeqVectorDestroy(D_FF_inv); } - else if (method == 3) + else if (!Wp && (method == 3)) { hypre_ParCSRMatrix *B_FF_inv; @@ -1073,22 +1071,39 @@ hypre_MGRComputeNonGalerkinCGDevice(hypre_ParCSRMatrix *A_FF, hypre_ParCSRMatrixBlockDiagMatrixDevice(A_FF, blk_size, -1, NULL, 1, &B_FF_inv); /* Compute Wp = A_FF_inv * A_FC */ - Wp = hypre_ParCSRMatMat(B_FF_inv, A_FC); + Wp_tmp = hypre_ParCSRMatMat(B_FF_inv, A_FC); /* Free memory */ hypre_ParCSRMatrixDestroy(B_FF_inv); } 
else { - /* Use approximate inverse for ideal interploation */ - hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Error: feature not implemented yet!"); - hypre_GpuProfilingPopRange(); + if (method != 5) + { + /* Use approximate inverse for ideal interpolation */ + hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Error: feature not implemented yet!"); + hypre_GpuProfilingPopRange(); - return hypre_error_flag; + return hypre_error_flag; + } } /* Compute A_Hc (the correction for A_H) */ - A_Hc = hypre_ParCSRMatMat(A_CF_trunc, Wp); + if (method != 5) + { + A_Hc = hypre_ParCSRMatMat(A_CF_trunc, Wp_tmp); + } + else if (Wr && (method == 5)) + { + A_Hc = hypre_ParCSRMatMat(Wr, A_FC); + } + else + { + hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Wr matrix was not provided!"); + hypre_GpuProfilingPopRange(); + + return hypre_error_flag; + } /* Drop small entries from A_Hc */ hypre_ParCSRMatrixDropSmallEntriesDevice(A_Hc, threshold, -1); @@ -1098,7 +1113,10 @@ hypre_MGRComputeNonGalerkinCGDevice(hypre_ParCSRMatrix *A_FF, /* Free memory */ hypre_ParCSRMatrixDestroy(A_Hc); - hypre_ParCSRMatrixDestroy(Wp); + if (Wp_tmp != Wp) + { + hypre_ParCSRMatrixDestroy(Wp_tmp); + } if (method == 2 || method == 3) { hypre_ParCSRMatrixDestroy(A_CF_trunc); diff --git a/src/parcsr_ls/par_mgr_interp.c b/src/parcsr_ls/par_mgr_interp.c index 5e7b2a94d5..6355693528 100644 --- a/src/parcsr_ls/par_mgr_interp.c +++ b/src/parcsr_ls/par_mgr_interp.c @@ -33,6 +33,8 @@ hypre_MGRBuildInterp(hypre_ParCSRMatrix *A, HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) ); #endif + HYPRE_ANNOTATE_FUNC_BEGIN; + /* Interpolation for each level */ if (interp_type < 3) { @@ -73,6 +75,7 @@ hypre_MGRBuildInterp(hypre_ParCSRMatrix *A, { hypre_error_w_msg(HYPRE_ERROR_GENERIC, "No GPU support!"); + HYPRE_ANNOTATE_FUNC_END; return hypre_error_flag; } else @@ -99,8 +102,7 @@ hypre_MGRBuildInterp(hypre_ParCSRMatrix *A, } else if (interp_type == 12) { - hypre_MGRBuildPBlockJacobi(A, A_FF, A_FC, aux_mat, 
blk_size, CF_marker, - num_cpts_global, &P); + hypre_MGRBuildPBlockJacobi(A, A_FF, A_FC, aux_mat, blk_size, CF_marker, &P); } else { @@ -112,44 +114,59 @@ hypre_MGRBuildInterp(hypre_ParCSRMatrix *A, /* set pointer to P */ *P_ptr = P; + HYPRE_ANNOTATE_FUNC_END; + return hypre_error_flag; } /*-------------------------------------------------------------------------- - * hypre_MGRBuildInterp + * hypre_MGRBuildRestrict * * Setup restriction operator. * * TODOs (VPM): - * 1) Change R -> RT (VPM) - * 2) Add post-smoothing + * 1) Add post-smoothing *--------------------------------------------------------------------------*/ HYPRE_Int hypre_MGRBuildRestrict( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *A_FF, hypre_ParCSRMatrix *A_FC, - HYPRE_Int *CF_marker, + hypre_ParCSRMatrix *A_CF, + hypre_IntArray *CF_marker, HYPRE_BigInt *num_cpts_global, HYPRE_Real trunc_factor, HYPRE_Int max_elmts, HYPRE_Real strong_threshold, HYPRE_Real max_row_sum, HYPRE_Int blk_size, + HYPRE_Int restrict_type, + hypre_ParCSRMatrix **W_ptr, hypre_ParCSRMatrix **R_ptr, - HYPRE_Int restrict_type) + hypre_ParCSRMatrix **RT_ptr) { + /* Input variables */ + HYPRE_Int *CF_marker_data = hypre_IntArrayData(CF_marker); + + /* Output variables */ + hypre_ParCSRMatrix *W = NULL; hypre_ParCSRMatrix *R = NULL; + hypre_ParCSRMatrix *RT = NULL; + + /* Local variables */ hypre_ParCSRMatrix *AT = NULL; hypre_ParCSRMatrix *A_FFT = NULL; hypre_ParCSRMatrix *A_FCT = NULL; hypre_ParCSRMatrix *ST = NULL; + #if defined (HYPRE_USING_GPU) HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) ); #endif + HYPRE_ANNOTATE_FUNC_BEGIN; + /* Build AT (transpose A) */ - if (restrict_type > 0) + if (restrict_type > 0 && restrict_type != 14) { hypre_ParCSRMatrixTranspose(A, &AT, 1); @@ -170,12 +187,12 @@ hypre_MGRBuildRestrict( hypre_ParCSRMatrix *A, #if defined (HYPRE_USING_GPU) if (exec == HYPRE_EXEC_DEVICE) { - hypre_MGRBuildPDevice(A, CF_marker, num_cpts_global, restrict_type, &R); + 
hypre_MGRBuildPDevice(A, CF_marker_data, num_cpts_global, restrict_type, &RT); } else #endif { - hypre_MGRBuildP(A, CF_marker, num_cpts_global, restrict_type, 0, &R); + hypre_MGRBuildP(A, CF_marker_data, num_cpts_global, restrict_type, 0, &RT); } } else if (restrict_type == 1 || restrict_type == 2) @@ -183,80 +200,79 @@ hypre_MGRBuildRestrict( hypre_ParCSRMatrix *A, #if defined (HYPRE_USING_GPU) if (exec == HYPRE_EXEC_DEVICE) { - hypre_MGRBuildPDevice(AT, CF_marker, num_cpts_global, restrict_type, &R); + hypre_MGRBuildPDevice(AT, CF_marker_data, num_cpts_global, restrict_type, &RT); } else #endif { - hypre_MGRBuildP(AT, CF_marker, num_cpts_global, restrict_type, 0, &R); + hypre_MGRBuildP(AT, CF_marker_data, num_cpts_global, restrict_type, 0, &RT); } } else if (restrict_type == 3) { /* move diagonal to first entry */ hypre_CSRMatrixReorder(hypre_ParCSRMatrixDiag(AT)); - hypre_MGRBuildInterpApproximateInverse(AT, CF_marker, num_cpts_global, &R); - hypre_BoomerAMGInterpTruncation(R, trunc_factor, max_elmts); + hypre_MGRBuildInterpApproximateInverse(AT, CF_marker_data, num_cpts_global, &RT); + hypre_BoomerAMGInterpTruncation(RT, trunc_factor, max_elmts); } else if (restrict_type == 12) { - hypre_MGRBuildPBlockJacobi(AT, A_FFT, A_FCT, NULL, blk_size, CF_marker, num_cpts_global, - &R); + hypre_MGRBuildPBlockJacobi(AT, A_FFT, A_FCT, NULL, blk_size, CF_marker_data, &RT); } else if (restrict_type == 13) // CPR-like restriction operator { /* TODO: create a function with this block (VPM) */ - hypre_ParCSRMatrix *blk_A_cf = NULL; - hypre_ParCSRMatrix *blk_A_cf_transpose = NULL; - hypre_ParCSRMatrix *Wr_transpose = NULL; - hypre_ParCSRMatrix *blk_A_ff_inv_transpose = NULL; - HYPRE_Int *c_marker = NULL; - HYPRE_Int *f_marker = NULL; - HYPRE_Int i; - HYPRE_Int nrows = hypre_ParCSRMatrixNumRows(A); - - HYPRE_MemoryLocation memory_location = hypre_ParCSRMatrixMemoryLocation(A); - - /* TODO: Port this to GPU (VPM) */ - /* create C and F markers to extract A_CF */ - c_marker = 
CF_marker; - f_marker = hypre_CTAlloc(HYPRE_Int, nrows, memory_location); - for (i = 0; i < nrows; i++) - { - f_marker[i] = - CF_marker[i]; - } + hypre_ParCSRMatrix *A_CF_blk = NULL; + hypre_ParCSRMatrix *A_CFT_blk = NULL; + hypre_ParCSRMatrix *WrT = NULL; + hypre_ParCSRMatrix *A_FF_blkinv = NULL; #if defined (HYPRE_USING_GPU) if (exec == HYPRE_EXEC_DEVICE) { hypre_error_w_msg(HYPRE_ERROR_GENERIC, "No GPU support!"); + HYPRE_ANNOTATE_FUNC_END; return hypre_error_flag; } else #endif { - /* get block A_cf */ - hypre_MGRGetAcfCPR(A, blk_size, c_marker, f_marker, &blk_A_cf); + /* Get block A_CF */ + hypre_MGRTruncateAcfCPR(A_CF, &A_CF_blk); - /* transpose block A_cf */ - hypre_ParCSRMatrixTranspose(blk_A_cf, &blk_A_cf_transpose, 1); + /* Transpose block A_CF */ + hypre_ParCSRMatrixTranspose(A_CF_blk, &A_CFT_blk, 1); - /* compute block diagonal A_ff */ - hypre_ParCSRMatrixBlockDiagMatrix(AT, blk_size, -1, CF_marker, 1, - &blk_A_ff_inv_transpose); + /* Compute block diagonal A_FF */ + hypre_ParCSRMatrixBlockDiagMatrix(AT, blk_size, -1, CF_marker_data, 1, + &A_FF_blkinv); - /* compute Wr = A^{-T} * A_cf^{T} */ - Wr_transpose = hypre_ParCSRMatMat(blk_A_ff_inv_transpose, blk_A_cf_transpose); + /* Compute WrT = A_FF_blk^{-T} * A_CF^{T} */ + WrT = hypre_ParCSRMatMat(A_FF_blkinv, A_CFT_blk); - /* compute restriction operator R = [-Wr I] (transposed for use with RAP) */ - hypre_MGRBuildPFromWp(AT, Wr_transpose, CF_marker, &R); + /* compute restriction operator RT = [-WrT I] (transposed for use with RAP) */ + hypre_MGRBuildPFromWp(AT, WrT, CF_marker_data, &RT); + } + + /* Free memory */ + hypre_ParCSRMatrixDestroy(A_CF_blk); + hypre_ParCSRMatrixDestroy(A_CFT_blk); + hypre_ParCSRMatrixDestroy(WrT); + hypre_ParCSRMatrixDestroy(A_FF_blkinv); + } + else if (restrict_type == 14) + { + if (blk_size > 1) + { + /* Block column-lumped restriction */ + hypre_MGRBlockColLumpedRestrict(A, A_FF, A_CF, CF_marker, blk_size, &W, &R); + } + else + { + /* Column-lumped restriction */ + 
hypre_MGRColLumpedRestrict(A, A_FF, A_CF, CF_marker, &W, &R); } - hypre_ParCSRMatrixDestroy(blk_A_cf); - hypre_ParCSRMatrixDestroy(blk_A_cf_transpose); - hypre_ParCSRMatrixDestroy(Wr_transpose); - hypre_ParCSRMatrixDestroy(blk_A_ff_inv_transpose); - hypre_TFree(f_marker, memory_location); } else { @@ -264,15 +280,20 @@ hypre_MGRBuildRestrict( hypre_ParCSRMatrix *A, hypre_BoomerAMGCreateS(AT, strong_threshold, max_row_sum, 1, NULL, &ST); /* Classical modified interpolation */ - hypre_BoomerAMGBuildInterp(AT, CF_marker, ST, num_cpts_global, 1, NULL, 0, - trunc_factor, max_elmts, &R); + hypre_BoomerAMGBuildInterp(AT, CF_marker_data, ST, num_cpts_global, 1, NULL, 0, + trunc_factor, max_elmts, &RT); } /* Compute R^T so it can be used in the solve phase */ - hypre_ParCSRMatrixLocalTranspose(R); + if (RT) + { + hypre_ParCSRMatrixLocalTranspose(RT); + } - /* Set pointer to R */ - *R_ptr = R; + /* Set output pointers */ + *RT_ptr = RT; + *R_ptr = R; + *W_ptr = W; /* Free memory */ if (restrict_type > 0) @@ -286,6 +307,8 @@ hypre_MGRBuildRestrict( hypre_ParCSRMatrix *A, hypre_ParCSRMatrixDestroy(ST); } + HYPRE_ANNOTATE_FUNC_END; + return hypre_error_flag; } @@ -303,6 +326,8 @@ hypre_MGRBuildPFromWp( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix **P_ptr) { + HYPRE_ANNOTATE_FUNC_BEGIN; + #if defined(HYPRE_USING_GPU) HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) ); @@ -316,6 +341,8 @@ hypre_MGRBuildPFromWp( hypre_ParCSRMatrix *A, hypre_MGRBuildPFromWpHost(A, Wp, CF_marker, P_ptr); } + HYPRE_ANNOTATE_FUNC_END; + return hypre_error_flag; } @@ -481,16 +508,13 @@ HYPRE_Int hypre_MGRBuildBlockJacobiWp( hypre_ParCSRMatrix *A_FF, hypre_ParCSRMatrix *A_FC, HYPRE_Int blk_size, - HYPRE_Int *CF_marker, - HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix **Wp_ptr ) { - HYPRE_UNUSED_VAR(CF_marker); - HYPRE_UNUSED_VAR(cpts_starts); - hypre_ParCSRMatrix *A_FF_inv; hypre_ParCSRMatrix *Wp; + HYPRE_ANNOTATE_FUNC_BEGIN; + /* Build A_FF_inv */ 
hypre_ParCSRMatrixBlockDiagMatrix(A_FF, blk_size, -1, NULL, 1, &A_FF_inv); @@ -503,6 +527,8 @@ hypre_MGRBuildBlockJacobiWp( hypre_ParCSRMatrix *A_FF, /* Set output pointer */ *Wp_ptr = Wp; + HYPRE_ANNOTATE_FUNC_END; + return hypre_error_flag; } @@ -517,7 +543,6 @@ hypre_MGRBuildPBlockJacobi( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *Wp, HYPRE_Int blk_size, HYPRE_Int *CF_marker, - HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix **P_ptr) { hypre_ParCSRMatrix *Wp_tmp; @@ -526,7 +551,7 @@ hypre_MGRBuildPBlockJacobi( hypre_ParCSRMatrix *A, if (Wp == NULL) { - hypre_MGRBuildBlockJacobiWp(A_FF, A_FC, blk_size, CF_marker, cpts_starts, &Wp_tmp); + hypre_MGRBuildBlockJacobiWp(A_FF, A_FC, blk_size, &Wp_tmp); hypre_MGRBuildPFromWp(A, Wp_tmp, CF_marker, P_ptr); hypre_ParCSRMatrixDeviceColMapOffd(Wp_tmp) = NULL; @@ -547,7 +572,7 @@ hypre_MGRBuildPBlockJacobi( hypre_ParCSRMatrix *A, /*-------------------------------------------------------------------------- * hypre_ExtendWtoPHost * - * TODO: move this to par_interp.c? 
+ * TODO (VPM): merge with hypre_MGRBuildPFromWpHost *--------------------------------------------------------------------------*/ HYPRE_Int @@ -2096,167 +2121,6 @@ hypre_MGRBuildInterpApproximateInverse(hypre_ParCSRMatrix *A, return hypre_error_flag; } -/*-------------------------------------------------------------------------- - * hypre_MGRGetAcfCPR - *--------------------------------------------------------------------------*/ - -HYPRE_Int -hypre_MGRGetAcfCPR(hypre_ParCSRMatrix *A, - HYPRE_Int blk_size, - HYPRE_Int *c_marker, - HYPRE_Int *f_marker, - hypre_ParCSRMatrix **A_CF_ptr) -{ - MPI_Comm comm = hypre_ParCSRMatrixComm(A); - HYPRE_Int i, j, jj, jj1; - HYPRE_Int jj_counter, cpts_cnt; - hypre_ParCSRMatrix *A_CF = NULL; - hypre_CSRMatrix *A_CF_diag = NULL; - - HYPRE_MemoryLocation memory_location = hypre_ParCSRMatrixMemoryLocation(A); - hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); - - HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag); - HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag); - HYPRE_Complex *A_diag_data = hypre_CSRMatrixData(A_diag); - - HYPRE_Int total_fpts, n_fpoints; - HYPRE_Int num_rows = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A)); - HYPRE_Int nnz_diag_new = 0; - HYPRE_Int num_procs, my_id; - hypre_IntArray *wrap_cf = NULL; - hypre_IntArray *coarse_dof_func_ptr = NULL; - HYPRE_BigInt num_row_cpts_global[2], num_col_fpts_global[2]; - HYPRE_BigInt total_global_row_cpts, total_global_col_fpts; - - hypre_MPI_Comm_size(comm, &num_procs); - hypre_MPI_Comm_rank(comm, &my_id); - - // Count total F-points - // Also setup F to C column map - total_fpts = 0; - HYPRE_Int *f_to_c_col_map = hypre_CTAlloc(HYPRE_Int, num_rows, HYPRE_MEMORY_HOST); - for (i = 0; i < num_rows; i++) - { - // if (c_marker[i] == 1) - // { - // total_cpts++; - // } - if (f_marker[i] == 1) - { - f_to_c_col_map[i] = total_fpts; - total_fpts++; - } - } - n_fpoints = blk_size; - /* get the number of coarse rows */ - wrap_cf = hypre_IntArrayCreate(num_rows); - 
hypre_IntArrayMemoryLocation(wrap_cf) = HYPRE_MEMORY_HOST; - hypre_IntArrayData(wrap_cf) = c_marker; - hypre_BoomerAMGCoarseParms(comm, num_rows, 1, NULL, wrap_cf, &coarse_dof_func_ptr, - num_row_cpts_global); - hypre_IntArrayDestroy(coarse_dof_func_ptr); - coarse_dof_func_ptr = NULL; - - //hypre_printf("my_id = %d, cpts_this = %d, cpts_next = %d\n", my_id, num_row_cpts_global[0], num_row_cpts_global[1]); - - if (my_id == (num_procs - 1)) { total_global_row_cpts = num_row_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_row_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); - - /* get the number of coarse rows */ - hypre_IntArrayData(wrap_cf) = f_marker; - hypre_BoomerAMGCoarseParms(comm, num_rows, 1, NULL, wrap_cf, &coarse_dof_func_ptr, - num_col_fpts_global); - hypre_IntArrayDestroy(coarse_dof_func_ptr); - coarse_dof_func_ptr = NULL; - hypre_IntArrayData(wrap_cf) = NULL; - hypre_IntArrayDestroy(wrap_cf); - - //hypre_printf("my_id = %d, cpts_this = %d, cpts_next = %d\n", my_id, num_col_fpts_global[0], num_col_fpts_global[1]); - - if (my_id == (num_procs - 1)) { total_global_col_fpts = num_col_fpts_global[1]; } - hypre_MPI_Bcast(&total_global_col_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); - - // First pass: count the nnz of A_CF - jj_counter = 0; - cpts_cnt = 0; - for (i = 0; i < num_rows; i++) - { - if (c_marker[i] == 1) - { - for (j = A_diag_i[i]; j < A_diag_i[i + 1]; j++) - { - jj = A_diag_j[j]; - if (f_marker[jj] == 1) - { - jj1 = f_to_c_col_map[jj]; - if (jj1 >= cpts_cnt * n_fpoints && jj1 < (cpts_cnt + 1)*n_fpoints) - { - jj_counter++; - } - } - } - cpts_cnt++; - } - } - nnz_diag_new = jj_counter; - - HYPRE_Int *A_CF_diag_i = hypre_CTAlloc(HYPRE_Int, cpts_cnt + 1, memory_location); - HYPRE_Int *A_CF_diag_j = hypre_CTAlloc(HYPRE_Int, nnz_diag_new, memory_location); - HYPRE_Complex *A_CF_diag_data = hypre_CTAlloc(HYPRE_Complex, nnz_diag_new, memory_location); - A_CF_diag_i[cpts_cnt] = nnz_diag_new; - - jj_counter = 0; - cpts_cnt = 0; - for (i = 0; i < 
num_rows; i++) - { - if (c_marker[i] == 1) - { - A_CF_diag_i[cpts_cnt] = jj_counter; - for (j = A_diag_i[i]; j < A_diag_i[i + 1]; j++) - { - jj = A_diag_j[j]; - if (f_marker[jj] == 1) - { - jj1 = f_to_c_col_map[jj]; - if (jj1 >= cpts_cnt * n_fpoints && jj1 < (cpts_cnt + 1)*n_fpoints) - { - A_CF_diag_j[jj_counter] = jj1; - A_CF_diag_data[jj_counter] = A_diag_data[j]; - jj_counter++; - } - } - } - cpts_cnt++; - } - } - - /* Create A_CF matrix */ - A_CF = hypre_ParCSRMatrixCreate(comm, - total_global_row_cpts, - total_global_col_fpts, - num_row_cpts_global, - num_col_fpts_global, - 0, - nnz_diag_new, - 0); - - A_CF_diag = hypre_ParCSRMatrixDiag(A_CF); - hypre_CSRMatrixData(A_CF_diag) = A_CF_diag_data; - hypre_CSRMatrixI(A_CF_diag) = A_CF_diag_i; - hypre_CSRMatrixJ(A_CF_diag) = A_CF_diag_j; - - hypre_CSRMatrixData(hypre_ParCSRMatrixOffd(A_CF)) = NULL; - hypre_CSRMatrixI(hypre_ParCSRMatrixOffd(A_CF)) = NULL; - hypre_CSRMatrixJ(hypre_ParCSRMatrixOffd(A_CF)) = NULL; - - *A_CF_ptr = A_CF; - - hypre_TFree(f_to_c_col_map, HYPRE_MEMORY_HOST); - - return hypre_error_flag; -} - /*-------------------------------------------------------------------------- * hypre_MGRTruncateAcfCPRDevice * @@ -2318,8 +2182,7 @@ hypre_MGRTruncateAcfCPR(hypre_ParCSRMatrix *A_CF, HYPRE_Int jj_counter; HYPRE_Int blk_size = num_cols / num_rows; - /* Sanity check */ - hypre_assert(hypre_ParCSRMatrixMemoryLocation(A_CF) == HYPRE_MEMORY_HOST); + HYPRE_ANNOTATE_FUNC_BEGIN; /* First pass: count the nnz of truncated (new) A_CF */ jj_counter = 0; @@ -2373,6 +2236,8 @@ hypre_MGRTruncateAcfCPR(hypre_ParCSRMatrix *A_CF, /* Set output pointer */ *A_CF_new_ptr = A_CF_new; + HYPRE_ANNOTATE_FUNC_END; + return hypre_error_flag; } @@ -2437,6 +2302,8 @@ hypre_MGRBuildRFromWHost(HYPRE_Int *C_map, HYPRE_Int hypre_MGRBuildRFromW(HYPRE_Int *C_map, HYPRE_Int *F_map, + HYPRE_BigInt global_num_rows_R, + HYPRE_BigInt global_num_cols_R, HYPRE_BigInt *row_starts_R, HYPRE_BigInt *col_starts_R, hypre_ParCSRMatrix *W, @@ -2444,8 
+2311,6 @@ hypre_MGRBuildRFromW(HYPRE_Int *C_map, { /* Input matrix variables */ MPI_Comm comm = hypre_ParCSRMatrixComm(W); - HYPRE_BigInt num_rows_W = hypre_ParCSRMatrixGlobalNumRows(W); - HYPRE_BigInt num_cols_W = hypre_ParCSRMatrixGlobalNumCols(W); HYPRE_MemoryLocation memory_location_W = hypre_ParCSRMatrixMemoryLocation(W); hypre_CSRMatrix *W_diag = hypre_ParCSRMatrixDiag(W); @@ -2458,8 +2323,6 @@ hypre_MGRBuildRFromW(HYPRE_Int *C_map, /* Output matrix */ hypre_ParCSRMatrix *R; - HYPRE_BigInt num_rows_R = num_rows_W; - HYPRE_BigInt num_cols_R = num_cols_W + num_rows_W; HYPRE_Int num_nonzeros_diag = W_diag_nnz + W_diag_num_rows; HYPRE_Int num_nonzeros_offd = W_offd_nnz; @@ -2472,11 +2335,13 @@ hypre_MGRBuildRFromW(HYPRE_Int *C_map, return hypre_error_flag; } + HYPRE_ANNOTATE_FUNC_BEGIN; + /*----------------------------------------------------------------------- * Create and initialize output matrix *-----------------------------------------------------------------------*/ - R = hypre_ParCSRMatrixCreate(comm, num_rows_R, num_cols_R, + R = hypre_ParCSRMatrixCreate(comm, global_num_rows_R, global_num_cols_R, row_starts_R, col_starts_R, W_offd_num_cols, num_nonzeros_diag, num_nonzeros_offd); hypre_ParCSRMatrixInitialize_v2(R, memory_location_W); @@ -2509,5 +2374,209 @@ hypre_MGRBuildRFromW(HYPRE_Int *C_map, /* Set output pointer */ *R_ptr = R; + HYPRE_ANNOTATE_FUNC_END; + + return hypre_error_flag; +} + +/*-------------------------------------------------------------------------- + * hypre_MGRColLumpedRestrict + *--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_MGRColLumpedRestrict(hypre_ParCSRMatrix *A, + hypre_ParCSRMatrix *A_FF, + hypre_ParCSRMatrix *A_CF, + hypre_IntArray *CF_marker, + hypre_ParCSRMatrix **W_ptr, + hypre_ParCSRMatrix **R_ptr) +{ + hypre_ParVector *b_FF = NULL; + hypre_ParVector *b_CF = NULL; + hypre_ParVector *r_CF = NULL; + hypre_ParCSRMatrix *W = NULL; + hypre_ParCSRMatrix *R = NULL; + + 
HYPRE_Int num_points = 2; + HYPRE_Int points[2] = {1, -1}; // {C, F} + HYPRE_Int sizes[2] = {hypre_ParCSRMatrixNumRows(A_CF), + hypre_ParCSRMatrixNumCols(A_CF) + }; + hypre_IntArrayArray *CF_maps; + + HYPRE_ANNOTATE_FUNC_BEGIN; + + /*------------------------------------------------------- + * 1) b_FF = approx(A_FF) + *-------------------------------------------------------*/ + + hypre_ParCSRMatrixColSum(A_FF, &b_FF); + + /*------------------------------------------------------- + * 2) b_CF = approx(A_CF) + *-------------------------------------------------------*/ + + hypre_ParCSRMatrixColSum(A_CF, &b_CF); + + /*------------------------------------------------------- + * 3) W = approx(A_CF) * inv(approx(A_FF)) + *-------------------------------------------------------*/ + + r_CF = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A_CF), + hypre_ParCSRMatrixGlobalNumRows(A_CF), + hypre_ParCSRMatrixRowStarts(A_CF)); + hypre_ParVectorInitialize_v2(r_CF, hypre_ParCSRMatrixMemoryLocation(A_CF)); + hypre_ParVectorElmdivpy(b_CF, b_FF, r_CF); + W = hypre_ParCSRMatrixCreateFromParVector(r_CF, + hypre_ParCSRMatrixGlobalNumRows(A_CF), + hypre_ParCSRMatrixGlobalNumCols(A_CF), + hypre_ParCSRMatrixRowStarts(A_CF), + hypre_ParCSRMatrixColStarts(A_CF)); + + /* Free memory */ + hypre_ParVectorDestroy(b_FF); + hypre_ParVectorDestroy(b_CF); + hypre_ParVectorDestroy(r_CF); + + /*------------------------------------------------------- + * 4) Build Restriction + *-------------------------------------------------------*/ + + /* Compute C/F local mappings */ + hypre_IntArraySeparateByValue(num_points, points, sizes, CF_marker, &CF_maps); + + /* Build restriction from W (R = [-W I]) */ + hypre_MGRBuildRFromW(hypre_IntArrayArrayEntryIData(CF_maps, 0), + hypre_IntArrayArrayEntryIData(CF_maps, 1), + hypre_ParCSRMatrixGlobalNumRows(A_CF), + hypre_ParCSRMatrixGlobalNumCols(A), + hypre_ParCSRMatrixRowStarts(A_CF), + hypre_ParCSRMatrixColStarts(A), + W, &R); + + /* Set output pointers */ + *W_ptr = W; 
+ *R_ptr = R; + + /* Free memory */ + hypre_IntArrayArrayDestroy(CF_maps); + + HYPRE_ANNOTATE_FUNC_END; + + return hypre_error_flag; +} + +/*-------------------------------------------------------------------------- + * hypre_MGRBlockColLumpedRestrict + *--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_MGRBlockColLumpedRestrict(hypre_ParCSRMatrix *A, + hypre_ParCSRMatrix *A_FF, + hypre_ParCSRMatrix *A_CF, + hypre_IntArray *CF_marker, + HYPRE_Int block_dim, + hypre_ParCSRMatrix **W_ptr, + hypre_ParCSRMatrix **R_ptr) +{ + hypre_DenseBlockMatrix *b_FF = NULL; + hypre_DenseBlockMatrix *b_CF = NULL; + hypre_DenseBlockMatrix *r_CF = NULL; + hypre_ParCSRMatrix *W = NULL; + hypre_ParCSRMatrix *R = NULL; + + HYPRE_Int row_major = 0; + HYPRE_Int num_points = 2; + HYPRE_Int points[2] = {1, -1}; // {C, F} + HYPRE_Int sizes[2] = {hypre_ParCSRMatrixNumRows(A_CF), + hypre_ParCSRMatrixNumCols(A_CF) + }; + hypre_IntArrayArray *CF_maps; + + HYPRE_ANNOTATE_FUNC_BEGIN; + + /* Sanity check */ + if (block_dim <= 1) + { + hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Invalid block dimension!"); + return hypre_error_flag; + } + + /*------------------------------------------------------- + * 1) b_FF = approx(A_FF) + *-------------------------------------------------------*/ + + hypre_ParCSRMatrixBlockColSum(A_FF, row_major, block_dim, block_dim, &b_FF); + + /*------------------------------------------------------- + * 2) b_CF = approx(A_CF) + *-------------------------------------------------------*/ + + hypre_ParCSRMatrixBlockColSum(A_CF, row_major, 1, block_dim, &b_CF); + + /*------------------------------------------------------- + * 3) b_FF = inv(approx(A_FF)) (invert in-place) + *-------------------------------------------------------*/ + +#if defined (HYPRE_USING_GPU) + if (hypre_GetExecPolicy1(hypre_DenseBlockMatrixMemoryLocation(b_FF)) == HYPRE_EXEC_DEVICE) + { + /* TODO (VPM): GPU version */ + hypre_DenseBlockMatrixMigrate(b_FF, 
HYPRE_MEMORY_HOST); + hypre_BlockDiagInvLapack(hypre_DenseBlockMatrixData(b_FF), + hypre_DenseBlockMatrixNumBlocks(b_FF), + hypre_DenseBlockMatrixNumRowsBlock(b_FF)); + hypre_DenseBlockMatrixMigrate(b_FF, HYPRE_MEMORY_DEVICE); + } + else +#endif + { + hypre_BlockDiagInvLapack(hypre_DenseBlockMatrixData(b_FF), + hypre_DenseBlockMatrixNumBlocks(b_FF), + hypre_DenseBlockMatrixNumRowsBlock(b_FF)); + } + + /*------------------------------------------------------- + * 4) W = approx(A_CF) * inv(approx(A_FF)) + *-------------------------------------------------------*/ + + hypre_DenseBlockMatrixMultiply(b_CF, b_FF, &r_CF); + W = hypre_ParCSRMatrixCreateFromDenseBlockMatrix(hypre_ParCSRMatrixComm(A_CF), + hypre_ParCSRMatrixGlobalNumRows(A_CF), + hypre_ParCSRMatrixGlobalNumCols(A_CF), + hypre_ParCSRMatrixRowStarts(A_CF), + hypre_ParCSRMatrixColStarts(A_CF), + r_CF); + + /* Free memory */ + hypre_DenseBlockMatrixDestroy(b_FF); + hypre_DenseBlockMatrixDestroy(b_CF); + hypre_DenseBlockMatrixDestroy(r_CF); + + /*------------------------------------------------------- + * 5) Build Restriction + *-------------------------------------------------------*/ + + /* Compute C/F local mappings */ + hypre_IntArraySeparateByValue(num_points, points, sizes, CF_marker, &CF_maps); + + /* Build restriction from W (R = [-W I]) */ + hypre_MGRBuildRFromW(hypre_IntArrayArrayEntryIData(CF_maps, 0), + hypre_IntArrayArrayEntryIData(CF_maps, 1), + hypre_ParCSRMatrixGlobalNumRows(A_CF), + hypre_ParCSRMatrixGlobalNumCols(A), + hypre_ParCSRMatrixRowStarts(A_CF), + hypre_ParCSRMatrixColStarts(A), + W, &R); + + /* Set output pointers */ + *W_ptr = W; + *R_ptr = R; + + /* Free memory */ + hypre_IntArrayArrayDestroy(CF_maps); + + HYPRE_ANNOTATE_FUNC_END; + return hypre_error_flag; } diff --git a/src/parcsr_ls/par_mgr_setup.c b/src/parcsr_ls/par_mgr_setup.c index ea7914e8dd..619c53a250 100644 --- a/src/parcsr_ls/par_mgr_setup.c +++ b/src/parcsr_ls/par_mgr_setup.c @@ -31,11 +31,14 @@ hypre_MGRSetup( void 
*mgr_vdata, HYPRE_Int level_blk_size; hypre_ParCSRMatrix *RT = NULL; + hypre_ParCSRMatrix *R = NULL; hypre_ParCSRMatrix *P = NULL; hypre_ParCSRMatrix *S = NULL; hypre_ParCSRMatrix *ST = NULL; hypre_ParCSRMatrix *AT = NULL; hypre_ParCSRMatrix *Wp = NULL; + hypre_ParCSRMatrix *Wr = NULL; + hypre_ParCSRMatrix *AP = NULL; HYPRE_Int *dof_func_buff_data = NULL; HYPRE_BigInt coarse_pnts_global[2]; // TODO: Change to row_starts_cpts @@ -68,6 +71,7 @@ hypre_MGRSetup( void *mgr_vdata, HYPRE_Int * reserved_Cpoint_local_indexes = (mgr_data -> reserved_Cpoint_local_indexes); hypre_IntArray **CF_marker_array = (mgr_data -> CF_marker_array); HYPRE_Int *CF_marker; + hypre_IntArray *FC_marker; hypre_ParCSRMatrix **A_array = (mgr_data -> A_array); hypre_ParCSRMatrix **B_array = (mgr_data -> B_array); hypre_ParCSRMatrix **B_FF_array = (mgr_data -> B_FF_array); @@ -75,20 +79,20 @@ hypre_MGRSetup( void *mgr_vdata, hypre_ParCSRMatrix **P_FF_array = (mgr_data -> P_FF_array); #endif hypre_ParCSRMatrix **P_array = (mgr_data -> P_array); + hypre_ParCSRMatrix **R_array = (mgr_data -> R_array); hypre_ParCSRMatrix **RT_array = (mgr_data -> RT_array); - hypre_ParCSRMatrix *RAP_ptr = NULL; + hypre_ParCSRMatrix *RAP_ptr = NULL; hypre_ParCSRMatrix *A_FF = NULL; hypre_ParCSRMatrix *A_FC = NULL; -#if defined (HYPRE_USING_GPU) hypre_ParCSRMatrix *A_CF = NULL; hypre_ParCSRMatrix *A_CC = NULL; -#endif + hypre_Solver *aff_base; HYPRE_Solver **aff_solver = (mgr_data -> aff_solver); hypre_ParCSRMatrix **A_ff_array = (mgr_data -> A_ff_array); - hypre_ParVector **F_fine_array = (mgr_data -> F_fine_array); - hypre_ParVector **U_fine_array = (mgr_data -> U_fine_array); + hypre_ParVector **F_fine_array = (mgr_data -> F_fine_array); + hypre_ParVector **U_fine_array = (mgr_data -> U_fine_array); HYPRE_Int (*fgrid_solver_setup)(void*, void*, void*, void*); HYPRE_Int (*fgrid_solver_solve)(void*, void*, void*, void*); @@ -377,8 +381,8 @@ hypre_MGRSetup( void *mgr_vdata, (mgr_data -> num_coarse_per_level) = 
level_coarse_size; /* Free Previously allocated data, if any not destroyed */ - if (A_array || B_array || B_FF_array || - P_array || RT_array || CF_marker_array) + if (A_array || B_array || B_FF_array || + P_array || R_array || RT_array || CF_marker_array) { for (j = 1; j < (old_num_coarse_levels); j++) { @@ -409,6 +413,12 @@ hypre_MGRSetup( void *mgr_vdata, P_array[j] = NULL; } + if (R_array[j]) + { + hypre_ParCSRMatrixDestroy(R_array[j]); + R_array[j] = NULL; + } + if (RT_array[j]) { hypre_ParCSRMatrixDestroy(RT_array[j]); @@ -422,15 +432,11 @@ hypre_MGRSetup( void *mgr_vdata, } } hypre_TFree(B_array, HYPRE_MEMORY_HOST); - B_array = NULL; hypre_TFree(B_FF_array, HYPRE_MEMORY_HOST); - B_FF_array = NULL; hypre_TFree(P_array, HYPRE_MEMORY_HOST); - P_array = NULL; + hypre_TFree(R_array, HYPRE_MEMORY_HOST); hypre_TFree(RT_array, HYPRE_MEMORY_HOST); - RT_array = NULL; hypre_TFree(CF_marker_array, HYPRE_MEMORY_HOST); - CF_marker_array = NULL; } #if defined(HYPRE_USING_GPU) @@ -621,6 +627,10 @@ hypre_MGRSetup( void *mgr_vdata, { P_array = hypre_CTAlloc(hypre_ParCSRMatrix*, max_num_coarse_levels, HYPRE_MEMORY_HOST); } + if (R_array == NULL && max_num_coarse_levels > 0) + { + R_array = hypre_CTAlloc(hypre_ParCSRMatrix*, max_num_coarse_levels, HYPRE_MEMORY_HOST); + } #if defined(HYPRE_USING_GPU) if (P_FF_array == NULL && max_num_coarse_levels > 0) { @@ -798,6 +808,7 @@ hypre_MGRSetup( void *mgr_vdata, (mgr_data -> A_array) = A_array; (mgr_data -> B_array) = B_array; (mgr_data -> P_array) = P_array; + (mgr_data -> R_array) = R_array; (mgr_data -> RT_array) = RT_array; (mgr_data -> CF_marker_array) = CF_marker_array; (mgr_data -> l1_norms) = l1_norms; @@ -1046,9 +1057,9 @@ hypre_MGRSetup( void *mgr_vdata, HYPRE_ILUCreate(&(level_smoother[lev])); HYPRE_ILUSetType(level_smoother[lev], 0); HYPRE_ILUSetLevelOfFill(level_smoother[lev], 0); - HYPRE_ILUSetLocalReordering(level_smoother[lev], 1); HYPRE_ILUSetMaxIter(level_smoother[lev], level_smooth_iters[lev]); 
HYPRE_ILUSetTol(level_smoother[lev], 0.0); + HYPRE_ILUSetLocalReordering(level_smoother[lev], 0); HYPRE_ILUSetup(level_smoother[lev], A_array[lev], NULL, NULL); } else @@ -1108,19 +1119,14 @@ hypre_MGRSetup( void *mgr_vdata, interp_type[lev] = 2; } - /* Extract A_FF and A_FC when needed by MGR's interpolation/relaxation strategies */ -#if !defined(HYPRE_USING_GPU) - if ((Frelax_type[lev] == 2) || - (Frelax_type[lev] == 9) || - (Frelax_type[lev] == 99) || - (Frelax_type[lev] == 199) || - (interp_type[lev] == 12) || - (mgr_coarse_grid_method[lev] != 0)) -#endif - { - hypre_ParCSRMatrixGenerateFFFC(A_array[lev], CF_marker, coarse_pnts_global, - NULL, &A_FC, &A_FF); - } + /* Compute C/F splitting (needed by RAP computation and other operations) */ + FC_marker = hypre_IntArrayCloneDeep(CF_marker_array[lev]); + hypre_IntArrayNegate(FC_marker); + + hypre_ParCSRMatrixGenerateFFFC(A_array[lev], CF_marker, coarse_pnts_global, + NULL, &A_FC, &A_FF); + hypre_ParCSRMatrixGenerateFFFC(A_array[lev], hypre_IntArrayData(FC_marker), row_starts_fpts, + NULL, &A_CF, &A_CC); /* Build MGR interpolation */ hypre_sprintf(region_name, "Interp"); @@ -1131,8 +1137,7 @@ hypre_MGRSetup( void *mgr_vdata, { if (mgr_coarse_grid_method[lev] != 0) { - hypre_MGRBuildBlockJacobiWp(A_FF, A_FC, block_jacobi_bsize, - CF_marker, coarse_pnts_global, &Wp); + hypre_MGRBuildBlockJacobiWp(A_FF, A_FC, block_jacobi_bsize, &Wp); } hypre_MGRBuildInterp(A_array[lev], A_FF, A_FC, CF_marker, Wp, coarse_pnts_global, trunc_factor, P_max_elmts[lev], @@ -1248,16 +1253,16 @@ hypre_MGRSetup( void *mgr_vdata, hypre_BoomerAMGBuildRestrAIR(A_array[lev], CF_marker, ST, coarse_pnts_global, 1, dof_func_buff_data, filter_thresholdR, - debug_flag, &RT, is_triangular, gmres_switch); + debug_flag, &R, is_triangular, gmres_switch); } else /* distance-1.5 AIR - distance 2 locally and distance 1 across procs. 
*/ { hypre_BoomerAMGBuildRestrDist2AIR(A_array[lev], CF_marker, ST, coarse_pnts_global, 1, dof_func_buff_data, filter_thresholdR, - debug_flag, &RT, 1, is_triangular, gmres_switch); + debug_flag, &R, 1, is_triangular, gmres_switch); } - RT_array[lev] = RT; + R_array[lev] = R; hypre_GpuProfilingPopRange(); HYPRE_ANNOTATE_REGION_END("%s", region_name); @@ -1268,7 +1273,7 @@ hypre_MGRSetup( void *mgr_vdata, hypre_GpuProfilingPushRange(region_name); HYPRE_ANNOTATE_REGION_BEGIN("%s", region_name); AP = hypre_ParMatmul(A_array[lev], P_array[lev]); - RAP_ptr = hypre_ParMatmul(RT, AP); + RAP_ptr = hypre_ParMatmul(R, AP); if (num_procs > 1) { hypre_MatvecCommPkgCreate(RAP_ptr); @@ -1292,13 +1297,14 @@ hypre_MGRSetup( void *mgr_vdata, { restrict_type[lev] = 2; } - // if (restrict_type[lev] > 0) - { - hypre_MGRBuildRestrict(A_array[lev], A_FF, A_FC, CF_marker, coarse_pnts_global, - trunc_factor, P_max_elmts[lev], strong_threshold, - max_row_sum, block_num_f_points, &RT, restrict_type[lev]); - RT_array[lev] = RT; - } + + hypre_MGRBuildRestrict(A_array[lev], A_FF, A_FC, A_CF, CF_marker_array[lev], + coarse_pnts_global, trunc_factor, P_max_elmts[lev], + strong_threshold, max_row_sum, block_num_f_points, + restrict_type[lev], &Wr, &R, &RT); + R_array[lev] = R; + RT_array[lev] = RT; + hypre_GpuProfilingPopRange(); HYPRE_ANNOTATE_REGION_END("%s", region_name); @@ -1309,27 +1315,19 @@ hypre_MGRSetup( void *mgr_vdata, #if defined (HYPRE_USING_GPU) if (exec == HYPRE_EXEC_DEVICE) { - hypre_ParCSRMatrixGenerateCCCFDevice(RAP_ptr, CF_marker, - coarse_pnts_global, - S, &A_CF, &A_CC); - hypre_MGRComputeNonGalerkinCGDevice(A_FF, A_FC, A_CF, A_CC, - block_num_f_points, + Wp, Wr, block_num_f_points, mgr_coarse_grid_method[lev], truncate_cg_threshold, &RAP_ptr); - - hypre_ParCSRMatrixDestroy(A_CC); - hypre_ParCSRMatrixDestroy(A_CF); } else #endif { - hypre_MGRComputeNonGalerkinCoarseGrid(A_array[lev], Wp, RT, - block_num_f_points, - set_c_points_method, + 
hypre_MGRComputeNonGalerkinCoarseGrid(A_FF, A_FC, A_CF, A_CC, Wp, Wr, + block_num_f_points, set_c_points_method, mgr_coarse_grid_method[lev], - P_max_elmts[lev], CF_marker, &RAP_ptr); + P_max_elmts[lev], &RAP_ptr); } if (interp_type[lev] == 12) @@ -1351,10 +1349,11 @@ { restrict_type[lev] = 2; } - hypre_MGRBuildRestrict(A_array[lev], A_FF, A_FC, CF_marker, + hypre_MGRBuildRestrict(A_array[lev], A_FF, A_FC, A_CF, CF_marker_array[lev], coarse_pnts_global, trunc_factor, P_max_elmts[lev], strong_threshold, max_row_sum, block_jacobi_bsize, - &RT, restrict_type[lev]); + restrict_type[lev], &Wr, &R, &RT); + R_array[lev] = R; RT_array[lev] = RT; hypre_GpuProfilingPopRange(); HYPRE_ANNOTATE_REGION_END("%s", region_name); @@ -1362,12 +1361,28 @@ hypre_sprintf(region_name, "RAP"); hypre_GpuProfilingPushRange(region_name); HYPRE_ANNOTATE_REGION_BEGIN("%s", region_name); - RAP_ptr = hypre_ParCSRMatrixRAPKT(RT, A_array[lev], P, 1); + if (RT) + { + RAP_ptr = hypre_ParCSRMatrixRAPKT(RT, A_array[lev], P, 1); + } + else if (R) + { + AP = hypre_ParCSRMatMat(A_array[lev], P); + RAP_ptr = hypre_ParCSRMatMat(R, AP); + hypre_CSRMatrixReorder(hypre_ParCSRMatrixDiag(RAP_ptr)); + hypre_ParCSRMatrixDestroy(AP); + } + else + { + hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Expected either R or RT!"); + return hypre_error_flag; + } hypre_GpuProfilingPopRange(); HYPRE_ANNOTATE_REGION_END("%s", region_name); } } + /* TODO (VPM): truncation is also performed in hypre_MGRComputeNonGalerkinCoarseGrid */ if (truncate_cg_threshold > 0.0) { /* Truncate the coarse grid */ @@ -1383,9 +1398,11 @@ #endif } - /* Destroy temporary FC splitting */ - hypre_ParCSRMatrixDestroy(A_FC); - A_FC = NULL; + /* Destroy temporary variables */ + hypre_ParCSRMatrixDestroy(A_FC), A_FC = NULL; + hypre_ParCSRMatrixDestroy(A_CF), A_CF = NULL; + hypre_ParCSRMatrixDestroy(A_CC), A_CC = NULL; + hypre_ParCSRMatrixDestroy(Wr); Wr = NULL; 
/* User-prescribed F-solver */ if (Frelax_type[lev] == 2 || @@ -1481,19 +1498,6 @@ hypre_MGRSetup( void *mgr_vdata, A_ff_array[lev] = A_FF; } - /* TODO: refactor this block (VPM) */ -#if defined (HYPRE_USING_GPU) - hypre_ParCSRMatrix *P_FF_ptr; - hypre_IntArray *F_marker = hypre_IntArrayCloneDeep(CF_marker_array[lev]); - - hypre_IntArrayNegate(F_marker); - hypre_MGRBuildPDevice(A_array[lev], hypre_IntArrayData(F_marker), - row_starts_fpts, 0, &P_FF_ptr); - P_FF_array[lev] = P_FF_ptr; - - hypre_IntArrayDestroy(F_marker); -#endif - /* TODO: Check use of A_ff_array[lev], vectors at (lev + 1) are correct? (VPM) */ F_fine_array[lev + 1] = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A_ff_array[lev]), @@ -1508,12 +1512,22 @@ hypre_MGRSetup( void *mgr_vdata, hypre_ParVectorInitialize(U_fine_array[lev + 1]); } + /* TODO: refactor this block (VPM) */ +#if defined (HYPRE_USING_GPU) + if (exec == HYPRE_EXEC_DEVICE) + { + hypre_MGRBuildPDevice(A_array[lev], hypre_IntArrayData(FC_marker), + row_starts_fpts, 0, &P_FF_array[lev]); + } +#endif + /* Destroy A_FF if it has not been saved on A_ff_array[lev] */ if (!A_ff_array[lev]) { hypre_ParCSRMatrixDestroy(A_FF); } A_FF = NULL; + hypre_IntArrayDestroy(FC_marker); /* TODO: move this to par_mgr_coarsen.c and port to GPUs (VPM) */ /* Update coarse level indexes for next levels */ @@ -1521,6 +1535,13 @@ hypre_MGRSetup( void *mgr_vdata, { for (i = lev + 1; i < max_num_coarse_levels; i++) { + memory_location = hypre_IntArrayMemoryLocation(CF_marker_array[lev]); + if (hypre_GetActualMemLocation(memory_location) == hypre_MEMORY_DEVICE) + { + hypre_IntArrayMigrate(CF_marker_array[lev], HYPRE_MEMORY_HOST); + } + CF_marker = hypre_IntArrayData(CF_marker_array[lev]); + /* First mark indexes to be updated */ for (j = 0; j < level_coarse_size[i]; j++) { @@ -1549,6 +1570,11 @@ hypre_MGRSetup( void *mgr_vdata, { CF_marker[level_coarse_indexes[lev][j]] = CMRK; } + + if (hypre_GetActualMemLocation(memory_location) == hypre_MEMORY_DEVICE) + { + 
hypre_IntArrayMigrate(CF_marker_array[lev], HYPRE_MEMORY_DEVICE); + } } } diff --git a/src/parcsr_ls/par_mgr_solve.c b/src/parcsr_ls/par_mgr_solve.c index 46f9fd61ca..244ff14726 100644 --- a/src/parcsr_ls/par_mgr_solve.c +++ b/src/parcsr_ls/par_mgr_solve.c @@ -524,6 +524,7 @@ hypre_MGRCycle( void *mgr_vdata, hypre_ParCSRMatrix **A_array = (mgr_data -> A_array); hypre_ParCSRMatrix **RT_array = (mgr_data -> RT_array); hypre_ParCSRMatrix **P_array = (mgr_data -> P_array); + hypre_ParCSRMatrix **R_array = (mgr_data -> R_array); #if defined(HYPRE_USING_GPU) hypre_ParCSRMatrix **B_array = (mgr_data -> B_array); hypre_ParCSRMatrix **B_FF_array = (mgr_data -> B_FF_array); @@ -573,7 +574,6 @@ hypre_MGRCycle( void *mgr_vdata, HYPRE_Int *restrict_type = (mgr_data -> restrict_type); HYPRE_Int pre_smoothing = (mgr_data -> global_smooth_cycle) == 1 ? 1 : 0; HYPRE_Int post_smoothing = (mgr_data -> global_smooth_cycle) == 2 ? 1 : 0; - HYPRE_Int use_air = 0; HYPRE_Int my_id; char region_name[1024]; char msg[1024]; @@ -1056,19 +1056,13 @@ hypre_MGRCycle( void *mgr_vdata, hypre_GpuProfilingPopRange(); HYPRE_ANNOTATE_REGION_END("%s", region_name); - if ((restrict_type[fine_grid] == 4) || - (restrict_type[fine_grid] == 5)) - { - use_air = 1; - } - hypre_sprintf(region_name, "Restrict"); hypre_GpuProfilingPushRange(region_name); HYPRE_ANNOTATE_REGION_BEGIN("%s", region_name); - if (use_air) + if (R_array[fine_grid]) { /* no transpose necessary for R */ - hypre_ParCSRMatrixMatvec(fp_one, RT_array[fine_grid], Vtemp, + hypre_ParCSRMatrixMatvec(fp_one, R_array[fine_grid], Vtemp, fp_zero, F_array[coarse_grid]); } else diff --git a/src/parcsr_ls/par_mgr_stats.c b/src/parcsr_ls/par_mgr_stats.c index 805e527823..b58bc4e7d8 100644 --- a/src/parcsr_ls/par_mgr_stats.c +++ b/src/parcsr_ls/par_mgr_stats.c @@ -256,6 +256,9 @@ hypre_MGRGetRestrictionName(hypre_ParMGRData *mgr_data, case 13: return "CPR-like"; + case 14: + return "Blk-ColLumped"; + default: return "Classical"; } @@ -272,7 +275,7 @@ 
hypre_MGRGetCoarseGridName(hypre_ParMGRData *mgr_data, switch (hypre_ParMGRDataCoarseGridMethodI(mgr_data, level)) { case 0: - return "RAP"; + return "Glk-RAP"; case 1: return "NG-BlkDiag"; @@ -286,6 +289,9 @@ hypre_MGRGetCoarseGridName(hypre_ParMGRData *mgr_data, case 4: return "NG-ApproxInv"; + case 5: + return "Glk-RAI"; + default: return "Unknown"; } diff --git a/src/parcsr_ls/protos.h b/src/parcsr_ls/protos.h index 162cdf6c7b..dcfd4b5635 100644 --- a/src/parcsr_ls/protos.h +++ b/src/parcsr_ls/protos.h @@ -2168,6 +2168,7 @@ HYPRE_Int hypre_MGRBlockRelaxSolve( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Int n_block, HYPRE_Int left_size, HYPRE_Int method, HYPRE_Real *diaginv, hypre_ParVector *Vtemp ); +HYPRE_Int hypre_BlockDiagInvLapack( HYPRE_Real *diag, HYPRE_Int N, HYPRE_Int blk_size ); HYPRE_Int hypre_MGRBlockRelaxSetup( hypre_ParCSRMatrix *A, HYPRE_Int blk_size, HYPRE_Real **diaginvptr ); //HYPRE_Int hypre_blockRelax(hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_ParVector *u, @@ -2194,12 +2195,14 @@ HYPRE_Int hypre_MGRAddVectorR( hypre_IntArray *CF_marker, HYPRE_Int point_type, hypre_ParVector **toVector ); HYPRE_Int hypre_MGRTruncateAcfCPRDevice( hypre_ParCSRMatrix *A_CF, hypre_ParCSRMatrix **A_CF_new_ptr ); -HYPRE_Int hypre_MGRTruncateAcfCPR( hypre_ParCSRMatrix *A_CF, hypre_ParCSRMatrix **A_CF_new_ptr ); -HYPRE_Int hypre_MGRComputeNonGalerkinCoarseGrid( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *P, - hypre_ParCSRMatrix *RT, HYPRE_Int bsize, - HYPRE_Int ordering, HYPRE_Int method, - HYPRE_Int Pmax, HYPRE_Int *CF_marker, - hypre_ParCSRMatrix **A_H_ptr ); +HYPRE_Int hypre_MGRComputeNonGalerkinCoarseGrid(hypre_ParCSRMatrix *A_FF, + hypre_ParCSRMatrix *A_FC, + hypre_ParCSRMatrix *A_CF, + hypre_ParCSRMatrix *A_CC, + hypre_ParCSRMatrix *Wp, hypre_ParCSRMatrix *Wr, + HYPRE_Int bsize, HYPRE_Int ordering, + HYPRE_Int method, HYPRE_Int max_elmts, + hypre_ParCSRMatrix **A_H_ptr); HYPRE_Int hypre_MGRSetAffSolverType( void *systg_vdata, HYPRE_Int 
*aff_solver_type ); HYPRE_Int hypre_MGRSetCoarseSolverType( void *systg_vdata, HYPRE_Int coarse_solver_type ); HYPRE_Int hypre_MGRSetCoarseSolverIter( void *systg_vdata, HYPRE_Int coarse_solver_iter ); @@ -2257,23 +2260,23 @@ HYPRE_Int hypre_MGRBuildInterp( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *A_FF, hypre_ParCSRMatrix **P_tr, HYPRE_Int method, HYPRE_Int num_sweeps_post ); HYPRE_Int hypre_MGRBuildRestrict( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *A_FF, - hypre_ParCSRMatrix *A_FC, HYPRE_Int *CF_marker, - HYPRE_BigInt *num_cpts_global, HYPRE_Real trunc_factor, - HYPRE_Int max_elmts, HYPRE_Real strong_threshold, - HYPRE_Real max_row_sum, HYPRE_Int blk_size, - hypre_ParCSRMatrix **RT, HYPRE_Int method ); + hypre_ParCSRMatrix *A_FC, hypre_ParCSRMatrix *A_CF, + hypre_IntArray *CF_marker, HYPRE_BigInt *num_cpts_global, + HYPRE_Real trunc_factor, HYPRE_Int max_elmts, + HYPRE_Real strong_threshold, HYPRE_Real max_row_sum, + HYPRE_Int blk_size, HYPRE_Int method, + hypre_ParCSRMatrix **W_ptr, hypre_ParCSRMatrix **R_ptr, + hypre_ParCSRMatrix **RT_ptr ); HYPRE_Int hypre_MGRBuildPFromWp( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *Wp, HYPRE_Int *CF_marker, hypre_ParCSRMatrix **P_ptr ); HYPRE_Int hypre_MGRBuildPFromWpHost( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *Wp, HYPRE_Int *CF_marker, hypre_ParCSRMatrix **P_ptr ); HYPRE_Int hypre_MGRBuildBlockJacobiWp( hypre_ParCSRMatrix *A_FF, hypre_ParCSRMatrix *A_FC, - HYPRE_Int blk_size, HYPRE_Int *CF_marker, - HYPRE_BigInt *cpts_starts_in, - hypre_ParCSRMatrix **Wp_ptr ); + HYPRE_Int blk_size, hypre_ParCSRMatrix **Wp_ptr ); HYPRE_Int hypre_MGRBuildPBlockJacobi( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *A_FF, hypre_ParCSRMatrix *A_FC, hypre_ParCSRMatrix *Wp, HYPRE_Int blk_size, HYPRE_Int *CF_marker, - HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix **P_ptr ); + hypre_ParCSRMatrix **P_ptr ); HYPRE_Int hypre_MGRBuildP( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *num_cpts_global, HYPRE_Int method, HYPRE_Int debug_flag, 
hypre_ParCSRMatrix **P_ptr ); @@ -2283,13 +2286,18 @@ HYPRE_Int hypre_MGRBuildPHost( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_Int hypre_MGRBuildInterpApproximateInverse( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *num_cpts_global, hypre_ParCSRMatrix **P_ptr ); -HYPRE_Int hypre_MGRGetAcfCPR( hypre_ParCSRMatrix *A, HYPRE_Int blk_size, - HYPRE_Int *c_marker, HYPRE_Int *f_marker, - hypre_ParCSRMatrix **A_CF_ptr ); HYPRE_Int hypre_MGRTruncateAcfCPR( hypre_ParCSRMatrix *A_CF, hypre_ParCSRMatrix **A_CF_new_ptr ); -HYPRE_Int hypre_MGRBuildRFromW( HYPRE_Int *C_map, HYPRE_Int *F_map, HYPRE_BigInt *row_starts_R, - HYPRE_BigInt *col_starts_R, hypre_ParCSRMatrix *W, - hypre_ParCSRMatrix **R_ptr ); +HYPRE_Int hypre_MGRBuildRFromW( HYPRE_Int *C_map, HYPRE_Int *F_map, + HYPRE_BigInt global_num_rows_R, HYPRE_BigInt global_num_cols_R, + HYPRE_BigInt *row_starts_R, HYPRE_BigInt *col_starts_R, + hypre_ParCSRMatrix *W, hypre_ParCSRMatrix **R_ptr ); +HYPRE_Int hypre_MGRBlockColLumpedRestrict( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *A_FF, + hypre_ParCSRMatrix *A_CF, hypre_IntArray *CF_marker, + HYPRE_Int block_dim, hypre_ParCSRMatrix **W_ptr, + hypre_ParCSRMatrix **R_ptr); +HYPRE_Int hypre_MGRColLumpedRestrict(hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *A_FF, + hypre_ParCSRMatrix *A_CF, hypre_IntArray *CF_marker, + hypre_ParCSRMatrix **W_ptr, hypre_ParCSRMatrix **R_ptr); /* par_mgr_coarsen.c */ HYPRE_Int hypre_MGRCoarseParms( MPI_Comm comm, HYPRE_Int num_rows, hypre_IntArray *CF_marker, @@ -2317,6 +2325,7 @@ HYPRE_Int hypre_ParCSRMatrixBlockDiagMatrixDevice( hypre_ParCSRMatrix *A, HYPRE_ hypre_ParCSRMatrix **B_ptr ); HYPRE_Int hypre_MGRComputeNonGalerkinCGDevice( hypre_ParCSRMatrix *A_FF, hypre_ParCSRMatrix *A_FC, hypre_ParCSRMatrix *A_CF, hypre_ParCSRMatrix *A_CC, + hypre_ParCSRMatrix *Wp, hypre_ParCSRMatrix *Wr, HYPRE_Int blk_size, HYPRE_Int method, HYPRE_Complex threshold, hypre_ParCSRMatrix **A_H_ptr ); diff --git a/src/parcsr_mv/_hypre_parcsr_mv.h 
b/src/parcsr_mv/_hypre_parcsr_mv.h index 7481611826..10e2ce4e1f 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv.h +++ b/src/parcsr_mv/_hypre_parcsr_mv.h @@ -266,10 +266,13 @@ typedef struct hypre_ParVector_struct #define hypre_ParVectorLastIndex(vector) ((vector) -> last_index) #define hypre_ParVectorPartitioning(vector) ((vector) -> partitioning) #define hypre_ParVectorActualLocalSize(vector) ((vector) -> actual_local_size) -#define hypre_ParVectorLocalVector(vector) ((vector) -> local_vector) #define hypre_ParVectorOwnsData(vector) ((vector) -> owns_data) #define hypre_ParVectorAllZeros(vector) ((vector) -> all_zeros) -#define hypre_ParVectorNumVectors(vector) (hypre_VectorNumVectors((vector) -> local_vector)) +#define hypre_ParVectorLocalVector(vector) ((vector) -> local_vector) +#define hypre_ParVectorLocalSize(vector) ((vector) -> local_vector -> size) +#define hypre_ParVectorLocalData(vector) ((vector) -> local_vector -> data) +#define hypre_ParVectorLocalStorage(vector) ((vector) -> local_vector -> multivec_storage_method) +#define hypre_ParVectorNumVectors(vector) ((vector) -> local_vector -> num_vectors) #define hypre_ParVectorEntryI(vector, i) (hypre_VectorEntryI((vector) -> local_vector, i)) #define hypre_ParVectorEntryIJ(vector, i, j) (hypre_VectorEntryIJ((vector) -> local_vector, i, j)) @@ -1122,6 +1125,17 @@ HYPRE_Int hypre_ParCSRMatrixSetDNumNonzeros ( hypre_ParCSRMatrix *matrix ); HYPRE_Int hypre_ParCSRMatrixSetNumRownnz ( hypre_ParCSRMatrix *matrix ); HYPRE_Int hypre_ParCSRMatrixSetDataOwner ( hypre_ParCSRMatrix *matrix, HYPRE_Int owns_data ); HYPRE_Int hypre_ParCSRMatrixSetPatternOnly( hypre_ParCSRMatrix *matrix, HYPRE_Int pattern_only); +hypre_ParCSRMatrix* hypre_ParCSRMatrixCreateFromDenseBlockMatrix(MPI_Comm comm, + HYPRE_BigInt global_num_rows, + HYPRE_BigInt global_num_cols, + HYPRE_BigInt *row_starts, + HYPRE_BigInt *col_starts, + hypre_DenseBlockMatrix *B); +hypre_ParCSRMatrix* hypre_ParCSRMatrixCreateFromParVector(hypre_ParVector *b, + 
HYPRE_BigInt global_num_rows, + HYPRE_BigInt global_num_cols, + HYPRE_BigInt *row_starts, + HYPRE_BigInt *col_starts); hypre_ParCSRMatrix *hypre_ParCSRMatrixRead ( MPI_Comm comm, const char *file_name ); HYPRE_Int hypre_ParCSRMatrixPrint ( hypre_ParCSRMatrix *matrix, const char *file_name ); HYPRE_Int hypre_ParCSRMatrixPrintIJ ( const hypre_ParCSRMatrix *matrix, const HYPRE_Int base_i, diff --git a/src/parcsr_mv/par_csr_matop.c b/src/parcsr_mv/par_csr_matop.c index 021293de35..043a82778b 100644 --- a/src/parcsr_mv/par_csr_matop.c +++ b/src/parcsr_mv/par_csr_matop.c @@ -6695,8 +6695,8 @@ hypre_ParCSRMatrixBlockColSumHost( hypre_ParCSRMatrix *A, /* Communication variables */ hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); - HYPRE_Int *send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg); - HYPRE_Int *send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); + HYPRE_Int *send_map_elmts; + HYPRE_Int *send_map_starts; #if defined(HYPRE_USING_PERSISTENT_COMM) hypre_ParCSRPersistentCommHandle *comm_handle; #else @@ -6704,7 +6704,9 @@ hypre_ParCSRMatrixBlockColSumHost( hypre_ParCSRMatrix *A, #endif /* Update commpkg offsets */ - hypre_ParCSRCommPkgUpdateVecStarts(comm_pkg, num_cols_block_B, 1, num_cols_block_B); + hypre_ParCSRCommPkgUpdateVecStarts(comm_pkg, 1, 0, 1); + send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg); + send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); /* Allocate the recv and send buffers */ #if defined(HYPRE_USING_PERSISTENT_COMM) @@ -6712,22 +6714,20 @@ hypre_ParCSRMatrixBlockColSumHost( hypre_ParCSRMatrix *A, recv_data = (HYPRE_Complex *) hypre_ParCSRCommHandleRecvDataBuffer(comm_handle); send_data = (HYPRE_Complex *) hypre_ParCSRCommHandleSendDataBuffer(comm_handle); send_data = hypre_Memset((void *) send_data, 0, - (size_t) (num_cols_offd_A * num_cols_block_B) * sizeof(HYPRE_Complex), + (size_t) (num_cols_offd_A) * sizeof(HYPRE_Complex), 
memory_location); #else - send_data = hypre_CTAlloc(HYPRE_Complex, num_cols_offd_A * num_cols_block_B, memory_location); + send_data = hypre_CTAlloc(HYPRE_Complex, num_cols_offd_A, memory_location); recv_data = hypre_TAlloc(HYPRE_Complex, send_map_starts[num_sends], memory_location); #endif /* Pack send data */ for (i = 0; i < num_rows_offd_A; i++) { - jr = i % num_cols_block_B; for (j = A_offd_i[i]; j < A_offd_i[i + 1]; j++) { col = A_offd_j[j]; - - send_data[col * num_cols_block_B + jr] += A_offd_data[j]; + send_data[col] += A_offd_data[j]; } } @@ -6904,8 +6904,8 @@ hypre_ParCSRMatrixColSumHost( hypre_ParCSRMatrix *A, /* Communication variables */ hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); - HYPRE_Int *send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg); - HYPRE_Int *send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); + HYPRE_Int *send_map_elmts; + HYPRE_Int *send_map_starts; #if defined(HYPRE_USING_PERSISTENT_COMM) hypre_ParCSRPersistentCommHandle *comm_handle; #else @@ -6914,6 +6914,8 @@ hypre_ParCSRMatrixColSumHost( hypre_ParCSRMatrix *A, /* Update commpkg offsets */ hypre_ParCSRCommPkgUpdateVecStarts(comm_pkg, 1, 0, 1); + send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg); + send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); /* Allocate the recv and send buffers */ #if defined(HYPRE_USING_PERSISTENT_COMM) diff --git a/src/parcsr_mv/par_csr_matrix.c b/src/parcsr_mv/par_csr_matrix.c index 9dc774d3fa..fa50724036 100644 --- a/src/parcsr_mv/par_csr_matrix.c +++ b/src/parcsr_mv/par_csr_matrix.c @@ -187,7 +187,8 @@ hypre_ParCSRMatrixDestroy( hypre_ParCSRMatrix *matrix ) hypre_TFree(hypre_ParCSRMatrixRowindices(matrix), memory_location); hypre_TFree(hypre_ParCSRMatrixRowvalues(matrix), memory_location); - if ( hypre_ParCSRMatrixAssumedPartition(matrix) && hypre_ParCSRMatrixOwnsAssumedPartition(matrix) ) + if ( hypre_ParCSRMatrixAssumedPartition(matrix) 
&& + hypre_ParCSRMatrixOwnsAssumedPartition(matrix) ) { hypre_AssumedPartitionDestroy(hypre_ParCSRMatrixAssumedPartition(matrix)); } @@ -215,7 +216,7 @@ hypre_ParCSRMatrixDestroy( hypre_ParCSRMatrix *matrix ) } /*-------------------------------------------------------------------------- - * hypre_ParCSRMatrixInitialize + * hypre_ParCSRMatrixInitialize_v2 *--------------------------------------------------------------------------*/ HYPRE_Int @@ -238,6 +239,10 @@ hypre_ParCSRMatrixInitialize_v2( hypre_ParCSRMatrix *matrix, return hypre_error_flag; } +/*-------------------------------------------------------------------------- + * hypre_ParCSRMatrixInitialize + *--------------------------------------------------------------------------*/ + HYPRE_Int hypre_ParCSRMatrixInitialize( hypre_ParCSRMatrix *matrix ) { @@ -281,14 +286,23 @@ hypre_ParCSRMatrixClone_v2(hypre_ParCSRMatrix *A, return S; } +/*-------------------------------------------------------------------------- + * hypre_ParCSRMatrixClone + *--------------------------------------------------------------------------*/ + hypre_ParCSRMatrix* hypre_ParCSRMatrixClone(hypre_ParCSRMatrix *A, HYPRE_Int copy_data) { return hypre_ParCSRMatrixClone_v2(A, copy_data, hypre_ParCSRMatrixMemoryLocation(A)); } +/*-------------------------------------------------------------------------- + * hypre_ParCSRMatrixMigrate + *--------------------------------------------------------------------------*/ + HYPRE_Int -hypre_ParCSRMatrixMigrate(hypre_ParCSRMatrix *A, HYPRE_MemoryLocation memory_location) +hypre_ParCSRMatrixMigrate(hypre_ParCSRMatrix *A, + HYPRE_MemoryLocation memory_location) { if (!A) { @@ -311,8 +325,13 @@ hypre_ParCSRMatrixMigrate(hypre_ParCSRMatrix *A, HYPRE_MemoryLocation memory_loc return hypre_error_flag; } +/*-------------------------------------------------------------------------- + * hypre_ParCSRMatrixSetNumNonzeros_core + *--------------------------------------------------------------------------*/ + HYPRE_Int 
-hypre_ParCSRMatrixSetNumNonzeros_core( hypre_ParCSRMatrix *matrix, const char* format ) +hypre_ParCSRMatrixSetNumNonzeros_core( hypre_ParCSRMatrix *matrix, + const char *format ) { MPI_Comm comm; hypre_CSRMatrix *diag; @@ -369,6 +388,7 @@ hypre_ParCSRMatrixSetNumNonzeros_core( hypre_ParCSRMatrix *matrix, const char* f /*-------------------------------------------------------------------------- * hypre_ParCSRMatrixSetNumNonzeros *--------------------------------------------------------------------------*/ + HYPRE_Int hypre_ParCSRMatrixSetNumNonzeros( hypre_ParCSRMatrix *matrix ) { @@ -467,15 +487,229 @@ hypre_ParCSRMatrixSetPatternOnly( hypre_ParCSRMatrix *matrix, return hypre_error_flag; } - hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(matrix); - if (diag) { hypre_CSRMatrixSetPatternOnly(diag, pattern_only); } + if (hypre_ParCSRMatrixDiag(matrix)) + { + hypre_CSRMatrixSetPatternOnly(hypre_ParCSRMatrixDiag(matrix), pattern_only); + } - hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(matrix); - if (offd) { hypre_CSRMatrixSetPatternOnly(offd, pattern_only); } + if (hypre_ParCSRMatrixOffd(matrix)) + { + hypre_CSRMatrixSetPatternOnly(hypre_ParCSRMatrixOffd(matrix), pattern_only); + } return hypre_error_flag; } +/*-------------------------------------------------------------------------- + * hypre_ParCSRMatrixCreateFromDenseBlockMatrix + *--------------------------------------------------------------------------*/ + +hypre_ParCSRMatrix* +hypre_ParCSRMatrixCreateFromDenseBlockMatrix(MPI_Comm comm, + HYPRE_BigInt global_num_rows, + HYPRE_BigInt global_num_cols, + HYPRE_BigInt *row_starts, + HYPRE_BigInt *col_starts, + hypre_DenseBlockMatrix *B) +{ + /* Input matrix variables */ + HYPRE_Int num_rows_diag = hypre_DenseBlockMatrixNumRows(B); + HYPRE_Int num_nonzeros_diag = hypre_DenseBlockMatrixNumNonzeros(B); + HYPRE_Int num_rows_block = hypre_DenseBlockMatrixNumRowsBlock(B); + HYPRE_Int num_cols_block = hypre_DenseBlockMatrixNumColsBlock(B); + HYPRE_Int num_cols_offd = 0; 
+ HYPRE_Int num_nonzeros_offd = 0; + HYPRE_MemoryLocation memory_location = hypre_DenseBlockMatrixMemoryLocation(B); + + /* Output matrix variables */ + hypre_ParCSRMatrix *A; + hypre_CSRMatrix *A_diag; + hypre_CSRMatrix *A_offd; + HYPRE_Int *A_diag_i; + HYPRE_Int *A_diag_j; + + /* Local variables */ + HYPRE_Int i, j, ib; + + /* Create output matrix */ + A = hypre_ParCSRMatrixCreate(comm, global_num_rows, global_num_cols, + row_starts, col_starts, num_cols_offd, + num_nonzeros_diag, num_nonzeros_offd); + A_diag = hypre_ParCSRMatrixDiag(A); + A_offd = hypre_ParCSRMatrixOffd(A); + + /* Set memory locations */ + hypre_CSRMatrixMemoryLocation(A_diag) = memory_location; + hypre_CSRMatrixMemoryLocation(A_offd) = memory_location; + + /* Set diag's data pointer */ + if (hypre_DenseBlockMatrixOwnsData(B)) + { + hypre_CSRMatrixData(A_diag) = hypre_DenseBlockMatrixData(B); + } + else + { + hypre_CSRMatrixData(A_diag) = hypre_CTAlloc(HYPRE_Complex, + num_nonzeros_diag, + memory_location); + hypre_TMemcpy(hypre_CSRMatrixData(A_diag), + hypre_DenseBlockMatrixData(B), + HYPRE_Complex, + num_nonzeros_diag, + memory_location, memory_location); + } + hypre_DenseBlockMatrixOwnsData(B) = 0; + + /* Set diag's row pointer and column indices */ + A_diag_i = hypre_CTAlloc(HYPRE_Int, num_rows_diag + 1, HYPRE_MEMORY_HOST); + A_diag_j = hypre_CTAlloc(HYPRE_Int, num_nonzeros_diag, HYPRE_MEMORY_HOST); + +#ifdef HYPRE_USING_OPENMP + #pragma omp parallel for private(i, ib, j) HYPRE_SMP_SCHEDULE +#endif + for (i = 0; i < num_rows_diag; i++) + { + ib = i / num_rows_block; + A_diag_i[i] = i * num_cols_block; + for (j = A_diag_i[i]; j < (i + 1) * num_cols_block; j++) + { + A_diag_j[j] = ib * num_cols_block + (j - A_diag_i[i]); + } + } + A_diag_i[num_rows_diag] = num_rows_diag * num_cols_block; + + /* Migrate to dest. 
memory location */ + if (memory_location != HYPRE_MEMORY_HOST) + { + hypre_CSRMatrixI(A_diag) = hypre_TAlloc(HYPRE_Int, num_rows_diag + 1, memory_location); + hypre_CSRMatrixJ(A_diag) = hypre_TAlloc(HYPRE_Int, num_nonzeros_diag, memory_location); + + hypre_TMemcpy(hypre_CSRMatrixI(A_diag), A_diag_i, + HYPRE_Int, num_rows_diag + 1, + memory_location, HYPRE_MEMORY_HOST); + + hypre_TMemcpy(hypre_CSRMatrixJ(A_diag), A_diag_j, + HYPRE_Int, num_nonzeros_diag, + memory_location, HYPRE_MEMORY_HOST); + } + else + { + hypre_CSRMatrixI(A_diag) = A_diag_i; + hypre_CSRMatrixJ(A_diag) = A_diag_j; + } + + return A; +} + +/*-------------------------------------------------------------------------- + * hypre_ParCSRMatrixCreateFromParVector + *--------------------------------------------------------------------------*/ + +hypre_ParCSRMatrix* +hypre_ParCSRMatrixCreateFromParVector(hypre_ParVector *b, + HYPRE_BigInt global_num_rows, + HYPRE_BigInt global_num_cols, + HYPRE_BigInt *row_starts, + HYPRE_BigInt *col_starts) +{ + /* Input vector variables */ + MPI_Comm comm = hypre_ParVectorComm(b); + hypre_Vector *local_vector = hypre_ParVectorLocalVector(b); + HYPRE_MemoryLocation memory_location = hypre_ParVectorMemoryLocation(b); + + /* Auxiliary variables */ + HYPRE_Int num_rows = (HYPRE_Int) row_starts[1] - row_starts[0]; + HYPRE_Int num_cols = (HYPRE_Int) col_starts[1] - col_starts[0]; + HYPRE_Int num_nonzeros = hypre_min(num_rows, num_cols); + + /* Output matrix variables */ + hypre_ParCSRMatrix *A; + hypre_CSRMatrix *A_diag; + hypre_CSRMatrix *A_offd; + HYPRE_Int *A_diag_i; + HYPRE_Int *A_diag_j; + + /* Local variables */ + HYPRE_Int i; + + /* Sanity check */ + if (hypre_ParVectorNumVectors(b) > 1) + { + hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Not implemented for multi-component vectors"); + return NULL; + } + + /* Create output matrix */ + A = hypre_ParCSRMatrixCreate(comm, global_num_rows, global_num_cols, + row_starts, col_starts, 0, num_nonzeros, 0); + A_diag = 
hypre_ParCSRMatrixDiag(A); + A_offd = hypre_ParCSRMatrixOffd(A); + + /* Set memory locations */ + hypre_CSRMatrixMemoryLocation(A_diag) = memory_location; + hypre_CSRMatrixMemoryLocation(A_offd) = memory_location; + + /* Set diag's data pointer */ + if (hypre_VectorOwnsData(local_vector)) + { + hypre_CSRMatrixData(A_diag) = hypre_VectorData(local_vector); + hypre_VectorOwnsData(local_vector) = 0; + } + else + { + hypre_CSRMatrixData(A_diag) = hypre_CTAlloc(HYPRE_Complex, num_nonzeros, memory_location); + hypre_TMemcpy(hypre_CSRMatrixData(A_diag), + hypre_VectorData(local_vector), + HYPRE_Complex, num_nonzeros, + memory_location, memory_location); + } + + /* Set diag's row pointer and column indices */ + A_diag_i = hypre_CTAlloc(HYPRE_Int, num_rows + 1, HYPRE_MEMORY_HOST); + A_diag_j = hypre_CTAlloc(HYPRE_Int, num_nonzeros, HYPRE_MEMORY_HOST); + +#ifdef HYPRE_USING_OPENMP + #pragma omp parallel for HYPRE_SMP_SCHEDULE +#endif + for (i = 0; i < num_nonzeros; i++) + { + A_diag_i[i] = A_diag_j[i] = i; + } + +#ifdef HYPRE_USING_OPENMP + #pragma omp parallel for HYPRE_SMP_SCHEDULE +#endif + for (i = num_nonzeros; i < num_rows + 1; i++) + { + A_diag_i[i] = num_nonzeros; + } + + /* Initialize offd portion */ + hypre_CSRMatrixInitialize_v2(A_offd, 0, memory_location); + + /* Migrate to dest. 
memory location */ + if (memory_location != HYPRE_MEMORY_HOST) + { + hypre_CSRMatrixI(A_diag) = hypre_TAlloc(HYPRE_Int, num_rows + 1, memory_location); + hypre_CSRMatrixJ(A_diag) = hypre_TAlloc(HYPRE_Int, num_nonzeros, memory_location); + + hypre_TMemcpy(hypre_CSRMatrixI(A_diag), A_diag_i, + HYPRE_Int, num_rows + 1, + memory_location, HYPRE_MEMORY_HOST); + + hypre_TMemcpy(hypre_CSRMatrixJ(A_diag), A_diag_j, + HYPRE_Int, num_nonzeros, + memory_location, HYPRE_MEMORY_HOST); + } + else + { + hypre_CSRMatrixI(A_diag) = A_diag_i; + hypre_CSRMatrixJ(A_diag) = A_diag_j; + } + + return A; +} + /*-------------------------------------------------------------------------- * hypre_ParCSRMatrixRead *--------------------------------------------------------------------------*/ @@ -2556,8 +2790,9 @@ hypre_FillResponseParToCSRMatrix( void *p_recv_contact_buf, * the matrix. *--------------------------------------------------------------------------*/ -hypre_ParCSRMatrix * hypre_ParCSRMatrixUnion( hypre_ParCSRMatrix * A, - hypre_ParCSRMatrix * B ) +hypre_ParCSRMatrix* +hypre_ParCSRMatrixUnion( hypre_ParCSRMatrix *A, + hypre_ParCSRMatrix *B ) { hypre_ParCSRMatrix *C; HYPRE_BigInt *col_map_offd_C = NULL; @@ -2609,7 +2844,10 @@ hypre_ParCSRMatrix * hypre_ParCSRMatrixUnion( hypre_ParCSRMatrix * A, return C; } -/* Perform dual truncation of ParCSR matrix. +/*-------------------------------------------------------------------------- + * hypre_ParCSRMatrixTruncate + * + * Perform dual truncation of ParCSR matrix. 
* This code is adapted from original BoomerAMGInterpTruncate() * A: parCSR matrix to be modified * tol: relative tolerance or truncation factor for dropping small terms @@ -2622,7 +2860,8 @@ hypre_ParCSRMatrix * hypre_ParCSRMatrixUnion( hypre_ParCSRMatrix * A, * -- 0 = infinity-norm * -- 1 = 1-norm * -- 2 = 2-norm -*/ + *--------------------------------------------------------------------------*/ + HYPRE_Int hypre_ParCSRMatrixTruncate(hypre_ParCSRMatrix *A, HYPRE_Real tol, @@ -3212,6 +3451,10 @@ hypre_ParCSRMatrixTruncate(hypre_ParCSRMatrix *A, return ierr; } +/*-------------------------------------------------------------------------- + * hypre_ParCSRMatrixSetConstantValues + *--------------------------------------------------------------------------*/ + HYPRE_Int hypre_ParCSRMatrixSetConstantValues( hypre_ParCSRMatrix *A, HYPRE_Complex value ) @@ -3222,6 +3465,10 @@ hypre_ParCSRMatrixSetConstantValues( hypre_ParCSRMatrix *A, return hypre_error_flag; } +/*-------------------------------------------------------------------------- + * hypre_ParCSRMatrixCopyColMapOffdToDevice + *--------------------------------------------------------------------------*/ + void hypre_ParCSRMatrixCopyColMapOffdToDevice(hypre_ParCSRMatrix *A) { @@ -3244,6 +3491,10 @@ hypre_ParCSRMatrixCopyColMapOffdToDevice(hypre_ParCSRMatrix *A) #endif } +/*-------------------------------------------------------------------------- + * hypre_ParCSRMatrixCopyColMapOffdToHost + *--------------------------------------------------------------------------*/ + void hypre_ParCSRMatrixCopyColMapOffdToHost(hypre_ParCSRMatrix *A) { diff --git a/src/parcsr_mv/par_csr_matrix_stats.c b/src/parcsr_mv/par_csr_matrix_stats.c index 7925d1a203..6842272a22 100644 --- a/src/parcsr_mv/par_csr_matrix_stats.c +++ b/src/parcsr_mv/par_csr_matrix_stats.c @@ -345,6 +345,12 @@ hypre_ParCSRMatrixStatsArrayCompute(HYPRE_Int num_matrices, HYPRE_BigInt global_num_rows; HYPRE_Real global_size; + /* Sanity check */ + if (num_matrices < 
1) + { + return hypre_error_flag; + } + /* We assume all MPI communicators are equal */ comm = hypre_ParCSRMatrixComm(matrices[0]); diff --git a/src/parcsr_mv/par_vector.h b/src/parcsr_mv/par_vector.h index fe17bdfbb0..766229040d 100644 --- a/src/parcsr_mv/par_vector.h +++ b/src/parcsr_mv/par_vector.h @@ -54,10 +54,13 @@ typedef struct hypre_ParVector_struct #define hypre_ParVectorLastIndex(vector) ((vector) -> last_index) #define hypre_ParVectorPartitioning(vector) ((vector) -> partitioning) #define hypre_ParVectorActualLocalSize(vector) ((vector) -> actual_local_size) -#define hypre_ParVectorLocalVector(vector) ((vector) -> local_vector) #define hypre_ParVectorOwnsData(vector) ((vector) -> owns_data) #define hypre_ParVectorAllZeros(vector) ((vector) -> all_zeros) -#define hypre_ParVectorNumVectors(vector) (hypre_VectorNumVectors((vector) -> local_vector)) +#define hypre_ParVectorLocalVector(vector) ((vector) -> local_vector) +#define hypre_ParVectorLocalSize(vector) ((vector) -> local_vector -> size) +#define hypre_ParVectorLocalData(vector) ((vector) -> local_vector -> data) +#define hypre_ParVectorLocalStorage(vector) ((vector) -> local_vector -> multivec_storage_method) +#define hypre_ParVectorNumVectors(vector) ((vector) -> local_vector -> num_vectors) #define hypre_ParVectorEntryI(vector, i) (hypre_VectorEntryI((vector) -> local_vector, i)) #define hypre_ParVectorEntryIJ(vector, i, j) (hypre_VectorEntryIJ((vector) -> local_vector, i, j)) diff --git a/src/parcsr_mv/protos.h b/src/parcsr_mv/protos.h index fcdaf8e565..b2666faf66 100644 --- a/src/parcsr_mv/protos.h +++ b/src/parcsr_mv/protos.h @@ -464,6 +464,17 @@ HYPRE_Int hypre_ParCSRMatrixSetDNumNonzeros ( hypre_ParCSRMatrix *matrix ); HYPRE_Int hypre_ParCSRMatrixSetNumRownnz ( hypre_ParCSRMatrix *matrix ); HYPRE_Int hypre_ParCSRMatrixSetDataOwner ( hypre_ParCSRMatrix *matrix, HYPRE_Int owns_data ); HYPRE_Int hypre_ParCSRMatrixSetPatternOnly( hypre_ParCSRMatrix *matrix, HYPRE_Int pattern_only); 
+hypre_ParCSRMatrix* hypre_ParCSRMatrixCreateFromDenseBlockMatrix(MPI_Comm comm, + HYPRE_BigInt global_num_rows, + HYPRE_BigInt global_num_cols, + HYPRE_BigInt *row_starts, + HYPRE_BigInt *col_starts, + hypre_DenseBlockMatrix *B); +hypre_ParCSRMatrix* hypre_ParCSRMatrixCreateFromParVector(hypre_ParVector *b, + HYPRE_BigInt global_num_rows, + HYPRE_BigInt global_num_cols, + HYPRE_BigInt *row_starts, + HYPRE_BigInt *col_starts); hypre_ParCSRMatrix *hypre_ParCSRMatrixRead ( MPI_Comm comm, const char *file_name ); HYPRE_Int hypre_ParCSRMatrixPrint ( hypre_ParCSRMatrix *matrix, const char *file_name ); HYPRE_Int hypre_ParCSRMatrixPrintIJ ( const hypre_ParCSRMatrix *matrix, const HYPRE_Int base_i, diff --git a/src/seq_block_mv/dense_block_matmult.c b/src/seq_block_mv/dense_block_matmult.c index 74a55a5bdc..46c69475c5 100644 --- a/src/seq_block_mv/dense_block_matmult.c +++ b/src/seq_block_mv/dense_block_matmult.c @@ -28,9 +28,9 @@ hypre_DenseBlockMatrixMultiplyHost( hypre_DenseBlockMatrix *A, HYPRE_Int num_cols_block_C = hypre_DenseBlockMatrixNumColsBlock(C); HYPRE_Int num_rows_block_B = hypre_DenseBlockMatrixNumRowsBlock(B); - HYPRE_Int num_nonzeros_block_A = hypre_DenseBlockMatrixNumNonzeros(A); - HYPRE_Int num_nonzeros_block_B = hypre_DenseBlockMatrixNumNonzeros(B); - HYPRE_Int num_nonzeros_block_C = hypre_DenseBlockMatrixNumNonzeros(C); + HYPRE_Int num_nonzeros_block_A = hypre_DenseBlockMatrixNumNonzerosBlock(A); + HYPRE_Int num_nonzeros_block_B = hypre_DenseBlockMatrixNumNonzerosBlock(B); + HYPRE_Int num_nonzeros_block_C = hypre_DenseBlockMatrixNumNonzerosBlock(C); HYPRE_Int ib; diff --git a/src/seq_mv/csr_matop.c b/src/seq_mv/csr_matop.c index e899f3b14b..be51db25c2 100644 --- a/src/seq_mv/csr_matop.c +++ b/src/seq_mv/csr_matop.c @@ -1915,7 +1915,7 @@ hypre_CSRMatrixComputeRowSum( hypre_CSRMatrix *A, * 4: abs diag inverse sqrt *--------------------------------------------------------------------------*/ -void +HYPRE_Int hypre_CSRMatrixExtractDiagonalHost( 
hypre_CSRMatrix *A, HYPRE_Complex *d, HYPRE_Int type) @@ -1926,6 +1926,7 @@ hypre_CSRMatrixExtractDiagonalHost( hypre_CSRMatrix *A, HYPRE_Int *A_j = hypre_CSRMatrixJ(A); HYPRE_Int i, j; HYPRE_Complex d_i; + char msg[HYPRE_MAX_MSG_LEN]; for (i = 0; i < nrows; i++) { @@ -1942,23 +1943,33 @@ hypre_CSRMatrixExtractDiagonalHost( hypre_CSRMatrix *A, { d_i = hypre_cabs(A_data[j]); } - else if (type == 2) - { - d_i = 1.0 / (A_data[j]); - } - else if (type == 3) - { - d_i = 1.0 / (hypre_sqrt(A_data[j])); - } - else if (type == 4) + else { - d_i = 1.0 / (hypre_sqrt(hypre_cabs(A_data[j]))); + if (A_data[j] == 0.0) + { + hypre_sprintf(msg, "Zero diagonal found at row %i!", i); + hypre_error_w_msg(HYPRE_ERROR_GENERIC, msg); + } + else if (type == 2) + { + d_i = 1.0 / A_data[j]; + } + else if (type == 3) + { + d_i = 1.0 / hypre_sqrt(A_data[j]); + } + else if (type == 4) + { + d_i = 1.0 / hypre_sqrt(hypre_cabs(A_data[j])); + } } break; } } d[i] = d_i; } + + return hypre_error_flag; } /*-------------------------------------------------------------------------- @@ -1970,7 +1981,7 @@ hypre_CSRMatrixExtractDiagonalHost( hypre_CSRMatrix *A, * 3: diag inverse sqrt *--------------------------------------------------------------------------*/ -void +HYPRE_Int hypre_CSRMatrixExtractDiagonal( hypre_CSRMatrix *A, HYPRE_Complex *d, HYPRE_Int type) @@ -1987,6 +1998,8 @@ hypre_CSRMatrixExtractDiagonal( hypre_CSRMatrix *A, { hypre_CSRMatrixExtractDiagonalHost(A, d, type); } + + return hypre_error_flag; } /*-------------------------------------------------------------------------- diff --git a/src/seq_mv/csr_matop_device.c b/src/seq_mv/csr_matop_device.c index a165c13729..c1bbad75c4 100644 --- a/src/seq_mv/csr_matop_device.c +++ b/src/seq_mv/csr_matop_device.c @@ -1502,7 +1502,9 @@ hypreGPUKernel_CSRExtractDiag( hypre_DeviceItem &item, HYPRE_Int has_diag = 0; - for (HYPRE_Int j = p + lane; warp_any_sync(item, HYPRE_WARP_FULL_MASK, j < q); j += HYPRE_WARP_SIZE) + for (HYPRE_Int j = p + lane; + 
warp_any_sync(item, HYPRE_WARP_FULL_MASK, j < q); + j += HYPRE_WARP_SIZE) { hypre_int find_diag = j < q && ja[j] == row; @@ -1516,21 +1518,28 @@ hypreGPUKernel_CSRExtractDiag( hypre_DeviceItem &item, { d[row] = hypre_abs(aa[j]); } - else if (type == 2) - { - d[row] = 1.0 / aa[j]; - } - else if (type == 3) - { - d[row] = 1.0 / hypre_sqrt(aa[j]); - } - else if (type == 4) + else { - d[row] = 1.0 / hypre_sqrt(hypre_abs(aa[j])); + if (aa[j] == 0.0) + { + d[row] = 0.0; + } + else if (type == 2) + { + d[row] = 1.0 / aa[j]; + } + else if (type == 3) + { + d[row] = 1.0 / hypre_sqrt(aa[j]); + } + else if (type == 4) + { + d[row] = 1.0 / hypre_sqrt(hypre_abs(aa[j])); + } } } - if ( warp_any_sync(item, HYPRE_WARP_FULL_MASK, find_diag) ) + if (warp_any_sync(item, HYPRE_WARP_FULL_MASK, find_diag)) { has_diag = 1; break; diff --git a/src/seq_mv/protos.h b/src/seq_mv/protos.h index f0541629bb..b2fe3beb83 100644 --- a/src/seq_mv/protos.h +++ b/src/seq_mv/protos.h @@ -41,8 +41,8 @@ void hypre_CSRMatrixComputeRowSumHost( hypre_CSRMatrix *A, HYPRE_Int *CF_i, HYPR HYPRE_Complex *row_sum, HYPRE_Int type, HYPRE_Complex scal, const char *set_or_add); void hypre_CSRMatrixComputeRowSum( hypre_CSRMatrix *A, HYPRE_Int *CF_i, HYPRE_Int *CF_j, HYPRE_Complex *row_sum, HYPRE_Int type, HYPRE_Complex scal, const char *set_or_add); -void hypre_CSRMatrixExtractDiagonal( hypre_CSRMatrix *A, HYPRE_Complex *d, HYPRE_Int type); -void hypre_CSRMatrixExtractDiagonalHost( hypre_CSRMatrix *A, HYPRE_Complex *d, HYPRE_Int type); +HYPRE_Int hypre_CSRMatrixExtractDiagonal( hypre_CSRMatrix *A, HYPRE_Complex *d, HYPRE_Int type); +HYPRE_Int hypre_CSRMatrixExtractDiagonalHost( hypre_CSRMatrix *A, HYPRE_Complex *d, HYPRE_Int type); HYPRE_Int hypre_CSRMatrixScale(hypre_CSRMatrix *A, HYPRE_Complex scalar); HYPRE_Int hypre_CSRMatrixSetConstantValues( hypre_CSRMatrix *A, HYPRE_Complex value); HYPRE_Int hypre_CSRMatrixDiagScale( hypre_CSRMatrix *A, hypre_Vector *ld, hypre_Vector *rd); diff --git a/src/seq_mv/seq_mv.h 
b/src/seq_mv/seq_mv.h index 6b4305096f..8ef72b5cca 100644 --- a/src/seq_mv/seq_mv.h +++ b/src/seq_mv/seq_mv.h @@ -320,8 +320,8 @@ void hypre_CSRMatrixComputeRowSumHost( hypre_CSRMatrix *A, HYPRE_Int *CF_i, HYPR HYPRE_Complex *row_sum, HYPRE_Int type, HYPRE_Complex scal, const char *set_or_add); void hypre_CSRMatrixComputeRowSum( hypre_CSRMatrix *A, HYPRE_Int *CF_i, HYPRE_Int *CF_j, HYPRE_Complex *row_sum, HYPRE_Int type, HYPRE_Complex scal, const char *set_or_add); -void hypre_CSRMatrixExtractDiagonal( hypre_CSRMatrix *A, HYPRE_Complex *d, HYPRE_Int type); -void hypre_CSRMatrixExtractDiagonalHost( hypre_CSRMatrix *A, HYPRE_Complex *d, HYPRE_Int type); +HYPRE_Int hypre_CSRMatrixExtractDiagonal( hypre_CSRMatrix *A, HYPRE_Complex *d, HYPRE_Int type); +HYPRE_Int hypre_CSRMatrixExtractDiagonalHost( hypre_CSRMatrix *A, HYPRE_Complex *d, HYPRE_Int type); HYPRE_Int hypre_CSRMatrixScale(hypre_CSRMatrix *A, HYPRE_Complex scalar); HYPRE_Int hypre_CSRMatrixSetConstantValues( hypre_CSRMatrix *A, HYPRE_Complex value); HYPRE_Int hypre_CSRMatrixDiagScale( hypre_CSRMatrix *A, hypre_Vector *ld, hypre_Vector *rd); diff --git a/src/seq_mv/vector.c b/src/seq_mv/vector.c index d0a8f1700f..d1ad6340c0 100644 --- a/src/seq_mv/vector.c +++ b/src/seq_mv/vector.c @@ -961,15 +961,9 @@ hypre_SeqVectorElmdivpyMarked( hypre_Vector *x, #endif /* Sanity checks */ - if (hypre_VectorSize(y) != hypre_VectorSize(b)) + if (hypre_VectorSize(x) < hypre_VectorSize(b)) { - hypre_error_w_msg(HYPRE_ERROR_GENERIC, "sizes of y and b do not match!\n"); - return hypre_error_flag; - } - - if (hypre_VectorSize(x) < hypre_VectorSize(y)) - { - hypre_error_w_msg(HYPRE_ERROR_GENERIC, "x_size is smaller than y_size!\n"); + hypre_error_w_msg(HYPRE_ERROR_GENERIC, "sizes of x and b do not match!\n"); return hypre_error_flag; } @@ -1028,8 +1022,8 @@ hypre_SeqVectorElmdivpyMarked( hypre_Vector *x, * Computes: y = y + x ./ b * * Notes: - * 1) y and b must have the same sizes - * 2) x_size can be larger than y_size + * 1) x 
and b must have the same sizes + * 2) x and y can have different sizes *--------------------------------------------------------------------------*/ HYPRE_Int diff --git a/src/seq_mv/vector_device.c b/src/seq_mv/vector_device.c index 0ae7b82f4d..eca2dbf492 100644 --- a/src/seq_mv/vector_device.c +++ b/src/seq_mv/vector_device.c @@ -358,8 +358,8 @@ hypre_SeqVectorStridedCopyDevice( hypre_Vector *vector, HYPRE_THRUST_CALL( transform, begin, last, thrust::make_permutation_iterator(v_data, - thrust::make_transform_iterator(begin, - hypreFunctor_IndexStrided(ostride))), + thrust::make_transform_iterator(begin, + hypreFunctor_IndexStrided(ostride))), hypreFunctor_ArrayStridedAccess(istride, data) ); #elif defined(HYPRE_USING_DEVICE_OPENMP) || defined(HYPRE_USING_SYCL) diff --git a/src/utilities/_hypre_utilities.hpp b/src/utilities/_hypre_utilities.hpp index 2fe5fee1dc..67f80446ad 100644 --- a/src/utilities/_hypre_utilities.hpp +++ b/src/utilities/_hypre_utilities.hpp @@ -81,14 +81,14 @@ struct hypreFunctor_ArrayStridedAccess template struct hypreFunctor_IndexStrided { - T s_; + T s_; - hypreFunctor_IndexStrided(T s) : s_(s) {} + hypreFunctor_IndexStrided(T s) : s_(s) {} - __host__ __device__ T operator()(const T i) const - { - return i * s_; - } + __host__ __device__ T operator()(const T i) const + { + return i * s_; + } }; /*-------------------------------------------------------------------------- diff --git a/src/utilities/functors.h b/src/utilities/functors.h index 75cc8c954d..c721696ec2 100644 --- a/src/utilities/functors.h +++ b/src/utilities/functors.h @@ -70,14 +70,14 @@ struct hypreFunctor_ArrayStridedAccess template struct hypreFunctor_IndexStrided { - T s_; + T s_; - hypreFunctor_IndexStrided(T s) : s_(s) {} + hypreFunctor_IndexStrided(T s) : s_(s) {} - __host__ __device__ T operator()(const T i) const - { - return i * s_; - } + __host__ __device__ T operator()(const T i) const + { + return i * s_; + } }; 
/*-------------------------------------------------------------------------- diff --git a/src/utilities/int_array_device.c b/src/utilities/int_array_device.c index 3fef8fc615..342a3777ad 100644 --- a/src/utilities/int_array_device.c +++ b/src/utilities/int_array_device.c @@ -205,8 +205,7 @@ hypre_IntArraySeparateByValueDevice( HYPRE_Int num_values, { val = values[i]; - HYPRE_THRUST_CALL(copy_if, indices, indices + v_size, v_data, buffer, - [val] __device__ (HYPRE_Int x) { return x == val; }); + HYPRE_THRUST_CALL(copy_if, indices, indices + v_size, v_data, buffer, equal(val)); hypre_TMemcpy(hypre_IntArrayArrayEntryIData(w, i), buffer, HYPRE_Int, sizes[i], HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);