Skip to content

Commit

Permalink
Merge pull request #363 from sbriseid/vs_openmp_support
Browse files Browse the repository at this point in the history
Vs openmp support
  • Loading branch information
sbriseid authored Sep 23, 2024
2 parents dbd79dd + 827280a commit d2d2137
Show file tree
Hide file tree
Showing 8 changed files with 77 additions and 16 deletions.
9 changes: 8 additions & 1 deletion gotools-core/src/utils/ClosestPointUtils.C
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,13 @@ using namespace Go::boxStructuring;

// #define LOG_CLOSEST_POINTS

// Loop schedule used by the "#pragma omp for" directives in this file.
// schedule(auto) was introduced in OpenMP 3.0 (_OPENMP == 200805). Compilers
// that only implement an older specification, such as Visual Studio's /openmp
// mode (OpenMP 2.0, _OPENMP == 200203), reject it, so fall back to
// schedule(runtime) there. The OpenMP version is tested instead of _WIN32 so
// that Windows compilers with OpenMP >= 3.0 (e.g. MinGW-w64 GCC) keep
// schedule(auto).
#if defined(_OPENMP) && _OPENMP < 200805
#define OMP_SCHEDULE_AUTO schedule(runtime)
#else
#define OMP_SCHEDULE_AUTO schedule(auto)
#endif


namespace Go
{
Expand Down Expand Up @@ -1321,7 +1328,7 @@ namespace Go
default(none) \
private(pt_idx) \
shared(nmb_points_tested, start_idx, skip, inPoints, rotationMatrix, translation, boxStructure, result, lastBoxCall, return_type, search_extend)
#pragma omp for schedule(auto)
#pragma omp for OMP_SCHEDULE_AUTO
for (pt_idx = 0; pt_idx < nmb_points_tested; ++pt_idx)
closestPointSingleCalculation(pt_idx, start_idx, skip, inPoints, rotationMatrix, translation, boxStructure,
result, lastBoxCall, return_type, search_extend);
Expand Down
9 changes: 8 additions & 1 deletion gotools-core/src/utils/RegistrationUtils.C
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,13 @@
using namespace std;
using namespace Go;

// Loop schedule used by the "#pragma omp for" directives in this file.
// schedule(auto) was introduced in OpenMP 3.0 (_OPENMP == 200805). Compilers
// that only implement an older specification, such as Visual Studio's /openmp
// mode (OpenMP 2.0, _OPENMP == 200203), reject it, so fall back to
// schedule(runtime) there. The OpenMP version is tested instead of _WIN32 so
// that Windows compilers with OpenMP >= 3.0 (e.g. MinGW-w64 GCC) keep
// schedule(auto).
#if defined(_OPENMP) && _OPENMP < 200805
#define OMP_SCHEDULE_AUTO schedule(runtime)
#else
#define OMP_SCHEDULE_AUTO schedule(auto)
#endif

namespace Go
{

Expand Down Expand Up @@ -716,7 +723,7 @@ namespace Go
default(none) \
private(pt_idx) \
shared(n_pts, points_fixed, points_transform, allow_rescaling, id, fine_R, fine_T, fine_s, m_rot_R, s2, R2, zero_R, all_lhs_matrix, all_rhs_matrix)
#pragma omp for schedule(auto)
#pragma omp for OMP_SCHEDULE_AUTO
for (pt_idx = 0; pt_idx < n_pts; ++pt_idx)
addToLinearSystem(pt_idx, points_fixed, points_transform, allow_rescaling,
id, fine_R, fine_T, fine_s,
Expand Down
13 changes: 10 additions & 3 deletions lrsplines2D/src/LRApproxApp.C
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,13 @@ using namespace Go;
using std::vector;
using std::string;

// Loop schedule used by the "#pragma omp for" directives in this file.
// schedule(auto) was introduced in OpenMP 3.0 (_OPENMP == 200805). Compilers
// that only implement an older specification, such as Visual Studio's /openmp
// mode (OpenMP 2.0, _OPENMP == 200203), reject it, so fall back to
// schedule(runtime) there. The OpenMP version is tested instead of _WIN32 so
// that Windows compilers with OpenMP >= 3.0 (e.g. MinGW-w64 GCC) keep
// schedule(auto).
#if defined(_OPENMP) && _OPENMP < 200805
#define OMP_SCHEDULE_AUTO schedule(runtime)
#else
#define OMP_SCHEDULE_AUTO schedule(auto)
#endif

//#define DEBUG


Expand Down Expand Up @@ -558,7 +565,7 @@ void LRApproxApp::computeDistPointSpline_omp(vector<double>& points,
double *curr;
double dist;
double aeps = 0.001;
#pragma omp for schedule(auto)
#pragma omp for OMP_SCHEDULE_AUTO
for (kj=0; kj < num_kj; ++kj)
{
knotv = vknots_begin + kj; // Left side of global element.
Expand Down Expand Up @@ -833,7 +840,7 @@ void LRApproxApp::classifyCloudFromDist_omp(vector<double>& points,
const double* knotu;
const double* knotv;
double aeps = 0.001;
#pragma omp for schedule(auto)
#pragma omp for OMP_SCHEDULE_AUTO
for (kj = 0; kj < num_kj; ++kj)
{
knotv = vknots_begin + kj; // Left side of global element.
Expand Down Expand Up @@ -1138,7 +1145,7 @@ void LRApproxApp::categorizeCloudFromDist_omp(vector<double>& points,
const double* knotu;
const double* knotv;
double aeps = 0.001;
#pragma omp for schedule(auto)
#pragma omp for OMP_SCHEDULE_AUTO
for (kj = 0; kj < num_kj; ++kj)
{
knotv = vknots_begin + kj; // Left side of global element.
Expand Down
11 changes: 9 additions & 2 deletions lrsplines2D/src/LRSplineMBA.C
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,13 @@ using std::cout;
using std::endl;
using namespace Go;

// Loop schedule used by the "#pragma omp for" directives in this file.
// schedule(auto) was introduced in OpenMP 3.0 (_OPENMP == 200805). Compilers
// that only implement an older specification, such as Visual Studio's /openmp
// mode (OpenMP 2.0, _OPENMP == 200203), reject it, so fall back to
// schedule(runtime) there. The OpenMP version is tested instead of _WIN32 so
// that Windows compilers with OpenMP >= 3.0 (e.g. MinGW-w64 GCC) keep
// schedule(auto).
#if defined(_OPENMP) && _OPENMP < 200805
#define OMP_SCHEDULE_AUTO schedule(runtime)
#else
#define OMP_SCHEDULE_AUTO schedule(auto)
#endif

//==============================================================================
void LRSplineMBA::MBADistAndUpdate(LRSplineSurface *srf,
double significant_factor,
Expand Down Expand Up @@ -420,7 +427,7 @@ void LRSplineMBA::MBADistAndUpdate_omp(LRSplineSurface *srf,
vector<double> Bval;
bool u_at_end, v_at_end;
double val, dist, wc, wgt, phi_c, total_squared_inv;
#pragma omp for schedule(auto)//guided)//static,8)//runtime)//dynamic,4)
#pragma omp for OMP_SCHEDULE_AUTO//guided)//static,8)//runtime)//dynamic,4)
for (kl = 0; kl < num_elem; ++kl)
{
el1 = el1_vec[kl];
Expand Down Expand Up @@ -959,7 +966,7 @@ void LRSplineMBA::MBAUpdate_omp(LRSplineSurface *srf,
const double *curr;
double total_squared_inv, val, wgt, wc, phi_c, gamma;

#pragma omp for schedule(auto)//guided)//static,8)//runtime)//dynamic,4)
#pragma omp for OMP_SCHEDULE_AUTO//guided)//static,8)//runtime)//dynamic,4)
for (kl = 0; kl < num_elem; ++kl)
{
el1 = el1_vec[kl];
Expand Down
12 changes: 11 additions & 1 deletion lrsplines2D/src/LRSurfApprox.C
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,13 @@ using std::pair;
using std::make_pair;
using namespace Go;

// Loop schedule used by the "#pragma omp for" directives in this file.
// schedule(auto) was introduced in OpenMP 3.0 (_OPENMP == 200805). Compilers
// that only implement an older specification, such as Visual Studio's /openmp
// mode (OpenMP 2.0, _OPENMP == 200203), reject it, so fall back to
// schedule(runtime) there. The OpenMP version is tested instead of _WIN32 so
// that Windows compilers with OpenMP >= 3.0 (e.g. MinGW-w64 GCC) keep
// schedule(auto).
#if defined(_OPENMP) && _OPENMP < 200805
#define OMP_SCHEDULE_AUTO schedule(runtime)
#else
#define OMP_SCHEDULE_AUTO schedule(auto)
#endif

//==============================================================================
LRSurfApprox::LRSurfApprox(vector<double>& points,
int dim, double epsge, bool init_mba,
Expand Down Expand Up @@ -1834,7 +1841,7 @@ void LRSurfApprox::computeAccuracy_omp(vector<Element2D*>& ghost_elems)
int del;
double minheight, maxheight, height;

#pragma omp for schedule(auto)//guided)//static,8)//runtime)//dynamic,4)
#pragma omp for OMP_SCHEDULE_AUTO//guided)//static,8)//runtime)//dynamic,4)
for (kj = 0; kj < num_elem ; ++kj)
{
it = elem_iters[kj];
Expand Down Expand Up @@ -2453,11 +2460,14 @@ void LRSurfApprox::computeAccuracyElement_omp(vector<double>& points, int nmb, i
const int dyn_div = nmb/num_threads;

#ifdef _OPENMP
#ifndef _WIN32
pthread_attr_t attr;
size_t stacksize;
pthread_attr_getstacksize(&attr, &stacksize);
// std::cout << "stacksize (in MB): " << (double)stacksize/(1024.0*1024.0) << std::endl;
#endif
#endif

// omp_set_num_threads(4);
#pragma omp parallel default(none) private(ki, curr, idx1, idx2, dist, upar, vpar, close_pt, curr_pt, vec, norm, dist1, dist2, dist3, dist4, sgn, pos, kr, kj/*, sfval, bval*/) \
shared(points, nmb, umax, vmax, del, dim, rd, maxiter, elem_grid_start, grid2, grid1, grid_height, grid3, grid4, elem2, bsplines, del2, prev_point_dist, nmb_bsplines)
Expand Down
15 changes: 11 additions & 4 deletions lrsplines2D/src/LRSurfSmoothLS.C
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,13 @@ using std::vector;
using std::cout;
using std::endl;

// Loop schedule used by the "#pragma omp for" directives in this file.
// schedule(auto) was introduced in OpenMP 3.0 (_OPENMP == 200805). Compilers
// that only implement an older specification, such as Visual Studio's /openmp
// mode (OpenMP 2.0, _OPENMP == 200203), reject it, so fall back to
// schedule(runtime) there. The OpenMP version is tested instead of _WIN32 so
// that Windows compilers with OpenMP >= 3.0 (e.g. MinGW-w64 GCC) keep
// schedule(auto).
#if defined(_OPENMP) && _OPENMP < 200805
#define OMP_SCHEDULE_AUTO schedule(runtime)
#else
#define OMP_SCHEDULE_AUTO schedule(auto)
#endif

namespace {

const int indices[] = {1, 2, 3, 4, 5, 8};
Expand Down Expand Up @@ -533,9 +540,9 @@ void LRSurfSmoothLS::setLeastSquares_omp(const double weight,
double *subLSmat, *subLSright;
int kcond, nc;
vector<size_t> in_bs;
size_t ki, kj, kl, kr, kh, kk;
size_t kj, kl, kr, kh, kk;

#pragma omp for schedule(auto)//guided)//static,8)//runtime)//dynamic,4)
#pragma omp for OMP_SCHEDULE_AUTO//guided)//static,8)//runtime)//dynamic,4)
for (ki = 0; ki < num_elem; ++ki)
{
it = elem_iters[ki];
Expand Down Expand Up @@ -865,7 +872,7 @@ void LRSurfSmoothLS::localLeastSquares_omp(vector<double>& points,
// OpenMP-fitting.
#if 1
#pragma omp parallel default(none) private(kr, pp, ki, kj, kk, kp, kq) shared(nmbp, nmbb, del, dim, bsplines, mat, right, ncond, start_pt, pt_wgt, ptype, mat_local, ki_threated, right_local, kp_threated, outlier_test)
#pragma omp for schedule(auto)//guided)//static,8)//runtime)//dynamic,4)
#pragma omp for OMP_SCHEDULE_AUTO//guided)//static,8)//runtime)//dynamic,4)
#endif
for (kr=0; kr<nmbp[ptype]; ++kr)
{
Expand All @@ -880,7 +887,7 @@ void LRSurfSmoothLS::localLeastSquares_omp(vector<double>& points,
// 201503: Tested splitting on the support of an element. Slower than without OpenMP.
// And that was with some minor errors in the calculations due to missing thread safety for 1 line.
#pragma omp parallel default(none) private(ki, kj, kk, kp, kq) shared(pp, sb, nmbp, nmbb, del, dim, bsplines, mat, right, ncond, start_pt, pt_wgt, ptype, mat_local, ki_threated, right_local, kp_threated)
#pragma omp for schedule(auto)//guided)//static,8)//runtime)//dynamic,4)
#pragma omp for OMP_SCHEDULE_AUTO//guided)//static,8)//runtime)//dynamic,4)
#endif
double val1, val2;
for (ki=0; ki<nmbb; ++ki)
Expand Down
11 changes: 9 additions & 2 deletions lrsplines3D/src/LRSpline3DMBA.C
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,13 @@ using std::cout;
using std::endl;
using namespace Go;

// Loop schedule used by the "#pragma omp for" directives in this file.
// schedule(auto) was introduced in OpenMP 3.0 (_OPENMP == 200805). Compilers
// that only implement an older specification, such as Visual Studio's /openmp
// mode (OpenMP 2.0, _OPENMP == 200203), reject it, so fall back to
// schedule(runtime) there. The OpenMP version is tested instead of _WIN32 so
// that Windows compilers with OpenMP >= 3.0 (e.g. MinGW-w64 GCC) keep
// schedule(auto).
#if defined(_OPENMP) && _OPENMP < 200805
#define OMP_SCHEDULE_AUTO schedule(runtime)
#else
#define OMP_SCHEDULE_AUTO schedule(auto)
#endif

//==============================================================================
void LRSpline3DMBA::MBADistAndUpdate(LRSplineVolume *vol)
//==============================================================================
Expand Down Expand Up @@ -323,7 +330,7 @@ void LRSpline3DMBA::MBADistAndUpdate_omp(LRSplineVolume *vol, double eps,
bool u_at_end, v_at_end, w_at_end;
double val, dist, wc, wgt, phi_c, total_squared_inv;
double ptwgt, ptdel;
#pragma omp for schedule(auto)//guided)//static,8)//runtime)//dynamic,4)
#pragma omp for OMP_SCHEDULE_AUTO//guided)//static,8)//runtime)//dynamic,4)
for (kl = 0; kl < num_elem; ++kl)
{
el1 = el1_vec[kl];
Expand Down Expand Up @@ -676,7 +683,7 @@ void LRSplineMBA::MBAUpdate_omp(LRSplineSurface *srf)
const double *curr;
double total_squared_inv, val, wgt, wc, phi_c, gamma;
#pragma omp for schedule(auto)//guided)//static,8)//runtime)//dynamic,4)
#pragma omp for OMP_SCHEDULE_AUTO//guided)//static,8)//runtime)//dynamic,4)
for (kl = 0; kl < num_elem; ++kl)
{
el1 = el1_vec[kl];
Expand Down
13 changes: 11 additions & 2 deletions lrsplines3D/src/LRVolApprox.C
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,13 @@ using std::endl;
using std::pair;
using namespace Go;

// Loop schedule used by the "#pragma omp for" directives in this file.
// schedule(auto) was introduced in OpenMP 3.0 (_OPENMP == 200805). Compilers
// that only implement an older specification, such as Visual Studio's /openmp
// mode (OpenMP 2.0, _OPENMP == 200203), reject it, so fall back to
// schedule(runtime) there. The OpenMP version is tested instead of _WIN32 so
// that Windows compilers with OpenMP >= 3.0 (e.g. MinGW-w64 GCC) keep
// schedule(auto).
#if defined(_OPENMP) && _OPENMP < 200805
#define OMP_SCHEDULE_AUTO schedule(runtime)
#else
#define OMP_SCHEDULE_AUTO schedule(auto)
#endif

//==============================================================================
LRVolApprox::LRVolApprox(vector<double>& points,
int dim, double epsge,
Expand Down Expand Up @@ -1360,7 +1367,7 @@ void LRVolApprox::computeAccuracy_omp(vector<Element3D*>& ghost_elems)
double acc_prev;
double tol;

#pragma omp for schedule(auto)//guided)//static,8)//runtime)//dynamic,4)
#pragma omp for OMP_SCHEDULE_AUTO//guided)//static,8)//runtime)//dynamic,4)
for (kj = 0; kj < num_elem ; ++kj)
{
it = elem_iters[kj];
Expand Down Expand Up @@ -1661,9 +1668,11 @@ void LRVolApprox::computeAccuracyElement_omp(vector<double>& points, int nmb, in
vector<double> tmpval;//(3*nmb_bsplines);

#ifdef _OPENMP
#ifndef _WIN32
pthread_attr_t attr;
size_t stacksize;
pthread_attr_getstacksize(&attr, &stacksize);
#endif
#endif
// std::cout << "stacksize (in MB): " << (double)stacksize/(1024.0*1024.0) << std::endl;
// omp_set_num_threads(4);
Expand All @@ -1672,7 +1681,7 @@ void LRVolApprox::computeAccuracyElement_omp(vector<double>& points, int nmb, in
{
bval.resize(bsplines.size());
tmpval.resize(3*bsplines.size());
#pragma omp for schedule(auto)//static, 4)//runtime)//guided)//auto)
#pragma omp for OMP_SCHEDULE_AUTO//static, 4)//runtime)//guided)//auto)
//#pragma omp for schedule(dynamic, 4)//static, 4)//runtime)//guided)//auto)
for (ki=0; ki<nmb; ++ki)
{
Expand Down

0 comments on commit d2d2137

Please sign in to comment.