diff --git a/CHANGES.md b/CHANGES.md index 44ff9b32d573..70ecea906ab1 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,52 @@ Release Changes List ==================== +version 1.24.1 +============== + +Update to the twenty-seventh public release of Chapel, April 2021 +(see also changes below for 1.24.0) + +Highlights +---------- +* significant performance improvements for InfiniBand systems +* improved support for computing with `enum` ranges +* closed all known memory leaks + +Feature Improvements +-------------------- +* extended `param` for-loops to support `enum` ranges +* added support for open-interval `enum` ranges + +Performance Optimizations / Improvements +---------------------------------------- +* improved performance on InfiniBand systems by upgrading GASNet-EX +* improved NUMA affinity and startup times when using a fixed heap + +Memory Improvements +------------------- +* closed a memory leak related to tuple coercions +* closed a memory leak in `list.insert()` +* closed a memory leak in constrained generic interfaces + +Third-Party Software Changes +---------------------------- +* upgraded GASNet-EX to version 2021.3.0 + +Portability +----------- +* improved the portability of the code base to HPE Cray EX + +Bug Fixes for Libraries +----------------------- +* fixed a bug in which `indexOf()` on an empty list halted +* fixed bugs in binary operations for sets with `parSafe=true` + +Developer-oriented changes: Runtime improvements +------------------------------------------------ +* improved our approach to polling when communicating using GASNet over `ucx` + + version 1.24.0 ============== @@ -869,7 +915,7 @@ Developer-oriented changes: Testing System version 1.22.1 ============== -Update to twenty-fifth public release of Chapel, June 2020 +Update to twenty-fifth public release of Chapel, June 2020 (see also changes below for 1.22.0) Portability @@ -2909,7 +2955,7 @@ Developer-oriented changes: Tool improvements version 1.17.1 ============== 
-Update to twentieth public release of Chapel, April 2018 +Update to twentieth public release of Chapel, April 2018 (see also changes below for 1.17.0) Bug Fixes @@ -4759,7 +4805,7 @@ Developer-oriented changes: Third-party improvements version 1.13.1 ============== -Update to sixteenth public release of Chapel, June 2016 +Update to sixteenth public release of Chapel, June 2016 (see also changes below for 1.13.0) Bug Fixes @@ -8360,7 +8406,7 @@ Internal version 1.1.2 ============= -Update to fourth public release of Chapel, September, 2010 +Update to fourth public release of Chapel, September, 2010 (see also changes below for 1.1.1 and 1.1) Platform-specific notes @@ -8373,7 +8419,7 @@ Platform-specific notes version 1.1.1 ============= -Update to fourth public release of Chapel, July 8, 2010 +Update to fourth public release of Chapel, July 8, 2010 (see also changes below for 1.1) Platform-specific notes @@ -8717,7 +8763,7 @@ Internal version 1.02 ============ -Update to third public release of Chapel, November 12, 2009 +Update to third public release of Chapel, November 12, 2009 (see also changes below for version 1.01 and 1.0) High-Level Themes @@ -8785,7 +8831,7 @@ Internal version 1.01 ============ -Update to third public release of Chapel, October 30, 2009 +Update to third public release of Chapel, October 30, 2009 (see also changes for version 1.0) High-Level Themes diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index edf0b21ddc98..0a19832eda7d 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -2,10 +2,10 @@ Chapel Contributors =================== The following people have contributed to the implementation of the -Chapel release: +most recent Chapel releases: -Contributors to the Chapel 1.24 release ---------------------------------------- +Contributors to the Chapel 1.24.x releases +------------------------------------------ * Souris Ash, individual contributor * Ben Albrecht, [HPE] * Paul Cassella, [HPE] @@ -17,6 +17,7 @@ Contributors to the Chapel 1.24 
release * Lydia Duncan, [HPE] * Prashanth Duvvuri, individual contributor * Michael Ferguson, [HPE] +* Piyush Gupta, individual contributor * David Iten, [HPE] * Engin Kayraklioglu, [HPE] (former [GSoC 2017] mentor, [Cray Inc.] intern from [George Washington University]) * Lee Killough, [HPE] diff --git a/compiler/AST/ParamForLoop.cpp b/compiler/AST/ParamForLoop.cpp index e73130d067d8..9593412813a4 100644 --- a/compiler/AST/ParamForLoop.cpp +++ b/compiler/AST/ParamForLoop.cpp @@ -410,25 +410,26 @@ CallExpr* ParamForLoop::foldForResolve() SymExpr* hse = highExprGet(); SymExpr* sse = strideExprGet(); + CallExpr* noop = new CallExpr(PRIM_NOOP); + + Symbol* idxSym = idxExpr->symbol(); + Type* idxType = indexType(); + + bool emptyLoop = true; + Symbol* continueSym = continueLabelGet(); + + if (!is_enum_type(idxType)) { VarSymbol* lvar = toVarSymbol(lse->symbol()); VarSymbol* hvar = toVarSymbol(hse->symbol()); VarSymbol* svar = toVarSymbol(sse->symbol()); - CallExpr* noop = new CallExpr(PRIM_NOOP); - validateLoop(lvar, hvar, svar); - - Symbol* idxSym = idxExpr->symbol(); - Symbol* continueSym = continueLabelGet(); - Type* idxType = indexType(); IF1_int_type idxSize = (is_bool_type(idxType) || get_width(idxType) == 32) ? 
INT_SIZE_32 : INT_SIZE_64; // Insert an "insertion marker" for loop unrolling insertAfter(noop); - bool emptyLoop = true; - if (is_int_type(idxType)) { int64_t low = lvar->immediate->to_int(); @@ -499,6 +500,106 @@ CallExpr* ParamForLoop::foldForResolve() } } } + } else { + EnumSymbol* lvar = toEnumSymbol(lse->symbol()); + EnumSymbol* hvar = toEnumSymbol(hse->symbol()); + VarSymbol* svar = toVarSymbol(sse->symbol()); + + validateLoop(lvar, hvar, svar); + + int64_t stride = svar->immediate->to_int(); + + // Insert an "insertion marker" for loop unrolling + insertAfter(noop); + + bool foundLow = false; + bool foundHigh = false; + bool degenRange = false; + + EnumType* et = toEnumType(lvar->type); + + // Check to make sure the range is valid + for_enums(constant, et) { + if (constant->sym == lvar) { + foundLow = true; + if (foundHigh == true) { + degenRange = true; + break; + } + } + if (constant->sym == hvar) { + foundHigh = true; + if (foundLow == true) { + break; + } + } + } + + if (!degenRange) { + // Handle cases with positive strides + if (stride >= 1) { + bool foundFirst = false; // have we found our first enum bound yet? + int i = 0; + int strcount = 0; // used to count off strides + for_enums(constant, et) { + if (constant->sym == lvar) { // found the starting point + foundFirst = true; + strcount = 0; // start counting the stride from here + } + + // stamp out a copy of the loop body + if (foundFirst && strcount == 0) { + SymbolMap map; + + map.put(idxSym, constant->sym); + copyBodyHelper(noop, i, &map, this, continueSym); + emptyLoop = false; + } + + // advance the stride + strcount++; + if (strcount == stride) { + strcount = 0; + } + if (constant->sym == hvar) { // quit when we find the stopping bound + break; + } + i++; + } + } else { + // Handle cases with negative strides + bool foundFirst = false; // have we found our first enum bound yet? 
+ int i = 0; + int strcount = 0; // used to count off strides + for_enums_backward(constant, et) { + if (constant->sym == hvar) { // found the starting point + foundFirst = true; + strcount = 0; // start counting the stride from here + } + + // stamp out a copy of the loop body + if (foundFirst && strcount == 0) { + SymbolMap map; + + map.put(idxSym, constant->sym); + copyBodyHelper(noop, i, &map, this, continueSym); + emptyLoop = false; + } + + // advance the stride + strcount++; + if (strcount == -stride) { + strcount = 0; + } + if (constant->sym == lvar) { // quit when we find the stopping bound + break; + } + i++; + } + } + } + } + if (emptyLoop) addMentionToEndOfStatement(this, NULL); @@ -519,7 +620,7 @@ CallExpr* ParamForLoop::foldForResolve() void ParamForLoop::validateLoop(VarSymbol* lvar, VarSymbol* hvar, VarSymbol* svar) { - if (!lvar || !hvar || !svar) + if (!lvar || !hvar || !svar) USR_FATAL(this, "param for-loops must be defined over a bounded param range"); @@ -532,6 +633,22 @@ void ParamForLoop::validateLoop(VarSymbol* lvar, } } +void ParamForLoop::validateLoop(EnumSymbol* lvar, + EnumSymbol* hvar, + VarSymbol* svar) { + if (!lvar || !hvar || !svar) + USR_FATAL(this, + "param for-loops must be defined over a bounded param range"); + + if (!svar->immediate) + USR_FATAL(this, + "param for-loops must be defined over a bounded param range"); + + if (!is_int_type(svar->type) && !is_uint_type(svar->type)) { + USR_FATAL(this, "Range stride must be an int"); + } +} + // // Determine the index type for a ParamForLoop. 
// diff --git a/compiler/AST/build.cpp b/compiler/AST/build.cpp index 342be6eb4905..6d08fa36c46f 100644 --- a/compiler/AST/build.cpp +++ b/compiler/AST/build.cpp @@ -2510,36 +2510,27 @@ BlockStmt* handleConfigTypes(BlockStmt* blk) { return blk; } -static VarSymbol* one = NULL; - -static SymExpr* buildOneExpr() { - if (one == NULL) { - one = new_IntSymbol(1); - } - return new SymExpr(one); -} - CallExpr* buildBoundedRange(Expr* low, Expr* high, bool openlow, bool openhigh) { if (openlow) { - low = new CallExpr("+", low, buildOneExpr()); + low = new CallExpr("chpl__nudgeLowBound", low); } if (openhigh) { - high = new CallExpr("-", high, buildOneExpr()); + high = new CallExpr("chpl__nudgeHighBound", high); } return new CallExpr("chpl_build_bounded_range",low, high); } CallExpr* buildLowBoundedRange(Expr* low, bool open) { if (open) { - low = new CallExpr("+", low, buildOneExpr()); + low = new CallExpr("chpl__nudgeLowBound", low); } return new CallExpr("chpl_build_low_bounded_range", low); } CallExpr* buildHighBoundedRange(Expr* high, bool open) { if (open) { - high = new CallExpr("-", high, buildOneExpr()); + high = new CallExpr("chpl__nudgeHighBound", high); } return new CallExpr("chpl_build_high_bounded_range", high); } diff --git a/compiler/include/ParamForLoop.h b/compiler/include/ParamForLoop.h index 5a008642f655..dccc9bb58a29 100644 --- a/compiler/include/ParamForLoop.h +++ b/compiler/include/ParamForLoop.h @@ -85,6 +85,9 @@ class ParamForLoop final : public LoopStmt void validateLoop(VarSymbol* lvar, VarSymbol* hvar, VarSymbol* svar); + void validateLoop(EnumSymbol* lvar, + EnumSymbol* hvar, + VarSymbol* svar); // // NOAKES 2014/12/11 diff --git a/compiler/passes/buildDefaultFunctions.cpp b/compiler/passes/buildDefaultFunctions.cpp index f6ab53f25631..1b264bba512a 100644 --- a/compiler/passes/buildDefaultFunctions.cpp +++ b/compiler/passes/buildDefaultFunctions.cpp @@ -1364,15 +1364,18 @@ static void buildEnumToOrderFunction(EnumType* et, bool paramVersion) { 
// // 'proc chpl_enumToOrder(i: integral, type et: et): et' // -static void buildOrderToEnumFunction(EnumType* et) { +static void buildOrderToEnumFunction(EnumType* et, bool paramVersion) { FnSymbol* fn = new FnSymbol(astr("chpl__orderToEnum")); fn->addFlag(FLAG_COMPILER_GENERATED); fn->addFlag(FLAG_LAST_RESORT); - ArgSymbol* arg1 = new ArgSymbol(INTENT_BLANK, "i", dtIntegral); + ArgSymbol* arg1 = new ArgSymbol(paramVersion ? INTENT_PARAM : INTENT_BLANK, + "i", dtIntegral); ArgSymbol* arg2 = new ArgSymbol(INTENT_BLANK, "et", et); arg2->addFlag(FLAG_TYPE_VARIABLE); fn->insertFormalAtTail(arg1); fn->insertFormalAtTail(arg2); + if (paramVersion) + fn->retTag = RET_PARAM; // Generate a select statement with when clauses for each of the // enumeration constants, and an otherwise clause that calls halt. @@ -1388,7 +1391,7 @@ static void buildOrderToEnumFunction(EnumType* et) { const char * errorString = "enumerated type out of bounds in chpl__orderToEnum()"; CondStmt* otherwise = new CondStmt(new CallExpr(PRIM_WHEN), - new BlockStmt(new CallExpr("halt", + new BlockStmt(new CallExpr(paramVersion ? 
"compilerError" : "halt", new_StringSymbol(errorString)))); whenstmts->insertAtTail(otherwise); fn->insertAtTail(buildSelectStmt(new SymExpr(arg1), whenstmts)); @@ -1410,7 +1413,8 @@ static void buildEnumOrderFunctions(EnumType* et) { // buildEnumToOrderFunction(et, true); buildEnumToOrderFunction(et, false); - buildOrderToEnumFunction(et); + buildOrderToEnumFunction(et, true); + buildOrderToEnumFunction(et, false); } diff --git a/compiler/resolution/addAutoDestroyCalls.cpp b/compiler/resolution/addAutoDestroyCalls.cpp index b05d5919a0a4..c381a94dc4b8 100644 --- a/compiler/resolution/addAutoDestroyCalls.cpp +++ b/compiler/resolution/addAutoDestroyCalls.cpp @@ -264,6 +264,16 @@ static Expr* walkBlockStmt(FnSymbol* fn, if (isAutoDestroyedOrSplitInitedVariable(v)) scope.addInitialization(v); + // workaround for issue #1833 + if (CallExpr* c = toCallExpr(stmt)) + if (c->isPrimitive(PRIM_SET_MEMBER)) + if (SymExpr* lhs = toSymExpr(c->get(1))) + if (VarSymbol* v = toVarSymbol(lhs->symbol())) + if (isAutoDestroyedOrSplitInitedVariable(v)) + if (v->hasFlag(FLAG_COERCE_TEMP) && + v->type->symbol->hasFlag(FLAG_TUPLE)) + scope.addInitialization(v); + if (fCall != NULL) { // Check also for out intent in a called function for_formals_actuals(formal, actual, fCall) { diff --git a/compiler/resolution/resolveFunction.cpp b/compiler/resolution/resolveFunction.cpp index c7fa579a7a2d..8ede486e1320 100644 --- a/compiler/resolution/resolveFunction.cpp +++ b/compiler/resolution/resolveFunction.cpp @@ -2140,7 +2140,8 @@ bool shouldAddInFormalTempAtCallSite(ArgSymbol* formal, FnSymbol* fn) { return false; // TODO: remove this filtering on records/unions - if (isRecord(formal->getValType()) || isUnion(formal->getValType())) { + if (isRecord(formal->getValType()) || isUnion(formal->getValType()) || + isConstrainedType(formal->getValType())) { if (formal->intent == INTENT_IN || formal->intent == INTENT_CONST_IN || formal->originalIntent == INTENT_IN || @@ -2157,7 +2158,7 @@ bool 
shouldAddInFormalTempAtCallSite(ArgSymbol* formal, FnSymbol* fn) { // passing an argument of type 't'. // static bool backendRequiresCopyForIn(Type* t) { - return argMustUseCPtr(t); + return argMustUseCPtr(t) || isConstrainedType(t); } diff --git a/modules/internal/ChapelRange.chpl b/modules/internal/ChapelRange.chpl index ec296db4062d..5a882ad4f5ad 100644 --- a/modules/internal/ChapelRange.chpl +++ b/modules/internal/ChapelRange.chpl @@ -344,6 +344,19 @@ module ChapelRange { compilerError("Bounds of 'low..high' must be integers of compatible types."); } + proc chpl__nudgeLowBound(low) { + return chpl__intToIdx(low.type, chpl__idxToInt(low) + 1); + } + proc chpl__nudgeLowBound(param low) param { + return chpl__intToIdx(low.type, chpl__idxToInt(low) + 1); + } + proc chpl__nudgeHighBound(high) { + return chpl__intToIdx(high.type, chpl__idxToInt(high) - 1); + } + proc chpl__nudgeHighBound(param high) param { + return chpl__intToIdx(high.type, chpl__idxToInt(high) - 1); + } + // Range builders for low bounded ranges proc chpl_build_low_bounded_range(low: integral) return new range(low.type, BoundedRangeType.boundedLow, _low=low); @@ -396,6 +409,16 @@ module ChapelRange { return high; } + proc chpl_compute_low_param_loop_bound(param low: enum, + param high: low.type) param { + return low; + } + + proc chpl_compute_high_param_loop_bound(param low: enum, + param high: low.type) param { + return high; + } + proc chpl_compute_low_param_loop_bound(param low: bool, param high: bool) param { return low; @@ -2614,7 +2637,7 @@ operator :(r: range(?), type t: range(?)) { return i: idxType; } - inline proc chpl__intToIdx(type idxType: integral, param i: integral) { + inline proc chpl__intToIdx(type idxType: integral, param i: integral) param { if (i.type == idxType) then return i; else @@ -2625,6 +2648,10 @@ operator :(r: range(?), type t: range(?)) { return chpl__orderToEnum(i, idxType); } + inline proc chpl__intToIdx(type idxType: enum, param i: integral) param { + return 
chpl__orderToEnum(i, idxType); + } + inline proc chpl__intToIdx(type idxType, i: integral) where isBoolType(idxType) { return i: bool; } @@ -2649,6 +2676,10 @@ operator :(r: range(?), type t: range(?)) { return chpl__enumToOrder(i); } + inline proc chpl__idxToInt(param i: enum) param { + return chpl__enumToOrder(i); + } + inline proc chpl__idxToInt(i: bool) { return i: int; } diff --git a/modules/standard/List.chpl b/modules/standard/List.chpl index 032b35a0e579..2a21d3aa3ac7 100644 --- a/modules/standard/List.chpl +++ b/modules/standard/List.chpl @@ -899,8 +899,7 @@ module List { lifetime this < x { var result = false; - on this { - _enter(); + _enter(); // Handle special case of `a.insert((a.size), x)` here. if idx == _size { @@ -915,8 +914,7 @@ module List { result = true; } - _leave(); - } + _leave(); // Destroy our copy if it was never used. if !result then @@ -1297,6 +1295,12 @@ module List { :rtype: `int` */ proc const indexOf(x: eltType, start: int=0, end: int=-1): int { + + param error = -1; + + if _size == 0 then + return error; + if boundsChecking { const msg = " index for \"list.indexOf\" out of bounds: "; @@ -1306,9 +1310,7 @@ module List { if !_withinBounds(start) then boundsCheckHalt("Start" + msg + start:string); } - - param error = -1; - + if end >= 0 && end < start then return error; diff --git a/modules/standard/Set.chpl b/modules/standard/Set.chpl index 0d3f6512d753..1101243d1650 100644 --- a/modules/standard/Set.chpl +++ b/modules/standard/Set.chpl @@ -195,17 +195,26 @@ module Set { :arg other: A set to initialize this set with. */ - proc init=(const ref other: set(?t, ?)) lifetime this < other { - this.eltType = t; - this.parSafe = other.parSafe; + proc init=(const ref other: set(?t, ?p)) lifetime this < other { + this.eltType = if this.type.eltType != ? then + this.type.eltType else t; + this.parSafe = if this.type.parSafe != ? 
then + this.type.parSafe else p; this.complete(); - if !isCopyableType(eltType) then - compilerError('cannot initialize ' + this.type:string + ' from ' + - other.type:string + ' because element type ' + - eltType:string + ' is not copyable'); - - for elem in other do _addElem(elem); + // TODO: Relax this to allow if 'isCoercible(t, this.eltType)'? + if eltType != t { + compilerError('cannot initialize ', this.type:string, ' from ', + other.type:string, ' due to element type ', + 'mismatch'); + } else if !isCopyableType(eltType) { + compilerError('cannot initialize ', this.type:string, ' from ', + other.type:string, ' because element type ', + eltType:string, ' is not copyable'); + } else { + // TODO: Use a forall when this.parSafe? + for elem in other do _addElem(elem); + } } pragma "no doc" @@ -539,11 +548,14 @@ module Set { :arg b: A set to take the union of. :return: A new set containing the union between `a` and `b`. - :rtype: `set(?t, ?)` */ - proc |(const ref a: set(?t, ?), const ref b: set(t, ?)): set(t) { + proc |(const ref a: set(?t, ?), const ref b: set(t, ?)) { var result: set(t, (a.parSafe || b.parSafe)); + // TODO: Split-init causes weird errors, remove this line and then run + // setCompositionParSafe.chpl to see. + result; + result = a; result |= b; @@ -569,9 +581,8 @@ module Set { :arg b: A set to take the union of. :return: A new set containing the union between `a` and `b`. - :rtype: `set(?t, ?)` */ - proc +(const ref a: set(?t, ?), const ref b: set(t, ?)): set(t, ?) { + proc +(const ref a: set(?t, ?), const ref b: set(t, ?)) { return a | b; } @@ -592,13 +603,12 @@ module Set { :arg b: A set to take the difference of. :return: A new set containing the difference between `a` and `b`. 
- :rtype: `set(t)` */ - proc -(const ref a: set(?t, ?), const ref b: set(t, ?)): set(t) { + proc -(const ref a: set(?t, ?), const ref b: set(t, ?)) { var result = new set(t, (a.parSafe || b.parSafe)); if a.parSafe && b.parSafe { - forall x in a do + forall x in a with (ref result) do if !b.contains(x) then result.add(x); } else { @@ -623,7 +633,7 @@ module Set { */ proc -=(ref lhs: set(?t, ?), const ref rhs: set(t, ?)) { if lhs.parSafe && rhs.parSafe { - forall x in rhs do + forall x in rhs with (ref lhs) do lhs.remove(x); } else { for x in rhs do @@ -638,15 +648,14 @@ module Set { :arg b: A set to take the intersection of. :return: A new set containing the intersection of `a` and `b`. - :rtype: `set(t)` */ - proc &(const ref a: set(?t, ?), const ref b: set(t, ?)): set(t) { + proc &(const ref a: set(?t, ?), const ref b: set(t, ?)) { var result: set(t, (a.parSafe || b.parSafe)); /* Iterate over the smaller set */ if a.size <= b.size { if a.parSafe && b.parSafe { - forall x in a do + forall x in a with (ref result) do if b.contains(x) then result.add(x); } else { @@ -656,7 +665,7 @@ module Set { } } else { if a.parSafe && b.parSafe { - forall x in b do + forall x in b with (ref result) do if a.contains(x) then result.add(x); } else { @@ -687,7 +696,7 @@ module Set { var result: set(t, (lhs.parSafe || rhs.parSafe)); if lhs.parSafe && rhs.parSafe { - forall x in lhs do + forall x in lhs with (ref result) do if rhs.contains(x) then result.add(x); } else { @@ -695,6 +704,7 @@ module Set { if rhs.contains(x) then result.add(x); } + lhs = result; } @@ -705,11 +715,14 @@ module Set { :arg b: A set to take the symmetric difference of. :return: A new set containing the symmetric difference of `a` and `b`. 
- :rtype: `set(?t, ?)` */ - proc ^(const ref a: set(?t, ?), const ref b: set(t, ?)): set(t) { + proc ^(const ref a: set(?t, ?), const ref b: set(t, ?)) { var result: set(t, (a.parSafe || b.parSafe)); + // TODO: Split-init causes weird errors, remove this line and then run + // setCompositionParSafe.chpl to see. + result; + /* Expect the loop in ^= to be more expensive than the loop in =, so arrange for the rhs of the ^= to be the smaller set. */ if a.size <= b.size { @@ -737,7 +750,7 @@ module Set { */ proc ^=(ref lhs: set(?t, ?), const ref rhs: set(t, ?)) { if lhs.parSafe && rhs.parSafe { - forall x in rhs { + forall x in rhs with (ref lhs) { if lhs.contains(x) { lhs.remove(x); } else { @@ -784,6 +797,17 @@ module Set { return result; } + pragma "no doc" + operator :(x: set(?et1, ?p1), type t: set(?et2, ?p2)) { + // TODO: Allow coercion between element types? If we do then init= + // should also be changed accordingly. + if et1 != et2 then + compilerError('Cannot cast to set with different ', + 'element type: ', t:string); + var result: set(et1, p2) = x; + return result; + } + /* Return `true` if the sets `a` and `b` are not equal. diff --git a/runtime/include/chpl-comm.h b/runtime/include/chpl-comm.h index 4a32ffea317c..90902bc6b4ff 100644 --- a/runtime/include/chpl-comm.h +++ b/runtime/include/chpl-comm.h @@ -237,6 +237,11 @@ void chpl_comm_rollcall(void); // This returns the page size for the comm layer registered heap, // either the size of a system page or some hugepage size. // +// chpl_comm_regMemHeapTouch(): +// For configurations that use a static/fixed heap, this attempts to +// touch the heap in an interleaved and parallel manner to improve +// NUMA affinity and speed up faulting in the memory. +// // chpl_comm_regMemAllocThreshold(): // Allocations smaller than this should be done normally, by the // memory layer. 
Those at least this size may be done through this @@ -279,6 +284,8 @@ size_t chpl_comm_regMemHeapPageSize(void) { return CHPL_COMM_IMPL_REG_MEM_HEAP_PAGE_SIZE(); } +void chpl_comm_regMemHeapTouch(void* start, size_t size); + #ifndef CHPL_COMM_IMPL_REG_MEM_ALLOC_THRESHOLD #define CHPL_COMM_IMPL_REG_MEM_ALLOC_THRESHOLD() SIZE_MAX #endif diff --git a/runtime/src/chpl-comm.c b/runtime/src/chpl-comm.c index 6bc54cb9c14b..b7cfd0338fb3 100644 --- a/runtime/src/chpl-comm.c +++ b/runtime/src/chpl-comm.c @@ -23,12 +23,15 @@ // comm//comm-.c // #include "chplrt.h" + +#include "chpl-align.h" #include "chpl-comm.h" #include "chpl-comm-compiler-macros.h" #include "chpl-comm-diags.h" #include "chpl-comm-internal.h" #include "chpl-env.h" #include "chpl-mem.h" +#include "chpl-topo.h" // Don't get warning macros for chpl_comm_get etc. #include "chpl-comm-no-warning-macros.h" @@ -153,6 +156,62 @@ size_t chpl_comm_getenvMaxHeapSize(void) return maxHeapSize; } +typedef struct { + unsigned char *start; + uintptr_t size; + int tid; + int nthreads; +} memory_region; + +// Pin a thread to a specific NUMA domain and cyclically touch pages to get +// interleaved memory. We don't have an accurate estimate of the page size when +// Transparent Huge Pages (THP) are used, so we fault in regions in at least 2 +// MiB chunks to cover the most common THP size. We then touch the first +// element of every system page or non-transparent huge page to fault in. +static void *touch_thread(void *mem_region) { + memory_region* mr = (memory_region*) mem_region; + + uintptr_t page_size = chpl_comm_regMemHeapPageSize(); + uintptr_t touch_size = page_size > 2<<20 ? 
page_size: 2<<20; + unsigned char* aligned_start = round_up_to_mask_ptr(mr->start, touch_size-1); + uintptr_t aligned_offset = (uintptr_t)aligned_start - (uintptr_t)mr->start; + uintptr_t aligned_size = round_down_to_mask(mr->size - aligned_offset, touch_size-1); + + chpl_topo_setThreadLocality(mr->tid % chpl_topo_getNumNumaDomains()); + // Iterate through all the touch regions cyclically + for (uintptr_t tr=mr->tid*touch_size; trnthreads*touch_size) { + // Iterate through all the page regions in the current region we're touching + for (uintptr_t pr=tr; pr/dev/null | head -n 1`; \ + if test -z "$$GASNET_DESC"; then \ + echo 'Use of git describe failed even though .git exists' 1>&2; \ + exit 1; \ + fi; \ + $(PERL) -pi -e "s:no-version-control-info:$$GASNET_DESC:;" -- $(distdir)/configure || exit $$? ; \ echo $$GASNET_DESC > $(distdir)/version.git || exit $$? ; \ fi if test -f $(distdir)/docs/Makefile ; then \ diff --git a/third-party/gasnet/gasnet-src/Makefile.in b/third-party/gasnet/gasnet-src/Makefile.in index 350428e47aba..8556cd0c7518 100644 --- a/third-party/gasnet/gasnet-src/Makefile.in +++ b/third-party/gasnet/gasnet-src/Makefile.in @@ -303,6 +303,10 @@ CONFIGURE_ARGS = @CONFIGURE_ARGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CROSS_COMPILING = @CROSS_COMPILING@ +CUDA_UVA_CFLAGS = @CUDA_UVA_CFLAGS@ +CUDA_UVA_LDFLAGS = @CUDA_UVA_LDFLAGS@ +CUDA_UVA_LIBS = @CUDA_UVA_LIBS@ +CUDA_guess_prog = @CUDA_guess_prog@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ @@ -328,6 +332,8 @@ ENVCMD = @ENVCMD@ EXEEXT = @EXEEXT@ EXESUFFIX = @EXESUFFIX@ FH_CFLAGS = @FH_CFLAGS@ +GASNETI_HWLOC_BIND_PATH = @GASNETI_HWLOC_BIND_PATH@ +GASNETI_HWLOC_CALC_PATH = @GASNETI_HWLOC_CALC_PATH@ GASNETI_PTR_BITS = @GASNETI_PTR_BITS@ GASNET_EXTRA_DEFINES = @GASNET_EXTRA_DEFINES@ GASNET_IBV_SPAWNER_CONF = @GASNET_IBV_SPAWNER_CONF@ @@ -356,6 +362,10 @@ GSTACK_PATH = @GSTACK_PATH@ HAVE_INTTYPES_H = @HAVE_INTTYPES_H@ HAVE_STDINT_H = @HAVE_STDINT_H@ HAVE_SYS_TYPES_H = @HAVE_SYS_TYPES_H@ +HWLOC_CFLAGS 
= @HWLOC_CFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_guess_prog = @HWLOC_guess_prog@ IBV_CFLAGS = @IBV_CFLAGS@ IBV_LDFLAGS = @IBV_LDFLAGS@ IBV_LIBS = @IBV_LIBS@ @@ -532,6 +542,7 @@ CONDUITMODE_headers = \ gasnet_ammacros.h \ gasnet_trace.h \ gasnet_fwd.h \ + other/kinds/gasnet_mk.h \ extended-ref/coll/gasnet_coll.h \ extended-ref/ratomic/gasnet_ratomic.h \ extended-ref/vis/gasnet_vis.h @@ -1434,8 +1445,12 @@ dist-hook: exit 1 ; \ fi if test -d $(srcdir)/.git ; then \ - GASNET_DESC=`$${GIT=git} --git-dir=$(srcdir)/.git describe` || exit $$? ; \ - $(PERL) -pi -e "s:no-version-control-info:$$GASNET_DESC:;" -- $(distdir)/configure $(distdir)/gasnet_tools.c || exit $$? ; \ + GASNET_DESC=`( cd $(srcdir) && $${GIT=git} describe --long --dirty --always ) 2>/dev/null | head -n 1`; \ + if test -z "$$GASNET_DESC"; then \ + echo 'Use of git describe failed even though .git exists' 1>&2; \ + exit 1; \ + fi; \ + $(PERL) -pi -e "s:no-version-control-info:$$GASNET_DESC:;" -- $(distdir)/configure || exit $$? ; \ echo $$GASNET_DESC > $(distdir)/version.git || exit $$? ; \ fi if test -f $(distdir)/docs/Makefile ; then \ diff --git a/third-party/gasnet/gasnet-src/README b/third-party/gasnet/gasnet-src/README index b3686845b824..9f792c26c49e 100644 --- a/third-party/gasnet/gasnet-src/README +++ b/third-party/gasnet/gasnet-src/README @@ -427,9 +427,10 @@ ucx-conduit: Unified Communication X framework [EXPERIMENTAL] This conduit is currently experimental, and is not yet carefully tuned for performance. It has only been validated on Mellanox InfiniBand devices starting from ConnectX-5. -ofi-conduit: Open Fabrics Interfaces (*TEMPORARILY DISABLED*) +ofi-conduit: Open Fabrics Interfaces [EXPERIMENTAL] GASNet over the Open Fabrics Interface framework (libfabric). This conduit is currently being re-implemented for GASNet-EX. + Work to date is believed to be functionally complete and correct, but not performant. 
Until that rewrite is complete, users are advised to use other conduits compatible with their hardware, or to use the legacy GASNet-1 release (if EX-specific features are not required). @@ -520,6 +521,7 @@ Platforms where GASNet and Berkeley UPC have been successfully tested include: * macOS/{x86,x86_64}/icc/{32,64}: smp, udp, mpi & * macOS/{x86,x86_64}/{gcc,clang}/{32,64}: smp, udp, mpi * macOS/{x86,x86_64}/PGI/{32,64}: smp, udp, # + * macOS/AARCH64/{gcc,clang}/64: smp, udp, # * CNL/Cray-XT/{gcc,PGI,PathScale,Intel}/64: smp, mpi & @@ -766,15 +768,10 @@ application-specific support. a query using GEX_FLAG_TM_SCRATCH_SIZE_RECOMMENDED. If the size is set too low, then the performance of collectives may suffer. This parameter must be single-valued (same value on all processes). + A value of zero is permitted, but any value below some implementation-specific + minimum value will be silently increased to that minimum. Defaults to 2MB per rank. -* GASNET_COLL_MIN_SCRATCH_SIZE - Specifies the minimum size of the scratch - space allocated on each rank for internal use in collective communications. - This is the minimum size permitted for the initial team (or job startup will - fail), and is the value returned from a query using GEX_FLAG_TM_SCRATCH_SIZE_MIN. - This parameter must be single-valued (same value on all processes). - Defaults to 1KB per rank. - * GASNET_COLL_ENABLE_SEARCH - enable autotuning of collectives * GASNET_COLL_TUNING_FILE - file to read and/or write collective autotuning data For usage information, see the file autotuner.txt in the docs directory. 
@@ -906,6 +903,7 @@ Optional environment variable settings: A - AM requests/replies (and handler execution, if conduit-supported) X - AMPoll I - informational messages about system status or performance alerts + O - Object creation, modification and destruction C - conduit-specific (low-level) messages D - Detailed message data for gets/puts/AMreqrep N - Line number information from client source files diff --git a/third-party/gasnet/gasnet-src/README-tools b/third-party/gasnet/gasnet-src/README-tools index a8159209f754..27d86f7d894b 100644 --- a/third-party/gasnet/gasnet-src/README-tools +++ b/third-party/gasnet/gasnet-src/README-tools @@ -1,7 +1,7 @@ ============================ README file for GASNet tools ============================ -GASNet tools specification version: 1.16 +GASNet tools specification version: 1.17 Authors: Dan Bonachea Paul H. Hargrove @@ -1119,6 +1119,83 @@ uint64_t gasnett_release_version(void); Return an monotonically advancing integral representation of the compiled library's package version. Also see GASNETT_RELEASE_VERSION_* below. +------------------- +Trace/Stats support +------------------- + +This section documents interfaces used to control the implementation-defined (non-normative) +tracing and statistical collection features of the GASNet library implementation. +These features are conditionally available based on configure-time settings, and are currently +only active for an "appropriately configured" conduit-mode GASNet library, meaning one configured +with the --enable-trace/--enable-stats arguments (or the --enable-debug argument that implies these). + +The interfaces specified in this section are ONLY functional in translation units meeting ALL +of the following conditions: + +1. Must `#include ` before `#include ` +2. The included headers must reference an appropriately configured GASNet library. +3. Must eventually link an appropriately configured conduit-mode GASNet library.
+ +In all other cases, the macros described in this section may compile away to +nothing, or appropriate constant values indicating lack-of-support. + +See "GASNet tracing & statistical collection" in the GASNet README for more details on +the command-line user interface to generate and control trace and stats output. +In a nutshell, each process can generate an output stream of trace and stats +output (which is usually sent to a file). The trace stream outputs event +records as they occur, and the stats stream is used to output statistics that +are aggregated in memory before output (usually at process exit). Both streams +are optionally filtered by category via user-controllable masks. Note that +depending on library build settings, all stats output may additionally be +echoed into the trace log (if any). + +const char *GASNETT_TRACE_GETMASK() +const char *GASNETT_STATS_GETMASK() +GASNETT_TRACE_SETMASK(const char *newmask) +GASNETT_STATS_SETMASK(const char *newmask) + + Get and set the trace and stats masks for the current process at runtime. + The mask is represented as a character string of categories, as defined in the GASNet README. + The environment variables described there are used to initialize the mask of + active categories at job start, and these calls modify the mask programmatically. + Note that envvars GASNET_{TRACE,STATS}FILE (and optionally GASNET_{TRACE,STATS}NODES) + must still be set appropriately to enable output, otherwise the mask has no effect. + +bool GASNETT_TRACE_ENABLED + + Expands to a non-zero value iff tracing for the `H` category is currently enabled and + active (i.e. not filtered by the trace-mask) on the calling process. + In general this is NOT a compile-time constant value. + +GASNETT_TRACE_PRINTF(const char *format, ...) +GASNETT_TRACE_PRINTF_FORCE(const char *format, ...) + + Print a message into the trace log using the 'H' or 'U' categories, respectively, + when such tracing is enabled and active. 
The 'U' category is not subject to trace-mask + filtering, thus the _FORCE variant should be used sparingly for low-frequency messages. + Argument syntax follows C99 printf conventions (format string followed by arguments). + +GASNETT_STATS_PRINTF(const char *format, ...) +GASNETT_STATS_PRINTF_FORCE(const char *format, ...) + + Print a message into the stats log using the 'H' or 'U' categories, respectively, + when the stats log is enabled and active. The 'U' category is not subject to stats-mask + filtering, thus the _FORCE variant should be used sparingly for low-frequency messages. + Argument syntax follows C99 printf conventions (format string followed by arguments). + +GASNETT_STATS_DUMP(bool reset) + + This call directs the statistical collection system on this process to + immediately dump a report of all active statistical counters to the stats + output stream (appending to any earlier stats output). + If the `reset` argument is non-zero, then statistical counters + are additionally reset to their initial values after they are output. + This output-then-reset is performed with "best effort" atomicity on a + counter-by-counter basis, but atomicity is not strongly guaranteed, especially + across separate counters or in the presence of concurrent activity + by other threads. + If the stats output stream is inactive on this process, this call has no effect. + ------------------- Miscellaneous tools ------------------- @@ -1251,6 +1328,32 @@ int gasnett_maximize_rlimit(int res, const char *lim_desc); setting GASNET_MAXIMIZE_RLIMIT_CPU=0 will suppress maximizing the limit on cpu time, but will not (by itself) cause a zero (failure) return value. +------------------------- +Macro argument evaluation +------------------------- + +When implementing function-like macros that (perhaps conditionally) ignore one +or more of their arguments, it is usually desirable to ensure every argument is +evaluated exactly once (for side-effects). 
Unfortunately, the simple idiom +`((void)(arg1),whatever(arg2))` will yield warnings from some compilers. + +The following combine exactly-once evaluation and compiler-specific warning +suppression (when needed): + + GASNETT_UNUSED_ARGS1(a1) + GASNETT_UNUSED_ARGS2(a1,a2) + GASNETT_UNUSED_ARGS3(a1,a2,a3) + GASNETT_UNUSED_ARGS4(a1,a2,a3,a4) + GASNETT_UNUSED_ARGS5(a1,a2,a3,a4,a5) + GASNETT_UNUSED_ARGS6(a1,a2,a3,a4,a5,a6) + GASNETT_UNUSED_ARGS7(a1,a2,a3,a4,a5,a6,a7) + GASNETT_UNUSED_ARGS8(a1,a2,a3,a4,a5,a6,a7,a8) + +Contrived example: + #define OPTION_1_OF_3(x,y,z) (GASNETT_UNUSED_ARGS2(y,z),(x)) + #define OPTION_2_OF_3(x,y,z) (GASNETT_UNUSED_ARGS2(x,z),(y)) + #define OPTION_3_OF_3(x,y,z) (GASNETT_UNUSED_ARGS2(x,y),(z)) + --------------- Feature control --------------- diff --git a/third-party/gasnet/gasnet-src/acconfig.h b/third-party/gasnet/gasnet-src/acconfig.h index 9ea73f4b291c..9d5c5ced0db4 100644 --- a/third-party/gasnet/gasnet-src/acconfig.h +++ b/third-party/gasnet/gasnet-src/acconfig.h @@ -218,6 +218,12 @@ /* --with-max-threads value (if given) */ #undef GASNETI_MAX_THREADS_CONFIGURE +/* --with-maxeps value (if given) */ +#undef GASNETI_MAXEPS_CONFIGURE + +/* memory kinds support */ +#undef GASNETI_MK_CLASS_CUDA_UVA_ENABLED + /* has clock_gettime() */ #undef HAVE_CLOCK_GETTIME @@ -242,6 +248,12 @@ /* has Portable Linux Processor Affinity */ #undef HAVE_PLPA +/* Portable Hardware Locality (hwloc) library and command-line utils */ +#undef GASNETI_HAVE_HWLOC_LIB +#undef GASNETI_HAVE_HWLOC_UTILS +#undef GASNETI_HWLOC_BIND_PATH +#undef GASNETI_HWLOC_CALC_PATH + /* have ptmalloc's mallopt() options */ #undef HAVE_PTMALLOC @@ -339,9 +351,6 @@ /* has x86 EBX register (not reserved for GOT) */ #undef GASNETI_HAVE_X86_EBX -/* has support (toolchain and cpu) for ia64 cmp8xchg16 instruction */ -#undef GASNETI_HAVE_IA64_CMP8XCHG16 - /* has support (toolchain and cpu) for x86_64 cmpxchg16b instruction */ #undef GASNETI_HAVE_X86_CMPXCHG16B @@ -452,7 +461,7 @@ #undef
GASNETC_USE_SOCKETPAIR /* GASNet aries-conduit settings */ -#undef GASNETC_GNI_MAX_MEDIUM +#undef GASNETC_GNI_MAX_MEDIUM_DFLT #undef GASNETC_GNI_MULTI_DOMAIN #undef GASNETC_GNI_UDREG @@ -468,12 +477,15 @@ #undef HAVE_IBV_TRANSPORT_TYPE #undef GASNETC_IBV_MAX_MEDIUM #undef GASNETC_IBV_ODP +#undef GASNETC_IBV_ODP_MLNX +#undef GASNETC_IBV_ODP_CORE #undef GASNETC_IBV_ODP_DISABLED #undef GASNETC_IBV_RCV_THREAD #undef GASNETC_IBV_CONN_THREAD -#undef GASNETC_IBV_MAX_HCAS +#undef GASNETC_IBV_MAX_HCAS_CONFIGURE #undef GASNETC_IBV_PHYSMEM_MAX_CONFIGURE #undef GASNETC_IBV_PHYSMEM_PROBE_CONFIGURE +#undef GASNETC_IBV_PORTS_CONFIGURE /* GASNet bug1389 detection/work-around */ #undef GASNETI_BUG1389_WORKAROUND diff --git a/third-party/gasnet/gasnet-src/acinclude.m4 b/third-party/gasnet/gasnet-src/acinclude.m4 index 8804904dc7d0..e02a53283de3 100644 --- a/third-party/gasnet/gasnet-src/acinclude.m4 +++ b/third-party/gasnet/gasnet-src/acinclude.m4 @@ -911,7 +911,7 @@ AC_DEFUN([GASNET_DISPLAY_VERSION],[ display_version_info="$display_version_info AC_PACKAGE_VERSION" ]) if test -d "$srcdir/.git" ; then - git_describe=`${GIT=git} --git-dir="$srcdir/.git" describe 2> /dev/null` + git_describe=`( cd "$srcdir" && ${GIT=git} describe --long --dirty --always ) 2> /dev/null` if test -n "$git_describe"; then display_version_info="$display_version_info ($git_describe)" fi diff --git a/third-party/gasnet/gasnet-src/aries-conduit/Makefile.am b/third-party/gasnet/gasnet-src/aries-conduit/Makefile.am index f083c63bf31f..dcfeefe22e69 100644 --- a/third-party/gasnet/gasnet-src/aries-conduit/Makefile.am +++ b/third-party/gasnet/gasnet-src/aries-conduit/Makefile.am @@ -58,6 +58,10 @@ CONDUIT_EXTRADEPS = $(pmi_srcdir)/*.[ch] # forced targets in this file, and should probably use LIBINCLUDES/LIBDEFINES CONDUIT_SPECIAL_OBJS = +# memory kinds supported by this conduit (space separated) +# to be included in libgasnet if support was enabled at configure time +CONDUIT_KINDS = + # the default job spawn 
command to be used for "make run-tests" # The following substitutions are performed: # %P = program executable name diff --git a/third-party/gasnet/gasnet-src/aries-conduit/Makefile.in b/third-party/gasnet/gasnet-src/aries-conduit/Makefile.in index fc072aaca6e5..be2bdbf8fc05 100644 --- a/third-party/gasnet/gasnet-src/aries-conduit/Makefile.in +++ b/third-party/gasnet/gasnet-src/aries-conduit/Makefile.in @@ -279,6 +279,10 @@ CONFIGURE_ARGS = @CONFIGURE_ARGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CROSS_COMPILING = @CROSS_COMPILING@ +CUDA_UVA_CFLAGS = @CUDA_UVA_CFLAGS@ +CUDA_UVA_LDFLAGS = @CUDA_UVA_LDFLAGS@ +CUDA_UVA_LIBS = @CUDA_UVA_LIBS@ +CUDA_guess_prog = @CUDA_guess_prog@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ @@ -304,6 +308,8 @@ ENVCMD = @ENVCMD@ EXEEXT = @EXEEXT@ EXESUFFIX = @EXESUFFIX@ FH_CFLAGS = @FH_CFLAGS@ +GASNETI_HWLOC_BIND_PATH = @GASNETI_HWLOC_BIND_PATH@ +GASNETI_HWLOC_CALC_PATH = @GASNETI_HWLOC_CALC_PATH@ GASNETI_PTR_BITS = @GASNETI_PTR_BITS@ GASNET_EXTRA_DEFINES = @GASNET_EXTRA_DEFINES@ GASNET_IBV_SPAWNER_CONF = @GASNET_IBV_SPAWNER_CONF@ @@ -332,6 +338,10 @@ GSTACK_PATH = @GSTACK_PATH@ HAVE_INTTYPES_H = @HAVE_INTTYPES_H@ HAVE_STDINT_H = @HAVE_STDINT_H@ HAVE_SYS_TYPES_H = @HAVE_SYS_TYPES_H@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_guess_prog = @HWLOC_guess_prog@ IBV_CFLAGS = @IBV_CFLAGS@ IBV_LDFLAGS = @IBV_LDFLAGS@ IBV_LIBS = @IBV_LIBS@ @@ -537,6 +547,10 @@ CONDUIT_EXTRADEPS = $(pmi_srcdir)/*.[ch] # forced targets in this file, and should probably use LIBINCLUDES/LIBDEFINES CONDUIT_SPECIAL_OBJS = +# memory kinds supported by this conduit (space separated) +# to be included in libgasnet if support was enabled at configure time +CONDUIT_KINDS = + # the default job spawn command to be used for "make run-tests" # The following substitutions are performed: # %P = program executable name diff --git a/third-party/gasnet/gasnet-src/aries-conduit/README 
b/third-party/gasnet/gasnet-src/aries-conduit/README index 5e75b90a04eb..3ff2965e6c68 100644 --- a/third-party/gasnet/gasnet-src/aries-conduit/README +++ b/third-party/gasnet/gasnet-src/aries-conduit/README @@ -26,9 +26,13 @@ Optional configure-time settings: By default gasnet_AMMaxMedium() is 4032: a 4096 byte buffer minus 64 bytes reserved for up to 16 handler arguments. This configure option allows control over the value of gasnet_AMMaxMedium(). - The value must be a multiple of 64, and cannot be less than 512. + The value must be a multiple of 64, and cannot be less than 512 or + greater than 65408. It is recommended to use values that are 64-bytes less than a - power-of-two, to preserve efficient memory use. + power-of-two, to preserve efficient memory use. By default, + values will be rounded down to the nearest such recommended value + or to the minimum value of 512. One may prefix '+' to the + setting to prevent this behavior. The default value is 4032. --enable-aries-multi-domain @@ -156,6 +160,16 @@ GASNET_GNI_AM_RVOUS_BUFFERS - number of AM rendezvous buffers Rendezvous protocol The default value is 64, and the minimum is 1. +GASNET_GNI_MAX_MEDIUM - max payload of 16-argument AM Mediums + This determines the maximum size of AM Medium payloads with 16 arguments. + More specifically, this is the value returned by gex_AM_LUBRequestMedium(), + gex_AM_LUBReplyMedium() and the legacy API gasnet_AMMaxMedium(). + The value must be a multiple of 64, between 512 and 65408, inclusive. + See the documentation for --with-aries-max-medium, above, for recommended + values and corresponding convenience aliases. + The default value is 4032, unless a different default was set at configure + time using --with-aries-max-medium=N. + GASNET_NETWORKDEPTH_TOTAL - depth of out-going AM Request queue This determines the maximum number of AM Requests that can be outstanding from one endpoint to all others before flow-control. 
@@ -174,10 +188,17 @@ GASNET_NETWORKDEPTH_SPACE - volume of Eager AM Request queue outstanding between a given pair of peers before flow-control in the Eager AM protocol, even if less than GASNET_NETWORKDEPTH Requests are outstanding. Values smaller than two maximum-size Medium AMs, or larger than 64 maximum- - sized Medium AMs, will be silently adjusted to that range. Additionally, - values will be silently rounded down to the product of GASNET_NETWORKDEPTH - times a power-of-two. - The default value is 16K. + sized Medium AMs, will be silently adjusted to that range. + Additionally, values will be silently rounded down to the product of a + power-of-two times GASNET_NETWORKDEPTH. When GASNET_GNI_MAX_MEDIUM is set + to a recommended value, this adjustment results in an exact multiple of the + maximum size of an AM Medium. For all other values, however, there will not + be such an alignment of sizes and the maximum number of max-size AM Mediums + in flight simultaneously may be reduced as a result. + The default value (prior to the adjustment noted above) is four maximum-size + Medium AMs (16K with all defaults). + See also GASNET_GNI_MAX_MEDIUM, above, regarding the maximum size of a + Medium AM and recommended values. 
GASNET_LONG_DEPTH - depth of per-peer Long tracking This determines the maximum number of AM Long Requests payload transfers diff --git a/third-party/gasnet/gasnet-src/aries-conduit/contrib/Makefile.in b/third-party/gasnet/gasnet-src/aries-conduit/contrib/Makefile.in index 97031a5a2d68..2f0ef3dadcf6 100644 --- a/third-party/gasnet/gasnet-src/aries-conduit/contrib/Makefile.in +++ b/third-party/gasnet/gasnet-src/aries-conduit/contrib/Makefile.in @@ -184,6 +184,10 @@ CONFIGURE_ARGS = @CONFIGURE_ARGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CROSS_COMPILING = @CROSS_COMPILING@ +CUDA_UVA_CFLAGS = @CUDA_UVA_CFLAGS@ +CUDA_UVA_LDFLAGS = @CUDA_UVA_LDFLAGS@ +CUDA_UVA_LIBS = @CUDA_UVA_LIBS@ +CUDA_guess_prog = @CUDA_guess_prog@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ @@ -209,6 +213,8 @@ ENVCMD = @ENVCMD@ EXEEXT = @EXEEXT@ EXESUFFIX = @EXESUFFIX@ FH_CFLAGS = @FH_CFLAGS@ +GASNETI_HWLOC_BIND_PATH = @GASNETI_HWLOC_BIND_PATH@ +GASNETI_HWLOC_CALC_PATH = @GASNETI_HWLOC_CALC_PATH@ GASNETI_PTR_BITS = @GASNETI_PTR_BITS@ GASNET_EXTRA_DEFINES = @GASNET_EXTRA_DEFINES@ GASNET_IBV_SPAWNER_CONF = @GASNET_IBV_SPAWNER_CONF@ @@ -237,6 +243,10 @@ GSTACK_PATH = @GSTACK_PATH@ HAVE_INTTYPES_H = @HAVE_INTTYPES_H@ HAVE_STDINT_H = @HAVE_STDINT_H@ HAVE_SYS_TYPES_H = @HAVE_SYS_TYPES_H@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_guess_prog = @HWLOC_guess_prog@ IBV_CFLAGS = @IBV_CFLAGS@ IBV_LDFLAGS = @IBV_LDFLAGS@ IBV_LIBS = @IBV_LIBS@ diff --git a/third-party/gasnet/gasnet-src/aries-conduit/gasnet_aries.c b/third-party/gasnet/gasnet-src/aries-conduit/gasnet_aries.c index a1a6a0956140..ab0ad7b83b88 100644 --- a/third-party/gasnet/gasnet-src/aries-conduit/gasnet_aries.c +++ b/third-party/gasnet/gasnet-src/aries-conduit/gasnet_aries.c @@ -10,7 +10,7 @@ #include #include -#define GASNETC_NETWORKDEPTH_SPACE_DEFAULT (16*1024) +#define GASNETC_NETWORKDEPTH_SPACE_DEFAULT (4*GASNETC_MAX_MEDIUM(0)) #define GASNETC_NETWORKDEPTH_TOTAL_DEFAULT 64 #define 
GASNETC_NETWORKDEPTH_DEFAULT 64 @@ -126,7 +126,6 @@ static unsigned int am_maxcredit; static unsigned int request_bits; static int have_auxseg = 0; -static int have_segment = 0; static gni_cq_handle_t am_cq_handle; static int gasnetc_poll_burst = 10; @@ -137,8 +136,6 @@ static size_t gasnetc_put_bounce_register_cutover; size_t gasnetc_max_get_unaligned; /* read-only: */ -// TODO-EX: this needs to be more general for multi-segment support -static gni_mem_handle_t my_mem_handle; static gni_mem_handle_t my_aux_handle; #if GASNETC_BUILD_GNICE @@ -799,9 +796,8 @@ void gasnetc_init_gni(gasnet_seginfo_t seginfo) } /*-------------------------------------------------*/ -/* called after client segment init. */ -/* allgather the memory handles for the segments */ -void gasnetc_init_segment(gasnet_seginfo_t seginfo) +// register (create memory handle for) a client segment +void gasnetc_segment_register(gasnetc_Segment_t segment) { gni_return_t status; #if GASNETC_USE_MULTI_DOMAIN @@ -810,17 +806,20 @@ void gasnetc_init_segment(gasnet_seginfo_t seginfo) DOMAIN_SPECIFIC_VAR(peer_struct_t * const, peer_data); #endif + void *segbase = segment->_addr; + uintptr_t segsize = segment->_size; + { int count = 0; for (;;) { - status = GNI_MemRegister(nic_handle, (uint64_t) seginfo.addr, - (uint64_t) seginfo.size, am_cq_handle, + status = GNI_MemRegister(nic_handle, (uint64_t) segbase, + segsize, am_cq_handle, gasnetc_memreg_flags|GNI_MEM_READWRITE, -1, - &my_mem_handle); + &segment->mem_handle); if (status == GNI_RC_SUCCESS) break; if (status == GNI_RC_ERROR_RESOURCE) { gasnetc_GNIT_Log("MemRegister segment fault %d at %p %lx, code %s", - count, seginfo.addr, seginfo.size, gasnetc_gni_rc_string(status)); + count, segbase, (unsigned long)segsize, gasnetc_gni_rc_string(status)); count += 1; if (count >= 10) break; } else { @@ -828,35 +827,63 @@ void gasnetc_init_segment(gasnet_seginfo_t seginfo) } } } - have_segment = 1; gasneti_assert_always (status == GNI_RC_SUCCESS); +} - { - 
gni_mem_handle_t *all_mem_handle = gasneti_malloc(gasneti_nodes * sizeof(gni_mem_handle_t)); - #if 0// Cannot use gni-specific bootstrap collectives this late - gasnetc_bootstrapExchange_gni(&my_mem_handle, sizeof(gni_mem_handle_t), all_mem_handle); - #else - // TODO-EX: but we want real collectives here eventually anyway - gasneti_defaultExchange(&my_mem_handle, sizeof(gni_mem_handle_t), all_mem_handle); - #endif - for (gex_Rank_t i = 0; i < gasneti_nodes; ++i) { - peer_data[i].mem_handle = all_mem_handle[i]; - } - gasneti_free(all_mem_handle); - } +/*-------------------------------------------------*/ +// set the local memory handle for the client segment and exchanges with other procs +// TODO: non-primordial EP support +void gasnetc_segment_exchange(gex_TM_t tm, gex_EP_t *eps, size_t num_eps) +{ + // Exchange a gni_mem_handle_t + struct exchg_data { + gex_EP_Location_t loc; + gni_mem_handle_t mem_handle; + } *local, *global, *p; + + size_t elem_sz = sizeof(struct exchg_data); + local = gasneti_malloc(num_eps * elem_sz); + + // Pack + p = local; + for (gex_Rank_t i = 0; i < num_eps; ++i) { + gex_EP_t ep = eps[i]; + gasnetc_Segment_t segment = (gasnetc_Segment_t) gasneti_import_ep(ep)->_segment; + if (! segment) continue; + p->loc.gex_rank = gasneti_mynode; + p->loc.gex_ep_index = gex_EP_QueryIndex(ep); + p->mem_handle = segment->mem_handle; + ++p; + } + + size_t local_bytes = elem_sz * (p - local); + size_t total_bytes = gasneti_blockingRotatedExchangeV(tm, local, local_bytes, (void**)&global, NULL); + size_t total_eps = total_bytes / elem_sz; + gasneti_free(local); + + // Unpack + p = global; + for (size_t i = 0; i < total_eps; ++i, ++p) { + gex_Rank_t jobrank = p->loc.gex_rank; + if (! 
p->loc.gex_ep_index ) { // Primordial EP (includes loopback) + GASNETC_DIDX_POST(GASNETC_DEFAULT_DOMAIN); + DOMAIN_SPECIFIC_VAL(peer_data[jobrank]).mem_handle = p->mem_handle; #if GASNETC_USE_MULTI_DOMAIN && (GASNETC_DOMAIN_ALLOC_POLICY == GASNETC_STATIC_DOMAIN_ALLOC) - /* Replicate mem handle - not stricty necessary, but cache-friendly: */ - for (int d = 1; d < gasnetc_domain_count; d++) { - gasnete_threadidx_t tidx = gasnetc_get_domain_first_thread_idx(d); - GASNETC_DIDX_POST(gasnetc_get_domain_idx(tidx)); - - for (gex_Rank_t n = 0; n < gasneti_nodes; ++n) { - DOMAIN_SPECIFIC_VAL(peer_data[n]).mem_handle = gasnetc_cdom_data[0].peer_data[n].mem_handle; + // Replicate mem handle + for (int d = 1; d < gasnetc_domain_count; d++) { + gasnete_threadidx_t tidx = gasnetc_get_domain_first_thread_idx(d); + GASNETC_DIDX_POST(gasnetc_get_domain_idx(tidx)); + DOMAIN_SPECIFIC_VAL(peer_data[jobrank]).mem_handle = p->mem_handle; + } +#endif + } else { + // Non-primordial + gasneti_unreachable_error(("gex_EP_PublishBoundSegment does not yet handle non-primordial EPs")); } } -#endif + gasneti_free(global); } @@ -1497,12 +1524,15 @@ void gasnetc_shutdown(void) gasnetc_GNIT_Log("CqDestroy(am_cq) failed with %s", gasnetc_gni_rc_string(status)); } - if_pt (have_segment) { - status = GNI_MemDeregister(nic_handle, &my_mem_handle); - if_pf (status != GNI_RC_SUCCESS) { - gasnetc_GNIT_Log("MemDeregister(segment) failed with %s", gasnetc_gni_rc_string(status)); + GASNETI_SEGTBL_LOCK(); + gasneti_Segment_t seg; + GASNETI_SEGTBL_FOR_EACH(seg) { + status = GNI_MemDeregister(nic_handle, &((gasnetc_Segment_t)seg)->mem_handle); + if_pf (status != GNI_RC_SUCCESS) { + gasnetc_GNIT_Log("MemDeregister(segment) failed with %s", gasnetc_gni_rc_string(status)); + } } - } + GASNETI_SEGTBL_UNLOCK(); if_pt (have_auxseg) { status = GNI_MemDeregister(nic_handle, &my_aux_handle); @@ -1541,7 +1571,7 @@ void gasnetc_shutdown(void) } } -extern void gasnetc_trace_finish(void) { +extern void gasnetc_stats_dump(int 
reset) { #if GASNETC_GNI_UDREG if (GASNETI_STATS_ENABLED(C) && gasnetc_udreg_hndl) { int max_memreg = MAX(1,gasneti_getenv_int_withdefault("GASNET_GNI_MEMREG", GASNETC_GNI_MEMREG_DEFAULT, 0)); @@ -1552,6 +1582,11 @@ extern void gasnetc_trace_finish(void) { GASNETI_STATS_PRINTF(C,("UDREG size=%d hit/miss/evict: %"PRIu64"/%"PRIu64"/%"PRIu64"\n", max_memreg, hit, miss, evict)); } + if (reset && gasnetc_udreg_hndl) { + (void)UDREG_ResetStat(gasnetc_udreg_hndl, UDREG_STAT_CACHE_HIT); + (void)UDREG_ResetStat(gasnetc_udreg_hndl, UDREG_STAT_CACHE_MISS); + (void)UDREG_ResetStat(gasnetc_udreg_hndl, UDREG_STAT_CACHE_EVICTED); + } #endif } @@ -2095,9 +2130,9 @@ gasnetc_alloc_request_post_descriptor_np( #if GASNETC_NP_MEDXL gasnetc_post_descriptor_t *gpd = request_post_descriptor_inner(dest, 0, 0, min_length, max_length, flags GASNETI_THREAD_PASS); - if (gpd && (gpd->pd.length > GASNETC_MSG_MAXSIZE)) { + if (gpd && (gpd->pd.length > GASNETC_MAX_MEDIUM(0))) { // We have a "extra large" landing zone on the peer, but the gpd has a - // source buffer of at most GASNETC_MSG_MAXSIZE. We need an alternate. + // source buffer of at most GASNETC_MAX_MEDIUM(0). We need an alternate. void *buf = gasneti_lifo_pop(&medxl_descriptor_pool); if_pf (! 
buf) buf = gasneti_malloc(am_maxcredit << am_slot_bits); gpd->pd.local_addr = (uint64_t) buf; @@ -2106,8 +2141,7 @@ gasnetc_alloc_request_post_descriptor_np( return gpd; #else // TODO-EX: cannot negotiate larger than MaxMedium until/unless reply_pool is over-sized too - // We cannot send 65536 bytes in a 16-bit field (bug 4042) - max_length = MIN(max_length, MIN(GASNETC_MSG_MAXSIZE,65535)); + max_length = MIN(max_length, GASNETC_MAX_MEDIUM(0)); return request_post_descriptor_inner(dest, 0, 0, min_length, max_length, flags GASNETI_THREAD_PASS); #endif } @@ -2906,14 +2940,15 @@ gni_return_t myPostFma(gni_ep_handle_t ep, gasnetc_post_descriptor_t *gpd, int l return status; } -// TODO-EX: this is our auxseg support until real multi-segment support arrives -// // Note len=1 is sufficient since the full (addr,len) will have already passed // gasneti_in_{,local_}fullsegment(). While len=0 might seem cheaper, it is not // permitted by gasneti_in_*segment(). GASNETI_INLINE(gasnetc_local_mh) gni_mem_handle_t gasnetc_local_mh(gasneti_EP_t i_ep, void *addr) { - return gasneti_in_local_auxsegment(i_ep,addr,1) ? 
my_aux_handle : my_mem_handle; + if (gasneti_in_local_clientsegment(i_ep, addr, 1)) { + return ((gasnetc_Segment_t) i_ep->_segment)->mem_handle; + } + return my_aux_handle; } GASNETI_INLINE(gasnetc_remote_mh) gni_mem_handle_t gasnetc_remote_mh(peer_struct_t * const peer, void *addr) { @@ -2930,7 +2965,7 @@ size_t gasnetc_rdma_put_bulk(gex_TM_t tm, gex_Rank_t rank, { GASNETC_DIDX_POST(gpd->domain_idx); gex_Rank_t jobrank = gasneti_e_tm_rank_to_jobrank(tm, rank); - gasneti_EP_t i_ep = gasneti_import_tm(tm)->_ep; + gasneti_EP_t i_ep = gasneti_e_tm_to_i_ep(tm); DOMAIN_SPECIFIC_VAR(peer_struct_t * const, peer_data); peer_struct_t * const peer = &peer_data[jobrank]; gni_post_descriptor_t * const pd = &gpd->pd; @@ -3004,7 +3039,7 @@ gasnetc_rdma_put_lc(gex_TM_t tm, gex_Rank_t rank, GASNETC_DIDX_POST(gpd->domain_idx); DOMAIN_SPECIFIC_VAR(peer_struct_t * const, peer_data); gex_Rank_t jobrank = gasneti_e_tm_rank_to_jobrank(tm, rank); - gasneti_EP_t i_ep = gasneti_import_tm(tm)->_ep; + gasneti_EP_t i_ep = gasneti_e_tm_to_i_ep(tm); peer_struct_t * const peer = &peer_data[jobrank]; gni_post_descriptor_t * const pd = &gpd->pd; gni_return_t status; @@ -3155,7 +3190,7 @@ size_t gasnetc_rdma_get(gex_TM_t tm, gex_Rank_t rank, GASNETC_DIDX_POST(gpd->domain_idx); DOMAIN_SPECIFIC_VAR(peer_struct_t * const, peer_data); gex_Rank_t jobrank = gasneti_e_tm_rank_to_jobrank(tm, rank); - gasneti_EP_t i_ep = gasneti_import_tm(tm)->_ep; + gasneti_EP_t i_ep = gasneti_e_tm_to_i_ep(tm); peer_struct_t * const peer = &peer_data[jobrank]; gni_post_descriptor_t * const pd = &gpd->pd; @@ -3275,7 +3310,7 @@ int gasnetc_rdma_get_buff(gex_TM_t tm, gex_Rank_t rank, GASNETC_DIDX_POST(gpd->domain_idx); DOMAIN_SPECIFIC_VAR(peer_struct_t * const, peer_data); gex_Rank_t jobrank = gasneti_e_tm_rank_to_jobrank(tm, rank); - gasneti_EP_t i_ep = gasneti_import_tm(tm)->_ep; + gasneti_EP_t i_ep = gasneti_e_tm_to_i_ep(tm); peer_struct_t * const peer = &peer_data[jobrank]; gni_post_descriptor_t * const pd = &gpd->pd; 
gni_return_t status; @@ -3414,7 +3449,7 @@ void gasnetc_rdma_put_long( DOMAIN_SPECIFIC_VAR(peer_struct_t * const, peer_data); gex_Rank_t jobrank = gasneti_e_tm_rank_to_jobrank(tm, rank); - gasneti_EP_t i_ep = gasneti_import_tm(tm)->_ep; + gasneti_EP_t i_ep = gasneti_e_tm_to_i_ep(tm); peer_struct_t * const peer = &peer_data[jobrank]; gni_return_t status; diff --git a/third-party/gasnet/gasnet-src/aries-conduit/gasnet_aries.h b/third-party/gasnet/gasnet-src/aries-conduit/gasnet_aries.h index bb30fa5c0581..0e68dc563e6c 100644 --- a/third-party/gasnet/gasnet-src/aries-conduit/gasnet_aries.h +++ b/third-party/gasnet/gasnet-src/aries-conduit/gasnet_aries.h @@ -360,6 +360,14 @@ struct gasnetc_post_descriptor { #endif }; +// Conduit-specific Segment type +typedef struct gasnetc_Segment_t_ { + GASNETI_SEGMENT_COMMON // conduit-indep part as prefix + + // memory registation info + gni_mem_handle_t mem_handle; +} *gasnetc_Segment_t; + gasnetc_post_descriptor_t * gasnetc_alloc_post_descriptor(gex_Flags_t flags GASNETC_DIDX_FARG) GASNETI_MALLOC; @@ -379,7 +387,8 @@ int gasnetc_get_domain_idx(gasnete_threadidx_t tidx); #endif void gasnetc_init_gni(gasnet_seginfo_t seginfo); -void gasnetc_init_segment(gasnet_seginfo_t seginfo); +void gasnetc_segment_register(gasnetc_Segment_t segment); +void gasnetc_segment_exchange(gex_TM_t tm, gex_EP_t *eps, size_t num_eps); uintptr_t gasnetc_init_messaging(void); void gasnetc_shutdown(void); /* clean up all gni state */ diff --git a/third-party/gasnet/gasnet-src/aries-conduit/gasnet_core.c b/third-party/gasnet/gasnet-src/aries-conduit/gasnet_core.c index 812b592ca2d9..8981bbf6bad6 100644 --- a/third-party/gasnet/gasnet-src/aries-conduit/gasnet_core.c +++ b/third-party/gasnet/gasnet-src/aries-conduit/gasnet_core.c @@ -24,12 +24,10 @@ GASNETI_IDENT(gasnetc_IdentString_Version, "$GASNetCoreLibraryVersion: " GASNET_CORE_VERSION_STR " $"); GASNETI_IDENT(gasnetc_IdentString_Name, "$GASNetCoreLibraryName: " GASNET_CORE_NAME_STR " $"); 
-GASNETI_IDENT(gasnetc_IdentString_AMMaxMedium, "$GASNetAMMaxMedium: " _STRINGIFY(GASNETC_GNI_MAX_MEDIUM) " $"); +GASNETI_IDENT(gasnetc_IdentString_AMMaxMediumDefault, "$GASNetAMMaxMediumDefault: " _STRINGIFY(GASNETC_GNI_MAX_MEDIUM_DFLT) " $"); static void gasnetc_atexit(int exitcode); -gex_AM_Entry_t const *gasnetc_get_handlertable(void); - gex_AM_Entry_t *gasnetc_handler; // TODO-EX: will be replaced with per-EP tables gasneti_spawnerfn_t const *gasneti_spawner = NULL; @@ -37,6 +35,13 @@ gasneti_spawnerfn_t const *gasneti_spawner = NULL; // gex_TM_t used for AM-based bootstrap collectives and exit handling static gex_TM_t gasnetc_bootstrap_tm = NULL; +size_t gasnetc_sizeof_segment_t(void) { + gasnetc_Segment_t segment; + return sizeof(*segment); +} + +size_t gasnetc_gni_lub_medium = (size_t)(-1); // "goes boom" if not overwritten + /* ------------------------------------------------------------------------------------ */ /* Initialization @@ -51,10 +56,6 @@ static void gasnetc_check_config(void) { gasneti_assert((1< 65536) || (gasnetc_gni_lub_medium % 64)) { + gasneti_fatalerror("GASNET_GNI_MAX_MEDIUM setting (%s) is not valid. " + "The value must be a multiple of 64, between 512 and 65408, inclusive. " + "See aries-conduit README for more details.", + env_val); + } + int orig = gasnetc_gni_lub_medium; + if (gasnetc_gni_lub_medium > 65408) { + // MUST make this adjustment for correctness, even if prefixed by '+' (bug 4042) + // However, since 65408 the advertised maximum, this is just an undocumented convenience. 
+ gasnetc_gni_lub_medium = 65408; + } else if (!exact && !GASNETI_POWEROFTWO(gasnetc_gni_lub_medium + 64)) { + gasnetc_gni_lub_medium = gasnetc_prev_power_of_2(gasnetc_gni_lub_medium) - 64; + gasnetc_gni_lub_medium = MAX(512, gasnetc_gni_lub_medium); // pointy corner + } + if ((gasnetc_gni_lub_medium != orig) && !exact && !gasneti_mynode) { + int is_max = (orig > 65408); + gasneti_console_message("WARNING", "GASNET_GNI_MAX_MEDIUM reduced from %d to %s value %d. " + "One may prefix the value with '+' to %ssilence this warning.", + orig, is_max ? "the maximum" : "recommended", (int)gasnetc_gni_lub_medium, + is_max ? "" : "prevent this behavior and "); + } + } + + // Ensure different views of the max-sized medium and its buffer are consistent + gasneti_assert_uint(GASNETC_MSG_MAXSIZE ,==, + gasnetc_gni_lub_medium + GASNETC_HEADLEN(medium, GASNETC_MAX_ARGS)); + gasneti_assert_uint(GASNETC_MSG_MAXSIZE ,==, GASNETC_MAX_MEDIUM(0)); + #if GASNET_PSHM /* If your conduit will support PSHM, you should initialize it here. 
* The 1st argument is normally "&gasnetc_bootstrapSNodeBroadcast" or equivalent @@ -663,16 +702,16 @@ static int gasnetc_init( gex_Client_t *client_p, // Create first Client, EP and TM *here*, for use in subsequent bootstrap collectives { // allocate the client object - gasneti_Client_t client = gasneti_alloc_client(clientName, flags, 0); + gasneti_Client_t client = gasneti_alloc_client(clientName, flags); *client_p = gasneti_export_client(client); // create the initial endpoint with internal handlers - if (gasnetc_EP_Create(ep_p, *client_p, flags)) + if (gex_EP_Create(ep_p, *client_p, GEX_EP_CAPABILITY_ALL, flags)) GASNETI_RETURN_ERRR(RESOURCE,"Error creating initial endpoint"); gasneti_EP_t ep = gasneti_import_ep(*ep_p); gasnetc_handler = ep->_amtbl; // TODO-EX: this global variable to be removed - gasneti_TM_t tm = gasneti_alloc_tm(ep, gasneti_mynode, gasneti_nodes, flags, 0); + gasneti_TM_t tm = gasneti_alloc_tm(ep, gasneti_mynode, gasneti_nodes, flags); gasnetc_bootstrap_tm = gasneti_export_tm(tm); } @@ -770,15 +809,19 @@ static int gasnetc_attach_primary(void) { static int gasnetc_attach_segment(gex_Segment_t *segment_p, gex_TM_t tm, uintptr_t segsize, - gasneti_bootstrapExchangefn_t exchangefn, gex_Flags_t flags) { /* ------------------------------------------------------------------------------------ */ /* register client segment */ - gasnet_seginfo_t myseg = gasneti_segmentAttach(segment_p, 0, tm, segsize, exchangefn, flags); + gasnet_seginfo_t myseg = gasneti_segmentAttach(segment_p, tm, segsize, flags); // Register client segment with NIC - gasnetc_init_segment(myseg); + gasnetc_Segment_t segment = (gasnetc_Segment_t) gasneti_import_segment(*segment_p); + gasnetc_segment_register(segment); + + // Exchange registration info + gex_EP_t ep = gex_TM_QueryEP(tm); + gasnetc_segment_exchange(tm, &ep, 1); return GASNET_OK; } @@ -791,7 +834,7 @@ extern int gasnetc_attach( gex_TM_t _tm, { GASNETI_TRACE_PRINTF(C,("gasnetc_attach(table (%i entries), 
segsize=%"PRIuPTR")", numentries, segsize)); - gasneti_TM_t tm = gasneti_import_tm(_tm); + gasneti_TM_t tm = gasneti_import_tm_nonpair(_tm); gasneti_EP_t ep = tm->_ep; if (!gasneti_init_done) @@ -816,12 +859,12 @@ extern int gasnetc_attach( gex_TM_t _tm, #if GASNET_SEGMENT_FAST || GASNET_SEGMENT_LARGE /* register client segment */ gex_Segment_t seg; // g2ex segment is automatically saved by a hook - if (GASNET_OK != gasnetc_attach_segment(&seg, _tm, segsize, gasneti_defaultExchange, GASNETI_FLAG_INIT_LEGACY)) + if (GASNET_OK != gasnetc_attach_segment(&seg, _tm, segsize, GASNETI_FLAG_INIT_LEGACY)) GASNETI_RETURN_ERRR(RESOURCE,"Error attaching segment"); #endif /* register client handlers */ - if (table && gasneti_amregister_legacy(ep->_amtbl, table, numentries) != GASNET_OK) + if (table && gasneti_amregister_legacy(ep, table, numentries) != GASNET_OK) GASNETI_RETURN_ERRR(RESOURCE,"Error registering handlers"); /* ensure everything is initialized across all nodes */ @@ -863,19 +906,23 @@ extern int gasnetc_Client_Init( #endif } else { // NOT first client // allocate the client object - gasneti_Client_t client = gasneti_alloc_client(clientName, flags, 0); + gasneti_Client_t client = gasneti_alloc_client(clientName, flags); *client_p = gasneti_export_client(client); // create the initial endpoint with internal handlers - if (gasnetc_EP_Create(ep_p, *client_p, flags)) + if (gex_EP_Create(ep_p, *client_p, GEX_EP_CAPABILITY_ALL, flags)) GASNETI_RETURN_ERRR(RESOURCE,"Error creating initial endpoint"); } gasneti_EP_t ep = gasneti_import_ep(*ep_p); + // Do NOT move this prior to the gasneti_trace_init() call + GASNETI_TRACE_PRINTF(O,("gex_Client_Init: name='%s' argc_p=%p argv_p=%p flags=%d", + clientName, (void *)argc, (void *)argv, flags)); + // TODO-EX: create team gasneti_TM_t tm = first_client ? 
gasneti_import_tm(gasnetc_bootstrap_tm) // gasnetc_init() creates very first TM - : gasneti_alloc_tm(ep, gasneti_mynode, gasneti_nodes, flags, 0); + : gasneti_alloc_tm(ep, gasneti_mynode, gasneti_nodes, flags); *tm_p = gasneti_export_tm(tm); if (0 == (flags & GASNETI_FLAG_INIT_LEGACY)) { @@ -910,60 +957,61 @@ extern int gasnetc_Segment_Attach( /* create a segment collectively */ // TODO-EX: this implementation only works *once* - // TODO-EX: should be using the team's exchange function if possible // TODO-EX: need to pass proper flags (e.g. pshm and bind) instead of 0 - if (GASNET_OK != gasnetc_attach_segment(segment_p, tm, length, gasneti_defaultExchange, 0)) + if (GASNET_OK != gasnetc_attach_segment(segment_p, tm, length, 0)) GASNETI_RETURN_ERRR(RESOURCE,"Error attaching segment"); return GASNET_OK; } -extern int gasnetc_EP_Create(gex_EP_t *ep_p, - gex_Client_t client, - gex_Flags_t flags) { - /* (###) add code here to create an endpoint belonging to the given client */ -#if 1 // TODO-EX: This is a stub, which assumes 1 implicit call from ClientCreate - static gasneti_mutex_t lock = GASNETI_MUTEX_INITIALIZER; - gasneti_mutex_lock(&lock); - static int once = 0; - int prev = once; - once = 1; - gasneti_mutex_unlock(&lock); - if (prev) gasneti_fatalerror("Multiple endpoints are not yet implemented"); -#endif +extern int gasnetc_Segment_Create( + gex_Segment_t *segment_p, + gex_Client_t client, + gex_Addr_t address, + uintptr_t length, + gex_MK_t kind, + gex_Flags_t flags) +{ + gasneti_assert(segment_p); - gasneti_EP_t ep = gasneti_alloc_ep(gasneti_import_client(client), flags, 0); - *ep_p = gasneti_export_ep(ep); - - { /* core API handlers */ - gex_AM_Entry_t *ctable = (gex_AM_Entry_t *)gasnetc_get_handlertable(); - int len = 0; - int numreg = 0; - gasneti_assert(ctable); - while (ctable[len].gex_fnptr) len++; /* calc len */ - if (gasneti_amregister(ep->_amtbl, ctable, len, GASNETC_HANDLER_BASE, GASNETE_HANDLER_BASE, 0, &numreg) != GASNET_OK) - 
GASNETI_RETURN_ERRR(RESOURCE,"Error registering core API handlers"); - gasneti_assert(numreg == len); - } + // Create the Segment object, allocating memory if appropriate + gasneti_Client_t i_client = gasneti_import_client(client); + int rc = gasneti_segmentCreate(segment_p, i_client, address, length, kind, flags); - { /* extended API handlers */ - gex_AM_Entry_t *etable = (gex_AM_Entry_t *)gasnete_get_handlertable(); - int len = 0; - int numreg = 0; - gasneti_assert(etable); - while (etable[len].gex_fnptr) len++; /* calc len */ - if (gasneti_amregister(ep->_amtbl, etable, len, GASNETE_HANDLER_BASE, GASNETI_CLIENT_HANDLER_BASE, 0, &numreg) != GASNET_OK) - GASNETI_RETURN_ERRR(RESOURCE,"Error registering extended API handlers"); - gasneti_assert(numreg == len); + if (rc == GASNET_OK) { + // Register segment with NIC + gasnetc_Segment_t segment = (gasnetc_Segment_t) gasneti_import_segment(*segment_p); + gasnetc_segment_register(segment); } + return rc; +} + +extern int gasnetc_EP_PublishBoundSegment( + gex_TM_t tm, + gex_EP_t *eps, + size_t num_eps, + gex_Flags_t flags) +{ + // Conduit-independent parts + int rc = gasneti_EP_PublishBoundSegment(tm, eps, num_eps, flags); + if (GASNET_OK != rc) return rc; + + // Conduit-dependent parts + // TODO: merge comms into gasneti_EP_PublishBoundSegment(). 
+ gasnetc_segment_exchange(tm, eps, num_eps); + + // Avoid race in which AMRequestLong triggers AMReplyLong before exchange completes remotely + // TODO: barrier for multi-tm per-process + gex_Event_Wait(gex_Coll_BarrierNB(tm, 0)); + return GASNET_OK; } extern int gasnetc_EP_RegisterHandlers(gex_EP_t ep, gex_AM_Entry_t *table, size_t numentries) { - return gasneti_amregister_client(gasneti_import_ep(ep)->_amtbl, table, numentries); + return gasneti_amregister_client(gasneti_import_ep(ep), table, numentries); } /* ------------------------------------------------------------------------------------ */ static int gasnetc_exit_in_signal = 0; /* to avoid certain things in signal context */ @@ -1029,6 +1077,9 @@ extern void gasnetc_exit(int exitcode) { gasnetc_disable_AMs(); + // prevent possible GASNETI_CHECK_INJECT() failures when we communicate + GASNETI_CHECK_INJECT_RESET(); + /* HACK borrowed from elan-conduit: release locks we might have held If we are exiting from a signal hander, we might already hold some locks. 
In a debug build we want to avoid the resulting assertions, and in all @@ -1596,11 +1647,12 @@ extern gex_AM_SrcDesc_t gasnetc_AM_PrepareRequestMedium( GASNETI_THREAD_FARG, unsigned int nargs) { + GASNETI_TRACE_PREP_REQUESTMEDIUM(tm,rank,client_buf,least_payload,most_payload,flags,nargs); + GASNETC_IMMEDIATE_MAYBE_POLL(flags); // Ensure at least one poll upon Request injection + gasneti_AM_SrcDesc_t sd = gasneti_init_request_srcdesc(GASNETI_THREAD_PASS_ALONE); GASNETI_COMMON_PREP_REQ(sd,tm,rank,client_buf,least_payload,most_payload,NULL,lc_opt,flags,nargs,Medium); - GASNETC_IMMEDIATE_MAYBE_POLL(flags); // Ensure at least one poll upon Request injection - flags &= ~(GEX_FLAG_AM_PREPARE_LEAST_CLIENT | GEX_FLAG_AM_PREPARE_LEAST_ALLOC); gex_Rank_t jobrank = gasneti_e_tm_rank_to_jobrank(tm, rank); @@ -1623,6 +1675,7 @@ extern gex_AM_SrcDesc_t gasnetc_AM_PrepareRequestMedium( } GASNETI_TRACE_PREP_RETURN(REQUEST_MEDIUM, sd); + GASNETI_CHECK_SD(client_buf, least_payload, most_payload, sd); return gasneti_export_srcdesc(sd); } @@ -1830,6 +1883,8 @@ extern gex_AM_SrcDesc_t gasnetc_AM_PrepareReplyMedium( gex_Flags_t flags, unsigned int nargs) { + GASNETI_TRACE_PREP_REPLYMEDIUM(token,client_buf,least_payload,most_payload,flags,nargs); + gasneti_AM_SrcDesc_t sd; flags &= ~(GEX_FLAG_AM_PREPARE_LEAST_CLIENT | GEX_FLAG_AM_PREPARE_LEAST_ALLOC); @@ -1855,6 +1910,7 @@ extern gex_AM_SrcDesc_t gasnetc_AM_PrepareReplyMedium( } GASNETI_TRACE_PREP_RETURN(REPLY_MEDIUM, sd); + GASNETI_CHECK_SD(client_buf, least_payload, most_payload, sd); return gasneti_export_srcdesc(sd); } @@ -1986,9 +2042,7 @@ extern int gasnetc_hsl_trylock(gex_HSL_t *hsl) { (for internal conduit use in bootstrapping, job management, etc.) 
*/ static gex_AM_Entry_t const gasnetc_handlers[] = { - #ifdef GASNETC_COMMON_HANDLERS GASNETC_COMMON_HANDLERS(), - #endif /* ptr-width independent handlers */ gasneti_handler_tableentry_no_bits(gasnetc_exit_reqh,1,REQUEST,SHORT,0), diff --git a/third-party/gasnet/gasnet-src/aries-conduit/gasnet_core.h b/third-party/gasnet/gasnet-src/aries-conduit/gasnet_core.h index b59266bd87a3..d6ff5d03cb4b 100644 --- a/third-party/gasnet/gasnet-src/aries-conduit/gasnet_core.h +++ b/third-party/gasnet/gasnet-src/aries-conduit/gasnet_core.h @@ -19,7 +19,7 @@ ============== */ -extern void gasnetc_exit(int exitcode) GASNETI_NORETURN; +extern void gasnetc_exit(int _exitcode) GASNETI_NORETURN; GASNETI_NORETURNP(gasnetc_exit) #define gasnet_exit gasnetc_exit @@ -28,31 +28,34 @@ GASNETI_NORETURNP(gasnetc_exit) #define GASNET_NULL_ARGV_OK 1 /* ------------------------------------------------------------------------------------ */ extern int gasnetc_Client_Init( - gex_Client_t *client_p, - gex_EP_t *ep_p, - gex_TM_t *tm_p, - const char *clientName, - int *argc, - char ***argv, - gex_Flags_t flags); + gex_Client_t *_client_p, + gex_EP_t *_ep_p, + gex_TM_t *_tm_p, + const char *_clientName, + int *_argc, + char ***_argv, + gex_Flags_t _flags); // gasnetex.h handles name-shifting of gex_Client_Init() extern int gasnetc_Segment_Attach( - gex_Segment_t *segment_p, - gex_TM_t tm, - uintptr_t length); + gex_Segment_t *_segment_p, + gex_TM_t _tm, + uintptr_t _length); #define gex_Segment_Attach gasnetc_Segment_Attach -extern int gasnetc_EP_Create( - gex_EP_t *ep_p, - gex_Client_t client, - gex_Flags_t flags); -#define gex_EP_Create gasnetc_EP_Create +extern int gasnetc_Segment_Create( + gex_Segment_t *_segment_p, + gex_Client_t _client, + gex_Addr_t _address, + uintptr_t _length, + gex_MK_t _kind, + gex_Flags_t _flags); +#define gex_Segment_Create gasnetc_Segment_Create extern int gasnetc_EP_RegisterHandlers( - gex_EP_t ep, - gex_AM_Entry_t *table, - size_t numentries); + gex_EP_t _ep, + 
gex_AM_Entry_t *_table, + size_t _numentries); #define gex_EP_RegisterHandlers gasnetc_EP_RegisterHandlers /* ------------------------------------------------------------------------------------ */ /* @@ -96,11 +99,11 @@ typedef struct { #define gex_HSL_Unlock(hsl) #define gex_HSL_Trylock(hsl) GASNET_OK #else - extern void gasnetc_hsl_init (gex_HSL_t *hsl); - extern void gasnetc_hsl_destroy(gex_HSL_t *hsl); - extern void gasnetc_hsl_lock (gex_HSL_t *hsl); - extern void gasnetc_hsl_unlock (gex_HSL_t *hsl); - extern int gasnetc_hsl_trylock(gex_HSL_t *hsl) GASNETI_WARN_UNUSED_RESULT; + extern void gasnetc_hsl_init (gex_HSL_t *_hsl); + extern void gasnetc_hsl_destroy(gex_HSL_t *_hsl); + extern void gasnetc_hsl_lock (gex_HSL_t *_hsl); + extern void gasnetc_hsl_unlock (gex_HSL_t *_hsl); + extern int gasnetc_hsl_trylock(gex_HSL_t *_hsl) GASNETI_WARN_UNUSED_RESULT; #define gex_HSL_Init gasnetc_hsl_init #define gex_HSL_Destroy gasnetc_hsl_destroy @@ -118,13 +121,10 @@ typedef struct { #define gex_AM_MaxArgs() ((unsigned int)GASNETC_MAX_ARGS) #define GASNETC_LUB_LONG 0x800000 -#define GASNETC_LUB_MEDIUM ((size_t)GASNETC_GNI_MAX_MEDIUM) -#if GASNETC_GNI_MAX_MEDIUM == 65472 // Cannot use all 65536 bytes of buffer (bug 4042) -#define GASNETC_MAX_MEDIUM(nargs) (GASNETC_LUB_MEDIUM+8*((GASNETC_MAX_ARGS-(MAX(nargs,1)))/2)) -#else -#define GASNETC_MAX_MEDIUM(nargs) (GASNETC_LUB_MEDIUM+8*((GASNETC_MAX_ARGS-(nargs))/2)) -#endif +extern size_t gasnetc_gni_lub_medium; +#define GASNETC_LUB_MEDIUM ((size_t)gasnetc_gni_lub_medium) // Cast prevents assignment +#define GASNETC_MAX_MEDIUM(nargs) (GASNETC_LUB_MEDIUM+8*((GASNETC_MAX_ARGS-(nargs))>>1)) #define gex_AM_LUBRequestMedium() ((size_t)GASNETC_LUB_MEDIUM) #define gex_AM_LUBReplyMedium() ((size_t)GASNETC_LUB_MEDIUM) @@ -132,12 +132,28 @@ typedef struct { #define gex_AM_LUBReplyLong() ((size_t)GASNETC_LUB_LONG) // TODO-EX: Medium sizes can be improved upon for PSHM case -#define gasnetc_AM_MaxRequestMedium(tm,rank,lc_opt,flags,nargs) 
GASNETC_MAX_MEDIUM(nargs) -#define gasnetc_AM_MaxReplyMedium(tm,rank,lc_opt,flags,nargs) GASNETC_MAX_MEDIUM(nargs) -#define gasnetc_AM_MaxRequestLong(tm,rank,lc_opt,flags,nargs) ((size_t)GASNETC_LUB_LONG) -#define gasnetc_AM_MaxReplyLong(tm,rank,lc_opt,flags,nargs) ((size_t)GASNETC_LUB_LONG) -#define gasnetc_Token_MaxReplyMedium(token,lc_opt,flags,nargs) GASNETC_MAX_MEDIUM(nargs) -#define gasnetc_Token_MaxReplyLong(token,lc_opt,flags,nargs) ((size_t)GASNETC_LUB_LONG) +#define gasnetc_AM_MaxRequestMedium(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS4(tm,rank,lc_opt,flags),GASNETC_MAX_MEDIUM(nargs)) +#define gasnetc_AM_MaxReplyMedium(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS4(tm,rank,lc_opt,flags),GASNETC_MAX_MEDIUM(nargs)) +#define gasnetc_Token_MaxReplyMedium(token,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS3(token,lc_opt,flags),GASNETC_MAX_MEDIUM(nargs)) + +#define gasnetc_AM_MaxRequestLong(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS4(tm,rank,lc_opt,nargs), \ + ((flags) & GEX_FLAG_AM_PREPARE_LEAST_ALLOC \ + ? GASNETC_REF_NPAM_MAX_ALLOC \ + : gex_AM_LUBRequestLong())) +#define gasnetc_AM_MaxReplyLong(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS4(tm,rank,lc_opt,nargs), \ + ((flags) & GEX_FLAG_AM_PREPARE_LEAST_ALLOC \ + ? GASNETC_REF_NPAM_MAX_ALLOC \ + : gex_AM_LUBReplyLong())) +#define gasnetc_Token_MaxReplyLong(token,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS3(token,lc_opt,nargs), \ + ((flags) & GEX_FLAG_AM_PREPARE_LEAST_ALLOC \ + ? 
GASNETC_REF_NPAM_MAX_ALLOC \ + : gex_AM_LUBReplyLong())) /* ------------------------------------------------------------------------------------ */ /* diff --git a/third-party/gasnet/gasnet-src/aries-conduit/gasnet_core_fwd.h b/third-party/gasnet/gasnet-src/aries-conduit/gasnet_core_fwd.h index 7826dd1524ca..3a0798ba0d4c 100644 --- a/third-party/gasnet/gasnet-src/aries-conduit/gasnet_core_fwd.h +++ b/third-party/gasnet/gasnet-src/aries-conduit/gasnet_core_fwd.h @@ -11,7 +11,7 @@ #ifndef _GASNET_CORE_FWD_H #define _GASNET_CORE_FWD_H -#define GASNET_CORE_VERSION 2.3 +#define GASNET_CORE_VERSION 2.4 #define GASNET_CORE_VERSION_STR _STRINGIFY(GASNET_CORE_VERSION) #define GASNET_CORE_NAME ARIES #define GASNET_CORE_NAME_STR _STRINGIFY(GASNET_CORE_NAME) @@ -45,6 +45,9 @@ #define GASNETI_SUPPORTS_OUTOFSEGMENT_PUTGET 1 #endif + // uncomment for each MK_CLASS which the conduit supports. leave commented otherwise +//#define GASNET_HAVE_MK_CLASS_CUDA_UVA GASNETI_MK_CLASS_CUDA_UVA_ENABLED + /* conduits should define GASNETI_CONDUIT_THREADS to 1 if they have one or more "private" threads which may be used to run AM handlers, even under GASNET_SEQ this ensures locking is still done correctly, etc @@ -75,12 +78,12 @@ your conduit must provide the V-suffixed functions for any of these that are not defined. */ -#define GASNETC_HAVE_NP_REQ_MEDIUM 1 -#define GASNETC_HAVE_NP_REP_MEDIUM 1 -/* #define GASNETC_HAVE_NP_REQ_LONG 1 */ -/* #define GASNETC_HAVE_NP_REP_LONG 1 */ +#define GASNET_NATIVE_NP_ALLOC_REQ_MEDIUM 1 +#define GASNET_NATIVE_NP_ALLOC_REP_MEDIUM 1 +/* #define GASNET_NATIVE_NP_ALLOC_REQ_LONG 1 */ +/* #define GASNET_NATIVE_NP_ALLOC_REP_LONG 1 */ - /* uncomment for each GASNETC_HAVE_NP_* enabled above if the Commit function + /* uncomment for each GASNET_NATIVE_NP_ALLOC_* enabled above if the Commit function has the numargs argument even in an NDEBUG build (it is always passed in DEBUG builds). 
*/ @@ -93,14 +96,52 @@ include a call to gasneti_AMPoll (or equivalent) for progress. The preferred implementation is to Poll only in the M-suffixed calls and not the V-suffixed calls (and GASNETC_REQUESTV_POLLS undefined). - Used if (and only if) any of the GASNETC_HAVE_NP_* values above are unset. + Used if (and only if) any of the GASNET_NATIVE_NP_ALLOC_* values above are unset. */ /* #define GASNETC_REQUESTV_POLLS 1 */ + // uncomment if conduit provides a gasnetc-prefixed override + // TODO: this should be a hook rather than an override +#define GASNETC_HAVE_EP_PUBLISHBOUNDSEGMENT 1 + + /* If your conduit uses conduit-specific extensions to the basic object + types, then define the corresponding SIZEOF macros below to return + the total length of the conduit-specific object, including the prefix + portion which must be the matching GASNETI_[OBJECT]_COMMON fields. + Similarly, *_HOOK macros should be defined as callbacks to perform + conduit-specific initialization and finalization tasks, if any. + If a given SIZEOF macro is defined, but the corresponding INIT_HOOK is + not, then space beyond the COMMON fields will be zero-initialized. + In all cases, GASNETC_[OBJECT]_EXTRA_DECLS provides the place to + provide necessary declarations (since this file is included very early). 
+ */ + +//#define GASNETC_CLIENT_EXTRA_DECLS (###) +//#define GASNETC_CLIENT_INIT_HOOK(i_client) (###) +//#define GASNETC_CLIENT_FINI_HOOK(i_client) (###) +//#define GASNETC_SIZEOF_CLIENT_T() (###) + +#define GASNETC_SEGMENT_EXTRA_DECLS \ + extern size_t gasnetc_sizeof_segment_t(void); +//#define GASNETC_SEGMENT_INIT_HOOK(i_segment) (###) +//#define GASNETC_SEGMENT_FINI_HOOK(i_segment) (###) +#define GASNETC_SIZEOF_SEGMENT_T() \ + gasnetc_sizeof_segment_t() + +//#define GASNETC_TM_EXTRA_DECLS (###) +//#define GASNETC_TM_INIT_HOOK(i_tm) (###) +//#define GASNETC_TM_FINI_HOOK(i_tm) (###) +//#define GASNETC_SIZEOF_TM_T() (###) + +//#define GASNETC_EP_EXTRA_DECLS (###) +//#define GASNETC_EP_INIT_HOOK(i_ep) (###) +//#define GASNETC_EP_FINI_HOOK(i_ep) (###) +//#define GASNETC_SIZEOF_EP_T() (###) + #if defined(GASNET_PAR) && GASNETC_GNI_MULTI_DOMAIN /* Need to hook pthread create to ensure collective creation of domains */ typedef int (gasnetc_pthread_create_fn_t)(pthread_t *, const pthread_attr_t *, void *(*)(void *), void *); -extern int gasnetc_pthread_create(gasnetc_pthread_create_fn_t *create_fn, pthread_t *thread, const pthread_attr_t *attr, void * (*fn)(void *), void * arg) ; +extern int gasnetc_pthread_create(gasnetc_pthread_create_fn_t *_create_fn, pthread_t *_thread, const pthread_attr_t *_attr, void * (*_fn)(void *), void * _arg) ; #define GASNETC_PTHREAD_CREATE_OVERRIDE(create_fn, thread, attr, start_routine, arg) \ gasnetc_pthread_create(create_fn, thread, attr, start_routine, arg) #endif @@ -110,6 +151,14 @@ extern int gasnetc_pthread_create(gasnetc_pthread_create_fn_t *create_fn, pthrea #define GASNETC_USING_SUSPEND_RESUME 1 #endif +// If conduit supports GASNET_MAXEPS!=1, set default and (optional) max values here. +// Leaving GASNETC_MAXEPS_DFLT unset will result in GASNET_MAXEPS=1, independent +// of all other settings (appropriate for conduits without multi-ep support). 
+// If set, GASNETC_MAXEPS_MAX is used to limit a user's --with-maxeps (and a +// global default limit is used otherwise). +//#define GASNETC_MAXEPS_DFLT ### // default num endpoints this conduit supports, undef means no multi-ep support +//#define GASNETC_MAXEPS_MAX ### // leave unset for default + /* this can be used to add conduit-specific statistical collection values (see gasnet_trace.h) */ #define GASNETC_CONDUIT_STATS(CNT,VAL,TIME) \ @@ -129,10 +178,10 @@ extern int gasnetc_pthread_create(gasnetc_pthread_create_fn_t *create_fn, pthrea CNT(C, AMPOLL_INS, late notifies) \ /* blank */ -extern void gasnetc_fatalsignal_callback(int sig); +extern void gasnetc_fatalsignal_callback(int _sig); #define GASNETC_FATALSIGNAL_CALLBACK(sig) gasnetc_fatalsignal_callback(sig) -extern void gasnetc_trace_finish(void); -#define GASNETC_TRACE_FINISH() gasnetc_trace_finish() +extern void gasnetc_stats_dump(int _reset); +#define GASNETC_STATS_DUMP(reset) gasnetc_stats_dump(reset) #endif diff --git a/third-party/gasnet/gasnet-src/aries-conduit/gasnet_core_internal.h b/third-party/gasnet/gasnet-src/aries-conduit/gasnet_core_internal.h index 2bd5bdefb36d..8b6b8337095e 100644 --- a/third-party/gasnet/gasnet-src/aries-conduit/gasnet_core_internal.h +++ b/third-party/gasnet/gasnet-src/aries-conduit/gasnet_core_internal.h @@ -13,7 +13,7 @@ #define GASNETC_HSL_SPINLOCK 1 /* ------------------------------------------------------------------------------------ */ -#define _hidx_gasnetc_exchg_reqh (GASNETC_HANDLER_BASE+0) +#define _hidx_gasnetc_hbarr_reqh (GASNETC_HANDLER_BASE+0) #define _hidx_gasnetc_exit_reqh (GASNETC_HANDLER_BASE+1) #define _hidx_gasnetc_sys_barrier_reqh (GASNETC_HANDLER_BASE+2) #define _hidx_gasnetc_sys_exchange_reqh (GASNETC_HANDLER_BASE+3) diff --git a/third-party/gasnet/gasnet-src/aries-conduit/gasnet_extended.c b/third-party/gasnet/gasnet-src/aries-conduit/gasnet_extended.c index 09bf98021f96..fcd7143fce65 100644 --- 
a/third-party/gasnet/gasnet-src/aries-conduit/gasnet_extended.c +++ b/third-party/gasnet/gasnet-src/aries-conduit/gasnet_extended.c @@ -138,7 +138,7 @@ gasnete_get_bulk_inner(void *dest, gex_TM_t tm, gex_Rank_t rank, void *src, size gasnetc_post_descriptor_t *gpd; size_t chunksz; - gasneti_EP_t ep = gasneti_import_tm(tm)->_ep; + gasneti_EP_t ep = gasneti_e_tm_to_i_ep(tm); chunksz = gasneti_in_local_fullsegment(ep, dest, nbytes) ? GC_MAXRDMA_IN : GC_MAXRDMA_OUT; if (nbytes > 2*chunksz) { @@ -252,7 +252,7 @@ gasnete_put_inner(gex_TM_t tm, gex_Rank_t rank, void *dest, void *src, size_t nb gasnetc_post_descriptor_t *gpd; size_t chunksz; - gasneti_EP_t ep = gasneti_import_tm(tm)->_ep; + gasneti_EP_t ep = gasneti_e_tm_to_i_ep(tm); chunksz = gasneti_in_local_fullsegment(ep, src, nbytes) ? GC_MAXRDMA_IN : GC_MAXRDMA_OUT; gasneti_suspend_spinpollers(); @@ -316,7 +316,7 @@ gasnete_put_bulk_inner(gex_TM_t tm, gex_Rank_t rank, void *dest, void *src, size gasnetc_post_descriptor_t *gpd; size_t chunksz; - gasneti_EP_t ep = gasneti_import_tm(tm)->_ep; + gasneti_EP_t ep = gasneti_e_tm_to_i_ep(tm); chunksz = gasneti_in_local_fullsegment(ep, src, nbytes) ? 
GC_MAXRDMA_IN : GC_MAXRDMA_OUT; gasneti_suspend_spinpollers(); diff --git a/third-party/gasnet/gasnet-src/aries-conduit/gasnet_extended_fwd.h b/third-party/gasnet/gasnet-src/aries-conduit/gasnet_extended_fwd.h index 2300a5330ae9..d471168694fc 100644 --- a/third-party/gasnet/gasnet-src/aries-conduit/gasnet_extended_fwd.h +++ b/third-party/gasnet/gasnet-src/aries-conduit/gasnet_extended_fwd.h @@ -11,7 +11,7 @@ #ifndef _GASNET_EXTENDED_FWD_H #define _GASNET_EXTENDED_FWD_H -#define GASNET_EXTENDED_VERSION 2.3 +#define GASNET_EXTENDED_VERSION 2.4 #define GASNET_EXTENDED_VERSION_STR _STRINGIFY(GASNET_EXTENDED_VERSION) #define GASNET_EXTENDED_NAME ARIES #define GASNET_EXTENDED_NAME_STR _STRINGIFY(GASNET_EXTENDED_NAME) diff --git a/third-party/gasnet/gasnet-src/aries-conduit/gasnet_ratomic.c b/third-party/gasnet/gasnet-src/aries-conduit/gasnet_ratomic.c index 002aade44802..a47f2d7f3fb7 100644 --- a/third-party/gasnet/gasnet-src/aries-conduit/gasnet_ratomic.c +++ b/third-party/gasnet/gasnet-src/aries-conduit/gasnet_ratomic.c @@ -643,13 +643,13 @@ GASNETE_DT_APPLY(GASNETE_GNIRATOMIC_TBL) // // Create-hook to install the dispatch tables (aka algoritm selection) // -void gasnete_gniratomic_create_hook( - gasneti_AD_t real_ad, - gasneti_TM_t real_tm, - gex_DT_t dt, - gex_OP_t ops, - gex_Flags_t flags) +void gasnete_gniratomic_init_hook(gasneti_AD_t real_ad) { + gex_Flags_t flags = real_ad->_flags; + gasneti_TM_t real_tm = real_ad->_tm; + gex_DT_t dt = real_ad->_dt; + gex_OP_t ops = real_ad->_ops; + // Check for cases that should favor AM over NIC if (! 
(flags & GEX_FLAG_AD_FAVOR_REMOTE)) { if (flags & (GEX_FLAG_AD_FAVOR_MY_RANK | GEX_FLAG_AD_FAVOR_MY_NBRHD)) { @@ -689,12 +689,12 @@ void gasnete_gniratomic_create_hook( } #undef GASNETE_GNIRATOMIC_TBL_CASE - GASNETI_TRACE_PRINTF(C,("gex_AD_Create(dt=%d, ops=0x%x) -> Aries", (int)dt, (unsigned int)ops)); + GASNETI_TRACE_PRINTF(O,("gex_AD_Create(dt=%d, ops=0x%x) -> Aries", (int)dt, (unsigned int)ops)); real_ad->_tools_safe = 0; return; use_am: - gasnete_amratomic_create_hook(real_ad, real_tm, dt, ops, flags); + gasnete_amratomic_init_hook(real_ad); return; } diff --git a/third-party/gasnet/gasnet-src/aries-conduit/gasnet_ratomic_fwd.h b/third-party/gasnet/gasnet-src/aries-conduit/gasnet_ratomic_fwd.h index 17196e60042c..6c17a05d519d 100644 --- a/third-party/gasnet/gasnet-src/aries-conduit/gasnet_ratomic_fwd.h +++ b/third-party/gasnet/gasnet-src/aries-conduit/gasnet_ratomic_fwd.h @@ -38,7 +38,20 @@ #endif #if GASNETC_BUILD_GNIRATOMIC - #define GASNETI_AD_CREATE_HOOK gasnete_gniratomic_create_hook + // Hooks for conduit-specific extension to create and destroy + // + // These hooks are analogous to the following: + // GASNETC_CLIENT_EXTRA_DECLS + // GASNETC_CLIENT_INIT_HOOK + // GASNETC_CLIENT_FINI_HOOK + // GASNETC_SIZEOF_CLIENT_T + // which are documented in template-conduit/gasnet_core_fwd.h + + #define GASNETC_AD_EXTRA_DECLS \ + extern void gasnete_gniratomic_init_hook(gasneti_AD_t); + #define GASNETC_AD_INIT_HOOK(i_ad) gasnete_gniratomic_init_hook(i_ad) + //#define GASNETC_AD_FINI_HOOK(i_ad) (###) + //#define GASNETC_SIZEOF_AD_T() (###) /* stats needed by the GNI-specific atomics implementation */ #ifndef GASNETI_RATOMIC_STATS diff --git a/third-party/gasnet/gasnet-src/config-aux/Makefile.in b/third-party/gasnet/gasnet-src/config-aux/Makefile.in index 2796005a637c..01ba8815831c 100644 --- a/third-party/gasnet/gasnet-src/config-aux/Makefile.in +++ b/third-party/gasnet/gasnet-src/config-aux/Makefile.in @@ -149,6 +149,10 @@ CONFIGURE_ARGS = @CONFIGURE_ARGS@ CPP = 
@CPP@ CPPFLAGS = @CPPFLAGS@ CROSS_COMPILING = @CROSS_COMPILING@ +CUDA_UVA_CFLAGS = @CUDA_UVA_CFLAGS@ +CUDA_UVA_LDFLAGS = @CUDA_UVA_LDFLAGS@ +CUDA_UVA_LIBS = @CUDA_UVA_LIBS@ +CUDA_guess_prog = @CUDA_guess_prog@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ @@ -174,6 +178,8 @@ ENVCMD = @ENVCMD@ EXEEXT = @EXEEXT@ EXESUFFIX = @EXESUFFIX@ FH_CFLAGS = @FH_CFLAGS@ +GASNETI_HWLOC_BIND_PATH = @GASNETI_HWLOC_BIND_PATH@ +GASNETI_HWLOC_CALC_PATH = @GASNETI_HWLOC_CALC_PATH@ GASNETI_PTR_BITS = @GASNETI_PTR_BITS@ GASNET_EXTRA_DEFINES = @GASNET_EXTRA_DEFINES@ GASNET_IBV_SPAWNER_CONF = @GASNET_IBV_SPAWNER_CONF@ @@ -202,6 +208,10 @@ GSTACK_PATH = @GSTACK_PATH@ HAVE_INTTYPES_H = @HAVE_INTTYPES_H@ HAVE_STDINT_H = @HAVE_STDINT_H@ HAVE_SYS_TYPES_H = @HAVE_SYS_TYPES_H@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_guess_prog = @HWLOC_guess_prog@ IBV_CFLAGS = @IBV_CFLAGS@ IBV_LDFLAGS = @IBV_LDFLAGS@ IBV_LIBS = @IBV_LIBS@ diff --git a/third-party/gasnet/gasnet-src/config-aux/config.guess b/third-party/gasnet/gasnet-src/config-aux/config.guess index 3465476e24ea..1972fda8eb05 100755 --- a/third-party/gasnet/gasnet-src/config-aux/config.guess +++ b/third-party/gasnet/gasnet-src/config-aux/config.guess @@ -1,8 +1,8 @@ #! /bin/sh # Attempt to guess a canonical system name. -# Copyright 1992-2014 Free Software Foundation, Inc. +# Copyright 1992-2021 Free Software Foundation, Inc. -timestamp='2014-11-04' +timestamp='2021-01-25' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -15,7 +15,7 @@ timestamp='2014-11-04' # General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program; if not, see . +# along with this program; if not, see . 
# # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a @@ -27,19 +27,19 @@ timestamp='2014-11-04' # Originally written by Per Bothner; maintained since 2000 by Ben Elliston. # # You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD +# https://git.savannah.gnu.org/cgit/config.git/plain/config.guess # # Please send patches to <config-patches@gnu.org>. -me=`echo "$0" | sed -e 's,.*/,,'` +me=$(echo "$0" | sed -e 's,.*/,,') usage="\ Usage: $0 [OPTION] Output the configuration name of the system \`$me' is run on. -Operation modes: +Options: -h, --help print this help, then exit -t, --time-stamp print date of last modification, then exit -v, --version print version number, then exit @@ -50,7 +50,7 @@ version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. -Copyright 1992-2014 Free Software Foundation, Inc. +Copyright 1992-2021 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -84,8 +84,6 @@ if test $# != 0; then exit 1 fi -trap 'exit 1' 1 2 15 - # CC_FOR_BUILD -- compiler used by this script. Note that the use of a # compiler to aid in system detection is discouraged as it requires # temporary files to be created and, as you can see below, it is a @@ -96,66 +94,89 @@ trap 'exit 1' 1 2 15 # Portable tmp directory creation inspired by the Autoconf team. 
-set_cc_for_build=' -trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; -trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; -: ${TMPDIR=/tmp} ; - { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || - { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || - { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || - { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; -dummy=$tmp/dummy ; -tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ; -case $CC_FOR_BUILD,$HOST_CC,$CC in - ,,) echo "int x;" > $dummy.c ; - for c in cc gcc c89 c99 ; do - if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then - CC_FOR_BUILD="$c"; break ; - fi ; - done ; - if test x"$CC_FOR_BUILD" = x ; then - CC_FOR_BUILD=no_compiler_found ; - fi - ;; - ,,*) CC_FOR_BUILD=$CC ;; - ,*,*) CC_FOR_BUILD=$HOST_CC ;; -esac ; set_cc_for_build= ;' +tmp= +# shellcheck disable=SC2172 +trap 'test -z "$tmp" || rm -fr "$tmp"' 0 1 2 13 15 + +set_cc_for_build() { + # prevent multiple calls if $tmp is already set + test "$tmp" && return 0 + : "${TMPDIR=/tmp}" + # shellcheck disable=SC2039 + { tmp=$( (umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null) && test -n "$tmp" && test -d "$tmp" ; } || + { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir "$tmp" 2>/dev/null) ; } || + { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir "$tmp" 2>/dev/null) && echo "Warning: creating insecure temp directory" >&2 ; } || + { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } + dummy=$tmp/dummy + case ${CC_FOR_BUILD-},${HOST_CC-},${CC-} in + ,,) echo "int x;" > "$dummy.c" + for driver in cc gcc c89 c99 ; do + if ($driver -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then + CC_FOR_BUILD="$driver" + break + fi + done + if test x"$CC_FOR_BUILD" = x ; then + 
CC_FOR_BUILD=no_compiler_found + fi + ;; + ,,*) CC_FOR_BUILD=$CC ;; + ,*,*) CC_FOR_BUILD=$HOST_CC ;; + esac +} # This is needed to find uname on a Pyramid OSx when run in the BSD universe. # (ghazi@noc.rutgers.edu 1994-08-24) -if (test -f /.attbin/uname) >/dev/null 2>&1 ; then +if test -f /.attbin/uname ; then PATH=$PATH:/.attbin ; export PATH fi -UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown -UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown -UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown -UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown +UNAME_MACHINE=$( (uname -m) 2>/dev/null) || UNAME_MACHINE=unknown +UNAME_RELEASE=$( (uname -r) 2>/dev/null) || UNAME_RELEASE=unknown +UNAME_SYSTEM=$( (uname -s) 2>/dev/null) || UNAME_SYSTEM=unknown +UNAME_VERSION=$( (uname -v) 2>/dev/null) || UNAME_VERSION=unknown -case "${UNAME_SYSTEM}" in +case "$UNAME_SYSTEM" in Linux|GNU|GNU/*) - # If the system lacks a compiler, then just pick glibc. - # We could probably try harder. - LIBC=gnu + LIBC=unknown - eval $set_cc_for_build - cat <<-EOF > $dummy.c + set_cc_for_build + cat <<-EOF > "$dummy.c" #include #if defined(__UCLIBC__) LIBC=uclibc #elif defined(__dietlibc__) LIBC=dietlibc - #else + #elif defined(__GLIBC__) LIBC=gnu + #else + #include + /* First heuristic to detect musl libc. */ + #ifdef __DEFINED_va_list + LIBC=musl + #endif #endif EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC' | sed 's, ,,g'` + eval "$($CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g')" + + # Second heuristic to detect musl libc. + if [ "$LIBC" = unknown ] && + command -v ldd >/dev/null && + ldd --version 2>&1 | grep -q ^musl; then + LIBC=musl + fi + + # If the system lacks a compiler, then just pick glibc. + # We could probably try harder. + if [ "$LIBC" = unknown ]; then + LIBC=gnu + fi ;; esac # Note: order is significant - the case branches are not exclusive. 
-case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in +case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in *:NetBSD:*:*) # NetBSD (nbsd) targets should (where applicable) match one or # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, @@ -167,22 +188,32 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # # Note: NetBSD doesn't particularly care about the vendor # portion of the name. We always set it to "unknown". - sysctl="sysctl -n hw.machine_arch" - UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ - /usr/sbin/$sysctl 2>/dev/null || echo unknown)` - case "${UNAME_MACHINE_ARCH}" in + UNAME_MACHINE_ARCH=$( (uname -p 2>/dev/null || \ + /sbin/sysctl -n hw.machine_arch 2>/dev/null || \ + /usr/sbin/sysctl -n hw.machine_arch 2>/dev/null || \ + echo unknown)) + case "$UNAME_MACHINE_ARCH" in + aarch64eb) machine=aarch64_be-unknown ;; armeb) machine=armeb-unknown ;; arm*) machine=arm-unknown ;; sh3el) machine=shl-unknown ;; sh3eb) machine=sh-unknown ;; sh5el) machine=sh5le-unknown ;; - *) machine=${UNAME_MACHINE_ARCH}-unknown ;; + earmv*) + arch=$(echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,') + endian=$(echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p') + machine="${arch}${endian}"-unknown + ;; + *) machine="$UNAME_MACHINE_ARCH"-unknown ;; esac # The Operating System including object format, if it has switched - # to ELF recently, or will in the future. - case "${UNAME_MACHINE_ARCH}" in + # to ELF recently (or will in the future) and ABI. + case "$UNAME_MACHINE_ARCH" in + earm*) + os=netbsdelf + ;; arm*|i386|m68k|ns32k|sh3*|sparc|vax) - eval $set_cc_for_build + set_cc_for_build if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ELF__ then @@ -197,117 +228,137 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in os=netbsd ;; esac + # Determine ABI tags. 
+ case "$UNAME_MACHINE_ARCH" in + earm*) + expr='s/^earmv[0-9]/-eabi/;s/eb$//' + abi=$(echo "$UNAME_MACHINE_ARCH" | sed -e "$expr") + ;; + esac # The OS release # Debian GNU/NetBSD machines have a different userland, and # thus, need a distinct triplet. However, they do not need # kernel version information, so it can be replaced with a # suitable tag, in the style of linux-gnu. - case "${UNAME_VERSION}" in + case "$UNAME_VERSION" in Debian*) release='-gnu' ;; *) - release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` + release=$(echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2) ;; esac # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: # contains redundant information, the shorter form: # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. - echo "${machine}-${os}${release}" + echo "$machine-${os}${release}${abi-}" exit ;; *:Bitrig:*:*) - UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` - echo ${UNAME_MACHINE_ARCH}-unknown-bitrig${UNAME_RELEASE} + UNAME_MACHINE_ARCH=$(arch | sed 's/Bitrig.//') + echo "$UNAME_MACHINE_ARCH"-unknown-bitrig"$UNAME_RELEASE" exit ;; *:OpenBSD:*:*) - UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` - echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} + UNAME_MACHINE_ARCH=$(arch | sed 's/OpenBSD.//') + echo "$UNAME_MACHINE_ARCH"-unknown-openbsd"$UNAME_RELEASE" + exit ;; + *:LibertyBSD:*:*) + UNAME_MACHINE_ARCH=$(arch | sed 's/^.*BSD\.//') + echo "$UNAME_MACHINE_ARCH"-unknown-libertybsd"$UNAME_RELEASE" + exit ;; + *:MidnightBSD:*:*) + echo "$UNAME_MACHINE"-unknown-midnightbsd"$UNAME_RELEASE" exit ;; *:ekkoBSD:*:*) - echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} + echo "$UNAME_MACHINE"-unknown-ekkobsd"$UNAME_RELEASE" exit ;; *:SolidBSD:*:*) - echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE} + echo "$UNAME_MACHINE"-unknown-solidbsd"$UNAME_RELEASE" + exit ;; + *:OS108:*:*) + echo "$UNAME_MACHINE"-unknown-os108_"$UNAME_RELEASE" exit ;; macppc:MirBSD:*:*) - echo powerpc-unknown-mirbsd${UNAME_RELEASE} + echo 
powerpc-unknown-mirbsd"$UNAME_RELEASE" exit ;; *:MirBSD:*:*) - echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} + echo "$UNAME_MACHINE"-unknown-mirbsd"$UNAME_RELEASE" + exit ;; + *:Sortix:*:*) + echo "$UNAME_MACHINE"-unknown-sortix + exit ;; + *:Twizzler:*:*) + echo "$UNAME_MACHINE"-unknown-twizzler + exit ;; + *:Redox:*:*) + echo "$UNAME_MACHINE"-unknown-redox + exit ;; + mips:OSF1:*.*) + echo mips-dec-osf1 exit ;; alpha:OSF1:*:*) case $UNAME_RELEASE in *4.0) - UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` + UNAME_RELEASE=$(/usr/sbin/sizer -v | awk '{print $3}') ;; *5.*) - UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + UNAME_RELEASE=$(/usr/sbin/sizer -v | awk '{print $4}') ;; esac # According to Compaq, /usr/sbin/psrinfo has been available on # OSF/1 and Tru64 systems produced since 1995. I hope that # covers most systems running today. This code pipes the CPU # types through head -n 1, so we only detect the type of CPU 0. - ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` + ALPHA_CPU_TYPE=$(/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1) case "$ALPHA_CPU_TYPE" in "EV4 (21064)") - UNAME_MACHINE="alpha" ;; + UNAME_MACHINE=alpha ;; "EV4.5 (21064)") - UNAME_MACHINE="alpha" ;; + UNAME_MACHINE=alpha ;; "LCA4 (21066/21068)") - UNAME_MACHINE="alpha" ;; + UNAME_MACHINE=alpha ;; "EV5 (21164)") - UNAME_MACHINE="alphaev5" ;; + UNAME_MACHINE=alphaev5 ;; "EV5.6 (21164A)") - UNAME_MACHINE="alphaev56" ;; + UNAME_MACHINE=alphaev56 ;; "EV5.6 (21164PC)") - UNAME_MACHINE="alphapca56" ;; + UNAME_MACHINE=alphapca56 ;; "EV5.7 (21164PC)") - UNAME_MACHINE="alphapca57" ;; + UNAME_MACHINE=alphapca57 ;; "EV6 (21264)") - UNAME_MACHINE="alphaev6" ;; + UNAME_MACHINE=alphaev6 ;; "EV6.7 (21264A)") - UNAME_MACHINE="alphaev67" ;; + UNAME_MACHINE=alphaev67 ;; "EV6.8CB (21264C)") - UNAME_MACHINE="alphaev68" ;; + UNAME_MACHINE=alphaev68 ;; "EV6.8AL (21264B)") - UNAME_MACHINE="alphaev68" 
;; + UNAME_MACHINE=alphaev68 ;; "EV6.8CX (21264D)") - UNAME_MACHINE="alphaev68" ;; + UNAME_MACHINE=alphaev68 ;; "EV6.9A (21264/EV69A)") - UNAME_MACHINE="alphaev69" ;; + UNAME_MACHINE=alphaev69 ;; "EV7 (21364)") - UNAME_MACHINE="alphaev7" ;; + UNAME_MACHINE=alphaev7 ;; "EV7.9 (21364A)") - UNAME_MACHINE="alphaev79" ;; + UNAME_MACHINE=alphaev79 ;; esac # A Pn.n version is a patched version. # A Vn.n version is a released version. # A Tn.n version is a released field test version. # A Xn.n version is an unreleased experimental baselevel. # 1.2 uses "1.2" for uname -r. - echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + echo "$UNAME_MACHINE"-dec-osf"$(echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz)" # Reset EXIT trap before exiting to avoid spurious non-zero exit code. exitcode=$? trap '' 0 exit $exitcode ;; - Alpha\ *:Windows_NT*:*) - # How do we know it's Interix rather than the generic POSIX subsystem? - # Should we change UNAME_MACHINE based on the output of uname instead - # of the specific Alpha model? 
- echo alpha-pc-interix - exit ;; - 21064:Windows_NT:50:3) - echo alpha-dec-winnt3.5 - exit ;; Amiga*:UNIX_System_V:4.0:*) echo m68k-unknown-sysv4 exit ;; *:[Aa]miga[Oo][Ss]:*:*) - echo ${UNAME_MACHINE}-unknown-amigaos + echo "$UNAME_MACHINE"-unknown-amigaos exit ;; *:[Mm]orph[Oo][Ss]:*:*) - echo ${UNAME_MACHINE}-unknown-morphos + echo "$UNAME_MACHINE"-unknown-morphos exit ;; *:OS/390:*:*) echo i370-ibm-openedition @@ -319,7 +370,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in echo powerpc-ibm-os400 exit ;; arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) - echo arm-acorn-riscix${UNAME_RELEASE} + echo arm-acorn-riscix"$UNAME_RELEASE" exit ;; arm*:riscos:*:*|arm*:RISCOS:*:*) echo arm-unknown-riscos @@ -329,7 +380,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in exit ;; Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. - if test "`(/bin/universe) 2>/dev/null`" = att ; then + if test "$( (/bin/universe) 2>/dev/null)" = att ; then echo pyramid-pyramid-sysv3 else echo pyramid-pyramid-bsd @@ -342,69 +393,69 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in echo sparc-icl-nx6 exit ;; DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) - case `/usr/bin/uname -p` in + case $(/usr/bin/uname -p) in sparc) echo sparc-icl-nx7; exit ;; esac ;; s390x:SunOS:*:*) - echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + echo "$UNAME_MACHINE"-ibm-solaris2"$(echo "$UNAME_RELEASE" | sed -e 's/[^.]*//')" exit ;; sun4H:SunOS:5.*:*) - echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + echo sparc-hal-solaris2"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')" exit ;; sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) - echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + echo sparc-sun-solaris2"$(echo "$UNAME_RELEASE" | sed -e 's/[^.]*//')" exit ;; i86pc:AuroraUX:5.*:* | 
i86xen:AuroraUX:5.*:*) - echo i386-pc-auroraux${UNAME_RELEASE} + echo i386-pc-auroraux"$UNAME_RELEASE" exit ;; i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) - eval $set_cc_for_build - SUN_ARCH="i386" + set_cc_for_build + SUN_ARCH=i386 # If there is a compiler, see if it is configured for 64-bit objects. # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. # This test works for both compilers. - if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if test "$CC_FOR_BUILD" != no_compiler_found; then if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null then - SUN_ARCH="x86_64" + SUN_ARCH=x86_64 fi fi - echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + echo "$SUN_ARCH"-pc-solaris2"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')" exit ;; sun4*:SunOS:6*:*) # According to config.sub, this is the proper way to canonicalize # SunOS6. Hard to guess exactly what SunOS6 will be like, but # it's likely to be more like Solaris than SunOS4. - echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + echo sparc-sun-solaris3"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')" exit ;; sun4*:SunOS:*:*) - case "`/usr/bin/arch -k`" in + case "$(/usr/bin/arch -k)" in Series*|S4*) - UNAME_RELEASE=`uname -v` + UNAME_RELEASE=$(uname -v) ;; esac # Japanese Language versions have a version number like `4.1.3-JL'. 
- echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` + echo sparc-sun-sunos"$(echo "$UNAME_RELEASE"|sed -e 's/-/_/')" exit ;; sun3*:SunOS:*:*) - echo m68k-sun-sunos${UNAME_RELEASE} + echo m68k-sun-sunos"$UNAME_RELEASE" exit ;; sun*:*:4.2BSD:*) - UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` - test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 - case "`/bin/arch`" in + UNAME_RELEASE=$( (sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null) + test "x$UNAME_RELEASE" = x && UNAME_RELEASE=3 + case "$(/bin/arch)" in sun3) - echo m68k-sun-sunos${UNAME_RELEASE} + echo m68k-sun-sunos"$UNAME_RELEASE" ;; sun4) - echo sparc-sun-sunos${UNAME_RELEASE} + echo sparc-sun-sunos"$UNAME_RELEASE" ;; esac exit ;; aushp:SunOS:*:*) - echo sparc-auspex-sunos${UNAME_RELEASE} + echo sparc-auspex-sunos"$UNAME_RELEASE" exit ;; # The situation for MiNT is a little confusing. The machine name # can be virtually everything (everything which is not @@ -415,44 +466,44 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # MiNT. But MiNT is downward compatible to TOS, so this should # be no problem. 
atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} + echo m68k-atari-mint"$UNAME_RELEASE" exit ;; atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} + echo m68k-atari-mint"$UNAME_RELEASE" exit ;; *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} + echo m68k-atari-mint"$UNAME_RELEASE" exit ;; milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) - echo m68k-milan-mint${UNAME_RELEASE} + echo m68k-milan-mint"$UNAME_RELEASE" exit ;; hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) - echo m68k-hades-mint${UNAME_RELEASE} + echo m68k-hades-mint"$UNAME_RELEASE" exit ;; *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) - echo m68k-unknown-mint${UNAME_RELEASE} + echo m68k-unknown-mint"$UNAME_RELEASE" exit ;; m68k:machten:*:*) - echo m68k-apple-machten${UNAME_RELEASE} + echo m68k-apple-machten"$UNAME_RELEASE" exit ;; powerpc:machten:*:*) - echo powerpc-apple-machten${UNAME_RELEASE} + echo powerpc-apple-machten"$UNAME_RELEASE" exit ;; RISC*:Mach:*:*) echo mips-dec-mach_bsd4.3 exit ;; RISC*:ULTRIX:*:*) - echo mips-dec-ultrix${UNAME_RELEASE} + echo mips-dec-ultrix"$UNAME_RELEASE" exit ;; VAX*:ULTRIX*:*:*) - echo vax-dec-ultrix${UNAME_RELEASE} + echo vax-dec-ultrix"$UNAME_RELEASE" exit ;; 2020:CLIX:*:* | 2430:CLIX:*:*) - echo clipper-intergraph-clix${UNAME_RELEASE} + echo clipper-intergraph-clix"$UNAME_RELEASE" exit ;; mips:*:*:UMIPS | mips:*:*:RISCos) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" #ifdef __cplusplus #include /* for printf() prototype */ int main (int argc, char *argv[]) { @@ -461,23 +512,23 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in #endif #if defined (host_mips) && defined (MIPSEB) #if defined (SYSTYPE_SYSV) - printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); + printf ("mips-mips-riscos%ssysv\\n", argv[1]); 
exit (0); #endif #if defined (SYSTYPE_SVR4) - printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0); + printf ("mips-mips-riscos%ssvr4\\n", argv[1]); exit (0); #endif #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) - printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0); + printf ("mips-mips-riscos%sbsd\\n", argv[1]); exit (0); #endif #endif exit (-1); } EOF - $CC_FOR_BUILD -o $dummy $dummy.c && - dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` && - SYSTEM_NAME=`$dummy $dummyarg` && + $CC_FOR_BUILD -o "$dummy" "$dummy.c" && + dummyarg=$(echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p') && + SYSTEM_NAME=$("$dummy" "$dummyarg") && { echo "$SYSTEM_NAME"; exit; } - echo mips-mips-riscos${UNAME_RELEASE} + echo mips-mips-riscos"$UNAME_RELEASE" exit ;; Motorola:PowerMAX_OS:*:*) echo powerpc-motorola-powermax @@ -502,18 +553,18 @@ EOF exit ;; AViiON:dgux:*:*) # DG/UX returns AViiON for all architectures - UNAME_PROCESSOR=`/usr/bin/uname -p` - if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] + UNAME_PROCESSOR=$(/usr/bin/uname -p) + if test "$UNAME_PROCESSOR" = mc88100 || test "$UNAME_PROCESSOR" = mc88110 then - if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ - [ ${TARGET_BINARY_INTERFACE}x = x ] + if test "$TARGET_BINARY_INTERFACE"x = m88kdguxelfx || \ + test "$TARGET_BINARY_INTERFACE"x = x then - echo m88k-dg-dgux${UNAME_RELEASE} + echo m88k-dg-dgux"$UNAME_RELEASE" else - echo m88k-dg-dguxbcs${UNAME_RELEASE} + echo m88k-dg-dguxbcs"$UNAME_RELEASE" fi else - echo i586-dg-dgux${UNAME_RELEASE} + echo i586-dg-dgux"$UNAME_RELEASE" fi exit ;; M88*:DolphinOS:*:*) # DolphinOS (SVR3) @@ -530,26 +581,26 @@ EOF echo m68k-tektronix-bsd exit ;; *:IRIX*:*:*) - echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` + echo mips-sgi-irix"$(echo "$UNAME_RELEASE"|sed -e 's/-/_/g')" exit ;; ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. 
echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id - exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + exit ;; # Note that: echo "'$(uname -s)'" gives 'AIX ' i*86:AIX:*:*) echo i386-ibm-aix exit ;; ia64:AIX:*:*) - if [ -x /usr/bin/oslevel ] ; then - IBM_REV=`/usr/bin/oslevel` + if test -x /usr/bin/oslevel ; then + IBM_REV=$(/usr/bin/oslevel) else - IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + IBM_REV="$UNAME_VERSION.$UNAME_RELEASE" fi - echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} + echo "$UNAME_MACHINE"-ibm-aix"$IBM_REV" exit ;; *:AIX:2:3) if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" #include main() @@ -560,7 +611,7 @@ EOF exit(0); } EOF - if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` + if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=$("$dummy") then echo "$SYSTEM_NAME" else @@ -573,28 +624,28 @@ EOF fi exit ;; *:AIX:*:[4567]) - IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` - if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then + IBM_CPU_ID=$(/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }') + if /usr/sbin/lsattr -El "$IBM_CPU_ID" | grep ' POWER' >/dev/null 2>&1; then IBM_ARCH=rs6000 else IBM_ARCH=powerpc fi - if [ -x /usr/bin/lslpp ] ; then - IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc | - awk -F: '{ print $3 }' | sed s/[0-9]*$/0/` + if test -x /usr/bin/lslpp ; then + IBM_REV=$(/usr/bin/lslpp -Lqc bos.rte.libc | + awk -F: '{ print $3 }' | sed s/[0-9]*$/0/) else - IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + IBM_REV="$UNAME_VERSION.$UNAME_RELEASE" fi - echo ${IBM_ARCH}-ibm-aix${IBM_REV} + echo "$IBM_ARCH"-ibm-aix"$IBM_REV" exit ;; *:AIX:*:*) echo rs6000-ibm-aix exit ;; - ibmrt:4.4BSD:*|romp-ibm:BSD:*) + ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*) echo romp-ibm-bsd4.4 exit ;; ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and - echo 
romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to + echo romp-ibm-bsd"$UNAME_RELEASE" # 4.3 with uname added to exit ;; # report: romp-ibm BSD 4.3 *:BOSX:*:*) echo rs6000-bull-bosx @@ -609,28 +660,28 @@ EOF echo m68k-hp-bsd4.4 exit ;; 9000/[34678]??:HP-UX:*:*) - HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` - case "${UNAME_MACHINE}" in - 9000/31? ) HP_ARCH=m68000 ;; - 9000/[34]?? ) HP_ARCH=m68k ;; + HPUX_REV=$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//') + case "$UNAME_MACHINE" in + 9000/31?) HP_ARCH=m68000 ;; + 9000/[34]??) HP_ARCH=m68k ;; 9000/[678][0-9][0-9]) - if [ -x /usr/bin/getconf ]; then - sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` - sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` - case "${sc_cpu_version}" in - 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 - 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 + if test -x /usr/bin/getconf; then + sc_cpu_version=$(/usr/bin/getconf SC_CPU_VERSION 2>/dev/null) + sc_kernel_bits=$(/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null) + case "$sc_cpu_version" in + 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0 + 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1 532) # CPU_PA_RISC2_0 - case "${sc_kernel_bits}" in - 32) HP_ARCH="hppa2.0n" ;; - 64) HP_ARCH="hppa2.0w" ;; - '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 + case "$sc_kernel_bits" in + 32) HP_ARCH=hppa2.0n ;; + 64) HP_ARCH=hppa2.0w ;; + '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20 esac ;; esac fi - if [ "${HP_ARCH}" = "" ]; then - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c + if test "$HP_ARCH" = ""; then + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" #define _HPUX_SOURCE #include @@ -663,13 +714,13 @@ EOF exit (0); } EOF - (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` + (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=$("$dummy") test -z "$HP_ARCH" && HP_ARCH=hppa fi ;; esac - if [ ${HP_ARCH} = "hppa2.0w" ] + if test "$HP_ARCH" = hppa2.0w then - eval $set_cc_for_build + 
set_cc_for_build # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler @@ -680,23 +731,23 @@ EOF # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess # => hppa64-hp-hpux11.23 - if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | + if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | grep -q __LP64__ then - HP_ARCH="hppa2.0w" + HP_ARCH=hppa2.0w else - HP_ARCH="hppa64" + HP_ARCH=hppa64 fi fi - echo ${HP_ARCH}-hp-hpux${HPUX_REV} + echo "$HP_ARCH"-hp-hpux"$HPUX_REV" exit ;; ia64:HP-UX:*:*) - HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` - echo ia64-hp-hpux${HPUX_REV} + HPUX_REV=$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//') + echo ia64-hp-hpux"$HPUX_REV" exit ;; 3050*:HI-UX:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" #include int main () @@ -721,11 +772,11 @@ EOF exit (0); } EOF - $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` && + $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=$("$dummy") && { echo "$SYSTEM_NAME"; exit; } echo unknown-hitachi-hiuxwe2 exit ;; - 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) + 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*) echo hppa1.1-hp-bsd exit ;; 9000/8??:4.3bsd:*:*) @@ -734,17 +785,17 @@ EOF *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) echo hppa1.0-hp-mpeix exit ;; - hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) + hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*) echo hppa1.1-hp-osf exit ;; hp8??:OSF1:*:*) echo hppa1.0-hp-osf exit ;; i*86:OSF1:*:*) - if [ -x /usr/sbin/sysversion ] ; then - echo ${UNAME_MACHINE}-unknown-osf1mk + if test -x /usr/sbin/sysversion ; then + echo "$UNAME_MACHINE"-unknown-osf1mk else - echo ${UNAME_MACHINE}-unknown-osf1 + echo "$UNAME_MACHINE"-unknown-osf1 fi exit ;; parisc*:Lites*:*:*) @@ -769,130 +820,123 @@ EOF echo c4-convex-bsd exit ;; CRAY*Y-MP:*:*:*) - echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + echo 
ymp-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' exit ;; CRAY*[A-Z]90:*:*:*) - echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ + echo "$UNAME_MACHINE"-cray-unicos"$UNAME_RELEASE" \ | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ -e 's/\.[^.]*$/.X/' exit ;; CRAY*TS:*:*:*) - echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + echo t90-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' exit ;; CRAY*T3E:*:*:*) - echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + echo alphaev5-cray-unicosmk"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' exit ;; CRAY*SV1:*:*:*) - echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + echo sv1-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' exit ;; *:UNICOS/mp:*:*) - echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + echo craynv-cray-unicosmp"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' exit ;; F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) - FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` + FUJITSU_PROC=$(uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz) + FUJITSU_SYS=$(uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///') + FUJITSU_REL=$(echo "$UNAME_RELEASE" | sed -e 's/ /_/') echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; 5000:UNIX_System_V:4.*:*) - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` + FUJITSU_SYS=$(uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///') + FUJITSU_REL=$(echo "$UNAME_RELEASE" | tr 
ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/') echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) - echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} + echo "$UNAME_MACHINE"-pc-bsdi"$UNAME_RELEASE" exit ;; sparc*:BSD/OS:*:*) - echo sparc-unknown-bsdi${UNAME_RELEASE} + echo sparc-unknown-bsdi"$UNAME_RELEASE" exit ;; *:BSD/OS:*:*) - echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} + echo "$UNAME_MACHINE"-unknown-bsdi"$UNAME_RELEASE" + exit ;; + arm:FreeBSD:*:*) + UNAME_PROCESSOR=$(uname -p) + set_cc_for_build + if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_PCS_VFP + then + echo "${UNAME_PROCESSOR}"-unknown-freebsd"$(echo ${UNAME_RELEASE}|sed -e 's/[-(].*//')"-gnueabi + else + echo "${UNAME_PROCESSOR}"-unknown-freebsd"$(echo ${UNAME_RELEASE}|sed -e 's/[-(].*//')"-gnueabihf + fi exit ;; *:FreeBSD:*:*) - UNAME_PROCESSOR=`/usr/bin/uname -p` - case ${UNAME_PROCESSOR} in + UNAME_PROCESSOR=$(/usr/bin/uname -p) + case "$UNAME_PROCESSOR" in amd64) - echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; - *) - echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + UNAME_PROCESSOR=x86_64 ;; + i386) + UNAME_PROCESSOR=i586 ;; esac + echo "$UNAME_PROCESSOR"-unknown-freebsd"$(echo "$UNAME_RELEASE"|sed -e 's/[-(].*//')" exit ;; i*:CYGWIN*:*) - echo ${UNAME_MACHINE}-pc-cygwin + echo "$UNAME_MACHINE"-pc-cygwin exit ;; *:MINGW64*:*) - echo ${UNAME_MACHINE}-pc-mingw64 + echo "$UNAME_MACHINE"-pc-mingw64 exit ;; *:MINGW*:*) - echo ${UNAME_MACHINE}-pc-mingw32 + echo "$UNAME_MACHINE"-pc-mingw32 exit ;; *:MSYS*:*) - echo ${UNAME_MACHINE}-pc-msys - exit ;; - i*:windows32*:*) - # uname -m includes "-pc" on this system. 
- echo ${UNAME_MACHINE}-mingw32 + echo "$UNAME_MACHINE"-pc-msys exit ;; i*:PW*:*) - echo ${UNAME_MACHINE}-pc-pw32 + echo "$UNAME_MACHINE"-pc-pw32 exit ;; *:Interix*:*) - case ${UNAME_MACHINE} in + case "$UNAME_MACHINE" in x86) - echo i586-pc-interix${UNAME_RELEASE} + echo i586-pc-interix"$UNAME_RELEASE" exit ;; authenticamd | genuineintel | EM64T) - echo x86_64-unknown-interix${UNAME_RELEASE} + echo x86_64-unknown-interix"$UNAME_RELEASE" exit ;; IA64) - echo ia64-unknown-interix${UNAME_RELEASE} + echo ia64-unknown-interix"$UNAME_RELEASE" exit ;; esac ;; - [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) - echo i${UNAME_MACHINE}-pc-mks - exit ;; - 8664:Windows_NT:*) - echo x86_64-pc-mks - exit ;; - i*:Windows_NT*:* | Pentium*:Windows_NT*:*) - # How do we know it's Interix rather than the generic POSIX subsystem? - # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we - # UNAME_MACHINE based on the output of uname instead of i386? - echo i586-pc-interix - exit ;; i*:UWIN*:*) - echo ${UNAME_MACHINE}-pc-uwin + echo "$UNAME_MACHINE"-pc-uwin exit ;; amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) - echo x86_64-unknown-cygwin - exit ;; - p*:CYGWIN*:*) - echo powerpcle-unknown-cygwin + echo x86_64-pc-cygwin exit ;; prep*:SunOS:5.*:*) - echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + echo powerpcle-unknown-solaris2"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')" exit ;; *:GNU:*:*) # the GNU system - echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-${LIBC}`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` + echo "$(echo "$UNAME_MACHINE"|sed -e 's,[-/].*$,,')-unknown-$LIBC$(echo "$UNAME_RELEASE"|sed -e 's,/.*$,,')" exit ;; *:GNU/*:*:*) # other systems with GNU libc and userland - echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC} + echo "$UNAME_MACHINE-unknown-$(echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" 
"[:lower:]")$(echo "$UNAME_RELEASE"|sed -e 's/[-(].*//')-$LIBC" exit ;; - i*86:Minix:*:*) - echo ${UNAME_MACHINE}-pc-minix + *:Minix:*:*) + echo "$UNAME_MACHINE"-unknown-minix exit ;; aarch64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; aarch64_be:Linux:*:*) UNAME_MACHINE=aarch64_be - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; alpha:Linux:*:*) - case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + case $(sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null) in EV5) UNAME_MACHINE=alphaev5 ;; EV56) UNAME_MACHINE=alphaev56 ;; PCA56) UNAME_MACHINE=alphapca56 ;; @@ -902,132 +946,182 @@ EOF EV68*) UNAME_MACHINE=alphaev68 ;; esac objdump --private-headers /bin/sh | grep -q ld.so.1 - if test "$?" = 0 ; then LIBC="gnulibc1" ; fi - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + if test "$?" = 0 ; then LIBC=gnulibc1 ; fi + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; arc:Linux:*:* | arceb:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; arm*:Linux:*:*) - eval $set_cc_for_build + set_cc_for_build if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ARM_EABI__ then - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" else if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ARM_PCS_VFP then - echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabi + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabi else - echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabihf + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabihf fi fi exit ;; avr32*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; cris:Linux:*:*) - echo ${UNAME_MACHINE}-axis-linux-${LIBC} + echo "$UNAME_MACHINE"-axis-linux-"$LIBC" exit ;; crisv32:Linux:*:*) - echo ${UNAME_MACHINE}-axis-linux-${LIBC} + 
echo "$UNAME_MACHINE"-axis-linux-"$LIBC" + exit ;; + e2k:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; frv:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; hexagon:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; i*86:Linux:*:*) - echo ${UNAME_MACHINE}-pc-linux-${LIBC} + echo "$UNAME_MACHINE"-pc-linux-"$LIBC" exit ;; ia64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; k1om:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} - exit ;; + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + loongarch32:Linux:*:* | loongarch64:Linux:*:* | loongarchx32:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; m32r*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; m68*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; mips:Linux:*:* | mips64:Linux:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c + set_cc_for_build + IS_GLIBC=0 + test x"${LIBC}" = xgnu && IS_GLIBC=1 + sed 's/^ //' << EOF > "$dummy.c" #undef CPU - #undef ${UNAME_MACHINE} - #undef ${UNAME_MACHINE}el + #undef mips + #undef mipsel + #undef mips64 + #undef mips64el + #if ${IS_GLIBC} && defined(_ABI64) + LIBCABI=gnuabi64 + #else + #if ${IS_GLIBC} && defined(_ABIN32) + LIBCABI=gnuabin32 + #else + LIBCABI=${LIBC} + #endif + #endif + + #if ${IS_GLIBC} && defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6 + CPU=mipsisa64r6 + #else + #if ${IS_GLIBC} && !defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6 + CPU=mipsisa32r6 + #else + #if defined(__mips64) + CPU=mips64 + #else + CPU=mips + #endif + #endif + #endif + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) - CPU=${UNAME_MACHINE}el + 
MIPS_ENDIAN=el #else #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) - CPU=${UNAME_MACHINE} + MIPS_ENDIAN= #else - CPU= + MIPS_ENDIAN= #endif #endif EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` - test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; } + eval "$($CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU\|^MIPS_ENDIAN\|^LIBCABI')" + test "x$CPU" != x && { echo "$CPU${MIPS_ENDIAN}-unknown-linux-$LIBCABI"; exit; } ;; + mips64el:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; openrisc*:Linux:*:*) - echo or1k-unknown-linux-${LIBC} + echo or1k-unknown-linux-"$LIBC" exit ;; or32:Linux:*:* | or1k*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; padre:Linux:*:*) - echo sparc-unknown-linux-${LIBC} + echo sparc-unknown-linux-"$LIBC" exit ;; parisc64:Linux:*:* | hppa64:Linux:*:*) - echo hppa64-unknown-linux-${LIBC} + echo hppa64-unknown-linux-"$LIBC" exit ;; parisc:Linux:*:* | hppa:Linux:*:*) # Look for CPU level - case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in - PA7*) echo hppa1.1-unknown-linux-${LIBC} ;; - PA8*) echo hppa2.0-unknown-linux-${LIBC} ;; - *) echo hppa-unknown-linux-${LIBC} ;; + case $(grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2) in + PA7*) echo hppa1.1-unknown-linux-"$LIBC" ;; + PA8*) echo hppa2.0-unknown-linux-"$LIBC" ;; + *) echo hppa-unknown-linux-"$LIBC" ;; esac exit ;; ppc64:Linux:*:*) - echo powerpc64-unknown-linux-${LIBC} + echo powerpc64-unknown-linux-"$LIBC" exit ;; ppc:Linux:*:*) - echo powerpc-unknown-linux-${LIBC} + echo powerpc-unknown-linux-"$LIBC" exit ;; ppc64le:Linux:*:*) - echo powerpc64le-unknown-linux-${LIBC} + echo powerpc64le-unknown-linux-"$LIBC" exit ;; ppcle:Linux:*:*) - echo powerpcle-unknown-linux-${LIBC} + echo powerpcle-unknown-linux-"$LIBC" + exit ;; + riscv32:Linux:*:* | riscv32be:Linux:*:* | riscv64:Linux:*:* | 
riscv64be:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; s390:Linux:*:* | s390x:Linux:*:*) - echo ${UNAME_MACHINE}-ibm-linux-${LIBC} + echo "$UNAME_MACHINE"-ibm-linux-"$LIBC" exit ;; sh64*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; sh*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; sparc:Linux:*:* | sparc64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; tile*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; vax:Linux:*:*) - echo ${UNAME_MACHINE}-dec-linux-${LIBC} + echo "$UNAME_MACHINE"-dec-linux-"$LIBC" exit ;; x86_64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + set_cc_for_build + LIBCABI=$LIBC + if test "$CC_FOR_BUILD" != no_compiler_found; then + if (echo '#ifdef __ILP32__'; echo IS_X32; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_X32 >/dev/null + then + LIBCABI="$LIBC"x32 + fi + fi + echo "$UNAME_MACHINE"-pc-linux-"$LIBCABI" exit ;; xtensa*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; i*86:DYNIX/ptx:4*:*) # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. @@ -1041,51 +1135,51 @@ EOF # I am not positive that other SVR4 systems won't match this, # I just have to hope. -- rms. # Use sysv4.2uw... so that sysv4* matches it. - echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} + echo "$UNAME_MACHINE"-pc-sysv4.2uw"$UNAME_VERSION" exit ;; i*86:OS/2:*:*) # If we were able to find `uname', then EMX Unix compatibility # is probably installed. 
- echo ${UNAME_MACHINE}-pc-os2-emx + echo "$UNAME_MACHINE"-pc-os2-emx exit ;; i*86:XTS-300:*:STOP) - echo ${UNAME_MACHINE}-unknown-stop + echo "$UNAME_MACHINE"-unknown-stop exit ;; i*86:atheos:*:*) - echo ${UNAME_MACHINE}-unknown-atheos + echo "$UNAME_MACHINE"-unknown-atheos exit ;; i*86:syllable:*:*) - echo ${UNAME_MACHINE}-pc-syllable + echo "$UNAME_MACHINE"-pc-syllable exit ;; i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) - echo i386-unknown-lynxos${UNAME_RELEASE} + echo i386-unknown-lynxos"$UNAME_RELEASE" exit ;; i*86:*DOS:*:*) - echo ${UNAME_MACHINE}-pc-msdosdjgpp + echo "$UNAME_MACHINE"-pc-msdosdjgpp exit ;; - i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*) - UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` + i*86:*:4.*:*) + UNAME_REL=$(echo "$UNAME_RELEASE" | sed 's/\/MP$//') if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then - echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL} + echo "$UNAME_MACHINE"-univel-sysv"$UNAME_REL" else - echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} + echo "$UNAME_MACHINE"-pc-sysv"$UNAME_REL" fi exit ;; i*86:*:5:[678]*) # UnixWare 7.x, OpenUNIX and OpenServer 6. 
- case `/bin/uname -X | grep "^Machine"` in + case $(/bin/uname -X | grep "^Machine") in *486*) UNAME_MACHINE=i486 ;; *Pentium) UNAME_MACHINE=i586 ;; *Pent*|*Celeron) UNAME_MACHINE=i686 ;; esac - echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} + echo "$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}" exit ;; i*86:*:3.2:*) if test -f /usr/options/cb.name; then - UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then - UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` + UNAME_REL=$( (/bin/uname -X|grep Release|sed -e 's/.*= //')) (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ && UNAME_MACHINE=i586 @@ -1093,9 +1187,9 @@ EOF && UNAME_MACHINE=i686 (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ && UNAME_MACHINE=i686 - echo ${UNAME_MACHINE}-pc-sco$UNAME_REL + echo "$UNAME_MACHINE"-pc-sco"$UNAME_REL" else - echo ${UNAME_MACHINE}-pc-sysv32 + echo "$UNAME_MACHINE"-pc-sysv32 fi exit ;; pc:*:*:*) @@ -1103,7 +1197,7 @@ EOF # uname -m prints for DJGPP always 'pc', but it prints nothing about # the processor, so we play safe by assuming i586. # Note: whatever this is, it MUST be the same as what config.sub - # prints for the "djgpp" host, or else GDB configury will decide that + # prints for the "djgpp" host, or else GDB configure will decide that # this is a cross-build. echo i586-pc-msdosdjgpp exit ;; @@ -1115,9 +1209,9 @@ EOF exit ;; i860:*:4.*:*) # i860-SVR4 if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then - echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 + echo i860-stardent-sysv"$UNAME_RELEASE" # Stardent Vistra i860-SVR4 else # Add other i860-SVR4 vendors below as they are discovered. 
- echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 + echo i860-unknown-sysv"$UNAME_RELEASE" # Unknown i860-SVR4 fi exit ;; mini*:CTIX:SYS*5:*) @@ -1135,41 +1229,41 @@ EOF 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) OS_REL='' test -r /etc/.relid \ - && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + && OS_REL=.$(sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid) /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ - && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ && { echo i486-ncr-sysv4; exit; } ;; NCR*:*:4.2:* | MPRAS*:*:4.2:*) OS_REL='.3' test -r /etc/.relid \ - && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + && OS_REL=.$(sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid) /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ - && { echo i586-ncr-sysv4.3${OS_REL}; exit; } + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ - && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) - echo m68k-unknown-lynxos${UNAME_RELEASE} + echo m68k-unknown-lynxos"$UNAME_RELEASE" exit ;; mc68030:UNIX_System_V:4.*:*) echo m68k-atari-sysv4 exit ;; TSUNAMI:LynxOS:2.*:*) - echo sparc-unknown-lynxos${UNAME_RELEASE} + echo sparc-unknown-lynxos"$UNAME_RELEASE" exit ;; rs6000:LynxOS:2.*:*) - echo 
rs6000-unknown-lynxos${UNAME_RELEASE} + echo rs6000-unknown-lynxos"$UNAME_RELEASE" exit ;; PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) - echo powerpc-unknown-lynxos${UNAME_RELEASE} + echo powerpc-unknown-lynxos"$UNAME_RELEASE" exit ;; SM[BE]S:UNIX_SV:*:*) - echo mips-dde-sysv${UNAME_RELEASE} + echo mips-dde-sysv"$UNAME_RELEASE" exit ;; RM*:ReliantUNIX-*:*:*) echo mips-sni-sysv4 @@ -1179,8 +1273,8 @@ EOF exit ;; *:SINIX-*:*:*) if uname -p 2>/dev/null >/dev/null ; then - UNAME_MACHINE=`(uname -p) 2>/dev/null` - echo ${UNAME_MACHINE}-sni-sysv4 + UNAME_MACHINE=$( (uname -p) 2>/dev/null) + echo "$UNAME_MACHINE"-sni-sysv4 else echo ns32k-sni-sysv fi @@ -1200,23 +1294,23 @@ EOF exit ;; i*86:VOS:*:*) # From Paul.Green@stratus.com. - echo ${UNAME_MACHINE}-stratus-vos + echo "$UNAME_MACHINE"-stratus-vos exit ;; *:VOS:*:*) # From Paul.Green@stratus.com. echo hppa1.1-stratus-vos exit ;; mc68*:A/UX:*:*) - echo m68k-apple-aux${UNAME_RELEASE} + echo m68k-apple-aux"$UNAME_RELEASE" exit ;; news*:NEWS-OS:6*:*) echo mips-sony-newsos6 exit ;; R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) - if [ -d /usr/nec ]; then - echo mips-nec-sysv${UNAME_RELEASE} + if test -d /usr/nec; then + echo mips-nec-sysv"$UNAME_RELEASE" else - echo mips-unknown-sysv${UNAME_RELEASE} + echo mips-unknown-sysv"$UNAME_RELEASE" fi exit ;; BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. 
@@ -1235,77 +1329,97 @@ EOF echo x86_64-unknown-haiku exit ;; SX-4:SUPER-UX:*:*) - echo sx4-nec-superux${UNAME_RELEASE} + echo sx4-nec-superux"$UNAME_RELEASE" exit ;; SX-5:SUPER-UX:*:*) - echo sx5-nec-superux${UNAME_RELEASE} + echo sx5-nec-superux"$UNAME_RELEASE" exit ;; SX-6:SUPER-UX:*:*) - echo sx6-nec-superux${UNAME_RELEASE} + echo sx6-nec-superux"$UNAME_RELEASE" exit ;; SX-7:SUPER-UX:*:*) - echo sx7-nec-superux${UNAME_RELEASE} + echo sx7-nec-superux"$UNAME_RELEASE" exit ;; SX-8:SUPER-UX:*:*) - echo sx8-nec-superux${UNAME_RELEASE} + echo sx8-nec-superux"$UNAME_RELEASE" exit ;; SX-8R:SUPER-UX:*:*) - echo sx8r-nec-superux${UNAME_RELEASE} + echo sx8r-nec-superux"$UNAME_RELEASE" + exit ;; + SX-ACE:SUPER-UX:*:*) + echo sxace-nec-superux"$UNAME_RELEASE" exit ;; Power*:Rhapsody:*:*) - echo powerpc-apple-rhapsody${UNAME_RELEASE} + echo powerpc-apple-rhapsody"$UNAME_RELEASE" exit ;; *:Rhapsody:*:*) - echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} + echo "$UNAME_MACHINE"-apple-rhapsody"$UNAME_RELEASE" + exit ;; + arm64:Darwin:*:*) + echo aarch64-apple-darwin"$UNAME_RELEASE" exit ;; *:Darwin:*:*) - UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown - eval $set_cc_for_build - if test "$UNAME_PROCESSOR" = unknown ; then - UNAME_PROCESSOR=powerpc + UNAME_PROCESSOR=$(uname -p) + case $UNAME_PROCESSOR in + unknown) UNAME_PROCESSOR=powerpc ;; + esac + if command -v xcode-select > /dev/null 2> /dev/null && \ + ! xcode-select --print-path > /dev/null 2> /dev/null ; then + # Avoid executing cc if there is no toolchain installed as + # cc will be a stub that puts up a graphical alert + # prompting the user to install developer tools. 
+ CC_FOR_BUILD=no_compiler_found + else + set_cc_for_build fi - if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then - if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then - if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ - grep IS_64BIT_ARCH >/dev/null - then - case $UNAME_PROCESSOR in - i386) UNAME_PROCESSOR=x86_64 ;; - powerpc) UNAME_PROCESSOR=powerpc64 ;; - esac - fi + if test "$CC_FOR_BUILD" != no_compiler_found; then + if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + case $UNAME_PROCESSOR in + i386) UNAME_PROCESSOR=x86_64 ;; + powerpc) UNAME_PROCESSOR=powerpc64 ;; + esac + fi + # On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc + if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_PPC >/dev/null + then + UNAME_PROCESSOR=powerpc fi elif test "$UNAME_PROCESSOR" = i386 ; then - # Avoid executing cc on OS X 10.9, as it ships with a stub - # that puts up a graphical alert prompting to install - # developer tools. Any system running Mac OS X 10.7 or - # later (Darwin 11 and later) is required to have a 64-bit - # processor. This is not true of the ARM version of Darwin - # that Apple uses in portable devices. 
- UNAME_PROCESSOR=x86_64 + # uname -m returns i386 or x86_64 + UNAME_PROCESSOR=$UNAME_MACHINE fi - echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} + echo "$UNAME_PROCESSOR"-apple-darwin"$UNAME_RELEASE" exit ;; *:procnto*:*:* | *:QNX:[0123456789]*:*) - UNAME_PROCESSOR=`uname -p` - if test "$UNAME_PROCESSOR" = "x86"; then + UNAME_PROCESSOR=$(uname -p) + if test "$UNAME_PROCESSOR" = x86; then UNAME_PROCESSOR=i386 UNAME_MACHINE=pc fi - echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE} + echo "$UNAME_PROCESSOR"-"$UNAME_MACHINE"-nto-qnx"$UNAME_RELEASE" exit ;; *:QNX:*:4*) echo i386-pc-qnx exit ;; - NEO-?:NONSTOP_KERNEL:*:*) - echo neo-tandem-nsk${UNAME_RELEASE} + NEO-*:NONSTOP_KERNEL:*:*) + echo neo-tandem-nsk"$UNAME_RELEASE" exit ;; NSE-*:NONSTOP_KERNEL:*:*) - echo nse-tandem-nsk${UNAME_RELEASE} + echo nse-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSR-*:NONSTOP_KERNEL:*:*) + echo nsr-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSV-*:NONSTOP_KERNEL:*:*) + echo nsv-tandem-nsk"$UNAME_RELEASE" exit ;; - NSR-?:NONSTOP_KERNEL:*:*) - echo nsr-tandem-nsk${UNAME_RELEASE} + NSX-*:NONSTOP_KERNEL:*:*) + echo nsx-tandem-nsk"$UNAME_RELEASE" exit ;; *:NonStop-UX:*:*) echo mips-compaq-nonstopux @@ -1314,18 +1428,19 @@ EOF echo bs2000-siemens-sysv exit ;; DS/*:UNIX_System_V:*:*) - echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} + echo "$UNAME_MACHINE"-"$UNAME_SYSTEM"-"$UNAME_RELEASE" exit ;; *:Plan9:*:*) # "uname -m" is not consistent, so use $cputype instead. 386 # is converted to i386 for consistency with other x86 # operating systems. 
- if test "$cputype" = "386"; then + # shellcheck disable=SC2154 + if test "$cputype" = 386; then UNAME_MACHINE=i386 else UNAME_MACHINE="$cputype" fi - echo ${UNAME_MACHINE}-unknown-plan9 + echo "$UNAME_MACHINE"-unknown-plan9 exit ;; *:TOPS-10:*:*) echo pdp10-unknown-tops10 @@ -1346,14 +1461,14 @@ EOF echo pdp10-unknown-its exit ;; SEI:*:*:SEIUX) - echo mips-sei-seiux${UNAME_RELEASE} + echo mips-sei-seiux"$UNAME_RELEASE" exit ;; *:DragonFly:*:*) - echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` + echo "$UNAME_MACHINE"-unknown-dragonfly"$(echo "$UNAME_RELEASE"|sed -e 's/[-(].*//')" exit ;; *:*VMS:*:*) - UNAME_MACHINE=`(uname -p) 2>/dev/null` - case "${UNAME_MACHINE}" in + UNAME_MACHINE=$( (uname -p) 2>/dev/null) + case "$UNAME_MACHINE" in A*) echo alpha-dec-vms ; exit ;; I*) echo ia64-dec-vms ; exit ;; V*) echo vax-dec-vms ; exit ;; @@ -1362,62 +1477,223 @@ EOF echo i386-pc-xenix exit ;; i*86:skyos:*:*) - echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' + echo "$UNAME_MACHINE"-pc-skyos"$(echo "$UNAME_RELEASE" | sed -e 's/ .*$//')" exit ;; i*86:rdos:*:*) - echo ${UNAME_MACHINE}-pc-rdos + echo "$UNAME_MACHINE"-pc-rdos exit ;; - i*86:AROS:*:*) - echo ${UNAME_MACHINE}-pc-aros + *:AROS:*:*) + echo "$UNAME_MACHINE"-unknown-aros exit ;; x86_64:VMkernel:*:*) - echo ${UNAME_MACHINE}-unknown-esx + echo "$UNAME_MACHINE"-unknown-esx exit ;; + amd64:Isilon\ OneFS:*:*) + echo x86_64-unknown-onefs + exit ;; + *:Unleashed:*:*) + echo "$UNAME_MACHINE"-unknown-unleashed"$UNAME_RELEASE" + exit ;; +esac + +# No uname command or uname output not recognized. 
+set_cc_for_build +cat > "$dummy.c" < +#include +#endif +#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__) +#if defined (vax) || defined (__vax) || defined (__vax__) || defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__) +#include +#if defined(_SIZE_T_) || defined(SIGLOST) +#include +#endif +#endif +#endif +main () +{ +#if defined (sony) +#if defined (MIPSEB) + /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, + I don't know.... */ + printf ("mips-sony-bsd\n"); exit (0); +#else +#include + printf ("m68k-sony-newsos%s\n", +#ifdef NEWSOS4 + "4" +#else + "" +#endif + ); exit (0); +#endif +#endif + +#if defined (NeXT) +#if !defined (__ARCHITECTURE__) +#define __ARCHITECTURE__ "m68k" +#endif + int version; + version=$( (hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null); + if (version < 4) + printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); + else + printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); + exit (0); +#endif + +#if defined (MULTIMAX) || defined (n16) +#if defined (UMAXV) + printf ("ns32k-encore-sysv\n"); exit (0); +#else +#if defined (CMU) + printf ("ns32k-encore-mach\n"); exit (0); +#else + printf ("ns32k-encore-bsd\n"); exit (0); +#endif +#endif +#endif + +#if defined (__386BSD__) + printf ("i386-pc-bsd\n"); exit (0); +#endif + +#if defined (sequent) +#if defined (i386) + printf ("i386-sequent-dynix\n"); exit (0); +#endif +#if defined (ns32000) + printf ("ns32k-sequent-dynix\n"); exit (0); +#endif +#endif + +#if defined (_SEQUENT_) + struct utsname un; + + uname(&un); + if (strncmp(un.version, "V2", 2) == 0) { + printf ("i386-sequent-ptx2\n"); exit (0); + } + if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? 
*/ + printf ("i386-sequent-ptx1\n"); exit (0); + } + printf ("i386-sequent-ptx\n"); exit (0); +#endif + +#if defined (vax) +#if !defined (ultrix) +#include +#if defined (BSD) +#if BSD == 43 + printf ("vax-dec-bsd4.3\n"); exit (0); +#else +#if BSD == 199006 + printf ("vax-dec-bsd4.3reno\n"); exit (0); +#else + printf ("vax-dec-bsd\n"); exit (0); +#endif +#endif +#else + printf ("vax-dec-bsd\n"); exit (0); +#endif +#else +#if defined(_SIZE_T_) || defined(SIGLOST) + struct utsname un; + uname (&un); + printf ("vax-dec-ultrix%s\n", un.release); exit (0); +#else + printf ("vax-dec-ultrix\n"); exit (0); +#endif +#endif +#endif +#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__) +#if defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__) +#if defined(_SIZE_T_) || defined(SIGLOST) + struct utsname *un; + uname (&un); + printf ("mips-dec-ultrix%s\n", un.release); exit (0); +#else + printf ("mips-dec-ultrix\n"); exit (0); +#endif +#endif +#endif + +#if defined (alliant) && defined (i860) + printf ("i860-alliant-bsd\n"); exit (0); +#endif + + exit (1); +} +EOF + +$CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null && SYSTEM_NAME=$($dummy) && + { echo "$SYSTEM_NAME"; exit; } + +# Apollos put the system type in the environment. +test -d /usr/apollo && { echo "$ISP-apollo-$SYSTYPE"; exit; } + +echo "$0: unable to guess system type" >&2 + +case "$UNAME_MACHINE:$UNAME_SYSTEM" in + mips:Linux | mips64:Linux) + # If we got here on MIPS GNU/Linux, output extra information. + cat >&2 <&2 < in order to provide the needed -information to handle your system. 
+year=$(echo $timestamp | sed 's,-.*,,') +# shellcheck disable=SC2003 +if test "$(expr "$(date +%Y)" - "$year")" -lt 3 ; then + cat >&2 </dev/null || echo unknown` -uname -r = `(uname -r) 2>/dev/null || echo unknown` -uname -s = `(uname -s) 2>/dev/null || echo unknown` -uname -v = `(uname -v) 2>/dev/null || echo unknown` +uname -m = $( (uname -m) 2>/dev/null || echo unknown) +uname -r = $( (uname -r) 2>/dev/null || echo unknown) +uname -s = $( (uname -s) 2>/dev/null || echo unknown) +uname -v = $( (uname -v) 2>/dev/null || echo unknown) -/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` -/bin/uname -X = `(/bin/uname -X) 2>/dev/null` +/usr/bin/uname -p = $( (/usr/bin/uname -p) 2>/dev/null) +/bin/uname -X = $( (/bin/uname -X) 2>/dev/null) -hostinfo = `(hostinfo) 2>/dev/null` -/bin/universe = `(/bin/universe) 2>/dev/null` -/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` -/bin/arch = `(/bin/arch) 2>/dev/null` -/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` -/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` +hostinfo = $( (hostinfo) 2>/dev/null) +/bin/universe = $( (/bin/universe) 2>/dev/null) +/usr/bin/arch -k = $( (/usr/bin/arch -k) 2>/dev/null) +/bin/arch = $( (/bin/arch) 2>/dev/null) +/usr/bin/oslevel = $( (/usr/bin/oslevel) 2>/dev/null) +/usr/convex/getsysinfo = $( (/usr/convex/getsysinfo) 2>/dev/null) -UNAME_MACHINE = ${UNAME_MACHINE} -UNAME_RELEASE = ${UNAME_RELEASE} -UNAME_SYSTEM = ${UNAME_SYSTEM} -UNAME_VERSION = ${UNAME_VERSION} +UNAME_MACHINE = "$UNAME_MACHINE" +UNAME_RELEASE = "$UNAME_RELEASE" +UNAME_SYSTEM = "$UNAME_SYSTEM" +UNAME_VERSION = "$UNAME_VERSION" EOF +fi exit 1 # Local variables: -# eval: (add-hook 'write-file-hooks 'time-stamp) +# eval: (add-hook 'before-save-hook 'time-stamp) # time-stamp-start: "timestamp='" # time-stamp-format: "%:y-%02m-%02d" # time-stamp-end: "'" diff --git a/third-party/gasnet/gasnet-src/config-aux/config.sub b/third-party/gasnet/gasnet-src/config-aux/config.sub old mode 100755 new mode 100644 
index 7ffe37378428..63c1f1c8b5e2 --- a/third-party/gasnet/gasnet-src/config-aux/config.sub +++ b/third-party/gasnet/gasnet-src/config-aux/config.sub @@ -1,8 +1,8 @@ #! /bin/sh # Configuration validation subroutine script. -# Copyright 1992-2014 Free Software Foundation, Inc. +# Copyright 1992-2021 Free Software Foundation, Inc. -timestamp='2014-12-03' +timestamp='2021-01-08' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -15,7 +15,7 @@ timestamp='2014-12-03' # General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program; if not, see . +# along with this program; if not, see . # # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a @@ -33,7 +33,7 @@ timestamp='2014-12-03' # Otherwise, we print the canonical config type on stdout and succeed. # You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD +# https://git.savannah.gnu.org/cgit/config.git/plain/config.sub # This file is supposed to be the same for all GNU packages # and recognize all the CPU types, system types and aliases @@ -50,15 +50,14 @@ timestamp='2014-12-03' # CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM # It is wrong to echo any other type of specification. -me=`echo "$0" | sed -e 's,.*/,,'` +me=$(echo "$0" | sed -e 's,.*/,,') usage="\ -Usage: $0 [OPTION] CPU-MFR-OPSYS - $0 [OPTION] ALIAS +Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS Canonicalize a configuration name. -Operation modes: +Options: -h, --help print this help, then exit -t, --time-stamp print date of last modification, then exit -v, --version print version number, then exit @@ -68,7 +67,7 @@ Report bugs and patches to ." version="\ GNU config.sub ($timestamp) -Copyright 1992-2014 Free Software Foundation, Inc. 
+Copyright 1992-2021 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -90,12 +89,12 @@ while test $# -gt 0 ; do - ) # Use stdin as input. break ;; -* ) - echo "$me: invalid option $1$help" + echo "$me: invalid option $1$help" >&2 exit 1 ;; *local*) # First pass through any local machine types. - echo $1 + echo "$1" exit ;; * ) @@ -111,1228 +110,1169 @@ case $# in exit 1;; esac -# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). -# Here we must recognize all the valid KERNEL-OS combinations. -maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` -case $maybe_os in - nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ - linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ - knetbsd*-gnu* | netbsd*-gnu* | \ - kopensolaris*-gnu* | \ - storm-chaos* | os2-emx* | rtmk-nova*) - os=-$maybe_os - basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` - ;; - android-linux) - os=-linux-android - basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown - ;; - *) - basic_machine=`echo $1 | sed 's/-[^-]*$//'` - if [ $basic_machine != $1 ] - then os=`echo $1 | sed 's/.*-/-/'` - else os=; fi - ;; -esac +# Split fields of configuration type +# shellcheck disable=SC2162 +IFS="-" read field1 field2 field3 field4 <&2 + exit 1 ;; - -ptx*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` + *-*-*-*) + basic_machine=$field1-$field2 + basic_os=$field3-$field4 ;; - -windowsnt*) - os=`echo $os | sed -e 's/windowsnt/winnt/'` + *-*-*) + # Ambiguous whether COMPANY is present, or skipped and KERNEL-OS is two + # parts + maybe_os=$field2-$field3 + case $maybe_os in + nto-qnx* | linux-* | uclinux-uclibc* \ + | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* \ + | netbsd*-eabi* | kopensolaris*-gnu* | cloudabi*-eabi* \ + | storm-chaos* | 
os2-emx* | rtmk-nova*) + basic_machine=$field1 + basic_os=$maybe_os + ;; + android-linux) + basic_machine=$field1-unknown + basic_os=linux-android + ;; + *) + basic_machine=$field1-$field2 + basic_os=$field3 + ;; + esac ;; - -psos*) - os=-psos + *-*) + # A lone config we happen to match not fitting any pattern + case $field1-$field2 in + decstation-3100) + basic_machine=mips-dec + basic_os= + ;; + *-*) + # Second component is usually, but not always the OS + case $field2 in + # Prevent following clause from handling this valid os + sun*os*) + basic_machine=$field1 + basic_os=$field2 + ;; + # Manufacturers + dec* | mips* | sequent* | encore* | pc533* | sgi* | sony* \ + | att* | 7300* | 3300* | delta* | motorola* | sun[234]* \ + | unicom* | ibm* | next | hp | isi* | apollo | altos* \ + | convergent* | ncr* | news | 32* | 3600* | 3100* \ + | hitachi* | c[123]* | convex* | sun | crds | omron* | dg \ + | ultra | tti* | harris | dolphin | highlevel | gould \ + | cbm | ns | masscomp | apple | axis | knuth | cray \ + | microblaze* | sim | cisco \ + | oki | wec | wrs | winbond) + basic_machine=$field1-$field2 + basic_os= + ;; + *) + basic_machine=$field1 + basic_os=$field2 + ;; + esac + ;; + esac ;; - -mint | -mint[0-9]*) - basic_machine=m68k-atari - os=-mint + *) + # Convert single-component short-hands not valid as part of + # multi-component configurations. 
+ case $field1 in + 386bsd) + basic_machine=i386-pc + basic_os=bsd + ;; + a29khif) + basic_machine=a29k-amd + basic_os=udi + ;; + adobe68k) + basic_machine=m68010-adobe + basic_os=scout + ;; + alliant) + basic_machine=fx80-alliant + basic_os= + ;; + altos | altos3068) + basic_machine=m68k-altos + basic_os= + ;; + am29k) + basic_machine=a29k-none + basic_os=bsd + ;; + amdahl) + basic_machine=580-amdahl + basic_os=sysv + ;; + amiga) + basic_machine=m68k-unknown + basic_os= + ;; + amigaos | amigados) + basic_machine=m68k-unknown + basic_os=amigaos + ;; + amigaunix | amix) + basic_machine=m68k-unknown + basic_os=sysv4 + ;; + apollo68) + basic_machine=m68k-apollo + basic_os=sysv + ;; + apollo68bsd) + basic_machine=m68k-apollo + basic_os=bsd + ;; + aros) + basic_machine=i386-pc + basic_os=aros + ;; + aux) + basic_machine=m68k-apple + basic_os=aux + ;; + balance) + basic_machine=ns32k-sequent + basic_os=dynix + ;; + blackfin) + basic_machine=bfin-unknown + basic_os=linux + ;; + cegcc) + basic_machine=arm-unknown + basic_os=cegcc + ;; + convex-c1) + basic_machine=c1-convex + basic_os=bsd + ;; + convex-c2) + basic_machine=c2-convex + basic_os=bsd + ;; + convex-c32) + basic_machine=c32-convex + basic_os=bsd + ;; + convex-c34) + basic_machine=c34-convex + basic_os=bsd + ;; + convex-c38) + basic_machine=c38-convex + basic_os=bsd + ;; + cray) + basic_machine=j90-cray + basic_os=unicos + ;; + crds | unos) + basic_machine=m68k-crds + basic_os= + ;; + da30) + basic_machine=m68k-da30 + basic_os= + ;; + decstation | pmax | pmin | dec3100 | decstatn) + basic_machine=mips-dec + basic_os= + ;; + delta88) + basic_machine=m88k-motorola + basic_os=sysv3 + ;; + dicos) + basic_machine=i686-pc + basic_os=dicos + ;; + djgpp) + basic_machine=i586-pc + basic_os=msdosdjgpp + ;; + ebmon29k) + basic_machine=a29k-amd + basic_os=ebmon + ;; + es1800 | OSE68k | ose68k | ose | OSE) + basic_machine=m68k-ericsson + basic_os=ose + ;; + gmicro) + basic_machine=tron-gmicro + basic_os=sysv + ;; + go32) + 
basic_machine=i386-pc + basic_os=go32 + ;; + h8300hms) + basic_machine=h8300-hitachi + basic_os=hms + ;; + h8300xray) + basic_machine=h8300-hitachi + basic_os=xray + ;; + h8500hms) + basic_machine=h8500-hitachi + basic_os=hms + ;; + harris) + basic_machine=m88k-harris + basic_os=sysv3 + ;; + hp300 | hp300hpux) + basic_machine=m68k-hp + basic_os=hpux + ;; + hp300bsd) + basic_machine=m68k-hp + basic_os=bsd + ;; + hppaosf) + basic_machine=hppa1.1-hp + basic_os=osf + ;; + hppro) + basic_machine=hppa1.1-hp + basic_os=proelf + ;; + i386mach) + basic_machine=i386-mach + basic_os=mach + ;; + isi68 | isi) + basic_machine=m68k-isi + basic_os=sysv + ;; + m68knommu) + basic_machine=m68k-unknown + basic_os=linux + ;; + magnum | m3230) + basic_machine=mips-mips + basic_os=sysv + ;; + merlin) + basic_machine=ns32k-utek + basic_os=sysv + ;; + mingw64) + basic_machine=x86_64-pc + basic_os=mingw64 + ;; + mingw32) + basic_machine=i686-pc + basic_os=mingw32 + ;; + mingw32ce) + basic_machine=arm-unknown + basic_os=mingw32ce + ;; + monitor) + basic_machine=m68k-rom68k + basic_os=coff + ;; + morphos) + basic_machine=powerpc-unknown + basic_os=morphos + ;; + moxiebox) + basic_machine=moxie-unknown + basic_os=moxiebox + ;; + msdos) + basic_machine=i386-pc + basic_os=msdos + ;; + msys) + basic_machine=i686-pc + basic_os=msys + ;; + mvs) + basic_machine=i370-ibm + basic_os=mvs + ;; + nacl) + basic_machine=le32-unknown + basic_os=nacl + ;; + ncr3000) + basic_machine=i486-ncr + basic_os=sysv4 + ;; + netbsd386) + basic_machine=i386-pc + basic_os=netbsd + ;; + netwinder) + basic_machine=armv4l-rebel + basic_os=linux + ;; + news | news700 | news800 | news900) + basic_machine=m68k-sony + basic_os=newsos + ;; + news1000) + basic_machine=m68030-sony + basic_os=newsos + ;; + necv70) + basic_machine=v70-nec + basic_os=sysv + ;; + nh3000) + basic_machine=m68k-harris + basic_os=cxux + ;; + nh[45]000) + basic_machine=m88k-harris + basic_os=cxux + ;; + nindy960) + basic_machine=i960-intel + basic_os=nindy 
+ ;; + mon960) + basic_machine=i960-intel + basic_os=mon960 + ;; + nonstopux) + basic_machine=mips-compaq + basic_os=nonstopux + ;; + os400) + basic_machine=powerpc-ibm + basic_os=os400 + ;; + OSE68000 | ose68000) + basic_machine=m68000-ericsson + basic_os=ose + ;; + os68k) + basic_machine=m68k-none + basic_os=os68k + ;; + paragon) + basic_machine=i860-intel + basic_os=osf + ;; + parisc) + basic_machine=hppa-unknown + basic_os=linux + ;; + psp) + basic_machine=mipsallegrexel-sony + basic_os=psp + ;; + pw32) + basic_machine=i586-unknown + basic_os=pw32 + ;; + rdos | rdos64) + basic_machine=x86_64-pc + basic_os=rdos + ;; + rdos32) + basic_machine=i386-pc + basic_os=rdos + ;; + rom68k) + basic_machine=m68k-rom68k + basic_os=coff + ;; + sa29200) + basic_machine=a29k-amd + basic_os=udi + ;; + sei) + basic_machine=mips-sei + basic_os=seiux + ;; + sequent) + basic_machine=i386-sequent + basic_os= + ;; + sps7) + basic_machine=m68k-bull + basic_os=sysv2 + ;; + st2000) + basic_machine=m68k-tandem + basic_os= + ;; + stratus) + basic_machine=i860-stratus + basic_os=sysv4 + ;; + sun2) + basic_machine=m68000-sun + basic_os= + ;; + sun2os3) + basic_machine=m68000-sun + basic_os=sunos3 + ;; + sun2os4) + basic_machine=m68000-sun + basic_os=sunos4 + ;; + sun3) + basic_machine=m68k-sun + basic_os= + ;; + sun3os3) + basic_machine=m68k-sun + basic_os=sunos3 + ;; + sun3os4) + basic_machine=m68k-sun + basic_os=sunos4 + ;; + sun4) + basic_machine=sparc-sun + basic_os= + ;; + sun4os3) + basic_machine=sparc-sun + basic_os=sunos3 + ;; + sun4os4) + basic_machine=sparc-sun + basic_os=sunos4 + ;; + sun4sol2) + basic_machine=sparc-sun + basic_os=solaris2 + ;; + sun386 | sun386i | roadrunner) + basic_machine=i386-sun + basic_os= + ;; + sv1) + basic_machine=sv1-cray + basic_os=unicos + ;; + symmetry) + basic_machine=i386-sequent + basic_os=dynix + ;; + t3e) + basic_machine=alphaev5-cray + basic_os=unicos + ;; + t90) + basic_machine=t90-cray + basic_os=unicos + ;; + toad1) + basic_machine=pdp10-xkl 
+ basic_os=tops20 + ;; + tpf) + basic_machine=s390x-ibm + basic_os=tpf + ;; + udi29k) + basic_machine=a29k-amd + basic_os=udi + ;; + ultra3) + basic_machine=a29k-nyu + basic_os=sym1 + ;; + v810 | necv810) + basic_machine=v810-nec + basic_os=none + ;; + vaxv) + basic_machine=vax-dec + basic_os=sysv + ;; + vms) + basic_machine=vax-dec + basic_os=vms + ;; + vsta) + basic_machine=i386-pc + basic_os=vsta + ;; + vxworks960) + basic_machine=i960-wrs + basic_os=vxworks + ;; + vxworks68) + basic_machine=m68k-wrs + basic_os=vxworks + ;; + vxworks29k) + basic_machine=a29k-wrs + basic_os=vxworks + ;; + xbox) + basic_machine=i686-pc + basic_os=mingw32 + ;; + ymp) + basic_machine=ymp-cray + basic_os=unicos + ;; + *) + basic_machine=$1 + basic_os= + ;; + esac ;; esac -# Decode aliases for certain CPU-COMPANY combinations. +# Decode 1-component or ad-hoc basic machines case $basic_machine in - # Recognize the basic CPU types without company name. - # Some are omitted here because they have special meanings below. 
- 1750a | 580 \ - | a29k \ - | aarch64 | aarch64_be \ - | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ - | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ - | am33_2.0 \ - | arc | arceb \ - | arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \ - | avr | avr32 \ - | be32 | be64 \ - | bfin \ - | c4x | c8051 | clipper \ - | d10v | d30v | dlx | dsp16xx \ - | epiphany \ - | fido | fr30 | frv \ - | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ - | hexagon \ - | i370 | i860 | i960 | ia64 \ - | ip2k | iq2000 \ - | k1om \ - | le32 | le64 \ - | lm32 \ - | m32c | m32r | m32rle | m68000 | m68k | m88k \ - | maxq | mb | microblaze | microblazeel | mcore | mep | metag \ - | mips | mipsbe | mipseb | mipsel | mipsle \ - | mips16 \ - | mips64 | mips64el \ - | mips64octeon | mips64octeonel \ - | mips64orion | mips64orionel \ - | mips64r5900 | mips64r5900el \ - | mips64vr | mips64vrel \ - | mips64vr4100 | mips64vr4100el \ - | mips64vr4300 | mips64vr4300el \ - | mips64vr5000 | mips64vr5000el \ - | mips64vr5900 | mips64vr5900el \ - | mipsisa32 | mipsisa32el \ - | mipsisa32r2 | mipsisa32r2el \ - | mipsisa32r6 | mipsisa32r6el \ - | mipsisa64 | mipsisa64el \ - | mipsisa64r2 | mipsisa64r2el \ - | mipsisa64r6 | mipsisa64r6el \ - | mipsisa64sb1 | mipsisa64sb1el \ - | mipsisa64sr71k | mipsisa64sr71kel \ - | mipsr5900 | mipsr5900el \ - | mipstx39 | mipstx39el \ - | mn10200 | mn10300 \ - | moxie \ - | mt \ - | msp430 \ - | nds32 | nds32le | nds32be \ - | nios | nios2 | nios2eb | nios2el \ - | ns16k | ns32k \ - | open8 | or1k | or1knd | or32 \ - | pdp10 | pdp11 | pj | pjl \ - | powerpc | powerpc64 | powerpc64le | powerpcle \ - | pyramid \ - | riscv32 | riscv64 \ - | rl78 | rx \ - | score \ - | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ - | sh64 | sh64le \ - | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ - | sparcv8 | 
sparcv9 | sparcv9b | sparcv9v \ - | spu \ - | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \ - | ubicom32 \ - | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \ - | visium \ - | we32k \ - | x86 | xc16x | xstormy16 | xtensa \ - | z8k | z80) - basic_machine=$basic_machine-unknown - ;; - c54x) - basic_machine=tic54x-unknown - ;; - c55x) - basic_machine=tic55x-unknown - ;; - c6x) - basic_machine=tic6x-unknown - ;; - leon|leon[3-9]) - basic_machine=sparc-$basic_machine - ;; - m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip) - basic_machine=$basic_machine-unknown - os=-none + # Here we handle the default manufacturer of certain CPU types. It is in + # some cases the only manufacturer, in others, it is the most popular. + w89k) + cpu=hppa1.1 + vendor=winbond ;; - m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) + op50n) + cpu=hppa1.1 + vendor=oki ;; - ms1) - basic_machine=mt-unknown + op60c) + cpu=hppa1.1 + vendor=oki ;; - - strongarm | thumb | xscale) - basic_machine=arm-unknown + ibm*) + cpu=i370 + vendor=ibm ;; - xgate) - basic_machine=$basic_machine-unknown - os=-none + orion105) + cpu=clipper + vendor=highlevel ;; - xscaleeb) - basic_machine=armeb-unknown + mac | mpw | mac-mpw) + cpu=m68k + vendor=apple ;; - - xscaleel) - basic_machine=armel-unknown + pmac | pmac-mpw) + cpu=powerpc + vendor=apple ;; - # We use `pc' rather than `unknown' - # because (1) that's what they normally are, and - # (2) the word "unknown" tends to confuse beginning users. - i*86 | x86_64) - basic_machine=$basic_machine-pc - ;; - # Object if more than one company name word. - *-*-*) - echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 - exit 1 - ;; - # Recognize the basic CPU types with company name. 
- 580-* \ - | a29k-* \ - | aarch64-* | aarch64_be-* \ - | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ - | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ - | alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \ - | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ - | avr-* | avr32-* \ - | be32-* | be64-* \ - | bfin-* | bs2000-* \ - | c[123]* | c30-* | [cjt]90-* | c4x-* \ - | c8051-* | clipper-* | craynv-* | cydra-* \ - | d10v-* | d30v-* | dlx-* \ - | elxsi-* \ - | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ - | h8300-* | h8500-* \ - | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ - | hexagon-* \ - | i*86-* | i860-* | i960-* | ia64-* \ - | ip2k-* | iq2000-* \ - | k1om-* \ - | le32-* | le64-* \ - | lm32-* \ - | m32c-* | m32r-* | m32rle-* \ - | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ - | m88110-* | m88k-* | maxq-* | mcore-* | metag-* \ - | microblaze-* | microblazeel-* \ - | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ - | mips16-* \ - | mips64-* | mips64el-* \ - | mips64octeon-* | mips64octeonel-* \ - | mips64orion-* | mips64orionel-* \ - | mips64r5900-* | mips64r5900el-* \ - | mips64vr-* | mips64vrel-* \ - | mips64vr4100-* | mips64vr4100el-* \ - | mips64vr4300-* | mips64vr4300el-* \ - | mips64vr5000-* | mips64vr5000el-* \ - | mips64vr5900-* | mips64vr5900el-* \ - | mipsisa32-* | mipsisa32el-* \ - | mipsisa32r2-* | mipsisa32r2el-* \ - | mipsisa32r6-* | mipsisa32r6el-* \ - | mipsisa64-* | mipsisa64el-* \ - | mipsisa64r2-* | mipsisa64r2el-* \ - | mipsisa64r6-* | mipsisa64r6el-* \ - | mipsisa64sb1-* | mipsisa64sb1el-* \ - | mipsisa64sr71k-* | mipsisa64sr71kel-* \ - | mipsr5900-* | mipsr5900el-* \ - | mipstx39-* | mipstx39el-* \ - | mmix-* \ - | mt-* \ - | msp430-* \ - | nds32-* | nds32le-* | nds32be-* \ - | nios-* | nios2-* | nios2eb-* | nios2el-* \ - | none-* | np1-* | ns16k-* | ns32k-* \ - | open8-* \ - | or1k*-* \ - | orion-* \ - | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | 
power-* \ - | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \ - | pyramid-* \ - | rl78-* | romp-* | rs6000-* | rx-* \ - | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ - | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ - | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ - | sparclite-* \ - | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \ - | tahoe-* \ - | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ - | tile*-* \ - | tron-* \ - | ubicom32-* \ - | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \ - | vax-* \ - | visium-* \ - | we32k-* \ - | x86-* | x86_64-* | xc16x-* | xps100-* \ - | xstormy16-* | xtensa*-* \ - | ymp-* \ - | z8k-* | z80-*) - ;; - # Recognize the basic CPU types without company name, with glob match. - xtensa*) - basic_machine=$basic_machine-unknown - ;; # Recognize the various machine names and aliases which stand # for a CPU type and a company and sometimes even an OS. 
- 386bsd) - basic_machine=i386-unknown - os=-bsd - ;; 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) - basic_machine=m68000-att + cpu=m68000 + vendor=att ;; 3b*) - basic_machine=we32k-att - ;; - a29khif) - basic_machine=a29k-amd - os=-udi - ;; - abacus) - basic_machine=abacus-unknown - ;; - adobe68k) - basic_machine=m68010-adobe - os=-scout - ;; - alliant | fx80) - basic_machine=fx80-alliant - ;; - altos | altos3068) - basic_machine=m68k-altos - ;; - am29k) - basic_machine=a29k-none - os=-bsd - ;; - amd64) - basic_machine=x86_64-pc - ;; - amd64-*) - basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - amdahl) - basic_machine=580-amdahl - os=-sysv - ;; - amiga | amiga-*) - basic_machine=m68k-unknown - ;; - amigaos | amigados) - basic_machine=m68k-unknown - os=-amigaos - ;; - amigaunix | amix) - basic_machine=m68k-unknown - os=-sysv4 - ;; - apollo68) - basic_machine=m68k-apollo - os=-sysv - ;; - apollo68bsd) - basic_machine=m68k-apollo - os=-bsd - ;; - aros) - basic_machine=i386-pc - os=-aros - ;; - aux) - basic_machine=m68k-apple - os=-aux - ;; - balance) - basic_machine=ns32k-sequent - os=-dynix - ;; - blackfin) - basic_machine=bfin-unknown - os=-linux - ;; - blackfin-*) - basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'` - os=-linux + cpu=we32k + vendor=att ;; bluegene*) - basic_machine=powerpc-ibm - os=-cnk - ;; - c54x-*) - basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - c55x-*) - basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - c6x-*) - basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - c90) - basic_machine=c90-cray - os=-unicos - ;; - cegcc) - basic_machine=arm-unknown - os=-cegcc - ;; - convex-c1) - basic_machine=c1-convex - os=-bsd - ;; - convex-c2) - basic_machine=c2-convex - os=-bsd - ;; - convex-c32) - basic_machine=c32-convex - os=-bsd - ;; - convex-c34) - basic_machine=c34-convex - os=-bsd - ;; - convex-c38) - basic_machine=c38-convex - os=-bsd - ;; - 
cray | j90) - basic_machine=j90-cray - os=-unicos - ;; - craynv) - basic_machine=craynv-cray - os=-unicosmp - ;; - cr16 | cr16-*) - basic_machine=cr16-unknown - os=-elf - ;; - crds | unos) - basic_machine=m68k-crds - ;; - crisv32 | crisv32-* | etraxfs*) - basic_machine=crisv32-axis - ;; - cris | cris-* | etrax*) - basic_machine=cris-axis - ;; - crx) - basic_machine=crx-unknown - os=-elf - ;; - da30 | da30-*) - basic_machine=m68k-da30 - ;; - decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) - basic_machine=mips-dec + cpu=powerpc + vendor=ibm + basic_os=cnk ;; decsystem10* | dec10*) - basic_machine=pdp10-dec - os=-tops10 + cpu=pdp10 + vendor=dec + basic_os=tops10 ;; decsystem20* | dec20*) - basic_machine=pdp10-dec - os=-tops20 + cpu=pdp10 + vendor=dec + basic_os=tops20 ;; delta | 3300 | motorola-3300 | motorola-delta \ | 3300-motorola | delta-motorola) - basic_machine=m68k-motorola - ;; - delta88) - basic_machine=m88k-motorola - os=-sysv3 - ;; - dicos) - basic_machine=i686-pc - os=-dicos - ;; - djgpp) - basic_machine=i586-pc - os=-msdosdjgpp + cpu=m68k + vendor=motorola ;; - dpx20 | dpx20-*) - basic_machine=rs6000-bull - os=-bosx - ;; - dpx2* | dpx2*-bull) - basic_machine=m68k-bull - os=-sysv3 - ;; - ebmon29k) - basic_machine=a29k-amd - os=-ebmon - ;; - elxsi) - basic_machine=elxsi-elxsi - os=-bsd + dpx2*) + cpu=m68k + vendor=bull + basic_os=sysv3 ;; encore | umax | mmax) - basic_machine=ns32k-encore + cpu=ns32k + vendor=encore ;; - es1800 | OSE68k | ose68k | ose | OSE) - basic_machine=m68k-ericsson - os=-ose + elxsi) + cpu=elxsi + vendor=elxsi + basic_os=${basic_os:-bsd} ;; fx2800) - basic_machine=i860-alliant + cpu=i860 + vendor=alliant ;; genix) - basic_machine=ns32k-ns - ;; - gmicro) - basic_machine=tron-gmicro - os=-sysv - ;; - go32) - basic_machine=i386-pc - os=-go32 + cpu=ns32k + vendor=ns ;; h3050r* | hiux*) - basic_machine=hppa1.1-hitachi - os=-hiuxwe2 - ;; - h8300hms) - basic_machine=h8300-hitachi - os=-hms - ;; - h8300xray) - 
basic_machine=h8300-hitachi - os=-xray - ;; - h8500hms) - basic_machine=h8500-hitachi - os=-hms - ;; - harris) - basic_machine=m88k-harris - os=-sysv3 - ;; - hp300-*) - basic_machine=m68k-hp - ;; - hp300bsd) - basic_machine=m68k-hp - os=-bsd - ;; - hp300hpux) - basic_machine=m68k-hp - os=-hpux + cpu=hppa1.1 + vendor=hitachi + basic_os=hiuxwe2 ;; hp3k9[0-9][0-9] | hp9[0-9][0-9]) - basic_machine=hppa1.0-hp + cpu=hppa1.0 + vendor=hp ;; hp9k2[0-9][0-9] | hp9k31[0-9]) - basic_machine=m68000-hp + cpu=m68000 + vendor=hp ;; hp9k3[2-9][0-9]) - basic_machine=m68k-hp + cpu=m68k + vendor=hp ;; hp9k6[0-9][0-9] | hp6[0-9][0-9]) - basic_machine=hppa1.0-hp + cpu=hppa1.0 + vendor=hp ;; hp9k7[0-79][0-9] | hp7[0-79][0-9]) - basic_machine=hppa1.1-hp + cpu=hppa1.1 + vendor=hp ;; hp9k78[0-9] | hp78[0-9]) # FIXME: really hppa2.0-hp - basic_machine=hppa1.1-hp + cpu=hppa1.1 + vendor=hp ;; hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) # FIXME: really hppa2.0-hp - basic_machine=hppa1.1-hp + cpu=hppa1.1 + vendor=hp ;; hp9k8[0-9][13679] | hp8[0-9][13679]) - basic_machine=hppa1.1-hp + cpu=hppa1.1 + vendor=hp ;; hp9k8[0-9][0-9] | hp8[0-9][0-9]) - basic_machine=hppa1.0-hp - ;; - hppa-next) - os=-nextstep3 - ;; - hppaosf) - basic_machine=hppa1.1-hp - os=-osf - ;; - hppro) - basic_machine=hppa1.1-hp - os=-proelf - ;; - i370-ibm* | ibm*) - basic_machine=i370-ibm + cpu=hppa1.0 + vendor=hp ;; i*86v32) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv32 + cpu=$(echo "$1" | sed -e 's/86.*/86/') + vendor=pc + basic_os=sysv32 ;; i*86v4*) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv4 + cpu=$(echo "$1" | sed -e 's/86.*/86/') + vendor=pc + basic_os=sysv4 ;; i*86v) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv + cpu=$(echo "$1" | sed -e 's/86.*/86/') + vendor=pc + basic_os=sysv ;; i*86sol2) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-solaris2 - ;; - i386mach) - basic_machine=i386-mach - os=-mach + cpu=$(echo 
"$1" | sed -e 's/86.*/86/') + vendor=pc + basic_os=solaris2 ;; - i386-vsta | vsta) - basic_machine=i386-unknown - os=-vsta + j90 | j90-cray) + cpu=j90 + vendor=cray + basic_os=${basic_os:-unicos} ;; iris | iris4d) - basic_machine=mips-sgi - case $os in - -irix*) + cpu=mips + vendor=sgi + case $basic_os in + irix*) ;; *) - os=-irix4 + basic_os=irix4 ;; esac ;; - isi68 | isi) - basic_machine=m68k-isi - os=-sysv - ;; - leon-*|leon[3-9]-*) - basic_machine=sparc-`echo $basic_machine | sed 's/-.*//'` - ;; - m68knommu) - basic_machine=m68k-unknown - os=-linux - ;; - m68knommu-*) - basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'` - os=-linux - ;; - m88k-omron*) - basic_machine=m88k-omron - ;; - magnum | m3230) - basic_machine=mips-mips - os=-sysv - ;; - merlin) - basic_machine=ns32k-utek - os=-sysv - ;; - microblaze*) - basic_machine=microblaze-xilinx - ;; - mingw64) - basic_machine=x86_64-pc - os=-mingw64 - ;; - mingw32) - basic_machine=i686-pc - os=-mingw32 - ;; - mingw32ce) - basic_machine=arm-unknown - os=-mingw32ce - ;; miniframe) - basic_machine=m68000-convergent - ;; - *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) - basic_machine=m68k-atari - os=-mint + cpu=m68000 + vendor=convergent ;; - mips3*-*) - basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` - ;; - mips3*) - basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown - ;; - monitor) - basic_machine=m68k-rom68k - os=-coff - ;; - morphos) - basic_machine=powerpc-unknown - os=-morphos - ;; - moxiebox) - basic_machine=moxie-unknown - os=-moxiebox - ;; - msdos) - basic_machine=i386-pc - os=-msdos - ;; - ms1-*) - basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'` - ;; - msys) - basic_machine=i686-pc - os=-msys - ;; - mvs) - basic_machine=i370-ibm - os=-mvs - ;; - nacl) - basic_machine=le32-unknown - os=-nacl - ;; - ncr3000) - basic_machine=i486-ncr - os=-sysv4 - ;; - netbsd386) - basic_machine=i386-unknown - os=-netbsd - ;; - netwinder) - basic_machine=armv4l-rebel - 
os=-linux - ;; - news | news700 | news800 | news900) - basic_machine=m68k-sony - os=-newsos - ;; - news1000) - basic_machine=m68030-sony - os=-newsos + *mint | mint[0-9]* | *MiNT | *MiNT[0-9]*) + cpu=m68k + vendor=atari + basic_os=mint ;; news-3600 | risc-news) - basic_machine=mips-sony - os=-newsos - ;; - necv70) - basic_machine=v70-nec - os=-sysv - ;; - next | m*-next ) - basic_machine=m68k-next - case $os in - -nextstep* ) + cpu=mips + vendor=sony + basic_os=newsos + ;; + next | m*-next) + cpu=m68k + vendor=next + case $basic_os in + openstep*) + ;; + nextstep*) ;; - -ns2*) - os=-nextstep2 + ns2*) + basic_os=nextstep2 ;; *) - os=-nextstep3 + basic_os=nextstep3 ;; esac ;; - nh3000) - basic_machine=m68k-harris - os=-cxux - ;; - nh[45]000) - basic_machine=m88k-harris - os=-cxux - ;; - nindy960) - basic_machine=i960-intel - os=-nindy - ;; - mon960) - basic_machine=i960-intel - os=-mon960 - ;; - nonstopux) - basic_machine=mips-compaq - os=-nonstopux - ;; np1) - basic_machine=np1-gould - ;; - neo-tandem) - basic_machine=neo-tandem - ;; - nse-tandem) - basic_machine=nse-tandem - ;; - nsr-tandem) - basic_machine=nsr-tandem + cpu=np1 + vendor=gould ;; op50n-* | op60c-*) - basic_machine=hppa1.1-oki - os=-proelf - ;; - openrisc | openrisc-*) - basic_machine=or32-unknown - ;; - os400) - basic_machine=powerpc-ibm - os=-os400 - ;; - OSE68000 | ose68000) - basic_machine=m68000-ericsson - os=-ose - ;; - os68k) - basic_machine=m68k-none - os=-os68k + cpu=hppa1.1 + vendor=oki + basic_os=proelf ;; pa-hitachi) - basic_machine=hppa1.1-hitachi - os=-hiuxwe2 - ;; - paragon) - basic_machine=i860-intel - os=-osf - ;; - parisc) - basic_machine=hppa-unknown - os=-linux - ;; - parisc-*) - basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'` - os=-linux + cpu=hppa1.1 + vendor=hitachi + basic_os=hiuxwe2 ;; pbd) - basic_machine=sparc-tti + cpu=sparc + vendor=tti ;; pbb) - basic_machine=m68k-tti - ;; - pc532 | pc532-*) - basic_machine=ns32k-pc532 - ;; - pc98) - basic_machine=i386-pc - 
;; - pc98-*) - basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'` + cpu=m68k + vendor=tti ;; - pentium | p5 | k5 | k6 | nexgen | viac3) - basic_machine=i586-pc - ;; - pentiumpro | p6 | 6x86 | athlon | athlon_*) - basic_machine=i686-pc - ;; - pentiumii | pentium2 | pentiumiii | pentium3) - basic_machine=i686-pc - ;; - pentium4) - basic_machine=i786-pc - ;; - pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) - basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pentiumpro-* | p6-* | 6x86-* | athlon-*) - basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) - basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pentium4-*) - basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'` + pc532) + cpu=ns32k + vendor=pc532 ;; pn) - basic_machine=pn-gould - ;; - power) basic_machine=power-ibm - ;; - ppc | ppcbe) basic_machine=powerpc-unknown - ;; - ppc-* | ppcbe-*) - basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` + cpu=pn + vendor=gould ;; - ppcle | powerpclittle | ppc-le | powerpc-little) - basic_machine=powerpcle-unknown - ;; - ppcle-* | powerpclittle-*) - basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ppc64) basic_machine=powerpc64-unknown - ;; - ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ppc64le | powerpc64little | ppc64-le | powerpc64-little) - basic_machine=powerpc64le-unknown - ;; - ppc64le-* | powerpc64little-*) - basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'` + power) + cpu=power + vendor=ibm ;; ps2) - basic_machine=i386-ibm - ;; - pw32) - basic_machine=i586-unknown - os=-pw32 - ;; - rdos | rdos64) - basic_machine=x86_64-pc - os=-rdos - ;; - rdos32) - basic_machine=i386-pc - os=-rdos - ;; - rom68k) - basic_machine=m68k-rom68k - os=-coff + cpu=i386 + vendor=ibm ;; rm[46]00) - basic_machine=mips-siemens + cpu=mips + vendor=siemens ;; rtpc | rtpc-*) - 
basic_machine=romp-ibm - ;; - s390 | s390-*) - basic_machine=s390-ibm - ;; - s390x | s390x-*) - basic_machine=s390x-ibm - ;; - sa29200) - basic_machine=a29k-amd - os=-udi + cpu=romp + vendor=ibm ;; - sb1) - basic_machine=mipsisa64sb1-unknown + sde) + cpu=mipsisa32 + vendor=sde + basic_os=${basic_os:-elf} ;; - sb1el) - basic_machine=mipsisa64sb1el-unknown + simso-wrs) + cpu=sparclite + vendor=wrs + basic_os=vxworks ;; - sde) - basic_machine=mipsisa32-sde - os=-elf + tower | tower-32) + cpu=m68k + vendor=ncr ;; - sei) - basic_machine=mips-sei - os=-seiux + vpp*|vx|vx-*) + cpu=f301 + vendor=fujitsu ;; - sequent) - basic_machine=i386-sequent + w65) + cpu=w65 + vendor=wdc ;; - sh) - basic_machine=sh-hitachi - os=-hms + w89k-*) + cpu=hppa1.1 + vendor=winbond + basic_os=proelf ;; - sh5el) - basic_machine=sh5le-unknown + none) + cpu=none + vendor=none ;; - sh64) - basic_machine=sh64-unknown + leon|leon[3-9]) + cpu=sparc + vendor=$basic_machine ;; - sparclite-wrs | simso-wrs) - basic_machine=sparclite-wrs - os=-vxworks + leon-*|leon[3-9]-*) + cpu=sparc + vendor=$(echo "$basic_machine" | sed 's/-.*//') ;; - sps7) - basic_machine=m68k-bull - os=-sysv2 + + *-*) + # shellcheck disable=SC2162 + IFS="-" read cpu vendor <&2 - exit 1 + # Recognize the canonical CPU types that are allowed with any + # company name. 
+ case $cpu in + 1750a | 580 \ + | a29k \ + | aarch64 | aarch64_be \ + | abacus \ + | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] \ + | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] \ + | alphapca5[67] | alpha64pca5[67] \ + | am33_2.0 \ + | amdgcn \ + | arc | arceb \ + | arm | arm[lb]e | arme[lb] | armv* \ + | avr | avr32 \ + | asmjs \ + | ba \ + | be32 | be64 \ + | bfin | bpf | bs2000 \ + | c[123]* | c30 | [cjt]90 | c4x \ + | c8051 | clipper | craynv | csky | cydra \ + | d10v | d30v | dlx | dsp16xx \ + | e2k | elxsi | epiphany \ + | f30[01] | f700 | fido | fr30 | frv | ft32 | fx80 \ + | h8300 | h8500 \ + | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ + | hexagon \ + | i370 | i*86 | i860 | i960 | ia16 | ia64 \ + | ip2k | iq2000 \ + | k1om \ + | le32 | le64 \ + | lm32 \ + | loongarch32 | loongarch64 | loongarchx32 \ + | m32c | m32r | m32rle \ + | m5200 | m68000 | m680[012346]0 | m68360 | m683?2 | m68k \ + | m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x \ + | m88110 | m88k | maxq | mb | mcore | mep | metag \ + | microblaze | microblazeel \ + | mips | mipsbe | mipseb | mipsel | mipsle \ + | mips16 \ + | mips64 | mips64eb | mips64el \ + | mips64octeon | mips64octeonel \ + | mips64orion | mips64orionel \ + | mips64r5900 | mips64r5900el \ + | mips64vr | mips64vrel \ + | mips64vr4100 | mips64vr4100el \ + | mips64vr4300 | mips64vr4300el \ + | mips64vr5000 | mips64vr5000el \ + | mips64vr5900 | mips64vr5900el \ + | mipsisa32 | mipsisa32el \ + | mipsisa32r2 | mipsisa32r2el \ + | mipsisa32r6 | mipsisa32r6el \ + | mipsisa64 | mipsisa64el \ + | mipsisa64r2 | mipsisa64r2el \ + | mipsisa64r6 | mipsisa64r6el \ + | mipsisa64sb1 | mipsisa64sb1el \ + | mipsisa64sr71k | mipsisa64sr71kel \ + | mipsr5900 | mipsr5900el \ + | mipstx39 | mipstx39el \ + | mmix \ + | mn10200 | mn10300 \ + | moxie \ + | mt \ + | msp430 \ + | nds32 | nds32le | nds32be \ + | nfp \ + | nios | nios2 | nios2eb | nios2el \ + | none | np1 | ns16k | ns32k | nvptx \ + | open8 \ + | or1k* \ + | 
or32 \ + | orion \ + | picochip \ + | pdp10 | pdp11 | pj | pjl | pn | power \ + | powerpc | powerpc64 | powerpc64le | powerpcle | powerpcspe \ + | pru \ + | pyramid \ + | riscv | riscv32 | riscv32be | riscv64 | riscv64be \ + | rl78 | romp | rs6000 | rx \ + | s390 | s390x \ + | score \ + | sh | shl \ + | sh[1234] | sh[24]a | sh[24]ae[lb] | sh[23]e | she[lb] | sh[lb]e \ + | sh[1234]e[lb] | sh[12345][lb]e | sh[23]ele | sh64 | sh64le \ + | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet \ + | sparclite \ + | sparcv8 | sparcv9 | sparcv9b | sparcv9v | sv1 | sx* \ + | spu \ + | tahoe \ + | thumbv7* \ + | tic30 | tic4x | tic54x | tic55x | tic6x | tic80 \ + | tron \ + | ubicom32 \ + | v70 | v850 | v850e | v850e1 | v850es | v850e2 | v850e2v3 \ + | vax \ + | visium \ + | w65 \ + | wasm32 | wasm64 \ + | we32k \ + | x86 | x86_64 | xc16x | xgate | xps100 \ + | xstormy16 | xtensa* \ + | ymp \ + | z8k | z80) + ;; + + *) + echo Invalid configuration \`"$1"\': machine \`"$cpu-$vendor"\' not recognized 1>&2 + exit 1 + ;; + esac ;; esac # Here we canonicalize certain aliases for manufacturers. -case $basic_machine in - *-digital*) - basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` +case $vendor in + digital*) + vendor=dec ;; - *-commodore*) - basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` + commodore*) + vendor=cbm ;; *) ;; @@ -1340,200 +1280,213 @@ esac # Decode manufacturer-specific aliases for certain operating systems. -if [ x"$os" != x"" ] +if test x$basic_os != x then + +# First recognize some ad-hoc caes, or perhaps split kernel-os, or else just +# set os. 
+case $basic_os in + gnu/linux*) + kernel=linux + os=$(echo $basic_os | sed -e 's|gnu/linux|gnu|') + ;; + os2-emx) + kernel=os2 + os=$(echo $basic_os | sed -e 's|os2-emx|emx|') + ;; + nto-qnx*) + kernel=nto + os=$(echo $basic_os | sed -e 's|nto-qnx|qnx|') + ;; + *-*) + # shellcheck disable=SC2162 + IFS="-" read kernel os <&2 - exit 1 + # No normalization, but not necessarily accepted, that comes below. ;; esac + else # Here we handle the default operating systems that come with various machines. @@ -1546,261 +1499,361 @@ else # will signal an error saying that MANUFACTURER isn't an operating # system, and we'll never get to this point. -case $basic_machine in +kernel= +case $cpu-$vendor in score-*) - os=-elf + os=elf ;; spu-*) - os=-elf + os=elf ;; *-acorn) - os=-riscix1.2 + os=riscix1.2 ;; arm*-rebel) - os=-linux + kernel=linux + os=gnu ;; arm*-semi) - os=-aout + os=aout ;; c4x-* | tic4x-*) - os=-coff + os=coff ;; c8051-*) - os=-elf + os=elf + ;; + clipper-intergraph) + os=clix ;; hexagon-*) - os=-elf + os=elf ;; tic54x-*) - os=-coff + os=coff ;; tic55x-*) - os=-coff + os=coff ;; tic6x-*) - os=-coff + os=coff ;; # This must come before the *-dec entry. pdp10-*) - os=-tops20 + os=tops20 ;; pdp11-*) - os=-none + os=none ;; *-dec | vax-*) - os=-ultrix4.2 + os=ultrix4.2 ;; m68*-apollo) - os=-domain + os=domain ;; i386-sun) - os=-sunos4.0.2 + os=sunos4.0.2 ;; m68000-sun) - os=-sunos3 + os=sunos3 ;; m68*-cisco) - os=-aout + os=aout ;; mep-*) - os=-elf + os=elf ;; mips*-cisco) - os=-elf + os=elf ;; mips*-*) - os=-elf + os=elf ;; or32-*) - os=-coff + os=coff ;; *-tti) # must be before sparc entry or we get the wrong os. 
- os=-sysv3 + os=sysv3 ;; sparc-* | *-sun) - os=-sunos4.1.1 + os=sunos4.1.1 ;; - *-be) - os=-beos + pru-*) + os=elf ;; - *-haiku) - os=-haiku + *-be) + os=beos ;; *-ibm) - os=-aix + os=aix ;; *-knuth) - os=-mmixware + os=mmixware ;; *-wec) - os=-proelf + os=proelf ;; *-winbond) - os=-proelf + os=proelf ;; *-oki) - os=-proelf + os=proelf ;; *-hp) - os=-hpux + os=hpux ;; *-hitachi) - os=-hiux + os=hiux ;; i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) - os=-sysv + os=sysv ;; *-cbm) - os=-amigaos + os=amigaos ;; *-dg) - os=-dgux + os=dgux ;; *-dolphin) - os=-sysv3 + os=sysv3 ;; m68k-ccur) - os=-rtu + os=rtu ;; m88k-omron*) - os=-luna + os=luna ;; - *-next ) - os=-nextstep + *-next) + os=nextstep ;; *-sequent) - os=-ptx + os=ptx ;; *-crds) - os=-unos + os=unos ;; *-ns) - os=-genix + os=genix ;; i370-*) - os=-mvs - ;; - *-next) - os=-nextstep3 + os=mvs ;; *-gould) - os=-sysv + os=sysv ;; *-highlevel) - os=-bsd + os=bsd ;; *-encore) - os=-bsd + os=bsd ;; *-sgi) - os=-irix + os=irix ;; *-siemens) - os=-sysv4 + os=sysv4 ;; *-masscomp) - os=-rtu + os=rtu ;; f30[01]-fujitsu | f700-fujitsu) - os=-uxpv + os=uxpv ;; *-rom68k) - os=-coff + os=coff ;; *-*bug) - os=-coff + os=coff ;; *-apple) - os=-macos + os=macos ;; *-atari*) - os=-mint + os=mint + ;; + *-wrs) + os=vxworks ;; *) - os=-none + os=none ;; esac + fi +# Now, validate our (potentially fixed-up) OS. +case $os in + # Sometimes we do "kernel-libc", so those need to count as OSes. + musl* | newlib* | uclibc*) + ;; + # Likewise for "kernel-abi" + eabi* | gnueabi*) + ;; + # VxWorks passes extra cpu info in the 4th filed. + simlinux | simwindows | spe) + ;; + # Now accept the basic system types. + # The portable systems comes first. + # Each alternative MUST end in a * to match a version number. 
+ gnu* | android* | bsd* | mach* | minix* | genix* | ultrix* | irix* \ + | *vms* | esix* | aix* | cnk* | sunos | sunos[34]* \ + | hpux* | unos* | osf* | luna* | dgux* | auroraux* | solaris* \ + | sym* | plan9* | psp* | sim* | xray* | os68k* | v88r* \ + | hiux* | abug | nacl* | netware* | windows* \ + | os9* | macos* | osx* | ios* \ + | mpw* | magic* | mmixware* | mon960* | lnews* \ + | amigaos* | amigados* | msdos* | newsos* | unicos* | aof* \ + | aos* | aros* | cloudabi* | sortix* | twizzler* \ + | nindy* | vxsim* | vxworks* | ebmon* | hms* | mvs* \ + | clix* | riscos* | uniplus* | iris* | isc* | rtu* | xenix* \ + | mirbsd* | netbsd* | dicos* | openedition* | ose* \ + | bitrig* | openbsd* | solidbsd* | libertybsd* | os108* \ + | ekkobsd* | freebsd* | riscix* | lynxos* | os400* \ + | bosx* | nextstep* | cxux* | aout* | elf* | oabi* \ + | ptx* | coff* | ecoff* | winnt* | domain* | vsta* \ + | udi* | lites* | ieee* | go32* | aux* | hcos* \ + | chorusrdb* | cegcc* | glidix* \ + | cygwin* | msys* | pe* | moss* | proelf* | rtems* \ + | midipix* | mingw32* | mingw64* | mint* \ + | uxpv* | beos* | mpeix* | udk* | moxiebox* \ + | interix* | uwin* | mks* | rhapsody* | darwin* \ + | openstep* | oskit* | conix* | pw32* | nonstopux* \ + | storm-chaos* | tops10* | tenex* | tops20* | its* \ + | os2* | vos* | palmos* | uclinux* | nucleus* | morphos* \ + | scout* | superux* | sysv* | rtmk* | tpf* | windiss* \ + | powermax* | dnix* | nx6 | nx7 | sei* | dragonfly* \ + | skyos* | haiku* | rdos* | toppers* | drops* | es* \ + | onefs* | tirtos* | phoenix* | fuchsia* | redox* | bme* \ + | midnightbsd* | amdhsa* | unleashed* | emscripten* | wasi* \ + | nsk* | powerunix* | genode* | zvmoe* | qnx* | emx*) + ;; + # This one is extra strict with allowed versions + sco3.2v2 | sco3.2v[4-9]* | sco5v6*) + # Don't forget version if it is 3.2v4 or newer. 
+ ;; + none) + ;; + *) + echo Invalid configuration \`"$1"\': OS \`"$os"\' not recognized 1>&2 + exit 1 + ;; +esac + +# As a final step for OS-related things, validate the OS-kernel combination +# (given a valid OS), if there is a kernel. +case $kernel-$os in + linux-gnu* | linux-dietlibc* | linux-android* | linux-newlib* | linux-musl* | linux-uclibc* ) + ;; + uclinux-uclibc* ) + ;; + -dietlibc* | -newlib* | -musl* | -uclibc* ) + # These are just libc implementations, not actual OSes, and thus + # require a kernel. + echo "Invalid configuration \`$1': libc \`$os' needs explicit kernel." 1>&2 + exit 1 + ;; + kfreebsd*-gnu* | kopensolaris*-gnu*) + ;; + vxworks-simlinux | vxworks-simwindows | vxworks-spe) + ;; + nto-qnx*) + ;; + os2-emx) + ;; + *-eabi* | *-gnueabi*) + ;; + -*) + # Blank kernel with real OS is always fine. + ;; + *-*) + echo "Invalid configuration \`$1': Kernel \`$kernel' not known to work with OS \`$os'." 1>&2 + exit 1 + ;; +esac + # Here we handle the case where we know the os, and the CPU type, but not the # manufacturer. We pick the logical manufacturer. 
-vendor=unknown -case $basic_machine in - *-unknown) - case $os in - -riscix*) +case $vendor in + unknown) + case $cpu-$os in + *-riscix*) vendor=acorn ;; - -sunos*) + *-sunos*) vendor=sun ;; - -cnk*|-aix*) + *-cnk* | *-aix*) vendor=ibm ;; - -beos*) + *-beos*) vendor=be ;; - -hpux*) + *-hpux*) vendor=hp ;; - -mpeix*) + *-mpeix*) vendor=hp ;; - -hiux*) + *-hiux*) vendor=hitachi ;; - -unos*) + *-unos*) vendor=crds ;; - -dgux*) + *-dgux*) vendor=dg ;; - -luna*) + *-luna*) vendor=omron ;; - -genix*) + *-genix*) vendor=ns ;; - -mvs* | -opened*) + *-clix*) + vendor=intergraph + ;; + *-mvs* | *-opened*) + vendor=ibm + ;; + *-os400*) vendor=ibm ;; - -os400*) + s390-* | s390x-*) vendor=ibm ;; - -ptx*) + *-ptx*) vendor=sequent ;; - -tpf*) + *-tpf*) vendor=ibm ;; - -vxsim* | -vxworks* | -windiss*) + *-vxsim* | *-vxworks* | *-windiss*) vendor=wrs ;; - -aux*) + *-aux*) vendor=apple ;; - -hms*) + *-hms*) vendor=hitachi ;; - -mpw* | -macos*) + *-mpw* | *-macos*) vendor=apple ;; - -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + *-*mint | *-mint[0-9]* | *-*MiNT | *-MiNT[0-9]*) vendor=atari ;; - -vos*) + *-vos*) vendor=stratus ;; esac - basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` ;; esac -echo $basic_machine$os +echo "$cpu-$vendor-${kernel:+$kernel-}$os" exit # Local variables: -# eval: (add-hook 'write-file-hooks 'time-stamp) +# eval: (add-hook 'before-save-hook 'time-stamp) # time-stamp-start: "timestamp='" # time-stamp-format: "%:y-%02m-%02d" # time-stamp-end: "'" diff --git a/third-party/gasnet/gasnet-src/configure b/third-party/gasnet/gasnet-src/configure index ee99246bd220..a3285068b386 100755 --- a/third-party/gasnet/gasnet-src/configure +++ b/third-party/gasnet/gasnet-src/configure @@ -25,7 +25,7 @@ $_gasneti_envcmd > config.env echo > /dev/null \ . -# From configure.in gex-2020.10.0. +# From configure.in gex-2021.3.0-0-g9280a96. # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.69. 
# @@ -607,7 +607,7 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='GASNet' PACKAGE_TARNAME= -PACKAGE_VERSION='2020.10.0' +PACKAGE_VERSION='2021.3.0' PACKAGE_STRING= PACKAGE_BUGREPORT='https://gasnet-bugs.lbl.gov' PACKAGE_URL='https://gasnet.lbl.gov' @@ -673,8 +673,20 @@ DEBUGMALLOC_VAR DEBUGMALLOC DEBUGMALLOC_FALSE DEBUGMALLOC_TRUE +GASNETI_HWLOC_CALC_PATH +GASNETI_HWLOC_BIND_PATH +HWLOC_LIBS +HWLOC_LDFLAGS +HWLOC_CFLAGS +HWLOC_guess_prog USE_PLPA_FALSE USE_PLPA_TRUE +HAVE_MK_CLASS_CUDA_UVA_FALSE +HAVE_MK_CLASS_CUDA_UVA_TRUE +CUDA_UVA_LIBS +CUDA_UVA_LDFLAGS +CUDA_UVA_CFLAGS +CUDA_guess_prog LLDB_PATH DBX_PATH IDB_PATH @@ -1032,6 +1044,7 @@ enable_pdeathsig enable_fork enable_loopback_memsync enable_throttle_poll +with_maxeps enable_force_generic_atomicops enable_force_os_atomicops enable_force_compiler_atomicops @@ -1069,8 +1082,9 @@ with_fh_cflags enable_ibv_rcv_thread enable_ibv_conn_thread with_ibv_spawner -enable_ibv_multirail with_ibv_max_hcas +enable_ibv_multirail +with_ibv_ports with_ibv_physmem_max enable_ibv_physmem_probe enable_ibv_srq @@ -1081,6 +1095,17 @@ enable_aries enable_aries_udreg with_aries_max_medium enable_aries_multi_domain +enable_ofi +with_ofi_home +with_ofi_cflags +with_ofi_libs +with_ofi_ldflags +with_ofi_spawner +with_ofi_provider +enable_ofi_thread_domain +enable_ofi_mr_scalable +with_ofi_num_completions +with_ofi_max_medium with_ssh_cmd with_ssh_options with_ssh_nodefile @@ -1102,7 +1127,20 @@ enable_backtrace_idb enable_backtrace_dbx enable_backtrace_lldb enable_backtrace_printstack +enable_memory_kinds +enable_kind_cuda_uva +with_cuda_home +with_cuda_cflags +with_cuda_libs +with_cuda_ldflags enable_plpa +enable_hwloc +with_hwloc_home +with_hwloc_cflags +with_hwloc_libs +with_hwloc_ldflags +enable_hwloc_utils +with_hwloc_utils_home enable_mmap enable_bug3480_workaround with_max_segsize @@ -1774,6 +1812,7 @@ Optional Features and Packages: --(en|dis)able-fork Allow internal use of fork() and related calls (default is to probe 
at configure time) --enable-loopback-memsync Force memory barriers for GASNet local (loopback) puts and gets --enable-throttle-poll throttle polling threads in multi-threaded configurations to reduce contention (experimental, only implemented in some conduits) + --with-maxeps= Maximum number of endpoints per-process, subject to per-conduit limits (default is conduit-dependent) --enable-force-generic-atomicops Force mutex-based atomic ops (default is platform specific) --enable-force-os-atomicops Force os-provided atomic ops (default is platform specific) --enable-force-compiler-atomicops Force compiler-provided atomic ops (default is platform specific) @@ -1823,8 +1862,9 @@ ibv-conduit options: (InfiniBand IB Verbs conduit (ibv)) --(en|dis)able-ibv-rcv-thread See ibv-conduit/README (enabled by default if pthreads available) --(en|dis)able-ibv-conn-thread See ibv-conduit/README (enabled by default if pthreads available) --with-ibv-spawner= ibv job spawner ("ssh", "mpi" or "pmi", default is mpi when available) - --enable-ibv-multirail Enable IBV over multiple HCAs, see ibv-conduit/README (disabled by default) - --with-ibv-max-hcas= maximum number of IBV HCAs to open for multi-rail support (default is 2) + --with-ibv-max-hcas= Maximum number of IBV HCAs to open (default is 1) + --(en|dis)able-ibv-multirail Enable IBV over multiple HCAs. Use of --with-ibv-max-hcas=N is prefered (see ibv-conduit/README for more info). 
+ --with-ibv-ports= Default value of GASNET_IBV_PORTS environment variable (default is empty) --with-ibv-physmem-max= Maximum physical memory IBV may pin: less than 1.0 is fraction of apparent physical memory, larger than 1 is absolute size with optional M, G and T suffix (default is "2/3") --(en|dis)able-ibv-physmem-probe Force default enable/disable of GASNET_PHYSMEM_PROBE in ibv-conduit --disable-ibv-srq Disable Shared Receive Queue (SRQ) support in ibv-conduit (enabled by default) @@ -1835,9 +1875,22 @@ ibv-conduit options: (InfiniBand IB Verbs conduit (ibv)) aries-conduit options: (Cray XC Aries conduit (aries)) --(en|dis)able-aries Enable/disable the Cray XC Aries conduit (aries) (auto-detected) --(en|dis)able-aries-udreg Use Cray's UDREG to cache memory registration (enabled by default if available) - --with-aries-max-medium= specify gasnet_AMMaxMedium() (default 4032) + --with-aries-max-medium= specify default value of gasnet_AMMaxMedium() (default 4032) --enable-aries-multi-domain Use experimental multi-domain support in PAR builds +ofi-conduit options: (Portable OpenFabrics Interfaces conduit (ofi)) **EXPERIMENTAL** + --(en|dis)able-ofi Enable/disable the Portable OpenFabrics Interfaces conduit (ofi) + --with-ofi-home= OFI_HOME setting: Install prefix of OFI libfabric (auto-detected from PATH) + --with-ofi-cflags= OFI_CFLAGS setting + --with-ofi-libs= OFI_LIBS setting + --with-ofi-ldflags= OFI_LDFLAGS setting + --with-ofi-spawner= ofi job spawner ("ssh", "mpi" or "pmi", default is mpi when available) + --with-ofi-provider= Statically configure ofi-conduit for the given provider + --(en|dis)able-ofi-thread-domain Indicates if the conduit should use the FI_THREAD_DOMAIN threading model(advanced users only) + --(en|dis)able-ofi-mr-scalable Indicates if the conduit should statically compile FI_MR_SCALABLE support into ofi-conduit (advanced users only) + --with-ofi-num-completions= Max number of completions for ofi-conduit to read from a CQ at one time 
(default 64) + --with-ofi-max-medium= gasnet_AMMaxMedium() for the ofi-conduit (default 8192) + Job spawner options: --with-ssh-cmd= default value for GASNET_SSH_CMD environment variable (default "ssh") --with-ssh-options= default value for GASNET_SSH_OPTIONS environment variable (defaults to empty) @@ -1863,8 +1916,23 @@ Backtrace options: --(en|dis)able-backtrace-lldb support backtrace via lldb (auto-detected) --(en|dis)able-backtrace-printstack support backtrace via printstack (auto-detected) +Memory-kinds options: + --enable-memory-kinds Prototype support for memory kinds (transfers to/from device memory). Enables default auto-detection of all device type applicable to the target platform. Individual --enable-kind-* options have precedence. + --(en|dis)able-kind-cuda-uva Support for memory kinds on UVA-capable CUDA devices (auto-detected with --enable-memory-kinds, otherwise disabled) + --with-cuda-home= CUDA_HOME setting: Install prefix of CUDA toolkit (auto-detected from PATH) + --with-cuda-cflags= CUDA_CFLAGS setting + --with-cuda-libs= CUDA_LIBS setting + --with-cuda-ldflags= CUDA_LDFLAGS setting + Misc options: --(en|dis)able-plpa use PLPA for CPU binding (default is to probe) + --(en|dis)able-hwloc hwloc library (auto-detected) + --with-hwloc-home= HWLOC_HOME setting: Install prefix of hwloc (auto-detected from PATH) + --with-hwloc-cflags= HWLOC_CFLAGS setting + --with-hwloc-libs= HWLOC_LIBS setting + --with-hwloc-ldflags= HWLOC_LDFLAGS setting + --(en|dis)able-hwloc-utils hwloc utilities (auto-detected) + --with-hwloc-utils-home= HWLOC_UTILS_HOME setting: Install prefix of hwloc command-line utilities (auto-detected from PATH) --(en|dis)able-mmap force the use or non-use of mmap (default to probe) --enable-bug3480-workaround See Aries conduit README --with-max-segsize= Upper bound for GASNet segment utilization. Can be absolute size or fraction of physmem. 
Suffix denotes per-process or per-host limit (ex: "2GB/P", "0.85/H") @@ -1951,7 +2019,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -GASNet configure 2020.10.0 +GASNet configure 2021.3.0 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -2666,7 +2734,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by GASNet $as_me 2020.10.0, which was +It was created by GASNet $as_me 2021.3.0, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -3677,7 +3745,7 @@ fi # Define the identity of the package. PACKAGE=GASNet - VERSION=2020.10.0 + VERSION=2021.3.0 # Some tools Automake needs. @@ -3778,10 +3846,10 @@ $as_echo_n "checking for package version... " >&6; } display_version_info="$display_version_info GASNet" - display_version_info="$display_version_info 2020.10.0" + display_version_info="$display_version_info 2021.3.0" if test -d "$srcdir/.git" ; then - git_describe=`${GIT=git} --git-dir="$srcdir/.git" describe 2> /dev/null` + git_describe=`( cd "$srcdir" && ${GIT=git} describe --long --dirty --always ) 2> /dev/null` if test -n "$git_describe"; then display_version_info="$display_version_info ($git_describe)" fi @@ -6039,11 +6107,11 @@ fi gasnet_toolsonly_mode=no cat >>confdefs.h <<_ACEOF -#define GASNET_RELEASE_VERSION_MAJOR 2020 +#define GASNET_RELEASE_VERSION_MAJOR 2021 _ACEOF cat >>confdefs.h <<_ACEOF -#define GASNET_RELEASE_VERSION_MINOR 10 +#define GASNET_RELEASE_VERSION_MINOR 3 _ACEOF cat >>confdefs.h <<_ACEOF @@ -6055,7 +6123,7 @@ cat >>confdefs.h <<_ACEOF _ACEOF cat >>confdefs.h <<_ACEOF -#define GASNETI_EX_SPEC_VERSION_MINOR 10 +#define GASNETI_EX_SPEC_VERSION_MINOR 13 _ACEOF cat >>confdefs.h <<_ACEOF @@ -6071,11 +6139,11 @@ cat >>confdefs.h <<_ACEOF _ACEOF cat >>confdefs.h <<_ACEOF -#define GASNETI_TOOLS_SPEC_VERSION_MINOR 16 +#define 
GASNETI_TOOLS_SPEC_VERSION_MINOR 17 _ACEOF cat >>confdefs.h <<_ACEOF -#define GASNETI_RELEASE_VERSION 2020.10.0 +#define GASNETI_RELEASE_VERSION 2021.3.0 _ACEOF @@ -13162,16 +13230,6 @@ GASNETI_PTR_BITS=`expr $SIZEOF_VOID_P \* 8` # XXX: Incomplete... -case "$enable_arch_altix" in - '' | no) : - - ;; - *) : - force_arch_altix=yes - ;; -esac - - case "$enable_arch_ibmpe" in '' | no) : @@ -23242,15 +23300,144 @@ as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 ;; - ia64) + esac + case "$target_os" in + darwin*) + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CFLAG(${misc_flag_prefix}-Wno-long-double) vvvvvvvvvvvvvvvvvvvvvv (L:1) + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CFLAGS,"$CFLAGS ${misc_flag_prefix}-Wno-long-double") vvvvvvvvvvvvvvvvvvvvvv (L:2) + + if test "$_pushcnt_CFLAGS" = "" ; then + _pushcnt_CFLAGS=0 + fi + if test "$_total_pushcnt" = "" ; then + _total_pushcnt=0 + fi + if test "${CFLAGS+set}" = set; then + _gasnet_pushvar_isset=1 + else + _gasnet_pushvar_isset=0 + fi + eval _pushedvar_CFLAGS_$_pushcnt_CFLAGS=\$CFLAGS + eval _pushedvarset_CFLAGS_$_pushcnt_CFLAGS=$_gasnet_pushvar_isset + _pushcnt_CFLAGS=`expr $_pushcnt_CFLAGS + 1` + _total_pushcnt=`expr $_total_pushcnt + 1` + CFLAGS="$CFLAGS ${misc_flag_prefix}-Wno-long-double" + echo "pushed new CFLAGS value: $CFLAGS" >&5 + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CFLAGS,"$CFLAGS ${misc_flag_prefix}-Wno-long-double") ^^^^^^^^^^^^^^^^^^^^^^ (L:2) + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler flag ${misc_flag_prefix}-Wno-long-double" >&5 +$as_echo_n "checking for C compiler flag ${misc_flag_prefix}-Wno-long-double... " >&6; } + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CCOMPILE_WITHWARN_NORETRY(...) vvvvvvvvvvvvvvvvvvvvvv (L:2) + + gasnet_compile_cmd="${CC-cc} -c $CFLAGS $CPPFLAGS conftest.c" + cat > conftest.c <&5 + ( $gasnet_compile_cmd ) > conftest-runcmdout 2> conftest-runcmderr + gasnet_cmd_result="$?" 
+ gasnet_cmd_stdout="`cat conftest-runcmdout`" + gasnet_cmd_stderr="`cat conftest-runcmderr`" + cat conftest-runcmdout >&5 + cat conftest-runcmderr >&5 + echo gasnet_cmd_result=$gasnet_cmd_result >&5 + rm -rf conftest-runcmdout conftest-runcmderr + if test "$gasnet_cmd_result" = "0" ; then + if test -z "$gasnet_cmd_stdout$gasnet_cmd_stderr" ; then + : + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_POPVAR(CFLAGS) vvvvvvvvvvvvvvvvvvvvvv (L:4) + + if test "$_pushcnt_CFLAGS" -ge "1"; then + _pushcnt_CFLAGS=`expr $_pushcnt_CFLAGS - 1` + _total_pushcnt=`expr $_total_pushcnt - 1` + eval _gasnet_pushvar_isset=\$_pushedvarset_CFLAGS_$_pushcnt_CFLAGS + if test "$_gasnet_pushvar_isset" = "1" ; then + eval CFLAGS=\$_pushedvar_CFLAGS_$_pushcnt_CFLAGS + echo "popping CFLAGS back to: $CFLAGS" >&5 + else + unset CFLAGS + echo "popping CFLAGS back to: " >&5 + fi + else + + +echo +echo "configure error: INTERNAL ERROR: GASNET_PUSH/POPVAR underflow on CFLAGS" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 + + + fi + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_POPVAR(CFLAGS) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CFLAG(-mtls-size=64) vvvvvvvvvvvvvvvvvvvvvv (L:1) +# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CFLAG(${misc_flag_prefix}-Wlong-double) vvvvvvvvvvvvvvvvvvvvvv (L:1) + -# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CFLAGS,"$CFLAGS -mtls-size=64") vvvvvvvvvvvvvvvvvvvvvv (L:2) + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CFLAGS,"$CFLAGS ${misc_flag_prefix}-Wlong-double") vvvvvvvvvvvvvvvvvvvvvv (L:2) if test "$_pushcnt_CFLAGS" = "" ; then _pushcnt_CFLAGS=0 @@ -23267,15 +23454,15 @@ as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 eval _pushedvarset_CFLAGS_$_pushcnt_CFLAGS=$_gasnet_pushvar_isset _pushcnt_CFLAGS=`expr $_pushcnt_CFLAGS + 1` _total_pushcnt=`expr $_total_pushcnt + 1` - CFLAGS="$CFLAGS -mtls-size=64" + CFLAGS="$CFLAGS ${misc_flag_prefix}-Wlong-double" echo "pushed new CFLAGS value: $CFLAGS" >&5 -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CFLAGS,"$CFLAGS -mtls-size=64") ^^^^^^^^^^^^^^^^^^^^^^ (L:2) +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CFLAGS,"$CFLAGS ${misc_flag_prefix}-Wlong-double") ^^^^^^^^^^^^^^^^^^^^^^ (L:2) -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler flag -mtls-size=64" >&5 -$as_echo_n "checking for C compiler flag -mtls-size=64... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler flag ${misc_flag_prefix}-Wlong-double" >&5 +$as_echo_n "checking for C compiler flag ${misc_flag_prefix}-Wlong-double... " >&6; } @@ -23367,7 +23554,7 @@ as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 - MISC_CFLAGS="$MISC_CFLAGS -mtls-size=64" + MISC_CFLAGS="$MISC_CFLAGS ${misc_flag_prefix}-Wno-long-double" else : @@ -23478,7 +23665,7 @@ $as_echo "no/warning: $_GASNET_TRY_CFLAG_TMP" >&6; } if test "$gasnet_cmd_stdout$gasnet_cmd_stderr" = "$_GASNET_TRY_CFLAG_TMP" ; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes/persistent-warning: $_GASNET_TRY_CFLAG_TMP" >&5 $as_echo "yes/persistent-warning: $_GASNET_TRY_CFLAG_TMP" >&6; } - MISC_CFLAGS="$MISC_CFLAGS -mtls-size=64" + MISC_CFLAGS="$MISC_CFLAGS ${misc_flag_prefix}-Wno-long-double" else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no/new-warning: $_GASNET_TRY_CFLAG_TMP" >&5 $as_echo "no/new-warning: $_GASNET_TRY_CFLAG_TMP" >&6; } @@ -23622,83 +23809,18 @@ as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CFLAG(-mtls-size=64) ^^^^^^^^^^^^^^^^^^^^^^ (L:1) - - -;; - esac - case "$target_os" in - darwin*) - - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CFLAG(${misc_flag_prefix}-Wno-long-double) vvvvvvvvvvvvvvvvvvvvvv (L:1) - - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CFLAGS,"$CFLAGS ${misc_flag_prefix}-Wno-long-double") vvvvvvvvvvvvvvvvvvvvvv (L:2) - - if test "$_pushcnt_CFLAGS" = "" ; then - _pushcnt_CFLAGS=0 - fi - if test "$_total_pushcnt" = "" ; then - _total_pushcnt=0 - fi - if test "${CFLAGS+set}" = set; then - _gasnet_pushvar_isset=1 - else - _gasnet_pushvar_isset=0 - fi - eval _pushedvar_CFLAGS_$_pushcnt_CFLAGS=\$CFLAGS - eval _pushedvarset_CFLAGS_$_pushcnt_CFLAGS=$_gasnet_pushvar_isset - _pushcnt_CFLAGS=`expr $_pushcnt_CFLAGS + 1` - _total_pushcnt=`expr $_total_pushcnt + 1` - CFLAGS="$CFLAGS ${misc_flag_prefix}-Wno-long-double" - echo "pushed new CFLAGS value: $CFLAGS" >&5 - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CFLAGS,"$CFLAGS ${misc_flag_prefix}-Wno-long-double") ^^^^^^^^^^^^^^^^^^^^^^ (L:2) - - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler flag ${misc_flag_prefix}-Wno-long-double" >&5 -$as_echo_n "checking for C 
compiler flag ${misc_flag_prefix}-Wno-long-double... " >&6; } - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CCOMPILE_WITHWARN_NORETRY(...) vvvvvvvvvvvvvvvvvvvvvv (L:2) - - gasnet_compile_cmd="${CC-cc} -c $CFLAGS $CPPFLAGS conftest.c" - cat > conftest.c <&5 - ( $gasnet_compile_cmd ) > conftest-runcmdout 2> conftest-runcmderr - gasnet_cmd_result="$?" - gasnet_cmd_stdout="`cat conftest-runcmdout`" - gasnet_cmd_stderr="`cat conftest-runcmderr`" - cat conftest-runcmdout >&5 - cat conftest-runcmderr >&5 - echo gasnet_cmd_result=$gasnet_cmd_result >&5 - rm -rf conftest-runcmdout conftest-runcmderr - if test "$gasnet_cmd_result" = "0" ; then - if test -z "$gasnet_cmd_stdout$gasnet_cmd_stderr" ; then + else : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } + echo "configure: warned program was:" >&5 + cat $gasnet_testfile >&5 + + _GASNET_TRY_CFLAG_TMP="$gasnet_cmd_stdout$gasnet_cmd_stderr" @@ -23755,500 +23877,52 @@ as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CFLAG(${misc_flag_prefix}-Wlong-double) vvvvvvvvvvvvvvvvvvvvvv (L:1) - - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CFLAGS,"$CFLAGS ${misc_flag_prefix}-Wlong-double") vvvvvvvvvvvvvvvvvvvvvv (L:2) - - if test "$_pushcnt_CFLAGS" = "" ; then - _pushcnt_CFLAGS=0 - fi - if test "$_total_pushcnt" = "" ; then - _total_pushcnt=0 - fi - if test "${CFLAGS+set}" = set; then - _gasnet_pushvar_isset=1 - else - _gasnet_pushvar_isset=0 - fi - eval _pushedvar_CFLAGS_$_pushcnt_CFLAGS=\$CFLAGS - eval _pushedvarset_CFLAGS_$_pushcnt_CFLAGS=$_gasnet_pushvar_isset - _pushcnt_CFLAGS=`expr $_pushcnt_CFLAGS + 1` - _total_pushcnt=`expr $_total_pushcnt + 1` - CFLAGS="$CFLAGS ${misc_flag_prefix}-Wlong-double" - echo "pushed new CFLAGS value: $CFLAGS" >&5 - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CFLAGS,"$CFLAGS ${misc_flag_prefix}-Wlong-double") ^^^^^^^^^^^^^^^^^^^^^^ (L:2) - - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler flag 
${misc_flag_prefix}-Wlong-double" >&5 -$as_echo_n "checking for C compiler flag ${misc_flag_prefix}-Wlong-double... " >&6; } - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CCOMPILE_WITHWARN_NORETRY(...) vvvvvvvvvvvvvvvvvvvvvv (L:2) - - gasnet_compile_cmd="${CC-cc} -c $CFLAGS $CPPFLAGS conftest.c" - cat > conftest.c <&5 - ( $gasnet_compile_cmd ) > conftest-runcmdout 2> conftest-runcmderr - gasnet_cmd_result="$?" - gasnet_cmd_stdout="`cat conftest-runcmdout`" - gasnet_cmd_stderr="`cat conftest-runcmderr`" - cat conftest-runcmdout >&5 - cat conftest-runcmderr >&5 - echo gasnet_cmd_result=$gasnet_cmd_result >&5 - rm -rf conftest-runcmdout conftest-runcmderr - if test "$gasnet_cmd_result" = "0" ; then - if test -z "$gasnet_cmd_stdout$gasnet_cmd_stderr" ; then - : - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_POPVAR(CFLAGS) vvvvvvvvvvvvvvvvvvvvvv (L:4) - - if test "$_pushcnt_CFLAGS" -ge "1"; then - _pushcnt_CFLAGS=`expr $_pushcnt_CFLAGS - 1` - _total_pushcnt=`expr $_total_pushcnt - 1` - eval _gasnet_pushvar_isset=\$_pushedvarset_CFLAGS_$_pushcnt_CFLAGS - if test "$_gasnet_pushvar_isset" = "1" ; then - eval CFLAGS=\$_pushedvar_CFLAGS_$_pushcnt_CFLAGS - echo "popping CFLAGS back to: $CFLAGS" >&5 - else - unset CFLAGS - echo "popping CFLAGS back to: " >&5 - fi - else - - -echo -echo "configure error: INTERNAL ERROR: GASNET_PUSH/POPVAR underflow on CFLAGS" -if test "" ; then -if test -f "conftest.$ac_ext" ; then - errfile=conftest.$ac_ext -else - errfile=gasnet_errsave_file -fi -if test -f "$errfile" ; then - echo - echo " --- Failed program --- " - cat $errfile - echo " -----------------------" -fi -fi -if test -f "conftest.err" ; then - errfile=conftest.err -else - errfile=gasnet_errsave_err -fi -if test -f "$errfile" ; then - echo - echo "Compilation error: " - echo - cat $errfile -fi -echo -CONFIG_FILE=`pwd`/config.log -as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 - - - fi - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_POPVAR(CFLAGS) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) - - - - MISC_CFLAGS="$MISC_CFLAGS ${misc_flag_prefix}-Wno-long-double" - - else - : - - echo "configure: warned program was:" >&5 - cat $gasnet_testfile >&5 - - _GASNET_TRY_CFLAG_TMP="$gasnet_cmd_stdout$gasnet_cmd_stderr" - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_POPVAR(CFLAGS) vvvvvvvvvvvvvvvvvvvvvv (L:4) - - if test "$_pushcnt_CFLAGS" -ge "1"; then - _pushcnt_CFLAGS=`expr $_pushcnt_CFLAGS - 1` - _total_pushcnt=`expr $_total_pushcnt - 1` - eval _gasnet_pushvar_isset=\$_pushedvarset_CFLAGS_$_pushcnt_CFLAGS - if test "$_gasnet_pushvar_isset" = "1" ; then - eval CFLAGS=\$_pushedvar_CFLAGS_$_pushcnt_CFLAGS - echo "popping CFLAGS back to: $CFLAGS" >&5 - else - unset CFLAGS - echo "popping CFLAGS back to: " >&5 - fi - else - - -echo -echo "configure error: INTERNAL ERROR: GASNET_PUSH/POPVAR underflow on CFLAGS" -if test "" ; then -if test -f "conftest.$ac_ext" ; then - errfile=conftest.$ac_ext -else - errfile=gasnet_errsave_file -fi -if test -f "$errfile" ; then - echo - echo " --- Failed program --- " - cat $errfile - echo " -----------------------" -fi -fi -if test -f "conftest.err" ; then - errfile=conftest.err -else - errfile=gasnet_errsave_err -fi -if test -f "$errfile" ; then - echo - echo "Compilation error: " - echo - cat $errfile -fi -echo -CONFIG_FILE=`pwd`/config.log -as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 - - - fi - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_POPVAR(CFLAGS) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) - - - - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CCOMPILE_WITHWARN_NORETRY(...) vvvvvvvvvvvvvvvvvvvvvv (L:4) - - gasnet_compile_cmd="${CC-cc} -c $CFLAGS $CPPFLAGS conftest.c" - cat > conftest.c <&5 - ( $gasnet_compile_cmd ) > conftest-runcmdout 2> conftest-runcmderr - gasnet_cmd_result="$?" 
- gasnet_cmd_stdout="`cat conftest-runcmdout`" - gasnet_cmd_stderr="`cat conftest-runcmderr`" - cat conftest-runcmdout >&5 - cat conftest-runcmderr >&5 - echo gasnet_cmd_result=$gasnet_cmd_result >&5 - rm -rf conftest-runcmdout conftest-runcmderr - if test "$gasnet_cmd_result" = "0" ; then - if test -z "$gasnet_cmd_stdout$gasnet_cmd_stderr" ; then - : - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no/warning: $_GASNET_TRY_CFLAG_TMP" >&5 -$as_echo "no/warning: $_GASNET_TRY_CFLAG_TMP" >&6; } - - - else - : - - echo "configure: warned program was:" >&5 - cat $gasnet_testfile >&5 - - if test "$gasnet_cmd_stdout$gasnet_cmd_stderr" = "$_GASNET_TRY_CFLAG_TMP" ; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes/persistent-warning: $_GASNET_TRY_CFLAG_TMP" >&5 -$as_echo "yes/persistent-warning: $_GASNET_TRY_CFLAG_TMP" >&6; } - MISC_CFLAGS="$MISC_CFLAGS ${misc_flag_prefix}-Wno-long-double" - else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no/new-warning: $_GASNET_TRY_CFLAG_TMP" >&5 -$as_echo "no/new-warning: $_GASNET_TRY_CFLAG_TMP" >&6; } - - fi - - - fi - else - : - - echo "configure: failed program was:" >&5 - cat $gasnet_testfile >&5 - - -echo -echo "configure error: unknown failure case in TRY_CFLAG" -if test "" ; then -if test -f "conftest.$ac_ext" ; then - errfile=conftest.$ac_ext -else - errfile=gasnet_errsave_file -fi -if test -f "$errfile" ; then - echo - echo " --- Failed program --- " - cat $errfile - echo " -----------------------" -fi -fi -if test -f "conftest.err" ; then - errfile=conftest.err -else - errfile=gasnet_errsave_err -fi -if test -f "$errfile" ; then - echo - echo "Compilation error: " - echo - cat $errfile -fi -echo -CONFIG_FILE=`pwd`/config.log -as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 - - - - - fi - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_RUNCMD($gasnet_compile_cmd,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:5) - - - - rm -f $gasnet_testfile - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CCOMPILE_WITHWARN_NORETRY(...) 
^^^^^^^^^^^^^^^^^^^^^^ (L:4) - - - - - - fi - else - : - - echo "configure: failed program was:" >&5 - cat $gasnet_testfile >&5 - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no/error: $gasnet_cmd_stdout$gasnet_cmd_stderr" >&5 -$as_echo "no/error: $gasnet_cmd_stdout$gasnet_cmd_stderr" >&6; } - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_POPVAR(CFLAGS) vvvvvvvvvvvvvvvvvvvvvv (L:4) - - if test "$_pushcnt_CFLAGS" -ge "1"; then - _pushcnt_CFLAGS=`expr $_pushcnt_CFLAGS - 1` - _total_pushcnt=`expr $_total_pushcnt - 1` - eval _gasnet_pushvar_isset=\$_pushedvarset_CFLAGS_$_pushcnt_CFLAGS - if test "$_gasnet_pushvar_isset" = "1" ; then - eval CFLAGS=\$_pushedvar_CFLAGS_$_pushcnt_CFLAGS - echo "popping CFLAGS back to: $CFLAGS" >&5 - else - unset CFLAGS - echo "popping CFLAGS back to: " >&5 - fi - else - - -echo -echo "configure error: INTERNAL ERROR: GASNET_PUSH/POPVAR underflow on CFLAGS" -if test "" ; then -if test -f "conftest.$ac_ext" ; then - errfile=conftest.$ac_ext -else - errfile=gasnet_errsave_file -fi -if test -f "$errfile" ; then - echo - echo " --- Failed program --- " - cat $errfile - echo " -----------------------" -fi -fi -if test -f "conftest.err" ; then - errfile=conftest.err -else - errfile=gasnet_errsave_err -fi -if test -f "$errfile" ; then - echo - echo "Compilation error: " - echo - cat $errfile -fi -echo -CONFIG_FILE=`pwd`/config.log -as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 - - - fi - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_POPVAR(CFLAGS) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) - - - - - - - fi - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_RUNCMD($gasnet_compile_cmd,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:3) - - - - rm -f $gasnet_testfile - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CCOMPILE_WITHWARN_NORETRY(...) 
^^^^^^^^^^^^^^^^^^^^^^ (L:2) - - - - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CFLAG(${misc_flag_prefix}-Wlong-double) ^^^^^^^^^^^^^^^^^^^^^^ (L:1) - - - - - else - : - - echo "configure: warned program was:" >&5 - cat $gasnet_testfile >&5 - - _GASNET_TRY_CFLAG_TMP="$gasnet_cmd_stdout$gasnet_cmd_stderr" - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_POPVAR(CFLAGS) vvvvvvvvvvvvvvvvvvvvvv (L:4) - - if test "$_pushcnt_CFLAGS" -ge "1"; then - _pushcnt_CFLAGS=`expr $_pushcnt_CFLAGS - 1` - _total_pushcnt=`expr $_total_pushcnt - 1` - eval _gasnet_pushvar_isset=\$_pushedvarset_CFLAGS_$_pushcnt_CFLAGS - if test "$_gasnet_pushvar_isset" = "1" ; then - eval CFLAGS=\$_pushedvar_CFLAGS_$_pushcnt_CFLAGS - echo "popping CFLAGS back to: $CFLAGS" >&5 - else - unset CFLAGS - echo "popping CFLAGS back to: " >&5 - fi - else - - -echo -echo "configure error: INTERNAL ERROR: GASNET_PUSH/POPVAR underflow on CFLAGS" -if test "" ; then -if test -f "conftest.$ac_ext" ; then - errfile=conftest.$ac_ext -else - errfile=gasnet_errsave_file -fi -if test -f "$errfile" ; then - echo - echo " --- Failed program --- " - cat $errfile - echo " -----------------------" -fi -fi -if test -f "conftest.err" ; then - errfile=conftest.err -else - errfile=gasnet_errsave_err -fi -if test -f "$errfile" ; then - echo - echo "Compilation error: " - echo - cat $errfile -fi -echo -CONFIG_FILE=`pwd`/config.log -as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 - - - fi - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_POPVAR(CFLAGS) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) - - - - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CCOMPILE_WITHWARN_NORETRY(...) vvvvvvvvvvvvvvvvvvvvvv (L:4) - - gasnet_compile_cmd="${CC-cc} -c $CFLAGS $CPPFLAGS conftest.c" - cat > conftest.c <&5 - ( $gasnet_compile_cmd ) > conftest-runcmdout 2> conftest-runcmderr - gasnet_cmd_result="$?" 
- gasnet_cmd_stdout="`cat conftest-runcmdout`" - gasnet_cmd_stderr="`cat conftest-runcmderr`" - cat conftest-runcmdout >&5 - cat conftest-runcmderr >&5 - echo gasnet_cmd_result=$gasnet_cmd_result >&5 - rm -rf conftest-runcmdout conftest-runcmderr - if test "$gasnet_cmd_result" = "0" ; then - if test -z "$gasnet_cmd_stdout$gasnet_cmd_stderr" ; then - : - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no/warning: $_GASNET_TRY_CFLAG_TMP" >&5 -$as_echo "no/warning: $_GASNET_TRY_CFLAG_TMP" >&6; } - - - else - : - - echo "configure: warned program was:" >&5 - cat $gasnet_testfile >&5 - - if test "$gasnet_cmd_stdout$gasnet_cmd_stderr" = "$_GASNET_TRY_CFLAG_TMP" ; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes/persistent-warning: $_GASNET_TRY_CFLAG_TMP" >&5 -$as_echo "yes/persistent-warning: $_GASNET_TRY_CFLAG_TMP" >&6; } + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CCOMPILE_WITHWARN_NORETRY(...) vvvvvvvvvvvvvvvvvvvvvv (L:4) + + gasnet_compile_cmd="${CC-cc} -c $CFLAGS $CPPFLAGS conftest.c" + cat > conftest.c <&5 + ( $gasnet_compile_cmd ) > conftest-runcmdout 2> conftest-runcmderr + gasnet_cmd_result="$?" 
+ gasnet_cmd_stdout="`cat conftest-runcmdout`" + gasnet_cmd_stderr="`cat conftest-runcmderr`" + cat conftest-runcmdout >&5 + cat conftest-runcmderr >&5 + echo gasnet_cmd_result=$gasnet_cmd_result >&5 + rm -rf conftest-runcmdout conftest-runcmderr + if test "$gasnet_cmd_result" = "0" ; then + if test -z "$gasnet_cmd_stdout$gasnet_cmd_stderr" ; then + : + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no/warning: $_GASNET_TRY_CFLAG_TMP" >&5 +$as_echo "no/warning: $_GASNET_TRY_CFLAG_TMP" >&6; } + + + else + : + + echo "configure: warned program was:" >&5 + cat $gasnet_testfile >&5 + + if test "$gasnet_cmd_stdout$gasnet_cmd_stderr" = "$_GASNET_TRY_CFLAG_TMP" ; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes/persistent-warning: $_GASNET_TRY_CFLAG_TMP" >&5 +$as_echo "yes/persistent-warning: $_GASNET_TRY_CFLAG_TMP" >&6; } @@ -98922,7 +98596,6 @@ case "$target_cpu" in # when chip revs differ, we should err on the larger size aarch64) cache_line_guess=64 ;; rs6000) cache_line_guess=32 ;; # https://www.csee.umbc.edu/portal/help/architecture/rs6000_arch.ps powerpc*) cache_line_guess=128 ;; # https://www.7-cpu.com/cpu/Power8.html - ia64) cache_line_guess=128 ;; # https://www.7-cpu.com/cpu/Itanium2.html sparc) cache_line_guess=128 ;; # https://en.wikipedia.org/wiki/SPARC64_V *) cache_line_guess=128 ;; # the default when we have no other info esac @@ -101620,153 +101293,6 @@ fi fi -## Do toolchain and CPU support ia64 "cmp8xchg16"? 
- - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CPPFLAGS,"$CPPFLAGS -I$TOP_SRCDIR/other") vvvvvvvvvvvvvvvvvvvvvv (L:2) - - if test "$_pushcnt_CPPFLAGS" = "" ; then - _pushcnt_CPPFLAGS=0 - fi - if test "$_total_pushcnt" = "" ; then - _total_pushcnt=0 - fi - if test "${CPPFLAGS+set}" = set; then - _gasnet_pushvar_isset=1 - else - _gasnet_pushvar_isset=0 - fi - eval _pushedvar_CPPFLAGS_$_pushcnt_CPPFLAGS=\$CPPFLAGS - eval _pushedvarset_CPPFLAGS_$_pushcnt_CPPFLAGS=$_gasnet_pushvar_isset - _pushcnt_CPPFLAGS=`expr $_pushcnt_CPPFLAGS + 1` - _total_pushcnt=`expr $_total_pushcnt + 1` - CPPFLAGS="$CPPFLAGS -I$TOP_SRCDIR/other" - echo "pushed new CPPFLAGS value: $CPPFLAGS" >&5 - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CPPFLAGS,"$CPPFLAGS -I$TOP_SRCDIR/other") ^^^^^^^^^^^^^^^^^^^^^^ (L:2) - - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ia64 cmp8xchg16 support" >&5 -$as_echo_n "checking for ia64 cmp8xchg16 support... " >&6; } -if test "$cross_compiling" = yes; then : - : # For now we assume "NO" when cross compiling - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - - #include "gasnet_portable_platform.h" /* For PLATFORM_{ARCH,COMPILER}_* */ - - #if !PLATFORM_ARCH_IA64 - #error wrong CPU architecture - #elif PLATFORM_COMPILER_INTEL - #include - int cmp8xchg16(void volatile *ptr, unsigned long oldlo, unsigned long newlo, unsigned long newhi) { - return oldlo == _InterlockedCompare64Exchange128_acq(ptr, newhi, newlo, oldlo); - } - #elif PLATFORM_COMPILER_GNU - int cmp8xchg16(void volatile *ptr, unsigned long oldlo, unsigned long newlo, unsigned long newhi) { - register unsigned long tmp = newlo; - __asm__ __volatile__ ( - "mov ar.ccv=%1 \n\t" - "mov ar.csd=%2;; \n\t" - "cmp8xchg16.acq %0=[%3],%0,ar.csd,ar.ccv \n" - : "+r"(tmp) : "rO"(oldlo), "rO"(newhi), "r"(ptr) : "memory" ); - return tmp == oldlo; - } - #else - #error compiler is unsupported - #endif - - int main(void) { - unsigned long *array[3]; - unsigned long *p = (void *)(((unsigned long)array + 15) & ~(unsigned long)15); - int swapped; - - p[0] = 1234; - p[1] = 0; - - swapped = cmp8xchg16(p, 1234, 5678, 0xabcdef); - - /* Want SUCCESS and changed X */ - return (!swapped || (p[0] != 5678) || (p[1] != 0xabcdef)); - } - -_ACEOF -if ac_fn_c_try_run "$LINENO"; then : - $as_echo "#define GASNETI_HAVE_IA64_CMP8XCHG16 1" >>confdefs.h - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } - -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - -fi -rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ - conftest.$ac_objext conftest.beam conftest.$ac_ext -fi - - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_POPVAR(CPPFLAGS) vvvvvvvvvvvvvvvvvvvvvv (L:2) - - if test "$_pushcnt_CPPFLAGS" -ge "1"; then - _pushcnt_CPPFLAGS=`expr $_pushcnt_CPPFLAGS - 1` - _total_pushcnt=`expr $_total_pushcnt - 1` - eval _gasnet_pushvar_isset=\$_pushedvarset_CPPFLAGS_$_pushcnt_CPPFLAGS - if test "$_gasnet_pushvar_isset" = "1" ; then - eval CPPFLAGS=\$_pushedvar_CPPFLAGS_$_pushcnt_CPPFLAGS - echo "popping CPPFLAGS back to: 
$CPPFLAGS" >&5 - else - unset CPPFLAGS - echo "popping CPPFLAGS back to: " >&5 - fi - else - - -echo -echo "configure error: INTERNAL ERROR: GASNET_PUSH/POPVAR underflow on CPPFLAGS" -if test "" ; then -if test -f "conftest.$ac_ext" ; then - errfile=conftest.$ac_ext -else - errfile=gasnet_errsave_file -fi -if test -f "$errfile" ; then - echo - echo " --- Failed program --- " - cat $errfile - echo " -----------------------" -fi -fi -if test -f "conftest.err" ; then - errfile=conftest.err -else - errfile=gasnet_errsave_err -fi -if test -f "$errfile" ; then - echo - echo "Compilation error: " - echo - cat $errfile -fi -echo -CONFIG_FILE=`pwd`/config.log -as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 - - - fi - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_POPVAR(CPPFLAGS) ^^^^^^^^^^^^^^^^^^^^^^ (L:2) - - - - ## Do CC/gas and CPU support "cmpxchg16b"? if test "$cross_compiling" = "yes" ; then @@ -103194,6 +102720,50 @@ esac + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_WITH(maxeps,...) vvvvvvvvvvvvvvvvvvvvvv (L:2) + + + +# Check whether --with-maxeps was given. +if test "${with_maxeps+set}" = set; then : + withval=$with_maxeps; +fi + + + + + if test "${with_maxeps+set}" = set; then : + withval=$with_maxeps; + case "$withval" in + no) : + as_fn_error $? "--with-maxeps requires a positive integer argument" "$LINENO" 5 + ;; + *) : + if expr "${withval}" : "[1-9][0-9]*" >/dev/null; then + cat >>confdefs.h <<_ACEOF +#define GASNETI_MAXEPS_CONFIGURE $withval +_ACEOF + + else + as_fn_error $? "--with-maxeps requires a positive integer argument" "$LINENO" 5 + fi + ;; + esac + else + : + + fi + + + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_WITH(maxeps,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:2) + + + + # FORCED non-default implementations - used mainly for debugging # # NOTE: If adding an "--enable-forced-*" option here, please: @@ -110875,15 +110445,46 @@ as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 ;; - ia64) + esac + # set CXXDEBUGFLAGS + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CXXFLAGS,"${gxx_flag_prefix}-g3") vvvvvvvvvvvvvvvvvvvvvv (L:2) + + if test "$_pushcnt_CXXFLAGS" = "" ; then + _pushcnt_CXXFLAGS=0 + fi + if test "$_total_pushcnt" = "" ; then + _total_pushcnt=0 + fi + if test "${CXXFLAGS+set}" = set; then + _gasnet_pushvar_isset=1 + else + _gasnet_pushvar_isset=0 + fi + eval _pushedvar_CXXFLAGS_$_pushcnt_CXXFLAGS=\$CXXFLAGS + eval _pushedvarset_CXXFLAGS_$_pushcnt_CXXFLAGS=$_gasnet_pushvar_isset + _pushcnt_CXXFLAGS=`expr $_pushcnt_CXXFLAGS + 1` + _total_pushcnt=`expr $_total_pushcnt + 1` + CXXFLAGS="${gxx_flag_prefix}-g3" + echo "pushed new CXXFLAGS value: $CXXFLAGS" >&5 + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CXXFLAGS,"${gxx_flag_prefix}-g3") ^^^^^^^^^^^^^^^^^^^^^^ (L:2) + + + + case "$target_os" in + darwin*) # Want -gstabs+ instead of -g3, if supported + -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CXXFLAG(-mtls-size=64) vvvvvvvvvvvvvvvvvvvvvv (L:2) +# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CXXFLAG(${gxx_flag_prefix}-gstabs+) vvvvvvvvvvvvvvvvvvvvvv (L:2) -# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CXXFLAGS,"$CXXFLAGS -mtls-size=64") vvvvvvvvvvvvvvvvvvvvvv (L:3) +# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CXXFLAGS,"$CXXFLAGS ${gxx_flag_prefix}-gstabs+") vvvvvvvvvvvvvvvvvvvvvv (L:3) if test "$_pushcnt_CXXFLAGS" = "" ; then _pushcnt_CXXFLAGS=0 @@ -110900,15 +110501,15 @@ as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 eval _pushedvarset_CXXFLAGS_$_pushcnt_CXXFLAGS=$_gasnet_pushvar_isset _pushcnt_CXXFLAGS=`expr $_pushcnt_CXXFLAGS + 1` _total_pushcnt=`expr $_total_pushcnt + 1` - CXXFLAGS="$CXXFLAGS -mtls-size=64" + CXXFLAGS="$CXXFLAGS ${gxx_flag_prefix}-gstabs+" echo "pushed new CXXFLAGS value: $CXXFLAGS" >&5 -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CXXFLAGS,"$CXXFLAGS -mtls-size=64") ^^^^^^^^^^^^^^^^^^^^^^ (L:3) +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CXXFLAGS,"$CXXFLAGS ${gxx_flag_prefix}-gstabs+") ^^^^^^^^^^^^^^^^^^^^^^ (L:3) -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler flag -mtls-size=64" >&5 -$as_echo_n "checking for C++ compiler flag -mtls-size=64... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler flag ${gxx_flag_prefix}-gstabs+" >&5 +$as_echo_n "checking for C++ compiler flag ${gxx_flag_prefix}-gstabs+... " >&6; } @@ -111000,7 +110601,7 @@ as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 - MISC_CXXFLAGS="$MISC_CXXFLAGS -mtls-size=64" + CXXFLAGS="${gxx_flag_prefix}-gstabs+" else : @@ -111111,7 +110712,7 @@ $as_echo "no/warning: $_GASNET_TRY_CXXFLAG_TMP" >&6; } if test "$gasnet_cmd_stdout$gasnet_cmd_stderr" = "$_GASNET_TRY_CXXFLAG_TMP" ; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes/persistent-warning: $_GASNET_TRY_CXXFLAG_TMP" >&5 $as_echo "yes/persistent-warning: $_GASNET_TRY_CXXFLAG_TMP" >&6; } - MISC_CXXFLAGS="$MISC_CXXFLAGS -mtls-size=64" + CXXFLAGS="${gxx_flag_prefix}-gstabs+" else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no/new-warning: $_GASNET_TRY_CXXFLAG_TMP" >&5 $as_echo "no/new-warning: $_GASNET_TRY_CXXFLAG_TMP" >&6; } @@ -111255,50 +110856,82 @@ as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CXXFLAG(-mtls-size=64) ^^^^^^^^^^^^^^^^^^^^^^ (L:2) +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CXXFLAG(${gxx_flag_prefix}-gstabs+) ^^^^^^^^^^^^^^^^^^^^^^ (L:2) -;; + + ;; esac - # set CXXDEBUGFLAGS + CXXDEBUGFLAGS="$CXXFLAGS" -# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CXXFLAGS,"${gxx_flag_prefix}-g3") vvvvvvvvvvvvvvvvvvvvvv (L:2) +# vvvvvvvvvvvvvvvvvvvvvv GASNET_POPVAR(CXXFLAGS) vvvvvvvvvvvvvvvvvvvvvv (L:2) - if test "$_pushcnt_CXXFLAGS" = "" ; then - _pushcnt_CXXFLAGS=0 - fi - if test "$_total_pushcnt" = "" ; then - _total_pushcnt=0 - fi - if test "${CXXFLAGS+set}" = set; then - _gasnet_pushvar_isset=1 + if test "$_pushcnt_CXXFLAGS" -ge "1"; then + _pushcnt_CXXFLAGS=`expr $_pushcnt_CXXFLAGS - 1` + _total_pushcnt=`expr $_total_pushcnt - 1` + eval _gasnet_pushvar_isset=\$_pushedvarset_CXXFLAGS_$_pushcnt_CXXFLAGS + if test "$_gasnet_pushvar_isset" = "1" ; then + eval CXXFLAGS=\$_pushedvar_CXXFLAGS_$_pushcnt_CXXFLAGS + echo "popping CXXFLAGS back to: $CXXFLAGS" >&5 + else + unset CXXFLAGS + echo "popping CXXFLAGS back to: " >&5 + fi else - _gasnet_pushvar_isset=0 + + +echo +echo "configure error: INTERNAL ERROR: GASNET_PUSH/POPVAR underflow on CXXFLAGS" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 + + fi - eval _pushedvar_CXXFLAGS_$_pushcnt_CXXFLAGS=\$CXXFLAGS - eval _pushedvarset_CXXFLAGS_$_pushcnt_CXXFLAGS=$_gasnet_pushvar_isset - _pushcnt_CXXFLAGS=`expr $_pushcnt_CXXFLAGS + 1` - _total_pushcnt=`expr $_total_pushcnt + 1` - CXXFLAGS="${gxx_flag_prefix}-g3" - echo "pushed new CXXFLAGS value: $CXXFLAGS" >&5 -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CXXFLAGS,"${gxx_flag_prefix}-g3") ^^^^^^^^^^^^^^^^^^^^^^ (L:2) +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_POPVAR(CXXFLAGS) ^^^^^^^^^^^^^^^^^^^^^^ (L:2) - case "$target_os" in - darwin*) # Want -gstabs+ instead of -g3, if supported + ;; + PGI) MISC_CXXFLAGS="-Masmkeyword -Msignextend" + case "$GASNETI_PTR_BITS$target_os" in + 64darwin*) # PGI seems to have a different ABI -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CXXFLAG(${gxx_flag_prefix}-gstabs+) vvvvvvvvvvvvvvvvvvvvvv (L:2) +# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CXXFLAG(-Mllalign) vvvvvvvvvvvvvvvvvvvvvv (L:2) -# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CXXFLAGS,"$CXXFLAGS ${gxx_flag_prefix}-gstabs+") vvvvvvvvvvvvvvvvvvvvvv (L:3) +# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CXXFLAGS,"$CXXFLAGS -Mllalign") vvvvvvvvvvvvvvvvvvvvvv (L:3) if test "$_pushcnt_CXXFLAGS" = "" ; then _pushcnt_CXXFLAGS=0 @@ -111315,15 +110948,15 @@ as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 eval _pushedvarset_CXXFLAGS_$_pushcnt_CXXFLAGS=$_gasnet_pushvar_isset _pushcnt_CXXFLAGS=`expr $_pushcnt_CXXFLAGS + 1` _total_pushcnt=`expr $_total_pushcnt + 1` - CXXFLAGS="$CXXFLAGS ${gxx_flag_prefix}-gstabs+" + CXXFLAGS="$CXXFLAGS -Mllalign" echo "pushed new CXXFLAGS value: $CXXFLAGS" >&5 -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CXXFLAGS,"$CXXFLAGS ${gxx_flag_prefix}-gstabs+") ^^^^^^^^^^^^^^^^^^^^^^ (L:3) +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CXXFLAGS,"$CXXFLAGS -Mllalign") ^^^^^^^^^^^^^^^^^^^^^^ (L:3) -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler flag ${gxx_flag_prefix}-gstabs+" >&5 -$as_echo_n "checking for C++ compiler flag ${gxx_flag_prefix}-gstabs+... 
" >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler flag -Mllalign" >&5 +$as_echo_n "checking for C++ compiler flag -Mllalign... " >&6; } @@ -111415,7 +111048,7 @@ as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 - CXXFLAGS="${gxx_flag_prefix}-gstabs+" + MISC_CXXFLAGS="$MISC_CXXFLAGS -Mllalign" else : @@ -111526,7 +111159,7 @@ $as_echo "no/warning: $_GASNET_TRY_CXXFLAG_TMP" >&6; } if test "$gasnet_cmd_stdout$gasnet_cmd_stderr" = "$_GASNET_TRY_CXXFLAG_TMP" ; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes/persistent-warning: $_GASNET_TRY_CXXFLAG_TMP" >&5 $as_echo "yes/persistent-warning: $_GASNET_TRY_CXXFLAG_TMP" >&6; } - CXXFLAGS="${gxx_flag_prefix}-gstabs+" + MISC_CXXFLAGS="$MISC_CXXFLAGS -Mllalign" else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no/new-warning: $_GASNET_TRY_CXXFLAG_TMP" >&5 $as_echo "no/new-warning: $_GASNET_TRY_CXXFLAG_TMP" >&6; } @@ -111670,82 +111303,68 @@ as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CXXFLAG(${gxx_flag_prefix}-gstabs+) ^^^^^^^^^^^^^^^^^^^^^^ (L:2) +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CXXFLAG(-Mllalign) ^^^^^^^^^^^^^^^^^^^^^^ (L:2) - ;; - esac - CXXDEBUGFLAGS="$CXXFLAGS" + ;; + esac -# vvvvvvvvvvvvvvvvvvvvvv GASNET_POPVAR(CXXFLAGS) vvvvvvvvvvvvvvvvvvvvvv (L:2) +# vvvvvvvvvvvvvvvvvvvvvv GASNET_PGI_VERSION_CHECK(CXX) vvvvvvvvvvvvvvvvvvvvvv (L:2) - if test "$_pushcnt_CXXFLAGS" -ge "1"; then - _pushcnt_CXXFLAGS=`expr $_pushcnt_CXXFLAGS - 1` - _total_pushcnt=`expr $_total_pushcnt - 1` - eval _gasnet_pushvar_isset=\$_pushedvarset_CXXFLAGS_$_pushcnt_CXXFLAGS - if test "$_gasnet_pushvar_isset" = "1" ; then - eval CXXFLAGS=\$_pushedvar_CXXFLAGS_$_pushcnt_CXXFLAGS - echo "popping CXXFLAGS back to: $CXXFLAGS" >&5 - else - unset CXXFLAGS - echo "popping CXXFLAGS back to: " >&5 - fi - else +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for known buggy compilers" >&5 +$as_echo_n "checking for known buggy compilers... 
" >&6; } +badpgimsg="" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#if ((10000 * __PGIC__) + (100 * __PGIC_MINOR__) + __PGIC_PATCHLEVEL__) < 70205 +# error +#endif +int +main () +{ -echo -echo "configure error: INTERNAL ERROR: GASNET_PUSH/POPVAR underflow on CXXFLAGS" -if test "" ; then -if test -f "conftest.$ac_ext" ; then - errfile=conftest.$ac_ext + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + : else - errfile=gasnet_errsave_file -fi -if test -f "$errfile" ; then - echo - echo " --- Failed program --- " - cat $errfile - echo " -----------------------" -fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: CXX is PGI prior to 7.2-5" >&5 +$as_echo "CXX is PGI prior to 7.2-5" >&6; } +badpgimsg="Use of PGI compilers older than 7.2-5 is not supported. +Consider using \$CXX to select a different compiler." + fi -if test -f "conftest.err" ; then - errfile=conftest.err +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +if test -n "$badpgimsg"; then + as_fn_error $? "$badpgimsg" "$LINENO" 5 else - errfile=gasnet_errsave_err -fi -if test -f "$errfile" ; then - echo - echo "Compilation error: " - echo - cat $errfile + { $as_echo "$as_me:${as_lineno-$LINENO}: result: ok" >&5 +$as_echo "ok" >&6; } fi -echo -CONFIG_FILE=`pwd`/config.log -as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 - - fi - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_POPVAR(CXXFLAGS) ^^^^^^^^^^^^^^^^^^^^^^ (L:2) +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PGI_VERSION_CHECK(CXX) ^^^^^^^^^^^^^^^^^^^^^^ (L:2) - ;; - PGI) MISC_CXXFLAGS="-Masmkeyword -Msignextend" - case "$GASNETI_PTR_BITS$target_os" in - 64darwin*) # PGI seems to have a different ABI + # DOB: don't force this in client flags by default. The payoff is small and it has potential to break clients + # because it's not a valid option for PGI C compilation. 
-# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CXXFLAG(-Mllalign) vvvvvvvvvvvvvvvvvvvvvv (L:2) +# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CXXFLAG(--display_error_number) vvvvvvvvvvvvvvvvvvvvvv (L:2) -# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CXXFLAGS,"$CXXFLAGS -Mllalign") vvvvvvvvvvvvvvvvvvvvvv (L:3) +# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CXXFLAGS,"$CXXFLAGS --display_error_number") vvvvvvvvvvvvvvvvvvvvvv (L:3) if test "$_pushcnt_CXXFLAGS" = "" ; then _pushcnt_CXXFLAGS=0 @@ -111762,15 +111381,15 @@ as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 eval _pushedvarset_CXXFLAGS_$_pushcnt_CXXFLAGS=$_gasnet_pushvar_isset _pushcnt_CXXFLAGS=`expr $_pushcnt_CXXFLAGS + 1` _total_pushcnt=`expr $_total_pushcnt + 1` - CXXFLAGS="$CXXFLAGS -Mllalign" + CXXFLAGS="$CXXFLAGS --display_error_number" echo "pushed new CXXFLAGS value: $CXXFLAGS" >&5 -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CXXFLAGS,"$CXXFLAGS -Mllalign") ^^^^^^^^^^^^^^^^^^^^^^ (L:3) +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CXXFLAGS,"$CXXFLAGS --display_error_number") ^^^^^^^^^^^^^^^^^^^^^^ (L:3) -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler flag -Mllalign" >&5 -$as_echo_n "checking for C++ compiler flag -Mllalign... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler flag --display_error_number" >&5 +$as_echo_n "checking for C++ compiler flag --display_error_number... " >&6; } @@ -111862,7 +111481,7 @@ as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 - MISC_CXXFLAGS="$MISC_CXXFLAGS -Mllalign" + DEVWARN_CXXFLAGS="--display_error_number" else : @@ -111973,7 +111592,7 @@ $as_echo "no/warning: $_GASNET_TRY_CXXFLAG_TMP" >&6; } if test "$gasnet_cmd_stdout$gasnet_cmd_stderr" = "$_GASNET_TRY_CXXFLAG_TMP" ; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes/persistent-warning: $_GASNET_TRY_CXXFLAG_TMP" >&5 $as_echo "yes/persistent-warning: $_GASNET_TRY_CXXFLAG_TMP" >&6; } - MISC_CXXFLAGS="$MISC_CXXFLAGS -Mllalign" + DEVWARN_CXXFLAGS="--display_error_number" else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no/new-warning: $_GASNET_TRY_CXXFLAG_TMP" >&5 $as_echo "no/new-warning: $_GASNET_TRY_CXXFLAG_TMP" >&6; } @@ -112117,68 +111736,20 @@ as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CXXFLAG(-Mllalign) ^^^^^^^^^^^^^^^^^^^^^^ (L:2) - - - - ;; - esac - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_PGI_VERSION_CHECK(CXX) vvvvvvvvvvvvvvvvvvvvvv (L:2) - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for known buggy compilers" >&5 -$as_echo_n "checking for known buggy compilers... " >&6; } -badpgimsg="" -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -#if ((10000 * __PGIC__) + (100 * __PGIC_MINOR__) + __PGIC_PATCHLEVEL__) < 70205 -# error -#endif - -int -main () -{ - - ; - return 0; -} -_ACEOF -if ac_fn_cxx_try_compile "$LINENO"; then : - : -else - -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: CXX is PGI prior to 7.2-5" >&5 -$as_echo "CXX is PGI prior to 7.2-5" >&6; } -badpgimsg="Use of PGI compilers older than 7.2-5 is not supported. -Consider using \$CXX to select a different compiler." - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -if test -n "$badpgimsg"; then - as_fn_error $? 
"$badpgimsg" "$LINENO" 5 -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: ok" >&5 -$as_echo "ok" >&6; } -fi +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CXXFLAG(--display_error_number) ^^^^^^^^^^^^^^^^^^^^^^ (L:2) -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PGI_VERSION_CHECK(CXX) ^^^^^^^^^^^^^^^^^^^^^^ (L:2) - # DOB: don't force this in client flags by default. The payoff is small and it has potential to break clients - # because it's not a valid option for PGI C compilation. -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CXXFLAG(--display_error_number) vvvvvvvvvvvvvvvvvvvvvv (L:2) +# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CXXFLAG([-Wc,--diag_suppress=177]) vvvvvvvvvvvvvvvvvvvvvv (L:2) -# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CXXFLAGS,"$CXXFLAGS --display_error_number") vvvvvvvvvvvvvvvvvvvvvv (L:3) +# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CXXFLAGS,"$CXXFLAGS -Wc,--diag_suppress=177") vvvvvvvvvvvvvvvvvvvvvv (L:3) if test "$_pushcnt_CXXFLAGS" = "" ; then _pushcnt_CXXFLAGS=0 @@ -112195,15 +111766,15 @@ fi eval _pushedvarset_CXXFLAGS_$_pushcnt_CXXFLAGS=$_gasnet_pushvar_isset _pushcnt_CXXFLAGS=`expr $_pushcnt_CXXFLAGS + 1` _total_pushcnt=`expr $_total_pushcnt + 1` - CXXFLAGS="$CXXFLAGS --display_error_number" + CXXFLAGS="$CXXFLAGS -Wc,--diag_suppress=177" echo "pushed new CXXFLAGS value: $CXXFLAGS" >&5 -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CXXFLAGS,"$CXXFLAGS --display_error_number") ^^^^^^^^^^^^^^^^^^^^^^ (L:3) +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CXXFLAGS,"$CXXFLAGS -Wc,--diag_suppress=177") ^^^^^^^^^^^^^^^^^^^^^^ (L:3) -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler flag --display_error_number" >&5 -$as_echo_n "checking for C++ compiler flag --display_error_number... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler flag -Wc,--diag_suppress=177" >&5 +$as_echo_n "checking for C++ compiler flag -Wc,--diag_suppress=177... " >&6; } @@ -112295,7 +111866,9 @@ as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 - DEVWARN_CXXFLAGS="--display_error_number" + + # Intentionally empty - this is just to test whether surgical warning suppresion is available + else : @@ -112396,6 +111969,8 @@ EOF { $as_echo "$as_me:${as_lineno-$LINENO}: result: no/warning: $_GASNET_TRY_CXXFLAG_TMP" >&5 $as_echo "no/warning: $_GASNET_TRY_CXXFLAG_TMP" >&6; } + MISC_CXXFLAGS="-w $MISC_CXXFLAGS" # old versions of pgc++ have no way to indep control warnings + else : @@ -112406,11 +111981,15 @@ $as_echo "no/warning: $_GASNET_TRY_CXXFLAG_TMP" >&6; } if test "$gasnet_cmd_stdout$gasnet_cmd_stderr" = "$_GASNET_TRY_CXXFLAG_TMP" ; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes/persistent-warning: $_GASNET_TRY_CXXFLAG_TMP" >&5 $as_echo "yes/persistent-warning: $_GASNET_TRY_CXXFLAG_TMP" >&6; } - DEVWARN_CXXFLAGS="--display_error_number" + + # Intentionally empty - this is just to test whether surgical warning suppresion is available + else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no/new-warning: $_GASNET_TRY_CXXFLAG_TMP" >&5 $as_echo "no/new-warning: $_GASNET_TRY_CXXFLAG_TMP" >&6; } + MISC_CXXFLAGS="-w $MISC_CXXFLAGS" # old versions of pgc++ have no way to indep control warnings + fi @@ -112535,6 +112114,8 @@ as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 + MISC_CXXFLAGS="-w $MISC_CXXFLAGS" # old versions of pgc++ have no way to indep control warnings + fi @@ -112550,20 +112131,21 @@ as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CXXFLAG(--display_error_number) ^^^^^^^^^^^^^^^^^^^^^^ (L:2) - +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CXXFLAG([-Wc,--diag_suppress=177]) ^^^^^^^^^^^^^^^^^^^^^^ (L:2) + ;; + Intel) MISC_CXXFLAGS="-wd654 -wd1125 -wd279" -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CXXFLAG([-Wc,--diag_suppress=177]) vvvvvvvvvvvvvvvvvvvvvv (L:2) +# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CXXFLAG(-wd1572) vvvvvvvvvvvvvvvvvvvvvv (L:2) -# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CXXFLAGS,"$CXXFLAGS -Wc,--diag_suppress=177") vvvvvvvvvvvvvvvvvvvvvv (L:3) +# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CXXFLAGS,"$CXXFLAGS -wd1572") vvvvvvvvvvvvvvvvvvvvvv (L:3) if test "$_pushcnt_CXXFLAGS" = "" ; then _pushcnt_CXXFLAGS=0 @@ -112580,15 +112162,15 @@ as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 eval _pushedvarset_CXXFLAGS_$_pushcnt_CXXFLAGS=$_gasnet_pushvar_isset _pushcnt_CXXFLAGS=`expr $_pushcnt_CXXFLAGS + 1` _total_pushcnt=`expr $_total_pushcnt + 1` - CXXFLAGS="$CXXFLAGS -Wc,--diag_suppress=177" + CXXFLAGS="$CXXFLAGS -wd1572" echo "pushed new CXXFLAGS value: $CXXFLAGS" >&5 -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CXXFLAGS,"$CXXFLAGS -Wc,--diag_suppress=177") ^^^^^^^^^^^^^^^^^^^^^^ (L:3) +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CXXFLAGS,"$CXXFLAGS -wd1572") ^^^^^^^^^^^^^^^^^^^^^^ (L:3) -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler flag -Wc,--diag_suppress=177" >&5 -$as_echo_n "checking for C++ compiler flag -Wc,--diag_suppress=177... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler flag -wd1572" >&5 +$as_echo_n "checking for C++ compiler flag -wd1572... " >&6; } @@ -112680,9 +112262,7 @@ as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 - - # Intentionally empty - this is just to test whether surgical warning suppresion is available - + MISC_CXXFLAGS="$MISC_CXXFLAGS -wd1572" else : @@ -112783,8 +112363,6 @@ EOF { $as_echo "$as_me:${as_lineno-$LINENO}: result: no/warning: $_GASNET_TRY_CXXFLAG_TMP" >&5 $as_echo "no/warning: $_GASNET_TRY_CXXFLAG_TMP" >&6; } - MISC_CXXFLAGS="-w $MISC_CXXFLAGS" # old versions of pgc++ have no way to indep control warnings - else : @@ -112795,15 +112373,11 @@ $as_echo "no/warning: $_GASNET_TRY_CXXFLAG_TMP" >&6; } if test "$gasnet_cmd_stdout$gasnet_cmd_stderr" = "$_GASNET_TRY_CXXFLAG_TMP" ; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes/persistent-warning: $_GASNET_TRY_CXXFLAG_TMP" >&5 $as_echo "yes/persistent-warning: $_GASNET_TRY_CXXFLAG_TMP" >&6; } - - # Intentionally empty - this is just to test whether surgical warning suppresion is available - + MISC_CXXFLAGS="$MISC_CXXFLAGS -wd1572" else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no/new-warning: $_GASNET_TRY_CXXFLAG_TMP" >&5 $as_echo "no/new-warning: $_GASNET_TRY_CXXFLAG_TMP" >&6; } - MISC_CXXFLAGS="-w $MISC_CXXFLAGS" # old versions of pgc++ have no way to indep control warnings - fi @@ -112928,8 +112502,6 @@ as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 - MISC_CXXFLAGS="-w $MISC_CXXFLAGS" # old versions of pgc++ have no way to indep control warnings - fi @@ -112945,21 +112517,23 @@ as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CXXFLAG([-Wc,--diag_suppress=177]) ^^^^^^^^^^^^^^^^^^^^^^ (L:2) +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CXXFLAG(-wd1572) ^^^^^^^^^^^^^^^^^^^^^^ (L:2) ;; - Intel) MISC_CXXFLAGS="-wd654 -wd1125 -wd279" - - + XLC) -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CXXFLAG(-wd1572) vvvvvvvvvvvvvvvvvvvvvv (L:2) + if test "$GASNETI_PTR_BITS" = 32 -a "$DISABLE_PPC64" != 1 -a "$cross_compiling" != "yes"; then + : + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for PPC64 instruction support" >&5 +$as_echo_n "checking for PPC64 instruction support... " >&6; } -# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CXXFLAGS,"$CXXFLAGS -wd1572") vvvvvvvvvvvvvvvvvvvvvv (L:3) +# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CXXFLAGS,"$CXXFLAGS $MISC_CXXFLAGS") vvvvvvvvvvvvvvvvvvvvvv (L:2) if test "$_pushcnt_CXXFLAGS" = "" ; then _pushcnt_CXXFLAGS=0 @@ -112976,15 +112550,18 @@ as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 eval _pushedvarset_CXXFLAGS_$_pushcnt_CXXFLAGS=$_gasnet_pushvar_isset _pushcnt_CXXFLAGS=`expr $_pushcnt_CXXFLAGS + 1` _total_pushcnt=`expr $_total_pushcnt + 1` - CXXFLAGS="$CXXFLAGS -wd1572" + CXXFLAGS="$CXXFLAGS $MISC_CXXFLAGS" echo "pushed new CXXFLAGS value: $CXXFLAGS" >&5 -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CXXFLAGS,"$CXXFLAGS -wd1572") ^^^^^^^^^^^^^^^^^^^^^^ (L:3) +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CXXFLAGS,"$CXXFLAGS $MISC_CXXFLAGS") ^^^^^^^^^^^^^^^^^^^^^^ (L:2) + -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler flag -wd1572" >&5 -$as_echo_n "checking for C++ compiler flag -wd1572... " >&6; } + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CXXCOMPILE_WITHWARN(...) vvvvvvvvvvvvvvvvvvvvvv (L:2) + @@ -112993,423 +112570,32 @@ $as_echo_n "checking for C++ compiler flag -wd1572... " >&6; } gasnet_compile_cmd="${CXX-c++} -c $CXXFLAGS $CPPFLAGS conftest.cc" cat > conftest.cc <&5 - ( $gasnet_compile_cmd ) > conftest-runcmdout 2> conftest-runcmderr - gasnet_cmd_result="$?" 
- gasnet_cmd_stdout="`cat conftest-runcmdout`" - gasnet_cmd_stderr="`cat conftest-runcmderr`" - cat conftest-runcmdout >&5 - cat conftest-runcmderr >&5 - echo gasnet_cmd_result=$gasnet_cmd_result >&5 - rm -rf conftest-runcmdout conftest-runcmderr - if test "$gasnet_cmd_result" = "0" ; then - if test -z "$gasnet_cmd_stdout$gasnet_cmd_stderr" ; then - : - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_POPVAR(CXXFLAGS) vvvvvvvvvvvvvvvvvvvvvv (L:5) - - if test "$_pushcnt_CXXFLAGS" -ge "1"; then - _pushcnt_CXXFLAGS=`expr $_pushcnt_CXXFLAGS - 1` - _total_pushcnt=`expr $_total_pushcnt - 1` - eval _gasnet_pushvar_isset=\$_pushedvarset_CXXFLAGS_$_pushcnt_CXXFLAGS - if test "$_gasnet_pushvar_isset" = "1" ; then - eval CXXFLAGS=\$_pushedvar_CXXFLAGS_$_pushcnt_CXXFLAGS - echo "popping CXXFLAGS back to: $CXXFLAGS" >&5 - else - unset CXXFLAGS - echo "popping CXXFLAGS back to: " >&5 - fi - else - - -echo -echo "configure error: INTERNAL ERROR: GASNET_PUSH/POPVAR underflow on CXXFLAGS" -if test "" ; then -if test -f "conftest.$ac_ext" ; then - errfile=conftest.$ac_ext -else - errfile=gasnet_errsave_file -fi -if test -f "$errfile" ; then - echo - echo " --- Failed program --- " - cat $errfile - echo " -----------------------" -fi -fi -if test -f "conftest.err" ; then - errfile=conftest.err -else - errfile=gasnet_errsave_err -fi -if test -f "$errfile" ; then - echo - echo "Compilation error: " - echo - cat $errfile -fi -echo -CONFIG_FILE=`pwd`/config.log -as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 - - - fi - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_POPVAR(CXXFLAGS) ^^^^^^^^^^^^^^^^^^^^^^ (L:5) - - - - MISC_CXXFLAGS="$MISC_CXXFLAGS -wd1572" - - else - : - - echo "configure: warned program was:" >&5 - cat $gasnet_testfile >&5 - - _GASNET_TRY_CXXFLAG_TMP="$gasnet_cmd_stdout$gasnet_cmd_stderr" - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_POPVAR(CXXFLAGS) vvvvvvvvvvvvvvvvvvvvvv (L:5) - - if test "$_pushcnt_CXXFLAGS" -ge "1"; then - _pushcnt_CXXFLAGS=`expr $_pushcnt_CXXFLAGS - 1` - _total_pushcnt=`expr $_total_pushcnt - 1` - eval _gasnet_pushvar_isset=\$_pushedvarset_CXXFLAGS_$_pushcnt_CXXFLAGS - if test "$_gasnet_pushvar_isset" = "1" ; then - eval CXXFLAGS=\$_pushedvar_CXXFLAGS_$_pushcnt_CXXFLAGS - echo "popping CXXFLAGS back to: $CXXFLAGS" >&5 - else - unset CXXFLAGS - echo "popping CXXFLAGS back to: " >&5 - fi - else - - -echo -echo "configure error: INTERNAL ERROR: GASNET_PUSH/POPVAR underflow on CXXFLAGS" -if test "" ; then -if test -f "conftest.$ac_ext" ; then - errfile=conftest.$ac_ext -else - errfile=gasnet_errsave_file -fi -if test -f "$errfile" ; then - echo - echo " --- Failed program --- " - cat $errfile - echo " -----------------------" -fi -fi -if test -f "conftest.err" ; then - errfile=conftest.err -else - errfile=gasnet_errsave_err -fi -if test -f "$errfile" ; then - echo - echo "Compilation error: " - echo - cat $errfile -fi -echo -CONFIG_FILE=`pwd`/config.log -as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 - - - fi - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_POPVAR(CXXFLAGS) ^^^^^^^^^^^^^^^^^^^^^^ (L:5) - - - - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CCOMPILE_WITHWARN_NORETRY(...) vvvvvvvvvvvvvvvvvvvvvv (L:5) - - gasnet_compile_cmd="${CC-cc} -c $CFLAGS $CPPFLAGS conftest.c" - cat > conftest.c <&5 - ( $gasnet_compile_cmd ) > conftest-runcmdout 2> conftest-runcmderr - gasnet_cmd_result="$?" 
- gasnet_cmd_stdout="`cat conftest-runcmdout`" - gasnet_cmd_stderr="`cat conftest-runcmderr`" - cat conftest-runcmdout >&5 - cat conftest-runcmderr >&5 - echo gasnet_cmd_result=$gasnet_cmd_result >&5 - rm -rf conftest-runcmdout conftest-runcmderr - if test "$gasnet_cmd_result" = "0" ; then - if test -z "$gasnet_cmd_stdout$gasnet_cmd_stderr" ; then - : - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no/warning: $_GASNET_TRY_CXXFLAG_TMP" >&5 -$as_echo "no/warning: $_GASNET_TRY_CXXFLAG_TMP" >&6; } - - - else - : - - echo "configure: warned program was:" >&5 - cat $gasnet_testfile >&5 - - if test "$gasnet_cmd_stdout$gasnet_cmd_stderr" = "$_GASNET_TRY_CXXFLAG_TMP" ; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes/persistent-warning: $_GASNET_TRY_CXXFLAG_TMP" >&5 -$as_echo "yes/persistent-warning: $_GASNET_TRY_CXXFLAG_TMP" >&6; } - MISC_CXXFLAGS="$MISC_CXXFLAGS -wd1572" - else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no/new-warning: $_GASNET_TRY_CXXFLAG_TMP" >&5 -$as_echo "no/new-warning: $_GASNET_TRY_CXXFLAG_TMP" >&6; } - - fi - - - fi - else - : - - echo "configure: failed program was:" >&5 - cat $gasnet_testfile >&5 - - -echo -echo "configure error: unknown failure case in TRY_CXXFLAG" -if test "" ; then -if test -f "conftest.$ac_ext" ; then - errfile=conftest.$ac_ext -else - errfile=gasnet_errsave_file -fi -if test -f "$errfile" ; then - echo - echo " --- Failed program --- " - cat $errfile - echo " -----------------------" -fi -fi -if test -f "conftest.err" ; then - errfile=conftest.err -else - errfile=gasnet_errsave_err -fi -if test -f "$errfile" ; then - echo - echo "Compilation error: " - echo - cat $errfile -fi -echo -CONFIG_FILE=`pwd`/config.log -as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 - - - - - fi - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_RUNCMD($gasnet_compile_cmd,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:6) - - - - rm -f $gasnet_testfile - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CCOMPILE_WITHWARN_NORETRY(...) 
^^^^^^^^^^^^^^^^^^^^^^ (L:5) - - - - - - fi - else - : - - echo "configure: failed program was:" >&5 - cat $gasnet_testfile >&5 - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no/error: $gasnet_cmd_stdout$gasnet_cmd_stderr" >&5 -$as_echo "no/error: $gasnet_cmd_stdout$gasnet_cmd_stderr" >&6; } - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_POPVAR(CXXFLAGS) vvvvvvvvvvvvvvvvvvvvvv (L:5) - - if test "$_pushcnt_CXXFLAGS" -ge "1"; then - _pushcnt_CXXFLAGS=`expr $_pushcnt_CXXFLAGS - 1` - _total_pushcnt=`expr $_total_pushcnt - 1` - eval _gasnet_pushvar_isset=\$_pushedvarset_CXXFLAGS_$_pushcnt_CXXFLAGS - if test "$_gasnet_pushvar_isset" = "1" ; then - eval CXXFLAGS=\$_pushedvar_CXXFLAGS_$_pushcnt_CXXFLAGS - echo "popping CXXFLAGS back to: $CXXFLAGS" >&5 - else - unset CXXFLAGS - echo "popping CXXFLAGS back to: " >&5 - fi - else - - -echo -echo "configure error: INTERNAL ERROR: GASNET_PUSH/POPVAR underflow on CXXFLAGS" -if test "" ; then -if test -f "conftest.$ac_ext" ; then - errfile=conftest.$ac_ext -else - errfile=gasnet_errsave_file -fi -if test -f "$errfile" ; then - echo - echo " --- Failed program --- " - cat $errfile - echo " -----------------------" -fi -fi -if test -f "conftest.err" ; then - errfile=conftest.err -else - errfile=gasnet_errsave_err -fi -if test -f "$errfile" ; then - echo - echo "Compilation error: " - echo - cat $errfile -fi -echo -CONFIG_FILE=`pwd`/config.log -as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 - - - fi - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_POPVAR(CXXFLAGS) ^^^^^^^^^^^^^^^^^^^^^^ (L:5) - - - - - - - fi - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_RUNCMD($gasnet_compile_cmd,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) - - - - rm -f $gasnet_testfile - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CXXCOMPILE_WITHWARN_NORETRY(...) 
^^^^^^^^^^^^^^^^^^^^^^ (L:3) - - - - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CXXFLAG(-wd1572) ^^^^^^^^^^^^^^^^^^^^^^ (L:2) - - - - ;; - XLC) - - - if test "$GASNETI_PTR_BITS" = 32 -a "$DISABLE_PPC64" != 1 -a "$cross_compiling" != "yes"; then - : - fi - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for PPC64 instruction support" >&5 -$as_echo_n "checking for PPC64 instruction support... " >&6; } - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CXXFLAGS,"$CXXFLAGS $MISC_CXXFLAGS") vvvvvvvvvvvvvvvvvvvvvv (L:2) - - if test "$_pushcnt_CXXFLAGS" = "" ; then - _pushcnt_CXXFLAGS=0 - fi - if test "$_total_pushcnt" = "" ; then - _total_pushcnt=0 - fi - if test "${CXXFLAGS+set}" = set; then - _gasnet_pushvar_isset=1 - else - _gasnet_pushvar_isset=0 - fi - eval _pushedvar_CXXFLAGS_$_pushcnt_CXXFLAGS=\$CXXFLAGS - eval _pushedvarset_CXXFLAGS_$_pushcnt_CXXFLAGS=$_gasnet_pushvar_isset - _pushcnt_CXXFLAGS=`expr $_pushcnt_CXXFLAGS + 1` - _total_pushcnt=`expr $_total_pushcnt + 1` - CXXFLAGS="$CXXFLAGS $MISC_CXXFLAGS" - echo "pushed new CXXFLAGS value: $CXXFLAGS" >&5 - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CXXFLAGS,"$CXXFLAGS $MISC_CXXFLAGS") ^^^^^^^^^^^^^^^^^^^^^^ (L:2) - - - - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CXXCOMPILE_WITHWARN(...) vvvvvvvvvvvvvvvvvvvvvv (L:2) - - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CXXCOMPILE_WITHWARN_NORETRY(...) 
vvvvvvvvvvvvvvvvvvvvvv (L:3) - - gasnet_compile_cmd="${CXX-c++} -c $CXXFLAGS $CPPFLAGS conftest.cc" - cat > conftest.cc < - #if (defined(__xlC__) || defined(__ibmxl__)) && !defined(__clang__) - static int testme(double *p); - #pragma mc_func testme { \\ - /* ARGS: r3 = p LOCAL: r4, r5 */ \\ - "38a00001" /* li r5,1 */ \\ - "38800000" /* li r4,0 */ \\ - "f8830000" /* std r4,0(r3) */ \\ - "e8a30000" /* ld r5,0(r3) */ \\ - "7ca32b78" /* mr r3,r5 */ \\ - } - #pragma reg_killed_by testme cr0, gr4, gr5 - #else - static int testme(double *p) { - int tmp, result; - __asm__ __volatile__ ( - "li %2,1 \n\t" - "li %1,0 \n\t" - "std %1,0(%3) \n\t" - "ld %2,0(%3) \n\t" - : "=m" (*p), "=&b" (tmp), "=&r" (result) - : "b" (p) ); - return result; - } - #endif + + #include + #if (defined(__xlC__) || defined(__ibmxl__)) && !defined(__clang__) + static int testme(double *p); + #pragma mc_func testme { \\ + /* ARGS: r3 = p LOCAL: r4, r5 */ \\ + "38a00001" /* li r5,1 */ \\ + "38800000" /* li r4,0 */ \\ + "f8830000" /* std r4,0(r3) */ \\ + "e8a30000" /* ld r5,0(r3) */ \\ + "7ca32b78" /* mr r3,r5 */ \\ + } + #pragma reg_killed_by testme cr0, gr4, gr5 + #else + static int testme(double *p) { + int tmp, result; + __asm__ __volatile__ ( + "li %2,1 \n\t" + "li %1,0 \n\t" + "std %1,0(%3) \n\t" + "ld %2,0(%3) \n\t" + : "=m" (*p), "=&b" (tmp), "=&r" (result) + : "b" (p) ); + return result; + } + #endif int main(void) { @@ -180381,15 +179567,144 @@ as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 ;; - ia64) + esac + case "$target_os" in + darwin*) + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CFLAG(${misc_flag_prefix}-Wno-long-double) vvvvvvvvvvvvvvvvvvvvvv (L:3) + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CFLAGS,"$CFLAGS ${misc_flag_prefix}-Wno-long-double") vvvvvvvvvvvvvvvvvvvvvv (L:4) + + if test "$_pushcnt_CFLAGS" = "" ; then + _pushcnt_CFLAGS=0 + fi + if test "$_total_pushcnt" = "" ; then + _total_pushcnt=0 + fi + if test "${CFLAGS+set}" = set; then + _gasnet_pushvar_isset=1 + else + _gasnet_pushvar_isset=0 + fi + eval _pushedvar_CFLAGS_$_pushcnt_CFLAGS=\$CFLAGS + eval _pushedvarset_CFLAGS_$_pushcnt_CFLAGS=$_gasnet_pushvar_isset + _pushcnt_CFLAGS=`expr $_pushcnt_CFLAGS + 1` + _total_pushcnt=`expr $_total_pushcnt + 1` + CFLAGS="$CFLAGS ${misc_flag_prefix}-Wno-long-double" + echo "pushed new CFLAGS value: $CFLAGS" >&5 + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CFLAGS,"$CFLAGS ${misc_flag_prefix}-Wno-long-double") ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler flag ${misc_flag_prefix}-Wno-long-double" >&5 +$as_echo_n "checking for C compiler flag ${misc_flag_prefix}-Wno-long-double... " >&6; } + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CCOMPILE_WITHWARN_NORETRY(...) vvvvvvvvvvvvvvvvvvvvvv (L:4) + + gasnet_compile_cmd="${CC-cc} -c $CFLAGS $CPPFLAGS conftest.c" + cat > conftest.c <&5 + ( $gasnet_compile_cmd ) > conftest-runcmdout 2> conftest-runcmderr + gasnet_cmd_result="$?" 
+ gasnet_cmd_stdout="`cat conftest-runcmdout`" + gasnet_cmd_stderr="`cat conftest-runcmderr`" + cat conftest-runcmdout >&5 + cat conftest-runcmderr >&5 + echo gasnet_cmd_result=$gasnet_cmd_result >&5 + rm -rf conftest-runcmdout conftest-runcmderr + if test "$gasnet_cmd_result" = "0" ; then + if test -z "$gasnet_cmd_stdout$gasnet_cmd_stderr" ; then + : + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_POPVAR(CFLAGS) vvvvvvvvvvvvvvvvvvvvvv (L:6) + + if test "$_pushcnt_CFLAGS" -ge "1"; then + _pushcnt_CFLAGS=`expr $_pushcnt_CFLAGS - 1` + _total_pushcnt=`expr $_total_pushcnt - 1` + eval _gasnet_pushvar_isset=\$_pushedvarset_CFLAGS_$_pushcnt_CFLAGS + if test "$_gasnet_pushvar_isset" = "1" ; then + eval CFLAGS=\$_pushedvar_CFLAGS_$_pushcnt_CFLAGS + echo "popping CFLAGS back to: $CFLAGS" >&5 + else + unset CFLAGS + echo "popping CFLAGS back to: " >&5 + fi + else + + +echo +echo "configure error: INTERNAL ERROR: GASNET_PUSH/POPVAR underflow on CFLAGS" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 + + + fi + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_POPVAR(CFLAGS) ^^^^^^^^^^^^^^^^^^^^^^ (L:6) + + + + -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CFLAG(-mtls-size=64) vvvvvvvvvvvvvvvvvvvvvv (L:3) +# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CFLAG(${misc_flag_prefix}-Wlong-double) vvvvvvvvvvvvvvvvvvvvvv (L:3) -# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CFLAGS,"$CFLAGS -mtls-size=64") vvvvvvvvvvvvvvvvvvvvvv (L:4) +# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CFLAGS,"$CFLAGS ${misc_flag_prefix}-Wlong-double") vvvvvvvvvvvvvvvvvvvvvv (L:4) if test "$_pushcnt_CFLAGS" = "" ; then _pushcnt_CFLAGS=0 @@ -180406,15 +179721,15 @@ as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 eval _pushedvarset_CFLAGS_$_pushcnt_CFLAGS=$_gasnet_pushvar_isset _pushcnt_CFLAGS=`expr $_pushcnt_CFLAGS + 1` _total_pushcnt=`expr $_total_pushcnt + 1` - CFLAGS="$CFLAGS -mtls-size=64" + CFLAGS="$CFLAGS ${misc_flag_prefix}-Wlong-double" echo "pushed new CFLAGS value: $CFLAGS" >&5 -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CFLAGS,"$CFLAGS -mtls-size=64") ^^^^^^^^^^^^^^^^^^^^^^ (L:4) +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CFLAGS,"$CFLAGS ${misc_flag_prefix}-Wlong-double") ^^^^^^^^^^^^^^^^^^^^^^ (L:4) -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler flag -mtls-size=64" >&5 -$as_echo_n "checking for C compiler flag -mtls-size=64... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler flag ${misc_flag_prefix}-Wlong-double" >&5 +$as_echo_n "checking for C compiler flag ${misc_flag_prefix}-Wlong-double... " >&6; } @@ -180506,7 +179821,7 @@ as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 - MISC_CFLAGS="$MISC_CFLAGS -mtls-size=64" + MISC_CFLAGS="$MISC_CFLAGS ${misc_flag_prefix}-Wno-long-double" else : @@ -180617,7 +179932,7 @@ $as_echo "no/warning: $_GASNET_TRY_CFLAG_TMP" >&6; } if test "$gasnet_cmd_stdout$gasnet_cmd_stderr" = "$_GASNET_TRY_CFLAG_TMP" ; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes/persistent-warning: $_GASNET_TRY_CFLAG_TMP" >&5 $as_echo "yes/persistent-warning: $_GASNET_TRY_CFLAG_TMP" >&6; } - MISC_CFLAGS="$MISC_CFLAGS -mtls-size=64" + MISC_CFLAGS="$MISC_CFLAGS ${misc_flag_prefix}-Wno-long-double" else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no/new-warning: $_GASNET_TRY_CFLAG_TMP" >&5 $as_echo "no/new-warning: $_GASNET_TRY_CFLAG_TMP" >&6; } @@ -180761,83 +180076,18 @@ as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CFLAG(-mtls-size=64) ^^^^^^^^^^^^^^^^^^^^^^ (L:3) - - -;; - esac - case "$target_os" in - darwin*) - - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CFLAG(${misc_flag_prefix}-Wno-long-double) vvvvvvvvvvvvvvvvvvvvvv (L:3) - - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CFLAGS,"$CFLAGS ${misc_flag_prefix}-Wno-long-double") vvvvvvvvvvvvvvvvvvvvvv (L:4) - - if test "$_pushcnt_CFLAGS" = "" ; then - _pushcnt_CFLAGS=0 - fi - if test "$_total_pushcnt" = "" ; then - _total_pushcnt=0 - fi - if test "${CFLAGS+set}" = set; then - _gasnet_pushvar_isset=1 - else - _gasnet_pushvar_isset=0 - fi - eval _pushedvar_CFLAGS_$_pushcnt_CFLAGS=\$CFLAGS - eval _pushedvarset_CFLAGS_$_pushcnt_CFLAGS=$_gasnet_pushvar_isset - _pushcnt_CFLAGS=`expr $_pushcnt_CFLAGS + 1` - _total_pushcnt=`expr $_total_pushcnt + 1` - CFLAGS="$CFLAGS ${misc_flag_prefix}-Wno-long-double" - echo "pushed new CFLAGS value: $CFLAGS" >&5 - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CFLAGS,"$CFLAGS ${misc_flag_prefix}-Wno-long-double") ^^^^^^^^^^^^^^^^^^^^^^ (L:4) - - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler flag ${misc_flag_prefix}-Wno-long-double" >&5 -$as_echo_n "checking 
for C compiler flag ${misc_flag_prefix}-Wno-long-double... " >&6; } - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CCOMPILE_WITHWARN_NORETRY(...) vvvvvvvvvvvvvvvvvvvvvv (L:4) - - gasnet_compile_cmd="${CC-cc} -c $CFLAGS $CPPFLAGS conftest.c" - cat > conftest.c <&5 - ( $gasnet_compile_cmd ) > conftest-runcmdout 2> conftest-runcmderr - gasnet_cmd_result="$?" - gasnet_cmd_stdout="`cat conftest-runcmdout`" - gasnet_cmd_stderr="`cat conftest-runcmderr`" - cat conftest-runcmdout >&5 - cat conftest-runcmderr >&5 - echo gasnet_cmd_result=$gasnet_cmd_result >&5 - rm -rf conftest-runcmdout conftest-runcmderr - if test "$gasnet_cmd_result" = "0" ; then - if test -z "$gasnet_cmd_stdout$gasnet_cmd_stderr" ; then + else : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } + echo "configure: warned program was:" >&5 + cat $gasnet_testfile >&5 + + _GASNET_TRY_CFLAG_TMP="$gasnet_cmd_stdout$gasnet_cmd_stderr" @@ -180894,500 +180144,52 @@ as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CFLAG(${misc_flag_prefix}-Wlong-double) vvvvvvvvvvvvvvvvvvvvvv (L:3) - - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CFLAGS,"$CFLAGS ${misc_flag_prefix}-Wlong-double") vvvvvvvvvvvvvvvvvvvvvv (L:4) - - if test "$_pushcnt_CFLAGS" = "" ; then - _pushcnt_CFLAGS=0 - fi - if test "$_total_pushcnt" = "" ; then - _total_pushcnt=0 - fi - if test "${CFLAGS+set}" = set; then - _gasnet_pushvar_isset=1 - else - _gasnet_pushvar_isset=0 - fi - eval _pushedvar_CFLAGS_$_pushcnt_CFLAGS=\$CFLAGS - eval _pushedvarset_CFLAGS_$_pushcnt_CFLAGS=$_gasnet_pushvar_isset - _pushcnt_CFLAGS=`expr $_pushcnt_CFLAGS + 1` - _total_pushcnt=`expr $_total_pushcnt + 1` - CFLAGS="$CFLAGS ${misc_flag_prefix}-Wlong-double" - echo "pushed new CFLAGS value: $CFLAGS" >&5 - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CFLAGS,"$CFLAGS ${misc_flag_prefix}-Wlong-double") ^^^^^^^^^^^^^^^^^^^^^^ (L:4) - - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler flag ${misc_flag_prefix}-Wlong-double" >&5 -$as_echo_n "checking for C compiler flag ${misc_flag_prefix}-Wlong-double... " >&6; } - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CCOMPILE_WITHWARN_NORETRY(...) vvvvvvvvvvvvvvvvvvvvvv (L:4) - - gasnet_compile_cmd="${CC-cc} -c $CFLAGS $CPPFLAGS conftest.c" - cat > conftest.c <&5 - ( $gasnet_compile_cmd ) > conftest-runcmdout 2> conftest-runcmderr - gasnet_cmd_result="$?" 
- gasnet_cmd_stdout="`cat conftest-runcmdout`" - gasnet_cmd_stderr="`cat conftest-runcmderr`" - cat conftest-runcmdout >&5 - cat conftest-runcmderr >&5 - echo gasnet_cmd_result=$gasnet_cmd_result >&5 - rm -rf conftest-runcmdout conftest-runcmderr - if test "$gasnet_cmd_result" = "0" ; then - if test -z "$gasnet_cmd_stdout$gasnet_cmd_stderr" ; then - : - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_POPVAR(CFLAGS) vvvvvvvvvvvvvvvvvvvvvv (L:6) - - if test "$_pushcnt_CFLAGS" -ge "1"; then - _pushcnt_CFLAGS=`expr $_pushcnt_CFLAGS - 1` - _total_pushcnt=`expr $_total_pushcnt - 1` - eval _gasnet_pushvar_isset=\$_pushedvarset_CFLAGS_$_pushcnt_CFLAGS - if test "$_gasnet_pushvar_isset" = "1" ; then - eval CFLAGS=\$_pushedvar_CFLAGS_$_pushcnt_CFLAGS - echo "popping CFLAGS back to: $CFLAGS" >&5 - else - unset CFLAGS - echo "popping CFLAGS back to: " >&5 - fi - else - - -echo -echo "configure error: INTERNAL ERROR: GASNET_PUSH/POPVAR underflow on CFLAGS" -if test "" ; then -if test -f "conftest.$ac_ext" ; then - errfile=conftest.$ac_ext -else - errfile=gasnet_errsave_file -fi -if test -f "$errfile" ; then - echo - echo " --- Failed program --- " - cat $errfile - echo " -----------------------" -fi -fi -if test -f "conftest.err" ; then - errfile=conftest.err -else - errfile=gasnet_errsave_err -fi -if test -f "$errfile" ; then - echo - echo "Compilation error: " - echo - cat $errfile -fi -echo -CONFIG_FILE=`pwd`/config.log -as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 - - - fi - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_POPVAR(CFLAGS) ^^^^^^^^^^^^^^^^^^^^^^ (L:6) - - - - MISC_CFLAGS="$MISC_CFLAGS ${misc_flag_prefix}-Wno-long-double" - - else - : - - echo "configure: warned program was:" >&5 - cat $gasnet_testfile >&5 - - _GASNET_TRY_CFLAG_TMP="$gasnet_cmd_stdout$gasnet_cmd_stderr" - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_POPVAR(CFLAGS) vvvvvvvvvvvvvvvvvvvvvv (L:6) - - if test "$_pushcnt_CFLAGS" -ge "1"; then - _pushcnt_CFLAGS=`expr $_pushcnt_CFLAGS - 1` - _total_pushcnt=`expr $_total_pushcnt - 1` - eval _gasnet_pushvar_isset=\$_pushedvarset_CFLAGS_$_pushcnt_CFLAGS - if test "$_gasnet_pushvar_isset" = "1" ; then - eval CFLAGS=\$_pushedvar_CFLAGS_$_pushcnt_CFLAGS - echo "popping CFLAGS back to: $CFLAGS" >&5 - else - unset CFLAGS - echo "popping CFLAGS back to: " >&5 - fi - else - - -echo -echo "configure error: INTERNAL ERROR: GASNET_PUSH/POPVAR underflow on CFLAGS" -if test "" ; then -if test -f "conftest.$ac_ext" ; then - errfile=conftest.$ac_ext -else - errfile=gasnet_errsave_file -fi -if test -f "$errfile" ; then - echo - echo " --- Failed program --- " - cat $errfile - echo " -----------------------" -fi -fi -if test -f "conftest.err" ; then - errfile=conftest.err -else - errfile=gasnet_errsave_err -fi -if test -f "$errfile" ; then - echo - echo "Compilation error: " - echo - cat $errfile -fi -echo -CONFIG_FILE=`pwd`/config.log -as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 - - - fi - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_POPVAR(CFLAGS) ^^^^^^^^^^^^^^^^^^^^^^ (L:6) - - - - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CCOMPILE_WITHWARN_NORETRY(...) vvvvvvvvvvvvvvvvvvvvvv (L:6) - - gasnet_compile_cmd="${CC-cc} -c $CFLAGS $CPPFLAGS conftest.c" - cat > conftest.c <&5 - ( $gasnet_compile_cmd ) > conftest-runcmdout 2> conftest-runcmderr - gasnet_cmd_result="$?" 
- gasnet_cmd_stdout="`cat conftest-runcmdout`" - gasnet_cmd_stderr="`cat conftest-runcmderr`" - cat conftest-runcmdout >&5 - cat conftest-runcmderr >&5 - echo gasnet_cmd_result=$gasnet_cmd_result >&5 - rm -rf conftest-runcmdout conftest-runcmderr - if test "$gasnet_cmd_result" = "0" ; then - if test -z "$gasnet_cmd_stdout$gasnet_cmd_stderr" ; then - : - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no/warning: $_GASNET_TRY_CFLAG_TMP" >&5 -$as_echo "no/warning: $_GASNET_TRY_CFLAG_TMP" >&6; } - - - else - : - - echo "configure: warned program was:" >&5 - cat $gasnet_testfile >&5 - - if test "$gasnet_cmd_stdout$gasnet_cmd_stderr" = "$_GASNET_TRY_CFLAG_TMP" ; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes/persistent-warning: $_GASNET_TRY_CFLAG_TMP" >&5 -$as_echo "yes/persistent-warning: $_GASNET_TRY_CFLAG_TMP" >&6; } - MISC_CFLAGS="$MISC_CFLAGS ${misc_flag_prefix}-Wno-long-double" - else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no/new-warning: $_GASNET_TRY_CFLAG_TMP" >&5 -$as_echo "no/new-warning: $_GASNET_TRY_CFLAG_TMP" >&6; } - - fi - - - fi - else - : - - echo "configure: failed program was:" >&5 - cat $gasnet_testfile >&5 - - -echo -echo "configure error: unknown failure case in TRY_CFLAG" -if test "" ; then -if test -f "conftest.$ac_ext" ; then - errfile=conftest.$ac_ext -else - errfile=gasnet_errsave_file -fi -if test -f "$errfile" ; then - echo - echo " --- Failed program --- " - cat $errfile - echo " -----------------------" -fi -fi -if test -f "conftest.err" ; then - errfile=conftest.err -else - errfile=gasnet_errsave_err -fi -if test -f "$errfile" ; then - echo - echo "Compilation error: " - echo - cat $errfile -fi -echo -CONFIG_FILE=`pwd`/config.log -as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 - - - - - fi - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_RUNCMD($gasnet_compile_cmd,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:7) - - - - rm -f $gasnet_testfile - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CCOMPILE_WITHWARN_NORETRY(...) 
^^^^^^^^^^^^^^^^^^^^^^ (L:6) - - - - - - fi - else - : - - echo "configure: failed program was:" >&5 - cat $gasnet_testfile >&5 - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no/error: $gasnet_cmd_stdout$gasnet_cmd_stderr" >&5 -$as_echo "no/error: $gasnet_cmd_stdout$gasnet_cmd_stderr" >&6; } - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_POPVAR(CFLAGS) vvvvvvvvvvvvvvvvvvvvvv (L:6) - - if test "$_pushcnt_CFLAGS" -ge "1"; then - _pushcnt_CFLAGS=`expr $_pushcnt_CFLAGS - 1` - _total_pushcnt=`expr $_total_pushcnt - 1` - eval _gasnet_pushvar_isset=\$_pushedvarset_CFLAGS_$_pushcnt_CFLAGS - if test "$_gasnet_pushvar_isset" = "1" ; then - eval CFLAGS=\$_pushedvar_CFLAGS_$_pushcnt_CFLAGS - echo "popping CFLAGS back to: $CFLAGS" >&5 - else - unset CFLAGS - echo "popping CFLAGS back to: " >&5 - fi - else - - -echo -echo "configure error: INTERNAL ERROR: GASNET_PUSH/POPVAR underflow on CFLAGS" -if test "" ; then -if test -f "conftest.$ac_ext" ; then - errfile=conftest.$ac_ext -else - errfile=gasnet_errsave_file -fi -if test -f "$errfile" ; then - echo - echo " --- Failed program --- " - cat $errfile - echo " -----------------------" -fi -fi -if test -f "conftest.err" ; then - errfile=conftest.err -else - errfile=gasnet_errsave_err -fi -if test -f "$errfile" ; then - echo - echo "Compilation error: " - echo - cat $errfile -fi -echo -CONFIG_FILE=`pwd`/config.log -as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 - - - fi - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_POPVAR(CFLAGS) ^^^^^^^^^^^^^^^^^^^^^^ (L:6) - - - - - - - fi - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_RUNCMD($gasnet_compile_cmd,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:5) - - - - rm -f $gasnet_testfile - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CCOMPILE_WITHWARN_NORETRY(...) 
^^^^^^^^^^^^^^^^^^^^^^ (L:4) - - - - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CFLAG(${misc_flag_prefix}-Wlong-double) ^^^^^^^^^^^^^^^^^^^^^^ (L:3) - - - - - else - : - - echo "configure: warned program was:" >&5 - cat $gasnet_testfile >&5 - - _GASNET_TRY_CFLAG_TMP="$gasnet_cmd_stdout$gasnet_cmd_stderr" - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_POPVAR(CFLAGS) vvvvvvvvvvvvvvvvvvvvvv (L:6) - - if test "$_pushcnt_CFLAGS" -ge "1"; then - _pushcnt_CFLAGS=`expr $_pushcnt_CFLAGS - 1` - _total_pushcnt=`expr $_total_pushcnt - 1` - eval _gasnet_pushvar_isset=\$_pushedvarset_CFLAGS_$_pushcnt_CFLAGS - if test "$_gasnet_pushvar_isset" = "1" ; then - eval CFLAGS=\$_pushedvar_CFLAGS_$_pushcnt_CFLAGS - echo "popping CFLAGS back to: $CFLAGS" >&5 - else - unset CFLAGS - echo "popping CFLAGS back to: " >&5 - fi - else - - -echo -echo "configure error: INTERNAL ERROR: GASNET_PUSH/POPVAR underflow on CFLAGS" -if test "" ; then -if test -f "conftest.$ac_ext" ; then - errfile=conftest.$ac_ext -else - errfile=gasnet_errsave_file -fi -if test -f "$errfile" ; then - echo - echo " --- Failed program --- " - cat $errfile - echo " -----------------------" -fi -fi -if test -f "conftest.err" ; then - errfile=conftest.err -else - errfile=gasnet_errsave_err -fi -if test -f "$errfile" ; then - echo - echo "Compilation error: " - echo - cat $errfile -fi -echo -CONFIG_FILE=`pwd`/config.log -as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 - - - fi - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_POPVAR(CFLAGS) ^^^^^^^^^^^^^^^^^^^^^^ (L:6) - - - - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CCOMPILE_WITHWARN_NORETRY(...) vvvvvvvvvvvvvvvvvvvvvv (L:6) - - gasnet_compile_cmd="${CC-cc} -c $CFLAGS $CPPFLAGS conftest.c" - cat > conftest.c <&5 - ( $gasnet_compile_cmd ) > conftest-runcmdout 2> conftest-runcmderr - gasnet_cmd_result="$?" 
- gasnet_cmd_stdout="`cat conftest-runcmdout`" - gasnet_cmd_stderr="`cat conftest-runcmderr`" - cat conftest-runcmdout >&5 - cat conftest-runcmderr >&5 - echo gasnet_cmd_result=$gasnet_cmd_result >&5 - rm -rf conftest-runcmdout conftest-runcmderr - if test "$gasnet_cmd_result" = "0" ; then - if test -z "$gasnet_cmd_stdout$gasnet_cmd_stderr" ; then - : - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no/warning: $_GASNET_TRY_CFLAG_TMP" >&5 -$as_echo "no/warning: $_GASNET_TRY_CFLAG_TMP" >&6; } - - - else - : - - echo "configure: warned program was:" >&5 - cat $gasnet_testfile >&5 - - if test "$gasnet_cmd_stdout$gasnet_cmd_stderr" = "$_GASNET_TRY_CFLAG_TMP" ; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes/persistent-warning: $_GASNET_TRY_CFLAG_TMP" >&5 -$as_echo "yes/persistent-warning: $_GASNET_TRY_CFLAG_TMP" >&6; } + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CCOMPILE_WITHWARN_NORETRY(...) vvvvvvvvvvvvvvvvvvvvvv (L:6) + + gasnet_compile_cmd="${CC-cc} -c $CFLAGS $CPPFLAGS conftest.c" + cat > conftest.c <&5 + ( $gasnet_compile_cmd ) > conftest-runcmdout 2> conftest-runcmderr + gasnet_cmd_result="$?" 
+ gasnet_cmd_stdout="`cat conftest-runcmdout`" + gasnet_cmd_stderr="`cat conftest-runcmderr`" + cat conftest-runcmdout >&5 + cat conftest-runcmderr >&5 + echo gasnet_cmd_result=$gasnet_cmd_result >&5 + rm -rf conftest-runcmdout conftest-runcmderr + if test "$gasnet_cmd_result" = "0" ; then + if test -z "$gasnet_cmd_stdout$gasnet_cmd_stderr" ; then + : + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no/warning: $_GASNET_TRY_CFLAG_TMP" >&5 +$as_echo "no/warning: $_GASNET_TRY_CFLAG_TMP" >&6; } + + + else + : + + echo "configure: warned program was:" >&5 + cat $gasnet_testfile >&5 + + if test "$gasnet_cmd_stdout$gasnet_cmd_stderr" = "$_GASNET_TRY_CFLAG_TMP" ; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes/persistent-warning: $_GASNET_TRY_CFLAG_TMP" >&5 +$as_echo "yes/persistent-warning: $_GASNET_TRY_CFLAG_TMP" >&6; } @@ -249036,34 +247838,10 @@ fi + # Multirail support options + # ibv-conduit/README explains the interactions gasnetc_ibv_max_hcas=2 - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_IF_ENABLED(ibv-multirail,...) vvvvvvvvvvvvvvvvvvvvvv (L:4) - - - # Check whether --enable-ibv-multirail was given. -if test "${enable_ibv_multirail+set}" = set; then : - enableval=$enable_ibv_multirail; -fi - - - -case "$enable_ibv_multirail" in - '' | no) : - enabled_ibv_multirail=no - ;; - *) : - enabled_ibv_multirail=yes - ;; -esac - - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_IF_ENABLED(ibv-multirail,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) - - - + enabled_ibv_multirail=no @@ -249088,6 +247866,7 @@ fi *) : if expr "${withval}" : "[1-9][0-9]*" >/dev/null; then gasnetc_ibv_max_hcas="$withval" + enabled_ibv_multirail=yes else as_fn_error $? "--with-ibv-max-hcas requires a positive integer argument" "$LINENO" 5 fi @@ -249104,13 +247883,86 @@ fi + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_IF_ENABLED_WITH_AUTO(ibv-multirail,...) vvvvvvvvvvvvvvvvvvvvvv (L:4) + + + # Check whether --enable-ibv-multirail was given. 
+if test "${enable_ibv_multirail+set}" = set; then : + enableval=$enable_ibv_multirail; +fi + + + case "$enable_ibv_multirail" in + no) : + enabled_ibv_multirail=no + ;; + yes) : + enabled_ibv_multirail=yes + ;; + *) : + + ;; + esac + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_IF_ENABLED_WITH_AUTO(ibv-multirail,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + if test "$enabled_ibv_multirail" = yes; then cat >>confdefs.h <<_ACEOF -#define GASNETC_IBV_MAX_HCAS $gasnetc_ibv_max_hcas +#define GASNETC_IBV_MAX_HCAS_CONFIGURE $gasnetc_ibv_max_hcas _ACEOF fi + # Default GASNET_IBV_PORTS + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_WITH(ibv-ports,...) vvvvvvvvvvvvvvvvvvvvvv (L:4) + + + +# Check whether --with-ibv-ports was given. +if test "${with_ibv_ports+set}" = set; then : + withval=$with_ibv_ports; +fi + + + + + if test "${with_ibv_ports+set}" = set; then : + withval=$with_ibv_ports; + case "$withval" in + no) : + + ;; + *) : + if test "$withval" = 'yes'; then + as_fn_error $? "--with-ibv-ports requires an argument" "$LINENO" 5 + else + cat >>confdefs.h <<_ACEOF +#define GASNETC_IBV_PORTS_CONFIGURE "$withval" +_ACEOF + + fi + ;; + esac + else + : + + fi + + + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_WITH(ibv-ports,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + @@ -249489,6 +248341,7 @@ fi if test "$enabled_ibv_odp" = yes; then + have_ibv_odp=no @@ -249573,17 +248426,16 @@ fi -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CACHE_LINK(for ibv ODP support,ibv_odp_support,...) vvvvvvvvvvvvvvvvvvvvvv (L:4) +# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CACHE_LINK(for ibv ODP support (Mellanox),ibv_odp_support_mlnx,...) vvvvvvvvvvvvvvvvvvvvvv (L:4) -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ibv ODP support" >&5 -$as_echo_n "checking for ibv ODP support... " >&6; } -if ${gasnet_cv_ibv_odp_support+:} false; then : +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ibv ODP support (Mellanox)" >&5 +$as_echo_n "checking for ibv ODP support (Mellanox)... 
" >&6; } +if ${gasnet_cv_ibv_odp_support_mlnx+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ - #include #include int @@ -249607,24 +248459,86 @@ main () } _ACEOF if ac_fn_c_try_link "$LINENO"; then : - gasnet_cv_ibv_odp_support=yes + gasnet_cv_ibv_odp_support_mlnx=yes +else + gasnet_cv_ibv_odp_support_mlnx=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gasnet_cv_ibv_odp_support_mlnx" >&5 +$as_echo "$gasnet_cv_ibv_odp_support_mlnx" >&6; } +if test "$gasnet_cv_ibv_odp_support_mlnx" = yes; then + : + + have_ibv_odp=yes + $as_echo "#define GASNETC_IBV_ODP_MLNX 1" >>confdefs.h + + +else + : + +fi + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CACHE_LINK(for ibv ODP support (Mellanox),ibv_odp_support_mlnx,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CACHE_LINK(for ibv ODP support (RDMA Core),ibv_odp_support_core,...) vvvvvvvvvvvvvvvvvvvvvv (L:4) + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ibv ODP support (RDMA Core)" >&5 +$as_echo_n "checking for ibv ODP support (RDMA Core)... " >&6; } +if ${gasnet_cv_ibv_odp_support_core+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + #include + +int +main () +{ + + struct ibv_device_attr_ex attr; + attr.comp_mask = 0; + int ret = ibv_query_device_ex(NULL, NULL, &attr); + int ok = (attr.odp_caps.general_caps & IBV_ODP_SUPPORT) && + (attr.odp_caps.general_caps & IBV_ODP_SUPPORT_IMPLICIT) && + (attr.odp_caps.per_transport_caps.rc_odp_caps & IBV_ODP_SUPPORT_READ) && + (attr.odp_caps.per_transport_caps.rc_odp_caps & IBV_ODP_SUPPORT_WRITE); + struct ibv_mr *mr = ibv_reg_mr(NULL, NULL, SIZE_MAX, IBV_ACCESS_ON_DEMAND | IBV_ACCESS_LOCAL_WRITE); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + gasnet_cv_ibv_odp_support_core=yes else - gasnet_cv_ibv_odp_support=no + gasnet_cv_ibv_odp_support_core=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gasnet_cv_ibv_odp_support" >&5 -$as_echo "$gasnet_cv_ibv_odp_support" >&6; } -if test "$gasnet_cv_ibv_odp_support" = yes; then +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gasnet_cv_ibv_odp_support_core" >&5 +$as_echo "$gasnet_cv_ibv_odp_support_core" >&6; } +if test "$gasnet_cv_ibv_odp_support_core" = yes; then : - have_ibv_odp=yes + + have_ibv_odp=yes + $as_echo "#define GASNETC_IBV_ODP_CORE 1" >>confdefs.h + + else : - have_ibv_odp=no + fi -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CACHE_LINK(for ibv ODP support,ibv_odp_support,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CACHE_LINK(for ibv ODP support (RDMA Core),ibv_odp_support_core,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) @@ -249855,29 +248769,13 @@ fi withval=$with_ibv_max_medium; case "$withval" in no) : - -if test -f "conftest.$ac_ext" ; then - cp conftest.$ac_ext gasnet_errsave_file -fi -if test -f "conftest.err" ; then - cp conftest.err gasnet_errsave_err -fi - - as_fn_error $? "--with-ibv-max-medium must be a power-of-two between 1024 and 262144, inclusive." "$LINENO" 5 + as_fn_error $? 
"--with-ibv-max-medium must be a power-of-two between 1024 and 262144, inclusive." "$LINENO" 5 ;; *) : case "$withval" in 1024|2048|4096|8192|16384|32768|65536|131072|262144) ibv_max_medium="$withval";; *) - -if test -f "conftest.$ac_ext" ; then - cp conftest.$ac_ext gasnet_errsave_file -fi -if test -f "conftest.err" ; then - cp conftest.err gasnet_errsave_err -fi - as_fn_error $? "--with-ibv-max-medium=\"$withval\" is not legal. Value must be a power-of-two between 1024 and 262144, inclusive." "$LINENO" 5 ;; esac @@ -251699,14 +250597,6 @@ $as_echo "no" >&6; } : if test "$gni_udreg" = yes; then - -if test -f "conftest.$ac_ext" ; then - cp conftest.$ac_ext gasnet_errsave_file -fi -if test -f "conftest.err" ; then - cp conftest.err gasnet_errsave_err -fi - as_fn_error $? "--enable-aries-udreg was passed but udreg_pub.h header was not found" "$LINENO" 5 fi @@ -251740,32 +250630,17 @@ fi withval=$with_aries_max_medium; case "$withval" in no) : - -if test -f "conftest.$ac_ext" ; then - cp conftest.$ac_ext gasnet_errsave_file -fi -if test -f "conftest.err" ; then - cp conftest.err gasnet_errsave_err -fi - - as_fn_error $? "--with-aries-max-medium must be a multiple of 64 and no smaller than 512" "$LINENO" 5 + as_fn_error $? 
"--with-aries-max-medium must be a multiple of 64 and no smaller than 512" "$LINENO" 5 ;; *) : - if expr "$withval" : '[0-9]*$' >/dev/null 2>&1 && \ - expr \( "$withval" / 64 \) \* 64 = "$withval" >/dev/null 2>&1 && - expr "$withval" '>=' 512 >/dev/null 2>&1 && - expr "$withval" '<' 65536 >/dev/null 2>&1 ; then + tmpval=`echo "$withval" | $AWK '{sub(/^\+/,"");}1'` # strip leading '+' + if expr "$tmpval" : '[0-9]*$' >/dev/null 2>&1 && \ + expr \( "$tmpval" / 64 \) \* 64 = "$tmpval" >/dev/null 2>&1 && + expr "$tmpval" '>=' 512 >/dev/null 2>&1 && + expr "$tmpval" '<=' 65408 >/dev/null 2>&1 ; then gni_max_medium="$withval" else - -if test -f "conftest.$ac_ext" ; then - cp conftest.$ac_ext gasnet_errsave_file -fi -if test -f "conftest.err" ; then - cp conftest.err gasnet_errsave_err -fi - - as_fn_error $? "--with-aries-max-medium=\"$withval\" is not legal. Value must be a multiple of 64, between 512 and 65472, inclusive." "$LINENO" 5 + as_fn_error $? "--with-aries-max-medium=\"$withval\" is invalid. Value must be a multiple of 64, between 512 and 65408, inclusive. Unless prefixed with '+', the value will be rounded-down to the next recommended value. See the aries-conduit README for additional details." "$LINENO" 5 fi ;; @@ -251782,7 +250657,7 @@ fi cat >>confdefs.h <<_ACEOF -#define GASNETC_GNI_MAX_MEDIUM $gni_max_medium +#define GASNETC_GNI_MAX_MEDIUM_DFLT "$gni_max_medium" _ACEOF @@ -252026,7 +250901,14 @@ fi - _conduit_list_ofi=no + _conduit_list_ofi=yes + + + + + + + @@ -252041,6 +250923,12 @@ fi # vvvvvvvvvvvvvvvvvvvvvv GASNET_IF_ENABLED_WITH_AUTO(ofi,...) vvvvvvvvvvvvvvvvvvvvvv (L:4) + # Check whether --enable-ofi was given. +if test "${enable_ofi+set}" = set; then : + enableval=$enable_ofi; +fi + + case "$enable_ofi" in no) : enabled_ofi=no; _conduit_reason_ofi=disabled @@ -252153,6 +251041,13 @@ done +# Check whether --with-ofi-home was given. 
+if test "${with_ofi_home+set}" = set; then : + withval=$with_ofi_home; +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for OFI_HOME setting" >&5 @@ -252387,6 +251282,13 @@ $as_echo "probably not - missing include/rdma/fabric.h" >&6; } +# Check whether --with-ofi-cflags was given. +if test "${with_ofi_cflags+set}" = set; then : + withval=$with_ofi_cflags; +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for OFI_CFLAGS setting" >&5 @@ -252558,6 +251460,13 @@ as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 +# Check whether --with-ofi-libs was given. +if test "${with_ofi_libs+set}" = set; then : + withval=$with_ofi_libs; +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for OFI_LIBS setting" >&5 @@ -252738,6 +251647,13 @@ as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 +# Check whether --with-ofi-ldflags was given. +if test "${with_ofi_ldflags+set}" = set; then : + withval=$with_ofi_ldflags; +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for OFI_LDFLAGS setting" >&5 @@ -253055,6 +251971,7 @@ fi + # vvvvvvvvvvvvvvvvvvvvvv GASNET_POPVAR(CFLAGS) vvvvvvvvvvvvvvvvvvvvvv (L:4) if test "$_pushcnt_CFLAGS" -ge "1"; then @@ -253223,6 +252140,61 @@ as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 fi if test "$have_ofi" = yes; then + # What we actually care about is GASNETI_HAVE_SPINLOCK, which is available + # whenever !GASNETI_ATOMICOPS_NOT_SIGNALSAFE + # For all platforms of likely interest, checking (compiler||native) is sufficient. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for appropriate atomics support required by ofi-conduit" >&5 +$as_echo_n "checking for appropriate atomics support required by ofi-conduit... 
" >&6; } + + check_atomics_match='' + for check_atomics_val in native compiler; do + if test $GASNETI_ATOMIC_IMPL_STR = $check_atomics_val; then + check_atomics_match=1 + break + fi + done + if test -n "$check_atomics_match"; then + : + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + + else + if test -n "$enabled_generic_atomicops" ; then + check_atomics_msg='user passed --enable-force-generic-atomicops' + elif test -n "$enabled_os_atomicops"; then + check_atomics_msg='user passed --enable-force-os-atomicops' + else + check_atomics_msg="this platform has defaulted to '$GASNETI_ATOMIC_IMPL_STR' atomicops" + fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_MSG_WARN() vvvvvvvvvvvvvvvvvvvvvv (L:4) + + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: ofi-conduit requires native atomicops, but $check_atomics_msg" >&5 +$as_echo "$as_me: WARNING: ofi-conduit requires native atomicops, but $check_atomics_msg" >&2;} + echo "ofi-conduit requires native atomicops, but $check_atomics_msg" >> ".gasnet_cv_configure_warnings.tmp" + echo " " >> ".gasnet_cv_configure_warnings.tmp" + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_MSG_WARN() ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + have_ofi=no + + unset check_atomics_msg + fi + unset check_atomics_match + + fi + + if test "$have_ofi" = yes; then @@ -253233,6 +252205,13 @@ as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 +# Check whether --with-ofi-spawner was given. +if test "${with_ofi_spawner+set}" = set; then : + withval=$with_ofi_spawner; +fi + + + if test "${with_ofi_spawner+set}" = set; then : withval=$with_ofi_spawner; @@ -253296,6 +252275,13 @@ as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 +# Check whether --with-ofi-provider was given. 
+if test "${with_ofi_provider+set}" = set; then : + withval=$with_ofi_provider; +fi + + + if test "${with_ofi_provider+set}" = set; then : withval=$with_ofi_provider; @@ -253478,6 +252464,12 @@ _ACEOF # vvvvvvvvvvvvvvvvvvvvvv GASNET_IF_ENABLED_WITH_AUTO(ofi-thread-domain,...) vvvvvvvvvvvvvvvvvvvvvv (L:4) + # Check whether --enable-ofi-thread-domain was given. +if test "${enable_ofi_thread_domain+set}" = set; then : + enableval=$enable_ofi_thread_domain; +fi + + case "$enable_ofi_thread_domain" in no) : ofi_use_thread_domain="0" @@ -253501,6 +252493,12 @@ _ACEOF # vvvvvvvvvvvvvvvvvvvvvv GASNET_IF_ENABLED_WITH_AUTO(ofi-mr-scalable,...) vvvvvvvvvvvvvvvvvvvvvv (L:4) + # Check whether --enable-ofi-mr-scalable was given. +if test "${enable_ofi_mr_scalable+set}" = set; then : + enableval=$enable_ofi_mr_scalable; +fi + + case "$enable_ofi_mr_scalable" in no) : ofi_define_mr_scalable="0" @@ -253564,6 +252562,13 @@ _ACEOF +# Check whether --with-ofi-num-completions was given. +if test "${with_ofi_num_completions+set}" = set; then : + withval=$with_ofi_num_completions; +fi + + + if test "${with_ofi_num_completions+set}" = set; then : withval=$with_ofi_num_completions; @@ -253604,6 +252609,13 @@ _ACEOF +# Check whether --with-ofi-max-medium was given. +if test "${with_ofi_max_medium+set}" = set; then : + withval=$with_ofi_max_medium; +fi + + + if test "${with_ofi_max_medium+set}" = set; then : withval=$with_ofi_max_medium; @@ -253645,7 +252657,7 @@ fi _conduit_enabled_ofi=no if test "$gasnet_toolsonly_mode" = "no" ; then if test "$enabled_ofi$have_ofi" = yesyes; then - case "g1" in + case "exp" in no) ;; g1) @@ -253824,8 +252836,6 @@ as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 - - if test "$enabled_ofi$have_ofi" = yesyes; then USE_OFI_CONDUIT_TRUE= USE_OFI_CONDUIT_FALSE='#' @@ -259006,902 +258016,3832 @@ done fi fi - - - - - - -# Runtime Var Arrays -#GASNET_IF_ENABLED_WITH_AUTO(vararray, Use stack arrays of variable size in code, -#AC_DEFINE(VARARRAY_WORKS), -#, -#GASNET_TRY_CACHE_CHECK(for variable stack arrays, cc_vararray_mod, -# [void dummy(int x) { char y[x]; }], [], -# AC_DEFINE(VARARRAY_WORKS))) - ######################################################################## -# different high-precision sleep libraries - -# Check for usleep - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CACHE_CHECK(for usleep(),cc_has_usleep,...) vvvvvvvvvvvvvvvvvvvvvv (L:4) - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for usleep()" >&5 -$as_echo_n "checking for usleep()... " >&6; } -if ${gasnet_cv_cc_has_usleep+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -#include - -int -main () -{ - -usleep(500); - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - gasnet_cv_cc_has_usleep=yes -else - gasnet_cv_cc_has_usleep=no -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gasnet_cv_cc_has_usleep" >&5 -$as_echo "$gasnet_cv_cc_has_usleep" >&6; } -if test "$gasnet_cv_cc_has_usleep" = yes; then - : - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing usleep" >&5 -$as_echo_n "checking for library containing usleep... " >&6; } -if ${ac_cv_search_usleep+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_func_search_save_LIBS=$LIBS -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif -char usleep (); -int -main () -{ -return usleep (); - ; - return 0; -} -_ACEOF -for ac_lib in '' posix4; do - if test -z "$ac_lib"; then - ac_res="none required" - else - ac_res=-l$ac_lib - LIBS="-l$ac_lib $ac_func_search_save_LIBS" - fi - if ac_fn_c_try_link "$LINENO"; then : - ac_cv_search_usleep=$ac_res -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext - if ${ac_cv_search_usleep+:} false; then : - break -fi -done -if ${ac_cv_search_usleep+:} false; then : - -else - ac_cv_search_usleep=no -fi -rm conftest.$ac_ext -LIBS=$ac_func_search_save_LIBS -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_usleep" >&5 -$as_echo "$ac_cv_search_usleep" >&6; } -ac_res=$ac_cv_search_usleep -if test "$ac_res" != no; then : - test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" - $as_echo "#define HAVE_USLEEP 1" >>confdefs.h -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: entry point not found - disabled" >&5 -$as_echo "entry point not found - disabled" >&6; } -fi -else - : -fi -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CACHE_CHECK(for usleep(),cc_has_usleep,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) -# Check for nanosleep +# vvvvvvvvvvvvvvvvvvvvvv GASNET_IF_ENABLED(memory-kinds,...) vvvvvvvvvvvvvvvvvvvvvv (L:4) -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CACHE_CHECK(for nanosleep(),cc_has_nanosleep,...) vvvvvvvvvvvvvvvvvvvvvv (L:4) - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for nanosleep()" >&5 -$as_echo_n "checking for nanosleep()... " >&6; } -if ${gasnet_cv_cc_has_nanosleep+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - -#include -#include - -int -main () -{ - - struct timespec tm, tmremaining; - tm.tv_sec =1; - tm.tv_nsec = 1000000; - nanosleep(&tm, &tmremaining); - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - gasnet_cv_cc_has_nanosleep=yes -else - gasnet_cv_cc_has_nanosleep=no + # Check whether --enable-memory-kinds was given. +if test "${enable_memory_kinds+set}" = set; then : + enableval=$enable_memory_kinds; fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gasnet_cv_cc_has_nanosleep" >&5 -$as_echo "$gasnet_cv_cc_has_nanosleep" >&6; } -if test "$gasnet_cv_cc_has_nanosleep" = yes; then - : - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing nanosleep" >&5 -$as_echo_n "checking for library containing nanosleep... " >&6; } -if ${ac_cv_search_nanosleep+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_func_search_save_LIBS=$LIBS -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif -char nanosleep (); -int -main () -{ -return nanosleep (); - ; - return 0; -} -_ACEOF -for ac_lib in '' posix4; do - if test -z "$ac_lib"; then - ac_res="none required" - else - ac_res=-l$ac_lib - LIBS="-l$ac_lib $ac_func_search_save_LIBS" - fi - if ac_fn_c_try_link "$LINENO"; then : - ac_cv_search_nanosleep=$ac_res -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext - if ${ac_cv_search_nanosleep+:} false; then : - break -fi -done -if ${ac_cv_search_nanosleep+:} false; then : -else - ac_cv_search_nanosleep=no -fi -rm conftest.$ac_ext -LIBS=$ac_func_search_save_LIBS -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_nanosleep" >&5 -$as_echo "$ac_cv_search_nanosleep" >&6; } -ac_res=$ac_cv_search_nanosleep -if test "$ac_res" != no; then : - test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" - $as_echo "#define HAVE_NANOSLEEP 1" >>confdefs.h -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: entry point not found - disabled" >&5 -$as_echo "entry point not found - disabled" >&6; } -fi - -else - : - -fi - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CACHE_CHECK(for nanosleep(),cc_has_nanosleep,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) - - - - -# Check for clock_nanosleep - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CACHE_CHECK(for clock_nanosleep(),cc_has_clock_nanosleep,...) vvvvvvvvvvvvvvvvvvvvvv (L:4) - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for clock_nanosleep()" >&5 -$as_echo_n "checking for clock_nanosleep()... " >&6; } -if ${gasnet_cv_cc_has_clock_nanosleep+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - -#include - -int -main () -{ - - struct timespec tm, tmremaining; - tm.tv_sec = 1; - tm.tv_nsec = 1000000; - clock_nanosleep(CLOCK_REALTIME, 0, &tm, &tmremaining); - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - gasnet_cv_cc_has_clock_nanosleep=yes -else - gasnet_cv_cc_has_clock_nanosleep=no -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gasnet_cv_cc_has_clock_nanosleep" >&5 -$as_echo "$gasnet_cv_cc_has_clock_nanosleep" >&6; } -if test "$gasnet_cv_cc_has_clock_nanosleep" = yes; then - : - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing clock_nanosleep" >&5 -$as_echo_n "checking for library containing clock_nanosleep... " >&6; } -if ${ac_cv_search_clock_nanosleep+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_func_search_save_LIBS=$LIBS -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif -char clock_nanosleep (); -int -main () -{ -return clock_nanosleep (); - ; - return 0; -} -_ACEOF -for ac_lib in '' rt posix4; do - if test -z "$ac_lib"; then - ac_res="none required" - else - ac_res=-l$ac_lib - LIBS="-l$ac_lib $ac_func_search_save_LIBS" - fi - if ac_fn_c_try_link "$LINENO"; then : - ac_cv_search_clock_nanosleep=$ac_res -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext - if ${ac_cv_search_clock_nanosleep+:} false; then : - break -fi -done -if ${ac_cv_search_clock_nanosleep+:} false; then : - -else - ac_cv_search_clock_nanosleep=no -fi -rm conftest.$ac_ext -LIBS=$ac_func_search_save_LIBS -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_clock_nanosleep" >&5 -$as_echo "$ac_cv_search_clock_nanosleep" >&6; } -ac_res=$ac_cv_search_clock_nanosleep -if test "$ac_res" != no; then : - test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" - $as_echo "#define HAVE_CLOCK_NANOSLEEP 1" >>confdefs.h - -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: entry point not found - disabled" >&5 -$as_echo "entry point not found - disabled" >&6; } -fi - -else - : - -fi - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CACHE_CHECK(for clock_nanosleep(),cc_has_clock_nanosleep,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) - - - - -# Check for nsleep - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CACHE_CHECK(for nsleep(),cc_has_nsleep,...) vvvvvvvvvvvvvvvvvvvvvv (L:4) - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for nsleep()" >&5 -$as_echo_n "checking for nsleep()... " >&6; } -if ${gasnet_cv_cc_has_nsleep+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - -#include -#include - -int -main () -{ - - struct timespec tm, tmremaining; - tm.tv_sec =1; - tm.tv_nsec = 1000000; - nsleep(&tm, &tmremaining); - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - gasnet_cv_cc_has_nsleep=yes -else - gasnet_cv_cc_has_nsleep=no -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gasnet_cv_cc_has_nsleep" >&5 -$as_echo "$gasnet_cv_cc_has_nsleep" >&6; } -if test "$gasnet_cv_cc_has_nsleep" = yes; then - : - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing nsleep" >&5 -$as_echo_n "checking for library containing nsleep... " >&6; } -if ${ac_cv_search_nsleep+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_func_search_save_LIBS=$LIBS -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif -char nsleep (); -int -main () -{ -return nsleep (); - ; - return 0; -} -_ACEOF -for ac_lib in '' posix4; do - if test -z "$ac_lib"; then - ac_res="none required" - else - ac_res=-l$ac_lib - LIBS="-l$ac_lib $ac_func_search_save_LIBS" - fi - if ac_fn_c_try_link "$LINENO"; then : - ac_cv_search_nsleep=$ac_res -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext - if ${ac_cv_search_nsleep+:} false; then : - break -fi -done -if ${ac_cv_search_nsleep+:} false; then : - -else - ac_cv_search_nsleep=no -fi -rm conftest.$ac_ext -LIBS=$ac_func_search_save_LIBS -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_nsleep" >&5 -$as_echo "$ac_cv_search_nsleep" >&6; } -ac_res=$ac_cv_search_nsleep -if test "$ac_res" != no; then : - test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" - $as_echo "#define HAVE_NSLEEP 1" >>confdefs.h - -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: entry point not found - disabled" >&5 -$as_echo "entry point not found - disabled" >&6; } -fi - -else - : - -fi - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CACHE_CHECK(for nsleep(),cc_has_nsleep,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) - - - - -# Check for sched_yield - - - -# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CACHE_CHECK(for sched_yield(),cc_has_sched_yield,...) vvvvvvvvvvvvvvvvvvvvvv (L:4) - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for sched_yield()" >&5 -$as_echo_n "checking for sched_yield()... " >&6; } -if ${gasnet_cv_cc_has_sched_yield+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - -#include - -int -main () -{ - - sched_yield(); - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - gasnet_cv_cc_has_sched_yield=yes -else - gasnet_cv_cc_has_sched_yield=no -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gasnet_cv_cc_has_sched_yield" >&5 -$as_echo "$gasnet_cv_cc_has_sched_yield" >&6; } -if test "$gasnet_cv_cc_has_sched_yield" = yes; then - : - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing sched_yield" >&5 -$as_echo_n "checking for library containing sched_yield... " >&6; } -if ${ac_cv_search_sched_yield+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_func_search_save_LIBS=$LIBS -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -#ifdef __cplusplus -extern "C" -#endif -char sched_yield (); -int -main () -{ -return sched_yield (); - ; - return 0; -} -_ACEOF -for ac_lib in '' posix4; do - if test -z "$ac_lib"; then - ac_res="none required" - else - ac_res=-l$ac_lib - LIBS="-l$ac_lib $ac_func_search_save_LIBS" - fi - if ac_fn_c_try_link "$LINENO"; then : - ac_cv_search_sched_yield=$ac_res -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext - if ${ac_cv_search_sched_yield+:} false; then : - break -fi -done -if ${ac_cv_search_sched_yield+:} false; then : - -else - ac_cv_search_sched_yield=no -fi -rm conftest.$ac_ext -LIBS=$ac_func_search_save_LIBS -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_sched_yield" >&5 -$as_echo "$ac_cv_search_sched_yield" >&6; } -ac_res=$ac_cv_search_sched_yield -if test "$ac_res" != no; then : - test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" - $as_echo "#define HAVE_SCHED_YIELD 1" >>confdefs.h - -else - { $as_echo "$as_me:${as_lineno-$LINENO}: 
result: entry point not found - disabled" >&5 -$as_echo "entry point not found - disabled" >&6; } -fi - -else - : - -fi - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CACHE_CHECK(for sched_yield(),cc_has_sched_yield,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) +case "$enable_memory_kinds" in + '' | no) : + enable_mk_default=no + ;; + *) : + enable_mk_default=probe + ;; +esac +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_IF_ENABLED(memory-kinds,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) -# timer setup -case "$target_os" in - freebsd* | netbsd*) - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for sysctl machdep.tsc_freq" >&5 -$as_echo_n "checking for sysctl machdep.tsc_freq... " >&6; } - res="`/sbin/sysctl -n machdep.tsc_freq 2> /dev/null`" - if test "$res" != "" -a "$res" != 0; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes: $res" >&5 -$as_echo "yes: $res" >&6; } - $as_echo "#define GASNETI_HAVE_SYSCTL_MACHDEP_TSC_FREQ 1" >>confdefs.h - else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - fi - ;; -esac -enabled_plpa="no" -if test "$cross_compiling" = "no" ; then - case "$target_os" in - linux*) - if test "$GASNET_PLATFORM" = "wsl" ; then - enabled_plpa="no" - else - enabled_plpa="yes" - fi - ;; - esac -else - case "$target" in - x86_64-cnl-linux*) enabled_plpa="yes";; - esac -fi -if test "$enabled_plpa" = "yes"; then +# GEX_MK_CLASS_CUDA_UVA -# vvvvvvvvvvvvvvvvvvvvvv GASNET_IF_ENABLED_WITH_AUTO(plpa,...) vvvvvvvvvvvvvvvvvvvvvv (L:4) +# vvvvvvvvvvvvvvvvvvvvvv GASNET_IF_ENABLED_WITH_AUTO(kind-cuda-uva,...) vvvvvvvvvvvvvvvvvvvvvv (L:4) - # Check whether --enable-plpa was given. -if test "${enable_plpa+set}" = set; then : - enableval=$enable_plpa; + # Check whether --enable-kind-cuda-uva was given. 
+if test "${enable_kind_cuda_uva+set}" = set; then : + enableval=$enable_kind_cuda_uva; fi - case "$enable_plpa" in + case "$enable_kind_cuda_uva" in no) : - enabled_plpa="no - user disabled" + enable_mk_cuda_uva="no" ;; yes) : - enabled_plpa="yes"; force_plpa="yes" + enable_mk_cuda_uva="yes"; force_mk_cuda_uva="yes" ;; *) : - enabled_plpa="yes" - ;; - esac - -# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_IF_ENABLED_WITH_AUTO(plpa,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) - + if test "x$enable_kind_cuda_uva" = 'xprobe'; then + enable_mk_cuda_uva="probe" + elif test "$GASNETI_PTR_BITS" = 32; then + enable_mk_cuda_uva="no" + else + enable_mk_cuda_uva="$enable_mk_default" + fi - if test "$enabled_plpa" = yes; then - + ;; + esac - # Included mode, or standalone? +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_IF_ENABLED_WITH_AUTO(kind-cuda-uva,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) - if test "$enable_included_mode" = "yes"; then - plpa_mode=included - else - plpa_mode=standalone - fi - # Change the symbol prefix? - if test "$with_plpa_symbol_prefix" = ""; then - plpa_symbol_prefix_value=plpa_ +unset have_mk_cuda_uva +if test "$enable_mk_cuda_uva" != "no"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for target having CUDA UVA memory kinds support" >&5 +$as_echo_n "checking for target having CUDA UVA memory kinds support... 
" >&6; } + if test "$GASNETI_PTR_BITS" = 32; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no (ILP32 targets are not supported)" >&5 +$as_echo "no (ILP32 targets are not supported)" >&6; } + have_mk_cuda_uva=no + else + have_mk_cuda_uva=no + case "$target_cpu:$target_os" in + x86_64:linux*) have_mk_cuda_uva='yes';; + powerpc*:linux*) if test "$WORDS_BIGENDIAN" = '0'; then have_mk_cuda_uva='yes'; fi;; + aarch64:linux*) have_mk_cuda_uva='maybe';; + esac + if test "$have_mk_cuda_uva" = 'no'; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no (unsupported or unrecognized target $target)" >&5 +$as_echo "no (unsupported or unrecognized target $target)" >&6; } else - plpa_symbol_prefix_value=$with_plpa_symbol_prefix + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $have_mk_cuda_uva" >&5 +$as_echo "$have_mk_cuda_uva" >&6; } fi + fi + if test "$have_mk_cuda_uva" != "no"; then + # TODO: Can/should we scrape nvcc as UPC++ does? - plpa_symbol_prefix_value=gasneti_plpa_ - - - +# vvvvvvvvvvvvvvvvvvvvvv GASNET_PACKAGE_HOME(CUDA,CUDA toolkit,nvcc,/usr/local/cuda,include/cuda.h) vvvvvvvvvvvvvvvvvvvvvv (L:4) - # Check for syscall() - ac_fn_c_check_func "$LINENO" "syscall" "ac_cv_func_syscall" -if test "x$ac_cv_func_syscall" = xyes; then : - happy=1 -else - happy=0 -fi - - - # Look for syscall.h - if test "$happy" = 1; then - ac_fn_c_check_header_mongrel "$LINENO" "syscall.h" "ac_cv_header_syscall_h" "$ac_includes_default" -if test "x$ac_cv_header_syscall_h" = xyes; then : - happy=1 -else - happy=0 -fi - fi + CUDA_guess= - # Look for unistd.h - if test "$happy" = 1; then - ac_fn_c_check_header_mongrel "$LINENO" "unistd.h" "ac_cv_header_unistd_h" "$ac_includes_default" -if test "x$ac_cv_header_unistd_h" = xyes; then : - happy=1 + for ac_prog in nvcc +do + # Extract the first word of "$ac_prog", so it can be a program name with args. 
+set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_CUDA_guess_prog+:} false; then : + $as_echo_n "(cached) " >&6 else - happy=0 -fi - - - fi + case $CUDA_guess_prog in + [\\/]* | ?:[\\/]*) + ac_cv_path_CUDA_guess_prog="$CUDA_guess_prog" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_CUDA_guess_prog="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS - # Check for __NR_sched_setaffinity - if test "$happy" = 1; then - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __NR_sched_setaffinity" >&5 -$as_echo_n "checking for __NR_sched_setaffinity... " >&6; } - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -#include -int -main () -{ -#ifndef __NR_sched_setaffinity -#error __NR_sched_setaffinity_not found! -#endif -int i = 1; - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } - happy=1 -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - happy=0 + ;; +esac fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - fi - - # Check for __NR_sched_getaffinity (probably overkill, but what - # the heck?) - if test "$happy" = 1; then - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __NR_sched_getaffinity" >&5 -$as_echo_n "checking for __NR_sched_getaffinity... " >&6; } - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ -#include -#include -int -main () -{ -#ifndef __NR_sched_getaffinity -#error __NR_sched_getaffinity_not found! -#endif -int i = 1; - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } - happy=1 +CUDA_guess_prog=$ac_cv_path_CUDA_guess_prog +if test -n "$CUDA_guess_prog"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CUDA_guess_prog" >&5 +$as_echo "$CUDA_guess_prog" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } - happy=0 fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - fi - - # If all was good, do the real init - if test "$happy" = 1; then + test -n "$CUDA_guess_prog" && break +done - # Are we building as standalone or included? - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for PLPA building mode" >&5 -$as_echo_n "checking for PLPA building mode... " >&6; } - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $plpa_mode" >&5 -$as_echo "$plpa_mode" >&6; } - - # What prefix are we using? - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for PLPA symbol prefix" >&5 -$as_echo_n "checking for PLPA symbol prefix... " >&6; } - -cat >>confdefs.h <<_ACEOF -#define PLPA_SYM_PREFIX $plpa_symbol_prefix_value -_ACEOF - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $plpa_symbol_prefix_value" >&5 -$as_echo "$plpa_symbol_prefix_value" >&6; } - - # Success - enabled_plpa="yes" - - else - enabled_plpa="no - probe failed" - : # in case enabled_plpa="no - probe failed" is empty. - fi - - # Cleanup - unset happy - - fi - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to use PLPA for cpu binding" >&5 -$as_echo_n "checking whether to use PLPA for cpu binding... 
" >&6; } - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enabled_plpa" >&5 -$as_echo "$enabled_plpa" >&6; } - if test "$enabled_plpa" = yes; then - $as_echo "#define HAVE_PLPA 1" >>confdefs.h - - elif test "$force_plpa" = yes; then - as_fn_error $? "User requested --enable-plpa but PLPA is not supported on your system" "$LINENO" 5 - fi -fi - if test "$enabled_plpa" = "yes"; then - USE_PLPA_TRUE= - USE_PLPA_FALSE='#' -else - USE_PLPA_TRUE='#' - USE_PLPA_FALSE= -fi - - -# AC_FUNC_MMAP -# Check for mmap functionality we actually use -if test "$cross_compiling" = "yes" ; then - - + if test -n "$CUDA_guess_prog"; then + CUDA_guess=`dirname \`dirname $CUDA_guess_prog\`` + fi -# vvvvvvvvvvvvvvvvvvvvvv GASNET_CROSS_VAR(HAVE_MMAP,HAVE_MMAP,) vvvvvvvvvvvvvvvvvvvvvv (L:4) + if test -z "$CUDA_guess" ; then + for CUDA_guess_file in "include/cuda.h" "."; do + for CUDA_guess in /usr/local/cuda /usr; do + if test -r "$CUDA_guess/$CUDA_guess_file" ; then + break 2 + fi + done + done + unset CUDA_guess_file + fi - if test "$cross_compiling" = "yes" ; then +# vvvvvvvvvvvvvvvvvvvvvv GASNET_ENV_DEFAULT([CUDA_HOME],[$CUDA_guess],[Install prefix of CUDA toolkit (auto-detected from PATH)]) vvvvvvvvvvvvvvvvvvvvvv (L:5) -# vvvvvvvvvvvvvvvvvvvvvv GASNET_ENV_DEFAULT([CROSS_HAVE_MMAP],[]) vvvvvvvvvvvvvvvvvvvvvv (L:5) +# Check whether --with-cuda-home was given. +if test "${with_cuda_home+set}" = set; then : + withval=$with_cuda_home; +fi - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for CROSS_HAVE_MMAP setting" >&5 -$as_echo_n "checking for CROSS_HAVE_MMAP setting... " >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for CUDA_HOME setting" >&5 +$as_echo_n "checking for CUDA_HOME setting... 
" >&6; } - envval_src_CROSS_HAVE_MMAP="cached" - if ${gasnet_cv_envvar_CROSS_HAVE_MMAP+:} false; then : + envval_src_CUDA_HOME="cached" + if ${gasnet_cv_envvar_CUDA_HOME+:} false; then : $as_echo_n "(cached) " >&6 else - if test "2" = "1" ; then # no default means unset - envval_default_CROSS_HAVE_MMAP="__=-=-=-__NOT_SET__-=-=-=__" + if test "3" = "1" ; then # no default means unset + envval_default_CUDA_HOME="__=-=-=-__NOT_SET__-=-=-=__" else - envval_default_CROSS_HAVE_MMAP="" + envval_default_CUDA_HOME="$CUDA_guess" fi # Lowest priority are the enclosing environment and the default value argument (lowest) - if test "${_gasneti_nenv_crosshavemmap+set}" = "set" ; then - gasnet_cv_envvar_CROSS_HAVE_MMAP="${_gasneti_nenv_crosshavemmap}" - envval_src_CROSS_HAVE_MMAP=given - elif test "${_gasneti_cenv_crosshavemmap+set}" = "set" ; then - gasnet_cv_envvar_CROSS_HAVE_MMAP="${_gasneti_cenv_crosshavemmap}" - envval_src_CROSS_HAVE_MMAP=conf + if test "${_gasneti_nenv_cudahome+set}" = "set" ; then + gasnet_cv_envvar_CUDA_HOME="${_gasneti_nenv_cudahome}" + envval_src_CUDA_HOME=given + elif test "${_gasneti_cenv_cudahome+set}" = "set" ; then + gasnet_cv_envvar_CUDA_HOME="${_gasneti_cenv_cudahome}" + envval_src_CUDA_HOME=conf else - gasnet_cv_envvar_CROSS_HAVE_MMAP=$envval_default_CROSS_HAVE_MMAP - envval_src_CROSS_HAVE_MMAP=default + gasnet_cv_envvar_CUDA_HOME=$envval_default_CUDA_HOME + envval_src_CUDA_HOME=default fi # Left-to-right parsing of commandline settings that includes both mechanisms # --with-VAR=val or VAR=val => set to val # --with-VAR => set to default # --without-VAR => set to blank (ie "", not "no") - eval gasnet_fn_env_helper CROSS_HAVE_MMAP crosshavemmap $gasnet_cv_configure_args_norm + eval gasnet_fn_env_helper CUDA_HOME cudahome $gasnet_cv_configure_args_norm fi - CROSS_HAVE_MMAP="$gasnet_cv_envvar_CROSS_HAVE_MMAP" + CUDA_HOME="$gasnet_cv_envvar_CUDA_HOME" - if test "$CROSS_HAVE_MMAP" = "__=-=-=-__NOT_SET__-=-=-=__" ; then - unset CROSS_HAVE_MMAP - if 
test "$envval_src_CROSS_HAVE_MMAP" = "cached"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: (not set)" >&5 -$as_echo " (not set)" >&6; } + if test "$CUDA_HOME" = "__=-=-=-__NOT_SET__-=-=-=__" ; then + unset CUDA_HOME + if test "$envval_src_CUDA_HOME" = "cached"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (not set)" >&5 +$as_echo " (not set)" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: (not set)" >&5 -$as_echo " (not set)" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (not set)" >&5 +$as_echo " (not set)" >&6; } fi else - case "$envval_src_CROSS_HAVE_MMAP" in + case "$envval_src_CUDA_HOME" in 'cached') - { $as_echo "$as_me:${as_lineno-$LINENO}: result: \"$CROSS_HAVE_MMAP\"" >&5 -$as_echo " \"$CROSS_HAVE_MMAP\"" >&6; } ;; + { $as_echo "$as_me:${as_lineno-$LINENO}: result: \"$CUDA_HOME\"" >&5 +$as_echo " \"$CUDA_HOME\"" >&6; } ;; 'default') - { $as_echo "$as_me:${as_lineno-$LINENO}: result: (default) \"$CROSS_HAVE_MMAP\"" >&5 -$as_echo " (default) \"$CROSS_HAVE_MMAP\"" >&6; } ;; + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (default) \"$CUDA_HOME\"" >&5 +$as_echo " (default) \"$CUDA_HOME\"" >&6; } ;; 'disabled') - { $as_echo "$as_me:${as_lineno-$LINENO}: result: (disabled) \"$CROSS_HAVE_MMAP\"" >&5 -$as_echo " (disabled) \"$CROSS_HAVE_MMAP\"" >&6; } ;; + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (disabled) \"$CUDA_HOME\"" >&5 +$as_echo " (disabled) \"$CUDA_HOME\"" >&6; } ;; 'given') - { $as_echo "$as_me:${as_lineno-$LINENO}: result: (provided) \"$CROSS_HAVE_MMAP\"" >&5 -$as_echo " (provided) \"$CROSS_HAVE_MMAP\"" >&6; } ;; + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (provided) \"$CUDA_HOME\"" >&5 +$as_echo " (provided) \"$CUDA_HOME\"" >&6; } ;; 'conf') - { $as_echo "$as_me:${as_lineno-$LINENO}: result: (provided) \"$CROSS_HAVE_MMAP\"" >&5 -$as_echo " (provided) \"$CROSS_HAVE_MMAP\"" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (provided) \"$CUDA_HOME\"" >&5 +$as_echo " (provided) 
\"$CUDA_HOME\"" >&6; } echo -echo "configure error: Ambiguous environment setting for \$CROSS_HAVE_MMAP. Please configure --with-CROSS_HAVE_MMAP=\"intended value\"" +echo "configure error: Ambiguous environment setting for \$CUDA_HOME. Please configure --with-CUDA_HOME=\"intended value\"" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 + + + ;; + *) + +echo +echo "configure error: _GASNET_ENV_DEFAULT broken" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 + + + esac + fi + + + + + + + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_ENV_DEFAULT([CUDA_HOME],[$CUDA_guess],[Install prefix of CUDA toolkit (auto-detected from PATH)]) ^^^^^^^^^^^^^^^^^^^^^^ (L:5) + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $CUDA_HOME is the CUDA toolkit install prefix" >&5 +$as_echo_n "checking if $CUDA_HOME is the CUDA toolkit install prefix... 
" >&6; } + CUDA_HOME_good=no + if test -r "$CUDA_HOME/include/cuda.h"; then + CUDA_HOME_good=yes + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + fi + + if test $CUDA_HOME_good = no && test -n "$CUDA_guess_prog"; then + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_FOLLOWLINKS(CUDA_guess_prog) vvvvvvvvvvvvvvvvvvvvvv (L:5) + + + gasnet_fl_file="$CUDA_guess_prog" + gasnet_fl_link=`/bin/ls -al "$gasnet_fl_file" | $AWK 'BEGIN{FS=">"}{split($2,A," ") ; print A[1]}'` + while test "$gasnet_fl_link"; do + gasnet_fl_file="$gasnet_fl_link" + gasnet_fl_link=`/bin/ls -al "$gasnet_fl_file" | $AWK 'BEGIN{FS=">"}{split($2,A," ") ; print A[1]}'` + done + CUDA_guess_prog="$gasnet_fl_file" + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_FOLLOWLINKS(CUDA_guess_prog) ^^^^^^^^^^^^^^^^^^^^^^ (L:5) + + + + CUDA_HOME_tmp=`dirname \`dirname $CUDA_guess_prog\`` + if test -r "$CUDA_HOME_tmp/include/cuda.h"; then + CUDA_HOME_good=yes + CUDA_HOME="$CUDA_HOME_tmp" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no - followed symlink to $CUDA_HOME" >&5 +$as_echo "no - followed symlink to $CUDA_HOME" >&6; } + fi + unset CUDA_HOME_tmp + fi + + if test $CUDA_HOME_good = no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: probably not - missing include/cuda.h" >&5 +$as_echo "probably not - missing include/cuda.h" >&6; } + fi + unset CUDA_HOME_good + unset CUDA_guess + + + + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PACKAGE_HOME ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_PACKAGE_CFLAGS([CUDA]) vvvvvvvvvvvvvvvvvvvvvv (L:4) + + + + + case "$CUDA_HOME" in + /usr|/) CUDA_cflags_guess='';; + *) if test -d "$CUDA_HOME/include/."; then + CUDA_cflags_guess="-I$CUDA_HOME/include" + else + CUDA_cflags_guess='' + fi;; + esac + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_ENV_DEFAULT([CUDA_CFLAGS],[$CUDA_cflags_guess]) vvvvvvvvvvvvvvvvvvvvvv (L:5) + + + + + + + + +# Check whether --with-cuda-cflags was given. 
+if test "${with_cuda_cflags+set}" = set; then : + withval=$with_cuda_cflags; +fi + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for CUDA_CFLAGS setting" >&5 +$as_echo_n "checking for CUDA_CFLAGS setting... " >&6; } + + envval_src_CUDA_CFLAGS="cached" + if ${gasnet_cv_envvar_CUDA_CFLAGS+:} false; then : + $as_echo_n "(cached) " >&6 +else + + if test "2" = "1" ; then # no default means unset + envval_default_CUDA_CFLAGS="__=-=-=-__NOT_SET__-=-=-=__" + else + envval_default_CUDA_CFLAGS="$CUDA_cflags_guess" + fi + + # Lowest priority are the enclosing environment and the default value argument (lowest) + if test "${_gasneti_nenv_cudacflags+set}" = "set" ; then + gasnet_cv_envvar_CUDA_CFLAGS="${_gasneti_nenv_cudacflags}" + envval_src_CUDA_CFLAGS=given + elif test "${_gasneti_cenv_cudacflags+set}" = "set" ; then + gasnet_cv_envvar_CUDA_CFLAGS="${_gasneti_cenv_cudacflags}" + envval_src_CUDA_CFLAGS=conf + else + gasnet_cv_envvar_CUDA_CFLAGS=$envval_default_CUDA_CFLAGS + envval_src_CUDA_CFLAGS=default + fi + # Left-to-right parsing of commandline settings that includes both mechanisms + # --with-VAR=val or VAR=val => set to val + # --with-VAR => set to default + # --without-VAR => set to blank (ie "", not "no") + eval gasnet_fn_env_helper CUDA_CFLAGS cudacflags $gasnet_cv_configure_args_norm + +fi + + + CUDA_CFLAGS="$gasnet_cv_envvar_CUDA_CFLAGS" + + if test "$CUDA_CFLAGS" = "__=-=-=-__NOT_SET__-=-=-=__" ; then + unset CUDA_CFLAGS + if test "$envval_src_CUDA_CFLAGS" = "cached"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (not set)" >&5 +$as_echo " (not set)" >&6; } + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (not set)" >&5 +$as_echo " (not set)" >&6; } + fi + else + case "$envval_src_CUDA_CFLAGS" in + 'cached') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: \"$CUDA_CFLAGS\"" >&5 +$as_echo " \"$CUDA_CFLAGS\"" >&6; } ;; + 'default') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (default) \"$CUDA_CFLAGS\"" >&5 +$as_echo " 
(default) \"$CUDA_CFLAGS\"" >&6; } ;; + 'disabled') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (disabled) \"$CUDA_CFLAGS\"" >&5 +$as_echo " (disabled) \"$CUDA_CFLAGS\"" >&6; } ;; + 'given') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (provided) \"$CUDA_CFLAGS\"" >&5 +$as_echo " (provided) \"$CUDA_CFLAGS\"" >&6; } ;; + 'conf') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (provided) \"$CUDA_CFLAGS\"" >&5 +$as_echo " (provided) \"$CUDA_CFLAGS\"" >&6; } + + +echo +echo "configure error: Ambiguous environment setting for \$CUDA_CFLAGS. Please configure --with-CUDA_CFLAGS=\"intended value\"" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 + + + ;; + *) + +echo +echo "configure error: _GASNET_ENV_DEFAULT broken" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 + + + esac + fi + + + + + + + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_ENV_DEFAULT([CUDA_CFLAGS],[$CUDA_cflags_guess]) ^^^^^^^^^^^^^^^^^^^^^^ (L:5) + + + + unset CUDA_cflags_guess + + + + + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PACKAGE_CFLAGS ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_ENV_DEFAULT([CUDA_LIBS],[-lcuda]) vvvvvvvvvvvvvvvvvvvvvv (L:4) + + + + + + + + +# Check whether --with-cuda-libs was given. +if test "${with_cuda_libs+set}" = set; then : + withval=$with_cuda_libs; +fi + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for CUDA_LIBS setting" >&5 +$as_echo_n "checking for CUDA_LIBS setting... " >&6; } + + envval_src_CUDA_LIBS="cached" + if ${gasnet_cv_envvar_CUDA_LIBS+:} false; then : + $as_echo_n "(cached) " >&6 +else + + if test "2" = "1" ; then # no default means unset + envval_default_CUDA_LIBS="__=-=-=-__NOT_SET__-=-=-=__" + else + envval_default_CUDA_LIBS="-lcuda" + fi + + # Lowest priority are the enclosing environment and the default value argument (lowest) + if test "${_gasneti_nenv_cudalibs+set}" = "set" ; then + gasnet_cv_envvar_CUDA_LIBS="${_gasneti_nenv_cudalibs}" + envval_src_CUDA_LIBS=given + elif test "${_gasneti_cenv_cudalibs+set}" = "set" ; then + gasnet_cv_envvar_CUDA_LIBS="${_gasneti_cenv_cudalibs}" + envval_src_CUDA_LIBS=conf + else + gasnet_cv_envvar_CUDA_LIBS=$envval_default_CUDA_LIBS + envval_src_CUDA_LIBS=default + fi + # Left-to-right parsing of commandline settings that includes both mechanisms + # --with-VAR=val or VAR=val => set to val + # --with-VAR => set to default + # --without-VAR => set to blank (ie "", not "no") + eval gasnet_fn_env_helper CUDA_LIBS cudalibs $gasnet_cv_configure_args_norm + +fi + + + CUDA_LIBS="$gasnet_cv_envvar_CUDA_LIBS" + + if test "$CUDA_LIBS" = "__=-=-=-__NOT_SET__-=-=-=__" ; then + unset CUDA_LIBS + if test "$envval_src_CUDA_LIBS" = "cached"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (not set)" >&5 +$as_echo " (not set)" >&6; } + else + { 
$as_echo "$as_me:${as_lineno-$LINENO}: result: (not set)" >&5 +$as_echo " (not set)" >&6; } + fi + else + case "$envval_src_CUDA_LIBS" in + 'cached') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: \"$CUDA_LIBS\"" >&5 +$as_echo " \"$CUDA_LIBS\"" >&6; } ;; + 'default') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (default) \"$CUDA_LIBS\"" >&5 +$as_echo " (default) \"$CUDA_LIBS\"" >&6; } ;; + 'disabled') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (disabled) \"$CUDA_LIBS\"" >&5 +$as_echo " (disabled) \"$CUDA_LIBS\"" >&6; } ;; + 'given') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (provided) \"$CUDA_LIBS\"" >&5 +$as_echo " (provided) \"$CUDA_LIBS\"" >&6; } ;; + 'conf') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (provided) \"$CUDA_LIBS\"" >&5 +$as_echo " (provided) \"$CUDA_LIBS\"" >&6; } + + +echo +echo "configure error: Ambiguous environment setting for \$CUDA_LIBS. Please configure --with-CUDA_LIBS=\"intended value\"" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 + + + ;; + *) + +echo +echo "configure error: _GASNET_ENV_DEFAULT broken" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 + + + esac + fi + + + + + + + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_ENV_DEFAULT([CUDA_LIBS],[-lcuda]) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_PACKAGE_LDFLAGS([CUDA],[cuda]) vvvvvvvvvvvvvvvvvvvvvv (L:4) + + + + + case "$CUDA_HOME" in + /usr|/) CUDA_ldflags_guess='';; + *) for CUDA_ldflags_guess in "$CUDA_HOME/lib${GASNETI_PTR_BITS}" "$CUDA_HOME/lib"; do + for CUDA_ldflags_guess_lib in cuda; do + for CUDA_ldflags_guess_suff in a so sl dylib la dll; do + if test -r "$CUDA_ldflags_guess/lib$CUDA_ldflags_guess_lib.$CUDA_ldflags_guess_suff"; then break 3; fi + done + done + done + CUDA_ldflags_guess="-L$CUDA_ldflags_guess";; + esac + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_ENV_DEFAULT([CUDA_LDFLAGS],[$CUDA_ldflags_guess]) vvvvvvvvvvvvvvvvvvvvvv (L:5) + + + + + + + + +# Check whether --with-cuda-ldflags was given. +if test "${with_cuda_ldflags+set}" = set; then : + withval=$with_cuda_ldflags; +fi + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for CUDA_LDFLAGS setting" >&5 +$as_echo_n "checking for CUDA_LDFLAGS setting... 
" >&6; } + + envval_src_CUDA_LDFLAGS="cached" + if ${gasnet_cv_envvar_CUDA_LDFLAGS+:} false; then : + $as_echo_n "(cached) " >&6 +else + + if test "2" = "1" ; then # no default means unset + envval_default_CUDA_LDFLAGS="__=-=-=-__NOT_SET__-=-=-=__" + else + envval_default_CUDA_LDFLAGS="$CUDA_ldflags_guess" + fi + + # Lowest priority are the enclosing environment and the default value argument (lowest) + if test "${_gasneti_nenv_cudaldflags+set}" = "set" ; then + gasnet_cv_envvar_CUDA_LDFLAGS="${_gasneti_nenv_cudaldflags}" + envval_src_CUDA_LDFLAGS=given + elif test "${_gasneti_cenv_cudaldflags+set}" = "set" ; then + gasnet_cv_envvar_CUDA_LDFLAGS="${_gasneti_cenv_cudaldflags}" + envval_src_CUDA_LDFLAGS=conf + else + gasnet_cv_envvar_CUDA_LDFLAGS=$envval_default_CUDA_LDFLAGS + envval_src_CUDA_LDFLAGS=default + fi + # Left-to-right parsing of commandline settings that includes both mechanisms + # --with-VAR=val or VAR=val => set to val + # --with-VAR => set to default + # --without-VAR => set to blank (ie "", not "no") + eval gasnet_fn_env_helper CUDA_LDFLAGS cudaldflags $gasnet_cv_configure_args_norm + +fi + + + CUDA_LDFLAGS="$gasnet_cv_envvar_CUDA_LDFLAGS" + + if test "$CUDA_LDFLAGS" = "__=-=-=-__NOT_SET__-=-=-=__" ; then + unset CUDA_LDFLAGS + if test "$envval_src_CUDA_LDFLAGS" = "cached"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (not set)" >&5 +$as_echo " (not set)" >&6; } + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (not set)" >&5 +$as_echo " (not set)" >&6; } + fi + else + case "$envval_src_CUDA_LDFLAGS" in + 'cached') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: \"$CUDA_LDFLAGS\"" >&5 +$as_echo " \"$CUDA_LDFLAGS\"" >&6; } ;; + 'default') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (default) \"$CUDA_LDFLAGS\"" >&5 +$as_echo " (default) \"$CUDA_LDFLAGS\"" >&6; } ;; + 'disabled') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (disabled) \"$CUDA_LDFLAGS\"" >&5 +$as_echo " (disabled) \"$CUDA_LDFLAGS\"" >&6; } ;; + 
'given') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (provided) \"$CUDA_LDFLAGS\"" >&5 +$as_echo " (provided) \"$CUDA_LDFLAGS\"" >&6; } ;; + 'conf') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (provided) \"$CUDA_LDFLAGS\"" >&5 +$as_echo " (provided) \"$CUDA_LDFLAGS\"" >&6; } + + +echo +echo "configure error: Ambiguous environment setting for \$CUDA_LDFLAGS. Please configure --with-CUDA_LDFLAGS=\"intended value\"" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 + + + ;; + *) + +echo +echo "configure error: _GASNET_ENV_DEFAULT broken" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 + + + esac + fi + + + + + + + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_ENV_DEFAULT([CUDA_LDFLAGS],[$CUDA_ldflags_guess]) ^^^^^^^^^^^^^^^^^^^^^^ (L:5) + + + + unset CUDA_ldflags_guess + unset CUDA_ldflags_guess_lib + unset CUDA_ldflags_guess_suff + + + + + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PACKAGE_LDFLAGS ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_SPLIT_LINKER_OPTS([CUDA_LDFLAGS],[CUDA_LIBS]) vvvvvvvvvvvvvvvvvvvvvv (L:4) + + + eval gasnet_fn_split_linker_opts CUDA_LDFLAGS CUDA_LIBS $CUDA_LDFLAGS $CUDA_LIBS + #echo "CUDA_LDFLAGS=[$]CUDA_LDFLAGS" + #echo "CUDA_LIBS=[$]CUDA_LIBS" + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_SPLIT_LINKER_OPTS([CUDA_LDFLAGS],[CUDA_LIBS]) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + # Try to compile and link a basic program using the CUDA Driver API + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CFLAGS,"$CFLAGS $CUDA_CFLAGS") vvvvvvvvvvvvvvvvvvvvvv (L:4) + + if test "$_pushcnt_CFLAGS" = "" ; then + _pushcnt_CFLAGS=0 + fi + if test "$_total_pushcnt" = "" ; then + _total_pushcnt=0 + fi + if test "${CFLAGS+set}" = set; then + _gasnet_pushvar_isset=1 + else + _gasnet_pushvar_isset=0 + fi + eval _pushedvar_CFLAGS_$_pushcnt_CFLAGS=\$CFLAGS + eval _pushedvarset_CFLAGS_$_pushcnt_CFLAGS=$_gasnet_pushvar_isset + _pushcnt_CFLAGS=`expr $_pushcnt_CFLAGS + 1` + _total_pushcnt=`expr $_total_pushcnt + 1` + CFLAGS="$CFLAGS $CUDA_CFLAGS" + echo "pushed new CFLAGS value: $CFLAGS" >&5 + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CFLAGS,"$CFLAGS $CUDA_CFLAGS") ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(LDFLAGS,"$LDFLAGS $CUDA_LDFLAGS") vvvvvvvvvvvvvvvvvvvvvv (L:4) + + if test "$_pushcnt_LDFLAGS" = "" ; then + _pushcnt_LDFLAGS=0 + fi + if test "$_total_pushcnt" = "" ; then + _total_pushcnt=0 + fi + if test "${LDFLAGS+set}" = set; then + _gasnet_pushvar_isset=1 + else + _gasnet_pushvar_isset=0 + fi + eval _pushedvar_LDFLAGS_$_pushcnt_LDFLAGS=\$LDFLAGS + eval 
_pushedvarset_LDFLAGS_$_pushcnt_LDFLAGS=$_gasnet_pushvar_isset + _pushcnt_LDFLAGS=`expr $_pushcnt_LDFLAGS + 1` + _total_pushcnt=`expr $_total_pushcnt + 1` + LDFLAGS="$LDFLAGS $CUDA_LDFLAGS" + echo "pushed new LDFLAGS value: $LDFLAGS" >&5 + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(LDFLAGS,"$LDFLAGS $CUDA_LDFLAGS") ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(LIBS,"$LIBS $CUDA_LIBS") vvvvvvvvvvvvvvvvvvvvvv (L:4) + + if test "$_pushcnt_LIBS" = "" ; then + _pushcnt_LIBS=0 + fi + if test "$_total_pushcnt" = "" ; then + _total_pushcnt=0 + fi + if test "${LIBS+set}" = set; then + _gasnet_pushvar_isset=1 + else + _gasnet_pushvar_isset=0 + fi + eval _pushedvar_LIBS_$_pushcnt_LIBS=\$LIBS + eval _pushedvarset_LIBS_$_pushcnt_LIBS=$_gasnet_pushvar_isset + _pushcnt_LIBS=`expr $_pushcnt_LIBS + 1` + _total_pushcnt=`expr $_total_pushcnt + 1` + LIBS="$LIBS $CUDA_LIBS" + echo "pushed new LIBS value: $LIBS" >&5 + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(LIBS,"$LIBS $CUDA_LIBS") ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CACHE_LINK(for working CUDA configuration,libcuda_works,...) vvvvvvvvvvvvvvvvvvvvvv (L:4) + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for working CUDA configuration" >&5 +$as_echo_n "checking for working CUDA configuration... " >&6; } +if ${gasnet_cv_libcuda_works+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + #include + +int +main () +{ + + cuInit(0); + CUdevice dev = 0; + CUcontext ctx; + CUresult res = cuDevicePrimaryCtxRetain(&ctx, dev); + int isUVA; + cuDeviceGetAttribute(&isUVA, CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, dev); + CUpointer_attribute attrs[3] = { CU_POINTER_ATTRIBUTE_MEMORY_TYPE, + CU_POINTER_ATTRIBUTE_IS_MANAGED, + CU_POINTER_ATTRIBUTE_CONTEXT }; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + gasnet_cv_libcuda_works=yes +else + gasnet_cv_libcuda_works=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gasnet_cv_libcuda_works" >&5 +$as_echo "$gasnet_cv_libcuda_works" >&6; } +if test "$gasnet_cv_libcuda_works" = yes; then + : + : +else + : + have_mk_cuda_uva=no; +if test -f "conftest.$ac_ext" ; then + cp conftest.$ac_ext gasnet_errsave_file +fi +if test -f "conftest.err" ; then + cp conftest.err gasnet_errsave_err +fi + +fi + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CACHE_LINK(for working CUDA configuration,libcuda_works,...) 
^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_POPVAR(LIBS) vvvvvvvvvvvvvvvvvvvvvv (L:4) + + if test "$_pushcnt_LIBS" -ge "1"; then + _pushcnt_LIBS=`expr $_pushcnt_LIBS - 1` + _total_pushcnt=`expr $_total_pushcnt - 1` + eval _gasnet_pushvar_isset=\$_pushedvarset_LIBS_$_pushcnt_LIBS + if test "$_gasnet_pushvar_isset" = "1" ; then + eval LIBS=\$_pushedvar_LIBS_$_pushcnt_LIBS + echo "popping LIBS back to: $LIBS" >&5 + else + unset LIBS + echo "popping LIBS back to: " >&5 + fi + else + + +echo +echo "configure error: INTERNAL ERROR: GASNET_PUSH/POPVAR underflow on LIBS" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 + + + fi + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_POPVAR(LIBS) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_POPVAR(LDFLAGS) vvvvvvvvvvvvvvvvvvvvvv (L:4) + + if test "$_pushcnt_LDFLAGS" -ge "1"; then + _pushcnt_LDFLAGS=`expr $_pushcnt_LDFLAGS - 1` + _total_pushcnt=`expr $_total_pushcnt - 1` + eval _gasnet_pushvar_isset=\$_pushedvarset_LDFLAGS_$_pushcnt_LDFLAGS + if test "$_gasnet_pushvar_isset" = "1" ; then + eval LDFLAGS=\$_pushedvar_LDFLAGS_$_pushcnt_LDFLAGS + echo "popping LDFLAGS back to: $LDFLAGS" >&5 + else + unset LDFLAGS + echo "popping LDFLAGS back to: " >&5 + fi + else + + +echo +echo "configure error: INTERNAL ERROR: GASNET_PUSH/POPVAR underflow on LDFLAGS" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 + + + fi + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_POPVAR(LDFLAGS) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_POPVAR(CFLAGS) vvvvvvvvvvvvvvvvvvvvvv (L:4) + + if test "$_pushcnt_CFLAGS" -ge "1"; then + _pushcnt_CFLAGS=`expr $_pushcnt_CFLAGS - 1` + _total_pushcnt=`expr $_total_pushcnt - 1` + eval _gasnet_pushvar_isset=\$_pushedvarset_CFLAGS_$_pushcnt_CFLAGS + if test "$_gasnet_pushvar_isset" = "1" ; then + eval CFLAGS=\$_pushedvar_CFLAGS_$_pushcnt_CFLAGS + echo "popping CFLAGS back to: $CFLAGS" >&5 + else + unset CFLAGS + echo "popping CFLAGS back to: " >&5 + fi + else + + +echo +echo "configure error: INTERNAL ERROR: GASNET_PUSH/POPVAR underflow on CFLAGS" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 + + + fi + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_POPVAR(CFLAGS) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + fi + + # If the probe(s) above passed on a target identified as "maybe" having + # support, then either upgrade to "yes" if enabled explicitly, or downgrade + # to "no" if just probing. Either way, we issue a warning. 
+ if test "$have_mk_cuda_uva" = "maybe"; then + if test "$enable_mk_cuda_uva" = "yes"; then + have_mk_cuda_uva=yes + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_MSG_WARN() vvvvvvvvvvvvvvvvvvvvvv (L:4) + + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: CUDA UVA memory kinds support enabled for $target, which may not be officially supported" >&5 +$as_echo "$as_me: WARNING: CUDA UVA memory kinds support enabled for $target, which may not be officially supported" >&2;} + echo "CUDA UVA memory kinds support enabled for $target, which may not be officially supported" >> ".gasnet_cv_configure_warnings.tmp" + echo " " >> ".gasnet_cv_configure_warnings.tmp" + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_MSG_WARN() ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + else + have_mk_cuda_uva=no + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_MSG_WARN() vvvvvvvvvvvvvvvvvvvvvv (L:4) + + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: It appears your system has the software required for CUDA UVA memory kinds support. +However, $target may not be officially supported. +You can enable an unsupported build of this feature with --enable-kind-cuda-uva. +Otherwise, you can disable this message with --disable-kind-cuda-uva." >&5 +$as_echo "$as_me: WARNING: It appears your system has the software required for CUDA UVA memory kinds support. +However, $target may not be officially supported. +You can enable an unsupported build of this feature with --enable-kind-cuda-uva. +Otherwise, you can disable this message with --disable-kind-cuda-uva." >&2;} + echo "It appears your system has the software required for CUDA UVA memory kinds support. +However, $target may not be officially supported. +You can enable an unsupported build of this feature with --enable-kind-cuda-uva. +Otherwise, you can disable this message with --disable-kind-cuda-uva." 
>> ".gasnet_cv_configure_warnings.tmp" + echo " " >> ".gasnet_cv_configure_warnings.tmp" + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_MSG_WARN() ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + fi + fi + + if test "$have_mk_cuda_uva" = "yes"; then + $as_echo "#define GASNETI_MK_CLASS_CUDA_UVA_ENABLED 1" >>confdefs.h + + CUDA_UVA_CFLAGS="$CUDA_CFLAGS" + CUDA_UVA_LDFLAGS="$CUDA_LDFLAGS" + CUDA_UVA_LIBS="$CUDA_LIBS" + elif test "$force_mk_cuda_uva" = "yes"; then + + +echo +echo "configure error: CUDA UVA memory kinds support was requested with --enable-kind-cuda-uva but is not available" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 + + + else + unset CUDA_UVA_CFLAGS + unset CUDA_UVA_LDFLAGS + unset CUDA_UVA_LIBS + fi +fi + + + + if test "$have_mk_cuda_uva" = "yes"; then + HAVE_MK_CLASS_CUDA_UVA_TRUE= + HAVE_MK_CLASS_CUDA_UVA_FALSE='#' +else + HAVE_MK_CLASS_CUDA_UVA_TRUE='#' + HAVE_MK_CLASS_CUDA_UVA_FALSE= +fi + + +######################################################################## + + + + + + + +# Runtime Var Arrays +#GASNET_IF_ENABLED_WITH_AUTO(vararray, Use stack arrays of variable size in code, +#AC_DEFINE(VARARRAY_WORKS), +#, +#GASNET_TRY_CACHE_CHECK(for variable stack arrays, cc_vararray_mod, +# [void dummy(int x) { char y[x]; }], [], +# AC_DEFINE(VARARRAY_WORKS))) + +######################################################################## +# different high-precision sleep libraries + +# Check for usleep + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CACHE_CHECK(for usleep(),cc_has_usleep,...) 
vvvvvvvvvvvvvvvvvvvvvv (L:4) + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for usleep()" >&5 +$as_echo_n "checking for usleep()... " >&6; } +if ${gasnet_cv_cc_has_usleep+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include + +int +main () +{ + +usleep(500); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + gasnet_cv_cc_has_usleep=yes +else + gasnet_cv_cc_has_usleep=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gasnet_cv_cc_has_usleep" >&5 +$as_echo "$gasnet_cv_cc_has_usleep" >&6; } +if test "$gasnet_cv_cc_has_usleep" = yes; then + : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing usleep" >&5 +$as_echo_n "checking for library containing usleep... " >&6; } +if ${ac_cv_search_usleep+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char usleep (); +int +main () +{ +return usleep (); + ; + return 0; +} +_ACEOF +for ac_lib in '' posix4; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO"; then : + ac_cv_search_usleep=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if ${ac_cv_search_usleep+:} false; then : + break +fi +done +if ${ac_cv_search_usleep+:} false; then : + +else + ac_cv_search_usleep=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_usleep" >&5 +$as_echo "$ac_cv_search_usleep" >&6; } +ac_res=$ac_cv_search_usleep +if test "$ac_res" != no; then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + $as_echo "#define HAVE_USLEEP 1" >>confdefs.h + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: entry point not found - disabled" >&5 +$as_echo "entry point not found - disabled" >&6; } +fi + +else + : + +fi + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CACHE_CHECK(for usleep(),cc_has_usleep,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + +# Check for nanosleep + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CACHE_CHECK(for nanosleep(),cc_has_nanosleep,...) vvvvvvvvvvvvvvvvvvvvvv (L:4) + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for nanosleep()" >&5 +$as_echo_n "checking for nanosleep()... " >&6; } +if ${gasnet_cv_cc_has_nanosleep+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#include +#include + +int +main () +{ + + struct timespec tm, tmremaining; + tm.tv_sec =1; + tm.tv_nsec = 1000000; + nanosleep(&tm, &tmremaining); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + gasnet_cv_cc_has_nanosleep=yes +else + gasnet_cv_cc_has_nanosleep=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gasnet_cv_cc_has_nanosleep" >&5 +$as_echo "$gasnet_cv_cc_has_nanosleep" >&6; } +if test "$gasnet_cv_cc_has_nanosleep" = yes; then + : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing nanosleep" >&5 +$as_echo_n "checking for library containing nanosleep... " >&6; } +if ${ac_cv_search_nanosleep+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char nanosleep (); +int +main () +{ +return nanosleep (); + ; + return 0; +} +_ACEOF +for ac_lib in '' posix4; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO"; then : + ac_cv_search_nanosleep=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if ${ac_cv_search_nanosleep+:} false; then : + break +fi +done +if ${ac_cv_search_nanosleep+:} false; then : + +else + ac_cv_search_nanosleep=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_nanosleep" >&5 +$as_echo "$ac_cv_search_nanosleep" >&6; } +ac_res=$ac_cv_search_nanosleep +if test "$ac_res" != no; then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + $as_echo "#define HAVE_NANOSLEEP 1" >>confdefs.h + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: entry point not found - disabled" >&5 +$as_echo "entry point not found - disabled" >&6; } +fi + +else + : + +fi + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CACHE_CHECK(for nanosleep(),cc_has_nanosleep,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + +# Check for clock_nanosleep + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CACHE_CHECK(for clock_nanosleep(),cc_has_clock_nanosleep,...) vvvvvvvvvvvvvvvvvvvvvv (L:4) + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for clock_nanosleep()" >&5 +$as_echo_n "checking for clock_nanosleep()... " >&6; } +if ${gasnet_cv_cc_has_clock_nanosleep+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#include + +int +main () +{ + + struct timespec tm, tmremaining; + tm.tv_sec = 1; + tm.tv_nsec = 1000000; + clock_nanosleep(CLOCK_REALTIME, 0, &tm, &tmremaining); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + gasnet_cv_cc_has_clock_nanosleep=yes +else + gasnet_cv_cc_has_clock_nanosleep=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gasnet_cv_cc_has_clock_nanosleep" >&5 +$as_echo "$gasnet_cv_cc_has_clock_nanosleep" >&6; } +if test "$gasnet_cv_cc_has_clock_nanosleep" = yes; then + : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing clock_nanosleep" >&5 +$as_echo_n "checking for library containing clock_nanosleep... " >&6; } +if ${ac_cv_search_clock_nanosleep+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char clock_nanosleep (); +int +main () +{ +return clock_nanosleep (); + ; + return 0; +} +_ACEOF +for ac_lib in '' rt posix4; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO"; then : + ac_cv_search_clock_nanosleep=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if ${ac_cv_search_clock_nanosleep+:} false; then : + break +fi +done +if ${ac_cv_search_clock_nanosleep+:} false; then : + +else + ac_cv_search_clock_nanosleep=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_clock_nanosleep" >&5 +$as_echo "$ac_cv_search_clock_nanosleep" >&6; } +ac_res=$ac_cv_search_clock_nanosleep +if test "$ac_res" != no; then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + $as_echo "#define HAVE_CLOCK_NANOSLEEP 1" >>confdefs.h + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: entry point not found - disabled" >&5 +$as_echo "entry point not found - disabled" >&6; } +fi + +else + : + +fi + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CACHE_CHECK(for clock_nanosleep(),cc_has_clock_nanosleep,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + +# Check for nsleep + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CACHE_CHECK(for nsleep(),cc_has_nsleep,...) vvvvvvvvvvvvvvvvvvvvvv (L:4) + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for nsleep()" >&5 +$as_echo_n "checking for nsleep()... " >&6; } +if ${gasnet_cv_cc_has_nsleep+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#include +#include + +int +main () +{ + + struct timespec tm, tmremaining; + tm.tv_sec =1; + tm.tv_nsec = 1000000; + nsleep(&tm, &tmremaining); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + gasnet_cv_cc_has_nsleep=yes +else + gasnet_cv_cc_has_nsleep=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gasnet_cv_cc_has_nsleep" >&5 +$as_echo "$gasnet_cv_cc_has_nsleep" >&6; } +if test "$gasnet_cv_cc_has_nsleep" = yes; then + : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing nsleep" >&5 +$as_echo_n "checking for library containing nsleep... " >&6; } +if ${ac_cv_search_nsleep+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char nsleep (); +int +main () +{ +return nsleep (); + ; + return 0; +} +_ACEOF +for ac_lib in '' posix4; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO"; then : + ac_cv_search_nsleep=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if ${ac_cv_search_nsleep+:} false; then : + break +fi +done +if ${ac_cv_search_nsleep+:} false; then : + +else + ac_cv_search_nsleep=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_nsleep" >&5 +$as_echo "$ac_cv_search_nsleep" >&6; } +ac_res=$ac_cv_search_nsleep +if test "$ac_res" != no; then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + $as_echo "#define HAVE_NSLEEP 1" >>confdefs.h + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: entry point not found - disabled" >&5 +$as_echo "entry point not found - disabled" >&6; } +fi + +else + : + +fi + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CACHE_CHECK(for nsleep(),cc_has_nsleep,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + +# Check for sched_yield + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CACHE_CHECK(for sched_yield(),cc_has_sched_yield,...) vvvvvvvvvvvvvvvvvvvvvv (L:4) + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for sched_yield()" >&5 +$as_echo_n "checking for sched_yield()... " >&6; } +if ${gasnet_cv_cc_has_sched_yield+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#include + +int +main () +{ + + sched_yield(); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + gasnet_cv_cc_has_sched_yield=yes +else + gasnet_cv_cc_has_sched_yield=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gasnet_cv_cc_has_sched_yield" >&5 +$as_echo "$gasnet_cv_cc_has_sched_yield" >&6; } +if test "$gasnet_cv_cc_has_sched_yield" = yes; then + : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing sched_yield" >&5 +$as_echo_n "checking for library containing sched_yield... " >&6; } +if ${ac_cv_search_sched_yield+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char sched_yield (); +int +main () +{ +return sched_yield (); + ; + return 0; +} +_ACEOF +for ac_lib in '' posix4; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO"; then : + ac_cv_search_sched_yield=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if ${ac_cv_search_sched_yield+:} false; then : + break +fi +done +if ${ac_cv_search_sched_yield+:} false; then : + +else + ac_cv_search_sched_yield=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_sched_yield" >&5 +$as_echo "$ac_cv_search_sched_yield" >&6; } +ac_res=$ac_cv_search_sched_yield +if test "$ac_res" != no; then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + $as_echo "#define HAVE_SCHED_YIELD 1" >>confdefs.h + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: 
result: entry point not found - disabled" >&5 +$as_echo "entry point not found - disabled" >&6; } +fi + +else + : + +fi + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CACHE_CHECK(for sched_yield(),cc_has_sched_yield,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + +# timer setup +case "$target_os" in + freebsd* | netbsd*) + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for sysctl machdep.tsc_freq" >&5 +$as_echo_n "checking for sysctl machdep.tsc_freq... " >&6; } + res="`/sbin/sysctl -n machdep.tsc_freq 2> /dev/null`" + if test "$res" != "" -a "$res" != 0; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes: $res" >&5 +$as_echo "yes: $res" >&6; } + $as_echo "#define GASNETI_HAVE_SYSCTL_MACHDEP_TSC_FREQ 1" >>confdefs.h + + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + fi + ;; +esac + +# PLPA +enabled_plpa="no" +if test "$cross_compiling" = "no" ; then + case "$target_os" in + linux*) + if test "$GASNET_PLATFORM" = "wsl" ; then + enabled_plpa="no" + else + enabled_plpa="yes" + fi + ;; + esac +else + case "$target" in + x86_64-cnl-linux*) enabled_plpa="yes";; + esac +fi +if test "$enabled_plpa" = "yes"; then + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_IF_ENABLED_WITH_AUTO(plpa,...) vvvvvvvvvvvvvvvvvvvvvv (L:4) + + + # Check whether --enable-plpa was given. +if test "${enable_plpa+set}" = set; then : + enableval=$enable_plpa; +fi + + + case "$enable_plpa" in + no) : + enabled_plpa="no - user disabled" + ;; + yes) : + enabled_plpa="yes"; force_plpa="yes" + ;; + *) : + enabled_plpa="yes" + ;; + esac + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_IF_ENABLED_WITH_AUTO(plpa,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + if test "$enabled_plpa" = yes; then + + + # Included mode, or standalone? + + if test "$enable_included_mode" = "yes"; then + plpa_mode=included + else + plpa_mode=standalone + fi + + # Change the symbol prefix? 
+ + if test "$with_plpa_symbol_prefix" = ""; then + plpa_symbol_prefix_value=plpa_ + else + plpa_symbol_prefix_value=$with_plpa_symbol_prefix + fi + + + + + plpa_symbol_prefix_value=gasneti_plpa_ + + + + + + # Check for syscall() + ac_fn_c_check_func "$LINENO" "syscall" "ac_cv_func_syscall" +if test "x$ac_cv_func_syscall" = xyes; then : + happy=1 +else + happy=0 +fi + + + # Look for syscall.h + if test "$happy" = 1; then + ac_fn_c_check_header_mongrel "$LINENO" "syscall.h" "ac_cv_header_syscall_h" "$ac_includes_default" +if test "x$ac_cv_header_syscall_h" = xyes; then : + happy=1 +else + happy=0 +fi + + + fi + + # Look for unistd.h + if test "$happy" = 1; then + ac_fn_c_check_header_mongrel "$LINENO" "unistd.h" "ac_cv_header_unistd_h" "$ac_includes_default" +if test "x$ac_cv_header_unistd_h" = xyes; then : + happy=1 +else + happy=0 +fi + + + fi + + # Check for __NR_sched_setaffinity + if test "$happy" = 1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __NR_sched_setaffinity" >&5 +$as_echo_n "checking for __NR_sched_setaffinity... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +int +main () +{ +#ifndef __NR_sched_setaffinity +#error __NR_sched_setaffinity_not found! +#endif +int i = 1; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + happy=1 +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + happy=0 +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + fi + + # Check for __NR_sched_getaffinity (probably overkill, but what + # the heck?) + if test "$happy" = 1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __NR_sched_getaffinity" >&5 +$as_echo_n "checking for __NR_sched_getaffinity... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +#include +int +main () +{ +#ifndef __NR_sched_getaffinity +#error __NR_sched_getaffinity_not found! +#endif +int i = 1; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + happy=1 +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + happy=0 +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + fi + + # If all was good, do the real init + if test "$happy" = 1; then + + + + # Are we building as standalone or included? + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for PLPA building mode" >&5 +$as_echo_n "checking for PLPA building mode... " >&6; } + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $plpa_mode" >&5 +$as_echo "$plpa_mode" >&6; } + + # What prefix are we using? + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for PLPA symbol prefix" >&5 +$as_echo_n "checking for PLPA symbol prefix... " >&6; } + +cat >>confdefs.h <<_ACEOF +#define PLPA_SYM_PREFIX $plpa_symbol_prefix_value +_ACEOF + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $plpa_symbol_prefix_value" >&5 +$as_echo "$plpa_symbol_prefix_value" >&6; } + + # Success + enabled_plpa="yes" + + else + enabled_plpa="no - probe failed" + : # in case enabled_plpa="no - probe failed" is empty. + fi + + # Cleanup + unset happy + + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to use PLPA for cpu binding" >&5 +$as_echo_n "checking whether to use PLPA for cpu binding... " >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enabled_plpa" >&5 +$as_echo "$enabled_plpa" >&6; } + if test "$enabled_plpa" = yes; then + $as_echo "#define HAVE_PLPA 1" >>confdefs.h + + elif test "$force_plpa" = yes; then + as_fn_error $? 
"User requested --enable-plpa but PLPA is not supported on your system" "$LINENO" 5 + fi +fi + if test "$enabled_plpa" = "yes"; then + USE_PLPA_TRUE= + USE_PLPA_FALSE='#' +else + USE_PLPA_TRUE='#' + USE_PLPA_FALSE= +fi + + +# HWLOC header and library + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_IF_ENABLED_WITH_AUTO(hwloc,...) vvvvvvvvvvvvvvvvvvvvvv (L:4) + + + # Check whether --enable-hwloc was given. +if test "${enable_hwloc+set}" = set; then : + enableval=$enable_hwloc; +fi + + + case "$enable_hwloc" in + no) : + enabled_hwloc_lib=no + ;; + yes) : + enabled_hwloc_lib=yes + ;; + *) : + enabled_hwloc_lib=probe + ;; + esac + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_IF_ENABLED_WITH_AUTO(hwloc,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + +have_hwloc_lib=no +if test "$enabled_hwloc_lib" != no; then + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_PACKAGE_HOME(HWLOC,hwloc,hwloc-info,/usr/local,include/hwloc.h) vvvvvvvvvvvvvvvvvvvvvv (L:4) + + + + + HWLOC_guess= + + for ac_prog in hwloc-info +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_HWLOC_guess_prog+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $HWLOC_guess_prog in + [\\/]* | ?:[\\/]*) + ac_cv_path_HWLOC_guess_prog="$HWLOC_guess_prog" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_HWLOC_guess_prog="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +HWLOC_guess_prog=$ac_cv_path_HWLOC_guess_prog +if test -n "$HWLOC_guess_prog"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $HWLOC_guess_prog" >&5 +$as_echo "$HWLOC_guess_prog" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$HWLOC_guess_prog" && break +done + + if test -n "$HWLOC_guess_prog"; then + HWLOC_guess=`dirname \`dirname $HWLOC_guess_prog\`` + fi + + if test -z "$HWLOC_guess" ; then + for HWLOC_guess_file in "include/hwloc.h" "."; do + for HWLOC_guess in /usr/local /usr; do + if test -r "$HWLOC_guess/$HWLOC_guess_file" ; then + break 2 + fi + done + done + unset HWLOC_guess_file + fi + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_ENV_DEFAULT([HWLOC_HOME],[$HWLOC_guess],[Install prefix of hwloc (auto-detected from PATH)]) vvvvvvvvvvvvvvvvvvvvvv (L:5) + + + + + + + + +# Check whether --with-hwloc-home was given. +if test "${with_hwloc_home+set}" = set; then : + withval=$with_hwloc_home; +fi + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for HWLOC_HOME setting" >&5 +$as_echo_n "checking for HWLOC_HOME setting... 
" >&6; } + + envval_src_HWLOC_HOME="cached" + if ${gasnet_cv_envvar_HWLOC_HOME+:} false; then : + $as_echo_n "(cached) " >&6 +else + + if test "3" = "1" ; then # no default means unset + envval_default_HWLOC_HOME="__=-=-=-__NOT_SET__-=-=-=__" + else + envval_default_HWLOC_HOME="$HWLOC_guess" + fi + + # Lowest priority are the enclosing environment and the default value argument (lowest) + if test "${_gasneti_nenv_hwlochome+set}" = "set" ; then + gasnet_cv_envvar_HWLOC_HOME="${_gasneti_nenv_hwlochome}" + envval_src_HWLOC_HOME=given + elif test "${_gasneti_cenv_hwlochome+set}" = "set" ; then + gasnet_cv_envvar_HWLOC_HOME="${_gasneti_cenv_hwlochome}" + envval_src_HWLOC_HOME=conf + else + gasnet_cv_envvar_HWLOC_HOME=$envval_default_HWLOC_HOME + envval_src_HWLOC_HOME=default + fi + # Left-to-right parsing of commandline settings that includes both mechanisms + # --with-VAR=val or VAR=val => set to val + # --with-VAR => set to default + # --without-VAR => set to blank (ie "", not "no") + eval gasnet_fn_env_helper HWLOC_HOME hwlochome $gasnet_cv_configure_args_norm + +fi + + + HWLOC_HOME="$gasnet_cv_envvar_HWLOC_HOME" + + if test "$HWLOC_HOME" = "__=-=-=-__NOT_SET__-=-=-=__" ; then + unset HWLOC_HOME + if test "$envval_src_HWLOC_HOME" = "cached"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (not set)" >&5 +$as_echo " (not set)" >&6; } + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (not set)" >&5 +$as_echo " (not set)" >&6; } + fi + else + case "$envval_src_HWLOC_HOME" in + 'cached') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: \"$HWLOC_HOME\"" >&5 +$as_echo " \"$HWLOC_HOME\"" >&6; } ;; + 'default') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (default) \"$HWLOC_HOME\"" >&5 +$as_echo " (default) \"$HWLOC_HOME\"" >&6; } ;; + 'disabled') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (disabled) \"$HWLOC_HOME\"" >&5 +$as_echo " (disabled) \"$HWLOC_HOME\"" >&6; } ;; + 'given') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (provided) 
\"$HWLOC_HOME\"" >&5 +$as_echo " (provided) \"$HWLOC_HOME\"" >&6; } ;; + 'conf') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (provided) \"$HWLOC_HOME\"" >&5 +$as_echo " (provided) \"$HWLOC_HOME\"" >&6; } + + +echo +echo "configure error: Ambiguous environment setting for \$HWLOC_HOME. Please configure --with-HWLOC_HOME=\"intended value\"" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 + + + ;; + *) + +echo +echo "configure error: _GASNET_ENV_DEFAULT broken" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 + + + esac + fi + + + + + + + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_ENV_DEFAULT([HWLOC_HOME],[$HWLOC_guess],[Install prefix of hwloc (auto-detected from PATH)]) ^^^^^^^^^^^^^^^^^^^^^^ (L:5) + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $HWLOC_HOME is the hwloc install prefix" >&5 +$as_echo_n "checking if $HWLOC_HOME is the hwloc install prefix... 
" >&6; } + HWLOC_HOME_good=no + if test -r "$HWLOC_HOME/include/hwloc.h"; then + HWLOC_HOME_good=yes + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + fi + + if test $HWLOC_HOME_good = no && test -n "$HWLOC_guess_prog"; then + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_FOLLOWLINKS(HWLOC_guess_prog) vvvvvvvvvvvvvvvvvvvvvv (L:5) + + + gasnet_fl_file="$HWLOC_guess_prog" + gasnet_fl_link=`/bin/ls -al "$gasnet_fl_file" | $AWK 'BEGIN{FS=">"}{split($2,A," ") ; print A[1]}'` + while test "$gasnet_fl_link"; do + gasnet_fl_file="$gasnet_fl_link" + gasnet_fl_link=`/bin/ls -al "$gasnet_fl_file" | $AWK 'BEGIN{FS=">"}{split($2,A," ") ; print A[1]}'` + done + HWLOC_guess_prog="$gasnet_fl_file" + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_FOLLOWLINKS(HWLOC_guess_prog) ^^^^^^^^^^^^^^^^^^^^^^ (L:5) + + + + HWLOC_HOME_tmp=`dirname \`dirname $HWLOC_guess_prog\`` + if test -r "$HWLOC_HOME_tmp/include/hwloc.h"; then + HWLOC_HOME_good=yes + HWLOC_HOME="$HWLOC_HOME_tmp" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no - followed symlink to $HWLOC_HOME" >&5 +$as_echo "no - followed symlink to $HWLOC_HOME" >&6; } + fi + unset HWLOC_HOME_tmp + fi + + if test $HWLOC_HOME_good = no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: probably not - missing include/hwloc.h" >&5 +$as_echo "probably not - missing include/hwloc.h" >&6; } + fi + unset HWLOC_HOME_good + unset HWLOC_guess + + + + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PACKAGE_HOME ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_PACKAGE_CFLAGS([HWLOC]) vvvvvvvvvvvvvvvvvvvvvv (L:4) + + + + + case "$HWLOC_HOME" in + /usr|/) HWLOC_cflags_guess='';; + *) if test -d "$HWLOC_HOME/include/."; then + HWLOC_cflags_guess="-I$HWLOC_HOME/include" + else + HWLOC_cflags_guess='' + fi;; + esac + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_ENV_DEFAULT([HWLOC_CFLAGS],[$HWLOC_cflags_guess]) vvvvvvvvvvvvvvvvvvvvvv (L:5) + + + + + + + + +# Check whether --with-hwloc-cflags was given. 
+if test "${with_hwloc_cflags+set}" = set; then : + withval=$with_hwloc_cflags; +fi + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for HWLOC_CFLAGS setting" >&5 +$as_echo_n "checking for HWLOC_CFLAGS setting... " >&6; } + + envval_src_HWLOC_CFLAGS="cached" + if ${gasnet_cv_envvar_HWLOC_CFLAGS+:} false; then : + $as_echo_n "(cached) " >&6 +else + + if test "2" = "1" ; then # no default means unset + envval_default_HWLOC_CFLAGS="__=-=-=-__NOT_SET__-=-=-=__" + else + envval_default_HWLOC_CFLAGS="$HWLOC_cflags_guess" + fi + + # Lowest priority are the enclosing environment and the default value argument (lowest) + if test "${_gasneti_nenv_hwloccflags+set}" = "set" ; then + gasnet_cv_envvar_HWLOC_CFLAGS="${_gasneti_nenv_hwloccflags}" + envval_src_HWLOC_CFLAGS=given + elif test "${_gasneti_cenv_hwloccflags+set}" = "set" ; then + gasnet_cv_envvar_HWLOC_CFLAGS="${_gasneti_cenv_hwloccflags}" + envval_src_HWLOC_CFLAGS=conf + else + gasnet_cv_envvar_HWLOC_CFLAGS=$envval_default_HWLOC_CFLAGS + envval_src_HWLOC_CFLAGS=default + fi + # Left-to-right parsing of commandline settings that includes both mechanisms + # --with-VAR=val or VAR=val => set to val + # --with-VAR => set to default + # --without-VAR => set to blank (ie "", not "no") + eval gasnet_fn_env_helper HWLOC_CFLAGS hwloccflags $gasnet_cv_configure_args_norm + +fi + + + HWLOC_CFLAGS="$gasnet_cv_envvar_HWLOC_CFLAGS" + + if test "$HWLOC_CFLAGS" = "__=-=-=-__NOT_SET__-=-=-=__" ; then + unset HWLOC_CFLAGS + if test "$envval_src_HWLOC_CFLAGS" = "cached"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (not set)" >&5 +$as_echo " (not set)" >&6; } + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (not set)" >&5 +$as_echo " (not set)" >&6; } + fi + else + case "$envval_src_HWLOC_CFLAGS" in + 'cached') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: \"$HWLOC_CFLAGS\"" >&5 +$as_echo " \"$HWLOC_CFLAGS\"" >&6; } ;; + 'default') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (default) 
\"$HWLOC_CFLAGS\"" >&5 +$as_echo " (default) \"$HWLOC_CFLAGS\"" >&6; } ;; + 'disabled') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (disabled) \"$HWLOC_CFLAGS\"" >&5 +$as_echo " (disabled) \"$HWLOC_CFLAGS\"" >&6; } ;; + 'given') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (provided) \"$HWLOC_CFLAGS\"" >&5 +$as_echo " (provided) \"$HWLOC_CFLAGS\"" >&6; } ;; + 'conf') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (provided) \"$HWLOC_CFLAGS\"" >&5 +$as_echo " (provided) \"$HWLOC_CFLAGS\"" >&6; } + + +echo +echo "configure error: Ambiguous environment setting for \$HWLOC_CFLAGS. Please configure --with-HWLOC_CFLAGS=\"intended value\"" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 + + + ;; + *) + +echo +echo "configure error: _GASNET_ENV_DEFAULT broken" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 + + + esac + fi + + + + + + + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_ENV_DEFAULT([HWLOC_CFLAGS],[$HWLOC_cflags_guess]) ^^^^^^^^^^^^^^^^^^^^^^ (L:5) + + + + unset HWLOC_cflags_guess + + + + + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PACKAGE_CFLAGS ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_ENV_DEFAULT([HWLOC_LIBS],[-lhwloc]) vvvvvvvvvvvvvvvvvvvvvv (L:4) + + + + + + + + +# Check whether --with-hwloc-libs was given. +if test "${with_hwloc_libs+set}" = set; then : + withval=$with_hwloc_libs; +fi + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for HWLOC_LIBS setting" >&5 +$as_echo_n "checking for HWLOC_LIBS setting... " >&6; } + + envval_src_HWLOC_LIBS="cached" + if ${gasnet_cv_envvar_HWLOC_LIBS+:} false; then : + $as_echo_n "(cached) " >&6 +else + + if test "2" = "1" ; then # no default means unset + envval_default_HWLOC_LIBS="__=-=-=-__NOT_SET__-=-=-=__" + else + envval_default_HWLOC_LIBS="-lhwloc" + fi + + # Lowest priority are the enclosing environment and the default value argument (lowest) + if test "${_gasneti_nenv_hwloclibs+set}" = "set" ; then + gasnet_cv_envvar_HWLOC_LIBS="${_gasneti_nenv_hwloclibs}" + envval_src_HWLOC_LIBS=given + elif test "${_gasneti_cenv_hwloclibs+set}" = "set" ; then + gasnet_cv_envvar_HWLOC_LIBS="${_gasneti_cenv_hwloclibs}" + envval_src_HWLOC_LIBS=conf + else + gasnet_cv_envvar_HWLOC_LIBS=$envval_default_HWLOC_LIBS + envval_src_HWLOC_LIBS=default + fi + # Left-to-right parsing of commandline settings that includes both mechanisms + # --with-VAR=val or VAR=val => set to val + # --with-VAR => set to default + # --without-VAR => set to blank (ie "", not "no") + eval gasnet_fn_env_helper HWLOC_LIBS hwloclibs $gasnet_cv_configure_args_norm + +fi + + + HWLOC_LIBS="$gasnet_cv_envvar_HWLOC_LIBS" + + if test "$HWLOC_LIBS" = "__=-=-=-__NOT_SET__-=-=-=__" ; then + unset HWLOC_LIBS + if test "$envval_src_HWLOC_LIBS" = "cached"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (not set)" >&5 +$as_echo 
" (not set)" >&6; } + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (not set)" >&5 +$as_echo " (not set)" >&6; } + fi + else + case "$envval_src_HWLOC_LIBS" in + 'cached') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: \"$HWLOC_LIBS\"" >&5 +$as_echo " \"$HWLOC_LIBS\"" >&6; } ;; + 'default') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (default) \"$HWLOC_LIBS\"" >&5 +$as_echo " (default) \"$HWLOC_LIBS\"" >&6; } ;; + 'disabled') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (disabled) \"$HWLOC_LIBS\"" >&5 +$as_echo " (disabled) \"$HWLOC_LIBS\"" >&6; } ;; + 'given') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (provided) \"$HWLOC_LIBS\"" >&5 +$as_echo " (provided) \"$HWLOC_LIBS\"" >&6; } ;; + 'conf') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (provided) \"$HWLOC_LIBS\"" >&5 +$as_echo " (provided) \"$HWLOC_LIBS\"" >&6; } + + +echo +echo "configure error: Ambiguous environment setting for \$HWLOC_LIBS. Please configure --with-HWLOC_LIBS=\"intended value\"" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 + + + ;; + *) + +echo +echo "configure error: _GASNET_ENV_DEFAULT broken" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 + + + esac + fi + + + + + + + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_ENV_DEFAULT([HWLOC_LIBS],[-lhwloc]) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_PACKAGE_LDFLAGS([HWLOC],[hwloc]) vvvvvvvvvvvvvvvvvvvvvv (L:4) + + + + + case "$HWLOC_HOME" in + /usr|/) HWLOC_ldflags_guess='';; + *) for HWLOC_ldflags_guess in "$HWLOC_HOME/lib${GASNETI_PTR_BITS}" "$HWLOC_HOME/lib"; do + for HWLOC_ldflags_guess_lib in hwloc; do + for HWLOC_ldflags_guess_suff in a so sl dylib la dll; do + if test -r "$HWLOC_ldflags_guess/lib$HWLOC_ldflags_guess_lib.$HWLOC_ldflags_guess_suff"; then break 3; fi + done + done + done + HWLOC_ldflags_guess="-L$HWLOC_ldflags_guess";; + esac + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_ENV_DEFAULT([HWLOC_LDFLAGS],[$HWLOC_ldflags_guess]) vvvvvvvvvvvvvvvvvvvvvv (L:5) + + + + + + + + +# Check whether --with-hwloc-ldflags was given. +if test "${with_hwloc_ldflags+set}" = set; then : + withval=$with_hwloc_ldflags; +fi + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for HWLOC_LDFLAGS setting" >&5 +$as_echo_n "checking for HWLOC_LDFLAGS setting... 
" >&6; } + + envval_src_HWLOC_LDFLAGS="cached" + if ${gasnet_cv_envvar_HWLOC_LDFLAGS+:} false; then : + $as_echo_n "(cached) " >&6 +else + + if test "2" = "1" ; then # no default means unset + envval_default_HWLOC_LDFLAGS="__=-=-=-__NOT_SET__-=-=-=__" + else + envval_default_HWLOC_LDFLAGS="$HWLOC_ldflags_guess" + fi + + # Lowest priority are the enclosing environment and the default value argument (lowest) + if test "${_gasneti_nenv_hwlocldflags+set}" = "set" ; then + gasnet_cv_envvar_HWLOC_LDFLAGS="${_gasneti_nenv_hwlocldflags}" + envval_src_HWLOC_LDFLAGS=given + elif test "${_gasneti_cenv_hwlocldflags+set}" = "set" ; then + gasnet_cv_envvar_HWLOC_LDFLAGS="${_gasneti_cenv_hwlocldflags}" + envval_src_HWLOC_LDFLAGS=conf + else + gasnet_cv_envvar_HWLOC_LDFLAGS=$envval_default_HWLOC_LDFLAGS + envval_src_HWLOC_LDFLAGS=default + fi + # Left-to-right parsing of commandline settings that includes both mechanisms + # --with-VAR=val or VAR=val => set to val + # --with-VAR => set to default + # --without-VAR => set to blank (ie "", not "no") + eval gasnet_fn_env_helper HWLOC_LDFLAGS hwlocldflags $gasnet_cv_configure_args_norm + +fi + + + HWLOC_LDFLAGS="$gasnet_cv_envvar_HWLOC_LDFLAGS" + + if test "$HWLOC_LDFLAGS" = "__=-=-=-__NOT_SET__-=-=-=__" ; then + unset HWLOC_LDFLAGS + if test "$envval_src_HWLOC_LDFLAGS" = "cached"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (not set)" >&5 +$as_echo " (not set)" >&6; } + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (not set)" >&5 +$as_echo " (not set)" >&6; } + fi + else + case "$envval_src_HWLOC_LDFLAGS" in + 'cached') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: \"$HWLOC_LDFLAGS\"" >&5 +$as_echo " \"$HWLOC_LDFLAGS\"" >&6; } ;; + 'default') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (default) \"$HWLOC_LDFLAGS\"" >&5 +$as_echo " (default) \"$HWLOC_LDFLAGS\"" >&6; } ;; + 'disabled') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (disabled) \"$HWLOC_LDFLAGS\"" >&5 +$as_echo " (disabled) 
\"$HWLOC_LDFLAGS\"" >&6; } ;; + 'given') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (provided) \"$HWLOC_LDFLAGS\"" >&5 +$as_echo " (provided) \"$HWLOC_LDFLAGS\"" >&6; } ;; + 'conf') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (provided) \"$HWLOC_LDFLAGS\"" >&5 +$as_echo " (provided) \"$HWLOC_LDFLAGS\"" >&6; } + + +echo +echo "configure error: Ambiguous environment setting for \$HWLOC_LDFLAGS. Please configure --with-HWLOC_LDFLAGS=\"intended value\"" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 + + + ;; + *) + +echo +echo "configure error: _GASNET_ENV_DEFAULT broken" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 + + + esac + fi + + + + + + + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_ENV_DEFAULT([HWLOC_LDFLAGS],[$HWLOC_ldflags_guess]) ^^^^^^^^^^^^^^^^^^^^^^ (L:5) + + + + unset HWLOC_ldflags_guess + unset HWLOC_ldflags_guess_lib + unset HWLOC_ldflags_guess_suff + + + + + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PACKAGE_LDFLAGS ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_SPLIT_LINKER_OPTS([HWLOC_LDFLAGS],[HWLOC_LIBS]) vvvvvvvvvvvvvvvvvvvvvv (L:4) + + + eval gasnet_fn_split_linker_opts HWLOC_LDFLAGS HWLOC_LIBS $HWLOC_LDFLAGS $HWLOC_LIBS + #echo "HWLOC_LDFLAGS=[$]HWLOC_LDFLAGS" + #echo "HWLOC_LIBS=[$]HWLOC_LIBS" + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_SPLIT_LINKER_OPTS([HWLOC_LDFLAGS],[HWLOC_LIBS]) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + # Try to compile and link a basic program using hwloc + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(CFLAGS,"$CFLAGS $HWLOC_CFLAGS") vvvvvvvvvvvvvvvvvvvvvv (L:4) + + if test "$_pushcnt_CFLAGS" = "" ; then + _pushcnt_CFLAGS=0 + fi + if test "$_total_pushcnt" = "" ; then + _total_pushcnt=0 + fi + if test "${CFLAGS+set}" = set; then + _gasnet_pushvar_isset=1 + else + _gasnet_pushvar_isset=0 + fi + eval _pushedvar_CFLAGS_$_pushcnt_CFLAGS=\$CFLAGS + eval _pushedvarset_CFLAGS_$_pushcnt_CFLAGS=$_gasnet_pushvar_isset + _pushcnt_CFLAGS=`expr $_pushcnt_CFLAGS + 1` + _total_pushcnt=`expr $_total_pushcnt + 1` + CFLAGS="$CFLAGS $HWLOC_CFLAGS" + echo "pushed new CFLAGS value: $CFLAGS" >&5 + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(CFLAGS,"$CFLAGS $HWLOC_CFLAGS") ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(LDFLAGS,"$LDFLAGS $HWLOC_LDFLAGS") vvvvvvvvvvvvvvvvvvvvvv (L:4) + + if test "$_pushcnt_LDFLAGS" = "" ; then + _pushcnt_LDFLAGS=0 + fi + if test "$_total_pushcnt" = "" ; then + _total_pushcnt=0 + fi + if test "${LDFLAGS+set}" = set; then + _gasnet_pushvar_isset=1 + else + _gasnet_pushvar_isset=0 + fi + eval _pushedvar_LDFLAGS_$_pushcnt_LDFLAGS=\$LDFLAGS + eval 
_pushedvarset_LDFLAGS_$_pushcnt_LDFLAGS=$_gasnet_pushvar_isset + _pushcnt_LDFLAGS=`expr $_pushcnt_LDFLAGS + 1` + _total_pushcnt=`expr $_total_pushcnt + 1` + LDFLAGS="$LDFLAGS $HWLOC_LDFLAGS" + echo "pushed new LDFLAGS value: $LDFLAGS" >&5 + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(LDFLAGS,"$LDFLAGS $HWLOC_LDFLAGS") ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_PUSHVAR(LIBS,"$LIBS $HWLOC_LIBS") vvvvvvvvvvvvvvvvvvvvvv (L:4) + + if test "$_pushcnt_LIBS" = "" ; then + _pushcnt_LIBS=0 + fi + if test "$_total_pushcnt" = "" ; then + _total_pushcnt=0 + fi + if test "${LIBS+set}" = set; then + _gasnet_pushvar_isset=1 + else + _gasnet_pushvar_isset=0 + fi + eval _pushedvar_LIBS_$_pushcnt_LIBS=\$LIBS + eval _pushedvarset_LIBS_$_pushcnt_LIBS=$_gasnet_pushvar_isset + _pushcnt_LIBS=`expr $_pushcnt_LIBS + 1` + _total_pushcnt=`expr $_total_pushcnt + 1` + LIBS="$LIBS $HWLOC_LIBS" + echo "pushed new LIBS value: $LIBS" >&5 + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_PUSHVAR(LIBS,"$LIBS $HWLOC_LIBS") ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_TRY_CACHE_LINK(for libhwloc with API v1.0 or newer,libhwloc_works,...) vvvvvvvvvvvvvvvvvvvvvv (L:4) + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for libhwloc with API v1.0 or newer" >&5 +$as_echo_n "checking for libhwloc with API v1.0 or newer... " >&6; } +if ${gasnet_cv_libhwloc_works+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + #include + #if HWLOC_API_VERSION < 0x010000 + #error No support for 0.9 series + #endif + +int +main () +{ + + hwloc_topology_t topology; + hwloc_topology_init(&topology); + hwloc_topology_load(topology); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + gasnet_cv_libhwloc_works=yes +else + gasnet_cv_libhwloc_works=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gasnet_cv_libhwloc_works" >&5 +$as_echo "$gasnet_cv_libhwloc_works" >&6; } +if test "$gasnet_cv_libhwloc_works" = yes; then + : + + have_hwloc_lib=yes + +else + : + + HWLOC_CFLAGS="" + HWLOC_LDFLAGS="" + HWLOC_LIBS="" + +fi + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_TRY_CACHE_LINK(for libhwloc with API v1.0 or newer,libhwloc_works,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_POPVAR(LIBS) vvvvvvvvvvvvvvvvvvvvvv (L:4) + + if test "$_pushcnt_LIBS" -ge "1"; then + _pushcnt_LIBS=`expr $_pushcnt_LIBS - 1` + _total_pushcnt=`expr $_total_pushcnt - 1` + eval _gasnet_pushvar_isset=\$_pushedvarset_LIBS_$_pushcnt_LIBS + if test "$_gasnet_pushvar_isset" = "1" ; then + eval LIBS=\$_pushedvar_LIBS_$_pushcnt_LIBS + echo "popping LIBS back to: $LIBS" >&5 + else + unset LIBS + echo "popping LIBS back to: " >&5 + fi + else + + +echo +echo "configure error: INTERNAL ERROR: GASNET_PUSH/POPVAR underflow on LIBS" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 + + + fi + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_POPVAR(LIBS) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_POPVAR(LDFLAGS) vvvvvvvvvvvvvvvvvvvvvv (L:4) + + if test "$_pushcnt_LDFLAGS" -ge "1"; then + _pushcnt_LDFLAGS=`expr $_pushcnt_LDFLAGS - 1` + _total_pushcnt=`expr $_total_pushcnt - 1` + eval _gasnet_pushvar_isset=\$_pushedvarset_LDFLAGS_$_pushcnt_LDFLAGS + if test "$_gasnet_pushvar_isset" = "1" ; then + eval LDFLAGS=\$_pushedvar_LDFLAGS_$_pushcnt_LDFLAGS + echo "popping LDFLAGS back to: $LDFLAGS" >&5 + else + unset LDFLAGS + echo "popping LDFLAGS back to: " >&5 + fi + else + + +echo +echo "configure error: INTERNAL ERROR: GASNET_PUSH/POPVAR underflow on LDFLAGS" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 + + + fi + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_POPVAR(LDFLAGS) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_POPVAR(CFLAGS) vvvvvvvvvvvvvvvvvvvvvv (L:4) + + if test "$_pushcnt_CFLAGS" -ge "1"; then + _pushcnt_CFLAGS=`expr $_pushcnt_CFLAGS - 1` + _total_pushcnt=`expr $_total_pushcnt - 1` + eval _gasnet_pushvar_isset=\$_pushedvarset_CFLAGS_$_pushcnt_CFLAGS + if test "$_gasnet_pushvar_isset" = "1" ; then + eval CFLAGS=\$_pushedvar_CFLAGS_$_pushcnt_CFLAGS + echo "popping CFLAGS back to: $CFLAGS" >&5 + else + unset CFLAGS + echo "popping CFLAGS back to: " >&5 + fi + else + + +echo +echo "configure error: INTERNAL ERROR: GASNET_PUSH/POPVAR underflow on CFLAGS" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 + + + fi + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_POPVAR(CFLAGS) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + if test "$have_hwloc_lib" = yes; then + $as_echo "#define GASNETI_HAVE_HWLOC_LIB 1" >>confdefs.h + + elif test "$enabled_hwloc_lib" = yes; then + as_fn_error $? "User requested --enable-hwloc but the required hwloc header or library were not found." "$LINENO" 5 + fi +fi + + + + +# HWLOC utilies: hwloc-{bind,calc} +# Used as fallback if !have_hwloc_lib + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_IF_ENABLED_WITH_AUTO(hwloc-utils,...) vvvvvvvvvvvvvvvvvvvvvv (L:4) + + + # Check whether --enable-hwloc-utils was given. 
+if test "${enable_hwloc_utils+set}" = set; then : + enableval=$enable_hwloc_utils; +fi + + + case "$enable_hwloc_utils" in + no) : + enabled_hwloc_utils=no + ;; + yes) : + enabled_hwloc_utils=yes + ;; + *) : + if test "$cross_compiling" = "no" || test "x$enable_hwloc_utils" = 'xprobe'; then + enabled_hwloc_utils=probe + else + # utils on build system unlikely to represent the target + enabled_hwloc_utils=no + fi + ;; + esac + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_IF_ENABLED_WITH_AUTO(hwloc-utils,...) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + +have_hwloc_utils=no +if test "$enabled_hwloc_utils" != no; then + if test "$have_fork" = yes; then + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_ENV_DEFAULT([HWLOC_UTILS_HOME],[],[Install prefix of hwloc command-line utilities (auto-detected from PATH)]) vvvvvvvvvvvvvvvvvvvvvv (L:4) + + + + + + + + +# Check whether --with-hwloc-utils-home was given. +if test "${with_hwloc_utils_home+set}" = set; then : + withval=$with_hwloc_utils_home; +fi + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for HWLOC_UTILS_HOME setting" >&5 +$as_echo_n "checking for HWLOC_UTILS_HOME setting... 
" >&6; } + + envval_src_HWLOC_UTILS_HOME="cached" + if ${gasnet_cv_envvar_HWLOC_UTILS_HOME+:} false; then : + $as_echo_n "(cached) " >&6 +else + + if test "3" = "1" ; then # no default means unset + envval_default_HWLOC_UTILS_HOME="__=-=-=-__NOT_SET__-=-=-=__" + else + envval_default_HWLOC_UTILS_HOME="" + fi + + # Lowest priority are the enclosing environment and the default value argument (lowest) + if test "${_gasneti_nenv_hwlocutilshome+set}" = "set" ; then + gasnet_cv_envvar_HWLOC_UTILS_HOME="${_gasneti_nenv_hwlocutilshome}" + envval_src_HWLOC_UTILS_HOME=given + elif test "${_gasneti_cenv_hwlocutilshome+set}" = "set" ; then + gasnet_cv_envvar_HWLOC_UTILS_HOME="${_gasneti_cenv_hwlocutilshome}" + envval_src_HWLOC_UTILS_HOME=conf + else + gasnet_cv_envvar_HWLOC_UTILS_HOME=$envval_default_HWLOC_UTILS_HOME + envval_src_HWLOC_UTILS_HOME=default + fi + # Left-to-right parsing of commandline settings that includes both mechanisms + # --with-VAR=val or VAR=val => set to val + # --with-VAR => set to default + # --without-VAR => set to blank (ie "", not "no") + eval gasnet_fn_env_helper HWLOC_UTILS_HOME hwlocutilshome $gasnet_cv_configure_args_norm + +fi + + + HWLOC_UTILS_HOME="$gasnet_cv_envvar_HWLOC_UTILS_HOME" + + if test "$HWLOC_UTILS_HOME" = "__=-=-=-__NOT_SET__-=-=-=__" ; then + unset HWLOC_UTILS_HOME + if test "$envval_src_HWLOC_UTILS_HOME" = "cached"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (not set)" >&5 +$as_echo " (not set)" >&6; } + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (not set)" >&5 +$as_echo " (not set)" >&6; } + fi + else + case "$envval_src_HWLOC_UTILS_HOME" in + 'cached') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: \"$HWLOC_UTILS_HOME\"" >&5 +$as_echo " \"$HWLOC_UTILS_HOME\"" >&6; } ;; + 'default') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (default) \"$HWLOC_UTILS_HOME\"" >&5 +$as_echo " (default) \"$HWLOC_UTILS_HOME\"" >&6; } ;; + 'disabled') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (disabled) 
\"$HWLOC_UTILS_HOME\"" >&5 +$as_echo " (disabled) \"$HWLOC_UTILS_HOME\"" >&6; } ;; + 'given') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (provided) \"$HWLOC_UTILS_HOME\"" >&5 +$as_echo " (provided) \"$HWLOC_UTILS_HOME\"" >&6; } ;; + 'conf') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (provided) \"$HWLOC_UTILS_HOME\"" >&5 +$as_echo " (provided) \"$HWLOC_UTILS_HOME\"" >&6; } + + +echo +echo "configure error: Ambiguous environment setting for \$HWLOC_UTILS_HOME. Please configure --with-HWLOC_UTILS_HOME=\"intended value\"" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." "$LINENO" 5 + + + ;; + *) + +echo +echo "configure error: _GASNET_ENV_DEFAULT broken" +if test "" ; then +if test -f "conftest.$ac_ext" ; then + errfile=conftest.$ac_ext +else + errfile=gasnet_errsave_file +fi +if test -f "$errfile" ; then + echo + echo " --- Failed program --- " + cat $errfile + echo " -----------------------" +fi +fi +if test -f "conftest.err" ; then + errfile=conftest.err +else + errfile=gasnet_errsave_err +fi +if test -f "$errfile" ; then + echo + echo "Compilation error: " + echo + cat $errfile +fi +echo +CONFIG_FILE=`pwd`/config.log +as_fn_error $? "See $CONFIG_FILE for details." 
"$LINENO" 5 + + + esac + fi + + + + + + + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_ENV_DEFAULT([HWLOC_UTILS_HOME],[],[Install prefix of hwloc command-line utilities (auto-detected from PATH)]) ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + if test -n "$HWLOC_UTILS_HOME"; then + hwloc_utils_path="$HWLOC_UTILS_HOME/bin" + else + hwloc_utils_path="$PATH" + fi + for ac_prog in hwloc-bind +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_GASNETI_HWLOC_BIND_PATH+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $GASNETI_HWLOC_BIND_PATH in + [\\/]* | ?:[\\/]*) + ac_cv_path_GASNETI_HWLOC_BIND_PATH="$GASNETI_HWLOC_BIND_PATH" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $hwloc_utils_path +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_GASNETI_HWLOC_BIND_PATH="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +GASNETI_HWLOC_BIND_PATH=$ac_cv_path_GASNETI_HWLOC_BIND_PATH +if test -n "$GASNETI_HWLOC_BIND_PATH"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $GASNETI_HWLOC_BIND_PATH" >&5 +$as_echo "$GASNETI_HWLOC_BIND_PATH" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$GASNETI_HWLOC_BIND_PATH" && break +done + + if test -n "$GASNETI_HWLOC_BIND_PATH" && test "$cross_compiling" = "no"; then + # Require --get, added in hwloc 1.0 + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for required features in hwloc-bind" >&5 +$as_echo_n "checking for required features in hwloc-bind... 
" >&6; } + if expr `$GASNETI_HWLOC_BIND_PATH --get 2>&1` : 0x >/dev/null 2>&1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + cat >>confdefs.h <<_ACEOF +#define GASNETI_HWLOC_BIND_PATH "$GASNETI_HWLOC_BIND_PATH" +_ACEOF + + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + unset GASNETI_HWLOC_BIND_PATH + fi + fi + for ac_prog in hwloc-calc +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_GASNETI_HWLOC_CALC_PATH+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $GASNETI_HWLOC_CALC_PATH in + [\\/]* | ?:[\\/]*) + ac_cv_path_GASNETI_HWLOC_CALC_PATH="$GASNETI_HWLOC_CALC_PATH" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $hwloc_utils_path +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_GASNETI_HWLOC_CALC_PATH="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +GASNETI_HWLOC_CALC_PATH=$ac_cv_path_GASNETI_HWLOC_CALC_PATH +if test -n "$GASNETI_HWLOC_CALC_PATH"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $GASNETI_HWLOC_CALC_PATH" >&5 +$as_echo "$GASNETI_HWLOC_CALC_PATH" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$GASNETI_HWLOC_CALC_PATH" && break +done + + if test -n "$GASNETI_HWLOC_CALC_PATH" && test "$cross_compiling" = "no"; then + # Require --intersect, added in hwloc 1.1 + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for required features in hwloc-calc" >&5 +$as_echo_n "checking for required features in hwloc-calc... " >&6; } + if $GASNETI_HWLOC_CALC_PATH --intersect Socket 0x0 /dev/null 2>&1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + cat >>confdefs.h <<_ACEOF +#define GASNETI_HWLOC_CALC_PATH "$GASNETI_HWLOC_CALC_PATH" +_ACEOF + + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + unset GASNETI_HWLOC_CALC_PATH + fi + fi + if test -n "$GASNETI_HWLOC_BIND_PATH" && test -n "$GASNETI_HWLOC_CALC_PATH"; then + have_hwloc_utils=yes # Have both + $as_echo "#define GASNETI_HAVE_HWLOC_UTILS 1" >>confdefs.h + + if test "$cross_compiling" = "yes"; then + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_MSG_WARN() vvvvvvvvvvvvvvvvvvvvvv (L:4) + + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Utilities hwloc-bind and hwloc-calc have been located, as show above. However, their validation has been skipped due to cross-compilation." >&5 +$as_echo "$as_me: WARNING: Utilities hwloc-bind and hwloc-calc have been located, as show above. 
However, their validation has been skipped due to cross-compilation." >&2;} + echo "Utilities hwloc-bind and hwloc-calc have been located, as show above. However, their validation has been skipped due to cross-compilation." >> ".gasnet_cv_configure_warnings.tmp" + echo " " >> ".gasnet_cv_configure_warnings.tmp" + +# ^^^^^^^^^^^^^^^^^^^^^^ GASNET_MSG_WARN() ^^^^^^^^^^^^^^^^^^^^^^ (L:4) + + + + + fi + elif test "$enabled_hwloc_utils" = yes; then + as_fn_error $? "User requested --with-hwloc-utils but one or both of hwloc-calc or hwloc-bind was not found." "$LINENO" 5 + fi + elif test "$enabled_hwloc_utils" = yes; then + as_fn_error $? "User requested --with-hwloc-utils but this option requires popen which $fork_reason." "$LINENO" 5 + fi +fi + +# AC_FUNC_MMAP +# Check for mmap functionality we actually use +if test "$cross_compiling" = "yes" ; then + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_CROSS_VAR(HAVE_MMAP,HAVE_MMAP,) vvvvvvvvvvvvvvvvvvvvvv (L:4) + + + if test "$cross_compiling" = "yes" ; then + + + + +# vvvvvvvvvvvvvvvvvvvvvv GASNET_ENV_DEFAULT([CROSS_HAVE_MMAP],[]) vvvvvvvvvvvvvvvvvvvvvv (L:5) + + + + + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for CROSS_HAVE_MMAP setting" >&5 +$as_echo_n "checking for CROSS_HAVE_MMAP setting... 
" >&6; } + + envval_src_CROSS_HAVE_MMAP="cached" + if ${gasnet_cv_envvar_CROSS_HAVE_MMAP+:} false; then : + $as_echo_n "(cached) " >&6 +else + + if test "2" = "1" ; then # no default means unset + envval_default_CROSS_HAVE_MMAP="__=-=-=-__NOT_SET__-=-=-=__" + else + envval_default_CROSS_HAVE_MMAP="" + fi + + # Lowest priority are the enclosing environment and the default value argument (lowest) + if test "${_gasneti_nenv_crosshavemmap+set}" = "set" ; then + gasnet_cv_envvar_CROSS_HAVE_MMAP="${_gasneti_nenv_crosshavemmap}" + envval_src_CROSS_HAVE_MMAP=given + elif test "${_gasneti_cenv_crosshavemmap+set}" = "set" ; then + gasnet_cv_envvar_CROSS_HAVE_MMAP="${_gasneti_cenv_crosshavemmap}" + envval_src_CROSS_HAVE_MMAP=conf + else + gasnet_cv_envvar_CROSS_HAVE_MMAP=$envval_default_CROSS_HAVE_MMAP + envval_src_CROSS_HAVE_MMAP=default + fi + # Left-to-right parsing of commandline settings that includes both mechanisms + # --with-VAR=val or VAR=val => set to val + # --with-VAR => set to default + # --without-VAR => set to blank (ie "", not "no") + eval gasnet_fn_env_helper CROSS_HAVE_MMAP crosshavemmap $gasnet_cv_configure_args_norm + +fi + + + CROSS_HAVE_MMAP="$gasnet_cv_envvar_CROSS_HAVE_MMAP" + + if test "$CROSS_HAVE_MMAP" = "__=-=-=-__NOT_SET__-=-=-=__" ; then + unset CROSS_HAVE_MMAP + if test "$envval_src_CROSS_HAVE_MMAP" = "cached"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (not set)" >&5 +$as_echo " (not set)" >&6; } + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (not set)" >&5 +$as_echo " (not set)" >&6; } + fi + else + case "$envval_src_CROSS_HAVE_MMAP" in + 'cached') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: \"$CROSS_HAVE_MMAP\"" >&5 +$as_echo " \"$CROSS_HAVE_MMAP\"" >&6; } ;; + 'default') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (default) \"$CROSS_HAVE_MMAP\"" >&5 +$as_echo " (default) \"$CROSS_HAVE_MMAP\"" >&6; } ;; + 'disabled') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (disabled) \"$CROSS_HAVE_MMAP\"" >&5 
+$as_echo " (disabled) \"$CROSS_HAVE_MMAP\"" >&6; } ;; + 'given') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (provided) \"$CROSS_HAVE_MMAP\"" >&5 +$as_echo " (provided) \"$CROSS_HAVE_MMAP\"" >&6; } ;; + 'conf') + { $as_echo "$as_me:${as_lineno-$LINENO}: result: (provided) \"$CROSS_HAVE_MMAP\"" >&5 +$as_echo " (provided) \"$CROSS_HAVE_MMAP\"" >&6; } + + +echo +echo "configure error: Ambiguous environment setting for \$CROSS_HAVE_MMAP. Please configure --with-CROSS_HAVE_MMAP=\"intended value\"" if test "" ; then if test -f "conftest.$ac_ext" ; then errfile=conftest.$ac_ext @@ -261631,8 +263571,9 @@ case "$target_os" in LDFLAGS="$LDFLAGS ${dash_Wl}-multiply_defined,suppress" # Darwin 11 (MacOSX Lion) and newer use address-space randomization by default + # But -no_pie is ignored (with a warning) on aarch64 tmp_ver=`expr "$target_os" : 'darwin\([0-9]*\)' 2>/dev/null` - if expr $tmp_ver \>= 11 >/dev/null; then + if test $target_cpu != aarch64 && expr $tmp_ver \>= 11 >/dev/null; then NOASLR_LDFLAGS="${dash_Wl}-no_pie" fi ;; @@ -262357,6 +264298,10 @@ if test -z "${HAVE_BOOTSTRAP_PMI_TRUE}" && test -z "${HAVE_BOOTSTRAP_PMI_FALSE}" as_fn_error $? "conditional \"HAVE_BOOTSTRAP_PMI\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${HAVE_MK_CLASS_CUDA_UVA_TRUE}" && test -z "${HAVE_MK_CLASS_CUDA_UVA_FALSE}"; then + as_fn_error $? "conditional \"HAVE_MK_CLASS_CUDA_UVA\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${USE_PLPA_TRUE}" && test -z "${USE_PLPA_FALSE}"; then as_fn_error $? "conditional \"USE_PLPA\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 @@ -262766,7 +264711,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. 
ac_log=" -This file was extended by GASNet $as_me 2020.10.0, which was +This file was extended by GASNet $as_me 2021.3.0, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -262833,7 +264778,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -GASNet config.status 2020.10.0 +GASNet config.status 2021.3.0 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/third-party/gasnet/gasnet-src/configure.in b/third-party/gasnet/gasnet-src/configure.in index 6de64aaebdf4..815359df7c23 100644 --- a/third-party/gasnet/gasnet-src/configure.in +++ b/third-party/gasnet/gasnet-src/configure.in @@ -9,18 +9,18 @@ dnl -*- m4 -*- define([cv_prefix],[gasnet_cv_]) dnl Public release version packaging identifier: -define([GASNET_RELEASE_VERSION_MAJOR_D],[2020]) -define([GASNET_RELEASE_VERSION_MINOR_D],[10]) +define([GASNET_RELEASE_VERSION_MAJOR_D],[2021]) +define([GASNET_RELEASE_VERSION_MINOR_D],[3]) define([GASNET_RELEASE_VERSION_PATCH_D],[0]) dnl GASNet-EX spec version: define([GASNETEX_SPEC_VERSION_MAJOR_D],[0]) -define([GASNETEX_SPEC_VERSION_MINOR_D],[10]) +define([GASNETEX_SPEC_VERSION_MINOR_D],[13]) dnl GASNet-1 spec version: define([GASNET_SPEC_VERSION_MAJOR_D],[1]) define([GASNET_SPEC_VERSION_MINOR_D],[8]) dnl GASNet tools spec version: define([GASNETT_SPEC_VERSION_MAJOR_D],[1]) -define([GASNETT_SPEC_VERSION_MINOR_D],[16]) +define([GASNETT_SPEC_VERSION_MINOR_D],[17]) define([GASNET_RELEASE_VERSION_D], [GASNET_RELEASE_VERSION_MAJOR_D.GASNET_RELEASE_VERSION_MINOR_D.GASNET_RELEASE_VERSION_PATCH_D]) @@ -227,7 +227,6 @@ AC_SUBST(GASNETI_PTR_BITS) # Allow user forced ARCH and/or ISA settings w/o the corresponding probes # XXX: Incomplete... 
-GASNET_IF_ENABLED_NOHELP(arch-altix, [force_arch_altix=yes]) GASNET_IF_ENABLED_NOHELP(arch-ibmpe, [force_arch_ibmpe=yes]) GASNET_IF_ENABLED_NOHELP(arch-wsl, [force_arch_wsl=yes]) @@ -731,7 +730,6 @@ case "$CC_FAMILY" in case "$target_cpu" in sparc) GASNET_GET_ULTRASPARC_FLAGS(C,["-mcpu=ultrasparc3 -mtune=ultrasparc3" "-mcpu=ultrasparc -mtune=ultrasparc"]);; rs6000|powerpc*) GASNET_GET_PPC64_FLAGS(C,["-force_cpusubtype_ALL" "-Wa,-mppc64"]);; - ia64) GASNET_TRY_CFLAG([-mtls-size=64], [MISC_CFLAGS="$MISC_CFLAGS -mtls-size=64"]);; esac case "$target_os" in darwin*) @@ -1725,7 +1723,6 @@ case "$target_cpu" in # when chip revs differ, we should err on the larger size aarch64) cache_line_guess=64 ;; rs6000) cache_line_guess=32 ;; # https://www.csee.umbc.edu/portal/help/architecture/rs6000_arch.ps powerpc*) cache_line_guess=128 ;; # https://www.7-cpu.com/cpu/Power8.html - ia64) cache_line_guess=128 ;; # https://www.7-cpu.com/cpu/Itanium2.html sparc) cache_line_guess=128 ;; # https://en.wikipedia.org/wiki/SPARC64_V *) cache_line_guess=128 ;; # the default when we have no other info esac @@ -2113,53 +2110,6 @@ if test "$target_cpu" = "aarch64" ; then ], [AC_DEFINE(GASNETI_HAVE_AARCH64_CNTVCT_EL0)]) fi -## Do toolchain and CPU support ia64 "cmp8xchg16"? 
-GASNET_PUSHVAR(CPPFLAGS,"$CPPFLAGS -I$TOP_SRCDIR/other") dnl for gasnet_portable_platform.h -AC_MSG_CHECKING(for ia64 cmp8xchg16 support) -AC_TRY_RUN([ - #include "gasnet_portable_platform.h" /* For PLATFORM_{ARCH,COMPILER}_* */ - - #if !PLATFORM_ARCH_IA64 - #error wrong CPU architecture - #elif PLATFORM_COMPILER_INTEL - #include - int cmp8xchg16(void volatile *ptr, unsigned long oldlo, unsigned long newlo, unsigned long newhi) { - return oldlo == _InterlockedCompare64Exchange128_acq(ptr, newhi, newlo, oldlo); - } - #elif PLATFORM_COMPILER_GNU - int cmp8xchg16(void volatile *ptr, unsigned long oldlo, unsigned long newlo, unsigned long newhi) { - register unsigned long tmp = newlo; - __asm__ __volatile__ ( - "mov ar.ccv=%1 \n\t" - "mov ar.csd=%2;; \n\t" - "cmp8xchg16.acq %0=[%3],%0,ar.csd,ar.ccv \n" - : "+r"(tmp) : "rO"(oldlo), "rO"(newhi), "r"(ptr) : "memory" ); - return tmp == oldlo; - } - #else - #error compiler is unsupported - #endif - - int main(void) { - unsigned long *array[3]; - unsigned long *p = (void *)(((unsigned long)array + 15) & ~(unsigned long)15); - int swapped; - - p[0] = 1234; - p[1] = 0; - - swapped = cmp8xchg16(p, 1234, 5678, 0xabcdef); - - /* Want SUCCESS and changed X */ - return (!swapped || (p[0] != 5678) || (p[1] != 0xabcdef)); - } -], [AC_DEFINE(GASNETI_HAVE_IA64_CMP8XCHG16) - AC_MSG_RESULT(yes) -], [AC_MSG_RESULT(no) -], [ : # For now we assume "NO" when cross compiling - AC_MSG_RESULT(no)]) -GASNET_POPVAR(CPPFLAGS) - ## Do CC/gas and CPU support "cmpxchg16b"? 
if test "$cross_compiling" = "yes" ; then GASNET_CROSS_VAR(GASNETI_HAVE_X86_CMPXCHG16B,HAVE_X86_CMPXCHG16B,[probe]) @@ -2358,6 +2308,15 @@ GASNET_IF_ENABLED(loopback-memsync, Force memory barriers for GASNet local (loop [AC_DEFINE(GASNETI_MEMSYNC_ON_LOOPBACK)]) GASNET_IF_ENABLED(throttle-poll, throttle polling threads in multi-threaded configurations to reduce contention (experimental, only implemented in some conduits), [AC_DEFINE(GASNETI_THROTTLE_FEATURE_ENABLED)]) + +GASNET_WITH(maxeps, + [Maximum number of endpoints per-process, subject to per-conduit limits (default is conduit-dependent)], + [if expr "${withval}" : "[[1-9]][[0-9]]*" >/dev/null; then + AC_DEFINE_UNQUOTED(GASNETI_MAXEPS_CONFIGURE, $withval) + else + AC_MSG_ERROR([--with-maxeps requires a positive integer argument]) + fi], + [AC_MSG_ERROR([--with-maxeps requires a positive integer argument])]) dnl GASNET_TOOLSONLY_FALSE_END # FORCED non-default implementations - used mainly for debugging @@ -2971,7 +2930,6 @@ AC_DEFUN([GASNET_LINKHELPER_TRY_RUN],[GASNET_LINKHELPER_TRY_INNER([RUN_WITHCC],$ case "$target_cpu" in sparc) GASNET_GET_ULTRASPARC_FLAGS(CXX,["-mcpu=ultrasparc3 -mtune=ultrasparc3" "-mcpu=ultrasparc -mtune=ultrasparc"]);; rs6000|powerpc*) GASNET_GET_PPC64_FLAGS(CXX,["-force_cpusubtype_ALL" "-Wa,-mppc64"]);; - ia64) GASNET_TRY_CXXFLAG([-mtls-size=64], [MISC_CXXFLAGS="$MISC_CXXFLAGS -mtls-size=64"]);; esac # set CXXDEBUGFLAGS GASNET_PUSHVAR(CXXFLAGS,"${gxx_flag_prefix}-g3") @@ -3690,21 +3648,36 @@ if test "$enabled_ibv" = yes; then CONDUIT_SPAWNER + # Multirail support options + # ibv-conduit/README explains the interactions gasnetc_ibv_max_hcas=2 - GASNET_IF_ENABLED(ibv-multirail, [Enable IBV over multiple HCAs, see ibv-conduit/README (disabled by default)], enabled_ibv_multirail=yes, enabled_ibv_multirail=no) + enabled_ibv_multirail=no GASNET_WITH(ibv-max-hcas, - [maximum number of IBV HCAs to open for multi-rail support (default is 2)], + [Maximum number of IBV HCAs to open (default is 
1)], [if expr "${withval}" : "[[1-9]][[0-9]]*" >/dev/null; then gasnetc_ibv_max_hcas="$withval" + enabled_ibv_multirail=yes else AC_MSG_ERROR([--with-ibv-max-hcas requires a positive integer argument]) fi], [AC_MSG_ERROR([--with-ibv-max-hcas requires a positive integer argument])], [:]) + GASNET_IF_ENABLED_WITH_AUTO(ibv-multirail, + [Enable IBV over multiple HCAs. Use of --with-ibv-max-hcas=N is prefered (see ibv-conduit/README for more info).], + enabled_ibv_multirail=yes, enabled_ibv_multirail=no) if test "$enabled_ibv_multirail" = yes; then - AC_DEFINE_UNQUOTED(GASNETC_IBV_MAX_HCAS, $gasnetc_ibv_max_hcas) + AC_DEFINE_UNQUOTED(GASNETC_IBV_MAX_HCAS_CONFIGURE, $gasnetc_ibv_max_hcas) fi + # Default GASNET_IBV_PORTS + GASNET_WITH(ibv-ports, + [Default value of GASNET_IBV_PORTS environment variable (default is empty)], + [if test "$withval" = 'yes'; then + AC_MSG_ERROR([--with-ibv-ports requires an argument]) + else + AC_DEFINE_UNQUOTED(GASNETC_IBV_PORTS_CONFIGURE, "$withval") + fi]) + CONDUIT_PHYSMEM("2/3") GASNET_IF_DISABLED([ibv-srq], @@ -3739,11 +3712,11 @@ if test "$enabled_ibv" = yes; then [enabled_ibv_odp="no"; AC_DEFINE(GASNETC_IBV_ODP_DISABLED)], [enabled_ibv_odp="yes"]) if test "$enabled_ibv_odp" = yes; then + have_ibv_odp=no GASNET_PUSHVAR(CFLAGS,"$CFLAGS $IBV_CFLAGS $GASNET_THREAD_DEFINES") GASNET_PUSHVAR(LDFLAGS,"$LDFLAGS $IBV_LDFLAGS") GASNET_PUSHVAR(LIBS,"$LIBS $IBV_LIBS $GASNET_THREAD_LIBS") - GASNET_TRY_CACHE_LINK(for ibv ODP support, ibv_odp_support, [ - #include + GASNET_TRY_CACHE_LINK([for ibv ODP support (Mellanox)], ibv_odp_support_mlnx, [ #include ], [ struct ibv_exp_device_attr attr; @@ -3757,7 +3730,25 @@ if test "$enabled_ibv" = yes; then x.exp_access = IBV_EXP_ACCESS_ON_DEMAND | IBV_EXP_ACCESS_LOCAL_WRITE; x.length = IBV_EXP_IMPLICIT_MR_SIZE; struct ibv_mr *mr = ibv_exp_reg_mr(&x); - ], [have_ibv_odp=yes], [have_ibv_odp=no]) + ], [ + have_ibv_odp=yes + AC_DEFINE(GASNETC_IBV_ODP_MLNX) + ]) + GASNET_TRY_CACHE_LINK(for ibv ODP support (RDMA 
Core), ibv_odp_support_core, [ + #include <infiniband/verbs.h> + ], [ + struct ibv_device_attr_ex attr; + attr.comp_mask = 0; + int ret = ibv_query_device_ex(NULL, NULL, &attr); + int ok = (attr.odp_caps.general_caps & IBV_ODP_SUPPORT) && + (attr.odp_caps.general_caps & IBV_ODP_SUPPORT_IMPLICIT) && + (attr.odp_caps.per_transport_caps.rc_odp_caps & IBV_ODP_SUPPORT_READ) && + (attr.odp_caps.per_transport_caps.rc_odp_caps & IBV_ODP_SUPPORT_WRITE); + struct ibv_mr *mr = ibv_reg_mr(NULL, NULL, SIZE_MAX, IBV_ACCESS_ON_DEMAND | IBV_ACCESS_LOCAL_WRITE); + ], [ + have_ibv_odp=yes + AC_DEFINE(GASNETC_IBV_ODP_CORE) + ]) GASNET_POPVAR(CFLAGS) GASNET_POPVAR(LDFLAGS) GASNET_POPVAR(LIBS) @@ -3774,13 +3765,11 @@ if test "$enabled_ibv" = yes; then 1024|2048|4096|8192|16384|32768|65536|131072|262144) ibv_max_medium="$withval";; *) - GASNET_ERR_SAVE() AC_MSG_ERROR([--with-ibv-max-medium="$withval" is not legal. Value must be a power-of-two between 1024 and 262144, inclusive.]) ;; esac ], - [GASNET_ERR_SAVE() - AC_MSG_ERROR([--with-ibv-max-medium must be a power-of-two between 1024 and 262144, inclusive.])], + [AC_MSG_ERROR([--with-ibv-max-medium must be a power-of-two between 1024 and 262144, inclusive.])], [ibv_max_medium=65536]) AC_DEFINE_UNQUOTED(GASNETC_IBV_MAX_MEDIUM, [$ibv_max_medium]) fi @@ -3923,7 +3912,6 @@ if test "$enabled_aries" = yes; then GNI_CFLAGS="$GNI_CFLAGS $CRAY_UDREG_INCLUDE_OPTS" ],[ if test "$gni_udreg" = yes; then - GASNET_ERR_SAVE() AC_MSG_ERROR([--enable-aries-udreg was passed but udreg_pub.h header was not found]) fi ]) @@ -3931,21 +3919,20 @@ if test "$enabled_aries" = yes; then fi GASNET_WITH(aries-max-medium, - [specify gasnet_AMMaxMedium() (default 4032)], - [if expr "$withval" : '[[0-9]]*$' >/dev/null 2>&1 && \ - expr \( "$withval" / 64 \) \* 64 = "$withval" >/dev/null 2>&1 && - expr "$withval" '>=' 512 >/dev/null 2>&1 && - expr "$withval" '<' 65536 >/dev/null 2>&1 ; then + [specify default value of gasnet_AMMaxMedium() (default 4032)], + [tmpval=`echo "$withval" | $AWK 
'{sub(/^\+/,"");}1'` # strip leading '+' + if expr "$tmpval" : '[[0-9]]*$' >/dev/null 2>&1 && \ + expr \( "$tmpval" / 64 \) \* 64 = "$tmpval" >/dev/null 2>&1 && + expr "$tmpval" '>=' 512 >/dev/null 2>&1 && + expr "$tmpval" '<=' 65408 >/dev/null 2>&1 ; then gni_max_medium="$withval" else - GASNET_ERR_SAVE() - AC_MSG_ERROR([--with-aries-max-medium="$withval" is not legal. Value must be a multiple of 64, between 512 and 65472, inclusive.]) + AC_MSG_ERROR([--with-aries-max-medium="$withval" is invalid. Value must be a multiple of 64, between 512 and 65408, inclusive. Unless prefixed with '+', the value will be rounded-down to the next recommended value. See the aries-conduit README for additional details.]) fi ], - [GASNET_ERR_SAVE() - AC_MSG_ERROR([--with-aries-max-medium must be a multiple of 64 and no smaller than 512])], + [AC_MSG_ERROR([--with-aries-max-medium must be a multiple of 64 and no smaller than 512])], [gni_max_medium=4032]) - AC_DEFINE_UNQUOTED(GASNETC_GNI_MAX_MEDIUM, [$gni_max_medium]) + AC_DEFINE_UNQUOTED(GASNETC_GNI_MAX_MEDIUM_DFLT, ["$gni_max_medium"]) GASNET_IF_ENABLED(aries-multi-domain, [Use experimental multi-domain support in PAR builds], [AC_DEFINE(GASNETC_GNI_MULTI_DOMAIN)]) @@ -3960,7 +3947,7 @@ AM_CONDITIONAL(USE_ARIES_CONDUIT, test "$enabled_aries$have_aries" = yesyes) #-------------------------------------------------------------------------------------------------------- # Libfabric Conduit -CONDUIT_BEGIN(ofi,g1,[Portable OpenFabrics Interfaces conduit (ofi)]) +CONDUIT_BEGIN(ofi,exp,[Portable OpenFabrics Interfaces conduit (ofi)]) if test "$enabled_ofi" = yes; then have_ofi=yes @@ -3981,11 +3968,26 @@ if test "$enabled_ofi" = yes; then rc = fi_getinfo(FI_VERSION(FI_MAJOR_VERSION,FI_MINOR_VERSION), 0, 0, 0, 0, 0); }], [ : ], [ have_ofi=no; GASNET_ERR_SAVE() ] ) + GASNET_POPVAR(CFLAGS) GASNET_POPVAR(LDFLAGS) GASNET_POPVAR(LIBS) fi + if test "$have_ofi" = yes; then + # What we actually care about is GASNETI_HAVE_SPINLOCK, which is available 
+ # whenever !GASNETI_ATOMICOPS_NOT_SIGNALSAFE + # For all platforms of likely interest, checking (compiler||native) is sufficient. + AC_MSG_CHECKING(for appropriate atomics support required by ofi-conduit) + CHECK_ATOMICS([native compiler],[ + AC_MSG_RESULT(yes) + ],[ + AC_MSG_RESULT(no) + GASNET_MSG_WARN([ofi-conduit requires native atomicops, but $check_atomics_msg]) + have_ofi=no + ]) + fi + if test "$have_ofi" = yes; then CONDUIT_SPAWNER @@ -4689,6 +4691,117 @@ if test "$use_printstack" != no; then fi fi +######################################################################## +dnl GASNET_TOOLSONLY_FALSE_BEGIN +GASNET_HELP_OUTPUT([ +Memory-kinds options:]) + +GASNET_IF_ENABLED(memory-kinds, + [Prototype support for memory kinds (transfers to/from device memory). Enables default auto-detection of all device type applicable to the target platform. Individual --enable-kind-* options have precedence.], + [enable_mk_default=probe], [enable_mk_default=no]) + +# GEX_MK_CLASS_CUDA_UVA +GASNET_IF_ENABLED_WITH_AUTO(kind-cuda-uva, + [Support for memory kinds on UVA-capable CUDA devices (auto-detected with --enable-memory-kinds, otherwise disabled)], + [enable_mk_cuda_uva="yes"; force_mk_cuda_uva="yes"], + [enable_mk_cuda_uva="no"], [ + if test "x$enable_kind_cuda_uva" = 'xprobe'; then + enable_mk_cuda_uva="probe" + elif test "$GASNETI_PTR_BITS" = 32; then + enable_mk_cuda_uva="no" + else + enable_mk_cuda_uva="$enable_mk_default" + fi + ]) +unset have_mk_cuda_uva +if test "$enable_mk_cuda_uva" != "no"; then + AC_MSG_CHECKING(for target having CUDA UVA memory kinds support) + if test "$GASNETI_PTR_BITS" = 32; then + AC_MSG_RESULT([no (ILP32 targets are not supported)]) + have_mk_cuda_uva=no + else + have_mk_cuda_uva=no + case "$target_cpu:$target_os" in + x86_64:linux*) have_mk_cuda_uva='yes';; + powerpc*:linux*) if test "$WORDS_BIGENDIAN" = '0'; then have_mk_cuda_uva='yes'; fi;; + aarch64:linux*) have_mk_cuda_uva='maybe';; + esac + if test "$have_mk_cuda_uva" = 'no'; then 
+ AC_MSG_RESULT([no (unsupported or unrecognized target $target)]) + else + AC_MSG_RESULT([$have_mk_cuda_uva]) + fi + fi + + if test "$have_mk_cuda_uva" != "no"; then + # TODO: Can/should we scrape nvcc as UPC++ does? + GASNET_PACKAGE_HOME(CUDA, [CUDA toolkit], [nvcc], [/usr/local/cuda], [include/cuda.h]) + GASNET_PACKAGE_CFLAGS(CUDA) + GASNET_ENV_DEFAULT(CUDA_LIBS, -lcuda) + GASNET_PACKAGE_LDFLAGS(CUDA, cuda) + GASNET_SPLIT_LINKER_OPTS(CUDA_LDFLAGS,CUDA_LIBS) + + # Try to compile and link a basic program using the CUDA Driver API + GASNET_PUSHVAR(CFLAGS,"$CFLAGS $CUDA_CFLAGS") + GASNET_PUSHVAR(LDFLAGS,"$LDFLAGS $CUDA_LDFLAGS") + GASNET_PUSHVAR(LIBS,"$LIBS $CUDA_LIBS") + + GASNET_TRY_CACHE_LINK(for working CUDA configuration, libcuda_works, [ + #include <cuda.h> + ], [ + cuInit(0); + CUdevice dev = 0; + CUcontext ctx; + CUresult res = cuDevicePrimaryCtxRetain(&ctx, dev); + int isUVA; + cuDeviceGetAttribute(&isUVA, CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, dev); + CUpointer_attribute attrs[[3]] = { CU_POINTER_ATTRIBUTE_MEMORY_TYPE, + CU_POINTER_ATTRIBUTE_IS_MANAGED, + CU_POINTER_ATTRIBUTE_CONTEXT }; + ], [ : ], [ have_mk_cuda_uva=no; GASNET_ERR_SAVE() ]) + + GASNET_POPVAR(LIBS) + GASNET_POPVAR(LDFLAGS) + GASNET_POPVAR(CFLAGS) + fi + + # If the probe(s) above passed on a target identified as "maybe" having + # support, then either upgrade to "yes" if enabled explicitly, or downgrade + # to "no" if just probing. Either way, we issue a warning. + if test "$have_mk_cuda_uva" = "maybe"; then + if test "$enable_mk_cuda_uva" = "yes"; then + have_mk_cuda_uva=yes + GASNET_MSG_WARN([CUDA UVA memory kinds support enabled for $target, which may not be officially supported]) + else + have_mk_cuda_uva=no + GASNET_MSG_WARN([It appears your system has the software required for CUDA UVA memory kinds support. +However, $target may not be officially supported. +You can enable an unsupported build of this feature with --enable-kind-cuda-uva. 
+Otherwise, you can disable this message with --disable-kind-cuda-uva.]) + fi + fi + + if test "$have_mk_cuda_uva" = "yes"; then + AC_DEFINE(GASNETI_MK_CLASS_CUDA_UVA_ENABLED) + CUDA_UVA_CFLAGS="$CUDA_CFLAGS" + CUDA_UVA_LDFLAGS="$CUDA_LDFLAGS" + CUDA_UVA_LIBS="$CUDA_LIBS" + elif test "$force_mk_cuda_uva" = "yes"; then + GASNET_MSG_ERROR([CUDA UVA memory kinds support was requested with --enable-kind-cuda-uva but is not available]) + else + unset CUDA_UVA_CFLAGS + unset CUDA_UVA_LDFLAGS + unset CUDA_UVA_LIBS + fi +fi +AC_SUBST(CUDA_UVA_CFLAGS) +AC_SUBST(CUDA_UVA_LDFLAGS) +AC_SUBST(CUDA_UVA_LIBS) +AM_CONDITIONAL(HAVE_MK_CLASS_CUDA_UVA, test "$have_mk_cuda_uva" = "yes") + +dnl GASNET_TOOLSONLY_FALSE_END +######################################################################## + GASNET_HELP_OUTPUT([ Misc options:]) @@ -4768,6 +4881,7 @@ case "$target_os" in ;; esac +# PLPA enabled_plpa="no" if test "$cross_compiling" = "no" ; then case "$target_os" in @@ -4803,6 +4917,110 @@ if test "$enabled_plpa" = "yes"; then fi AM_CONDITIONAL(USE_PLPA,test "$enabled_plpa" = "yes") +# HWLOC header and library +GASNET_IF_ENABLED_WITH_AUTO(hwloc, hwloc library (auto-detected), + enabled_hwloc_lib=yes, enabled_hwloc_lib=no, enabled_hwloc_lib=probe) +have_hwloc_lib=no +if test "$enabled_hwloc_lib" != no; then + GASNET_PACKAGE_HOME(HWLOC, [hwloc], [hwloc-info], [/usr/local], [include/hwloc.h]) + GASNET_PACKAGE_CFLAGS(HWLOC) + GASNET_ENV_DEFAULT(HWLOC_LIBS, -lhwloc) + GASNET_PACKAGE_LDFLAGS(HWLOC, hwloc) + GASNET_SPLIT_LINKER_OPTS(HWLOC_LDFLAGS,HWLOC_LIBS) + + # Try to compile and link a basic program using hwloc + GASNET_PUSHVAR(CFLAGS,"$CFLAGS $HWLOC_CFLAGS") + GASNET_PUSHVAR(LDFLAGS,"$LDFLAGS $HWLOC_LDFLAGS") + GASNET_PUSHVAR(LIBS,"$LIBS $HWLOC_LIBS") + + GASNET_TRY_CACHE_LINK(for libhwloc with API v1.0 or newer, libhwloc_works, [ + #include <hwloc.h> + #if HWLOC_API_VERSION < 0x010000 + #error No support for 0.9 series + #endif + ], [ + hwloc_topology_t topology; + 
hwloc_topology_init(&topology); + hwloc_topology_load(topology); + ], [ + have_hwloc_lib=yes + ],[ + HWLOC_CFLAGS="" + HWLOC_LDFLAGS="" + HWLOC_LIBS="" + ]) + + GASNET_POPVAR(LIBS) + GASNET_POPVAR(LDFLAGS) + GASNET_POPVAR(CFLAGS) + + if test "$have_hwloc_lib" = yes; then + AC_DEFINE(GASNETI_HAVE_HWLOC_LIB) + elif test "$enabled_hwloc_lib" = yes; then + AC_MSG_ERROR(User requested --enable-hwloc but the required hwloc header or library were not found.) + fi +fi +AC_SUBST(HWLOC_CFLAGS) +AC_SUBST(HWLOC_LDFLAGS) +AC_SUBST(HWLOC_LIBS) + +# HWLOC utilities: hwloc-{bind,calc} +# Used as fallback if !have_hwloc_lib +GASNET_IF_ENABLED_WITH_AUTO(hwloc-utils, hwloc utilities (auto-detected), + enabled_hwloc_utils=yes, enabled_hwloc_utils=no, + [ if test "$cross_compiling" = "no" || test "x$enable_hwloc_utils" = 'xprobe'; then + enabled_hwloc_utils=probe + else + # utils on build system unlikely to represent the target + enabled_hwloc_utils=no + fi ]) +have_hwloc_utils=no +if test "$enabled_hwloc_utils" != no; then + if test "$have_fork" = yes; then + GASNET_ENV_DEFAULT(HWLOC_UTILS_HOME, , [Install prefix of hwloc command-line utilities (auto-detected from PATH)]) + if test -n "$HWLOC_UTILS_HOME"; then + hwloc_utils_path="$HWLOC_UTILS_HOME/bin" + else + hwloc_utils_path="$PATH" + fi + AC_PATH_PROGS(GASNETI_HWLOC_BIND_PATH, hwloc-bind, , $hwloc_utils_path) + if test -n "$GASNETI_HWLOC_BIND_PATH" && test "$cross_compiling" = "no"; then + # Require --get, added in hwloc 1.0 + AC_MSG_CHECKING(for required features in hwloc-bind) + if expr `$GASNETI_HWLOC_BIND_PATH --get 2>&1` : 0x >/dev/null 2>&1; then + AC_MSG_RESULT(yes) + AC_DEFINE_UNQUOTED(GASNETI_HWLOC_BIND_PATH,"$GASNETI_HWLOC_BIND_PATH") + else + AC_MSG_RESULT(no) + unset GASNETI_HWLOC_BIND_PATH + fi + fi + AC_PATH_PROGS(GASNETI_HWLOC_CALC_PATH, hwloc-calc, , $hwloc_utils_path) + if test -n "$GASNETI_HWLOC_CALC_PATH" && test "$cross_compiling" = "no"; then + # Require --intersect, added in hwloc 1.1 + AC_MSG_CHECKING(for 
required features in hwloc-calc) + if $GASNETI_HWLOC_CALC_PATH --intersect Socket 0x0 </dev/null >/dev/null 2>&1; then + AC_MSG_RESULT(yes) + AC_DEFINE_UNQUOTED(GASNETI_HWLOC_CALC_PATH,"$GASNETI_HWLOC_CALC_PATH") + else + AC_MSG_RESULT(no) + unset GASNETI_HWLOC_CALC_PATH + fi + fi + if test -n "$GASNETI_HWLOC_BIND_PATH" && test -n "$GASNETI_HWLOC_CALC_PATH"; then + have_hwloc_utils=yes # Have both + AC_DEFINE(GASNETI_HAVE_HWLOC_UTILS) + if test "$cross_compiling" = "yes"; then + GASNET_MSG_WARN([Utilities hwloc-bind and hwloc-calc have been located, as shown above. However, their validation has been skipped due to cross-compilation.]) + fi + elif test "$enabled_hwloc_utils" = yes; then + AC_MSG_ERROR(User requested --enable-hwloc-utils but one or both of hwloc-calc or hwloc-bind was not found.) + fi + elif test "$enabled_hwloc_utils" = yes; then + AC_MSG_ERROR(User requested --enable-hwloc-utils but this option requires popen which $fork_reason.) + fi +fi + # AC_FUNC_MMAP # Check for mmap functionality we actually use if test "$cross_compiling" = "yes" ; then @@ -5095,8 +5313,9 @@ case "$target_os" in LDFLAGS="$LDFLAGS ${dash_Wl}-multiply_defined,suppress" # Darwin 11 (MacOSX Lion) and newer use address-space randomization by default + # But -no_pie is ignored (with a warning) on aarch64 tmp_ver=`expr "$target_os" : 'darwin\([[0-9]]*\)' 2>/dev/null` - if expr $tmp_ver \>= 11 >/dev/null; then + if test $target_cpu != aarch64 && expr $tmp_ver \>= 11 >/dev/null; then NOASLR_LDFLAGS="${dash_Wl}-no_pie" fi ;; diff --git a/third-party/gasnet/gasnet-src/docs/GASNet-EX.txt b/third-party/gasnet/gasnet-src/docs/GASNet-EX.txt index 6b967c6a84a3..ca52cc50d4b8 100644 --- a/third-party/gasnet/gasnet-src/docs/GASNet-EX.txt +++ b/third-party/gasnet/gasnet-src/docs/GASNet-EX.txt @@ -1,7 +1,7 @@ /////////////////////////////// // GASNet-EX API Description // /////////////////////////////// -// + // This is *not* a final normative document. // This is "beta documentation" for a work-in-progress. 
// @@ -31,15 +31,15 @@ // // This takes the form YEAR.MONTH.PATCH in GASNet-EX releases, // providing a clear distinction from GASNet-1 with MAJOR==1. -#define GASNET_RELEASE_VERSION_MAJOR 2020 -#define GASNET_RELEASE_VERSION_MINOR 10 +#define GASNET_RELEASE_VERSION_MAJOR 2021 +#define GASNET_RELEASE_VERSION_MINOR 3 #define GASNET_RELEASE_VERSION_PATCH 0 // Major and Minor versions of the GASNet-EX specification. // // This is currently a version number for *this* document. #define GEX_SPEC_VERSION_MAJOR 0 -#define GEX_SPEC_VERSION_MINOR 10 +#define GEX_SPEC_VERSION_MINOR 13 // Major and Minor versions of the GASNet-1 specification. // @@ -53,7 +53,7 @@ // // This is the spec version for the GASNet Tools #define GASNETT_SPEC_VERSION_MAJOR 1 -#define GASNETT_SPEC_VERSION_MINOR 16 +#define GASNETT_SPEC_VERSION_MINOR 17 // // Relationship to GASNet-1 APIs: @@ -116,14 +116,21 @@ extern void gasnet_QueryGexObjects(gex_Client_t *client_p, // gasnet_mynode(), gasnet_nodes(), gasnet_hsl_*(), gasnet_exit(), gasnet_AMReply*(), // gasnet_QueryGexObjects(), gex_System_QueryNbrhdInfo(), gex_System_QueryHostInfo(), // gex_System_QueryMyPosition(), gex_System_QueryJob{Rank,Size}(), gex_HSL_*(), -// gex_*_{Set,Query}CData(), gex_{Client,Segment,EP,TM,AD}_Query*() +// gex_*_{Set,Query}CData(), gex_{Client,Segment,EP,TM,AD}_Query*(), gex_TM_Pair(), // gex_AM_Max*(), gex_AM_LUB*(), gex_Token_Max*(), gex_Token_Info(), +// gex_System_GetVerboseErrors(), gex_System_SetVerboseErrors(), +// gex_System_QueryMaxThreads() // // The following are conditionally permitted in handler context, the condition being the // caller must be within an AMRequest handler and not holding a handler-safe lock: // // gex_AM_Reply*() gex_AM_{Prepare,Commit}Reply*(), gex_AM_SrcDesc*() // +// The following are conditionally permitted in handler context, the condition +// being that the 'flags' argument must include GEX_FLAG_IMMEDIATE: +// +// gex_EP_QueryBoundSegmentNB() +// // All other functions are prohibited 
to be called from a thread within the // dynamic context of an AM handler, or while holding a handler-safe lock. // This prohibition notably prohibits all communication initiation (aside from Reply @@ -167,7 +174,8 @@ extern void gasnet_QueryGexObjects(gex_Client_t *client_p, // "Single-valued" // // This term is used to designate an argument to a collective call as one that -// must have the same value on all callers participating in the collective. +// must have the same value on all callers participating in the collective, or +// on a well-defined subset of callers. // // In the case of 'flags' arguments, this term may be applied in a qualified // form as "partially single-valued" when the constraint applies only to some @@ -208,6 +216,39 @@ gex_Rank_t gex_System_QueryJobRank(void); // Semantics in a resilient build will be defined in a later release. gex_Rank_t gex_System_QueryJobSize(void); +// Utility + +// By default, certain non-fatal error returns in GASNet-EX will print messages +// to the console. This behavior can be queried and set with the following. + +// Returns non-zero if console messages are enabled for certain non-fatal errors. +int gex_System_GetVerboseErrors(); + +// Enable (1) or disable (0) console messages for certain non-fatal errors. +// Values other than 0 and 1 are currently reserved. +void gex_System_SetVerboseErrors(int enable); + +// Threads + +// The maximum number of live client threads permitted to enter GASNet. +// +// In threaded (non-SEQ) builds of GASNet, client threads making GASNet calls may +// implicitly become associated with thread-specific state managed by the GASNet +// library. When such a thread exits, a thread destructor registered by the library +// cleans up any associated thread-specific state. The library is permitted to limit +// the number of live client threads that may concurrently be implicitly associated +// with GASNet-managed state. 
+// +// + The limit is per-process and the value returned is for the calling process. +// + The limit is process-wide, independent of gex_Client_t. +// + Threads internal to GASNet, if any, do not count against this limit. +// + Client threads which have not yet entered GASNet do not count against +// this limit. +// + Client threads which exit after having entered GASNet cease to count +// against this limit. +// +// In a SEQ build of GASNet, this query always returns 1. +uint64_t gex_System_QueryMaxThreads(void); // Events @@ -336,7 +377,7 @@ typedef [some integer type] gex_Flags_t; // // This flag indicates, to those calls explicitly documented as accepting it, // that the 'rank' (or equivalent argument) is a jobrank rather than a rank -// within the the normal associated team. +// within the normal associated team. // // Currently this flags is accepted by: // gex_AD_Op*() @@ -371,14 +412,14 @@ typedef [some integer type] gex_Flags_t; // The segment disposition flags come in two varieties: // // SELF - describes the segment disposition of addresses associated -// with local memory and the initiating endpoint (ie the EP +// with local buffers and the initiating endpoint (ie the EP // which is usually implicitly named by a gex_TM_t argument). // Eg in a Put operation this variety describes source locations, // and in a Get this variety describes destination locations. // -// PEER - describes the segment disposition of addresses associated +// PEER - describes the segment disposition of buffers associated // with (potentially) remote memory and the peer endpoint(s) -// (the EPs usually explicitly named by gex_Rank_t arguments) +// (the EPs usually explicitly named by gex_Rank_t arguments). // Eg in a Put operation this variety describes destination locations, // and in a Get this variety describes source locations. // @@ -386,8 +427,12 @@ typedef [some integer type] gex_Flags_t; // a given operation may specify at most one SELF flag and one PEER flag. 
// Unless otherwise noted, the default behavior for each variety in the // absence of an explicitly provided flag corresponds to: +// + When the local EP is unbound or bound to host memory: // GEX_FLAG_SELF_SEG_UNKNOWN, GEX_FLAG_PEER_SEG_BOUND -// which is backwards-compatible with GASNet-1 segment behavior. +// + When the local EP is bound to a device segment: +// GEX_FLAG_SELF_SEG_BOUND, GEX_FLAG_PEER_SEG_BOUND +// These are backwards-compatible with GASNet-1 segment behavior (where +// there is no support for device memory). // NOTE: the flags below are currently [UNIMPLEMENTED], and consequently // these defaults are also the only supported settings for all APIs. // @@ -438,7 +483,7 @@ typedef [some integer type] gex_Flags_t; #define GEX_FLAG_SELF_SEG_OFFSET ((gex_Flags_t)???) [UNIMPLEMENTED] #define GEX_FLAG_PEER_SEG_OFFSET ((gex_Flags_t)???) [UNIMPLEMENTED] -// COLLECTIVE SCRATCH ALLOCATION [PROPOSED] +// COLLECTIVE SCRATCH ALLOCATION // // The following family of flags control the interpretation of address ranges // provided to team construction APIs to describe collective scratch spaces. @@ -446,15 +491,17 @@ typedef [some integer type] gex_Flags_t; // TM_{GLOBAL,LOCAL,SYMMETRIC,NO}_SCRATCH // This mutually-exclusive group indicates the number and meaning of // a gex_Addr_t specified to certain team construction APIs. +// [Since spec v0.9:] // GLOBAL: gex_Addr_t per member of the output team // LOCAL: gex_Addr_t per local member of the output team // SYMMETRIC: single gex_Addr_t used for all members of the output team +// [Since spec v0.11:] // NO: no gex_Addr_t (and no scratch space is allocated). // #define GEX_FLAG_TM_GLOBAL_SCRATCH ((gex_Flags_t)???) // gex_TM_Create only #define GEX_FLAG_TM_LOCAL_SCRATCH ((gex_Flags_t)???) // gex_TM_Create only #define GEX_FLAG_TM_SYMMETRIC_SCRATCH ((gex_Flags_t)???) // gex_TM_Create only -#define GEX_FLAG_TM_NO_SCRATCH ((gex_Flags_t)???) [UNIMPLEMENTED] +#define GEX_FLAG_TM_NO_SCRATCH ((gex_Flags_t)???) 
// gex_TM_Create and gex_TM_Split // // SCRATCH_SEG_OFFSET // @@ -464,7 +511,13 @@ typedef [some integer type] gex_Flags_t; // that the specified range(s) are contained entirely within that segment. #define GEX_FLAG_SCRATCH_SEG_OFFSET ((gex_Flags_t)???) [UNIMPLEMENTED] -// MISC [PROPOSED] +// GEX_FLAG_GLOBALLY_QUIESCED +// [Since spec v0.10] +// +// This flag bit indicates to the corresponding object destructor call that +// the client has satisfied the call's documented global quiescence criteria. +// This permits, but does not require, the implementation to elide +// synchronization which might otherwise be required. #define GEX_FLAG_GLOBALLY_QUIESCED ((gex_Flags_t)???) // A "token" is an opaque scalar type @@ -495,7 +548,7 @@ typedef [some unsigned integer type] gex_RMA_Value_t; // // This is always an alias for `void*`, but is given a distinct type to make // prototypes self-documenting with respect to arguments which may (with the -// proper flags) be interepreted alternatively as addresses or offsets. +// proper flags) be interpreted alternatively as addresses or offsets. typedef void* gex_Addr_t; // Memvec @@ -513,8 +566,12 @@ typedef struct { // a local representative of an isolated communication context typedef ... gex_EP_t; +// Pre-defined value of type gex_EP_t +// This zero value is guaranteed never to alias a valid endpoint +#define GEX_EP_INVALID ((gex_EP_t)0) + // gex_EP_Index_t is an unsigned integer type. -// [PROPOSED] +// // Every EP within a given gex_Client_t can be uniquely identified by // the jobrank of a process and an endpoint index. // The primordial endpoint, created by gex_Client_Init(), will always have @@ -526,11 +583,9 @@ typedef ... gex_EP_Index_t; // This is an optimistic compile-time constant which cannot account // for limitations due to scarcity of network resources and/or memory. // The value is implementation-defined and may be conduit-specific. -// [PROPOSED] #define GASNET_MAXEPS ... 
-// gex_EP_Location_t is a (rank, ep_index) tuple -// [PROPOSED] +// gex_EP_Location_t is a (rank, ep_index) tuple. typedef struct { gex_Rank_t gex_rank; gex_EP_Index_t gex_ep_index; @@ -540,6 +595,10 @@ typedef struct { // an instance of the client interface to the GASNet library typedef ... gex_Client_t; +// Pre-defined value of type gex_Client_t +// This zero value is guaranteed never to alias a valid client +#define GEX_CLIENT_INVALID ((gex_Client_t)0) + // gex_Segment_t is an opaque scalar handle to a Segment, // a local client-declared memory range for use in communication typedef ... gex_Segment_t; @@ -548,13 +607,24 @@ typedef ... gex_Segment_t; // Used, for instance, to indicate no bound segment #define GEX_SEGMENT_INVALID ((gex_Segment_t)0) -// gex_TM_t is an opaque scalar handle to a Team Member, +// In general, gex_TM_t is an opaque scalar handle to a Team Member, // a collective communication context used for remote endpoint naming. -// A gex_TM_t specifies both an ordered set of Endpoints (local or remote), -// and a local gex_EP_t, a local representative of that team. +// There is also a less-general form, known as a "TM-pair" which carries only +// sufficient information for naming an endpoint in point-to-point communication +// or queries. +// In collective calls, an argument of type gex_TM_t specifies both an ordered +// set of Endpoints (local or remote), and a local gex_EP_t, a local +// representative of that team. Use of a TM-pair is prohibited in such calls. +// In point-to-point calls the local and remote gex_EP_t are named by a tuple +// consisting of one argument of type gex_TM_t and another of type gex_Rank_t +// together. Similarly, several queries take a '(tm,rank)' tuple to name an +// endpoint. Use of a TM-pair or a fully general gex_TM_t are both permitted +// in these non-collective calls. + typedef ... 
gex_TM_t; // Pre-defined value of type gex_TM_t +// This zero value will never alias a valid gex_TM_t (including TM-pairs) #define GEX_TM_INVALID ((gex_TM_t)0) // @@ -620,8 +690,16 @@ extern int gex_Client_Init( // // Operations on gex_Segment_t // -// NOTE: currently gex_Segment_Attach() is the only way to create a segment. -// However, additional APIs for segment creation will be added. +// NOTE: *currently* gex_Segment_Attach() is the only way to create a segment +// suitable for use as the bound segment of a primordial endpoint (one created +// by gex_Client_Init). In particular, the current release does not *yet* +// support use of the APIs gex_Segment_Create(), gex_EP_BindSegment() and +// gex_EP_PublishBoundSegment() as an alternative to Attach. However, support +// for that usage may appear in a future release. +// +// See also in [PROPOSED] section: +// gex_Segment_Create() +// gex_EP_BindSegment() // // Query owning client @@ -631,44 +709,15 @@ gex_Client_t gex_Segment_QueryClient(gex_Segment_t seg); // There are no segment flags defined in the current release. gex_Flags_t gex_Segment_QueryFlags(gex_Segment_t seg); -// Query address and length of a segment +// Query base address of a segment +// For segments created using gex_Segment_Create() with a 'kind' not equal to +// GEX_MK_HOST, the return value is a device address. +// Otherwise, it is a host address. void * gex_Segment_QueryAddr(gex_Segment_t seg); + +// Query length of a segment uintptr_t gex_Segment_QuerySize(gex_Segment_t seg); -// Query addresses and length of a (possibly remote) bound segment [EXPERIMENTAL] -// -// This query takes a gex_TM_t and gex_Rank_t, which together name an endpoint. -// The remaining arguments are pointers to locations for outputs, each of which -// may be NULL if the caller does not need a particular value. -// -// If the endpoint named by (rm, rank) does not have a bound segment, this call -// returns non-zero, and the output locations are unmodified. 
Otherwise, this -// call returns 0 and writes the corresponding segment properties to each of -// the non-NULL output locations as follows: -// -// owneraddr_p: receives the address of the segment in the address space -// of the process which owns the segment. -// localaddr_p: receives the address of the segment in the address space -// of the calling process, if mapped, and NULL otherwise. -// size_p: receives the length of the segment. -// -// In this release the gex_Segment_Attach call (below) is the only mechanism to -// create segments, and unconditionally binds them to an endpoint. Thus all -// segments are "bound" in the current release. However, not all endpoints may -// have a segment bound to them. -// -// rank == GEX_RANK_INVALID is *not* permitted. -// rank == gex_TM_QueryRank(tm) *is* permitted. -// -// For rank != gex_TM_QueryRank(tm), this query MAY communicate. -// This call is not legal in contexts which prohibit communication, including -// (but not limited to) AM Handler context or when holding an HSL. -int gex_Segment_QueryBound( - gex_TM_t tm, - gex_Rank_t rank, - void **owneraddr_p, - void **localaddr_p, - uintptr_t *size_p); // Collective allocation and creation of Segments // Analogous to gasnet_attach (but see below) @@ -735,7 +784,7 @@ gex_Rank_t gex_TM_QuerySize(gex_TM_t tm); // that creates zero or more new teams. While this call is collective, the // arguments are NOT required to be single-valued over the parent team, except // as noted for certain bits in 'flags'. However, the value of 'scratch_size' -// must be collective over callers passing the same 'color'. +// (if applicable) must be collective over callers passing the same 'color'. 
// // + When passing any of the GEX_FLAG_TM_SCRATCH_SIZE_* family of flags, this // call is a collective query to determine the minimum or recommended value @@ -762,8 +811,8 @@ gex_Rank_t gex_TM_QuerySize(gex_TM_t tm); // or passed to any GASNet function, nor may the segment be destroyed, for // the lifetime of the newly created team. When the team is destroyed, // ownership of this memory is returned to the client. -// [NOTE: this release *requires* the caller to provide this space, but it -// is intended that this be optional in a future release.] +// To NOT provide a scratch space, the client must pass 'flags' containing +// 'GEX_FLAG_TM_NO_SCRATCH'. // [TBD: what about Unbind of the segment w/o destroying it?] // // new_tm_p: An OUT parameter that receives the gex_TM_t representing the @@ -773,24 +822,38 @@ gex_Rank_t gex_TM_QuerySize(gex_TM_t tm); // same new team. // key: An integer used to order the ranks within newly created teams. // scratch_addr, scratch_size: -// If scratch_addr is non-NULL, then the memory +// If 'GEX_FLAG_TM_NO_SCRATCH' appears in 'flags', then these two +// arguments are ignored. Otherwise, the memory // [scratch_addr, scratch_addr+scratch_size) // is granted to the implementation for internal use. // The value of 'scratch_size' must be single-valued over the members // of each new team to be created (non-NULL 'new_tm_p' and same 'color'). +// The value of 'scratch_size' must be non-zero. // flags: // Single valued: // GEX_FLAG_TM_SCRATCH_SIZE_* // These mutually exclusive flags convert this call into a collective query. // No team is created in the presence of any flag in this family.
-// - GEX_FLAG_TM_SCRATCH_SIZE_{MIN,RECOMMENDED} queries and returns the -// {minimum permissible, recommended optimal} value to be passed in +// - GEX_FLAG_TM_SCRATCH_SIZE_RECOMMENDED +// This query returns the recommended optimal value to be passed in // 'scratch_size' for a subsequent call to gex_TM_Split() with the same // value for the other arguments. In particular, a NULL value of the // 'new_tm_p' indicates the caller will not be a member of any team // created by the subsequent split (and thus the return will be zero). // Return values are guaranteed to be single-valued over the members // of each new team to be created (non-NULL 'new_tm_p' and same 'color'). +// - GEX_FLAG_TM_SCRATCH_SIZE_MIN [DEPRECATED at spec version 0.11] +// This flag is deprecated and will be removed in a future release. +// Use in this release will print a warning at runtime. +// Partially single valued: +// GEX_FLAG_TM_NO_SCRATCH +// This flag causes creation of a team without a scratch space. The +// 'scratch_addr' and 'scratch_size' arguments are ignored. This flag +// is intended for use when creating teams which will not perform any +// significant collectives, and its use otherwise will most likely +// degrade the performance of collectives. +// Presence/absence of this flag must be single-valued over the members +// of each new team to be created (non-NULL 'new_tm_p' and same 'color'). // Non-single valued: // None currently defined // @@ -798,6 +861,245 @@ size_t gex_TM_Split(gex_TM_t *new_tm_p, gex_TM_t parent_tm, int color, int key, void *scratch_addr, size_t scratch_size, gex_Flags_t flags); +// Create zero or more new disjoint Teams +// [Since spec v0.9] +// +// This is a collective call which provides the means to construct one or more +// teams per call (at most one per caller) with greater generality than +// gex_TM_Split(), including the ability to incorporate endpoints not yet in any +// team.
+// +// While this call is collective, the arguments are NOT required to be +// single-valued over the parent team, except as noted for certain bits in +// 'flags'. However, the value of some arguments must be collective over +// callers which comprise the same "output team". +// +// + Collective over parent_tm, which must contain at least one member for every +// process named in the args[] of any caller. +// + When flags contains GEX_FLAG_TM_SCRATCH_SIZE_RECOMMENDED (presence of which +// must be single-valued over the parent team), this API behaves analogously +// to that documented for gex_TM_Split(): returning the recommended size for +// the collective scratch space of the team which would otherwise be created +// for this caller based on the arguments num_new_tms, numargs and args[], and +// ignoring the arguments new_tms, scratch_size and scratch_addrs. +// Similarly, passing the (deprecated) GEX_FLAG_TM_SCRATCH_SIZE_MIN returns +// the minimum scratch size. +// In the absence of these flags, the remaining semantics apply. +// + Creates either zero (for numargs == 0) teams or one team (for numargs > 0) +// per caller. +// + When passing numargs == 0, the caller must provide a value for flags which +// is consistent with any "single-valued over the parent team" constraints. +// However, all arguments other than parent_tm, numargs and flags are ignored +// (and subsequent semantics constraining the ignored arguments do not apply). +// + The args[] must contain numargs > 0 distinct elements naming every endpoint +// to become a member of the team the caller is creating, in rank order. +// + The gex_rank field of args[] specifies a process by jobrank if +// GEX_FLAG_RANK_IS_JOBRANK is present in flags, otherwise the gex_rank field +// is a rank relative to parent_tm and the process is the one associated with +// that team member. +// + The presence/absence of GEX_FLAG_RANK_IS_JOBRANK in flags must be +// single-valued over the output team.
+// + The value of numargs and content of args[] must be single-valued over the +// output team. +// + Taken over all callers, any two non-empty args[] arrays must either be +// identical (constructing the same team) or name a disjoint set of endpoints +// (creating a distinct, non-overlapping team). A numargs == 0 caller is +// always disjoint. +// + The immediately preceding restriction applies not only to callers in +// distinct processes, but also to the case of multiple callers per process +// (due to multiple members in parent_tm). +// + The value of numargs and content of args[] are not required to be +// single-valued over parent_tm, allowing for creation of multiple teams per +// collective call (but at most one per caller). +// + The endpoint corresponding to parent_tm is not required to be among the +// entries in args[]. +// + The value of num_new_tms must equal the number of local endpoints named in +// args[], and the location named by new_tms[] must have sufficient space to +// receive num_new_tms entries. +// + On output, the array new_tms[] will be populated with a distinct gex_TM_t +// for each local member in the newly created team, in their respective rank +// order. No entries will be populated or skipped/reserved for non-local +// members. +// + Each new team is created with a collective scratch space, which may be +// optionally provided from the bound segment of the corresponding endpoint +// via the scratch_size and scratch_addrs arguments. +// + As with gex_TM_Split(), this "option" is actually required in the current +// implementation. +// + The argument scratch_size must be single-valued over the output team. +// + If GEX_FLAG_SCRATCH_SEG_OFFSET is set in flags, then the value(s) in +// scratch_addrs[] are byte offsets into the respective bound segments of the +// endpoints being joined into the new team. Otherwise, these values are +// virtual addresses in those same bound segments.
+// + The presence/absence of GEX_FLAG_SCRATCH_SEG_OFFSET in flags must be +// single-valued over the output team. +// + The length and contents of scratch_addrs[] depend on which of the +// following mutually-exclusive values are included in the value of flags +// (there is currently no default). +// - GEX_FLAG_TM_SYMMETRIC_SCRATCH +// There is exactly one entry in scratch_addrs[] and it provides the address +// or offset used for all members of the output team. +// - GEX_FLAG_TM_LOCAL_SCRATCH +// The array scratch_addrs[] has length num_new_tms and provides the +// addresses or offsets for each local member in the output team. +// - GEX_FLAG_TM_GLOBAL_SCRATCH +// The array scratch_addrs[] has length numargs and provides the +// addresses or offsets for every member in the output team. +// - GEX_FLAG_TM_NO_SCRATCH +// The arguments scratch_size and scratch_addrs[] are ignored. +// No scratch space is assigned and collectives over this team are prohibited +// (this prohibition may be relaxed in the future). +// + Scratch space, if any, must always reside in a bound segment with kind +// GEX_MK_HOST. Consequently, calls to this team constructor that include +// endpoints bound to segments with other memory kinds (such as devices) +// currently MUST pass GEX_FLAG_TM_NO_SCRATCH. +// This restriction might be relaxed in the future. +// + The mutually exclusive choice of +// GEX_FLAG_TM_{SYMMETRIC,LOCAL,GLOBAL,NO}_SCRATCH in flags must be +// single-valued over the output team. +// + This call is guaranteed to provide sufficient synchronization that the +// caller may begin using the new handles in new_tms[] immediately following +// return. The implementation is permitted but not required to include +// barrier synchronization, which may or may not be necessary to provide this +// guarantee.
+// +// NOTE: The current implementation only supports creation of teams composed +// entirely of primordial endpoints, even with conduits which support creation +// of additional endpoints. This limitation will be removed in a later release. +size_t gex_TM_Create( + gex_TM_t *new_tms, // OUT + size_t num_new_tms, // Length of new_tms + gex_TM_t parent_tm, + gex_EP_Location_t *args, // IN + size_t numargs, // single-valued over output team + gex_Addr_t *scratch_addrs, // IN + size_t scratch_size, // single-valued over output team + gex_Flags_t flags); // Flags (partially single-valued) + +// Destroy a (quiesced) team +// [Since spec v0.10] +// +// This is a collective call to destroy a team which is no longer needed and +// reclaim associated resources. +// +// + This call is collective over members of the team named by tm. +// + Destroys the team, releasing resources allocated to it by the +// implementation. +// + It is erroneous to destroy the primordial team. +// + Use of tm after return from this call is erroneous. +// + Does not destroy the endpoint associated with tm. +// + For the purpose of this API, a tm has been "locally quiesced" only when +// all of the following are true with respect to calls initiated on the local +// process: +// - No calls taking this tm as an argument are executing concurrently on +// other threads. +// - All collective operations using this tm are complete (client has synced +// their gex_Event_t's). +// - Any gex_AD_t objects created using this tm have been destroyed. +// + The identifier GEX_FLAG_GLOBALLY_QUIESCED is a preprocessor macro +// expanding to a constant integer expression suitable for use as a value of +// type gex_Flags_t. +// + By default, the tm must be locally quiesced on *each* caller before it may +// invoke this API.
However, if GEX_FLAG_GLOBALLY_QUIESCED is passed in +// flags, then the caller is additionally asserting that the tm has been +// quiesced on *all* callers (globally) prior to any caller invoking this API. +// + The presence/absence of GEX_FLAG_GLOBALLY_QUIESCED in flags must be +// single-valued. +// + Regardless of the presence/absence of GEX_FLAG_GLOBALLY_QUIESCED in flags, +// this call is permitted, but not required, to incur barrier synchronization +// across tm. +// + The scratch_p argument may be NULL. If non-NULL then if-and-only-if the +// collective scratch space used by the team was provided by the client, then +// its location is written to the location named by the scratch_p argument. +// + If a value is written to *scratch_p then return value is non-zero. +// Otherwise, zero is returned. +// + [UNIMPLEMENTED] If GEX_FLAG_SCRATCH_SEG_OFFSET is set in flags, then the +// value (if any) written to the gex_addr field of *scratch_p is assigned the +// byte offset into the bound segment of the endpoint associated with tm. +// Otherwise, the value (if any) assigned to this field is a virtual address. +// + The presence/absence of GEX_FLAG_SCRATCH_SEG_OFFSET in flags need not be +// single-valued, and need not match the value used at team construction. +// + Any cleanup action with respect to ClientData associated with the tm is +// the client's responsibility. +// +// The specification of GEX_FLAG_GLOBALLY_QUIESCED is intended to make the +// synchronization optional in order to remove unnecessary barriers. For +// instance given a scenario in which a client has a "row team" and a "column +// team" with a common parent, it would be sufficient to locally quiesce both +// teams, followed by a barrier over their common parent, followed by making +// back-to-back calls to destroy these row and column teams with this flag. 
+// +// The definition of "locally quiesced" intentionally excludes completion of +// non-blocking point-to-point operations using tm at their initiation. This +// is possible because the semantics of such operations depend on the endpoints +// involved, and not on the tm used to name them. +// +// The optional scratch_p argument is intended to assist the client in +// reclaiming use of the space it may have granted to the collectives +// implementation when the team was created, without creating a requirement +// for the client to track something GASNet-EX already tracks. +int gex_TM_Destroy( + gex_TM_t tm, + gex_Memvec_t *scratch_p, // OUT + gex_Flags_t flags); + + +// Create an "ad hoc" TM for point-to-point communication +// [Since spec v0.12] +// +// This API provides the means to locally construct a value which can be passed +// as the tm argument to point-to-point communication calls in lieu of a +// collectively created team, allowing communication between endpoints which +// might not be members of any common team (or of any team at all). +// +// With the exception of AM Replies, all GASNet-EX point-to-point +// communications APIs name both the local and remote endpoints using a pair of +// arguments of type gex_TM_t and gex_Rank_t. However, a gex_TM_t +// corresponding to a team has associated semantics that are not well-suited to +// inclusion of endpoints which lack corresponding host CPU threads to perform +// collective calls. This API allows for communication to/from the memory in +// segments bound to any endpoint in the job without the need to include it in +// a team. +// +// + This is not a collective operation. +// + Returns a value of type gex_TM_t representing an ad hoc "TM-pair" +// consisting of the given local_ep in the calling process and the endpoint +// with index remote_ep_index in the process with a jobrank given by the rank +// argument passed along with this gex_TM_t in a point-to-point communication +// call.
+// + gex_TM_Pair is a lightweight, non-communicating utility call. +// + The result is a TM-pair value which may be stored, reused or discarded, +// and has no corresponding free or release call (although it only remains +// valid for use while the referenced endpoints exist). +// + Two TM-pair values will compare equal if and only if they were created by +// calls to gex_TM_Pair() with the same arguments, and will never compare +// equal to a gex_TM_t created by other means. +// + The result is not a valid argument to any API with a prefix of gex_TM_, +// gex_AD_ or gex_Coll_, nor to any API documented as collective over the +// argument (regardless of prefix). +// + The result is valid for use in AM payload limit queries: +// gex_AM_Max{Request,Reply}{Medium,Long}() +// + The result is valid for use in bound segment queries: +// gex_Segment_QueryBound() [DEPRECATED] and gex_EP_QueryBoundSegmentNB() +// + The result is valid for use in point-to-point communication calls in the +// gex_RMA_*(), gex_VIS_*() and gex_AM_*() families when used in a manner +// similar to what is shown in examples below. +// +// Example 1. +// A call to gex_RMA_GetNBI() to read from the endpoint with index rem_idx on +// the process with the given jobrank, and initiated using the local endpoint +// loc_ep: +// gex_RMA_GetNBI(gex_TM_Pair(loc_ep, rem_idx), dest, jobrank, src, nbytes, flags); +// +// Example 2.
+// Communicating between a local endpoint ep0 and the remote endpoints with +// index 1 in several processes, using a single TM-pair: +// gex_TM_t tm_pair_01 = gex_TM_Pair(ep0, 1); +// for (int i = 0; i < num_peers; ++i) +// gex_RMA_GetNBI(tm_pair_01, dest[i], jobrank[i], src[i], nbytes, flags); +gex_TM_t gex_TM_Pair( + gex_EP_t local_ep, + gex_EP_Index_t remote_ep_index); + // Translations between (tm,rank) and jobrank // // These functions provide translations in either direction between a @@ -820,7 +1122,6 @@ gex_Rank_t gex_TM_TranslateRankToJobrank(gex_TM_t tm, gex_Rank_t rank); gex_Rank_t gex_TM_TranslateJobrankToRank(gex_TM_t tm, gex_Rank_t jobrank); // Translation from (tm,rank) to gex_EP_Location_t -// [PROPOSED] // // This function provides translation from a (tm,rank) pair to a // gex_EP_Location_t, which is a (jobrank,epidx) pair. @@ -859,14 +1160,199 @@ gex_Flags_t gex_EP_QueryFlags(gex_EP_t ep); gex_Segment_t gex_EP_QuerySegment(gex_EP_t ep); // Query the endpoint index -// [PROPOSED] gex_EP_Index_t gex_EP_QueryIndex(gex_EP_t ep); -// Create an endpoint [UNIMPLEMENTED] -extern int gex_EP_Create( - gex_EP_t *ep_p, - gex_Client_t client, - gex_Flags_t flags); +// Query addresses and length of a (possibly remote) bound segment +// [Since spec v0.13] +// +// This query takes a gex_TM_t and gex_Rank_t, which together name an endpoint. +// Other than flags, the remaining arguments are pointers to locations for +// outputs, each of which may be NULL if the caller does not need a particular +// value. +// +// If the value of flags does NOT include GEX_FLAG_IMMEDIATE, then this API +// behaves as follows: +// + The return value is a root event which can be successfully synchronized +// (return from gex_Event_Wait*() or zero return from gex_Event_Test*()) +// once the query results have been written to the output locations. +// It is permitted to be GEX_EVENT_INVALID (but not GEX_EVENT_NO_OP).
+// + Between entering this call and synchronizing the event it returns, the +// content of the output locations is undefined. +// + A "successful" query is one in which the endpoint named by (tm, rank) has a +// bound segment *and* one or more of the following are true: +// + The endpoint resides in the calling process +// + The endpoint has a segment that was bound via gex_Segment_Attach() +// + The endpoint had the bound segment at the time it was the subject of a +// preceding call to gex_EP_PublishBoundSegment() in which the calling +// process was a participant. +// + A successful query writes the corresponding segment's properties to each of +// the non-NULL output locations as described in "Segment properties and output +// locations", below. +// + If the endpoint named by (tm, rank) does not satisfy the above conditions +// for a successful query, then the query may be "unsuccessful", whereby the +// size_p output (unless NULL) will receive the value 0 and the remaining +// outputs are undefined. The implementation is thus permitted, but not +// required, to be successful for a non-primordial bound segment which has not +// yet been published to the calling process. +// + Since a segment cannot have zero-length, a caller can reliably distinguish +// between a successful or unsuccessful query via the size_p output. +// + The current definition of "unsuccessful" notably includes the case of a +// remote endpoint with a bound segment which has not been published to the +// calling process. However, the behavior for this case is subject to +// possible change in a future release. +// +// In the case that flags DOES include GEX_FLAG_IMMEDIATE, then this API +// behaves as follows: +// + If the query can be resolved without communication, then the return value +// is GEX_EVENT_INVALID, with the behavior otherwise identical to the case +// without GEX_FLAG_IMMEDIATE. +// + Queries which would require communication to resolve will return +// GEX_EVENT_NO_OP. 
+// + All queries for which (tm, rank) names an endpoint which resides in the +// calling process are guaranteed to return GEX_EVENT_INVALID. +// + Queries for which (tm, rank) names an endpoint which does not reside in the +// calling process may return either GEX_EVENT_INVALID or GEX_EVENT_NO_OP and +// the same query is not guaranteed to return the same value each time. +// This permits an implementation to cache information for remote endpoints. +// +// Segment properties and output locations: +// owneraddr_p: receives the address of the segment in the address space +// of the process which owns the segment. +// For segments of kind GEX_MK_HOST, this is a host address +// while for all other kinds this is a device address. In +// either case it is the address which would be returned by +// gex_Segment_QueryAddr() immediately after segment creation +// (via either gex_Segment_Attach() or gex_Segment_Create()). +// localaddr_p: receives the address of the segment in the address space +// of the calling process, *if* mapped, and NULL otherwise. +// size_p: receives the length of the segment. +// +// Only segments of kind GEX_MK_HOST may report a non-NULL localaddr property, +// and all other kinds will yield NULL. The current release additionally +// limits the reporting of non-NULL values to primordial segments (those +// created by gex_Segment_Attach()). +// +// Passing GEX_RANK_INVALID as the rank argument is *not* permitted. +// Use of a TM-pair for the 'tm' argument *is* permitted. +// Passing a '(tm,rank)' tuple naming an endpoint residing on the calling +// process *is* permitted. +// +// When passing a '(tm,rank)' tuple naming an endpoint not residing on the +// calling process, this query MAY communicate unless GEX_FLAG_IMMEDIATE is +// included in flags. +// If and only if GEX_FLAG_IMMEDIATE is included in flags, then this call is +// permitted in contexts which prohibit communication (such as AM Handler +// context or when holding an HSL). 
+extern gex_Event_t gex_EP_QueryBoundSegmentNB( + gex_TM_t tm, + gex_Rank_t rank, + void **owneraddr_p, + void **localaddr_p, + uintptr_t *size_p, + gex_Flags_t flags); + +// Query addresses and length of a (possibly remote) bound segment +// [DEPRECATED since spec v0.13 - see gex_EP_QueryBoundSegmentNB(), above] +// +// This query provides semantics similar to +// gex_Event_Wait( gex_EP_QueryBoundSegmentNB([...args...], 0) ) +// where "[...args...]" represent the five arguments to this query. +// +// The semantic differences are as follows: +// + Success/failure +// - This call returns zero for a "successful" query, defined as one in which +// (tm, rank) names an endpoint with a bound segment (and, if remote, that +// segment is primordial or has been published to the caller). Otherwise, +// a non-zero value is returned. +// - A successful query with gex_EP_QueryBoundSegmentNB() is distinguishable +// by a non-zero size output, while an unsuccessful query will write zero +// to the size output. +// + Preservation of outputs on failure +// - This call guarantees that an unsuccessful query leaves the outputs +// unmodified. +// - An unsuccessful query with gex_EP_QueryBoundSegmentNB() writes zero to +// the size output and leaves the others undefined. +// +// This call is not legal in contexts which prohibit communication, including +// (but not limited to) AM Handler context or when holding an HSL. +int gex_Segment_QueryBound( + gex_TM_t tm, + gex_Rank_t rank, + void **owneraddr_p, + void **localaddr_p, + uintptr_t *size_p); + +// Publish of EP's Bound Segment "RMA Credentials" +// +// Description: +// Some conduits require "credentials" to initiate communication targeting +// the bound segment of a remote endpoint.
This call performs any +// communication and setup necessary to ensure that after successful return +// the local process may safely initiate such communication with any +// endpoint named in this call which had a bound segment at the time of +// this call. +// +// Semantics: +// + On success, returns GASNET_OK. +// + Non-fatal failures return a documented error code. +// + Lack of sufficient resources to satisfy the given request will yield a +// return of GASNET_ERR_RESOURCE. +// + This call is collective over tm, which identifies a team used for +// underlying communication. +// + The eps argument is an array of length num_eps (possibly zero) of valid +// endpoints. +// + The num_eps argument may vary by caller (it is not required to be +// single-valued). +// + This call publishes the bound segments, if any, of the endpoints named +// by the eps argument. +// + The endpoint associated with tm is not implicitly Published, but it may +// be explicitly included in eps if Publication is desired. +// + The concatenation of eps arrays must name distinct endpoints. +// Duplication is prohibited both within a given eps array, and across eps +// arrays passed by multiple tm (from the same team) within a given +// process. This restriction may be relaxed in a future release. +// + Upon successful return, the local process may safely initiate +// communication targeting the bound segment of any endpoint named by the +// eps arguments which had a bound segment prior to the corresponding entry +// to this collective call. +// + It is permitted for eps to contain endpoints without a bound segment, in +// which case no credential will be published for such endpoints. +// + It is permitted for the same endpoint to be the subject of multiple +// successive Publish operations and any bound segment will replace a prior +// Publish in which an endpoint had no bound segment. +// + The allowance for multiple Publish operations includes the one implicit +// in gex_Segment_Attach(). 
+// + The endpoints named by eps must be idle for the duration of this operation. +// - No communication operations may be in-flight on any named endpoint +// when this operation starts. +// - No communication operations may be initiated on any named endpoint +// concurrent with this operation. +// - No AM Request may target any named endpoint for the duration of this +// operation. +// - As an exception to the restrictions above, inclusion of the endpoint +// associated with tm in eps is explicitly permitted. +// - A named endpoint may not be the subject of concurrent segment +// operations including (but not limited to) gex_Segment_QueryBound, +// gex_EP_BindSegment, gex_EP_PublishBoundSegment, and +// gex_EP_QueryBoundSegmentNB. +// + The publication of credentials is per local process and remote endpoint, +// independent of the specific team used to perform this operation. This +// means that upon return, initiation of communication is permitted using +// any (tm_x, rank) pair from a participating process naming a participating +// remote endpoint, including initiation using a gex_TM_t created using +// gex_TM_Pair(). Additionally, this persists beyond destruction of the +// team used to Publish. +// + The flags argument is reserved for future use and must currently be +// zero. +// + This call is permitted but not required to incur barrier synchronization +// across the team. + +extern int gex_EP_PublishBoundSegment( + gex_TM_t tm, + gex_EP_t *eps, // IN + size_t num_eps, + gex_Flags_t flags); + // Minimum permitted fixed index for AM handler registration. // Applies to both gasnet_attach() and gex_EP_RegisterHandlers(). @@ -892,7 +1378,7 @@ extern int gex_EP_Create( // // The gex_nargs and gex_flags fields are used by the client to supply the implementation // with assertions regarding the future invocations and behavior of each AM handler. 
-// If a handler invocation (eg via an AM injection targetting a given handler) or +// If a handler invocation (eg via an AM injection targeting a given handler) or // execution of an AM handler violates its registration assertions, behavior is undefined. typedef struct { gex_AM_Index_t gex_index; // 0 or in [GEX_AM_INDEX_BASE .. 255] @@ -947,7 +1433,11 @@ typedef struct { // unspecified (only promising that it is deterministic) this specification // guarantees that entries with gex_index==0 are processed in the same order // they appear in 'table' and are assigned the highest-numbered index which is -// then still unallocated (where 255 is the highest possible). +// then still unallocated (where 255 is the highest possible). However, in +// the case of concurrent calls to gex_EP_RegisterHandlers() and/or +// gasnet_attach() on the same endpoint with gex_index==0, the order in which +// such entries are processed is unspecified and may be non-deterministic. +// // Updating of gex_index fields that were passed as 0 upon input is the only // modification this function will perform upon the contents of 'table' // (whose elements are otherwise treated as if const-qualified by this call). @@ -979,41 +1469,66 @@ unsigned int gex_AM_MaxArgs(void); // + In the absence of the GEX_FLAG_AM_PREPARE_LEAST_{CLIENT,ALLOC} flags, // these queries return the maximum legal 'nbytes' argument value for the // corresponding gex_AM_{Request,Reply}{Medium,Long}*() call (collectively -// known as "fixed-payload AM" injection calls). +// known as "fixed-payload AM" injection calls) using the named local and +// remote endpoint and the same 'lc_opt', 'numargs' and 'flags' arguments. // + When passed either of the GEX_FLAG_AM_PREPARE_LEAST_{CLIENT,ALLOC} flags, // these queries return the maximum legal 'least_payload' argument value for // the corresponding gex_AM_Prepare{Request,Reply}{Medium,Long}() call -// (collectively known as "negotiated-payload AM" prepare calls). 
-// -// These are max payload queries for specific peer, numargs, lc_opt and flags, -// except that (other_rank == GEX_RANK_INVALID) does not ask about a specific -// rank, it the yields min-of-maxes over all team members. -// -// 1. The result of each query function is guaranteed to be symmetric - ie if -// two team members execute a given query on each other's ranks, with all -// other input arguments being equal, the queries are guaranteed to return +// (collectively known as "negotiated-payload AM" prepare calls) using the +// named local and remote endpoint and the same 'lc_opt', 'numargs' and 'flags' +// arguments. +// +// 1. If 'tm' names a local endpoint which is not AM-capable, then the call +// is erroneous. Here "AM-capable endpoint" is defined as any primordial +// endpoint or a non-primordial endpoint which was created with +// GEX_EP_CAPABILITY_AM. +// 2. When (other_rank != GEX_RANK_INVALID) +// a. The result of each query is a function of the 'numargs', 'lc_opt' and +// 'flags' arguments, and the two endpoints (one local and one remote) +// named by the tuple consisting of the 'tm' and 'other_rank' arguments. +// b. The result is independent of *how* the endpoints are named, such as by +// distinct 'tm' values with overlapping membership or use of a TM-pair. +// c. If the remote endpoint named by the '(tm,other_rank)' tuple is not +// AM-capable or does not exist (only possible with a TM-pair), then the +// call is erroneous. +// 3. When (other_rank == GEX_RANK_INVALID) +// a. The result of each query is a min-of-maxes over all AM-capable remote +// endpoints that are addressable with the given 'tm' when 'other_rank' +// is varied over its valid range, with the given 'numargs', 'lc_opt' and +// 'flags' arguments. +// b. This valid range excludes any endpoints which are not AM-capable. +// c. In the case that 'tm' is a TM-pair, the valid range also excludes +// jobranks which do not have an endpoint at the associated remote +// endpoint index. +// d.
If the valid range defined above is empty (no AM-capable endpoints are +// addressable), then the call is erroneous. +// 4. The result of each query function is guaranteed to be symmetric with +// respect to exchanging the local and remote endpoints. Two calls, by +// appropriate processes, that reverse the local and remote endpoint roles +// while keeping all other input arguments equal, are guaranteed to return // the same value. Note this does NOT imply any relationship between the // results of different query functions (eg MaxRequestMedium versus // MaxReplyMedium). -// 2. If other_rank == GEX_RANK_INVALID, then all team members are guaranteed -// to get the same result given the same values of the other input -// arguments. Otherwise, other_rank specifies the other team member -// involved in the operation being queried (where the rank making the query -// is implicitly involved). Note that due to the symmetry described above, -// 'other_rank' can (and should) always name the other party in the -// communication, regardless of whether that rank or the caller is to be -// the sender or the receiver. -// 3. 'numargs' must be between 0 and gex_AM_MaxArgs(), inclusive. It is +// 5. When (other_rank == GEX_RANK_INVALID) all callers providing a 'tm' naming +// the same set of participating endpoints are guaranteed to get the same +// result when given the same values for the other input arguments. Due to +// the symmetry noted above, this includes two calls using TM-pairs to +// identify the same two endpoints. +// 6. Due to the symmetry properties described above, 'other_rank' can (and +// therefore should) always name the other party in the communication, +// regardless of whether that rank or the caller is to be the sender or the +// receiver. +// 7. 'numargs' must be between 0 and gex_AM_MaxArgs(), inclusive. It is // guaranteed that increasing 'numargs' will produce monotonically non- // increasing results when all other parameters are held fixed. -// 4.
'lc_opt' indicates the payload local completion option to be used for +// 8. 'lc_opt' indicates the payload local completion option to be used for // the AM injection or prepare call in question. The predefined constants // GEX_EVENT_NOW and GEX_EVENT_GROUP should be used directly, while a // pointer to any variable of type gex_Event_t (or a NULL pointer) may be // used interchangeably to indicate that the injection or prepare call // passes any such value (without requiring that the same pointer value be // passed). -// 5. 'flags' indicates the flags that will be provided to the corresponding +// 9. 'flags' indicates the flags that will be provided to the corresponding // AM injection or prepare function (and should not to be confused with the // handler registration flags). The result of the query is only guaranteed // to be correct for an injection of prepare call with exactly the same @@ -1022,15 +1537,17 @@ unsigned int gex_AM_MaxArgs(void); // // The result of all four query functions is guaranteed to be at least 512 (bytes). // -// The result is guaranteed to be stable - ie for the same set of input arguments, -// it will always return the same value. +// The result is guaranteed to be stable throughout a given job execution - ie +// for the same set of input arguments, it will always return the same value. // -// Aside from the explicit guarantees above, the result may otherwise vary -// with the input arguments in unspecified ways, and thus only defines the -// documented limit for an call with corresponding values of (tm, other_rank, -// lc_opt, flags and numargs). For example, limits may vary between different -// pairs of ranks on the same team, or even between the same pair of processes -// linked via different team or client. 
+// Aside from the explicit guarantees above, the result may otherwise vary with +// the input arguments in unspecified ways, and thus only defines the documented +// limit for a call with corresponding local and remote endpoints and values of +// lc_opt, flags and numargs. For example, limits often vary between different +// conduits and may also vary based on job layout, between pairs of ranks in +// the same team, or between different pairs of endpoints linking the same two +// processes. + size_t gex_AM_MaxRequestLong( gex_TM_t tm, gex_Rank_t other_rank, @@ -1059,11 +1576,9 @@ size_t gex_AM_MaxReplyMedium( // Token-specific max fixed-payload queries for specific nargs, lc_opt and flags // // Semantics are identical to the may payload queries above, except that -// a gex_Token_t replaces the (tm,rank) pair. The token represents a (tm, -// rank) pair, where tm is the local representative of the gex_TM_t used to -// initiate the AM which resulted in the execution of the handler which -// received that token, and rank denotes the rank in that team that initiated -// the same AM. In particular, this implies the queries return the limits +// a gex_Token_t replaces the (tm,rank) tuple. The token names the local +// endpoint on which the AM has been received and the remote endpoint which +// sent it. In particular, this implies the queries return the limits // governing the AM Reply operations that can be performed using this token. // // These are only permitted in Request handlers. @@ -1284,7 +1799,7 @@ int gex_AM_ReplyShort[M]( [,arg0, ... ,argM-1]); // -// Negotiated-payload AM APIs +// Negotiated-payload AM APIs (aka "NPAM") // // The fixed-payload APIs for Active Message Mediums and Longs (brought @@ -1320,11 +1835,8 @@ int gex_AM_ReplyShort[M]( // It is important to note that passing NULL for the client_buf argument to a // Prepare call requires GASNet to allocate buffer space of size no smaller // than least_payload. 
Use of gex_AM_Max{Request,Reply}{Medium,Long}() with -// the GEX_FLAG_AM_PREPARE_LEAST_ALLOC flag is intended to enforce limits on -// the space GASNet is required to allocate. However, the current release does -// not yet impose limits tighter than the corresponding fixed-payload queries. -// Therefore, some extra care should be taken to keep such allocation demands -// reasonable. +// the GEX_FLAG_AM_PREPARE_LEAST_ALLOC flag gives the limits on the space GASNet +// is required to allocate. Larger values of least_payload are erroneous. // // Between the Prepare and the Commit calls the client is responsible for // assembling its payload (or the prefix of the given length) at the selected @@ -1377,6 +1889,26 @@ void *gex_AM_SrcDescAddr(gex_AM_SrcDesc_t sd); // to the Prepare call (inclusive). size_t gex_AM_SrcDescSize(gex_AM_SrcDesc_t sd); +// Native implementation indicators for negotiated-payload active messages +// GASNET_NATIVE_NP_ALLOC_{REQ,REP}_{MEDIUM,LONG} symbols are defined to 1 or +// undefined to indicate whether (or not, respectively) the implementation +// of negotiated-payload AM Request/Reply Medium/Long (with a GASNet-allocated +// source buffer, i.e., initiated with client_buf == NULL) for the network +// transport of the current conduit are "native". This is a performance hint +// to clients, and does not affect correctness or normative behavior. +// The native designation implies that AM injection using these calls can avoid +// one or more payload copies relative to the corresponding fixed-payload AM +// call under the right conditions (which may be implementation dependent). +// Note that in configurations providing GASNet shared-memory bypass for AM +// to intra-nbrhd peers (activated by --enable-pshm, enabled by default), +// these only denote the behavior of the network transport (AM to peers outside +// the caller's nbrhd). The shared-memory transport for all conduits always +// provides native behavior for Medium requests and replies. 
+#define GASNET_NATIVE_NP_ALLOC_REQ_MEDIUM 1 or undefined +#define GASNET_NATIVE_NP_ALLOC_REP_MEDIUM 1 or undefined +#define GASNET_NATIVE_NP_ALLOC_REQ_LONG 1 or undefined +#define GASNET_NATIVE_NP_ALLOC_REP_LONG 1 or undefined + // // gex_AM_Prepare calls // @@ -1588,6 +2120,54 @@ extern void gex_AM_CommitReplyLong[M]( // to a variable of type 'gex_Event_t' allows the call to return without // delay, and allows the client to check local completion using // gex_Event_{Test,Wait}*(). +// +// NOTE 3: Local addressing +// +// Let "the local endpoint" refer to the endpoint associated with 'tm'. +// +// Let "device segment" denote a segment created using gex_Segment_Create() +// with a 'kind' argument other than GEX_MK_HOST. +// +// Let "in the local bound segment" mean that a given range of addresses +// lies entirely within the range of the segment as might be determined by +// applying gex_Segment_QueryAddr() and gex_Segment_QuerySize() to the +// segment bound to the local endpoint. +// +// The local address (src of a Put, dest of a Get) is interpreted and +// constrained as follows: +// + [UNIMPLEMENTED] In the presence of GEX_FLAG_SELF_SEG_OFFSET in 'flags' +// the address argument is interpreted as an unsigned offset in bytes from +// the start address of the local endpoint's (required) bound segment. +// The memory so named must be in the local bound segment. +// + In the absence of GEX_FLAG_SELF_SEG_OFFSET in 'flags': +// - If the local endpoint has a bound device segment, then the address is +// a device address and the memory so named must be in the local bound +// segment. +// - Otherwise the address is a host address, and the named memory is not +// constrained to lie within the local bound segment (if any). +// +// NOTE 4: Remote addressing +// +// Let "the remote endpoint" refer to the endpoint named by '(tm,rank)'. +// +// Let "device segment" denote a segment created using gex_Segment_Create() +// with a 'kind' argument other than GEX_MK_HOST. 
+// +// Let "in the remote bound segment" mean that a given range of addresses +// lies entirely within the range of the segment as might be determined from +// the owneraddr and size properties obtained using gex_Segment_QueryBound() +// or gex_EP_QueryBoundSegmentNB() applied to the '(tm,rank)' tuple. +// +// The remote address (dest of a Put, src of a Get) is interpreted as +// follows: +// + [UNIMPLEMENTED] In the presence of GEX_FLAG_PEER_SEG_OFFSET in 'flags' +// the address argument is interpreted as an unsigned offset in bytes from +// the start address of the remote endpoint's bound segment. +// + In the absence of GEX_FLAG_PEER_SEG_OFFSET in 'flags': +// - If the bound segment of the remote endpoint is a device segment, then +// the address is a device address. +// - Otherwise the address is a host address. +// In all cases, the remote memory must be in the remote bound segment. // Put int gex_RMA_PutBlocking( @@ -3122,39 +3702,108 @@ gex_Event_t gex_Coll_ReduceToAllNB( //---------------------------------------------------------------------- // [PROPOSED] -// The remainder of this document is for APIs described in more detail in -// GASNet-EX API Proposal: Multi-EP, Revision 2020.6.1 -// Only prototypes and supported flags are provided here. +// This section of this document covers APIs described in more detail in +// GASNet-EX API Proposal: Memory Kinds, Revision 2020.11.0 // +// For functions, only prototypes and supported flags are provided here. +// For function semantics, one should consult the document cited above. // Copies of the most recent revision of that document are available on // request from gasnet-staff@lbl.gov. 
-size_t gex_TM_Create( - gex_TM_t *new_tms, // OUT - size_t num_new_tms, // Length of new_tms - gex_TM_t parent_tm, - gex_EP_Location_t *args, // IN - size_t numargs, // single-valued over output team - gex_Addr_t *scratch_addrs, // IN - size_t scratch_size // single-valued over output team - gex_Flags_t flags); // Flags (partially single-valued) +// NOTE: Changes relative to the 2020.6.1 Multi-EP API proposal include: +// + `gex_Segment_Create()` takes a `client` argument +// + the `gex_MemKind_t` type has been renamed to `gex_MK_t` +// + the `GEX_MEMKIND_HOST` constant has been renamed to `GEX_MK_HOST`. +int gex_Segment_Create( + gex_Segment_t *segment_p, // OUT + gex_Client_t client, + gex_Addr_t address, + uintptr_t length, + gex_MK_t kind, + gex_Flags_t flags); // Valid flags: - // GEX_FLAG_RANK_IS_JOBRANK - // GEX_FLAG_SCRATCH_SEG_OFFSET - // GEX_FLAG_TM_SYMMETRIC_SCRATCH \ - // GEX_FLAG_TM_LOCAL_SCRATCH | mutually-exclusive group - // GEX_FLAG_TM_GLOBAL_SCRATCH | - // GEX_FLAG_TM_NO_SCRATCH / - // GEX_FLAG_TM_SCRATCH_SIZE_MIN \ - // GEX_FLAG_TM_SCRATCH_SIZE_RECOMMENDED / mutually-exclusive pair - -int gasneti_TM_Destroy( - gex_TM_t tm, - gex_Memvec_t *scratch_p, - gex_Flags_t flags); + // NONE - zero is currently required + +// NOTE: Changes relative to the 2020.6.1 Multi-EP API proposal include: +// + this is a renamed and argument-permuted replacement for the API +// `gex_Segment_EP_Bind()` +void gex_EP_BindSegment( + gex_EP_t ep, + gex_Segment_t segment, + gex_Flags_t flags); // Valid flags: - // GEX_FLAG_GLOBALLY_QUIESCED + // NONE - zero is currently required + +typedef [some integer type] gex_EP_Capabilities_t; +#define GEX_EP_CAPABILITY_{RMA,AM,VIS,COLL,AD,ALL} ??? + +#define GEX_FLAG_HINT_ACCEL_{AD,COLL,ALL} ??? 
+ +// NOTE: implemented only by ibv-conduit and GEX_EP_CAPABILITY_RMA +int gex_EP_Create( + gex_EP_t *ep_p, // OUT + gex_Client_t client, + gex_EP_Capabilities_t capabilities, + gex_Flags_t flags); + // Valid flags: + // NONE - zero is currently required + +// gex_MK_t is an opaque scalar handle to a Memory Kind (MK) +typedef ... gex_MK_t; + +// GEX_MK_INVALID is a predefined constant guaranteed to have the value +// zero and never alias a valid memory kind +#define GEX_MK_INVALID ((gex_MK_t)0) + +// GEX_MK_HOST is a predefined constant for the "kind" for host memory +#define GEX_MK_HOST ((gex_MK_t)???) + +// **** +// The APIs in the remainder of this section are provided by gasnet_mk.h +// **** + +// gex_MK_Class_t is enum naming available "classes" of memory kinds. +// It includes at least the following values (in unspecified order): +typedef enum { + GEX_MK_CLASS_HOST, // "normal" memory (eg GEX_MK_HOST) + GEX_MK_CLASS_CUDA_UVA, // CUDA UVA memory + ??? +} gex_MK_Class_t; + +// The gex_MK_Create_args_t struct is passed to gex_MK_Create to create a +// per-device instance of a memory kind of the given class, and includes +// at least the following members (in unspecified order): +typedef struct { + uint64_t gex_flags; // Reserved. Must be 0 currently. + gex_MK_Class_t gex_class; + union { + struct { + int gex_CUdevice; + } gex_class_cuda_uva; + } gex_args; +} gex_MK_Create_args_t; + +int gex_MK_Create( + gex_MK_t *memkind_p, // OUT + gex_Client_t client, + const gex_MK_Create_args_t *args, // IN + gex_Flags_t flags // Reserved. Must be 0 currently. + ); + // Valid flags: + // NONE - zero is currently required + +void gex_MK_Destroy( + gex_MK_t memkind, + gex_Flags_t flags // Reserved. Must be 0 currently. 
+ ); + // Valid flags: + // NONE - zero is currently required + +// **** +// End of APIs provided by gasnet_mk.h +// **** +// End of "GASNet-EX API Proposal: Memory Kinds, Revision 2020.11.0" section //---------------------------------------------------------------------- // vim: syntax=c diff --git a/third-party/gasnet/gasnet-src/docs/memory_kinds.md b/third-party/gasnet/gasnet-src/docs/memory_kinds.md new file mode 100644 index 000000000000..b6c3915b9696 --- /dev/null +++ b/third-party/gasnet/gasnet-src/docs/memory_kinds.md @@ -0,0 +1,439 @@ +# Preface + +``` +NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE +NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE +NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE + + This file documents the "Memory Kinds" feature of GASNet-EX, intended + only for use by developers with a specific interest in this feature. + Other client developers should limit themselves to the interfaces and + behaviors given in docs/GASNet-EX.txt and the GASNet-1 specification. + + While it is intended that features and capabilities described here will + make their way into the GASNet-EX specification, the APIs in this file + and their implementation are only a prototype. All aspects of the APIs + and capabilities first introduced in this file are subject to + non-trivial changes before the prototype stage ends. + +NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE +NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE +NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE +``` + +# Introduction + +This document provides a detailed status of the Memory Kinds feature +implementation and is updated as that status changes. 
+ +This document makes references to an external document, which is available on +request from gasnet-staff@lbl.gov: + + + GASNet-EX API Proposal: Memory Kinds, Revision 2020.11.0 + +For brevity, this will be referenced as simply "the API Proposal". + +# General Usage + +By default, the `configure` script does not enable support for +any non-host memory kinds. Use of new configure option `--enable-memory-kinds` +enables probes for the necessary headers and libraries for all available device +"kinds" (presently only "CUDA_UVA") and enables the prototype implementation of +memory kinds if such support is found. This is the recommended mechanism to +enable memory kinds support, since it will enable additional kinds as they are +added. For more detailed control for a given kind (such as "cuda-uva") the +following take precedence over `--(en|dis)able-memory-kinds`: + + + `--disable-kind-[name]` disables probing for support for the named kind. + + `--enable-kind-[name]` probes for support for the named kind, with failure + of the probe being a fatal `configure` error. + + `--enable-kind-[name]=probe` probes for support for the named kind, with + failure of the probe being non-fatal (the same behavior requested for all + kinds by using `--enable-memory-kinds`). + +On our main development platforms, the logic in `configure` is sufficient to +locate the required headers and libraries with no additional options. However, +the following options (and environment equivalents) are available to guide the +probe if needed: + + + `--with-cuda-home=...` or `CUDA_HOME` + + `--with-cuda-cflags=...` or `CUDA_CFLAGS` + + `--with-cuda-libs=...` or `CUDA_LIBS` + + `--with-cuda-ldflags=...` or `CUDA_LDFLAGS` + +Generally, it is sufficient to provide the installation prefix of the CUDA +toolkit using either `--with-cuda-home=...` or `CUDA_HOME`, since the others +all have sensible defaults once the installation prefix is known. 
+ +Clients can use the preprocessor identifier `GASNET_HAVE_MK_CLASS_CUDA_UVA` +(defined to `1` or undefined) to determine if support for the CUDA_UVA memory +kind was detected at configure time. + +The preprocessor identifier `GASNET_HAVE_MK_CLASS_MULTIPLE` is more general, +providing the client with an indication if configure detected support for *any* +memory kinds other than host memory. + +# Supported Configurations + +All current memory kinds implementation work is limited to devices with the +CUDA Device API and Unified Virtual Addressing (UVA). This should include +all modern NVIDIA GPUs and CUDA Toolkit versions. + +Support is further limited to ibv-conduit on Linux and only when using Mellanox +InfiniBand hardware and drivers with support for "GPUDirect RDMA" (GDR). In +some cases additional optional software, such as "nvidia_peer_memory" must be +installed. Please consult Mellanox documentation for assistance determining +what driver software is needed for your specific hardware and Linux +distribution. The Open MPI and MVAPICH projects also have some documentation +regarding deployment of GPUDirect RDMA for their respective MPI implementations. + +Furthermore, only `GASNET_SEGMENT_FAST` segment mode is supported. This is the +default segment mode, but can be specified explicitly at configure time using +the `--enable-segment-fast` option. To be clear: `--enable-segment-large` and +`--enable-segment-everything` configurations of ibv-conduit do not support +the memory kinds work in the current implementation. + +To the best of our knowledge, Mellanox currently disclaims support for GPUDirect +RDMA on aarch64 (aka ARM64 or ARMv8) and NVIDIA does not support UVA on ILP32 +platforms. Therefore, this work currently supports only x86-64 and ppc64le. 
+ +For any configurations that do not meet all of the configure-testable +requirements outlined above, the memory kinds support in the current prototype +implementation will be disabled (or `configure` will fail if it was passed +`--enable-kind-cuda-uva`). Specifically, `GASNET_HAVE_MK_CLASS_CUDA_UVA` will be +undefined and attempts to create device segments will fail at runtime. Future +releases are expected to eventually include a "reference implementation" that +will allow creation of device segments on a wider range of platforms and +transparently stage transfers through host memory bounce buffers, but that is +not yet present. + +If support is enabled at configure time, then the implementation will attempt +to determine at runtime if GDR support is present or not. If it is not, then +`gex_MK_Create()` will fail with an appropriate message. + +## Limits on GPU segment size + +Modern NVIDIA GPUs with GPUDirect utilize a "Base Address Register" mechanism +to map the device memory into the PCIe address space. The amount of memory +which can be mapped by GASNet-EX as a GPU segment is limited by the `BAR1` +capability of your GPU, which can also be severely limited by the motherboard +and/or BIOS. + +To query the BAR1 capability of your GPU, run `nvidia-smi -q` and look for the +"Total" value in the "BAR1 Memory Usage" section for an (optimistic) maximum on +the amount of memory which can be mapped. This limit is per GPU across all +processes and runtimes on a given node. Thus a portion is consumed by each +GASNet-EX segment created on a given node, as well as by other uses of +GPUDirect RDMA such as an MPI implementation. Typically a few tens of MB are +also reserved by the driver itself. + +## GDR and Multi-rail + +Though our test and development systems have multi-rail InfiniBand networks, +there are currently unresolved issues with respect to use of multiple rails +within a given process. 
Consequently, support is currently limited to +single-rail configurations, which can be achieved either at configure time by +using the option `--disable-ibv-multirail`, or at runtime by setting the +environment variable `GASNET_IBV_PORTS` to name a single valid port. Both +mechanisms are documented in `ibv-conduit/README`. + +For the most up-to-date information on this issue see +[bug 4148](https://gasnet-bugs.lbl.gov/bugzilla/show_bug.cgi?id=4148) + +Additionally, the BAR1 usage (described earlier in this document) has been +observed to be *per-HCA* and thus use of multiple rails may limit the size of +GPU segments. + +## Loopback + +The current implementation does not handle RMA operations between combinations +of host and GPU memory in the same process (loopback), though this will be +supported in the future. + +It should be noted that this temporary limitation precludes use of GASNet for +transfers which could alternatively be performed using `cudaMemcpy()`, or +`cuMemcpy{DtoH,HtoD,DtoD}()` (possibly with some CUDA calls to enable +peer-to-peer access). Therefore, it is recommended practice (and will +*continue* to be so when this limitation is removed) that clients bypass +GASNet-EX to perform such transfers. + +For the most up-to-date information on this issue see +[bug 4149](https://gasnet-bugs.lbl.gov/bugzilla/show_bug.cgi?id=4149) + +## GDR and PSHM + +Currently the implementation is sufficient (when using supported hardware, +drivers and libraries) to perform RMA operations between combinations of host +and GPU memory in which the two involved endpoints are in distinct "nbrhds". + +There is a temporary limitation (in addition to the no-loopback limitation, +above) which prohibits intra-nbrhd RMA operations. In other words, there is no +support for RMA operations in which one or both endpoints has a GPU memory +segment and the two processes are in the same shared memory domain (aka "nbrhd" +in GASNet-EX documentation). 
+ +Currently, RMA transfers involving GPU memory between processes in the same +compute node are supported only when PSHM is "inactive" (meaning either +`--disable-pshm` at configure time, or `GASNET_SUPERNODE_MAXSIZE=1` in one's +environment at runtime). + +For the most up-to-date information on this issue see +[bug 4148](https://gasnet-bugs.lbl.gov/bugzilla/show_bug.cgi?id=4148) + +## Premature local completion of GDR Puts from device memory + +In addition to the multi-path issues described above (under "GDR and +Multi-rail" and "GDR and PSHM" sub-headings), Put operations with their source +in device memory have been observed to signal local completion prior to actual +transfer of the data (as can be demonstrated by writing data to the source +after sync and observing it arrive in the destination buffer). + +This is believed to be an issue with how ibv and GDR interact, and we are +hopeful that a workaround can be implemented in a future release. + +For the most up-to-date information on this issue see +[bug 4150](https://gasnet-bugs.lbl.gov/bugzilla/show_bug.cgi?id=4150) + +## GDR and small Gets into device memory + +As documented +[here](https://github.com/linux-rdma/rdma-core/blob/master/providers/mlx5/man/mlx5dv_create_qp.3.md) +current versions of libibverbs for "mlx5" generation HCAs may default to +performing small RDMA Gets into an unused space in the work request structure +to minimize the number of PCI bus crossings required. Such Get operations are +eventually completed by a `memcpy()` to the original destination when the +completion queue entry is reaped. This `memcpy()` fails (with a `SIGSEGV`) +when the destination is device memory. + +As noted in Mellanox's documentation, setting `MLX5_SCATTER_TO_CQE=0` in the +environment disables this undesired behavior. We hope to be able to provide a +better solution (automatic and specific to device memory Gets) in a future +release. 
+ +For the most up-to-date information on this issue see +[bug 4151](https://gasnet-bugs.lbl.gov/bugzilla/show_bug.cgi?id=4151) + +## CUDA Multi-Process Service (MPS) + +Testing conducted to date is insufficient to establish whether the current +implementation is compatible with the use of CUDA MPS. Reports (positive or +negative) regarding such compatibility are welcome. + +# Tested Configurations + +We have yet to establish the minimum required versions of hardware, drivers or +libraries. However, the following two platforms are our primary development +systems and represent "known good" combinations (modulo those caveats listed +elsewhere in this document, such as with regard to multi-rail): + +Summit: + + + ppc64le (IBM POWER9) + + CUDA 10.1.243 + + Mellanox ConnectX-5 HCAs + + NVIDIA Volta-class GPUs + +Dirac: + + + x86_64 (Intel Nehalem) + + CUDA 11.1 + + Mellanox ConnectX-5 HCAs + + NVIDIA Maxwell-class GPUs + +Eventual minimum requirements may be lower than those of either platform, or +possibly higher. + +# Implementation Status Summary + +Currently the implementation is sufficient (when hardware, software and +configuration constraints are met) to use the pseudo-code below to perform RMA +operations between combinations of host and GPU memory (subject to the +previously noted temporary prohibitions on loopback and intra-nbrhd transfers). + +Please note that all error checking has been elided from this example. Proper +checking of return codes, if any, is especially important when using this +prototype. 
+ +``` + // Bootstrap and establish host memory segment for the primordial endpoints + gex_Client_Init(&myClient, &myEP, &myTM, "MK example", &argc, &argv, 0); + gex_Segment_Attach(...); + + // Create memory kind object for CUDA device 0 + gex_MK_t dev0Kind; + gex_MK_Create_args_t args; + args.gex_flags = 0; + args.gex_class = GEX_MK_CLASS_CUDA_UVA; + args.gex_args.gex_class_cuda_uva.gex_CUdevice = 0; + gex_MK_Create(&dev0Kind, myClient, &args, 0); + + // Ask GASNet to allocate a 1GB CUDA UVA segment + gex_Segment_t dev0Segment; + gex_Segment_Create(&dev0Segment, myClient, NULL, 1024*1024*1024, dev0Kind, 0); + + // Create an RMA-only endpoint, bind dev0Segment and publish RMA credentials + gex_EP_t dev0EP; + gex_EP_Create(&dev0EP, myClient, GEX_EP_CAPABILITY_RMA, 0); + gex_EP_BindSegment(dev0EP, dev0Segment, 0); + gex_EP_PublishBoundSegment(myTM, &dev0EP, 1, 0); + + // Note assumptions made in remainder of this example + assert( gex_EP_QueryIndex(myEP) == 0 ); + assert( gex_EP_QueryIndex(dev0EP) == 1 ); + + // Query device addresses needed for RMA + void *loc_dev0, *rem_dev0; + loc_dev0 = gex_Segment_QueryAddr(dev0Segment); + gex_Event_t ev = gex_EP_QueryBoundSegmentNB(gex_TM_Pair(myEP,1), peer_rank, &rem_dev0, NULL, NULL, 0); + gex_Event_Wait(ev); + + // [ do something that places data in GPU memory ] + + // Perform a blocking 4MB GPU-to-GPU Get + gex_RMA_GetBlocking(gex_TM_Pair(dev0EP,1), loc_dev0, peer_rank, rem_dev0, 4*1024*1024, 0); +``` + +# Implementation Status by GASNet-EX API + +This section describes the known limitations of each of the APIs introduced +recently in order to support memory kinds. Due to interaction among +APIs, it is impossible to completely avoid forward references. + +## Additions: + +The preprocessor identifier `GASNET_HAVE_MK_CLASS_MULTIPLE` is defined to `1` if +support has been compiled in for any memory kinds other than host memory. 
+ +## Renames: + +Some types, constants and functions have been renamed relative to their first +appearances in the API Proposal (when it was known as "GASNet-EX API Proposal: +Multi-EP, Revision 2020.6.1"): + + + `gex_Segment_EP_Bind()` is replaced by `gex_EP_BindSegment()` + + `gex_MemKind_Create()` is replaced by `gex_MK_Create()` + + `gex_MemKind_Destroy()` is replaced by `gex_MK_Destroy()` + + `gex_MemKind_t` is replaced by `gex_MK_t` + - With the constant `GEX_MEMKIND_HOST` replaced by `GEX_MK_HOST` + + `gex_MemKind_Class_t` becomes `gex_MK_Class_t` + - With `GEX_MEMKIND_CLASS_` shortened to `GEX_MK_CLASS_` in the naming of the + enum values + + `gex_MemKind_Create_args_t` becomes `gex_MK_Create_args_t` + - With `gex_mk_` shortened to `gex_` in naming of struct and union members + +The current (2020.11.0) revision of the API Proposal uses the names above, and +the remainder of this section will utilize the new names exclusively. + +## `gex_Segment_Attach()` + +The `gex_Segment_Attach()` call remains the only supported means by which to +create and bind a segment to the primordial endpoint. This is true despite the +recent introduction of APIs which, when used in the proper sequence, would +appear to provide a suitable replacement. + +While this is technically a limitation of the alternative APIs, it is +documented here for clarity. + +## `gex_Segment_Create()` + +This API, along with all types and constants required to specify its arguments, +are defined and will link in any conduit. However, it is useful only when +multi-EP support exists (see `gex_EP_Create()` for the current scope of +multi-EP support). + +On ibv-conduit, specifically, the implementation of this API is believed to be +complete with respect to the API Proposal. In particular, it is capable of +creating segments of both client-allocated and GASNet-allocated memory, using +either the defined `kind` value `GEX_MK_HOST` or a kind created using +`gex_MK_Create()` with a class of `GEX_MK_CLASS_CUDA_UVA`. 
+ +Notably lacking from both specification and implementation is a means to request +or demand allocation of memory suitable for intra-nbrhd cross-mapping via PSHM. + +## `gex_Segment_Destroy()` + +Not currently implemented. + +## `gex_EP_Create()` + +This API, along with all types and constants required to specify its arguments, +are defined and will link in any conduit. However, it is useful only when +multi-EP support exists, as described in the following paragraphs. + +Current builds of GASNet-EX will define a preprocessor macro +`GASNET_MAXEPS` which advertises the optimistic maximum number of endpoints per +process, inclusive of the primordial endpoint created by `gex_Client_Init()`. +Any call to `gex_EP_Create()` which would exceed this limit will fail with +a return of GASNET_ERR_RESOURCE. + +Currently, only ibv-conduit in FAST segment mode has a value of `GASNET_MAXEPS` +larger than 1 (it is currently 33). Additionally, ibv-conduit only supports the +`GEX_EP_CAPABILITY_RMA` capability for non-primordial endpoints. + +The `GEX_FLAG_HINT_ACCEL_*` values are currently defined, but ignored. + +## `gex_EP_BindSegment()` + +This API, along with all types and constants required to specify its arguments, +are defined and will link in any conduit. However, it is useful only when +multi-EP support exists (see `gex_EP_Create()` for the current scope of +multi-EP support). + +## `gex_EP_PublishBoundSegment()` + +This API does not appear in the API Proposal, nor in related documents which +preceded it. Complete semantics are documented in `docs/GASNet-EX.txt`. + +This call is currently necessary as the only means to actively distribute the +RMA credentials required by some conduits (ibv among them). While this task is +performed in `gex_Segment_Attach()` for primordial endpoints, use of this API is +required prior to use of `gex_RMA_*()` APIs using non-primordial endpoints. It +is hoped that this call can become optional in the future. 
+ +This API, along with all types and constants required to specify its arguments, +are defined and will link in any conduit. However, it is useful only when +multi-EP support exists (see `gex_EP_Create()` for the current scope of +multi-EP support). + +## `gex_TM_Pair()` + +This API is believed to be fully implemented in all conduits and accepted by +all APIs required to do so by the API Proposal (notably the `gex_RMA_*()`, +`gex_AM_*()` and `gex_VIS_*()` API families). + +Since multi-EP support is currently exclusive to ibv-conduit in FAST segment +mode, the use in other conduits is effectively limited to aliasing of the +primordial team. + +## `gex_TM_Create()` + +At this point in time, there is no support for non-primordial endpoints as +members of teams, and more specifically they are not accepted in the `args` +passed to `gex_TM_Create()`. This means all RMA communication involving +non-primordial endpoints must currently utilize a `gex_TM_t` returned by +`gex_TM_Pair()`. + +## `gex_TM_Dup()` + +Not implemented. + +## `gex_MK_Create()` + +This API is implemented as described in the API Proposal (with some renames +relative to their first appearance, as detailed earlier in this document). This +includes the conditional definition (defined to `1` or undefined) of +`GASNET_HAVE_MK_CLASS_CUDA_UVA`, which is currently defined only when the +necessary headers and libs were located at configure time *and* one is using +ibv-conduit in FAST segment mode. In all other circumstances +`GASNET_HAVE_MK_CLASS_CUDA_UVA` will be undefined. + +While `GASNET_HAVE_MK_CLASS_CUDA_UVA` has only a conditional definition, the +enum value `GEX_MK_CLASS_CUDA_UVA` is defined unconditionally in `gasnet_mk.h`. +Any calls to `gex_MK_Create()` specifying this class when *not* supported will +return `GASNET_ERR_BAD_ARG`, as documented in the API Proposal. + +## `gex_MK_Destroy()` + +This API is implemented, but is only legal for a kind not currently in use by +any segment. 
Given the lack of a `gex_Segment_Destroy()` implementation, this +API is therefore only useful for a kind which has been created but never used. diff --git a/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_autotune.c b/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_autotune.c index bba2bbe57d9f..1d4fb4689bc3 100644 --- a/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_autotune.c +++ b/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_autotune.c @@ -593,9 +593,6 @@ gasnete_coll_autotune_info_t* gasnete_coll_autotune_init(gasnet_team_handle_t te ret->exchange_dissem_limit = MIN(dissem_limit, temp_size); ret->exchange_dissem_radix = MIN(gasneti_getenv_int_withdefault("GASNET_COLL_EXCHANGE_DISSEM_RADIX", 2, 0),total_images); - if(min_scratch_size < total_images) { - gasneti_fatalerror("SCRATCH SPACE TOO SMALL Please set it to at least (%"PRIuPTR" bytes) through the GASNET_COLL_SCRATCH_SIZE environment variable", (uintptr_t) total_images); - } ret->pipe_seg_size = gasneti_getenv_int_withdefault("GASNET_COLL_PIPE_SEG_SIZE", MIN(min_scratch_size, gex_AM_LUBRequestLong())/total_images, 1); /* if(ret->pipe_seg_size == 0) { ret->pipe_seg_size = MIN(min_scratch_size, gex_AM_LUBRequestLong())/total_images; @@ -618,14 +615,7 @@ gasnete_coll_autotune_info_t* gasnete_coll_autotune_init(gasnet_team_handle_t te } } - if(ret->pipe_seg_size == 0) { - if(mynode == 0) { - fprintf(stderr, "WARNING: GASNET_COLL_PIPE_SEG_SIZE has been set to 0 bytes\n"); - fprintf(stderr, "WARNING: Disabling Optimized Rooted Collectives\n"); - } - - } - + /*initialize the autotune size array to 2 so we always get a binary tree*/ for(i=0; ibcast_tree_radix_limits[i] = 3; @@ -1818,6 +1808,10 @@ gasnete_coll_implementation_t gasnete_coll_autotune_get_bcast_algorithm(gasnet_t gasnete_coll_implementation_print(ret, stderr); } + if ((nbytes > eager_limit) && !(flags & GASNETE_COLL_SUBORDINATE)) { + GASNETE_COLL_CHECK_NO_SCRATCH(team); + } + return ret; } @@ -1893,6 +1887,10 @@ 
gasnete_coll_autotune_get_scatter_algorithm(gasnet_team_handle_t team, void *dst gasnete_coll_implementation_print(ret, stderr); } + if ((nbytes > eager_limit) && !(flags & GASNETE_COLL_SUBORDINATE)) { + GASNETE_COLL_CHECK_NO_SCRATCH(team); + } + return ret; } @@ -1972,6 +1970,10 @@ gasnete_coll_autotune_get_gather_algorithm(gasnet_team_handle_t team,gasnet_imag gasnete_coll_implementation_print(ret, stderr); } + if ((nbytes > eager_limit) && !(flags & GASNETE_COLL_SUBORDINATE)) { + GASNETE_COLL_CHECK_NO_SCRATCH(team); + } + return ret; @@ -2015,6 +2017,8 @@ gasnete_coll_autotune_get_gather_all_algorithm(gasnet_team_handle_t team, void * gasnete_coll_implementation_print(ret, stderr); } + GASNETE_COLL_CHECK_NO_SCRATCH(team); // All Gather_all calls "count" + return ret; } @@ -2060,6 +2064,8 @@ gasnete_coll_autotune_get_exchange_algorithm(gasnet_team_handle_t team, void *ds gasnete_coll_implementation_print(ret, stderr); } + GASNETE_COLL_CHECK_NO_SCRATCH(team); // All Exchange calls "count" + return ret; } diff --git a/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_coll.h b/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_coll.h index d51cda285d2b..c31989e3a37f 100644 --- a/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_coll.h +++ b/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_coll.h @@ -140,8 +140,7 @@ extern void gasnet_coll_set_dissem_limit(gasnet_team_handle_t _team, size_t _dis #else #define gasnete_coll_format_addrlist(list,flags) gasneti_extern_strdup("[LIST]") #endif - // TODO-EX: Remove this work-around for fact that collective's "team" is not a "tm" - #define GASNETI_RADDRSTR_COLL(root,ptr) GASNETI_RADDRSTR((gex_TM_t)(uintptr_t)1,root,ptr) + #define GASNETI_RADDRSTR_COLL(root,ptr) GASNETI_RADDRSTR(team->e_tm,root,ptr) // Legacy Collective OPs #define GASNETI_TRACE_COLL_BROADCAST(name,team,dst,root,src,nbytes,flags) do { \ GASNETI_TRACE_EVENT_VAL(W,name,nbytes); \ diff --git 
a/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_coll_internal.h b/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_coll_internal.h index f27495ca474b..7c1255c67d12 100644 --- a/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_coll_internal.h +++ b/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_coll_internal.h @@ -62,7 +62,7 @@ /*---------------------------------------------------------------------------------*/ /* conduits may override this to relocate the ref-coll handlers */ #ifndef GASNETE_COLL_HANDLER_BASE -#define GASNETE_COLL_HANDLER_BASE 118 +#define GASNETE_COLL_HANDLER_BASE 117 #endif #define _hidx_gasnete_coll_p2p_memcpy_reqh (GASNETE_COLL_HANDLER_BASE+0) @@ -75,6 +75,7 @@ #define _hidx_gasnete_coll_p2p_med_counting_reqh (GASNETE_COLL_HANDLER_BASE+7) #define _hidx_gasnete_coll_scratch_update_reqh (GASNETE_COLL_HANDLER_BASE+8) #define _hidx_gasnete_subteam_op_reqh (GASNETE_COLL_HANDLER_BASE+9) +#define _hidx_gasnete_rexchgv_reqh (GASNETE_COLL_HANDLER_BASE+10) /*---------------------------------------------------------------------------------*/ /* Forward type decls and typedefs: */ @@ -168,7 +169,6 @@ extern void gasnete_coll_active_fini(void); /*---------------------------------------------------------------------------------*/ -#define GASNETE_COLL_MIN_SCRATCH_SIZE_DEFAULT 1024 #define GASNETE_COLL_MAX_SCRATCH_SIZE 0xffffffff #ifndef GASNETE_COLL_SCRATCH_SIZE @@ -176,6 +176,11 @@ extern void gasnete_coll_active_fini(void); #define GASNETE_COLL_SCRATCH_SIZE_DEFAULT (2*(1024*1024)) #endif +#ifndef GASNETE_COLL_SCRATCH_SIZE_MIN +// Default minimum recommendation +#define GASNETE_COLL_SCRATCH_SIZE_MIN MIN(GASNETI_CACHE_LINE_BYTES, 64) +#endif + #if 0 #define GASNETE_COLL_MIN_LOC_SCRATCH_SIZE 256 #define GASNETE_COLL_MAX_LOC_SCRATCH_SIZE 0xffffffff @@ -304,6 +309,9 @@ struct gasnete_coll_team_t_ { uint32_t sequence; /* arbitrary non-zero starting value */ + // Count of collectives on NO_SCRATCH teams + int no_scratch_count; + #if 
GASNET_PAR && GASNET_DEBUG gasneti_mutex_t threads_mutex; #endif @@ -333,6 +341,14 @@ struct gasnete_coll_team_t_ { gasnete_all_barrier_fini barrier_fini; gasneti_progressfn_t barrier_pf; + // Stuff for UnorderedExchangeV + struct { + uint8_t *data[2]; + gasneti_weakatomic32_t rcvd[2][32]; + gex_HSL_t lock; // protects data pointers + int phase; + } rexchgv; + #if GASNET_DEBUG gasneti_mutex_t barrier_lock; #endif @@ -770,6 +786,16 @@ GASNETE_COLL_VALIDATE(T,GEX_RANK_INVALID,D,(N)*gasneti_nodes,GEX_RANK_INVALID,S, /* XXX: following arg validation unimplemented */ #define GASNETE_COLL_VALIDATE_REDUCE(T,DI,D,S,SB,SO,ES,EC,FN,FA,F) +// Diagnostic for non-trivial use of collectives in a NO_SCRATCH team +GASNETI_COLD extern void gasnete_count_no_scratch(gasnet_team_handle_t team); +#define GASNETE_COLL_CHECK_NO_SCRATCH(team) \ + do { \ + if_pf (!(team)->scratch_size && \ + !(team)->myrank && \ + ((team)->total_ranks > 1)) { \ + gasnete_count_no_scratch(team); \ + } \ + } while(0) /*---------------------------------------------------------------------------------*/ /* Forward decls and macros */ diff --git a/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_reduce.c b/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_reduce.c index 7fde6894df70..28b1ee25befc 100644 --- a/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_reduce.c +++ b/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_reduce.c @@ -209,7 +209,7 @@ GASNETE_TM_DECLARE_REDUCE_ALG(BinomialEager) #if GASNET_DEBUG // make sure this is a valid choice of algorithm gex_Rank_t rel_rank = gasnete_tm_binom_rel_root(tm, root); gex_Rank_t child_cnt = gasnete_tm_binom_children(tm, rel_rank); - gasnet_team_handle_t team = gasneti_import_tm(tm)->_coll_team; + gasnet_team_handle_t team = gasneti_import_tm_nonpair(tm)->_coll_team; gasneti_assert(team->p2p_eager_buffersz >= dt_sz * dt_cnt * child_cnt); gasneti_assert(gex_AM_LUBRequestMedium() >= dt_sz * dt_cnt ); #endif @@ -264,7 +264,7 @@ static int 
gasnete_coll_pf_tm_reduce_BinomialEagerSeg(gasnete_coll_op_t *op GASN pdata->parent = gasnete_tm_binom_parent(tm, pdata->rel_rank); pdata->age = gasnete_tm_binom_age(tm, pdata->rel_rank); - pdata->width = 1 + gasnete_coll_log2_rank(gasneti_import_tm(tm)->_size - 1); + pdata->width = 1 + gasnete_coll_log2_rank(gasneti_import_tm_nonpair(tm)->_size - 1); pdata->chunk_cnt = MIN(op->team->p2p_eager_buffersz / pdata->width, gex_AM_LUBRequestMedium()) / args->dt_sz; @@ -422,7 +422,7 @@ GASNETE_TM_DECLARE_REDUCE_ALG(BinomialEagerSeg) #if GASNET_DEBUG // make sure this is a valid choice of algorithm gex_Rank_t rel_rank = gasnete_tm_binom_rel_root(tm, root); gex_Rank_t child_cnt = gasnete_tm_binom_children(tm, rel_rank); - gasnet_team_handle_t team = gasneti_import_tm(tm)->_coll_team; + gasnet_team_handle_t team = gasneti_import_tm_nonpair(tm)->_coll_team; gasneti_assert(team->p2p_eager_buffersz >= dt_sz * child_cnt); gasneti_assert(gex_AM_LUBRequestMedium() >= dt_sz); #endif @@ -525,7 +525,7 @@ static int gasnete_coll_pf_tm_reduce_TreePut(gasnete_coll_op_t *op GASNETI_THREA GASNETE_TM_DECLARE_REDUCE_ALG(TreePut) { const size_t nbytes = dt_sz * dt_cnt; // TODO-EX: compute this only *once* - gasnet_team_handle_t team = gasneti_import_tm(tm)->_coll_team; + gasnet_team_handle_t team = gasneti_import_tm_nonpair(tm)->_coll_team; gasneti_assert(coll_params); gasnete_coll_local_tree_geom_t *geom = (gasnete_coll_local_tree_geom_t *)coll_params; @@ -855,7 +855,7 @@ static int gasnete_coll_pf_tm_reduce_TreePutSeg(gasnete_coll_op_t *op GASNETI_TH GASNETE_TM_DECLARE_REDUCE_ALG(TreePutSeg) { - gasnet_team_handle_t team = gasneti_import_tm(tm)->_coll_team; + gasnet_team_handle_t team = gasneti_import_tm_nonpair(tm)->_coll_team; gasneti_assert(coll_params); gasnete_coll_local_tree_geom_t *geom = (gasnete_coll_local_tree_geom_t *)coll_params; diff --git a/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_refcoll.c b/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_refcoll.c index 
ca1ed7020f07..b06f2cf4b675 100644 --- a/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_refcoll.c +++ b/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_refcoll.c @@ -511,10 +511,11 @@ size_t gasnete_coll_auxseg_offset = 0; gasneti_auxseg_request_t gasnete_coll_auxseg_alloc(gasnet_seginfo_t *auxseg_info) { gasneti_auxseg_request_t retval; - retval.minsz = gasneti_getenv_int_withdefault("GASNET_COLL_MIN_SCRATCH_SIZE", - GASNETE_COLL_MIN_SCRATCH_SIZE_DEFAULT,1); - retval.optimalsz = gasneti_getenv_int_withdefault("GASNET_COLL_SCRATCH_SIZE", + uintptr_t envval = gasneti_getenv_int_withdefault("GASNET_COLL_SCRATCH_SIZE", GASNETE_COLL_SCRATCH_SIZE_DEFAULT,1); + // Silently raise to implementation-defined minimum + retval.minsz = retval.optimalsz = MAX(envval, GASNETE_COLL_SCRATCH_SIZE_MIN); + if (auxseg_info == NULL){ return retval; /* initial query */ } @@ -528,6 +529,25 @@ gasneti_auxseg_request_t gasnete_coll_auxseg_alloc(gasnet_seginfo_t *auxseg_info return retval; } +// Diagnostic for non-trivial use of collectives in a NO_SCRATCH team: bumps the per-team counter and warns once, when the count equals no_scratch_warn_threshold (a threshold of 0 disables the check) +static int no_scratch_warn_threshold = 0; +extern void gasnete_count_no_scratch(gasnet_team_handle_t team) +{ + if (! no_scratch_warn_threshold) return; // disabled + int count = (team->no_scratch_count += 1); + if (count == no_scratch_warn_threshold) { + gasneti_console_message("WARNING", + "TM%x has reached the threshold of %d non-trivial collectives on teams " + "created with GEX_FLAG_TM_NO_SCRATCH. This flag is intended for use with " + "teams which do not perform any significant collectives, and its use in this " + "instance is most likely degrading performance. One should consider " + "allocating scratch space for this team. 
Alternatively, one may set the " + "environment variable GASNET_NO_SCRATCH_WARN_THRESHOLD to a higher threshold, " + "or to zero to disable this warning entirely.", + team->team_id, count); + } +} + // Initialize legacy coll_team subsystem for use by gex_TM/gex_Coll // TODO-EX: remove any portions displaced by gex-ification extern void gasnete_coll_init_subsystem(void) @@ -559,6 +579,9 @@ extern void gasnete_coll_init_subsystem(void) gasnetc_tm_reduce_tree_type = gasnete_coll_make_tree_type_str(reduce_tree_type); gasnete_coll_threaddata_t *td = GASNETE_COLL_MYTHREAD; // Force allocation + + + no_scratch_warn_threshold = gasneti_getenv_int_withdefault("GASNET_NO_SCRATCH_WARN_THRESHOLD", 8, 0); } /*---------------------------------------------------------------------------------*/ @@ -1365,6 +1388,7 @@ _gasnet_coll_broadcast_nb(gasnet_team_handle_t team, gex_Event_t handle; GASNETI_TRACE_COLL_BROADCAST(COLL_BROADCAST_NB,team,dst,srcimage,src,nbytes,flags); + GASNETI_CHECK_INJECT(); GASNETE_COLL_VALIDATE_BROADCAST(team,dst,srcimage,src,nbytes,flags); handle = gasnete_coll_broadcast_nb(team,dst,srcimage,src,nbytes,flags,0 GASNETI_THREAD_PASS); gasneti_AMPoll(); // No progress made until now @@ -1395,6 +1419,7 @@ GASNETI_COLL_FN_HEADER(_gasnet_coll_broadcast) gasnet_image_t srcimage, void *src, size_t nbytes, int flags GASNETI_THREAD_FARG) { GASNETI_TRACE_COLL_BROADCAST(COLL_BROADCAST,team,dst,srcimage,src,nbytes,flags); + GASNETI_CHECK_INJECT(); GASNETE_COLL_VALIDATE_BROADCAST(team,dst,srcimage,src,nbytes,flags); gasnete_coll_broadcast(team,dst,srcimage,src,nbytes,flags GASNETI_THREAD_PASS); } @@ -1424,6 +1449,7 @@ _gasnet_coll_scatter_nb(gasnet_team_handle_t team, size_t nbytes, int flags GASNETI_THREAD_FARG) { gex_Event_t handle; GASNETI_TRACE_COLL_SCATTER(COLL_SCATTER_NB,team,dst,srcimage,src,nbytes,flags); + GASNETI_CHECK_INJECT(); GASNETE_COLL_VALIDATE_SCATTER(team,dst,srcimage,src,nbytes,flags); handle = gasnete_coll_scatter_nb(team,dst,srcimage,src,nbytes,flags,0 
GASNETI_THREAD_PASS); gasneti_AMPoll(); // No progress made until now @@ -1454,6 +1480,7 @@ GASNETI_COLL_FN_HEADER(_gasnet_coll_scatter) gasnet_image_t srcimage, void *src, size_t nbytes, int flags GASNETI_THREAD_FARG) { GASNETI_TRACE_COLL_SCATTER(COLL_SCATTER,team,dst,srcimage,src,nbytes,flags); + GASNETI_CHECK_INJECT(); GASNETE_COLL_VALIDATE_SCATTER(team,dst,srcimage,src,nbytes,flags); gasnete_coll_scatter(team,dst,srcimage,src,nbytes,flags GASNETI_THREAD_PASS); } @@ -1484,6 +1511,7 @@ _gasnet_coll_gather_nb(gasnet_team_handle_t team, size_t nbytes, int flags GASNETI_THREAD_FARG) { gex_Event_t handle; GASNETI_TRACE_COLL_GATHER(COLL_GATHER_NB,team,dstimage,dst,src,nbytes,flags); + GASNETI_CHECK_INJECT(); GASNETE_COLL_VALIDATE_GATHER(team,dstimage,dst,src,nbytes,flags); handle = gasnete_coll_gather_nb(team,dstimage,dst,src,nbytes,flags,0 GASNETI_THREAD_PASS); gasneti_AMPoll(); // No progress made until now @@ -1513,6 +1541,7 @@ GASNETI_COLL_FN_HEADER(_gasnet_coll_gather) void *src, size_t nbytes, int flags GASNETI_THREAD_FARG) { GASNETI_TRACE_COLL_GATHER(COLL_GATHER,team,dstimage,dst,src,nbytes,flags); + GASNETI_CHECK_INJECT(); GASNETE_COLL_VALIDATE_GATHER(team,dstimage,dst,src,nbytes,flags); gasnete_coll_gather(team,dstimage,dst,src,nbytes,flags GASNETI_THREAD_PASS); } @@ -1540,6 +1569,7 @@ _gasnet_coll_gather_all_nb(gasnet_team_handle_t team, gex_Event_t handle; GASNETI_TRACE_COLL_GATHER_ALL(COLL_GATHER_ALL_NB,team,dst,src,nbytes,flags); + GASNETI_CHECK_INJECT(); GASNETE_COLL_VALIDATE_GATHER_ALL(team,dst,src,nbytes,flags); handle = gasnete_coll_gather_all_nb(team,dst,src,nbytes,flags,0 GASNETI_THREAD_PASS); gasneti_AMPoll(); // No progress made until now @@ -1566,6 +1596,7 @@ GASNETI_COLL_FN_HEADER(_gasnet_coll_gather_all) void *dst, void *src, size_t nbytes, int flags GASNETI_THREAD_FARG) { GASNETI_TRACE_COLL_GATHER_ALL(COLL_GATHER_ALL,team,dst,src,nbytes,flags); + GASNETI_CHECK_INJECT(); GASNETE_COLL_VALIDATE_GATHER_ALL(team,dst,src,nbytes,flags); 
gasnete_coll_gather_all(team,dst,src,nbytes,flags GASNETI_THREAD_PASS); } @@ -1593,6 +1624,7 @@ _gasnet_coll_exchange_nb(gasnet_team_handle_t team, size_t nbytes, int flags GASNETI_THREAD_FARG) { gex_Event_t handle; GASNETI_TRACE_COLL_EXCHANGE(COLL_EXCHANGE_NB,team,dst,src,nbytes,flags); + GASNETI_CHECK_INJECT(); GASNETE_COLL_VALIDATE_EXCHANGE(team,dst,src,nbytes,flags); handle = gasnete_coll_exchange_nb(team,dst,src,nbytes,flags,0 GASNETI_THREAD_PASS); gasneti_AMPoll(); // No progress made until now @@ -1619,6 +1651,7 @@ GASNETI_COLL_FN_HEADER(_gasnet_coll_exchange) void *dst, void *src, size_t nbytes, int flags GASNETI_THREAD_FARG) { GASNETI_TRACE_COLL_EXCHANGE(COLL_EXCHANGE,team,dst,src,nbytes,flags); + GASNETI_CHECK_INJECT(); GASNETE_COLL_VALIDATE_EXCHANGE(team,dst,src,nbytes,flags); gasnete_coll_exchange(team,dst,src,nbytes,flags GASNETI_THREAD_PASS); } @@ -2185,7 +2218,7 @@ static int gasnete_coll_pf_barrier(gasnete_coll_op_t *op GASNETI_THREAD_FARG) { extern gex_Event_t gasnete_tm_barrier_nb_default(gex_TM_t e_tm, gex_Flags_t flags GASNETI_THREAD_FARG) { - gasnet_team_handle_t team = gasneti_import_tm(e_tm)->_coll_team; + gasnet_team_handle_t team = gasneti_import_tm_nonpair(e_tm)->_coll_team; const int coll_flags = 0; gex_Event_t result; @@ -2219,7 +2252,7 @@ gasnete_tm_barrier_nb_default(gex_TM_t e_tm, gex_Flags_t flags GASNETI_THREAD_FA extern void gasnete_tm_barrier_default(gex_TM_t e_tm, gex_Flags_t flags GASNETI_THREAD_FARG) { - gasnet_team_handle_t team = gasneti_import_tm(e_tm)->_coll_team; + gasnet_team_handle_t team = gasneti_import_tm_nonpair(e_tm)->_coll_team; gasnete_coll_consensus_barrier(team GASNETI_THREAD_PASS); } @@ -2240,7 +2273,7 @@ gasnete_tm_broadcast_nb_default(gex_TM_t e_tm, gex_Rank_t root, size_t nbytes, gex_Flags_t flags, uint32_t sequence GASNETI_THREAD_FARG) { - gasnet_team_handle_t team = gasneti_import_tm(e_tm)->_coll_team; + gasnet_team_handle_t team = gasneti_import_tm_nonpair(e_tm)->_coll_team; int coll_flags = 
GASNET_COLL_LOCAL | GASNET_COLL_IN_MYSYNC | GASNET_COLL_OUT_MYSYNC; coll_flags |= (flags & GASNETI_FLAG_COLL_SUBORDINATE) ? GASNETE_COLL_SUBORDINATE : 0; return gasnete_coll_broadcast_nb(team, dst, root, (/*non-const*/ void*)src, @@ -2260,7 +2293,7 @@ gasnete_tm_generic_reduce_nb(gex_TM_t tm, gex_Rank_t root, void *dst, const void gasnete_coll_scratch_req_t *scratch_req GASNETI_THREAD_FARG) { - gasnet_team_handle_t team = gasneti_import_tm(tm)->_coll_team; + gasnet_team_handle_t team = gasneti_import_tm_nonpair(tm)->_coll_team; gex_Event_t result; gasnete_coll_threads_lock(team, coll_flags GASNETI_THREAD_PASS); @@ -2328,9 +2361,10 @@ gasnete_tm_reduce_nb_default( gex_OP_t opcode, gex_Coll_ReduceFn_t user_fnptr, void *user_cdata, gex_Flags_t flags, uint32_t sequence GASNETI_THREAD_FARG) { - gasneti_TM_t i_tm = gasneti_import_tm(e_tm); + gasneti_TM_t i_tm = gasneti_import_tm_nonpair(e_tm); GASNETI_TRACE_TM_REDUCE(COLL_REDUCE_NB,e_tm,root,dst,src,dt,dt_sz,dt_cnt,opcode,user_fnptr,user_cdata,flags); + GASNETI_CHECK_INJECT(); // Argument validation // TODO-EX: factor to avoid cloning this logic to conduit collectives @@ -2380,6 +2414,9 @@ gasnete_tm_reduce_nb_default( gasneti_fatalerror("gex_Coll_ReduceToOneNB: (dt_sz == %"PRIuSZ") is TOO LARGE for this implementation", dt_sz); } + if ( !(flags & GASNETI_FLAG_COLL_SUBORDINATE)) { + GASNETE_COLL_CHECK_NO_SCRATCH(team); + } } // TODO-EX: stop abusing implementation_t argument to pass the geom @@ -2408,7 +2445,7 @@ gasnete_tm_generic_reduce_all_nb( gasnete_coll_scratch_req_t *scratch_req GASNETI_THREAD_FARG) { - gasnet_team_handle_t team = gasneti_import_tm(tm)->_coll_team; + gasnet_team_handle_t team = gasneti_import_tm_nonpair(tm)->_coll_team; gex_Event_t result; gasnete_coll_threads_lock(team, coll_flags GASNETI_THREAD_PASS); @@ -2476,9 +2513,10 @@ gasnete_tm_reduce_all_nb_default( gex_OP_t opcode, gex_Coll_ReduceFn_t user_fnptr, void *user_cdata, gex_Flags_t flags, uint32_t sequence GASNETI_THREAD_FARG) { - 
gasneti_TM_t i_tm = gasneti_import_tm(e_tm); + gasneti_TM_t i_tm = gasneti_import_tm_nonpair(e_tm); GASNETI_TRACE_TM_REDUCE_ALL(COLL_REDUCE_ALL_NB,e_tm,dst,src,dt,dt_sz,dt_cnt,opcode,user_fnptr,user_cdata,flags); + GASNETI_CHECK_INJECT(); // Argument validation // TODO-EX: factor to avoid cloning this logic to conduit collectives @@ -2508,6 +2546,8 @@ gasnete_tm_reduce_all_nb_default( opcode, user_fnptr, user_cdata, 0, NULL, 0 GASNETI_THREAD_PASS); + GASNETE_COLL_CHECK_NO_SCRATCH((gasnet_team_handle_t)i_tm->_coll_team); // All reduce-to-all calls "count" + gasneti_AMPoll(); // No progress made until now return result; } diff --git a/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_refcoll.h b/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_refcoll.h index 96421db0f0f2..84f9cea52c47 100644 --- a/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_refcoll.h +++ b/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_refcoll.h @@ -48,9 +48,11 @@ SHORT_HANDLER_NOBITS_DECL(gasnete_coll_scratch_update_reqh, 2); #endif MEDIUM_HANDLER_NOBITS_DECL(gasnete_subteam_op_reqh,4); +MEDIUM_HANDLER_NOBITS_DECL(gasnete_rexchgv_reqh,4); #define GASNETE_REFCOLL_HANDLERS() \ GASNETE_COLL_P2P_HANDLERS() GASNETE_COLL_SCRATCH_HANDLERS() \ - gasneti_handler_tableentry_no_bits(gasnete_subteam_op_reqh,4,REQUEST,MEDIUM,0), + gasneti_handler_tableentry_no_bits(gasnete_subteam_op_reqh,4,REQUEST,MEDIUM,0), \ + gasneti_handler_tableentry_no_bits(gasnete_rexchgv_reqh,4,REQUEST,MEDIUM,0), #endif diff --git a/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_team.c b/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_team.c index ddae8244dcb6..b13b418dc223 100644 --- a/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_team.c +++ b/third-party/gasnet/gasnet-src/extended-ref/coll/gasnet_team.c @@ -89,7 +89,7 @@ static void initialize_team_fields( // Detect and optimize for storage in symmetric offset case uintptr_t symmetric_offset = 0; int is_symmetric = 0; - if 
(team->total_ranks > 1) { + if (scratch_size && (team->total_ranks > 1)) { const gasnet_seginfo_t *si = gasneti_seginfo + team->rel2act_map[0]; symmetric_offset = (uintptr_t)scratch_addrs[0] - (uintptr_t)(si->addr); if (symmetric_offset < si->size) { @@ -130,7 +130,7 @@ static void initialize_team_fields( GASNETI_TRACE_PRINTF(W,("Team TM0:%i scratch: size=%"PRIuSZ" symmetric_offset=%"PRIuPTR" (auxseg)", gasneti_mynode, scratch_size, gasnete_coll_auxseg_offset)); } - team->myscratch = (void *)gasnete_coll_scratch_base(team, team->myrank); + team->myscratch = team->scratch_size ? (void *)gasnete_coll_scratch_base(team, team->myrank) : NULL; #if GASNET_PAR && GASNET_DEBUG gasneti_mutex_init(&team->threads_mutex); @@ -151,6 +151,7 @@ static void initialize_team_fields( gasnete_coll_alloc_new_scratch_status(team); team->scratch_free_list = NULL; gex_HSL_Init(&team->child.lock); + gex_HSL_Init(&team->rexchgv.lock); #ifndef GASNETE_COLL_P2P_OVERRIDE gex_HSL_Init(&team->p2p_lock); @@ -502,12 +503,6 @@ gasnet_team_handle_t gasnete_coll_team_split(gasnet_team_handle_t parent, /* It would be better to add some sanity check for team correctness here. */ /* create a team */ - - // scratch address info is "local" by construction - gasneti_assert(! (flags & (GEX_FLAG_TM_GLOBAL_SCRATCH | GEX_FLAG_TM_LOCAL_SCRATCH | - GEX_FLAG_TM_SYMMETRIC_SCRATCH | GEX_FLAG_TM_NO_SCRATCH))); - flags |= GEX_FLAG_TM_LOCAL_SCRATCH; - newteam = gasnete_coll_team_create(parent, new_total_ranks, new_myrank, rank_map, scratch_size, &scratch_addr, flags GASNETI_THREAD_PASS); @@ -867,3 +862,216 @@ void gasnete_subteam_ID( } /* ------------------------------------------------------------------------------------ */ + +// Blocking Rotated, ExchangeV utility function +// +// Takes only local data and length, and computes (and returns) the total length. +// Returns data address via *dst_p +// Returns optional lengths array via *len_p, if non-NULL. 
+// Both arrays are dynamically allocated and the caller is responsible for freeing them. +// In the event of an "empty" result (returning 0) both returned pointers are NULL. +// +// In order to avoid the cost (time and space) of an in-memory rotation, this +// implementation does not return its result in the normal [0...nranks) order. +// Instead the data starts with the local contribution, followed by the remaining +// ranks in order with wrap-around: +// myrank, (myrank+1)%nranks, (myrank+2)%nranks, ... +// A caller can index the data in "normal" order with some modular arithmetic. +// +// Boundaries between the variable contributions can be determined by examining +// the (optional) array of lengths available via len_p. Note that this array +// is in the same rotated order as the data buffer. +// +// TODO-EX: use relevant gex_Coll_*() facilities, if any, when available +// TODO-EX: support for total_len > 2^32 + +static void * +gasnete_rexchgv_data(gasnete_coll_team_t team, int phase, size_t size) { + uint8_t *data = team->rexchgv.data[phase]; + if_pf (! data) { + gex_HSL_Lock(&team->rexchgv.lock); + data = team->rexchgv.data[phase]; + if (! data) { + data = gasneti_malloc(size); + team->rexchgv.data[phase] = data; + } + gex_HSL_Unlock(&team->rexchgv.lock); + } + return data; +} + +// It is not permissible to omit zero-length transfers from the second +// ExchangeV, because their synchronization side-effect is required. However, +// adding nbytes==0 to rexchgv.rcvd[phase][step] does not provide any signal of +// the arrival. So, we replace zero by an arbitrary non-zero value in the rcvd +// accounting. 
+#define gasnete_rexchgv_zero_recvd 42 + +void gasnete_rexchgv_reqh( + gex_Token_t token, void *buf, size_t nbytes, + gex_AM_Arg_t team_id, gex_AM_Arg_t arg1, + gex_AM_Arg_t total_len, gex_AM_Arg_t offset) +{ + gasnete_coll_team_t team = gasnete_coll_team_lookup(team_id); + int phase = arg1 & 1; + int step = arg1 >> 1; + uint8_t *data = gasnete_rexchgv_data(team, phase, total_len); + GASNETI_MEMCPY(data + (uint32_t)offset, buf, nbytes); + size_t increment = nbytes ? nbytes : gasnete_rexchgv_zero_recvd; + gasneti_weakatomic32_add(&team->rexchgv.rcvd[phase][step], increment, GASNETI_ATOMIC_REL); +} + +size_t gasneti_blockingRotatedExchangeV( + gex_TM_t tm, + const void *src, + size_t len, + void **dst_p, + size_t **len_p) +{ + GASNET_BEGIN_FUNCTION(); // TODO: remove this lookup + + gasnete_coll_team_t team = gasneti_import_tm_nonpair(tm)->_coll_team; + uint32_t team_id = team->team_id; + gex_Rank_t self = gex_TM_QueryRank(tm); + gex_Rank_t team_sz = gex_TM_QuerySize(tm); + int steps = 0; // ceil(log_2(team_sz)); + for (gex_Rank_t tmp = team_sz-1; tmp; tmp >>= 1) ++steps; + // Without the following hint, some gcc versions warn about massive malloc sizes below + gasneti_assume(steps <= 8*sizeof(gex_Rank_t)); + + int phase = team->rexchgv.phase; + gasneti_assert(phase == 0 || phase == 1); + + gex_Event_t event0 = GEX_EVENT_INVALID; + gex_Event_t event1 = GEX_EVENT_INVALID; + + // + // Step 1. Exchange the lengths using Bruck's concatenation algorithm + // The final rotation is omitted, saving time and space, as well as greatly + // simplifying the index arithmetic in the next step. 
+ // + size_t len_array_sz = team_sz * sizeof(len); + uint8_t *data0 = gasnete_rexchgv_data(team, phase, len_array_sz); + GASNETI_MEMCPY(data0, &len, sizeof(len)); + gex_NBI_BeginAccessRegion(0); + for (unsigned int step = 0, distance = 1; step < steps; ++step, distance *= 2) { + // Send data using stream of AMMediums + gex_Rank_t dest_rank = (self + team_sz - distance) % team_sz; + uint32_t offset = distance * sizeof(len); + uint32_t nbytes = MIN(offset, len_array_sz - offset); + uint32_t sent = 0; + uint32_t arg1 = phase | (step << 1); + size_t limit = gex_AM_MaxRequestMedium(tm,dest_rank,GEX_EVENT_GROUP,0,4); + do { + const uint32_t to_xfer = MIN(nbytes - sent, limit); + gex_AM_RequestMedium4(tm, dest_rank, _hidx_gasnete_rexchgv_reqh, + data0 + sent, to_xfer, GEX_EVENT_GROUP, 0, + team_id, arg1, len_array_sz, offset + sent); + sent += to_xfer; + } while (sent < nbytes); + + // Wait to receive this step's data + GASNET_BLOCKUNTIL(gasneti_weakatomic32_read(&team->rexchgv.rcvd[phase][step], 0) >= nbytes); + gasneti_assert_uint(gasneti_weakatomic32_read(&team->rexchgv.rcvd[phase][step], 0) ,==, nbytes); + gasneti_weakatomic32_set(&team->rexchgv.rcvd[phase][step], 0, 0); // reset + } + event0 = gex_NBI_EndAccessRegion(0); + // reset/advance + team->rexchgv.data[phase] = NULL; + phase ^= 1; + + // + // Step 2. Compute sum and 2*log(P) partial sums + // Indexing is nearly trivial due to omitting the rotation of the lengths array + // + size_t *len_array = (size_t *)data0; // NOTE: final rotation omitted + // Total size: + size_t total_len = 0; + // Local received size and per-step partials: + size_t l_sum = 0; + size_t *l_sums = gasneti_malloc(sizeof(size_t) * (steps+1)); + // Remote received size and per-step partials: + size_t r_sum = 0; + size_t *r_sums = gasneti_malloc(sizeof(size_t) * steps); + // Indexing from both ends: + int fwd_idx = 0, bwd_idx = team_sz - 1; + for (int i = 0; i < steps; ++i) { + int step_sz = 1 << MAX(0,i-1); // 1,1,2,4,8,... 
+ for (int j = 0; j < step_sz && bwd_idx; ++j) { + total_len += len_array[fwd_idx]; + r_sum += len_array[bwd_idx--]; + l_sum += len_array[fwd_idx++]; + } + l_sums[i] = l_sum; + r_sums[i] = r_sum; + } + while (fwd_idx < team_sz) { + total_len += len_array[fwd_idx++]; + } + l_sums[steps] = total_len; + + // This code is for slinging metadata, not user data. + // As such, we've assumed 32-bit (single handler argument) sizes and offsets + // are sufficient to this purpose. + if_pf ((uint64_t)total_len > UINT32_MAX) { + gasneti_fatalerror("blockingRotatedExchangeV size limit of 4GB exceeded: total_len=%"PRIuSZ, total_len); + } + + if (! total_len) { // Empty! + *dst_p = NULL; + goto out_zero_len; + } + + // + // Step 3. Bruck's concatenation algorithm generalized for variable lengths + // + uint8_t *data1 = gasnete_rexchgv_data(team, phase, total_len); + GASNETI_MEMCPY_SAFE_EMPTY(data1, src, len); + gex_NBI_BeginAccessRegion(0); + for (unsigned int step = 0, distance = 1; step < steps; ++step, distance *= 2) { + // Send data using stream of AMMediums + gex_Rank_t dest_rank = (self + team_sz - distance) % team_sz; + uint32_t offset = r_sums[step]; + uint32_t nbytes = MIN(l_sums[step], total_len - offset); + uint32_t sent = 0; + uint32_t arg1 = phase | (step << 1); + size_t limit = gex_AM_MaxRequestMedium(tm,dest_rank,GEX_EVENT_GROUP,0,4); + do { // Note: must not skip nbytes==0 case, since message is needed for synchronization + const uint32_t to_xfer = MIN(nbytes - sent, limit); + gex_AM_RequestMedium4(tm, dest_rank, _hidx_gasnete_rexchgv_reqh, + data1 + sent, to_xfer, GEX_EVENT_GROUP, 0, + team_id, arg1, total_len, offset + sent); + sent += to_xfer; + } while (sent < nbytes); + + // Wait to receive this step's data (if any) + uint32_t to_recv = l_sums[step+1] - l_sums[step]; + if (!to_recv) to_recv = gasnete_rexchgv_zero_recvd; // a non-zero value used for zero-length recv + GASNET_BLOCKUNTIL(gasneti_weakatomic32_read(&team->rexchgv.rcvd[phase][step], 0) >= to_recv); + 
gasneti_assert_uint(gasneti_weakatomic32_read(&team->rexchgv.rcvd[phase][step], 0) ,==, to_recv); + gasneti_weakatomic32_set(&team->rexchgv.rcvd[phase][step], 0, 0); // reset + } + event1 = gex_NBI_EndAccessRegion(0); + // reset/advance + team->rexchgv.data[phase] = NULL; + phase ^= 1; + + *dst_p = data1; + +out_zero_len: + team->rexchgv.phase = phase; + gasneti_free(r_sums); + gasneti_free(l_sums); + gex_Event_Wait(event0); // Source data in len_array + if (total_len && len_p) { + *len_p = len_array; + } else { + // Either len_array is empty, or caller didn't request it + if (len_p) *len_p = NULL; + gasneti_free(len_array); + } + gex_Event_Wait(event1); // Source data in data1 + + return total_len; +} +/* ------------------------------------------------------------------------------------ */ diff --git a/third-party/gasnet/gasnet-src/extended-ref/gasnet_extended.h b/third-party/gasnet/gasnet-src/extended-ref/gasnet_extended.h index f3f20bb91aee..c686316370f6 100644 --- a/third-party/gasnet/gasnet-src/extended-ref/gasnet_extended.h +++ b/third-party/gasnet/gasnet-src/extended-ref/gasnet_extended.h @@ -22,13 +22,6 @@ Initialization ============== */ -/* passes back a pointer to a handler table containing the handlers of - the extended API, which the core should register on its behalf - (the table is terminated with an entry where fnptr == NULL) - all handlers will have an index in range 100-199 // <== TODO-EX: update range here - may be called before gasnete_init() -*/ -extern gex_AM_Entry_t const *gasnete_get_handlertable(void); /* Initialize the Extended API: must be called by the core API at the end of gasnet_attach() before calls to extended API @@ -65,6 +58,7 @@ gex_Event_t _gex_RMA_GetNB( gex_Rank_t _rank, void *_src, size_t _nbytes, gex_Flags_t _flags GASNETI_THREAD_FARG) { + GASNETI_CHECK_INJECT(); GASNETI_CHECKZEROSZ_GET(NB,_tm,_dest,_rank,_src,_nbytes); gasneti_boundscheck(_tm, _rank, _src, _nbytes); if (gasnete_islocal(_tm,_rank)) { @@ -87,6 +81,7 @@ 
gex_Event_t _gex_RMA_PutNB( /*const*/ void *_src, // TODO-EX: un-comment const size_t _nbytes, gex_Event_t *_lc_opt, gex_Flags_t _flags GASNETI_THREAD_FARG) { + GASNETI_CHECK_INJECT(); GASNETI_CHECK_PUT_LCOPT(_lc_opt, 0); GASNETI_CHECKZEROSZ_PUT(NB,_tm,_rank,_dest,_src,_nbytes); gasneti_boundscheck(_tm, _rank, _dest, _nbytes); @@ -123,6 +118,7 @@ extern int gasnete_test_all (gex_Event_t *_pevent, size_t _numevents GASNETI_THR GASNETI_INLINE(_gex_Event_Test) GASNETI_WARN_UNUSED_RESULT int _gex_Event_Test(gex_Event_t _event GASNETI_THREAD_FARG) { + GASNETI_CHECK_INJECT(); int _result = GASNET_OK; if_pt (_event != GEX_EVENT_INVALID) _result = gasnete_test(_event GASNETI_THREAD_PASS); @@ -134,6 +130,7 @@ int _gex_Event_Test(gex_Event_t _event GASNETI_THREAD_FARG) { GASNETI_INLINE(_gex_Event_TestSome) int _gex_Event_TestSome(gex_Event_t *_pevent, size_t _numevents, gex_Flags_t _flags GASNETI_THREAD_FARG) { + GASNETI_CHECK_INJECT(); int _result = gasnete_test_some(_pevent,_numevents GASNETI_THREAD_PASS); GASNETI_TRACE_TRYSYNC(TEST_SYNCNB_SOME,_result); return _result; @@ -143,6 +140,7 @@ int _gex_Event_TestSome(gex_Event_t *_pevent, size_t _numevents, gex_Flags_t _fl GASNETI_INLINE(_gex_Event_TestAll) int _gex_Event_TestAll(gex_Event_t *_pevent, size_t _numevents, gex_Flags_t _flags GASNETI_THREAD_FARG) { + GASNETI_CHECK_INJECT(); int _result = gasnete_test_all(_pevent,_numevents GASNETI_THREAD_PASS); GASNETI_TRACE_TRYSYNC(TEST_SYNCNB_ALL,_result); return _result; @@ -163,6 +161,7 @@ int _gex_Event_TestAll(gex_Event_t *_pevent, size_t _numevents, gex_Flags_t _fla GASNETI_INLINE(_gex_Event_Wait) void _gex_Event_Wait(gex_Event_t _event GASNETI_THREAD_FARG) { + GASNETI_CHECK_INJECT(); GASNETI_TRACE_WAITSYNC_BEGIN(); gasnete_wait(_event GASNETI_THREAD_PASS); GASNETI_TRACE_WAITSYNC_END(WAIT_SYNCNB); @@ -180,6 +179,7 @@ void _gex_Event_Wait(gex_Event_t _event GASNETI_THREAD_FARG) { GASNETI_INLINE(_gex_Event_WaitSome) void _gex_Event_WaitSome(gex_Event_t *_pevent, size_t 
_numevents, gex_Flags_t _flags GASNETI_THREAD_FARG) { + GASNETI_CHECK_INJECT(); GASNETI_TRACE_WAITSYNC_BEGIN(); gasnete_wait_some(_pevent, _numevents GASNETI_THREAD_PASS); GASNETI_TRACE_WAITSYNC_END(WAIT_SYNCNB_SOME); @@ -190,6 +190,7 @@ void _gex_Event_WaitSome(gex_Event_t *_pevent, size_t _numevents, gex_Flags_t _f #ifndef gasnete_wait_all // TODO-EX: a non-inline function could allow some optimizations GASNETI_INLINE(gasnete_wait_all) void gasnete_wait_all(gex_Event_t *_pevent, size_t _numevents GASNETI_THREAD_FARG) { + GASNETI_CHECK_INJECT(); gasneti_AMPoll(); /* Ensure at least one poll - TODO: remove? */ gasneti_pollwhile(gasnete_test_all(_pevent, _numevents GASNETI_THREAD_PASS) == GASNET_ERR_NOT_READY); } @@ -197,6 +198,7 @@ void _gex_Event_WaitSome(gex_Event_t *_pevent, size_t _numevents, gex_Flags_t _f GASNETI_INLINE(_gex_Event_WaitAll) void _gex_Event_WaitAll(gex_Event_t *_pevent, size_t _numevents, gex_Flags_t _flags GASNETI_THREAD_FARG) { + GASNETI_CHECK_INJECT(); GASNETI_TRACE_WAITSYNC_BEGIN(); gasnete_wait_all(_pevent, _numevents GASNETI_THREAD_PASS); GASNETI_TRACE_WAITSYNC_END(WAIT_SYNCNB_ALL); @@ -230,6 +232,7 @@ int _gex_RMA_GetNBI (gex_TM_t _tm, void *_dest, gex_Rank_t _rank, void *_src, size_t _nbytes, gex_Flags_t _flags GASNETI_THREAD_FARG) { + GASNETI_CHECK_INJECT(); GASNETI_CHECKZEROSZ_GET(NBI,_tm,_dest,_rank,_src,_nbytes); gasneti_boundscheck(_tm, _rank, _src, _nbytes); if (gasnete_islocal(_tm,_rank)) { @@ -251,6 +254,7 @@ int _gex_RMA_PutNBI (gex_TM_t _tm, /*const*/ void *_src, // TODO-EX: un-comment const size_t _nbytes, gex_Event_t *_lc_opt, gex_Flags_t _flags GASNETI_THREAD_FARG) { + GASNETI_CHECK_INJECT(); GASNETI_CHECK_PUT_LCOPT(_lc_opt, 1); GASNETI_CHECKZEROSZ_PUT(NBI,_tm,_rank,_dest,_src,_nbytes); gasneti_boundscheck(_tm, _rank, _dest, _nbytes); @@ -306,6 +310,7 @@ typedef unsigned int gex_EC_t; GASNETI_INLINE(_gex_NBI_Test) GASNETI_WARN_UNUSED_RESULT int _gex_NBI_Test(gex_EC_t _mask, gex_Flags_t _flags GASNETI_THREAD_FARG) { + 
GASNETI_CHECK_INJECT(); int _retval = gasnete_test_syncnbi_mask(_mask, _flags GASNETI_THREAD_PASS); GASNETI_TRACE_TRYSYNC(TEST_SYNCNBI,_retval); return _retval; @@ -319,6 +324,7 @@ int _gex_NBI_Test(gex_EC_t _mask, gex_Flags_t _flags GASNETI_THREAD_FARG) { #endif #define gex_NBI_Wait(mask, flags) do { \ + GASNETI_CHECK_INJECT(); \ GASNETI_TRACE_WAITSYNC_BEGIN(); \ gasneti_AMPoll(); /* ensure at least one poll */ \ gasnete_wait_syncnbi_mask(mask, flags GASNETI_THREAD_GET); \ @@ -405,6 +411,7 @@ int _gex_RMA_GetBlocking (gex_TM_t _tm, void *_dest, gex_Rank_t _rank, void *_src, size_t _nbytes, gex_Flags_t _flags GASNETI_THREAD_FARG) { + GASNETI_CHECK_INJECT(); GASNETI_CHECKZEROSZ_NAMED(GASNETI_TRACE_GET_NAMED(GET_LOCAL,LOCAL,_tm,_dest,_rank,_src,_nbytes),_nbytes); gasneti_boundscheck(_tm, _rank, _src, _nbytes); if (gasnete_islocal(_tm,_rank)) { @@ -426,6 +433,7 @@ int _gex_RMA_PutBlocking (gex_TM_t _tm, /*const*/ void *_src, // TODO-EX: un-comment const size_t _nbytes, gex_Flags_t _flags GASNETI_THREAD_FARG) { + GASNETI_CHECK_INJECT(); GASNETI_CHECKZEROSZ_NAMED(GASNETI_TRACE_PUT_NAMED(PUT_LOCAL,LOCAL,_tm,_rank,_dest,_src,_nbytes),_nbytes); gasneti_boundscheck(_tm, _rank, _dest, _nbytes); if (gasnete_islocal(_tm,_rank)) { @@ -476,6 +484,7 @@ int _gex_RMA_PutVal( gex_TM_t _tm, size_t _nbytes, gex_Flags_t _flags GASNETI_THREAD_FARG) { + GASNETI_CHECK_INJECT(); gasneti_assume(_nbytes > 0); gasneti_assert_uint(_nbytes ,<=, sizeof(gex_RMA_Value_t)); gasneti_assume(_nbytes <= sizeof(gex_RMA_Value_t)); @@ -510,6 +519,7 @@ gex_Event_t _gex_RMA_PutNBVal ( size_t _nbytes, gex_Flags_t _flags GASNETI_THREAD_FARG) { + GASNETI_CHECK_INJECT(); gasneti_assume(_nbytes > 0); gasneti_assert_uint(_nbytes ,<=, sizeof(gex_RMA_Value_t)); gasneti_assume(_nbytes <= sizeof(gex_RMA_Value_t)); @@ -565,6 +575,7 @@ int _gex_RMA_PutNBIVal( size_t _nbytes, gex_Flags_t _flags GASNETI_THREAD_FARG) { + GASNETI_CHECK_INJECT(); gasneti_assume(_nbytes > 0); gasneti_assert_uint(_nbytes ,<=, 
sizeof(gex_RMA_Value_t)); gasneti_assume(_nbytes <= sizeof(gex_RMA_Value_t)); @@ -607,6 +618,7 @@ gex_RMA_Value_t _gex_RMA_GetBlockingVal ( size_t _nbytes, gex_Flags_t _flags GASNETI_THREAD_FARG) { + GASNETI_CHECK_INJECT(); gasneti_boundscheck(_tm, _rank, _src, _nbytes); if (gasnete_islocal(_tm,_rank)) { GASNETI_TRACE_GET_LOCAL(VAL,_tm,NULL,_rank,_src,_nbytes); @@ -652,6 +664,7 @@ extern int gasnet_barrier_result(int *_id); GASNETI_INLINE(_gex_Coll_BarrierBlocking) void _gex_Coll_BarrierBlocking(gex_TM_t _tm, gex_Flags_t _flags GASNETI_THREAD_FARG) { + GASNETI_CHECK_INJECT(); GASNETI_TRACE_BARRIER1(_tm,_flags); gasnete_tm_barrier(_tm, _flags GASNETI_THREAD_PASS); } @@ -665,6 +678,7 @@ void _gex_Coll_BarrierBlocking(gex_TM_t _tm, gex_Flags_t _flags GASNETI_THREAD_F GASNETI_INLINE(_gex_Coll_BarrierNB) GASNETI_WARN_UNUSED_RESULT gex_Event_t _gex_Coll_BarrierNB(gex_TM_t _tm, gex_Flags_t _flags GASNETI_THREAD_FARG) { + GASNETI_CHECK_INJECT(); GASNETI_TRACE_BARRIER2(_tm,_flags); return gasnete_tm_barrier_nb(_tm, _flags GASNETI_THREAD_PASS); } @@ -704,15 +718,15 @@ extern size_t gasneti_TM_Create( // extern gex_Rank_t gex_TM_TranslateRankToJobrank(gex_TM_t tm, gex_Rank_t rank); #define gex_TM_TranslateRankToJobrank(tm,rank) \ - gasneti_e_tm_rank_to_jobrank(tm,rank) + (GASNETI_CHECK_INJECT(), gasneti_e_tm_rank_to_jobrank(tm,rank)) // extern gex_Rank_t gex_TM_TranslateJobrankToRank(gex_TM_t tm, gex_Rank_t jobrank); #define gex_TM_TranslateJobrankToRank(tm,jobrank) \ - gasneti_e_tm_jobrank_to_rank(tm,jobrank) + (GASNETI_CHECK_INJECT(), gasneti_e_tm_jobrank_to_rank(tm,jobrank)) // extern gex_EP_Location_t gex_TM_TranslateRankToEP(gex_TM_t tm, gex_Rank_t rank, gex_Flags_t flags); #define gex_TM_TranslateRankToEP(tm,rank,flags) \ - gasneti_e_tm_rank_to_location(tm,rank,flags) + (GASNETI_CHECK_INJECT(), gasneti_e_tm_rank_to_location(tm,rank,flags)) /* ------------------------------------------------------------------------------------ */ diff --git 
a/third-party/gasnet/gasnet-src/extended-ref/gasnet_extended_help.h b/third-party/gasnet/gasnet-src/extended-ref/gasnet_extended_help.h index c256caf992e5..5fe3818f2da2 100644 --- a/third-party/gasnet/gasnet-src/extended-ref/gasnet_extended_help.h +++ b/third-party/gasnet/gasnet-src/extended-ref/gasnet_extended_help.h @@ -21,7 +21,7 @@ // TODO-EX: REMOVE THE GASNETE_PUTGET_ALWAYS* DEFINES ENTIRELY #if GASNET_CONDUIT_SMP #if GASNET_PSHM // smp w/pshm: the PSHM support handles smp loopback - #define gasnete_islocal(e_tm,rank) (gasneti_check_tm_rank(e_tm,rank),0) + #define gasnete_islocal(e_tm,rank) (gasneti_check_e_tm_rank(e_tm,rank),0) #else // smp nopshm: single-process loopback handled in header #define gasnete_islocal(e_tm,rank) (gasneti_assert(gasneti_e_tm_rank_to_jobrank(e_tm,rank) == 0),1) #endif @@ -369,7 +369,8 @@ typedef union { #define GASNETI_CHECK_PUT_LCOPT(lc_opt, isnbi) do { } while (0) #endif -// GASNETI_NBRHD_* convenience macros (same semantics w/ and w/o PSHM) +// GASNETI_NBRHD_* convenience macros +// These have the same semantics both w/ and w/o PSHM, and correctly handle TM-pair and multi-EP. // LOCAL(e_tm,rank) -> non-zero iff the indicated rank is in caller's neighborhood // LOCAL_ADDR(e_tm,rank,addr) -> address in caller's address space if the indicated rank is // in caller's neighborhood, and undefined otherwise. @@ -377,12 +378,13 @@ typedef union { // LOCAL_ADDR_OR_NULL(e_tm,rank,addr) -> address in caller's address space if the indicated rank is // in caller's neighborhood, and NULL otherwise. // input addr must be non-NULL -// Equivalents for callers using jobrank: +// Equivalents for callers using jobrank +// Due to use of only the jobrank, these are not multi-EP aware and therefore cannot be used +// alone to determine if a address is cross-mapped. 
// JOBRANK_IS_LOCAL(jobrank) -// JOBRANK_LOCAL_ADDR(jobrank,addr) -// JOBRANK_LOCAL_ADDR_OR_NULL(jobrank,addr) +// JOBRANK_LOCAL_ADDR(jobrank,addr) [DEPRECATED] // TODO-EX: -// + LOCAL_ADDR might be made smarter? +// + GASNETI_NBRHD_JOBRANK_LOCAL_ADDR needs a replacement // #if GASNET_PSHM #define _GASNETI_NBRHD_LOCAL(e_tm,rank) gasneti_pshm_in_supernode(e_tm,rank) @@ -417,13 +419,13 @@ void *gasneti_nbrhd_local_addr_or_null(gex_TM_t _e_tm, gex_Rank_t _rank, void *_ GASNETI_PUREP(gasneti_nbrhd_local_addr_or_null) #define GASNETI_NBRHD_LOCAL(e_tm,rank) \ - (gasneti_assert((rank) < gex_TM_QuerySize(e_tm)), \ + (gasneti_check_e_tm_rank((e_tm),(rank)), \ _GASNETI_NBRHD_LOCAL(e_tm,rank)) #define GASNETI_NBRHD_LOCAL_ADDR(e_tm,rank,addr)\ - (gasneti_assert((rank) < gex_TM_QuerySize(e_tm)), gasneti_assert(addr), \ + (gasneti_check_e_tm_rank((e_tm),(rank)), gasneti_assert(addr), \ _GASNETI_NBRHD_LOCAL_ADDR(e_tm,rank,addr)) #define GASNETI_NBRHD_LOCAL_ADDR_OR_NULL(e_tm,rank,addr) \ - (gasneti_assert((rank) < gex_TM_QuerySize(e_tm)), gasneti_assert(addr), \ + (gasneti_check_e_tm_rank((e_tm),(rank)), gasneti_assert(addr), \ gasneti_nbrhd_local_addr_or_null(e_tm,rank,addr)) #define GASNETI_NBRHD_JOBRANK_IS_LOCAL(jobrank)\ @@ -432,9 +434,6 @@ GASNETI_PUREP(gasneti_nbrhd_local_addr_or_null) #define GASNETI_NBRHD_JOBRANK_LOCAL_ADDR(jobrank,addr)\ (gasneti_assert((jobrank) < gasneti_nodes), gasneti_assert(addr), \ _GASNETI_NBRHD_JOBRANK_LOCAL_ADDR(jobrank,addr)) -#define GASNETI_NBRHD_JOBRANK_LOCAL_ADDR_OR_NULL(jobrank,addr) \ - (gasneti_assert((jobrank) < gasneti_nodes), gasneti_assert(addr), \ - gasneti_nbrhd_jobrank_local_addr_or_null(jobrank,addr)) /* ------------------------------------------------------------------------------------ */ diff --git a/third-party/gasnet/gasnet-src/extended-ref/gasnet_extended_refbarrier.c b/third-party/gasnet/gasnet-src/extended-ref/gasnet_extended_refbarrier.c index 7acf51d42e18..88e71caf5dc4 100644 --- 
a/third-party/gasnet/gasnet-src/extended-ref/gasnet_extended_refbarrier.c +++ b/third-party/gasnet/gasnet-src/extended-ref/gasnet_extended_refbarrier.c @@ -1996,6 +1996,7 @@ int gasnete_coll_barrier_result(gasnete_coll_team_t team, int *id GASNETI_THREAD */ void gasnet_barrier_notify(int id, int flags) { + GASNETI_CHECK_INJECT(); GASNETI_TRACE_PRINTF(B, ("BARRIER_NOTIFY(team=GASNET_TEAM_ALL,id=%i,flags=%i)", id, flags)); #if GASNETI_STATS_OR_TRACE gasnete_barrier_notifytime = GASNETI_TICKS_NOW_IFENABLED(B); @@ -2007,6 +2008,7 @@ void gasnet_barrier_notify(int id, int flags) { } int gasnet_barrier_wait(int id, int flags) { + GASNETI_CHECK_INJECT(); #if GASNETI_STATS_OR_TRACE gasneti_tick_t wait_start = GASNETI_TICKS_NOW_IFENABLED(B); #endif @@ -2022,6 +2024,7 @@ int gasnet_barrier_wait(int id, int flags) { } int gasnet_barrier_try(int id, int flags) { + GASNETI_CHECK_INJECT(); int retval; gasneti_assert(GASNET_TEAM_ALL->barrier_try); @@ -2033,6 +2036,7 @@ int gasnet_barrier_try(int id, int flags) { } int gasnet_barrier(int id, int flags) { + GASNETI_CHECK_INJECT(); GASNETI_TRACE_PRINTF(B, ("BARRIER(team=GASNET_TEAM_ALL,id=%i,flags=%i)", id, flags)); gasneti_assert(GASNET_TEAM_ALL->barrier); diff --git a/third-party/gasnet/gasnet-src/extended-ref/ratomic/gasnet_ratomic.h b/third-party/gasnet/gasnet-src/extended-ref/ratomic/gasnet_ratomic.h index d2758abde253..fbf380f52cf0 100644 --- a/third-party/gasnet/gasnet-src/extended-ref/ratomic/gasnet_ratomic.h +++ b/third-party/gasnet/gasnet-src/extended-ref/ratomic/gasnet_ratomic.h @@ -576,6 +576,7 @@ gex_Rank_t gasnete_ratomic_self(gasneti_AD_t _ad, gex_Flags_t _flags) { GASNETI_INLINE(fname) _GASNETE_RATOMIC_DISP_WARN##nbnbi \ rettype fname(_GASNETE_RATOMIC_DISP_ARGS(type)) \ { \ + GASNETI_CHECK_INJECT(); \ GASNETE_TRACE_RATOMIC##nbnbi(_ad, dtcode##_dtype, _result_p, \ _tgt_rank,_tgt_addr,_opcode,_flags, \ dtcode##_fmt,dtcode##_fmt_cast, \ diff --git a/third-party/gasnet/gasnet-src/extended-ref/ratomic/gasnet_ratomic_fwd.h 
b/third-party/gasnet/gasnet-src/extended-ref/ratomic/gasnet_ratomic_fwd.h index 2693f3dc6124..998c9b1dfd7d 100644 --- a/third-party/gasnet/gasnet-src/extended-ref/ratomic/gasnet_ratomic_fwd.h +++ b/third-party/gasnet/gasnet-src/extended-ref/ratomic/gasnet_ratomic_fwd.h @@ -109,4 +109,18 @@ #define GASNETE_RATOMIC_ALWAYS_TOOLS_SAFE_gex_dt_FLT 1 #define GASNETE_RATOMIC_ALWAYS_TOOLS_SAFE_gex_dt_DBL 1 +// 3. Hooks for conduit-specific extension to create and destroy +// +// These hooks are analogous to the following: +// GASNETC_CLIENT_EXTRA_DECLS +// GASNETC_CLIENT_INIT_HOOK +// GASNETC_CLIENT_FINI_HOOK +// GASNETC_SIZEOF_CLIENT_T +// which are documented in template-conduit/gasnet_core_fwd.h + +//#define GASNETC_AD_EXTRA_DECLS (###) +//#define GASNETC_AD_INIT_HOOK(i_ad) (###) +//#define GASNETC_AD_FINI_HOOK(i_ad) (###) +//#define GASNETC_SIZEOF_AD_T() (###) + #endif // _GASNET_RATOMIC_FWD_H diff --git a/third-party/gasnet/gasnet-src/extended-ref/ratomic/gasnet_ratomic_internal.h b/third-party/gasnet/gasnet-src/extended-ref/ratomic/gasnet_ratomic_internal.h index 10bd3d1688bd..369b6b92231b 100644 --- a/third-party/gasnet/gasnet-src/extended-ref/ratomic/gasnet_ratomic_internal.h +++ b/third-party/gasnet/gasnet-src/extended-ref/ratomic/gasnet_ratomic_internal.h @@ -19,19 +19,9 @@ extern gasneti_AD_t gasneti_alloc_ad( gasneti_TM_t tm, gex_DT_t dt, gex_OP_t ops, - gex_Flags_t flags, - size_t alloc_size); + gex_Flags_t flags); void gasneti_free_ad(gasneti_AD_t ad); -#ifdef GASNETI_AD_CREATE_HOOK - extern void GASNETI_AD_CREATE_HOOK( - gasneti_AD_t real_ad, - gasneti_TM_t real_tm, - gex_DT_t dt, - gex_OP_t ops, - gex_Flags_t flags); -#endif - // // Initalizer for gasnete_ratomic[dtcode]_fn_tbl_t // @@ -76,16 +66,11 @@ void gasneti_free_ad(gasneti_AD_t ad); #endif #if GASNETE_BUILD_AMRATOMIC - extern void gasnete_amratomic_create_hook( - gasneti_AD_t real_ad, - gasneti_TM_t real_tm, - gex_DT_t dt, - gex_OP_t ops, - gex_Flags_t flags); - - #ifndef GASNETI_AD_CREATE_HOOK - 
#define GASNETI_AD_CREATE_HOOK gasnete_amratomic_create_hook - #endif + // This is effectively the default GASNETC_AD_INIT_HOOK, which is invoked + // implicitly for conduits lacking ratomics, and also explicitly from + // GASNETC_AD_INIT_HOOK when a conduit decides to use AM fallback for a + // given AD + extern void gasnete_amratomic_init_hook(gasneti_AD_t real_ad); // Force use of default "no" if tools are not suited to mixing // with RMA Put and Get. diff --git a/third-party/gasnet/gasnet-src/extended-ref/ratomic/gasnet_refratomic.c b/third-party/gasnet/gasnet-src/extended-ref/ratomic/gasnet_refratomic.c index 318a17ae8b54..0b5875e33581 100644 --- a/third-party/gasnet/gasnet-src/extended-ref/ratomic/gasnet_refratomic.c +++ b/third-party/gasnet/gasnet-src/extended-ref/ratomic/gasnet_refratomic.c @@ -28,15 +28,24 @@ gex_AD_t gasneti_export_ad(gasneti_AD_t _real_ad) { } #endif +#ifdef GASNETC_AD_EXTRA_DECLS +GASNETC_AD_EXTRA_DECLS +#endif + extern gasneti_AD_t gasneti_alloc_ad( gasneti_TM_t tm, gex_DT_t dt, gex_OP_t ops, - gex_Flags_t flags, - size_t alloc_size) + gex_Flags_t flags) { - gasneti_AD_t ad = gasneti_malloc(alloc_size ? 
alloc_size : sizeof(*ad)); - gasneti_assert(!alloc_size || alloc_size >= sizeof(*ad)); + gasneti_AD_t ad; +#ifdef GASNETC_SIZEOF_AD_T + size_t alloc_size = GASNETC_SIZEOF_AD_T(); + gasneti_assert_uint(alloc_size ,>=, sizeof(*ad)); +#else + size_t alloc_size = sizeof(*ad); +#endif + ad = gasneti_malloc(alloc_size); GASNETI_INIT_MAGIC(ad, GASNETI_AD_MAGIC); ad->_cdata = NULL; ad->_tm = tm; @@ -50,16 +59,17 @@ extern gasneti_AD_t gasneti_alloc_ad( ad->_tools_safe = -1; ad->_fn_tbl = NULL; #endif -#ifdef GASNETI_AD_ALLOC_EXTRA - GASNETI_AD_ALLOC_EXTRA(ad); +#ifndef GASNETC_AD_INIT_HOOK + size_t extra = alloc_size - sizeof(*ad); + if (extra) memset(ad + 1, 0, extra); #endif return ad; } void gasneti_free_ad(gasneti_AD_t ad) { -#ifdef GASNETI_AD_FREE_EXTRA - GASNETI_AD_FREE_EXTRA(ad); +#ifdef GASNETI_AD_FINI_HOOK + GASNETI_AD_FINI_HOOK(ad); #endif GASNETI_INIT_MAGIC(ad, GASNETI_AD_BAD_MAGIC); gasneti_free(ad); @@ -72,7 +82,7 @@ void gasneti_AD_Create( gex_OP_t ops, gex_Flags_t flags) { - gasneti_TM_t real_tm = gasneti_import_tm(tm); + gasneti_TM_t real_tm = gasneti_import_tm_nonpair(tm); // Argument validation is done here, rather than gasneti_alloc_ad(), to // allow conduit-specific extensions (such as additional types or ops). 
@@ -117,14 +127,16 @@ void gasneti_AD_Create( gasneti_assert((dt != GEX_DT_DBL) || sizeof(double) == 8); #endif - gasneti_AD_t real_ad = gasneti_alloc_ad(real_tm, dt, ops, flags, 0); + gasneti_AD_t real_ad = gasneti_alloc_ad(real_tm, dt, ops, flags); // Algorithm selection: -#ifdef GASNETI_AD_CREATE_HOOK - GASNETI_AD_CREATE_HOOK(real_ad, real_tm, dt, ops, flags); +#ifdef GASNETC_AD_INIT_HOOK + GASNETC_AD_INIT_HOOK(real_ad); +#else + gasnete_amratomic_init_hook(real_ad); +#endif gasneti_assert(real_ad->_tools_safe >= 0); gasneti_assert(real_ad->_fn_tbl != NULL); -#endif *ad_p = gasneti_export_ad(real_ad); return; @@ -867,13 +879,11 @@ GASNETE_DT_APPLY(GASNETE_AMRATOMIC_TBL) // // Create-hook to install the dispatch tables // -void gasnete_amratomic_create_hook( - gasneti_AD_t real_ad, - gasneti_TM_t real_tm, - gex_DT_t dt, - gex_OP_t ops, - gex_Flags_t flags) +void gasnete_amratomic_init_hook(gasneti_AD_t real_ad) { + gex_DT_t dt = real_ad->_dt; + gex_OP_t ops = real_ad->_ops; + real_ad->_tools_safe = 1; #define GASNETE_AMRATOMIC_TBL_CASE(dtcode) \ case dtcode##_dtype: \ @@ -886,9 +896,9 @@ void gasnete_amratomic_create_hook( #undef GASNETE_AMRATOMIC_TBL_CASE #if GASNETE_BUILD_AMRATOMIC_STUBS - GASNETI_TRACE_PRINTF(C,("gex_AD_Create(dt=%d, ops=0x%x) -> AM_stubs", (int)dt, (unsigned int)ops)); + GASNETI_TRACE_PRINTF(O,("gex_AD_Create(dt=%d, ops=0x%x) -> AM_stubs", (int)dt, (unsigned int)ops)); #else - GASNETI_TRACE_PRINTF(C,("gex_AD_Create(dt=%d, ops=0x%x) -> AM", (int)dt, (unsigned int)ops)); + GASNETI_TRACE_PRINTF(O,("gex_AD_Create(dt=%d, ops=0x%x) -> AM", (int)dt, (unsigned int)ops)); #endif } diff --git a/third-party/gasnet/gasnet-src/extended-ref/vis/gasnet_indexed.c b/third-party/gasnet/gasnet-src/extended-ref/vis/gasnet_indexed.c index bf106b797bdd..9bec8fcdd251 100644 --- a/third-party/gasnet/gasnet-src/extended-ref/vis/gasnet_indexed.c +++ b/third-party/gasnet/gasnet-src/extended-ref/vis/gasnet_indexed.c @@ -704,7 +704,7 @@ extern gex_Event_t 
gasnete_puti(gasnete_synctype_t synctype, if (GASNETI_NBRHD_LOCAL(tm,rank)) { /* purely local */ GASNETI_TRACE_EVENT(C, PUTI_NBRHD); - gasnete_indexed_memcpy(rank, 1, + gasnete_indexed_memcpy(gex_TM_TranslateRankToJobrank(tm,rank), 1, dstcount,dstlist,dstlen,srccount,srclist,srclen, flags); return GEX_EVENT_INVALID; @@ -755,7 +755,7 @@ extern gex_Event_t gasnete_geti(gasnete_synctype_t synctype, if (GASNETI_NBRHD_LOCAL(tm,rank)) { /* purely local */ GASNETI_TRACE_EVENT(C, GETI_NBRHD); - gasnete_indexed_memcpy(rank, 0, + gasnete_indexed_memcpy(gex_TM_TranslateRankToJobrank(tm,rank), 0, dstcount,dstlist,dstlen,srccount,srclist,srclen, flags); return GEX_EVENT_INVALID; diff --git a/third-party/gasnet/gasnet-src/extended-ref/vis/gasnet_vector.c b/third-party/gasnet/gasnet-src/extended-ref/vis/gasnet_vector.c index f962105bf7c2..906e814359a7 100644 --- a/third-party/gasnet/gasnet-src/extended-ref/vis/gasnet_vector.c +++ b/third-party/gasnet/gasnet-src/extended-ref/vis/gasnet_vector.c @@ -818,7 +818,7 @@ extern gex_Event_t gasnete_putv(gasnete_synctype_t synctype, if (GASNETI_NBRHD_LOCAL(tm,rank)) { /* purely local */ GASNETI_TRACE_EVENT(C, PUTV_NBRHD); - gasnete_vector_memcpy(rank, 1, + gasnete_vector_memcpy(gex_TM_TranslateRankToJobrank(tm,rank), 1, dstcount,dstlist,srccount,srclist, flags); return GEX_EVENT_INVALID; @@ -851,7 +851,7 @@ extern gex_Event_t gasnete_getv(gasnete_synctype_t synctype, if (GASNETI_NBRHD_LOCAL(tm,rank)) { /* purely local */ GASNETI_TRACE_EVENT(C, GETV_NBRHD); - gasnete_vector_memcpy(rank, 0, + gasnete_vector_memcpy(gex_TM_TranslateRankToJobrank(tm,rank), 0, dstcount,dstlist,srccount,srclist, flags); return GEX_EVENT_INVALID; diff --git a/third-party/gasnet/gasnet-src/extended-ref/vis/gasnet_vis.h b/third-party/gasnet/gasnet-src/extended-ref/vis/gasnet_vis.h index 6a76204a6fa7..23c0d2b5f0b1 100644 --- a/third-party/gasnet/gasnet-src/extended-ref/vis/gasnet_vis.h +++ b/third-party/gasnet/gasnet-src/extended-ref/vis/gasnet_vis.h @@ -247,7 +247,6 
@@ int _gasnete_vis_havepc(const void * const _ti) { gasnete_vis_pcinfo_t const * const _vis_ti = ((gasnete_vis_pcinfo_t const * const *)_ti)[2]; return _vis_ti && _vis_ti->_handler; } -// TODO-EX: comment out GASNETI_MYTHREAD_GET_OR_LOOKUP defn once we remove this sole use #define GASNETE_VIS_HAVEPC() _gasnete_vis_havepc(GASNETI_MYTHREAD_GET_OR_LOOKUP) #define _GASNETE_VIS_PCWRAP(tm,rank,flags,fnbase,syncmode,opargs) ( \ GASNETE_VIS_HAVEPC() ? \ @@ -340,6 +339,7 @@ extern gex_Event_t gasnete_VIS_pcwrapNB (_GASNETE_VIS_PCWRAP_ARGS) GASNETI_ gasnete_boundscheck_memveclist(_tm, _dstrank, _dstcount, _dstlist); \ _GASNETE_VECTOR_COMMON(degencontigop, PUTV_DEGENERATE) #define _GASNETE_VECTOR_COMMON(degencontigop,degentoken) \ + GASNETI_CHECK_INJECT(); \ gasnete_memveclist_checksizematch(_dstcount, _dstlist, _srccount, _srclist); \ if_pf (_dstcount == 0 || _srccount == 0) { /* no-op */ \ GASNETI_TRACE_EVENT(C, degentoken); \ @@ -471,6 +471,7 @@ int _gex_VIS_VectorGetNBI( gasnete_boundscheck_addrlist(_tm, _dstrank, _dstcount, _dstlist, _dstlen); \ _GASNETE_INDEXED_COMMON(degencontigop, PUTI_DEGENERATE) #define _GASNETE_INDEXED_COMMON(degencontigop,degentoken) \ + GASNETI_CHECK_INJECT(); \ gasnete_addrlist_checksizematch(_dstcount, _dstlen, _srccount, _srclen); \ if_pf (_dstcount*_dstlen == 0) { /* no-op */ \ gasneti_assert_uint(_srccount*_srclen ,==, 0); \ @@ -605,6 +606,7 @@ int _gex_VIS_IndexedGetNBI( gex_Event_t _lc_dummy; \ _GASNETE_STRIDED_COMMON(degencontigop, PUTS_DEGENERATE) #define _GASNETE_STRIDED_COMMON(degencontigop,degentoken) \ + GASNETI_CHECK_INJECT(); \ if_pf (_elemsz == 0) { \ GASNETI_TRACE_EVENT(C, degentoken); \ return 0; \ diff --git a/third-party/gasnet/gasnet-src/extended-ref/vis/gasnet_vis_internal.h b/third-party/gasnet/gasnet-src/extended-ref/vis/gasnet_vis_internal.h index bfc87cc7e29c..aace9bf365fc 100644 --- a/third-party/gasnet/gasnet-src/extended-ref/vis/gasnet_vis_internal.h +++ 
b/third-party/gasnet/gasnet-src/extended-ref/vis/gasnet_vis_internal.h @@ -339,7 +339,7 @@ extern void gasnete_packetize_verify(gasnete_packetdesc_t *pt, size_t ptidx, int // 2 = Use NP AM with a negotiated-payload size // Default is 1 for conduits with a "real" NP AM implementation, and 0 elsewhere #ifndef GASNETE_VIS_NPAM - #if GASNETC_HAVE_NP_REQ_MEDIUM && GASNETC_HAVE_NP_REP_MEDIUM + #if GASNET_NATIVE_NP_ALLOC_REQ_MEDIUM && GASNET_NATIVE_NP_ALLOC_REP_MEDIUM #define GASNETE_VIS_NPAM 1 #else #define GASNETE_VIS_NPAM 0 diff --git a/third-party/gasnet/gasnet-src/gasnet.h b/third-party/gasnet/gasnet-src/gasnet.h index 862e0515e566..86023dac54e0 100644 --- a/third-party/gasnet/gasnet-src/gasnet.h +++ b/third-party/gasnet/gasnet-src/gasnet.h @@ -85,6 +85,7 @@ GASNETT_INLINE(gasnet_attach) int gasnet_attach( gasnet_handlerentry_t *_table, int _numentries, uintptr_t _segsize, uintptr_t _minheapoffset ) { gasneti_legacy_attach_checks(0); + if (! _segsize) _segsize = GASNET_PAGESIZE; int _result = gasnetc_attach( gasneti_thunk_tm, _table, _numentries, _segsize); #if GASNET_SEGMENT_EVERYTHING gasneti_legacy_attach_checks(0); diff --git a/third-party/gasnet/gasnet-src/gasnet_am.c b/third-party/gasnet/gasnet-src/gasnet_am.c index 3e86e9ad210c..b48e18a420f1 100644 --- a/third-party/gasnet/gasnet-src/gasnet_am.c +++ b/third-party/gasnet/gasnet-src/gasnet_am.c @@ -78,7 +78,8 @@ static void gasneti_am_validate( #endif // Register handlers in the range [lowlimit,highlimit) -extern int gasneti_amregister( gex_AM_Entry_t *output, +// Thread-safety is the caller's responsibility +extern int gasneti_amregister( gasneti_EP_t i_ep, gex_AM_Entry_t *input, int numentries, int lowlimit, int highlimit, int dontcare, int *numregistered) { @@ -87,6 +88,8 @@ extern int gasneti_amregister( gex_AM_Entry_t *output, gasneti_am_validate(input, numentries); + gex_AM_Entry_t *output = i_ep->_amtbl; + for (i = 0; i < numentries; i++) { int newindex; @@ -136,8 +139,13 @@ extern int gasneti_amregister( 
gex_AM_Entry_t *output, } // Register client handlers +// This function backs gex_EP_RegisterHandlers() and gasnet_attach() +// and provides per-EP serialization of such calls. +// Internal calls occuring within gex_Client_Init() and gex_Client_Create() +// do not participate in this serialization, since they operated exclusively +// on an EP prior to returning it to the client. extern int gasneti_amregister_client( - gex_AM_Entry_t *output, + gasneti_EP_t i_ep, gex_AM_Entry_t *input, size_t numentries) { @@ -147,22 +155,28 @@ extern int gasneti_amregister_client( if_pf (input == NULL) GASNETI_RETURN_ERRR(BAD_ARG,"Invalid AM handler table"); + gasneti_mutex_lock(&i_ep->_amtbl_lock); + /* first pass - assign all fixed-index handlers */ int numreg1 = 0; - if (gasneti_amregister(output, input, numentries, + if (gasneti_amregister(i_ep, input, numentries, GASNETI_CLIENT_HANDLER_BASE, GASNETC_MAX_NUMHANDLERS, 0, &numreg1) != GASNET_OK) { + gasneti_mutex_unlock(&i_ep->_amtbl_lock); GASNETI_RETURN_ERRR(RESOURCE,"Error registering fixed-index client handlers"); } /* second pass - fill in dontcare-index handlers */ int numreg2 = 0; - if (gasneti_amregister(output, input, numentries, + if (gasneti_amregister(i_ep, input, numentries, GASNETI_CLIENT_HANDLER_BASE, GASNETC_MAX_NUMHANDLERS, 1, &numreg2) != GASNET_OK) { + gasneti_mutex_unlock(&i_ep->_amtbl_lock); GASNETI_RETURN_ERRR(RESOURCE,"Error registering variable-index client handlers"); } + gasneti_mutex_unlock(&i_ep->_amtbl_lock); + gasneti_assert_uint(numreg1 + numreg2 ,==, numentries); return GASNET_OK; @@ -171,7 +185,7 @@ extern int gasneti_amregister_client( // Wrapper to provide continued support for GASNet-1 legacy handler tables, // such as through gasnet_attach(). Only supports the clients's index range. 
// TODO-EX: should be absorbed into an eventual conduit-indep gasnet_attach() -extern int gasneti_amregister_legacy( gex_AM_Entry_t *output, +extern int gasneti_amregister_legacy( gasneti_EP_t i_ep, gasnet_handlerentry_t *table, int numentries) { if_pf (numentries == 0) return GASNET_OK; @@ -192,7 +206,7 @@ extern int gasneti_amregister_legacy( gex_AM_Entry_t *output, } /* register */ - if (gasneti_amregister_client(output, extable, numentries) != GASNET_OK) { + if (gasneti_amregister_client(i_ep, extable, numentries) != GASNET_OK) { gasneti_free(extable); GASNETI_RETURN_ERRR(RESOURCE,"Error registering client handlers"); } @@ -206,8 +220,9 @@ extern int gasneti_amregister_legacy( gex_AM_Entry_t *output, return GASNET_OK; } -// Initialize a caller-allocated handler table -extern int gasneti_amtbl_init(gex_AM_Entry_t *output) { +// Initialize handler table in a given EP +extern int gasneti_amtbl_init(gasneti_EP_t i_ep) { + gex_AM_Entry_t *output = i_ep->_amtbl; static const char *fnname = "gasneti_defaultAMHandler"; for (int i = 0; i < GASNETC_MAX_NUMHANDLERS; i++) { output[i].gex_index = 0; // marks an unused entry @@ -217,6 +232,7 @@ extern int gasneti_amtbl_init(gex_AM_Entry_t *output) { output[i].gex_cdata = NULL; output[i].gex_name = fnname; } + gasneti_mutex_init(&i_ep->_amtbl_lock); return GASNET_OK; } @@ -377,7 +393,8 @@ size_t gex_AM_Max##reqrep##cat( \ const char *fname = "gex_AM_Max" #reqrep #cat; \ gasneti_TM_t real_tm = gasneti_import_tm(tm); \ /* TODO-EX: remove allowance for real_tm == NULL */ \ - if (real_tm && (rank != GEX_RANK_INVALID) && (rank >= real_tm->_size)) { \ + gex_Rank_t tm_size = gasneti_i_tm_size(real_tm); \ + if (real_tm && (rank != GEX_RANK_INVALID) && (rank >= tm_size)) { \ gasneti_fatalerror("Call to %s() with invalid rank=%i", \ fname, (int)rank); \ } \ @@ -474,9 +491,26 @@ void gasneti_init_srcdesc(GASNETI_THREAD_FARG_ALONE) mythread->sd_is_init = 1; } #endif // GASNETI_NEED_INIT_SRCDESC + +#if GASNET_DEBUG +void 
gasneti_checknpam(int for_reply GASNETI_THREAD_FARG) { + gasneti_threaddata_t * const mythread = GASNETI_MYTHREAD; + if (mythread->sd_is_init) { + // Never valid to communicate between Prepare/Commit of Reply + if (mythread->reply_sd._magic._u == GASNETI_AM_SRCDESC_MAGIC) { + gasneti_fatalerror("Invalid GASNet call (communication injection or poll) between gex_AM_PrepareReply() and the corresponding Commit on this thread"); + } + // It *is* valid to send a Reply which may dynamically run + // *within* the execution of gex_AM_{Prepare,Commit}Request() + if (!for_reply && mythread->request_sd._magic._u == GASNETI_AM_SRCDESC_MAGIC) { + gasneti_fatalerror("Invalid GASNet call (communication injection or poll) between gex_AM_PrepareRequest() and the corresponding Commit on this thread"); + } + } +} +#endif #endif // _GEX_AM_SRCDESC_T -#ifndef GASNETC_HAVE_NP_REQ_MEDIUM +#if !defined(GASNET_NATIVE_NP_ALLOC_REQ_MEDIUM) || GASNET_CONDUIT_SMP extern gex_AM_SrcDesc_t gasnetc_AM_PrepareRequestMedium( gex_TM_t tm, gex_Rank_t rank, @@ -488,37 +522,42 @@ extern gex_AM_SrcDesc_t gasnetc_AM_PrepareRequestMedium( GASNETI_THREAD_FARG, unsigned int nargs) { + GASNETI_TRACE_PREP_REQUESTMEDIUM(tm,rank,client_buf,least_payload,most_payload,flags,nargs); + + gex_Rank_t jobrank = gasneti_e_tm_rank_to_jobrank(tm, rank); + + // Ensure at least one poll upon Request injection (exactly one if possible) +#if GASNETC_REQUESTV_POLLS // Conduit's Request{Medium,Long}V will AMPoll in Commit + if (GASNETI_NBRHD_JOBRANK_IS_LOCAL(jobrank)) GASNETC_IMMEDIATE_MAYBE_POLL(flags); +#else + GASNETC_IMMEDIATE_MAYBE_POLL(flags); +#endif + gasneti_AM_SrcDesc_t sd = gasneti_init_request_srcdesc(GASNETI_THREAD_PASS_ALONE); GASNETI_COMMON_PREP_REQ(sd,tm,rank,client_buf,least_payload,most_payload,NULL,lc_opt,flags,nargs,Medium); flags &= ~(GEX_FLAG_AM_PREPARE_LEAST_CLIENT | GEX_FLAG_AM_PREPARE_LEAST_ALLOC); - gex_Rank_t jobrank = gasneti_e_tm_rank_to_jobrank(tm, rank); - if 
(GASNETI_NBRHD_JOBRANK_IS_LOCAL(jobrank)) { - GASNETC_IMMEDIATE_MAYBE_POLL(flags); // Ensure at least one poll upon Request injection sd = gasnetc_nbrhd_PrepareRequest(sd, gasneti_Medium, jobrank, client_buf, least_payload, most_payload, NULL, lc_opt, flags, nargs); } else { - // Ensure at least one poll upon Request injection (exactly one if possible) - #if GASNETC_REQUESTV_POLLS - // Conduit's Request{Medium,Long}V will AMPoll in Commit - #else - GASNETC_IMMEDIATE_MAYBE_POLL(flags); - #endif - size_t limit = gex_AM_MaxRequestMedium(tm, rank, lc_opt, flags, nargs); + // In reference implementation, GEX_FLAG_AM_PREPARE_LEAST_ALLOC is also the MAX we allocate + gex_Flags_t limit_flags = client_buf ? flags : (flags | GEX_FLAG_AM_PREPARE_LEAST_ALLOC); + size_t limit = gex_AM_MaxRequestMedium(tm, rank, lc_opt, limit_flags, nargs); size_t size = MIN(most_payload, limit); sd->_tofree = gasneti_prepare_request_common(sd, tm, rank, client_buf, size, lc_opt, flags, nargs); gasneti_init_sd_poison(sd); } GASNETI_TRACE_PREP_RETURN(REQUEST_MEDIUM, sd); + GASNETI_CHECK_SD(client_buf, least_payload, most_payload, sd); return gasneti_export_srcdesc(sd); } -#endif // GASNETC_HAVE_NP_REQ_MEDIUM +#endif // GASNET_NATIVE_NP_ALLOC_REQ_MEDIUM -#ifndef GASNETC_HAVE_NP_REP_MEDIUM +#if !defined(GASNET_NATIVE_NP_ALLOC_REP_MEDIUM) || GASNET_CONDUIT_SMP extern gex_AM_SrcDesc_t gasnetc_AM_PrepareReplyMedium( gex_Token_t token, const void *client_buf, @@ -528,6 +567,8 @@ extern gex_AM_SrcDesc_t gasnetc_AM_PrepareReplyMedium( gex_Flags_t flags, unsigned int nargs) { + GASNETI_TRACE_PREP_REPLYMEDIUM(token,client_buf,least_payload,most_payload,flags,nargs); + gasneti_AM_SrcDesc_t sd; flags &= ~(GEX_FLAG_AM_PREPARE_LEAST_CLIENT | GEX_FLAG_AM_PREPARE_LEAST_ALLOC); @@ -540,18 +581,21 @@ extern gex_AM_SrcDesc_t gasnetc_AM_PrepareReplyMedium( sd = gasneti_init_reply_srcdesc(GASNETI_THREAD_PASS_ALONE); 
GASNETI_COMMON_PREP_REP(sd,token,client_buf,least_payload,most_payload,NULL,lc_opt,flags,nargs,Medium); - size_t limit = gex_Token_MaxReplyMedium(token, lc_opt, flags, nargs); + // In reference implementation, GEX_FLAG_AM_PREPARE_LEAST_ALLOC is also the MAX we allocate + gex_Flags_t limit_flags = client_buf ? flags : (flags | GEX_FLAG_AM_PREPARE_LEAST_ALLOC); + size_t limit = gex_Token_MaxReplyMedium(token, lc_opt, limit_flags, nargs); size_t size = MIN(most_payload, limit); sd->_tofree = gasneti_prepare_reply_common(sd, token, client_buf, size, lc_opt, flags, nargs); gasneti_init_sd_poison(sd); } GASNETI_TRACE_PREP_RETURN(REPLY_MEDIUM, sd); + GASNETI_CHECK_SD(client_buf, least_payload, most_payload, sd); return gasneti_export_srcdesc(sd); } -#endif // GASNETC_HAVE_NP_REP_MEDIUM +#endif // GASNET_NATIVE_NP_ALLOC_REP_MEDIUM -#ifndef GASNETC_HAVE_NP_REQ_LONG +#ifndef GASNET_NATIVE_NP_ALLOC_REQ_LONG extern gex_AM_SrcDesc_t gasnetc_AM_PrepareRequestLong( gex_TM_t tm, gex_Rank_t rank, @@ -564,26 +608,29 @@ extern gex_AM_SrcDesc_t gasnetc_AM_PrepareRequestLong( GASNETI_THREAD_FARG, unsigned int nargs) { + GASNETI_TRACE_PREP_REQUESTLONG(tm,rank,client_buf,least_payload,most_payload,dest_addr,flags,nargs); + + gex_Rank_t jobrank = gasneti_e_tm_rank_to_jobrank(tm, rank); + // Ensure at least one poll upon Request injection (exactly one if possible) +#if GASNETC_REQUESTV_POLLS // Conduit's Request{Medium,Long}V will AMPoll in Commit + if (GASNETI_NBRHD_JOBRANK_IS_LOCAL(jobrank)) GASNETC_IMMEDIATE_MAYBE_POLL(flags); +#else + GASNETC_IMMEDIATE_MAYBE_POLL(flags); +#endif + gasneti_AM_SrcDesc_t sd = gasneti_init_request_srcdesc(GASNETI_THREAD_PASS_ALONE); GASNETI_COMMON_PREP_REQ(sd,tm,rank,client_buf,least_payload,most_payload,dest_addr,lc_opt,flags,nargs,Long); flags &= ~(GEX_FLAG_AM_PREPARE_LEAST_CLIENT | GEX_FLAG_AM_PREPARE_LEAST_ALLOC); - gex_Rank_t jobrank = gasneti_e_tm_rank_to_jobrank(tm, rank); - if (GASNETI_NBRHD_JOBRANK_IS_LOCAL(jobrank)) { - 
GASNETC_IMMEDIATE_MAYBE_POLL(flags); // Ensure at least one poll upon Request injection sd = gasnetc_nbrhd_PrepareRequest(sd, gasneti_Long, jobrank, client_buf, least_payload, most_payload, dest_addr, lc_opt, flags, nargs); } else { - // Ensure at least one poll upon Request injection (exactly one if possible) - #if GASNETC_REQUESTV_POLLS - // Conduit's Request{Medium,Long}V will AMPoll in Commit - #else - GASNETC_IMMEDIATE_MAYBE_POLL(flags); - #endif - size_t limit = gex_AM_MaxRequestLong(tm, rank, lc_opt, flags, nargs); + // In reference implementation, GEX_FLAG_AM_PREPARE_LEAST_ALLOC is also the MAX we allocate + gex_Flags_t limit_flags = client_buf ? flags : (flags | GEX_FLAG_AM_PREPARE_LEAST_ALLOC); + size_t limit = gex_AM_MaxRequestLong(tm, rank, lc_opt, limit_flags, nargs); size_t size = MIN(most_payload, limit); sd->_tofree = gasneti_prepare_request_common(sd, tm, rank, client_buf, size, lc_opt, flags, nargs); sd->_dest_addr = dest_addr; @@ -591,11 +638,12 @@ extern gex_AM_SrcDesc_t gasnetc_AM_PrepareRequestLong( } GASNETI_TRACE_PREP_RETURN(REQUEST_LONG, sd); + GASNETI_CHECK_SD(client_buf, least_payload, most_payload, sd); return gasneti_export_srcdesc(sd); } -#endif // GASNETC_HAVE_NP_REQ_LONG +#endif // GASNET_NATIVE_NP_ALLOC_REQ_LONG -#ifndef GASNETC_HAVE_NP_REP_LONG +#ifndef GASNET_NATIVE_NP_ALLOC_REP_LONG extern gex_AM_SrcDesc_t gasnetc_AM_PrepareReplyLong( gex_Token_t token, const void *client_buf, @@ -606,6 +654,8 @@ extern gex_AM_SrcDesc_t gasnetc_AM_PrepareReplyLong( gex_Flags_t flags, unsigned int nargs) { + GASNETI_TRACE_PREP_REPLYLONG(token,client_buf,least_payload,most_payload,dest_addr,flags,nargs); + gasneti_AM_SrcDesc_t sd; flags &= ~(GEX_FLAG_AM_PREPARE_LEAST_CLIENT | GEX_FLAG_AM_PREPARE_LEAST_ALLOC); @@ -618,7 +668,9 @@ extern gex_AM_SrcDesc_t gasnetc_AM_PrepareReplyLong( sd = gasneti_init_reply_srcdesc(GASNETI_THREAD_PASS_ALONE); GASNETI_COMMON_PREP_REP(sd,token,client_buf,least_payload,most_payload,dest_addr,lc_opt,flags,nargs,Long); - 
size_t limit = gex_Token_MaxReplyLong(token, lc_opt, flags, nargs); + // In reference implementation, GEX_FLAG_AM_PREPARE_LEAST_ALLOC is also the MAX we allocate + gex_Flags_t limit_flags = client_buf ? flags : (flags | GEX_FLAG_AM_PREPARE_LEAST_ALLOC); + size_t limit = gex_Token_MaxReplyLong(token, lc_opt, limit_flags, nargs); size_t size = MIN(most_payload, limit); sd->_tofree = gasneti_prepare_reply_common(sd, token, client_buf, size, lc_opt, flags, nargs); sd->_dest_addr = dest_addr; @@ -626,11 +678,12 @@ extern gex_AM_SrcDesc_t gasnetc_AM_PrepareReplyLong( } GASNETI_TRACE_PREP_RETURN(REPLY_LONG, sd); + GASNETI_CHECK_SD(client_buf, least_payload, most_payload, sd); return gasneti_export_srcdesc(sd); } -#endif // GASNETC_HAVE_NP_REP_LONG +#endif // GASNET_NATIVE_NP_ALLOC_REP_LONG -#ifndef GASNETC_HAVE_NP_REQ_MEDIUM +#if !defined(GASNET_NATIVE_NP_ALLOC_REQ_MEDIUM) || GASNET_CONDUIT_SMP void gasnetc_AM_CommitRequestMediumM( gex_AM_Index_t handler, size_t nbytes @@ -640,7 +693,9 @@ void gasnetc_AM_CommitRequestMediumM( #endif gex_AM_SrcDesc_t sd_arg, ...) { - gasneti_AM_SrcDesc_t sd = gasneti_import_srcdesc(sd_arg); + // Conduit authors are cautioned against use of gasneti_consume_srcdesc() in native + // NPAM implementations. See the comment preceding its definition in gasnet_am.h. 
+ gasneti_AM_SrcDesc_t sd = gasneti_consume_srcdesc(sd_arg); GASNETI_COMMON_COMMIT_REQ(sd,handler,nbytes,NULL,nargs_arg,Medium); @@ -661,18 +716,15 @@ void gasnetc_AM_CommitRequestMediumM( int rc = gasneti_AMRequestMediumV(tm, rank, handler, src_addr, nbytes, lc_opt, flags, nargs, argptr); gasneti_assert(!rc); // IMMEDIATE is only permissible reason to return non-zero - if (sd->_tofree) { // Branch to avoid free(NULL) library call overhead for NPAM/cb - gasneti_free(sd->_tofree); - sd->_tofree = NULL; + if (sd->_tofree) { + gasneti_free_npam_buffer(sd); } } va_end(argptr); - - gasneti_reset_srcdesc(sd); } -#endif // GASNETC_HAVE_NP_REQ_MEDIUM +#endif // GASNET_NATIVE_NP_ALLOC_REQ_MEDIUM -#ifndef GASNETC_HAVE_NP_REP_MEDIUM +#if !defined(GASNET_NATIVE_NP_ALLOC_REP_MEDIUM) || GASNET_CONDUIT_SMP void gasnetc_AM_CommitReplyMediumM( gex_AM_Index_t handler, size_t nbytes, @@ -681,7 +733,9 @@ void gasnetc_AM_CommitReplyMediumM( #endif gex_AM_SrcDesc_t sd_arg, ...) { - gasneti_AM_SrcDesc_t sd = gasneti_import_srcdesc(sd_arg); + // Conduit authors are cautioned against use of gasneti_consume_srcdesc() in native + // NPAM implementations. See the comment preceding its definition in gasnet_am.h. 
+ gasneti_AM_SrcDesc_t sd = gasneti_consume_srcdesc(sd_arg); GASNETI_COMMON_COMMIT_REP(sd,handler,nbytes,NULL,nargs_arg,Medium); @@ -701,18 +755,15 @@ void gasnetc_AM_CommitReplyMediumM( int rc = gasneti_AMReplyMediumV(token, handler, src_addr, nbytes, lc_opt, flags, nargs, argptr); gasneti_assert(!rc); // IMMEDIATE is only permissible reason to return non-zero - if (sd->_tofree) { // Branch to avoid free(NULL) library call overhead for NPAM/cb - gasneti_free(sd->_tofree); - sd->_tofree = NULL; + if (sd->_tofree) { + gasneti_free_npam_buffer(sd); } } va_end(argptr); - - gasneti_reset_srcdesc(sd); } -#endif // GASNETC_HAVE_NP_REP_MEDIUM +#endif // GASNET_NATIVE_NP_ALLOC_REP_MEDIUM -#ifndef GASNETC_HAVE_NP_REQ_LONG +#ifndef GASNET_NATIVE_NP_ALLOC_REQ_LONG void gasnetc_AM_CommitRequestLongM( gex_AM_Index_t handler, size_t nbytes, @@ -723,7 +774,9 @@ void gasnetc_AM_CommitRequestLongM( #endif gex_AM_SrcDesc_t sd_arg, ...) { - gasneti_AM_SrcDesc_t sd = gasneti_import_srcdesc(sd_arg); + // Conduit authors are cautioned against use of gasneti_consume_srcdesc() in native + // NPAM implementations. See the comment preceding its definition in gasnet_am.h. 
+ gasneti_AM_SrcDesc_t sd = gasneti_consume_srcdesc(sd_arg); GASNETI_COMMON_COMMIT_REQ(sd,handler,nbytes,dest_addr,nargs_arg,Long); @@ -744,18 +797,15 @@ void gasnetc_AM_CommitRequestLongM( int rc = gasneti_AMRequestLongV(tm, rank, handler, src_addr, nbytes, dest_addr, lc_opt, flags, nargs, argptr); gasneti_assert(!rc); // IMMEDIATE is only permissible reason to return non-zero - if (sd->_tofree) { // Branch to avoid free(NULL) library call overhead for NPAM/cb - gasneti_free(sd->_tofree); - sd->_tofree = NULL; + if (sd->_tofree) { + gasneti_free_npam_buffer(sd); } } va_end(argptr); - - gasneti_reset_srcdesc(sd); } -#endif // GASNETC_HAVE_NP_REQ_LONG +#endif // GASNET_NATIVE_NP_ALLOC_REQ_LONG -#ifndef GASNETC_HAVE_NP_REP_LONG +#ifndef GASNET_NATIVE_NP_ALLOC_REP_LONG void gasnetc_AM_CommitReplyLongM( gex_AM_Index_t handler, size_t nbytes, @@ -765,7 +815,9 @@ void gasnetc_AM_CommitReplyLongM( #endif gex_AM_SrcDesc_t sd_arg, ...) { - gasneti_AM_SrcDesc_t sd = gasneti_import_srcdesc(sd_arg); + // Conduit authors are cautioned against use of gasneti_consume_srcdesc() in native + // NPAM implementations. See the comment preceding its definition in gasnet_am.h. 
+ gasneti_AM_SrcDesc_t sd = gasneti_consume_srcdesc(sd_arg); GASNETI_COMMON_COMMIT_REP(sd,handler,nbytes,dest_addr,nargs_arg,Long); @@ -785,21 +837,18 @@ void gasnetc_AM_CommitReplyLongM( int rc = gasneti_AMReplyLongV(token, handler, src_addr, nbytes, dest_addr, lc_opt, flags, nargs, argptr); gasneti_assert(!rc); // IMMEDIATE is only permissible reason to return non-zero - if (sd->_tofree) { // Branch to avoid free(NULL) library call overhead for NPAM/cb - gasneti_free(sd->_tofree); - sd->_tofree = NULL; + if (sd->_tofree) { + gasneti_free_npam_buffer(sd); } } va_end(argptr); - - gasneti_reset_srcdesc(sd); } -#endif // GASNETC_HAVE_NP_REP_LONG +#endif // GASNET_NATIVE_NP_ALLOC_REP_LONG /* ------------------------------------------------------------------------------------ */ // gasneti_free_aligned() is a macro, preventing direct registration as a cleanupfn -void gasneti_loopback_cleanup_threaddata(void *buf) { +void gasneti_medium_buffer_cleanup_threaddata(void *buf) { gasneti_free_aligned(buf); } diff --git a/third-party/gasnet/gasnet-src/gasnet_am.h b/third-party/gasnet/gasnet-src/gasnet_am.h index 9e05d8fe5091..e0b7d4dded3b 100644 --- a/third-party/gasnet/gasnet-src/gasnet_am.h +++ b/third-party/gasnet/gasnet-src/gasnet_am.h @@ -15,6 +15,7 @@ #define GASNETI_COMMON_AMREQUESTSHORT(tm,rank,handler,flags,numargs) do { \ GASNETI_CHECKATTACH(); \ + GASNETI_CHECK_INJECT(); \ gasneti_assert(! (flags & GEX_FLAG_AM_PREPARE_LEAST_CLIENT)); \ gasneti_assert(! (flags & GEX_FLAG_AM_PREPARE_LEAST_ALLOC)); \ gasneti_assert_int(numargs ,>=, 0); \ @@ -24,6 +25,7 @@ } while (0) #define GASNETI_COMMON_AMREQUESTMEDIUM(tm,rank,handler,source_addr,nbytes,lc_opt,flags,numargs) do { \ GASNETI_CHECKATTACH(); \ + GASNETI_CHECK_INJECT(); \ gasneti_assert(! (flags & GEX_FLAG_AM_PREPARE_LEAST_CLIENT)); \ gasneti_assert(! 
(flags & GEX_FLAG_AM_PREPARE_LEAST_ALLOC)); \ gasneti_assert_int(numargs ,>=, 0); \ @@ -37,6 +39,7 @@ } while (0) #define GASNETI_COMMON_AMREQUESTLONG(tm,rank,handler,source_addr,nbytes,dest_addr,lc_opt,flags,numargs) do { \ GASNETI_CHECKATTACH(); \ + GASNETI_CHECK_INJECT(); \ gasneti_assert(! (flags & GEX_FLAG_AM_PREPARE_LEAST_CLIENT)); \ gasneti_assert(! (flags & GEX_FLAG_AM_PREPARE_LEAST_ALLOC)); \ gasneti_assert_int(numargs ,>=, 0); \ @@ -49,6 +52,7 @@ GASNETI_CHECK_ERRR((lc_opt == GEX_EVENT_DEFER),BAD_ARG,"EVENT_DEFER is invalid for Requests"); \ } while (0) #define GASNETI_COMMON_AMREPLYSHORT(token,handler,flags,numargs) do { \ + GASNETI_CHECK_INJECT_REPLY(); \ gasneti_assert(! (flags & GEX_FLAG_AM_PREPARE_LEAST_CLIENT)); \ gasneti_assert(! (flags & GEX_FLAG_AM_PREPARE_LEAST_ALLOC)); \ gasneti_assert_int(numargs ,>=, 0); \ @@ -56,6 +60,7 @@ GASNETI_TRACE_AMREPLYSHORT(token,handler,flags,numargs); \ } while (0) #define GASNETI_COMMON_AMREPLYMEDIUM(token,handler,source_addr,nbytes,lc_opt,flags,numargs) do { \ + GASNETI_CHECK_INJECT_REPLY(); \ gasneti_assert(! (flags & GEX_FLAG_AM_PREPARE_LEAST_CLIENT)); \ gasneti_assert(! (flags & GEX_FLAG_AM_PREPARE_LEAST_ALLOC)); \ gasneti_assert_int(numargs ,>=, 0); \ @@ -68,6 +73,7 @@ GASNETI_CHECK_ERRR((lc_opt == GEX_EVENT_GROUP),BAD_ARG,"EVENT_GROUP is invalid for Replies"); \ } while (0) #define GASNETI_COMMON_AMREPLYLONG(token,handler,source_addr,nbytes,dest_addr,lc_opt,flags,numargs) do { \ + GASNETI_CHECK_INJECT_REPLY(); \ gasneti_assert(! (flags & GEX_FLAG_AM_PREPARE_LEAST_CLIENT)); \ gasneti_assert(! (flags & GEX_FLAG_AM_PREPARE_LEAST_ALLOC)); \ gasneti_assert_int(numargs ,>=, 0); \ @@ -83,6 +89,36 @@ /* ------------------------------------------------------------------------------------ */ /* utility macros for dispatching AM handlers */ +#if GASNET_DEBUG +// Note: use of GASNETI_MYTHREAD_GET_OR_LOOKUP is necessary here +// to ensure we do not require FARG/POST'd context in callers. 
+#define GASNETI_HANDLER_ENTER(isReq) \ + do { \ + gasneti_threaddata_t * const mythread = \ + GASNETI_MYTHREAD_GET_OR_LOOKUP; \ + if (mythread) { /* some conduits use AMs very early */ \ + int *cntr_p = isReq ? &mythread->request_handler_active \ + : &mythread->reply_handler_active; \ + gasneti_assert_int(*cntr_p ,==, 0); /* No recursion */ \ + *cntr_p = 1; \ + } \ + } while (0) +#define GASNETI_HANDLER_LEAVE(isReq) \ + do { \ + gasneti_threaddata_t * const mythread = \ + GASNETI_MYTHREAD_GET_OR_LOOKUP; \ + if (mythread) { /* some conduits use AMs very early */ \ + int *cntr_p = isReq ? &mythread->request_handler_active \ + : &mythread->reply_handler_active; \ + gasneti_assert_int(*cntr_p ,==, 1); \ + *cntr_p = 0; \ + } \ + } while (0) +#else + #define GASNETI_HANDLER_ENTER(isReq) ((void)0) + #define GASNETI_HANDLER_LEAVE(isReq) ((void)0) +#endif + #define _gasneti_harg(a,b,c) ,gex_AM_Arg_t #define _gasneti_harg_pass(Nm1,N,Np1) ,_pArgs[Nm1] @@ -102,10 +138,12 @@ GASNETI_META_ASC16(_gasneti_Short_handlerfn_typedefN,_gasneti_Short_handlerfn_ty else GASNETI_TRACE_AMSHORT_REPHANDLER(hid, _token, _numargs, _pArgs); \ gasneti_assert(_phandlerfn); gasneti_assert(_token); \ gasneti_assert(_pArgs || !_numargs); \ + GASNETI_HANDLER_ENTER(isReq); \ switch (_numargs) { \ GASNETI_META_DES16(_gasneti_Short_RunCaseN,_gasneti_Short_RunCaseN) \ default: gasneti_unreachable_error(("Invalid numargs=%i",_numargs)); \ } \ + GASNETI_HANDLER_LEAVE(isReq); \ GASNETI_TRACE_PRINTF(A,("AM%s_SHORT_HANDLER: handler execution complete", (isReq?"REQUEST":"REPLY"))); \ } while (0) @@ -129,10 +167,12 @@ GASNETI_META_ASC16(_gasneti_MedLong_handlerfn_typedefN,_gasneti_MedLong_handlerf gasneti_assert(_pArgs || !_numargs); \ gasneti_assert(_pData || !_datalen); \ extrachecks; \ + GASNETI_HANDLER_ENTER(isReq); \ switch (_numargs) { \ GASNETI_META_DES16(_gasneti_MedLong_RunCaseN,_gasneti_MedLong_RunCaseN) \ default: gasneti_unreachable_error(("Invalid numargs=%i",_numargs)); \ } \ + 
GASNETI_HANDLER_LEAVE(isReq); \ } while (0) // by default, we guarantee double-word alignment for data payload of medium xfers @@ -165,14 +205,14 @@ typedef enum { GASNETI_COLD extern void gasneti_defaultAMHandler(gex_Token_t token); -extern int gasneti_amtbl_init(gex_AM_Entry_t *output); -extern int gasneti_amregister( gex_AM_Entry_t *output, +extern int gasneti_amtbl_init(gasneti_EP_t i_ep); +extern int gasneti_amregister( gasneti_EP_t i_ep, gex_AM_Entry_t *input, int numentries, int lowlimit, int highlimit, int dontcare, int *numregistered); -extern int gasneti_amregister_client(gex_AM_Entry_t *output, +extern int gasneti_amregister_client(gasneti_EP_t i_ep, gex_AM_Entry_t *input, size_t numentries); -extern int gasneti_amregister_legacy(gex_AM_Entry_t *output, +extern int gasneti_amregister_legacy(gasneti_EP_t i_ep, gasnet_handlerentry_t *input, int numentries); #if GASNET_DEBUG @@ -200,10 +240,73 @@ extern int gasneti_amregister_legacy(gex_AM_Entry_t *output, #define GASNETI_TOKEN_INFO_RETURN(result, info, mask) (result) #endif +/* ------------------------------------------------------------------------------------ */ + +// GASNETC_GET_HANDLER provides a conduit with the means to control how the +// neighborhood AM support accesses the AM handler table. +#ifndef GASNETC_GET_HANDLER + /* Assumes conduit has gasnetc_handler[] as in template-conduit */ + // TODO-EX: gasnetc_handler to be replaced w/ per-endpoint data when defined + #define gasnetc_get_hentry(_ep,_index) (&gasnetc_handler[(_index)]) + #define gasnetc_get_handler(_ep,_index,_field) (gasnetc_get_hentry((_ep),(_index))->gex_##_field) +#endif + +/* ------------------------------------------------------------------------------------ */ +// Management of per-thread MaxMedium-sized buffers, one each for Request and Reply. +// These are used for "loopback" (same process) AM Mediums. +// However, they are also useful for NPAM (so long as Commit provides synchronous LC). 
+ +#include /* for gasnetc_handler[] */ + +#ifndef gasneti_alloc_perthread_medium_buffer // allows conduit-specifc overrides + extern void gasneti_medium_buffer_cleanup_threaddata(void *buf); + GASNETI_INLINE(gasneti_alloc_perthread_medium_buffer) + void *gasneti_alloc_perthread_medium_buffer(int isReq GASNETI_THREAD_FARG) { + gasneti_threaddata_t * const mythread = GASNETI_MYTHREAD; + if_pf (! mythread->requestBuf) { + // Allocate both buffers, ensuring GASNETI_MEDBUF_ALIGNMENT (dflt 8-byte) alignment of each + size_t padded_max_med = GASNETI_ALIGNUP(GASNETC_MAX_MEDIUM_NBRHD, GASNETI_MEDBUF_ALIGNMENT); + size_t sz = padded_max_med + GASNETC_MAX_MEDIUM_NBRHD; + uint8_t *buf = gasneti_malloc_aligned(GASNETI_MEDBUF_ALIGNMENT, sz); + gasneti_leak_aligned(buf); + mythread->requestBuf = buf; + mythread->replyBuf = buf + padded_max_med; + gasnete_register_threadcleanup(gasneti_medium_buffer_cleanup_threaddata, buf); + } + #if GASNET_DEBUG + if (isReq) { + gasneti_assert(! mythread->requestBuf_live); + mythread->requestBuf_live = 1; + } else { + gasneti_assert(! mythread->replyBuf_live); + mythread->replyBuf_live = 1; + } + #endif + return isReq ? 
mythread->requestBuf : mythread->replyBuf; + } + + #if GASNET_DEBUG + static void gasneti_free_perthread_medium_buffer(void *buf, int isReq GASNETI_THREAD_FARG) { + gasneti_threaddata_t * const mythread = GASNETI_MYTHREAD; + if (isReq) { + gasneti_assert(buf == mythread->requestBuf); + gasneti_assert(mythread->requestBuf_live); + mythread->requestBuf_live = 0; + } else { + gasneti_assert(buf == mythread->replyBuf); + gasneti_assert(mythread->replyBuf_live); + mythread->replyBuf_live = 0; + } + } + #else + #define gasneti_free_perthread_medium_buffer(buf, isReq_and_TI) ((void)0) + #endif +#endif + /* ------------------------------------------------------------------------------------ */ /* common logic for Negotiated Payload AMs */ -// Common argument processing and trace/stats +// Common argument processing, debug checks and trace/stats #if GASNET_DEBUG extern void gasneti_init_sd_poison(gasneti_AM_SrcDesc_t sd); extern int gasneti_test_sd_poison(void *addr, size_t len); @@ -247,22 +350,21 @@ extern int gasneti_amregister_legacy(gex_AM_Entry_t *output, } while(0) #define GASNETI_COMMON_PREP_REQ(sd,tm,dest,cbuf,least_pl,most_pl,dest_addr,lc_opt,flags,nargs,cat) \ do { \ - GASNETI_TRACE_PREP_REQUEST##cat(tm,dest,cbuf,least_pl,most_pl,dest_addr,lc_opt,flags,nargs);\ sd->_category = (int)gasneti_##cat; \ sd->_dest_addr = dest_addr; \ sd->_nargs = nargs; \ gex_Flags_t tmp_flags = flags | (cbuf ? 
GEX_FLAG_AM_PREPARE_LEAST_CLIENT \ : GEX_FLAG_AM_PREPARE_LEAST_ALLOC); \ size_t limit = gex_AM_MaxRequest##cat(tm,dest,lc_opt,tmp_flags,nargs); \ - if (dest >= gex_TM_QuerySize(tm)) \ + gex_Rank_t tm_size = gasneti_e_tm_size(tm); \ + if (dest >= tm_size) \ gasneti_fatalerror("gex_AM_PrepareRequest" _STRINGIFY(cat) ": " \ "destination rank out-of-range (%lu >= %lu)", \ - (unsigned long)dest, (unsigned long)gex_TM_QuerySize(tm)); \ + (unsigned long)dest, (unsigned long)tm_size); \ _GASNETI_CHECK_PREPARE(cbuf,least_pl,most_pl,limit,lc_opt,nargs,1,cat); \ } while(0) #define GASNETI_COMMON_PREP_REP(sd,token,cbuf,least_pl,most_pl,dest_addr,lc_opt,flags,nargs,cat) \ do { \ - GASNETI_TRACE_PREP_REPLY##cat(token,cbuf,least_pl,most_pl,dest_addr,lc_opt,flags,nargs);\ sd->_category = (int)gasneti_##cat; \ sd->_dest_addr = dest_addr; \ sd->_nargs = nargs; \ @@ -316,6 +418,18 @@ extern int gasneti_amregister_legacy(gex_AM_Entry_t *output, GASNETI_TRACE_COMMIT_REPLY##cat(handler,sd->_addr,sd->_size,dest_addr,sd->_nargs); \ _GASNETI_CHECK_COMMIT(sd,handler,nbytes,dest_addr,nargs,0,cat); \ } while(0) + + #define GASNETI_CHECK_SD(cbuf, least_payload, most_payload, sd) \ + do { \ + if (!sd) break; \ + if (cbuf) { \ + gasneti_assert(sd->_addr == cbuf); \ + } else { \ + gasneti_assert(0 == (((uintptr_t) sd->_addr) % GASNETI_MEDBUF_ALIGNMENT)); \ + } \ + gasneti_assert(sd->_size >= least_payload); \ + gasneti_assert(sd->_size <= most_payload); \ + } while(0) #else #define gasneti_init_sd_poison(sd) ((void)0) @@ -326,27 +440,17 @@ extern int gasneti_amregister_legacy(gex_AM_Entry_t *output, #endif #define GASNETI_COMMON_PREP_REQ(sd,tm,dest,cbuf,least_pl,most_pl,dest_addr,lc_opt,flags,nargs,cat) do { \ _GASNETI_COMMON_PREP_NARGS(sd,nargs); \ - GASNETI_TRACE_PREP_REQUEST##cat(tm,dest,cbuf,least_pl,most_pl,dest_addr,lc_opt,flags,nargs); \ } while(0) #define GASNETI_COMMON_PREP_REP(sd,token,cbuf,least_pl,most_pl,dest_addr,lc_opt,flags,nargs,cat) do { \ _GASNETI_COMMON_PREP_NARGS(sd,nargs); \ 
- GASNETI_TRACE_PREP_REPLY##cat(token,cbuf,least_pl,most_pl,dest_addr,lc_opt,flags,nargs); \ } while(0) #define GASNETI_COMMON_COMMIT_REQ(sd,handler,nbytes,dest_addr,nargs_arg,cat) \ GASNETI_TRACE_COMMIT_REQUEST##cat(handler,sd->_addr,sd->_size,dest_addr,sd->_nargs) #define GASNETI_COMMON_COMMIT_REP(sd,handler,nbytes,dest_addr,nargs_arg,cat) \ GASNETI_TRACE_COMMIT_REPLY##cat(handler,sd->_addr,sd->_size,dest_addr,sd->_nargs) + #define GASNETI_CHECK_SD(cbuf, least_payload, most_payload, sd) ((void)0) #endif -#define GASNETI_TRACE_PREP_REQUESTMedium(tm,dest,cbuf,least_pl,most_pl,dest_addr,lc_opt,flags,nargs) \ - GASNETI_TRACE_PREP_REQUESTMEDIUM(tm,dest,cbuf,least_pl,most_pl,flags,nargs) -#define GASNETI_TRACE_PREP_REQUESTLong(tm,dest,cbuf,least_pl,most_pl,dest_addr,lc_opt,flags,nargs) \ - GASNETI_TRACE_PREP_REQUESTLONG(tm,dest,cbuf,least_pl,most_pl,dest_addr,flags,nargs) -#define GASNETI_TRACE_PREP_REPLYMedium(token,cbuf,least_pl,most_pl,dest_addr,lc_opt,flags,nargs) \ - GASNETI_TRACE_PREP_REPLYMEDIUM(token,cbuf,least_pl,most_pl,flags,nargs) -#define GASNETI_TRACE_PREP_REPLYLong(token,cbuf,least_pl,most_pl,dest_addr,lc_opt,flags,nargs) \ - GASNETI_TRACE_PREP_REPLYLONG(token,cbuf,least_pl,most_pl,dest_addr,flags,nargs) - #define GASNETI_TRACE_COMMIT_REQUESTMedium(handler,source_addr,nbytes,dest_addr,numargs) \ GASNETI_TRACE_COMMIT_REQUESTMEDIUM(handler,source_addr,nbytes,numargs) #define GASNETI_TRACE_COMMIT_REQUESTLong(handler,source_addr,nbytes,dest_addr,numargs) \ @@ -357,17 +461,24 @@ extern int gasneti_amregister_legacy(gex_AM_Entry_t *output, GASNETI_TRACE_COMMIT_REPLYLONG(handler,source_addr,nbytes,dest_addr,numargs) #ifndef _GEX_AM_SRCDESC_T + // Allocate a buffer (use IFF client_buf is NULL) -GASNETI_INLINE(gasneti_prepare_alloc_buffer) -void *gasneti_prepare_alloc_buffer(gasneti_AM_SrcDesc_t sd) +GASNETI_INLINE(gasneti_alloc_npam_buffer) +void *gasneti_alloc_npam_buffer(gasneti_AM_SrcDesc_t sd, int isReq) { - size_t size = sd->_size; -#if GASNET_DEBUG - // 
Allocate at least one byte because zero-byte allocation - // returns NULL which then leads to ambiguity in argument checking. - if (!size) size = 1; -#endif - return (sd->_gex_buf = sd->_addr = gasneti_malloc(size)); + gasneti_assert_uint(sd->_size ,<=, GASNETC_REF_NPAM_MAX_ALLOC); + GASNET_POST_THREADINFO(sd->_thread); + void *result = gasneti_alloc_perthread_medium_buffer(isReq GASNETI_THREAD_PASS); + return (sd->_gex_buf = sd->_addr = result); +} + +GASNETI_INLINE(gasneti_free_npam_buffer) +void gasneti_free_npam_buffer(gasneti_AM_SrcDesc_t sd) +{ + gasneti_assert(sd->_tofree); + GASNET_POST_THREADINFO(sd->_thread); + gasneti_free_perthread_medium_buffer(sd->_tofree, sd->_isreq GASNETI_THREAD_PASS); + sd->_tofree = NULL; } #if GASNETI_NEED_INIT_SRCDESC @@ -387,8 +498,11 @@ gasneti_AM_SrcDesc_t gasneti_init_request_srcdesc(GASNETI_THREAD_FARG_ALONE) if (sd->_magic._u == GASNETI_AM_SRCDESC_MAGIC) { gasneti_fatalerror("Bad state - likely due to back-to-back gex_AM_PrepareRequest*() calls"); } + GASNETI_CHECK_INJECT(); GASNETI_CHECK_MAGIC(sd, GASNETI_AM_SRCDESC_BAD_MAGIC); GASNETI_INIT_MAGIC(sd, GASNETI_AM_SRCDESC_MAGIC); + // Check invariant(s) assumed by users of the srcdesc: + gasneti_assert(! sd->_tofree); #endif sd->_gex_buf = NULL; return sd; @@ -407,6 +521,7 @@ gasneti_AM_SrcDesc_t gasneti_init_reply_srcdesc(GASNETI_THREAD_FARG_ALONE) if (sd->_magic._u == GASNETI_AM_SRCDESC_MAGIC) { gasneti_fatalerror("Bad state - likely due to back-to-back gex_AM_PrepareReply*() calls"); } + GASNETI_CHECK_INJECT_REPLY(); GASNETI_CHECK_MAGIC(sd, GASNETI_AM_SRCDESC_BAD_MAGIC); GASNETI_INIT_MAGIC(sd, GASNETI_AM_SRCDESC_MAGIC); #endif @@ -414,7 +529,9 @@ gasneti_AM_SrcDesc_t gasneti_init_reply_srcdesc(GASNETI_THREAD_FARG_ALONE) return sd; } -// Return a thread-specfic SD to its "inactive" state +// Return a thread-specific SD to its "inactive" state +// Must remain non-destructive (other than manipulating _magic) +// so this can occur in a Commit prior to use of the data. 
GASNETI_INLINE(gasneti_reset_srcdesc) void gasneti_reset_srcdesc(gasneti_AM_SrcDesc_t sd) { @@ -428,9 +545,30 @@ void gasneti_reset_srcdesc(gasneti_AM_SrcDesc_t sd) #endif } +// Combined import and reset +// +// This is used in the reference implementation of Commit to "close" the +// Prepare/Commit interval immediately upon entry, rather than via a call to +// gasneti_reset_srcdesc() just prior to return (which is "best practice" as +// shown in template-conduit). This difference allows for the underlying +// conduit to use, as necessary, calls prohibited in a Prepare/Commit interval +// without concern over GASNETI_CHECK_INJECT() as seen in Bug 4174 prior to +// addition of this "consume" API. This is safe because such communication +// cannot deadlock on a limited resource given that the reference implementation +// holds (at most) a malloc'ed buffer. The same is not true in general of a +// native NPAM implementation. +GASNETI_INLINE(gasneti_consume_srcdesc) +gasneti_AM_SrcDesc_t gasneti_consume_srcdesc(gex_AM_SrcDesc_t e_sd) +{ + gasneti_AM_SrcDesc_t i_sd = gasneti_import_srcdesc(e_sd); + gasneti_reset_srcdesc(i_sd); + return i_sd; +} + GASNETI_INLINE(gasneti_prepare_common) GASNETI_WARN_UNUSED_RESULT void *gasneti_prepare_common( gasneti_AM_SrcDesc_t sd, + int isReq, const void *client_buf, size_t size, gex_Event_t *lc_opt, @@ -446,7 +584,7 @@ void *gasneti_prepare_common( sd->_addr = (/*non-const*/void *)client_buf; return NULL; } else { - return gasneti_prepare_alloc_buffer(sd); + return gasneti_alloc_npam_buffer(sd, isReq); } } @@ -463,7 +601,7 @@ void *gasneti_prepare_request_common( { sd->_dest._request._tm = tm; sd->_dest._request._rank = rank; - return gasneti_prepare_common(sd, client_buf, size, lc_opt, flags, nargs); + return gasneti_prepare_common(sd, 1, client_buf, size, lc_opt, flags, nargs); } GASNETI_INLINE(gasneti_prepare_reply_common) GASNETI_WARN_UNUSED_RESULT @@ -477,7 +615,7 @@ void *gasneti_prepare_reply_common( unsigned int nargs) { 
sd->_dest._reply._token = token; - return gasneti_prepare_common(sd, client_buf, size, lc_opt, flags, nargs); + return gasneti_prepare_common(sd, 0, client_buf, size, lc_opt, flags, nargs); } #endif // _GEX_AM_SRCDESC_T @@ -528,69 +666,6 @@ extern int gasnetc_AMReplyLongV( gasnetc_AMReplyLongV(token,hidx,src_addr,nbytes,dst_addr,lc_opt,flags,nargs,args) #endif -/* ------------------------------------------------------------------------------------ */ - -// GASNETC_MAX_{ARGS,MEDIUM,LONG}_NBRHD -// These are compile-time constants used by the "neighborhood" AM support, -// which includes "loopback" (same-process) and "AMPSHM" (shared-memory). -// As described below, these defaults are not suitable for all conduits. - -#ifndef GASNETC_MAX_ARGS_NBRHD - // Assumes gex_AM_MaxArgs() is a compile time constant. - // If not, the conduit must define GASNETC_MAX_ARGS_NBRHD to a compile-time - // constant in its gasnet_core_fwd.h. - // The value may be a conservative upper-bound if the real value cannot be - // known until run time (at the cost of wasted memory). - #define GASNETC_MAX_ARGS_NBRHD (gex_AM_MaxArgs()) -#endif -#ifndef GASNETC_MAX_MEDIUM_NBRHD - // Assumes gex_AM_LUB{Request,Reply}Medium() expand to compile-time constants - // AND that the LUB is the *greatest* upper-bound. If either property is not - // true for a given conduit, then it must define GASNETC_MAX_MEDIUM_NBRHD to - // an appropriate compile-time constant bound in its gasnet_core_fwd.h. - // The value may be a conservative upper-bound if the real value cannot be - // known until run time (at the cost of wasted memory). - #define GASNETC_MAX_MEDIUM_NBRHD MAX(gex_AM_LUBRequestMedium(),gex_AM_LUBReplyMedium()) -#endif -#ifndef GASNETC_MAX_LONG_NBRHD - // Same assumptions and usage as GASNETC_MAX_MEDIUM_NBRHD, above, but for Long. 
- #define GASNETC_MAX_LONG_NBRHD MAX(gex_AM_LUBRequestLong(),gex_AM_LUBReplyLong()) -#endif - -// GASNETC_GET_HANDLER provides a conduit with the means to control how the -// neighborhood AM support accesses the AM handler table. -#ifndef GASNETC_GET_HANDLER - /* Assumes conduit has gasnetc_handler[] as in template-conduit */ - // TODO-EX: gasnetc_handler to be replaced w/ per-endpoint data when defined - #define gasnetc_get_hentry(_ep,_index) (&gasnetc_handler[(_index)]) - #define gasnetc_get_handler(_ep,_index,_field) (gasnetc_get_hentry((_ep),(_index))->gex_##_field) -#endif - -/* ------------------------------------------------------------------------------------ */ -// Buffer management for loopback (same-process) Medium AMs - -#include /* for gasnetc_handler[] */ - -#ifndef gasneti_loopback_alloc_medium_buffer // allows conduit-specifc overrides - extern void gasneti_loopback_cleanup_threaddata(void *buf); - GASNETI_INLINE(gasneti_loopback_alloc_medium_buffer) - void *gasneti_loopback_alloc_medium_buffer(int isReq GASNETI_THREAD_FARG) { - gasneti_threaddata_t * const mythread = GASNETI_MYTHREAD; - if_pf (! mythread->loopback_requestBuf) { - // Allocate both buffers, ensuring GASNETI_MEDBUF_ALIGNMENT (dflt 8-byte) alignment of each - size_t sz = GASNETI_ALIGNUP(GASNETC_MAX_MEDIUM_NBRHD,8) + GASNETC_MAX_MEDIUM_NBRHD; - uint8_t *buf = gasneti_malloc_aligned(GASNETI_MEDBUF_ALIGNMENT, sz); - gasneti_leak_aligned(buf); - mythread->loopback_requestBuf = buf; - mythread->loopback_replyBuf = buf + GASNETI_ALIGNUP(GASNETC_MAX_MEDIUM_NBRHD,8); - gasnete_register_threadcleanup(gasneti_loopback_cleanup_threaddata, buf); - } - return isReq ? 
mythread->loopback_requestBuf : mythread->loopback_replyBuf; - } - - #define gasneti_loopback_free_medium_buffer(buf, isReq_and_TI) ((void)0) -#endif - /* ------------------------------------------------------------------------------------ */ // Types and macros common to loopback and PSHM // TODO-EX: "promote" conduit-independent gasnetc_nbrhd_* interfaces to gasneti_nbrhd_* @@ -697,27 +772,26 @@ int gasnetc_loopback_prepare_inner( { sd->_nargs = nargs; if (category == gasneti_Medium) { - sd->_void_p = gasneti_loopback_alloc_medium_buffer(isReq GASNETI_THREAD_PASS); + sd->_void_p = gasneti_alloc_perthread_medium_buffer(isReq GASNETI_THREAD_PASS); } gasneti_assert(sd->_tofree == NULL); if (isFixed) { sd->_addr = (/*non-const*/void *)client_buf; } else { - const size_t limit = (category == gasneti_Long) ? GASNETC_MAX_LONG_NBRHD : GASNETC_MAX_MEDIUM_NBRHD; - const size_t size = MIN(limit, most_payload); - sd->_size = size; - if (client_buf) { + size_t limit = (category == gasneti_Medium) ? 
GASNETC_REF_NPAM_MAX_ALLOC : GASNETC_MAX_LONG_NBRHD; + sd->_size = MIN(limit, most_payload); sd->_addr = (/*non-const*/void *)client_buf; gasneti_leaf_finish(lc_opt); } else if (category == gasneti_Medium) { + // NPAM Medium with GASNet-allocated buffer + sd->_size = MIN(GASNETC_REF_NPAM_MAX_ALLOC, most_payload); sd->_addr = sd->_gex_buf = sd->_void_p; - } else if (size <= GASNETC_MAX_MEDIUM_NBRHD) { - // Long can use medium buffer at less cost than calling malloc - sd->_addr = sd->_gex_buf = gasneti_loopback_alloc_medium_buffer(isReq GASNETI_THREAD_PASS); } else { - sd->_tofree = gasneti_prepare_alloc_buffer(sd); + // NPAM Long with GASNet-allocated buffer + sd->_size = MIN(GASNETC_REF_NPAM_MAX_ALLOC, most_payload); + sd->_tofree = gasneti_alloc_npam_buffer(sd, isReq); } } @@ -793,13 +867,12 @@ void gasnetc_loopback_commit_inner( #endif if (category == gasneti_Medium) { - gasneti_loopback_free_medium_buffer(buf, isReq GASNETI_THREAD_PASS); - } else if(!isFixed && sd->_gex_buf && (sd->_size <= GASNETC_MAX_MEDIUM_NBRHD)) { + // All Mediums + gasneti_free_perthread_medium_buffer(buf, isReq GASNETI_THREAD_PASS); + } else if(!isFixed && sd->_gex_buf) { + // NPAM Long with GASNet-allocated buffer gasneti_assert(category == gasneti_Long); - gasneti_loopback_free_medium_buffer(sd->_gex_buf, isReq GASNETI_THREAD_PASS); - } else if (sd->_tofree) { // Branch to avoid free(NULL) library call overhead for NPAM/cb - gasneti_free(sd->_tofree); - sd->_tofree = NULL; + gasneti_free_npam_buffer(sd); } } @@ -976,10 +1049,7 @@ gasneti_AM_SrcDesc_t gasnetc_nbrhd_PrepareRequest( dest_addr, lc_opt, flags, nargs); #endif if (imm) { - if (sd->_tofree) { // Branch to avoid free(NULL) library call overhead for NPAM/cb - gasneti_free(sd->_tofree); - sd->_tofree = NULL; - } + gasneti_assert(! 
sd->_tofree); gasneti_reset_srcdesc(sd); sd = NULL; // GEX_AM_SRCDESC_NO_OP } else { @@ -1050,10 +1120,7 @@ gasneti_AM_SrcDesc_t gasnetc_nbrhd_PrepareReply( #endif gasnetc_token_post_reply_checks(token, imm); if (imm) { - if (sd->_tofree) { // Branch to avoid free(NULL) library call overhead for NPAM/cb - gasneti_free(sd->_tofree); - sd->_tofree = NULL; - } + gasneti_assert(! sd->_tofree); gasneti_reset_srcdesc(sd); sd = NULL; // GEX_AM_SRCDESC_NO_OP } else { diff --git a/third-party/gasnet/gasnet-src/gasnet_atomic_bits.h b/third-party/gasnet/gasnet-src/gasnet_atomic_bits.h index 0b2619a595c7..eb33642153d0 100644 --- a/third-party/gasnet/gasnet-src/gasnet_atomic_bits.h +++ b/third-party/gasnet/gasnet-src/gasnet_atomic_bits.h @@ -99,7 +99,7 @@ side-effects (compiler fence or memory barriers) in the "_"-prefixed operations. When that is NOT the case, one can override this default behavior by defining the appropriate fencing macros. At present this is - done only for the x86/x86-64 and IA64. + done only for the x86/x86-64. In the case of the x86/x86-64 all of the read-modify-write operations include a full memory barrier but do NOT include a compiler fence. 
So @@ -1388,195 +1388,6 @@ #define _gasneti_atomic64_fetchadd _gasneti_atomic64_fetchadd #endif /* ------------------------------------------------------------------------------------ */ - #elif PLATFORM_ARCH_IA64 /* Itanium */ - #if GASNETI_ATOMIC64_NOINLINE - #error Internal error - unexpected split atomics implementation on IA64 - #endif - #if PLATFORM_COMPILER_INTEL - /* Intel compiler's inline assembly broken on Itanium (bug 384) - use intrinsics instead */ - #include - - #define GASNETI_HAVE_ATOMIC32_T 1 - typedef struct { volatile uint32_t gasneti_ctr; } gasneti_atomic32_t; - #define gasneti_atomic32_init(v) { (v) } - #define _gasneti_atomic32_increment(p) __fetchadd4_acq((unsigned int *)&((p)->gasneti_ctr),1) - #define _gasneti_atomic32_decrement(p) __fetchadd4_acq((unsigned int *)&((p)->gasneti_ctr),-1) - #define _gasneti_atomic32_read(p) ((p)->gasneti_ctr) - #define _gasneti_atomic32_set(p,v) ((p)->gasneti_ctr = (v)) - #define _gasneti_atomic32_decrement_and_test(p) \ - (__fetchadd4_acq((unsigned int *)&((p)->gasneti_ctr),-1) == 1) - #define _gasneti_atomic32_compare_and_swap(p,oval,nval) \ - (_InterlockedCompareExchange_acq((volatile unsigned int *)&((p)->gasneti_ctr),nval,oval) == (oval)) - #define _gasneti_atomic32_swap(p,nval) \ - _InterlockedExchange((volatile unsigned int *)&((p)->gasneti_ctr),nval) - GASNETI_INLINE(_gasneti_atomic32_addfetch) - uint32_t _gasneti_atomic32_addfetch(gasneti_atomic32_t *_p, const uint32_t _op) { - GASNETI_ASM_REGISTER_KEYWORD uint32_t _sum, _oval; - GASNETI_ASM_REGISTER_KEYWORD uint32_t _tmp = _p->gasneti_ctr; - do { - _sum = _op + (_oval = _tmp); - _tmp = _InterlockedCompareExchange_acq((volatile unsigned int *)&_p->gasneti_ctr,_oval,_sum); - } while (_oval != _tmp); - return _sum; - } - #define _gasneti_atomic32_addfetch _gasneti_atomic32_addfetch - - #define GASNETI_HAVE_ATOMIC64_T 1 - typedef struct { volatile uint64_t gasneti_ctr; } gasneti_atomic64_t; - #define gasneti_atomic64_init(v) { (v) } - #define 
_gasneti_atomic64_increment(p) __fetchadd8_acq((unsigned __int64 *)&((p)->gasneti_ctr),1) - #define _gasneti_atomic64_decrement(p) __fetchadd8_acq((unsigned __int64 *)&((p)->gasneti_ctr),-1) - #define _gasneti_atomic64_read(p) ((p)->gasneti_ctr) - #define _gasneti_atomic64_set(p,v) ((p)->gasneti_ctr = (v)) - #define _gasneti_atomic64_decrement_and_test(p) \ - (__fetchadd8_acq((unsigned __int64 *)&((p)->gasneti_ctr),-1) == 1) - #define _gasneti_atomic64_compare_and_swap(p,oval,nval) \ - (_InterlockedCompareExchange64_acq((volatile unsigned __int64 *)&((p)->gasneti_ctr),nval,oval) == (oval)) - #define _gasneti_atomic64_swap(p,nval) \ - _InterlockedExchange64((volatile unsigned int *)&((p)->gasneti_ctr),nval) - GASNETI_INLINE(_gasneti_atomic64_addfetch) - uint64_t _gasneti_atomic64_addfetch(gasneti_atomic64_t *_p, const uint64_t _op) { - GASNETI_ASM_REGISTER_KEYWORD uint64_t _sum, _oval; - GASNETI_ASM_REGISTER_KEYWORD uint64_t _tmp = _p->gasneti_ctr; - do { - _sum = _op + (_oval = _tmp); - _tmp = _InterlockedCompareExchange64_acq((volatile unsigned __int64 *)&_p->gasneti_ctr,_oval,_sum); - } while (_oval != _tmp); - return _sum; - } - #define _gasneti_atomic64_addfetch _gasneti_atomic64_addfetch - - /* See fence treatment after #endif */ - #elif PLATFORM_COMPILER_GNU - GASNETI_INLINE(gasneti_atomic32_xchg) - uint32_t gasneti_atomic32_xchg(uint32_t volatile *_ptr, uint32_t _newval) { - uint64_t _tmp; - __asm__ __volatile__ - ("xchg4 %0=[%1],%2" : "=r"(_tmp) : "r"(_ptr), "r"(_newval) ); - return (uint32_t) _tmp; - } - GASNETI_INLINE(gasneti_atomic32_cmpxchg) - uint32_t gasneti_atomic32_cmpxchg(uint32_t volatile *_ptr, uint32_t _oldval, uint32_t _newval) { - uint64_t _tmp = _oldval; - __asm__ __volatile__ ("mov ar.ccv=%0;;" :: "rO"(_tmp)); - __asm__ __volatile__ - ("cmpxchg4.acq %0=[%1],%2,ar.ccv" : "=r"(_tmp) : "r"(_ptr), "r"(_newval) ); - return (uint32_t) _tmp; - } - GASNETI_INLINE(gasneti_atomic32_fetchandinc) - uint32_t gasneti_atomic32_fetchandinc(uint32_t volatile 
*_ptr) { - uint64_t _result; - __asm__ __volatile__ - ("fetchadd4.acq %0=[%1],%2" : "=r"(_result) : "r"(_ptr), "i" (1) ); - return (uint32_t) _result; - } - GASNETI_INLINE(gasneti_atomic32_fetchanddec) - uint32_t gasneti_atomic32_fetchanddec(uint32_t volatile *_ptr) { - uint64_t _result; - __asm__ __volatile__ - ("fetchadd4.acq %0=[%1],%2" : "=r"(_result) : "r"(_ptr), "i" (-1) ); - return (uint32_t) _result; - } - - GASNETI_INLINE(gasneti_atomic64_xchg) - uint64_t gasneti_atomic64_xchg(uint64_t volatile *_ptr, uint64_t _newval) { - uint64_t _tmp; - __asm__ __volatile__ - ("xchg8 %0=[%1],%2" : "=r"(_tmp) : "r"(_ptr), "r"(_newval) ); - return (uint64_t) _tmp; - } - GASNETI_INLINE(gasneti_atomic64_cmpxchg) - uint64_t gasneti_atomic64_cmpxchg(uint64_t volatile *_ptr, uint64_t _oldval, uint64_t _newval) { - uint64_t _tmp = _oldval; - __asm__ __volatile__ ("mov ar.ccv=%0;;" :: "rO"(_tmp)); - __asm__ __volatile__ - ("cmpxchg8.acq %0=[%1],%2,ar.ccv" : "=r"(_tmp) : "r"(_ptr), "r"(_newval) ); - return (uint64_t) _tmp; - } - GASNETI_INLINE(gasneti_atomic64_fetchandinc) - uint64_t gasneti_atomic64_fetchandinc(uint64_t volatile *_ptr) { - uint64_t _result; - __asm__ __volatile__ - ("fetchadd8.acq %0=[%1],%2" : "=r"(_result) : "r"(_ptr), "i" (1) ); - return _result; - } - GASNETI_INLINE(gasneti_atomic64_fetchanddec) - uint64_t gasneti_atomic64_fetchanddec(uint64_t volatile *_ptr) { - uint64_t _result; - __asm__ __volatile__ - ("fetchadd8.acq %0=[%1],%2" : "=r"(_result) : "r"(_ptr), "i" (-1) ); - return _result; - } - - #define GASNETI_HAVE_ATOMIC32_T 1 - typedef struct { volatile uint32_t gasneti_ctr; } gasneti_atomic32_t; - #define gasneti_atomic32_init(v) { (v) } - #define _gasneti_atomic32_read(p) ((p)->gasneti_ctr) - #define _gasneti_atomic32_set(p,v) ((p)->gasneti_ctr = (v)) - #define _gasneti_atomic32_increment(p) (gasneti_atomic32_fetchandinc(&((p)->gasneti_ctr))) - #define _gasneti_atomic32_decrement(p) (gasneti_atomic32_fetchanddec(&((p)->gasneti_ctr))) - #define 
_gasneti_atomic32_decrement_and_test(p) (gasneti_atomic32_fetchanddec(&((p)->gasneti_ctr)) == 1) - #define _gasneti_atomic32_compare_and_swap(p,oval,nval) \ - (gasneti_atomic32_cmpxchg(&((p)->gasneti_ctr),oval,nval) == (oval)) - #define _gasneti_atomic32_swap(p,nval) (gasneti_atomic32_xchg(&((p)->gasneti_ctr),nval)) - GASNETI_INLINE(_gasneti_atomic32_addfetch) - uint32_t _gasneti_atomic32_addfetch(gasneti_atomic32_t *_p, const uint32_t _op) { - GASNETI_ASM_REGISTER_KEYWORD uint32_t _sum, _oval; - GASNETI_ASM_REGISTER_KEYWORD uint32_t _tmp = _p->gasneti_ctr; - do { - _sum = _op + (_oval = _tmp); - _tmp = gasneti_atomic32_cmpxchg(&_p->gasneti_ctr,_oval,_sum); - } while (_oval != _tmp); - return _sum; - } - #define _gasneti_atomic32_addfetch _gasneti_atomic32_addfetch - - #define GASNETI_HAVE_ATOMIC64_T 1 - typedef struct { volatile uint64_t gasneti_ctr; } gasneti_atomic64_t; - #define gasneti_atomic64_init(v) { (v) } - #define _gasneti_atomic64_read(p) ((p)->gasneti_ctr) - #define _gasneti_atomic64_set(p,v) ((p)->gasneti_ctr = (v)) - #define _gasneti_atomic64_increment(p) (gasneti_atomic64_fetchandinc(&((p)->gasneti_ctr))) - #define _gasneti_atomic64_decrement(p) (gasneti_atomic64_fetchanddec(&((p)->gasneti_ctr))) - #define _gasneti_atomic64_decrement_and_test(p) (gasneti_atomic64_fetchanddec(&((p)->gasneti_ctr)) == 1) - #define _gasneti_atomic64_compare_and_swap(p,oval,nval) \ - (gasneti_atomic64_cmpxchg(&((p)->gasneti_ctr),oval,nval) == (oval)) - #define _gasneti_atomic64_swap(p,nval) (gasneti_atomic64_xchg(&((p)->gasneti_ctr),nval)) - GASNETI_INLINE(_gasneti_atomic64_addfetch) - uint64_t _gasneti_atomic64_addfetch(gasneti_atomic64_t *_p, const uint64_t _op) { - GASNETI_ASM_REGISTER_KEYWORD uint64_t _sum, _oval; - GASNETI_ASM_REGISTER_KEYWORD uint64_t _tmp = _p->gasneti_ctr; - do { - _sum = _op + (_oval = _tmp); - _tmp = gasneti_atomic64_cmpxchg(&_p->gasneti_ctr,_oval,_sum); - } while (_oval != _tmp); - return _sum; - } - #define _gasneti_atomic64_addfetch 
_gasneti_atomic64_addfetch - - /* See fence treatment after #endif */ - #else - #error unrecognized Itanium compiler - need to implement GASNet atomics (or #define GASNETI_USE_GENERIC_ATOMICOPS) - #endif - - /* Since supported compilers are generating r-m-w with .acq variants, we can customize - * the atomic fencing implementation by noting that "mf;; foo.acq" is a full memory - * barrier both before and after. */ - #define _gasneti_atomic32_prologue_rmw(p,f) /*empty*/ - #define _gasneti_atomic32_fence_before_rmw(p, flags) \ - if (flags & (GASNETI_ATOMIC_MB_PRE | GASNETI_ATOMIC_MB_POST)) gasneti_local_mb(); - #define _gasneti_atomic32_fence_after_rmw(p, flags) \ - /* Nothing */ - #define _gasneti_atomic32_fence_after_bool(p, flags, val) \ - if (!(flags & (GASNETI_ATOMIC_MB_PRE | GASNETI_ATOMIC_MB_POST))) \ - { if (((flags & GASNETI_ATOMIC_RMB_POST_IF_TRUE ) && val) || \ - ((flags & GASNETI_ATOMIC_RMB_POST_IF_FALSE) && !val)) gasneti_local_rmb(); } - #define _gasneti_atomic64_prologue_rmw(p,f) /*empty*/ - #define _gasneti_atomic64_fence_before_rmw _gasneti_atomic32_fence_before_rmw - #define _gasneti_atomic64_fence_after_rmw _gasneti_atomic32_fence_after_rmw - #define _gasneti_atomic64_fence_after_bool _gasneti_atomic32_fence_after_bool - - /* ------------------------------------------------------------------------------------ */ #elif PLATFORM_ARCH_SPARC #if defined(__sparcv9) || defined(__sparcv9cpu) || \ defined(__sparc_v9__) || defined(GASNETI_ARCH_ULTRASPARC) /* SPARC v9 ISA */ diff --git a/third-party/gasnet/gasnet-src/gasnet_atomic_fwd.h b/third-party/gasnet/gasnet-src/gasnet_atomic_fwd.h index 2fbc6bb36f08..4b48f8ce7be1 100644 --- a/third-party/gasnet/gasnet-src/gasnet_atomic_fwd.h +++ b/third-party/gasnet/gasnet-src/gasnet_atomic_fwd.h @@ -115,12 +115,6 @@ #define GASNETI_ATOMIC32_IMPL GASNETI_ATOMIC_IMPL_NATIVE #define GASNETI_ATOMIC64_IMPL GASNETI_ATOMIC_IMPL_NATIVE #endif -#elif PLATFORM_ARCH_IA64 /* Itanium */ - #if PLATFORM_COMPILER_INTEL || 
PLATFORM_COMPILER_GNU - // Native via icc intrinsics or gcc inline asm - #define GASNETI_ATOMIC32_IMPL GASNETI_ATOMIC_IMPL_NATIVE - #define GASNETI_ATOMIC64_IMPL GASNETI_ATOMIC_IMPL_NATIVE - #endif #elif PLATFORM_ARCH_SPARC #if defined(__sparcv9) || defined(__sparcv9cpu) || \ defined(__sparc_v9__) || defined(GASNETI_CONFIG_ARCH_ULTRASPARC) /* SPARC v9 ISA */ diff --git a/third-party/gasnet/gasnet-src/gasnet_basic.h b/third-party/gasnet/gasnet-src/gasnet_basic.h index c74fe7ba1002..66c28703a476 100644 --- a/third-party/gasnet/gasnet-src/gasnet_basic.h +++ b/third-party/gasnet/gasnet-src/gasnet_basic.h @@ -209,7 +209,8 @@ #else // mismatch behavior: define away to nothing, which should always be safe // define to 1 because 0 triggers use of (void*) in place of the typedef - #define GASNETI_RESTRICT GASNETI_COMPILER_FEATURE(RESTRICT,) + #define GASNETI_RESTRICT_NOOP // this intermediate avoids empty macro argument on next line + #define GASNETI_RESTRICT GASNETI_COMPILER_FEATURE(RESTRICT,GASNETI_RESTRICT_NOOP) #define GASNETI_RESTRICT_MAY_QUALIFY_TYPEDEFS GASNETI_COMPILER_FEATURE(RESTRICT_MAY_QUALIFY_TYPEDEFS,1) #endif @@ -369,14 +370,20 @@ #endif /* magic numbers for identifying/protecting types - * WARNING: GASNETI_{CHECK,IMPORT}_MAGIC() may evaluate the pointer argument more than once! + * WARNING: GASNETI_{CHECK,IMPORT}_MAGIC() may evaluate the arguments more than once! 
*/ #define GASNETI_MAKE_MAGIC(c0,c1,c2,c3) GASNETI_SIGNATURE8('g','e','x',':',c0,c1,c2,c3) #define GASNETI_MAKE_BAD_MAGIC(c0,c1,c2,c3) GASNETI_SIGNATURE8('B','A','D',':',c0,c1,c2,c3) typedef union { uint64_t _u; char _c[8]; } gasneti_magic_t; #if GASNET_DEBUG #define GASNETI_INIT_MAGIC(p,m) ((void)((p)->_magic._u = (m))) - #define GASNETI_CHECK_MAGIC(p,m) do { if (p) gasneti_assert_uint((p)->_magic._u ,==, (m)); } while (0) + #define GASNETI_CHECK_MAGIC(p,m) do { \ + if ((p) && ((p)->_magic._u != (m))) { \ + char buf1[GASNETI_MAX_MAGICSZ]; gasneti_format_magic(buf1, (p)->_magic._u); \ + char buf2[GASNETI_MAX_MAGICSZ]; gasneti_format_magic(buf2, (m)); \ + gasneti_fatalerror("Found magic %s when expecting %s, aka %s", buf1, buf2, #m); \ + } \ + } while (0) #define GASNETI_IMPORT_MAGIC(p,type) do { \ if ((p) && ((p)->_magic._u == GASNETI_##type##_BAD_MAGIC)) { \ gasneti_fatalerror("Likely use-after-free error for " #type " object"); \ @@ -847,5 +854,64 @@ typedef union { uint64_t _u; char _c[8]; } gasneti_magic_t; #if !defined(GASNETT_USE_BUILTIN_UNREACHABLE) && GASNETI_COMPILER_HAS_BUILTIN(UNREACHABLE,unreachable) #define GASNETT_USE_BUILTIN_UNREACHABLE 1 #endif + +/* ------------------------------------------------------------------------------------ */ +// Handling of unused macro arguments +// +// When writing function-like macros, it is often necessary to ensure that +// every argument is evaluated exactly once for side-effects. However, the +// simple idiom `((void)(arg))` is not always sufficient because some +// compilers will warn about expressions/statements "with no effect". +// +// This `GASNETI_UNUSED_ARGS{1..8}()` family of macros hides any +// compiler-specific means of suppressing such warnings, and +// provides a self-documenting name as well. 
+// +// Contrived example: +// #define OPTION_1_OF_3(x,y,z) (GASNETI_UNUSED_ARGS2(y,z),(x)) +// #define OPTION_2_OF_3(x,y,z) (GASNETI_UNUSED_ARGS2(x,z),(y)) +// #define OPTION_3_OF_3(x,y,z) (GASNETI_UNUSED_ARGS2(x,y),(z)) +// See also: gasnetc_{AM,Token}_Max*() macros. + +#if PLATFORM_COMPILER_PGI + GASNETI_INLINE(gasneti_empty_function) + void gasneti_empty_function(void) {} + #define GASNETI_UNUSED_ARG_PRE_ gasneti_empty_function(), +#endif + +#ifndef GASNETI_UNUSED_ARG_PRE_ + #define GASNETI_UNUSED_ARG_PRE_ //empty +#endif +#ifndef GASNETI_UNUSED_ARG_ + #define GASNETI_UNUSED_ARG_(x) (void)(x) +#endif + +#define GASNETI_UNUSED_ARGS1(a1) \ + (GASNETI_UNUSED_ARG_PRE_ GASNETI_UNUSED_ARG_(a1)) +#define GASNETI_UNUSED_ARGS2(a1,a2) \ + (GASNETI_UNUSED_ARG_PRE_ GASNETI_UNUSED_ARG_(a1),GASNETI_UNUSED_ARG_(a2)) +#define GASNETI_UNUSED_ARGS3(a1,a2,a3) \ + (GASNETI_UNUSED_ARG_PRE_ GASNETI_UNUSED_ARG_(a1),GASNETI_UNUSED_ARG_(a2),\ + GASNETI_UNUSED_ARG_(a3)) +#define GASNETI_UNUSED_ARGS4(a1,a2,a3,a4) \ + (GASNETI_UNUSED_ARG_PRE_ GASNETI_UNUSED_ARG_(a1),GASNETI_UNUSED_ARG_(a2),\ + GASNETI_UNUSED_ARG_(a3),GASNETI_UNUSED_ARG_(a4)) +#define GASNETI_UNUSED_ARGS5(a1,a2,a3,a4,a5) \ + (GASNETI_UNUSED_ARG_PRE_ GASNETI_UNUSED_ARG_(a1),GASNETI_UNUSED_ARG_(a2),\ + GASNETI_UNUSED_ARG_(a3),GASNETI_UNUSED_ARG_(a4),GASNETI_UNUSED_ARG_(a5)) +#define GASNETI_UNUSED_ARGS6(a1,a2,a3,a4,a5,a6) \ + (GASNETI_UNUSED_ARG_PRE_ GASNETI_UNUSED_ARG_(a1),GASNETI_UNUSED_ARG_(a2),\ + GASNETI_UNUSED_ARG_(a3),GASNETI_UNUSED_ARG_(a4),GASNETI_UNUSED_ARG_(a5),\ + GASNETI_UNUSED_ARG_(a6)) +#define GASNETI_UNUSED_ARGS7(a1,a2,a3,a4,a5,a6,a7) \ + (GASNETI_UNUSED_ARG_PRE_ GASNETI_UNUSED_ARG_(a1),GASNETI_UNUSED_ARG_(a2),\ + GASNETI_UNUSED_ARG_(a3),GASNETI_UNUSED_ARG_(a4),GASNETI_UNUSED_ARG_(a5),\ + GASNETI_UNUSED_ARG_(a6),GASNETI_UNUSED_ARG_(a7)) +#define GASNETI_UNUSED_ARGS8(a1,a2,a3,a4,a5,a6,a7,a8) \ + (GASNETI_UNUSED_ARG_PRE_ GASNETI_UNUSED_ARG_(a1),GASNETI_UNUSED_ARG_(a2),\ + 
GASNETI_UNUSED_ARG_(a3),GASNETI_UNUSED_ARG_(a4),GASNETI_UNUSED_ARG_(a5),\ + GASNETI_UNUSED_ARG_(a6),GASNETI_UNUSED_ARG_(a7),GASNETI_UNUSED_ARG_(a8)) + /* ------------------------------------------------------------------------------------ */ + #endif diff --git a/third-party/gasnet/gasnet-src/gasnet_config.h.in b/third-party/gasnet/gasnet-src/gasnet_config.h.in index 5683a2da01c2..1ed5e583dcf9 100644 --- a/third-party/gasnet/gasnet-src/gasnet_config.h.in +++ b/third-party/gasnet/gasnet-src/gasnet_config.h.in @@ -218,6 +218,12 @@ /* --with-max-threads value (if given) */ #undef GASNETI_MAX_THREADS_CONFIGURE +/* --with-maxeps value (if given) */ +#undef GASNETI_MAXEPS_CONFIGURE + +/* memory kinds support */ +#undef GASNETI_MK_CLASS_CUDA_UVA_ENABLED + /* has clock_gettime() */ #undef HAVE_CLOCK_GETTIME @@ -242,6 +248,12 @@ /* has Portable Linux Processor Affinity */ #undef HAVE_PLPA +/* Portable Hardware Locality (hwloc) library and command-line utils */ +#undef GASNETI_HAVE_HWLOC_LIB +#undef GASNETI_HAVE_HWLOC_UTILS +#undef GASNETI_HWLOC_BIND_PATH +#undef GASNETI_HWLOC_CALC_PATH + /* have ptmalloc's mallopt() options */ #undef HAVE_PTMALLOC @@ -339,9 +351,6 @@ /* has x86 EBX register (not reserved for GOT) */ #undef GASNETI_HAVE_X86_EBX -/* has support (toolchain and cpu) for ia64 cmp8xchg16 instruction */ -#undef GASNETI_HAVE_IA64_CMP8XCHG16 - /* has support (toolchain and cpu) for x86_64 cmpxchg16b instruction */ #undef GASNETI_HAVE_X86_CMPXCHG16B @@ -452,7 +461,7 @@ #undef GASNETC_USE_SOCKETPAIR /* GASNet aries-conduit settings */ -#undef GASNETC_GNI_MAX_MEDIUM +#undef GASNETC_GNI_MAX_MEDIUM_DFLT #undef GASNETC_GNI_MULTI_DOMAIN #undef GASNETC_GNI_UDREG @@ -468,12 +477,15 @@ #undef HAVE_IBV_TRANSPORT_TYPE #undef GASNETC_IBV_MAX_MEDIUM #undef GASNETC_IBV_ODP +#undef GASNETC_IBV_ODP_MLNX +#undef GASNETC_IBV_ODP_CORE #undef GASNETC_IBV_ODP_DISABLED #undef GASNETC_IBV_RCV_THREAD #undef GASNETC_IBV_CONN_THREAD -#undef GASNETC_IBV_MAX_HCAS +#undef 
GASNETC_IBV_MAX_HCAS_CONFIGURE #undef GASNETC_IBV_PHYSMEM_MAX_CONFIGURE #undef GASNETC_IBV_PHYSMEM_PROBE_CONFIGURE +#undef GASNETC_IBV_PORTS_CONFIGURE /* GASNet bug1389 detection/work-around */ #undef GASNETI_BUG1389_WORKAROUND diff --git a/third-party/gasnet/gasnet-src/gasnet_diagnostic.c b/third-party/gasnet/gasnet-src/gasnet_diagnostic.c index 4b0f9daa59fe..eafa91d18ef4 100644 --- a/third-party/gasnet/gasnet-src/gasnet_diagnostic.c +++ b/third-party/gasnet/gasnet-src/gasnet_diagnostic.c @@ -101,6 +101,8 @@ static void progressfns_test(int id); static void op_test(int id); static void spawner_test(void); +static void hbarrier_test(void); +static void rexchgv_test(void); static gex_TM_t myteam; @@ -198,6 +200,10 @@ extern int gasneti_run_diagnostics(int iter_cnt, int threadcnt, const char *test BARRIER(); #endif + TEST_HEADER("host-scoped barrier test") hbarrier_test(); + + TEST_HEADER("RotatedExchangeV test") rexchgv_test(); + #if GASNET_PAR num_threads = threadcnt; MSG0("spawning %i threads...", num_threads); @@ -1453,6 +1459,104 @@ static void spawner_test(void) { } #endif +/* ------------------------------------------------------------------------------------ */ + +static void hbarrier_test(void) { + // Currently, this is just to ensure this little-used interface gets tested + // TODO: + // + Maybe verify that barrier property holds? + // + Maybe verify AM progress made within this barrier? 
+ for (int i = 0; i < iters0; ++i) { + gasneti_host_barrier(); + } +} + +/* ------------------------------------------------------------------------------------ */ + +static void rexchgv_test(void) { + uint64_t data[10]; + const size_t elem_sz = sizeof(data[0]); + const size_t total_len = gasneti_nodes * sizeof(data); + const int N = sizeof(data)/elem_sz; // total values per rank + for (int i = 0; i < N; ++i) { + data[i] = gasneti_mynode + i * gasneti_nodes; + } + + for (int iter = 0; iter < iters0; ++iter) { + // Choose splits between first and second exchanges + // First iteration is deterministic and the rest are random + int part1, part2; + if (!iter) { + part1 = 0; + part2 = N; + } else { + // Note that RAND() is in-sync across ranks, which we intentionally perturb + part1 = (TEST_RAND(0,N) + gasneti_mynode) % N; + part2 = N - part1; + } + + uint64_t *data1, *data2; + size_t *len1, *len2; + size_t partial_len1, partial_len2; + + partial_len1 = gasneti_blockingRotatedExchangeV(myteam, data, part1*elem_sz, (void**)&data1, &len1); + if (partial_len1) { + gasneti_assert_always_uint(*len1 ,==, part1*elem_sz); + } else { + gasneti_assert_always(data1 == NULL); + gasneti_assert_always(len1 == NULL); + } + + partial_len2 = gasneti_blockingRotatedExchangeV(myteam, data+part1, part2*elem_sz, (void**)&data2, &len2); + if (partial_len2) { + gasneti_assert_always_uint(*len2 ,==, part2*elem_sz); + } else { + gasneti_assert_always(data2 == NULL); + gasneti_assert_always(len2 == NULL); + } + + { // Validate lengths satisfy alignment and summation properties + gasneti_assert_always_uint(partial_len1 + partial_len2 ,==, total_len); + gasneti_assert_always_uint(partial_len1 % elem_sz ,==, 0); + gasneti_assert_always_uint(partial_len2 % elem_sz ,==, 0); + size_t sum1 = 0; + size_t sum2 = 0; + for (gex_Rank_t i = 0; i < gasneti_nodes; ++i) { + size_t tmp1 = len1 ? len1[i] : 0; + gasneti_assert_always_uint(tmp1 % elem_sz ,==, 0); + sum1 += tmp1; + size_t tmp2 = len2 ?
len2[i] : 0; + gasneti_assert_always_uint(tmp2 % elem_sz ,==, 0); + sum2 += tmp2; + gasneti_assert_always_uint(tmp1 + tmp2 ,==, sizeof(data)); + } + gasneti_assert_always_uint(partial_len1 ,==, sum1); + gasneti_assert_always_uint(partial_len2 ,==, sum2); + } + + { // Validate content + uint64_t *p1 = data1; + uint64_t *p2 = data2; + for (gex_Rank_t i = 0; i < gasneti_nodes; ++i) { + int count1 = len1 ? (int)(len1[i] / elem_sz) : 0; + uint64_t want = (gasneti_mynode + i) % gasneti_nodes; // Not simply i, due to rotation + for (int j = 0; j < count1; ++j, ++p1, want += gasneti_nodes) { + gasneti_assert_always_uint(*p1 ,==, want); + } + int count2 = len2 ? (int)(len2[i] / elem_sz) : 0; + for (int j = 0; j < count2; ++j, ++p2, want += gasneti_nodes) { + gasneti_assert_always_uint(*p2 ,==, want); + } + } + } + + gasneti_free(data1); + gasneti_free(data2); + gasneti_free(len1); + gasneti_free(len2); + } +} + /* ------------------------------------------------------------------------------------ */ static gex_AM_Entry_t gasneti_diag_handlers[] = { #ifdef GASNETC_DIAG_HANDLERS diff --git a/third-party/gasnet/gasnet-src/gasnet_fwd.h b/third-party/gasnet/gasnet-src/gasnet_fwd.h index 2a8446cc30a3..eed5b34f4865 100644 --- a/third-party/gasnet/gasnet-src/gasnet_fwd.h +++ b/third-party/gasnet/gasnet-src/gasnet_fwd.h @@ -194,6 +194,24 @@ typedef uint32_t gex_Flags_t; #define GEX_FLAG_RANK_IS_JOBRANK (1U << 13) +#define GEX_FLAG_HINT_ACCEL_AD (1U << 0) +#define GEX_FLAG_HINT_ACCEL_COLL (1U << 1) +#define GEX_FLAG_HINT_ACCEL_ALL ((1U << 2)-1) + +//================================================ +// GEX_EP_CAPABILITY_* +// Flags used when creating endpoints +//================================================ + +typedef uint32_t gex_EP_Capabilities_t; + +#define GEX_EP_CAPABILITY_RMA (1U << 0) +#define GEX_EP_CAPABILITY_AM (1U << 1) +#define GEX_EP_CAPABILITY_VIS (1U << 2) +#define GEX_EP_CAPABILITY_COLL (1U << 3) +#define GEX_EP_CAPABILITY_AD (1U << 4) +#define 
GEX_EP_CAPABILITY_ALL ((1U << 5)-1) + //-------------------------------------------------------- // Private definitions // Identifiers defined below are not for public use diff --git a/third-party/gasnet/gasnet-src/gasnet_help.h b/third-party/gasnet/gasnet-src/gasnet_help.h index 6583f8b2484d..119b71248af5 100644 --- a/third-party/gasnet/gasnet-src/gasnet_help.h +++ b/third-party/gasnet/gasnet-src/gasnet_help.h @@ -24,7 +24,7 @@ typedef struct { } gasneti_heapstats_t; #if GASNET_DEBUGMALLOC - /* curloc is passed to debug mallocator as "file:line", + /* _curloc is passed to debug mallocator as "file:line", or the special constant "SRCPOS" to retrieve the info from gasnet_srclines To enable use of srcpos for a compilation unit, client should: #undef GASNETT_MALLOC_USE_SRCPOS @@ -33,20 +33,20 @@ typedef struct { #ifndef GASNETT_MALLOC_USE_SRCPOS #define GASNETT_MALLOC_USE_SRCPOS 0 /* off by default */ #endif - #define GASNETI_CURLOCFARG , const char *curloc + #define GASNETI_CURLOCFARG , const char *_curloc #define GASNETI_CURLOCAARG , (GASNETT_MALLOC_USE_SRCPOS ? 
\ "SRCPOS" : \ __FILE__ ":" _STRINGIFY(__LINE__)) - #define GASNETI_CURLOCPARG , curloc - extern size_t _gasneti_memcheck(void *ptr, const char *curloc, int checktype); - extern void _gasneti_memcheck_one(const char *curloc); - extern void _gasneti_memcheck_all(const char *curloc); + #define GASNETI_CURLOCPARG , _curloc + extern size_t _gasneti_memcheck(void *_ptr, const char *_curloc, int _checktype); + extern void _gasneti_memcheck_one(const char *_curloc); + extern void _gasneti_memcheck_all(const char *_curloc); #define gasneti_memcheck(ptr) (gasneti_assert(ptr != NULL), \ (void)_gasneti_memcheck(ptr, __FILE__ ":" _STRINGIFY(__LINE__), 0)) #define gasneti_memcheck_one() _gasneti_memcheck_one(__FILE__ ":" _STRINGIFY(__LINE__)) #define gasneti_memcheck_all() _gasneti_memcheck_all(__FILE__ ":" _STRINGIFY(__LINE__)) - extern int gasneti_getheapstats(gasneti_heapstats_t *pstat); - extern void gasneti_heapinfo_dump(const char *filename, int show_live_objects); + extern int gasneti_getheapstats(gasneti_heapstats_t *_pstat); + extern void gasneti_heapinfo_dump(const char *_filename, int _show_live_objects); #else #define GASNETI_CURLOCFARG #define GASNETI_CURLOCAARG @@ -59,16 +59,16 @@ typedef struct { #endif /* extern versions of gasnet malloc fns for use in public headers */ -extern void *_gasneti_extern_malloc(size_t sz GASNETI_CURLOCFARG) GASNETI_MALLOC; +extern void *_gasneti_extern_malloc(size_t _sz GASNETI_CURLOCFARG) GASNETI_MALLOC; GASNETI_MALLOCP(_gasneti_extern_malloc) -extern void *_gasneti_extern_realloc(void *ptr, size_t sz GASNETI_CURLOCFARG); -extern void *_gasneti_extern_calloc(size_t N, size_t S GASNETI_CURLOCFARG) GASNETI_MALLOC; +extern void *_gasneti_extern_realloc(void *_ptr, size_t _sz GASNETI_CURLOCFARG); +extern void *_gasneti_extern_calloc(size_t _n, size_t _s GASNETI_CURLOCFARG) GASNETI_MALLOC; GASNETI_MALLOCP(_gasneti_extern_calloc) -extern void _gasneti_extern_free(void *ptr GASNETI_CURLOCFARG); -extern void _gasneti_extern_leak(void *ptr 
GASNETI_CURLOCFARG); -extern char *_gasneti_extern_strdup(const char *s GASNETI_CURLOCFARG) GASNETI_MALLOC; +extern void _gasneti_extern_free(void *_ptr GASNETI_CURLOCFARG); +extern void _gasneti_extern_leak(void *_ptr GASNETI_CURLOCFARG); +extern char *_gasneti_extern_strdup(const char *_s GASNETI_CURLOCFARG) GASNETI_MALLOC; GASNETI_MALLOCP(_gasneti_extern_strdup) -extern char *_gasneti_extern_strndup(const char *s, size_t n GASNETI_CURLOCFARG) GASNETI_MALLOC; +extern char *_gasneti_extern_strndup(const char *_s, size_t _n GASNETI_CURLOCFARG) GASNETI_MALLOC; GASNETI_MALLOCP(_gasneti_extern_strndup) #define gasneti_extern_malloc(sz) _gasneti_extern_malloc((sz) GASNETI_CURLOCAARG) @@ -87,49 +87,49 @@ GASNETI_MALLOCP(_gasneti_extern_strndup) #define GASNETI_USE_POSIX_MEMALIGN 1 #endif GASNETI_INLINE(_gasneti_malloc_aligned) GASNETI_MALLOC -void * _gasneti_malloc_aligned(size_t alignment, size_t size GASNETI_CURLOCFARG) { - gasneti_assert(GASNETI_POWEROFTWO(alignment)); - gasneti_assert(alignment <= GASNET_PAGESIZE); +void * _gasneti_malloc_aligned(size_t _alignment, size_t _size GASNETI_CURLOCFARG) { + gasneti_assert(GASNETI_POWEROFTWO(_alignment)); + gasneti_assert(_alignment <= GASNET_PAGESIZE); #if GASNETI_USE_POSIX_MEMALIGN - if_pf(alignment < sizeof(void*)) alignment = sizeof(void*); - void *result = NULL; // init to avoid -Wmaybe-uninitialized warnings - int _return_code = posix_memalign(&result, alignment, size); + if_pf(_alignment < sizeof(void*)) _alignment = sizeof(void*); + void *_result = NULL; // init to avoid -Wmaybe-uninitialized warnings + int _return_code = posix_memalign(&_result, _alignment, _size); gasneti_assert_zeroret(_return_code); #else - size_t alloc_size = size + sizeof(void *) + alignment; - void *base = _gasneti_extern_malloc(alloc_size GASNETI_CURLOCPARG); - void **result = (void **)GASNETI_ALIGNUP((uintptr_t)base + sizeof(void *), alignment); - *(result - 1) = base; /* hidden base ptr for free() */ - gasneti_assert_ptr((void *)(result - 
1) ,>=, base); - gasneti_assert_ptr(((uint8_t *)result + size) ,<=, ((uint8_t *)base + alloc_size)); + size_t _alloc_size = _size + sizeof(void *) + _alignment; + void *_base = _gasneti_extern_malloc(_alloc_size GASNETI_CURLOCPARG); + void **_result = (void **)GASNETI_ALIGNUP((uintptr_t)_base + sizeof(void *), _alignment); + *(_result - 1) = _base; /* hidden base ptr for free() */ + gasneti_assert_ptr((void *)(_result - 1) ,>=, _base); + gasneti_assert_ptr(((uint8_t *)_result + _size) ,<=, ((uint8_t *)_base + _alloc_size)); #endif - gasneti_assume(result); - gasneti_assert_ptr(result ,==, (void **)GASNETI_ALIGNUP(result, alignment)); - return (void *)result; + gasneti_assume(_result); + gasneti_assert_ptr(_result ,==, (void **)GASNETI_ALIGNUP(_result, _alignment)); + return (void *)_result; } GASNETI_MALLOCP(_gasneti_malloc_aligned) #define gasneti_malloc_aligned(align,sz) _gasneti_malloc_aligned((align), (sz) GASNETI_CURLOCAARG) GASNETI_INLINE(_gasneti_free_aligned) -void _gasneti_free_aligned(void *ptr GASNETI_CURLOCFARG) { - gasneti_assert(ptr); +void _gasneti_free_aligned(void *_ptr GASNETI_CURLOCFARG) { + gasneti_assert(_ptr); #if GASNETI_USE_POSIX_MEMALIGN - free(ptr); + free(_ptr); #else - void *base = *((void **)ptr - 1); - gasneti_assert(base); - _gasneti_extern_free(base GASNETI_CURLOCPARG); + void *_base = *((void **)_ptr - 1); + gasneti_assert(_base); + _gasneti_extern_free(_base GASNETI_CURLOCPARG); #endif } #define gasneti_free_aligned(ptr) _gasneti_free_aligned((ptr) GASNETI_CURLOCAARG) GASNETI_INLINE(_gasneti_leak_aligned) -void _gasneti_leak_aligned(void *ptr GASNETI_CURLOCFARG) { - gasneti_assert(ptr); +void _gasneti_leak_aligned(void *_ptr GASNETI_CURLOCFARG) { + gasneti_assert(_ptr); #if !GASNETI_USE_POSIX_MEMALIGN - void *base = *((void **)ptr - 1); - gasneti_assert(base); - _gasneti_extern_leak(base GASNETI_CURLOCPARG); + void *_base = *((void **)_ptr - 1); + gasneti_assert(_base); + _gasneti_extern_leak(_base GASNETI_CURLOCPARG); #endif } 
#define gasneti_leak_aligned(ptr) _gasneti_leak_aligned((ptr) GASNETI_CURLOCAARG) @@ -153,7 +153,16 @@ extern gex_Rank_t gasneti_mynode; extern gex_Rank_t gasneti_nodes; #define gex_System_QueryJobSize() (GASNETI_CHECKINIT(), (gex_Rank_t)gasneti_nodes) +/* ------------------------------------------------------------------------------------ */ +extern int gasneti_VerboseErrors; +#define gex_System_GetVerboseErrors() ((int)gasneti_VerboseErrors) +GASNETI_INLINE(gex_System_SetVerboseErrors) +void gex_System_SetVerboseErrors(int _enable) { + gasneti_assert(_enable == 1 || _enable == 0); + gasneti_VerboseErrors = _enable; +} +/* ------------------------------------------------------------------------------------ */ #if GASNETI_TM0_ALIGN // We can detect TM0 by its better alignment than other tm's GASNETI_INLINE(gasneti_is_tm0) @@ -168,26 +177,188 @@ extern gasneti_TM_t gasneti_thing_that_goes_thunk_in_the_dark; #define gasneti_is_tm0(_i_tm) ((_i_tm) == gasneti_thing_that_goes_thunk_in_the_dark) #endif +// "TM-pair" +// +// We have one external handle type `gex_TM_t`, but two corresponding internal +// types: `gasneti_TM_t` and `gasneti_TM_Pair_t`. A `gasneti_TM_t` can be +// either an actual pointer to a `struct gasneti_team_member_internal_s` (with +// fields describing the TM) OR it can be just a 32-bit inline representation +// of a TM-pair (with fields accessed via `gasneti_tm_pair_{loc,rem}_idx()`). +// +// Use of the query `gasneti_e_tm_is_pair()` reports whether a `gex_TM_t`, once +// imported, will have the TM-pair representation. +// Use of the query `gasneti_i_tm_is_pair()` reports whether a `gasneti_TM_t` +// has the TM-pair representation. +// +// Code not needing to access any fields may pass `gex_TM_t` or `gasneti_TM_t` +// values transparently, though the latter is preferred so that the debug check +// for the "MAGIC" signature upon import occurs early in the call stack. Such +// code does not require any explicit handling of TM-pairs. 
+// +// For code which does access fields, there are three approaches illustrated +// by the following examples in which +// + `e_tm` is a `gex_TM_t` +// + `i_tm` is a `gasneti_TM_t` without certainty of contents +// + `my_tm` is a `gasneti_TM_t` certain to point to a structure +// + `my_pair` is a `gasneti_TM_Pair_t` certain to hold a 32-bit inline pair +// +// 1. "import excluding pair" - for code paths where TM-pair is prohibited: +// my_tm = gasneti_import_tm_nonpair(e_tm); +// +// 2. "check then import" +// if (gasneti_e_tm_is_pair(e_tm)) +// my_pair = gasneti_import_tm_pair(e_tm); +// else +// my_tm = gasneti_import_tm(e_tm); +// +// 3. "import then check before use" +// i_tm = gasneti_import_tm(e_tm) +// [...] +// if (gasneti_i_tm_is_pair(i_tm)) +// my_pair = gasneti_i_tm_to_pair(i_tm); +// else +// my_tm = i_tm; +// +// The four type-conversion functions used in the examples above: +// + gasneti_TM_t gasneti_import_tm(gex_TM_t); +// Does not check if the argument is a TM-pair +// + gasneti_TM_t gasneti_import_tm_nonpair(gex_TM_t); +// Asserts that the argument is NOT a TM-pair +// + gasneti_TM_Pair_t gasneti_import_tm_pair(gex_TM_t); +// Asserts that the argument is a TM-pair +// + gasneti_TM_Pair_t gasneti_i_tm_to_pair(gasneti_TM_t); +// Asserts that the argument is a TM-pair +// +// -OR- +// +// At least the following internal functions operate an `e_tm` or `i_tm`, with +// transparent support for both encodings and may be sufficient to keep much +// code independent of TM-pairness: +// + gasneti_[ei]_tm_rank_to_jobrank() +// + gasneti_[ei]_tm_rank_to_ep_index() +// + gasneti_[ei]_tm_rank_to_location() +// + gasneti_[ei]_tm_jobrank_to_rank() +// + gasneti_[ei]_tm_size() +// Returns gex_System_QueryJobSize() for TM-pair, suitable for range checking a rank +// Generalize/replace the following idioms which do not accept a TM-pair: +// `gex_TM_QuerySize(e_tm)` +// `gasneti_import_tm(e_tm)->_size` +// `i_tm->_size` +// + gasneti_[ei]_tm_to_i_ep() +// 
Generalize/replace the following idioms which do not accept a TM-pair: +// `gex_TM_QueryEP(e_tm)` +// `gasneti_import_tm(e_tm)->_ep` +// `i_tm->_ep` +// + gasneti_[ei]_tm_to_ep_index() +// More efficient replacement for `gasneti_[ei]_tm_to_i_ep()->_index`, +// replacing multiple alternatives which do not accept a TM-pair +// + gasneti_boundscheck() +// + gasneti_boundscheck_allowoutseg() +// + gasneti_formattm() +// + gasneti_pshm_local_rank() +// + gasneti_pshm_in_supernode() +// + gasneti_pshm_addr2local() +// + GASNETI_NBRHD_LOCAL() +// + GASNETI_NBRHD_LOCAL_ADDR() +// + GASNETI_NBRHD_LOCAL_ADDR_OR_NULL() + +#if GASNET_DEBUG + GASNETI_INLINE(gasneti_assertvalid_tm_pair) + void gasneti_assertvalid_tm_pair(gasneti_TM_Pair_t _tm_pair) { + // TODO: check that client index (currently always zero) is in range + gasneti_assert_uint((_tm_pair & 0xff) ,==, 1); + } +#else + #define gasneti_assertvalid_tm_pair(_tm_pair) ((void)0) +#endif + +// Detect a TM generated by gex_TM_Pair +GASNETI_INLINE(gasneti_i_tm_is_pair) +int gasneti_i_tm_is_pair(gasneti_TM_t _i_tm) +{ + gasneti_TM_Pair_t _tm_pair = (gasneti_TM_Pair_t) _i_tm; + int _result = (_tm_pair & 1); + if (_result) gasneti_assertvalid_tm_pair(_tm_pair); + return _result; +} +#define gasneti_e_tm_is_pair(_e_tm) gasneti_i_tm_is_pair(gasneti_import_tm(_e_tm)) + +// Assertion-checking conversions from {gex,gasneti}_TM_t to gasneti_TM_Pair_t +GASNETI_INLINE(gasneti_i_tm_to_pair) +gasneti_TM_Pair_t gasneti_i_tm_to_pair(gasneti_TM_t _i_tm) +{ + gasneti_TM_Pair_t _tm_pair = (gasneti_TM_Pair_t) _i_tm; + gasneti_assertvalid_tm_pair(_tm_pair); + return _tm_pair; +} +#define gasneti_e_tm_to_pair(_e_tm) gasneti_i_tm_to_pair(gasneti_import_tm(_e_tm)) + +// Extract EP indices from a known TM_Pair +GASNETI_INLINE(gasneti_tm_pair_loc_idx) +gex_EP_Index_t gasneti_tm_pair_loc_idx(gasneti_TM_Pair_t _tm_pair) +{ + gasneti_assertvalid_tm_pair(_tm_pair); + return (_tm_pair >> GASNETI_TM_PAIR_LOC_IDX_SHIFT) & GASNETI_TM_PAIR_IDX_MASK; +} 
+GASNETI_INLINE(gasneti_tm_pair_rem_idx) +gex_EP_Index_t gasneti_tm_pair_rem_idx(gasneti_TM_Pair_t _tm_pair) +{ + gasneti_assertvalid_tm_pair(_tm_pair); + return (_tm_pair >> GASNETI_TM_PAIR_REM_IDX_SHIFT) & GASNETI_TM_PAIR_IDX_MASK; +} + + // Given (tm,rank) return the jobrank or ep_location -extern GASNETI_PURE gex_Rank_t gasneti_tm_fwd_rank(gasneti_TM_t tm, gex_Rank_t rank); +extern GASNETI_PURE gex_Rank_t gasneti_tm_fwd_rank(gasneti_TM_t _tm, gex_Rank_t _rank); GASNETI_PUREP(gasneti_tm_fwd_rank) -extern GASNETI_PURE gex_EP_Location_t gasneti_tm_fwd_location(gasneti_TM_t tm, gex_Rank_t rank, gex_Flags_t flags); +extern GASNETI_PURE gex_EP_Location_t gasneti_tm_fwd_location(gasneti_TM_t _tm, gex_Rank_t _rank, gex_Flags_t _flags); GASNETI_PUREP(gasneti_tm_fwd_location) // Given (tm,jobrank) return the rank of jobrank in tm, or GEX_RANK_INVALID -extern gex_Rank_t gasneti_tm_rev_rank(gasneti_TM_t tm, gex_Rank_t jobrank); +extern gex_Rank_t gasneti_tm_rev_rank(gasneti_TM_t _tm, gex_Rank_t _jobrank); -#if GASNET_DEBUG -GASNETI_INLINE(gasneti_check_tm_rank) -void gasneti_check_tm_rank(gex_TM_t _e_tm, gex_Rank_t _rank) { +// Helpers which deal correctly/transparently with TM-pair +GASNETI_INLINE(gasneti_e_tm_size) +gex_Rank_t gasneti_e_tm_size(gex_TM_t _e_tm) { gasneti_assert(_e_tm); - gasneti_assert_uint(_rank ,<, gex_TM_QuerySize(_e_tm)); + return gasneti_e_tm_is_pair(_e_tm) ? gex_System_QueryJobSize() : gex_TM_QuerySize(_e_tm); +} +GASNETI_INLINE(gasneti_i_tm_size) +gex_Rank_t gasneti_i_tm_size(gasneti_TM_t _i_tm) { + gasneti_assert(_i_tm); + return gasneti_i_tm_is_pair(_i_tm) ? 
gex_System_QueryJobSize() : _i_tm->_size; } -#define gasneti_check_jobrank(jobrank) \ - gasneti_assert_uint(jobrank ,<, gex_System_QuerySize()); + +GASNETI_INLINE(gasneti_i_tm_to_ep_index) +gex_Rank_t gasneti_i_tm_to_ep_index(gasneti_TM_t _i_tm) { + gasneti_assert(_i_tm); + if (gasneti_is_tm0(_i_tm)) { + return 0; // fast path + } else if (gasneti_i_tm_is_pair(_i_tm)) { + return gasneti_tm_pair_loc_idx(gasneti_i_tm_to_pair(_i_tm)); + } else { + return _i_tm->_ep->_index; + } +} +#define gasneti_e_tm_to_ep_index(_e_tm) gasneti_i_tm_to_ep_index(gasneti_import_tm(_e_tm)) + +#if GASNET_DEBUG + #define gasneti_check_jobrank(jobrank) \ + gasneti_assert_uint(jobrank ,<, gex_System_QueryJobSize()); + GASNETI_INLINE(gasneti_check_e_tm_rank) + void gasneti_check_e_tm_rank(gex_TM_t _e_tm, gex_Rank_t _rank) { + gasneti_assert(_e_tm); + gasneti_assert_uint(_rank ,<, gasneti_e_tm_size(_e_tm)); + } + GASNETI_INLINE(gasneti_check_i_tm_rank) + void gasneti_check_i_tm_rank(gasneti_TM_t _i_tm, gex_Rank_t _rank) { + gasneti_assert(_i_tm); + gasneti_assert_uint(_rank ,<, gasneti_i_tm_size(_i_tm)); + } #else - #define gasneti_check_tm_rank(tm,rank) ((void)0) #define gasneti_check_jobrank(jobrank) ((void)0) + #define gasneti_check_i_tm_rank(tm,rank) ((void)0) + #define gasneti_check_e_tm_rank(tm,rank) ((void)0) #endif // TODO-EX: remove when a runtime branch on tm->_rank_map is necessary @@ -195,9 +366,8 @@ void gasneti_check_tm_rank(gex_TM_t _e_tm, gex_Rank_t _rank) { GASNETI_INLINE(gasneti_i_tm_rank_to_jobrank) gex_Rank_t gasneti_i_tm_rank_to_jobrank(gasneti_TM_t _i_tm, gex_Rank_t _rank) { - gasneti_assert(_i_tm); - gasneti_assert_uint(_rank ,<, _i_tm->_size); - if (gasneti_is_tm0(_i_tm)) return _rank; + gasneti_check_i_tm_rank(_i_tm, _rank); + if (gasneti_is_tm0(_i_tm) || gasneti_i_tm_is_pair(_i_tm)) return _rank; if (!GASNETI_ALLOW_SPARSE_TEAMREP || _i_tm->_rank_map) { gasneti_assert(_i_tm->_rank_map); return _i_tm->_rank_map[_rank]; @@ -207,14 +377,37 @@ gex_Rank_t 
gasneti_i_tm_rank_to_jobrank(gasneti_TM_t _i_tm, gex_Rank_t _rank) { #define gasneti_e_tm_rank_to_jobrank(e_tm,rank) \ gasneti_i_tm_rank_to_jobrank(gasneti_import_tm(e_tm),rank) +GASNETI_INLINE(gasneti_i_tm_rank_to_ep_index) +gex_Rank_t gasneti_i_tm_rank_to_ep_index(gasneti_TM_t _i_tm, gex_Rank_t _rank) { + gasneti_check_i_tm_rank(_i_tm, _rank); + gex_EP_Index_t _result; + if (gasneti_is_tm0(_i_tm)) { + _result = 0; + } else if (gasneti_i_tm_is_pair(_i_tm)) { + _result = gasneti_tm_pair_rem_idx(gasneti_i_tm_to_pair(_i_tm)); + } else if (!GASNETI_ALLOW_SPARSE_TEAMREP || _i_tm->_rank_map) { + // NULL _index_map indicates all members of TM are primordial EPs (idx==0) + _result = _i_tm->_index_map ? _i_tm->_index_map[_rank] : 0; + } else { + gex_EP_Location_t _loc = gasneti_tm_fwd_location(_i_tm, _rank, 0); + _result = _loc.gex_ep_index; + } + gasneti_assert(_result < GASNET_MAXEPS); + return _result; +} +#define gasneti_e_tm_rank_to_ep_index(e_tm,rank) \ + gasneti_i_tm_rank_to_ep_index(gasneti_import_tm(e_tm),rank) + GASNETI_INLINE(gasneti_i_tm_rank_to_location) gex_EP_Location_t gasneti_i_tm_rank_to_location(gasneti_TM_t _i_tm, gex_Rank_t _rank, gex_Flags_t _flags) { - gasneti_assert(_i_tm); - gasneti_assert_uint(_rank ,<, _i_tm->_size); + gasneti_check_i_tm_rank(_i_tm, _rank); gex_EP_Location_t _result; if (gasneti_is_tm0(_i_tm)) { _result.gex_rank = _rank; _result.gex_ep_index = 0; + } else if (gasneti_i_tm_is_pair(_i_tm)) { + _result.gex_rank = _rank; + _result.gex_ep_index = gasneti_tm_pair_rem_idx(gasneti_i_tm_to_pair(_i_tm)); } else if (!GASNETI_ALLOW_SPARSE_TEAMREP || _i_tm->_rank_map) { gasneti_assert(_i_tm->_rank_map); _result.gex_rank = _i_tm->_rank_map[_rank]; @@ -232,7 +425,7 @@ GASNETI_INLINE(gasneti_i_tm_jobrank_to_rank) gex_Rank_t gasneti_i_tm_jobrank_to_rank(gasneti_TM_t _i_tm, gex_Rank_t _jobrank) { gasneti_assert(_i_tm); gasneti_assert_uint(_jobrank ,<, gex_System_QueryJobSize()); - if (gasneti_is_tm0(_i_tm)) return _jobrank; + if 
(gasneti_is_tm0(_i_tm) || gasneti_i_tm_is_pair(_i_tm)) return _jobrank; return gasneti_tm_rev_rank(_i_tm, _jobrank); } #define gasneti_e_tm_jobrank_to_rank(e_tm,jobrank) \ @@ -240,12 +433,17 @@ gex_Rank_t gasneti_i_tm_jobrank_to_rank(gasneti_TM_t _i_tm, gex_Rank_t _jobrank) extern gasnet_seginfo_t *gasneti_seginfo; extern gasnet_seginfo_t *gasneti_seginfo_aux; +extern gasnet_seginfo_t *gasneti_seginfo_tbl[GASNET_MAXEPS]; -// TODO: generalize for multi-{EP,segment} support // TODO: work towards dropping non-scalable seginfo tables GASNETI_INLINE(gasneti_client_seginfo) const gasnet_seginfo_t *gasneti_client_seginfo(gex_TM_t _e_tm, gex_Rank_t _rank) { - return gasneti_seginfo + gasneti_e_tm_rank_to_jobrank(_e_tm,_rank); + gex_EP_Location_t _loc = gasneti_e_tm_rank_to_location(_e_tm, _rank, 0); + gex_Rank_t _jobrank = _loc.gex_rank; + gex_EP_Index_t _idx = _loc.gex_ep_index; + gasnet_seginfo_t *_si_array = gasneti_seginfo_tbl[_idx]; + gasneti_assert(_si_array); + return _si_array + _jobrank; } GASNETI_INLINE(gasneti_aux_seginfo) const gasnet_seginfo_t *gasneti_aux_seginfo(gex_Rank_t _jobrank) { @@ -277,11 +475,11 @@ int _gasneti_in_segment_t(const void *_ptr, size_t _nbytes, const gex_Segment_t gasneti_assert(_ptr); } #define gasneti_in_clientsegment(e_tm,rank,ptr,nbytes) \ - (gasneti_inseg_helper(ptr,nbytes),gasneti_check_tm_rank(e_tm,rank), 1) + (gasneti_inseg_helper(ptr,nbytes),gasneti_check_e_tm_rank(e_tm,rank), 1) #define gasneti_in_auxsegment(jobrank,ptr,nbytes) \ (gasneti_inseg_helper(ptr,nbytes),gasneti_check_jobrank(jobrank), 1) #define gasneti_in_fullsegment(e_tm,rank,ptr,nbytes) \ - (gasneti_inseg_helper(ptr,nbytes), gasneti_check_tm_rank(e_tm,rank), 1) + (gasneti_inseg_helper(ptr,nbytes), gasneti_check_e_tm_rank(e_tm,rank), 1) #else #define gasneti_in_clientsegment(e_tm,rank,ptr,nbytes) \ _gasneti_in_seginfo_t(ptr,nbytes,gasneti_client_seginfo(e_tm,rank)) @@ -330,7 +528,7 @@ int _gasneti_in_segment_t(const void *_ptr, size_t _nbytes, const gex_Segment_t 
#ifdef GASNETI_SUPPORTS_OUTOFSEGMENT_PUTGET /* in-segment check for internal put/gets that may exploit outofseg support */ #define gasneti_in_segment_allowoutseg(e_tm,rank,ptr,nbytes) \ - (gasneti_check_tm_rank(e_tm,rank), 1) + (gasneti_check_e_tm_rank(e_tm,rank), 1) #else #define gasneti_in_segment_allowoutseg gasneti_in_segment #endif @@ -339,7 +537,7 @@ int _gasneti_in_segment_t(const void *_ptr, size_t _nbytes, const gex_Segment_t gex_TM_t _gex_bc_tm = (e_tm); \ gasneti_assert(_gex_bc_tm); \ gex_Rank_t _gex_bc_rank = (rank); \ - gex_Rank_t _gex_bc_size = gex_TM_QuerySize(_gex_bc_tm); \ + gex_Rank_t _gex_bc_size = gasneti_e_tm_size(_gex_bc_tm); \ const void *_gex_bc_ptr = (const void *)(ptr); \ size_t _gex_bc_nbytes = (size_t)(nbytes); \ gasneti_assert(_gex_bc_nbytes); /* avoids "fence post" error */ \ @@ -399,15 +597,15 @@ int _gasneti_in_segment_t(const void *_ptr, size_t _nbytes, const gex_Segment_t // TODO-EX: move to gasnet_event_internal.h #ifndef _GEX_EVENT_T GASNETI_INLINE(gasneti_leaf_is_pointer) GASNETI_PURE -int gasneti_leaf_is_pointer(const gex_Event_t *opt_val) { - gasneti_assert(opt_val != NULL); - return ((uintptr_t)(opt_val) >= (uintptr_t)4); +int gasneti_leaf_is_pointer(const gex_Event_t *_opt_val) { + gasneti_assert(_opt_val != NULL); + return ((uintptr_t)(_opt_val) >= (uintptr_t)4); } GASNETI_PUREP(gasneti_leaf_is_pointer) GASNETI_INLINE(gasneti_leaf_finish) -void gasneti_leaf_finish(gex_Event_t *opt_val) { - if (gasneti_leaf_is_pointer(opt_val)) *opt_val = GEX_EVENT_INVALID; +void gasneti_leaf_finish(gex_Event_t *_opt_val) { + if (gasneti_leaf_is_pointer(_opt_val)) *_opt_val = GEX_EVENT_INVALID; } #endif @@ -447,18 +645,18 @@ void gasneti_leaf_finish(gex_Event_t *opt_val) { #define GASNETI_SPINLOCK_UNLOCKED 0xaa55 #define GASNETI_SPINLOCK_DESTROYED 0xDEAD GASNETI_INLINE(gasneti_spinlock_is_valid) - int gasneti_spinlock_is_valid(gasneti_atomic_t *plock) { - uint32_t tmp = gasneti_atomic_read(plock, GASNETI_ATOMIC_RMB_PRE); - if_pf (tmp == 
GASNETI_SPINLOCK_DESTROYED) + int gasneti_spinlock_is_valid(gasneti_atomic_t *_plock) { + uint32_t _tmp = gasneti_atomic_read(_plock, GASNETI_ATOMIC_RMB_PRE); + if_pf (_tmp == GASNETI_SPINLOCK_DESTROYED) gasneti_fatalerror("Detected use of destroyed spinlock"); - if_pf (!((tmp == GASNETI_SPINLOCK_LOCKED) || (tmp == GASNETI_SPINLOCK_UNLOCKED))) + if_pf (!((_tmp == GASNETI_SPINLOCK_LOCKED) || (_tmp == GASNETI_SPINLOCK_UNLOCKED))) gasneti_fatalerror("Detected use of uninitialized or corrupted spinlock"); return 1; } GASNETI_INLINE(gasneti_spinlock_is_locked) - int gasneti_spinlock_is_locked(gasneti_atomic_t *plock) { - uint32_t tmp = gasneti_atomic_read(plock, GASNETI_ATOMIC_RMB_PRE); - return (tmp == GASNETI_SPINLOCK_LOCKED); + int gasneti_spinlock_is_locked(gasneti_atomic_t *_plock) { + uint32_t _tmp = gasneti_atomic_read(_plock, GASNETI_ATOMIC_RMB_PRE); + return (_tmp == GASNETI_SPINLOCK_LOCKED); } #else #define GASNETI_SPINLOCK_LOCKED 1 @@ -479,22 +677,22 @@ void gasneti_leaf_finish(gex_Event_t *opt_val) { gasneti_assert(gasneti_spinlock_is_locked(plock)); \ } while (0) GASNETI_INLINE(gasneti_spinlock_unlock) - int gasneti_spinlock_unlock(gasneti_atomic_t *plock) { + int gasneti_spinlock_unlock(gasneti_atomic_t *_plock) { #if GASNET_DEBUG /* Using CAS for release is more costly, but adds validation */ - gasneti_assert(gasneti_atomic_compare_and_swap(plock, GASNETI_SPINLOCK_LOCKED, GASNETI_SPINLOCK_UNLOCKED, GASNETI_ATOMIC_REL)); + gasneti_assert(gasneti_atomic_compare_and_swap(_plock, GASNETI_SPINLOCK_LOCKED, GASNETI_SPINLOCK_UNLOCKED, GASNETI_ATOMIC_REL)); #else - gasneti_atomic_set(plock, GASNETI_SPINLOCK_UNLOCKED, GASNETI_ATOMIC_REL); + gasneti_atomic_set(_plock, GASNETI_SPINLOCK_UNLOCKED, GASNETI_ATOMIC_REL); #endif return 0; } /* return 0/EBUSY on success/failure to match pthreads */ GASNETI_INLINE(gasneti_spinlock_trylock) GASNETI_WARN_UNUSED_RESULT - int gasneti_spinlock_trylock(gasneti_atomic_t *plock) { - gasneti_assert(gasneti_spinlock_is_valid(plock)); 
- if ((GASNETI_SPINLOCK_UNLOCKED == gasneti_atomic_read(plock, 0)) && - gasneti_atomic_compare_and_swap(plock, GASNETI_SPINLOCK_UNLOCKED, GASNETI_SPINLOCK_LOCKED, GASNETI_ATOMIC_ACQ_IF_TRUE)) { - gasneti_assert(gasneti_spinlock_is_locked(plock)); + int gasneti_spinlock_trylock(gasneti_atomic_t *_plock) { + gasneti_assert(gasneti_spinlock_is_valid(_plock)); + if ((GASNETI_SPINLOCK_UNLOCKED == gasneti_atomic_read(_plock, 0)) && + gasneti_atomic_compare_and_swap(_plock, GASNETI_SPINLOCK_UNLOCKED, GASNETI_SPINLOCK_LOCKED, GASNETI_ATOMIC_ACQ_IF_TRUE)) { + gasneti_assert(gasneti_spinlock_is_locked(_plock)); return 0; } else { return EBUSY; @@ -507,16 +705,16 @@ void gasneti_leaf_finish(gex_Event_t *opt_val) { #define GASNETI_SPINLOCK_DESTROYED 2 #if GASNET_DEBUG GASNETI_INLINE(gasneti_spinlock_is_valid) - int gasneti_spinlock_is_valid(gasneti_atomic_t *plock) { - uint32_t tmp = gasneti_atomic_read(plock, GASNETI_ATOMIC_RMB_PRE); - if_pf (tmp == GASNETI_SPINLOCK_DESTROYED) + int gasneti_spinlock_is_valid(gasneti_atomic_t *_plock) { + uint32_t _tmp = gasneti_atomic_read(_plock, GASNETI_ATOMIC_RMB_PRE); + if_pf (_tmp == GASNETI_SPINLOCK_DESTROYED) gasneti_fatalerror("Detected use of destroyed spinlock"); return 1; } GASNETI_INLINE(gasneti_spinlock_is_locked) - int gasneti_spinlock_is_locked(gasneti_atomic_t *plock) { - gasneti_atomic_val_t tmp = gasneti_atomic_read(plock, GASNETI_ATOMIC_RMB_PRE); - return (tmp != GASNETI_SPINLOCK_UNLOCKED); + int gasneti_spinlock_is_locked(gasneti_atomic_t *_plock) { + gasneti_atomic_val_t _tmp = gasneti_atomic_read(_plock, GASNETI_ATOMIC_RMB_PRE); + return (_tmp != GASNETI_SPINLOCK_UNLOCKED); } #else #define gasneti_spinlock_is_valid(plock) 1 @@ -529,10 +727,10 @@ void gasneti_leaf_finish(gex_Event_t *opt_val) { gasneti_atomic_set((plock), GASNETI_SPINLOCK_DESTROYED, GASNETI_ATOMIC_WMB_POST); \ } while (0) GASNETI_INLINE(_gasneti_spinlock_try) GASNETI_WARN_UNUSED_RESULT - int _gasneti_spinlock_try(gasneti_atomic_t *plock) { - 
gasneti_assert(gasneti_spinlock_is_valid(plock)); - return (gasneti_atomic_read(plock, 0) == GASNETI_SPINLOCK_UNLOCKED) && - gasneti_atomic_decrement_and_test(plock, GASNETI_ATOMIC_ACQ_IF_TRUE); + int _gasneti_spinlock_try(gasneti_atomic_t *_plock) { + gasneti_assert(gasneti_spinlock_is_valid(_plock)); + return (gasneti_atomic_read(_plock, 0) == GASNETI_SPINLOCK_UNLOCKED) && + gasneti_atomic_decrement_and_test(_plock, GASNETI_ATOMIC_ACQ_IF_TRUE); } /* Ick: forward reference to GASNETI_WAITHOOK only works because this is a macro */ #define gasneti_spinlock_lock(plock) do { \ @@ -541,15 +739,15 @@ void gasneti_leaf_finish(gex_Event_t *opt_val) { } \ } while (0) GASNETI_INLINE(gasneti_spinlock_unlock) - int gasneti_spinlock_unlock(gasneti_atomic_t *plock) { - gasneti_assert(gasneti_spinlock_is_locked(plock)); - gasneti_atomic_set(plock, GASNETI_SPINLOCK_UNLOCKED, GASNETI_ATOMIC_REL); + int gasneti_spinlock_unlock(gasneti_atomic_t *_plock) { + gasneti_assert(gasneti_spinlock_is_locked(_plock)); + gasneti_atomic_set(_plock, GASNETI_SPINLOCK_UNLOCKED, GASNETI_ATOMIC_REL); return 0; } /* return 0/EBUSY on success/failure to match pthreads */ GASNETI_INLINE(gasneti_spinlock_trylock) GASNETI_WARN_UNUSED_RESULT - int gasneti_spinlock_trylock(gasneti_atomic_t *plock) { - return _gasneti_spinlock_try(plock) ? 0 : EBUSY; + int gasneti_spinlock_trylock(gasneti_atomic_t *_plock) { + return _gasneti_spinlock_try(_plock) ? 
0 : EBUSY; } #define GASNETI_HAVE_SPINLOCK 1 #endif @@ -603,8 +801,12 @@ void gasneti_leaf_finish(gex_Event_t *opt_val) { sizeof(_gasneti_threadinfo_cache) + sizeof(_gasneti_threadinfo_available); /* silly little trick to prevent unused variable warning on gcc -Wall */ + // tmp variable below solves scoping problems on cache for expressions like: + // GASNET_POST_THREADINFO(GASNET_GET_THREADINFO()) + // where the cache from an enclosing scope is consulted by that GET #define GASNET_POST_THREADINFO(info) \ - gasnet_threadinfo_t _gasneti_threadinfo_cache = (info); \ + gasnet_threadinfo_t const _gasneti_threadinfo_tmp = (info); \ + gasnet_threadinfo_t _gasneti_threadinfo_cache = _gasneti_threadinfo_tmp; \ uint32_t _gasneti_threadinfo_available = 0 /* if you get an unused variable warning on _gasneti_threadinfo_available, it means you POST'ed in a function which made no GASNet calls that needed it @@ -726,7 +928,12 @@ void gasneti_leaf_finish(gex_Event_t *opt_val) { // GASNETI_MYTHREAD_GET_OR_LOOKUP: force retrieve my (gasneti_threaddata_t *) from one of: // a prior GASNET_POST_THREADINFO, an FARG to the enclosing function, or dynamic lookup // This is essentially GASNETI_MYTHREAD without requiring FARG/POST'd context (allows lookup) - // Only valid known use is macros that expand threaddata field access directly into client code + // Only valid known use is macros that expand threaddata field access directly into an + // "unknown" context, such as in client code or certain cases of internal code with callers + // in multiple conduits and/or subsystems. + // This is NOT suitable for internal code in which the macro definition and its callers fall + // within a single conduit or subsystem. Such cases should instead establish FARG/POST'd + // context and use GASNETI_MYTHREAD. 
#define GASNETI_MYTHREAD_GET_OR_LOOKUP ((struct _gasneti_threaddata_t *)GASNET_GET_THREADINFO()) #else @@ -797,6 +1004,19 @@ void gasneti_leaf_finish(gex_Event_t *opt_val) { #endif /* returns the runtime size of the thread table (always <= GASNETI_MAX_THREADS) */ extern uint64_t gasneti_max_threads(void); +// same as above, except reduced by conduit-internal threads, if any +#if GASNET_SEQ + #define gex_System_QueryMaxThreads() ((uint64_t)1) +#elif GASNETE_CONDUIT_THREADS_USING_TD + GASNETI_INLINE(gex_System_QueryMaxThreads) + uint64_t gex_System_QueryMaxThreads(void) { + // This is conservative. + // A conduit may spawn _up to_ GASNETE_CONDUIT_THREADS_USING_TD, but could spawn fewer. + return gasneti_max_threads() - GASNETE_CONDUIT_THREADS_USING_TD; + } +#else + #define gex_System_QueryMaxThreads() gasneti_max_threads() +#endif extern void gasneti_fatal_threadoverflow(const char *_subsystem); #ifndef _GASNETI_MYTHREAD_SLOW @@ -865,6 +1085,23 @@ extern int gasnete_maxthreadidx; gasneti_memcheck(gasnete_threadtable[_thid]); \ } while (0) +// ------------------------------------------------------------------------------------ +// Checks for communication calls in invalid contexts +// +// TODO: should be expanded to check handler and HSL contexts as well (not just NPAM) + +#if GASNET_DEBUG + extern void gasneti_check_inject(int _for_reply GASNETI_THREAD_FARG); + #define GASNETI_CHECK_INJECT() gasneti_check_inject(0 GASNETI_THREAD_GET) + #define GASNETI_CHECK_INJECT_REPLY() gasneti_check_inject(1 GASNETI_THREAD_GET) + extern void gasneti_check_inject_reset(GASNETI_THREAD_FARG_ALONE); + #define GASNETI_CHECK_INJECT_RESET() gasneti_check_inject_reset(GASNETI_THREAD_GET_ALONE) +#else + #define GASNETI_CHECK_INJECT() ((void)0) + #define GASNETI_CHECK_INJECT_REPLY() ((void)0) + #define GASNETI_CHECK_INJECT_RESET() ((void)0) +#endif + /* ------------------------------------------------------------------------------------ */ /* GASNet progressfn support * progressfns are 
internal functions that are called "periodically" by a conduit to @@ -1038,12 +1275,12 @@ typedef void (*gasneti_progressfn_t)(void); #if !GASNETI_THROTTLE_POLLERS GASNETI_INLINE(_gasneti_AMPoll) int _gasneti_AMPoll(GASNETI_THREAD_FARG_ALONE) { - int retval; + int _retval; gasneti_AMPoll_spinpollers_check(); gasneti_memcheck_one(); - retval = gasnetc_AMPoll(GASNETI_THREAD_PASS_ALONE); + _retval = gasnetc_AMPoll(GASNETI_THREAD_PASS_ALONE); GASNETI_PROGRESSFNS_RUN(); - return retval; + return _retval; } #define gasneti_suspend_spinpollers() gasneti_suspend_spinpollers_check() #define gasneti_resume_spinpollers() gasneti_resume_spinpollers_check() @@ -1080,18 +1317,18 @@ typedef void (*gasneti_progressfn_t)(void); /* and finally, the throttled poll implementation */ GASNETI_INLINE(_gasneti_AMPoll) int _gasneti_AMPoll(GASNETI_THREAD_FARG_ALONE) { - int retval = GASNET_OK; + int _retval = GASNET_OK; gasneti_AMPoll_spinpollers_check(); gasneti_memcheck_one(); /* if another thread is spin-polling then skip both the poll and progress fns: */ if_pt (!gasneti_mutex_trylock(&gasneti_throttle_spinpoller)) { /* if another thread is sending then skip the poll: */ if_pt (!gasneti_atomic_read(&gasneti_throttle_haveusefulwork,0)) - retval = gasnetc_AMPoll(GASNETI_THREAD_PASS_ALONE); + _retval = gasnetc_AMPoll(GASNETI_THREAD_PASS_ALONE); gasneti_mutex_unlock(&gasneti_throttle_spinpoller); GASNETI_PROGRESSFNS_RUN(); } - return retval; + return _retval; } #endif #define gasneti_AMPoll() _gasneti_AMPoll(GASNETI_THREAD_GET_ALONE) @@ -1173,6 +1410,7 @@ extern int gasneti_wait_mode; /* current waitmode hint */ GASNETI_INLINE(_gasnet_AMPoll) int _gasnet_AMPoll(GASNETI_THREAD_FARG_ALONE) { GASNETI_TRACE_EVENT(X, AMPOLL); + GASNETI_CHECK_INJECT(); return _gasneti_AMPoll(GASNETI_THREAD_PASS_ALONE); } #define gasnet_AMPoll() _gasnet_AMPoll(GASNETI_THREAD_GET_ALONE) @@ -1181,9 +1419,9 @@ extern int gasneti_wait_mode; /* current waitmode hint */ #ifndef _GASNET_GETENV #define _GASNET_GETENV 
GASNETI_INLINE(gasnet_getenv) - char *gasnet_getenv(const char *s) { + char *gasnet_getenv(const char *_s) { GASNETI_CHECKINIT(); - return gasneti_getenv(s); + return gasneti_getenv(_s); } #endif @@ -1192,7 +1430,7 @@ extern int gasneti_wait_mode; /* current waitmode hint */ #define GASNET_WAIT_SPIN 0 /* contend aggressively for CPU resources while waiting (spin) */ #define GASNET_WAIT_BLOCK 1 /* yield CPU resources immediately while waiting (block) */ #define GASNET_WAIT_SPINBLOCK 2 /* spin for an implementation-dependent period, then block */ - extern int gasneti_set_waitmode(int wait_mode); + extern int gasneti_set_waitmode(int _wait_mode); #define gasnet_set_waitmode(wait_mode) gasneti_set_waitmode(wait_mode) #endif @@ -1214,30 +1452,19 @@ extern int gasneti_wait_mode; /* current waitmode hint */ #ifndef _GASNET_GETSEGMENTINFO #define _GASNET_GETSEGMENTINFO - extern int gasneti_getSegmentInfo(gasnet_seginfo_t *seginfo_table, int numentries); + extern int gasneti_getSegmentInfo(gasnet_seginfo_t *_seginfo_table, int _numentries); #define gasnet_getSegmentInfo(seginfo_table, numentries) \ gasneti_getSegmentInfo(seginfo_table, numentries) #endif #ifndef _GASNET_GETNODEINFO #define _GASNET_GETNODEINFO - extern int gasneti_getNodeInfo(gasnet_nodeinfo_t *nodeinfo_table, int numentries); + extern int gasneti_getNodeInfo(gasnet_nodeinfo_t *_nodeinfo_table, int _numentries); #define gasnet_getNodeInfo(nodeinfo_table, numentries) \ gasneti_getNodeInfo(nodeinfo_table, numentries) #endif extern gasnet_nodeinfo_t *gasneti_nodeinfo; -// TODO-EX: override? 
-#if 1 - extern int gasneti_Segment_QueryBound( gex_TM_t tm, - gex_Rank_t rank, - void **owneraddr_p, - void **localaddr_p, - uintptr_t *size_p); - #define gex_Segment_QueryBound(tm,rank,o_p,l_p,s_p) \ - gasneti_Segment_QueryBound(tm,rank,o_p,l_p,s_p) -#endif - #ifdef GASNETI_RECORD_DYNAMIC_THREADLOOKUP GASNETI_INLINE(gasneti_record_dynamic_threadlookup) void gasneti_record_dynamic_threadlookup(void) { @@ -1250,12 +1477,12 @@ extern gasnet_nodeinfo_t *gasneti_nodeinfo; #if defined(PTHREAD_MUTEX_INITIALIZER) /* only if pthread.h available */ && !GASNET_SEQ /* gasneti_pthread_create() available on all non-SEQ builds w/ pthreads */ typedef int (gasneti_pthread_create_fn_t)(pthread_t *, const pthread_attr_t *, void *(*)(void *), void *); - extern int gasneti_pthread_create(gasneti_pthread_create_fn_t *create_fn, pthread_t *thread, const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg); + extern int gasneti_pthread_create(gasneti_pthread_create_fn_t *_create_fn, pthread_t *_thread, const pthread_attr_t *_attr, void *(*_start_routine)(void *), void *_arg); #if defined(GASNETC_PTHREAD_CREATE_OVERRIDE) /* Capture existing defn, which could be another library's override */ - static int gasneti_pthread_create_system(pthread_t *thread, const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg) { - return pthread_create(thread,attr,start_routine,arg); + static int gasneti_pthread_create_system(pthread_t *_thread, const pthread_attr_t *_attr, void *(*_start_routine)(void *), void *_arg) { + return pthread_create(_thread,_attr,_start_routine,_arg); } /* Install our override */ #undef pthread_create @@ -1264,6 +1491,40 @@ extern gasnet_nodeinfo_t *gasneti_nodeinfo; #endif #endif +/* ------------------------------------------------------------------------------------ */ +// Memory Kinds + +// The following GASNET_HAVE_MK_CLASS_* identifiers are either `1` or unset + +#define GASNET_HAVE_MK_CLASS_HOST 1 // For consistency - always available + +#if 
GASNET_HAVE_MK_CLASS_CUDA_UVA + #undef GASNET_HAVE_MK_CLASS_CUDA_UVA + #define GASNET_HAVE_MK_CLASS_CUDA_UVA 1 + #define GASNETI_MK_CLASS_CUDA_UVA_CONFIG mk_class_cuda_uva +#else + #undef GASNET_HAVE_MK_CLASS_CUDA_UVA + #define GASNETI_MK_CLASS_CUDA_UVA_CONFIG nomk_class_cuda_uva +#endif + +#if GASNET_HAVE_MK_CLASS_CUDA_UVA // || GASNET_HAVE_MK_CLASS_[FOO] + #define GASNET_HAVE_MK_CLASS_MULTIPLE 1 +#endif + +#if GASNET_HAVE_MK_CLASS_MULTIPLE + GASNETI_INLINE(gasneti_i_segment_kind_is_host) + int gasneti_i_segment_kind_is_host(gasneti_Segment_t _segment) { + // Either NULL (such as for no bound segment, which is just fine for + // out-of-segment or in-aux-seg local addrs) OR the kind is GEX_MK_HOST. + return !_segment || (_segment->_kind == GEX_MK_HOST); + } + #define gasneti_e_segment_kind_is_host(segment) \ + gasneti_i_segment_kind_is_host(gasneti_import_segment(segment)) +#else + #define gasneti_i_segment_kind_is_host(segment) 1 + #define gasneti_e_segment_kind_is_host(segment) 1 +#endif + /* ------------------------------------------------------------------------------------ */ /* PSHM support */ #if GASNET_PSHM @@ -1346,19 +1607,64 @@ void *gasneti_pshm_jobrank_addr2local(gex_Rank_t _jobrank, const void *_addr) { } GASNETI_PUREP(gasneti_pshm_jobrank_addr2local) +// Helper for what follows +// Returns a jobrank or GEX_RANK_INVALID depending on whether the local and +// remote endpoints named by (tm,rank) are both "eligible" for PSHM, exclusive +// of the check on the jobrank being in-nbrhd. The eligibility criteria are: +// 1. Remote endpoint must be primordial (have EP index 0) +// 2. Local endpoint must be host memory +// However, checking these efficiently is not as simple as it sounds. 
+extern gasneti_Segment_t gasneti_tm_pair_to_segment(gasneti_TM_Pair_t _tm_pair); +GASNETI_INLINE(gasneti_pshm_jobrank_if_eligible) GASNETI_PURE +gex_Rank_t gasneti_pshm_jobrank_if_eligible(gex_TM_t _e_tm, gex_Rank_t _rank) { + gasneti_TM_t _i_tm = gasneti_import_tm(_e_tm); + if (gasneti_is_tm0(_i_tm)) { + // fast path for TM0 + return _rank; + } + gex_EP_Location_t _loc = gasneti_i_tm_rank_to_location(_i_tm, _rank, 0); + if (_loc.gex_ep_index) { + // not eligible due to non-primordial remote EP + return GEX_RANK_INVALID; + } + // If we've made it this far, the (tm,rank) is eligible only and only if + // the local ep is host memory (which can take some work to determine). + gex_Rank_t _jobrank = _loc.gex_rank; +#if !GASNET_HAVE_MK_CLASS_MULTIPLE + return _jobrank; // Trivial host memory when no device kinds are supported +#else + gasneti_Segment_t _segment; + if (! gasneti_i_tm_is_pair(_i_tm)) { + // Full TM object - can check objects directly + gasneti_EP_t _ep = _i_tm->_ep; + if (_ep->_index == 0) return _jobrank; // EP index 0 is primordial + _segment = _i_tm->_ep->_segment; + } else { + gasneti_TM_Pair_t _tm_pair = gasneti_i_tm_to_pair(_i_tm); + gex_EP_Index_t _idx = gasneti_tm_pair_loc_idx(_tm_pair); + if (_idx == 0) return _jobrank; // EP index 0 is primordial + _segment = gasneti_tm_pair_to_segment(_tm_pair); + } + return gasneti_i_segment_kind_is_host(_segment) ? _jobrank : GEX_RANK_INVALID; +#endif +} + // Same as the three functions above, but taking (tm,rank) in place of jobrank +// All are TM-pair aware, and the first two are multi-EP aware GASNETI_INLINE(gasneti_pshm_local_rank) GASNETI_PURE unsigned int gasneti_pshm_local_rank(gex_TM_t _e_tm, gex_Rank_t _rank) { - gex_Rank_t _jobrank = gasneti_e_tm_rank_to_jobrank(_e_tm,_rank); - return gasneti_pshm_jobrank_to_local_rank(_jobrank); + gex_Rank_t _jobrank = gasneti_pshm_jobrank_if_eligible(_e_tm, _rank); + return (_jobrank == GEX_RANK_INVALID) + ? 
(unsigned int)(-1) + : gasneti_pshm_jobrank_to_local_rank(_jobrank); } GASNETI_PUREP(gasneti_pshm_local_rank) GASNETI_INLINE(gasneti_pshm_in_supernode) GASNETI_PURE int gasneti_pshm_in_supernode(gex_TM_t _e_tm, gex_Rank_t _rank) { - gex_Rank_t _jobrank = gasneti_e_tm_rank_to_jobrank(_e_tm,_rank); - return gasneti_pshm_jobrank_in_supernode(_jobrank); + gex_Rank_t _jobrank = gasneti_pshm_jobrank_if_eligible(_e_tm, _rank); + return (_jobrank != GEX_RANK_INVALID) && gasneti_pshm_jobrank_in_supernode(_jobrank); } GASNETI_PUREP(gasneti_pshm_in_supernode) @@ -1515,12 +1821,12 @@ GASNETI_PUREP(gasneti_pshm_addr2local) // Is the argument a *single* valid data type? GASNETI_INLINE(gasneti_dt_valid) GASNETI_PURE -int gasneti_dt_valid(gex_DT_t dt) { - return (((dt) & _GEX_DT_VALID) && GASNETI_POWEROFTWO(dt)); +int gasneti_dt_valid(gex_DT_t _dt) { + return (((_dt) & _GEX_DT_VALID) && GASNETI_POWEROFTWO(_dt)); } GASNETI_INLINE(gasneti_dt_valid_atomic) GASNETI_PURE -int gasneti_dt_valid_atomic(gex_DT_t dt) { - return gasneti_dt_valid(dt) && (dt != GEX_DT_USER); +int gasneti_dt_valid_atomic(gex_DT_t _dt) { + return gasneti_dt_valid(_dt) && (_dt != GEX_DT_USER); } #define gasneti_dt_valid_reduce gasneti_dt_valid @@ -1541,9 +1847,9 @@ int gasneti_dt_valid_atomic(gex_DT_t dt) { // What is the size of the type? // TODO: might be made cheaper by encoding size into the GEX_DT_* constants GASNETI_INLINE(gasneti_dt_size) GASNETI_PURE -size_t gasneti_dt_size(gex_DT_t dt) { - gasneti_assert(!gasneti_dt_4byte(dt) ^ !gasneti_dt_8byte(dt)); - return (size_t) (gasneti_dt_4byte(dt) ? 4 : 8); +size_t gasneti_dt_size(gex_DT_t _dt) { + gasneti_assert(!gasneti_dt_4byte(_dt) ^ !gasneti_dt_8byte(_dt)); + return (size_t) (gasneti_dt_4byte(_dt) ? 4 : 8); } /* ------------------------------------------------------------------------------------ */ @@ -1626,16 +1932,16 @@ size_t gasneti_dt_size(gex_DT_t dt) { // Is the argument a *single* valid operation? 
GASNETI_INLINE(gasneti_op_valid) GASNETI_PURE -int gasneti_op_valid(gex_OP_t op) { - return (((op) & _GEX_OP_VALID) && GASNETI_POWEROFTWO(op)); +int gasneti_op_valid(gex_OP_t _op) { + return (((_op) & _GEX_OP_VALID) && GASNETI_POWEROFTWO(_op)); } GASNETI_INLINE(gasneti_op_valid_atomic) GASNETI_PURE -int gasneti_op_valid_atomic(gex_OP_t op) { - return gasneti_op_valid(op) && gasneti_op_atomic(op); +int gasneti_op_valid_atomic(gex_OP_t _op) { + return gasneti_op_valid(_op) && gasneti_op_atomic(_op); } GASNETI_INLINE(gasneti_op_valid_reduce) GASNETI_PURE -int gasneti_op_valid_reduce(gex_OP_t op) { - return gasneti_op_valid(op) && gasneti_op_reduce(op); +int gasneti_op_valid_reduce(gex_OP_t _op) { + return gasneti_op_valid(_op) && gasneti_op_reduce(_op); } // Predicates on masks: diff --git a/third-party/gasnet/gasnet-src/gasnet_internal.c b/third-party/gasnet/gasnet-src/gasnet_internal.c index b1d5dfcb4837..051c17b44dfd 100644 --- a/third-party/gasnet/gasnet-src/gasnet_internal.c +++ b/third-party/gasnet/gasnet-src/gasnet_internal.c @@ -118,6 +118,7 @@ int GASNETI_LINKCONFIG_IDIOTCHECK(GASNETI_ATOMIC_CONFIG) = 1; int GASNETI_LINKCONFIG_IDIOTCHECK(GASNETI_ATOMIC32_CONFIG) = 1; int GASNETI_LINKCONFIG_IDIOTCHECK(GASNETI_ATOMIC64_CONFIG) = 1; int GASNETI_LINKCONFIG_IDIOTCHECK(GASNETI_TIOPT_CONFIG) = 1; +int GASNETI_LINKCONFIG_IDIOTCHECK(GASNETI_MK_CLASS_CUDA_UVA_CONFIG) = 1; int GASNETI_LINKCONFIG_IDIOTCHECK(_CONCAT(HIDDEN_AM_CONCUR_,GASNET_HIDDEN_AM_CONCURRENCY_LEVEL)) = 1; int GASNETI_LINKCONFIG_IDIOTCHECK(_CONCAT(CACHE_LINE_BYTES_,GASNETI_CACHE_LINE_BYTES)) = 1; int GASNETI_LINKCONFIG_IDIOTCHECK(_CONCAT(GASNETI_TM0_ALIGN_,GASNETI_TM0_ALIGN)) = 1; @@ -155,6 +156,10 @@ gasneti_TM_t gasneti_thing_that_goes_thunk_in_the_dark = NULL; gasnet_seginfo_t *gasneti_seginfo = NULL; gasnet_seginfo_t *gasneti_seginfo_aux = NULL; +// TODO: this is proof-of-concept and not a scalable final solution (bug 4088) +// Note that (gasneti_seginfo_tbl[0] == gasneti_seginfo) to simplify some 
logic. +gasnet_seginfo_t *gasneti_seginfo_tbl[GASNET_MAXEPS] = {NULL, }; + /* ------------------------------------------------------------------------------------ */ /* conduit-independent sanity checks */ extern void gasneti_check_config_preinit(void) { @@ -360,6 +365,40 @@ extern void gasneti_check_config_postattach(void) { gasneti_flush_streams(); // flush above messages, and ensure FS_SYNC envvar is initted } +/* ------------------------------------------------------------------------------------ */ +// Helpers for debug checks + +#if GASNET_DEBUG +void gasneti_check_inject(int for_reply GASNETI_THREAD_FARG) { + gasneti_threaddata_t * const mythread = GASNETI_MYTHREAD; + if (!mythread) return; // Some conduits communicate very early + + if (mythread->reply_handler_active) { + gasneti_fatalerror("Invalid GASNet call (communication injection or poll) while executing a Reply handler"); + } + if (mythread->request_handler_active && !for_reply) { + gasneti_fatalerror("Invalid GASNet call (communication injection or poll) while executing a Request handler"); + } + + // NPAM checks are distinct to allow that entire subsytem to be overridden + gasneti_checknpam(for_reply GASNETI_THREAD_PASS); + + // TODO: check for HSL context +} + +// Resets all state indicative of restricted context. +// This is intended for use within `gasnet-exit()` which *is* valid from +// handler context, and is known to run with HSLs held on error paths. +// There is currently no other known-valid reason to use this call. 
+void gasneti_check_inject_reset(GASNETI_THREAD_FARG_ALONE) { + gasneti_threaddata_t * const mythread = GASNETI_MYTHREAD; + if (!mythread) return; // Some conduits communicate very early + mythread->reply_handler_active = 0; + mythread->request_handler_active = 0; + // TODO: reset HSL context +} +#endif + /* ------------------------------------------------------------------------------------ */ #ifndef _GASNET_ERRORNAME extern const char *gasnet_ErrorName(int errval) { @@ -395,6 +434,11 @@ extern void gasneti_freezeForDebugger(void) { } } /* ------------------------------------------------------------------------------------ */ +// Client management + +#ifdef GASNETC_CLIENT_EXTRA_DECLS +GASNETC_CLIENT_EXTRA_DECLS +#endif #ifndef _GEX_CLIENT_T #ifndef gasneti_import_client @@ -415,12 +459,15 @@ gex_Client_t gasneti_export_client(gasneti_Client_t _real_client) { // TODO-EX: either ensure name is unique OR perform "auto-increment" according to flags gasneti_Client_t gasneti_alloc_client( const char *name, - gex_Flags_t flags, - size_t requested_sz) + gex_Flags_t flags) { gasneti_Client_t client; - if (requested_sz) gasneti_assert_uint(requested_sz ,>=, sizeof(*client)); - size_t alloc_size = requested_sz ? 
requested_sz : sizeof(*client); +#ifdef GASNETC_SIZEOF_CLIENT_T + size_t alloc_size = GASNETC_SIZEOF_CLIENT_T(); + gasneti_assert_uint(alloc_size ,>=, sizeof(*client)); +#else + size_t alloc_size = sizeof(*client); +#endif client = gasneti_malloc(alloc_size); GASNETI_INIT_MAGIC(client, GASNETI_CLIENT_MAGIC); client->_tm0 = NULL; @@ -429,18 +476,20 @@ gasneti_Client_t gasneti_alloc_client( client->_flags = flags; gasneti_assert_always(sizeof(client->_next_ep_index) >= sizeof(gex_EP_Index_t)); gasneti_weakatomic32_set(&client->_next_ep_index, 0, 0); -#ifdef GASNETI_CLIENT_ALLOC_EXTRA - GASNETI_CLIENT_ALLOC_EXTRA(client); + memset(client->_ep_tbl, 0, sizeof(client->_ep_tbl)); +#ifdef GASNETC_CLIENT_INIT_HOOK + GASNETC_CLIENT_INIT_HOOK(client); #else - if (requested_sz) memset(client + 1, 0, alloc_size - sizeof(*client)); + size_t extra = alloc_size - sizeof(*client); + if (extra) memset(client + 1, 0, extra); #endif return client; } void gasneti_free_client(gasneti_Client_t client) { -#ifdef GASNETI_CLIENT_FREE_EXTRA - GASNETI_CLIENT_FREE_EXTRA(client); +#ifdef GASNETI_CLIENT_FINI_HOOK + GASNETI_CLIENT_FINI_HOOK(client); #endif gasneti_free((/*non-const*/char*)client->_name); GASNETI_INIT_MAGIC(client, GASNETI_CLIENT_BAD_MAGIC); @@ -448,6 +497,12 @@ void gasneti_free_client(gasneti_Client_t client) } #endif // _GEX_CLIENT_T +/* ------------------------------------------------------------------------------------ */ +// Segment management + +#ifdef GASNETC_SEGMENT_EXTRA_DECLS +GASNETC_SEGMENT_EXTRA_DECLS +#endif #ifndef _GEX_SEGMENT_T #ifndef gasneti_import_segment @@ -470,38 +525,50 @@ gasneti_Segment_t gasneti_alloc_segment( gasneti_Client_t client, void *addr, uintptr_t size, - gex_Flags_t flags, - size_t requested_sz) + gex_MK_t kind, + gex_Flags_t flags) { gasneti_Segment_t segment; - if (requested_sz) gasneti_assert_uint(requested_sz ,>=, sizeof(*segment)); - size_t alloc_size = requested_sz ? 
requested_sz : sizeof(*segment); +#ifdef GASNETC_SIZEOF_SEGMENT_T + size_t alloc_size = GASNETC_SIZEOF_SEGMENT_T(); + gasneti_assert_uint(alloc_size ,>=, sizeof(*segment)); +#else + size_t alloc_size = sizeof(*segment); +#endif segment = gasneti_malloc(alloc_size); GASNETI_INIT_MAGIC(segment, GASNETI_SEGMENT_MAGIC); segment->_client = client; segment->_cdata = NULL; + segment->_kind = kind; segment->_flags = flags; segment->_addr = addr; segment->_ub = (void*)((uintptr_t)addr + size); segment->_size = size; -#ifdef GASNETI_SEGMENT_ALLOC_EXTRA - GASNETI_SEGMENT_ALLOC_EXTRA(segment); +#ifdef GASNETC_SEGMENT_INIT_HOOK + GASNETC_SEGMENT_INIT_HOOK(segment); #else - if (requested_sz) memset(segment + 1, 0, alloc_size - sizeof(*segment)); + size_t extra = alloc_size - sizeof(*segment); + if (extra) memset(segment + 1, 0, extra); #endif return segment; } void gasneti_free_segment(gasneti_Segment_t segment) { -#ifdef GASNETI_SEGMENT_FREE_EXTRA - GASNETI_SEGMENT_FREE_EXTRA(segment); +#ifdef GASNETI_SEGMENT_FINI_HOOK + GASNETI_SEGMENT_FINI_HOOK(segment); #endif GASNETI_INIT_MAGIC(segment, GASNETI_SEGMENT_BAD_MAGIC); gasneti_free(segment); } #endif // _GEX_SEGMENT_T +/* ------------------------------------------------------------------------------------ */ +// Endpoint management + +#ifdef GASNETC_EP_EXTRA_DECLS +GASNETC_EP_EXTRA_DECLS +#endif #ifndef _GEX_EP_T #ifndef gasneti_import_ep @@ -519,49 +586,161 @@ gex_EP_t gasneti_export_ep(gasneti_EP_t _real_ep) { } #endif -// TODO-EX: probably need to add to a per-client container of some sort -// at which time _next_ep_index could be non-atomic, protected by same lock. 
-extern gasneti_EP_t gasneti_alloc_ep( +// Static on the assumption that all callers will reside in this file +// TODO: might subsume into gex_EP_Create() if there are no other callers +static gasneti_EP_t gasneti_alloc_ep( gasneti_Client_t client, + gex_EP_Capabilities_t caps, gex_Flags_t flags, - size_t requested_sz) + int new_index) { gasneti_EP_t endpoint; - if (requested_sz) gasneti_assert_uint(requested_sz ,>=, sizeof(*endpoint)); - size_t alloc_size = requested_sz ? requested_sz : sizeof(*endpoint); +#ifdef GASNETC_SIZEOF_EP_T + size_t alloc_size = GASNETC_SIZEOF_EP_T(); + gasneti_assert_uint(alloc_size ,>=, sizeof(*endpoint)); +#else + size_t alloc_size = sizeof(*endpoint); +#endif endpoint = gasneti_malloc(alloc_size); GASNETI_INIT_MAGIC(endpoint, GASNETI_EP_MAGIC); endpoint->_client = client; endpoint->_cdata = NULL; endpoint->_segment = NULL; + endpoint->_orig_caps = endpoint->_caps = caps; endpoint->_flags = flags; - endpoint->_index = gasneti_weakatomic32_add(&client->_next_ep_index, 1, 0) - 1; - gasneti_assert_always_uint(endpoint->_index ,<, GASNET_MAXEPS); - gasneti_amtbl_init(endpoint->_amtbl); -#ifdef GASNETI_EP_ALLOC_EXTRA - GASNETI_EP_ALLOC_EXTRA(endpoint); -#else - if (requested_sz) memset(endpoint + 1, 0, alloc_size - sizeof(*endpoint)); + endpoint->_index = new_index; + gasneti_assert(! 
client->_ep_tbl[new_index]); + client->_ep_tbl[new_index] = endpoint; + gasneti_amtbl_init(endpoint); +#ifndef GASNETC_EP_INIT_HOOK + size_t extra = alloc_size - sizeof(*endpoint); + if (extra) memset(endpoint + 1, 0, extra); #endif return endpoint; } -void gasneti_free_ep(gasneti_EP_t endpoint) +// Static on the assumption that all callers will reside in this file +static void gasneti_free_ep(gasneti_EP_t endpoint) { -#ifdef GASNETI_EP_FREE_EXTRA - GASNETI_EP_FREE_EXTRA(endpoint); +#ifdef GASNETI_EP_FINI_HOOK + GASNETI_EP_FINI_HOOK(endpoint); #endif GASNETI_INIT_MAGIC(endpoint, GASNETI_EP_BAD_MAGIC); gasneti_free(endpoint); } #endif // _GEX_EP_T +extern int gex_EP_Create( + gex_EP_t *ep_p, + gex_Client_t e_client, + gex_EP_Capabilities_t caps, + gex_Flags_t flags) +{ + gasneti_Client_t client = gasneti_import_client(e_client); + + // TODO: formatted printing for capabilities + GASNETI_TRACE_PRINTF(O,("gex_EP_Create: client='%s' capabilities=%d flags=%d", + client ? client->_name : "(NULL)", caps, flags)); + + if (! client) { + gasneti_fatalerror("Invalid call to gex_EP_Create with NULL client"); + } + + if (!ep_p) { + gasneti_fatalerror("Invalid call to gex_EP_Create with NULL ep_p"); + } + + GASNETI_CHECK_ERRR((! caps), BAD_ARG, + "no capabilities were requested"); + GASNETI_CHECK_ERRR((caps & ~GEX_EP_CAPABILITY_ALL), BAD_ARG, + "invalid capabilities were requested"); + + // Currently require/demand that primordial EP have ALL capabilities + gasneti_assert(gasneti_weakatomic32_read(&client->_next_ep_index, 0) + || caps == GEX_EP_CAPABILITY_ALL); + + // TODO: any other validation of caps + // TODO: maybe silently OR-in {VIS,AD,COLL} dependencies? + + // TODO: any validation of flags? any conditional behaviors? 
+ + uint32_t new_index = gasneti_weakatomic32_add(&client->_next_ep_index, 1, 0) - 1; + if_pf (new_index >= GASNET_MAXEPS) { + gasneti_weakatomic32_decrement(&client->_next_ep_index, 0); + GASNETI_RETURN_ERRR(RESOURCE,"would exceed per-client EP limit of " _STRINGIFY(GASNET_MAXEPS)); + } + + gasneti_EP_t ep = gasneti_alloc_ep(client, caps, flags, new_index); + + // TODO: any need/want to omit on non-primordial EPs? + { /* core API handlers */ + gex_AM_Entry_t *ctable = (gex_AM_Entry_t *)gasnetc_get_handlertable(); + int len = 0; + int numreg = 0; + gasneti_assert(ctable); + while (ctable[len].gex_fnptr) len++; /* calc len */ + if (gasneti_amregister(ep, ctable, len, + GASNETC_HANDLER_BASE, GASNETE_HANDLER_BASE, + 0, &numreg) != GASNET_OK) + GASNETI_RETURN_ERRR(RESOURCE,"Error registering core API handlers"); + gasneti_assert_int(numreg ,==, len); + } + + // TODO: any need/want to omit on non-primordial EPs? + { /* extended API handlers */ + gex_AM_Entry_t *etable = (gex_AM_Entry_t *)gasnete_get_handlertable(); + int len = 0; + int numreg = 0; + gasneti_assert(etable); + while (etable[len].gex_fnptr) len++; /* calc len */ + if (gasneti_amregister(ep, etable, len, + GASNETE_HANDLER_BASE, GASNETI_CLIENT_HANDLER_BASE, + 0, &numreg) != GASNET_OK) + GASNETI_RETURN_ERRR(RESOURCE,"Error registering extended API handlers"); + gasneti_assert_int(numreg ,==, len); + } + +#ifdef GASNETC_EP_INIT_HOOK + int rc = GASNETC_EP_INIT_HOOK(ep); + if (rc != GASNET_OK) { + gasneti_free_ep(ep); + ep = NULL; + } +#else + int rc = GASNET_OK; +#endif + + *ep_p = gasneti_export_ep(ep); + return rc; +} + +/* ------------------------------------------------------------------------------------ */ +// TM management + +#ifdef GASNETC_TM_EXTRA_DECLS +GASNETC_TM_EXTRA_DECLS +#endif + #ifndef _GEX_TM_T #ifndef gasneti_import_tm gasneti_TM_t gasneti_import_tm(gex_TM_t _tm) { gasneti_assert(_tm != GEX_TM_INVALID); const gasneti_TM_t _real_tm = GASNETI_IMPORT_POINTER(gasneti_TM_t,_tm); + if (! 
gasneti_i_tm_is_pair(_real_tm)) { + GASNETI_IMPORT_MAGIC(_real_tm, TM); + } + return _real_tm; +} +#endif + +#ifndef gasneti_import_tm_nonpair +gasneti_TM_t gasneti_import_tm_nonpair(gex_TM_t _tm) { + gasneti_assert(_tm != GEX_TM_INVALID); + const gasneti_TM_t _real_tm = GASNETI_IMPORT_POINTER(gasneti_TM_t,_tm); + if (gasneti_i_tm_is_pair(_real_tm)) { + gasneti_fatalerror("Invalid use of a TM-Pair where such is prohibited"); + } GASNETI_IMPORT_MAGIC(_real_tm, TM); return _real_tm; } @@ -579,8 +758,7 @@ extern gasneti_TM_t gasneti_alloc_tm( gasneti_EP_t ep, gex_Rank_t rank, gex_Rank_t size, - gex_Flags_t flags, - size_t requested_sz) + gex_Flags_t flags) { gasneti_assert_uint(rank ,<, size); gasneti_assert_uint(size ,>, 0); @@ -590,8 +768,12 @@ extern gasneti_TM_t gasneti_alloc_tm( const int is_tm0 = (ep->_client->_tm0 == NULL); gasneti_TM_t tm; - if (requested_sz) gasneti_assert_uint(requested_sz ,>=, sizeof(*tm)); - size_t actual_sz = (requested_sz ? requested_sz : sizeof(*tm)); +#ifdef GASNETC_SIZEOF_TM_T + size_t actual_sz = GASNETC_SIZEOF_TM_T(); + gasneti_assert_uint(actual_sz ,>=, sizeof(*tm)); +#else + size_t actual_sz = sizeof(*tm); +#endif #if GASNETI_TM0_ALIGN // TM0 is aligned to GASNETI_TM0_ALIGN, and all others to half that @@ -608,10 +790,11 @@ extern gasneti_TM_t gasneti_alloc_tm( tm->_rank = rank; tm->_size = size; tm->_coll_team = NULL; -#ifdef GASNETI_TM_ALLOC_EXTRA - GASNETI_TM_ALLOC_EXTRA(tm); +#ifdef GASNETC_TM_INIT_HOOK + GASNETC_TM_INIT_HOOK(tm); #else - if (requested_sz) memset(tm + 1, 0, actual_sz - sizeof(*tm)); + size_t extra = actual_sz - sizeof(*tm); + if (extra) memset(tm + 1, 0, extra); #endif if (is_tm0) { @@ -629,8 +812,8 @@ extern gasneti_TM_t gasneti_alloc_tm( void gasneti_free_tm(gasneti_TM_t tm) { -#ifdef GASNETI_TM_FREE_EXTRA - GASNETI_TM_FREE_EXTRA(tm); +#ifdef GASNETI_TM_FINI_HOOK + GASNETI_TM_FINI_HOOK(tm); #endif GASNETI_INIT_MAGIC(tm, GASNETI_TM_BAD_MAGIC); gasneti_free_aligned((void*)((uintptr_t)tm - 
(GASNETI_TM0_ALIGN/2))); @@ -639,6 +822,38 @@ void gasneti_free_tm(gasneti_TM_t tm) /* ------------------------------------------------------------------------------------ */ +// TM-pair is NOT an object type, but must masquerade as a gex_TM_t. +// Therefore, we handle swizzling and internal/external type distinction +// in the same manner as for object types (but no MAGIC). + +#ifndef gasneti_import_tm_pair +gasneti_TM_Pair_t gasneti_import_tm_pair(gex_TM_t tm) { + gasneti_assert(tm != GEX_TM_INVALID); + gasneti_assert(gasneti_e_tm_is_pair(tm)); + return GASNETI_IMPORT_POINTER(gasneti_TM_Pair_t,tm); +} +#endif + +#ifndef gasneti_export_tm_pair +gex_TM_t gasneti_export_tm_pair(gasneti_TM_Pair_t tm_pair) { + gasneti_assert(gasneti_i_tm_is_pair((gasneti_TM_t) tm_pair)); + return GASNETI_EXPORT_POINTER(gex_TM_t, tm_pair); +} +#endif + +// Helper for PSHM queries which cannot inline THUNK_CLIENT +gasneti_Segment_t gasneti_tm_pair_to_segment(gasneti_TM_Pair_t tm_pair) { + gex_EP_Index_t ep_idx = gasneti_tm_pair_loc_idx(tm_pair); + gasneti_Client_t i_client = gasneti_import_client(gasneti_THUNK_CLIENT); // TODO: multi-client + gasneti_assert_int(ep_idx ,<, GASNET_MAXEPS); + gasneti_assert_int(ep_idx ,<, gasneti_weakatomic32_read(&i_client->_next_ep_index, 0)); + gasneti_EP_t i_ep = i_client->_ep_tbl[ep_idx]; + gasneti_assert(i_ep); + return i_ep->_segment; +} + +/* ------------------------------------------------------------------------------------ */ + #if GASNET_DEBUG // Verify that client did actually write to gasnet-allocated buffer // @@ -1180,10 +1395,6 @@ static void gasneti_check_portable_conduit(void) { /* check for portable conduit const char *desc; int hwid; } known_devs[] = { - #if PLATFORM_OS_LINUX && PLATFORM_ARCH_IA64 && GASNET_SEQ - { "/dev/hw/cpunum", S_IFDIR, "SGI Altix", 0 }, - { "/dev/xpmem", S_IFCHR, "SGI Altix", 0 }, - #endif { "/dev/infiniband/uverbs0", S_IFCHR, "InfiniBand IBV", 2 }, /* OFED 1.0 */ { "/dev/infiniband/ofs/uverbs0", S_IFCHR, 
"InfiniBand IBV", 2 }, /* Solaris */ #if !GASNET_SEGMENT_EVERYTHING @@ -1775,6 +1986,42 @@ extern gasneti_spawnerfn_t const *gasneti_spawnerInit(int *argc_p, char ***argv_ return res; } +/* ------------------------------------------------------------------------------------ */ +/* Simple container of segments + * + * Current implementation is a array, with deletions moving the last element + * into the vacated slot to retain a dense table. This design choice favors + * simple/efficient iteration. + * + * The field `_opaque_container_use` in gasneti_Segment_t stores the index + * of the segment in this table, to provide for O(1) deletion (w/o a search) + * and is not inteded to be used (for instance) as an identifer on the wire. + */ + +// State, protected by _gasneti_segtbl_lock +gasneti_mutex_t _gasneti_segtbl_lock = GASNETI_MUTEX_INITIALIZER; +gasneti_Segment_t *_gasneti_segtbl = NULL; +int _gasneti_segtbl_count = 0; + +void gasneti_segtbl_add(gasneti_Segment_t seg) { + gasneti_mutex_lock(&_gasneti_segtbl_lock); + seg->_opaque_container_use = _gasneti_segtbl_count++; + size_t space = _gasneti_segtbl_count * sizeof(gasneti_Segment_t); + _gasneti_segtbl = gasneti_realloc(_gasneti_segtbl, space); + gasneti_leak(_gasneti_segtbl); + _gasneti_segtbl[seg->_opaque_container_use] = seg; + gasneti_mutex_unlock(&_gasneti_segtbl_lock); +} + +void gasneti_segtbl_del(gasneti_Segment_t seg) { + gasneti_mutex_lock(&_gasneti_segtbl_lock); + gasneti_Segment_t last = _gasneti_segtbl[_gasneti_segtbl_count--]; + last->_opaque_container_use = seg->_opaque_container_use; + _gasneti_segtbl[last->_opaque_container_use] = last; + // TODO: realloc to shrink if we think this would lead to significant savings? 
+ gasneti_mutex_unlock(&_gasneti_segtbl_lock); +} + /* ------------------------------------------------------------------------------------ */ /* Buffer management */ @@ -2369,6 +2616,27 @@ extern char *_gasneti_extern_strndup(const char *s, size_t n GASNETI_CURLOCFARG) return _gasneti_strndup(s,n GASNETI_CURLOCPARG); } +// append to a string with dynamic memory allocation +// not high-performance, but concise +char *gasneti_sappendf(char *s, const char *fmt, ...) { + // compute length of thing to append + va_list args; + va_start(args, fmt); + int add_len = vsnprintf(NULL, 0, fmt, args); + va_end(args); + + // grow (or allocate) the string, including space for '\0' + int old_len = s ? strlen(s) : 0; + s = gasneti_realloc(s, old_len + add_len + 1); + + // append + va_start(args, fmt); + vsprintf((s+old_len), fmt, args); + va_end(args); + + return s; +} + #if GASNET_DEBUGMALLOC extern void *(*gasnett_debug_malloc_fn)(size_t sz, const char *curloc); extern void *(*gasnett_debug_calloc_fn)(size_t N, size_t S, const char *curloc); diff --git a/third-party/gasnet/gasnet-src/gasnet_internal.h b/third-party/gasnet/gasnet-src/gasnet_internal.h index 25746882cd17..7fed6cb52b37 100644 --- a/third-party/gasnet/gasnet-src/gasnet_internal.h +++ b/third-party/gasnet/gasnet-src/gasnet_internal.h @@ -27,6 +27,10 @@ #include #endif +#if GASNETI_NEED_GASNET_MK_H +#include +#endif + #if GASNETI_COMPILER_IS_UNKNOWN #error "Invalid attempt to build GASNet with a compiler other than the one probed at configure time" #endif @@ -84,6 +88,10 @@ extern double gasneti_get_exittimeout(double dflt_max, double dflt_min, double d #define gasneti_strndup(ptr,sz) _gasneti_strndup((ptr),(sz) GASNETI_CURLOCAARG) /* corresponding gasneti_memcheck fns are in gasnet_help.h */ +// String append with safe-memory dynamic allocation +GASNETI_FORMAT_PRINTF(gasneti_sappendf,2,3, +extern char *gasneti_sappendf(char *s, const char *fmt, ...)); + #if GASNET_DEBUGMALLOC extern void *_gasneti_malloc(size_t 
nbytes, const char *curloc) GASNETI_MALLOC; extern void *_gasneti_malloc_allowfail(size_t nbytes, const char *curloc) GASNETI_MALLOC; @@ -224,13 +232,6 @@ GASNETI_MALLOCP(_gasneti_calloc) #endif #define gasneti_thunk_segment gasneti_thunk_error -#if 0 // this safety belt must be disabled until the cleanup in PR #126 fixes internal inclusion of public headers -#ifdef GASNETI_MYTHREAD_GET_OR_LOOKUP -#undef GASNETI_MYTHREAD_GET_OR_LOOKUP -#endif -#define GASNETI_MYTHREAD_GET_OR_LOOKUP ERROR__GASNet_conduit_code_should_use_GASNETI_MYTHREAD -#endif - /* ------------------------------------------------------------------------------------ */ /* Version of strdup() which is compatible w/ gasneti_free(), instead of plain free() */ GASNETI_INLINE(_gasneti_strdup) GASNETI_MALLOC @@ -334,8 +335,7 @@ extern void gasneti_freezeForDebugger(void); extern gasneti_Client_t gasneti_alloc_client( const char *name, - gex_Flags_t flags, - size_t alloc_size); + gex_Flags_t flags); void gasneti_free_client(gasneti_Client_t client); #define GASNETI_SEGMENT_MAGIC GASNETI_MAKE_MAGIC('S','E','G','t') @@ -345,19 +345,13 @@ extern gasneti_Segment_t gasneti_alloc_segment( gasneti_Client_t client, void *addr, uintptr_t len, - gex_Flags_t flags, - size_t alloc_size); + gex_MK_t kind, + gex_Flags_t flags); void gasneti_free_segment(gasneti_Segment_t segment); #define GASNETI_EP_MAGIC GASNETI_MAKE_MAGIC('E','P','_','t') #define GASNETI_EP_BAD_MAGIC GASNETI_MAKE_BAD_MAGIC('E','P','_','t') -extern gasneti_EP_t gasneti_alloc_ep( - gasneti_Client_t client, - gex_Flags_t flags, - size_t alloc_size); -void gasneti_free_ep(gasneti_EP_t endpoint); - #define GASNETI_TM_MAGIC GASNETI_MAKE_MAGIC('T','M','_','t') #define GASNETI_TM_BAD_MAGIC GASNETI_MAKE_BAD_MAGIC('T','M','_','t') @@ -365,10 +359,22 @@ extern gasneti_TM_t gasneti_alloc_tm( gasneti_EP_t ep, gex_Rank_t rank, gex_Rank_t size, - gex_Flags_t flags, - size_t alloc_size); + gex_Flags_t flags); void gasneti_free_tm(gasneti_TM_t tm); +/* 
------------------------------------------------------------------------------------ */ +/* Return a pointer to a handler table containing the handlers of + the core (gasnetc_) or extended (gasnete_) API, which will be + automatically registered upon endpoint creation. + Tables are terminated with an entry where fnptr == NULL. + Core API handlers are restricted to indices in the range + [GASNETC_HANDLER_BASE, GASNETE_HANDLER_BASE) + Extended API handlers are restricted to indices in the range + [GASNETE_HANDLER_BASE, GASNETI_CLIENT_HANDLER_BASE) +*/ +extern gex_AM_Entry_t const *gasnetc_get_handlertable(void); +extern gex_AM_Entry_t const *gasnete_get_handlertable(void); + /* ------------------------------------------------------------------------------------ */ // TODO-EX: Please remove this! // @@ -384,6 +390,27 @@ extern gasneti_TM_t gasneti_thing_that_goes_thunk_in_the_dark; #define gasneti_THUNK_CLIENT gasneti_export_client(gasneti_thing_that_goes_thunk_in_the_dark->_ep->_client) #define gasneti_THUNK_SEGMENT gasneti_export_segment(gasneti_thing_that_goes_thunk_in_the_dark->_ep->_segment) +/* ------------------------------------------------------------------------------------ */ +// EP management + +GASNETI_INLINE(gasneti_i_tm_to_i_ep) +gasneti_EP_t gasneti_i_tm_to_i_ep(gasneti_TM_t i_tm) { + gasneti_assert(i_tm); + if (gasneti_i_tm_is_pair(i_tm)) { + // Lookup EP in per-client table + gex_EP_Index_t ep_idx = gasneti_tm_pair_loc_idx(gasneti_i_tm_to_pair(i_tm)); + gasneti_Client_t i_client = gasneti_import_client(gasneti_THUNK_CLIENT); // TODO: multi-client + gasneti_assert_int(ep_idx ,<, GASNET_MAXEPS); + gasneti_assert_int(ep_idx ,<, gasneti_weakatomic32_read(&i_client->_next_ep_index, 0)); + gasneti_EP_t i_ep = i_client->_ep_tbl[ep_idx]; + gasneti_assert(i_ep); + return i_ep; + } else { + return i_tm->_ep; + } +} +#define gasneti_e_tm_to_i_ep(e_tm) gasneti_i_tm_to_i_ep(gasneti_import_tm(e_tm)) + /* 
------------------------------------------------------------------------------------ */ // Internal conduit interface to spawner @@ -433,6 +460,17 @@ uintptr_t gasneti_max_segsize(); #endif #endif +// Allocate/map memory intended for use as segment. +// May be called non-collectively, as from gex_Segment_Create(). +// Also called collectively, as from gex_Segment_Attach() and aux seg creation. +// Boolean 'pshm_compat' requests allocation of memory which is compatible with +// cross-mapping by PSHM (only fully implemented for the collective cases of +// gex_Segment_Attach() and aux seg creation). +int gasneti_segment_map(gasnet_seginfo_t *segment_p, + uintptr_t segsize, + int pshm_compat, + gex_Flags_t flags); + #ifndef GASNETI_USE_HIGHSEGMENT #define GASNETI_USE_HIGHSEGMENT 1 /* use the high end of mmap segments */ #endif @@ -449,11 +487,22 @@ void gasneti_segmentInit(uintptr_t localSegmentLimit, gex_Flags_t flags); gasnet_seginfo_t gasneti_segmentAttach( gex_Segment_t *segment_p, - size_t allocsz, gex_TM_t tm, uintptr_t segsize, - gasneti_bootstrapExchangefn_t exchangefn, gex_Flags_t flags); +int gasneti_segmentCreate( + gex_Segment_t *segment_t, + gasneti_Client_t client, + gex_Addr_t address, + uintptr_t length, + gex_MK_t kind, + gex_Flags_t flags); + +int gasneti_EP_PublishBoundSegment( + gex_TM_t tm, + gex_EP_t *eps, + size_t num_eps, + gex_Flags_t flags); extern void gasneti_legacy_segment_attach_hook(gasneti_EP_t ep); extern void gasneti_legacy_alloc_tm_hook(gasneti_TM_t _tm); @@ -580,6 +629,17 @@ gasneti_iop_t *gasneti_iop_register_rmw(unsigned int noperations GASNETI_THREAD_ /* marks in-flight remote atomic operation(s) as complete ... 
*/ void gasneti_iop_markdone_rmw(gasneti_iop_t *iop, unsigned int noperations); +/* ------------------------------------------------------------------------------------ */ +// memory kinds hooks + +int gasneti_MK_Segment_Create( + gasneti_Segment_t *i_segment_p, + gasneti_Client_t i_client, + void *address, + uintptr_t length, + gex_MK_t e_kind, + gex_Flags_t flags); + /* ------------------------------------------------------------------------------------ */ /* macros for returning errors that allow verbose error tracking */ extern int gasneti_VerboseErrors; @@ -721,14 +781,34 @@ extern void gasneti_nodemapFini(void); #endif /* ------------------------------------------------------------------------------------ */ -// An AM-based gasneti_bootstrapExchangefn_t -// TODO-EX: any/all uses should hopefully use real collectives eventually +// Collective comms helpers + +// Convenience wrapper for a blocking gather-to-all of elements of size 'len' bytes. +// In-place (src == (uint8_t*)dst + len*myrank) is permitted. +// Currently wraps legacy gasnet_coll_* but should use gex_Coll_* eventually. +void gasneti_blockingExchange(gex_TM_t tm, void *src, size_t len, void *dst); -void gasneti_defaultExchange(void *src, size_t len, void *dest); -extern void gasnetc_exchg_reqh(gex_Token_t token, void *buf, size_t nbytes, - gex_AM_Arg_t arg0, gex_AM_Arg_t len); +// Blocking "Rotated" ExchangeV utility function +// Takes only local data and length, and then discovers (and returns) the total length. +// Writes malloc()ed data pointer to *dst_p. +// Writes optional malloc()ed lengths-array pointer to *len_p, if non-NULL. +// +// "Rotated" because it does NOT generate the data in normal rank order. +// See comments in extended-ref/coll/gasnet_team.c for details. 
+size_t gasneti_blockingRotatedExchangeV(gex_TM_t tm, const void *src, size_t len, void **dst_p, size_t **len_p); + +// An AM-based host-scoped barrier +extern void gasneti_host_barrier(void); +extern void gasnetc_hbarr_reqh(gex_Token_t token, gex_AM_Arg_t arg0); #define GASNETC_COMMON_HANDLERS() \ - gasneti_handler_tableentry_no_bits(gasnetc_exchg_reqh,2,REQUEST,MEDIUM,0) + gasneti_handler_tableentry_no_bits(gasnetc_hbarr_reqh,1,REQUEST,SHORT,0) + +/* ------------------------------------------------------------------------------------ */ +// Helpers for debug checks + +#if GASNET_DEBUG +void gasneti_checknpam(int for_reply GASNETI_THREAD_FARG); +#endif /* ------------------------------------------------------------------------------------ */ @@ -773,9 +853,16 @@ typedef struct _gasneti_threaddata_t { #if GASNET_DEBUG || GASNETI_THREADINFO_OPT #define GASNETI_NEED_INIT_SRCDESC 1 int sd_is_init; +#endif +#if GASNET_DEBUG + int request_handler_active, reply_handler_active; #endif struct gasneti_AM_SrcDesc request_sd, reply_sd; - void *loopback_requestBuf, *loopback_replyBuf; + // Buffers, sized to max-medium, used by loopback AM and reference NPAM + void *requestBuf, *replyBuf; +#if GASNET_DEBUG + int requestBuf_live, replyBuf_live; +#endif // // Event data @@ -805,6 +892,36 @@ typedef struct _gasneti_threaddata_t { #endif } gasneti_threaddata_t; +/* ------------------------------------------------------------------------------------ */ +/* Simple container of segments + */ + +// Hidden state +extern gasneti_mutex_t _gasneti_segtbl_lock; +extern gasneti_Segment_t *_gasneti_segtbl; +extern int _gasneti_segtbl_count; + +// Public access to the lock +#define GASNETI_SEGTBL_LOCK() gasneti_mutex_lock(&_gasneti_segtbl_lock) +#define GASNETI_SEGTBL_UNLOCK() gasneti_mutex_unlock(&_gasneti_segtbl_lock) + +// Simple iterator. +// Caller must hold lock and must not call add or del (which acquire the lock). 
+// This macro provides a loop header and the caller provides the iteration +// variable and the loop body: +// gasneti_Segment_t p; +// GASNETI_SEGTBL_FOR_EACH(p) { visit(p); } +#define GASNETI_SEGTBL_FOR_EACH(segvar) \ + for (int _gasneti_segtbl_iter = 0; \ + (gasneti_mutex_assertlocked(&_gasneti_segtbl_lock), \ + (_gasneti_segtbl_iter < _gasneti_segtbl_count) && \ + (segvar = _gasneti_segtbl[_gasneti_segtbl_iter])); \ + ++_gasneti_segtbl_iter) + +// Add and Del +extern void gasneti_segtbl_add(gasneti_Segment_t seg); +extern void gasneti_segtbl_del(gasneti_Segment_t seg); + /* ------------------------------------------------------------------------------------ */ GASNETI_END_NOWARN GASNETI_END_EXTERNC diff --git a/third-party/gasnet/gasnet-src/gasnet_legacy.c b/third-party/gasnet/gasnet-src/gasnet_legacy.c index 4de6dfec87b2..f30c5ba83747 100644 --- a/third-party/gasnet/gasnet-src/gasnet_legacy.c +++ b/third-party/gasnet/gasnet-src/gasnet_legacy.c @@ -51,7 +51,7 @@ extern void gasneti_legacy_alloc_tm_hook(gasneti_TM_t _tm) { int len = 0; int numreg = 0; while (gasneti_legacy_handlers[len].gex_fnptr) len++; /* calc len */ - if (gasneti_amregister(gasneti_import_ep(gasneti_thunk_endpoint)->_amtbl, gasneti_legacy_handlers, len, + if (gasneti_amregister(gasneti_import_ep(gasneti_thunk_endpoint), gasneti_legacy_handlers, len, GASNETI_LEGACY_HANDLER_BASE, GASNETI_CLIENT_HANDLER_BASE, 0, &numreg) != GASNET_OK) gasneti_fatalerror("Error registering g2ex legacy AM handlers"); gasneti_assert(numreg == len); diff --git a/third-party/gasnet/gasnet-src/gasnet_membar.h b/third-party/gasnet/gasnet-src/gasnet_membar.h index 0b8be9e07a67..3a0907db0f45 100644 --- a/third-party/gasnet/gasnet-src/gasnet_membar.h +++ b/third-party/gasnet/gasnet-src/gasnet_membar.h @@ -140,37 +140,6 @@ #define GASNETI_RMB_IS_MB #define GASNETI_WMB_IS_MB /* ------------------------------------------------------------------------------------ */ -#elif PLATFORM_ARCH_IA64 /* Itanium */ - /* Empirically 
observed that IA64 requires a full "mf" for both wmb and rmb (see bug 1000). - * The reason is that the Itanium memory model only ensures ordering in one direction when - * using st.rel or ld.acq. In particular, they implement the minimum required for proper - * mutex implementation. While preventing loads and stores from moving OUT of the critical - * section, this still allows for loads before the lock and stores after the unlock to reorder - * INTO the critical section. We need more than that. - */ - #if PLATFORM_COMPILER_INTEL - /* Intel compiler's inline assembly broken on Itanium (bug 384) - use intrinsics instead */ - #include - #define gasneti_compiler_fence() \ - __memory_barrier() /* compiler optimization barrier */ - #define gasneti_local_wmb() do { \ - gasneti_compiler_fence(); \ - __mf(); /* memory fence instruction */ \ - } while (0) - #define gasneti_local_rmb() gasneti_local_wmb() - #define gasneti_local_mb() gasneti_local_wmb() - #define GASNETI_RMB_IS_MB - #define GASNETI_WMB_IS_MB - #elif GASNETI_HAVE_GCC_ASM - #define gasneti_local_wmb() GASNETI_ASM("mf") - #define gasneti_local_rmb() gasneti_local_wmb() - #define gasneti_local_mb() gasneti_local_wmb() - #define GASNETI_RMB_IS_MB - #define GASNETI_WMB_IS_MB - #else - #define GASNETI_USING_SLOW_MEMBARS 1 - #endif -/* ------------------------------------------------------------------------------------ */ #elif PLATFORM_ARCH_POWERPC #if GASNETI_HAVE_GCC_ASM /* "lwsync" = "sync 1", executed as "sync" on older CPUs */ @@ -220,7 +189,7 @@ #define GASNETI_RMB_IS_MB #define GASNETI_WMB_IS_MB /* ------------------------------------------------------------------------------------ */ -#elif PLATFORM_ARCH_AARCH64 && PLATFORM_OS_LINUX +#elif PLATFORM_ARCH_AARCH64 #if GASNETI_HAVE_GCC_ASM #define gasneti_local_wmb() GASNETI_ASM("dmb ishst") #define gasneti_local_rmb() GASNETI_ASM("dmb ishld") @@ -434,14 +403,8 @@ * is inserted in spin-loops - this instruction is documented as a "spin-loop hint" * which avoids 
a memory hazard stall on spin loop exit and reduces power consumption * Other Intel CPU's treat this instruction as a no-op - * - * IA64 includes a "hint" for use in spinloops */ #define gasneti_spinloop_hint() GASNETI_ASM(GASNETI_PAUSE_INSTRUCTION) - #elif PLATFORM_ARCH_IA64 && PLATFORM_COMPILER_INTEL && 0 /* DISABLED */ - /* Intel compiler's inline assembly broken on Itanium (bug 384) - use intrinsics instead */ - #include - #define gasneti_spinloop_hint() __hint(__hint_pause) #else #define gasneti_spinloop_hint() ((void)0) #endif diff --git a/third-party/gasnet/gasnet-src/gasnet_mmap.c b/third-party/gasnet/gasnet-src/gasnet_mmap.c index 82b1bff77b01..c55bdbccc335 100644 --- a/third-party/gasnet/gasnet-src/gasnet_mmap.c +++ b/third-party/gasnet/gasnet-src/gasnet_mmap.c @@ -241,8 +241,17 @@ extern void *gasneti_mmap(uintptr_t segsize) { #elif defined(GASNETI_PSHM_FILE) || defined(GASNETI_PSHM_POSIX) static char **gasneti_pshmname = NULL; #elif defined(GASNETI_PSHM_XPMEM) - static gasneti_xpmem_segid_t *gasneti_pshm_segids = NULL; + static gasneti_xpmem_segid_t *gasneti_pshm_segids = NULL; // shared + static gasneti_xpmem_segid_t gasneti_pshm_segid_aux; // private static gasneti_xpmem_apid_t *gasneti_pshm_apids = NULL; + + // Hooks to allocate shared space in which to store gasneti_pshm_segids[] + size_t gasneti_pshm_private_data_size(void) { + return gasneti_pshm_nodes * sizeof(gasneti_xpmem_segid_t); + } + void gasneti_pshm_private_data_init(uintptr_t addr) { + gasneti_pshm_segids = (gasneti_xpmem_segid_t *)addr; + } #endif static char *gasneti_pshm_tmpfile_ = NULL; @@ -616,7 +625,10 @@ static void * gasneti_pshm_mmap(int pshm_rank, void *segbase, size_t segsize) { ptr = mmap(segbase, segsize, (PROT_READ|PROT_WRITE), mmap_flags, 0, 0); #endif } else { - gasneti_xpmem_apid_t apid = gasneti_xpmem_get(gasneti_pshm_segids[pshm_rank]); + gasneti_xpmem_segid_t segid; + segid = (pshm_rank == gasneti_pshm_nodes) ? 
gasneti_pshm_segid_aux + : gasneti_pshm_segids[pshm_rank]; + gasneti_xpmem_apid_t apid = gasneti_xpmem_get(segid); if (apid != (gasneti_xpmem_apid_t)-1) { #if HAVE_XPMEM_MAKE_2 @@ -681,15 +693,17 @@ static void gasneti_munmap_remote(gex_Rank_t pshm_rank, void *segbase, uintptr_t #endif } -/* Called collectively */ +// If `do_sync` is non-zero, then must be called collectively and will +// include a barrier if one is necessary for access to written data. +// Otherwise, safe to call non-collectively. GASNETI_INLINE(gasneti_publish_segment) -void gasneti_publish_segment(gasnet_seginfo_t segment) { +void gasneti_publish_segment(gasnet_seginfo_t segment, int do_sync) { void *segbase = segment.addr; uintptr_t segsize = segment.size; #if defined(GASNETI_PSHM_XPMEM) - /* Create and supernode-exchange xpmem segment ids */ - gasneti_xpmem_segid_t segid = gasneti_xpmem_make(segbase, segsize); - gasneti_pshmnet_bootstrapExchange(gasneti_request_pshmnet, &segid, sizeof(segid), gasneti_pshm_segids); + /* Create and publish xpmem segment ids */ + gasneti_pshm_segids[gasneti_pshm_mynode] = gasneti_xpmem_make(segbase, segsize); + if (do_sync) gasneti_pshmnet_bootstrapBarrier(); #else /* empty */ #endif @@ -757,8 +771,6 @@ static void gasneti_cleanup_shm(void) { } #elif defined(GASNETI_PSHM_XPMEM) #if 0 // TODO-EX: can't create multiple segments with these, but don't yet mark as leaked - gasneti_free(gasneti_pshm_segids); - gasneti_pshm_segids = NULL; gasneti_free(gasneti_pshm_apids); gasneti_pshm_apids = NULL; #endif @@ -890,7 +902,6 @@ extern void *gasneti_mmap_vnet(uintptr_t size, gasneti_bootstrapBroadcastfn_t sn gasneti_xpmem_segid_t segid = (gasneti_xpmem_segid_t)(-1); /* Initialization */ - gasneti_pshm_segids = gasneti_malloc(sizeof(gasneti_xpmem_segid_t) * (gasneti_pshm_nodes + 1)); gasneti_pshm_apids = gasneti_malloc(sizeof(gasneti_xpmem_apid_t) * (gasneti_pshm_nodes + 1)); /* First in each supernode creates the segment */ @@ -905,7 +916,7 @@ extern void 
*gasneti_mmap_vnet(uintptr_t size, gasneti_bootstrapBroadcastfn_t sn /* Supernode-scoped bcast communicates the segment identifier generated by the firsts */ (*snodebcastfn)(&segid, sizeof(segid), - &gasneti_pshm_segids[gasneti_pshm_nodes], + &gasneti_pshm_segid_aux, gasneti_pshm_firstnode); /* Non-first nodes attach */ @@ -994,6 +1005,14 @@ static void *gasneti_mmap_fixed_with_retry(void *segbase, uintptr_t segsize, int } #undef gasneti_do_mmap_fixed #define gasneti_do_mmap_fixed gasneti_mmap_fixed_with_retry + +// Host-scoped barrier used between unmap and re-map. +// This is needed even in the presence of PSHM support because +// gasneti_pshmnet_bootstrapBarrier() may have a narrower scope +// when GASNET_SUPERNODE_MAXSIZE is set. +#define gasneti_bug3480_fence() gasneti_host_barrier() +#else +#define gasneti_bug3480_fence() ((void)0) #endif // GASNETI_BUG3480_WORKAROUND /* binary search for segment - returns location, not mmaped */ @@ -1247,20 +1266,6 @@ uintptr_t gasneti_max_segsize() { return result; } -#if GASNETI_BUG3480_WORKAROUND - // Barrier used between unmap and re-map, via 1-byte exchange (a.k.a. GatherAll). - // This is a bit of a hack, but is the most expedient way to get a barrier - // with compute-node scope, since gasneti_pshmnet_bootstrapBarrier() may - // have a narrower scope when env var GASNET_SUPERNODE_MAXSIZE is set. 
- static void gasneti_bug3480_fence(gasneti_bootstrapExchangefn_t exchangefn) { - char a = 0; char *b = gasneti_malloc(gasneti_nodes); - (*exchangefn)(&a, sizeof(char), b); - gasneti_free(b); - } -#else - #define gasneti_bug3480_fence(_e) ((void)0) -#endif - // gasneti_sharedLimit() // // Returns the per-host shared memory limit ("the limit") imposed by the active @@ -1529,6 +1534,7 @@ void gasneti_segmentInit(uintptr_t localSegmentLimit, // Initialize global data gasneti_leak(gasneti_seginfo = gasneti_malloc(gasneti_nodes*sizeof(gasnet_seginfo_t))); + gasneti_seginfo_tbl[0] = gasneti_seginfo; for (gex_Rank_t i = 0; i < gasneti_nodes; i++) { gasneti_seginfo[i].addr = NULL; gasneti_seginfo[i].size = (uintptr_t)-1; @@ -1541,6 +1547,7 @@ void gasneti_segmentInit(uintptr_t localSegmentLimit, // PART 0: allocate (and zero-initialize) global data gasneti_leak(gasneti_seginfo = gasneti_calloc(gasneti_nodes, sizeof(gasnet_seginfo_t))); + gasneti_seginfo_tbl[0] = gasneti_seginfo; // PART I: allocate "pre-segment" @@ -1712,17 +1719,55 @@ void gasneti_segmentInit(uintptr_t localSegmentLimit, /* ------------------------------------------------------------------------------------ */ -static // TODO-EX: static for now, at least -void gasneti_segmentAttachLocal(gasnet_seginfo_t *segment_p, uintptr_t segsize, - gasneti_bootstrapExchangefn_t exchangefn) +// Allocate/map memory for a GASNet segment +// + Auxiliary segment via gex_Client_Init() or gasnet_init() +// + Client segment via gex_Segment_Attach() or gasnet_attach() +// + Client segment via gex_Segment_Create() +// +// If (flags & GASNETI_FLAG_INIT_LEGACY) this is a GASNet-1 segment +// If pshm_compat non-zero then allocate PSHM cross-mappable memory +int gasneti_segment_map( + gasnet_seginfo_t *segment_p, + uintptr_t segsize, + int pshm_compat, + gex_Flags_t flags) { +#ifdef GASNETI_MMAP_OR_PSHM + if (flags & GASNETI_FLAG_INIT_LEGACY) { + /* in "legacy_mode" we consume the presegment */ + *segment_p = gasneti_presegment; + 
} else +#endif + { + /* otherwise, we are working from scratch */ + segment_p->size = 0; + segment_p->addr = NULL; + } + void *segbase = NULL; #ifdef GASNETI_MMAP_OR_PSHM { + // Different (un)map ops to support shared (cross-mappable via PSHM) and private mappings + void* (*mmap_fn)(uintptr_t); + void* (*mmap_fixed_fn)(void *, uintptr_t, int); + void (*munmap_fn)(void *, uintptr_t); + #if GASNET_PSHM + if (pshm_compat) { + mmap_fn = gasneti_mmap_shared; + mmap_fixed_fn = gasneti_mmap_shared_fixed; + munmap_fn = gasneti_pshm_munmap; + } else + #endif + { + mmap_fn = gasneti_mmap; + mmap_fixed_fn = gasneti_mmap_fixed; + munmap_fn = gasneti_munmap; + } + if (segsize == 0) { /* no segment */ if (segment_p->addr && segment_p->size) { - gasneti_do_munmap(segment_p->addr, segment_p->size); + munmap_fn(segment_p->addr, segment_p->size); } segbase = NULL; } else if (segment_p->addr) { /* a pre-segment exists */ @@ -1733,17 +1778,17 @@ void gasneti_segmentAttachLocal(gasnet_seginfo_t *segment_p, uintptr_t segsize, #endif gasneti_assert_uint(segsize ,<=, segment_p->size); if (GASNET_PSHM || (segment_p->size != segsize) || (segment_p->addr != segbase)) { - gasneti_do_munmap(segment_p->addr, segment_p->size); - gasneti_bug3480_fence(exchangefn); + munmap_fn(segment_p->addr, segment_p->size); + gasneti_bug3480_fence(); #if GASNETI_PSHM_MAP_FIXED_IGNORED segbase = #endif - gasneti_do_mmap_fixed(segbase, segsize, 0); + mmap_fixed_fn(segbase, segsize, 0); } else { - gasneti_bug3480_fence(exchangefn); + gasneti_bug3480_fence(); } } else { /* need segment from scratch */ - segbase = gasneti_do_mmap(segsize); + segbase = mmap_fn(segsize); if (MAP_FAILED == segbase) { // TODO-EX: improve error handling here (e.g. 
ENOMEM might be returned to caller) int mmap_errno = errno; @@ -1778,18 +1823,20 @@ void gasneti_segmentAttachLocal(gasnet_seginfo_t *segment_p, uintptr_t segsize, segment_p->addr = segbase; segment_p->size = segsize; + + return GASNET_OK; } #if GASNET_PSHM -/* Map the remote shared segments */ -// TODO-EX: need scalable data structure in place of gasneti_nodeinfo +// Cross-map the remote shared segments +// TODO-EX: need scalable data structures in place of seginfo and gasneti_nodeinfo static // TODO-EX: static for now, at least -void gasneti_segmentAttachRemote(gasnet_seginfo_t *seginfo) +void gasneti_segment_cross_map(gasnet_seginfo_t *seginfo) { gasneti_nodeinfo[gasneti_mynode].offset = 0; gasneti_pshm_rank_t local_rank = 0; - gasneti_publish_segment(seginfo[gasneti_mynode]); + gasneti_publish_segment(seginfo[gasneti_mynode], 1); // collective // Note that we try to avoid iteration over all nodes. // For the case of supernode peers with contiguous ranks we examine no extra nodes @@ -1825,44 +1872,193 @@ static gasnet_seginfo_t gasneti_do_attach_segment( uintptr_t segsize, gasnet_seginfo_t *all_segments, + gex_TM_t tm, gasneti_bootstrapExchangefn_t exchangefn, gex_Flags_t flags) { - gasneti_assert(all_segments); - gasneti_assert(exchangefn); - #if GASNET_PSHM /* Avoid leaking shared memory files in case of non-collective exit between init/attach */ gasneti_pshm_cs_enter(&gasneti_cleanup_shm); gasneti_pshmnet_bootstrapBarrier(); #endif - gasnet_seginfo_t local_segment = {0,0}; -#ifdef GASNETI_MMAP_OR_PSHM - /* in "legacy_mode" we consume the presegment, otherwise working from scratch */ - if (flags & GASNETI_FLAG_INIT_LEGACY) local_segment = gasneti_presegment; -#endif - - gasneti_segmentAttachLocal(&local_segment, segsize, exchangefn); + gasnet_seginfo_t local_segment; - /* gather segment information */ // TODO-EX: need scalable replacement - (*exchangefn)(&local_segment, sizeof(gasnet_seginfo_t), all_segments); + int rc = gasneti_segment_map(&local_segment, 
segsize, 1, flags); + if (rc != GASNET_OK) { + gasneti_fatalerror("Unexpected failure return from gasneti_segment_map()"); + } + // Exchange segment information + // TODO-EX: need scalable replacement for full seginfo arrays + if (tm) { // Use collectives if available + gasneti_blockingExchange(tm, &local_segment, sizeof(gasnet_seginfo_t), all_segments); #if GASNET_PSHM - gasneti_segmentAttachRemote(all_segments); + // Needed if a pshm bootstrap operation may follow use of AMs + gasneti_pshmnet_bootstrapBarrierPoll(); +#endif + } else { + // gasneti_assert(all_segments == gasneti_seginfo_aux); // Eventually only auxseg should use exchangefn + gasneti_assert(exchangefn); + (*exchangefn)(&local_segment, sizeof(gasnet_seginfo_t), all_segments); + } + +#if GASNET_PSHM + gasneti_segment_cross_map(all_segments); gasneti_pshm_cs_leave(); #endif return local_segment; } +/* ------------------------------------------------------------------------------------ */ + + +static void gasneti_record_seginfo( + gex_Rank_t jobrank, + gex_EP_Index_t ep_index, + void *addr, + uintptr_t size) +{ + gasneti_assert(jobrank < gasneti_nodes); + gasneti_assert(ep_index < GASNET_MAXEPS); + + gasnet_seginfo_t *si_tbl = gasneti_seginfo_tbl[ep_index]; + + if_pf (!si_tbl) { + gasneti_assert(ep_index); // Never NULL for primordial EP + static gasneti_mutex_t lock = GASNETI_MUTEX_INITIALIZER; + gasneti_mutex_lock(&lock); + si_tbl = gasneti_seginfo_tbl[ep_index]; + if (!si_tbl) { + si_tbl = gasneti_calloc(gasneti_nodes, sizeof(gasnet_seginfo_t)); + gasneti_seginfo_tbl[ep_index] = si_tbl; + } + gasneti_mutex_unlock(&lock); + } + + gasnet_seginfo_t *si = si_tbl + jobrank; + + // Assert that we never overwrite a valid segment with new values +#if GASNET_SEGMENT_EVERYTHING + const uintptr_t dflt = ep_index ? 
0 : ~(uintptr_t)0; +#else + const uintptr_t dflt = 0; +#endif + gasneti_assert((si->size == dflt) || si->size == size); + gasneti_assert((si->addr == NULL) || si->addr == addr); + + si->addr = addr; + si->size = size; +} + +extern void gex_EP_BindSegment( + gex_EP_t ep, + gex_Segment_t segment, + gex_Flags_t flags) +{ + gasneti_Segment_t i_segment = gasneti_import_segment(segment); + gasneti_EP_t i_ep = gasneti_import_ep(ep); + + // TODO: macros for formatting when naming segments in tracing? + // TODO: macros for formatting when naming endpoints in tracing? + GASNETI_TRACE_PRINTF(O,("gex_EP_BindSegment: segment=%p, EP index=%d, flags=%d", + (void *)segment, i_ep->_index, flags)); + + if (segment == GEX_SEGMENT_INVALID) { + gasneti_fatalerror("Invalid call to gex_EP_BindSegment() with GEX_SEGMENT_INVALID"); + } + if (flags) { + gasneti_fatalerror("Invalid call to gex_EP_BindSegment() with non-zero flags"); + } + if (i_ep->_segment) { + gasneti_fatalerror("Invalid call to gex_EP_BindSegment() on EP with a bound segment"); + } + + i_ep->_segment = i_segment; + gasneti_record_seginfo(gasneti_mynode, i_ep->_index, i_segment->_addr, i_segment->_size); + + gasneti_legacy_segment_attach_hook(i_ep); +} + +/* ------------------------------------------------------------------------------------ */ +extern int gasneti_EP_PublishBoundSegment( + gex_TM_t tm, + gex_EP_t *eps, + size_t num_eps, + gex_Flags_t flags) +{ + GASNETI_TRACE_PRINTF(O,("gex_EP_PublishBoundSegment: tm="GASNETI_TMSELFFMT", num_ep=%"PRIuSZ", flags=%d", + GASNETI_TMSELFSTR(tm), num_eps, flags)); + + if (flags) { + gasneti_fatalerror("Invalid call to gex_EP_PublishBoundSegment() with non-zero flags"); + } + + // Conduit-indep segment fields + struct exchg_data { + gex_EP_Location_t loc; + void *addr; + uintptr_t size; + // TODO: probably need the "class" from _kind + } *local, *global, *p; + + size_t elem_sz = sizeof(struct exchg_data); + local = gasneti_malloc(num_eps * elem_sz); + + // Pack + p = local; + for 
(gex_Rank_t i = 0; i < num_eps; ++i) { + gex_EP_t ep = eps[i]; + gasneti_Segment_t segment = gasneti_import_ep(ep)->_segment; + if (! segment) continue; + p->loc.gex_rank = gasneti_mynode; + p->loc.gex_ep_index = gex_EP_QueryIndex(ep); + p->addr = segment->_addr; + p->size = segment->_size; + // TODO: kind class + ++p; + } + + // ExchangeV (variable-contribution GatherAll) + // Since data is self-describing ('loc' field) we do NOT require that the + // payload is in rank order, and so can use gasneti_blockingRotatedExchangeV(). + // + // TODO: Build/use "VisitAllV" since no need to construct entire array in memory + // TODO: Use the lengths array (final argument) to omit the jobranks from the comms? + // The downside is the need for forward rank->jobrank lookups instead. + // If *those* might communicate, then the current scheme makes more sense. + size_t local_bytes = elem_sz * (p - local); + size_t total_bytes = gasneti_blockingRotatedExchangeV(tm, local, local_bytes, (void**)&global, NULL); + size_t total_eps = total_bytes / elem_sz; + gasneti_free(local); + + // Unpack + p = global; + for (size_t i = 0; i < total_eps; ++i, ++p) { + gasneti_record_seginfo(p->loc.gex_rank, p->loc.gex_ep_index, p->addr, p->size); + } + gasneti_free(global); + +#if GASNET_PSHM + // BIG-TODO: PSHM cross-mapping ?? + // * Currently even cross-mapping of the primordial EP's segment is not + // possible + // * Main issue is that, in general, the current logic is collective over + // supernode (in gasneti_publish_segment()). Only XPMEM currently + // communicates anything, but that case uses a supernode-scope exchange to + // populate a global variable (not workable for this case for two + // reasons). 
+#endif + + return GASNET_OK; +} + /* ------------------------------------------------------------------------------------ */ gasnet_seginfo_t gasneti_segmentAttach( gex_Segment_t *segment_p, - size_t allocsz, gex_TM_t tm, uintptr_t segsize, - gasneti_bootstrapExchangefn_t exchangefn, gex_Flags_t flags) { #if GASNET_DEBUG @@ -1872,21 +2068,28 @@ gasnet_seginfo_t gasneti_segmentAttach( called = 1; #endif + gasneti_EP_t i_ep = gasneti_import_tm_nonpair(tm)->_ep; + gasneti_Client_t i_client = i_ep->_client; + /* ------------------------------------------------------------------------------------ */ /* register segment */ - gasnet_seginfo_t myseg = gasneti_do_attach_segment(segsize, gasneti_seginfo, exchangefn, flags); + // First portion of Segment_Create, plus cross-mapping and seginfo propagation: + gasnet_seginfo_t myseg = gasneti_do_attach_segment(segsize, gasneti_seginfo, tm, NULL, flags); + // Sanity checks: void *segbase = myseg.addr; segsize = myseg.size; - gasneti_assert_uint(((uintptr_t)segbase) % GASNET_PAGESIZE ,==, 0); gasneti_assert_uint(segsize % GASNET_PAGESIZE ,==, 0); - gasneti_EP_t ep = gasneti_import_tm(tm)->_ep; - ep->_segment = gasneti_alloc_segment(ep->_client, segbase, segsize, flags, allocsz); - gasneti_legacy_segment_attach_hook(ep); - *segment_p = gasneti_export_segment(ep->_segment); + // Final portion of Segment_Create: + gasneti_Segment_t i_segment = gasneti_alloc_segment(i_client, segbase, segsize, GEX_MK_HOST, flags); + gasneti_segtbl_add(i_segment); + + // EP_BindSegment: + i_ep->_segment = i_segment; + gasneti_legacy_segment_attach_hook(i_ep); // After local segment is attached, call optional client-provided hook if (gasnet_client_attach_hook) { @@ -1897,8 +2100,75 @@ gasnet_seginfo_t gasneti_segmentAttach( gasneti_assert_ptr(gasneti_seginfo[gasneti_mynode].addr ,==, segbase); gasneti_assert_uint(gasneti_seginfo[gasneti_mynode].size ,==, segsize); + // Two "outputs": + *segment_p = gasneti_export_segment(i_segment); return myseg; } + 
+/* ------------------------------------------------------------------------------------ */ +uint8_t gasneti_segment_read_dummy; // global so compiler cannot realize this is write-only +int gasneti_segmentCreate( + gex_Segment_t *segment_p, + gasneti_Client_t client, + gex_Addr_t address, + uintptr_t length, + gex_MK_t kind, + gex_Flags_t flags) +{ + GASNETI_TRACE_PRINTF(O,("gex_Segment_Create: addr="GASNETI_LADDRFMT" len=%"PRIuPTR" flags=%d", + GASNETI_LADDRSTR(address), length, flags)); + + if (!segment_p) { + gasneti_fatalerror("Invalid call to gex_Segment_Create() with NULL segment_p"); + } + if (flags) { + gasneti_fatalerror("Invalid call to gex_Segment_Create() with non-zero flags"); + } + if (! length) { + gasneti_fatalerror("Invalid call to gex_Segment_Create() with zero length"); + } + if (kind == GEX_MK_INVALID) { + gasneti_fatalerror("Invalid call to gex_Segment_Create() with kind = GEX_MK_INVALID"); + } + + gasneti_Segment_t segment = gasneti_import_segment(GEX_SEGMENT_INVALID); + + if (kind == GEX_MK_HOST) { + if (address) { + // Client-allocated segment + // TODO: stronger checks such as for read-only memory? + + // Check that at least the first and last bytes can be read, with some + // trickery to hopefully prevent the compiler from discarding the access. 
+ volatile uint8_t *bytes = address; + gasneti_segment_read_dummy += bytes[0] + bytes[length - 1]; + } else { + // GASNet-allocated segment + gasnet_seginfo_t seginfo; + int rc = gasneti_segment_map(&seginfo, GASNETI_PAGE_ALIGNUP(length), 0, flags); + if (rc != GASNET_OK) { + gasneti_fatalerror("Unexpected failure return from gasneti_segment_map()"); + } + address = seginfo.addr; + length = seginfo.size; + } + + // Create the Segment object + segment = gasneti_alloc_segment(client, address, length, kind, flags); + } else { + int rc = gasneti_MK_Segment_Create(&segment, client, address, length, kind, flags); + if (rc) return rc; + } + + gasneti_assert(segment != NULL); + gasneti_assert(segment->_client == client); + gasneti_assert(segment->_kind == kind); + + gasneti_segtbl_add(segment); + + *segment_p = gasneti_export_segment(segment); + return GASNET_OK; +} /* ------------------------------------------------------------------------------------ */ /* Used to pass the nodemap information to the client @@ -2065,7 +2335,8 @@ extern int gasneti_getSegmentInfo(gasnet_seginfo_t *seginfo_table, int numentrie return GASNET_OK; } -int gasneti_Segment_QueryBound( +// Inlining decision left to the compiler +static int gasneti_query_bound_segment( gex_TM_t tm, gex_Rank_t rank, void **owneraddr_p, @@ -2073,22 +2344,29 @@ int gasneti_Segment_QueryBound( uintptr_t *size_p) { // Trivial implementation using legacy data structures and assumptions. 
- gex_Rank_t jobrank = gasneti_e_tm_rank_to_jobrank(tm, rank); - // TODO-EX: cannot yet tell no segment from zero-length segment - gasneti_assert(gasneti_seginfo); - if (!gasneti_seginfo[jobrank].addr) return 1; // No (bound) segment + gex_EP_Location_t loc = gasneti_e_tm_rank_to_location(tm, rank, 0); + gex_Rank_t jobrank = loc.gex_rank; + gex_EP_Index_t idx = loc.gex_ep_index; + + // TODO-EX: Scalable storage + gasnet_seginfo_t *si_array = gasneti_seginfo_tbl[idx]; + if (!idx) gasneti_assert(si_array == gasneti_seginfo); + + // TODO-EX: cannot always tell no segment from zero-length segment + if (!si_array || !si_array[jobrank].addr) return 1; // No bound segment if (owneraddr_p) { - *owneraddr_p = gasneti_seginfo[jobrank].addr; + *owneraddr_p = si_array[jobrank].addr; } if (size_p){ - *size_p = gasneti_seginfo[jobrank].size; + *size_p = si_array[jobrank].size; } if (localaddr_p) { - if (GASNETI_NBRHD_JOBRANK_IS_LOCAL(jobrank)) { + // TODO-EX: this depends on legacy assumptions about cross-mapping + if (!idx && GASNETI_NBRHD_JOBRANK_IS_LOCAL(jobrank)) { #if GASNET_PSHM gasneti_assert(gasneti_nodeinfo); *localaddr_p = (void*)((uintptr_t)gasneti_seginfo[jobrank].addr + gasneti_nodeinfo[jobrank].offset); @@ -2103,6 +2381,45 @@ int gasneti_Segment_QueryBound( return 0; } +// DEPRECATED +int gex_Segment_QueryBound( + gex_TM_t tm, + gex_Rank_t rank, + void **owneraddr_p, + void **localaddr_p, + uintptr_t *size_p) +{ + GASNETI_TRACE_PRINTF(O,("gex_Segment_QueryBound: tm:rank=" GASNETI_TMRANKFMT, + GASNETI_TMRANKSTR(tm,rank))); + GASNETI_CHECK_INJECT(); + return gasneti_query_bound_segment(tm, rank, owneraddr_p, localaddr_p, size_p); +} + +// TODO: once representation is not dense, must return GEX_EVENT_NO_OP +// for unknown/missing data when flags contains GEX_FLAG_IMMEDIATE. 
+gex_Event_t gex_EP_QueryBoundSegmentNB( + gex_TM_t tm, + gex_Rank_t rank, + void **owneraddr_p, + void **localaddr_p, + uintptr_t *size_p, + gex_Flags_t flags) +{ + GASNETI_TRACE_PRINTF(O,("gex_EP_QueryBoundSegmentNB: tm:rank=" GASNETI_TMRANKFMT " flags=0x%x", + GASNETI_TMRANKSTR(tm,rank), flags)); + + if (! (flags & GEX_FLAG_IMMEDIATE)) GASNETI_CHECK_INJECT(); + + int rc = gasneti_query_bound_segment(tm, rank, owneraddr_p, localaddr_p, size_p); + if (rc && size_p) { + // non-zero rc means not bound, which we return as 0-length. + *size_p = 0; + } + + return GEX_EVENT_INVALID; +} + + /* ------------------------------------------------------------------------------------ */ /* Aux-seg support */ @@ -2278,80 +2595,43 @@ gasneti_auxsegAttach(uint64_t maxsize, gasneti_bootstrapExchangefn_t exchangefn) auxsize, maxsize); } gasneti_leak(gasneti_seginfo_aux = gasneti_malloc(gasneti_nodes*sizeof(gasnet_seginfo_t))); - gasnet_seginfo_t local_segment = gasneti_do_attach_segment(auxsize, gasneti_seginfo_aux, exchangefn, 0); + gasnet_seginfo_t local_segment = gasneti_do_attach_segment(auxsize, gasneti_seginfo_aux, NULL, exchangefn, 0); gasneti_auxseg_attach(gasneti_seginfo_aux); gasneti_assert_uint(gasneti_seginfo_aux[gasneti_mynode].size ,==, auxsize); return local_segment; } /* ------------------------------------------------------------------------------------ */ -// AM-based gasneti_bootstrapExchangefn_t - -static gasneti_weakatomic32_t gasneti_exchg_rcvd[2][32]; // Implicitly zero-initialized +// Host-scoped (potentially superset of supernode) barrier -static uint8_t *_gasneti_exchg_data[2] = {NULL,NULL}; -static uint8_t *gasneti_exchg_data(int phase, size_t elemsz) { - uint8_t *data = _gasneti_exchg_data[phase]; - if_pf (! data) { - static gasneti_mutex_t lock = GASNETI_MUTEX_INITIALIZER; - gasneti_mutex_lock(&lock); - data = _gasneti_exchg_data[phase]; - if (! 
data) { - data = gasneti_malloc(elemsz * gasneti_nodes); - _gasneti_exchg_data[phase] = data; - } - gasneti_mutex_unlock(&lock); - } - return data; -} +static gasneti_weakatomic32_t gasneti_hbarr_rcvd[2][32]; // Implicitly zero-initialized -extern void gasnetc_exchg_reqh(gex_Token_t token, void *buf, size_t nbytes, - gex_AM_Arg_t arg0, gex_AM_Arg_t elemsz) { - const int phase = arg0 & 1; - const int step = (arg0 >> 1) & 0x1f; // Max 2^5 steps => 2^32 nodes - const int seq = (arg0 >> 6); // Max 2^26 fragments * 512 => 32GB (and max sent is elemsz*nodes/2) - const int distance = (1 << step); - gasneti_assert_uint(distance ,<, gasneti_nodes); - uint8_t *data = gasneti_exchg_data(phase, elemsz); - uint8_t *dest = data + (elemsz * distance) + (seq * gex_AM_LUBRequestMedium()); - gasneti_assert_ptr(dest + nbytes ,<=, data + elemsz * gasneti_nodes); - memcpy(dest, buf, nbytes); - gasneti_weakatomic32_increment(&gasneti_exchg_rcvd[phase][step], GASNETI_ATOMIC_REL); +extern void gasnetc_hbarr_reqh(gex_Token_t token, gex_AM_Arg_t arg0) +{ + const int phase = arg0 & 1; + const int step = (arg0 >> 1) & 0x1f; // Max 2^5 steps => 2^32 proc/host! + const int distance = (1 << step); + gasneti_assert_uint(distance ,<, gasneti_myhost.node_count); + gasneti_weakatomic32_increment(&gasneti_hbarr_rcvd[phase][step], GASNETI_ATOMIC_REL); } -extern void gasneti_defaultExchange(void *src, size_t elemsz, void *dst) { +void gasneti_host_barrier(void) +{ + // Simple dissemination barrier with two phase static int phase = 0; - gasneti_sync_reads(); - - uint8_t *data = gasneti_exchg_data(phase, elemsz); - - /* copy in local contribution */ - memcpy(data, src, elemsz); - - /* Bruck's concatenation algorithm: */ - unsigned int step, distance; - for (step = 0, distance = 1; distance < gasneti_nodes; ++step, distance *= 2) { - gex_Rank_t peer = (distance <= gasneti_mynode) ? 
gasneti_mynode - distance - : gasneti_mynode + (gasneti_nodes - distance); - size_t nbytes = elemsz * MIN(distance, gasneti_nodes - distance); - size_t offset = 0; - uint32_t seq = 0; - - /* Send payload using AMMedium(s) */ - do { - const size_t to_xfer = MIN(nbytes, gex_AM_LUBRequestMedium()); - gex_AM_RequestMedium(gasneti_THUNK_TM, peer, _hidx_gasnetc_exchg_reqh, - data + offset, to_xfer, GEX_EVENT_NOW, 0, - phase | (step << 1) | (seq << 6), (uint32_t)elemsz); - ++seq; - offset += to_xfer; - nbytes -= to_xfer; - } while (nbytes); - - /* Poll until we have received the same number of messages as we sent */ - GASNET_BLOCKUNTIL((int)gasneti_weakatomic32_read(&gasneti_exchg_rcvd[phase][step], 0) >= (int)seq); - gasneti_assert_int((int)gasneti_weakatomic32_read(&gasneti_exchg_rcvd[phase][step], 0) ,==, (int)seq); - gasneti_weakatomic32_set(&gasneti_exchg_rcvd[phase][step], 0, 0); + const gex_Rank_t rank = gasneti_myhost.node_rank; + const gex_Rank_t size = gasneti_myhost.node_count; + for (unsigned int step = 0, distance = 1; distance < size; ++step, distance *= 2) { + gex_Rank_t peer = (distance <= rank) ? 
rank - distance : rank + (size - distance); + gex_AM_Arg_t arg0 = phase | (step << 1); + + gex_AM_RequestShort(gasneti_THUNK_TM, gasneti_myhost.nodes[peer], + gasneti_handleridx(gasnetc_hbarr_reqh), 0, arg0); + + // Poll until we have received the same phase we've just sent + GASNET_BLOCKUNTIL((int)gasneti_weakatomic32_read(&gasneti_hbarr_rcvd[phase][step], 0)); + gasneti_assert_int((int)gasneti_weakatomic32_read(&gasneti_hbarr_rcvd[phase][step], 0) ,==, 1); + gasneti_weakatomic32_set(&gasneti_hbarr_rcvd[phase][step], 0, 0); } #if GASNET_PSHM @@ -2359,17 +2639,6 @@ extern void gasneti_defaultExchange(void *src, size_t elemsz, void *dst) { gasneti_pshmnet_bootstrapBarrierPoll(); #endif - /* Copy to final destination while performing the rotation */ - const size_t a = elemsz * (gasneti_nodes - gasneti_mynode); - const size_t b = elemsz * gasneti_mynode; - memcpy(dst, data + a, b); - memcpy((uint8_t*)dst + b, data, a); - gasneti_assert(! memcmp((uint8_t*)dst + gasneti_mynode*elemsz, src, elemsz)); - - gasneti_free(data); - _gasneti_exchg_data[phase] = NULL; - - gasneti_sync_writes(); phase ^= 1; } diff --git a/third-party/gasnet/gasnet-src/gasnet_pshm.c b/third-party/gasnet/gasnet-src/gasnet_pshm.c index 4154cf163b42..cee8c65da70c 100644 --- a/third-party/gasnet/gasnet-src/gasnet_pshm.c +++ b/third-party/gasnet/gasnet-src/gasnet_pshm.c @@ -9,7 +9,7 @@ #if GASNET_PSHM /* Otherwise file is empty */ #include /* for gasnetc_handler[] */ -#include /* for gasneti_prepare_alloc_buffer() */ +#include /* for gasneti_{prepare_alloc,commit_free}_buffer() */ #include #include @@ -124,6 +124,11 @@ void *gasneti_pshm_init(gasneti_bootstrapBroadcastfn_t snodebcastfn, size_t aux_ sz2b = GASNETI_ALIGNUP(sz2b, GASNETI_CACHE_LINE_BYTES); sz2b += sizeof(gasneti_pshm_barrier_t) + (gasneti_pshm_nodes - 1) * sizeof(gasneti_pshm_barrier->node); + #ifdef GASNETI_PSHM_PRIVATE_DATA_SIZE + // Optional data private to an implementaion of PSHM + sz2b = GASNETI_ALIGNUP(sz2b, 
GASNETI_CACHE_LINE_BYTES); + sz2b += GASNETI_PSHM_PRIVATE_DATA_SIZE(); + #endif // final info_sz required: info_sz = sz1 + MAX(sz2a, sz2b); @@ -189,6 +194,11 @@ void *gasneti_pshm_init(gasneti_bootstrapBroadcastfn_t snodebcastfn, size_t aux_ gasneti_pshm_barrier = (gasneti_pshm_barrier_t *)addr; addr += sizeof(gasneti_pshm_barrier_t) + (gasneti_pshm_nodes-1) * sizeof(gasneti_pshm_barrier->node); + #ifdef GASNETI_PSHM_PRIVATE_DATA_INIT + // Optional private data (per implementaion of PSHM) + // If used, must be last since this does not advance 'addr'. + GASNETI_PSHM_PRIVATE_DATA_INIT(addr); + #endif } /* Populate gasneti_pshm_firsts[] */ @@ -276,8 +286,12 @@ typedef gasneti_AMPSHM_msg_t gasneti_AMPSHM_shortmsg_t; typedef struct { gasneti_AMPSHM_msg_t msg; uint32_t numbytes; - uint8_t mediumdata[4 + GASNETC_MAX_MEDIUM_NBRHD]; /* +4 to deal with 4 or 8-byte alignment */ + uint8_t mediumdata[1]; // flexible array member } gasneti_AMPSHM_medmsg_t; +// Note: we round offset up to GASNETI_MEDBUF_ALIGNMENT boundary, since that is where payload will be placed +#define GASNETI_AMPSHM_MEDMSG_DATA_OFFSET \ + GASNETI_ALIGNUP(offsetof(gasneti_AMPSHM_medmsg_t,mediumdata), GASNETI_MEDBUF_ALIGNMENT) +#define GASNETI_SIZEOF_AMPSHM_MEDMSG_T (GASNETI_AMPSHM_MEDMSG_DATA_OFFSET + GASNETC_MAX_MEDIUM_NBRHD) typedef struct { gasneti_AMPSHM_msg_t msg; @@ -290,6 +304,7 @@ typedef union { gasneti_AMPSHM_medmsg_t Medium; gasneti_AMPSHM_longmsg_t Long; } gasneti_AMPSHM_maxmsg_t; +#define GASNETI_SIZEOF_AMPSHM_MAXMSG_T GASNETI_SIZEOF_AMPSHM_MEDMSG_T /* atomic operations on queue tail */ #if defined(GASNETI_HAVE_ATOMIC_CAS) @@ -373,6 +388,8 @@ typedef struct gasneti_pshmnet_payload { size_t len; gasneti_AMPSHM_maxmsg_t data; } gasneti_pshmnet_payload_t; +#define GASNETI_SIZEOF_PSHMNET_PAYLOAD_T \ + (offsetof(gasneti_pshmnet_payload_t,data) + GASNETI_SIZEOF_AMPSHM_MAXMSG_T) /****************************************************************************** * Payload memory allocator interface. 
@@ -395,9 +412,11 @@ typedef struct { gasneti_atomic_t in_use; gasneti_pshmnet_payload_t payload; } gasneti_pshmnet_allocator_block_t; +#define GASNETI_SIZEOF_PSHMNET_ALLOCATOR_BLOCK_T \ + (offsetof(gasneti_pshmnet_allocator_block_t,payload) + GASNETI_SIZEOF_PSHMNET_PAYLOAD_T) #define GASNETI_PSHMNET_ALLOC_MAXSZ \ - round_up_to_pshmpage(sizeof(gasneti_pshmnet_allocator_block_t)) + round_up_to_pshmpage(GASNETI_SIZEOF_PSHMNET_ALLOCATOR_BLOCK_T) #define GASNETI_PSHMNET_ALLOC_MAXPG (GASNETI_PSHMNET_ALLOC_MAXSZ >> GASNETI_PSHMNET_PAGESHIFT) #define GASNETI_PSHMNET_MAX_PAYLOAD \ @@ -535,7 +554,10 @@ gasneti_pshmnet_init(void *region, size_t regionlen, gasneti_pshm_rank_t pshmnod void *myregion; /* make sure that our max buffer size fits all possible AMs */ - gasneti_assert(sizeof(gasneti_AMPSHM_maxmsg_t) <= GASNETI_PSHMNET_MAX_PAYLOAD); + gasneti_assert(GASNETI_SIZEOF_AMPSHM_MAXMSG_T <= GASNETI_PSHMNET_MAX_PAYLOAD); + gasneti_assert(GASNETI_SIZEOF_AMPSHM_MAXMSG_T >= sizeof(gasneti_AMPSHM_shortmsg_t)); + gasneti_assert(GASNETI_SIZEOF_AMPSHM_MAXMSG_T >= GASNETI_SIZEOF_AMPSHM_MEDMSG_T); + gasneti_assert(GASNETI_SIZEOF_AMPSHM_MAXMSG_T >= sizeof(gasneti_AMPSHM_longmsg_t)); gasneti_assert((offsetof(gasneti_AMPSHM_medmsg_t, mediumdata) % 4) == 0); @@ -544,12 +566,12 @@ gasneti_pshmnet_init(void *region, size_t regionlen, gasneti_pshm_rank_t pshmnod (PLATFORM_ARCH_X86 || PLATFORM_ARCH_X86_64) // Arbitrary choice of frequently-tested ABIs known to provide tight fit gasneti_assert((GASNETC_MAX_MEDIUM_NBRHD != GASNETC_MAX_MEDIUM_NBRHD_DFLT) || \ - (sizeof(gasneti_pshmnet_allocator_block_t) == 65536)); + (GASNETI_SIZEOF_PSHMNET_ALLOCATOR_BLOCK_T == 65536)); #else // Other ABIs may have less restrictive alignments (allow 16-byte slack) gasneti_assert((GASNETC_MAX_MEDIUM_NBRHD != GASNETC_MAX_MEDIUM_NBRHD_DFLT) || \ - ((sizeof(gasneti_pshmnet_allocator_block_t) <= 65536) && \ - (sizeof(gasneti_pshmnet_allocator_block_t) >= 65536 - 16))); + ((GASNETI_SIZEOF_PSHMNET_ALLOCATOR_BLOCK_T <= 
65536) && \ + (GASNETI_SIZEOF_PSHMNET_ALLOCATOR_BLOCK_T >= 65536 - 16))); #endif szpernode = gasneti_pshmnet_memory_needed_pernode(pshmnodes); @@ -1105,9 +1127,9 @@ static void gasneti_pshmnet_free(gasneti_pshmnet_payload_t *p) /* The mediumdata field may not be aligned */ #define GASNETI_AMPSHM_MSG_MEDDATA_OFFSET \ - (offsetof(gasneti_pshmnet_allocator_block_t, payload.data.Medium.mediumdata)&7) + (offsetof(gasneti_pshmnet_allocator_block_t, payload.data.Medium.mediumdata)&(GASNETI_MEDBUF_ALIGNMENT-1)) #define GASNETI_AMPSHM_MSG_MEDDATA_SHIFT \ - (GASNETI_AMPSHM_MSG_MEDDATA_OFFSET?(8-GASNETI_AMPSHM_MSG_MEDDATA_OFFSET):0) + (GASNETI_AMPSHM_MSG_MEDDATA_OFFSET?(GASNETI_MEDBUF_ALIGNMENT-GASNETI_AMPSHM_MSG_MEDDATA_OFFSET):0) #define GASNETI_AMPSHM_MSG_CATEGORY(msg) (((gasneti_AMPSHM_msg_t*)msg)->category) #define GASNETI_AMPSHM_MSG_HANDLERID(msg) (((gasneti_AMPSHM_msg_t*)msg)->handler_id) @@ -1244,14 +1266,14 @@ static void * ampshm_buf_alloc( msgsz = sizeof(gasneti_AMPSHM_shortmsg_t); break; case gasneti_Medium: - msgsz = sizeof(gasneti_AMPSHM_medmsg_t) - (GASNETC_MAX_MEDIUM_NBRHD - nbytes); + msgsz = GASNETI_AMPSHM_MEDMSG_DATA_OFFSET + nbytes; break; case gasneti_Long: msgsz = sizeof(gasneti_AMPSHM_longmsg_t); break; default: gasneti_unreachable_error(("Invalid category=%i",(int)category)); } - gasneti_assert_uint(msgsz ,<=, sizeof(gasneti_AMPSHM_maxmsg_t)); + gasneti_assert_uint(msgsz ,<=, GASNETI_SIZEOF_AMPSHM_MAXMSG_T); /* Get buffer, poll if busy (unless IMMEDIATE) Lock serializes allocation so small messages can't starve large ones */ @@ -1310,13 +1332,12 @@ int ampshm_prepare_inner( } else if (category == gasneti_Medium) { size = MIN(most_payload, GASNETC_MAX_MEDIUM_NBRHD); } else { - size = MIN(most_payload, GASNETC_MAX_LONG_NBRHD); + size_t limit = client_buf ? 
GASNETC_MAX_LONG_NBRHD : GASNETC_REF_NPAM_MAX_ALLOC; + size = MIN(most_payload, limit); // For small enough Long use the free space after the header to avoid malloc/free inline_long = (size <= GASNETI_AMPSHM_MSG_LONG_INLINE); } - gasneti_assert(sd->_tofree == NULL); // check this before possible IMMEDIATE failure - // Allocate our buffer (honoring IMMEDIATE) gasneti_pshmnet_t *vnet = (isReq ? gasneti_request_pshmnet : gasneti_reply_pshmnet); void *msg = ampshm_buf_alloc(vnet, category, isReq, pshmrank, size, flags GASNETI_THREAD_PASS); @@ -1338,11 +1359,14 @@ int ampshm_prepare_inner( sd->_addr = (/*non-const*/void *)client_buf; gasneti_leaf_finish(lc_opt); } else if (category == gasneti_Medium) { + // NPAM Medium with GASNet-allocated buffer sd->_gex_buf = sd->_addr = GASNETI_AMPSHM_MSG_MED_DATA(msg); } else if (inline_long) { + // NPAM Long with GASNet-allocated buffer, "inline" with header sd->_gex_buf = sd->_addr = GASNETI_AMPSHM_MSG_LONG_TMP(msg); } else { - sd->_tofree = gasneti_prepare_alloc_buffer(sd); + // NPAM Long with GASNet-allocated buffer, general case + sd->_tofree = gasneti_alloc_npam_buffer(sd, isReq); } return 0; @@ -1350,7 +1374,7 @@ int ampshm_prepare_inner( // After sd, next 3 params (isFixed, isReq, category) will be manifest constants // which should lead to specialization of the code upon inlining. -GASNETI_INLINE(ampshm_comit_inner) +GASNETI_INLINE(ampshm_commit_inner) void ampshm_commit_inner( gasneti_AM_SrcDesc_t sd, const int isFixed, const int isReq, const int category, @@ -1401,9 +1425,8 @@ void ampshm_commit_inner( gasneti_pshmnet_t *vnet = (isReq ? 
gasneti_request_pshmnet : gasneti_reply_pshmnet); gasneti_pshmnet_deliver_send_buffer(vnet, msg, 0 /*msgsz unused*/, sd->_pshm._pshmrank); - if (sd->_tofree) { // Branch to avoid free(NULL) library call overhead for NPAM/cb - gasneti_free(sd->_tofree); - sd->_tofree = NULL; + if (sd->_tofree) { + gasneti_free_npam_buffer(sd); } } @@ -1463,7 +1486,7 @@ int ampshm_prepare(gasneti_AM_SrcDesc_t sd, // After sd, next 2 params (isReq, category) will be manifest constants // which should lead to specialization of the code upon inlining. -GASNETI_INLINE(ampshm_comit) +GASNETI_INLINE(ampshm_commit) void ampshm_commit(gasneti_AM_SrcDesc_t sd, const int isReq, const gasneti_category_t category, gex_AM_Index_t handler, size_t nbytes, diff --git a/third-party/gasnet/gasnet-src/gasnet_pshm.h b/third-party/gasnet/gasnet-src/gasnet_pshm.h index 2e5953f99aba..147b46d4f497 100644 --- a/third-party/gasnet/gasnet-src/gasnet_pshm.h +++ b/third-party/gasnet/gasnet-src/gasnet_pshm.h @@ -25,6 +25,11 @@ #elif !defined(GASNETI_PSHM_POSIX) && !defined(GASNETI_PSHM_SYSV) && !defined(GASNETI_PSHM_FILE) && defined(GASNETI_PSHM_XPMEM) #undef GASNETI_PSHM_XPMEM #define GASNETI_PSHM_XPMEM 1 + // Hooks: + extern size_t gasneti_pshm_private_data_size(void); + #define GASNETI_PSHM_PRIVATE_DATA_SIZE gasneti_pshm_private_data_size + extern void gasneti_pshm_private_data_init(uintptr_t); + #define GASNETI_PSHM_PRIVATE_DATA_INIT gasneti_pshm_private_data_init #else #error PSHM configuration must be exactly one of (GASNETI_PSHM_POSIX, GASNETI_PSHM_SYSV, GASNETI_PSHM_FILE, GASNETI_PSHM_XPMEM) #endif diff --git a/third-party/gasnet/gasnet-src/gasnet_syncops.h b/third-party/gasnet/gasnet-src/gasnet_syncops.h index 7e937d8f4d25..d5b2d582d6c7 100644 --- a/third-party/gasnet/gasnet-src/gasnet_syncops.h +++ b/third-party/gasnet/gasnet-src/gasnet_syncops.h @@ -882,92 +882,6 @@ gasneti_atomic_val_t gasneti_semaphore_trydown_partial_SEQ(gasneti_semaphore_t_S } #define GASNETI_LIFO_INITIALIZER_PAR {{0,}, 
gasneti_atomic128_init(0,0), {0,}} #define GASNETI_HAVE_ARCH_LIFO 1 -#elif PLATFORM_ARCH_IA64 && PLATFORM_ARCH_64 && GASNETI_HAVE_IA64_CMP8XCHG16 - /* Use the SCDS (Single-compare, double-swap) cmp8xchg16 instruction added to - * the Montecito processors. The algorithm is essentially the same as w/ CAS, - * but the TAG is advanced/checked on both Push and Pop operations. Note that - * we also need the "ld16" (128-bit atomic read) to ensure "tag" and "head" match - * (since we only compare on tag). - * - * We use compiler-specific code for: - * _gasneti_lifo_store16(): cmp8xchg16 w/ tag++ and return 0 on success - * _gasneti_lifo_load16(_addr, _tag, _addr): 16-byte atomic read macro - * _gasneti_lifo_st8_rel(): st8.rel instruction - * and implement push/pop in terms of those using compiler-independent code. - */ - #if PLATFORM_COMPILER_INTEL - #include - GASNETI_INLINE(_gasneti_lifo_store16) - int _gasneti_lifo_store16(void volatile *ptr, uint64_t oldtag, void *newval) { - return oldtag != _InterlockedCompare64Exchange128_acq(ptr, (uint64_t)newval, oldtag+1, oldtag); - } - #define _gasneti_lifo_load16(_addr, _tag, _head) \ - ((_tag) = __load128((_addr), &(_head))) - #define _gasneti_lifo_st8_rel(_addr, _val) \ - __st8_rel((_addr), (_val)) - - #define GASNETI_HAVE_ARCH_LIFO 1 - #elif PLATFORM_COMPILER_GNU - GASNETI_INLINE(_gasneti_lifo_store16) - int _gasneti_lifo_store16(void volatile *ptr, uint64_t oldtag, void *newval) { - register uint64_t tmp = oldtag + 1; - __asm__ __volatile__ ( - "mov ar.ccv=%1 \n\t" - "mov ar.csd=%2;; \n\t" - "cmp8xchg16.acq %0=[%3],%0,ar.csd,ar.ccv\n" - : "+r"(tmp) : "r"(oldtag), "r"(newval), "r"(ptr) : "memory" ); - return tmp != oldtag; - } - #define _gasneti_lifo_load16(_addr, _tag, _head) \ - __asm__ __volatile__ ( \ - "ld16 %0,ar.csd=[%2];;\n\t" \ - "mov %1=ar.csd \n" \ - : "=r"(_tag), "=r"(_head) : "r"(_addr) : "memory" ) - #define _gasneti_lifo_st8_rel(_addr, _val) \ - __asm__ __volatile__ ( "st8.rel [%0]=%1" : : "r"(_addr), 
"r"(_val) : "memory") - - #define GASNETI_HAVE_ARCH_LIFO 1 - #else - /* Unknown/unsupported compiler - mutexes will be used */ - #endif - - /* Here are the compiler-independent parts */ - #ifdef GASNETI_HAVE_ARCH_LIFO /* Only true if compiler-specific parts defined above */ - typedef struct { - void *array[3]; /* for 16-byte aligment use either 0+1 or 1+2 */ - char _pad[GASNETI_CACHE_PAD(3*sizeof(void *))]; - } gasneti_lifo_head_t_PAR; - - GASNETI_INLINE(_gasneti_lifo_push) - void _gasneti_lifo_push(gasneti_lifo_head_t_PAR *p, void **head, void **tail) { - uint64_t tag, old_head; - void *q = (void *)GASNETI_ALIGNUP(p, 16); - do { - _gasneti_lifo_load16(q, tag, old_head); - _gasneti_lifo_st8_rel(tail, old_head); - } while (_gasneti_lifo_store16(q, tag, head)); - } - GASNETI_INLINE(_gasneti_lifo_pop) - void *_gasneti_lifo_pop(gasneti_lifo_head_t_PAR *p) { - uint64_t tag, old_head; - void *q = (void *)GASNETI_ALIGNUP(p, 16); - do { - _gasneti_lifo_load16(q, tag, old_head); - if (!old_head) break; - } while (_gasneti_lifo_store16(q, tag, *(void **)old_head)); - return (void *)old_head; - } - GASNETI_INLINE(_gasneti_lifo_init) - void _gasneti_lifo_init(gasneti_lifo_head_t_PAR *p) { - void **q = (void **)GASNETI_ALIGNUP(p, 16); - q[0] = q[1] = NULL; - } - GASNETI_INLINE(_gasneti_lifo_destroy) - void _gasneti_lifo_destroy(gasneti_lifo_head_t_PAR *p) { - /* NOTHING */ - } - #define GASNETI_LIFO_INITIALIZE_PAR { { NULL, NULL, NULL} } - #endif /* Compiler-independent portion of 64-bit ia64 support */ #else /* The LL/SC algorithm used on the PPC will not work on the Alpha or MIPS, which don't * allow for the load we perform between the ll and the sc. More complex algorithms are @@ -975,8 +889,6 @@ gasneti_atomic_val_t gasneti_semaphore_trydown_partial_SEQ(gasneti_semaphore_t_S * * We do support x86-64 CPUs which implement their optional CAS2 (cmpxchg16b) instruction. * - * We do support IA64 CPUs which implement their optional SCDS (cmp8xchg16) instruction. 
- * * One possible solution for all remaining platforms is "software ll/sc". Using just pointer * CAS, one can implement an ideal LL/SC which allows for arbitrary loads and stores between * the LL and the SC. This would require a compare-and-swap-pointer atomic operation. diff --git a/third-party/gasnet/gasnet-src/gasnet_timer.h b/third-party/gasnet/gasnet-src/gasnet_timer.h index 193eac93d8b5..f6ac75b563aa 100644 --- a/third-party/gasnet/gasnet-src/gasnet_timer.h +++ b/third-party/gasnet/gasnet-src/gasnet_timer.h @@ -66,7 +66,7 @@ /* ------------------------------------------------------------------------------------ */ #elif (PLATFORM_OS_LINUX || PLATFORM_OS_CNL || PLATFORM_OS_WSL || PLATFORM_OS_OPENBSD || \ GASNETI_HAVE_SYSCTL_MACHDEP_TSC_FREQ) && \ - (PLATFORM_ARCH_X86 || PLATFORM_ARCH_X86_64 || PLATFORM_ARCH_MIC || PLATFORM_ARCH_IA64) + (PLATFORM_ARCH_X86 || PLATFORM_ARCH_X86_64 || PLATFORM_ARCH_MIC) typedef uint64_t gasneti_tick_t; #if GASNETI_HAVE_GCC_ASM GASNETI_INLINE(gasneti_ticks_now) @@ -82,10 +82,6 @@ __asm__ __volatile__("rdtsc" : "=A" (_ret) /* no inputs */); - #elif PLATFORM_ARCH_IA64 - __asm__ __volatile__("mov %0=ar.itc" - : "=r"(_ret) - /* no inputs */); #else #error Unreachable #endif @@ -114,22 +110,12 @@ "\trdtsc \n" \ "\tshlq $32, %rdx \n" \ "\torq %rdx, %rax" ); - #elif PLATFORM_ARCH_IA64 - /* For completeness. 
*/ - #define GASNETI_TICKS_NOW_BODY \ - GASNETI_ASM_SPECIAL( "mov.m r8=ar.itc;" ); #endif #elif PLATFORM_COMPILER_CRAY GASNETI_INLINE(gasneti_ticks_now) uint64_t gasneti_ticks_now (void) { return (uint64_t) _rtc(); } - #elif PLATFORM_COMPILER_INTEL && PLATFORM_ARCH_IA64 - #include - GASNETI_INLINE(gasneti_ticks_now) - uint64_t gasneti_ticks_now (void) { - return (uint64_t)__getReg(_IA64_REG_AR_ITC); - } #else #define GASNETI_USING_SLOW_TIMERS 1 #endif diff --git a/third-party/gasnet/gasnet-src/gasnet_tm.c b/third-party/gasnet/gasnet-src/gasnet_tm.c index f581b0c34b96..4d2aa16a4f16 100644 --- a/third-party/gasnet/gasnet-src/gasnet_tm.c +++ b/third-party/gasnet/gasnet-src/gasnet_tm.c @@ -48,20 +48,22 @@ gex_Rank_t gasneti_tm_rev_rank(gasneti_TM_t tm, gex_Rank_t jobrank) { } static size_t -get_scratch_size(gasneti_TM_t i_parent, gex_Rank_t new_tm_size, gex_Flags_t flags) +get_scratch_size(gex_Rank_t new_tm_size, gex_Flags_t flags) { - if (!new_tm_size) return 0; + // Specially defined cases + if (0 == new_tm_size) return 0; + if (1 == new_tm_size) return GASNETE_COLL_SCRATCH_SIZE_MIN; - static size_t minimum, recommended; + static size_t recommended; static int is_init = 0; if_pf (!is_init) { static gasneti_mutex_t lock = GASNETI_MUTEX_INITIALIZER; gasneti_mutex_lock(&lock); if (!is_init) { - minimum = gasneti_getenv_int_withdefault("GASNET_COLL_MIN_SCRATCH_SIZE", - GASNETE_COLL_MIN_SCRATCH_SIZE_DEFAULT,1); recommended = gasneti_getenv_int_withdefault("GASNET_COLL_SCRATCH_SIZE", GASNETE_COLL_SCRATCH_SIZE_DEFAULT,1); + // Silently raise to implementation-defined minimum + recommended = MAX(recommended, GASNETE_COLL_SCRATCH_SIZE_MIN); gasneti_sync_writes(); is_init = 1; } @@ -70,30 +72,29 @@ get_scratch_size(gasneti_TM_t i_parent, gex_Rank_t new_tm_size, gex_Flags_t flag gasneti_sync_reads(); } - // The current true minimum is one byte for every member in the new team. - // TODO-EX: is this really the value we want to advertise? 
- if (flags & GEX_FLAG_TM_SCRATCH_SIZE_MIN) { - return MAX(minimum, GASNETI_ALIGNUP(new_tm_size, GASNETI_CACHE_LINE_BYTES)); - } - - if (flags & GEX_FLAG_TM_SCRATCH_SIZE_RECOMMENDED) { - return MAX(minimum, recommended); - } - - gasneti_fatalerror("Invalid team scratch size query"); - return 0; + return recommended; } size_t gasneti_TM_Split(gex_TM_t *new_tm_p, gex_TM_t e_parent, int color, int key, void *addr, size_t len, gex_Flags_t flags GASNETI_THREAD_FARG) { - gasneti_TM_t i_parent = gasneti_import_tm(e_parent); + gasneti_TM_t i_parent = gasneti_import_tm_nonpair(e_parent); gasneti_EP_t ep = i_parent->_ep; - GASNETI_TRACE_PRINTF(W,("TM_Split: parent="GASNETI_TMSELFFMT" color=%d key=%d flags=%d", + GASNETI_TRACE_PRINTF(O,("gex_TM_Split: parent="GASNETI_TMSELFFMT" color=%d key=%d flags=%d", GASNETI_TMSELFSTR(e_parent), color, key, flags)); + static int did_warn = 0; + if ((flags & GEX_FLAG_TM_SCRATCH_SIZE_MIN) && !did_warn) { + if (! i_parent->_rank) { + gasneti_console_message("WARNING", + "gex_TM_Split() called using GEX_FLAG_TM_SCRATCH_SIZE_MIN, " + "deprecated since specification 0.11."); + } + did_warn = 1; // Some process did, even if it was not us. + } + #if GASNET_DEBUG if ((flags & GEX_FLAG_TM_SCRATCH_SIZE_MIN) && (flags & GEX_FLAG_TM_SCRATCH_SIZE_RECOMMENDED)) { @@ -102,26 +103,39 @@ size_t gasneti_TM_Split(gex_TM_t *new_tm_p, gex_TM_t e_parent, int color, int ke "GEX_FLAG_TM_SCRATCH_SIZE_RECOMMENDED both set in flags argument"); } #endif + if (flags & (GEX_FLAG_TM_SCRATCH_SIZE_MIN | GEX_FLAG_TM_SCRATCH_SIZE_RECOMMENDED)) { - // The MINIMUM scratch requirement scales as size of new team, not the parent. - // However, performing a collective to size the teams seems unnecessary. - // So, we are passing the size of the parent. - size_t result = new_tm_p ? 
get_scratch_size(i_parent, i_parent->_size, flags) : 0; - GASNETI_TRACE_PRINTF(W,("TM_Split: scratch size query result=%"PRIuSZ, result)); + // Don't know true size w/o comms, but singleton parent can only produce singleton children + size_t result = new_tm_p ? get_scratch_size(i_parent->_size, flags) : 0; + GASNETI_TRACE_PRINTF(O,("gex_TM_Split: scratch size query result=%"PRIuSZ, result)); return result; } + // Split's scratch address is GEX_FLAG_TM_LOCAL_SCRATCH by default, + // but GEX_FLAG_TM_NO_SCRATCH is also accepted. + // TODO: support GEX_FLAG_TM_SYMMETRIC_SCRATCH too + if (flags & GEX_FLAG_TM_GLOBAL_SCRATCH) { + gasneti_fatalerror("Invalid call to gex_TM_Split with GEX_FLAG_TM_GLOBAL_SCRATCH"); + } else if (flags & GEX_FLAG_TM_SYMMETRIC_SCRATCH) { + gasneti_fatalerror("Invalid call to gex_TM_Split with GEX_FLAG_TM_SYMMETRIC_SCRATCH"); + } else if (! (flags & GEX_FLAG_TM_NO_SCRATCH)) { + flags |= GEX_FLAG_TM_LOCAL_SCRATCH; + } + if (!new_tm_p) { color = -1; // tell gasnete_coll_team_split() not to create a team for this caller } else { gasneti_assert_int(color ,>=, 0); -#if !GASNET_SEGMENT_EVERYTHING - gasneti_assert(ep->_segment); - gasneti_assert_ptr(addr ,>=, ep->_segment->_addr); - gasneti_assert_ptr((uint8_t*)addr+len ,<=, ep->_segment->_ub); -#endif - gasneti_assert_uint(len ,>=, get_scratch_size(i_parent, i_parent->_size, - flags | GEX_FLAG_TM_SCRATCH_SIZE_MIN)); + if (! 
(flags & GEX_FLAG_TM_NO_SCRATCH)) { + #if !GASNET_SEGMENT_EVERYTHING + gasneti_assert(ep->_segment); + gasneti_assert_ptr(addr ,>=, ep->_segment->_addr); + gasneti_assert_ptr((uint8_t*)addr+len ,<=, ep->_segment->_ub); + #endif + if (!len) { + gasneti_fatalerror("Invalid call to gex_TM_Split with scratch_size = 0"); + } + } } gasnete_coll_team_t team = @@ -130,13 +144,12 @@ size_t gasneti_TM_Split(gex_TM_t *new_tm_p, gex_TM_t e_parent, int color, int ke if (team == NULL) { gasneti_assert(!new_tm_p); - GASNETI_TRACE_PRINTF(W,("TM_Split: parent="GASNETI_TMSELFFMT" [No team created]", + GASNETI_TRACE_PRINTF(O,("gex_TM_Split: parent="GASNETI_TMSELFFMT" [No team created]", GASNETI_TMSELFSTR(e_parent))); return 0; } - // TODO-EX: use of a conduit-specific hook is needed here - gasneti_TM_t i_tm = gasneti_alloc_tm(ep, team->myrank, team->total_ranks, flags, 0); + gasneti_TM_t i_tm = gasneti_alloc_tm(ep, team->myrank, team->total_ranks, flags); i_tm->_coll_team = team; gex_TM_t e_tm = gasneti_export_tm(i_tm); team->e_tm = e_tm; @@ -145,9 +158,9 @@ size_t gasneti_TM_Split(gex_TM_t *new_tm_p, gex_TM_t e_parent, int color, int ke i_tm->_rank_map = team->rel2act_map; i_tm->_index_map = NULL; // TODO-EX: provide this for teams w/ non-primordial EPs - GASNETI_TRACE_PRINTF(W,("TM_Split: parent="GASNETI_TMSELFFMT" result="GASNETI_TMSELFFMT, + GASNETI_TRACE_PRINTF(O,("gex_TM_Split: parent="GASNETI_TMSELFFMT" result="GASNETI_TMSELFFMT, GASNETI_TMSELFSTR(e_parent), GASNETI_TMSELFSTR(e_tm))); - GASNETI_STAT_EVENT(W, TEAM_NEW_SPLIT); + GASNETI_STAT_EVENT(O, TEAM_NEW_SPLIT); return 1; // return is documented as undefined } @@ -158,7 +171,6 @@ size_t gasneti_TM_Split(gex_TM_t *new_tm_p, gex_TM_t e_parent, int color, int ke // - (num_new_tms > 1) // - non-zero gex_ep_index // - caller's EP not in members[] -// + GEX_FLAG_TM_NO_SCRATCH (fails "down stream" due to bug 4090) // + GEX_FLAG_SCRATCH_SEG_OFFSET size_t gasneti_TM_Create( gex_TM_t *new_tms, @@ -172,7 +184,7 @@ size_t 
gasneti_TM_Create( GASNETI_THREAD_FARG) { size_t result = 0; - gasneti_TM_t i_parent = gasneti_import_tm(e_parent); + gasneti_TM_t i_parent = gasneti_import_tm_nonpair(e_parent); // NOTE: we can simplify things by observing that ranks in TM0 are always jobranks flags |= gasneti_is_tm0(i_parent) ? GEX_FLAG_RANK_IS_JOBRANK : 0; @@ -180,35 +192,49 @@ size_t gasneti_TM_Create( gasneti_EP_t ep = i_parent->_ep; int is_jobrank = (flags & GEX_FLAG_RANK_IS_JOBRANK); - GASNETI_TRACE_PRINTF(W,("TM_Create: parent="GASNETI_TMSELFFMT" num_new_tms=%"PRIuSZ" nmembers=%"PRIuSZ" scratch_size=%"PRIuSZ" flags=%d", + GASNETI_TRACE_PRINTF(O,("gex_TM_Create: parent="GASNETI_TMSELFFMT" num_new_tms=%"PRIuSZ" nmembers=%"PRIuSZ" scratch_size=%"PRIuSZ" flags=%d", GASNETI_TMSELFSTR(e_parent), num_new_tms, nmembers, scratch_size, flags)); - // For now 0 or 1 are the only valid numbers of outputs. - gasneti_assert(!nmembers || num_new_tms == 1); + static int did_warn = 0; + if ((flags & GEX_FLAG_TM_SCRATCH_SIZE_MIN) && !did_warn) { + if (! i_parent->_rank) { + gasneti_console_message("WARNING", + "gex_TM_Create() called using GEX_FLAG_TM_SCRATCH_SIZE_MIN, " + "deprecated since specification 0.11."); + } + did_warn = 1; // Some process did, even if it was not us. + } #if GASNET_DEBUG if ((flags & GEX_FLAG_TM_SCRATCH_SIZE_MIN) && (flags & GEX_FLAG_TM_SCRATCH_SIZE_RECOMMENDED)) { - gasneti_fatalerror("Call to gex_TM_Split() with mutually-exclusive " + gasneti_fatalerror("Call to gex_TM_Create() with mutually-exclusive " "GEX_FLAG_TM_SCRATCH_SIZE_MIN and " "GEX_FLAG_TM_SCRATCH_SIZE_RECOMMENDED both set in flags argument"); } #endif + + // For now 0 or 1 are the only valid numbers of outputs. + gasneti_assert(!nmembers || num_new_tms == 1); + if (flags & (GEX_FLAG_TM_SCRATCH_SIZE_MIN | GEX_FLAG_TM_SCRATCH_SIZE_RECOMMENDED)) { - size_t result = nmembers ? 
get_scratch_size(i_parent, nmembers, flags) : 0; - GASNETI_TRACE_PRINTF(W,("TM_Create: scratch size query result=%"PRIuSZ, result)); + size_t result = nmembers ? get_scratch_size(nmembers, flags) : 0; + GASNETI_TRACE_PRINTF(O,("gex_TM_Create: scratch size query result=%"PRIuSZ, result)); return result; } if (num_new_tms && nmembers) { - GASNETI_TRACE_PRINTF(D,("TM_Create: members[ %s ]", gasneti_format_eploc(members, nmembers))); + if (!scratch_size && !(flags & GEX_FLAG_TM_NO_SCRATCH)) { + gasneti_fatalerror("Invalid call to gex_TM_Create with scratch_size = 0"); + } + GASNETI_TRACE_PRINTF(D,("gex_TM_Create: members[ %s ]", gasneti_format_eploc(members, nmembers))); } // TODO-EX: remove when subteam collectives no longer require a parent-scope entry barrier gasnete_coll_consensus_barrier(i_parent->_coll_team GASNETI_THREAD_PASS); if (! nmembers) { - GASNETI_TRACE_PRINTF(W,("TM_Create: parent="GASNETI_TMSELFFMT" [No team created]", + GASNETI_TRACE_PRINTF(O,("gex_TM_Create: parent="GASNETI_TMSELFFMT" [No team created]", GASNETI_TMSELFSTR(e_parent))); goto done; } @@ -245,8 +271,7 @@ size_t gasneti_TM_Create( scratch_size, scratch_addrs, flags GASNETI_THREAD_PASS); - // TODO-EX: use of a conduit-specific hook is needed here - gasneti_TM_t i_tm = gasneti_alloc_tm(ep, my_new_rank, nmembers, flags, 0); + gasneti_TM_t i_tm = gasneti_alloc_tm(ep, my_new_rank, nmembers, flags); i_tm->_coll_team = team; gex_TM_t e_tm = gasneti_export_tm(i_tm); team->e_tm = e_tm; @@ -256,9 +281,9 @@ size_t gasneti_TM_Create( i_tm->_index_map = NULL; // TODO-EX: provide this for teams w/ non-primordial EPs // TODO-EX: outut only correct for num_new_tms==1 - GASNETI_TRACE_PRINTF(W,("TM_Create: parent="GASNETI_TMSELFFMT" rank=%d size=%d result="GASNETI_TMSELFFMT, + GASNETI_TRACE_PRINTF(O,("gex_TM_Create: parent="GASNETI_TMSELFFMT" rank=%d size=%d result="GASNETI_TMSELFFMT, GASNETI_TMSELFSTR(e_parent), my_new_rank, (int)nmembers, GASNETI_TMSELFSTR(e_tm))); - GASNETI_STAT_EVENT(W, 
TEAM_NEW_CREATE); + GASNETI_STAT_EVENT(O, TEAM_NEW_CREATE); result = 1; // return is documented as undefined @@ -272,13 +297,13 @@ int gasneti_TM_Destroy( gex_Flags_t flags GASNETI_THREAD_FARG) { - gasneti_TM_t i_tm = gasneti_import_tm(e_tm); + gasneti_TM_t i_tm = gasneti_import_tm_nonpair(e_tm); gasnete_coll_team_t team = i_tm->_coll_team; - GASNETI_TRACE_PRINTF(W,("TM_Destroy: team="GASNETI_TMSELFFMT" flags=%d", + GASNETI_TRACE_PRINTF(O,("gex_TM_Destroy: team="GASNETI_TMSELFFMT" flags=%d", GASNETI_TMSELFSTR(e_tm), flags)); if (1) { // TODO: w/ multi-EP exactly one tm per proc should log this event - GASNETI_STAT_EVENT(W, TEAM_DESTROY); + GASNETI_STAT_EVENT(O, TEAM_DESTROY); } if (gasneti_is_tm0(i_tm)) { @@ -292,14 +317,29 @@ int gasneti_TM_Destroy( return gasnete_coll_team_free(team, scratch_p); } +/* ------------------------------------------------------------------------------------ */ +// Simple blocking Exchange utility function +extern void gasneti_blockingExchange(gex_TM_t tm, void *src, size_t len, void *dst) +{ + // TODO-EX: use gex_Coll_Exchange() once available + const int coll_flags = GASNET_COLL_LOCAL | GASNET_COLL_IN_MYSYNC | GASNET_COLL_OUT_MYSYNC; + gasnet_coll_gather_all(gasneti_import_tm_nonpair(tm)->_coll_team, dst, src, len, coll_flags); +} + /* ------------------------------------------------------------------------------------ */ /* TM trace formatting - legal even without STATS/TRACE */ // Format a gex_TM_t as a GUID extern const char *gasneti_formattm(gex_TM_t e_tm) { - if ((uintptr_t)e_tm == 1) return "N/A"; // GASNet-1 collectives team - if (e_tm == NULL) return "JOB"; // JobRank, as with token - gasnete_coll_team_t team = gasneti_import_tm(e_tm)->_coll_team; - if (team == NULL) return "TM0"; // Team0 before end of Client_Init - return gasneti_dynsprintf("TM%x", (unsigned int)team->team_id); + if (e_tm == NULL) return "JOB"; // JobRank, as with token and legacy collectives + if (gasneti_e_tm_is_pair(e_tm)) { + gasneti_TM_Pair_t pair = 
gasneti_import_tm_pair(e_tm); + gex_EP_Index_t loc_idx = gasneti_tm_pair_loc_idx(pair); + gex_EP_Index_t rem_idx = gasneti_tm_pair_rem_idx(pair); + return gasneti_dynsprintf("TM_PAIR(%x,%x)", loc_idx, rem_idx); + } else { + gasnete_coll_team_t team = gasneti_import_tm(e_tm)->_coll_team; + if (team == NULL) return "TM0"; // Team0 before end of Client_Init + return gasneti_dynsprintf("TM%x", (unsigned int)team->team_id); + } } diff --git a/third-party/gasnet/gasnet-src/gasnet_toolhelp.h b/third-party/gasnet/gasnet-src/gasnet_toolhelp.h index ef979a5c29d9..803379eaf16c 100644 --- a/third-party/gasnet/gasnet-src/gasnet_toolhelp.h +++ b/third-party/gasnet/gasnet-src/gasnet_toolhelp.h @@ -357,7 +357,7 @@ const char *gasnett_signame_fromval(int _sigval); extern void gasneti_registerExitHandler(void (*_exitfn)(int)); GASNETI_COLD -extern int gasneti_raise(int sig); // portability wrapper around POSIX raise(3) +extern int gasneti_raise(int _sig); // portability wrapper around POSIX raise(3) extern int gasneti_blocksig(int _sig); extern int gasneti_unblocksig(int _sig); diff --git a/third-party/gasnet/gasnet-src/gasnet_tools.c b/third-party/gasnet/gasnet-src/gasnet_tools.c index 9f10d31600f4..47867b4c47a4 100644 --- a/third-party/gasnet/gasnet-src/gasnet_tools.c +++ b/third-party/gasnet/gasnet-src/gasnet_tools.c @@ -398,8 +398,11 @@ GASNETI_IDENT(gasnett_IdentString_SystemName, GASNETI_IDENT(gasnett_IdentString_CompilerID, "$GASNetCompilerID: " PLATFORM_COMPILER_IDSTR " $"); -GASNETI_IDENT(gasnett_IdentString_GitHash, - "$GASNetGitHash: gex-2020.10.0 $"); +#ifndef GASNETI_GIT_HASH + #define GASNETI_GIT_HASH no-version-control-info +#endif +GASNETI_IDENT(gasnett_IdentString_GitHash, + "$GASNetGitHash: " _STRINGIFY(GASNETI_GIT_HASH) " $"); int GASNETT_LINKCONFIG_IDIOTCHECK(_CONCAT(RELEASE_MAJOR_,GASNET_RELEASE_VERSION_MAJOR)) = 1; int GASNETT_LINKCONFIG_IDIOTCHECK(_CONCAT(RELEASE_MINOR_,GASNET_RELEASE_VERSION_MINOR)) = 1; @@ -3527,7 +3530,7 @@ retry_calibration:; return mid; } 
-#if GASNETI_CALIBRATE_TSC /* x86, x86-64, MIC and ia64 */ +#if GASNETI_CALIBRATE_TSC /* x86, x86-64 and MIC */ extern double gasneti_calibrate_tsc_from_kernel(void) { double Tick = 0.0; /* Inverse GHz */ @@ -3552,23 +3555,6 @@ extern double gasneti_calibrate_tsc_from_kernel(void) { gasneti_assert_int(MHz ,>, 1); gasneti_assert_int(MHz ,<, 100000); Tick = 1000. / MHz; - #elif PLATFORM_ARCH_IA64 /* && ( PLATFORM_OS_LINUX || PLATFORM_OS_CNL ) */ - FILE *fp = fopen("/proc/cpuinfo","r"); - char input[255]; - if (!fp) gasneti_fatalerror("Failure in fopen('/proc/cpuinfo','r')=%s",strerror(errno)); - while (!feof(fp) && fgets(input, sizeof(input), fp)) { - if (strstr(input,"itc MHz")) { - char *p = strchr(input,':'); - double MHz = 0.0; - if (p) MHz = atof(p+1); - // ensure it looks reasonable - gasneti_assert_dbl(MHz ,>, 1); - gasneti_assert_dbl(MHz ,<, 100000); - Tick = 1000. / MHz; - break; - } - } - fclose(fp); #else /* (X86 || X86_64 || MIC) && (Linux || CNL || WSL) */ FILE *fp = NULL; char input[512]; /* 256 is too small for "flags" line in /proc/cpuino */ diff --git a/third-party/gasnet/gasnet-src/gasnet_tools.h b/third-party/gasnet/gasnet-src/gasnet_tools.h index 7b8c49c71427..8fc0fad91880 100644 --- a/third-party/gasnet/gasnet-src/gasnet_tools.h +++ b/third-party/gasnet/gasnet-src/gasnet_tools.h @@ -157,6 +157,17 @@ GASNETI_BEGIN_NOWARN #define gasnett_unreachable gasneti_unreachable +/* ------------------------------------------------------------------------------------ */ +/* discard macro aguments w/ compiler-specific warning supression */ +#define GASNETT_UNUSED_ARGS1 GASNETI_UNUSED_ARGS1 +#define GASNETT_UNUSED_ARGS2 GASNETI_UNUSED_ARGS2 +#define GASNETT_UNUSED_ARGS3 GASNETI_UNUSED_ARGS3 +#define GASNETT_UNUSED_ARGS4 GASNETI_UNUSED_ARGS4 +#define GASNETT_UNUSED_ARGS5 GASNETI_UNUSED_ARGS5 +#define GASNETT_UNUSED_ARGS6 GASNETI_UNUSED_ARGS6 +#define GASNETT_UNUSED_ARGS7 GASNETI_UNUSED_ARGS7 +#define GASNETT_UNUSED_ARGS8 GASNETI_UNUSED_ARGS8 + /* 
------------------------------------------------------------------------------------ */ /* portable memory barriers */ @@ -535,10 +546,21 @@ static void _gasnett_trace_printf_noop(const char *_format, ...)) { (gasnett_stats_callback = (callbackfn), GASNETI_STATS_ENABLED(H)) #define GASNETT_STATS_GETMASK() GASNETI_STATS_GETMASK() #define GASNETT_STATS_SETMASK(mask) GASNETI_STATS_SETMASK(mask) + #define GASNETT_STATS_DUMP(reset) gasneti_stats_dump(reset) + extern void gasneti_stats_dump(int _reset); + GASNETI_FORMAT_PRINTF(_gasnett_stats_printf,1,2, + extern void _gasnett_stats_printf(const char *format, ...)); + GASNETI_FORMAT_PRINTF(_gasnett_stats_printf_force,1,2, + extern void _gasnett_stats_printf_force(const char *format, ...)); + #define GASNETT_STATS_PRINTF _gasnett_stats_printf + #define GASNETT_STATS_PRINTF_FORCE _gasnett_stats_printf_force #else #define GASNETT_STATS_INIT(callbackfn) 0 #define GASNETT_STATS_GETMASK() "" #define GASNETT_STATS_SETMASK(mask) ((void)0) + #define GASNETT_STATS_DUMP(reset) ((void)0) + #define GASNETT_STATS_PRINTF _gasnett_trace_printf_noop + #define GASNETT_STATS_PRINTF_FORCE _gasnett_trace_printf_noop #endif /* ------------------------------------------------------------------------------------ */ diff --git a/third-party/gasnet/gasnet-src/gasnet_trace.c b/third-party/gasnet/gasnet-src/gasnet_trace.c index e5b40e1559b8..89c418d50360 100644 --- a/third-party/gasnet/gasnet-src/gasnet_trace.c +++ b/third-party/gasnet/gasnet-src/gasnet_trace.c @@ -33,6 +33,11 @@ #endif gasneti_mutex_t gasneti_tracelock = GASNETI_MUTEX_INITIALIZER; +#if GASNET_STATS +static gasneti_mutex_t gasneti_statlock = GASNETI_MUTEX_INITIALIZER; +#define GASNETI_STAT_LOCK() gasneti_mutex_lock(&gasneti_statlock); +#define GASNETI_STAT_UNLOCK() gasneti_mutex_unlock(&gasneti_statlock); +#endif #define GASNETI_MAX_MASKBITS 256 char gasneti_tracetypes[GASNETI_MAX_MASKBITS]; char gasneti_tracetypes_all[GASNETI_MAX_MASKBITS]; @@ -47,49 +52,63 @@ FILE 
*gasneti_statsfile = NULL; static gasneti_tick_t starttime; #endif +/* ------------------------------------------------------------------------------------ */ +// Tools tracing interface support + #if GASNET_STATS void (*gasnett_stats_callback)( GASNETI_FORMAT_PRINTF_FUNCPTR_ARG(format,1,2,void (*format)(const char *, ...)) ) = NULL; #endif - static int _gasnett_trace_enabled_body(char tracecat) { return GASNETI_TRACE_ENABLED(tracecat); } int (*_gasnett_trace_enabled)(char tracecat) = &_gasnett_trace_enabled_body; -#if GASNET_TRACE - #define TMPBUFSZ 1024 - #define _GASNETT_TRACE_PRINTF_DOIT(cat) do { \ +#ifndef TMPBUFSZ +#define TMPBUFSZ 1024 +#endif +#define _GASNETT_TS_PRINTF_DOIT(TS,cat) do { \ char output[TMPBUFSZ]; \ - if (GASNETI_TRACE_ENABLED(cat)) { /* skip some varargs overhead */ \ + if (TS##_ENABLED(cat)) { /* skip some varargs overhead */ \ va_list argptr; \ va_start(argptr, format); /* pass in last argument */ \ { int sz = vsnprintf(output, TMPBUFSZ, format, argptr); \ if (sz >= (TMPBUFSZ-5) || sz < 0) strcpy(output+(TMPBUFSZ-5),"..."); \ } \ va_end(argptr); \ - GASNETI_TRACE_MSG(cat, output); \ + TS##_MSG(cat, output); \ } \ } while (0) +#if GASNET_TRACE GASNETI_FORMAT_PRINTF(_gasnett_trace_printf_body,1,2, static void _gasnett_trace_printf_body(const char *format, ...)) { - _GASNETT_TRACE_PRINTF_DOIT(H); + _GASNETT_TS_PRINTF_DOIT(GASNETI_TRACE,H); } GASNETI_FORMAT_PRINTF(_gasnett_trace_printf_force_body,1,2, static void _gasnett_trace_printf_force_body(const char *format, ...)) { - _GASNETT_TRACE_PRINTF_DOIT(U); + _GASNETT_TS_PRINTF_DOIT(GASNETI_TRACE,U); } GASNETT_FORMAT_PRINTF_FUNCPTR(_gasnett_trace_printf,1,2, void (*_gasnett_trace_printf)(const char *format, ...)) = _gasnett_trace_printf_body; GASNETT_FORMAT_PRINTF_FUNCPTR(_gasnett_trace_printf_force,1,2, void (*_gasnett_trace_printf_force)(const char *format, ...)) = _gasnett_trace_printf_force_body; +#endif - #undef _GASNETT_TRACE_PRINTF_DOIT - #undef TMPBUFSZ +#if GASNET_STATS + 
GASNETI_FORMAT_PRINTF(_gasnett_stats_printf,1,2, + extern void _gasnett_stats_printf(const char *format, ...)) { + _GASNETT_TS_PRINTF_DOIT(GASNETI_STATS,H); + } + GASNETI_FORMAT_PRINTF(_gasnett_stats_printf_force,1,2, + extern void _gasnett_stats_printf_force(const char *format, ...)) { + _GASNETT_TS_PRINTF_DOIT(GASNETI_STATS,U); + } #endif +#undef _GASNETT_TRACE_PRINTF_DOIT +#undef TMPBUFSZ /* ------------------------------------------------------------------------------------ */ /* VIS trace formatting - these are legal even without STATS/TRACE */ @@ -345,6 +364,21 @@ size_t gasneti_format_ti(char *buf, gex_TI_t ti) { return gasneti_format_mask(buf,ti,sizeof(names)/sizeof(char *),names,"GEX_TI_"); } +/* ------------------------------------------------------------------------------------ */ +/* Magic number trace formatting - legal even without STATS/TRACE */ + +void gasneti_format_magic(char *buf, uint64_t magic) { + char *p = buf + snprintf(buf, GASNETI_MAX_MAGICSZ, "0x%016" PRIx64 "(", magic); + gasneti_magic_t m; m._u = magic; + for (int i = 0; i < sizeof(uint64_t); ++i) { + unsigned char c = m._c[i]; + *(p++) = isprint((int)c) ? 
c : '?'; + } + *(p++) = ')'; + *(p++) = '\0'; + gasneti_assert_uint(strlen(buf) ,==, GASNETI_MAX_MAGICSZ-1); +} + /* ------------------------------------------------------------------------------------ */ /* line number control */ @@ -539,6 +573,11 @@ size_t gasneti_format_ti(char *buf, gex_TI_t ti) { if (gasneti_autoflush) fflush(fp); \ } while (0) + static void gasneti_tracestats_forceflush() { + if (gasneti_statsfile) fflush(gasneti_statsfile); + if (gasneti_tracefile) fflush(gasneti_tracefile); + } + /* private helper for gasneti_trace/stats_output */ static void gasneti_file_output(FILE *fp, double time, const char *type, const char *msg, int traceheader) { gasneti_mutex_assertlocked(&gasneti_tracelock); @@ -586,7 +625,7 @@ size_t gasneti_format_ti(char *buf, gex_TI_t ti) { if (gasneti_statsfile) gasneti_file_output(gasneti_statsfile, time, type, msg, traceheader); #if GASNETI_STATS_ECHOED_TO_TRACEFILE - if (gasneti_tracefile) /* stat output also goes to trace */ + if (gasneti_tracefile && gasneti_tracefile != gasneti_statsfile) /* stat output also goes to trace */ gasneti_file_output(gasneti_tracefile, time, type, msg, traceheader); #endif gasneti_mutex_unlock(&gasneti_tracelock); @@ -598,7 +637,7 @@ size_t gasneti_format_ti(char *buf, gex_TI_t ti) { gasneti_mutex_lock(&gasneti_tracelock); if (gasneti_statsfile) gasneti_file_output(gasneti_statsfile, time, type, msg, traceheader); - if (gasneti_tracefile) + if (gasneti_tracefile && gasneti_tracefile != gasneti_statsfile) gasneti_file_output(gasneti_tracefile, time, type, msg, traceheader); gasneti_mutex_unlock(&gasneti_tracelock); } @@ -640,7 +679,7 @@ size_t gasneti_format_ti(char *buf, gex_TI_t ti) { va_end(argptr); } #if GASNETI_STATS_ECHOED_TO_TRACEFILE - if (gasneti_tracefile) { /* stat output also goes to trace */ + if (gasneti_tracefile && gasneti_tracefile != gasneti_statsfile) { /* stat output also goes to trace */ va_start(argptr, format); /* pass in last argument */ 
gasneti_file_vprintf(gasneti_tracefile, format, argptr); va_end(argptr); @@ -658,7 +697,7 @@ size_t gasneti_format_ti(char *buf, gex_TI_t ti) { gasneti_file_vprintf(gasneti_statsfile, format, argptr); va_end(argptr); } - if (gasneti_tracefile) { + if (gasneti_tracefile && gasneti_tracefile != gasneti_statsfile) { va_start(argptr, format); /* pass in last argument */ gasneti_file_vprintf(gasneti_tracefile, format, argptr); va_end(argptr); @@ -701,14 +740,15 @@ extern FILE *gasneti_open_outputfile(const char *filename, const char *desc) { #endif } if (!fp) { - fprintf(stderr, "ERROR: Failed to open '%s' for %s output (%s). Redirecting output to stderr.\n", + gasneti_console_message("ERROR", + "Failed to open '%s' for %s output (%s). Redirecting output to stderr.\n", filename, desc, strerror(errno)); filename = "stderr"; fp = stderr; } } - fprintf(stderr, "GASNet reporting enabled - %s output directed to %s\n", - desc, filename); + gasneti_console_message("GASNet reporting enabled", + "%s output directed to %s\n", desc, filename); return fp; } @@ -1234,14 +1274,9 @@ extern void gasneti_trace_init(int *pargc, char ***pargv) { #endif #if GASNET_NDEBUG - { const char *NDEBUG_warning = - "WARNING: tracing/statistical collection may adversely affect application performance."; - gasneti_tracestats_printf("%s",NDEBUG_warning); - if (gasneti_tracefile != stdout && gasneti_tracefile != stderr && - gasneti_statsfile != stdout && gasneti_statsfile != stderr) { - fputs(NDEBUG_warning,stderr); - fputs("\n",stderr); - } + { const char *NDEBUG_warning = "tracing/statistical collection may adversely affect application performance."; + gasneti_tracestats_printf("WARNING: %s", NDEBUG_warning); + if (!gasneti_mynode) gasneti_console_message("WARNING", NDEBUG_warning); } #endif @@ -1252,22 +1287,16 @@ extern void gasneti_trace_init(int *pargc, char ***pargv) { gasneti_tracestats_printf("Timer granularity: ~ %.3f us, overhead: ~ %.3f us", gasneti_tick_granularity(), 
gasneti_tick_overhead()); - fflush(NULL); + gasneti_tracestats_forceflush(); #endif /* GASNETI_STATS_OR_TRACE */ #if GASNET_DEBUGMALLOC #if GASNET_NDEBUG - { const char *NDEBUG_warning = - "WARNING: debugging malloc may adversely affect application performance."; + { const char *NDEBUG_warning = "debugging malloc may adversely affect application performance."; #if GASNETI_STATS_OR_TRACE - gasneti_tracestats_printf(NDEBUG_warning); - if (gasneti_tracefile != stdout && gasneti_tracefile != stderr && - gasneti_statsfile != stdout && gasneti_statsfile != stderr) + gasneti_tracestats_printf("WARNING: %s", NDEBUG_warning); #endif - { - fputs(NDEBUG_warning,stderr); - fputs("\n",stderr); - } + if (!gasneti_mynode) gasneti_console_message("WARNING", NDEBUG_warning); } #endif gasneti_mallocreport_filename = gasneti_getenv_withdefault("GASNET_MALLOCFILE",""); @@ -1276,27 +1305,163 @@ extern void gasneti_trace_init(int *pargc, char ***pargv) { #endif } -#if GASNETI_STATS_OR_TRACE -#define AGGRNAME(cat,type) gasneti_aggregate_##cat##_##type -#define AGGR(type) \ - static gasneti_statctr_t AGGRNAME(ctr,type) = 0; \ - static gasneti_stat_intval_t AGGRNAME(intval,type) = \ - { 0, GASNETI_STATCTR_MAX, GASNETI_STATCTR_MIN, 0 }; \ - static gasneti_stat_timeval_t AGGRNAME(timeval,type) = \ - { 0, GASNETI_TICK_MAX, GASNETI_TICK_MIN, 0 } -AGGR(G); -AGGR(P); -AGGR(S); -AGGR(R); -AGGR(W); -AGGR(X); -AGGR(B); -AGGR(L); -AGGR(A); -AGGR(I); -AGGR(C); -AGGR(D); + +/* output statistical summary to statsfile, optionally resetting counters */ +extern void gasneti_stats_dump(int reset) { +#if GASNET_STATS + if (!gasneti_statsfile + #if GASNETI_STATS_ECHOED_TO_TRACEFILE + && !gasneti_tracefile + #endif + ) return; // output is disabled on this process + + static gasneti_mutex_t stats_dump_lock = GASNETI_MUTEX_INITIALIZER; + gasneti_mutex_lock(&stats_dump_lock); + + /* reenable all statistics that have ever been enabled, for the final aggregation dump */ + char 
statstypes_tmp[GASNETI_MAX_MASKBITS]; // save current mask + memcpy(statstypes_tmp, gasneti_statstypes, GASNETI_MAX_MASKBITS); + memcpy(gasneti_statstypes, gasneti_statstypes_all, GASNETI_MAX_MASKBITS); + + gasneti_tracestats_output("U","Generating statistical summary",1); + + if (gasnett_stats_callback && GASNETI_STATS_ENABLED(H)) { + gasneti_stats_printf("--------------------------------------------------------------------------------"); + (*gasnett_stats_callback)(gasneti_stats_printf); + } + + gasneti_stats_printf("--------------------------------------------------------------------------------"); + gasneti_stats_printf("GASNet Statistical Summary:"); + + const gasneti_statctr_t clear_ctr = 0; + const gasneti_stat_intval_t clear_intval = { 0, GASNETI_STATCTR_MAX, GASNETI_STATCTR_MIN, 0 }; + const gasneti_stat_timeval_t clear_timeval = { 0, GASNETI_TICK_MAX, GASNETI_TICK_MIN, 0 }; + + // initialize some stat accumulators + #define AGGRNAME(cat,type) aggregate_##cat##_##type + #define AGGR(type) \ + gasneti_statctr_t AGGRNAME(ctr,type) = clear_ctr; \ + gasneti_stat_intval_t AGGRNAME(intval,type) = clear_intval; \ + gasneti_stat_timeval_t AGGRNAME(timeval,type) = clear_timeval; + AGGR(G); + AGGR(P); + AGGR(S); + AGGR(R); + AGGR(W); + AGGR(X); + AGGR(B); + AGGR(L); + AGGR(A); + AGGR(I); + AGGR(C); + AGGR(D); + + #define ACCUM(pacc, pintval) do { \ + pacc->_count += pintval->_count; \ + if (pintval->_minval < pacc->_minval) pacc->_minval = pintval->_minval; \ + if (pintval->_maxval > pacc->_maxval) pacc->_maxval = pintval->_maxval; \ + pacc->_sumval += pintval->_sumval; \ + } while (0) + #define CALC_AVG(sum,count) ((count) == 0 ? 
(double)-1 : (double)(sum) / (double)(count)) + #define DUMP_CTR(type,name,desc) \ + if (GASNETI_STATS_ENABLED(type)) { \ + gasneti_statctr_t *p = &gasneti_stat_ctr_##name; \ + gasneti_stats_printf(" %-25s %6"PRIu64, \ + #name" "#desc":", *p); \ + AGGRNAME(ctr,type) += *p; \ + if (reset) *p = clear_ctr; \ + } + #define DUMP_INTVAL(type,name,desc) \ + if (GASNETI_STATS_ENABLED(type)) { \ + gasneti_stat_intval_t *p = &gasneti_stat_intval_##name; \ + const char *pdesc = #desc; \ + if (!p->_count) \ + gasneti_stats_printf(" %-25s %6i", #name":", 0); \ + else \ + gasneti_stats_printf(" %-25s %6"PRIu64" avg/min/max/total" \ + " %s = %.3f/%"PRIu64"/%"PRIu64"/%"PRIu64, \ + #name":", p->_count, pdesc, \ + CALC_AVG(p->_sumval,p->_count), \ + p->_minval, p->_maxval, p->_sumval); \ + ACCUM((&AGGRNAME(intval,type)), p); \ + if (reset) *p = clear_intval; \ + } + #define DUMP_TIMEVAL(type,name,desc) \ + if (GASNETI_STATS_ENABLED(type)) { \ + gasneti_stat_timeval_t *p = &gasneti_stat_timeval_##name; \ + const char *pdesc = #desc; \ + if (!p->_count) \ + gasneti_stats_printf(" %-25s %6i", #name":", 0); \ + else \ + gasneti_stats_printf(" %-25s %6"PRIu64" avg/min/max/total" \ + " %s (us) = %.3f/%.3f/%.3f/%.3f", \ + #name":", p->_count, pdesc, \ + gasneti_ticks_to_ns(CALC_AVG(p->_sumval, p->_count))/1000.0, \ + gasneti_ticks_to_ns(p->_minval)/1000.0, \ + gasneti_ticks_to_ns(p->_maxval)/1000.0, \ + gasneti_ticks_to_ns(p->_sumval)/1000.0); \ + ACCUM((&AGGRNAME(timeval,type)), p); \ + if (reset) *p = clear_timeval; \ + } + + GASNETI_STAT_LOCK(); + GASNETI_ALL_STATS(DUMP_CTR, DUMP_INTVAL, DUMP_TIMEVAL); + GASNETI_STAT_UNLOCK(); + + // output aggregated values in accumulators + gasneti_stats_printf(" "); + gasneti_stats_printf(" "); + #define DUMP_AGGR_SZ(type,name) do { \ + if (GASNETI_STATS_ENABLED(type)) { \ + gasneti_stat_intval_t *p = &AGGRNAME(intval,type); \ + if (!p->_count) \ + gasneti_stats_printf("%-25s %6i","Total "#name":",0); \ + else \ + gasneti_stats_printf("%-25s 
%6"PRIu64" avg/min/max/total" \ + " sz = %.3f/%"PRIu64"/%"PRIu64"/%"PRIu64, \ + "Total "#name":", \ + p->_count, CALC_AVG(p->_sumval,p->_count), \ + p->_minval, p->_maxval, p->_sumval); \ + } \ + } while (0) + DUMP_AGGR_SZ(G,gets); + DUMP_AGGR_SZ(P,puts); + DUMP_AGGR_SZ(W,collectives); + if (GASNETI_STATS_ENABLED(S)) { + gasneti_stat_intval_t *try_succ = &AGGRNAME(intval,S); + gasneti_stat_timeval_t *wait_time = &AGGRNAME(timeval,S); + if (!try_succ->_count) + gasneti_stats_printf("%-25s %6i","Total try sync. calls:",0); + else + gasneti_stats_printf("%-25s %6"PRIu64" try success rate = %.3f%% \n", + "Total try sync. calls:", try_succ->_count, + CALC_AVG(try_succ->_sumval, try_succ->_count) * 100.0); + if (!wait_time->_count) + gasneti_stats_printf("%-25s %6i","Total wait sync. calls:",0); + else + gasneti_stats_printf("%-25s %6"PRIu64" avg/min/max/total waittime (us) = %.3f/%.3f/%.3f/%.3f", + "Total wait sync. calls:", wait_time->_count, + gasneti_ticks_to_ns(CALC_AVG(wait_time->_sumval, wait_time->_count))/1000.0, + gasneti_ticks_to_ns(wait_time->_minval)/1000.0, + gasneti_ticks_to_ns(wait_time->_maxval)/1000.0, + gasneti_ticks_to_ns(wait_time->_sumval)/1000.0); + } + if (GASNETI_STATS_ENABLED(A)) + gasneti_stats_printf("%-25s %6"PRIu64, "Total AM's:", AGGRNAME(ctr,A)); + + gasneti_stats_printf("--------------------------------------------------------------------------------"); + + GASNETC_STATS_DUMP(reset); /* allow for dump of conduit-core specific statistics */ + GASNETE_STATS_DUMP(reset); /* allow for dump of conduit-extended specific statistics */ + + if (reset) gasneti_tracestats_output("U","Stats have been RESET at client request.",1); + + gasneti_tracestats_forceflush(); + + memcpy(gasneti_statstypes, statstypes_tmp, GASNETI_MAX_MASKBITS); // restore + gasneti_mutex_unlock(&stats_dump_lock); #endif +} extern void gasneti_trace_finish(void) { #if GASNETI_STATS_OR_TRACE @@ -1306,116 +1471,9 @@ extern void gasneti_trace_finish(void) { double time = 
gasneti_ticks_to_ns(gasneti_ticks_now() - starttime) / 1.0E9; gasneti_tracestats_printf("Total application run time: %10.6fs", time); + gasneti_tracestats_forceflush(); - fflush(NULL); - #if GASNET_STATS - { /* output statistical summary */ - - /* reenable all statistics that have ever been enabled, for the final aggregation dump */ - memcpy(gasneti_statstypes, gasneti_statstypes_all, GASNETI_MAX_MASKBITS); - - if (gasnett_stats_callback && GASNETI_STATS_ENABLED(H)) { - gasneti_stats_printf("--------------------------------------------------------------------------------"); - (*gasnett_stats_callback)(gasneti_stats_printf); - } - - gasneti_stats_printf("--------------------------------------------------------------------------------"); - gasneti_stats_printf("GASNet Statistical Summary:"); - - #define ACCUM(pacc, pintval) do { \ - pacc->_count += pintval->_count; \ - if (pintval->_minval < pacc->_minval) pacc->_minval = pintval->_minval; \ - if (pintval->_maxval > pacc->_maxval) pacc->_maxval = pintval->_maxval; \ - pacc->_sumval += pintval->_sumval; \ - } while (0) - #define CALC_AVG(sum,count) ((count) == 0 ? 
(double)-1 : (double)(sum) / (double)(count)) - #define DUMP_CTR(type,name,desc) \ - if (GASNETI_STATS_ENABLED(type)) { \ - gasneti_statctr_t *p = &gasneti_stat_ctr_##name; \ - gasneti_stats_printf(" %-25s %6"PRIu64, \ - #name" "#desc":", *p); \ - AGGRNAME(ctr,type) += *p; \ - } - #define DUMP_INTVAL(type,name,desc) \ - if (GASNETI_STATS_ENABLED(type)) { \ - gasneti_stat_intval_t *p = &gasneti_stat_intval_##name; \ - const char *pdesc = #desc; \ - if (!p->_count) \ - gasneti_stats_printf(" %-25s %6i", #name":", 0); \ - else \ - gasneti_stats_printf(" %-25s %6"PRIu64" avg/min/max/total" \ - " %s = %.3f/%"PRIu64"/%"PRIu64"/%"PRIu64, \ - #name":", p->_count, pdesc, \ - CALC_AVG(p->_sumval,p->_count), \ - p->_minval, p->_maxval, p->_sumval); \ - ACCUM((&AGGRNAME(intval,type)), p); \ - } - #define DUMP_TIMEVAL(type,name,desc) \ - if (GASNETI_STATS_ENABLED(type)) { \ - gasneti_stat_timeval_t *p = &gasneti_stat_timeval_##name; \ - const char *pdesc = #desc; \ - if (!p->_count) \ - gasneti_stats_printf(" %-25s %6i", #name":", 0); \ - else \ - gasneti_stats_printf(" %-25s %6"PRIu64" avg/min/max/total" \ - " %s (us) = %.3f/%.3f/%.3f/%.3f", \ - #name":", p->_count, pdesc, \ - gasneti_ticks_to_ns(CALC_AVG(p->_sumval, p->_count))/1000.0, \ - gasneti_ticks_to_ns(p->_minval)/1000.0, \ - gasneti_ticks_to_ns(p->_maxval)/1000.0, \ - gasneti_ticks_to_ns(p->_sumval)/1000.0); \ - ACCUM((&AGGRNAME(timeval,type)), p); \ - } - - GASNETI_ALL_STATS(DUMP_CTR, DUMP_INTVAL, DUMP_TIMEVAL); - - gasneti_stats_printf(" "); - gasneti_stats_printf(" "); - #define DUMP_AGGR_SZ(type,name) do { \ - if (GASNETI_STATS_ENABLED(type)) { \ - gasneti_stat_intval_t *p = &AGGRNAME(intval,type); \ - if (!p->_count) \ - gasneti_stats_printf("%-25s %6i","Total "#name":",0); \ - else \ - gasneti_stats_printf("%-25s %6"PRIu64" avg/min/max/total" \ - " sz = %.3f/%"PRIu64"/%"PRIu64"/%"PRIu64, \ - "Total "#name":", \ - p->_count, CALC_AVG(p->_sumval,p->_count), \ - p->_minval, p->_maxval, p->_sumval); \ - } \ - } 
while (0) - DUMP_AGGR_SZ(G,gets); - DUMP_AGGR_SZ(P,puts); - DUMP_AGGR_SZ(W,collectives); - if (GASNETI_STATS_ENABLED(S)) { - gasneti_stat_intval_t *try_succ = &AGGRNAME(intval,S); - gasneti_stat_timeval_t *wait_time = &AGGRNAME(timeval,S); - if (!try_succ->_count) - gasneti_stats_printf("%-25s %6i","Total try sync. calls:",0); - else - gasneti_stats_printf("%-25s %6"PRIu64" try success rate = %.3f%% \n", - "Total try sync. calls:", try_succ->_count, - CALC_AVG(try_succ->_sumval, try_succ->_count) * 100.0); - if (!wait_time->_count) - gasneti_stats_printf("%-25s %6i","Total wait sync. calls:",0); - else - gasneti_stats_printf("%-25s %6"PRIu64" avg/min/max/total waittime (us) = %.3f/%.3f/%.3f/%.3f", - "Total wait sync. calls:", wait_time->_count, - gasneti_ticks_to_ns(CALC_AVG(wait_time->_sumval, wait_time->_count))/1000.0, - gasneti_ticks_to_ns(wait_time->_minval)/1000.0, - gasneti_ticks_to_ns(wait_time->_maxval)/1000.0, - gasneti_ticks_to_ns(wait_time->_sumval)/1000.0); - } - if (GASNETI_STATS_ENABLED(A)) - gasneti_stats_printf("%-25s %6"PRIu64, "Total AM's:", AGGRNAME(ctr,A)); - - gasneti_stats_printf("--------------------------------------------------------------------------------"); - } - #endif - - GASNETC_TRACE_FINISH(); /* allow for final output of conduit-core specific statistics */ - GASNETE_TRACE_FINISH(); /* allow for final output of conduit-extended specific statistics */ - fflush(NULL); + gasneti_stats_dump(0); gasneti_mutex_lock(&gasneti_tracelock); if (gasneti_tracefile && gasneti_tracefile != stdout && gasneti_tracefile != stderr) @@ -1452,9 +1510,6 @@ extern void gasneti_trace_finish(void) { of statistical collection by using inlined functions that increment weak atomics or thread-private counters that are combined at shutdown. 
*/ -static gasneti_mutex_t gasneti_statlock = GASNETI_MUTEX_INITIALIZER; -#define GASNETI_STAT_LOCK() gasneti_mutex_lock(&gasneti_statlock); -#define GASNETI_STAT_UNLOCK() gasneti_mutex_unlock(&gasneti_statlock); extern void gasneti_stat_count_accumulate(gasneti_statctr_t *pctr) { GASNETI_STAT_LOCK(); diff --git a/third-party/gasnet/gasnet-src/gasnet_trace.h b/third-party/gasnet/gasnet-src/gasnet_trace.h index 24304c2b26be..e361cd26ecdf 100644 --- a/third-party/gasnet/gasnet-src/gasnet_trace.h +++ b/third-party/gasnet/gasnet-src/gasnet_trace.h @@ -86,13 +86,13 @@ #define GASNETI_STATS_PRINTF(type, args) ((void)0) #endif -/* allow for final output of conduit-core specific statistics */ -#ifndef GASNETC_TRACE_FINISH -#define GASNETC_TRACE_FINISH() ((void)0) +/* allow for dump of conduit-core specific statistics */ +#ifndef GASNETC_STATS_DUMP +#define GASNETC_STATS_DUMP(reset) ((void)0) #endif -/* allow for final output of conduit-extended specific statistics */ -#ifndef GASNETE_TRACE_FINISH -#define GASNETE_TRACE_FINISH() ((void)0) +/* allow for dump of conduit-extended specific statistics */ +#ifndef GASNETE_STATS_DUMP +#define GASNETE_STATS_DUMP(reset) ((void)0) #endif #ifndef GASNETI_STATS_ECHOED_TO_TRACEFILE @@ -657,7 +657,7 @@ extern void gasneti_trace_finish(void); extern FILE *gasneti_open_outputfile(const char *_filename, const char *_desc); /* defines all the types */ -#define GASNETI_ALLTYPES "GPRSWXBLAICDNH" +#define GASNETI_ALLTYPES "GPRSWXBLAIOCDNH" /* GASNETI_ALL_STATS lists all the statistics values we gather, @@ -838,6 +838,10 @@ extern size_t gasneti_format_dt(char *_buf, gex_DT_t _dt); extern size_t gasneti_format_op(char *_buf, gex_OP_t _op); extern size_t gasneti_format_ti(char *_buf, gex_TI_t _ti); +// Magic number trace formatting - available even without STATS/TRACE +#define GASNETI_MAX_MAGICSZ 29 // "0x" + 16 hex digits + "(" + 8 chars + ")\0" +extern void gasneti_format_magic(char *_buf, uint64_t _magic); + 
GASNETI_FORMAT_PRINTF(gasneti_dynsprintf,1,2, extern char *gasneti_dynsprintf(const char *_format,...)); diff --git a/third-party/gasnet/gasnet-src/gasnetex.h b/third-party/gasnet/gasnet-src/gasnetex.h index 57ee72cd0d00..a70a7a2366e3 100644 --- a/third-party/gasnet/gasnet-src/gasnetex.h +++ b/third-party/gasnet/gasnet-src/gasnetex.h @@ -250,7 +250,33 @@ GASNETI_BEGIN_NOWARN #ifndef GASNET_MAXEPS // an integer representing the max supported number of endpoints per process - #define GASNET_MAXEPS (1 << 12) + // should be kept _STRINGIFY()-friendly (e.g. `4095` not `((1<<12)-1)`) + + // Defaults and sanity checks + #define GASNETI_MAXEPS_LIMIT 4096 // Maximum due to 12-bit field in TM-pair + #ifndef GASNETC_MAXEPS_MAX + #define GASNETC_MAXEPS_MAX GASNETI_MAXEPS_LIMIT + #elif (GASNETC_MAXEPS_MAX > GASNETI_MAXEPS_LIMIT) + #error GASNETC_MAXEPS_MAX exceeds GASNETI_MAXEPS_LIMIT + #endif + #if (GASNETC_MAXEPS_DFLT > GASNETI_MAXEPS_LIMIT) + #error GASNETC_MAXEPS_DFLT exceeds GASNETI_MAXEPS_LIMIT + #endif + + #if !defined(GASNETC_MAXEPS_DFLT) + // Conduit lacks multi-ep support + #define GASNET_MAXEPS 1 + #elif !defined(GASNETI_MAXEPS_CONFIGURE) + // No configure-time value provided - use conduit-specific default + #define GASNET_MAXEPS GASNETC_MAXEPS_DFLT + #else + // Take MIN of user's --with-maxeps setting and the maximum + #if (GASNETI_MAXEPS_CONFIGURE <= GASNETC_MAXEPS_MAX) + #define GASNET_MAXEPS GASNETI_MAXEPS_CONFIGURE + #else + #define GASNET_MAXEPS GASNETC_MAXEPS_MAX + #endif + #endif #endif #if !defined(GASNET_ALIGNED_SEGMENTS) || \ @@ -319,14 +345,21 @@ typedef struct gasneti_team_member_s *gex_TM_t; struct gasneti_client_s; typedef struct gasneti_client_s *gex_Client_t; +#define GEX_CLIENT_INVALID ((gex_Client_t)(uintptr_t)0) struct gasneti_endpoint_s; typedef struct gasneti_endpoint_s *gex_EP_t; +#define GEX_EP_INVALID ((gex_EP_t)(uintptr_t)0) struct gasneti_segment_s; typedef struct gasneti_segment_s *gex_Segment_t; #define GEX_SEGMENT_INVALID 
((gex_Segment_t)(uintptr_t)0) +struct gasneti_memkind_s; +typedef struct gasneti_memkind_s *gex_MK_t; +#define GEX_MK_INVALID ((gex_MK_t)(uintptr_t)0) +#define GEX_MK_HOST ((gex_MK_t)(uintptr_t)1) + typedef void (*gex_AM_Fn_t)(); /* struct type used to perform handler registration */ @@ -352,15 +385,17 @@ typedef struct { const void * _cdata; \ gex_Flags_t _flags; -// Needed to break client/tm0 cycle +// Needed to break client/tm0 and client/ep_tbl cycles struct gasneti_team_member_internal_s; +struct gasneti_endpoint_internal_s; #ifndef _GEX_CLIENT_T #define GASNETI_CLIENT_COMMON \ GASNETI_OBJECT_HEADER \ struct gasneti_team_member_internal_s *_tm0; \ const char * _name; \ - gasneti_weakatomic32_t _next_ep_index; + gasneti_weakatomic32_t _next_ep_index; \ + struct gasneti_endpoint_internal_s *_ep_tbl[GASNET_MAXEPS]; typedef struct { GASNETI_CLIENT_COMMON } *gasneti_Client_t; #if GASNET_DEBUG extern gasneti_Client_t gasneti_import_client(gex_Client_t _client); @@ -381,7 +416,10 @@ struct gasneti_team_member_internal_s; gasneti_Client_t _client; \ void * _addr; \ void * _ub; \ - uintptr_t _size; + uintptr_t _size; \ + gex_MK_t _kind; \ + void * _opaque_mk_use; \ + unsigned int _opaque_container_use; typedef struct { GASNETI_SEGMENT_COMMON } *gasneti_Segment_t; #if GASNET_DEBUG extern gasneti_Segment_t gasneti_import_segment(gex_Segment_t _segment); @@ -403,9 +441,14 @@ struct gasneti_team_member_internal_s; GASNETI_OBJECT_HEADER \ gasneti_Client_t _client; \ gasneti_Segment_t _segment; \ + gex_EP_Capabilities_t _caps, _orig_caps; \ gex_Rank_t _index; \ - gex_AM_Entry_t _amtbl[GASNETC_MAX_NUMHANDLERS]; - typedef struct { GASNETI_EP_COMMON } *gasneti_EP_t; + gex_AM_Entry_t _amtbl[GASNETC_MAX_NUMHANDLERS]; \ + gasneti_mutex_t _amtbl_lock; + #ifdef __cplusplus // ensure this struct is anonymous to prevent C++ linkage issues + #define gasneti_endpoint_internal_s + #endif + typedef struct gasneti_endpoint_internal_s { GASNETI_EP_COMMON } *gasneti_EP_t; #if GASNET_DEBUG 
extern gasneti_EP_t gasneti_import_ep(gex_EP_t _ep); extern gex_EP_t gasneti_export_ep(gasneti_EP_t _real_ep); @@ -436,18 +479,20 @@ struct gasneti_team_member_internal_s; typedef struct gasneti_team_member_internal_s { GASNETI_TM_COMMON } *gasneti_TM_t; #if GASNET_DEBUG extern gasneti_TM_t gasneti_import_tm(gex_TM_t _tm); + extern gasneti_TM_t gasneti_import_tm_nonpair(gex_TM_t _tm); extern gex_TM_t gasneti_export_tm(gasneti_TM_t _real_tm); #else #define gasneti_import_tm(x) ((gasneti_TM_t)(x)) + #define gasneti_import_tm_nonpair(x) ((gasneti_TM_t)(x)) #define gasneti_export_tm(x) ((gex_TM_t)(x)) #endif - #define gex_TM_SetCData(tm,val) ((void)(gasneti_import_tm(tm)->_cdata = (val))) - #define gex_TM_QueryCData(tm) ((void*)gasneti_import_tm(tm)->_cdata) - #define gex_TM_QueryClient(tm) gasneti_export_client(gasneti_import_tm(tm)->_ep->_client) - #define gex_TM_QueryEP(tm) gasneti_export_ep(gasneti_import_tm(tm)->_ep) - #define gex_TM_QueryFlags(tm) ((gex_Flags_t)gasneti_import_tm(tm)->_flags) - #define gex_TM_QueryRank(tm) ((gex_Rank_t)gasneti_import_tm(tm)->_rank) - #define gex_TM_QuerySize(tm) ((gex_Rank_t)gasneti_import_tm(tm)->_size) + #define gex_TM_SetCData(tm,val) ((void)(gasneti_import_tm_nonpair(tm)->_cdata = (val))) + #define gex_TM_QueryCData(tm) ((void*)gasneti_import_tm_nonpair(tm)->_cdata) + #define gex_TM_QueryClient(tm) gasneti_export_client(gasneti_import_tm_nonpair(tm)->_ep->_client) + #define gex_TM_QueryEP(tm) gasneti_export_ep(gasneti_import_tm_nonpair(tm)->_ep) + #define gex_TM_QueryFlags(tm) ((gex_Flags_t)gasneti_import_tm_nonpair(tm)->_flags) + #define gex_TM_QueryRank(tm) ((gex_Rank_t)gasneti_import_tm_nonpair(tm)->_rank) + #define gex_TM_QuerySize(tm) ((gex_Rank_t)gasneti_import_tm_nonpair(tm)->_size) #endif // TODO-EX: remove these legacy checks @@ -473,6 +518,45 @@ struct gasneti_team_member_internal_s; #error "out-of-date #define of _GASNET_HANDLERENTRY_T" #endif +// TM-pair encoding macros +// Packed bits as [rem:loc:reserved:tag] = 
[12:12:7:1] +// Note: the 7 reserved bits will eventually be needed for client index +// Note: this fits in 32 bits, but LP64 could have wider (or better aligned?) fields +// Alternatively, gex_TM_t might be uint64_t to provide wide fields even on ILP32 +#define GASNETI_TM_PAIR_TAG_WIDTH 1 +#define GASNETI_TM_PAIR_RSV_WIDTH 7 +#define GASNETI_TM_PAIR_IDX_WIDTH 12 +#define GASNETI_TM_PAIR_IDX_MASK ((1< By default, each ibv-conduit process in a GASNet job will open at most one Host Channel Adapter (HCA). To allow a process to utilize more - than one HCA, specify '--enable-ibv-multirail' at configure time, - which will enable use of up to two HCAs. To use more than two, - additionally specify '--with-ibv-max-hcas=N' at configure time (where + than one HCA, specify '--with-ibv-max-hcas=N' at configure time (where 'N' is the number of HCAs to support per process). - + Alternatively, specifying '--enable-ibv-multirail' is equivalent to + '--with-ibv-max-hcas=2' unless an explicit '--with-ibv-max-hcas=N' + option provides a different value. Passing '--disable-ibv-multirail' + overrides any explicit '--with-ibv-max-hcas=N' options. + + Note that multirail support includes provisions for correctness which + can be relevant if using multiple HCAs per *host*, even if using only + a single HCA in each process. So, '--with-ibv-max-hcas=1' is distinct + from '--disable-ibv-multirail'. + Enabling multirail support (using '--with-ibv-max-hcas=1' if appropriate) + is strongly recommended if one might ever use multiple HCAs per host. + See 'GASNET_USE_FENCED_PUTS' in the Runtime Configuration section and + "Bug 3447" in the Known Problems section for more information regarding + the correctness issues. + + The use of specific HCA ports is controlled at run time by the environment + variable GASNET_IBV_PORTS, described below. The default value of this + variable can be set at configure time using '--with-ibv-ports=...'.
+ When using dynamic connections (see GASNET_CONNECT_DYNAMIC env var, below) there is an extra thread spawned to block for the arrival of connection requests. If needed, this can be disabled at configure @@ -245,7 +261,66 @@ Paul H. Hargrove how to enumerate available HCAs and the status of their ports. In most IBV distributions the 'ibv_devinfo' utility is also available to list the HCAs and the status of their ports. - The default is no filter. + The default can be set at configure time using '--with-ibv-ports=...', + and is empty (no filter) in the absence of that configure option. + See also 'GASNET_IBV_PORTS_*', immediately below. + + + GASNET_IBV_PORTS_* + The environment variable 'GASNET_IBV_PORTS', described immediately above, + provides only a single setting and unless one uses some external means to + give per-process settings this cannot provide per-process control. This + can make it difficult to get the best performance from multi-rail systems + with multiple processes per node and architectural locality properties that + affect PCI/adapter access efficiency. + However, if 'hwloc' is detected at configure time, then it is possible to + give ibv-conduit values for 'GASNET_IBV_PORTS' which will vary per-process + based on cpu-binding and machine topology information as follows. + 1. The variable 'GASNET_IBV_PORTS_TYPE' names an object type using hwloc's + terminology, with the default being "Socket" (aka "Package"). + If the value is "None" (case-insensitive) then the logic below is + disabled and the value of 'GASNET_IBV_PORTS' is used by all processes. + 2. GASNet queries the set of objects of the given type which intersect the + process's cpuset, to construct a variable name 'GASNET_IBV_PORTS_[suff]' + where '[suff]' is an underscore-delimited ordered list of logical + object ids. For example, with the default object type, a process bound + only to cores in the first socket would have a variable name of + 'GASNET_IBV_PORTS_0'. 
Meanwhile, if the cpuset spans sockets 0 and 1 + (such as for an unbound process on a 2-socket system) then the variable + 'GASNET_IBV_PORTS_0_1' is used. + 3. If the environment variable determined in step 2 is set, then it is + used. Otherwise the un-suffixed 'GASNET_IBV_PORTS' is used. + As a concrete example, on OLCF's Summit there are four HCAs in software, + which represent connections from two I/O buses (one per socket) to two + distinct InfiniBand rails. Use of the further I/O bus introduces a latency + penalty, but achieving peak aggregate bandwidth requires the job to split + traffic over both I/O buses and both rails. + For most applications, we have observed the best latency and aggregate per- + node bandwidth is achieved using a single HCA connected to the local + socket's I/O bus. For an unbound process (or one bound to cores in both + sockets) the performance suffers relative to the bound case, but the best + average-case is achieved using two HCAs chosen to span both buses and both + network rails. This yields the following recommendation as a good + default for most applications running on this system: + GASNET_IBV_PORTS='mlx5_0+mlx5_3' # Spanning both sockets (e.g. unbound) + GASNET_IBV_PORTS_0='mlx5_0' # Bound to socket0 + GASNET_IBV_PORTS_1='mlx5_3' # Bound to socket1 + For an application which needs to maximize bandwidth of communication + to/from processes in a single socket at a time, one must allow each process to + make use of both I/O buses and network rails (at the cost of increased + latency and potentially reduced aggregate per-node bandwidth). This can be + accomplished using two HCAs per process as follows: + GASNET_IBV_PORTS='mlx5_0+mlx5_3' + GASNET_IBV_PORTS_1='mlx5_1+mlx5_2' + This example illustrates the use of un-suffixed 'GASNET_IBV_PORTS' as a + default when lacking a more specific setting.
In particular, unbound + processes and those bound to socket 0 will both use 'mlx5_0+mlx5_3' + while processes bound to socket 1 will use 'mlx5_1+mlx5_2'. + These specific recommendations are appropriate to the specific composition + of a node of OLCF's Summit, and should not be considered as generic advice + for use of all multi-HCA systems. + Of course, even on the same system, your mileage may vary. + By default 'GASNET_IBV_PORTS_TYPE' is "Socket" and all other variables in + this family are unset. + GASNET_IBV_LIST_PORTS The value is a boolean: "0" to disable or "1" to enable the reporting of @@ -545,30 +620,37 @@ Paul H. Hargrove up to an hardware- and firmware-dependent maximum size. A value of 0 disables use of inline sends. + A value of -1 causes use of the maximum value reported by the HCA. The default of 72 is normally correct. - For ibv-conduit the default of -1 causes use of the maximum value - reported by the HCA. + GASNET_PACKEDLONG_LIMIT - To perform an AMLong or AMLongAsync with non-empty payload, + To perform an AMLong with non-empty payload, ibv-conduit must transfer both the payload and the header. For sufficiently small payloads, it is more efficient (in terms of both CPU overhead and network latency) to pack the header and payload together and copy the payload into place on the target before running the handler. Thus, for payload up to and including this size this packing is used. - The default value is the maximum that fits into a 4KB buffer together - with the maximum sized header (currently 4012). + The default value is the maximum that, together with the maximum sized + header, fits into a 4KiB transfer (currently 4012). A value of zero ensures the payload and header always travel separately. 
+ GASNET_NONBULKPUT_BOUNCE_LIMIT - To perform a non-bulk PUT with nbytes > GASNET_INLINESEND_LIMIT or to - transfer the payload of an AMLong (but not AMLongAsync) with nbytes > - MAX(GASNET_INLINESEND_LIMIT, GASNET_PACKEDLONG_LIMIT), ibv-conduit must - either copy the data into bounce buffers, or block until remote - completion is signaled by the HCA. Such transfers up to and including - size GASNET_NONBULKPUT_BOUNCE_LIMIT are performed using bounce buffers - while larger transfers are transferred using blocking PUTs. + This parameter sets the limit on the use of bounce buffers to achieve + local completion of "non-bulk" PUT and AMLong payload transfers. When + passing GEX_EVENT_NOW to perform a PUT or AMLong, the implementation must + block until local completion. For PUTs with nbytes larger than + GASNET_INLINESEND_LIMIT, and for AMLongs with nbytes larger than both + GASNET_INLINESEND_LIMIT and GASNET_PACKEDLONG_LIMIT, ibv-conduit must + either copy the data into bounce buffers, or block until remote completion + is signaled by the HCA. Such transfers up to and including size + GASNET_NONBULKPUT_BOUNCE_LIMIT are performed using bounce buffers while + larger transfers stall return from injection until the RMA is acknowledged. + Currently, this parameter also controls the use of bounce buffers versus + non-blocking RMA for initiation of all AMLong payload transfers and + for PUTs not passing GEX_EVENT_DEFER for the lc_opt parameter. However, + cases other than GEX_EVENT_NOW are not the intended scope of this + parameter, and this behavior is subject to change. The default value is 64KB. A value of zero disables use of bounce buffers. @@ -663,7 +745,8 @@ Paul H. Hargrove correct remote completion detection in the presence of multiple HCAs. See "Bug 3447" in the Known Problems section for information on when one may wish to enable this setting. - If enabled when using only a single HCA a warning will be issued. 
+ If enabled when multirail support was not enabled at configure time, a + warning will be issued. This setting defaults to 0 (disabled). Resource usage parameters: @@ -786,15 +869,30 @@ Paul H. Hargrove If you believe that throughput of these operations is too small, you may try increasing this value (or setting it to zero), at a cost in additional pinned memory. + + + GASNET_PINNED_REGIONS_MAX + This provides a limit on the number of pinned regions to be created. + Similar to GASNET_PHYSMEM_MAX, the value gives an upper bound on pinned + regions per host, which is divided equally among processes on each host. + This may constrain dynamic registration via firehose (below). + The value may specify either a relative or absolute size. If the value + parses as a floating-point value less than 1.0 (including fractions such as + "5/8"), then this is taken as a fraction of the maximum supported region + count reported by the HCA(s). Otherwise the value is taken as an absolute + region count. + The default is to use a fraction of the HCA pinning resources equal to + the fraction of physical memory given by GASNET_PHYSMEM_MAX, subject to + a system-dependent maximum value. Firehose configuration: ---------------------- These parameters must be equal across all nodes, and the behavior otherwise is undefined. - The following environment variables control the resources used by the - "firehose" [ref 1] dynamic registration library. By default firehose - will use as much pinned memory as the HCA and O/S will permit. + The following environment variables control the per-process resources used by + the "firehose" [ref 1] dynamic registration library. By default, firehose + will use as much pinned memory as the HCA and O/S will permit, bounded by + GASNET_PHYSMEM_MAX. Resource use is divided into two pools. The main pool is for managing of pinning of the GASNet segment on remote nodes, while the "victim" @@ -804,7 +902,7 @@ Paul H. Hargrove pool. 
In a GASNET_SEGMENT_FAST configuration, firehose is not needed for management of the statically pinned GASNet segment, and by default only a small fraction of the available memory is placed in the main - pool and the majority is placed in the victim pool. + pool for internal uses and the majority is placed in the victim pool. + GASNET_USE_FIREHOSE This environment variable is only available in a DEBUG build of @@ -830,40 +928,65 @@ Paul H. Hargrove required. Thus it is an error to disable firehose in such a configuration. - + GASNET_FIREHOSE_M - This gives the amount of memory to place in the main pool. The - suffixes "K", "M" and "G" are interpreted as Kilobytes, Megabytes - and Gigabytes respectively, with "M" assumed if no suffix is given. - When GASNET_FIREHOSE_MAXVICTIM_M is set, the default is the maximum - pinnable memory minus GASNET_FIREHOSE_MAXVICTIM_M. Otherwise the - default is 75% of the maximum pinnable memory (in a GASNET_SEGMENT_LARGE - or GASNET_SEGMENT_EVERYTHING configuration), or the size of the - prepinned bounce buffer pool (in a GASNET_SEGMENT_FAST configuration). - - + GASNET_FIREHOSE_MAXVICTIM_M - This gives the amount of memory to place in the victim (local) pool. - The suffixes "K", "M" and "G" are interpreted as Kilobytes, Megabytes - and Gigabytes respectively, with "M" assumed if no suffix is given. - The default is the maximum pinnable memory minus GASNET_FIREHOSE_M. + + GASNET_FIREHOSE_M and GASNET_FIREHOSE_MAXVICTIM_M + GASNET_FIREHOSE_M gives the amount of memory to place in the main pool, + while GASNET_FIREHOSE_MAXVICTIM_M gives the amount of memory to place in + the victim (local) pool. The suffixes "K", "M" and "G" are interpreted as + Kilobytes, Megabytes and Gigabytes respectively, with "M" assumed if no + suffix is given. + When neither variable is set, the defaults are respectively 75% and 25% of + the total pool. 
In a GASNET_SEGMENT_LARGE or GASNET_SEGMENT_EVERYTHING + configuration, this pool's size is the maximum pinnable memory (but see + below), while in a GASNET_SEGMENT_FAST configuration it is the same size as + the prepinned bounce buffer pool. Note that, as used here, "maximum + pinnable memory" may be less than determined from GASNET_PHYSMEM_MAX, and + in particular may be constrained by the product of the number of pinnable + regions and their maximum size. See, GASNET_FIREHOSE_MAXREGION_SIZE, + GASNET_FIREHOSE_R and GASNET_FIREHOSE_MAXVICTIM_R for more information. + If only one of these variables is set, then the other defaults such that + their sum equals the total pool size. Therefore, to enlarge or reduce the + total pool, one must set both. Since enlarging the total pool risks + exhausting resources, potentially leading to crashes at runtime, doing so + will result in a warning. + + + GASNET_FIREHOSE_R and GASNET_FIREHOSE_MAXVICTIM_R + GASNET_FIREHOSE_R gives the maximum number of pinned regions to allocate + for the management of the main pool, while GASNET_FIREHOSE_MAXVICTIM_R + gives the maximum number of pinned regions to allocate for the management + of the victim (local) pool. + When neither variable is set, the default is to split the available pool of + pinnable regions (see GASNET_PINNED_REGIONS_MAX) in proportion to the + values of GASNET_FIREHOSE_M and GASNET_FIREHOSE_MAXVICTIM_M. + If only one of these variables is set, then the other defaults such that + their sum equals the total pool size. Therefore, to enlarge or reduce the + total pool, one must set both. Since enlarging the total pool risks + exhausting resources, potentially leading to crashes at runtime, doing so + will result in a warning. + The value of GASNET_FIREHOSE_R will be silently truncated if larger than + (GASNET_FIREHOSE_M / GASNET_FIREHOSE_MAXREGION_SIZE), since additional + regions would not be used. 
Similarly, GASNET_FIREHOSE_MAXVICTIM_R will be + silently reduced if it would address more than GASNET_FIREHOSE_MAXVICTIM_M. + GASNET_FIREHOSE_MAXREGION_SIZE This gives the maximum size of a single dynamically pinned region, should be a multiple of the pagesize, and preferably a power of two. The suffixes "K", "M" and "G" are interpreted as Kilobytes, Megabytes and Gigabytes respectively, with "M" assumed if no suffix is given. - The current default is 128k. Larger values have been known to trigger - a performance anomaly in some HCAs. - - + GASNET_FIREHOSE_R - This gives the number of pinned regions to allocate for the management - of the main pool. Values will be truncated if larger than the - default of (GASNET_FIREHOSE_M / GASNET_FIREHOSE_MAXREGION_SIZE). - - + GASNET_FIREHOSE_MAXVICTIM_R - This gives the number of pinned regions to allocate for the management - of the victim (local) pool. Values will be truncated if larger than - the default of (GASNET_FIREHOSE_MAXVICTIM_M / - GASNET_FIREHOSE_MAXREGION_SIZE). + The maximum addressable sizes of the main and victim pools are limited by + the product of this region size and the number of firehose regions + allocated to each pool. + If the value of this parameter is set to 0, then it will be automatically + adjusted to allow the main and victim pools to be addressed within the + available number of regions, if doing so is possible subject to a + system-dependent maximum (pagesize squared, or larger). + The default value of this parameter is 128KB. + + + GASNET_FIREHOSE_TABLE_SCALE + This parameter gives a floating point factor, used to scale the size of + hash tables used in the firehose library relative to their default sizes. + Smaller values produce smaller tables, saving memory at the expense of + performance. + The default value is 1. + GASNET_FIREHOSE_VERBOSE This gives a boolean: "0" to disable or "1" to enable the output of @@ -952,44 +1075,62 @@ Paul H.
Hargrove + Negotiated-payload Active Messages (NPAM) - TL;DR: The results below can be summarized in the following rules-of-thumb - for use of NPAM Medium with ibv-conduit in the current release: + TL;DR: The results below, from one particular system, can be summarized in + the following rules-of-thumb for use of NPAM with ibv-conduit in the + *current* release: + Use of client-provided buffer is never advantageous. - + Use of gasnet-provided buffer may be advantageous for payloads of - 512 bytes or larger. - - Calls to gex_AM_Prepare{Request,Reply}Medium() with client_buf != NULL - are known as "client-provided buffer" calls. In this mode of operation, - there is a small penalty in CPU overhead relative to the fixed-payload - AM (FPAM) calls gex_AM_{Request,Reply}Medium(), due primarily to the - split-phase calling convention. While the design of NPAM allows for the - possibility that NPAM with client-provided buffer could enable larger - payloads than FPAM, ibv-conduit currently does not provide that - capability. - - Measurements with client-provided buffer NPAM on OLCF's Summit show the - latency penalty relative to FPAM in a Request/Reply "ping-pong" test is - around 2% for payload sizes below a couple hundred bytes, and 1% or - lower for payloads of 512 bytes or larger. Throughput of a "flood" test - shows penalties of about 8% at the smallest payload sizes, declining - smoothly to 5% at the largest payloads. - Of course, your mileage may vary. - - Calls to gex_AM_Prepare{Request,Reply}Medium() with client_buf == NULL - are known as "gasnet-provided buffer" calls. In this mode of operation, - in code which assembles/generates the payload at AM injection time, - there is a measurable advantage to NPAM for sufficiently large payloads, - and a small penalty for small payloads. + + Use of gasnet-provided buffer may be advantageous for Medium with + sufficiently large payloads, where both latency and bandwidth can exceed those + of FPAM. 
+ + Use of gasnet-provided buffer may be advantageous for Long payloads of + sufficient length, where bandwidth is better than FPAM (but at the + expense of worse latency). + Note that as development continues, these findings are subject to change. + + Calls to gex_AM_Prepare{Request,Reply}{Medium,Long}() with client_buf != + NULL are known as "client-provided buffer" calls. In this mode of + operation, there is a small penalty in CPU overhead relative to the + fixed-payload AM (FPAM) calls gex_AM_{Request,Reply}{Medium,Long}(), due + primarily to the split-phase calling convention. While the design of NPAM + allows for the possibility that NPAM with client-provided buffer could + enable larger Medium payloads than FPAM, ibv-conduit currently does not + provide that capability. + + Measurements of both AM Mediums and Longs with client-provided buffer NPAM + on OLCF's Summit show the latency penalty relative to FPAM in a + Request/Reply "ping-pong" test is around 2% for payload sizes below a + couple hundred bytes, and 1% or lower for payloads of 512 bytes or larger. + For AM Longs the penalty eventually approaches zero for payloads of about + 512KiB or larger. + + Throughput of a "flood" test with client-provided buffer NPAM shows + penalties of about 8% at the smallest payload sizes, declining smoothly to + 5% at the largest Medium payloads and approaching zero for Long payloads of + about 512KiB or larger. + + Calls to gex_AM_Prepare{Request,Reply}{Medium,Long}() with client_buf == + NULL are known as "gasnet-provided buffer" calls. In this mode of + operation, in which GASNet allocates a buffer where client code + assembles/generates the payload at AM injection time, there is a measurable + advantage to NPAM for sufficiently large payloads, but a small penalty for + small payloads.
Measurements with gasnet-provided buffer NPAM on OLCF's Summit show the latency penalty in a Request/Reply "ping-pong" test is around 2% for - payload sizes below a couple hundred bytes, but that latency is improved - over FPAM for payloads of 512 bytes or larger (by about 9% for large - payloads). Throughput of a "flood" test shows similar behavior with a - throughput penalty of up to 5% for payload sizes 512 bytes and below, - but improvements in throughput above 512 bytes (by about 17% for large - payloads). - Of course, your mileage may vary. + payload sizes below a couple hundred bytes for both Medium and Long. For + Mediums of 512 bytes or larger, the latency is improved over FPAM (by about + 9% for large payloads). For Longs, the large payload latency is worse + than for small payloads. + + Throughput of a "flood" test with Mediums shows similar behavior to + ping-pong, with a throughput penalty of up to 5% for payload sizes 512 + bytes and below, but improvements in throughput above 512 bytes (by about + 17% for large payloads). For Longs there is a throughput penalty of up to + 5% for payload sizes below about 2KiB, but for large payloads a throughput + improvement of 40% or more can be seen. + + Your mileage may vary. + Relative performance may change in future releases. @ Section: Known Problems @ @@ -1247,6 +1388,10 @@ Paul H. Hargrove @ Section: Extended API @ +[This section is still *mostly* accurate, but has not been kept up-to-date +with respect to EX updates. Most notably: "LongAsync" is gone, "_bulk" has +been supplanted by lc_opt options, and all the function names have changed.]
+ Notes for myself for extended API: + The send completion facility consists of two pointers to counters, diff --git a/third-party/gasnet/gasnet-src/ibv-conduit/conduit.mak.in b/third-party/gasnet/gasnet-src/ibv-conduit/conduit.mak.in index b29ffec4ad07..2eef79d16432 100644 --- a/third-party/gasnet/gasnet-src/ibv-conduit/conduit.mak.in +++ b/third-party/gasnet/gasnet-src/ibv-conduit/conduit.mak.in @@ -22,8 +22,8 @@ CONDUIT_INCLUDES = -I@TOP_SRCDIR@/other/firehose ###NOINSTALL### -CONDUIT_LDFLAGS = @IBV_LDFLAGS@ @PMI_SPAWNER_LDFLAGS@ -CONDUIT_LIBS = @IBV_LIBS@ $(MPI_COMPAT_LIBS) $(SSH_LIBS) @PMI_SPAWNER_LIBS@ +CONDUIT_LDFLAGS = @IBV_LDFLAGS@ @PMI_SPAWNER_LDFLAGS@ @HWLOC_LDFLAGS@ @CUDA_UVA_LDFLAGS@ +CONDUIT_LIBS = @IBV_LIBS@ $(MPI_COMPAT_LIBS) $(SSH_LIBS) @PMI_SPAWNER_LIBS@ @HWLOC_LIBS@ @CUDA_UVA_LIBS@ # If ibv-conduit has internal conduit threads, then it needs # threading flags and libs - even in GASNET_SEQ mode diff --git a/third-party/gasnet/gasnet-src/ibv-conduit/contrib/Makefile.in b/third-party/gasnet/gasnet-src/ibv-conduit/contrib/Makefile.in index 7e2af4782640..ceb693393558 100644 --- a/third-party/gasnet/gasnet-src/ibv-conduit/contrib/Makefile.in +++ b/third-party/gasnet/gasnet-src/ibv-conduit/contrib/Makefile.in @@ -185,6 +185,10 @@ CONFIGURE_ARGS = @CONFIGURE_ARGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CROSS_COMPILING = @CROSS_COMPILING@ +CUDA_UVA_CFLAGS = @CUDA_UVA_CFLAGS@ +CUDA_UVA_LDFLAGS = @CUDA_UVA_LDFLAGS@ +CUDA_UVA_LIBS = @CUDA_UVA_LIBS@ +CUDA_guess_prog = @CUDA_guess_prog@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ @@ -210,6 +214,8 @@ ENVCMD = @ENVCMD@ EXEEXT = @EXEEXT@ EXESUFFIX = @EXESUFFIX@ FH_CFLAGS = @FH_CFLAGS@ +GASNETI_HWLOC_BIND_PATH = @GASNETI_HWLOC_BIND_PATH@ +GASNETI_HWLOC_CALC_PATH = @GASNETI_HWLOC_CALC_PATH@ GASNETI_PTR_BITS = @GASNETI_PTR_BITS@ GASNET_EXTRA_DEFINES = @GASNET_EXTRA_DEFINES@ GASNET_IBV_SPAWNER_CONF = @GASNET_IBV_SPAWNER_CONF@ @@ -238,6 +244,10 @@ GSTACK_PATH = @GSTACK_PATH@ HAVE_INTTYPES_H = @HAVE_INTTYPES_H@ 
HAVE_STDINT_H = @HAVE_STDINT_H@ HAVE_SYS_TYPES_H = @HAVE_SYS_TYPES_H@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_guess_prog = @HWLOC_guess_prog@ IBV_CFLAGS = @IBV_CFLAGS@ IBV_LDFLAGS = @IBV_LDFLAGS@ IBV_LIBS = @IBV_LIBS@ diff --git a/third-party/gasnet/gasnet-src/ibv-conduit/firehose_fwd.h b/third-party/gasnet/gasnet-src/ibv-conduit/firehose_fwd.h index e80df9aa77ec..24489bf5127e 100644 --- a/third-party/gasnet/gasnet-src/ibv-conduit/firehose_fwd.h +++ b/third-party/gasnet/gasnet-src/ibv-conduit/firehose_fwd.h @@ -15,8 +15,8 @@ #else #include #endif - #ifdef GASNETC_IBV_MAX_HCAS - #define GASNETC_IB_MAX_HCAS GASNETC_IBV_MAX_HCAS + #ifdef GASNETC_IBV_MAX_HCAS_CONFIGURE + #define GASNETC_IB_MAX_HCAS GASNETC_IBV_MAX_HCAS_CONFIGURE #else /* no multi-rail support */ #define GASNETC_IB_MAX_HCAS 1 #endif diff --git a/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core.c b/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core.c index 00188f09aa0d..65f74f7251c1 100644 --- a/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core.c +++ b/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core.c @@ -4,15 +4,19 @@ * Terms of use are as specified in license.txt */ +#define GASNETI_NEED_GASNET_MK_H 1 #include #include #include #include // access to eop and iop +#include + #include #include #include +#include // (U)INT_MAX #include #include @@ -28,16 +32,34 @@ GASNETI_IDENT(gasnetc_IdentString_Name, "$GASNetCoreLibraryName: " GASNET_COR #endif #if GASNETC_IBV_XRC GASNETI_IDENT(gasnetc_IdentString_XRC, "$GASNetIbvXRC: 1 $"); + #if GASNETC_IBV_XRC_MLNX + GASNETI_IDENT(gasnetc_IdentString_XRCAPI, "$GASNetIbvXRCAPI: Mellanox $"); + #else + GASNETI_IDENT(gasnetc_IdentString_XRCAPI, "$GASNetIbvXRCAPI: rdma-core $"); + #endif #endif #if GASNETC_IBV_ODP GASNETI_IDENT(gasnetc_IdentString_ODP, "$GASNetIbvODP: 1 $"); + #if GASNETC_IBV_ODP_MLNX + GASNETI_IDENT(gasnetc_IdentString_ODPAPI, "$GASNetIbvODPAPI: Mellanox $"); + #else + 
GASNETI_IDENT(gasnetc_IdentString_ODPAPI, "$GASNetIbvODPAPI: rdma-core $"); + #endif #endif GASNETI_IDENT(gasneti_IdentString_AMMaxMedium, "$GASNetAMMaxMedium: " _STRINGIFY(GASNETC_IBV_MAX_MEDIUM) " $"); - -gex_AM_Entry_t const *gasnetc_get_handlertable(void); +GASNETI_IDENT(gasneti_IdentString_MaxHCAs, "$GASNetIbvMaxHCAs: " _STRINGIFY(GASNETC_IB_MAX_HCAS) " $"); gasnetc_EP_t gasnetc_ep0; // First EP created. Used by init, sys AMs, and shutdown. +size_t gasnetc_sizeof_segment_t(void) { + gasnetc_Segment_t segment; + return sizeof(*segment); +} +size_t gasnetc_sizeof_ep_t(void) { + gasnetc_EP_t ep; + return sizeof(*ep); +} + /* ------------------------------------------------------------------------------------ */ /* Configuration @@ -53,7 +75,11 @@ gasnetc_EP_t gasnetc_ep0; // First EP created. Used by init, sys AMs, and shutd */ /* Default is to open one physical port per HCA */ -#define GASNETC_DEFAULT_IBV_PORTS "" +#ifdef GASNETC_IBV_PORTS_CONFIGURE + #define GASNETC_DEFAULT_IBV_PORTS GASNETC_IBV_PORTS_CONFIGURE +#else + #define GASNETC_DEFAULT_IBV_PORTS "" +#endif /* Limits on in-flight (queued but not reaped) RDMA Ops */ #define GASNETC_DEFAULT_NETWORKDEPTH_TOTAL 255 /* Max ops (RDMA + AM) outstanding at source */ @@ -118,11 +144,12 @@ int gasnetc_qp_timeout, gasnetc_qp_retry_count; /* conduit-specific firehose region parameters * Note that these are kept to sane sizes rather than the HCA limit * 128kB is the peak of the bandwidth curve and thus a good size. - * With 32k * 128k = 4G we can pin upto 4GB of physical memory with these. - * We don't yet deal well with many small regions. + * Some adapters have no limit on the number of regions supported (Omni-Path), + * in which case we substitute gasnetc_fh_maxregions for the HCA queried value. + * With 16m * 128KB we can pin up to 2TB of physical memory per host. * Note that GASNET_FIREHOSE_* env vars can override these. 
*/ -static unsigned int gasnetc_fh_maxregions = 32768; +static unsigned int gasnetc_fh_maxregions = 16777216; static unsigned int gasnetc_fh_maxsize = 131072; /* ------------------------------------------------------------------------------------ */ @@ -136,10 +163,6 @@ gasnetc_port_info_t *gasnetc_port_tbl = NULL; int gasnetc_num_ports = 0; static uint64_t gasnetc_pin_maxsz; -#if GASNETC_PIN_SEGMENT - uintptr_t gasnetc_seg_start; - uintptr_t gasnetc_seg_len; -#endif firehose_info_t gasnetc_firehose_info; static uintptr_t gasnetc_firehose_mem; static int gasnetc_firehose_reg; @@ -954,14 +977,55 @@ static void gasnetc_init_pin_info(int first_local, int num_local) { gasnetc_pin_info.physmemsz = physmemsz; gasnetc_pin_info.memory = ~((uintptr_t)0); gasnetc_pin_info.num_local = num_local; - gasnetc_pin_info.regions = gasnetc_fh_maxregions; + + // How many pinnable regions per host? + unsigned int max_regions = UINT_MAX; GASNETC_FOR_ALL_HCA_INDEX(i) { - if (! gasnetc_hca[i].hca_cap.max_mr) { // Treat zero as unbounded (e.g. Omni-Path) + unsigned int tmp = gasnetc_hca[i].hca_cap.max_mr; // Field has type `int` + // Zero or above INT_MAX should use gasnetc_fh_maxregions + if (tmp == 0) { // Treat zero as unbounded (e.g. 
Omni-Path) GASNETI_TRACE_PRINTF(I, ("HCA %d advertises hca_cap.max_mr == 0, treating as unbounded", i)); continue; + } else if (tmp >= (unsigned int)INT_MAX) { // Treat INT_MAX (or negative int) as unbounded + GASNETI_TRACE_PRINTF(I, ("HCA %d advertises hca_cap.max_mr >= INT_MAX, treating as unbounded", i)); + continue; } - gasnetc_pin_info.regions = MIN(gasnetc_pin_info.regions, gasnetc_hca[i].hca_cap.max_mr); + max_regions = MIN(max_regions, tmp); + } + if (max_regions == UINT_MAX) { + // For adapters not reporting a valid limit: + max_regions = gasnetc_fh_maxregions; } + { + const char *key = "GASNET_PINNED_REGIONS_MAX"; + const char *input = gasneti_getenv(key); + int using_dflt = !input || !input[0]; // unset or empty + if (using_dflt) { + // Default (heuristic): cap use at same fraction of HCA resources as of physical memory + gasnetc_pin_info.regions = max_regions * ((double)limit / physmemsz); + } else { + // User override - accept fractions or absolute value + double dbl; + int64_t val; + if (gasneti_parse_dbl(input, &dbl)) { // Not a valid double + val = gasneti_parse_int(input, 0); + } else if ((dbl > 0.) && (dbl < 1.)) { // A double in interval (0,1) + val = dbl * max_regions; + } else { // A valid double outside (0,1) + val = dbl; + } + const int64_t region_lower = 16; // arbitrary. 
firehose will further validate + const int64_t region_upper = max_regions * 0.95; // 95% is arbitrary + if (val > region_upper) { + gasneti_fatalerror("%s='%s' is above the maximum value %d.", key, input, (int)region_upper); + } else if (val < region_lower) { + gasneti_fatalerror("%s='%s' is below the minimum value %d.", key, input, (int)region_lower); + } + gasnetc_pin_info.regions = val; + } + gasneti_envint_display(key, gasnetc_pin_info.regions, using_dflt, 0); + } + GASNETI_TRACE_PRINTF(I, ("Max pinnable regions per host: %u", (unsigned int)gasnetc_pin_info.regions)); if (do_probe) { int did_warn = 0; @@ -1041,8 +1105,15 @@ static void gasnetc_init_pin_info(int first_local, int num_local) { gasnetc_physmem_check("Probing O/S limits and HCA capabilities", limit); } -GASNETI_NORETURN -static void gasneti_segreg_failed(size_t size, const char *which, int why) { +enum gasnetc_segreg { + gasnetc_segreg_aux, + gasnetc_segreg_attach, // aka "primordial" + gasnetc_segreg_create +}; + +static const char *gasnetc_segreg_failed(size_t size, enum gasnetc_segreg which, int why, gex_MK_Class_t mk_class) +{ + const char *descr = ""; const char *hint1 = ""; const char *hint2 = ""; #if !GASNETI_PSHM_POSIX @@ -1057,43 +1128,38 @@ static void gasneti_segreg_failed(size_t size, const char *which, int why) { // Cygwin, macOS and Solaris are not believed to back with a filesystem // Others are unknown #endif -#ifdef GASNETC_PSHM_FS - if (why == EFAULT) { - hint1 = "\n This could be caused by insufficient space in " GASNETC_PSHM_FS " (or similar)."; - } -#endif - if (! 
*which) { // empty string == NOT " aux" - hint2 = "\n Reducing the value of environment variable GASNET_MAX_SEGSIZE may help."; + switch (mk_class) { + case GEX_MK_CLASS_HOST: + #ifdef GASNETC_PSHM_FS + if (why == EFAULT) { + hint1 = "\n This could be caused by insufficient space in " GASNETC_PSHM_FS " (or similar)."; + } + #endif + if (which == gasnetc_segreg_attach) { + hint2 = "\n Reducing the value of environment variable GASNET_MAX_SEGSIZE may help."; + } else if (which == gasnetc_segreg_aux) { + descr = " aux"; + } + break; + + #if GASNET_HAVE_MK_CLASS_CUDA_UVA + case GEX_MK_CLASS_CUDA_UVA: + descr = " CUDA_UVA"; + if (why == EFAULT) { + hint1 = "\n This could be caused by exhaustion of BAR1 resources. See memory_kinds.md release notes."; + } + break; + #endif + + default: // avoids unhandled case warnings + break; } char sizestr[16]; - gasneti_fatalerror("Unexpected error %s (errno=%d) when registering a %s%s segment%s%s", + return gasneti_dynsprintf( + "Unexpected error %s (errno=%d) when registering a %s%s segment%s%s", strerror(why), why, gasnett_format_number(size, sizestr, sizeof(sizestr), 1), - which, hint1, hint2); -} - -// -// simple container of segments -// -static gasnetc_Segment_t *gasnetc_segment_table = NULL; -static int gasnetc_segment_count = 0; -static gasneti_mutex_t gasnetc_segment_lock = GASNETI_MUTEX_INITIALIZER; - -static void gasnetc_add_segment(gasnetc_Segment_t seg) { - gasneti_mutex_lock(&gasnetc_segment_lock); - seg->idx = gasnetc_segment_count++; - size_t space = gasnetc_segment_count * sizeof(gasnetc_Segment_t); - gasnetc_segment_table = gasneti_realloc(gasnetc_segment_table, space); - gasnetc_segment_table[seg->idx] = seg; - gasneti_mutex_unlock(&gasnetc_segment_lock); -} -static void gasnetc_del_segment(gasnetc_Segment_t seg) { - gasneti_mutex_lock(&gasnetc_segment_lock); - gasnetc_Segment_t last = gasnetc_segment_table[gasnetc_segment_count--]; - last->idx = seg->idx; - gasnetc_segment_table[last->idx] = last; - // lack of 
realloc to shrink is harmless - gasneti_mutex_unlock(&gasnetc_segment_lock); + descr, hint1, hint2); } #if GASNET_TRACE @@ -1123,7 +1189,7 @@ static int gasnetc_load_settings(void) { fprintf(stderr, "WARNING: GASNET_PORT_NUM set in environment, but ignored. See gasnet/ibv-conduit/README.\n"); } - gasnetc_ibv_ports = gasneti_getenv_withdefault("GASNET_IBV_PORTS", GASNETC_DEFAULT_IBV_PORTS); + gasnetc_ibv_ports = gasneti_getenv_hwloc_withdefault("GASNET_IBV_PORTS", GASNETC_DEFAULT_IBV_PORTS, "Socket"); #define GASNETC_ENVINT(program_var, env_key, default_val, minval, is_mem) do { \ int64_t _tmp = gasneti_getenv_int_withdefault(#env_key, default_val, is_mem); \ @@ -1243,7 +1309,7 @@ static int gasnetc_load_settings(void) { if_pf (gasnetc_packedlong_limit > GASNETC_MAX_PACKEDLONG) { fprintf(stderr, "WARNING: GASNET_PACKEDLONG_LIMIT reduced from %u to %u\n", - (unsigned int)gasnetc_packedlong_limit, GASNETC_MAX_PACKEDLONG); + (unsigned int)gasnetc_packedlong_limit, (unsigned int)GASNETC_MAX_PACKEDLONG); gasnetc_packedlong_limit = GASNETC_MAX_PACKEDLONG; } @@ -1584,18 +1650,16 @@ static void gasnetc_probe_ports(int max_ports) { #endif if ((ib_hcas > GASNETC_IB_MAX_HCAS) && (gasnetc_port_list == NULL)) { -#if GASNETC_IBV_MAX_HCAS +#if GASNETC_IBV_MAX_HCAS_CONFIGURE const char *current = "with '--with-ibv-max-hcas=" _STRINGIFY(GASNETC_IB_MAX_HCAS) "'"; - const char *enable = ""; #else const char *current = "without multi-rail support"; - const char *enable = "--enable-ibv-multirail "; #endif fprintf(stderr, "WARNING: Found %d IB HCAs, but GASNet was configured %s. " "To utilize all your HCAs, you should " - "reconfigure GASNet with '%s--with-ibv-max-hcas=%d'. You can silence this warning " + "reconfigure GASNet using '--with-ibv-max-hcas=%d'. 
You can silence this warning " "by setting the environment variable GASNET_IBV_PORTS as described in the file " - "'gasnet/ibv-conduit/README'.\n", num_hcas, current, enable, num_hcas); + "'gasnet/ibv-conduit/README'.\n", num_hcas, current, num_hcas); } int16_t pkey = get_pkey(); @@ -1830,25 +1894,45 @@ static void gasneti_odp_init(void) { gasnetc_hca_t *hca; GASNETC_FOR_ALL_HCA(hca) { enum gasneti_odp_missing missing = missing_none; + #if GASNETC_IBV_ODP_CORE + struct ibv_query_device_ex_input input; + input.comp_mask = 0; + struct ibv_device_attr_ex attr; + memset(&attr, 0, sizeof(attr)); + int ret = ibv_query_device_ex(hca->handle, &input, &attr); + int no_odp_general = !(attr.odp_caps.general_caps & IBV_ODP_SUPPORT); + int no_odp_implicit = !(attr.odp_caps.general_caps & IBV_ODP_SUPPORT_IMPLICIT); + uint32_t odp_caps = gasnetc_use_xrc ? attr.xrc_odp_caps + : attr.odp_caps.per_transport_caps.rc_odp_caps; + int no_odp_read = !(odp_caps & IBV_ODP_SUPPORT_READ); + int no_odp_write = !(odp_caps & IBV_ODP_SUPPORT_WRITE); + #elif GASNETC_IBV_ODP_MLNX struct ibv_exp_device_attr attr; memset(&attr, 0, sizeof(attr)); attr.comp_mask = IBV_EXP_DEVICE_ATTR_ODP | IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS; int ret = ibv_exp_query_device(hca->handle, &attr); - if (! (attr.exp_device_cap_flags & IBV_EXP_DEVICE_ODP)) { + int no_odp_general = !(attr.exp_device_cap_flags & IBV_EXP_DEVICE_ODP); + int no_odp_implicit = !(attr.odp_caps.general_odp_caps & IBV_EXP_ODP_SUPPORT_IMPLICIT); + uint32_t odp_caps = gasnetc_use_xrc ? attr.odp_caps.per_transport_caps.xrc_odp_caps + : attr.odp_caps.per_transport_caps.rc_odp_caps; + int no_odp_read = !(odp_caps & IBV_EXP_ODP_SUPPORT_READ); + int no_odp_write = !(odp_caps & IBV_EXP_ODP_SUPPORT_WRITE); + #else + #error Unknown ODP API variant + #endif + if (no_odp_general) { missing = missing_general; - } else if (! 
(attr.odp_caps.general_odp_caps & IBV_EXP_ODP_SUPPORT_IMPLICIT)) { + } else if (no_odp_implicit) { missing = missing_implicit; // TODO-EX: maybe support older systems lacking this bit? // Prior to ConnectX-4, implicit ODP was done in s/w and // + This can be identified because this caps bit was not set // + Implicit ODP emulation had 128MB limit // + Implicit ODP was valid for local only (invalid rkey) - } else if (gasnetc_use_xrc) { - uint32_t odp_caps = gasnetc_use_xrc ? attr.odp_caps.per_transport_caps.xrc_odp_caps - : attr.odp_caps.per_transport_caps.rc_odp_caps; - if (! (odp_caps & IBV_EXP_ODP_SUPPORT_READ)) { + } else { + if (no_odp_read) { missing = gasnetc_use_xrc? missing_xrc_read : missing_rc_read; - } else if (! (odp_caps & IBV_EXP_ODP_SUPPORT_WRITE)) { + } else if (no_odp_write) { missing = gasnetc_use_xrc? missing_xrc_write : missing_rc_write; } } @@ -1863,8 +1947,14 @@ static void gasneti_odp_init(void) { my_odp_support[hca->hca_index].missing = missing; } if (gasnetc_use_odp) { + GASNETI_TRACE_PRINTF(I, ("Implicit ODP enabled")); // Create implict ODP registrations (currently only used locally) GASNETC_FOR_ALL_HCA(hca) { + #if GASNETC_IBV_ODP_CORE + unsigned int access_flags = (unsigned int)IBV_ACCESS_ON_DEMAND | + (unsigned int)IBV_ACCESS_LOCAL_WRITE; + hca->implicit_odp.handle = ibv_reg_mr(hca->pd, 0, SIZE_MAX, access_flags); + #else struct ibv_exp_reg_mr_in in; memset(&in, 0, sizeof(in)); in.pd = hca->pd; @@ -1872,7 +1962,8 @@ static void gasneti_odp_init(void) { IBV_EXP_ACCESS_LOCAL_WRITE ); in.length = IBV_EXP_IMPLICIT_MR_SIZE; hca->implicit_odp.handle = ibv_exp_reg_mr(&in); - GASNETC_IBV_CHECK_PTR(hca->implicit_odp.handle, "from ibv_exp_reg_mr(implicit)"); + #endif + GASNETC_IBV_CHECK_PTR(hca->implicit_odp.handle, "from ibv_reg_mr(implicit)"); hca->implicit_odp.lkey = hca->implicit_odp.handle->lkey; // flatten for quick access } } @@ -2034,6 +2125,9 @@ static int gasnetc_init( gex_Client_t *client_p, /* Now enable tracing of all the following 
steps */ gasneti_trace_init(argc, argv); + // Ensure work-arounds like MLX5_SCATTER_TO_CQE are propagated + gasneti_propagate_env("MLX5_", GASNETI_PROPAGATE_ENV_PREFIX); + /* bootstrapInit may set gasneti_nodes==0 if would overflow 16-bit field */ if (!gasneti_nodes || (gasneti_nodes > GASNET_MAXNODES)) { GASNETI_RETURN_ERRR(RESOURCE, "job size exceeds ibv-conduit capabilities"); @@ -2059,23 +2153,12 @@ static int gasnetc_init( gex_Client_t *client_p, } } -#if GASNETC_IB_MAX_HCAS > 1 +#if GASNETC_HAVE_FENCED_PUTS gasnetc_use_fenced_puts = gasneti_getenv_yesno_withdefault("GASNET_USE_FENCED_PUTS", 0); - if (gasnetc_use_fenced_puts && (gasnetc_num_hcas == 1)) { - if (!gasneti_mynode) { - fprintf(stderr, - "WARNING: GASNET_USE_FENCED_PUTS requested, but ignored because only a single\n" - " HCA was detected and/or enabled. To suppress this message, you may\n" - " either unset this environment variable or set it to '0'.\n" - " Alternatively, you may configure using '--disable-ibv-multirail'\n" - " if all nodes have only a single InfiniBand HCA.\n"); - } - gasnetc_use_fenced_puts = 0; - } #else if (!gasneti_mynode && gasneti_getenv_yesno_withdefault("GASNET_USE_FENCED_PUTS", 0)) { - fprintf(stderr, - "WARNING: GASNET_USE_FENCED_PUTS requested, but ignored because GASNet was\n" + gasneti_console_message("WARNING", + "GASNET_USE_FENCED_PUTS requested, but ignored because GASNet was\n" " configured without multi-rail support. To suppress this message,\n" " you may either unset this environment variable or set it to '0'.\n" " Alternatively, you may configure using '--enable-ibv-multirail'.\n"); @@ -2269,18 +2352,9 @@ static int gasnetc_init( gex_Client_t *client_p, gasneti_bootstrapExchange(local_lid, gasnetc_num_ports * sizeof(uint16_t), remote_lid); gasneti_free(local_lid); -#if PLATFORM_ARCH_MIC - /* In the case of multiple MICs in a host, the LIDs will be the same. - * So, use the default nodemap. - * TODO: distinguish single-MIC and "self hosted" MIC systems.
- */ + // Derive nodemap by the default means. + // We cannot use LID info if GASNET_IBV_PORTS is inhomogeneous (bug 4208) gasneti_nodemapInit(&gasneti_bootstrapExchange, NULL, 0, 0); -#else - /* Derive nodemap from the LID info we have just exchanged */ - gasneti_nodemapInit(NULL, &remote_lid[0], - sizeof(remote_lid[0]), - sizeof(remote_lid[0]) * gasnetc_num_ports); -#endif /* compute various snd/rcv resource limits (requires node map) */ i = gasnetc_sndrcv_limits(); @@ -2344,8 +2418,10 @@ static int gasnetc_init( gex_Client_t *client_p, #if GASNETC_IBV_XRC /* shared qpn table: */ - shared_size += gasneti_nodes * gasnetc_alloc_qps * sizeof(uint32_t); - shared_size = GASNETI_ALIGNUP(shared_size, GASNETI_CACHE_LINE_BYTES); + if (gasnetc_use_xrc) { + shared_size += gasnetc_xrc_preinit(remote_lid); + shared_size = GASNETI_ALIGNUP(shared_size, GASNETI_CACHE_LINE_BYTES); + } #endif shared_mem = gasneti_pshm_init(&gasneti_bootstrapSNodeBroadcast, shared_size); @@ -2384,11 +2460,11 @@ static int gasnetc_init( gex_Client_t *client_p, gasneti_EP_t ep; { // allocate the client object - client = gasneti_alloc_client(clientName, flags, 0); + client = gasneti_alloc_client(clientName, flags); *client_p = gasneti_export_client(client); // create the initial endpoint with internal handlers - if (gasnetc_EP_Create(ep_p, *client_p, flags)) + if (gex_EP_Create(ep_p, *client_p, GEX_EP_CAPABILITY_ALL, flags)) GASNETI_RETURN_ERRR(RESOURCE,"Error creating initial endpoint"); ep = gasneti_import_ep(*ep_p); gasnetc_ep0 = (gasnetc_EP_t)ep; // TODO-EX: this global variable to be removed @@ -2501,7 +2577,8 @@ static int gasnetc_init( gex_Client_t *client_p, GASNETC_FOR_ALL_HCA(hca) { if (0 != gasnetc_pin(hca, auxbase, auxsize, gasneti_seg_access_flags, &hca->aux_reg)) { - gasneti_segreg_failed(auxsize, " aux", errno); + const char *msg = gasnetc_segreg_failed(auxsize, gasnetc_segreg_aux, errno, GEX_MK_CLASS_HOST); + gasneti_fatalerror("%s", msg); } // TODO_EX: need scalable and/or lazy 
storage of aux segments and their rkeys hca->aux_rkeys = gasneti_malloc(gasneti_nodes*sizeof(uint32_t)); @@ -2670,60 +2747,174 @@ static int gasnetc_attach_primary(void) { return GASNET_OK; } /* ------------------------------------------------------------------------------------ */ -static int gasnetc_attach_segment(gex_Segment_t *segment_p, - gex_TM_t tm, - uintptr_t segsize, - gasneti_bootstrapExchangefn_t exchangefn, - gex_Flags_t flags) { - /* ------------------------------------------------------------------------------------ */ - /* register client segment */ - gasnetc_Segment_t segment; - gasnet_seginfo_t myseg = gasneti_segmentAttach(segment_p, sizeof(*segment), tm, segsize, exchangefn, flags); - segment = (gasnetc_Segment_t) gasneti_import_tm(tm)->_ep->_segment; - - // Register client segment with NIC - - #if GASNETC_PIN_SEGMENT - { - gasnetc_add_segment(segment); - - gasnetc_seg_start = (uintptr_t)myseg.addr; - gasnetc_seg_len = myseg.size; - - /* pin the segment and exchange the RKeys, once per HCA */ +// Purely local memory registration and conduit-specific segment tracking +// Applicable to both primordial and non-primordial segments +static int gasnetc_segment_register(gasnetc_Segment_t segment, int is_attach) +{ +#if GASNETC_PIN_SEGMENT gasnetc_hca_t *hca; GASNETC_FOR_ALL_HCA(hca) { - hca->rkeys = gasneti_calloc(gasneti_nodes, sizeof(uint32_t)); - gasneti_leak(hca->rkeys); - + // Register page-aligned bounding-box (since client-provided need not be aligned). gasnetc_memreg_t memreg; - if (0 != gasnetc_pin(hca, myseg.addr, myseg.size, gasneti_seg_access_flags, &memreg)) { - gasneti_segreg_failed(segsize, "", errno); + uintptr_t lb = GASNETI_PAGE_ALIGNDOWN(segment->_addr); + uintptr_t ub = GASNETI_PAGE_ALIGNUP(segment->_ub); + uintptr_t bb_size = ub - lb; + int rc = gasnetc_pin(hca, (void*)lb, ub - lb, gasneti_seg_access_flags, &memreg); + + if (rc) { + if (gasneti_VerboseErrors) { + gex_MK_Class_t mk_class = (segment->_kind == GEX_MK_HOST) + ? 
GEX_MK_CLASS_HOST + : gex_MK_QueryClass(segment->_kind); + enum gasnetc_segreg which = is_attach ? gasnetc_segreg_attach : gasnetc_segreg_create; + gasneti_console_message("WARNING", "%s", gasnetc_segreg_failed(segment->_size, which, errno, mk_class)); + } + #if (GASNETC_IB_MAX_HCAS > 1) + for (int i = 0; i < hca->hca_index; ++i) { + gasnetc_unpin(gasnetc_hca+i, segment->seg_reg+i); + } + #endif + // TODO: can we do better sorting out failure modes? + return GASNET_ERR_RESOURCE; } + GASNETI_TRACE_PRINTF(I, ("Registered %"PRIuPTR" byte segment on HCA %d", segment->_size, hca->hca_index)); + segment->seg_lkey[hca->hca_index] = memreg.handle->lkey; - #if GASNETC_IBV_SHUTDOWN segment->seg_reg[hca->hca_index] = memreg; - #endif + } +#endif - GASNETI_TRACE_PRINTF(I, ("Attach registered %"PRIuPTR" bytes on HCA %d", segsize, hca->hca_index)); + return GASNET_OK; +} - /* XXX: hca->rkeys is one of the O(N) storage requirements we might reduce/eliminate. - * + When using PSHM we could store rkeys just once per supernode - * + When not fully connected, we could utilize sparse storage - */ - (*exchangefn)(&memreg.handle->rkey, sizeof(uint32_t), hca->rkeys); +#if GASNETC_PIN_SEGMENT +static void gasnetc_install_np_rkeys( + gex_Rank_t jobrank, + gex_EP_Index_t idx, + const uint32_t *new_rkeys) +{ + gasneti_assume(idx < GASNET_MAXEPS); + uint32_t *rkey_array = gasnetc_np_rkeys[idx]; + + if_pf (!rkey_array) { + static gasneti_mutex_t lock = GASNETI_MUTEX_INITIALIZER; + gasneti_mutex_lock(&lock); + rkey_array = gasnetc_np_rkeys[idx]; + if (!rkey_array) { + rkey_array = gasneti_calloc(gasneti_nodes * gasnetc_num_hcas, sizeof(uint32_t)); + gasnetc_np_rkeys[idx] = rkey_array; } + gasneti_mutex_unlock(&lock); } - #endif - /* Per-endpoint work */ - for (gex_Rank_t i = 0; i < gasneti_nodes; i++) { - gasnetc_cep_t *cep = GASNETC_NODE2CEP(gasnetc_ep0, i); - if (cep) { - gasnetc_sndrcv_attach_peer(i, cep); + GASNETI_MEMCPY_SAFE(rkey_array + jobrank * gasnetc_num_hcas, + new_rkeys, + 
gasnetc_num_hcas * sizeof(uint32_t)); +} +#endif + +static int gasnetc_segment_exchange(gex_TM_t tm, gex_EP_t *eps, size_t num_eps) +{ +#if GASNETC_PIN_SEGMENT + // Exchange one 32-bit rkey per HCA + struct exchg_data { + gex_EP_Location_t loc; + uint32_t rkey[1]; // Flexible array member + } *local, *global, *p; + + size_t elem_sz = offsetof(struct exchg_data, rkey) + gasnetc_num_hcas * sizeof(uint32_t); + local = gasneti_malloc(num_eps * elem_sz); + + // Pack + p = local; + for (gex_Rank_t i = 0; i < num_eps; ++i) { + gex_EP_t ep = eps[i]; + gasnetc_Segment_t segment = (gasnetc_Segment_t) gasneti_import_ep(ep)->_segment; + if (! segment) continue; + p->loc.gex_rank = gasneti_mynode; + p->loc.gex_ep_index = gex_EP_QueryIndex(ep); + for (int j = 0; j < gasnetc_num_hcas; ++j) { + p->rkey[j] = segment->seg_reg[j].handle->rkey; + } + p = (struct exchg_data *)(elem_sz + (uintptr_t)p); + } + + size_t local_bytes = (uintptr_t)p - (uintptr_t)local; + size_t total_bytes = gasneti_blockingRotatedExchangeV(tm, local, local_bytes, (void**)&global, NULL); + gasneti_assert(total_bytes % elem_sz == 0); + size_t total_eps = total_bytes / elem_sz; + gasneti_free(local); + + // Unpack + p = global; + for (size_t i = 0; i < total_eps; ++i) { + gex_Rank_t jobrank = p->loc.gex_rank; + gex_EP_Index_t idx = p->loc.gex_ep_index; + if (jobrank == gasneti_mynode) { + // Local: + // Fall through to advance p + } else if (! 
idx) { + // Remote + primordial: + uint32_t *rkey = p->rkey; + for (int j = 0; j < gasnetc_num_hcas; ++j) { + gasnetc_hca_t *hca = gasnetc_hca + j; + gasneti_assert(hca->rkeys); + gasneti_assert(!hca->rkeys[jobrank] || hca->rkeys[jobrank] == rkey[hca->hca_index]); + hca->rkeys[jobrank] = rkey[hca->hca_index]; + } + gasnetc_cep_t *cep = GASNETC_NODE2CEP(gasnetc_ep0, jobrank); + if (cep) gasnetc_sndrcv_attach_peer(jobrank, cep); + } else { + // Remote + non-primordial: + gasnetc_install_np_rkeys(jobrank, idx, p->rkey); } + p = (struct exchg_data *)(elem_sz + (uintptr_t)p); } + gasneti_free(global); +#else + // Per-endpoint work: + // TODO: multi-ep may require more work + gex_Rank_t team_size = gex_TM_QuerySize(tm); + for (size_t i = 0; i < team_size; ++i) { + gex_EP_Location_t loc = gasneti_i_tm_rank_to_location(gasneti_import_tm_nonpair(tm), i, 0); + gex_Rank_t jobrank = loc.gex_rank; + if (jobrank == gasneti_mynode) { + continue; + } else if (! loc.gex_ep_index) { + // TODO: this might be redundant? 
+ gasnetc_cep_t *cep = GASNETC_NODE2CEP(gasnetc_ep0, jobrank); + if (cep) gasnetc_sndrcv_attach_peer(jobrank, cep); + } + } +#endif + + return GASNET_OK; +} + +static int gasnetc_attach_segment(gex_Segment_t *segment_p, + gex_TM_t tm, + uintptr_t segsize, + gex_Flags_t flags) { + /* ------------------------------------------------------------------------------------ */ + /* register client segment */ + + gasnet_seginfo_t myseg = gasneti_segmentAttach(segment_p, tm, segsize, flags); + + // Register client segment with NIC + + #if GASNETC_PIN_SEGMENT + // pin the segment + gasnetc_Segment_t segment = (gasnetc_Segment_t) gasneti_import_segment(*segment_p); + int rc = gasnetc_segment_register(segment, 1); + if (rc) { + gasneti_fatalerror("Unexpected failure return from gasnetc_segment_register()"); + } + #endif + + // Exchange registration info + gex_EP_t ep = gex_TM_QueryEP(tm); + gasnetc_segment_exchange(tm, &ep, 1); return GASNET_OK; } @@ -2736,7 +2927,7 @@ extern int gasnetc_attach( gex_TM_t _tm, { GASNETI_TRACE_PRINTF(C,("gasnetc_attach(table (%i entries), segsize=%"PRIuPTR")", numentries, segsize)); - gasneti_TM_t tm = gasneti_import_tm(_tm); + gasneti_TM_t tm = gasneti_import_tm_nonpair(_tm); gasneti_EP_t ep = tm->_ep; if (!gasneti_init_done) @@ -2761,12 +2952,12 @@ extern int gasnetc_attach( gex_TM_t _tm, #if GASNET_SEGMENT_FAST || GASNET_SEGMENT_LARGE /* register client segment */ gex_Segment_t seg; // g2ex segment is automatically saved by a hook - if (GASNET_OK != gasnetc_attach_segment(&seg, _tm, segsize, gasneti_defaultExchange, GASNETI_FLAG_INIT_LEGACY)) + if (GASNET_OK != gasnetc_attach_segment(&seg, _tm, segsize, GASNETI_FLAG_INIT_LEGACY)) GASNETI_RETURN_ERRR(RESOURCE,"Error attaching segment"); #endif /* register client handlers */ - if (table && gasneti_amregister_legacy(ep->_amtbl, table, numentries) != GASNET_OK) + if (table && gasneti_amregister_legacy(ep, table, numentries) != GASNET_OK) GASNETI_RETURN_ERRR(RESOURCE,"Error registering handlers"); 
/* ensure everything is initialized across all nodes */ @@ -2806,17 +2997,21 @@ extern int gasnetc_Client_Init( #endif } else { // NOT first client // allocate the client object - gasneti_Client_t client = gasneti_alloc_client(clientName, flags, 0); + gasneti_Client_t client = gasneti_alloc_client(clientName, flags); *client_p = gasneti_export_client(client); // create the initial endpoint with internal handlers - if (gasnetc_EP_Create(ep_p, *client_p, flags)) + if (gex_EP_Create(ep_p, *client_p, GEX_EP_CAPABILITY_ALL, flags)) GASNETI_RETURN_ERRR(RESOURCE,"Error creating initial endpoint"); } gasneti_EP_t ep = gasneti_import_ep(*ep_p); + // Do NOT move this prior to the gasneti_trace_init() call + GASNETI_TRACE_PRINTF(O,("gex_Client_Init: name='%s' argc_p=%p argv_p=%p flags=%d", + clientName, (void *)argc, (void *)argv, flags)); + // TODO-EX: create team - gasneti_TM_t tm = gasneti_alloc_tm(ep, gasneti_mynode, gasneti_nodes, flags, 0); + gasneti_TM_t tm = gasneti_alloc_tm(ep, gasneti_mynode, gasneti_nodes, flags); *tm_p = gasneti_export_tm(tm); if (0 == (flags & GASNETI_FLAG_INIT_LEGACY)) { @@ -2851,41 +3046,89 @@ extern int gasnetc_Segment_Attach( /* create a segment collectively */ // TODO-EX: this implementation only works *once* - // TODO-EX: should be using the team's exchange function if possible // TODO-EX: need to pass proper flags (e.g. 
pshm and bind) instead of 0 - if (GASNET_OK != gasnetc_attach_segment(segment_p, tm, length, gasneti_defaultExchange, 0)) + if (GASNET_OK != gasnetc_attach_segment(segment_p, tm, length, 0)) GASNETI_RETURN_ERRR(RESOURCE,"Error attaching segment"); return GASNET_OK; } -extern int gasnetc_EP_Create(gex_EP_t *ep_p, - gex_Client_t client, - gex_Flags_t flags) { - /* (###) add code here to create an endpoint belonging to the given client */ -#if 1 // TODO-EX: This is a stub, which assumes 1 implicit call from ClientCreate - static gasneti_mutex_t lock = GASNETI_MUTEX_INITIALIZER; - gasneti_mutex_lock(&lock); - static int once = 0; - int prev = once; - once = 1; - gasneti_mutex_unlock(&lock); - if (prev) gasneti_fatalerror("Multiple endpoints are not yet implemented"); +extern int gasnetc_Segment_Create( + gex_Segment_t *segment_p, + gex_Client_t client, + gex_Addr_t address, + uintptr_t length, + gex_MK_t kind, + gex_Flags_t flags) +{ + gasneti_assert(segment_p); + + // Create the Segment object, allocating memory if appropriate + gasneti_Client_t i_client = gasneti_import_client(client); + int rc = gasneti_segmentCreate(segment_p, i_client, address, length, kind, flags); + + if (rc == GASNET_OK) { + // Register the segment + gasnetc_Segment_t segment = (gasnetc_Segment_t) gasneti_import_segment(*segment_p); + rc = gasnetc_segment_register(segment, 0); + if ((GASNET_OK != rc) && (GASNET_ERR_RESOURCE != rc)) { + gasneti_fatalerror("Unexpected failure return from gasnetc_segment_register()"); + } + } + + return rc; +} + +#if GASNETC_HAVE_EP_PUBLISHBOUNDSEGMENT +extern int gasnetc_EP_PublishBoundSegment( + gex_TM_t tm, + gex_EP_t *eps, + size_t num_eps, + gex_Flags_t flags) +{ + // Conduit-independent parts + int rc = gasneti_EP_PublishBoundSegment(tm, eps, num_eps, flags); + if (GASNET_OK != rc) return rc; + + // Conduit-dependent parts + // TODO: merge comms into gasneti_EP_PublishBoundSegment(). 
+ gasnetc_segment_exchange(tm, eps, num_eps); + + // Avoid race in which AMRequestLong triggers AMReplyLong before exchange completes remotely + // TODO: barrier for multi-tm per-process + gex_Event_Wait(gex_Coll_BarrierNB(tm, 0)); + + return GASNET_OK; +} #endif - gasnetc_EP_t conduit_ep; - gasneti_EP_t ep = gasneti_alloc_ep(gasneti_import_client(client), flags, sizeof(*conduit_ep)); - *ep_p = gasneti_export_ep(ep); +// Conduit-specific hook to run at end of gex_EP_Create() +int gasnetc_ep_init_hook(gasneti_EP_t i_ep) +{ + gasnetc_EP_t c_ep = (gasnetc_EP_t) i_ep; - { /* core API handlers */ - gex_AM_Entry_t *ctable = (gex_AM_Entry_t *)gasnetc_get_handlertable(); - int len = 0; - int numreg = 0; - gasneti_assert(ctable); - while (ctable[len].gex_fnptr) len++; /* calc len */ - if (gasneti_amregister(ep->_amtbl, ctable, len, GASNETC_HANDLER_BASE, GASNETE_HANDLER_BASE, 0, &numreg) != GASNET_OK) - GASNETI_RETURN_ERRR(RESOURCE,"Error registering core API handlers"); - gasneti_assert(numreg == len); + // Conduit-specific validation +#if GASNETC_PIN_SEGMENT + if (i_ep->_index) { + // Current non-primordial EP support is RMA-only + if (i_ep->_caps & ~GEX_EP_CAPABILITY_RMA) { + // Unsupported capability/ies requested + GASNETI_RETURN_ERRR(BAD_ARG, + "ibv-conduit supports only GEX_EP_CAPABILITY_RMA for non-primordial endpoints"); + } + } +#else + gasneti_static_assert(GASNET_MAXEPS == 1); +#endif + + // Conduit-specific EP struct member(s): + if (! i_ep->_index) { + c_ep->cep_table = NULL; + } else { + // Simply share the QPs of EP0, which are sufficient for RMA + // TODO: AM will require isolation that this sharing cannot provide + gasneti_assert(gasnetc_ep0->cep_table); + c_ep->cep_table = gasnetc_ep0->cep_table; } #if !GASNETC_PIN_SEGMENT @@ -2893,32 +3136,24 @@ extern int gasnetc_EP_Create(gex_EP_t *ep_p, gex_AM_Entry_t *ftable = firehose_get_handlertable(); int len = 0; int numreg = 0; + int dontcare = !
i_ep->_index; // Allocate indices on first call gasneti_assert(ftable); while (ftable[len].gex_fnptr) len++; /* calc len */ - if (gasneti_amregister(ep->_amtbl, ftable, len, GASNETC_HANDLER_BASE, GASNETE_HANDLER_BASE, 1, &numreg) != GASNET_OK) + if (gasneti_amregister(i_ep, ftable, len, + GASNETC_HANDLER_BASE, GASNETE_HANDLER_BASE, + dontcare, &numreg) != GASNET_OK) GASNETI_RETURN_ERRR(RESOURCE, "Error registering firehose handlers"); - gasneti_assert(numreg == len); + gasneti_assert_int(numreg ,==, len); } #endif - { /* extended API handlers */ - gex_AM_Entry_t *etable = (gex_AM_Entry_t *)gasnete_get_handlertable(); - int len = 0; - int numreg = 0; - gasneti_assert(etable); - while (etable[len].gex_fnptr) len++; /* calc len */ - if (gasneti_amregister(ep->_amtbl, etable, len, GASNETE_HANDLER_BASE, GASNETI_CLIENT_HANDLER_BASE, 0, &numreg) != GASNET_OK) - GASNETI_RETURN_ERRR(RESOURCE,"Error registering extended API handlers"); - gasneti_assert(numreg == len); - } - return GASNET_OK; } extern int gasnetc_EP_RegisterHandlers(gex_EP_t ep, gex_AM_Entry_t *table, size_t numentries) { - return gasneti_amregister_client(gasneti_import_ep(ep)->_amtbl, table, numentries); + return gasneti_amregister_client(gasneti_import_ep(ep), table, numentries); } /* ------------------------------------------------------------------------------------ */ /* Shutdown code - not always used */ @@ -2927,7 +3162,7 @@ extern int gasnetc_EP_RegisterHandlers(gex_EP_t ep, void gasnetc_shutdown(void) { gasnetc_hca_t *hca; - int rc, i; + int rc; gasnetc_connect_shutdown(gasnetc_ep0); @@ -2938,12 +3173,12 @@ gasnetc_shutdown(void) { GASNETC_FOR_ALL_HCA(hca) { #if GASNETC_PIN_SEGMENT - gasneti_mutex_lock(&gasnetc_segment_lock); - for (int i = 0; i < gasnetc_segment_count; ++i) { - gasnetc_Segment_t seg = gasnetc_segment_table[i]; - gasnetc_unpin(hca, &seg->seg_reg[hca->hca_index]); + GASNETI_SEGTBL_LOCK(); + gasneti_Segment_t seg; + GASNETI_SEGTBL_FOR_EACH(seg) { + gasnetc_unpin(hca, 
&((gasnetc_Segment_t)seg)->seg_reg[hca->hca_index]); } - gasneti_mutex_unlock(&gasnetc_segment_lock); + GASNETI_SEGTBL_UNLOCK(); #endif #if GASNETC_IBV_ODP if (gasnetc_use_odp) { @@ -3498,6 +3733,9 @@ static void gasnetc_exit_body(void) { /* Disable processing of AMs, except core-specific ones */ gasnetc_disable_AMs(); + // prevent possible GASNETI_CHECK_INJECT() failures when we communicate + GASNETI_CHECK_INJECT_RESET(); + GASNETI_TRACE_PRINTF(C,("gasnet_exit(%i)\n", exitcode)); /* Timed MAX(exitcode) reduction to clearly distinguish collective exit */ @@ -3584,7 +3822,6 @@ static void gasnetc_exit_body(void) { #if GASNETC_IBV_SHUTDOWN GASNETC_EXIT_STATE("ibv quiesce"); alarm(30); - gasneti_bootstrapBarrier(); gasnetc_sndrcv_quiesce(); #endif if (gasnetc_did_firehose_init) { @@ -3859,8 +4096,8 @@ extern gex_TI_t gasnetc_Token_Info( info->gex_srcrank = GASNETC_MSG_SRCIDX(flags); result |= GEX_TI_SRCRANK; -#if GASNET_TRACE - // TRACE of source of bootstrap AMs can reach here before gasneti_THUNK_TM is set +#if GASNETI_STATS_OR_TRACE + // STATS/TRACE of source of bootstrap AMs can reach here before gasneti_THUNK_TM is set info->gex_ep = gasneti_THUNK_TM ? 
gasneti_THUNK_EP : NULL; #else info->gex_ep = gasneti_THUNK_EP; @@ -4063,7 +4300,7 @@ int gasnetc_am_get_buffer(size_t buf_len, { void *buf; if (buf_len <= gasnetc_am_inline_limit_sndrcv) { - // Inline send/put using the on-stack buffer + // Inline send/put using the small buffer on-stack or in-sd buf = (gasnetc_buffer_t *)GASNETI_ALIGNUP(inline_buf, 8); gasneti_assert(*buf_alloc_p == NULL); } else { @@ -4096,6 +4333,9 @@ void gasnetc_am_commit( gasnetc_buffer_t *buf, gasnetc_buffer_t *buf_alloc, gasnetc_counter_t *counter, va_list argptr GASNETI_THREAD_FARG) { + // AMs to in-nbrhd peers must currently use PSHM + gasneti_assert(!GASNETI_NBRHD_JOBRANK_IS_LOCAL(gasnetc_epid2node(cep->epid))); + // Set header fields and locate arguments gex_AM_Arg_t *args; switch (category) { @@ -4113,7 +4353,7 @@ void gasnetc_am_commit( gasnetc_buffer_t *buf, gasnetc_buffer_t *buf_alloc, void *data = (void*)((uintptr_t)buf + head_len); gasneti_assert_ptr(data ,==, GASNETC_MSG_MED_DATA(buf, numargs + have_flow)); gasneti_assert_uint(copy_len ,==, nbytes); - GASNETI_MEMCPY_SAFE(data, src_addr, copy_len); + GASNETI_MEMCPY(data, src_addr, copy_len); } break; @@ -4121,13 +4361,16 @@ void gasnetc_am_commit( gasnetc_buffer_t *buf, gasnetc_buffer_t *buf_alloc, buf->longmsg.destLoc = (uintptr_t)dst_addr; buf->longmsg.nBytes = nbytes; if (copy_len + gath_len) { // Packed Long optimization - gasneti_assume(nbytes <= GASNETC_MAX_PACKEDLONG); + gasneti_assume(nbytes <= GASNETC_MAX_PACKEDLONG_(numargs)); buf->longmsg.nBytes |= 0x80000000; /* IDs the packedlong case */ - if (copy_len) { + if (in_place) { + gasneti_assert_ptr(src_addr ,==, GASNETC_MSG_LONG_DATA(buf, numargs + have_flow)); + gasneti_assert_uint(copy_len ,==, nbytes); + } else if (copy_len) { void *data = (void*)((uintptr_t)buf + head_len); gasneti_assert_ptr(data ,==, GASNETC_MSG_LONG_DATA(buf, numargs + have_flow)); gasneti_assert_uint(copy_len ,==, nbytes); - GASNETI_MEMCPY_SAFE(data, src_addr, copy_len); + GASNETI_MEMCPY(data, 
src_addr, copy_len); } else { gasneti_assert_uint(gath_len ,==, nbytes); } @@ -4505,7 +4748,7 @@ int gasnetc_AMRequestShort( gex_TM_t tm, gex_Rank_t rank, gex_AM_Index_t handler { int retval; gasneti_assert(tm); - gasnetc_EP_t ep = (gasnetc_EP_t)gasneti_import_ep(gex_TM_QueryEP(tm)); + gasnetc_EP_t ep = (gasnetc_EP_t) gasneti_e_tm_to_i_ep(tm); gasneti_assert(ep == gasnetc_ep0); gex_Rank_t jobrank = gasneti_e_tm_rank_to_jobrank(tm, rank); if (GASNETI_NBRHD_JOBRANK_IS_LOCAL(jobrank)) { @@ -4529,7 +4772,7 @@ int gasnetc_AMRequestMedium(gex_TM_t tm, gex_Rank_t rank, gex_AM_Index_t handler { int retval; gasneti_assert(tm); - gasnetc_EP_t ep = (gasnetc_EP_t)gasneti_import_ep(gex_TM_QueryEP(tm)); + gasnetc_EP_t ep = (gasnetc_EP_t) gasneti_e_tm_to_i_ep(tm); gasneti_assert(ep == gasnetc_ep0); gex_Rank_t jobrank = gasneti_e_tm_rank_to_jobrank(tm, rank); if (GASNETI_NBRHD_JOBRANK_IS_LOCAL(jobrank)) { @@ -4596,7 +4839,7 @@ int gasnetc_AMRequestLong( gex_TM_t tm, gex_Rank_t rank, gex_AM_Index_t handler { int retval; gasneti_assert(tm); - gasnetc_EP_t ep = (gasnetc_EP_t)gasneti_import_ep(gex_TM_QueryEP(tm)); + gasnetc_EP_t ep = (gasnetc_EP_t) gasneti_e_tm_to_i_ep(tm); gasneti_assert(ep == gasnetc_ep0); gex_Rank_t jobrank = gasneti_e_tm_rank_to_jobrank(tm, rank); if (GASNETI_NBRHD_JOBRANK_IS_LOCAL(jobrank)) { @@ -4856,9 +5099,11 @@ int gasnetc_AMReplyLong( gex_Token_t token, gex_AM_Index_t handler, // ---- NPAM common ---- -GASNETI_INLINE(gasnetc_prepare_medium) -int gasnetc_prepare_medium( +GASNETI_INLINE(gasnetc_prepare_common) +int gasnetc_prepare_common( gasneti_AM_SrcDesc_t sd, + gasneti_category_t category, + const int is_reply, gasnetc_cep_t *cep, const void *client_buf, size_t least_payload, @@ -4868,11 +5113,35 @@ int gasnetc_prepare_medium( unsigned int nargs GASNETI_THREAD_FARG) { - const size_t nbytes = MIN(most_payload, GASNETC_MAX_MEDIUM_(nargs)); - // See gasnetc_ReqRepGeneric() for details const int have_flow = gasnetc_atomic_read(&(cep)->am_flow.credit, 0) ? 
1 : 0; - const size_t head_len = GASNETC_MSG_MED_ARGSEND(nargs + have_flow); + + size_t nbytes, head_len; + switch (category) { + #if GASNET_NATIVE_NP_ALLOC_REQ_MEDIUM || GASNET_NATIVE_NP_ALLOC_REP_MEDIUM + case gasneti_Medium: + nbytes = MIN(most_payload, GASNETC_MAX_MEDIUM_(nargs)); + head_len = GASNETC_MSG_MED_ARGSEND(nargs + have_flow); + break; + #endif + + #if GASNET_NATIVE_NP_ALLOC_REQ_LONG || GASNET_NATIVE_NP_ALLOC_REP_LONG + case gasneti_Long: { + #if GASNETC_PIN_SEGMENT + gasneti_static_assert(GASNETC_MAX_LONG_REQ == GASNETC_MAX_LONG_REP); + size_t limit = client_buf ? GASNETC_MAX_LONG_REQ : GASNETC_MAX_PACKEDLONG_(nargs); + #else + size_t limit = client_buf ? (is_reply ? GASNETC_MAX_LONG_REP : GASNETC_MAX_LONG_REQ) + : GASNETC_MAX_PACKEDLONG_(nargs); + #endif + nbytes = MIN(most_payload, limit); + head_len = GASNETC_MSG_LONG_ARGSEND(nargs + have_flow); + break; + } + #endif + + default: gasneti_unreachable_error(("Invalid AM category: 0x%x",(int)category)); + } // Obtain an appropriate buffer in which to build the message // If an inline send is to be used, the buffer is in the sd itself @@ -4891,10 +5160,15 @@ int gasnetc_prepare_medium( sd->_size = nbytes; sd->_buf_alloc = buf_alloc; sd->_have_flow = have_flow; + sd->_head_len = head_len; sd->_cep = cep; if (client_buf) { sd->_addr = (/*non-const*/void *)client_buf; - gasneti_leaf_finish(lc_opt); + #if GASNET_NATIVE_NP_ALLOC_REQ_MEDIUM || GASNET_NATIVE_NP_ALLOC_REP_MEDIUM + if (category == gasneti_Medium) { + gasneti_leaf_finish(lc_opt); // Commit always yields synchronous LC of Medium + } + #endif } else { sd->_gex_buf = sd->_addr = (void*)((uintptr_t)sd->_void_p + head_len); } @@ -4903,11 +5177,13 @@ int gasnetc_prepare_medium( } static -void gasnetc_commit_medium( +void gasnetc_commit_common( gasneti_AM_SrcDesc_t sd, + gasneti_category_t category, const int is_reply, gex_AM_Index_t handler, size_t nbytes, + void *dest_addr, unsigned int nargs, va_list argptr GASNETI_THREAD_FARG) @@ -4917,11 +5193,11 
@@ void gasnetc_commit_medium( gasnetc_cb_t local_cb; gasnete_eop_t *eop = NULL; - int is_Fixed = sd->_gex_buf == NULL; + int is_cbuf = sd->_gex_buf == NULL; gex_Event_t *lc_opt = sd->_lc_opt; size_t copy_len = 0; // Length of payload to be copied (if any) size_t gath_len = 0; // Length of payload to be sent using gather-on-send (if any) - if (is_Fixed) { + if (is_cbuf) { gasneti_assert(lc_opt); if (gasneti_leaf_is_pointer(lc_opt)) { eop = _gasnete_eop_new(GASNETI_MYTHREAD); @@ -4940,28 +5216,55 @@ void gasnetc_commit_medium( local_cnt = &op->initiated_alc_cnt; local_cb = op->next ? gasnetc_cb_nar_alc : gasnetc_cb_iop_alc; } else { - gasneti_fatalerror("Invalid lc_opt argument to Prepare/Commit %sMedium", - is_reply?"Reply":"Request"); + gasneti_fatalerror("Invalid lc_opt argument to Prepare/Commit %s%s", + is_reply?"Reply":"Request", + (category==gasneti_Medium)?"Medium":"Long"); } - if (gasnetc_am_use_gather(sd->_ep, sd->_addr, nbytes, local_cb)) { - gath_len = nbytes; - } else { - copy_len = nbytes; + switch (category) { + #if GASNET_NATIVE_NP_ALLOC_REQ_MEDIUM || GASNET_NATIVE_NP_ALLOC_REP_MEDIUM + case gasneti_Medium: + if (gasnetc_am_use_gather(sd->_ep, sd->_addr, nbytes, local_cb)) { + gath_len = nbytes; + } else { + copy_len = nbytes; + } + break; + #endif + + #if GASNET_NATIVE_NP_ALLOC_REQ_LONG || GASNET_NATIVE_NP_ALLOC_REP_LONG + case gasneti_Long: + if ((nbytes <= gasnetc_packedlong_limit) || (!GASNETC_PIN_SEGMENT && is_reply)) { + // Small enough to send like a Medium (forced for Reply w/ firehose) + if (gasnetc_am_use_gather(sd->_ep, sd->_addr, nbytes, local_cb)) { + gath_len = nbytes; + } else { + copy_len = nbytes; + } + } else { + // Inject RMA + int rc = gasnetc_rdma_long_put(sd->_ep, sd->_cep, sd->_addr, dest_addr, nbytes, + /*imm*/0, local_cnt, local_cb GASNETI_THREAD_PASS); + gasneti_assert(!rc); // Never fails, since never "immediate" + } + break; + #endif + + default: gasneti_unreachable_error(("Invalid AM category: 0x%x",(int)category)); } } 
else { gasneti_assert(!lc_opt); local_cb = NULL; local_cnt = NULL; + // TODO: RDMA of Long payload can be beneficial copy_len = nbytes; } - size_t head_len = GASNETC_MSG_MED_ARGSEND(nargs + sd->_have_flow); gasnetc_am_commit( sd->_void_p, sd->_buf_alloc, - gasneti_Medium, is_reply, + category, is_reply, sd->_ep, sd->_cep, - handler, sd->_addr, nbytes, NULL, - head_len, copy_len, gath_len, - !is_Fixed, sd->_have_flow, nargs, + handler, sd->_addr, nbytes, dest_addr, + sd->_head_len, copy_len, gath_len, + !is_cbuf, sd->_have_flow, nargs, local_cnt, local_cb, NULL, argptr GASNETI_THREAD_PASS); @@ -4974,13 +5277,24 @@ void gasnetc_commit_medium( gasnete_eop_free(eop GASNETI_THREAD_PASS); } } else if (lc_opt == GEX_EVENT_NOW) { -#if 0 // Currently always synchronous LC when (local_cb == gasnetc_cb_counter) - /* block for local completion of payload transfer */ - gasnetc_counter_wait(&counter, 0 GASNETI_THREAD_PASS); -#else - gasneti_assert(counter.initiated == 0); - gasneti_assert(gasnetc_atomic_read(&counter.completed,0) == 0); -#endif + switch (category) { + #if GASNET_NATIVE_NP_ALLOC_REQ_MEDIUM || GASNET_NATIVE_NP_ALLOC_REP_MEDIUM + case gasneti_Medium: + // Currently always synchronous LC + gasneti_assert(counter.initiated == 0); + gasneti_assert(gasnetc_atomic_read(&counter.completed,0) == 0); + break; + #endif + + #if GASNET_NATIVE_NP_ALLOC_REQ_LONG || GASNET_NATIVE_NP_ALLOC_REP_LONG + case gasneti_Long: + // block for local completion of RDMA transfer, if needed + if (is_cbuf) gasnetc_counter_wait(&counter, is_reply GASNETI_THREAD_PASS); + break; + #endif + + default: gasneti_unreachable_error(("Invalid AM category: 0x%x",(int)category)); + } } } @@ -5009,7 +5323,7 @@ extern int gasnetc_AMRequestShortM( return retval; } -#if !GASNETC_HAVE_NP_REQ_MEDIUM +#if !GASNET_NATIVE_NP_ALLOC_REQ_MEDIUM extern int gasnetc_AMRequestMediumV( gex_TM_t tm, gex_Rank_t rank, gex_AM_Index_t handler, void *source_addr, size_t nbytes, @@ -5045,6 +5359,7 @@ extern int 
gasnetc_AMRequestMediumM( return (retval == GASNETC_FAIL_IMM); } +#if !GASNET_NATIVE_NP_ALLOC_REQ_LONG extern int gasnetc_AMRequestLongV( gex_TM_t tm, gex_Rank_t rank, gex_AM_Index_t handler, void *source_addr, size_t nbytes, void *dest_addr, @@ -5053,6 +5368,7 @@ extern int gasnetc_AMRequestLongV( { return gasnetc_AMRequestLong(tm,rank,handler,source_addr,nbytes,dest_addr,lc_opt,flags,numargs,argptr GASNETI_THREAD_PASS); } +#endif extern int gasnetc_AMRequestLongM( gex_TM_t tm,/* local context */ @@ -5082,7 +5398,7 @@ extern int gasnetc_AMRequestLongM( // ---- external NPAM requests ---- -#if GASNETC_HAVE_NP_REQ_MEDIUM +#if GASNET_NATIVE_NP_ALLOC_REQ_MEDIUM extern gex_AM_SrcDesc_t gasnetc_AM_PrepareRequestMedium( gex_TM_t tm, @@ -5095,11 +5411,12 @@ extern gex_AM_SrcDesc_t gasnetc_AM_PrepareRequestMedium( GASNETI_THREAD_FARG, unsigned int nargs) { + GASNETI_TRACE_PREP_REQUESTMEDIUM(tm,rank,client_buf,least_payload,most_payload,flags,nargs); + GASNETC_IMMEDIATE_MAYBE_POLL(flags); // Ensure at least one poll upon Request injection + gasneti_AM_SrcDesc_t sd = gasneti_init_request_srcdesc(GASNETI_THREAD_PASS_ALONE); GASNETI_COMMON_PREP_REQ(sd,tm,rank,client_buf,least_payload,most_payload,NULL,lc_opt,flags,nargs,Medium); - GASNETC_IMMEDIATE_MAYBE_POLL(flags); // Ensure at least one poll upon Request injection - flags &= ~(GEX_FLAG_AM_PREPARE_LEAST_CLIENT | GEX_FLAG_AM_PREPARE_LEAST_ALLOC); gex_Rank_t jobrank = gasneti_e_tm_rank_to_jobrank(tm, rank); @@ -5111,12 +5428,12 @@ extern gex_AM_SrcDesc_t gasnetc_AM_PrepareRequestMedium( } else { const gex_Flags_t immediate = flags & GEX_FLAG_IMMEDIATE; - gasnetc_EP_t ep = (gasnetc_EP_t)gasneti_import_tm(tm)->_ep; + gasnetc_EP_t ep = (gasnetc_EP_t) gasneti_e_tm_to_i_ep(tm); gasneti_assert(ep == gasnetc_ep0); gasnetc_cep_t *cep = gasnetc_am_select_cep(ep, jobrank); if (gasnetc_am_get_credit(ep, cep, immediate GASNETI_THREAD_PASS)) { goto out_immediate; - } else if (gasnetc_prepare_medium(sd, cep, client_buf, + } else if 
(gasnetc_prepare_common(sd, gasneti_Medium, 0, cep, client_buf, least_payload, most_payload, lc_opt, flags, nargs GASNETI_THREAD_PASS)) { @@ -5130,6 +5447,7 @@ extern gex_AM_SrcDesc_t gasnetc_AM_PrepareRequestMedium( } GASNETI_TRACE_PREP_RETURN(REQUEST_MEDIUM, sd); + GASNETI_CHECK_SD(client_buf, least_payload, most_payload, sd); return gasneti_export_srcdesc(sd); out_immediate: @@ -5155,14 +5473,100 @@ extern void gasnetc_AM_CommitRequestMediumM( if (sd->_is_nbrhd) { gasnetc_nbrhd_CommitRequest(sd, gasneti_Medium, handler, nbytes, NULL, argptr); } else { - gasnetc_commit_medium(sd,0,handler,nbytes,nargs,argptr GASNETI_THREAD_PASS); + gasnetc_commit_common(sd,gasneti_Medium,0,handler,nbytes,NULL,nargs,argptr GASNETI_THREAD_PASS); + } + va_end(argptr); + + gasneti_reset_srcdesc(sd); +} + +#endif // GASNET_NATIVE_NP_ALLOC_REQ_MEDIUM + +#if GASNET_NATIVE_NP_ALLOC_REQ_LONG + +extern gex_AM_SrcDesc_t gasnetc_AM_PrepareRequestLong( + gex_TM_t tm, + gex_Rank_t rank, + const void *client_buf, + size_t least_payload, + size_t most_payload, + void *dest_addr, + gex_Event_t *lc_opt, + gex_Flags_t flags + GASNETI_THREAD_FARG, + unsigned int nargs) +{ + GASNETI_TRACE_PREP_REQUESTLONG(tm,rank,client_buf,least_payload,most_payload,dest_addr,flags,nargs); + GASNETC_IMMEDIATE_MAYBE_POLL(flags); // Ensure at least one poll upon Request injection + + gasneti_AM_SrcDesc_t sd = gasneti_init_request_srcdesc(GASNETI_THREAD_PASS_ALONE); + GASNETI_COMMON_PREP_REQ(sd,tm,rank,client_buf,least_payload,most_payload,dest_addr,lc_opt,flags,nargs,Long); + + flags &= ~(GEX_FLAG_AM_PREPARE_LEAST_CLIENT | GEX_FLAG_AM_PREPARE_LEAST_ALLOC); + + gex_Rank_t jobrank = gasneti_e_tm_rank_to_jobrank(tm, rank); + + if (GASNETI_NBRHD_JOBRANK_IS_LOCAL(jobrank)) { + sd = gasnetc_nbrhd_PrepareRequest(sd, gasneti_Long, jobrank, + client_buf, least_payload, most_payload, + dest_addr, lc_opt, flags, nargs); + } else { + const gex_Flags_t immediate = flags & GEX_FLAG_IMMEDIATE; + + gasnetc_EP_t ep = (gasnetc_EP_t) 
gasneti_e_tm_to_i_ep(tm); + gasneti_assert(ep == gasnetc_ep0); + gasnetc_cep_t *cep = gasnetc_am_select_cep(ep, jobrank); + if (gasnetc_am_get_credit(ep, cep, immediate GASNETI_THREAD_PASS)) { + goto out_immediate; + } else if (gasnetc_prepare_common(sd, gasneti_Long, 0, cep, client_buf, + least_payload, most_payload, + lc_opt, flags, nargs + GASNETI_THREAD_PASS)) { + gasnetc_am_put_credit(cep); + goto out_immediate; + } else { + gasneti_init_sd_poison(sd); + sd->_is_nbrhd = 0; + sd->_ep = ep; + } + } + + GASNETI_TRACE_PREP_RETURN(REQUEST_LONG, sd); + GASNETI_CHECK_SD(client_buf, least_payload, most_payload, sd); + return gasneti_export_srcdesc(sd); + +out_immediate: + gasneti_assert(flags & GEX_FLAG_IMMEDIATE); + gasneti_reset_srcdesc(sd); + GASNETI_TRACE_PREP_RETURN(REQUEST_LONG, NULL); + return gasneti_export_srcdesc(NULL); // GEX_AM_SRCDESC_NO_OP +} + +extern void gasnetc_AM_CommitRequestLongM( + gex_AM_Index_t handler, + size_t nbytes, + void *dest_addr + GASNETI_THREAD_FARG, + unsigned int nargs, + gex_AM_SrcDesc_t sd_arg, ...) 
+{ + gasneti_AM_SrcDesc_t sd = gasneti_import_srcdesc(sd_arg); + + GASNETI_COMMON_COMMIT_REQ(sd,handler,nbytes,dest_addr,nargs,Long); + + va_list argptr; + va_start(argptr, sd_arg); + if (sd->_is_nbrhd) { + gasnetc_nbrhd_CommitRequest(sd, gasneti_Long, handler, nbytes, dest_addr, argptr); + } else { + gasnetc_commit_common(sd,gasneti_Long,0,handler,nbytes,dest_addr,nargs,argptr GASNETI_THREAD_PASS); } va_end(argptr); gasneti_reset_srcdesc(sd); } -#endif // GASNETC_HAVE_NP_REQ_MEDIUM +#endif // GASNET_NATIVE_NP_ALLOC_REQ_LONG // ---- external FPAM replies ---- @@ -5182,7 +5586,7 @@ extern int gasnetc_AMReplyShortM( return retval; } -#if !GASNETC_HAVE_NP_REP_MEDIUM +#if !GASNET_NATIVE_NP_ALLOC_REP_MEDIUM extern int gasnetc_AMReplyMediumV( gex_Token_t token, gex_AM_Index_t handler, void *source_addr, size_t nbytes, @@ -5211,6 +5615,7 @@ extern int gasnetc_AMReplyMediumM( return retval; } +#if !GASNET_NATIVE_NP_ALLOC_REP_LONG extern int gasnetc_AMReplyLongV( gex_Token_t token, gex_AM_Index_t handler, void *source_addr, size_t nbytes, void *dest_addr, @@ -5219,6 +5624,7 @@ extern int gasnetc_AMReplyLongV( { return gasnetc_AMReplyLong(token,handler,source_addr,nbytes,dest_addr,lc_opt,flags,numargs,argptr); } +#endif extern int gasnetc_AMReplyLongM( gex_Token_t token, /* token provided on handler entry */ @@ -5241,7 +5647,7 @@ extern int gasnetc_AMReplyLongM( // ---- external NPAM replies ---- -#if GASNETC_HAVE_NP_REP_MEDIUM +#if GASNET_NATIVE_NP_ALLOC_REP_MEDIUM extern gex_AM_SrcDesc_t gasnetc_AM_PrepareReplyMedium( gex_Token_t token, @@ -5252,6 +5658,8 @@ extern gex_AM_SrcDesc_t gasnetc_AM_PrepareReplyMedium( gex_Flags_t flags, unsigned int nargs) { + GASNETI_TRACE_PREP_REPLYMEDIUM(token,client_buf,least_payload,most_payload,flags,nargs); + gasneti_AM_SrcDesc_t sd; flags &= ~(GEX_FLAG_AM_PREPARE_LEAST_CLIENT | GEX_FLAG_AM_PREPARE_LEAST_ALLOC); @@ -5270,7 +5678,7 @@ extern gex_AM_SrcDesc_t gasnetc_AM_PrepareReplyMedium( sd = 
gasneti_init_reply_srcdesc(GASNETI_THREAD_PASS_ALONE); GASNETI_COMMON_PREP_REP(sd,token,client_buf,least_payload,most_payload,NULL,lc_opt,flags,nargs,Medium); - if (gasnetc_prepare_medium(sd, rbuf->cep, client_buf, + if (gasnetc_prepare_common(sd, gasneti_Medium, 1, rbuf->cep, client_buf, least_payload, most_payload, lc_opt, flags, nargs GASNETI_THREAD_PASS)) { @@ -5285,6 +5693,7 @@ extern gex_AM_SrcDesc_t gasnetc_AM_PrepareReplyMedium( } GASNETI_TRACE_PREP_RETURN(REPLY_MEDIUM, sd); + GASNETI_CHECK_SD(client_buf, least_payload, most_payload, sd); return gasneti_export_srcdesc(sd); } @@ -5304,14 +5713,91 @@ extern void gasnetc_AM_CommitReplyMediumM( gasnetc_nbrhd_CommitReply(sd, gasneti_Medium, handler, nbytes, NULL, argptr); } else { GASNET_POST_THREADINFO(sd->_thread); - gasnetc_commit_medium(sd,1,handler,nbytes,nargs,argptr GASNETI_THREAD_PASS); + gasnetc_commit_common(sd,gasneti_Medium,1,handler,nbytes,NULL,nargs,argptr GASNETI_THREAD_PASS); } va_end(argptr); gasneti_reset_srcdesc(sd); } -#endif // GASNETC_HAVE_NP_REP_MEDIUM +#endif // GASNET_NATIVE_NP_ALLOC_REP_MEDIUM + +#if GASNET_NATIVE_NP_ALLOC_REP_LONG + +extern gex_AM_SrcDesc_t gasnetc_AM_PrepareReplyLong( + gex_Token_t token, + const void *client_buf, + size_t least_payload, + size_t most_payload, + void *dest_addr, + gex_Event_t *lc_opt, + gex_Flags_t flags, + unsigned int nargs) +{ + GASNETI_TRACE_PREP_REPLYLONG(token,client_buf,least_payload,most_payload,dest_addr,flags,nargs); + + gasneti_AM_SrcDesc_t sd; + flags &= ~(GEX_FLAG_AM_PREPARE_LEAST_CLIENT | GEX_FLAG_AM_PREPARE_LEAST_ALLOC); + + if (gasnetc_token_in_nbrhd(token)) { + sd = gasnetc_nbrhd_PrepareReply(gasneti_Long, token, + client_buf, least_payload, most_payload, + dest_addr, lc_opt, flags, nargs); + } else { + gasnetc_rbuf_t *rbuf = (gasnetc_rbuf_t *)token; + gasneti_assert(rbuf); + gasneti_assert(rbuf->rbuf_handlerRunning); + gasneti_assert(GASNETC_MSG_ISREQUEST(rbuf->rbuf_flags)); + gasneti_assert(rbuf->rbuf_needReply); + 
GASNET_POST_THREADINFO(rbuf->rbuf_threadinfo); + + sd = gasneti_init_reply_srcdesc(GASNETI_THREAD_PASS_ALONE); + GASNETI_COMMON_PREP_REP(sd,token,client_buf,least_payload,most_payload,dest_addr,lc_opt,flags,nargs,Long); + + if (gasnetc_prepare_common(sd, gasneti_Long, 1, rbuf->cep, client_buf, + least_payload, most_payload, + lc_opt, flags, nargs + GASNETI_THREAD_PASS)) { + gasneti_reset_srcdesc(sd); + sd = NULL; // GEX_AM_SRCDESC_NO_OP + } else { + gasneti_init_sd_poison(sd); + sd->_is_nbrhd = 0; + sd->_ep = rbuf->rr_ep; + rbuf->rbuf_needReply = 0; + } + } + + GASNETI_TRACE_PREP_RETURN(REPLY_LONG, sd); + GASNETI_CHECK_SD(client_buf, least_payload, most_payload, sd); + return gasneti_export_srcdesc(sd); +} + +extern void gasnetc_AM_CommitReplyLongM( + gex_AM_Index_t handler, + size_t nbytes, + void *dest_addr, + unsigned int nargs, + gex_AM_SrcDesc_t sd_arg, ...) +{ + gasneti_AM_SrcDesc_t sd = gasneti_import_srcdesc(sd_arg); + + GASNETI_COMMON_COMMIT_REP(sd,handler,nbytes,dest_addr,nargs,Long); + + va_list argptr; + va_start(argptr, sd_arg); + if (sd->_is_nbrhd) { + gasnetc_nbrhd_CommitReply(sd, gasneti_Long, handler, nbytes, dest_addr, argptr); + } else { + GASNET_POST_THREADINFO(sd->_thread); + gasnetc_commit_common(sd,gasneti_Long,1,handler,nbytes,dest_addr,nargs,argptr GASNETI_THREAD_PASS); + } + va_end(argptr); + + gasneti_reset_srcdesc(sd); +} + +#endif // GASNET_NATIVE_NP_ALLOC_REP_LONG /* ------------------------------------------------------------------------------------ */ /* @@ -5391,9 +5877,7 @@ extern int gasnetc_hsl_trylock(gex_HSL_t *hsl) { (for internal conduit use in bootstrapping, job management, etc.) 
*/ static gex_AM_Entry_t const gasnetc_handlers[] = { - #ifdef GASNETC_COMMON_HANDLERS - GASNETC_COMMON_HANDLERS(), - #endif + GASNETC_COMMON_HANDLERS(), /* ptr-width independent handlers */ gasneti_handler_tableentry_no_bits(gasnetc_exit_reduce_reqh,2,REQUEST,SHORT,0), diff --git a/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core.h b/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core.h index 952bdca3fd15..59f4781272cb 100644 --- a/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core.h +++ b/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core.h @@ -19,7 +19,7 @@ ============== */ -extern void gasnetc_exit(int exitcode) GASNETI_NORETURN; +extern void gasnetc_exit(int _exitcode) GASNETI_NORETURN; GASNETI_NORETURNP(gasnetc_exit) #define gasnet_exit gasnetc_exit @@ -32,31 +32,34 @@ GASNETI_NORETURNP(gasnetc_exit) #endif /* ------------------------------------------------------------------------------------ */ extern int gasnetc_Client_Init( - gex_Client_t *client_p, - gex_EP_t *ep_p, - gex_TM_t *tm_p, - const char *clientName, - int *argc, - char ***argv, - gex_Flags_t flags); + gex_Client_t *_client_p, + gex_EP_t *_ep_p, + gex_TM_t *_tm_p, + const char *_clientName, + int *_argc, + char ***_argv, + gex_Flags_t _flags); // gasnetex.h handles name-shifting of gex_Client_Init() extern int gasnetc_Segment_Attach( - gex_Segment_t *segment_p, - gex_TM_t tm, - uintptr_t length); + gex_Segment_t *_segment_p, + gex_TM_t _tm, + uintptr_t _length); #define gex_Segment_Attach gasnetc_Segment_Attach -extern int gasnetc_EP_Create( - gex_EP_t *ep_p, - gex_Client_t client, - gex_Flags_t flags); -#define gex_EP_Create gasnetc_EP_Create +extern int gasnetc_Segment_Create( + gex_Segment_t *_segment_p, + gex_Client_t _client, + gex_Addr_t _address, + uintptr_t _length, + gex_MK_t _kind, + gex_Flags_t _flags); +#define gex_Segment_Create gasnetc_Segment_Create extern int gasnetc_EP_RegisterHandlers( - gex_EP_t ep, - gex_AM_Entry_t *table, - size_t numentries); + gex_EP_t _ep, + 
gex_AM_Entry_t *_table, + size_t _numentries); #define gex_EP_RegisterHandlers gasnetc_EP_RegisterHandlers /* ------------------------------------------------------------------------------------ */ /* @@ -100,11 +103,11 @@ typedef struct { #define gex_HSL_Unlock(hsl) #define gex_HSL_Trylock(hsl) GASNET_OK #else - extern void gasnetc_hsl_init (gex_HSL_t *hsl); - extern void gasnetc_hsl_destroy(gex_HSL_t *hsl); - extern void gasnetc_hsl_lock (gex_HSL_t *hsl); - extern void gasnetc_hsl_unlock (gex_HSL_t *hsl); - extern int gasnetc_hsl_trylock(gex_HSL_t *hsl) GASNETI_WARN_UNUSED_RESULT; + extern void gasnetc_hsl_init (gex_HSL_t *_hsl); + extern void gasnetc_hsl_destroy(gex_HSL_t *_hsl); + extern void gasnetc_hsl_lock (gex_HSL_t *_hsl); + extern void gasnetc_hsl_unlock (gex_HSL_t *_hsl); + extern int gasnetc_hsl_trylock(gex_HSL_t *_hsl) GASNETI_WARN_UNUSED_RESULT; #define gex_HSL_Init gasnetc_hsl_init #define gex_HSL_Destroy gasnetc_hsl_destroy @@ -128,11 +131,18 @@ typedef struct { #define GASNETC_MAX_MEDIUM_(nargs) \ (GASNETC_BUFSZ - \ - GASNETI_ALIGNUP_NOASSERT(GASNETC_MEDIUM_HDRSZ + 4*(GASNETC_MAX_ARGS_EXTRA+nargs), \ + GASNETI_ALIGNUP_NOASSERT(GASNETC_MEDIUM_HDRSZ + 4*(GASNETC_MAX_ARGS_EXTRA+(nargs)), \ 8)) #define GASNETC_MAX_MEDIUM GASNETC_MAX_MEDIUM_(GASNETC_MAX_ARGS_USER) + #define GASNETC_MAX_LONG_REQ (0x7fffffff) -#define GASNETC_MAX_PACKEDLONG (GASNETC_BUFSZ - GASNETC_LONG_HDRSZ - 4*GASNETC_MAX_ARGS) + +#define GASNETC_MAX_PACKEDLONG_(nargs) \ + (GASNETC_BUFSZ - \ + GASNETI_ALIGNUP_NOASSERT(GASNETC_LONG_HDRSZ + 4*(GASNETC_MAX_ARGS_EXTRA+(nargs)), \ + 8)) +#define GASNETC_MAX_PACKEDLONG GASNETC_MAX_PACKEDLONG_(GASNETC_MAX_ARGS_USER) + #if GASNETC_PIN_SEGMENT #define GASNETC_MAX_LONG_REP GASNETC_MAX_LONG_REQ #else @@ -146,12 +156,28 @@ typedef struct { #define gex_AM_LUBReplyLong() ((size_t)GASNETC_MAX_LONG_REP) // TODO-EX: Medium sizes can be further improved upon for PSHM case -#define gasnetc_AM_MaxRequestMedium(tm,rank,lc_opt,flags,nargs) 
((size_t)GASNETC_MAX_MEDIUM_(nargs)) -#define gasnetc_AM_MaxReplyMedium(tm,rank,lc_opt,flags,nargs) ((size_t)GASNETC_MAX_MEDIUM_(nargs)) -#define gasnetc_AM_MaxRequestLong(tm,rank,lc_opt,flags,nargs) ((size_t)GASNETC_MAX_LONG_REQ) -#define gasnetc_AM_MaxReplyLong(tm,rank,lc_opt,flags,nargs) ((size_t)GASNETC_MAX_LONG_REP) -#define gasnetc_Token_MaxReplyMedium(token,lc_opt,flags,nargs) ((size_t)GASNETC_MAX_MEDIUM_(nargs)) -#define gasnetc_Token_MaxReplyLong(token,lc_opt,flags,nargs) ((size_t)GASNETC_MAX_LONG_REP) +#define gasnetc_AM_MaxRequestMedium(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS4(tm,rank,lc_opt,flags),(size_t)GASNETC_MAX_MEDIUM_(nargs)) +#define gasnetc_AM_MaxReplyMedium(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS4(tm,rank,lc_opt,flags),(size_t)GASNETC_MAX_MEDIUM_(nargs)) +#define gasnetc_Token_MaxReplyMedium(token,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS3(token,lc_opt,flags),(size_t)GASNETC_MAX_MEDIUM_(nargs)) + +#define gasnetc_AM_MaxRequestLong(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS4(tm,rank,lc_opt,nargs), \ + ((flags) & GEX_FLAG_AM_PREPARE_LEAST_ALLOC \ + ? GASNETC_MAX_PACKEDLONG_(nargs) \ + : gex_AM_LUBRequestLong())) +#define gasnetc_AM_MaxReplyLong(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS4(tm,rank,lc_opt,nargs), \ + ((flags) & GEX_FLAG_AM_PREPARE_LEAST_ALLOC \ + ? GASNETC_MAX_PACKEDLONG_(nargs) \ + : gex_AM_LUBReplyLong())) +#define gasnetc_Token_MaxReplyLong(token,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS3(token,lc_opt,nargs), \ + ((flags) & GEX_FLAG_AM_PREPARE_LEAST_ALLOC \ + ? 
GASNETC_MAX_PACKEDLONG_(nargs) \ + : gex_AM_LUBReplyLong())) /* ------------------------------------------------------------------------------------ */ /* diff --git a/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core_connect.c b/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core_connect.c index 65e90ca7fa86..0db89a74d791 100644 --- a/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core_connect.c +++ b/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core_connect.c @@ -11,6 +11,7 @@ #include #include #include +#include // INT_MAX /* The following configuration cannot yet be overridden by environment variables. @@ -26,6 +27,11 @@ for((_cep) = (_conn_info)->cep, (_qpi) = 0; \ (_qpi) < gasnetc_alloc_qps; ++(_cep), ++(_qpi)) +// AMs need at most 2 scatter-gather entries when using gather-send for Medium +// or packed-Long, while Puts and Long payloads need as many as GASNETC_SND_SG. +// All Send Queues need to be able to accommodate either. +#define GASNETC_MAX_SEND_SGE MAX(2, GASNETC_SND_SG) + /* ------------------------------------------------------------------------------------ */ /* Global data */ @@ -177,8 +183,20 @@ typedef struct gasnetc_xrc_snd_qp_s { } gasnetc_xrc_snd_qp_t; static gasnetc_xrc_snd_qp_t *gasnetc_xrc_snd_qp = NULL; -#define GASNETC_NODE2SND_QP(_node) \ - (&gasnetc_xrc_snd_qp[gasneti_node2supernode(_node) * gasnetc_alloc_qps]) +#if GASNET_MAXNODES <= 65535 +static uint16_t *gasnetc_xrcd_map = NULL; +#else +static uint32_t *gasnetc_xrcd_map = NULL; +#endif +static int gasnetc_xrcd_simple; + +static gasnetc_xrc_snd_qp_t * +gasnetc_node2snd_qp(gex_Rank_t rank) { + if (!gasnetc_use_xrc) return NULL; + int idx = gasnetc_xrcd_simple ? 
gasneti_node2supernode(rank) : gasnetc_xrcd_map[rank]; + return gasnetc_xrc_snd_qp + (idx * gasnetc_alloc_qps); +} +#define GASNETC_NODE2SND_QP(rank) gasnetc_node2snd_qp(rank) static uint32_t *gasnetc_xrc_rcv_qpn = NULL; @@ -284,15 +302,26 @@ gasnetc_xrc_modify_qp( /* XXX: Requires that at least the first call is collective */ static char* -gasnetc_xrc_tmpname(uint16_t mylid, int index) { +gasnetc_xrc_tmpname(uint16_t mylid, int index, int domain) { static const char *tmpdir = NULL; static int tmpdir_len = -1; static pid_t pid; - static const char pattern[] = "/GASNETxrc-%04x%01x-%06x"; /* Max 11 + 5 + 1 + 6 + 1 = 24 */ - const int filename_len = 24; + static const char pattern[] = "/GASNETxrc-%04x%01x%02x-%06x"; /* Max 11 + 7 + 1 + 6 + 1 = 26 */ + const int filename_len = 26; char *filename; - gasneti_assert(index >= 0 && index <= 16); + // At most 16 HCAs per process + gasneti_assert_always(index >= 0); + gasneti_assert_always_uint(index ,<, 16); + + // At most 256 XRC domains per supernode + // Worst case for `n` HCAs per host with `r` open per process is `C(n,r)` possible + // domains per host, where `C()` is "combinations of n pick r" = `n! / ( r! * (n-r)! )`. + // The maximum over `r` occurs at `r = floor(n/2)`. + // For n=10 this yields 252 possible domains, meaning that with current encoding, + // we are assured of handling any configuration with no more than ten HCAs per host. 
+ gasneti_assert_always(domain >= 0); + gasneti_assert_always_uint(domain ,<, 256); /* Initialize tmpdir and pid only on first call */ if (!tmpdir) { @@ -313,13 +342,123 @@ gasnetc_xrc_tmpname(uint16_t mylid, int index) { pattern, (unsigned int)(mylid & 0xffff), (unsigned int)(index & 0xf), + (unsigned int)(domain & 0xf), (unsigned int)(pid & 0xffffff)); gasneti_assert(strlen(filename) < (tmpdir_len + filename_len)); return filename; } -/* Create an XRC domain per HCA (once per supernode) and a shared RCV QPN table */ +// XRC domain (xrcd) info +static int gasnetc_xrcd_global_count; // Number of XRC domains in the entire job +static int gasnetc_xrcd_local_count; // Number of XRC domains in my supernode +static int gasnetc_xrcd_local_rank; // Rank of my XRC domain within my supernode +static int gasnetc_xrcd_iam_leader; // Boolean, true in exactly one proc per XRC domain + +// qsort comparison fn +static const uint16_t *gasnetc_xrc_remote_lids; +static int _gasnetc_xrc_compare_keys(gex_Rank_t a_r, gex_Rank_t b_r) { + // Primary key is supernode + int a_s = gasneti_node2supernode(a_r); + int b_s = gasneti_node2supernode(b_r); + int result = (a_s - b_s); + if (result) return result; + + // Secondary key is the array of gasnetc_num_ports lids + return memcmp(gasnetc_xrc_remote_lids + a_r * gasnetc_num_ports, + gasnetc_xrc_remote_lids + b_r * gasnetc_num_ports, + sizeof(uint16_t) * gasnetc_num_ports); +} +static int _gasnetc_xrc_compare_fn(const void *a_p, const void *b_p) { + gasneti_static_assert(GASNET_MAXNODES < INT_MAX); + gex_Rank_t a_r = *(gex_Rank_t *)a_p; + gex_Rank_t b_r = *(gex_Rank_t *)b_p; + + // Compare the keys + int result = _gasnetc_xrc_compare_keys(a_r, b_r); + if (result) return result; + + // tie-break using the rank itself + return (int)a_r - (int)b_r; +} + +// Compute XRC domain membership +// Return size of shared memory required for its management +extern size_t +gasnetc_xrc_preinit(const uint16_t *remote_lids) { + // Map the xrc domains, where 
each is a unique (supernode, lids[]) tuple + // We cannot map by just lids[] due to GASNET_SUPERNODE_MAX (or GASNETI_PSHM_MAX_NODES) + // We don't form the actual keys in memory, and instead just permute an array of ranks + gasnetc_xrc_remote_lids = remote_lids; + gex_Rank_t *map = gasneti_malloc(gasneti_nodes * sizeof(gex_Rank_t)); + for (gex_Rank_t i = 0; i < gasneti_nodes; ++i) map[i] = i; + qsort(map, gasneti_nodes, sizeof(gex_Rank_t), _gasnetc_xrc_compare_fn); + + // Allocate gasnetc_xrcd_map[], which is mapping from rank to a global + // xrc domain number, used to index into gasnetc_xrc_snd_qp[]. + // This will move to shared memory after PSHM has been initialized + if (!gasneti_mysupernode.node_rank) { + gasnetc_xrcd_map = gasneti_malloc(gasneti_nodes * sizeof(*gasnetc_xrcd_map)); + } + + // Make a single pass over the sorted array to do the following: + // + populate gasnetc_xrcd_map[] (one per supernode) + // + count the number of xrc domains (distinct keys) globally + // + count the number of xrc domains local to this supernode + // + find which of the local xrc domains I belong to + // + determine if I am the leader (lowest ranked member) of my xrc domain + // Use the _gasnetc_xrc_compare_keys() for sanity + gasnetc_xrcd_global_count = 0; + gasnetc_xrcd_local_count = 0; + gasnetc_xrcd_iam_leader = 0; + for (gex_Rank_t i = 0; i < gasneti_nodes; ++i) { + gex_Rank_t curr = map[i]; + if (!i || _gasnetc_xrc_compare_keys(curr, map[i-1])) { // First instance of this key + ++gasnetc_xrcd_global_count; + if (gasneti_node2supernode(curr) == gasneti_mysupernode.grp_rank) { // in local supernode + ++gasnetc_xrcd_local_count; + } + if (curr == gasneti_mynode) { + gasnetc_xrcd_iam_leader = 1; + } + } + if (curr == gasneti_mynode) { + gasnetc_xrcd_local_rank = gasnetc_xrcd_local_count - 1; + } + if (gasnetc_xrcd_map) { + gasnetc_xrcd_map[curr] = gasnetc_xrcd_global_count - 1; + } + } + gasneti_free(map); + + // Do we have the simple case of one XRC domain per 
supernode? + gasnetc_xrcd_simple = (gasnetc_xrcd_global_count == gasneti_mysupernode.grp_count); + if (gasnetc_xrcd_simple) { + gasneti_assert_int(gasnetc_xrcd_local_rank ,==, 0); + gasneti_assert_int(gasnetc_xrcd_local_count ,==, 1); + if (gasnetc_xrcd_map) { + gasneti_free(gasnetc_xrcd_map); + gasnetc_xrcd_map = NULL; + } + } + + GASNETI_TRACE_PRINTF(I, ("Identified %d XRC domains globaly%s", + gasnetc_xrcd_global_count, + gasnetc_xrcd_simple?", one per supernode (simple case)":"")); + GASNETI_TRACE_PRINTF(I, ("I am %s of XRC domain %d of %d within my supernode", + gasnetc_xrcd_iam_leader?"the leader":"a member", + gasnetc_xrcd_local_rank, gasnetc_xrcd_local_count)); + + // *May* need a single gasnetc_xrcd_map[]... + size_t xrcd_map_bytes = gasnetc_xrcd_simple ? 0 : (gasneti_nodes * sizeof(*gasnetc_xrcd_map)); + // ... plus a full gasnetc_xrc_rcv_qpn[] per local xrc domain... + size_t xrc_rcv_qpn_bytes = gasneti_nodes * gasnetc_alloc_qps * sizeof(uint32_t); + // .. and we cache pad each + return GASNETI_ALIGNUP(xrcd_map_bytes, GASNETI_CACHE_LINE_BYTES) + + GASNETI_ALIGNUP(xrc_rcv_qpn_bytes * gasnetc_xrcd_local_count, GASNETI_CACHE_LINE_BYTES); +} + +/* Create an XRC domain per HCA and a shared RCV QPN table */ /* XXX: Requires that the call is collective */ extern int gasnetc_xrc_init(void **shared_mem_p) { @@ -327,10 +466,22 @@ gasnetc_xrc_init(void **shared_mem_p) { char *filename[GASNETC_IB_MAX_HCAS]; int index, fd; + if (! gasnetc_xrcd_simple) { + // We lack 1-to-1 correspondence between supernode and XRC domains, + // but at least we can share a single gasnetc_xrcd_map[] per supernode. 
+ size_t xrcd_map_bytes = gasneti_nodes * sizeof(*gasnetc_xrcd_map); + if (gasnetc_xrcd_map) { // built once per supernode in preinit + memcpy(*shared_mem_p, gasnetc_xrcd_map, xrcd_map_bytes); + gasneti_free(gasnetc_xrcd_map); + } + gasnetc_xrcd_map = *shared_mem_p; + *shared_mem_p = (void *)GASNETI_ALIGNUP((uintptr_t)(*shared_mem_p) + xrcd_map_bytes, GASNETI_CACHE_LINE_BYTES); + } + /* Use per-supernode filename to create common XRC domain once per HCA */ GASNETC_FOR_ALL_HCA_INDEX(index) { gasnetc_hca_t *hca = &gasnetc_hca[index]; - filename[index] = gasnetc_xrc_tmpname(mylid, index); + filename[index] = gasnetc_xrc_tmpname(mylid, index, gasnetc_xrcd_local_rank); fd = open(filename[index], O_CREAT, S_IWUSR|S_IRUSR); if (fd < 0) { gasneti_fatalerror("failed to create xrc domain file '%s': %d:%s", filename[index], errno, strerror(errno)); @@ -355,13 +506,15 @@ gasnetc_xrc_init(void **shared_mem_p) { (void) close(fd); } - /* Place RCV QPN table in shared memory */ - gasnetc_xrc_rcv_qpn = (uint32_t *)(*shared_mem_p); - size_t count = gasneti_nodes * gasnetc_alloc_qps; - if (!gasneti_pshm_mynode) { - gasneti_pshm_prefault(gasnetc_xrc_rcv_qpn, count * sizeof(uint32_t)); + /* Place RCV QPN table in shared memory at per-domain offset */ + uint32_t *xrc_shared_mem = *shared_mem_p; + size_t domain_elems = gasneti_nodes * gasnetc_alloc_qps; + gasnetc_xrc_rcv_qpn = xrc_shared_mem + (gasnetc_xrcd_local_rank * domain_elems); + if (gasnetc_xrcd_iam_leader) { + gasneti_pshm_prefault(gasnetc_xrc_rcv_qpn, domain_elems * sizeof(uint32_t)); } - *shared_mem_p = (void *)GASNETI_ALIGNUP(gasnetc_xrc_rcv_qpn + count, GASNETI_CACHE_LINE_BYTES); + size_t total_elems = gasnetc_xrcd_local_count * domain_elems; + *shared_mem_p = (void *)GASNETI_ALIGNUP(xrc_shared_mem + total_elems, GASNETI_CACHE_LINE_BYTES); /* Clean up once everyone is done w/ all files, and RCV QPN table is prefaulted */ gasneti_pshmnet_bootstrapBarrier(); @@ -370,7 +523,7 @@ gasnetc_xrc_init(void **shared_mem_p) { } /* 
Allocate SND QP table */ - gasnetc_xrc_snd_qp = gasneti_calloc(gasneti_nodemap_global_count * gasnetc_alloc_qps, + gasnetc_xrc_snd_qp = gasneti_calloc(gasnetc_xrcd_global_count * gasnetc_alloc_qps, sizeof(gasnetc_xrc_snd_qp_t)); gasneti_leak(gasnetc_xrc_snd_qp); @@ -382,9 +535,6 @@ gasnetc_xrc_init(void **shared_mem_p) { Returns NULL for cases that should not have any connection */ static const gasnetc_port_info_t * gasnetc_select_port(gex_Rank_t node, int qpi) { - if (GASNETI_NBRHD_JOBRANK_IS_LOCAL(node)) { - return NULL; - } if (GASNETC_QPI_IS_REQ(qpi)) { /* Second half of table (if any) duplicates first half. */ qpi -= gasnetc_num_qps; @@ -430,7 +580,7 @@ gasnetc_setup_ports(gasnetc_conn_info_t *conn_info) /* Create and destroy QPs to determine the inline data limit */ static void -gasnetc_check_inline_limit(int port_num, int send_wr, int send_sge) +gasnetc_check_inline_limit(int port_num, int send_wr) { const gasnetc_port_info_t *port = &gasnetc_port_tbl[port_num]; gasnetc_hca_t *hca = &gasnetc_hca[port->hca_index]; @@ -445,7 +595,7 @@ gasnetc_check_inline_limit(int port_num, int send_wr, int send_sge) qp_init_attr.cap.max_send_wr = send_wr; qp_init_attr.cap.max_recv_wr = gasnetc_use_srq ? 0 : gasnetc_am_oust_pp * 2; - qp_init_attr.cap.max_send_sge = send_sge; + qp_init_attr.cap.max_send_sge = GASNETC_MAX_SEND_SGE; qp_init_attr.cap.max_recv_sge = 1; qp_init_attr.qp_context = NULL; /* XXX: Can/should we use this? */ #if GASNETC_IBV_XRC_OFED @@ -526,7 +676,7 @@ gasnetc_qp_create(gasnetc_conn_info_t *conn_info) qp_init_attr.cap.max_inline_data = gasnetc_inline_limit; qp_init_attr.cap.max_send_wr = max_send_wr; qp_init_attr.cap.max_recv_wr = max_recv_wr; - qp_init_attr.cap.max_send_sge = GASNETC_SND_SG; + qp_init_attr.cap.max_send_sge = GASNETC_MAX_SEND_SGE; qp_init_attr.cap.max_recv_sge = 1; qp_init_attr.qp_context = NULL; /* XXX: Can/should we use this? 
*/ #if GASNETC_IBV_XRC_OFED @@ -562,11 +712,11 @@ gasnetc_qp_create(gasnetc_conn_info_t *conn_info) if (GASNETC_QPI_IS_REQ(qpi)) { qp_init_attr.srq = hca->rqst_srq; qp_init_attr.cap.max_send_wr = gasnetc_am_oust_pp; - qp_init_attr.cap.max_send_sge = 1; /* only AMs on this QP */ + qp_init_attr.cap.max_send_sge = GASNETC_MAX_SEND_SGE; } else { qp_init_attr.srq = hca->repl_srq; qp_init_attr.cap.max_send_wr = gasnetc_op_oust_pp; - qp_init_attr.cap.max_send_sge = GASNETC_SND_SG; + qp_init_attr.cap.max_send_sge = GASNETC_MAX_SEND_SGE; } cep->srq = qp_init_attr.srq; max_send_wr = qp_init_attr.cap.max_send_wr; @@ -623,7 +773,7 @@ gasnetc_qp_create(gasnetc_conn_info_t *conn_info) /* Advance QP state from RESET to INIT */ static int -gasnetc_qp_reset2init(gasnetc_conn_info_t *conn_info) +gasnetc_qp_reset2init(gasnetc_conn_info_t *conn_info, int active) { const gex_Rank_t node = conn_info->node; struct ibv_qp_attr qp_attr; @@ -653,7 +803,7 @@ gasnetc_qp_reset2init(gasnetc_conn_info_t *conn_info) qp_attr.pkey_index = port->pkey_index; #if GASNETC_IBV_XRC - if (gasnetc_use_xrc) { + if (gasnetc_use_xrc && active) { rc = gasnetc_xrc_modify_qp(cep, &qp_attr, qp_mask); GASNETC_IBV_CHECK(rc, "from gasnetc_xrc_modify_qp(INIT)" GASNETC_XRC_HELP_MSG); } @@ -675,7 +825,7 @@ gasnetc_qp_reset2init(gasnetc_conn_info_t *conn_info) /* Advance QP state from INIT to RTR */ static int -gasnetc_qp_init2rtr(gasnetc_conn_info_t *conn_info) +gasnetc_qp_init2rtr(gasnetc_conn_info_t *conn_info, int active) { const gex_Rank_t node = conn_info->node; struct ibv_qp_attr qp_attr; @@ -711,8 +861,10 @@ gasnetc_qp_init2rtr(gasnetc_conn_info_t *conn_info) #if GASNETC_IBV_XRC if (gasnetc_use_xrc) { - rc = gasnetc_xrc_modify_qp(cep, &qp_attr, qp_mask); - GASNETC_IBV_CHECK(rc, "from gasnetc_xrc_modify_qp(RTR)" GASNETC_XRC_HELP_MSG); + if (active) { + rc = gasnetc_xrc_modify_qp(cep, &qp_attr, qp_mask); + GASNETC_IBV_CHECK(rc, "from gasnetc_xrc_modify_qp(RTR)" GASNETC_XRC_HELP_MSG); + } /* The normal QP will 
connect, below, to the peer's XRC rcv QP */ qp_attr.dest_qp_num = conn_info->remote_xrc_qpn[qpi]; @@ -1696,7 +1848,6 @@ extern gasnetc_cep_t * gasnetc_connect_to(gasnetc_EP_t ep, gex_Rank_t node) { gasnetc_cep_t *result = NULL; - gasneti_assert(ep == gasnetc_ep0); // TODO: multi-EP support gasneti_mutex_lock(&gasnetc_conn_tbl_lock); do { @@ -1710,7 +1861,7 @@ gasnetc_connect_to(gasnetc_EP_t ep, gex_Rank_t node) conn->start_active = 1; #endif - if_pf (node >= gasneti_nodes || GASNETI_NBRHD_JOBRANK_IS_LOCAL(node)) { + if_pf (node >= gasneti_nodes) { gasneti_fatalerror("Connection requested to invalid node %d", (int)node); break; } @@ -1720,7 +1871,7 @@ gasnetc_connect_to(gasnetc_EP_t ep, gex_Rank_t node) conn_send_req(conn, GASNETC_CONN_IS_ORIG); - (void) gasnetc_qp_reset2init(&conn->info); + (void) gasnetc_qp_reset2init(&conn->info, 1); gasnetc_timed_conn_wait(conn, GASNETC_CONN_STATE_REQ_SENT, &conn_send_req); if ((conn->state == GASNETC_CONN_STATE_REP_SENT) || @@ -1731,12 +1882,16 @@ gasnetc_connect_to(gasnetc_EP_t ep, gex_Rank_t node) } gasneti_assert(conn->state == GASNETC_CONN_STATE_REP_RCVD); - (void) gasnetc_qp_init2rtr(&conn->info); + (void) gasnetc_qp_init2rtr(&conn->info, 1); gasneti_sync_writes(); /* "finalize" cep data */ GASNETC_NODE2CEP(ep, node) = conn->info.cep; conn->state = GASNETC_CONN_STATE_RTU_SENT; - conn_send_rtu(conn, GASNETC_CONN_IS_ORIG); + if (node == gasneti_mynode) { + conn->state = GASNETC_CONN_STATE_ACK_RCVD; + } else { + conn_send_rtu(conn, GASNETC_CONN_IS_ORIG); + } gasnetc_sndrcv_attach_peer(node, conn->info.cep); (void) gasnetc_qp_rtr2rts(&conn->info); @@ -1899,9 +2054,9 @@ gasnetc_conn_rcv_wc(struct ibv_wc *comp) /* Advance QP state, overlapped w/ network round-trip (if any) and remote work: */ if (conn->state == GASNETC_CONN_STATE_NONE) { - (void) gasnetc_qp_reset2init(&conn->info); + (void) gasnetc_qp_reset2init(&conn->info, 1); } - (void) gasnetc_qp_init2rtr(&conn->info); + (void) gasnetc_qp_init2rtr(&conn->info, 1); 
gasnetc_sndrcv_attach_peer(node, conn->info.cep); (void) gasnetc_qp_rtr2rts(&conn->info); (void) gasnetc_set_sq_sema(&conn->info); @@ -2112,7 +2267,7 @@ gasnetc_connect_static(gasnetc_EP_t ep) uint32_t *xrc_remote_srq_num = NULL; #endif gex_Rank_t node; - gex_Rank_t static_nodes = gasnetc_remote_nodes; + gex_Rank_t static_nodes = gasneti_nodes; #if GASNETC_IBV_XRC gex_Rank_t static_supernodes = gasneti_nodemap_global_count - 1; #endif @@ -2146,7 +2301,7 @@ gasnetc_connect_static(gasnetc_EP_t ep) { uint8_t *transposed_mask = gasneti_malloc(gasneti_nodes * sizeof(uint8_t)); gasneti_bootstrapAlltoall(peer_mask, sizeof(uint8_t), transposed_mask); for (static_nodes = node = 0; node < gasneti_nodes; ++node) { - peer_mask[node] = !GASNETI_NBRHD_JOBRANK_IS_LOCAL(node) && (peer_mask[node] || transposed_mask[node]); + peer_mask[node] = (peer_mask[node] || transposed_mask[node]); gasneti_assert((peer_mask[node] == 0) || (peer_mask[node] == 1)); static_nodes += peer_mask[node]; } @@ -2169,8 +2324,7 @@ gasnetc_connect_static(gasnetc_EP_t ep) } } - #define GASNETC_IS_REMOTE_NODE(_node) \ - (peer_mask ? peer_mask[_node] : !GASNETI_NBRHD_JOBRANK_IS_LOCAL(_node)) + #define GASNETC_IS_REMOTE_NODE(_node) (peer_mask ? peer_mask[_node] : 1) #define GASNETC_FOR_EACH_REMOTE_NODE(_node) \ for ((_node) = 0; (_node) < gasneti_nodes; ++(_node)) \ @@ -2253,6 +2407,12 @@ gasnetc_connect_static(gasnetc_EP_t ep) gasneti_bootstrapAlltoall(local_qpn, gasnetc_alloc_qps*sizeof(uint32_t), remote_qpn); /* Advance state RESET -> INIT -> RTR. */ + // One active process per XRC domain is sufficient (more just slow things down).
+#if GASNETC_IBV_XRC + const int active = gasnetc_xrcd_iam_leader || !gasnetc_use_xrc; +#else + const int active = 1; +#endif GASNETC_FOR_EACH_REMOTE_NODE(node) { i = node * gasnetc_alloc_qps; conn_info[node].remote_qpn = &remote_qpn[i]; @@ -2261,8 +2421,8 @@ gasnetc_connect_static(gasnetc_EP_t ep) conn_info[node].xrc_remote_srq_num = &xrc_remote_srq_num[i]; #endif - (void)gasnetc_qp_reset2init(&conn_info[node]); - (void)gasnetc_qp_init2rtr(&conn_info[node]); + (void)gasnetc_qp_reset2init(&conn_info[node], active); + (void)gasnetc_qp_init2rtr(&conn_info[node], active); } /* QPs must reach RTS before we may continue @@ -2309,11 +2469,6 @@ gasnetc_connect_init(gasnetc_EP_t ep0) memset(ep0->cep_table, 0, size); } - if_pf (!gasnetc_remote_nodes) { - GASNETI_TRACE_PRINTF(I, ("No connection setup since there are no remote nodes")); - return GASNET_OK; - } - #if GASNETC_DYNAMIC_CONNECT /* Parse connection related env vars */ #if GASNET_DEBUG @@ -2367,10 +2522,10 @@ gasnetc_connect_init(gasnetc_EP_t ep0) int i; for (i = 0; i < gasnetc_num_ports; ++i) { - gasnetc_check_inline_limit(i, gasnetc_op_oust_pp, GASNETC_SND_SG); + gasnetc_check_inline_limit(i, gasnetc_op_oust_pp); if (gasnetc_use_srq) { /* Corresponds to a Request QP */ - gasnetc_check_inline_limit(i, gasnetc_am_oust_pp, 1); + gasnetc_check_inline_limit(i, gasnetc_am_oust_pp); } } @@ -2389,10 +2544,10 @@ gasnetc_connect_init(gasnetc_EP_t ep0) /* Create static connections unless disabled */ if (do_static) { gex_Rank_t static_nodes = gasnetc_connect_static(ep0); - fully_connected = (static_nodes == gasnetc_remote_nodes); + fully_connected = (static_nodes == gasneti_nodes); GASNETI_TRACE_PRINTF(I, ("%s connected at startup to %d of %d remote nodes", fully_connected ? 
"Fully" : "Partially", - (int)static_nodes, (int)gasnetc_remote_nodes)); + (int)static_nodes, (int)gasneti_nodes)); } else { GASNETI_TRACE_PRINTF(I, ("Static connection at startup has been disabled at user request")); } @@ -2564,7 +2719,7 @@ gasnetc_connect_fini(gasnetc_EP_t ep0) } } if (fd >= 0) dump_conn_done(fd); - GASNETI_TRACE_PRINTF(C, ("Network traffic sent to %d of %d remote nodes", (int)count, (int)gasnetc_remote_nodes)); + GASNETI_TRACE_PRINTF(C, ("Network traffic sent to %d of %d ranks", (int)count, (int)gasneti_nodes)); return GASNET_OK; } /* gasnetc_connect_fini */ diff --git a/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core_fwd.h b/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core_fwd.h index 2c0d2d2c465a..036ee51bea78 100644 --- a/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core_fwd.h +++ b/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core_fwd.h @@ -15,7 +15,7 @@ #error "VAPI-conduit is no longer supported" #endif -#define GASNET_CORE_VERSION 2.5 +#define GASNET_CORE_VERSION 2.7 #define GASNET_CORE_VERSION_STR _STRINGIFY(GASNET_CORE_VERSION) #define GASNET_CORE_NAME IBV #define GASNET_CORE_NAME_STR _STRINGIFY(GASNET_CORE_NAME) @@ -23,6 +23,12 @@ #define GASNET_CONDUIT_NAME_STR _STRINGIFY(GASNET_CONDUIT_NAME) #define GASNET_CONDUIT_IBV 1 +#if defined(GASNET_SEGMENT_FAST) + #define GASNETC_PIN_SEGMENT 1 +#else + #define GASNETC_PIN_SEGMENT 0 +#endif + // Size of a buffer to contain any AM with all its header, padding and payload #define GASNETC_BUFSZ GASNETC_IBV_MAX_MEDIUM @@ -53,6 +59,9 @@ #define GASNETI_SUPPORTS_OUTOFSEGMENT_PUTGET 1 #endif + // uncomment for each MK_CLASS which the conduit supports. 
leave commented otherwise +#define GASNET_HAVE_MK_CLASS_CUDA_UVA (GASNETI_MK_CLASS_CUDA_UVA_ENABLED && GASNET_SEGMENT_FAST) + /* conduits should define GASNETI_CONDUIT_THREADS to 1 if they have one or more "private" threads which may be used to run AM handlers, even under GASNET_SEQ this ensures locking is still done correctly, etc @@ -81,22 +90,27 @@ your conduit must provide the V-suffixed functions for any of these that are not defined. */ -#define GASNETC_HAVE_NP_REQ_MEDIUM 1 -#define GASNETC_HAVE_NP_REP_MEDIUM 1 -/* #define GASNETC_HAVE_NP_REQ_LONG 1 */ -/* #define GASNETC_HAVE_NP_REP_LONG 1 */ +#define GASNET_NATIVE_NP_ALLOC_REQ_MEDIUM 1 +#define GASNET_NATIVE_NP_ALLOC_REP_MEDIUM 1 +#if GASNETC_PIN_SEGMENT +#define GASNET_NATIVE_NP_ALLOC_REQ_LONG 1 +#define GASNET_NATIVE_NP_ALLOC_REP_LONG 1 +#endif - /* uncomment for each GASNETC_HAVE_NP_* enabled above if the Commit function + /* uncomment for each GASNET_NATIVE_NP_ALLOC_* enabled above if the Commit function has the numargs argument even in an NDEBUG build (it is always passed in DEBUG builds). */ #define GASNETC_AM_COMMIT_REQ_MEDIUM_NARGS 1 #define GASNETC_AM_COMMIT_REP_MEDIUM_NARGS 1 -//#define GASNETC_AM_COMMIT_REQ_LONG_NARGS 1 -//#define GASNETC_AM_COMMIT_REP_LONG_NARGS 1 +#if GASNETC_PIN_SEGMENT +#define GASNETC_AM_COMMIT_REQ_LONG_NARGS 1 +#define GASNETC_AM_COMMIT_REP_LONG_NARGS 1 +#endif #define GASNETI_AM_SRCDESC_EXTRA \ int _have_flow; \ + int _head_len; \ void * _buf_alloc; \ void * _cep; \ void * _ep; \ @@ -106,10 +120,64 @@ include a call to gasneti_AMPoll (or equivalent) for progress. The preferred implementation is to Poll only in the M-suffixed calls and not the V-suffixed calls (and GASNETC_REQUESTV_POLLS undefined). - Used if (and only if) any of the GASNETC_HAVE_NP_* values above are unset. + Used if (and only if) any of the GASNET_NATIVE_NP_ALLOC_* values above are unset. 
*/ /* #define GASNETC_REQUESTV_POLLS 1 */ + // uncomment if conduit provides a gasnetc-prefixed override + // TODO: this should be a hook rather than an override +#if GASNETC_PIN_SEGMENT + #define GASNETC_HAVE_EP_PUBLISHBOUNDSEGMENT 1 +#endif + + /* If your conduit uses conduit-specific extensions to the basic object + types, then define the corresponding SIZEOF macros below to return + the total length of the conduit-specific object, including the prefix + portion which must be the matching GASNETI_[OBJECT]_COMMON fields. + Similarly, *_HOOK macros should be defined as callbacks to perform + conduit-specific initialization and finalization tasks, if any. + If a given SIZEOF macro is defined, but the corresponding INIT_HOOK is + not, then space beyond the COMMON fields will be zero-initialized. + In all cases, GASNETC_[OBJECT]_EXTRA_DECLS provides the place to + provide necessary declarations (since this file is included very early). + */ + +//#define GASNETC_CLIENT_EXTRA_DECLS (###) +//#define GASNETC_CLIENT_INIT_HOOK(i_client) (###) +//#define GASNETC_CLIENT_FINI_HOOK(i_client) (###) +//#define GASNETC_SIZEOF_CLIENT_T() (###) + +#define GASNETC_SEGMENT_EXTRA_DECLS \ + extern size_t gasnetc_sizeof_segment_t(void); +//#define GASNETC_SEGMENT_INIT_HOOK(i_segment) (###) +//#define GASNETC_SEGMENT_FINI_HOOK(i_segment) (###) +#define GASNETC_SIZEOF_SEGMENT_T() \ + gasnetc_sizeof_segment_t() + +//#define GASNETC_TM_EXTRA_DECLS (###) +//#define GASNETC_TM_INIT_HOOK(i_tm) (###) +//#define GASNETC_TM_FINI_HOOK(i_tm) (###) +//#define GASNETC_SIZEOF_TM_T() (###) + +#define GASNETC_EP_EXTRA_DECLS \ + extern size_t gasnetc_sizeof_ep_t(void); \ + extern int gasnetc_ep_init_hook(gasneti_EP_t); +#define GASNETC_EP_INIT_HOOK(i_ep) \ + gasnetc_ep_init_hook(i_ep) +//#define GASNETC_EP_FINI_HOOK(i_ep) (###) +#define GASNETC_SIZEOF_EP_T() \ + gasnetc_sizeof_ep_t() + +#if GASNETC_PIN_SEGMENT // multi-EP NOT supported with remote firehose +// If conduit supports GASNET_MAXEPS!=1, set 
default and (optional) max values here. +// Leaving GASNETC_MAXEPS_DFLT unset will result in GASNET_MAXEPS=1, independent +// of all other settings (appropriate for conduits without multi-ep support). +// If set, GASNETC_MAXEPS_MAX it is used to limit a user's --with-maxeps (and a +// global default limit is used otherwise). +#define GASNETC_MAXEPS_DFLT 33 // Initial (limited) multi-EP support +//#define GASNETC_MAXEPS_MAX ### // leave unset for default +#endif + /* this can be used to add conduit-specific statistical collection values (see gasnet_trace.h) */ #define GASNETC_CONDUIT_STATS(CNT,VAL,TIME) \ @@ -160,11 +228,11 @@ VAL(C, FIREHOSE_UNPIN, pages) #define GASNETC_FATALSIGNAL_CALLBACK(sig) gasnetc_fatalsignal_callback(sig) - extern void gasnetc_fatalsignal_callback(int sig); + extern void gasnetc_fatalsignal_callback(int _sig); #if GASNETC_IBV_ODP #define GASNETC_FATALSIGNAL_CLEANUP_CALLBACK(sig) gasnetc_fatalsignal_cleanup_callback(sig) - extern void gasnetc_fatalsignal_cleanup_callback(int sig); + extern void gasnetc_fatalsignal_cleanup_callback(int _sig); #endif #if PLATFORM_OS_DARWIN && !GASNET_SEQ diff --git a/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core_help.h b/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core_help.h index 142028277d30..a751a74d63a9 100644 --- a/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core_help.h +++ b/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core_help.h @@ -20,10 +20,4 @@ extern int gasnetc_pthread_create(gasneti_pthread_create_fn_t *, pthread_t *, const pthread_attr_t *, void *(*)(void *), void *); #endif -#if defined(GASNET_SEGMENT_FAST) - #define GASNETC_PIN_SEGMENT 1 -#else - #define GASNETC_PIN_SEGMENT 0 -#endif - #endif diff --git a/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core_internal.h b/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core_internal.h index a4042cb316d6..1d53478f766d 100644 --- a/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core_internal.h +++ 
b/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core_internal.h @@ -10,7 +10,12 @@ #include #include -#if GASNETC_IBV_ODP +// This establishes precedence in one central place +#if GASNETC_IBV_ODP_MLNX && GASNETC_IBV_ODP_CORE + #undef GASNETC_IBV_ODP_MLNX +#endif + +#if GASNETC_IBV_ODP_MLNX #define GASNETI_NEED_VERBS_EXP_H 1 #endif @@ -74,7 +79,7 @@ extern gasneti_atomic_t gasnetc_exit_running; * ENOSPC from ibv_create_cq() after a few thousand tests have run. * So, we will make a best-effort to at least destroy QPs and CQs. */ -#if PLATFORM_OS_SOLARIS || GASNET_DEBUG +#if PLATFORM_OS_SOLARIS || GASNETC_IBV_QUIESCE #define GASNETC_IBV_SHUTDOWN 1 #endif @@ -83,7 +88,7 @@ extern gasneti_atomic_t gasnetc_exit_running; * These are registered early and are available even before _attach() */ #define _hidx_gasnetc_ack 0 /* Special case */ -#define _hidx_gasnetc_exchg_reqh (GASNETC_HANDLER_BASE+0) +#define _hidx_gasnetc_hbarr_reqh (GASNETC_HANDLER_BASE+0) #define _hidx_gasnetc_exit_reduce_reqh (GASNETC_HANDLER_BASE+1) #define _hidx_gasnetc_exit_role_reqh (GASNETC_HANDLER_BASE+2) #define _hidx_gasnetc_exit_role_reph (GASNETC_HANDLER_BASE+3) @@ -195,8 +200,10 @@ typedef struct { int32_t nBytes; gex_AM_Arg_t args[GASNETC_MAX_ARGS]; } gasnetc_longmsg_t; -#define GASNETC_MSG_LONG_ARGSEND(nargs) GASNETC_ARGSEND_AUX(gasnetc_longmsg_t,nargs) -#define GASNETC_MSG_LONG_DATA(msg,nargs) (void *)(&msg->longmsg.args[(unsigned int)nargs]) +#define GASNETC_MSG_LONG_ARGSEND(nargs) /* Note 8-byte alignment for payload */ \ + GASNETI_ALIGNUP(GASNETC_ARGSEND_AUX(gasnetc_longmsg_t,nargs), 8) +#define GASNETC_MSG_LONG_DATA(msg,nargs) \ + ((void *)((uintptr_t)(msg) + GASNETC_MSG_LONG_ARGSEND(nargs))) typedef union { uint8_t raw[GASNETC_BUFSZ]; @@ -367,7 +374,12 @@ void gasnetc_counter_wait(gasnetc_counter_t *counter, int handler_context GASNET #if (GASNETC_IB_MAX_HCAS > 1) #define GASNETC_FOR_ALL_HCA_INDEX(h) for (h = 0; h < gasnetc_num_hcas; ++h) #define GASNETC_FOR_ALL_HCA(p) for (p = 
&gasnetc_hca[0]; p < &gasnetc_hca[gasnetc_num_hcas]; ++p) +#else + #define GASNETC_FOR_ALL_HCA_INDEX(h) for (h = 0; h < 1; ++h) + #define GASNETC_FOR_ALL_HCA(p) for (p = &gasnetc_hca[0]; p < &gasnetc_hca[1]; ++p) +#endif +#if GASNETC_IBV_MAX_HCAS_CONFIGURE // Includes multi-rail support w/ 1 HCA // Need a couple cache lines for dummy AMO accesses extern gasneti_auxseg_request_t gasnetc_fence_auxseg_alloc(gasnet_seginfo_t *auxseg_info); #define GASNETC_AUXSEG_FNS() gasnetc_fence_auxseg_alloc, @@ -375,11 +387,10 @@ void gasnetc_counter_wait(gasnetc_counter_t *counter, int handler_context GASNET // Use AMO after Put to fence for strict memory model adherence extern int gasnetc_use_fenced_puts; #define GASNETC_USE_FENCED_PUTS gasnetc_use_fenced_puts + #define GASNETC_HAVE_FENCED_PUTS 1 #else - #define GASNETC_FOR_ALL_HCA_INDEX(h) for (h = 0; h < 1; ++h) - #define GASNETC_FOR_ALL_HCA(p) for (p = &gasnetc_hca[0]; p < &gasnetc_hca[1]; ++p) - #define GASNETC_USE_FENCED_PUTS 0 + #undef GASNETC_HAVE_FENCED_PUTS #endif /* ------------------------------------------------------------------------------------ */ @@ -551,14 +562,10 @@ extern gasnetc_EP_t gasnetc_ep0; typedef struct gasnetc_Segment_t_ { GASNETI_SEGMENT_COMMON // conduit-indep part as prefix - int idx; // location in segment table - #if GASNETC_PIN_SEGMENT // memory registation info (per-HCA) uint32_t seg_lkey[GASNETC_IB_MAX_HCAS]; - #if GASNETC_IBV_SHUTDOWN - gasnetc_memreg_t seg_reg[GASNETC_IB_MAX_HCAS]; - #endif + gasnetc_memreg_t seg_reg[GASNETC_IB_MAX_HCAS]; #endif } *gasnetc_Segment_t; @@ -769,6 +776,7 @@ typedef union { /* Routines in gasnet_core_connect.c */ #if GASNETC_IBV_XRC +extern size_t gasnetc_xrc_preinit(const uint16_t *remote_lids); extern int gasnetc_xrc_init(void **shared_mem_p); #endif extern int gasnetc_connect_init(gasnetc_EP_t ep0); // TODO-EX: multi-ep support? 
@@ -917,6 +925,9 @@ extern size_t gasnetc_am_inline_limit_sndrcv; extern enum ibv_mtu gasnetc_max_mtu; extern int gasnetc_qp_timeout; extern int gasnetc_qp_retry_count; +#if GASNETC_PIN_SEGMENT + extern uint32_t *gasnetc_np_rkeys[GASNET_MAXEPS]; +#endif #if GASNETC_IBV_SRQ extern int gasnetc_rbuf_limit; @@ -944,16 +955,11 @@ extern gasnetc_hca_t gasnetc_hca[GASNETC_IB_MAX_HCAS]; extern uintptr_t gasnetc_max_msg_sz; extern size_t gasnetc_put_stripe_sz, gasnetc_put_stripe_split; extern size_t gasnetc_get_stripe_sz, gasnetc_get_stripe_split; -#if GASNETC_PIN_SEGMENT - extern uintptr_t gasnetc_seg_start; - extern uintptr_t gasnetc_seg_len; -#endif extern size_t gasnetc_fh_align; extern size_t gasnetc_fh_align_mask; extern firehose_info_t gasnetc_firehose_info; extern gasnetc_port_info_t *gasnetc_port_tbl; extern int gasnetc_num_ports; -extern gex_Rank_t gasnetc_remote_nodes; #if GASNETC_DYNAMIC_CONNECT extern gasnetc_sema_t gasnetc_zero_sema; #endif diff --git a/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core_sndrcv.c b/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core_sndrcv.c index d3b95b50880a..9dafd32cac9d 100644 --- a/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core_sndrcv.c +++ b/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_core_sndrcv.c @@ -63,7 +63,13 @@ int gasnetc_am_credits_slack; int gasnetc_am_credits_slack_orig; int gasnetc_alloc_qps; int gasnetc_num_qps; -gex_Rank_t gasnetc_remote_nodes = 0; + +#if GASNETC_PIN_SEGMENT + // Rkeys for non-primordial remote EPs + // One dense array per ep_index, allocated lazily + // TODO: more scalable storage (bug 4088) + uint32_t *gasnetc_np_rkeys[GASNET_MAXEPS] = {NULL, }; +#endif /* ------------------------------------------------------------------------------------ * * File-scoped types * @@ -76,7 +82,7 @@ typedef struct { /* Thread-local list of sreq's. 
*/ gasnetc_sreq_t *sreqs; - /* Nothing else yet, but lockfree algorithms for x84_64 and ia64 will also need + /* Nothing else yet, but lockfree algorithms for (at least) x84_64 will also need * some thread-local data if they are ever implemented. */ } gasnetc_per_thread_t; @@ -84,7 +90,7 @@ typedef struct { * File-scoped variables * ------------------------------------------------------------------------------------ */ -#if GASNETC_IB_MAX_HCAS > 1 +#if GASNETC_HAVE_FENCED_PUTS static int gasnetc_op_needs_fence_mask; #endif @@ -167,7 +173,7 @@ extern void gasnetc_cb_counter_rel(gasnetc_atomic_val_t *cnt) { } -#if GASNETC_IB_MAX_HCAS > 1 +#if GASNETC_HAVE_FENCED_PUTS /* ------------------------------------------------------------------------------------ * * AuxSeg space for dummy Atomic ops used to fence multi-rail Puts * TODO: this use of auxseg is yet another an O(ranks) table we must seek to eliminate @@ -324,14 +330,25 @@ gasnetc_create_cq(struct ibv_context * hca_hndl, int req_size, #define GASNETC_FH_RKEY(_cep, _fhptr) ((_fhptr)->client.rkey[GASNETC_HCA_IDX(_cep)]) #define GASNETC_FH_LKEY(_cep, _fhptr) ((_fhptr)->client.lkey[GASNETC_HCA_IDX(_cep)]) -// TODO-EX: following functions are a hack for auxseg keys, and will be -// replaced with more general multi-registration support later. +// TODO-EX: following functions are a hack for multi-segment keys, which +// should be replaced with more general multi-registration support later. #if GASNETC_PIN_SEGMENT + // idx = -1 is aux segment + // idx = 0 is primordial segment + // idx > 0 is non-primordial segment GASNETI_INLINE(gasnetc_seg_rkey) - uint32_t gasnetc_seg_rkey(gasnetc_cep_t *cep, int is_aux) + uint32_t gasnetc_seg_rkey(gasnetc_cep_t *cep, int idx) { - return ( ! is_aux ) ? 
GASNETC_SEG_RKEY(cep) - : cep->hca->aux_rkeys[gasnetc_epid2node(cep->epid)]; + if (!idx) { + return GASNETC_SEG_RKEY(cep); + } else if (idx < 0) { + return cep->hca->aux_rkeys[gasnetc_epid2node(cep->epid)]; + } else { + gasneti_assume(idx < GASNET_MAXEPS); + gasneti_assert(gasnetc_np_rkeys[idx]); + size_t offset = gasnetc_num_hcas * gasnetc_epid2node(cep->epid) + GASNETC_HCA_IDX(cep); + return gasnetc_np_rkeys[idx][offset]; + } } GASNETI_INLINE(gasnetc_seg_lkey) uint32_t gasnetc_seg_lkey(gasnetc_EP_t ep, gasnetc_cep_t *cep, int is_aux) @@ -385,8 +402,9 @@ void gasnetc_rcv_post(gasnetc_cep_t *cep, gasnetc_rbuf_t *rbuf) { gasneti_assert(cep); gasneti_assert(rbuf); - /* check for attempted loopback traffic */ - gasneti_assert(!GASNETI_NBRHD_JOBRANK_IS_LOCAL(gasnetc_epid2node(cep->epid))); + // In the absence of SRQ, check for attempted intra-nbrhd traffic + // With SRQ, however, initialization occurs via the first cep per HCA, which maybe in-nbrhd + gasneti_assert(gasnetc_use_srq || !GASNETI_NBRHD_JOBRANK_IS_LOCAL(gasnetc_epid2node(cep->epid))); rbuf->cep = cep; rbuf->rr_sg.lkey = GASNETC_RCV_LKEY(cep); @@ -516,7 +534,7 @@ void gasnetc_processPacket(gasnetc_cep_t *cep, gasnetc_rbuf_t *rbuf, uint32_t fl size_t nbytes = buf->longmsg.nBytes & 0x7fffffff; if (buf->longmsg.nBytes & 0x80000000) { /* Must relocate the payload which is packed like a Medium. 
*/ - gasneti_assert(nbytes <= GASNETC_MAX_PACKEDLONG); + gasneti_assert(nbytes <= GASNETC_MAX_PACKEDLONG_(user_numargs)); GASNETI_MEMCPY(data, GASNETC_MSG_LONG_DATA(buf, full_numargs), (size_t)nbytes); } GASNETI_RUN_HANDLER_LONG(isreq,handler_id,handler_fn,token,args,user_numargs,data,(size_t)nbytes); @@ -544,8 +562,8 @@ void gasnetc_processPacket(gasnetc_cep_t *cep, gasnetc_rbuf_t *rbuf, uint32_t fl } \ } while(0) #define GASNETC_COLLECT_FHS() do { \ - gasneti_assert(sreq->fh_count >= 0); \ - gasneti_assert(sreq->fh_count <= GASNETC_MAX_FH); \ + gasneti_assert_int(sreq->fh_count ,>=, 0); \ + gasneti_assert_int(sreq->fh_count ,<=, GASNETC_MAX_FH); \ for (i=0; ifh_count; ++i, ++fh_num) { \ fh_ptrs[fh_num] = sreq->fh_ptr[i]; \ } \ @@ -565,9 +583,9 @@ void gasnetc_processPacket(gasnetc_cep_t *cep, gasnetc_rbuf_t *rbuf, uint32_t fl } while(0) #define GASNETC_FREE_BBUFS() do {} while (0) #define GASNETC_COLLECT_FHS() do { \ - gasneti_assert(sreq->fh_count >= 0); \ + gasneti_assert_int(sreq->fh_count ,>=, 0); \ if (sreq->fh_count > 0) { \ - gasneti_assert(sreq->fh_count <= GASNETC_MAX_FH); \ + gasneti_assert_int(sreq->fh_count ,<=, GASNETC_MAX_FH); \ firehose_release(sreq->fh_ptr, sreq->fh_count); \ } \ } while(0) @@ -729,7 +747,7 @@ static int gasnetc_snd_reap(int limit) { sreq->comp.cb(sreq->comp.data); } #if GASNETC_PIN_SEGMENT - gasneti_assert(sreq->fh_count == 0); + gasneti_assert_int(sreq->fh_count ,==, 0); #else GASNETC_COLLECT_FHS(); #endif @@ -1216,7 +1234,6 @@ void gasnetc_snd_validate(gasnetc_sreq_t *sreq, struct ibv_send_wr *sr_desc, int gasneti_assert(sreq); gasneti_assert(sreq->cep); - gasneti_assert(!GASNETI_NBRHD_JOBRANK_IS_LOCAL(gasnetc_epid2node(sreq->cep->epid))); gasneti_assert(sr_desc); gasneti_assert(sr_desc->num_sge >= 1); gasneti_assert(sr_desc->num_sge <= GASNETC_SND_SG); @@ -1341,7 +1358,6 @@ void gasnetc_snd_post_common(gasnetc_sreq_t *sreq, struct ibv_send_wr *sr_desc, /* Must be bound to a qp by now */ gasneti_assert(cep != NULL ); - 
gasneti_assert(!GASNETI_NBRHD_JOBRANK_IS_LOCAL(gasnetc_epid2node(sreq->epid))); gasneti_assert(sreq->opcode != GASNETC_OP_FREE); gasneti_assert(sreq->opcode != GASNETC_OP_INVALID); @@ -1372,7 +1388,7 @@ void gasnetc_snd_post_common(gasnetc_sreq_t *sreq, struct ibv_send_wr *sr_desc, gasnetc_snd_validate(sreq, sr_desc, 1, "POST_SR"); } -#if GASNETC_IB_MAX_HCAS > 1 +#if GASNETC_HAVE_FENCED_PUTS // When GASNET_USE_FENCED_PUTS is enabled, we must post both the Put and an // Atomic such that the conduit-level remote completion callback for the Put // will not execute until the ibv-level CQE for the Atomic. @@ -1489,7 +1505,7 @@ static void gasnetc_rcv_thread(struct ibv_wc *comp_p, void *arg) GASNETI_INLINE(gasnetc_bounce_common) void gasnetc_bounce_common( gasnetc_EP_t ep, gasnetc_epid_t epid, - int rem_auxseg, + int rem_epidx, struct ibv_send_wr *sr_desc, size_t len, gasnetc_sreq_t *sreq, @@ -1502,7 +1518,7 @@ void gasnetc_bounce_common( sr_desc->sg_list[0].length = len; gasnetc_cep_t *cep = gasnetc_bind_cep(ep, epid, sreq); - sr_desc->wr.rdma.rkey = gasnetc_seg_rkey(cep, rem_auxseg); + sr_desc->wr.rdma.rkey = gasnetc_seg_rkey(cep, rem_epidx); sr_desc->sg_list[0].lkey = GASNETC_SND_LKEY(cep); gasnetc_snd_post(sreq, sr_desc); @@ -1516,7 +1532,7 @@ void gasnetc_bounce_common( GASNETI_INLINE(gasnetc_zerocp_common) size_t gasnetc_zerocp_common( gasnetc_EP_t ep, gasnetc_epid_t epid, - int rem_auxseg, + int rem_epidx, struct ibv_send_wr *sr_desc, size_t len, gasnetc_sreq_t *sreq, @@ -1553,7 +1569,6 @@ size_t gasnetc_zerocp_common( size_t sent = 0; // TODO-EX: - // All uses of loc_auxseg are a temporary hack // This will be replaced by general multi-registration support later const int loc_auxseg = gasneti_in_local_auxsegment((gasneti_EP_t)ep, (void*)loc_addr, len); @@ -1599,7 +1614,7 @@ size_t gasnetc_zerocp_common( /* We hold a local firehose already, we can only 'try' or risk deadlock */ fh_loc = gasnetc_fh_try_local_pin(loc_addr, 1); } - gasneti_assert(sreq->fh_count > 0); 
+ gasneti_assert_int(sreq->fh_count ,>, 0); sr_desc->num_sge = sreq->fh_count; cep = gasnetc_bind_cep(ep, epid, sreq); for (seg = 0; seg < sr_desc->num_sge; ++seg) { @@ -1609,7 +1624,7 @@ size_t gasnetc_zerocp_common( sent = len - remain; } - sr_desc->wr.rdma.rkey = gasnetc_seg_rkey(cep, rem_auxseg); + sr_desc->wr.rdma.rkey = gasnetc_seg_rkey(cep, rem_epidx); gasneti_assert(sent > 0); return sent; @@ -1619,7 +1634,7 @@ size_t gasnetc_zerocp_common( GASNETI_INLINE(gasnetc_do_put_inline) void gasnetc_do_put_inline( gasnetc_EP_t ep, const gasnetc_epid_t epid, - int rem_auxseg, + int rem_epidx, struct ibv_send_wr *sr_desc, size_t nbytes, gasnetc_atomic_val_t *remote_cnt, gasnetc_cb_t remote_cb @@ -1631,6 +1646,7 @@ void gasnetc_do_put_inline( gasneti_assert(nbytes != 0); gasneti_assert(nbytes <= gasnetc_inline_limit); + gasneti_assert(gasneti_i_segment_kind_is_host(ep->_segment)); sreq = gasnetc_get_sreq(GASNETC_OP_PUT_INLINE GASNETI_THREAD_PASS); sreq->fh_count = 0; @@ -1645,7 +1661,7 @@ void gasnetc_do_put_inline( sr_desc->sg_list[0].length = nbytes; cep = gasnetc_bind_cep(ep, epid, sreq); - sr_desc->wr.rdma.rkey = gasnetc_seg_rkey(cep, rem_auxseg); + sr_desc->wr.rdma.rkey = gasnetc_seg_rkey(cep, rem_epidx); gasnetc_snd_post_inline(sreq, sr_desc); sr_desc->wr.rdma.remote_addr += nbytes; @@ -1656,7 +1672,7 @@ void gasnetc_do_put_inline( GASNETI_INLINE(gasnetc_do_put_bounce) void gasnetc_do_put_bounce( gasnetc_EP_t ep, const gasnetc_epid_t epid, - int rem_auxseg, + int rem_epidx, struct ibv_send_wr *sr_desc, size_t nbytes, gasnetc_atomic_val_t *remote_cnt, gasnetc_cb_t remote_cb @@ -1669,6 +1685,7 @@ void gasnetc_do_put_bounce( : GASNETC_OP_PUT_BOUNCE; gasneti_assert(nbytes != 0); + gasneti_assert(gasneti_i_segment_kind_is_host(ep->_segment)); do { gasnetc_sreq_t * const sreq = gasnetc_get_sreq(sreq_op GASNETI_THREAD_PASS); @@ -1682,7 +1699,7 @@ void gasnetc_do_put_bounce( sreq->comp.cb = remote_cb; } - gasnetc_bounce_common(ep, epid, rem_auxseg, sr_desc, count, sreq, 
IBV_WR_RDMA_WRITE GASNETI_THREAD_PASS); + gasnetc_bounce_common(ep, epid, rem_epidx, sr_desc, count, sreq, IBV_WR_RDMA_WRITE GASNETI_THREAD_PASS); src += count; nbytes -= count; @@ -1695,7 +1712,7 @@ void gasnetc_do_put_bounce( GASNETI_INLINE(gasnetc_do_put_zerocp) size_t gasnetc_do_put_zerocp( gasnetc_EP_t ep, const gasnetc_epid_t epid, - int rem_auxseg, + int rem_epidx, struct ibv_send_wr *sr_desc, size_t nbytes, gasnetc_atomic_val_t *cnt, gasnetc_cb_t cb @@ -1712,7 +1729,7 @@ size_t gasnetc_do_put_zerocp( // loop over max-length xfers do { gasnetc_sreq_t * const sreq = gasnetc_get_sreq(sreq_op GASNETI_THREAD_PASS); - size_t count = gasnetc_zerocp_common(ep, epid, rem_auxseg, sr_desc, nbytes, sreq, + size_t count = gasnetc_zerocp_common(ep, epid, rem_epidx, sr_desc, nbytes, sreq, IBV_WR_RDMA_WRITE GASNETI_THREAD_PASS); if_pf (!count) { // Failed to register memory, such as for read-only memory (bug 3338) @@ -1743,7 +1760,7 @@ size_t gasnetc_do_put_zerocp( GASNETI_INLINE(gasnetc_do_get_bounce) void gasnetc_do_get_bounce( gasnetc_EP_t ep, const gasnetc_epid_t epid, - int rem_auxseg, + int rem_epidx, struct ibv_send_wr *sr_desc, size_t nbytes, gasnetc_atomic_val_t *remote_cnt, gasnetc_cb_t remote_cb @@ -1754,6 +1771,7 @@ void gasnetc_do_get_bounce( gasneti_assert(nbytes != 0); gasneti_assert(remote_cnt != NULL); + gasneti_assert(gasneti_i_segment_kind_is_host(ep->_segment)); do { gasnetc_sreq_t * const sreq = gasnetc_get_sreq(GASNETC_OP_GET_BOUNCE GASNETI_THREAD_PASS); @@ -1768,7 +1786,7 @@ void gasnetc_do_get_bounce( sreq->comp.cb = remote_cb; sreq->comp.data = remote_cnt; - gasnetc_bounce_common(ep, epid, rem_auxseg, sr_desc, count, sreq, IBV_WR_RDMA_READ GASNETI_THREAD_PASS); + gasnetc_bounce_common(ep, epid, rem_epidx, sr_desc, count, sreq, IBV_WR_RDMA_READ GASNETI_THREAD_PASS); dst += count; } while (nbytes); @@ -1782,7 +1800,7 @@ void gasnetc_do_get_bounce( GASNETI_INLINE(gasnetc_do_get_zerocp) void gasnetc_do_get_zerocp( gasnetc_EP_t ep, const gasnetc_epid_t 
epid, - int rem_auxseg, + int rem_epidx, struct ibv_send_wr *sr_desc, size_t nbytes, gasnetc_atomic_val_t *remote_cnt, gasnetc_cb_t remote_cb @@ -1796,7 +1814,7 @@ void gasnetc_do_get_zerocp( // loop over max-length xfers do { gasnetc_sreq_t * const sreq = gasnetc_get_sreq(GASNETC_OP_GET_ZEROCP GASNETI_THREAD_PASS); - size_t count = gasnetc_zerocp_common(ep, epid, rem_auxseg, sr_desc, nbytes, sreq, + size_t count = gasnetc_zerocp_common(ep, epid, rem_epidx, sr_desc, nbytes, sreq, IBV_WR_RDMA_READ GASNETI_THREAD_PASS); if_pf (!count) { // TODO: idealy we could retry memory registration to tolerate transient read-only @@ -1927,8 +1945,8 @@ void gasnetc_fh_post(gasnetc_sreq_t *sreq, enum ibv_wr_opcode op GASNETI_THREAD_ size_t remain; int i; - gasneti_assert(sreq->fh_count >= 2); - gasneti_assert(sreq->fh_count <= GASNETC_MAX_FH); + gasneti_assert_int(sreq->fh_count ,>=, 2); + gasneti_assert_int(sreq->fh_count ,<=, GASNETC_MAX_FH); gasneti_assert(sreq->fh_ptr[0] != NULL); gasneti_assert(sreq->fh_ptr[1] != NULL); @@ -1975,7 +1993,7 @@ static void gasnetc_fh_do_put(gasnetc_sreq_t *sreq GASNETI_THREAD_FARG) { if (sreq->comp.cb != NULL) { sreq->comp.cb(sreq->comp.data); } - gasneti_assert(sreq->fh_count > 0); + gasneti_assert_int(sreq->fh_count ,>, 0); firehose_release(sreq->fh_ptr, sreq->fh_count); sreq->opcode = GASNETC_OP_FREE; break; @@ -2317,8 +2335,6 @@ extern int gasnetc_sndrcv_limits(void) { gasnetc_rbuf_spares = MAX(1,gasneti_getenv_int_withdefault("GASNET_RBUF_SPARES", threads, 0)); } - gasnetc_remote_nodes = gasneti_nodes - (GASNET_PSHM ? 
gasneti_nodemap_local_count : 1); - /* Count normal qps to be placed on each HCA */ if (gasneti_nodes == 1) { GASNETC_FOR_ALL_HCA(hca) { @@ -2333,7 +2349,7 @@ extern int gasnetc_sndrcv_limits(void) { for (i = 0; i < gasnetc_num_qps; ++i) { hca = &gasnetc_hca[gasnetc_port_tbl[i % gasnetc_num_ports].hca_index]; hca->qps += 1; - hca->max_qps += gasnetc_remote_nodes; + hca->max_qps += gasneti_nodes; } } @@ -2354,7 +2370,7 @@ extern int gasnetc_sndrcv_limits(void) { } } gasnetc_op_oust_pp /= gasnetc_num_qps; - gasnetc_op_oust_per_qp = MIN(gasnetc_op_oust_per_qp, gasnetc_op_oust_pp*(gasneti_nodes-1)); + gasnetc_op_oust_per_qp = MIN(gasnetc_op_oust_per_qp, gasnetc_op_oust_pp*gasneti_nodes); gasnetc_op_oust_limit = gasnetc_num_qps * gasnetc_op_oust_per_qp; GASNETI_TRACE_PRINTF(I, ("Final/effective GASNET_NETWORKDEPTH_TOTAL = %d", gasnetc_op_oust_limit)); @@ -2364,11 +2380,6 @@ extern int gasnetc_sndrcv_limits(void) { * (2) (gasnetc_am_oust_pp * hca->max_qps) used to catch Requests * (3) (gasnetc_am_oust_pp * hca->max_qps) used to catch Replies * However distribution over QPs and SRQ may each reduce the second two. - * - * Note that we use (gasneti_nodes - 1) rather than gasnetc_remote_nodes. This is because - * gasnetc_remote_nodes may vary among processes, possibly leading to making different - * gasnet_use_srq decisions across nodes. - * TODO: As a result, some rbufs may be allocated for PSHM peers when SRQ is inactive. 
*/ gasnetc_am_oust_pp /= gasnetc_num_qps; gasnetc_am_rqst_per_qp = gasnetc_am_oust_pp * (gasneti_nodes - 1); @@ -2388,7 +2399,7 @@ extern int gasnetc_sndrcv_limits(void) { } else { gasnetc_am_oust_limit = MIN(gasnetc_am_oust_limit, gasnetc_op_oust_limit); if (gasnetc_am_oust_limit > gasnetc_num_qps * gasnetc_am_rqst_per_qp) { - GASNETI_RETURN_ERRR(RESOURCE, "GASNET_AM_CREDIT_{PP,TOTAL} exceed HCA capabilities"); + GASNETI_RETURN_ERRR(RESOURCE, "GASNET_AM_CREDITS_{PP,TOTAL} exceed HCA capabilities"); } } GASNETI_TRACE_PRINTF(I, ("Final/effective GASNET_AM_CREDITS_TOTAL = %d", gasnetc_am_oust_limit)); @@ -2405,14 +2416,6 @@ extern int gasnetc_sndrcv_limits(void) { } else { gasnetc_bbuf_limit = MIN(gasnetc_bbuf_limit, gasnetc_op_oust_limit); } - if (gasnetc_remote_nodes == 0) { - #if GASNET_PSHM - /* PSHM will handle all of the loopback traffic */ - #else - /* no AM or RDMA on the wire, but still need bufs for constructing AMs */ - gasnetc_bbuf_limit = gasnetc_num_qps * gasnetc_am_oust_pp; - #endif - } /* SRQ may raise this. So, report is deferred. 
*/ const int rcv_spares = gasnetc_num_hcas * gasnetc_rbuf_spares; @@ -2570,7 +2573,7 @@ extern int gasnetc_sndrcv_init(gasnetc_EP_t ep) { #if GASNETC_DYNAMIC_CONNECT /* Default to handling 4 + 2*lg(remote_nodes) incomming UD requests and 4 outgoing */ gasnetc_ud_rcvs = 1; - while ((1 << gasnetc_ud_rcvs) < (int)gasnetc_remote_nodes) { + while ((1 << gasnetc_ud_rcvs) < (int)gasneti_nodes) { ++gasnetc_ud_rcvs; } gasnetc_ud_rcvs = 4 + 2 * gasnetc_ud_rcvs; @@ -2602,7 +2605,7 @@ extern int gasnetc_sndrcv_init(gasnetc_EP_t ep) { gasneti_assert(act_size >= cqe_count); /* We don't set rcv_count = act_size here, as that could nearly double the memory allocated below */ - if (gasnetc_remote_nodes) { + if (gasneti_nodes > 1) { /* Allocated pinned memory for receive buffers */ size = GASNETI_PAGE_ALIGNUP(rcv_count * sizeof(gasnetc_buffer_t)); buf = gasnetc_mmap(size); @@ -2719,7 +2722,16 @@ extern int gasnetc_sndrcv_init(gasnetc_EP_t ep) { } } -#if GASNETC_IB_MAX_HCAS > 1 +#if GASNETC_PIN_SEGMENT + // Allocate rkey tables for primodial segments + // TODO: move primordial rkeys to shared (PSHM) storage + GASNETC_FOR_ALL_HCA(hca) { + hca->rkeys = gasneti_calloc(gasneti_nodes, sizeof(uint32_t)); + gasneti_leak(hca->rkeys); + } +#endif + +#if GASNETC_HAVE_FENCED_PUTS // Speed critical path checks gasnetc_op_needs_fence_mask = gasnetc_use_fenced_puts ? 
GASNETC_OP_NEEDS_FENCE : 0; #endif @@ -2731,35 +2743,36 @@ extern int gasnetc_sndrcv_init(gasnetc_EP_t ep) { } extern void gasnetc_sndrcv_init_peer(gex_Rank_t node, gasnetc_cep_t *cep) { - if (!GASNETI_NBRHD_JOBRANK_IS_LOCAL(node)) { - const int first = !cep->hca->num_qps; - for (int i = 0; i < gasnetc_alloc_qps; ++i, ++cep) { - gasnetc_hca_t *hca = cep->hca; - cep->epid = gasnetc_epid(node, i); - cep->rbuf_freelist = &hca->rbuf_freelist; - - #if GASNETC_IB_MAX_HCAS > 1 - /* "Cache" the local keys associated w/ this cep */ + const int first = !cep->hca->num_qps; + for (int i = 0; i < gasnetc_alloc_qps; ++i, ++cep) { + gasnetc_hca_t *hca = cep->hca; + cep->epid = gasnetc_epid(node, i); + cep->snd_cq_sema_p = &gasnetc_cq_semas[GASNETC_HCA_IDX(cep)]; + + #if GASNETC_IB_MAX_HCAS > 1 + /* "Cache" the local keys associated w/ this cep */ + if (gasneti_nodes > 1) { cep->rcv_lkey = hca->rcv_reg.handle->lkey; - cep->snd_lkey = hca->snd_reg.handle->lkey; - #endif + } + cep->snd_lkey = hca->snd_reg.handle->lkey; + #endif - hca->num_qps++; - gasneti_assert(hca->num_qps <= hca->max_qps); + cep->rbuf_freelist = &hca->rbuf_freelist; - if (gasnetc_use_srq) { - // Prepost to SRQ for only one peer on each HCA - if (first) { - for (int j = 0; j < gasnetc_am_rqst_per_qp; ++j) { - gasnetc_rbuf_t *rbuf = gasnetc_lifo_pop(cep->rbuf_freelist); - gasnetc_rcv_post(cep, rbuf); - } - } - } else - for (int j = 0; j < 2 * gasnetc_am_oust_pp; ++j) { - // Prepost one rcv buffer for each possible incomming Request or Reply + // Prepost to SRQ for only one peer on each HCA + if (gasnetc_use_srq && first) { + for (int j = 0; j < gasnetc_am_rqst_per_qp; ++j) { gasnetc_rcv_post(cep, gasnetc_lifo_pop(cep->rbuf_freelist)); } + } + + if (!GASNETI_NBRHD_JOBRANK_IS_LOCAL(node)) { // AM resources never used w/i NBRHD + if (!gasnetc_use_srq) { + for (int j = 0; j < 2 * gasnetc_am_oust_pp; ++j) { + // Prepost one rcv buffer for each possible incomming Request or Reply + gasnetc_rcv_post(cep, 
gasnetc_lifo_pop(cep->rbuf_freelist)); + } + } /* Setup semaphores/counters */ /* sq_sema now set when QP is created */ @@ -2768,21 +2781,15 @@ extern void gasnetc_sndrcv_init_peer(gex_Rank_t node, gasnetc_cep_t *cep) { } else { gasnetc_sema_init(&cep->am_rem, gasnetc_am_oust_pp, gasnetc_am_oust_pp); } - cep->snd_cq_sema_p = &gasnetc_cq_semas[GASNETC_HCA_IDX(cep)]; - } - } else { - /* Should never use these for loopback or same supernode */ - /* XXX: is this now unreachable with new connect code? */ - for (int i = 0; i < gasnetc_alloc_qps; ++i, ++cep) { - cep->epid = gasnetc_epid(node, i); - #if GASNETC_IBV_XRC - gasneti_assert(GASNETC_CEP_SQ_SEMA(cep) == NULL); - #else - gasnetc_sema_init(GASNETC_CEP_SQ_SEMA(cep), 0, 0); - #endif + } else { + /* Should never use AM resources for loopback or same supernode */ + /* XXX: is this now unreachable with new connect code? */ gasnetc_sema_init(&cep->am_rem, 0, 0); gasnetc_atomic_set(&cep->am_flow.credit, 0, 0); } + + hca->num_qps++; + gasneti_assert(hca->num_qps <= hca->max_qps); } } @@ -2799,6 +2806,7 @@ extern void gasnetc_sndrcv_attach_peer(gex_Rank_t node, gasnetc_cep_t *cep) { #if GASNETC_PIN_SEGMENT for (int i = 0; i < gasnetc_alloc_qps; ++i, ++cep) { gasnetc_hca_t *hca = cep->hca; + gasneti_assert(hca->rkeys); cep->rkey = hca->rkeys[node]; } #else @@ -2958,7 +2966,7 @@ extern int gasnetc_sndrcv_shutdown(void) { GASNETC_FOR_ALL_HCA(hca) { #if GASNETC_IBV_SRQ - if (gasnetc_use_srq && gasnetc_remote_nodes) { + if (gasnetc_use_srq && (gasneti_nodes > 1)) { rc = ibv_destroy_srq(hca->rqst_srq); GASNETC_IBV_CHECK(rc, "from ibv_destroy_srq(request)"); rc = ibv_destroy_srq(hca->repl_srq); @@ -2988,7 +2996,7 @@ extern int gasnetc_sndrcv_shutdown(void) { #if GASNETC_USE_RCV_THREAD extern void gasnetc_sndrcv_start_thread(void) { - if (gasnetc_remote_nodes && gasnetc_use_rcv_thread) { + if (gasnetc_use_rcv_thread) { int rcv_max_rate = gasneti_getenv_int_withdefault("GASNET_RCV_THREAD_RATE", 0, 0); gasnetc_hca_t *hca; @@ -3008,7 
+3016,7 @@ extern void gasnetc_sndrcv_start_thread(void) { } extern void gasnetc_sndrcv_stop_thread(int block) { - if (gasnetc_remote_nodes && gasnetc_use_rcv_thread) { + if (gasnetc_use_rcv_thread) { gasnetc_hca_t *hca; GASNETC_FOR_ALL_HCA(hca) { @@ -3088,14 +3096,27 @@ extern int gasnetc_rdma_put( gasnetc_cb_t remote_cb GASNETI_THREAD_FARG) { - gex_Rank_t jobrank = gasneti_e_tm_rank_to_jobrank(tm, rank); - gasnetc_EP_t ep = (gasnetc_EP_t) gasneti_import_tm(tm)->_ep; + gasnetc_EP_t ep = (gasnetc_EP_t) gasneti_e_tm_to_i_ep(tm); GASNETC_DECL_SR_DESC(sr_desc, GASNETC_SND_SG); // TODO-EX: - // All uses of rem_auxseg are a temporary hack // This will be replaced by general multi-registration support later - const int rem_auxseg = gasneti_in_auxsegment(jobrank, dst_ptr, nbytes); + const gex_EP_Location_t loc = gasneti_e_tm_rank_to_location(tm, rank, 0); + const gex_Rank_t jobrank = loc.gex_rank; + const int rem_epidx = gasneti_in_auxsegment(jobrank, dst_ptr, nbytes) ? -1 : loc.gex_ep_index; + + // To reach here legally, at least one of three things must be true: + // 1. local ep is bound to non-host memory + // 2. remote ep is neither primordial nor aux-seg + // 3. remote jobrank is not in-nbrhd + // Otherwise, PSHM should be used. + gasneti_assert(!gasneti_i_segment_kind_is_host(ep->_segment) || + (rem_epidx > 0) || + !GASNETI_NBRHD_JOBRANK_IS_LOCAL(jobrank)); + + // Local "device memory" can never use inline or bounce buffers + // TODO: maybe some devices classes can in the future? + const int device_mem = !gasneti_i_segment_kind_is_host(ep->_segment); gasneti_assert(nbytes != 0); @@ -3107,9 +3128,9 @@ extern int gasnetc_rdma_put( * Note that we do this based only on the size, without checking whether * the caller cares about local completion, or whether zero-copy is possible. 
*/ - if (nbytes <= gasnetc_inline_limit) + if ((nbytes <= gasnetc_inline_limit) && !device_mem) { - gasnetc_do_put_inline(ep, jobrank, rem_auxseg, sr_desc, nbytes, remote_cnt, remote_cb GASNETI_THREAD_PASS); + gasnetc_do_put_inline(ep, jobrank, rem_epidx, sr_desc, nbytes, remote_cnt, remote_cb GASNETI_THREAD_PASS); return 0; } @@ -3117,9 +3138,17 @@ extern int gasnetc_rdma_put( const int bias_remote_cnt = (remote_cb == gasnetc_cb_eop_put); if (bias_remote_cnt) ++(*remote_cnt); - // Distinct cases depending on whether LC matters or not + // Distinct cases below for host memory, depending on whether LC matters or not // TODO-EX: this may suggest 2 distinct functions are in order? - if (local_cb) { + if (device_mem) { + gasneti_assert(gasnetc_in_bound_segment(ep, (uintptr_t)src_ptr, nbytes)); + const int bias_local_cnt = (local_cb == gasnetc_cb_eop_alc); + if (bias_local_cnt) ++(*local_cnt); + size_t unsent = gasnetc_do_put_zerocp(ep, jobrank, rem_epidx, sr_desc, nbytes, + local_cnt, local_cb GASNETI_THREAD_PASS); + gasneti_assert_uint(unsent ,==, 0); + if (bias_local_cnt) local_cb(local_cnt); + } else if (local_cb) { const int bias_local_cnt = (local_cb == gasnetc_cb_eop_alc); if (bias_local_cnt) ++(*local_cnt); @@ -3133,9 +3162,9 @@ extern int gasnetc_rdma_put( (!GASNETC_USE_FIREHOSE && !gasnetc_in_bound_segment(ep, (uintptr_t)src_ptr, nbytes) && !gasneti_in_local_auxsegment((gasneti_EP_t)ep, src_ptr, nbytes)) || - ((to_xfer = gasnetc_do_put_zerocp(ep, jobrank, rem_auxseg, sr_desc, nbytes, + ((to_xfer = gasnetc_do_put_zerocp(ep, jobrank, rem_epidx, sr_desc, nbytes, local_cnt, local_cb GASNETI_THREAD_PASS)))) { - gasnetc_do_put_bounce(ep, jobrank, rem_auxseg, sr_desc, to_xfer, + gasnetc_do_put_bounce(ep, jobrank, rem_epidx, sr_desc, to_xfer, remote_cnt, remote_cb GASNETI_THREAD_PASS); } @@ -3147,9 +3176,9 @@ extern int gasnetc_rdma_put( if ((!GASNETC_USE_FIREHOSE && !gasnetc_in_bound_segment(ep, (uintptr_t)src_ptr, nbytes) && 
!gasneti_in_local_auxsegment((gasneti_EP_t)ep, src_ptr, nbytes)) || - ((to_xfer = gasnetc_do_put_zerocp(ep, jobrank, rem_auxseg, sr_desc, nbytes, + ((to_xfer = gasnetc_do_put_zerocp(ep, jobrank, rem_epidx, sr_desc, nbytes, remote_cnt, remote_cb GASNETI_THREAD_PASS)))) { - gasnetc_do_put_bounce(ep, jobrank, rem_auxseg, sr_desc, to_xfer, + gasnetc_do_put_bounce(ep, jobrank, rem_epidx, sr_desc, to_xfer, remote_cnt, remote_cb GASNETI_THREAD_PASS); } } @@ -3177,9 +3206,9 @@ extern int gasnetc_rdma_long_put( GASNETC_DECL_SR_DESC(sr_desc, GASNETC_SND_SG); // TODO-EX: - // All uses of rem_auxseg are a temporary hack // This will be replaced by general multi-registration support later - const int rem_auxseg = gasneti_in_auxsegment(gasnetc_epid2node(epid), dst_ptr, nbytes); + // TODO: this lacks support for non-primordial EP since we are missing the index information + const int rem_epidx = gasneti_in_auxsegment(gasnetc_epid2node(epid), dst_ptr, nbytes) ? -1 : 0; gasneti_assert(nbytes != 0); @@ -3200,9 +3229,9 @@ extern int gasnetc_rdma_long_put( (!GASNETC_USE_FIREHOSE && !gasnetc_in_bound_segment(ep, (uintptr_t)src_ptr, nbytes) && !gasneti_in_local_auxsegment((gasneti_EP_t)ep, src_ptr, nbytes)) || - ((to_xfer = gasnetc_do_put_zerocp(ep, epid, rem_auxseg, sr_desc, nbytes, + ((to_xfer = gasnetc_do_put_zerocp(ep, epid, rem_epidx, sr_desc, nbytes, local_cnt, local_cb GASNETI_THREAD_PASS)))) { - gasnetc_do_put_bounce(ep, epid, rem_auxseg, sr_desc, to_xfer, + gasnetc_do_put_bounce(ep, epid, rem_epidx, sr_desc, to_xfer, NULL, NULL GASNETI_THREAD_PASS); } @@ -3229,14 +3258,24 @@ extern int gasnetc_rdma_get( gasnetc_cb_t remote_cb GASNETI_THREAD_FARG) { - gex_Rank_t jobrank = gasneti_e_tm_rank_to_jobrank(tm, rank); - gasnetc_EP_t ep = (gasnetc_EP_t) gasneti_import_tm(tm)->_ep; + gasnetc_EP_t ep = (gasnetc_EP_t) gasneti_e_tm_to_i_ep(tm); GASNETC_DECL_SR_DESC(sr_desc, GASNETC_SND_SG); // TODO-EX: - // All uses of rem_auxseg are a temporary hack // This will be replaced by general 
multi-registration support later - const int rem_auxseg = gasneti_in_auxsegment(jobrank, src_ptr, nbytes); + const int loc_auxseg = gasneti_in_local_auxsegment((gasneti_EP_t)ep, dst_ptr, nbytes); + const gex_EP_Location_t loc = gasneti_e_tm_rank_to_location(tm, rank, 0); + const gex_Rank_t jobrank = loc.gex_rank; + const int rem_epidx = gasneti_in_auxsegment(jobrank, src_ptr, nbytes) ? -1 : loc.gex_ep_index; + + // To reach here legally, at least one of three things must be true: + // 1. local ep is bound to non-host memory + // 2. remote ep is neither primordial nor aux-seg + // 3. remote jobrank is not in-nbrhd + // Otherwise, PSHM should be used. + gasneti_assert(!gasneti_i_segment_kind_is_host(ep->_segment) || + (rem_epidx > 0) || + !GASNETI_NBRHD_JOBRANK_IS_LOCAL(jobrank)); gasneti_assert(nbytes != 0); gasneti_assert(remote_cnt != NULL); @@ -3250,9 +3289,9 @@ extern int gasnetc_rdma_get( !gasnetc_in_bound_segment(ep, (uintptr_t)dst_ptr, nbytes) && !gasneti_in_local_auxsegment((gasneti_EP_t)ep, dst_ptr, nbytes)) { /* Firehose disabled. 
Use bounce buffers since dst_ptr is out-of-segment */ - gasnetc_do_get_bounce(ep, jobrank, rem_auxseg, sr_desc, nbytes, remote_cnt, remote_cb GASNETI_THREAD_PASS); + gasnetc_do_get_bounce(ep, jobrank, rem_epidx, sr_desc, nbytes, remote_cnt, remote_cb GASNETI_THREAD_PASS); } else { - gasnetc_do_get_zerocp(ep, jobrank, rem_auxseg, sr_desc, nbytes, remote_cnt, remote_cb GASNETI_THREAD_PASS); + gasnetc_do_get_zerocp(ep, jobrank, rem_epidx, sr_desc, nbytes, remote_cnt, remote_cb GASNETI_THREAD_PASS); } return 0; @@ -3277,7 +3316,7 @@ extern int gasnetc_rdma_put( GASNETI_THREAD_FARG) { gex_Rank_t jobrank = gasneti_e_tm_rank_to_jobrank(tm, rank); - gasnetc_EP_t ep = (gasnetc_EP_t) gasneti_import_tm(tm)->_ep; + gasnetc_EP_t ep = (gasnetc_EP_t) gasneti_e_tm_to_i_ep(tm); uintptr_t src = (uintptr_t)src_ptr; uintptr_t dst = (uintptr_t)dst_ptr; @@ -3390,7 +3429,7 @@ extern int gasnetc_rdma_get( GASNETI_THREAD_FARG) { gex_Rank_t jobrank = gasneti_e_tm_rank_to_jobrank(tm, rank); - gasnetc_EP_t ep = (gasnetc_EP_t) gasneti_import_tm(tm)->_ep; + gasnetc_EP_t ep = (gasnetc_EP_t) gasneti_e_tm_to_i_ep(tm); uintptr_t src = (uintptr_t)src_ptr; uintptr_t dst = (uintptr_t)dst_ptr; diff --git a/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_extended_fwd.h b/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_extended_fwd.h index cfda722cbe01..4561f977fb87 100644 --- a/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_extended_fwd.h +++ b/third-party/gasnet/gasnet-src/ibv-conduit/gasnet_extended_fwd.h @@ -13,7 +13,7 @@ #include -#define GASNET_EXTENDED_VERSION 2.5 +#define GASNET_EXTENDED_VERSION 2.7 #define GASNET_EXTENDED_VERSION_STR _STRINGIFY(GASNET_EXTENDED_VERSION) #define GASNET_EXTENDED_NAME IBV #define GASNET_EXTENDED_NAME_STR _STRINGIFY(GASNET_EXTENDED_NAME) @@ -30,8 +30,8 @@ */ /* Each RCV thread needs a slot in the threadtable. The CONN thread doesn't. 
*/ #if GASNETC_IBV_RCV_THREAD - #ifdef GASNETC_IBV_MAX_HCAS - #define GASNETE_CONDUIT_THREADS_USING_TD GASNETC_IBV_MAX_HCAS + #ifdef GASNETC_IBV_MAX_HCAS_CONFIGURE + #define GASNETE_CONDUIT_THREADS_USING_TD GASNETC_IBV_MAX_HCAS_CONFIGURE #else #define GASNETE_CONDUIT_THREADS_USING_TD 1 #endif @@ -63,12 +63,8 @@ #define GASNETE_BUILD_AMREF_PUT 1 #if !defined(GASNET_DISABLE_MUNMAP_DEFAULT) && PLATFORM_ARCH_64 - // default to disabling munmap for bug 955 if firehose might be used - #if GASNET_SEGMENT_FAST && GASNETC_IBV_ODP - #define GASNET_DISABLE_MUNMAP_DEFAULT (!gasnetc_use_odp) - #else - #define GASNET_DISABLE_MUNMAP_DEFAULT 1 - #endif + // default to disabling munmap due to bug 955 (firhose correctness) and bug 4164 (odp performance) + #define GASNET_DISABLE_MUNMAP_DEFAULT 1 #endif // this VIS algorithm uses put/get with local-side buffers that are dynamically malloced and freed, // thus is only safe if we disabled malloc munmap to avoid running afowl of firehose bug3364/bug955 diff --git a/third-party/gasnet/gasnet-src/license.txt b/third-party/gasnet/gasnet-src/license.txt index 6fa105ee6685..3f8c2fad0745 100644 --- a/third-party/gasnet/gasnet-src/license.txt +++ b/third-party/gasnet/gasnet-src/license.txt @@ -10,7 +10,7 @@ for additional licensing terms governing those contributed components. *** Copyright Notice *** -Global-Address Space Networking for Exascale (GASNet-EX) Copyright (c) 2000-2020, +Global-Address Space Networking for Exascale (GASNet-EX) Copyright (c) 2000-2021, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. @@ -27,7 +27,7 @@ perform publicly and display publicly, and to permit other to do so. 
*** License Agreement *** -Global-Address Space Networking for Exascale (GASNet-EX) Copyright (c) 2000-2020, +Global-Address Space Networking for Exascale (GASNet-EX) Copyright (c) 2000-2021, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. diff --git a/third-party/gasnet/gasnet-src/mpi-conduit/Makefile.am b/third-party/gasnet/gasnet-src/mpi-conduit/Makefile.am index db45c9fde060..ec7ffd07a743 100644 --- a/third-party/gasnet/gasnet-src/mpi-conduit/Makefile.am +++ b/third-party/gasnet/gasnet-src/mpi-conduit/Makefile.am @@ -53,6 +53,10 @@ $(top_builddir)/other/ammpi/amx_portable_platform.h: $(top_builddir)/other/ammpi # forced targets in this file, and should probably use LIBINCLUDES/LIBDEFINES CONDUIT_SPECIAL_OBJS = +# memory kinds supported by this conduit (space separated) +# to be included in libgasnet if support was enabled at configure time +CONDUIT_KINDS = + # the default job spawn command to be used for "make run-tests" # The following substitutions are performed: # %P = program executable name diff --git a/third-party/gasnet/gasnet-src/mpi-conduit/Makefile.in b/third-party/gasnet/gasnet-src/mpi-conduit/Makefile.in index e96db831c7ec..789ab2719bd2 100644 --- a/third-party/gasnet/gasnet-src/mpi-conduit/Makefile.in +++ b/third-party/gasnet/gasnet-src/mpi-conduit/Makefile.in @@ -279,6 +279,10 @@ CONFIGURE_ARGS = @CONFIGURE_ARGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CROSS_COMPILING = @CROSS_COMPILING@ +CUDA_UVA_CFLAGS = @CUDA_UVA_CFLAGS@ +CUDA_UVA_LDFLAGS = @CUDA_UVA_LDFLAGS@ +CUDA_UVA_LIBS = @CUDA_UVA_LIBS@ +CUDA_guess_prog = @CUDA_guess_prog@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ @@ -304,6 +308,8 @@ ENVCMD = @ENVCMD@ EXEEXT = @EXEEXT@ EXESUFFIX = @EXESUFFIX@ FH_CFLAGS = @FH_CFLAGS@ +GASNETI_HWLOC_BIND_PATH = @GASNETI_HWLOC_BIND_PATH@ +GASNETI_HWLOC_CALC_PATH = @GASNETI_HWLOC_CALC_PATH@ GASNETI_PTR_BITS = 
@GASNETI_PTR_BITS@ GASNET_EXTRA_DEFINES = @GASNET_EXTRA_DEFINES@ GASNET_IBV_SPAWNER_CONF = @GASNET_IBV_SPAWNER_CONF@ @@ -332,6 +338,10 @@ GSTACK_PATH = @GSTACK_PATH@ HAVE_INTTYPES_H = @HAVE_INTTYPES_H@ HAVE_STDINT_H = @HAVE_STDINT_H@ HAVE_SYS_TYPES_H = @HAVE_SYS_TYPES_H@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_guess_prog = @HWLOC_guess_prog@ IBV_CFLAGS = @IBV_CFLAGS@ IBV_LDFLAGS = @IBV_LDFLAGS@ IBV_LIBS = @IBV_LIBS@ @@ -530,6 +540,10 @@ CONDUIT_EXTRADEPS = $(top_builddir)/other/ammpi/libammpi.a $(top_srcdir)/other/a # forced targets in this file, and should probably use LIBINCLUDES/LIBDEFINES CONDUIT_SPECIAL_OBJS = +# memory kinds supported by this conduit (space separated) +# to be included in libgasnet if support was enabled at configure time +CONDUIT_KINDS = + # the default job spawn command to be used for "make run-tests" # The following substitutions are performed: # %P = program executable name diff --git a/third-party/gasnet/gasnet-src/mpi-conduit/contrib/Makefile.in b/third-party/gasnet/gasnet-src/mpi-conduit/contrib/Makefile.in index 3d5eee7d4371..61057b994d2c 100644 --- a/third-party/gasnet/gasnet-src/mpi-conduit/contrib/Makefile.in +++ b/third-party/gasnet/gasnet-src/mpi-conduit/contrib/Makefile.in @@ -186,6 +186,10 @@ CONFIGURE_ARGS = @CONFIGURE_ARGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CROSS_COMPILING = @CROSS_COMPILING@ +CUDA_UVA_CFLAGS = @CUDA_UVA_CFLAGS@ +CUDA_UVA_LDFLAGS = @CUDA_UVA_LDFLAGS@ +CUDA_UVA_LIBS = @CUDA_UVA_LIBS@ +CUDA_guess_prog = @CUDA_guess_prog@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ @@ -211,6 +215,8 @@ ENVCMD = @ENVCMD@ EXEEXT = @EXEEXT@ EXESUFFIX = @EXESUFFIX@ FH_CFLAGS = @FH_CFLAGS@ +GASNETI_HWLOC_BIND_PATH = @GASNETI_HWLOC_BIND_PATH@ +GASNETI_HWLOC_CALC_PATH = @GASNETI_HWLOC_CALC_PATH@ GASNETI_PTR_BITS = @GASNETI_PTR_BITS@ GASNET_EXTRA_DEFINES = @GASNET_EXTRA_DEFINES@ GASNET_IBV_SPAWNER_CONF = @GASNET_IBV_SPAWNER_CONF@ @@ -239,6 +245,10 @@ GSTACK_PATH = 
@GSTACK_PATH@ HAVE_INTTYPES_H = @HAVE_INTTYPES_H@ HAVE_STDINT_H = @HAVE_STDINT_H@ HAVE_SYS_TYPES_H = @HAVE_SYS_TYPES_H@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_guess_prog = @HWLOC_guess_prog@ IBV_CFLAGS = @IBV_CFLAGS@ IBV_LDFLAGS = @IBV_LDFLAGS@ IBV_LIBS = @IBV_LIBS@ diff --git a/third-party/gasnet/gasnet-src/mpi-conduit/gasnet_core.c b/third-party/gasnet/gasnet-src/mpi-conduit/gasnet_core.c index c82f910f0cb2..db475cd34fe5 100644 --- a/third-party/gasnet/gasnet-src/mpi-conduit/gasnet_core.c +++ b/third-party/gasnet/gasnet-src/mpi-conduit/gasnet_core.c @@ -18,15 +18,8 @@ GASNETI_IDENT(gasnetc_IdentString_Version, "$GASNetCoreLibraryVersion: " GASNET_CORE_VERSION_STR " $"); GASNETI_IDENT(gasnetc_IdentString_Name, "$GASNetCoreLibraryName: " GASNET_CORE_NAME_STR " $"); -gex_AM_Entry_t const *gasnetc_get_handlertable(void); - gex_AM_Entry_t *gasnetc_handler; // TODO-EX: will be replaced with per-EP tables -// TODO-EX: This is a hack to support multiple segments w/ a single AM EP -#ifndef GASNETC_MOCK_EVERYTHING -#define GASNETC_MOCK_EVERYTHING 1 -#endif - static void gasnetc_traceoutput(int); eb_t gasnetc_bundle; @@ -275,9 +268,24 @@ static int gasnetc_attach_primary(void) { // register process exit-time hook gasneti_registerExitHandler(gasnetc_exit); - #if GASNETC_MOCK_EVERYTHING - retval = AM_SetSeg(gasnetc_endpoint, NULL, (uintptr_t)-1); - if (retval != AM_OK) INITERR(RESOURCE, "AM_SetSeg() failed"); + // register all of memory as the AMX-level segment + // this is needed for multi-segment support (aux + client at a minimum) + retval = AM_SetSeg(gasnetc_endpoint, NULL, (uintptr_t)-1); + if (retval != AM_OK) INITERR(RESOURCE, "AM_SetSeg() failed"); + + #if GASNETC_HSL_ERRCHECK || GASNET_TRACE || GASNET_DEBUG + #if !(GASNETC_HSL_ERRCHECK || GASNET_DEBUG) + if (GASNETI_TRACE_ENABLED(A)) + #endif + GASNETI_AM_SAFE(AMMPI_SetHandlerCallbacks(gasnetc_endpoint, + gasnetc_enteringHandler_hook, 
gasnetc_leavingHandler_hook)); + #endif + + #if GASNETC_HSL_ERRCHECK + // Historically needed to precede attach_done to avoid inf recursion on + // malloc/hold_interrupts. That is *probably* no longer the case, but this + // is still a reasonable place to initialize. + gasnetc_hsl_attach(); #endif /* ------------------------------------------------------------------------------------ */ @@ -308,38 +316,16 @@ static int gasnetc_attach_primary(void) { static int gasnetc_attach_segment(gex_Segment_t *segment_p, gex_TM_t tm, uintptr_t segsize, - gasneti_bootstrapExchangefn_t exchangefn, - gex_Flags_t flags) { - int retval = GASNET_OK; - + gex_Flags_t flags) +{ /* ------------------------------------------------------------------------------------ */ /* register client segment */ - gasnet_seginfo_t myseg = gasneti_segmentAttach(segment_p, 0, tm, segsize, exchangefn, flags); + gasnet_seginfo_t myseg = gasneti_segmentAttach(segment_p, tm, segsize, flags); -#if !GASNETC_MOCK_EVERYTHING - /* AMMPI allows arbitrary registration with no further action */ - if (segsize) { - retval = AM_SetSeg(gasnetc_endpoint, myseg.addr, myseg.size); - if (retval != AM_OK) INITERR(RESOURCE, "AM_SetSeg() failed"); - } -#endif + // Have called AM_SetSeg() previously w/ an "everything" segment - #if GASNETC_HSL_ERRCHECK || GASNET_TRACE || GASNET_DEBUG - #if !(GASNETC_HSL_ERRCHECK || GASNET_DEBUG) - if (GASNETI_TRACE_ENABLED(A)) - #endif - GASNETI_AM_SAFE(AMMPI_SetHandlerCallbacks(gasnetc_endpoint, - gasnetc_enteringHandler_hook, gasnetc_leavingHandler_hook)); - #endif - - #if GASNETC_HSL_ERRCHECK - gasnetc_hsl_attach(); /* must precede attach_done to avoid inf recursion on malloc/hold_interrupts */ - // TODO-EX: Is this recursion still an issue w/ removal of NIS? 
- #endif - -done: - GASNETI_RETURN(retval); + return GASNET_OK; } /* ------------------------------------------------------------------------------------ */ // TODO-EX: this is a candidate for factorization (once we understand the per-conduit variations) @@ -352,7 +338,7 @@ extern int gasnetc_attach( gex_TM_t _tm, GASNETI_TRACE_PRINTF(C,("gasnetc_attach(table (%i entries), segsize=%"PRIuPTR")", numentries, segsize)); - gasneti_TM_t tm = gasneti_import_tm(_tm); + gasneti_TM_t tm = gasneti_import_tm_nonpair(_tm); gasneti_EP_t ep = tm->_ep; if (!gasneti_init_done) @@ -374,16 +360,16 @@ extern int gasnetc_attach( gex_TM_t _tm, if (GASNET_OK != gasnetc_attach_primary()) GASNETI_RETURN_ERRR(RESOURCE,"Error in primary attach"); - AMLOCK(); #if GASNET_SEGMENT_FAST || GASNET_SEGMENT_LARGE /* register client segment */ gex_Segment_t seg; // g2ex segment is automatically saved by a hook - if (GASNET_OK != gasnetc_attach_segment(&seg, _tm, segsize, gasnetc_bootstrapExchange, GASNETI_FLAG_INIT_LEGACY)) - INITERR(RESOURCE,"Error attaching segment"); + if (GASNET_OK != gasnetc_attach_segment(&seg, _tm, segsize, GASNETI_FLAG_INIT_LEGACY)) + GASNETI_RETURN_ERRR(RESOURCE,"Error attaching segment"); #endif + AMLOCK(); /* register client handlers */ - if (table && gasneti_amregister_legacy(ep->_amtbl, table, numentries) != GASNET_OK) + if (table && gasneti_amregister_legacy(ep, table, numentries) != GASNET_OK) INITERR(RESOURCE,"Error registering handlers"); AMUNLOCK(); @@ -427,18 +413,22 @@ extern int gasnetc_Client_Init( #endif } + // Do NOT move this prior to the gasneti_trace_init() call + GASNETI_TRACE_PRINTF(O,("gex_Client_Init: name='%s' argc_p=%p argv_p=%p flags=%d", + clientName, (void *)argc, (void *)argv, flags)); + // allocate the client object - gasneti_Client_t client = gasneti_alloc_client(clientName, flags, 0); + gasneti_Client_t client = gasneti_alloc_client(clientName, flags); *client_p = gasneti_export_client(client); // create the initial endpoint with internal 
handlers - if (gasnetc_EP_Create(ep_p, *client_p, flags)) + if (gex_EP_Create(ep_p, *client_p, GEX_EP_CAPABILITY_ALL, flags)) GASNETI_RETURN_ERRR(RESOURCE,"Error creating initial endpoint"); gasneti_EP_t ep = gasneti_import_ep(*ep_p); gasnetc_handler = ep->_amtbl; // TODO-EX: this global variable to be removed // TODO-EX: create team - gasneti_TM_t tm = gasneti_alloc_tm(ep, gasneti_mynode, gasneti_nodes, flags, 0); + gasneti_TM_t tm = gasneti_alloc_tm(ep, gasneti_mynode, gasneti_nodes, flags); *tm_p = gasneti_export_tm(tm); if (0 == (flags & GASNETI_FLAG_INIT_LEGACY)) { @@ -473,60 +463,37 @@ extern int gasnetc_Segment_Attach( /* create a segment collectively */ // TODO-EX: this implementation only works *once* - // TODO-EX: should be using the team's exchange function if possible // TODO-EX: need to pass proper flags (e.g. pshm and bind) instead of 0 - if (GASNET_OK != gasnetc_attach_segment(segment_p, tm, length, gasneti_defaultExchange, 0)) + if (GASNET_OK != gasnetc_attach_segment(segment_p, tm, length, 0)) GASNETI_RETURN_ERRR(RESOURCE,"Error attaching segment"); return GASNET_OK; } -extern int gasnetc_EP_Create(gex_EP_t *ep_p, - gex_Client_t client, - gex_Flags_t flags) { - /* (###) add code here to create an endpoint belonging to the given client */ -#if 1 // TODO-EX: This is a stub, which assumes 1 implicit call from ClientCreate - static gasneti_mutex_t lock = GASNETI_MUTEX_INITIALIZER; - gasneti_mutex_lock(&lock); - static int once = 0; - int prev = once; - once = 1; - gasneti_mutex_unlock(&lock); - if (prev) gasneti_fatalerror("Multiple endpoints are not yet implemented"); -#endif +extern int gasnetc_Segment_Create( + gex_Segment_t *segment_p, + gex_Client_t client, + gex_Addr_t address, + uintptr_t length, + gex_MK_t kind, + gex_Flags_t flags) +{ + gasneti_assert(segment_p); - gasneti_EP_t ep = gasneti_alloc_ep(gasneti_import_client(client), flags, 0); - *ep_p = gasneti_export_ep(ep); - - { /* core API handlers */ - gex_AM_Entry_t *ctable = 
(gex_AM_Entry_t *)gasnetc_get_handlertable(); - int len = 0; - int numreg = 0; - gasneti_assert(ctable); - while (ctable[len].gex_fnptr) len++; /* calc len */ - if (gasneti_amregister(ep->_amtbl, ctable, len, GASNETC_HANDLER_BASE, GASNETE_HANDLER_BASE, 0, &numreg) != GASNET_OK) - GASNETI_RETURN_ERRR(RESOURCE,"Error registering core API handlers"); - gasneti_assert_int(numreg ,==, len); - } + // Create the Segment object, allocating memory if appropriate + gasneti_Client_t i_client = gasneti_import_client(client); + int rc = gasneti_segmentCreate(segment_p, i_client, address, length, kind, flags); - { /* extended API handlers */ - gex_AM_Entry_t *etable = (gex_AM_Entry_t *)gasnete_get_handlertable(); - int len = 0; - int numreg = 0; - gasneti_assert(etable); - while (etable[len].gex_fnptr) len++; /* calc len */ - if (gasneti_amregister(ep->_amtbl, etable, len, GASNETE_HANDLER_BASE, GASNETI_CLIENT_HANDLER_BASE, 0, &numreg) != GASNET_OK) - GASNETI_RETURN_ERRR(RESOURCE,"Error registering extended API handlers"); - gasneti_assert_int(numreg ,==, len); - } + // Have called AM_SetSeg() previously w/ an "everything" segment + // Thus no registration required - return GASNET_OK; + return rc; } extern int gasnetc_EP_RegisterHandlers(gex_EP_t ep, gex_AM_Entry_t *table, size_t numentries) { - return gasneti_amregister_client(gasneti_import_ep(ep)->_amtbl, table, numentries); + return gasneti_amregister_client(gasneti_import_ep(ep), table, numentries); } /* ------------------------------------------------------------------------------------ */ static int gasnetc_exitcalled = 0; @@ -801,12 +768,7 @@ int gasnetc_AMRequestLong( gex_TM_t tm, gex_Rank_t rank, gex_AM_Index_t handler source_addr, nbytes, dest_addr, flags, numargs, argptr GASNETI_THREAD_PASS); } else { - uintptr_t dest_offset; -#if GASNETC_MOCK_EVERYTHING - dest_offset = (uintptr_t)dest_addr; -#else - dest_offset = ((uintptr_t)dest_addr) - (uintptr_t)gasneti_client_seginfo(tm, rank)->addr; -#endif + uintptr_t 
dest_offset = (uintptr_t)dest_addr; AMLOCK_TOSEND(); GASNETI_AM_SAFE_NORETURN(retval, @@ -942,14 +904,7 @@ int gasnetc_AMReplyLong( gex_Token_t token, gex_AM_Index_t handler, source_addr, nbytes, dest_addr, flags, numargs, argptr); } else { - uintptr_t dest_offset; - -#if GASNETC_MOCK_EVERYTHING - dest_offset = (uintptr_t)dest_addr; -#else - gex_Rank_t dest = gasnetc_msgsource(token); - dest_offset = ((uintptr_t)dest_addr) - (uintptr_t)gasneti_client_seginfo(gasneti_THUNK_TM, dest)->addr; -#endif + uintptr_t dest_offset = (uintptr_t)dest_addr; AM_ASSERT_LOCKED(); GASNETI_AM_SAFE_NORETURN(retval, @@ -1019,9 +974,13 @@ extern int gasnetc_AMReplyLongM( if_pt (info) return info; /* first time we've seen this thread - need to set it up */ - { /* it's unsafe to call malloc or gasneti_malloc here after attach, - because we may be within a hold_interrupts call, so table is single-level - and initialized during gasnet_attach */ // TODO-EX: Still true w/ removal of NIS? + { /* It is (was?) unsafe to call malloc or gasneti_malloc here after attach, + because we may have been within a hold_interrupts call, so table is single-level + and initialized during gasnetc_attach_primary(). + While that problem has probably been eliminated with the removal of NIS, this + pre-initialization remains. + TODO: cleanup/simplify based on removal of NIS? 
+ */ static gasnetc_hsl_errcheckinfo_t *hsl_errcheck_table = NULL; static gasneti_mutex_t hsl_errcheck_tablelock = GASNETI_MUTEX_INITIALIZER; int maxthreads = gasneti_max_threads(); @@ -1274,6 +1233,7 @@ extern int gasnetc_hsl_trylock(gex_HSL_t *hsl) { break; default: gasneti_unreachable_error(("Unknown handler type in gasnetc_enteringHandler_hook(): 0x%x",(int)cat)); } + GASNETI_HANDLER_ENTER(isReq); // TODO: absorb HSL check, below #if (!GASNETC_NULL_HSL && GASNETC_HSL_ERRCHECK) gasnetc_enteringHandler_hook_hsl(cat, isReq, handlerId, token, buf, nbytes, numargs, (gex_AM_Arg_t *)args); @@ -1292,6 +1252,7 @@ extern int gasnetc_hsl_trylock(gex_HSL_t *hsl) { break; default: gasneti_unreachable_error(("Unknown handler type in gasnetc_leavingHandler_hook(): 0x%x",(int)cat)); } + GASNETI_HANDLER_LEAVE(isReq); // TODO: absorb HSL check, below #if (!GASNETC_NULL_HSL && GASNETC_HSL_ERRCHECK) gasnetc_leavingHandler_hook_hsl(cat, isReq); #endif @@ -1303,9 +1264,7 @@ extern int gasnetc_hsl_trylock(gex_HSL_t *hsl) { ================ */ static gex_AM_Entry_t const gasnetc_handlers[] = { - #ifdef GASNETC_COMMON_HANDLERS - GASNETC_COMMON_HANDLERS(), - #endif + GASNETC_COMMON_HANDLERS(), /* ptr-width independent handlers */ diff --git a/third-party/gasnet/gasnet-src/mpi-conduit/gasnet_core.h b/third-party/gasnet/gasnet-src/mpi-conduit/gasnet_core.h index 7cde76234314..03954381a588 100644 --- a/third-party/gasnet/gasnet-src/mpi-conduit/gasnet_core.h +++ b/third-party/gasnet/gasnet-src/mpi-conduit/gasnet_core.h @@ -24,7 +24,7 @@ ============== */ -extern void gasnetc_exit(int exitcode) GASNETI_NORETURN; +extern void gasnetc_exit(int _exitcode) GASNETI_NORETURN; GASNETI_NORETURNP(gasnetc_exit) #define gasnet_exit gasnetc_exit @@ -37,31 +37,34 @@ GASNETI_NORETURNP(gasnetc_exit) #endif /* ------------------------------------------------------------------------------------ */ extern int gasnetc_Client_Init( - gex_Client_t *client_p, - gex_EP_t *ep_p, - gex_TM_t *tm_p, - const char 
*clientName, - int *argc, - char ***argv, - gex_Flags_t flags); + gex_Client_t *_client_p, + gex_EP_t *_ep_p, + gex_TM_t *_tm_p, + const char *_clientName, + int *_argc, + char ***_argv, + gex_Flags_t _flags); // gasnetex.h handles name-shifting of gex_Client_Init() extern int gasnetc_Segment_Attach( - gex_Segment_t *segment_p, - gex_TM_t tm, - uintptr_t length); + gex_Segment_t *_segment_p, + gex_TM_t _tm, + uintptr_t _length); #define gex_Segment_Attach gasnetc_Segment_Attach -extern int gasnetc_EP_Create( - gex_EP_t *ep_p, - gex_Client_t client, - gex_Flags_t flags); -#define gex_EP_Create gasnetc_EP_Create +extern int gasnetc_Segment_Create( + gex_Segment_t *_segment_p, + gex_Client_t _client, + gex_Addr_t _address, + uintptr_t _length, + gex_MK_t _kind, + gex_Flags_t _flags); +#define gex_Segment_Create gasnetc_Segment_Create extern int gasnetc_EP_RegisterHandlers( - gex_EP_t ep, - gex_AM_Entry_t *table, - size_t numentries); + gex_EP_t _ep, + gex_AM_Entry_t *_table, + size_t _numentries); #define gex_EP_RegisterHandlers gasnetc_EP_RegisterHandlers /* ------------------------------------------------------------------------------------ */ /* @@ -127,11 +130,11 @@ typedef struct gasneti_hsl_s { #define gex_HSL_Unlock(hsl) #define gex_HSL_Trylock(hsl) GASNET_OK #else - extern void gasnetc_hsl_init (gex_HSL_t *hsl); - extern void gasnetc_hsl_destroy(gex_HSL_t *hsl); - extern void gasnetc_hsl_lock (gex_HSL_t *hsl); - extern void gasnetc_hsl_unlock (gex_HSL_t *hsl); - extern int gasnetc_hsl_trylock(gex_HSL_t *hsl) GASNETI_WARN_UNUSED_RESULT; + extern void gasnetc_hsl_init (gex_HSL_t *_hsl); + extern void gasnetc_hsl_destroy(gex_HSL_t *_hsl); + extern void gasnetc_hsl_lock (gex_HSL_t *_hsl); + extern void gasnetc_hsl_unlock (gex_HSL_t *_hsl); + extern int gasnetc_hsl_trylock(gex_HSL_t *_hsl) GASNETI_WARN_UNUSED_RESULT; #define gex_HSL_Init gasnetc_hsl_init #define gex_HSL_Destroy gasnetc_hsl_destroy @@ -141,10 +144,10 @@ typedef struct gasneti_hsl_s { #endif #if 
GASNET_PSHM && GASNETC_HSL_ERRCHECK && !GASNETC_NULL_HSL - extern void gasnetc_enteringHandler_hook_hsl(int cat, int isReq, int handlerId, gex_Token_t token, - void *buf, size_t nbytes, int numargs, - gex_AM_Arg_t *args); - extern void gasnetc_leavingHandler_hook_hsl(int cat, int isReq); + extern void gasnetc_enteringHandler_hook_hsl(int _cat, int _isReq, int _handlerId, gex_Token_t _token, + void *_buf, size_t _nbytes, int _numargs, + gex_AM_Arg_t *_args); + extern void gasnetc_leavingHandler_hook_hsl(int _cat, int _isReq); #define GASNETC_ENTERING_HANDLER_HOOK gasnetc_enteringHandler_hook_hsl #define GASNETC_LEAVING_HANDLER_HOOK gasnetc_leavingHandler_hook_hsl @@ -164,12 +167,28 @@ typedef struct gasneti_hsl_s { #define gex_AM_LUBReplyLong() ((size_t)AM_MaxLong()) // TODO-EX: Can these be improved upon, at least for PSHM case -#define gasnetc_AM_MaxRequestMedium(tm,rank,lc_opt,flags,nargs) gex_AM_LUBRequestMedium() -#define gasnetc_AM_MaxReplyMedium(tm,rank,lc_opt,flags,nargs) gex_AM_LUBReplyMedium() -#define gasnetc_AM_MaxRequestLong(tm,rank,lc_opt,flags,nargs) gex_AM_LUBRequestLong() -#define gasnetc_AM_MaxReplyLong(tm,rank,lc_opt,flags,nargs) gex_AM_LUBReplyLong() -#define gasnetc_Token_MaxReplyMedium(token,lc_opt,flags,nargs) gex_AM_LUBReplyMedium() -#define gasnetc_Token_MaxReplyLong(token,lc_opt,flags,nargs) gex_AM_LUBReplyLong() +#define gasnetc_AM_MaxRequestMedium(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS5(tm,rank,lc_opt,flags,nargs),gex_AM_LUBRequestMedium()) +#define gasnetc_AM_MaxReplyMedium(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS5(tm,rank,lc_opt,flags,nargs),gex_AM_LUBReplyMedium()) +#define gasnetc_Token_MaxReplyMedium(token,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS4(token,lc_opt,flags,nargs),gex_AM_LUBReplyMedium()) + +#define gasnetc_AM_MaxRequestLong(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS4(tm,rank,lc_opt,nargs), \ + ((flags) & GEX_FLAG_AM_PREPARE_LEAST_ALLOC \ + ? 
GASNETC_REF_NPAM_MAX_ALLOC \ + : gex_AM_LUBRequestLong())) +#define gasnetc_AM_MaxReplyLong(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS4(tm,rank,lc_opt,nargs), \ + ((flags) & GEX_FLAG_AM_PREPARE_LEAST_ALLOC \ + ? GASNETC_REF_NPAM_MAX_ALLOC \ + : gex_AM_LUBReplyLong())) +#define gasnetc_Token_MaxReplyLong(token,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS3(token,lc_opt,nargs), \ + ((flags) & GEX_FLAG_AM_PREPARE_LEAST_ALLOC \ + ? GASNETC_REF_NPAM_MAX_ALLOC \ + : gex_AM_LUBReplyLong())) /* ------------------------------------------------------------------------------------ */ /* diff --git a/third-party/gasnet/gasnet-src/mpi-conduit/gasnet_core_fwd.h b/third-party/gasnet/gasnet-src/mpi-conduit/gasnet_core_fwd.h index b0522fdc5545..057bbcf557c7 100644 --- a/third-party/gasnet/gasnet-src/mpi-conduit/gasnet_core_fwd.h +++ b/third-party/gasnet/gasnet-src/mpi-conduit/gasnet_core_fwd.h @@ -43,6 +43,9 @@ #define GASNETI_SUPPORTS_OUTOFSEGMENT_PUTGET 1 #endif + // uncomment for each MK_CLASS which the conduit supports. leave commented otherwise +//#define GASNET_HAVE_MK_CLASS_CUDA_UVA GASNETI_MK_CLASS_CUDA_UVA_ENABLED + /* conduits should define GASNETI_CONDUIT_THREADS to 1 if they have one or more "private" threads which may be used to run AM handlers, even under GASNET_SEQ this ensures locking is still done correctly, etc @@ -66,12 +69,12 @@ your conduit must provide the V-suffixed functions for any of these that are not defined. 
*/ -/* #define GASNETC_HAVE_NP_REQ_MEDIUM 1 */ -/* #define GASNETC_HAVE_NP_REP_MEDIUM 1 */ -/* #define GASNETC_HAVE_NP_REQ_LONG 1 */ -/* #define GASNETC_HAVE_NP_REP_LONG 1 */ +/* #define GASNET_NATIVE_NP_ALLOC_REQ_MEDIUM 1 */ +/* #define GASNET_NATIVE_NP_ALLOC_REP_MEDIUM 1 */ +/* #define GASNET_NATIVE_NP_ALLOC_REQ_LONG 1 */ +/* #define GASNET_NATIVE_NP_ALLOC_REP_LONG 1 */ - /* uncomment for each GASNETC_HAVE_NP_* enabled above if the Commit function + /* uncomment for each GASNET_NATIVE_NP_ALLOC_* enabled above if the Commit function has the numargs argument even in an NDEBUG build (it is always passed in DEBUG builds). */ @@ -84,10 +87,46 @@ include a call to gasneti_AMPoll (or equivalent) for progress. The preferred implementation is to Poll only in the M-suffixed calls and not the V-suffixed calls (and GASNETC_REQUESTV_POLLS undefined). - Used if (and only if) any of the GASNETC_HAVE_NP_* values above are unset. + Used if (and only if) any of the GASNET_NATIVE_NP_ALLOC_* values above are unset. */ #define GASNETC_REQUESTV_POLLS 1 + // uncomment if conduit provides a gasnetc-prefixed override + // TODO: this should be a hook rather than an override +//#define GASNETC_HAVE_EP_PUBLISHBOUNDSEGMENT 1 + + /* If your conduit uses conduit-specific extensions to the basic object + types, then define the corresponding SIZEOF macros below to return + the total length of the conduit-specific object, including the prefix + portion which must be the matching GASNETI_[OBJECT]_COMMON fields. + Similarly, *_HOOK macros should be defined as callbacks to perform + conduit-specific initialization and finalization tasks, if any. + If a given SIZEOF macro is defined, but the corresponding INIT_HOOK is + not, then space beyond the COMMON fields will be zero-initialized. + In all cases, GASNETC_[OBJECT]_EXTRA_DECLS provides the place to + provide necessary declarations (since this file is included very early). 
+ */ + +//#define GASNETC_CLIENT_EXTRA_DECLS (###) +//#define GASNETC_CLIENT_INIT_HOOK(i_client) (###) +//#define GASNETC_CLIENT_FINI_HOOK(i_client) (###) +//#define GASNETC_SIZEOF_CLIENT_T() (###) + +//#define GASNETC_SEGMENT_EXTRA_DECLS (###) +//#define GASNETC_SEGMENT_INIT_HOOK(i_segment) (###) +//#define GASNETC_SEGMENT_FINI_HOOK(i_segment) (###) +//#define GASNETC_SIZEOF_SEGMENT_T() (###) + +//#define GASNETC_TM_EXTRA_DECLS (###) +//#define GASNETC_TM_INIT_HOOK(i_tm) (###) +//#define GASNETC_TM_FINI_HOOK(i_tm) (###) +//#define GASNETC_SIZEOF_TM_T() (###) + +//#define GASNETC_EP_EXTRA_DECLS (###) +//#define GASNETC_EP_INIT_HOOK(i_ep) (###) +//#define GASNETC_EP_FINI_HOOK(i_ep) (###) +//#define GASNETC_SIZEOF_EP_T() (###) + /* mpi-conduit supports top-level poll throttling */ #define GASNETC_USING_SUSPEND_RESUME 1 @@ -109,17 +148,25 @@ #define GASNET_ERR_NOT_READY (_GASNET_ERR_BASE+4) #define GASNET_ERR_BARRIER_MISMATCH (_GASNET_ERR_BASE+5) +// If conduit supports GASNET_MAXEPS!=1, set default and (optional) max values here. +// Leaving GASNETC_MAXEPS_DFLT unset will result in GASNET_MAXEPS=1, independent +// of all other settings (appropriate for conduits without multi-ep support). +// If set, GASNETC_MAXEPS_MAX it is used to limit a user's --with-maxeps (and a +// global default limit is used otherwise). 
+//#define GASNETC_MAXEPS_DFLT ### // default num endpoints this conduit supports, undef means no multi-ep support +//#define GASNETC_MAXEPS_MAX ### // leave unset for default + /* this can be used to add conduit-specific statistical collection values (see gasnet_trace.h) */ #define GASNETC_CONDUIT_STATS(CNT,VAL,TIME) #define GASNETC_FATALSIGNAL_CALLBACK(sig) gasnetc_fatalsignal_callback(sig) -extern void gasnetc_fatalsignal_callback(int sig); +extern void gasnetc_fatalsignal_callback(int _sig); /* hook getSegmentInfo for NIS check */ #define _GASNET_GETSEGMENTINFO struct gasneti_seginfo_s; -extern int gasnetc_getSegmentInfo(struct gasneti_seginfo_s *seginfo_table, int numentries); +extern int gasnetc_getSegmentInfo(struct gasneti_seginfo_s *_seginfo_table, int _numentries); #define gasnet_getSegmentInfo(seginfo_table, numentries) \ gasnetc_getSegmentInfo(seginfo_table, numentries) diff --git a/third-party/gasnet/gasnet-src/mpi-conduit/gasnet_core_internal.h b/third-party/gasnet/gasnet-src/mpi-conduit/gasnet_core_internal.h index b37599224601..aa6a5f45c9f9 100644 --- a/third-party/gasnet/gasnet-src/mpi-conduit/gasnet_core_internal.h +++ b/third-party/gasnet/gasnet-src/mpi-conduit/gasnet_core_internal.h @@ -76,7 +76,7 @@ const char *gasneti_AMErrorName(int errval) { } while (0) /* ------------------------------------------------------------------------------------ */ -#define _hidx_gasnetc_exchg_reqh (GASNETC_HANDLER_BASE+0) +#define _hidx_gasnetc_hbarr_reqh (GASNETC_HANDLER_BASE+0) /* add new core API handlers here and to the bottom of gasnet_core.c */ /* ------------------------------------------------------------------------------------ */ diff --git a/third-party/gasnet/gasnet-src/ofi-conduit/Makefile.am b/third-party/gasnet/gasnet-src/ofi-conduit/Makefile.am index 42ec50beb1ae..289f4de3f7f6 100644 --- a/third-party/gasnet/gasnet-src/ofi-conduit/Makefile.am +++ b/third-party/gasnet/gasnet-src/ofi-conduit/Makefile.am @@ -87,6 +87,10 @@ CONDUIT_EXTRADEPS = 
$(ssh_deps) $(mpi_deps) $(pmi_deps) # forced targets in this file, and should probably use LIBINCLUDES/LIBDEFINES CONDUIT_SPECIAL_OBJS = $(mpi_special_objs) +# memory kinds supported by this conduit (space separated) +# to be included in libgasnet if support was enabled at configure time +CONDUIT_KINDS = + # the default job spawn command to be used for "make run-tests" # The following substitutions are performed: # %P = program executable name diff --git a/third-party/gasnet/gasnet-src/ofi-conduit/Makefile.in b/third-party/gasnet/gasnet-src/ofi-conduit/Makefile.in index 2672cb78dbf3..7074a8e2c715 100644 --- a/third-party/gasnet/gasnet-src/ofi-conduit/Makefile.in +++ b/third-party/gasnet/gasnet-src/ofi-conduit/Makefile.in @@ -280,6 +280,10 @@ CONFIGURE_ARGS = @CONFIGURE_ARGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CROSS_COMPILING = @CROSS_COMPILING@ +CUDA_UVA_CFLAGS = @CUDA_UVA_CFLAGS@ +CUDA_UVA_LDFLAGS = @CUDA_UVA_LDFLAGS@ +CUDA_UVA_LIBS = @CUDA_UVA_LIBS@ +CUDA_guess_prog = @CUDA_guess_prog@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ @@ -305,6 +309,8 @@ ENVCMD = @ENVCMD@ EXEEXT = @EXEEXT@ EXESUFFIX = @EXESUFFIX@ FH_CFLAGS = @FH_CFLAGS@ +GASNETI_HWLOC_BIND_PATH = @GASNETI_HWLOC_BIND_PATH@ +GASNETI_HWLOC_CALC_PATH = @GASNETI_HWLOC_CALC_PATH@ GASNETI_PTR_BITS = @GASNETI_PTR_BITS@ GASNET_EXTRA_DEFINES = @GASNET_EXTRA_DEFINES@ GASNET_IBV_SPAWNER_CONF = @GASNET_IBV_SPAWNER_CONF@ @@ -333,6 +339,10 @@ GSTACK_PATH = @GSTACK_PATH@ HAVE_INTTYPES_H = @HAVE_INTTYPES_H@ HAVE_STDINT_H = @HAVE_STDINT_H@ HAVE_SYS_TYPES_H = @HAVE_SYS_TYPES_H@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_guess_prog = @HWLOC_guess_prog@ IBV_CFLAGS = @IBV_CFLAGS@ IBV_LDFLAGS = @IBV_LDFLAGS@ IBV_LIBS = @IBV_LIBS@ @@ -546,6 +556,10 @@ CONDUIT_EXTRADEPS = $(ssh_deps) $(mpi_deps) $(pmi_deps) # forced targets in this file, and should probably use LIBINCLUDES/LIBDEFINES CONDUIT_SPECIAL_OBJS = $(mpi_special_objs) +# memory kinds supported by this 
conduit (space separated) +# to be included in libgasnet if support was enabled at configure time +CONDUIT_KINDS = + # the default job spawn command to be used for "make run-tests" # The following substitutions are performed: # %P = program executable name diff --git a/third-party/gasnet/gasnet-src/ofi-conduit/README b/third-party/gasnet/gasnet-src/ofi-conduit/README index 9bf396358965..6561a2100841 100644 --- a/third-party/gasnet/gasnet-src/ofi-conduit/README +++ b/third-party/gasnet/gasnet-src/ofi-conduit/README @@ -1,12 +1,12 @@ GASNet ofi-conduit documentation Copyright 2015-2017, Intel Corporation -Portions copyright 2018-2020, The Regents of the University of California. -$Revision: 1.1 $ +Portions copyright 2018-2021, The Regents of the University of California. **** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **** * * * This version of ofi-conduit is a work-in-progress port from * -* GASNet-1 to GASNet-EX and is currently disabled by default. * +* GASNet-1 to GASNet-EX and is currently EXPERIMENTAL. * +* * * Various aspects of this README are either out-of-date or * * have not been reverified against the current version. * * * @@ -22,6 +22,15 @@ libraries and applications used to export fabric services. See more details at: http://ofiwg.github.io/libfabric/ +This conduit is currently being re-implemented for GASNet-EX and is +considered to be EXPERIMENTAL. Therefore, it is disabled by default. +It can be enabled at GASNet configure time using the `--enable-ofi` option. + +The re-implementation work completed to date is believed to be functionally +complete and correct when run with OFI's sockets provider and default settings, +but it is not performant. In particular, GASNet-EX RMA is implemented via a +reference implementation over Active Messages, rather than OFI's RMA APIs. 
+ Where this conduit runs: ----------------------- diff --git a/third-party/gasnet/gasnet-src/ofi-conduit/contrib/Makefile.in b/third-party/gasnet/gasnet-src/ofi-conduit/contrib/Makefile.in index 637e37d7c1e5..820a092ed63b 100644 --- a/third-party/gasnet/gasnet-src/ofi-conduit/contrib/Makefile.in +++ b/third-party/gasnet/gasnet-src/ofi-conduit/contrib/Makefile.in @@ -187,6 +187,10 @@ CONFIGURE_ARGS = @CONFIGURE_ARGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CROSS_COMPILING = @CROSS_COMPILING@ +CUDA_UVA_CFLAGS = @CUDA_UVA_CFLAGS@ +CUDA_UVA_LDFLAGS = @CUDA_UVA_LDFLAGS@ +CUDA_UVA_LIBS = @CUDA_UVA_LIBS@ +CUDA_guess_prog = @CUDA_guess_prog@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ @@ -212,6 +216,8 @@ ENVCMD = @ENVCMD@ EXEEXT = @EXEEXT@ EXESUFFIX = @EXESUFFIX@ FH_CFLAGS = @FH_CFLAGS@ +GASNETI_HWLOC_BIND_PATH = @GASNETI_HWLOC_BIND_PATH@ +GASNETI_HWLOC_CALC_PATH = @GASNETI_HWLOC_CALC_PATH@ GASNETI_PTR_BITS = @GASNETI_PTR_BITS@ GASNET_EXTRA_DEFINES = @GASNET_EXTRA_DEFINES@ GASNET_IBV_SPAWNER_CONF = @GASNET_IBV_SPAWNER_CONF@ @@ -240,6 +246,10 @@ GSTACK_PATH = @GSTACK_PATH@ HAVE_INTTYPES_H = @HAVE_INTTYPES_H@ HAVE_STDINT_H = @HAVE_STDINT_H@ HAVE_SYS_TYPES_H = @HAVE_SYS_TYPES_H@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_guess_prog = @HWLOC_guess_prog@ IBV_CFLAGS = @IBV_CFLAGS@ IBV_LDFLAGS = @IBV_LDFLAGS@ IBV_LIBS = @IBV_LIBS@ diff --git a/third-party/gasnet/gasnet-src/ofi-conduit/gasnet_core.c b/third-party/gasnet/gasnet-src/ofi-conduit/gasnet_core.c index bf06b65cd5d8..23b54c646146 100644 --- a/third-party/gasnet/gasnet-src/ofi-conduit/gasnet_core.c +++ b/third-party/gasnet/gasnet-src/ofi-conduit/gasnet_core.c @@ -18,8 +18,6 @@ GASNETI_IDENT(gasnetc_IdentString_Version, "$GASNetCoreLibraryVersion: " GASNET_CORE_VERSION_STR " $"); GASNETI_IDENT(gasnetc_IdentString_Name, "$GASNetCoreLibraryName: " GASNET_CORE_NAME_STR " $"); -gex_AM_Entry_t const *gasnetc_get_handlertable(void); - gex_AM_Entry_t *gasnetc_handler; 
// TODO-EX: will be replaced with per-EP tables /* Exit coordination timeouts */ @@ -34,6 +32,11 @@ static int gasnetc_exit_init(void); struct gasnetc_ofi_locks_ gasnetc_ofi_locks; #endif +size_t gasnetc_sizeof_segment_t(void) { + gasnetc_Segment_t segment; + return sizeof(*segment); +} + /* ------------------------------------------------------------------------------------ */ /* Initialization @@ -104,14 +107,12 @@ static int gasnetc_init(int *argc, char ***argv, gex_Flags_t flags) { #endif /* allocate and attach an aux segment */ - - gasneti_auxsegAttach((uintptr_t)-1, gasneti_spawner->Exchange); + gasnet_seginfo_t auxseg = gasneti_auxsegAttach((uintptr_t)-1, gasneti_spawner->Exchange); + gasnetc_auxseg_register(auxseg); /* determine Max{Local,GLobal}SegmentSize */ gasneti_segmentInit(mmap_limit, gasneti_spawner->Exchange, flags); - // TODO-EX: MUST REGISTER THE AUXSEG AND UPDATE (AT LEAST) OFI_{WRITE,READ}() - gasneti_init_done = 1; return GASNET_OK; @@ -156,21 +157,29 @@ static int gasnetc_attach_primary(void) { */ gasneti_spawner->Cleanup(); +#if GASNET_SEGMENT_EVERYTHING + GASNETI_SAFE_PROPAGATE( gasnetc_segment_register(NULL) ); +#endif + return GASNET_OK; } /* ------------------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------------------ */ static int gasnetc_attach_segment(gex_Segment_t *segment_p, gex_TM_t tm, uintptr_t segsize, - gasneti_bootstrapExchangefn_t exchangefn, gex_Flags_t flags) { /* ------------------------------------------------------------------------------------ */ /* register client segment */ - gasnet_seginfo_t myseg = gasneti_segmentAttach(segment_p, 0, tm, segsize, exchangefn, flags); + gasnet_seginfo_t myseg = gasneti_segmentAttach(segment_p, tm, segsize, flags); + + // Register memory + gasnetc_Segment_t segment = (gasnetc_Segment_t) gasneti_import_segment(*segment_p); + GASNETI_SAFE_PROPAGATE( gasnetc_segment_register(segment) ); - 
gasnetc_ofi_attach(myseg.addr, myseg.size); + // Exchange memory keys + gex_EP_t ep = gex_TM_QueryEP(tm); + gasnetc_segment_exchange(tm, &ep, 1); return GASNET_OK; } @@ -183,7 +192,7 @@ extern int gasnetc_attach( gex_TM_t _tm, { GASNETI_TRACE_PRINTF(C,("gasnetc_attach(table (%i entries), segsize=%"PRIuPTR")", numentries, segsize)); - gasneti_TM_t tm = gasneti_import_tm(_tm); + gasneti_TM_t tm = gasneti_import_tm_nonpair(_tm); gasneti_EP_t ep = tm->_ep; if (!gasneti_init_done) @@ -208,13 +217,13 @@ extern int gasnetc_attach( gex_TM_t _tm, #if GASNET_SEGMENT_FAST || GASNET_SEGMENT_LARGE /* register client segment */ gex_Segment_t seg; // g2ex segment is automatically saved by a hook - if (GASNET_OK != gasnetc_attach_segment(&seg, _tm, segsize, gasneti_defaultExchange, GASNETI_FLAG_INIT_LEGACY)) + if (GASNET_OK != gasnetc_attach_segment(&seg, _tm, segsize, GASNETI_FLAG_INIT_LEGACY)) GASNETI_RETURN_ERRR(RESOURCE,"Error attaching segment"); #endif /* register client handlers */ - if (table && gasneti_amregister_legacy(ep->_amtbl, table, numentries) != GASNET_OK) + if (table && gasneti_amregister_legacy(ep, table, numentries) != GASNET_OK) GASNETI_RETURN_ERRR(RESOURCE,"Error registering handlers"); /* ensure everything is initialized across all nodes */ @@ -250,18 +259,22 @@ extern int gasnetc_Client_Init( gasneti_trace_init(argc, argv); } + // Do NOT move this prior to the gasneti_trace_init() call + GASNETI_TRACE_PRINTF(O,("gex_Client_Init: name='%s' argc_p=%p argv_p=%p flags=%d", + clientName, (void *)argc, (void *)argv, flags)); + // allocate the client object - gasneti_Client_t client = gasneti_alloc_client(clientName, flags, 0); + gasneti_Client_t client = gasneti_alloc_client(clientName, flags); *client_p = gasneti_export_client(client); // create the initial endpoint with internal handlers - if (gasnetc_EP_Create(ep_p, *client_p, flags)) + if (gex_EP_Create(ep_p, *client_p, GEX_EP_CAPABILITY_ALL, flags)) GASNETI_RETURN_ERRR(RESOURCE,"Error creating initial 
endpoint"); gasneti_EP_t ep = gasneti_import_ep(*ep_p); gasnetc_handler = ep->_amtbl; // TODO-EX: this global variable to be removed // TODO-EX: create team - gasneti_TM_t tm = gasneti_alloc_tm(ep, gasneti_mynode, gasneti_nodes, flags, 0); + gasneti_TM_t tm = gasneti_alloc_tm(ep, gasneti_mynode, gasneti_nodes, flags); *tm_p = gasneti_export_tm(tm); if (0 == (flags & GASNETI_FLAG_INIT_LEGACY)) { @@ -298,58 +311,61 @@ extern int gasnetc_Segment_Attach( // TODO-EX: this implementation only works *once* // TODO-EX: should be using the team's exchange function if possible // TODO-EX: need to pass proper flags (e.g. pshm and bind) instead of 0 - if (GASNET_OK != gasnetc_attach_segment(segment_p, tm, length, gasneti_defaultExchange, 0)) + if (GASNET_OK != gasnetc_attach_segment(segment_p, tm, length, 0)) GASNETI_RETURN_ERRR(RESOURCE,"Error attaching segment"); return GASNET_OK; } -extern int gasnetc_EP_Create(gex_EP_t *ep_p, - gex_Client_t client, - gex_Flags_t flags) { - /* (###) add code here to create an endpoint belonging to the given client */ -#if 1 // TODO-EX: This is a stub, which assumes 1 implicit call from ClientCreate - static gasneti_mutex_t lock = GASNETI_MUTEX_INITIALIZER; - gasneti_mutex_lock(&lock); - static int once = 0; - int prev = once; - once = 1; - gasneti_mutex_unlock(&lock); - if (prev) gasneti_fatalerror("Multiple endpoints are not yet implemented"); -#endif +extern int gasnetc_Segment_Create( + gex_Segment_t *segment_p, + gex_Client_t client, + gex_Addr_t address, + uintptr_t length, + gex_MK_t kind, + gex_Flags_t flags) +{ + gasneti_assert(segment_p); - gasneti_EP_t ep = gasneti_alloc_ep(gasneti_import_client(client), flags, 0); - *ep_p = gasneti_export_ep(ep); - - { /* core API handlers */ - gex_AM_Entry_t *ctable = (gex_AM_Entry_t *)gasnetc_get_handlertable(); - int len = 0; - int numreg = 0; - gasneti_assert(ctable); - while (ctable[len].gex_fnptr) len++; /* calc len */ - if (gasneti_amregister(ep->_amtbl, ctable, len, GASNETC_HANDLER_BASE, 
GASNETE_HANDLER_BASE, 0, &numreg) != GASNET_OK) - GASNETI_RETURN_ERRR(RESOURCE,"Error registering core API handlers"); - gasneti_assert_int(numreg ,==, len); - } + // Create the Segment object, allocating memory if appropriate + gasneti_Client_t i_client = gasneti_import_client(client); + int rc = gasneti_segmentCreate(segment_p, i_client, address, length, kind, flags); - { /* extended API handlers */ - gex_AM_Entry_t *etable = (gex_AM_Entry_t *)gasnete_get_handlertable(); - int len = 0; - int numreg = 0; - gasneti_assert(etable); - while (etable[len].gex_fnptr) len++; /* calc len */ - if (gasneti_amregister(ep->_amtbl, etable, len, GASNETE_HANDLER_BASE, GASNETI_CLIENT_HANDLER_BASE, 0, &numreg) != GASNET_OK) - GASNETI_RETURN_ERRR(RESOURCE,"Error registering extended API handlers"); - gasneti_assert_int(numreg ,==, len); + if (rc == GASNET_OK) { + #if 0 // TODO: register memory once gasnetc_segment_register() manages multiple keys + gasnetc_Segment_t segment = (gasnetc_Segment_t) gasneti_import_segment(*segment_p); + GASNETI_SAFE_PROPAGATE( gasnetc_segment_register(segment) ); + #endif } + return rc; +} + +extern int gasnetc_EP_PublishBoundSegment( + gex_TM_t tm, + gex_EP_t *eps, + size_t num_eps, + gex_Flags_t flags) +{ + // Conduit-independent parts + int rc = gasneti_EP_PublishBoundSegment(tm, eps, num_eps, flags); + if (GASNET_OK != rc) return rc; + + // Conduit-dependent parts + // TODO: merge comms into gasneti_EP_PublishBoundSegment(). 
+ gasnetc_segment_exchange(tm, eps, num_eps); + + // Avoid race in which AMRequestLong triggers AMRepyLong before exchange completes remotely + // TODO: barrier for multi-tm per-process + gex_Event_Wait(gex_Coll_BarrierNB(tm, 0)); + return GASNET_OK; } extern int gasnetc_EP_RegisterHandlers(gex_EP_t ep, gex_AM_Entry_t *table, size_t numentries) { - return gasneti_amregister_client(gasneti_import_ep(ep)->_amtbl, table, numentries); + return gasneti_amregister_client(gasneti_import_ep(ep), table, numentries); } /* ------------------------------------------------------------------------------------ */ int gasnetc_exit_in_progress = 0; @@ -440,6 +456,9 @@ static int gasnetc_exit_coordinate(int exitcode) { gasnetc_handler[i].gex_fnptr = (gex_AM_Fn_t)&gasnetc_noop; } + // prevent possible GASNETI_CHECK_INJECT() failures when we communicate + GASNETI_CHECK_INJECT_RESET(); + /* Coordinate using dissemination-pattern, with timeout. * lg(N) rounds each of which sends and recvs 1 AM */ @@ -929,9 +948,7 @@ extern int gasnetc_hsl_trylock(gex_HSL_t *hsl) { (for internal conduit use in bootstrapping, job management, etc.) 
*/ static gex_AM_Entry_t const gasnetc_handlers[] = { - #ifdef GASNETC_COMMON_HANDLERS - GASNETC_COMMON_HANDLERS(), - #endif + GASNETC_COMMON_HANDLERS(), /* ptr-width independent handlers */ gasneti_handler_tableentry_no_bits(gasnetc_exit_reqh,2,REQUEST,SHORT,0), diff --git a/third-party/gasnet/gasnet-src/ofi-conduit/gasnet_core.h b/third-party/gasnet/gasnet-src/ofi-conduit/gasnet_core.h index 2b3c4d8fef32..c0718f254eb9 100644 --- a/third-party/gasnet/gasnet-src/ofi-conduit/gasnet_core.h +++ b/third-party/gasnet/gasnet-src/ofi-conduit/gasnet_core.h @@ -21,7 +21,7 @@ ============== */ -extern void gasnetc_exit(int exitcode) GASNETI_NORETURN; +extern void gasnetc_exit(int _exitcode) GASNETI_NORETURN; GASNETI_NORETURNP(gasnetc_exit) #define gasnet_exit gasnetc_exit @@ -35,31 +35,34 @@ GASNETI_NORETURNP(gasnetc_exit) /* ------------------------------------------------------------------------------------ */ extern int gasnetc_Client_Init( - gex_Client_t *client_p, - gex_EP_t *ep_p, - gex_TM_t *tm_p, - const char *clientName, - int *argc, - char ***argv, - gex_Flags_t flags); + gex_Client_t *_client_p, + gex_EP_t *_ep_p, + gex_TM_t *_tm_p, + const char *_clientName, + int *_argc, + char ***_argv, + gex_Flags_t _flags); // gasnetex.h handles name-shifting of gex_Client_Init() extern int gasnetc_Segment_Attach( - gex_Segment_t *segment_p, - gex_TM_t tm, - uintptr_t length); + gex_Segment_t *_segment_p, + gex_TM_t _tm, + uintptr_t _length); #define gex_Segment_Attach gasnetc_Segment_Attach -extern int gasnetc_EP_Create( - gex_EP_t *ep_p, - gex_Client_t client, - gex_Flags_t flags); -#define gex_EP_Create gasnetc_EP_Create +extern int gasnetc_Segment_Create( + gex_Segment_t *_segment_p, + gex_Client_t _client, + gex_Addr_t _address, + uintptr_t _length, + gex_MK_t _kind, + gex_Flags_t _flags); +#define gex_Segment_Create gasnetc_Segment_Create extern int gasnetc_EP_RegisterHandlers( - gex_EP_t ep, - gex_AM_Entry_t *table, - size_t numentries); + gex_EP_t _ep, + 
gex_AM_Entry_t *_table, + size_t _numentries); #define gex_EP_RegisterHandlers gasnetc_EP_RegisterHandlers /* ------------------------------------------------------------------------------------ */ /* @@ -103,11 +106,11 @@ typedef struct { #define gex_HSL_Unlock(hsl) #define gex_HSL_Trylock(hsl) GASNET_OK #else - extern void gasnetc_hsl_init (gex_HSL_t *hsl); - extern void gasnetc_hsl_destroy(gex_HSL_t *hsl); - extern void gasnetc_hsl_lock (gex_HSL_t *hsl); - extern void gasnetc_hsl_unlock (gex_HSL_t *hsl); - extern int gasnetc_hsl_trylock(gex_HSL_t *hsl) GASNETI_WARN_UNUSED_RESULT; + extern void gasnetc_hsl_init (gex_HSL_t *_hsl); + extern void gasnetc_hsl_destroy(gex_HSL_t *_hsl); + extern void gasnetc_hsl_lock (gex_HSL_t *_hsl); + extern void gasnetc_hsl_unlock (gex_HSL_t *_hsl); + extern int gasnetc_hsl_trylock(gex_HSL_t *_hsl) GASNETI_WARN_UNUSED_RESULT; #define gex_HSL_Init gasnetc_hsl_init #define gex_HSL_Destroy gasnetc_hsl_destroy @@ -132,12 +135,28 @@ typedef struct { /* Provide tigher bounds based on parameters */ // TODO-EX: Medium sizes can be further improved upon for PSHM case -#define gasnetc_AM_MaxRequestMedium(tm,rank,lc_opt,flags,nargs) gex_AM_LUBRequestMedium() -#define gasnetc_AM_MaxReplyMedium(tm,rank,lc_opt,flags,nargs) gex_AM_LUBReplyMedium() -#define gasnetc_AM_MaxRequestLong(tm,rank,lc_opt,flags,nargs) gex_AM_LUBRequestLong() -#define gasnetc_AM_MaxReplyLong(tm,rank,lc_opt,flags,nargs) gex_AM_LUBReplyLong() -#define gasnetc_Token_MaxReplyMedium(token,lc_opt,flags,nargs) gex_AM_LUBReplyMedium() -#define gasnetc_Token_MaxReplyLong(token,lc_opt,flags,nargs) gex_AM_LUBReplyLong() +#define gasnetc_AM_MaxRequestMedium(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS5(tm,rank,lc_opt,flags,nargs),gex_AM_LUBRequestMedium()) +#define gasnetc_AM_MaxReplyMedium(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS5(tm,rank,lc_opt,flags,nargs),gex_AM_LUBReplyMedium()) +#define gasnetc_Token_MaxReplyMedium(token,lc_opt,flags,nargs) \ + 
(GASNETI_UNUSED_ARGS4(token,lc_opt,flags,nargs),gex_AM_LUBReplyMedium()) + +#define gasnetc_AM_MaxRequestLong(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS4(tm,rank,lc_opt,nargs), \ + ((flags) & GEX_FLAG_AM_PREPARE_LEAST_ALLOC \ + ? GASNETC_REF_NPAM_MAX_ALLOC \ + : gex_AM_LUBRequestLong())) +#define gasnetc_AM_MaxReplyLong(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS4(tm,rank,lc_opt,nargs), \ + ((flags) & GEX_FLAG_AM_PREPARE_LEAST_ALLOC \ + ? GASNETC_REF_NPAM_MAX_ALLOC \ + : gex_AM_LUBReplyLong())) +#define gasnetc_Token_MaxReplyLong(token,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS3(token,lc_opt,nargs), \ + ((flags) & GEX_FLAG_AM_PREPARE_LEAST_ALLOC \ + ? GASNETC_REF_NPAM_MAX_ALLOC \ + : gex_AM_LUBReplyLong())) /* ------------------------------------------------------------------------------------ */ /* diff --git a/third-party/gasnet/gasnet-src/ofi-conduit/gasnet_core_fwd.h b/third-party/gasnet/gasnet-src/ofi-conduit/gasnet_core_fwd.h index 68bbd4212bd7..510f842cf75c 100644 --- a/third-party/gasnet/gasnet-src/ofi-conduit/gasnet_core_fwd.h +++ b/third-party/gasnet/gasnet-src/ofi-conduit/gasnet_core_fwd.h @@ -13,7 +13,7 @@ #ifndef _GASNET_CORE_FWD_H #define _GASNET_CORE_FWD_H -#define GASNET_CORE_VERSION 0.5 +#define GASNET_CORE_VERSION 0.6 #define GASNET_CORE_VERSION_STR _STRINGIFY(GASNET_CORE_VERSION) #define GASNET_CORE_NAME OFI #define GASNET_CORE_NAME_STR _STRINGIFY(GASNET_CORE_NAME) @@ -42,6 +42,9 @@ #endif #endif + // uncomment for each MK_CLASS which the conduit supports. leave commented otherwise +//#define GASNET_HAVE_MK_CLASS_CUDA_UVA GASNETI_MK_CLASS_CUDA_UVA_ENABLED + /* conduits should define GASNETI_CONDUIT_THREADS to 1 if they have one or more "private" threads which may be used to run AM handlers, even under GASNET_SEQ this ensures locking is still done correctly, etc @@ -71,12 +74,12 @@ your conduit must provide the V-suffixed functions for any of these that are not defined. 
*/ -/* #define GASNETC_HAVE_NP_REQ_MEDIUM 1 */ -/* #define GASNETC_HAVE_NP_REP_MEDIUM 1 */ -/* #define GASNETC_HAVE_NP_REQ_LONG 1 */ -/* #define GASNETC_HAVE_NP_REP_LONG 1 */ +/* #define GASNET_NATIVE_NP_ALLOC_REQ_MEDIUM 1 */ +/* #define GASNET_NATIVE_NP_ALLOC_REP_MEDIUM 1 */ +/* #define GASNET_NATIVE_NP_ALLOC_REQ_LONG 1 */ +/* #define GASNET_NATIVE_NP_ALLOC_REP_LONG 1 */ - /* uncomment for each GASNETC_HAVE_NP_* enabled above if the Commit function + /* uncomment for each GASNET_NATIVE_NP_ALLOC_* enabled above if the Commit function has the numargs argument even in an NDEBUG build (it is always passed in DEBUG builds). */ @@ -89,10 +92,56 @@ include a call to gasneti_AMPoll (or equivalent) for progress. The preferred implementation is to Poll only in the M-suffixed calls and not the V-suffixed calls (and GASNETC_REQUESTV_POLLS undefined). - Used if (and only if) any of the GASNETC_HAVE_NP_* values above are unset. + Used if (and only if) any of the GASNET_NATIVE_NP_ALLOC_* values above are unset. */ /* #define GASNETC_REQUESTV_POLLS 1 */ + // uncomment if conduit provides a gasnetc-prefixed override + // TODO: this should be a hook rather than an override +#define GASNETC_HAVE_EP_PUBLISHBOUNDSEGMENT 1 + + /* If your conduit uses conduit-specific extensions to the basic object + types, then define the corresponding SIZEOF macros below to return + the total length of the conduit-specific object, including the prefix + portion which must be the matching GASNETI_[OBJECT]_COMMON fields. + Similarly, *_HOOK macros should be defined as callbacks to perform + conduit-specific initialization and finalization tasks, if any. + If a given SIZEOF macro is defined, but the corresponding INIT_HOOK is + not, then space beyond the COMMON fields will be zero-initialized. + In all cases, GASNETC_[OBJECT]_EXTRA_DECLS provides the place to + provide necessary declarations (since this file is included very early). 
+ */ + +//#define GASNETC_CLIENT_EXTRA_DECLS (###) +//#define GASNETC_CLIENT_INIT_HOOK(i_client) (###) +//#define GASNETC_CLIENT_FINI_HOOK(i_client) (###) +//#define GASNETC_SIZEOF_CLIENT_T() (###) + +#define GASNETC_SEGMENT_EXTRA_DECLS \ + extern size_t gasnetc_sizeof_segment_t(void); +//#define GASNETC_SEGMENT_INIT_HOOK(i_segment) (###) +//#define GASNETC_SEGMENT_FINI_HOOK(i_segment) (###) +#define GASNETC_SIZEOF_SEGMENT_T() \ + gasnetc_sizeof_segment_t() + +//#define GASNETC_TM_EXTRA_DECLS (###) +//#define GASNETC_TM_INIT_HOOK(i_tm) (###) +//#define GASNETC_TM_FINI_HOOK(i_tm) (###) +//#define GASNETC_SIZEOF_TM_T() (###) + +//#define GASNETC_EP_EXTRA_DECLS (###) +//#define GASNETC_EP_INIT_HOOK(i_ep) (###) +//#define GASNETC_EP_FINI_HOOK(i_ep) (###) +//#define GASNETC_SIZEOF_EP_T() (###) + +// If conduit supports GASNET_MAXEPS!=1, set default and (optional) max values here. +// Leaving GASNETC_MAXEPS_DFLT unset will result in GASNET_MAXEPS=1, independent +// of all other settings (appropriate for conduits without multi-ep support). +// If set, GASNETC_MAXEPS_MAX it is used to limit a user's --with-maxeps (and a +// global default limit is used otherwise). 
+//#define GASNETC_MAXEPS_DFLT ### // default num endpoints this conduit supports, undef means no multi-ep support +//#define GASNETC_MAXEPS_MAX ### // leave unset for default + /* this can be used to add conduit-specific statistical collection values (see gasnet_trace.h) */ #define GASNETC_CONDUIT_STATS(CNT,VAL,TIME) \ diff --git a/third-party/gasnet/gasnet-src/ofi-conduit/gasnet_core_internal.h b/third-party/gasnet/gasnet-src/ofi-conduit/gasnet_core_internal.h index e642f112de1f..3e9ba3b801c5 100644 --- a/third-party/gasnet/gasnet-src/ofi-conduit/gasnet_core_internal.h +++ b/third-party/gasnet/gasnet-src/ofi-conduit/gasnet_core_internal.h @@ -15,7 +15,7 @@ #define GASNETC_HSL_SPINLOCK 1 /* ------------------------------------------------------------------------------------ */ -#define _hidx_gasnetc_exchg_reqh (GASNETC_HANDLER_BASE+0) +#define _hidx_gasnetc_hbarr_reqh (GASNETC_HANDLER_BASE+0) #define _hidx_gasnetc_exit_reqh (GASNETC_HANDLER_BASE+1) /* add new core API handlers here and to the bottom of gasnet_core.c */ diff --git a/third-party/gasnet/gasnet-src/ofi-conduit/gasnet_ofi.c b/third-party/gasnet/gasnet-src/ofi-conduit/gasnet_ofi.c index 462c95aad9dc..30d958994997 100644 --- a/third-party/gasnet/gasnet-src/ofi-conduit/gasnet_ofi.c +++ b/third-party/gasnet/gasnet-src/ofi-conduit/gasnet_ofi.c @@ -29,11 +29,12 @@ struct fid_domain* gasnetc_ofi_domainfd; struct fid_av* gasnetc_ofi_avfd; struct fid_cq* gasnetc_ofi_tx_cqfd; /* CQ for both AM and RDMA tx ops */ struct fid_ep* gasnetc_ofi_rdma_epfd; -struct fid_mr* gasnetc_ofi_rdma_mrfd; struct fid_ep* gasnetc_ofi_request_epfd; struct fid_ep* gasnetc_ofi_reply_epfd; struct fid_cq* gasnetc_ofi_request_cqfd; struct fid_cq* gasnetc_ofi_reply_cqfd; +struct fid_mr* gasnetc_segment_mrfd = NULL; +struct fid_mr* gasnetc_auxseg_mrfd = NULL; size_t gasnetc_ofi_bbuf_threshold; typedef struct gasnetc_ofi_recv_metadata { @@ -56,19 +57,30 @@ static addr_table_t *addr_table; #define GET_RDMA_DEST(dest) 
(fi_addr_t)((dest)*NUM_OFI_ENDPOINTS+2) #endif +// TODO: multi-EP/multi-segment will require generalizing this (and callers) +// CAUTION: macro arguments may be evaluated multiple times (or zero) #if GASNET_SEGMENT_FAST || GASNET_SEGMENT_LARGE -#define GET_REMOTEADDR(remote_addr, dest) (uintptr_t)((char*)remote_addr - (char*)gasneti_seginfo[dest].addr) +#define GASNETC_OFI_IS_AUX(_addr,jobrank) \ + ((uintptr_t)(_addr) - (uintptr_t)gasneti_seginfo_aux[jobrank].addr < gasneti_seginfo_aux[jobrank].size) +#define GET_REMOTEADDR_AUX(remote_addr, jobrank, is_auxseg) \ + ((uintptr_t)remote_addr - \ + (uintptr_t)((is_auxseg) ? gasneti_seginfo_aux[jobrank].addr : gasneti_seginfo[jobrank].addr)) +#define GET_REMOTEADDR(remote_addr, jobrank) \ + GET_REMOTEADDR_AUX(remote_addr, jobrank, GASNETC_OFI_IS_AUX(remote_addr, jobrank)) #else -#define GET_REMOTEADDR(remote_addr, dest) (uintptr_t)remote_addr +#define GASNETC_OFI_IS_AUX(addr,jobrank) 0 +#define GET_REMOTEADDR_AUX(remote_addr, jobrank, is_auxseg) ((uintptr_t)remote_addr) +#define GET_REMOTEADDR(remote_addr, jobrank) ((uintptr_t)remote_addr) #endif #define SCALABLE_NOT_AUTO_DETECTED (-1) static short has_mr_scalable = SCALABLE_NOT_AUTO_DETECTED; -/* This pointer will only be malloced if GASNETC_OFI_HAS_MR_SCALABLE is - * true at runtime */ +/* These two pointers will only be malloced if GASNETC_OFI_HAS_MR_SCALABLE is + * false at runtime */ static uint64_t* gasnetc_ofi_target_keys; +static uint64_t* gasnetc_ofi_target_aux_keys; #ifndef GASNETC_OFI_HAS_MR_SCALABLE #define GASNETC_OFI_HAS_MR_SCALABLE has_mr_scalable #endif @@ -77,30 +89,35 @@ static uint64_t* gasnetc_ofi_target_keys; #define GET_REMOTEADDR_PER_MR_MODE(dest_addr, dest)\ GASNETC_OFI_HAS_MR_SCALABLE ? 
GET_REMOTEADDR(dest_addr, dest) : (uintptr_t)dest_addr -#define GASNETC_OFI_GET_MR_KEY(dest) (gasneti_assert(!GASNETC_OFI_HAS_MR_SCALABLE),\ - gasnetc_ofi_target_keys[dest]) +#define GASNETC_OFI_GET_MR_KEY_AUX(jobrank,is_auxseg) (gasneti_assert(!GASNETC_OFI_HAS_MR_SCALABLE),\ + (is_auxseg) ? gasnetc_ofi_target_aux_keys[jobrank] \ + : gasnetc_ofi_target_keys[jobrank]) +#define GASNETC_OFI_GET_MR_KEY(addr,jobrank) (gasneti_assert(!GASNETC_OFI_HAS_MR_SCALABLE),\ + GASNETC_OFI_GET_MR_KEY_AUX(jobrank,GASNETC_OFI_IS_AUX(addr,jobrank))) #define OFI_WRITE(ep, src_addr, nbytes, dest, dest_addr, ctxt_ptr)\ do {\ + int _is_auxseg = GASNETC_OFI_IS_AUX(dest_addr, dest); /* also the SCALABLE key */\ if (GASNETC_OFI_HAS_MR_SCALABLE){\ ret = fi_write(ep, src_addr, nbytes, NULL, GET_RDMA_DEST(dest), \ - GET_REMOTEADDR(dest_addr, dest), 0, ctxt_ptr);\ + GET_REMOTEADDR_AUX(dest_addr, dest, _is_auxseg), _is_auxseg, ctxt_ptr);\ }\ else {\ ret = fi_write(ep, src_addr, nbytes, NULL, GET_RDMA_DEST(dest), \ - (uintptr_t)dest_addr, GASNETC_OFI_GET_MR_KEY(dest), ctxt_ptr);\ + (uintptr_t)dest_addr, GASNETC_OFI_GET_MR_KEY_AUX(dest,_is_auxseg), ctxt_ptr);\ }\ } while(0) #define OFI_READ(ep, dest_buf, nbytes, src, src_addr, ctxt_ptr)\ do {\ + int _is_auxseg = GASNETC_OFI_IS_AUX(src_addr, src); /* also the SCALABLE key */\ if (GASNETC_OFI_HAS_MR_SCALABLE) {\ ret = fi_read(ep, dest_buf, nbytes, NULL, GET_RDMA_DEST(src), \ - GET_REMOTEADDR(src_addr, src), 0, ctxt_ptr);\ + GET_REMOTEADDR_AUX(src_addr, src, _is_auxseg), _is_auxseg, ctxt_ptr);\ }\ else {\ ret = fi_read(ep, dest_buf, nbytes, NULL, GET_RDMA_DEST(src), \ - (uintptr_t)src_addr, GASNETC_OFI_GET_MR_KEY(dest),ctxt_ptr);\ + (uintptr_t)src_addr, GASNETC_OFI_GET_MR_KEY_AUX(src, _is_auxseg),ctxt_ptr);\ }\ } while(0) @@ -640,8 +657,8 @@ int gasnetc_ofi_init(void) fi_freeinfo(hints); if (!GASNETC_OFI_HAS_MR_SCALABLE) { - gasnetc_ofi_target_keys = gasneti_malloc(sizeof(uint64_t)*gasneti_nodes); - gasneti_assert(gasnetc_ofi_target_keys); + 
gasnetc_ofi_target_keys = gasneti_calloc(2*gasneti_nodes, sizeof(uint64_t)); + gasnetc_ofi_target_aux_keys = gasnetc_ofi_target_keys + gasneti_nodes; } receive_region_start = gasneti_malloc_aligned(GASNETI_PAGESIZE, multirecv_buff_size*num_multirecv_buffs); @@ -768,9 +785,25 @@ void gasnetc_ofi_exit(void) gasneti_fatalerror("close rdma epfd failed\n"); } - if(fi_close(&gasnetc_ofi_rdma_mrfd->fid)!=FI_SUCCESS) { +#if GASNET_SEGMENT_FAST || GASNET_SEGMENT_LARGE + GASNETI_SEGTBL_LOCK(); + gasneti_Segment_t seg; + GASNETI_SEGTBL_FOR_EACH(seg) { + struct fid_mr* mrfd = ((gasnetc_Segment_t)seg)->mrfd; + if(mrfd && (fi_close(&mrfd->fid)!=FI_SUCCESS)) { + gasneti_fatalerror("close mrfd failed\n"); + } + } + GASNETI_SEGTBL_UNLOCK(); +#else + if(gasnetc_segment_mrfd && (fi_close(&gasnetc_segment_mrfd->fid)!=FI_SUCCESS)) { gasneti_fatalerror("close mrfd failed\n"); } +#endif + + if (gasnetc_auxseg_mrfd && (fi_close(&gasnetc_auxseg_mrfd->fid) != FI_SUCCESS)) { + gasneti_fatalerror("close auxseg mrfd failed\n"); + } if(fi_close(&gasnetc_ofi_tx_cqfd->fid)!=FI_SUCCESS) { gasneti_fatalerror("close am scqfd failed\n"); @@ -974,33 +1007,96 @@ void gasnetc_ofi_handle_bounce_rdma(void *buf) /*------------------------------------------------ * Pre-post or pin-down memory * ----------------------------------------------*/ -void gasnetc_ofi_attach(void *segbase, uintptr_t segsize) -{ - int ret = FI_SUCCESS; - uint64_t local_mr_key; - /* Pin-down Memory Region */ +// Local registration of segment memory +int gasnetc_segment_register(gasnetc_Segment_t segment) +{ #if GASNET_SEGMENT_FAST || GASNET_SEGMENT_LARGE - ret = fi_mr_reg(gasnetc_ofi_domainfd, segbase, segsize, FI_REMOTE_READ | FI_REMOTE_WRITE, 0ULL, 0ULL, 0ULL, &gasnetc_ofi_rdma_mrfd, NULL); + void *segbase = segment->_addr; + uintptr_t segsize = segment->_size; + struct fid_mr** mrfd_p = &segment->mrfd; #else - ret = fi_mr_reg(gasnetc_ofi_domainfd, (void *)0, UINT64_MAX, FI_REMOTE_READ | FI_REMOTE_WRITE, 0ULL, 0ULL, 0ULL, 
&gasnetc_ofi_rdma_mrfd, NULL); + void *segbase = (void *)0; + uintptr_t segsize = UINT64_MAX; + struct fid_mr** mrfd_p = &gasnetc_segment_mrfd; if (!GASNETC_OFI_HAS_MR_SCALABLE) { gasneti_fatalerror("GASNET_SEGMENT_EVERYTHING is not supported when using FI_MR_BASIC.\n" "Pick an OFI provider that supports FI_MR_SCALABLE if EVERYTHING\n" "is needed.\n"); } #endif - if (FI_SUCCESS != ret) gasneti_fatalerror("fi_mr_reg for rdma failed: %d\n", ret); - - /* Exchange memory keys with other nodes.*/ - if (!GASNETC_OFI_HAS_MR_SCALABLE) { - local_mr_key = fi_mr_key(gasnetc_ofi_rdma_mrfd); - gasneti_bootstrapExchange(&local_mr_key, sizeof(uint64_t), - gasnetc_ofi_target_keys); + int ret = fi_mr_reg(gasnetc_ofi_domainfd, segbase, segsize, + FI_REMOTE_READ | FI_REMOTE_WRITE, 0ULL, 0ULL, 0ULL, + mrfd_p, NULL); + if (FI_SUCCESS != ret) { + gasneti_fatalerror("fi_mr_reg for rdma failed: %d(%s)\n", ret, fi_strerror(-ret)); } + return GASNET_OK; } +// Exchange memory keys with other nodes. +void gasnetc_segment_exchange(gex_TM_t tm, gex_EP_t *eps, size_t num_eps) +{ + if (GASNETC_OFI_HAS_MR_SCALABLE) return; + + // Exchange a 64-bit mr key + struct exchg_data { + gex_EP_Location_t loc; + uint64_t mr_key; + } *local, *global, *p; + + size_t elem_sz = sizeof(struct exchg_data); + local = gasneti_malloc(num_eps * elem_sz); + + // Pack + p = local; + for (gex_Rank_t i = 0; i < num_eps; ++i) { + gex_EP_t ep = eps[i]; + gasnetc_Segment_t segment = (gasnetc_Segment_t) gasneti_import_ep(ep)->_segment; + if (! 
segment) continue; + p->loc.gex_rank = gasneti_mynode; + p->loc.gex_ep_index = gex_EP_QueryIndex(ep); + p->mr_key = fi_mr_key(segment->mrfd); + ++p; + } + + size_t local_bytes = elem_sz * (p - local); + size_t total_bytes = gasneti_blockingRotatedExchangeV(tm, local, local_bytes, (void**)&global, NULL); + size_t total_eps = total_bytes / elem_sz; + gasneti_free(local); + + // Unpack + p = global; + for (size_t i = 0; i < total_eps; ++i, ++p) { + gex_Rank_t jobrank = p->loc.gex_rank; + if (! p->loc.gex_ep_index ) { // Primordial EP (includes loopback) + gasneti_assert(!gasnetc_ofi_target_keys[jobrank] || + gasnetc_ofi_target_keys[jobrank] == p->mr_key); + gasnetc_ofi_target_keys[jobrank] = p->mr_key; + } else { + // Non-primordial + gasneti_unreachable_error(("gex_EP_PublishBoundSegment does not yet handle non-primordial EPs")); + } + } + gasneti_free(global); +} + +void gasnetc_auxseg_register(gasnet_seginfo_t si) +{ + int ret = fi_mr_reg(gasnetc_ofi_domainfd, si.addr, si.size, + FI_REMOTE_READ | FI_REMOTE_WRITE, 0ULL, 1ULL, 0ULL, + &gasnetc_auxseg_mrfd, NULL); + if (FI_SUCCESS != ret) { + gasneti_fatalerror("fi_mr_reg for aux_seg failed: %d(%s)\n", ret, fi_strerror(-ret)); + } + + if (GASNETC_OFI_HAS_MR_SCALABLE) return; + + uint64_t mr_key = fi_mr_key(gasnetc_auxseg_mrfd); + gasneti_assert(gasnetc_ofi_target_aux_keys); + gasneti_bootstrapExchange(&mr_key, sizeof(mr_key), gasnetc_ofi_target_aux_keys); +} /*------------------------------------------------ * OFI conduit network poll function @@ -1436,7 +1532,7 @@ int gasnetc_rdma_put_non_bulk(gex_Rank_t dest, void* dest_addr, void* src_addr, iovec.iov_len = nbytes; rma_iov.addr = dest_ptr; rma_iov.len = nbytes; - rma_iov.key = GASNETC_OFI_HAS_MR_SCALABLE ? 
0 : GASNETC_OFI_GET_MR_KEY(dest); + rma_iov.key = GASNETC_OFI_GET_MR_KEY(dest_addr, dest); msg.context = ctxt_ptr; msg.msg_iov = &iovec; diff --git a/third-party/gasnet/gasnet-src/ofi-conduit/gasnet_ofi.h b/third-party/gasnet/gasnet-src/ofi-conduit/gasnet_ofi.h index 65ba74c1af6e..89c3e8d022a6 100644 --- a/third-party/gasnet/gasnet-src/ofi-conduit/gasnet_ofi.h +++ b/third-party/gasnet/gasnet-src/ofi-conduit/gasnet_ofi.h @@ -50,7 +50,6 @@ extern struct fid_av* gasnetc_ofi_avfd; extern struct fid_cq* gasnetc_ofi_tx_cqfd; /* CQ for both AM and RDMA tx ops */ extern struct fid_ep* gasnetc_ofi_rdma_epfd; -extern struct fid_mr* gasnetc_ofi_rdma_mrfd; extern struct fid_ep* gasnetc_ofi_request_epfd; extern struct fid_ep* gasnetc_ofi_reply_epfd; @@ -146,9 +145,20 @@ typedef struct gasnetc_ofi_bounce_op_ctxt { gasnetc_paratomic_t cntr; } gasnetc_ofi_bounce_op_ctxt_t; +// Conduit-specific Segment type +typedef struct gasnetc_Segment_t_ { + GASNETI_SEGMENT_COMMON // conduit-indep part as prefix + + // conduit-specific fields + struct fid_mr* mrfd; +} *gasnetc_Segment_t; + +void gasnetc_auxseg_register(gasnet_seginfo_t si); +int gasnetc_segment_register(gasnetc_Segment_t segment); +void gasnetc_segment_exchange(gex_TM_t tm, gex_EP_t *eps, size_t num_eps); + int gasnetc_ofi_init(void); void gasnetc_ofi_poll(void); -void gasnetc_ofi_attach(void *segbase, uintptr_t segsize); void gasnetc_ofi_exit(void); /* Active Messages Send Functions */ diff --git a/third-party/gasnet/gasnet-src/other/Makefile-conduit.mak.in b/third-party/gasnet/gasnet-src/other/Makefile-conduit.mak.in index 80d25fc0a3a3..be0939a1dbeb 100644 --- a/third-party/gasnet/gasnet-src/other/Makefile-conduit.mak.in +++ b/third-party/gasnet/gasnet-src/other/Makefile-conduit.mak.in @@ -35,6 +35,10 @@ pkgconfig_files = $(pkgconfig_files_seq) $(pkgconfig_files_par) $(pkgconfig_file @GASNET_SEGMENT_LARGE_TRUE@GASNET_SEGMENT_STR="LARGE" @GASNET_SEGMENT_EVERYTHING_TRUE@GASNET_SEGMENT_STR="EVERYTHING" +# All configured-in 
memory kinds objects, and conduit's supported subset +@HAVE_MK_CLASS_CUDA_UVA_TRUE@kinds_cuda_uva_obj = $(builddir)/gasnet_cuda_uva-$(THREAD_MODEL).o +KINDS_SPECIAL_OBJS = $(foreach kind, $(CONDUIT_KINDS), $(kinds_$(kind)_obj)) + include $(top_builddir)/other/Makefile-libgasnet.mak EXTRA_DIST = $(CONDUIT_FILELIST) README @@ -342,7 +346,7 @@ do-pkgconfig-conduit: force rm -f $(pkgconfig_file) @echo Building $(pkgconfig_file) from $$FRAGMENT... @echo '# WARNING: This file is automatically generated - do NOT edit directly' > $(pkgconfig_file) - @echo '# Copyright 2017, The Regents of the University of California' >> $(pkgconfig_file) + @echo '# Copyright 2021, The Regents of the University of California' >> $(pkgconfig_file) @echo '# Terms of use are as specified in license.txt' >> $(pkgconfig_file) @echo '# See the GASNet README for instructions on using these variables' >> $(pkgconfig_file) @VARS="GASNET_CC GASNET_OPT_CFLAGS GASNET_MISC_CFLAGS \ @@ -351,10 +355,25 @@ do-pkgconfig-conduit: force GASNET_LD GASNET_LDFLAGS GASNET_LIBS" ; \ $(MAKE) --no-print-directory -f $(top_srcdir)/other/Makefile-echovar.mak VARS="$$VARS" echovars \ >> $(pkgconfig_file) - @cat $(pkgconfig_conduit) | \ + @unset GASNET_DESC; \ + if test -d $(top_srcdir)/.git ; then \ + GASNET_DESC=`( cd $(top_srcdir) && $${GIT=git} describe --long --dirty --always ) 2>/dev/null | head -n 1`; \ + fi; \ + if test -z "$$GASNET_DESC" && test -f $(top_srcdir)/version.git ; then \ + GASNET_DESC=`cat $(top_srcdir)/version.git` || exit $$? 
; \ + fi; \ + if expr "x$$GASNET_DESC" : 'xgex-$(VERSION)-0-' >/dev/null 2>&1; then unset GASNET_DESC; fi; \ + cat $(pkgconfig_conduit) | \ sed -e 's@#conduit_name#@$(CONDUIT_NAME)@g' \ -e 's@#thread_model#@$(thread_model)@g' \ - -e 's@#version#@$(VERSION)@g' \ + -e "s@#version#@$(VERSION)$${GASNET_DESC:+ ($$GASNET_DESC)}@g" \ >> $(pkgconfig_file) +# +# Memory Kinds support "special objects" +# + +# Memory kinds for CUDA UVA devices +$(builddir)/gasnet_cuda_uva-$(THREAD_MODEL).o: force + $(CC) @CUDA_UVA_CFLAGS@ $(LIBCFLAGS) -o $@ -c $(top_srcdir)/other/kinds/gasnet_cuda_uva.c diff --git a/third-party/gasnet/gasnet-src/other/Makefile-libgasnet.mak.in b/third-party/gasnet/gasnet-src/other/Makefile-libgasnet.mak.in index 2ecb716df28a..f6b82eb44a15 100644 --- a/third-party/gasnet/gasnet-src/other/Makefile-libgasnet.mak.in +++ b/third-party/gasnet/gasnet-src/other/Makefile-libgasnet.mak.in @@ -28,6 +28,7 @@ TOOLLIBINCLUDES = \ $(PLPA_INCLUDES) LIBINCLUDES = $(TOOLLIBINCLUDES) \ + -I$(top_srcdir)/other/kinds \ -I$(top_srcdir)/extended-ref/coll \ -I$(top_srcdir)/extended-ref/vis \ -I$(top_srcdir)/extended-ref/ratomic \ @@ -86,6 +87,7 @@ libgasnet_sources = \ $(top_srcdir)/extended-ref/coll/gasnet_team.c \ $(top_srcdir)/extended-ref/coll/gasnet_hashtable.c \ $(top_srcdir)/extended-ref/coll/gasnet_reduce.c \ + $(top_srcdir)/other/kinds/gasnet_refkinds.c \ $(top_srcdir)/gasnet_event.c \ $(top_srcdir)/gasnet_legacy.c \ $(top_srcdir)/gasnet_internal.c \ @@ -97,7 +99,7 @@ libgasnet_sources = \ libgasnet_objects = \ `for file in $(libgasnet_sources) ; do echo \`basename $$file .c\`.o ; done` \ - $(CONDUIT_SPECIAL_OBJS) + $(CONDUIT_SPECIAL_OBJS) $(KINDS_SPECIAL_OBJS) libgasnet_tools_dependencies = \ $(CONFIG_HEADER) \ @@ -109,6 +111,7 @@ libgasnet_dependencies = \ $(srcdir)/*.[ch] \ $(top_srcdir)/extended-ref/*/*.[ch] \ $(top_srcdir)/extended-ref/*.[ch] \ + $(top_srcdir)/other/kinds/*.[ch] \ $(top_srcdir)/tests/test.h \ $(CONDUIT_SOURCELIST) \ $(CONDUIT_EXTRAHEADERS) \ @@ -118,15 
+121,26 @@ libgasnet_dependencies = \ THREAD_MODEL=SEQ THREAD_MODEL_LC=`echo "$(THREAD_MODEL)" | @AWK@ '{print tolower($$0)}'` LIBGASNET_NAME=libgasnet-$(CONDUIT_NAME) -do-libgasnet: $(CONDUIT_SPECIAL_OBJS) +do-libgasnet: $(CONDUIT_SPECIAL_OBJS) $(KINDS_SPECIAL_OBJS) @mkdir -p .$(THREAD_MODEL) @libgasnet_objects="$(libgasnet_objects)" ; libgasnet_objects=`echo $$libgasnet_objects` ; \ pwd=`@PWD_PROG@`; keeptmps='$(KEEPTMPS)'; \ if test -z '$(KEEPTMPS)'; then rmcmd="&& rm -f $$libgasnet_objects"; fi; \ + unset GASNET_DESC; \ + if test -d $(top_srcdir)/.git ; then \ + GASNET_DESC=`( cd $(top_srcdir) && $${GIT=git} describe --long --dirty --always ) 2>/dev/null | head -n 1`; \ + fi; \ + if test -z "$$GASNET_DESC" && test -f $(top_srcdir)/version.git ; then \ + GASNET_DESC=`cat $(top_srcdir)/version.git` || exit $$? ; \ + fi; \ + unset DESCFLAG; \ + if test -n "$$GASNET_DESC"; then \ + DESCFLAG="-DGASNETI_GIT_HASH='$$GASNET_DESC'"; \ + fi; \ if test -n '$(SEPARATE_CC)' ; then \ - compcmd="for file in $(libgasnet_sources) ; do $(CC) $(LIBCFLAGS) -c "'$$file'" || exit "'$$?'" ; done" ; \ + compcmd="for file in $(libgasnet_sources) ; do $(CC) $(LIBCFLAGS) $$DESCFLAG -c "'$$file'" || exit "'$$?'" ; done" ; \ else \ - compcmd="$(CC) $(LIBCFLAGS) -c $(libgasnet_sources)" ; \ + compcmd="$(CC) $(LIBCFLAGS) $$DESCFLAG -c $(libgasnet_sources)" ; \ fi ; \ cmd="$$compcmd && \ $(AR) cru $$pwd/$(LIBGASNET_NAME)-$(THREAD_MODEL_LC).a $$libgasnet_objects && \ @@ -211,7 +225,7 @@ do-pkgconfig-tools: force rm -f $(pkgconfig_file) @echo Building $(pkgconfig_file) from $$FRAGMENT... 
@echo '# WARNING: This file is automatically generated - do NOT edit directly' > $(pkgconfig_file) - @echo '# Copyright 2017, The Regents of the University of California' >> $(pkgconfig_file) + @echo '# Copyright 2021, The Regents of the University of California' >> $(pkgconfig_file) @echo '# Terms of use are as specified in license.txt' >> $(pkgconfig_file) @echo '# See the GASNet README for instructions on using these variables' >> $(pkgconfig_file) @VARS="GASNETTOOLS_CC GASNETTOOLS_CPPFLAGS GASNETTOOLS_CFLAGS \ @@ -219,9 +233,17 @@ do-pkgconfig-tools: force GASNETTOOLS_LD GASNETTOOLS_LDFLAGS GASNETTOOLS_LIBS" ; \ $(MAKE) --no-print-directory -f $(top_srcdir)/other/Makefile-echovar.mak VARS="$$VARS" echovars \ >> $(pkgconfig_file) - @cat $(pkgconfig_tools) | \ + @unset GASNET_DESC; \ + if test -d $(top_srcdir)/.git ; then \ + GASNET_DESC=`( cd $(top_srcdir) && $${GIT=git} describe --long --dirty --always ) 2>/dev/null | head -n 1`; \ + fi; \ + if test -z "$$GASNET_DESC" && test -f $(top_srcdir)/version.git ; then \ + GASNET_DESC=`cat $(top_srcdir)/version.git` || exit $$? 
; \ + fi; \ + if expr "x$$GASNET_DESC" : 'xgex-$(VERSION)-0-' >/dev/null 2>&1; then unset GASNET_DESC; fi; \ + cat $(pkgconfig_tools) | \ sed -e 's@#thread_model#@$(thread_model)@g' \ - -e 's@#version#@$(VERSION)@g' \ + -e "s@#version#@$(VERSION)$${GASNET_DESC:+ ($$GASNET_DESC)}@g" \ >> $(pkgconfig_file) do-pthreads-error: diff --git a/third-party/gasnet/gasnet-src/other/Makefile.am b/third-party/gasnet/gasnet-src/other/Makefile.am index ca5cc3b8611e..4a7f9c92382d 100644 --- a/third-party/gasnet/gasnet-src/other/Makefile.am +++ b/third-party/gasnet/gasnet-src/other/Makefile.am @@ -20,10 +20,12 @@ CONDUITMODE_SUBDIRS = \ CONDUITMODE_EXTRADIST = \ amx \ + hwloc \ ssh-spawner \ mpi-spawner \ pmi-spawner \ spawner \ + kinds \ myxml \ detect-pshm.c \ pkgconfig-conduit.pc \ diff --git a/third-party/gasnet/gasnet-src/other/Makefile.in b/third-party/gasnet/gasnet-src/other/Makefile.in index fa6f2d0c2582..113fc43f217d 100644 --- a/third-party/gasnet/gasnet-src/other/Makefile.in +++ b/third-party/gasnet/gasnet-src/other/Makefile.in @@ -251,6 +251,10 @@ CONFIGURE_ARGS = @CONFIGURE_ARGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CROSS_COMPILING = @CROSS_COMPILING@ +CUDA_UVA_CFLAGS = @CUDA_UVA_CFLAGS@ +CUDA_UVA_LDFLAGS = @CUDA_UVA_LDFLAGS@ +CUDA_UVA_LIBS = @CUDA_UVA_LIBS@ +CUDA_guess_prog = @CUDA_guess_prog@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ @@ -276,6 +280,8 @@ ENVCMD = @ENVCMD@ EXEEXT = @EXEEXT@ EXESUFFIX = @EXESUFFIX@ FH_CFLAGS = @FH_CFLAGS@ +GASNETI_HWLOC_BIND_PATH = @GASNETI_HWLOC_BIND_PATH@ +GASNETI_HWLOC_CALC_PATH = @GASNETI_HWLOC_CALC_PATH@ GASNETI_PTR_BITS = @GASNETI_PTR_BITS@ GASNET_EXTRA_DEFINES = @GASNET_EXTRA_DEFINES@ GASNET_IBV_SPAWNER_CONF = @GASNET_IBV_SPAWNER_CONF@ @@ -304,6 +310,10 @@ GSTACK_PATH = @GSTACK_PATH@ HAVE_INTTYPES_H = @HAVE_INTTYPES_H@ HAVE_STDINT_H = @HAVE_STDINT_H@ HAVE_SYS_TYPES_H = @HAVE_SYS_TYPES_H@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_guess_prog = @HWLOC_guess_prog@ 
IBV_CFLAGS = @IBV_CFLAGS@ IBV_LDFLAGS = @IBV_LDFLAGS@ IBV_LIBS = @IBV_LIBS@ @@ -475,10 +485,12 @@ CONDUITMODE_SUBDIRS = \ CONDUITMODE_EXTRADIST = \ amx \ + hwloc \ ssh-spawner \ mpi-spawner \ pmi-spawner \ spawner \ + kinds \ myxml \ detect-pshm.c \ pkgconfig-conduit.pc \ diff --git a/third-party/gasnet/gasnet-src/other/ammpi/Makefile.in b/third-party/gasnet/gasnet-src/other/ammpi/Makefile.in index 66b0907708f6..d50083461a21 100644 --- a/third-party/gasnet/gasnet-src/other/ammpi/Makefile.in +++ b/third-party/gasnet/gasnet-src/other/ammpi/Makefile.in @@ -229,6 +229,10 @@ CONFIGURE_ARGS = @CONFIGURE_ARGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CROSS_COMPILING = @CROSS_COMPILING@ +CUDA_UVA_CFLAGS = @CUDA_UVA_CFLAGS@ +CUDA_UVA_LDFLAGS = @CUDA_UVA_LDFLAGS@ +CUDA_UVA_LIBS = @CUDA_UVA_LIBS@ +CUDA_guess_prog = @CUDA_guess_prog@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ @@ -254,6 +258,8 @@ ENVCMD = @ENVCMD@ EXEEXT = @EXEEXT@ EXESUFFIX = @EXESUFFIX@ FH_CFLAGS = @FH_CFLAGS@ +GASNETI_HWLOC_BIND_PATH = @GASNETI_HWLOC_BIND_PATH@ +GASNETI_HWLOC_CALC_PATH = @GASNETI_HWLOC_CALC_PATH@ GASNETI_PTR_BITS = @GASNETI_PTR_BITS@ GASNET_EXTRA_DEFINES = @GASNET_EXTRA_DEFINES@ GASNET_IBV_SPAWNER_CONF = @GASNET_IBV_SPAWNER_CONF@ @@ -282,6 +288,10 @@ GSTACK_PATH = @GSTACK_PATH@ HAVE_INTTYPES_H = @HAVE_INTTYPES_H@ HAVE_STDINT_H = @HAVE_STDINT_H@ HAVE_SYS_TYPES_H = @HAVE_SYS_TYPES_H@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_guess_prog = @HWLOC_guess_prog@ IBV_CFLAGS = @IBV_CFLAGS@ IBV_LDFLAGS = @IBV_LDFLAGS@ IBV_LIBS = @IBV_LIBS@ diff --git a/third-party/gasnet/gasnet-src/other/ammpi/license.txt b/third-party/gasnet/gasnet-src/other/ammpi/license.txt index ae3ab2279df2..e1b34d17e0e4 100644 --- a/third-party/gasnet/gasnet-src/other/ammpi/license.txt +++ b/third-party/gasnet/gasnet-src/other/ammpi/license.txt @@ -3,7 +3,7 @@ * are subject to the following licensing terms: * * 
--------------------------------------------------------------------------- - * "Copyright (c) 2000-2004 The Regents of the University of California. + * "Copyright (c) 2000-2021 The Regents of the University of California. * All rights reserved. * * Permission to use, copy, modify, and distribute this software and its diff --git a/third-party/gasnet/gasnet-src/other/amudp/Makefile.in b/third-party/gasnet/gasnet-src/other/amudp/Makefile.in index 4c0ed1c672a2..3e0093ce99f4 100644 --- a/third-party/gasnet/gasnet-src/other/amudp/Makefile.in +++ b/third-party/gasnet/gasnet-src/other/amudp/Makefile.in @@ -250,6 +250,10 @@ CONFIGURE_ARGS = @CONFIGURE_ARGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CROSS_COMPILING = @CROSS_COMPILING@ +CUDA_UVA_CFLAGS = @CUDA_UVA_CFLAGS@ +CUDA_UVA_LDFLAGS = @CUDA_UVA_LDFLAGS@ +CUDA_UVA_LIBS = @CUDA_UVA_LIBS@ +CUDA_guess_prog = @CUDA_guess_prog@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ @@ -275,6 +279,8 @@ ENVCMD = @ENVCMD@ EXEEXT = @EXEEXT@ EXESUFFIX = @EXESUFFIX@ FH_CFLAGS = @FH_CFLAGS@ +GASNETI_HWLOC_BIND_PATH = @GASNETI_HWLOC_BIND_PATH@ +GASNETI_HWLOC_CALC_PATH = @GASNETI_HWLOC_CALC_PATH@ GASNETI_PTR_BITS = @GASNETI_PTR_BITS@ GASNET_EXTRA_DEFINES = @GASNET_EXTRA_DEFINES@ GASNET_IBV_SPAWNER_CONF = @GASNET_IBV_SPAWNER_CONF@ @@ -303,6 +309,10 @@ GSTACK_PATH = @GSTACK_PATH@ HAVE_INTTYPES_H = @HAVE_INTTYPES_H@ HAVE_STDINT_H = @HAVE_STDINT_H@ HAVE_SYS_TYPES_H = @HAVE_SYS_TYPES_H@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_guess_prog = @HWLOC_guess_prog@ IBV_CFLAGS = @IBV_CFLAGS@ IBV_LDFLAGS = @IBV_LDFLAGS@ IBV_LIBS = @IBV_LIBS@ diff --git a/third-party/gasnet/gasnet-src/other/amudp/README b/third-party/gasnet/gasnet-src/other/amudp/README index d9fdbe18ad45..6c2fc054be02 100644 --- a/third-party/gasnet/gasnet-src/other/amudp/README +++ b/third-party/gasnet/gasnet-src/other/amudp/README @@ -68,6 +68,9 @@ AMUDP has a few notable departures from the AM-2 specification: Change Log 
---------- +AMUDP 3.18 (03/2021) +- Add AMUDP_SPMDSetProc and envvar WORKER_RANK to support explicit rank assignment + AMUDP 3.17 (10/2020) - Remove BLCR support - This version breaks compatibility of the amudprun spawner with prior versions diff --git a/third-party/gasnet/gasnet-src/other/amudp/amudp_const.h b/third-party/gasnet/gasnet-src/other/amudp/amudp_const.h index 1bf706483270..e3f2df60b3c2 100644 --- a/third-party/gasnet/gasnet-src/other/amudp/amudp_const.h +++ b/third-party/gasnet/gasnet-src/other/amudp/amudp_const.h @@ -29,7 +29,7 @@ #endif #define AMUDP_LIBRARY_VERSION_MAJOR 3 -#define AMUDP_LIBRARY_VERSION_MINOR 17 +#define AMUDP_LIBRARY_VERSION_MINOR 18 #define AMUDP_LIBRARY_VERSION AMUDP_LIBRARY_VERSION_MAJOR.AMUDP_LIBRARY_VERSION_MINOR #define AMUDP_LIBRARY_VERSION_STR AMX_STRINGIFY(AMUDP_LIBRARY_VERSION) @@ -76,8 +76,14 @@ /* Internal constants */ #define AMUDP_MAX_SHORT 16 /* max number of handler arguments, >=8 */ #define AMUDP_MAX_MEDIUM AMUDP_MAX_LONG /* max. data transmission unit for medium messages, >= 512 */ - -#if PLATFORM_OS_IRIX +#ifdef AMUDP_MAX_LONG + /* compile-time override for debugging MTU problems + * users should never turn this knob unless directed by the maintainers + */ + #if AMUDP_MAX_LONG > 65000 || AMUDP_MAX_LONG < 512 + #error invalid AMUDP_MAX_LONG + #endif +#elif PLATFORM_OS_IRIX #define AMUDP_MAX_LONG 61000 /* max. 
UDP datagram on IRIX is apparently 61412 */ #elif PLATFORM_OS_TRU64 || PLATFORM_OS_FREEBSD || PLATFORM_OS_NETBSD || \ PLATFORM_OS_DARWIN || PLATFORM_OS_AIX diff --git a/third-party/gasnet/gasnet-src/other/amudp/amudp_internal.h b/third-party/gasnet/gasnet-src/other/amudp/amudp_internal.h index cfcb4137d9cc..e5608c6c9a7b 100644 --- a/third-party/gasnet/gasnet-src/other/amudp/amudp_internal.h +++ b/third-party/gasnet/gasnet-src/other/amudp/amudp_internal.h @@ -38,7 +38,6 @@ AMUDP_BEGIN_EXTERNC #endif #define AMUDP_PROCID_NEXT -1 /* Use next unallocated procid */ -#define AMUDP_PROCID_ALLOC -2 /* Allocate and return next procis, but do not bootstrap */ #ifndef AMUDP_INITIAL_REQUESTTIMEOUT_MICROSEC #define AMUDP_INITIAL_REQUESTTIMEOUT_MICROSEC 100000 /* usec until first retransmit */ diff --git a/third-party/gasnet/gasnet-src/other/amudp/amudp_spmd.cpp b/third-party/gasnet/gasnet-src/other/amudp/amudp_spmd.cpp index 82883c1c6d57..0c2ff10f2272 100644 --- a/third-party/gasnet/gasnet-src/other/amudp/amudp_spmd.cpp +++ b/third-party/gasnet/gasnet-src/other/amudp/amudp_spmd.cpp @@ -103,11 +103,9 @@ typedef struct { /* Protocol for TCP bootstrapping/control sockets initialization: - worker->master (int32) - send my procid for init + worker->master (int32) - send a forced rank, or -1 for default allocation worker->master (en_t) - send my endpoint name for init - if received procid == AMUDP_PROCID_ALLOC - master->worker (int32 next_rank++) - else + master->worker (int32 sizeof(AMUDP_SPMDBootstrapInfo_t)) master->worker (AMUDP_SPMDBootstrapInfo_t) master->worker (AMUDP_SPMDTranslation_name (variable size)) @@ -178,6 +176,25 @@ extern char *AMUDP_tagStr(tag_t tag, char *buf) { return buf; } //------------------------------------------------------------------------------------ +typedef struct { + en_t name; + SOCKET socket; +} workerinfo_t; +static int workerinfo_compare(const void *left_, const void *right_) { + workerinfo_t *left = (workerinfo_t *)left_; + workerinfo_t *right = 
(workerinfo_t *)right_; + uint32_t lip = ntohl(left->name.sin_addr.s_addr); + uint32_t rip = ntohl(right->name.sin_addr.s_addr); + uint16_t lport = ntohs(left->name.sin_port); + uint16_t rport = ntohs(right->name.sin_port); + // compare ascending by IP then port + if (lip < rip) return -1; + else if (rip < lip) return 1; + else if (lport < rport) return -1; + else if (rport < lport) return 1; + else return 0; +} +//------------------------------------------------------------------------------------ static void setupStdSocket(SOCKET& ls, SocketList& list, SocketList& allList) { if ((int)list.getCount() < AMUDP_SPMDNUMPROCS) { SockAddr remoteAddr; @@ -330,6 +347,16 @@ extern int AMUDP_SPMDMyProc() { return AMUDP_SPMDMYPROC; } /* ------------------------------------------------------------------------------------ */ +extern void AMUDP_SPMDSetProc(int rank) { + if (AMUDP_SPMDStartupCalled) + AMX_Err("called AMUDP_SPMDSetProc after AMUDP_SPMDStartup()"); + if (rank < 0 || AMUDP_SPMDMYPROC != AMUDP_PROCID_NEXT) + AMX_Err("AMUDP_SPMDSetProc may be called at most once before AMUDP_SPMDStartup()"); + + AMX_assert(rank != AMUDP_PROCID_NEXT); + AMUDP_SPMDMYPROC = rank; +} +/* ------------------------------------------------------------------------------------ */ extern int AMUDP_SPMDIsWorker(char **argv) { if (AMUDP_SPMDStartupCalled) return 1; else { @@ -668,30 +695,36 @@ extern int AMUDP_SPMDStartup(int *argc, char ***argv, } #endif + static int forced_ranks = 0; { // receive bootstrapping info - static int32_t next_procid = 0; - int32_t procid, procid_nb; + int32_t procid_nb; en_t name; - recvAll(newcoord, &procid_nb, sizeof(procid_nb)); - recvAll(newcoord, &name, sizeof(name)); - procid = ntoh32(procid_nb); - if (procid == AMUDP_PROCID_ALLOC) { - // This is a request (e.g. 
by a spawner) for a procid assignment - procid = next_procid++; - procid_nb = hton32(procid); - sendAll(newcoord, &procid_nb, sizeof(procid_nb)); - shutdown(newcoord, SHUT_RDWR); - close_socket(newcoord); - } else { - // This is a worker connecting - if (procid == AMUDP_PROCID_NEXT) procid = next_procid++; - AMUDP_SPMDWorkerSocket[procid] = newcoord; - AMUDP_SPMDTranslation_name[procid] = name; - coordList.insert(newcoord); - allList.insert(newcoord); - numWorkersAttached++; + // This is a worker connecting + + recvAll(newcoord, &procid_nb, sizeof(procid_nb)); // procid request (if any) + recvAll(newcoord, &name, sizeof(name)); // worker address + + int32_t procid = ntoh32(procid_nb); + if ( procid >= AMUDP_SPMDNUMPROCS || + (procid < 0 && procid != AMUDP_PROCID_NEXT) ) { + AMX_FatalErr("Invalid forced rank assignment (%i) via WORKER_RANK envvar or AMUDP_SPMDSetProc", procid); + } + if (numWorkersAttached == 0) forced_ranks = (procid != AMUDP_PROCID_NEXT); + if ( ( procid == AMUDP_PROCID_NEXT && forced_ranks ) || + ( procid != AMUDP_PROCID_NEXT && !forced_ranks ) ) { + AMX_FatalErr("Non-collective use of forced rank assignments via WORKER_RANK envvar or AMUDP_SPMDSetProc"); + } + if (procid != AMUDP_PROCID_NEXT && AMUDP_SPMDWorkerSocket[procid] != INVALID_SOCKET) { + AMX_FatalErr("Conflicting rank assignment (%i) by two or more worker processes via WORKER_RANK envvar or AMUDP_SPMDSetProc", procid); } + + if (procid == AMUDP_PROCID_NEXT) procid = numWorkersAttached; // provisional procid + AMUDP_SPMDWorkerSocket[procid] = newcoord; + AMUDP_SPMDTranslation_name[procid] = name; + coordList.insert(newcoord); + allList.insert(newcoord); + numWorkersAttached++; } if (numWorkersAttached == AMUDP_SPMDNUMPROCS) { // all have now reported in, so we can begin computation @@ -722,6 +755,20 @@ extern int AMUDP_SPMDStartup(int *argc, char ***argv, force_output = true; } + if (!forced_ranks) { // sort worker entries by name (ie IP address, port) + workerinfo_t *info_tmp = 
(workerinfo_t *)AMX_malloc(AMUDP_SPMDNUMPROCS * sizeof(workerinfo_t)); + for (int i=0; i < AMUDP_SPMDNUMPROCS; i++) { + info_tmp[i].name = AMUDP_SPMDTranslation_name[i]; + info_tmp[i].socket = AMUDP_SPMDWorkerSocket[i]; + } + qsort(info_tmp, AMUDP_SPMDNUMPROCS, sizeof(workerinfo_t), &workerinfo_compare); + for (int i=0; i < AMUDP_SPMDNUMPROCS; i++) { + AMUDP_SPMDTranslation_name[i] = info_tmp[i].name; + AMUDP_SPMDWorkerSocket[i] = info_tmp[i].socket; + } + AMX_free(info_tmp); + } + int32_t bootstrapinfosz_nb = hton32(sizeof(bootstrapinfo)); // transmit bootstrapping info for (int i=0; i < AMUDP_SPMDNUMPROCS; i++) { @@ -974,6 +1021,20 @@ extern int AMUDP_SPMDStartup(int *argc, char ***argv, } } + if (AMUDP_SPMDMYPROC == AMUDP_PROCID_NEXT) { + // WORKER_RANK is *deliberately* not propagated or fetched from the master environment, + // because it must be set non-collectively by each worker process + const char *rank_str = AMUDP_getenv_prefixed_withdefault("WORKER_RANK", AMX_STRINGIFY(AMUDP_PROCID_NEXT)); + if (rank_str[0] >= 'A') { // indirect envvar load + rank_str = getenv(rank_str); + if (!rank_str) rank_str = AMX_STRINGIFY(AMUDP_PROCID_NEXT); + } + int forced_rank = atoi( rank_str ); + if (forced_rank != AMUDP_PROCID_NEXT) { + AMUDP_SPMDSetProc(forced_rank); + } + } + try { if (!AMX_SilentMode) AMX_Info("worker connecting to %s:%i", masterAddr.IPStr(), masterAddr.port()); diff --git a/third-party/gasnet/gasnet-src/other/amudp/amudp_spmd.h b/third-party/gasnet/gasnet-src/other/amudp/amudp_spmd.h index 4e7ce87f42e8..44bea603785a 100644 --- a/third-party/gasnet/gasnet-src/other/amudp/amudp_spmd.h +++ b/third-party/gasnet/gasnet-src/other/amudp/amudp_spmd.h @@ -65,6 +65,13 @@ extern int AMUDP_SPMDIsWorker(char **argv); extern int AMUDP_SPMDNumProcs(void); /* return the number of processors in the parallel job */ extern int AMUDP_SPMDMyProc(void); /* return a zero-based unique identifier of this processor in the parallel job */ +extern void AMUDP_SPMDSetProc(int rank); 
+ /* programmatically force a rank assignment for the calling worker process. + * ignoring the WORKER_RANK envvar (if any). + * May only be called collectively by all workers before AMUDP_SPMDStartup with disjoint + * rank ids in 0..NumProcs-1. Not supported for local spawn. + */ + extern int AMUDP_SPMDBarrier(void); /* block until all SPMD processors call this function, * and poll the SPMD endpoint while waiting diff --git a/third-party/gasnet/gasnet-src/other/amudp/license.txt b/third-party/gasnet/gasnet-src/other/amudp/license.txt index 08755c15d720..9d0e8dcad054 100644 --- a/third-party/gasnet/gasnet-src/other/amudp/license.txt +++ b/third-party/gasnet/gasnet-src/other/amudp/license.txt @@ -3,7 +3,7 @@ * are subject to the following licensing terms: * * --------------------------------------------------------------------------- - * "Copyright (c) 2000-2020 The Regents of the University of California. + * "Copyright (c) 2000-2021 The Regents of the University of California. * All rights reserved. 
* * Permission to use, copy, modify, and distribute this software and its diff --git a/third-party/gasnet/gasnet-src/other/amx/testam.h b/third-party/gasnet/gasnet-src/other/amx/testam.h index 093083b7fc85..2439a3c14beb 100644 --- a/third-party/gasnet/gasnet-src/other/amx/testam.h +++ b/third-party/gasnet/gasnet-src/other/amx/testam.h @@ -142,9 +142,10 @@ #define ALLAM_DONE(iters) ((int)NUMREP() == (int)(NUMHANDLERS_PER_TYPE*3*(iters))) #endif -typedef struct { +typedef struct testam_payload_s { double doublevar; uint64_t int64var; + struct testam_payload_s* partnerseg; int idx; } testam_payload_t; #define TESTAM_DOUBLEVAR_VAL (2.5f) @@ -422,7 +423,7 @@ typedef struct { ReplyLong(num,(token, (NUMHANDLERS_PER_TYPE+num)*sizeof(testam_payload_t), \ LONG_##num##REP_HANDLER, &mybuf, nbytes aa##num), \ (token, LONG_##num##REP_HANDLER, &mybuf, nbytes, \ - ((testam_payload_t*)TEST_SEG(partner))+NUMHANDLERS_PER_TYPE+num \ + payload->partnerseg+NUMHANDLERS_PER_TYPE+num \ EXTRA_ML aa##num)); \ memset(&mybuf, 0xBB, sizeof(testam_payload_t)); \ } \ @@ -540,6 +541,7 @@ HANDLERS(16) static testam_payload_t medbuf, longbuf; \ asyncbuf.doublevar = TESTAM_DOUBLEVAR_VAL; \ asyncbuf.int64var = TESTAM_INT64VAR_VAL; \ + asyncbuf.partnerseg = (testam_payload_t*)MYSEG; \ asyncbuf.idx = num; \ RequestShort(num,(ENDPOINT partner, SHORT_##num##REQ_HANDLER EXTRA_S AA##num)); \ memcpy(&medbuf, &asyncbuf, sizeof(testam_payload_t)); \ diff --git a/third-party/gasnet/gasnet-src/other/contrib/Makefile.in b/third-party/gasnet/gasnet-src/other/contrib/Makefile.in index 63f24e3ea5e2..ac4f1f5c0c5b 100644 --- a/third-party/gasnet/gasnet-src/other/contrib/Makefile.in +++ b/third-party/gasnet/gasnet-src/other/contrib/Makefile.in @@ -246,6 +246,10 @@ CONFIGURE_ARGS = @CONFIGURE_ARGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CROSS_COMPILING = @CROSS_COMPILING@ +CUDA_UVA_CFLAGS = @CUDA_UVA_CFLAGS@ +CUDA_UVA_LDFLAGS = @CUDA_UVA_LDFLAGS@ +CUDA_UVA_LIBS = @CUDA_UVA_LIBS@ +CUDA_guess_prog = @CUDA_guess_prog@ CXX = 
@CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ @@ -271,6 +275,8 @@ ENVCMD = @ENVCMD@ EXEEXT = @EXEEXT@ EXESUFFIX = @EXESUFFIX@ FH_CFLAGS = @FH_CFLAGS@ +GASNETI_HWLOC_BIND_PATH = @GASNETI_HWLOC_BIND_PATH@ +GASNETI_HWLOC_CALC_PATH = @GASNETI_HWLOC_CALC_PATH@ GASNETI_PTR_BITS = @GASNETI_PTR_BITS@ GASNET_EXTRA_DEFINES = @GASNET_EXTRA_DEFINES@ GASNET_IBV_SPAWNER_CONF = @GASNET_IBV_SPAWNER_CONF@ @@ -299,6 +305,10 @@ GSTACK_PATH = @GSTACK_PATH@ HAVE_INTTYPES_H = @HAVE_INTTYPES_H@ HAVE_STDINT_H = @HAVE_STDINT_H@ HAVE_SYS_TYPES_H = @HAVE_SYS_TYPES_H@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_guess_prog = @HWLOC_guess_prog@ IBV_CFLAGS = @IBV_CFLAGS@ IBV_LDFLAGS = @IBV_LDFLAGS@ IBV_LIBS = @IBV_LIBS@ diff --git a/third-party/gasnet/gasnet-src/other/firehose/Makefile.in b/third-party/gasnet/gasnet-src/other/firehose/Makefile.in index a76a4c959000..04ae6a99f3ea 100644 --- a/third-party/gasnet/gasnet-src/other/firehose/Makefile.in +++ b/third-party/gasnet/gasnet-src/other/firehose/Makefile.in @@ -214,6 +214,10 @@ CONFIGURE_ARGS = @CONFIGURE_ARGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CROSS_COMPILING = @CROSS_COMPILING@ +CUDA_UVA_CFLAGS = @CUDA_UVA_CFLAGS@ +CUDA_UVA_LDFLAGS = @CUDA_UVA_LDFLAGS@ +CUDA_UVA_LIBS = @CUDA_UVA_LIBS@ +CUDA_guess_prog = @CUDA_guess_prog@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ @@ -239,6 +243,8 @@ ENVCMD = @ENVCMD@ EXEEXT = @EXEEXT@ EXESUFFIX = @EXESUFFIX@ FH_CFLAGS = @FH_CFLAGS@ +GASNETI_HWLOC_BIND_PATH = @GASNETI_HWLOC_BIND_PATH@ +GASNETI_HWLOC_CALC_PATH = @GASNETI_HWLOC_CALC_PATH@ GASNETI_PTR_BITS = @GASNETI_PTR_BITS@ GASNET_EXTRA_DEFINES = @GASNET_EXTRA_DEFINES@ GASNET_IBV_SPAWNER_CONF = @GASNET_IBV_SPAWNER_CONF@ @@ -267,6 +273,10 @@ GSTACK_PATH = @GSTACK_PATH@ HAVE_INTTYPES_H = @HAVE_INTTYPES_H@ HAVE_STDINT_H = @HAVE_STDINT_H@ HAVE_SYS_TYPES_H = @HAVE_SYS_TYPES_H@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_guess_prog = 
@HWLOC_guess_prog@ IBV_CFLAGS = @IBV_CFLAGS@ IBV_LDFLAGS = @IBV_LDFLAGS@ IBV_LIBS = @IBV_LIBS@ diff --git a/third-party/gasnet/gasnet-src/other/firehose/firehose.c b/third-party/gasnet/gasnet-src/other/firehose/firehose.c index 8af021357c60..60c9d330f48c 100644 --- a/third-party/gasnet/gasnet-src/other/firehose/firehose.c +++ b/third-party/gasnet/gasnet-src/other/firehose/firehose.c @@ -687,8 +687,9 @@ fh_request_free(firehose_request_t *req) * * Recognizes modifiers [Mm][Kk][Gg] in numbers */ -int64_t fh_getenv(const char *var, unsigned long multiplier) { +int64_t fh_getenv(const char *var, unsigned long multiplier, int *is_dflt) { const char *env = gasneti_getenv(var); + if (is_dflt) *is_dflt = !(env && *env); // unset or empty return gasneti_parse_int(env, multiplier); } diff --git a/third-party/gasnet/gasnet-src/other/firehose/firehose_hash.c b/third-party/gasnet/gasnet-src/other/firehose/firehose_hash.c index abf80270deef..9827a96e58f1 100644 --- a/third-party/gasnet/gasnet-src/other/firehose/firehose_hash.c +++ b/third-party/gasnet/gasnet-src/other/firehose/firehose_hash.c @@ -16,7 +16,11 @@ struct _fh_hash_t { void **fh_table; size_t fh_entries; size_t fh_elemsize; +#if FH_HASH_KNUTH + unsigned fh_shift; +#else unsigned fh_mask; +#endif #ifdef FH_HASH_STATS int *fh_col_table; @@ -27,14 +31,6 @@ struct _fh_hash_t { }; -/* The following functions implement Thomas Wang's integer hashing functions, - * found at http://www.concentric.net/~Ttwang/tech/inthash.htm. The hashing - * functions are useful for integer hashing and make use of CPU native - * instructions such as 'add complement' and 'shift and add'. - * - * A 32-bit and a 64-bit version are implemented below. - */ - /* In firehose, hash tables are created for both local bucket addresses and * remote firehoses. 
Local bucket addresses are hashed on page addresses (as * integers) and remote firehoses are hashed on the bitwise or of @@ -61,42 +57,52 @@ struct fh_dummy_entry { } fh_dummy_entry_t; -/* -int -inthash(fh_key_t key) +#if FH_HASH_KNUTH + +// Knuth's multiplicative hashing +// See "The Art of Computer Programming, Volume 3, Sorting and Searching", D.E. Knuth, section 6.4 + +GASNETI_INLINE(knuth_hash) +int +knuth_hash(fh_key_t full_key, fh_hash_t *hash) { - key += (key << 12); - key ^= (key >> 22); - key += (key << 4); - key ^= (key >> 9); - key += (key << 10); - key ^= (key >> 2); - key += (key << 7); - key ^= (key >> 12); - return key; + // Compute (k * (sqrt(5)-1)/2) mod 1.0 + // The following "just works" to get the 32 most signficant bits of the + // fractional part due to pre-shifting of the constant and overflow in the + // multiplication. + uint32_t kA_fract = 2654435769U * FH_KEY2INT(full_key); + + // Then extract the 'p' *most* significant bits of that fractional part + int result = kA_fract >> hash->fh_shift; + gasneti_assert(result >= 0); + gasneti_assert_int(result ,<, hash->fh_entries); + return result; } -*/ +#define KEYHASH(key,hash) knuth_hash((key),(hash)) + +#else + +/* The following functions implement Thomas Wang's integer hashing functions, + * found at http://www.concentric.net/~Ttwang/tech/inthash.htm. The hashing + * functions are useful for integer hashing and make use of CPU native + * instructions such as 'add complement' and 'shift and add'. + * + * A 32-bit and a 64-bit version are implemented below. 
+ */ -#if 1 GASNETI_INLINE(inthash) int inthash(fh_key_t full_key) { intptr_t key = FH_KEY2INT(full_key); +#if PLATFORM_ARCH_32 key += ~(key << 15); key ^= (key >> 10); key += (key << 3); key ^= (key >> 6); key += ~(key << 11); key ^= (key >> 16); - return (int) key; -} #else -GASNETI_INLINE(inthash) -int -inthash(fh_key_t full_key) -{ - intptr_t key = FH_KEY2INT(full_key); key += ~(key << 32); key ^= (key >> 22); key += ~(key << 13); @@ -105,8 +111,11 @@ inthash(fh_key_t full_key) key ^= (key >> 15); key += ~(key << 27); key ^= (key >> 31); +#endif return (int) key; } +#define KEYHASH(key,hash) \ + (inthash(key) & (hash)->fh_mask); #endif /* fh_hash_create(keylen,entries) @@ -126,14 +135,30 @@ fh_hash_create(size_t entries) hash = (fh_hash_t *) gasneti_calloc(1,sizeof(fh_hash_t)); hash->fh_table = (void **) gasneti_calloc(entries, sizeof(void *)); +#if FH_HASH_KNUTH + // 32 minus desired bits of hash value + hash->fh_shift = 32; + for (size_t e = entries-1; e; e >>= 1) { + hash->fh_shift -= 1; + } +#else hash->fh_mask = entries-1; +#endif hash->fh_entries = entries; - #ifdef FH_HASH_STATS - hash->fh_col_table = (int *) gasneti_malloc(entries * sizeof(int)); - /*printf("hash create: entries=%d, mask=%x\n", entries, entries-1);*/ - hash->fh_used = 0; - hash->fh_collisions = 0; - #endif +#ifdef FH_HASH_STATS + hash->fh_col_table = (int *) gasneti_calloc(entries, sizeof(int)); + #if FH_HASH_KNUTH + gasneti_console_message("INFO", + "hash create: entries=%"PRIuSZ", bits=%u\n", + entries, (32 - hash->fh_shift)); + #else + gasneti_console_message("INFO", + "hash create: entries=%"PRIuSZ", mask=%"PRIxSZ"\n", + entries, entries-1); + #endif + hash->fh_used = 0; + hash->fh_collisions = 0; +#endif return hash; } @@ -143,15 +168,17 @@ void fh_hash_destroy(fh_hash_t *hash) { #ifdef FH_HASH_STATS - fprintf(stderr, "elements: %d, collisions: %d, avg=%2.5f%%\n", - hash->fh_used, hash->fh_collisions, - (float) hash->fh_collisions*100/hash->fh_used); - { + 
gasneti_console_message("INFO", + "entries: %"PRIuSZ" elements: %d, collisions: %d, avg=%2.5f%%, load %g\n", + hash->fh_entries, hash->fh_used, hash->fh_collisions, + (double) hash->fh_collisions*100/hash->fh_used, + (double) hash->fh_used/hash->fh_entries); + if (0) { int i, hits; for (i = 0; i < hash->fh_entries; i++) { hits = hash->fh_col_table[i]; if (hits) { - /*printf("%d\t%d\n", i, hits);*/ + printf("%d\t%d\n", i, hits); } } } @@ -170,7 +197,7 @@ void * fh_hash_find(fh_hash_t *hash, fh_key_t key) { void *val; - int keyhash = inthash(key) & hash->fh_mask; + int keyhash = KEYHASH(key, hash); val = hash->fh_table[keyhash]; @@ -192,7 +219,7 @@ fh_hash_insert(fh_hash_t *hash, fh_key_t key, void *newval) int keyhash; void *val; - keyhash = inthash(key) & hash->fh_mask; + keyhash = KEYHASH(key, hash); val = hash->fh_table[keyhash]; #ifdef FH_HASH_STATS @@ -296,7 +323,7 @@ fh_hash_replace(fh_hash_t *hash, void *val, void *newval) int keyhash; fh_dummy_entry_t *cur; - keyhash = inthash(((fh_dummy_entry_t *)val)->hash_key) & hash->fh_mask; + keyhash = KEYHASH(((fh_dummy_entry_t *)val)->hash_key, hash); cur = (fh_dummy_entry_t *)(hash->fh_table[keyhash]); /* Handle head of list case first */ diff --git a/third-party/gasnet/gasnet-src/other/firehose/firehose_internal.h b/third-party/gasnet/gasnet-src/other/firehose/firehose_internal.h index 1c741a5ea173..64c344e2101a 100644 --- a/third-party/gasnet/gasnet-src/other/firehose/firehose_internal.h +++ b/third-party/gasnet/gasnet-src/other/firehose/firehose_internal.h @@ -446,7 +446,7 @@ int fh_region_partial(gex_Rank_t node, uintptr_t *addr_p, size_t *len_p); /* ##################################################################### */ /* Misc functions (COMMON, firehose.c) */ /* ##################################################################### */ -int64_t fh_getenv(const char *var, unsigned long multiplier); +int64_t fh_getenv(const char *var, unsigned long multiplier, int *is_dflt); /* Common Queue Macros for Firehose 
FIFO and Local Bucket FIFO */ #define FH_TAILQ_HEAD(name, type) \ diff --git a/third-party/gasnet/gasnet-src/other/firehose/firehose_page.c b/third-party/gasnet/gasnet-src/other/firehose/firehose_page.c index e0cdd9381077..5c5019401228 100644 --- a/third-party/gasnet/gasnet-src/other/firehose/firehose_page.c +++ b/third-party/gasnet/gasnet-src/other/firehose/firehose_page.c @@ -505,10 +505,8 @@ firehose_get_params(uintptr_t max_pinnable_memory, fh_MaxPinnableMemory = max_pinnable_memory; - nM = fh_getenv("GASNET_FIREHOSE_M", (1<<20)); - dfltM = !nM; - nMaxvictim = fh_getenv("GASNET_FIREHOSE_MAXVICTIM_M", (1<<20)); - dfltMaxvictim = !nMaxvictim; + nM = fh_getenv("GASNET_FIREHOSE_M", (1<<20), &dfltM); + nMaxvictim = fh_getenv("GASNET_FIREHOSE_MAXVICTIM_M", (1<<20), &dfltMaxvictim); /* First assign values based on either what the user passed or what * is determined to be the best M and maxvictim parameters based on diff --git a/third-party/gasnet/gasnet-src/other/firehose/firehose_region.c b/third-party/gasnet/gasnet-src/other/firehose/firehose_region.c index f263cc26ec91..7adeab5b4a40 100644 --- a/third-party/gasnet/gasnet-src/other/firehose/firehose_region.c +++ b/third-party/gasnet/gasnet-src/other/firehose/firehose_region.c @@ -1279,11 +1279,28 @@ fh_init_plugin(uintptr_t max_pinnable_memory, gex_Rank_t num_nodes = gasneti_nodes; int dflt_M, dflt_VM; int dflt_R, dflt_VR; - int dflt_RS; - - /* Initialize the Bucket tables */ - fh_BucketTable1 = fh_hash_create(1<<16); /* 64k */ - fh_BucketTable2 = fh_hash_create(1<<17); /* 128k */ + int dflt_RS, auto_RS; + + // Minimum permissible values + uintptr_t M_min, VM_min; + int R_min, VR_min; + if ((fhi_InitFlags & FIREHOSE_INIT_FLAG_LOCAL_ONLY)) { + // Want at least 16MB worth of buckets in victim FIFO + VM_min = (16*1024*1024) / FH_BUCKET_SIZE; + // Want at least 32 regions of victim FIFO + VR_min = 32; + // Other two are unused + M_min = R_min = 0; + } else { + // Want at least 32 buckets per node + M_min = 
FH_BUCKET_SIZE * num_nodes * 32; + // Want at least 256 buckets of victim FIFO + VM_min = FH_BUCKET_SIZE * 256; + // Want at least 1 region per node -- XXX/PHH THIS IS REALLY A BARE MINIMUM + R_min = num_nodes; + // Want at least 2 regions of FIFO -- XXX/PHH THIS IS REALLY A BARE MINIMUM + VR_min = 2; + } #if 0 /* UNUSED - see param_RS computation for explanation */ /* Count how many regions fit into an AM Medium payload */ @@ -1306,16 +1323,12 @@ fh_init_plugin(uintptr_t max_pinnable_memory, m_prepinned = FH_BUCKET_SIZE * b_prepinned; /* Get limits from the environment */ - param_M = fh_getenv("GASNET_FIREHOSE_M", (1<<20)); - dflt_M = !param_M; - param_VM = fh_getenv("GASNET_FIREHOSE_MAXVICTIM_M", (1<<20)); - dflt_VM = !param_VM; - param_R = fh_getenv("GASNET_FIREHOSE_R", 1); - dflt_R = !param_R; - param_VR = fh_getenv("GASNET_FIREHOSE_MAXVICTIM_R", 1); - dflt_VR = !param_VR; - param_RS = fh_getenv("GASNET_FIREHOSE_MAXREGION_SIZE", (1<<20)); - dflt_RS = !param_RS; + param_M = fh_getenv("GASNET_FIREHOSE_M", (1<<20), &dflt_M); + param_VM = fh_getenv("GASNET_FIREHOSE_MAXVICTIM_M", (1<<20), &dflt_VM); + param_R = fh_getenv("GASNET_FIREHOSE_R", 1, &dflt_R); + param_VR = fh_getenv("GASNET_FIREHOSE_MAXVICTIM_R", 1, &dflt_VR); + param_RS = fh_getenv("GASNET_FIREHOSE_MAXREGION_SIZE", (1<<20), &dflt_RS); + auto_RS = !param_RS && !dflt_RS; // explicit zero GASNETI_TRACE_PRINTF(C, ("ENV: Firehose M=%"PRIuPTR", MAXVICTIM_M=%"PRIuPTR, param_M, param_VM)); GASNETI_TRACE_PRINTF(C, @@ -1324,7 +1337,7 @@ fh_init_plugin(uintptr_t max_pinnable_memory, ("ENV: Firehose max region size=%"PRIuPTR, (uintptr_t)param_RS)); /* Now assign decent "M" defaults based on physical memory */ - if (param_M == 0 && param_VM == 0) { + if (dflt_M && dflt_VM) { if ((fhi_InitFlags & FIREHOSE_INIT_FLAG_LOCAL_ONLY)) { param_M = m_prepinned; param_VM = max_pinnable_memory - param_M; @@ -1335,12 +1348,43 @@ fh_init_plugin(uintptr_t max_pinnable_memory, FH_MAXVICTIM_TO_PHYSMEM_RATIO; } } - else if (param_M == 
0) + else if (dflt_M) { + uintptr_t limit = max_pinnable_memory - M_min; + if (param_VM > limit) { + char str0[24], str1[24]; + gasneti_fatalerror("GASNET_FIREHOSE_MAXVICTIM_M (%s) is larger than the maximum" + "(%s) permitted without also setting GASNET_FIREHOSE_M.", + gasneti_format_number(param_VM, str0, 24, 1), + gasneti_format_number(limit, str1, 24, 1)); + } param_M = max_pinnable_memory - param_VM; - else if (param_VM == 0) + } + else if (dflt_VM) { + uintptr_t limit = max_pinnable_memory - VM_min; + if (param_M > limit) { + char str0[24], str1[24]; + gasneti_fatalerror("GASNET_FIREHOSE_M (%s) is larger than the maximum (%s) " + "permitted without also setting GASNET_FIREHOSE_MAXVICTIM_M.", + gasneti_format_number(param_M, str0, 24, 1), + gasneti_format_number(limit, str1, 24, 1)); + } param_VM = max_pinnable_memory - param_M; + } + else if (param_M + param_VM > max_pinnable_memory) { + if (!gasneti_mynode) { + char str0[24], str1[24], str2[24]; + gasneti_console_message("WARNING", + "GASNET_FIREHOSE_M (%s) and GASNET_FIREHOSE_MAXVICTIM_M (%s) together " + "are more than the maximum of %s recommended based on GASNET_PHYSMEM_MAX.", + gasneti_format_number(param_M, str0, 24, 1), + gasneti_format_number(param_VM, str1, 24, 1), + gasneti_format_number(max_pinnable_memory, str2, 24, 1)); + } + } + uintptr_t orig_M = param_M; + uintptr_t orig_VM = param_VM; - if (param_RS == 0) { + if (!param_RS) { // default or explicit zero for "auto" if ((fhi_InitFlags & FIREHOSE_INIT_FLAG_LOCAL_ONLY)) { param_RS = max_region_size; } else { @@ -1364,38 +1408,83 @@ fh_init_plugin(uintptr_t max_pinnable_memory, } /* Round down to multiple of FH_BUCKET_SIZE for sanity */ param_RS &= ~FH_PAGE_MASK; - /* Ensure max size fits in available bits of fh_key_t */ - param_RS = MIN(param_RS, ((FH_BUCKET_SIZE - 1) << FH_BUCKET_SHIFT)); - +#if FH_KEY_PACKED + // Ensure max size fits in available bits of fh_key_t + uintptr_t RS_max = ((uintptr_t)(FH_BUCKET_SIZE - 1) << FH_BUCKET_SHIFT); + if 
(param_RS > RS_max) { + if (!gasneti_mynode) { + char str0[24], str1[24]; + gasneti_console_message("WARNING", + "GASNET_FIREHOSE_MAXREGION_SIZE (%s) has been " + "reduced to the largest supported value (%s).", + gasneti_format_number(param_RS, str0, 24, 1), + gasneti_format_number(RS_max, str1, 24, 1)); + } + param_RS = RS_max; + } +#endif - /* Try to work it all out with the given RS - * The goal is (currently) to honor the given region size and - * reduce the number of available regions as needed. - */ - if (param_R == 0 && param_VR == 0) { + // Try to "work it all out" to address the requested volume of memory. + // Normally we try to keep fixed RS, reducing the number of regions if needed. + // However, with "auto" RS, we first attempt to enlarge RS + const int avail_regions = max_regions - num_prepinned; + int rescaled = 0; + if (dflt_R && dflt_VR) { if ((fhi_InitFlags & FIREHOSE_INIT_FLAG_LOCAL_ONLY)) { param_R = num_prepinned; param_VR = max_regions - param_R; } else { - double ratio; - /* try naively... */ param_R = (param_M - m_prepinned) / param_RS; param_VR = param_VM / param_RS; - /* then rescale if needed */ - ratio = (max_regions - num_prepinned) / - (double)(param_R + param_VR); + // Two approaches to rescaling if needed: + double ratio = avail_regions / (double)(param_R + param_VR); + // 1. try to scale up region size IFF user-provided vaule was 0 + if (ratio < 1. && auto_RS) { + param_RS = GASNETI_ALIGNUP(param_RS / ratio, FH_BUCKET_SIZE); + #if FH_KEY_PACKED + param_RS = MIN(param_RS, RS_max); + #endif + param_R = (param_M - m_prepinned) / param_RS; + param_VR = param_VM / param_RS; + ratio = avail_regions / (double)(param_R + param_VR); + } + // 2. reduce R parameters if neccessary (possibly after auto-scaling RS) if (ratio < 1.) 
{ param_R *= ratio; param_VR *= ratio; + rescaled = 1; } } } - else if (param_R == 0) - param_R = max_regions - num_prepinned - param_VR; - else if (param_VR == 0) - param_VR = max_regions - num_prepinned - param_R; + else if (dflt_R) { + int limit = avail_regions - R_min; + if (param_VR > limit) { + gasneti_fatalerror("GASNET_FIREHOSE_MAXVICTIM_R (%d) is larger than the " + "maximum permited (%d) without also setting GASNET_FIREHOSE_R.", + param_VR, limit); + } + param_R = avail_regions - param_VR; + } + else if (dflt_VR) { + int limit = avail_regions - VR_min; + if (param_R > limit) { + gasneti_fatalerror("GASNET_FIREHOSE_R (%d) is larger than the maximum permitted " + "(%d) without also setting GASNET_FIREHOSE_MAXVICTIM_R.", + param_R, limit); + } + param_VR = avail_regions - param_R; + } + else if (param_R + param_VR > avail_regions) { + if (!gasneti_mynode) { + char str0[24], str1[24]; + gasneti_console_message("WARNING", + "GASNET_FIREHOSE_R (%d) and GASNET_FIREHOSE_MAXVICTIM_R (%d) together " + "are more than the maximum of %d recommended based on GASNET_PINNED_REGIONS_MAX.", + param_R, param_VR, avail_regions); + } + } /* Trim and eliminate round-off so that limits are self-consistent */ param_R = MIN(param_R, num_prepinned + ((param_M - m_prepinned) / param_RS)); @@ -1403,6 +1492,23 @@ fh_init_plugin(uintptr_t max_pinnable_memory, param_M = param_RS * (param_R - num_prepinned) + m_prepinned; param_VM = param_RS * param_VR; + // If truncation occurred, report it + if (rescaled && !gasneti_mynode) { + char str0[24], str1[24], str2[24], str3[24], str4[24], str5[24]; + uintptr_t max_space = param_RS * avail_regions; + gasneti_console_message("WARNING", + "GASNET_FIREHOSE_M (%s) and GASNET_FIREHOSE_MAXVICTIM_M (%s) together " + "are more than the %s addressable with %d firehose regions of length %s. 
" + "M and MAXVICTIM_M have been reduced to %s and %s, respectively.", + gasneti_format_number(orig_M, str0, 24, 1), + gasneti_format_number(orig_VM, str1, 24, 1), + gasneti_format_number(max_space, str2, 24, 1), + avail_regions, + gasneti_format_number(param_RS, str3, 24, 1), + gasneti_format_number(param_M, str4, 24, 1), + gasneti_format_number(param_VM, str5, 24, 1)); + } + /* Report final values */ GASNETI_TRACE_PRINTF(C, ("param_M=%"PRIuPTR" param_VM=%"PRIuPTR, param_M, param_VM)); GASNETI_TRACE_PRINTF(C, ("param_RS=%"PRIuPTR, (uintptr_t)param_RS)); @@ -1415,14 +1521,9 @@ fh_init_plugin(uintptr_t max_pinnable_memory, /* * Validate firehose parameters parameters + * NOTE: some of these check may be redundant, but better safe than sorry */ if ((fhi_InitFlags & FIREHOSE_INIT_FLAG_LOCAL_ONLY)) { - /* Want at least 16MB worth of buckets in victim FIFO */ - uintptr_t VM_min = (16*1024*1024) / FH_BUCKET_SIZE; - - /* Want at least 32 regions of FIFO */ - int VR_min = 32; - if_pf (param_RS < FH_BUCKET_SIZE) gasneti_fatalerror("GASNET_FIREHOSE_MAXREGION_SIZE (%d) " "is less than the minimum %d", @@ -1449,20 +1550,6 @@ fh_init_plugin(uintptr_t max_pinnable_memory, "GASNET_FIREHOSE_R parameter (%d)", (uintptr_t)num_prepinned, param_R); } else { - /* Want at least 32 buckets per node */ - uintptr_t M_min = FH_BUCKET_SIZE * num_nodes * 32; - - /* Want at least 256 buckets of victim FIFO */ - uintptr_t VM_min = FH_BUCKET_SIZE * 256; - - /* Want at least 1 region per node */ - /* XXX/PHH THIS IS REALLY A BARE MINIMUM */ - int R_min = num_nodes; - - /* Want at least 2 regions of FIFO */ - /* XXX/PHH THIS IS REALLY A BARE MINIMUM */ - int VR_min = 2; - if_pf (param_RS < FH_BUCKET_SIZE) gasneti_fatalerror("GASNET_FIREHOSE_MAXREGION_SIZE (%d) " "is less than the minimum %d", @@ -1537,8 +1624,21 @@ fh_init_plugin(uintptr_t max_pinnable_memory, <= max_pinnable_memory); - /* Allocate hash table for region tracking */ - i = 1.2 * (param_R + param_VR + num_prepinned); /* factor 1.2 is 
arbitrary */ + // Initialize hash tables for buckets (per-page mapping to regions) + // See "BUCKET TABLE HANDLING" for more onfo on these tables + { + i = b_prepinned + (param_R + param_VR) * (param_RS >> FH_BUCKET_SHIFT); + double scale = gasneti_getenv_dbl_withdefault("GASNET_FIREHOSE_TABLE_SCALE", 1.); + i *= (scale / 16.); // default to 16:1, subject to adjustment by env var + i = MAX(65536, i); // subject to minimum of 64k + } + for (j = 1; j < i; j *= 2) { /* nothing */ } // next power of two + fh_BucketTable1 = fh_hash_create(j); + fh_BucketTable2 = fh_hash_create(j/2); + + // Allocate hash table for region tracking + // TODO: replace the arbitrary factor 1.2 with an env var? + i = 1.2 * (param_R + param_VR + num_prepinned); /* round 'i' up to a power of two: */ for (j = 1; j < i; j *= 2) { /* nothing */ } fh_PrivTable = fh_hash_create(j); diff --git a/third-party/gasnet/gasnet-src/other/fragment-body.mak.in b/third-party/gasnet/gasnet-src/other/fragment-body.mak.in index 2ea75e7a5090..a087b5fd666c 100644 --- a/third-party/gasnet/gasnet-src/other/fragment-body.mak.in +++ b/third-party/gasnet/gasnet-src/other/fragment-body.mak.in @@ -10,7 +10,7 @@ GASNET_LIBDIRS = -L###INSTALL_LIB### # Textual lines containing the string "###NOINSTALL###" are removed by the install process # (must be one continuous line) ###NOINSTALL### -GASNET_INCLUDES = -I@TOP_SRCDIR@ -I@TOP_SRCDIR@/#conduit_name#-conduit -I@TOP_SRCDIR@/other $(CONDUIT_INCLUDES) $(CONDUIT_INCLUDES_#THREAD_MODEL#) -I@TOP_SRCDIR@/extended-ref/vis -I@TOP_SRCDIR@/extended-ref/coll -I@TOP_SRCDIR@/extended-ref/ratomic -I@TOP_SRCDIR@/extended-ref -I@TOP_BUILDDIR@ ###NOINSTALL### +GASNET_INCLUDES = -I@TOP_SRCDIR@ -I@TOP_SRCDIR@/#conduit_name#-conduit -I@TOP_SRCDIR@/other $(CONDUIT_INCLUDES) $(CONDUIT_INCLUDES_#THREAD_MODEL#) -I@TOP_SRCDIR@/extended-ref/vis -I@TOP_SRCDIR@/extended-ref/coll -I@TOP_SRCDIR@/extended-ref/ratomic -I@TOP_SRCDIR@/extended-ref -I@TOP_SRCDIR@/other/kinds -I@TOP_BUILDDIR@ ###NOINSTALL### 
GASNET_LIBDIRS = -L@TOP_BUILDDIR@/#conduit_name#-conduit ###NOINSTALL### # ---------------------------------------------------------------------- diff --git a/third-party/gasnet/gasnet-src/other/gasnet_portable_platform.h b/third-party/gasnet/gasnet-src/other/gasnet_portable_platform.h index bbab8799f2ca..6efebd9fe8ed 100644 --- a/third-party/gasnet/gasnet-src/other/gasnet_portable_platform.h +++ b/third-party/gasnet/gasnet-src/other/gasnet_portable_platform.h @@ -29,9 +29,9 @@ /* Publish and enforce version number for the public interface to this header */ /* YOU ARE NOT PERMITTED TO CHANGE THIS SECTION WITHOUT DIRECT APPROVAL FROM DAN BONACHEA */ #if _PORTABLE_PLATFORM_H != PLATFORM_HEADER_VERSION \ - || PLATFORM_HEADER_VERSION < 12 + || PLATFORM_HEADER_VERSION < 14 #undef PLATFORM_HEADER_VERSION -#define PLATFORM_HEADER_VERSION 12 +#define PLATFORM_HEADER_VERSION 14 #undef _PORTABLE_PLATFORM_H #define _PORTABLE_PLATFORM_H PLATFORM_HEADER_VERSION /* End Header versioning handshake */ diff --git a/third-party/gasnet/gasnet-src/other/hwloc/gasnet_hwloc.c b/third-party/gasnet/gasnet-src/other/hwloc/gasnet_hwloc.c new file mode 100644 index 000000000000..50ef7ba3ffc6 --- /dev/null +++ b/third-party/gasnet/gasnet-src/other/hwloc/gasnet_hwloc.c @@ -0,0 +1,359 @@ +/* $Source: bitbucket.org:berkeleylab/gasnet.git/other/hwloc/gasnet_hwloc.c $ + * Description: GASNet conduit-independent hwloc utilities + * Copyright 2021, The Regents of the University of California + * Terms of use are as specified in license.txt + */ + +#include +#include + +#include +#include +#include + +#if GASNETI_HAVE_HWLOC_LIB + #include "hwloc.h" + #ifndef HWLOC_API_VERSION + #error hwloc.h did not define HWLOC_API_VERSION + #endif + #define USE_HWLOC_LIB 1 + #undef USE_HWLOC_UTILS + typedef hwloc_obj_type_t gasneti_hwloc_obj_type_t; + typedef hwloc_cpuset_t gasneti_hwloc_cpuset_t; + #if HWLOC_API_VERSION < 0x010100 // "cpuset" names used prior to 1.1.0 + #define hwloc_bitmap_alloc 
hwloc_cpuset_alloc + #define hwloc_bitmap_intersects hwloc_cpuset_intersects + #define hwloc_bitmap_free hwloc_cpuset_free + #endif +#elif GASNETI_HAVE_HWLOC_UTILS + #include + #include + #undef HWLOC_API_VERSION + #define USE_HWLOC_UTILS 1 + typedef const char *gasneti_hwloc_obj_type_t; + typedef const char *gasneti_hwloc_cpuset_t; + #define CLOSE_STDIN " = 0x020000 // since 2.0.0 + return hwloc_type_sscanf(string, result, NULL, 0); + #elif HWLOC_API_VERSION >= 0x010900 // since 1.9.0 + return hwloc_obj_type_sscanf(string, result, NULL, NULL, 0); + #else + *result = hwloc_obj_type_of_string(string); + return ((int)(*result) < 0) ? -1 : 0; + #endif +} + +// Convert 'typestring' to gasneti_hwloc_obj_type_t. +// If invalid, falls back to 'dflt_type, with a warning. +// Returns 0 on success, negative on error. +// Result is in `*result'. +static int get_selector_type( + gasneti_hwloc_obj_type_t *result, + const char *keyname, + const char *typestring, + const char *dflt_type) +{ + int rc = string_to_obj_type(typestring, result); + if (rc < 0) { + gasneti_console_message("WARNING", + "%s_TYPE = '%s' is invalid. Using default '%s' instead.", + keyname, typestring, dflt_type); + rc = string_to_obj_type(dflt_type, result); + gasneti_assert_int(rc ,==, 0); + } + + // Currently non-zero only if dflt_type is invalid + return rc; +} +#endif // USE_HWLOC_LIB || USE_HWLOC_UTILS + +#if USE_HWLOC_UTILS +// Wraps popen(cmd) and gasneti_getline() with appropriate error handling. +// Returns the first line of output (stripped of trailing newline, if any) or NULL on error. +// Caller should gasneti_free() the result. +// If 'len_p' is non-NULL, receives line length on success (not written on failure). +static char *run_hwloc_cmd(const char *cmd, size_t *len_p) +{ + char *output = NULL; + + FILE *stream = popen(cmd, "r"); + if (!stream) { + // failed popen() + // probably a failed fork(), pipe(), or malloc() (maybe should be fatal?) 
+    return NULL;
+  }
+
+  size_t n = 0;
+  ssize_t line_len = gasneti_getline(&output, &n, stream);
+  int status = pclose(stream);
+  if (!WIFEXITED(status) || WEXITSTATUS(status) || (line_len < 0)) {
+    // one or both of pclose() or getline() says that the command failed
+    gasneti_free(output);
+    return NULL;
+  }
+
+  // strip trailing newline, if any
+  if (line_len && (output[line_len-1] == '\n')) {
+    output[line_len-1] = '\0';
+    line_len -= 1;
+  }
+
+  if (len_p) *len_p = line_len;
+  return output;
+}
+#endif // USE_HWLOC_UTILS
+
+// Look for any "_N"-suffixed env vars (N in 0..63).
+// Returns gasneti_malloc'ed string with first matched variable name (caller frees), or NULL if none.
+// TODO: could be more efficient if given an interface to iterate over the environment keys.
+static
+char *check_suffixed(const char *keyname)
+{
+  size_t keylen = strlen(keyname);
+  size_t fulllen = keylen + 5; // 5 = "_xxx\0": one byte more than the "_xx\0" needed for N < 64
+  char *fullkey = gasneti_malloc(fulllen);
+  strcpy(fullkey, keyname);
+  strcat(fullkey, "_");
+  for (int i = 0; i < 64; ++i) { // TODO: any realistic situation in which this is not enough?
+    snprintf(fullkey+(keylen+1), fulllen-(keylen+1), "%d", i);
+    gasneti_assert_uint(strlen(fullkey) ,<, fulllen);
+    if (gasneti_getenv(fullkey)) {
+      return fullkey;
+    }
+  }
+  gasneti_free(fullkey);
+  return NULL;
+}
+
+// ------------------------------------------------------------------------------------
+// gasneti_getenv_hwloc_withdefault()
+//
+// 1. Check for suffixed env vars.
+//    If none, return result of gasneti_getenv_withdefault(keyname, dflt_val).
+// 2. Check for env var "[keyname]_TYPE" equal to "None" (case insensitive).
+//    If YES, return result of gasneti_getenv_withdefault(keyname, dflt_val).
+// With hwloc support:
+// 3. Look for a hwloc object type in env var "[keyname]_TYPE", or dflt_type if none.
+// 4. Find the intersection of this proc's cpu binding with options of the given type.
+// 5. Return the value of env var "[keyname]_[binding]", if any,
+//    otherwise return the result of gasneti_getenv_withdefault(keyname, dflt_val).
+// Without hwloc support:
+// 3. If we get this far, warn at most once about lack of hwloc support
+// 4. Return the result of gasneti_getenv_withdefault(keyname, dflt_val).
+//
+// Detected hwloc errors result in a warning (at most once per "step")
+// and use of the unsuffixed variable.
+char *gasneti_getenv_hwloc_withdefault(const char *keyname, const char *dflt_val, const char *dflt_type)
+{
+  char *result = NULL;
+
+  // Step 1 - check for suffixed vars
+  char *firstkey = check_suffixed(keyname);
+  if (! firstkey) {
+    // short-cut w/o using hwloc if there are no suffixed variables
+    return gasneti_getenv_withdefault(keyname, dflt_val);
+  }
+
+  // Step 2 - check env var "[keyname]_TYPE" for "None" (which disables all additional intelligence)
+  char *typekey = gasneti_sappendf(NULL, "%s_TYPE", keyname);
+  const char *typestring = gasneti_getenv_withdefault(typekey, dflt_type);
+  gasneti_free(typekey);
+  if (typestring) {
+    const char none[] = "none";
+    int match = 1;
+    for (size_t i = 0; i < sizeof(none); ++i) { // includes final '\0', so a shorter typestring stops at its own '\0'
+      if (tolower((unsigned char)typestring[i]) != none[i]) { // cast: tolower() is undefined for negative char values
+        match = 0;
+        break;
+      }
+    }
+    if (match) {
+      // short-cut w/o using hwloc if TYPE is "none"
+      gasneti_free(firstkey);
+      return gasneti_getenv_withdefault(keyname, dflt_val);
+    }
+  }
+
+#if USE_HWLOC_LIB || USE_HWLOC_UTILS
+  // The "real thing" via EITHER libhwloc OR hwloc-{bind,calc}
+  char *suffix = NULL;
+  gasneti_hwloc_obj_type_t type = (gasneti_hwloc_obj_type_t)0;
+  gasneti_hwloc_cpuset_t cpuset = NULL;
+
+  // Step 3 - hwloc object type
+  // Note non-zero return indicates invalid dflt_type, not a user error
+  gasneti_assert_zeroret( get_selector_type(&type, keyname, typestring, dflt_type) );
+
+  // Step 4a - query the current proc's cpu binding
+  #if USE_HWLOC_LIB
+    int topo_is_init = 0;
+    hwloc_topology_t topology;
+    if (hwloc_topology_init(&topology) < 0) {
+      // failed to initialize hwloc
+      goto out_bad_cpuset;
+    }
+    topo_is_init = 1;
+    // Enable "whole system" mode for uniform counting/naming
+    #if HWLOC_API_VERSION >= 0x020100 // since 2.1.0
+      (void)hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED);
+    #else
+      (void)hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM);
+    #endif
+    cpuset = hwloc_bitmap_alloc();
+    if (!cpuset ||
+        (hwloc_topology_load(topology) < 0) ||
+        (hwloc_get_cpubind(topology, cpuset, HWLOC_CPUBIND_PROCESS) < 0 )) {
+      // failed to query cpu binding from hwloc
+      goto out_bad_cpuset;
+    }
+  #else
+    // Note: gasneti_hwloc_cpuset_t is "char *" when using utils
+    cpuset = run_hwloc_cmd(GASNETI_HWLOC_BIND_PATH " --get" CLOSE_STDIN, NULL);
+    // It is sufficient here to validate that we have a hexadecimal value.
+    // `hwloc-calc` will perform stronger validation when we pass this as argument.
+    if (!cpuset || cpuset[0] != '0' || cpuset[1] != 'x' || !isxdigit(cpuset[2])) {
+      // failed to query cpu binding from hwloc
+      goto out_bad_cpuset;
+    }
+  #endif
+
+  // Step 4b - compute intersection between 'cpuset' and object(s) of 'type'
+  #if USE_HWLOC_LIB
+  {
+    int count = hwloc_get_nbobjs_by_type(topology, type);
+    if (count <= 0) {
+      // EITHER there are no objects of the given type
+      // OR type occurs at multiple levels (not a type suited to our purposes)
+      goto out_bad_intersect;
+    }
+    for (int i = 0; i < count; ++i) {
+      hwloc_obj_t obj = hwloc_get_obj_by_type(topology, type, i);
+      gasneti_assert(obj);
+      if (hwloc_bitmap_intersects(cpuset, obj->cpuset)) {
+        suffix = gasneti_sappendf(suffix, "_%d", i);
+      }
+    }
+  }
+  #else
+  {
+    size_t len = 0;
+    char *cmd = gasneti_sappendf(NULL, GASNETI_HWLOC_CALC_PATH " --intersect %s %s" CLOSE_STDIN, type, cpuset);
+    char *buf = run_hwloc_cmd(cmd, &len);
+    gasneti_free(cmd);
+    if (!buf || !buf[0]) {
+      // failed to compute the intersection
+      gasneti_free(buf);
+      goto out_bad_intersect;
+    }
+    gasneti_assert_uint(strlen(buf) ,==, len);
+    // In one pass: copy (to prepend '_'), validate and translate ',' to '_'.
+    // Note: `--sep X` option to set the delimiter was not available in older hwloc-calc.
+    suffix = gasneti_malloc(len + 2); // +2 = leading '_' and trailing '\0'
+    suffix[0] = '_';
+    for (size_t i = 0; i < len; ++i) {
+      char c = buf[i];
+      if (c == ',') {
+        suffix[i+1] = '_';
+      } else if (isdigit((unsigned char)c)) {
+        suffix[i+1] = c;
+      } else {
+        // invalid character (such as in an error message?); 'suffix' is released at 'out'
+        gasneti_free(buf);
+        goto out_bad_intersect;
+      }
+    }
+    suffix[len+1] = '\0';
+    gasneti_free(buf);
+  }
+  #endif
+
+  // Step 5 - query the environment with suffix
+  if (suffix && suffix[0]) {
+    char *fullkey = gasneti_sappendf(NULL, "%s%s", keyname, suffix);
+    result = gasneti_getenv(fullkey);
+    if (result) {
+      gasnett_envstr_display(fullkey, result, 0);
+    }
+    gasneti_free(fullkey);
+  }
+
+out:
+  gasneti_free(suffix); // released after the label so the error paths that jump to 'out' do not leak it
+  gasneti_free(firstkey);
+  #if USE_HWLOC_LIB
+    if (cpuset) hwloc_bitmap_free(cpuset);
+    if (topo_is_init) hwloc_topology_destroy(topology);
+  #elif USE_HWLOC_UTILS
+    // casts below discard const qualifiers to avoid warnings
+    gasneti_free((void *)type);
+    gasneti_free((void *)cpuset);
+  #endif
+
+  // Return the suffixed variable's value if any, else use unsuffixed
+  return result ? result : gasneti_getenv_withdefault(keyname, dflt_val);
+
+out_bad_cpuset:
+  {
+    static int did_cpuset_warning = 0;
+    if (!did_cpuset_warning) {
+      gasneti_console_message("WARNING",
+              "Failed to query hwloc for cpuset (binding) while processing environment variable '%s'. "
+              "You may set '%s_TYPE=none' to disable checks for suffixed variants of this variable. "
+              "Suppressing additional warnings, if any, for this error with additional variables.",
+              keyname, keyname);
+      did_cpuset_warning = 1;
+    }
+    goto out;
+  }
+
+out_bad_intersect:
+  {
+    static int did_intersect_warning = 0;
+    if (!did_intersect_warning) {
+      gasneti_console_message("WARNING",
+              "Failed to query hwloc for objects of type '%s' while processing environment variable '%s'. "
+              "You may set '%s_TYPE=none' to disable checks for suffixed variants of this variable. "
+              "Suppressing additional warnings, if any, for this error with additional variables.",
+              typestring, keyname, keyname);
+      did_intersect_warning = 1;
+    }
+    goto out;
+  }
+
+#else // !(USE_HWLOC_LIB || USE_HWLOC_UTILS)
+  // Fallback when hwloc is unavailable
+
+  // Step 3. Warn at most once about presence of suffixed keys
+  static int did_warn = 0;
+  if (!did_warn) {
+    gasneti_console_message("WARNING",
+            "Ignoring environment variable '%s' (and maybe more) due to lack of hwloc support. "
+            "You may set '%s_TYPE=none' to disable checks for suffixed variants of '%s'",
+            firstkey, keyname, keyname);
+    did_warn = 1;
+  }
+  gasneti_free(firstkey);
+
+  // Step 4. Return the only thing we can
+  return gasneti_getenv_withdefault(keyname, dflt_val);
+#endif
+}
diff --git a/third-party/gasnet/gasnet-src/other/hwloc/gasnet_hwloc_internal.h b/third-party/gasnet/gasnet-src/other/hwloc/gasnet_hwloc_internal.h
new file mode 100644
index 000000000000..dcf8607c6881
--- /dev/null
+++ b/third-party/gasnet/gasnet-src/other/hwloc/gasnet_hwloc_internal.h
@@ -0,0 +1,19 @@
+/* $Source: bitbucket.org:berkeleylab/gasnet.git/other/hwloc/gasnet_hwloc_internal.h $
+ * Description: GASNet conduit-independent hwloc utilities internal header
+ * Copyright 2021, The Regents of the University of California
+ * Terms of use are as specified in license.txt
+ */
+
+#ifndef _GASNET_HWLOC_INTERNAL_H
+#define _GASNET_HWLOC_INTERNAL_H
+
+// For a given keyname:
+// 1. 
Look for a hwloc object type in env var "[keyname]_TYPE", or dflt_type if none.
+// 2. Find the current proc's binding(s) for the given type
+// 3. Return the value of env var "[keyname]_[binding]", or of "[keyname]" if none
+extern char *gasneti_getenv_hwloc_withdefault(
+                const char *keyname,
+                const char *dflt_val,
+                const char *dflt_type);
+
+#endif // _GASNET_HWLOC_INTERNAL_H
diff --git a/third-party/gasnet/gasnet-src/other/kinds/gasnet_cuda_uva.c b/third-party/gasnet/gasnet-src/other/kinds/gasnet_cuda_uva.c
new file mode 100644
index 000000000000..511900618d5b
--- /dev/null
+++ b/third-party/gasnet/gasnet-src/other/kinds/gasnet_cuda_uva.c
@@ -0,0 +1,267 @@
+/* $Source: bitbucket.org:berkeleylab/gasnet.git/other/kinds/gasnet_cuda_uva.c $
+ * Description: GASNet Memory Kinds Implementation for CUDA UVA devices
+ * Copyright (c) 2020, The Regents of the University of California
+ * Terms of use are as specified in license.txt
+ */
+
+#define GASNETI_NEED_GASNET_MK_H 1
+#include
+#include
+
+#if GASNET_HAVE_MK_CLASS_CUDA_UVA // Else empty
+
+#include
+
+GASNETI_IDENT(gasneti_IdentString_MKClassCUDAUVA, "$GASNetMKClassCUDAUVA: 1 $");
+
+//
+// Class-specific MK type and functions
+//
+
+typedef struct my_MK_s {
+  GASNETI_MK_COMMON // Class-indep prefix
+
+  CUcontext   ctx;              // context pushed/popped around segment creation
+  CUdevice    dev;              // device whose primary context is released on destroy
+  int         use_sync_memops;  // non-zero: set CU_POINTER_ATTRIBUTE_SYNC_MEMOPS on each segment
+} *my_MK_t;
+
+// Wrapper and format for use of cuGetErrorName(); yields "UNKNOWN" when cuGetErrorName() itself fails
+const char *_gasneti_cuerror_name(CUresult res) {
+  static const char *unknown = "UNKNOWN";
+  const char *errorname;
+  if (cuGetErrorName(res, &errorname)) errorname = unknown;
+  return errorname;
+}
+#define GASNETI_CURESULT_FMT "%s(%d)"
+#define GASNETI_CURESULT_STRING(res) _gasneti_cuerror_name(res),(res)
+
+//
+// Error checking/reporting wrapper: any non-zero CUresult from 'op' is a fatal error
+//
+#define gasneti_check_cudacall(op) do {                   \
+    CUresult _retval = (op);                              \
+    if_pf (_retval) {                                     \
+      gasneti_fatalerror("%s returned "GASNETI_CURESULT_FMT,#op,GASNETI_CURESULT_STRING(_retval));\
+    }                                                     \
+  } while (0)
+
+static void gasneti_MK_Destroy_cuda_uva(
+            gasneti_MK_t                     i_mk,
+            gex_Flags_t                      flags)
+{
+  my_MK_t mk = (my_MK_t) i_mk;
+  gasneti_check_cudacall(cuCtxSetCurrent(NULL)); // unbind any current context before releasing
+  gasneti_check_cudacall(cuDevicePrimaryCtxRelease(mk->dev)); // pairs with the retain in gasneti_MK_Create_cuda_uva
+  gasneti_free_mk(i_mk);
+}
+
+static int gasneti_MK_Segment_Create_cuda_uva(
+            gasneti_Segment_t                *i_segment_p,
+            gasneti_MK_t                     i_mk,
+            void *                           addr,
+            uintptr_t                        size,
+            gex_Flags_t                      flags)
+{
+  my_MK_t kind = (my_MK_t) i_mk;
+  CUdeviceptr dptr;
+  CUresult result;
+  void * to_free = NULL; // remains NULL for client-allocated memory
+  int retval = GASNET_OK;
+
+  gasneti_check_cudacall(cuCtxPushCurrent(kind->ctx)); // popped again at 'out'
+
+  // TODO:
+  // Might want additional care with respect to error returns from the CUDA device API.
+  // In particular, any call "may also return error codes from previous, asynchronous launches."
+  // Presently, we try to always provide the specific CUDA error code as we fatalerror.
+
+  if (addr) { // Client-allocated
+    dptr = (CUdeviceptr)addr;
+
+    // cuPointerGetAttributes available since CUDA 7.0
+    unsigned int mem_type = 0;
+    unsigned int is_managed = 0;
+    CUcontext ctx = NULL;
+    void * ptrs[3] = { (void*)&mem_type, (void*)&is_managed, (void*)&ctx }; // same order as attrs[]
+    CUpointer_attribute attrs[3] = { CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
+                                     CU_POINTER_ATTRIBUTE_IS_MANAGED,
+                                     CU_POINTER_ATTRIBUTE_CONTEXT };
+
+    result = cuPointerGetAttributes(3, attrs, ptrs, dptr);
+    if (result) {
+      gasneti_fatalerror("Failed to query pointer attributes of client-allocated memory: "
+                         GASNETI_CURESULT_FMT, GASNETI_CURESULT_STRING(result));
+    }
+
+    if (mem_type != CU_MEMORYTYPE_DEVICE) {
+      gasneti_fatalerror("Invalid call to gex_Segment_Create(CUDA_UVA) with non-device memory");
+    }
+    if (is_managed) {
+      gasneti_fatalerror("Invalid call to gex_Segment_Create(CUDA_UVA) with managed memory");
+    }
+
+    // We currently accept memory allocated by *any* context for the same device.
+    // TODO: should we be more strict by checking equality of contexts instead of devices?
+    CUdevice dev;
+    if ((result = cuCtxPushCurrent(ctx)) ||
+        (result = cuCtxGetDevice(&dev)) ||
+        (result = cuCtxPopCurrent(&ctx))) {
+      gasneti_fatalerror("Failed to query CUDA device of client-allocated memory: "
+                         GASNETI_CURESULT_FMT, GASNETI_CURESULT_STRING(result));
+    } else if (dev != kind->dev) {
+      gasneti_fatalerror("gex_Segment_Create(CUDA_UVA) with memory associated with wrong device");
+    }
+  } else { // GASNet-allocated
+    result = cuMemAlloc(&dptr, size);
+
+    if (result == CUDA_ERROR_OUT_OF_MEMORY) { // the only non-fatal failure: reported to the caller
+      retval = GASNET_ERR_RESOURCE;
+      goto out;
+    } else if (result != CUDA_SUCCESS) {
+      gasneti_fatalerror("cuMemAlloc() returned unexpected failure: "
+                         GASNETI_CURESULT_FMT, GASNETI_CURESULT_STRING(result));
+    }
+
+    addr = to_free = (void *) dptr;
+  }
+
+  if (kind->use_sync_memops) {
+    int one = 1;
+    gasneti_check_cudacall(cuPointerSetAttribute(&one, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, dptr));
+  }
+
+  gasneti_Client_t client = i_mk->_client;
+  gex_MK_t e_mk = gasneti_export_mk(i_mk);
+  gasneti_Segment_t i_segment = gasneti_alloc_segment(client, addr, size, e_mk, flags);
+  i_segment->_opaque_mk_use = to_free; // non-NULL only when cuMemAlloc() above provided the memory
+
+  *i_segment_p = i_segment;
+
+out:
+  {
+    CUcontext prev_ctx;
+    gasneti_check_cudacall(cuCtxPopCurrent(&prev_ctx)); // undo the push at function entry
+    gasneti_assert(prev_ctx == kind->ctx);
+  }
+  return retval;
+}
+
+//
+// Class-specific "impl(ementation)": constants and function pointers.
+//
+// Due to lack of designated initializers in GASNet's required C99 subset, we
+// address the fragility as the structure grows or changes by lazy explicit
+// initialization.
+static gasneti_mk_impl_t *get_impl(void) {
+  // Static storage duration ensures these are zero-initialized
+  static gasneti_mk_impl_t the_impl;
+  static gasneti_mk_impl_t *result;
+
+  if (!result) {
+    static gasneti_mutex_t lock = GASNETI_MUTEX_INITIALIZER;
+    gasneti_mutex_lock(&lock);
+    if (!result) { // re-check under the lock: another thread may have initialized meanwhile
+      the_impl.mk_class  = GEX_MK_CLASS_CUDA_UVA;
+      the_impl.mk_name   = "CUDA_UVA";
+      the_impl.mk_sizeof = sizeof(struct my_MK_s);
+
+      the_impl.mk_destroy = &gasneti_MK_Destroy_cuda_uva;
+      the_impl.mk_segment_create
+                          = &gasneti_MK_Segment_Create_cuda_uva;
+
+      gasneti_sync_writes(); // the_impl is fully written before 'result' is published
+      result = &the_impl;
+    }
+    gasneti_mutex_unlock(&lock);
+  }
+
+  gasneti_assert(result);
+  return result;
+}
+
+// Class-specific create: on success stores the new MK in *i_memkind_p (holding a retained primary context) and returns GASNET_OK
+int gasneti_MK_Create_cuda_uva(
+            gasneti_MK_t                     *i_memkind_p,
+            gasneti_Client_t                 client,
+            const gex_MK_Create_args_t       *args,
+            gex_Flags_t                      flags)
+{
+  CUdevice dev = args->gex_args.gex_class_cuda_uva.gex_CUdevice;
+  GASNETI_TRACE_PRINTF(O,("gex_MK_Create: class=CUDA_UVA gex_CUdevice=%d", dev));
+
+  if (dev < 0) {
+    // This is always treated as programmer error
+    gasneti_fatalerror("gex_MK_Create called with negative CUdevice=%i", dev);
+  }
+
+#if PLATFORM_OS_LINUX && GASNET_CONDUIT_IBV
+  // Look for GDR support.
+  // Adapted from the GDR checking logic in Open MPI.
+  if (access("/sys/kernel/mm/memory_peers/nv_mem/version", F_OK)) {
+    // TODO: gracefully fall back to cuMemcpy() "reference implementation",
+    // once one is available, rather than failing.
+    GASNETI_RETURN_ERRR(BAD_ARG,"GEX_MK_CLASS_CUDA_UVA: kernel lacks GPUDirect RDMA support");
+  }
+#endif
+
+  // Obtain the primary context for the given device, initializing if needed
+  CUcontext ctx;
+  CUresult res = cuDevicePrimaryCtxRetain(&ctx, dev);
+  if (res == CUDA_ERROR_NOT_INITIALIZED) {
+    CUresult initRes = cuInit(0);
+    if (initRes == CUDA_SUCCESS) {
+      res = cuDevicePrimaryCtxRetain(&ctx, dev);
+    } else if (initRes == CUDA_ERROR_NO_DEVICE) {
+      GASNETI_RETURN_ERRR(BAD_ARG,"GEX_MK_CLASS_CUDA_UVA: no CUDA devices found");
+    } else {
+      const char *msg = gasneti_dynsprintf("GEX_MK_CLASS_CUDA_UVA: cuInit() returned " GASNETI_CURESULT_FMT,
+                                           GASNETI_CURESULT_STRING(initRes));
+      GASNETI_RETURN_ERRR(BAD_ARG,msg);
+    }
+  }
+
+  // Failed to obtain the primary context, try to reason out why
+  // TODO: explicit diagnosis of more failure cases
+  if_pf (res != CUDA_SUCCESS) {
+    const char *why = "unknown failure";
+    if (res == CUDA_ERROR_INVALID_DEVICE) {
+      int dev_count;
+      if (cuDeviceGetCount(&dev_count)) {
+        why = "cuDeviceGetCount() failed";
+      } else if (!
+                 dev_count) {
+        why = "no CUDA devices found";
+      } else {
+        why = gasneti_dynsprintf("invalid CUdevice=%i (%d devices found)", dev, dev_count);
+      }
+    } else {
+      why = gasneti_dynsprintf("cuDevicePrimaryCtxRetain() returned " GASNETI_CURESULT_FMT,
+                               GASNETI_CURESULT_STRING(res));
+    }
+    const char *msg = gasneti_dynsprintf("GEX_MK_CLASS_CUDA_UVA: %s", why);
+    GASNETI_RETURN_ERRR(BAD_ARG,msg);
+  }
+
+  int isUVA;
+  if (cuDeviceGetAttribute(&isUVA, CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, dev)) {
+    (void) cuDevicePrimaryCtxRelease(dev); // best-effort: drop the reference retained above before failing
+    GASNETI_RETURN_ERRR(BAD_ARG,"GEX_MK_CLASS_CUDA_UVA: failed to query CUDA device for UVA support");
+  }
+  if (!isUVA) {
+    (void) cuDevicePrimaryCtxRelease(dev); // best-effort: drop the reference retained above before failing
+    GASNETI_RETURN_ERRR(BAD_ARG,"GEX_MK_CLASS_CUDA_UVA: passed context for a non-UVA device");
+  }
+
+  my_MK_t result = (my_MK_t) gasneti_alloc_mk(client, get_impl(), flags);
+  result->dev = dev;
+  result->ctx = ctx;
+
+  // TODO: could be a per-device setting?
+  // TODO: is '1' the best default?
+  result->use_sync_memops = gasneti_getenv_yesno_withdefault("GASNET_USE_CUDA_SYNC_MEMOPS", 1);
+
+  *i_memkind_p = (gasneti_MK_t) result;
+  return GASNET_OK;
+}
+
+#endif
diff --git a/third-party/gasnet/gasnet-src/other/kinds/gasnet_kinds_internal.h b/third-party/gasnet/gasnet-src/other/kinds/gasnet_kinds_internal.h
new file mode 100644
index 000000000000..cf205c4d6634
--- /dev/null
+++ b/third-party/gasnet/gasnet-src/other/kinds/gasnet_kinds_internal.h
@@ -0,0 +1,70 @@
+/* $Source: bitbucket.org:berkeleylab/gasnet.git/other/kinds/gasnet_kinds_internal.h $
+ * Description: GASNet Memory Kinds Internal Header
+ * Copyright (c) 2020, The Regents of the University of California
+ * Terms of use are as specified in license.txt
+ */
+
+#ifndef _GASNET_KINDS_INTERNAL_H
+#define _GASNET_KINDS_INTERNAL_H
+
+/*---------------------------------------------------------------------------------*/
+//
+// Common logic for use by memory kinds implementations
+//
+
+#define GASNETI_MK_MAGIC      GASNETI_MAKE_MAGIC('M','K','_','t')
+#define GASNETI_MK_BAD_MAGIC GASNETI_MAKE_BAD_MAGIC('M','K','_','t') + +// Optional default allocation +gasneti_MK_t gasneti_alloc_mk( + gasneti_Client_t i_client, + gasneti_mk_impl_t *mk_impl, + gex_Flags_t flags); + +// Optional default free +void gasneti_free_mk(gasneti_MK_t mk); + +/*---------------------------------------------------------------------------------*/ +// +// Declarations for class-specific create functions +// + +#define GASNETI_MK_CLASS_DECL(kind) \ + extern int gasneti_MK_Create_##kind( \ + gasneti_MK_t *i_mk_p, \ + gasneti_Client_t i_client, \ + const gex_MK_Create_args_t *args, \ + gex_Flags_t flags); + +#if GASNET_HAVE_MK_CLASS_CUDA_UVA +GASNETI_MK_CLASS_DECL(cuda_uva) +#endif + + +/*---------------------------------------------------------------------------------*/ +// +// Dispatch table for class-specific operations +// + +struct gasneti_mk_impl_s { + // GEX_MK_CLASS_* enum value + gex_MK_Class_t mk_class; + + // The characters after GEX_MK_CLASS_. Used in tracing and other messages + const char *mk_name; + + // Size of class-specific gasneti_MK_t. 
+ // Must include the GASNETI_MK_COMMON fields (required as prefix) + // If zero, then only space for the COMMON fields is allocated + size_t mk_sizeof; + + // Hook for gex_MK_Destroy() + // If NULL, the default is to call gasneti_free_mk() + void (*mk_destroy)(gasneti_MK_t, gex_Flags_t); + + // Hook for gex_Segment_Create() + // If NULL, the default is failure of gex_Segment_Create() with GASNET_ERR_BAD_ARG + int (*mk_segment_create)(gasneti_Segment_t *, gasneti_MK_t, void *, uintptr_t, gex_Flags_t); +}; + +#endif diff --git a/third-party/gasnet/gasnet-src/other/kinds/gasnet_mk.h b/third-party/gasnet/gasnet-src/other/kinds/gasnet_mk.h new file mode 100644 index 000000000000..cca295f41e9e --- /dev/null +++ b/third-party/gasnet/gasnet-src/other/kinds/gasnet_mk.h @@ -0,0 +1,82 @@ +/* $Source: bitbucket.org:berkeleylab/gasnet.git/other/kinds/gasnet_mk.h $ + * Description: GASNet Memory Kinds API types and declarations + * Copyright (c) 2020, The Regents of the University of California + * Terms of use are as specified in license.txt + */ + +#ifndef _GASNET_MK_H +#define _GASNET_MK_H + +#if defined(_INCLUDED_GASNET_INTERNAL_H) && !defined(_IN_GASNET_INTERNAL_H) + #error Internal GASNet code should not directly include gasnet_mk.h, just gasnet_internal.h +#endif + +#include + +GASNETI_BEGIN_EXTERNC +GASNETI_BEGIN_NOWARN + +typedef enum { + GEX_MK_CLASS_HOST, // "normal" memory (eg GEX_MK_HOST) + GEX_MK_CLASS_CUDA_UVA, // CUDA UVA memory +} gex_MK_Class_t; + +// Struct containing a union and an enum to indicate which member has been populated. +// Each union member is a struct named with the lowercase of the enum identifier. +// All types in here are basic types, possibly type-erased/indirected versions of types +// provided in device headers. +typedef struct { + uint64_t gex_flags; // Reserved. Must be 0 currently. 
+ gex_MK_Class_t gex_class; + union { + struct { + int gex_CUdevice; + } gex_class_cuda_uva; + } gex_args; +} gex_MK_Create_args_t; + +// Constructor for gex_MK_t +// This is a non-collective call +extern int gex_MK_Create( + gex_MK_t *_memkind_p, // OUT + gex_Client_t _client, + const gex_MK_Create_args_t *_args, // IN + gex_Flags_t _flags // Reserved. Must be 0 currently. + ); + +// Destructor +extern void gex_MK_Destroy(gex_MK_t, gex_Flags_t); + +// Per-class impl(ementation) constants and function pointers +struct gasneti_mk_impl_s; +typedef struct gasneti_mk_impl_s gasneti_mk_impl_t; + +#ifndef _GEX_MK_T + #define GASNETI_MK_COMMON \ + GASNETI_OBJECT_HEADER \ + gasneti_Client_t _client; \ + gex_MK_Class_t _mk_class; \ + gasneti_mk_impl_t *_mk_impl; \ + gasneti_weakatomic32_t _ref_count; + typedef struct { GASNETI_MK_COMMON } *gasneti_MK_t; + #if GASNET_DEBUG + extern gasneti_MK_t gasneti_import_mk(gex_MK_t _mk); + extern gasneti_MK_t gasneti_import_mk_nonhost(gex_MK_t _mk); + extern gex_MK_t gasneti_export_mk(gasneti_MK_t _real_mk); + #else + #define gasneti_import_mk(x) ((gasneti_MK_t)(x)) + #define gasneti_import_mk_nonhost(x) ((gasneti_MK_t)(x)) + #define gasneti_export_mk(x) ((gex_MK_t)(x)) + #endif + // TODO: Either document the following, prohibiting GEX_MK_HOST, or fix them for that case + #define gex_MK_SetCData(mk,val) ((void)(gasneti_import_mk_nonhost(mk)->_cdata = (val))) + #define gex_MK_QueryCData(mk) ((void*)gasneti_import_mk_nonhost(mk)->_cdata) + #define gex_MK_QueryFlags(mk) ((gex_Flags_t)gasneti_import_mk_nonhost(mk)->_flags) + #define gex_MK_QueryClient(mk) gasneti_export_client(gasneti_import_mk_nonhost(mk)->_client) + #define gex_MK_QueryClass(mk) ((gex_MK_Class_t)gasneti_import_mk_nonhost(mk)->_mk_class) +#endif + +GASNETI_END_NOWARN +GASNETI_END_EXTERNC + +#endif diff --git a/third-party/gasnet/gasnet-src/other/kinds/gasnet_refkinds.c b/third-party/gasnet/gasnet-src/other/kinds/gasnet_refkinds.c new file mode 100644 index 
000000000000..276a9e173b2c --- /dev/null +++ b/third-party/gasnet/gasnet-src/other/kinds/gasnet_refkinds.c @@ -0,0 +1,186 @@ +/* $Source: bitbucket.org:berkeleylab/gasnet.git/other/kinds/gasnet_refkinds.c $ + * Description: GASNet Memory Kinds Implementation + * Copyright (c) 2020, The Regents of the University of California + * Terms of use are as specified in license.txt + */ + +#define GASNETI_NEED_GASNET_MK_H 1 +#include +#include + + +#ifndef gasneti_import_mk +gasneti_MK_t gasneti_import_mk(gex_MK_t _mk) { + const gasneti_MK_t _real_mk = GASNETI_IMPORT_POINTER(gasneti_MK_t,_mk); + GASNETI_IMPORT_MAGIC(_real_mk, MK); + return _real_mk; +} +#endif + +#ifndef gasneti_import_mk_nonhost +gasneti_MK_t gasneti_import_mk_nonhost(gex_MK_t _mk) { + if (_mk == GEX_MK_HOST) { + gasneti_fatalerror("Invalid use of GEX_MK_HOST where prohibited"); + } + return gasneti_import_mk(_mk); +} +#endif + +#ifndef gasneti_export_mk +gex_MK_t gasneti_export_mk(gasneti_MK_t _real_mk) { + GASNETI_CHECK_MAGIC(_real_mk, GASNETI_MK_MAGIC); + return GASNETI_EXPORT_POINTER(gex_MK_t, _real_mk); +} +#endif + +// TODO: what to do about conduit-spcific extension? +gasneti_MK_t gasneti_alloc_mk( + gasneti_Client_t i_client, + gasneti_mk_impl_t *mk_impl, + gex_Flags_t flags) +{ + gasneti_assert(mk_impl); + + gasneti_MK_t mk; + size_t alloc_size = mk_impl->mk_sizeof ? 
mk_impl->mk_sizeof : sizeof(*mk); + gasneti_assert(alloc_size >= sizeof(*mk)); + mk = gasneti_calloc(1, alloc_size); + + GASNETI_INIT_MAGIC(mk, GASNETI_MK_MAGIC); + mk->_cdata = NULL; + mk->_flags = flags; + mk->_client = i_client; + mk->_mk_class = mk_impl->mk_class; + mk->_mk_impl = mk_impl; + gasneti_weakatomic32_set(&mk->_ref_count, 0, 0); + return mk; +} + +void gasneti_free_mk(gasneti_MK_t mk) +{ + GASNETI_INIT_MAGIC(mk, GASNETI_MK_BAD_MAGIC); + gasneti_free(mk); +} + +// Convenience macro +#define MK_IMPL(i_mk,short_field) ((i_mk)->_mk_impl->mk_##short_field) + +int gex_MK_Create( + gex_MK_t *memkind_p, + gex_Client_t e_client, + const gex_MK_Create_args_t *args, + gex_Flags_t flags) +{ + gasneti_Client_t client = gasneti_import_client(e_client); + gasneti_MK_t result = NULL; + int rc = GASNET_ERR_BAD_ARG; + + GASNETI_TRACE_PRINTF(O,("gex_MK_Create: client='%s' flags=%d", + client ? client->_name : "(NULL)", flags)); + + if (! client) { + gasneti_fatalerror("Invalid call to gex_MK_Create with NULL client"); + } + if (!memkind_p) { + gasneti_fatalerror("Invalid call to gex_MK_Create with NULL memkind_p"); + } + if (!args) { + gasneti_fatalerror("Invalid call to gex_MK_Create with NULL args"); + } + if (flags) { + gasneti_fatalerror("Invalid call to gex_MK_Create with non-zero flags"); + } + if (args->gex_flags) { + gasneti_fatalerror("Invalid call to gex_MK_Create with non-zero args->gex_flags"); + } + + switch (args->gex_class) { + case GEX_MK_CLASS_HOST: + gasneti_fatalerror("Invalid call to gex_MK_Create with GEX_MK_CLASS_HOST"); + break; + + case GEX_MK_CLASS_CUDA_UVA: + #if GASNET_HAVE_MK_CLASS_CUDA_UVA + rc = gasneti_MK_Create_cuda_uva(&result, client, args, flags); + #else + GASNETI_RETURN_ERRR(BAD_ARG,"This build lacks support for GEX_MK_CLASS_CUDA_UVA"); + #endif + break; + + default: gasneti_unreachable_error(("Unknown MK class: %i",(int)args->gex_class)); + } + + if (! 
rc) { + // Sanity checks on per-class initialization + gasneti_assert(result->_mk_class == args->gex_class); + gasneti_assert(result->_mk_impl); + gasneti_assert(MK_IMPL(result,class) == args->gex_class); + gasneti_assert(MK_IMPL(result,name)); + gasneti_assert(strlen(MK_IMPL(result,name))); + + *memkind_p = gasneti_export_mk(result); + } + + return rc; +} + +void gex_MK_Destroy( + gex_MK_t e_mk, + gex_Flags_t flags) +{ + if (e_mk == GEX_MK_INVALID) { + gasneti_fatalerror("Invalid call to gex_MK_Destroy(GEX_MK_INVALID)"); + } + if (e_mk == GEX_MK_HOST) { + gasneti_fatalerror("Invalid call to gex_MK_Destroy(GEX_MK_HOST)"); + } + + gasneti_MK_t i_mk = gasneti_import_mk(e_mk); // "this" + gasneti_assert(i_mk->_mk_impl); + gasneti_assert(MK_IMPL(i_mk,name)); + + GASNETI_TRACE_PRINTF(O,("gex_MK_Destroy: memkind=%p, class='%s' flags=%d", + (void*)e_mk, MK_IMPL(i_mk,name), flags)); + + if (flags) { + gasneti_fatalerror("Invalid call to gex_MK_Destroy with non-zero flags"); + } + + uint32_t ref_count = gasneti_weakatomic32_read(&i_mk->_ref_count, 0); + if (ref_count) { + gasneti_fatalerror("Invalid call to gex_MK_Destroy with ref_count=%u", + (unsigned int)ref_count); + } + + // Class-specific hook or default if none + if (MK_IMPL(i_mk,destroy)) MK_IMPL(i_mk,destroy)(i_mk, flags); + else gasneti_free_mk(i_mk); +} + +int gasneti_MK_Segment_Create( + gasneti_Segment_t *i_segment_p, + gasneti_Client_t i_client, + void *addr, + uintptr_t size, + gex_MK_t e_mk, + gex_Flags_t flags) +{ + gasneti_assert(e_mk != GEX_MK_INVALID); // Caller should have already checked user args + + gasneti_MK_t i_mk = gasneti_import_mk_nonhost(e_mk); + + if (i_mk->_client != i_client) { + gasneti_fatalerror("Invalid call to gex_Segment_Create with a gex_MK_t from a different client"); + } + + // Class-specific hook, if any + if (MK_IMPL(i_mk,segment_create)) { + int rc = MK_IMPL(i_mk,segment_create)(i_segment_p, i_mk, addr, size, flags); + if (rc) return rc; + } else { + 
GASNETI_RETURN_ERRR(BAD_ARG,"gex_Segment_Create() called on unsupported memory kind"); + } + + gasneti_weakatomic32_increment(&i_mk->_ref_count, 0); + return GASNET_OK; +} diff --git a/third-party/gasnet/gasnet-src/other/ssh-spawner/gasnet_bootstrap_ssh.c b/third-party/gasnet/gasnet-src/other/ssh-spawner/gasnet_bootstrap_ssh.c index 40db79bca505..d01c484f2250 100644 --- a/third-party/gasnet/gasnet-src/other/ssh-spawner/gasnet_bootstrap_ssh.c +++ b/third-party/gasnet/gasnet-src/other/ssh-spawner/gasnet_bootstrap_ssh.c @@ -210,6 +210,7 @@ static struct fds { static int parent = -1; /* socket */ static gex_Rank_t myrank = 0; static int myname = -1; +static char my_host[1024] = "[unknown hostname]"; static int children = 0; static int ctrl_children = 0; static gex_Rank_t tree_ranks = GEX_RANK_INVALID; @@ -229,33 +230,6 @@ static void do_verbose(const char *fmt, ...)) { } #define BOOTSTRAP_VERBOSE(ARGS) if_pf (is_verbose) do_verbose ARGS -GASNETI_FORMAT_PRINTF(sappendf,2,3, -static char *sappendf(char *s, const char *fmt, ...)) { - va_list args; - int old_len, add_len; - - /* compute length of thing to append */ - va_start(args, fmt); - add_len = vsnprintf(NULL, 0, fmt, args); - va_end(args); - - /* grow the string, including space for '\0': */ - if (s) { - old_len = strlen(s); - s = gasneti_realloc(s, old_len + add_len + 1); - } else { - old_len = 0; - s = gasneti_malloc(add_len + 1); - } - - /* append */ - va_start(args, fmt); - vsprintf((s+old_len), fmt, args); - va_end(args); - - return s; -} - /* Add single quotes around a string, taking care of any existing quotes */ static char *quote_arg(const char *arg) { char *p, *q, *tmp; @@ -265,10 +239,10 @@ static char *quote_arg(const char *arg) { p = tmp = gasneti_strdup(arg); while ((q = strchr(p, '\'')) != NULL) { *q = '\0'; - result = sappendf(result, "%s'\\''", p); + result = gasneti_sappendf(result, "%s'\\''", p); p = q + 1; } - result = sappendf(result, "%s'", p); + result = gasneti_sappendf(result, "%s'", p); 
gasneti_free(tmp); return result; } @@ -522,7 +496,10 @@ static void reap_one(pid_t pid, int status) myname, kind, child[j].rank, tmp, fini)); if (!sock && (j < ctrl_children)) { // Ctrl proc which did not yet connect const char *host = child[j].nodelist ? child[j].nodelist[0] : nodelist[0]; - fprintf(stderr, "*** Failed to start processes on %s\n", host); + fprintf(stderr, "*** Failed to start processes on %s, possibly due to an " + "inability to establish an ssh connection from %s without " + "interactive authentication.\n", + host, my_host); } } else if (WIFSIGNALED(status)) { int tmp = WTERMSIG(status); @@ -936,7 +913,7 @@ static void configure_ssh(void) { /* Check for OpenSSH */ { - char *cmd = sappendf(NULL, "%s -V 2>&1 | grep OpenSSH >/dev/null 2>/dev/null", ssh_argv0); + char *cmd = gasneti_sappendf(NULL, "%s -V 2>&1 | grep OpenSSH >/dev/null 2>/dev/null", ssh_argv0); is_openssh = (0 == system(cmd)); gasneti_free(cmd); BOOTSTRAP_VERBOSE(("Configuring for OpenSSH\n")); @@ -1532,8 +1509,9 @@ static void spawn_one_control(gex_Rank_t child_id, const char *cmdline, const ch if (pid < 0) { gasneti_fatalerror("fork() failed"); } else if (pid == 0) { - char *cmd; - cmd = sappendf(NULL, "cd %s; exec %s %s " ENV_PREFIX "SPAWN_CONTROL=ssh " + char *cmd = + gasneti_sappendf(NULL, + "cd %s; exec %s %s " ENV_PREFIX "SPAWN_CONTROL=ssh " ENV_PREFIX "SPAWN_ARGS='%c%s%c%d%c%d%c%s' " "%s", quote_arg(cwd), @@ -1669,7 +1647,6 @@ static void spawn_rank(int argc, char **argv) { /* Spawn control procs via ssh (or fork() when possible) */ static void spawn_ctrl(int argc, char **argv) { - static char my_host[1024]; char *cmdline = quote_arg(argv[0]); int j; @@ -1690,7 +1667,7 @@ static void spawn_ctrl(int argc, char **argv) { if (null_init) { for (j = 1; j < argc; ++j) { char *tmp = quote_arg(argv[j]); - cmdline = sappendf(cmdline, " %s", tmp); + cmdline = gasneti_sappendf(cmdline, " %s", tmp); gasneti_free(tmp); } } diff --git 
a/third-party/gasnet/gasnet-src/other/valgrind/Makefile.in b/third-party/gasnet/gasnet-src/other/valgrind/Makefile.in index ea358dd4065c..49790922091f 100644 --- a/third-party/gasnet/gasnet-src/other/valgrind/Makefile.in +++ b/third-party/gasnet/gasnet-src/other/valgrind/Makefile.in @@ -244,6 +244,10 @@ CONFIGURE_ARGS = @CONFIGURE_ARGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CROSS_COMPILING = @CROSS_COMPILING@ +CUDA_UVA_CFLAGS = @CUDA_UVA_CFLAGS@ +CUDA_UVA_LDFLAGS = @CUDA_UVA_LDFLAGS@ +CUDA_UVA_LIBS = @CUDA_UVA_LIBS@ +CUDA_guess_prog = @CUDA_guess_prog@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ @@ -269,6 +273,8 @@ ENVCMD = @ENVCMD@ EXEEXT = @EXEEXT@ EXESUFFIX = @EXESUFFIX@ FH_CFLAGS = @FH_CFLAGS@ +GASNETI_HWLOC_BIND_PATH = @GASNETI_HWLOC_BIND_PATH@ +GASNETI_HWLOC_CALC_PATH = @GASNETI_HWLOC_CALC_PATH@ GASNETI_PTR_BITS = @GASNETI_PTR_BITS@ GASNET_EXTRA_DEFINES = @GASNET_EXTRA_DEFINES@ GASNET_IBV_SPAWNER_CONF = @GASNET_IBV_SPAWNER_CONF@ @@ -297,6 +303,10 @@ GSTACK_PATH = @GSTACK_PATH@ HAVE_INTTYPES_H = @HAVE_INTTYPES_H@ HAVE_STDINT_H = @HAVE_STDINT_H@ HAVE_SYS_TYPES_H = @HAVE_SYS_TYPES_H@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_guess_prog = @HWLOC_guess_prog@ IBV_CFLAGS = @IBV_CFLAGS@ IBV_LDFLAGS = @IBV_LDFLAGS@ IBV_LIBS = @IBV_LIBS@ diff --git a/third-party/gasnet/gasnet-src/smp-conduit/Makefile.am b/third-party/gasnet/gasnet-src/smp-conduit/Makefile.am index f343d4fc1e29..126027e16edb 100644 --- a/third-party/gasnet/gasnet-src/smp-conduit/Makefile.am +++ b/third-party/gasnet/gasnet-src/smp-conduit/Makefile.am @@ -49,6 +49,10 @@ CONDUIT_EXTRADEPS = # forced targets in this file, and should probably use LIBINCLUDES/LIBDEFINES CONDUIT_SPECIAL_OBJS = +# memory kinds supported by this conduit (space separated) +# to be included in libgasnet if support was enabled at configure time +CONDUIT_KINDS = + # the default job spawn command to be used for "make run-tests" # The following substitutions are 
performed: # %P = program executable name diff --git a/third-party/gasnet/gasnet-src/smp-conduit/Makefile.in b/third-party/gasnet/gasnet-src/smp-conduit/Makefile.in index 66854e261266..28f824d4f381 100644 --- a/third-party/gasnet/gasnet-src/smp-conduit/Makefile.in +++ b/third-party/gasnet/gasnet-src/smp-conduit/Makefile.in @@ -279,6 +279,10 @@ CONFIGURE_ARGS = @CONFIGURE_ARGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CROSS_COMPILING = @CROSS_COMPILING@ +CUDA_UVA_CFLAGS = @CUDA_UVA_CFLAGS@ +CUDA_UVA_LDFLAGS = @CUDA_UVA_LDFLAGS@ +CUDA_UVA_LIBS = @CUDA_UVA_LIBS@ +CUDA_guess_prog = @CUDA_guess_prog@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ @@ -304,6 +308,8 @@ ENVCMD = @ENVCMD@ EXEEXT = @EXEEXT@ EXESUFFIX = @EXESUFFIX@ FH_CFLAGS = @FH_CFLAGS@ +GASNETI_HWLOC_BIND_PATH = @GASNETI_HWLOC_BIND_PATH@ +GASNETI_HWLOC_CALC_PATH = @GASNETI_HWLOC_CALC_PATH@ GASNETI_PTR_BITS = @GASNETI_PTR_BITS@ GASNET_EXTRA_DEFINES = @GASNET_EXTRA_DEFINES@ GASNET_IBV_SPAWNER_CONF = @GASNET_IBV_SPAWNER_CONF@ @@ -332,6 +338,10 @@ GSTACK_PATH = @GSTACK_PATH@ HAVE_INTTYPES_H = @HAVE_INTTYPES_H@ HAVE_STDINT_H = @HAVE_STDINT_H@ HAVE_SYS_TYPES_H = @HAVE_SYS_TYPES_H@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_guess_prog = @HWLOC_guess_prog@ IBV_CFLAGS = @IBV_CFLAGS@ IBV_LDFLAGS = @IBV_LDFLAGS@ IBV_LIBS = @IBV_LIBS@ @@ -529,6 +539,10 @@ CONDUIT_EXTRADEPS = # using a special, conduit-specific command. 
These should also be included as # forced targets in this file, and should probably use LIBINCLUDES/LIBDEFINES CONDUIT_SPECIAL_OBJS = + +# memory kinds supported by this conduit (space separated) +# to be included in libgasnet if support was enabled at configure time +CONDUIT_KINDS = @USE_PSHM_FALSE@CONDUIT_RUNCMD = %P %A # the default job spawn command to be used for "make run-tests" diff --git a/third-party/gasnet/gasnet-src/smp-conduit/gasnet_core.c b/third-party/gasnet/gasnet-src/smp-conduit/gasnet_core.c index 87707554122e..0c52ce9df869 100644 --- a/third-party/gasnet/gasnet-src/smp-conduit/gasnet_core.c +++ b/third-party/gasnet/gasnet-src/smp-conduit/gasnet_core.c @@ -17,8 +17,6 @@ GASNETI_IDENT(gasnetc_IdentString_Version, "$GASNetCoreLibraryVersion: " GASNET_CORE_VERSION_STR " $"); GASNETI_IDENT(gasnetc_IdentString_Name, "$GASNetCoreLibraryName: " GASNET_CORE_NAME_STR " $"); -gex_AM_Entry_t const *gasnetc_get_handlertable(void); - gex_AM_Entry_t *gasnetc_handler; // TODO-EX: will be replaced with per-EP tables /* ------------------------------------------------------------------------------------ */ @@ -658,12 +656,11 @@ static int gasnetc_attach_primary(void) { static int gasnetc_attach_segment(gex_Segment_t *segment_p, gex_TM_t tm, uintptr_t segsize, - gasneti_bootstrapExchangefn_t exchangefn, gex_Flags_t flags) { /* ------------------------------------------------------------------------------------ */ /* register client segment */ - (void) gasneti_segmentAttach(segment_p, 0, tm, segsize, exchangefn, flags); + (void) gasneti_segmentAttach(segment_p, tm, segsize, flags); return GASNET_OK; } @@ -676,7 +673,7 @@ extern int gasnetc_attach( gex_TM_t _tm, { GASNETI_TRACE_PRINTF(C,("gasnetc_attach(table (%i entries), segsize=%"PRIuPTR")", numentries, segsize)); - gasneti_TM_t tm = gasneti_import_tm(_tm); + gasneti_TM_t tm = gasneti_import_tm_nonpair(_tm); gasneti_EP_t ep = tm->_ep; if (!gasneti_init_done) @@ -701,12 +698,12 @@ extern int gasnetc_attach( 
gex_TM_t _tm, #if GASNET_SEGMENT_FAST || GASNET_SEGMENT_LARGE /* register client segment */ gex_Segment_t seg; // g2ex segment is automatically saved by a hook - if (GASNET_OK != gasnetc_attach_segment(&seg, _tm, segsize, gasnetc_bootstrapExchange, GASNETI_FLAG_INIT_LEGACY)) + if (GASNET_OK != gasnetc_attach_segment(&seg, _tm, segsize, GASNETI_FLAG_INIT_LEGACY)) GASNETI_RETURN_ERRR(RESOURCE,"Error attaching segment"); #endif /* register client handlers */ - if (table && gasneti_amregister_legacy(ep->_amtbl, table, numentries) != GASNET_OK) + if (table && gasneti_amregister_legacy(ep, table, numentries) != GASNET_OK) GASNETI_RETURN_ERRR(RESOURCE,"Error registering handlers"); /* ensure everything is initialized across all nodes */ @@ -745,18 +742,22 @@ extern int gasnetc_Client_Init( #endif } + // Do NOT move this prior to the gasneti_trace_init() call + GASNETI_TRACE_PRINTF(O,("gex_Client_Init: name='%s' argc_p=%p argv_p=%p flags=%d", + clientName, (void *)argc, (void *)argv, flags)); + // allocate the client object - gasneti_Client_t client = gasneti_alloc_client(clientName, flags, 0); + gasneti_Client_t client = gasneti_alloc_client(clientName, flags); *client_p = gasneti_export_client(client); // create the initial endpoint with internal handlers - if (gasnetc_EP_Create(ep_p, *client_p, flags)) + if (gex_EP_Create(ep_p, *client_p, GEX_EP_CAPABILITY_ALL, flags)) GASNETI_RETURN_ERRR(RESOURCE,"Error creating initial endpoint"); gasneti_EP_t ep = gasneti_import_ep(*ep_p); gasnetc_handler = ep->_amtbl; // TODO-EX: this global variable to be removed // TODO-EX: create team - gasneti_TM_t tm = gasneti_alloc_tm(ep, gasneti_mynode, gasneti_nodes, flags, 0); + gasneti_TM_t tm = gasneti_alloc_tm(ep, gasneti_mynode, gasneti_nodes, flags); *tm_p = gasneti_export_tm(tm); if (0 == (flags & GASNETI_FLAG_INIT_LEGACY)) { @@ -791,60 +792,34 @@ extern int gasnetc_Segment_Attach( /* create a segment collectively */ // TODO-EX: this implementation only works *once* - // TODO-EX: 
should be using the team's exchange function if possible // TODO-EX: need to pass proper flags (e.g. pshm and bind) instead of 0 - if (GASNET_OK != gasnetc_attach_segment(segment_p, tm, length, gasneti_defaultExchange, 0)) + if (GASNET_OK != gasnetc_attach_segment(segment_p, tm, length, 0)) GASNETI_RETURN_ERRR(RESOURCE,"Error attaching segment"); return GASNET_OK; } -extern int gasnetc_EP_Create(gex_EP_t *ep_p, - gex_Client_t client, - gex_Flags_t flags) { - /* (###) add code here to create an endpoint belonging to the given client */ -#if 1 // TODO-EX: This is a stub, which assumes 1 implicit call from ClientCreate - static gasneti_mutex_t lock = GASNETI_MUTEX_INITIALIZER; - gasneti_mutex_lock(&lock); - static int once = 0; - int prev = once; - once = 1; - gasneti_mutex_unlock(&lock); - if (prev) gasneti_fatalerror("Multiple endpoints are not yet implemented"); -#endif - - gasneti_EP_t ep = gasneti_alloc_ep(gasneti_import_client(client), flags, 0); - *ep_p = gasneti_export_ep(ep); - - { /* core API handlers */ - gex_AM_Entry_t *ctable = (gex_AM_Entry_t *)gasnetc_get_handlertable(); - int len = 0; - int numreg = 0; - gasneti_assert(ctable); - while (ctable[len].gex_fnptr) len++; /* calc len */ - if (gasneti_amregister(ep->_amtbl, ctable, len, GASNETC_HANDLER_BASE, GASNETE_HANDLER_BASE, 0, &numreg) != GASNET_OK) - GASNETI_RETURN_ERRR(RESOURCE,"Error registering core API handlers"); - gasneti_assert(numreg == len); - } +extern int gasnetc_Segment_Create( + gex_Segment_t *segment_p, + gex_Client_t client, + gex_Addr_t address, + uintptr_t length, + gex_MK_t kind, + gex_Flags_t flags) +{ + gasneti_assert(segment_p); - { /* extended API handlers */ - gex_AM_Entry_t *etable = (gex_AM_Entry_t *)gasnete_get_handlertable(); - int len = 0; - int numreg = 0; - gasneti_assert(etable); - while (etable[len].gex_fnptr) len++; /* calc len */ - if (gasneti_amregister(ep->_amtbl, etable, len, GASNETE_HANDLER_BASE, GASNETI_CLIENT_HANDLER_BASE, 0, &numreg) != GASNET_OK) - 
GASNETI_RETURN_ERRR(RESOURCE,"Error registering extended API handlers"); - gasneti_assert(numreg == len); - } + // Create the Segment object, allocating memory if appropriate + gasneti_Client_t i_client = gasneti_import_client(client); + int rc = gasneti_segmentCreate(segment_p, i_client, address, length, kind, flags); - return GASNET_OK; + return rc; } extern int gasnetc_EP_RegisterHandlers(gex_EP_t ep, gex_AM_Entry_t *table, size_t numentries) { - return gasneti_amregister_client(gasneti_import_ep(ep)->_amtbl, table, numentries); + return gasneti_amregister_client(gasneti_import_ep(ep), table, numentries); } /* ------------------------------------------------------------------------------------ */ extern void gasnetc_exit(int exitcode) { @@ -1173,9 +1148,7 @@ extern int gasnetc_hsl_trylock(gex_HSL_t *hsl) { (for internal conduit use in bootstrapping, job management, etc.) */ static gex_AM_Entry_t const gasnetc_handlers[] = { - #ifdef GASNETC_COMMON_HANDLERS - GASNETC_COMMON_HANDLERS(), - #endif + GASNETC_COMMON_HANDLERS(), /* ptr-width independent handlers */ diff --git a/third-party/gasnet/gasnet-src/smp-conduit/gasnet_core.h b/third-party/gasnet/gasnet-src/smp-conduit/gasnet_core.h index 658573ba15ed..abe7b2c83456 100644 --- a/third-party/gasnet/gasnet-src/smp-conduit/gasnet_core.h +++ b/third-party/gasnet/gasnet-src/smp-conduit/gasnet_core.h @@ -19,7 +19,7 @@ ============== */ -extern void gasnetc_exit(int exitcode) GASNETI_NORETURN; +extern void gasnetc_exit(int _exitcode) GASNETI_NORETURN; GASNETI_NORETURNP(gasnetc_exit) #define gasnet_exit gasnetc_exit @@ -28,31 +28,34 @@ GASNETI_NORETURNP(gasnetc_exit) #define GASNET_NULL_ARGV_OK 1 /* ------------------------------------------------------------------------------------ */ extern int gasnetc_Client_Init( - gex_Client_t *client_p, - gex_EP_t *ep_p, - gex_TM_t *tm_p, - const char *clientName, - int *argc, - char ***argv, - gex_Flags_t flags); + gex_Client_t *_client_p, + gex_EP_t *_ep_p, + gex_TM_t *_tm_p, + 
const char *_clientName, + int *_argc, + char ***_argv, + gex_Flags_t _flags); // gasnetex.h handles name-shifting of gex_Client_Init() extern int gasnetc_Segment_Attach( - gex_Segment_t *segment_p, - gex_TM_t tm, - uintptr_t length); + gex_Segment_t *_segment_p, + gex_TM_t _tm, + uintptr_t _length); #define gex_Segment_Attach gasnetc_Segment_Attach -extern int gasnetc_EP_Create( - gex_EP_t *ep_p, - gex_Client_t client, - gex_Flags_t flags); -#define gex_EP_Create gasnetc_EP_Create +extern int gasnetc_Segment_Create( + gex_Segment_t *_segment_p, + gex_Client_t _client, + gex_Addr_t _address, + uintptr_t _length, + gex_MK_t _kind, + gex_Flags_t _flags); +#define gex_Segment_Create gasnetc_Segment_Create extern int gasnetc_EP_RegisterHandlers( - gex_EP_t ep, - gex_AM_Entry_t *table, - size_t numentries); + gex_EP_t _ep, + gex_AM_Entry_t *_table, + size_t _numentries); #define gex_EP_RegisterHandlers gasnetc_EP_RegisterHandlers /* ------------------------------------------------------------------------------------ */ /* @@ -96,11 +99,11 @@ typedef struct { #define gex_HSL_Unlock(hsl) #define gex_HSL_Trylock(hsl) GASNET_OK #else - extern void gasnetc_hsl_init (gex_HSL_t *hsl); - extern void gasnetc_hsl_destroy(gex_HSL_t *hsl); - extern void gasnetc_hsl_lock (gex_HSL_t *hsl); - extern void gasnetc_hsl_unlock (gex_HSL_t *hsl); - extern int gasnetc_hsl_trylock(gex_HSL_t *hsl) GASNETI_WARN_UNUSED_RESULT; + extern void gasnetc_hsl_init (gex_HSL_t *_hsl); + extern void gasnetc_hsl_destroy(gex_HSL_t *_hsl); + extern void gasnetc_hsl_lock (gex_HSL_t *_hsl); + extern void gasnetc_hsl_unlock (gex_HSL_t *_hsl); + extern int gasnetc_hsl_trylock(gex_HSL_t *_hsl) GASNETI_WARN_UNUSED_RESULT; #define gex_HSL_Init gasnetc_hsl_init #define gex_HSL_Destroy gasnetc_hsl_destroy @@ -121,12 +124,28 @@ typedef struct { #define gex_AM_LUBReplyLong() ((size_t)GASNETC_MAX_LONG) // TODO-EX: can these be improved upon? 
-#define gasnetc_AM_MaxRequestMedium(tm,rank,lc_opt,flags,nargs) gex_AM_LUBRequestMedium() -#define gasnetc_AM_MaxReplyMedium(tm,rank,lc_opt,flags,nargs) gex_AM_LUBReplyMedium() -#define gasnetc_AM_MaxRequestLong(tm,rank,lc_opt,flags,nargs) gex_AM_LUBRequestLong() -#define gasnetc_AM_MaxReplyLong(tm,rank,lc_opt,flags,nargs) gex_AM_LUBReplyLong() -#define gasnetc_Token_MaxReplyMedium(token,lc_opt,flags,nargs) gex_AM_LUBReplyMedium() -#define gasnetc_Token_MaxReplyLong(token,lc_opt,flags,nargs) gex_AM_LUBReplyLong() +#define gasnetc_AM_MaxRequestMedium(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS5(tm,rank,lc_opt,flags,nargs),gex_AM_LUBRequestMedium()) +#define gasnetc_AM_MaxReplyMedium(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS5(tm,rank,lc_opt,flags,nargs),gex_AM_LUBReplyMedium()) +#define gasnetc_Token_MaxReplyMedium(token,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS4(token,lc_opt,flags,nargs),gex_AM_LUBReplyMedium()) + +#define gasnetc_AM_MaxRequestLong(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS4(tm,rank,lc_opt,nargs), \ + ((flags) & GEX_FLAG_AM_PREPARE_LEAST_ALLOC \ + ? GASNETC_REF_NPAM_MAX_ALLOC \ + : gex_AM_LUBRequestLong())) +#define gasnetc_AM_MaxReplyLong(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS4(tm,rank,lc_opt,nargs), \ + ((flags) & GEX_FLAG_AM_PREPARE_LEAST_ALLOC \ + ? GASNETC_REF_NPAM_MAX_ALLOC \ + : gex_AM_LUBReplyLong())) +#define gasnetc_Token_MaxReplyLong(token,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS3(token,lc_opt,nargs), \ + ((flags) & GEX_FLAG_AM_PREPARE_LEAST_ALLOC \ + ? 
GASNETC_REF_NPAM_MAX_ALLOC \ + : gex_AM_LUBReplyLong())) /* ------------------------------------------------------------------------------------ */ /* diff --git a/third-party/gasnet/gasnet-src/smp-conduit/gasnet_core_fwd.h b/third-party/gasnet/gasnet-src/smp-conduit/gasnet_core_fwd.h index 7363abe7fee4..f932b34fd887 100644 --- a/third-party/gasnet/gasnet-src/smp-conduit/gasnet_core_fwd.h +++ b/third-party/gasnet/gasnet-src/smp-conduit/gasnet_core_fwd.h @@ -45,6 +45,9 @@ #define GASNETI_SUPPORTS_OUTOFSEGMENT_PUTGET 1 #endif + // uncomment for each MK_CLASS which the conduit supports. leave commented otherwise +//#define GASNET_HAVE_MK_CLASS_CUDA_UVA GASNETI_MK_CLASS_CUDA_UVA_ENABLED + /* conduits should define GASNETI_CONDUIT_THREADS to 1 if they have one or more "private" threads which may be used to run AM handlers, even under GASNET_SEQ this ensures locking is still done correctly, etc @@ -68,12 +71,21 @@ your conduit must provide the V-suffixed functions for any of these that are not defined. */ -/* #define GASNETC_HAVE_NP_REQ_MEDIUM 1 */ -/* #define GASNETC_HAVE_NP_REP_MEDIUM 1 */ -/* #define GASNETC_HAVE_NP_REQ_LONG 1 */ -/* #define GASNETC_HAVE_NP_REP_LONG 1 */ - - /* uncomment for each GASNETC_HAVE_NP_* enabled above if the Commit function +// smp-conduit falls into the degenerate case of these symbols, which +// are specified to only denote behavior of the "network transport" which +// does not exist in smp-conduit. In smp-conduit the *only* transport +// is the shared-memory one (which is guaranteed to provide native behavior +// for NPAM Medium). So setting these symbols for smp-conduit is +// technically redundant (and deliberately underspecified), but it was +// deemed more helpful than harmful to client authors to have them set, +// so that clients don't need additional work to recognize/optimize for +// this special case. 
+#define GASNET_NATIVE_NP_ALLOC_REQ_MEDIUM 1 +#define GASNET_NATIVE_NP_ALLOC_REP_MEDIUM 1 +/* #define GASNET_NATIVE_NP_ALLOC_REQ_LONG 1 */ +/* #define GASNET_NATIVE_NP_ALLOC_REP_LONG 1 */ + + /* uncomment for each GASNET_NATIVE_NP_ALLOC_* enabled above if the Commit function has the numargs argument even in an NDEBUG build (it is always passed in DEBUG builds). */ @@ -86,10 +98,54 @@ include a call to gasneti_AMPoll (or equivalent) for progress. The preferred implementation is to Poll only in the M-suffixed calls and not the V-suffixed calls (and GASNETC_REQUESTV_POLLS undefined). - Used if (and only if) any of the GASNETC_HAVE_NP_* values above are unset. + Used if (and only if) any of the GASNET_NATIVE_NP_ALLOC_* values above are unset. */ /* #define GASNETC_REQUESTV_POLLS 1 */ + // uncomment if conduit provides a gasnetc-prefixed override + // TODO: this should be a hook rather than an override +//#define GASNETC_HAVE_EP_PUBLISHBOUNDSEGMENT 1 + + /* If your conduit uses conduit-specific extensions to the basic object + types, then define the corresponding SIZEOF macros below to return + the total length of the conduit-specific object, including the prefix + portion which must be the matching GASNETI_[OBJECT]_COMMON fields. + Similarly, *_HOOK macros should be defined as callbacks to perform + conduit-specific initialization and finalization tasks, if any. + If a given SIZEOF macro is defined, but the corresponding INIT_HOOK is + not, then space beyond the COMMON fields will be zero-initialized. + In all cases, GASNETC_[OBJECT]_EXTRA_DECLS provides the place to + provide necessary declarations (since this file is included very early). 
+ */ + +//#define GASNETC_CLIENT_EXTRA_DECLS (###) +//#define GASNETC_CLIENT_INIT_HOOK(i_client) (###) +//#define GASNETC_CLIENT_FINI_HOOK(i_client) (###) +//#define GASNETC_SIZEOF_CLIENT_T() (###) + +//#define GASNETC_SEGMENT_EXTRA_DECLS (###) +//#define GASNETC_SEGMENT_INIT_HOOK(i_segment) (###) +//#define GASNETC_SEGMENT_FINI_HOOK(i_segment) (###) +//#define GASNETC_SIZEOF_SEGMENT_T() (###) + +//#define GASNETC_TM_EXTRA_DECLS (###) +//#define GASNETC_TM_INIT_HOOK(i_tm) (###) +//#define GASNETC_TM_FINI_HOOK(i_tm) (###) +//#define GASNETC_SIZEOF_TM_T() (###) + +//#define GASNETC_EP_EXTRA_DECLS (###) +//#define GASNETC_EP_INIT_HOOK(i_ep) (###) +//#define GASNETC_EP_FINI_HOOK(i_ep) (###) +//#define GASNETC_SIZEOF_EP_T() (###) + +// If conduit supports GASNET_MAXEPS!=1, set default and (optional) max values here. +// Leaving GASNETC_MAXEPS_DFLT unset will result in GASNET_MAXEPS=1, independent +// of all other settings (appropriate for conduits without multi-ep support). +// If set, GASNETC_MAXEPS_MAX is used to limit a user's --with-maxeps (and a +// global default limit is used otherwise).
+//#define GASNETC_MAXEPS_DFLT ### // default num endpoints this conduit supports, undef means no multi-ep support +//#define GASNETC_MAXEPS_MAX ### // leave unset for default + /* this can be used to add conduit-specific statistical collection values (see gasnet_trace.h) */ #define GASNETC_CONDUIT_STATS(CNT,VAL,TIME) diff --git a/third-party/gasnet/gasnet-src/smp-conduit/gasnet_core_internal.h b/third-party/gasnet/gasnet-src/smp-conduit/gasnet_core_internal.h index 824b5ad514fe..97d1e5cea44a 100644 --- a/third-party/gasnet/gasnet-src/smp-conduit/gasnet_core_internal.h +++ b/third-party/gasnet/gasnet-src/smp-conduit/gasnet_core_internal.h @@ -13,7 +13,7 @@ #define GASNETC_HSL_SPINLOCK 1 /* ------------------------------------------------------------------------------------ */ -#define _hidx_gasnetc_exchg_reqh (GASNETC_HANDLER_BASE+0) +#define _hidx_gasnetc_hbarr_reqh (GASNETC_HANDLER_BASE+0) /* add new core API handlers here and to the bottom of gasnet_core.c */ /* ------------------------------------------------------------------------------------ */ diff --git a/third-party/gasnet/gasnet-src/tests/Makefile.in b/third-party/gasnet/gasnet-src/tests/Makefile.in index cd6dc1339e79..0f6804c0eab6 100644 --- a/third-party/gasnet/gasnet-src/tests/Makefile.in +++ b/third-party/gasnet/gasnet-src/tests/Makefile.in @@ -101,7 +101,10 @@ testprograms_seq = \ testimm \ testsplit \ testacc \ + testsegment \ + testtmpair \ testreadonly \ + testcudauva \ $(CONDUIT_TESTS) \ $(MPI_TESTS_SEQ) @@ -152,6 +155,10 @@ TEST_ENV=$(DEBUG_MALLOC) TEST_INOUT= +# max lines (per test) of "ERROR" output to present in the final summary from run-tests +# setting to 0 means unbounded, but that (or very large) may crash the shell +TEST_MAX_FAILURE_LINES=16 + # default node count TEST_NODES=2 # provide args, env and node counts for specific tests, user can override with MANUAL_TESTPARAMS @@ -278,7 +285,7 @@ run-tests: force ( eval env $$env $(TEST_ENV) \ $$TIMECMD $$cmd || echo "ERROR: Test exited 
with failure code=$$?" ) \ 2>&1 | tee .test-output ; \ - failure=`@PERL@ -ne 'print if ((/ERROR/ || /fatal signal/) && !(/Retrying allocation/ || /GASNET_FREEZE_ON_ERROR/));' .test-output` ; \ + failure=`@PERL@ -ne 'if ((/ERROR/ || /fatal signal/) && !(/Retrying allocation/ || /GASNET_FREEZE_ON_ERROR/)) { print; if (++$$lines == $(TEST_MAX_FAILURE_LINES)) { print "ERROR OUTPUT TRUNCATED AT $(TEST_MAX_FAILURE_LINES) LINES\n"; exit 0; }}' .test-output` ; \ if test "$$failure" != "" ; then \ echo " " >> $(TESTLOG) ; \ echo " *-* $$faildesc *-*" >> $(TESTLOG) ; \ @@ -394,6 +401,9 @@ testlegacy_extraincs = $(AMX_INC) testcxx_extraincs = $(AMX_INC) testlegacycxx_extraincs = $(AMX_INC) +# some tests need cuda.h +testcudauva_extraincs = @CUDA_UVA_CFLAGS@ + # delay function, used by multiple tests # need to deliberately exclude and disable optimization for this object file to prevent breaking delay NOOPT_FLAG=-O0 diff --git a/third-party/gasnet/gasnet-src/tests/test.h b/third-party/gasnet/gasnet-src/tests/test.h index d98f98d41f57..6d3bc3da5f90 100644 --- a/third-party/gasnet/gasnet-src/tests/test.h +++ b/third-party/gasnet/gasnet-src/tests/test.h @@ -1040,8 +1040,8 @@ static size_t test_num_am_handlers = 0; GASNET_Safe(gex_Segment_Attach(segment_p, tm, length)); BARRIER(); for (gex_Rank_t i=0; i < TEST_PROCS; i++) { - void *_addr; uintptr_t _size; - GASNET_Safe(gex_Segment_QueryBound(tm, i, &_addr, NULL, &_size)); + uintptr_t _size; + gex_Event_Wait( gex_EP_QueryBoundSegmentNB(tm, i, NULL, NULL, &_size, 0) ); assert_always(_size >= TEST_SEGSZ); assert_always(((uintptr_t)_size) % PAGESZ == 0); } @@ -1052,8 +1052,11 @@ static size_t test_num_am_handlers = 0; #define gex_Segment_Attach _test_Segment_Attach static void* _test_seg(gex_Rank_t rank) { - void *addr; - GASNET_Safe(gex_Segment_QueryBound(_test_tm0, rank, &addr, NULL, NULL)); + void *addr = NULL; + gex_Flags_t imm = (rank == TEST_MYPROC) ? 
GEX_FLAG_IMMEDIATE : 0; + gex_Event_t ev = gex_EP_QueryBoundSegmentNB(_test_tm0, rank, &addr, NULL, NULL, imm); + if (!imm) gex_Event_Wait(ev); + else assert (ev == GEX_EVENT_INVALID); return addr; } #define TEST_SEG(rank) _test_seg(rank) @@ -1067,7 +1070,10 @@ static void *TEST_SEG_TM(gex_TM_t tm, gex_Rank_t rank) { return TEST_SEG(gex_TM_TranslateRankToJobrank(tm, rank)); #else void *result; - GASNET_Safe(gex_Segment_QueryBound(tm, rank, &result, NULL, NULL)); + gex_Flags_t imm = (rank == gex_TM_QueryRank(tm)) ? GEX_FLAG_IMMEDIATE : 0; + gex_Event_t ev = gex_EP_QueryBoundSegmentNB(tm, rank, &result, NULL, NULL, imm); + if (!imm) gex_Event_Wait(ev); + else assert (ev == GEX_EVENT_INVALID); return result; #endif } @@ -1321,11 +1327,14 @@ static void _test_init(const char *testname, int reports_performance, int early, GASNETT_TRACE_FREEZESOURCELINE(); \ GASNETT_TRACE_UNFREEZESOURCELINE(); \ if (GASNETT_TRACE_ENABLED) \ - GASNETT_TRACE_PRINTF("TEST_TRACING_MACROS: GASNETT_TRACE_PRINTF()"); \ - GASNETT_TRACE_PRINTF_FORCE("TEST_TRACING_MACROS: GASNETT_TRACE_PRINTF_FORCE()"); \ + GASNETT_TRACE_PRINTF("TEST_TRACING_MACROS: GASNETT_TRACE_PRINTF(%i)",42); \ + GASNETT_TRACE_PRINTF_FORCE("TEST_TRACING_MACROS: GASNETT_TRACE_PRINTF_FORCE(%i)",42); \ GASNETT_TRACE_SETMASK(GASNETT_TRACE_GETMASK()); \ GASNETT_STATS_SETMASK(GASNETT_STATS_GETMASK()); \ GASNETT_TRACE_SET_TRACELOCAL(GASNETT_TRACE_GET_TRACELOCAL()); \ + GASNETT_STATS_PRINTF("TEST_TRACING_MACROS: GASNETT_STATS_PRINTF(%i)",42); \ + GASNETT_STATS_PRINTF_FORCE("TEST_TRACING_MACROS: GASNETT_STATS_PRINTF_FORCE(%i)",42); \ + GASNETT_STATS_DUMP(/*reset=*/1); \ } while (0) GASNETT_END_EXTERNC diff --git a/third-party/gasnet/gasnet-src/tests/testcore2.c b/third-party/gasnet/gasnet-src/tests/testcore2.c index 300aacad62bb..d10d181b5da2 100644 --- a/third-party/gasnet/gasnet-src/tests/testcore2.c +++ b/third-party/gasnet/gasnet-src/tests/testcore2.c @@ -32,10 +32,14 @@ int numprocs; int iters = 0; size_t maxmed; size_t maxlong; 
-size_t least_payload_req_med; -size_t least_payload_rep_med; -size_t least_payload_req_long; -size_t least_payload_rep_long; +size_t least_payload_req_med_client; +size_t least_payload_req_med_alloc; +size_t least_payload_rep_med_client; +size_t least_payload_rep_med_alloc; +size_t least_payload_req_long_client; +size_t least_payload_req_long_alloc; +size_t least_payload_rep_long_client; +size_t least_payload_rep_long_alloc; size_t *all_sizes; volatile int done = 0; int allowretry = 1; @@ -58,6 +62,9 @@ test_static_assert_file((CHUNK_BITS + SZ_BITS) <= 32); // Test three injection modes #define INJMODE(iter) ((iter)%3) +#define INJ_FP 0 +#define INJ_NP_CB 1 +#define INJ_NP_GB 2 GASNETT_THREADKEY_DECLARE(mythread); GASNETT_THREADKEY_DEFINE(mythread); @@ -131,15 +138,19 @@ void ping_medhandler(gex_Token_t token, void *buf, size_t nbytes, int imm = 0; gex_Flags_t flags = TEST_RAND_ONEIN(5) ? GEX_FLAG_IMMEDIATE : 0; size_t most_payload = TEST_RAND(nbytes, 2*nbytes); - size_t least_payload = TEST_RAND(nbytes - nbytes/2, MIN(most_payload, least_payload_rep_med)); + int injmode = INJMODE(iter); // [0..2] + size_t max_least_payload = (injmode == 1) ? 
least_payload_rep_med_client + : least_payload_rep_med_alloc; + size_t least_payload = TEST_RAND(MIN(nbytes - nbytes/2, max_least_payload), + MIN(most_payload, max_least_payload)); size_t len = TEST_RAND(nbytes - nbytes/2, nbytes); retry: - switch (INJMODE(iter)) { // [0..2] - case 0: // Fixed-payload + switch (injmode) { // [0..2] + case INJ_FP: // Fixed-payload imm = gex_AM_ReplyMedium2(token, hidx_pong_medhandler, buf, len, GEX_EVENT_NOW, flags, iter, arg1); break; - case 1: // Negotiated-payload with client-provided buffer + case INJ_NP_CB: // Negotiated-payload with client-provided buffer // TODO: (lc_opt = &event) is legal, but we lack logic to test/wait outside handler context // additionally, we could not safely send buf with async LC sd = gex_AM_PrepareReplyMedium(token, buf, least_payload, most_payload, GEX_EVENT_NOW, flags, 2); @@ -152,7 +163,7 @@ void ping_medhandler(gex_Token_t token, void *buf, size_t nbytes, gex_AM_CommitReplyMedium2(sd, hidx_pong_medhandler, len, iter, arg1); break; - case 2: // Negotiated-payload without client-provided buffer + case INJ_NP_GB: // Negotiated-payload without client-provided buffer sd = gex_AM_PrepareReplyMedium(token, NULL, least_payload, most_payload, NULL, flags, 2); imm = (sd == GEX_AM_SRCDESC_NO_OP); // IMMEDIATE was NO OP if (imm) break; @@ -168,7 +179,11 @@ void ping_medhandler(gex_Token_t token, void *buf, size_t nbytes, flags &= ~GEX_FLAG_IMMEDIATE; goto retry; } - validate_chunk("Medium Request (post-reply)", buf, nbytes, iter, arg1); + if ((injmode != INJ_NP_GB) && TEST_RAND_ONEIN(5)) { + memset(buf, 0xaa, len); + } else { + validate_chunk("Medium Request (post-reply)", buf, nbytes, iter, arg1); + } } void pong_medhandler(gex_Token_t token, void *buf, size_t nbytes, @@ -192,16 +207,20 @@ void ping_longhandler(gex_Token_t token, void *buf, size_t nbytes, gex_Flags_t flags = TEST_RAND_ONEIN(5) ? GEX_FLAG_IMMEDIATE : 0; void * maybe_dest = TEST_RAND_ONEIN(2) ? 
dstbuf : NULL; // Passing dest_addr to Prepare is optional size_t most_payload = TEST_RAND(nbytes, 2*nbytes); - size_t least_payload = TEST_RAND(nbytes, MIN(most_payload, least_payload_rep_long)); + int injmode = INJMODE(iter); // [0..2] + size_t max_least_payload = (injmode == 1) ? least_payload_rep_long_client + : least_payload_rep_long_alloc; + size_t least_payload = TEST_RAND(MIN(nbytes, max_least_payload), + MIN(most_payload, max_least_payload)); size_t len = TEST_RAND(nbytes - nbytes/2, nbytes); retry: - switch (INJMODE(iter)) { // [0..2] - case 0: // Fixed-payload + switch (injmode) { // [0..2] + case INJ_FP: // Fixed-payload if (srcbuf != buf) memcpy(srcbuf, buf, len); imm = gex_AM_ReplyLong2(token, hidx_pong_longhandler, srcbuf, len, dstbuf, GEX_EVENT_NOW, flags, iter, arg1); break; - case 1: // Negotiated-payload with client-provided buffer + case INJ_NP_CB: // Negotiated-payload with client-provided buffer // TODO: (lc_opt = &event) is legal, but we lack logic to test/wait outside handler context // additionally, we could not safely send buf with async LC sd = gex_AM_PrepareReplyLong(token, srcbuf, least_payload, most_payload, maybe_dest, GEX_EVENT_NOW, flags, 2); @@ -215,7 +234,7 @@ void ping_longhandler(gex_Token_t token, void *buf, size_t nbytes, gex_AM_CommitReplyLong2(sd, hidx_pong_longhandler, len, dstbuf, iter, arg1); break; - case 2: // Negotiated-payload without client-provided buffer + case INJ_NP_GB: // Negotiated-payload without client-provided buffer sd = gex_AM_PrepareReplyLong(token, NULL, least_payload, most_payload, maybe_dest, NULL, flags, 2); imm = (sd == GEX_AM_SRCDESC_NO_OP); // IMMEDIATE was NO OP if (imm) break; @@ -231,6 +250,9 @@ void ping_longhandler(gex_Token_t token, void *buf, size_t nbytes, flags &= ~GEX_FLAG_IMMEDIATE; goto retry; } + if ((injmode != INJ_NP_GB) && !INSEG(iter) && TEST_RAND_ONEIN(5)) { + memset(srcbuf, 0x55, len); + } } void pong_longhandler(gex_Token_t token, void *buf, size_t nbytes, @@ -305,7 +327,7 @@ 
int main(int argc, char **argv) { if (!depth) depth = 16; depth = MIN(depth, (1< hi) ranges. For instance: - // least_payload = TEST_RAND(sz - sz/2, MIN(most_payload, least_payload_req_med)) - // could have lo=maxmed/2, which could be larger than hi=least_payload_req_med. - GASNET_Safe(gex_Segment_Attach(&mysegment, myteam, TEST_SEGSZ_REQUEST)); GASNET_Safe(gex_EP_RegisterHandlers(myep, htable, sizeof(htable)/sizeof(gex_AM_Entry_t))); test_init("testcore2",0,"[options] (iters) (max_payload) (depth)\n" @@ -463,6 +482,7 @@ void *doit(void *id) { } assert_always(num_sz < (1<= least_payload); assert(gex_AM_SrcDescSize(sd) <= most_payload); - assert(gex_AM_SrcDescAddr(sd) == srcbuf); + assert(gex_AM_SrcDescAddr(sd) == src); len = MIN(len, gex_AM_SrcDescSize(sd)); gex_AM_CommitRequestMedium2(sd, hidx_ping_medhandler, len, iter, arg1); if (lc_opt == GEX_EVENT_GROUP) { @@ -520,7 +550,7 @@ void *doit(void *id) { break; } - case 2: // Negotiated-payload without client-provided buffer + case INJ_NP_GB: // Negotiated-payload without client-provided buffer sd = gex_AM_PrepareRequestMedium(myteam, peerproc, NULL, least_payload, most_payload, NULL, flags, 2); imm = (sd == GEX_AM_SRCDESC_NO_OP); // IMMEDIATE was NO OP if (imm) break; @@ -536,6 +566,7 @@ void *doit(void *id) { flags &= ~GEX_FLAG_IMMEDIATE; goto retry_med; } + if (src == tmpbuf) memset(tmpbuf, 0xa5, len); // overwrite source } /* wait for completion */ GASNET_BLOCKUNTIL(gasnett_atomic_read(&pong_recvd,0) == depth); @@ -553,16 +584,26 @@ void *doit(void *id) { gex_Flags_t flags = TEST_RAND_ONEIN(5) ? GEX_FLAG_IMMEDIATE : 0; void * maybe_dest = TEST_RAND_ONEIN(2) ? dstbuf : NULL; // Passing dest_addr to Prepare is optional size_t most_payload = TEST_RAND(sz, 2*sz); - size_t least_payload = TEST_RAND(sz - sz/2, MIN(most_payload, least_payload_req_long)); + int injmode = INJMODE(iter); // [0..2] + size_t max_least_payload = (injmode == INJ_NP_CB) + ? 
least_payload_req_long_client + : least_payload_req_long_alloc; + size_t least_payload = TEST_RAND(MIN(sz - sz/2, max_least_payload), + MIN(most_payload, max_least_payload)); size_t len = TEST_RAND(sz - sz/2, sz); + uint8_t *src = srcbuf; + if ((injmode != INJ_NP_GB) && TEST_RAND_ONEIN(5)) { + memcpy(tmpbuf, srcbuf, len); + src = tmpbuf; + } retry_long: - switch (INJMODE(iter)) { // [0..2] - case 0: // Fixed-payload - imm = gex_AM_RequestLong2(myteam, peerproc, hidx_ping_longhandler, srcbuf, len, + switch (injmode) { // [0..2] + case INJ_FP: // Fixed-payload + imm = gex_AM_RequestLong2(myteam, peerproc, hidx_ping_longhandler, src, len, dstbuf, GEX_EVENT_NOW, flags, iter, arg1); break; - case 1: // Negotiated-payload with client-provided buffer + case INJ_NP_CB: // Negotiated-payload with client-provided buffer { gex_Event_t lc = GEX_EVENT_NO_OP; gex_Event_t *lc_opt = NULL; @@ -571,13 +612,13 @@ void *doit(void *id) { case 1: lc_opt = GEX_EVENT_NOW; break; case 2: lc_opt = GEX_EVENT_GROUP; break; } - sd = gex_AM_PrepareRequestLong(myteam, peerproc, srcbuf, least_payload, most_payload, maybe_dest, lc_opt, flags, 2); + sd = gex_AM_PrepareRequestLong(myteam, peerproc, src, least_payload, most_payload, maybe_dest, lc_opt, flags, 2); imm = (sd == GEX_AM_SRCDESC_NO_OP); // IMMEDIATE was NO OP if (imm) break; assert(gex_AM_SrcDescSize(sd) >= least_payload); assert(gex_AM_SrcDescSize(sd) <= most_payload); - assert(gex_AM_SrcDescAddr(sd) == srcbuf); - len = MIN(len, sz); + assert(gex_AM_SrcDescAddr(sd) == src); + len = MIN(len, gex_AM_SrcDescSize(sd)); gex_AM_CommitRequestLong2(sd, hidx_ping_longhandler, len, dstbuf, iter, arg1); if (lc_opt == GEX_EVENT_GROUP) { gex_NBI_Wait(GEX_EC_AM,0); @@ -587,13 +628,13 @@ void *doit(void *id) { break; } - case 2: // Negotiated-payload without client-provided buffer + case INJ_NP_GB: // Negotiated-payload without client-provided buffer sd = gex_AM_PrepareRequestLong(myteam, peerproc, NULL, least_payload, most_payload, maybe_dest, NULL, 
flags, 2); imm = (sd == GEX_AM_SRCDESC_NO_OP); // IMMEDIATE was NO OP if (imm) break; assert(gex_AM_SrcDescSize(sd) >= least_payload); assert(gex_AM_SrcDescSize(sd) <= most_payload); - len = MIN(len, sz); + len = MIN(len, gex_AM_SrcDescSize(sd)); memcpy(gex_AM_SrcDescAddr(sd), srcbuf, len); gex_AM_CommitRequestLong2(sd, hidx_ping_longhandler, len, dstbuf, iter, arg1); break; @@ -603,6 +644,7 @@ void *doit(void *id) { flags &= ~GEX_FLAG_IMMEDIATE; goto retry_long; } + if (src == tmpbuf) memset(tmpbuf, 0x5a, len); // overwrite source } /* wait for completion */ GASNET_BLOCKUNTIL(gasnett_atomic_read(&pong_recvd,0) == depth); @@ -610,6 +652,7 @@ void *doit(void *id) { } } } + test_free(tmpbuf); BARRIER(); done = 1; diff --git a/third-party/gasnet/gasnet-src/tests/testcudauva.c b/third-party/gasnet/gasnet-src/tests/testcudauva.c new file mode 100644 index 000000000000..18615ca26639 --- /dev/null +++ b/third-party/gasnet/gasnet-src/tests/testcudauva.c @@ -0,0 +1,256 @@ +/* $Source: bitbucket.org:berkeleylab/gasnet.git/tests/testcudauva.c $ + * Copyright (c) 2021, The Regents of the University of California + * + * Description: test of GEX_MK_CLASS_CUDA_UVA + * + * This test verifies correctness of gex_MK_Create() for the device class + * GEX_MK_CLASS_CUDA_UVA. This includes checking the expected behavior of + * builds both with and without configure-time enable of support for this + * device class. + */ + +// WARNING: This test exercises one or more EXPERIMENTAL features. +// One should not clone the logic in this test, since details of such features +// may change without notice. 
+ +#include +#include +#include + +#if GASNET_HAVE_MK_CLASS_CUDA_UVA +#include +#endif + +static size_t len = 0; +#ifndef TEST_SEGSZ +#define TEST_SEGSZ_EXPR (2*len) +#endif + +#include + +#define check_cudacall(op) do { \ + int _retval = (op); \ + if_pf(_retval) { \ + const char *_errorname; \ + cuGetErrorName(_retval, &_errorname); \ + FATALERR(#op": %s(%i)",_errorname,_retval); \ + } \ +} while (0) + +static gex_Client_t myclient; +static gex_EP_t myep; +static gex_TM_t myteam; +static gex_Segment_t mysegment; +static gex_Rank_t myrank; +static gex_Rank_t nranks; + + +int main(int argc, char **argv) +{ + int client_segment = 1; + int seed = 0; + int rc; + +#if GASNET_CONDUIT_IBV + { // These settings are a hack to avoid triggering known bugs/limitations, + // by disabling multiple paths. See bug 4148. + // Both of these variables can have potentially serious negative impacts + // on performance, and should not be used in general. + // As noted above "One should not clone the logic in this test". 
+ setenv("GASNET_SUPERNODE_MAXSIZE", "1", 0 /* NO overwrite if already set */); + setenv("GASNET_NUM_QPS", "1", 0 /* NO overwrite if already set */); + } +#endif + + GASNET_Safe(gex_Client_Init(&myclient, &myep, &myteam, "testcudauva", &argc, &argv, 0)); + + test_init("testcudauva", 0, "[options] (size) (seed)\n" + " Segment allocation options:\n" + " -client-seg: Test client-allocated GPU segment (default)\n" + " -gasnet-seg: Test GASNet-allocated GPU segment\n" + " size length of segment\n" + " seed seed for PRNG\n"); + + int help = 0; + int argi = 1; + while (argc > argi) { + if (!strcmp(argv[argi], "-client-seg")) { + client_segment = 1; + ++argi; + } else if (!strcmp(argv[argi], "-gasnet-seg")) { + client_segment = 0; + ++argi; + } else if (argv[argi][0] == '-') { + help = 1; + ++argi; + } else break; + } + + if (argi < argc) { len = atol(argv[argi]); ++argi; } + if (len == 0) { len = 16*1024*1024; } + + if (argi < argc) { seed = atoi(argv[argi]); ++argi; } + if (seed == 0) { seed = (((unsigned int)TIME()) & 0xFFFF); } + + if (argi < argc || help) test_usage(); + + uint8_t *tmp = test_malloc(len); + uint8_t *array1 = test_malloc(len); + uint8_t *array2 = test_malloc(len); + + myrank = gex_TM_QueryRank(myteam); + nranks = gex_TM_QuerySize(myteam); + gex_Rank_t peer = (myrank + 1) % nranks; + + if (nranks == 1) { + // TODO: remove once loopback kinds works correctly + MSG0("WARNING: This test requires a minimum of two nodes. Test skipped.\n"); + gasnet_exit(0); // prevents false negatives, such as from test harnesses for smp-conduit + } + + GASNET_Safe(gex_Segment_Attach(&mysegment, myteam, TEST_SEGSZ_REQUEST)); + + MSG0("Running CUDA UVA non-local xfer tests with size %lu, PRNG seed %d, and %s-allocated GPU segment", + (unsigned long)len, seed, client_segment ? 
"client" : "GASNet"); + + TEST_BCAST(&seed, 0, &seed, sizeof(seed)); + TEST_SRAND(seed); + for (size_t i = 0; i < len; ++i) { + unsigned int r = TEST_RAND(0,65535); + array1[i] = r & 0xff; + array2[i] = (r >> 8) & 0xff; + } + + BARRIER(); + + gex_EP_t gpu_ep; + gex_MK_t kind; + gex_MK_Create_args_t args; + + args.gex_flags = 0; + args.gex_class = GEX_MK_CLASS_CUDA_UVA; + args.gex_args.gex_class_cuda_uva.gex_CUdevice = 0; + +#if GASNET_HAVE_MK_CLASS_CUDA_UVA + { + if (GASNET_HAVE_MK_CLASS_MULTIPLE != 1) { + ERR("Invalid GASNET_HAVE_MK_CLASS_MULTIPLE"); + } + + int count; + cuInit(0); + if (cuDeviceGetCount(&count) || !count) { + MSG("GEX_MK_CLASS_CUDA_UVA: skipped - could not find a CUDA device"); + // If this lack of a device is NOT a collective property, then we want + // to at least balance the collective operations (to avoid hanging). + // However, at least one peer will fail a gex_EP_QueryBoundSegmentNB(). + // For the case all ranks lack a GPU, this test *will* exit gracefully. + GASNET_Safe( gex_EP_PublishBoundSegment(myteam, NULL, 0, 0) ); + for (int i = 0; i < 4; ++i) BARRIER(); // currently exactly one per case + } else { + CUcontext ctx; + check_cudacall( cuDevicePrimaryCtxRetain(&ctx, 0) ); + check_cudacall( cuCtxPushCurrent(ctx) ); + + CUdeviceptr dptr; + uint8_t *client_gpu = NULL; + if (client_segment) { + check_cudacall( cuMemAlloc(&dptr, TEST_SEGSZ_REQUEST) ); + client_gpu = (uint8_t *) dptr; + } + + GASNET_Safe( gex_MK_Create(&kind, myclient, &args, 0) ); + gex_Segment_t d_segment = GEX_SEGMENT_INVALID; + GASNET_Safe( gex_Segment_Create(&d_segment, myclient, client_gpu, TEST_SEGSZ_REQUEST, kind, 0)); + uint8_t *loc_gpu = gex_Segment_QueryAddr(d_segment); + if (client_segment) assert_always(loc_gpu == client_gpu); + + GASNET_Safe( gex_EP_Create(&gpu_ep, myclient, GEX_EP_CAPABILITY_RMA, 0)); + gex_EP_BindSegment(gpu_ep, d_segment, 0); + GASNET_Safe( gex_EP_PublishBoundSegment(myteam, &gpu_ep, 1, 0) ); + + // TM (3 of 4 being pairs) for the four 
possible pairings + gex_EP_Index_t host_epidx = gex_EP_QueryIndex(myep); + gex_EP_Index_t gpu_epidx = gex_EP_QueryIndex(gpu_ep); + assert_always(host_epidx == 0); + assert_always(gpu_epidx == 1); + gex_TM_t LH_RH = myteam; + gex_TM_t LH_RG = gex_TM_Pair(myep, gpu_epidx); + gex_TM_t LG_RH = gex_TM_Pair(gpu_ep, host_epidx); + gex_TM_t LG_RG = gex_TM_Pair(gpu_ep, gpu_epidx); + + uint8_t *rem_gpu; + size_t queried_len; + gex_Event_Wait( gex_EP_QueryBoundSegmentNB(LH_RG, peer, (void**)&rem_gpu, NULL, &queried_len, 0) ); + assert_always(queried_len == TEST_SEGSZ_REQUEST); + +// Case 1. Put - local host to remote gpu + gex_RMA_PutBlocking(LH_RG, peer, rem_gpu, array1, len, 0); + BARRIER(); + cuMemcpyDtoH(tmp, (CUdeviceptr)loc_gpu, len); + if (memcmp(tmp, array1, len)) { + ERR("Case 1 verification failed"); + cuMemcpyHtoD((CUdeviceptr)loc_gpu, array1, len); + } else { + MSG("Case 1 verification passed"); + } + +// Case 2. Get - remote gpu to local host + memset(tmp, 0, len); + gex_RMA_GetBlocking(LH_RG, tmp, peer, rem_gpu, len, 0); + if (memcmp(tmp, array1, len)) { + ERR("Case 2 verification failed"); + } else { + MSG("Case 2 verification passed"); + } + BARRIER(); + +// Case 3. Put - local gpu to remote gpu + gex_RMA_PutBlocking(LG_RG, peer, rem_gpu+len, loc_gpu, len, 0); + BARRIER(); + cuMemcpyDtoH(tmp, (CUdeviceptr)loc_gpu+len, len); + if (memcmp(tmp, array1, len)) { + ERR("Case 3 verification failed"); + cuMemcpyHtoD((CUdeviceptr)loc_gpu+len, array1, len); + } else { + MSG("Case 3 verification passed"); + } + +// Case 4. 
Get - remote gpu to local gpu + cuMemcpyHtoD((CUdeviceptr)loc_gpu, array2, len); + BARRIER(); + gex_RMA_GetBlocking(LG_RG, loc_gpu+len, peer, rem_gpu, len, 0); + cuMemcpyDtoH(tmp, (CUdeviceptr)loc_gpu+len, len); + if (memcmp(tmp, array2, len)) { + ERR("Case 4 verification failed"); + cuMemcpyHtoD((CUdeviceptr)loc_gpu+len, array2, len); + } else { + MSG("Case 4 verification passed"); + } + + if (!test_errs) MSG("GEX_MK_CLASS_CUDA_UVA: success"); + + check_cudacall( cuCtxSetCurrent(NULL) ); + check_cudacall( cuDevicePrimaryCtxRelease(0) ); + } + } +#else + { + gex_System_SetVerboseErrors(0); + int rc = gex_MK_Create(&kind, myclient, &args, 0); + assert_always(rc == GASNET_ERR_BAD_ARG); + MSG("GEX_MK_CLASS_CUDA_UVA: correct failure due to missing support"); + } +#endif + + // Just to ensure these exist: + args.gex_class = GEX_MK_CLASS_HOST; + kind = GEX_MK_HOST; + + MSG("done."); + + BARRIER(); + gasnet_exit(0); + + return 0; +} diff --git a/third-party/gasnet/gasnet-src/tests/testcxx.cc b/third-party/gasnet/gasnet-src/tests/testcxx.cc index 91d1ef7a8ad0..8d256f26950e 100644 --- a/third-party/gasnet/gasnet-src/tests/testcxx.cc +++ b/third-party/gasnet/gasnet-src/tests/testcxx.cc @@ -6,6 +6,7 @@ #include #include #include +#include #include "testgasnet.c" diff --git a/third-party/gasnet/gasnet-src/tests/testgasnet.c b/third-party/gasnet/gasnet-src/tests/testgasnet.c index 3137b1ac3566..b443d3d67ac4 100644 --- a/third-party/gasnet/gasnet-src/tests/testgasnet.c +++ b/third-party/gasnet/gasnet-src/tests/testgasnet.c @@ -6,6 +6,7 @@ #include #include +#include #include /* limit segsz to prevent stack overflows for seg_everything tests */ @@ -43,6 +44,7 @@ void doit3(int partner, int *partnerseg); void doit5(int partner, int *partnerseg); void doit6(int partner, int *partnerseg); void doit7(int partner, int *partnerseg); +void doit8(int partner, int *partnerseg); static gex_Client_t myclient; static gex_EP_t myep; @@ -142,6 +144,16 @@ void test_threadinfo(int threadid, int 
numthreads) { gasnet_threadinfo_t ti = GASNET_GET_THREADINFO(); assert_always(ti == my_ti); } + { GASNET_BEGIN_FUNCTION(); + { GASNET_BEGIN_FUNCTION(); + gasnet_threadinfo_t ti = GASNET_GET_THREADINFO(); + assert_always(ti == my_ti); + } + { GASNET_POST_THREADINFO(GASNET_GET_THREADINFO()); + gasnet_threadinfo_t ti = GASNET_GET_THREADINFO(); + assert_always(ti == my_ti); + } + } assert(threadid < numthreads && numthreads <= MAX_THREADS); all_ti[threadid] = my_ti; PTHREAD_LOCALBARRIER(numthreads); @@ -218,6 +230,94 @@ void test_libgasnet_tools(void) { gasnett_getheapstats(&hs); } #endif + { + static int c1,c2,c3,c4,c5,c6,c7,c8; // zero-initialized counters + + #define MACRO8A(a1,a2,a3,a4,a5,a6,a7,a8) \ + (GASNETT_UNUSED_ARGS8(a1,a2,a3,a4,a5,a6,a7,a8),8) + #define MACRO7A(a1,a2,a3,a4,a5,a6,a7) \ + (GASNETT_UNUSED_ARGS7(a1,a2,a3,a4,a5,a6,a7),7) + #define MACRO6A(a1,a2,a3,a4,a5,a6) \ + (GASNETT_UNUSED_ARGS6(a1,a2,a3,a4,a5,a6),6) + #define MACRO5A(a1,a2,a3,a4,a5) \ + (GASNETT_UNUSED_ARGS5(a1,a2,a3,a4,a5),5) + #define MACRO4A(a1,a2,a3,a4) \ + (GASNETT_UNUSED_ARGS4(a1,a2,a3,a4),4) + #define MACRO3A(a1,a2,a3) \ + (GASNETT_UNUSED_ARGS3(a1,a2,a3),3) + #define MACRO2A(a1,a2) \ + (GASNETT_UNUSED_ARGS2(a1,a2),2) + #define MACRO1A(a1) \ + (GASNETT_UNUSED_ARGS1(a1),1) + int x; + x = MACRO8A(++c8,++c7,++c6,++c5,++c4,++c3,++c2,++c1); + x = MACRO7A(++c8,++c7,++c6,++c5,++c4,++c3,++c2); + x = MACRO6A(++c8,++c7,++c6,++c5,++c4,++c3); + x = MACRO5A(++c8,++c7,++c6,++c5,++c4); + x = MACRO4A(++c8,++c7,++c6,++c5); + x = MACRO3A(++c8,++c7,++c6); + x = MACRO2A(++c8,++c7); + x = MACRO1A(++c8); + assert_always(c1 == 1); + assert_always(c2 == 2); + assert_always(c3 == 3); + assert_always(c4 == 4); + assert_always(c5 == 5); + assert_always(c6 == 6); + assert_always(c7 == 7); + assert_always(c8 == 8); + assert_always(x == 1); + + #define MACRO8B(a0,a1,a2,a3,a4,a5,a6,a7,a8) do {\ + x += (a0);\ + GASNETT_UNUSED_ARGS8(a1,a2,a3,a4,a5,a6,a7,a8);\ + } while (0) + #define MACRO7B(a0,a1,a2,a3,a4,a5,a6,a7) 
do {\ + x += (a0);\ + GASNETT_UNUSED_ARGS7(a1,a2,a3,a4,a5,a6,a7);\ + } while (0) + #define MACRO6B(a0,a1,a2,a3,a4,a5,a6) do {\ + x += (a0);\ + GASNETT_UNUSED_ARGS6(a1,a2,a3,a4,a5,a6);\ + } while (0) + #define MACRO5B(a0,a1,a2,a3,a4,a5) do {\ + x += (a0);\ + GASNETT_UNUSED_ARGS5(a1,a2,a3,a4,a5);\ + } while (0) + #define MACRO4B(a0,a1,a2,a3,a4) do {\ + x += (a0);\ + GASNETT_UNUSED_ARGS4(a1,a2,a3,a4);\ + } while (0) + #define MACRO3B(a0,a1,a2,a3) do {\ + x += (a0);\ + GASNETT_UNUSED_ARGS3(a1,a2,a3);\ + } while (0) + #define MACRO2B(a0,a1,a2) do {\ + x += (a0);\ + GASNETT_UNUSED_ARGS2(a1,a2);\ + } while (0) + #define MACRO1B(a0,a1) do {\ + x += (a0);\ + GASNETT_UNUSED_ARGS1(a1);\ + } while (0) + MACRO8B(8,++c8,++c7,++c6,++c5,++c4,++c3,++c2,++c1); + MACRO7B(7,++c8,++c7,++c6,++c5,++c4,++c3,++c2); + MACRO6B(6,++c8,++c7,++c6,++c5,++c4,++c3); + MACRO5B(5,++c8,++c7,++c6,++c5,++c4); + MACRO4B(4,++c8,++c7,++c6,++c5); + MACRO3B(3,++c8,++c7,++c6); + MACRO2B(2,++c8,++c7); + MACRO1B(1,++c8); + assert_always(c1 == 1*2); + assert_always(c2 == 2*2); + assert_always(c3 == 3*2); + assert_always(c4 == 4*2); + assert_always(c5 == 5*2); + assert_always(c6 == 6*2); + assert_always(c7 == 7*2); + assert_always(c8 == 8*2); + assert_always(x == 37); + } #if GASNET_PAR num_threads = test_thread_limit(num_threads); test_createandjoin_pthreads(num_threads, &test_libgasnetpar_tools, NULL, 0); @@ -257,13 +357,26 @@ int main(int argc, char **argv) { assert_always(global_segsz > 0); #endif - { uintptr_t size = (uintptr_t)-5; + { uintptr_t size = (uintptr_t)-3; void *owneraddr = (void*)&size; void *localaddr = (void*)&size; // No segments have been created/bound yet. - // Local and remote bound-segment queries must return non-zero and preserve output locations. 
+ // Local bound-segment query must succeed synchronously and return zero size: + gex_Event_t ev = gex_EP_QueryBoundSegmentNB(myteam, myrank, NULL, NULL, &size, 0); + if (ev != GEX_EVENT_INVALID || size) { + MSG("*** ERROR - FAILED NO BOUND SEGMENT TEST!!!!!"); + } + // Remote bound-segment query must not "fail", and must return zero size: + size = (uintptr_t)-4; gex_Rank_t peer = (myrank + 1) % numranks; + ev = gex_EP_QueryBoundSegmentNB(myteam, peer, NULL, NULL, &size, 0); + if (ev == GEX_EVENT_NO_OP || (gex_Event_Wait(ev),0) || size) { + MSG("*** ERROR - FAILED NO BOUND SEGMENT TEST!!!!!"); + } + + // DEPRECATED queries must return non-zero and preserve output locations: + size = (uintptr_t)-5; if (!gex_Segment_QueryBound(myteam, myrank, &owneraddr, &localaddr, &size) || !gex_Segment_QueryBound(myteam, peer, &owneraddr, &localaddr, &size) || owneraddr != (void*)&size || localaddr != (void*)&size || size != (uintptr_t)-5) { @@ -372,6 +485,19 @@ GASNETT_EXTERNC void sizecheck_reqh(gex_Token_t token, void *buf, size_t nbytes, } // lc #undef CHECK_MAX gex_AM_ReplyShort0(token, sizecheck_handlers[1].gex_index, 0); + +#if !PLATFORM_COMPILER_XLC // Skip due to external bug 4205 + // verify that payload queries evaluate their args exactly once + #define CHECK_TOKEN_MAX_EVAL(cat) \ + do { \ + int a = 0, b = 0, c = 0, d = 0; \ + (void) gex_Token_MaxReply##cat((a++,token),(b++,GEX_EVENT_NOW),(c++,0),(d++,0)); \ + assert_always(a==1); assert_always(b==1); assert_always(c==1); assert_always(d==1); \ + } while (0) + CHECK_TOKEN_MAX_EVAL(Medium); + CHECK_TOKEN_MAX_EVAL(Long); + #undef CHECK_TOKEN_MAX_EVAL +#endif } gasnett_atomic_t sizecheck_ack = gasnett_atomic_init(0); GASNETT_EXTERNC void sizecheck_reph(gex_Token_t token) { @@ -395,6 +521,19 @@ void doit(int partner, int *partnerseg) { assert_always(!memcmp(&v,&vz,sizeof(type))); \ } while (0) CHECK_ZERO_CONSTANT(gex_Segment_t, GEX_SEGMENT_INVALID); + CHECK_ZERO_CONSTANT(gex_TM_t, GEX_TM_INVALID); +
CHECK_ZERO_CONSTANT(gex_Client_t, GEX_CLIENT_INVALID); + CHECK_ZERO_CONSTANT(gex_EP_t, GEX_EP_INVALID); + CHECK_ZERO_CONSTANT(gex_MK_t, GEX_MK_INVALID); + + #define CHECK_NONZERO_CONSTANT(type, constant) do { \ + static type vz; \ + type v = constant; \ + test_static_assert(sizeof(constant) == sizeof(type)); \ + assert_always(sizeof(constant) == sizeof(v)); \ + assert_always(memcmp(&v,&vz,sizeof(type))); \ + } while (0) + CHECK_NONZERO_CONSTANT(gex_MK_t, GEX_MK_HOST); if (strcmp(clientname, gex_Client_QueryName(myclient))) { MSG("*** ERROR - FAILED CLIENT NAME TEST!!!!!"); @@ -441,7 +580,16 @@ void doit(int partner, int *partnerseg) { { void *owneraddr, *localaddr; uintptr_t size; - // Local bound-segment query must return 0 and give same data as direct queries + // Local segment query must locate the segment and give same data as direct queries + gex_Event_t ev = gex_EP_QueryBoundSegmentNB(myteam, myrank, &owneraddr, &localaddr, &size, GEX_FLAG_IMMEDIATE); + if (ev != GEX_EVENT_INVALID || + size != gex_Segment_QuerySize(mysegment) || + owneraddr != gex_Segment_QueryAddr(mysegment) || + owneraddr != localaddr) { + MSG("*** ERROR - FAILED LOCAL BOUND SEGMENT TEST!!!!!"); + } + // and DEPRECATED API should too: + owneraddr = localaddr = NULL; size = 0; if (gex_Segment_QueryBound(myteam, myrank, &owneraddr, &localaddr, &size) || size != gex_Segment_QuerySize(mysegment) || owneraddr != gex_Segment_QueryAddr(mysegment) || @@ -454,9 +602,29 @@ void doit(int partner, int *partnerseg) { size = 0; owneraddr = NULL; localaddr = (void*)&size; - // Remote bound-segment query must return 0 and set all outputs to "plausible" values - if (gex_Segment_QueryBound(myteam, peer, &owneraddr, &localaddr, &size) || - !size || !owneraddr || localaddr == (void*)&size) { + // Remote bound-segment IMMEDIATE queries may fail, but can never return a real event + ev = gex_EP_QueryBoundSegmentNB(myteam, peer, &owneraddr, &localaddr, &size, GEX_FLAG_IMMEDIATE); + if (ev == GEX_EVENT_NO_OP) { + 
// IMMEDIATE "failed. Non-IMMEDIATE retry must locate the segment. + ev = gex_EP_QueryBoundSegmentNB(myteam, peer, &owneraddr, &localaddr, &size, 0); + if (ev == GEX_EVENT_NO_OP) { + MSG("*** ERROR - FAILED REMOTE BOUND SEGMENT TEST!!!!!"); + } + gex_Event_Wait(ev); + } else if (ev != GEX_EVENT_INVALID) { + // "real" event (or entirely bogus value) returned from an IMMEDIATE query + MSG("*** ERROR - FAILED REMOTE BOUND SEGMENT TEST!!!!!"); + } + // Successfully query must set all outputs to "plausible" values + if (!size || !owneraddr || localaddr == (void*)&size) { + MSG("*** ERROR - FAILED REMOTE BOUND SEGMENT TEST!!!!!"); + } + // and DEPRECATED API should match: + void *owneraddr2 = NULL; + void *localaddr2 = NULL; + uintptr_t size2 = 0; + if (gex_Segment_QueryBound(myteam, peer, &owneraddr2, &localaddr2, &size2) || + size2 != size || owneraddr2 != owneraddr || localaddr2 != localaddr) { MSG("*** ERROR - FAILED REMOTE BOUND SEGMENT TEST!!!!!"); } } @@ -546,8 +714,10 @@ void doit(int partner, int *partnerseg) { BARRIER(); for (gex_Rank_t i = 0; i < neighbor_size; ++i) { gex_Rank_t *crossmap = NULL; - int rc = gex_Segment_QueryBound(myteam, neighbor_array[i].gex_jobrank, NULL, (void**)&crossmap, NULL); - assert_always(rc == 0); + size_t size; + gex_Event_Wait( + gex_EP_QueryBoundSegmentNB(myteam, neighbor_array[i].gex_jobrank, NULL, (void**)&crossmap, &size, 0) ); + assert_always(size != 0); assert_always(crossmap != NULL); crossmap[neighbor_rank] = myrank; } @@ -571,6 +741,12 @@ void doit(int partner, int *partnerseg) { assert_always(n_proc >= n_size && n_size >= h_size); } + assert_always(gex_System_GetVerboseErrors()); + gex_System_SetVerboseErrors(0); + assert_always(!gex_System_GetVerboseErrors()); + gex_System_SetVerboseErrors(1); + assert_always(gex_System_GetVerboseErrors()); + /* width-independent computation of an integer variable with unknown unsigned type */ #if PLATFORM_ARCH_LITTLE_ENDIAN #define compute_uint_val(lval_u64,var) do { \ @@ -681,6 
+857,14 @@ void doit(int partner, int *partnerseg) { assert_always(myrank < numranks); assert_always(numranks < GEX_RANK_INVALID); + /* max thread query */ +#if GASNET_SEQ + assert_always(gex_System_QueryMaxThreads() == 1); +#else + // Not a spec requirement, but a reasonable assumption for any implementation + assert_always(gex_System_QueryMaxThreads() > 1); +#endif + /* ep_index/ep_location tests */ assert_unsigned(gex_EP_Index_t); for (gex_Rank_t i = 0; i < numranks; ++i) { @@ -697,6 +881,21 @@ void doit(int partner, int *partnerseg) { assert_always(gex_AM_LUBRequestLong() >= 512); assert_always(gex_AM_LUBReplyLong() >= 512); +#if !PLATFORM_COMPILER_XLC // Skip due to external bug 4205 + // verify that payload queries evalute their args exactly once + #define CHECK_AM_MAX_EVAL(name) \ + do { \ + int a = 0, b = 0, c = 0, d = 0, e = 0; \ + (void) gex_AM_Max##name((a++,myteam),(b++,GEX_RANK_INVALID),(c++,GEX_EVENT_NOW),(d++,0),(e++,0)); \ + assert_always(a==1); assert_always(b==1); assert_always(c==1); assert_always(d==1); assert_always(e==1); \ + } while (0) + CHECK_AM_MAX_EVAL(RequestMedium); + CHECK_AM_MAX_EVAL(RequestLong); + CHECK_AM_MAX_EVAL(ReplyMedium); + CHECK_AM_MAX_EVAL(ReplyLong); + #undef CHECK_AM_MAX_EVAL +#endif + static int firsttime = 1; if (firsttime) { size_t numhand = sizeof(sizecheck_handlers)/sizeof(gex_AM_Entry_t); @@ -861,7 +1060,7 @@ void doit0(int partner, int *partnerseg) { GEX_FLAG_ENABLE_LEAF_LC, - GEX_FLAG_TM_SCRATCH_SIZE_MIN, + GEX_FLAG_TM_SCRATCH_SIZE_MIN, // DEPRECATED since spec 0.11 but still valid GEX_FLAG_TM_SCRATCH_SIZE_RECOMMENDED, GEX_FLAG_TM_GLOBAL_SCRATCH, GEX_FLAG_TM_LOCAL_SCRATCH, @@ -872,6 +1071,10 @@ void doit0(int partner, int *partnerseg) { GEX_FLAG_GLOBALLY_QUIESCED, GEX_FLAG_RANK_IS_JOBRANK, + + GEX_FLAG_HINT_ACCEL_AD, + GEX_FLAG_HINT_ACCEL_COLL, + GEX_FLAG_HINT_ACCEL_ALL, }; assert_arr_nonzero(gex_Flags_t, flags_arr); // No zero values @@ -920,7 +1123,7 @@ void doit0(int partner, int *partnerseg) { }; 
assert_arr_unaliased(gex_Flags_t, flags_vis); static gex_Flags_t const flags_tm[] = { // gex_TM_Split, Create, etc. - GEX_FLAG_TM_SCRATCH_SIZE_MIN, + GEX_FLAG_TM_SCRATCH_SIZE_MIN, // DEPRECATED since spec 0.11 but still valid GEX_FLAG_TM_SCRATCH_SIZE_RECOMMENDED, GEX_FLAG_TM_GLOBAL_SCRATCH, GEX_FLAG_TM_LOCAL_SCRATCH, @@ -929,6 +1132,13 @@ void doit0(int partner, int *partnerseg) { GEX_FLAG_SCRATCH_SEG_OFFSET, }; assert_arr_unaliased(gex_Flags_t, flags_tm); + static gex_Flags_t const flags_ep[] = { // gex_EP_Create, excludes ALL + GEX_FLAG_HINT_ACCEL_AD, + GEX_FLAG_HINT_ACCEL_COLL, + }; + assert_arr_nonzero(gex_Flags_t, flags_ep); // No zero values + // Not yet specified: assert_arr_unaliased(gex_Flags_t, flags_ep); + assert_arr_all_val(gex_EP_Capabilities_t, flags_ep, GEX_FLAG_HINT_ACCEL_ALL); // ALL includes them all assert_inttype(gex_EC_t); static gex_EC_t const ec_all = GEX_EC_ALL; @@ -950,6 +1160,19 @@ void doit0(int partner, int *partnerseg) { assert_arr_all_val(gex_TI_t, ti_arr, ti_all); // ALL includes them all test_format(gex_TI_t, ti_arr, gasnett_format_ti); + assert_inttype(gex_EP_Capabilities_t); + static gex_EP_Capabilities_t const ep_cap_all = GEX_EP_CAPABILITY_ALL; + static gex_EP_Capabilities_t const ep_cap_arr[] = { // all flags but _ALL + GEX_EP_CAPABILITY_RMA, + GEX_EP_CAPABILITY_AM, + GEX_EP_CAPABILITY_VIS, + GEX_EP_CAPABILITY_COLL, + GEX_EP_CAPABILITY_AD + }; + assert_arr_nonzero(gex_EP_Capabilities_t, ep_cap_arr); // No zero values + // Not yet specified: assert_arr_unaliased(gex_EP_Capabilities_t, ep_cap_arr); + assert_arr_all_val(gex_EP_Capabilities_t, ep_cap_arr, ep_cap_all); // ALL includes them all + gex_RMA_Value_t val = 0; test_mark_used(val); test_static_assert(sizeof(gex_RMA_Value_t) == SIZEOF_GEX_RMA_VALUE_T); test_static_assert(sizeof(gex_RMA_Value_t) >= sizeof(void *)); @@ -1443,5 +1666,37 @@ void doit7(int partner, int *partnerseg) { * moved to gasnet_diagnostic.c (run from testinternal). 
*/ +#ifndef TESTGASNET_NO_SPLIT + doit8(partner, partnerseg); +} +void doit8(int partner, int *partnerseg) { +#endif + BARRIER(); + + // Checks for graceful degradation where support is missing or limited. + // As features become widely support these should be removed in favor + // of complete tests (and conduit-specific KnownFailures if needed). + + // Suspend verbose errors since some of these test are expected to fail + gex_System_SetVerboseErrors(0); + + + // Sane GASNET_MAXEPS and graceful failure of EP_Create + if (GASNET_MAXEPS < 1) { + MSG("*** ERROR - INVALID MAXEPS SETTING!!!!!"); + } else if (GASNET_MAXEPS == 1) { + gex_EP_t ep; + int rc = gex_EP_Create(&ep, myclient, GEX_EP_CAPABILITY_RMA, 0); + if (rc != GASNET_ERR_RESOURCE) { + MSG("*** ERROR - EXCESS EP_CREATE DID NOT FAIL AS EXPECTED!!!!!"); + } + } else { + // testtmpair covers creation of multiple EPs where implemented + } + + + // Restore verbose errors + gex_System_SetVerboseErrors(1); + BARRIER(); } diff --git a/third-party/gasnet/gasnet-src/tests/testlarge.c b/third-party/gasnet/gasnet-src/tests/testlarge.c index efb6b8ebb7fe..46d1bf604f14 100644 --- a/third-party/gasnet/gasnet-src/tests/testlarge.c +++ b/third-party/gasnet/gasnet-src/tests/testlarge.c @@ -16,6 +16,9 @@ *************************************************************/ #include +#if GASNET_HAVE_MK_CLASS_CUDA_UVA + #include +#endif int numprocs; size_t maxsz = 0; @@ -109,7 +112,7 @@ void _print_stat(int myproc, stat_struct_t *st, const char *name, int operation) // Double payload at each iter, subject to max_step // but include max_payload which may not otherwise be visited #define ADVANCE(sz) do { \ - int step = MIN(max_step, sz); \ + size_t step = MIN(max_step, sz); \ if (!sz) { \ sz = 1; \ } else if (sz < max_payload && sz+step > max_payload) { \ @@ -288,6 +291,7 @@ int main(int argc, char **argv) int fullduplexmode = 0; int crossmachinemode = 0; int skipwarmup = 0; + int use_cuda_uva = 0; int help = 0; /* call startup */ @@ 
-327,6 +331,12 @@ int main(int argc, char **argv) ++arg; if (argc > arg) { max_step = atoi(argv[arg]); arg++; } else help = 1; +#if GASNET_HAVE_MK_CLASS_CUDA_UVA + // UNDOCUMENTED + } else if (!strcmp(argv[arg], "-cuda-uva")) { + use_cuda_uva = 1; + ++arg; +#endif } else if (argv[arg][0] == '-') { help = 1; ++arg; @@ -403,6 +413,30 @@ int main(int argc, char **argv) tgtmem = (numprocs > 1) ? TEST_SEG(peerproc) : (void*)(alignup(maxsz,PAGESZ) + (uintptr_t)myseg); +#if GASNET_HAVE_MK_CLASS_CUDA_UVA + gex_EP_t gpu_ep; + gex_MK_t kind; + if (use_cuda_uva) { + MSG0("***NOTICE***: Using EXPERIMENTAL support for CUDA UVA remote memory"); + test_static_assert(GASNET_MAXEPS >= 2); + + gex_MK_Create_args_t args; + args.gex_flags = 0; + args.gex_class = GEX_MK_CLASS_CUDA_UVA; + args.gex_args.gex_class_cuda_uva.gex_CUdevice = 0; + gex_Segment_t d_segment = GEX_SEGMENT_INVALID; + + GASNET_Safe( gex_MK_Create(&kind, myclient, &args, 0) ); + GASNET_Safe( gex_Segment_Create(&d_segment, myclient, NULL, TEST_SEGSZ_REQUEST, kind, 0) ); + GASNET_Safe( gex_EP_Create(&gpu_ep, myclient, GEX_EP_CAPABILITY_RMA, 0) ); + gex_EP_BindSegment(gpu_ep, d_segment, 0); + gex_EP_PublishBoundSegment(myteam, &gpu_ep, 1, 0); + + // The "trick" to diverting RMA operation to the remote GPU memory + myteam = gex_TM_Pair(myep, gex_EP_QueryIndex(gpu_ep)); + gex_Event_Wait( gex_EP_QueryBoundSegmentNB(myteam, peerproc, (void**)&tgtmem, NULL, NULL, 0) ); + } +#endif if (insegment) { msgbuf = (void *) myseg; diff --git a/third-party/gasnet/gasnet-src/tests/testlegacy.c b/third-party/gasnet/gasnet-src/tests/testlegacy.c index 001d46682aef..227f690cbe0a 100644 --- a/third-party/gasnet/gasnet-src/tests/testlegacy.c +++ b/third-party/gasnet/gasnet-src/tests/testlegacy.c @@ -144,9 +144,7 @@ void test_threadinfo(int threadid, int numthreads) { PTHREAD_LOCALBARRIER(num_threads); test_threadinfo(idx, num_threads); PTHREAD_LOCALBARRIER(num_threads); - #if GASNETI_ARCH_ALTIX - /* Don't pin threads because system is 
either shared or using cgroups */ - #elif GASNETI_ARCH_IBMPE + #if GASNETI_ARCH_IBMPE /* Don't pin threads because system s/w will have already done so */ #else if (gasnett_getenv_yesno_withdefault("GASNET_TEST_SET_AFFINITY",1)) { diff --git a/third-party/gasnet/gasnet-src/tests/testmpi.c b/third-party/gasnet/gasnet-src/tests/testmpi.c index 46bd28cc89f2..1745b6a5c31c 100644 --- a/third-party/gasnet/gasnet-src/tests/testmpi.c +++ b/third-party/gasnet/gasnet-src/tests/testmpi.c @@ -128,6 +128,14 @@ void init_test_mpi(int *argc, char ***argv) { dump_args(*argc, *argv); #endif + #if GASNET_CONDUIT_UDP && !DISABLE_UDP_MPI_RANKS + { // optional: tell udp-conduit to use the MPI process numbering + int mpirank; + MPI_SAFE(MPI_Comm_rank(MPI_COMM_WORLD, &mpirank)); + AMUDP_SPMDSetProc(mpirank); + } + #endif + MPI_SAFE(MPI_Barrier(MPI_COMM_WORLD)); } diff --git a/third-party/gasnet/gasnet-src/tests/testratomic.c b/third-party/gasnet/gasnet-src/tests/testratomic.c index 7a32dfad4437..d9bf72a8128f 100644 --- a/third-party/gasnet/gasnet-src/tests/testratomic.c +++ b/third-party/gasnet/gasnet-src/tests/testratomic.c @@ -865,8 +865,8 @@ int main(int argc, char **argv) { // need scratch sizes before Attatch gex_TM_t subtm = GEX_TM_INVALID; int color = gex_TM_QueryRank(myteam) & 1; // odds & evens - scratch_sz1 = gex_TM_Split(&subtm, myteam, color, 0, 0, 0, GEX_FLAG_TM_SCRATCH_SIZE_MIN); - scratch_sz2 = gex_TM_Split(&subtm, myteam, mynbrhd, 0, 0, 0, GEX_FLAG_TM_SCRATCH_SIZE_MIN); + scratch_sz1 = gex_TM_Split(&subtm, myteam, color, 0, 0, 0, GEX_FLAG_TM_SCRATCH_SIZE_RECOMMENDED); + scratch_sz2 = gex_TM_Split(&subtm, myteam, mynbrhd, 0, 0, 0, GEX_FLAG_TM_SCRATCH_SIZE_RECOMMENDED); GASNET_Safe(gex_Segment_Attach(&mysegment, myteam, TEST_SEGSZ_REQUEST)); diff --git a/third-party/gasnet/gasnet-src/tests/testsegment.c b/third-party/gasnet/gasnet-src/tests/testsegment.c new file mode 100644 index 000000000000..93b60ec19c1a --- /dev/null +++ 
b/third-party/gasnet/gasnet-src/tests/testsegment.c @@ -0,0 +1,339 @@ +/* $Source: bitbucket.org:berkeleylab/gasnet.git/tests/testsegment.c $ + * Copyright (c) 2020, The Regents of the University of California + * + * Description: GASNet "disaggregated Attach" test. + * + * This test binds a segment to a primordial endpoint using + * gex_Segment_Create(), gex_EP_BindSegment(), and gex_EP_PublishBoundSegment() + * rather than gex_Segment_Attach(), performing RMA, AM and VIS operations + * to verify correctness. + */ + +// WARNING: This test exercises an EXPERIMENTAL feature. +// One should not clone the logic in this test as a template for use of the +// gex_Segment_Create, gex_EP_BindSegment, and gex_EP_PublishBoundSegment APIs + +#include +#include + +// Unused +#ifndef TEST_SEGSZ +#define TEST_SEGSZ PAGESZ +#endif + +#include + +// ------------------------------------------------------------------------------------ + +static gex_Client_t myclient; +static gex_EP_t myep; +static gex_TM_t myteam; +static gex_Rank_t myrank, nranks; + +// ------------------------------------------------------------------------------------ + +// NOT fully general, but sufficient for this test +#if PLATFORM_ARCH_32 + #define PTR_NARGS 1 + #define PTR_ARGS gex_AM_Arg_t arg0 + #define PTR_PACK(ptr) ((gex_AM_Arg_t)(ptr)) + #define PTR_UNPACK() ((void *)arg0) +#elif PLATFORM_ARCH_64 + #define PTR_NARGS 2 + #define PTR_ARGS gex_AM_Arg_t arg0, gex_AM_Arg_t arg1 + #define PTR_PACK(ptr) ((gex_AM_Arg_t)TEST_HIWORD(ptr)), \ + ((gex_AM_Arg_t)TEST_LOWORD(ptr)) + #define PTR_UNPACK() ((void *)((((uint64_t)(arg0)) << 32) | \ + (((uint64_t)(arg1)) & 0xFFFFFFFF))) +#endif + +// ------------------------------------------------------------------------------------ + +static volatile int ping_rcvd = 0; +static volatile int pong_rcvd = 0; + +#define hidx_ping 200 +#define hidx_pong 201 + +static void ping_handler(gex_Token_t token, void *buf, size_t nbytes, PTR_ARGS) { + assert_always(! 
ping_rcvd); + ping_rcvd = 1; + + // Payload value is our jobrank, address is in the handler arg(s) + gex_AM_ReplyLong0(token, hidx_pong, &myrank, sizeof(gex_Rank_t), PTR_UNPACK(), GEX_EVENT_NOW, 0); +} + +static void pong_handler(gex_Token_t token, void *buf, size_t nbytes) { + assert_always(! pong_rcvd); + pong_rcvd = 1; +} + +// handler table +gex_AM_Entry_t htable[] = { + { hidx_ping, ping_handler, GEX_FLAG_AM_REQUEST|GEX_FLAG_AM_LONG, PTR_NARGS }, + { hidx_pong, pong_handler, GEX_FLAG_AM_REPLY |GEX_FLAG_AM_LONG, 0 } + }; +#define HANDLER_TABLE_SIZE (sizeof(htable)/sizeof(gex_AM_Entry_t)) + +// ------------------------------------------------------------------------------------ + +int main(int argc, char **argv) +{ + int seedoffset = 0; + int client_segment = 1; + int random_segment = 0; + int page_align = 0; + + GASNET_Safe(gex_Client_Init(&myclient, &myep, &myteam, "testsegment", &argc, &argv, 0)); + + int help = 0; + int argi = 1; + while (argc > argi) { + if (!strcmp(argv[argi], "-client-seg")) { + client_segment = 1; + ++argi; + } else if (!strcmp(argv[argi], "-gasnet-seg")) { + client_segment = 0; + ++argi; + } else if (!strcmp(argv[argi], "-random-seg")) { + random_segment = 1; + ++argi; + } else if (!strcmp(argv[argi], "-page-align")) { + page_align = 1; + ++argi; + } else if (!strcmp(argv[argi], "-no-page-align")) { + page_align = 0; + ++argi; + } else if (argv[argi][0] == '-') { + help = 1; + ++argi; + } else break; + } + + test_init("testsegment", 0, "[options] (seed)\n" + " Segment allocation options:\n" + " -client-seg: Test client-allocated segments (default)\n" + " -gasnet-seg: Test GASNet-allocated segments\n" + " -random-seg: Test with a random mix of the two options above\n" + " Segment alignment options:\n" + " -no-page-align: Use unaligned address and length (default)\n" + " -page-align: Use page-aligned adddress and length\n" + " seed seed offset for PRNG \n"); + if (argi < argc) { seedoffset = atoi(argv[argi]); argi++; } + if (argi < argc 
|| help) test_usage(); + + myrank = gex_TM_QueryRank(myteam); + nranks = gex_TM_QuerySize(myteam); + + if (seedoffset == 0) { + seedoffset = (((unsigned int)TIME()) & 0xFFFF); + // TEST_BCAST() requires a gex_Segment_Attach() call, which we intentionally omit + gex_Event_Wait(gex_Coll_BroadcastNB(myteam, 0, &seedoffset, &seedoffset, sizeof(seedoffset), 0)); + } + TEST_SRAND(myrank+seedoffset); + + MSG0("Running segment bind/publish test with %saligned address and length, seed=%d", + page_align?"page-":"un", seedoffset); + if (random_segment) { + client_segment = TEST_RAND_ONEIN(2); + MSG("This process using a %s-allocated segment", client_segment?"client":"gasnet"); + } else { + MSG0("Using %s-allocated segment on all processes", client_segment?"client":"gasnet"); + } + + GASNET_Safe(gex_EP_RegisterHandlers(myep, htable, sizeof(htable)/sizeof(gex_AM_Entry_t))); + + { + unsigned int offset = page_align ? 0 : 16; + + // Test creation of a GASNet-allocated segment + gex_Segment_t g_segment = GEX_SEGMENT_INVALID; + size_t g_segment_size = GASNET_PAGESIZE - offset; + GASNET_Safe(gex_Segment_Create(&g_segment, myclient, NULL, g_segment_size, GEX_MK_HOST, 0)); + if ((g_segment == GEX_SEGMENT_INVALID) || + (gex_Segment_QueryAddr(g_segment) == NULL) || + (gex_Segment_QuerySize(g_segment) < g_segment_size)) { + ERR("FAILED GASNET-ALLOCATED SEGMENT CREATE TEST"); + } + + // Test creation of a client-allocated segment + // TODO: should also cover client allocation from mmap(), stack and static data. 
+ gex_Segment_t c_segment = GEX_SEGMENT_INVALID; + uint8_t *c_segment_mem = (uint8_t *) test_malloc(GASNET_PAGESIZE); + uint8_t *c_segment_addr = c_segment_mem + offset; + size_t c_segment_size = GASNET_PAGESIZE - 2*offset; + GASNET_Safe(gex_Segment_Create(&c_segment, myclient, c_segment_addr, c_segment_size, GEX_MK_HOST, 0)); + if ((c_segment == GEX_SEGMENT_INVALID) || + (gex_Segment_QueryAddr(c_segment) != c_segment_addr) || + (gex_Segment_QuerySize(c_segment) != c_segment_size)) { + ERR("FAILED CLIENT-ALLOCATED SEGMENT CREATE TEST"); + } + + // Test pre-bind (no segments yet) Publish + // Should not fail, nor interfere with the post-Bind use of Publish + if (GASNET_OK != gex_EP_PublishBoundSegment(myteam, &myep, 1, 0)) { + ERR("FAILED EARLY SEGMENT PUBLISH TEST"); + } + + // Pick a segment to test and (TODO:) destroy the other + gex_Segment_t seg; + void * seg_addr; + uintptr_t seg_size; + if (client_segment) { + seg = c_segment; + seg_addr = c_segment_addr; + seg_size = c_segment_size; + // GASNET_Safe(gex_Segment_Destroy(g_segment, 0)); + } else { + seg = g_segment; + seg_addr = gex_Segment_QueryAddr(g_segment); + seg_size = gex_Segment_QuerySize(g_segment); + // GASNET_Safe(gex_Segment_Destroy(c_segment, 0)); + } + + // Bind the chosen segments and validate + gex_EP_BindSegment(myep, seg, 0); + { + void *tmp_addr; + size_t tmp_size; + gex_Event_Wait( gex_EP_QueryBoundSegmentNB(myteam, myrank, &tmp_addr, NULL, &tmp_size, 0) ); + if ((seg != gex_EP_QuerySegment(myep)) || + (tmp_addr != seg_addr) || + (tmp_size != seg_size)) { + ERR("FAILED SEGMENT EP BIND TEST"); + } + } + + // Publish the segment over a permuted temporary team, + // consisting all odds in reverse order followed by evens in reverse order. + // Additionally, we perform the Publish in two calls with random membership. + { + gex_TM_t tmp_tm = GEX_TM_INVALID; + int key = (myrank & 1 ? 
0 : nranks) + (nranks - myrank); + gex_TM_Split(&tmp_tm, myteam, 0, key, NULL, 0, GEX_FLAG_TM_NO_SCRATCH); + assert_always(tmp_tm != GEX_TM_INVALID); + assert_always(nranks == gex_TM_QuerySize(tmp_tm)); + int coin_flip = TEST_RAND_ONEIN(2); + if ((GASNET_OK != gex_EP_PublishBoundSegment(tmp_tm, &myep, coin_flip, 0)) || + (GASNET_OK != gex_EP_PublishBoundSegment(tmp_tm, &myep, !coin_flip, 0))) { + ERR("FAILED PERMUTED SEGMENT PUBLISH TEST"); + } + GASNET_Safe(gex_TM_Destroy(tmp_tm, NULL, 0)); + } + + // Prepare for comms + gex_Rank_t peer = (myrank + 1) % nranks; + void *loc_base, *rem_base; + gex_Event_Wait( gex_EP_QueryBoundSegmentNB(myteam, peer, &rem_base, NULL, NULL, 0) ); + loc_base = seg_addr; + + // TODO: validate gex_EP_QueryBoundSegmentNB() for some non-trivial set of + // processes, not just self and the one peer chosen for communication. + + // TODO: fix PSHM support and remove this mess: + int sender = 1; // Initiates Put, Get and Request + int target = 1; // Target of a sender + gex_Rank_t nbrhd_set_size; + gex_System_QueryMyPosition(&nbrhd_set_size, NULL, NULL, NULL); + if (nbrhd_set_size != nranks) { + // More ranks than nbrhds => at least one non-singleton nbrhd + gex_Rank_t anti_peer = (myrank + nranks - 1) % nranks; // Sends us a Request + gex_RankInfo_t *nbrhdinfo; + gex_Rank_t nbrhdsize; + gex_System_QueryNbrhdInfo(&nbrhdinfo, &nbrhdsize, NULL); + for (gex_Rank_t i = 0; i < nbrhdsize; ++i) { + if (nbrhdinfo[i].gex_jobrank == anti_peer) { + target = 0; + } + if (nbrhdinfo[i].gex_jobrank == peer) { + // Would communicate w/i the nbrhd, which is not currently supported + MSG("WARNING: skipping intra-nbrhd initiator role on this process."); + sleep(2); + sender = 0; + } + } + #if !GASNET_CONDUIT_SMP + MSG0("NOTE: setting GASNET_SUPERNODE_MAXSIZE=1 will enable skipped tests."); + #endif + } + + // Put, Get and AMLong to exercise the segment + // TODO: Test collectives w/ scratch space carved out of the segment. 
+ // TODO: Can Long payloads be made large to prevent packed-long optimizations? + { + gex_Rank_t *loc_array = (gex_Rank_t *)loc_base; + loc_array[0] = myrank; // Source of Gets + loc_array[1] = GEX_RANK_INVALID; // Destination of Put + loc_array[2] = GEX_RANK_INVALID; // Destination of RequestLong + loc_array[3] = GEX_RANK_INVALID; // Destination of ReplyLong + loc_array[4] = GEX_RANK_INVALID; // Destination of loopback Put + + BARRIER(); + + if (sender) { + int failed = 0; + gex_Rank_t *rem_array = (gex_Rank_t *)rem_base; + gex_Rank_t read_val; + + gex_Event_t get_ev = + gex_RMA_GetNB(myteam, &read_val, peer, rem_array, sizeof(gex_Rank_t), 0); + gex_RMA_PutBlocking(myteam, peer, rem_array + 1, &peer, sizeof(gex_Rank_t), 0); + gex_AM_RequestLong(myteam, peer, hidx_ping, &peer, sizeof(gex_Rank_t), rem_array + 2, + GEX_EVENT_NOW, 0, PTR_PACK(loc_array + 3)); + + // Validate Get result + gex_Event_Wait(get_ev); + failed += (read_val != peer); + + // Validate AMReplyLong payload + GASNET_BLOCKUNTIL(pong_rcvd); + failed += (loc_array[3] != peer); + + if (failed) { + ERR("Initiator checks FAILED %d test(s).", failed); + } else { + MSG("Initiator checks passed."); + } + } + + // Validate Put and AMRequestLong payload (if any) + // Ping follows blocking Put (Put+Sync+Send), thus proving Put completion + if (target) { + int failed = 0; + GASNET_BLOCKUNTIL(ping_rcvd); + failed += (loc_array[1] != myrank); // Put + failed += (loc_array[2] != myrank); // RequestLong payload + if (failed) { + ERR("Target checks FAILED %d test(s).", failed); + } else { + MSG("Target checks passed."); + } + } + + { + // Alawys test loopback + int failed = 0; + gex_Rank_t read_val = gex_RMA_GetBlockingVal(myteam, myrank, loc_array, sizeof(gex_Rank_t), 0); + failed += (read_val != myrank); + gex_RMA_PutBlockingVal(myteam, myrank, loc_array + 4, myrank, sizeof(gex_Rank_t), 0); + failed += (loc_array[4] != myrank); + if (failed) { + ERR("Loopback checks FAILED %d test(s).", failed); + } else { + 
MSG("Loopback checks passed."); + } + } + } + + // Test no-op (redundant) Publish + if (GASNET_OK != gex_EP_PublishBoundSegment(myteam, &myep, 1, 0)) { + ERR("FAILED NO-OP SEGMENT PUBLISH TEST"); + } + } + + BARRIER(); + MSG("done."); + + gasnet_exit(0); + return 0; +} diff --git a/third-party/gasnet/gasnet-src/tests/testsmall.c b/third-party/gasnet/gasnet-src/tests/testsmall.c index 58910673e4ca..9a052e146767 100644 --- a/third-party/gasnet/gasnet-src/tests/testsmall.c +++ b/third-party/gasnet/gasnet-src/tests/testsmall.c @@ -14,6 +14,10 @@ int maxsz = 0; #endif #include "test.h" +#if GASNET_HAVE_MK_CLASS_CUDA_UVA + #include +#endif + #define GASNET_HEADNODE 0 #define PRINT_LATENCY 0 #define PRINT_THROUGHPUT 1 @@ -426,6 +430,7 @@ int main(int argc, char **argv) int fullduplexmode = 0; int crossmachinemode = 0; int skipwarmup = 0; + int use_cuda_uva = 0; int help = 0; /* call startup */ @@ -460,6 +465,12 @@ int main(int argc, char **argv) } else if (!strcmp(argv[arg], "-s")) { skipwarmup = 1; ++arg; +#if GASNET_HAVE_MK_CLASS_CUDA_UVA + // UNDOCUMENTED + } else if (!strcmp(argv[arg], "-cuda-uva")) { + use_cuda_uva = 1; + ++arg; +#endif } else if (argv[arg][0] == '-') { help = 1; ++arg; @@ -532,6 +543,31 @@ int main(int argc, char **argv) myseg = TEST_SEG(myproc); tgtmem = (void*)(alignup(maxsz,PAGESZ) + (uintptr_t)TEST_SEG(peerproc)); +#if GASNET_HAVE_MK_CLASS_CUDA_UVA + gex_EP_t gpu_ep; + gex_MK_t kind; + if (use_cuda_uva) { + MSG0("***NOTICE***: Using EXPERIMENTAL support for CUDA UVA remote memory"); + test_static_assert(GASNET_MAXEPS >= 2); + + gex_MK_Create_args_t args; + args.gex_flags = 0; + args.gex_class = GEX_MK_CLASS_CUDA_UVA; + args.gex_args.gex_class_cuda_uva.gex_CUdevice = 0; + gex_Segment_t d_segment = GEX_SEGMENT_INVALID; + + GASNET_Safe( gex_MK_Create(&kind, myclient, &args, 0) ); + GASNET_Safe( gex_Segment_Create(&d_segment, myclient, NULL, TEST_SEGSZ_REQUEST, kind, 0) ); + GASNET_Safe( gex_EP_Create(&gpu_ep, myclient, GEX_EP_CAPABILITY_RMA, 0) ); 
+ gex_EP_BindSegment(gpu_ep, d_segment, 0); + gex_EP_PublishBoundSegment(myteam, &gpu_ep, 1, 0); + + // The "trick" to diverting RMA operation to the remote GPU memory + myteam = gex_TM_Pair(myep, gex_EP_QueryIndex(gpu_ep)); + gex_Event_Wait( gex_EP_QueryBoundSegmentNB(myteam, peerproc, (void**)&tgtmem, NULL, NULL, 0) ); + } +#endif + if (insegment) { msgbuf = (void *) myseg; } else { diff --git a/third-party/gasnet/gasnet-src/tests/testsplit.c b/third-party/gasnet/gasnet-src/tests/testsplit.c index fd2a20eb1765..37376cf99bce 100644 --- a/third-party/gasnet/gasnet-src/tests/testsplit.c +++ b/third-party/gasnet/gasnet-src/tests/testsplit.c @@ -15,7 +15,6 @@ #ifndef SCRATCH_QUERY_FLAG #define SCRATCH_QUERY_FLAG GEX_FLAG_TM_SCRATCH_SIZE_RECOMMENDED -//#define SCRATCH_QUERY_FLAG GEX_FLAG_TM_SCRATCH_SIZE_MIN #endif static gex_Client_t myclient; @@ -94,12 +93,14 @@ static gex_TM_t eventm; static void *even_scratch; static size_t even_scratch_sz; static void do_evens(void) { + static int reps = 0; eventm = coltm; // init just to check whether overwritten int even = !(myrank & 1); gex_Rank_t nmembers = even ? (gex_TM_QuerySize(myteam) + 1)/2 : 0; gex_EP_Location_t *members = test_calloc(sizeof(gex_EP_Location_t), nmembers); for (gex_Rank_t i = 0; i < nmembers; ++ i) members[i].gex_rank = i * 2; - gex_TM_Create(&eventm, 1, myteam, members, nmembers, &even_scratch, even_scratch_sz, GEX_FLAG_TM_LOCAL_SCRATCH); + gex_Flags_t scratch_flag = (++reps & 1) ? GEX_FLAG_TM_LOCAL_SCRATCH : GEX_FLAG_TM_NO_SCRATCH; + gex_TM_Create(&eventm, 1, myteam, members, nmembers, &even_scratch, even_scratch_sz, scratch_flag); if (even) { assert_always(eventm != coltm); assert_always(gex_TM_QuerySize(eventm) == nmembers); @@ -164,8 +165,6 @@ int main(int argc, char **argv) size_t scratch_sz; // Spec says NULL new_tm_p returns zero. 
- scratch_sz = gex_TM_Split(NULL, myteam, 0, 1, 0, 0, GEX_FLAG_TM_SCRATCH_SIZE_MIN); - assert_always(scratch_sz == 0); scratch_sz = gex_TM_Split(NULL, myteam, 0, 1, 0, 0, GEX_FLAG_TM_SCRATCH_SIZE_RECOMMENDED); assert_always(scratch_sz == 0); @@ -205,12 +204,9 @@ int main(int argc, char **argv) assert_always(ep_loc.gex_ep_index == 0); } - // Singleton team (also tests a 2nd-level split, of coltm): + // Singleton team (also tests a 2nd-level split, of coltm, and GEX_FLAG_TM_NO_SCRATCH): gex_TM_t onetm = coltm; // init just to check whether overwritten - scratch_sz = gex_TM_Split(&onetm, coltm, myrank, 0, 0, 0, SCRATCH_QUERY_FLAG); - assert_always((scratch_addr + scratch_sz) <= scratch_end); - gex_TM_Split(&onetm, coltm, myrank, 0, (void*)scratch_addr, scratch_sz, 0); - scratch_addr += scratch_sz; + gex_TM_Split(&onetm, coltm, myrank, 0, NULL, 0, GEX_FLAG_TM_NO_SCRATCH); assert_always(onetm != coltm); assert_always(gex_TM_QueryRank(onetm) == 0); assert_always(gex_TM_QuerySize(onetm) == 1); @@ -330,7 +326,15 @@ int main(int argc, char **argv) } // More destruction - assert_always(! gex_TM_Destroy(onetm, NULL, 0)); + { + // NO_SCRATCH case must not write to *scratch_p + gex_Memvec_t scratch_out; + scratch_out.gex_addr = (void*)main; + scratch_out.gex_len = myrank; + assert_always(! gex_TM_Destroy(onetm, &scratch_out, 0)); + assert_always(scratch_out.gex_addr == (void*)main); + assert_always(scratch_out.gex_len == myrank); + } assert_always(! gex_TM_Destroy(rowtm, NULL, 0)); assert_always(! gex_TM_Destroy(coltm, NULL, 0)); assert_always(! 
gex_TM_Destroy(revtm, NULL, 0)); diff --git a/third-party/gasnet/gasnet-src/tests/testteam.c b/third-party/gasnet/gasnet-src/tests/testteam.c index de7a2808aee5..52899387ce28 100644 --- a/third-party/gasnet/gasnet-src/tests/testteam.c +++ b/third-party/gasnet/gasnet-src/tests/testteam.c @@ -70,10 +70,7 @@ int main(int argc, char **argv) uint8_t *addr = TEST_MYSEG(); uintptr_t size = TEST_SEGSZ / 2; - assert_always(size >= gex_TM_Split(&my_row_tm, myteam, my_row, my_col, 0, 0, - GEX_FLAG_TM_SCRATCH_SIZE_MIN)); - assert_always(size >= gex_TM_Split(&my_col_tm, myteam, my_col, my_row, 0, 0, - GEX_FLAG_TM_SCRATCH_SIZE_MIN)); + assert_always(size >= 4096); // some non-trivial (non-zero) size teamA_scratch.addr = addr; teamA_scratch.size = size; diff --git a/third-party/gasnet/gasnet-src/tests/testteambcast.c b/third-party/gasnet/gasnet-src/tests/testteambcast.c index f1fbf79d7319..36132546cd10 100644 --- a/third-party/gasnet/gasnet-src/tests/testteambcast.c +++ b/third-party/gasnet/gasnet-src/tests/testteambcast.c @@ -81,10 +81,7 @@ int main(int argc, char **argv) uint8_t *addr = A; uintptr_t size = SCRATCH_SIZE / 2; - assert_always(size >= gex_TM_Split(&my_row_tm, myteam, my_row, my_col, 0, 0, - GEX_FLAG_TM_SCRATCH_SIZE_MIN)); - assert_always(size >= gex_TM_Split(&my_col_tm, myteam, my_col, my_row, 0, 0, - GEX_FLAG_TM_SCRATCH_SIZE_MIN)); + assert_always(size >= 4096); // some non-trivial (non-zero) size teamA_scratch.addr = addr; teamA_scratch.size = size; diff --git a/third-party/gasnet/gasnet-src/tests/testtmpair.c b/third-party/gasnet/gasnet-src/tests/testtmpair.c new file mode 100644 index 000000000000..f0517e4743ac --- /dev/null +++ b/third-party/gasnet/gasnet-src/tests/testtmpair.c @@ -0,0 +1,521 @@ +/* $Source: bitbucket.org:berkeleylab/gasnet.git/tests/testtmpair.c $ + * Copyright (c) 2020, The Regents of the University of California + * + * Description: Test of gex_TM_Pair() for communication initiation + */ + +#include +#include + +struct test_segment { + 
gex_Rank_t local[6]; // Local in-segment temporaries + + gex_Rank_t get_src; // Source of various Gets + gex_Rank_t put_dst[6]; // Destinations of various Puts x6 + + gex_Rank_t fp_req_dst; // FPAM LongRequest dst + gex_Rank_t fp_rep_dst; // FPAM LongReply dst + gex_Rank_t np_req_dst; // NPAM LongRequest dst + gex_Rank_t np_rep_dst; // NPAM LongReply dst + + gex_Rank_t vis_src; // VIS Gets + gex_Rank_t vis_dst[6]; // VIS Puts x6 +}; + +#ifndef TEST_SEGSZ +#define TEST_SEGSZ_EXPR sizeof(struct test_segment) +#endif + +#include + +#if 1 + // To count failures, not reporting detail + #define CHECK(cond) failed += !(cond) +#else + // To stop at first failure + #define CHECK(cond) assert(cond) +#endif + +// ------------------------------------------------------------------------------------ + +static gex_Client_t myclient; +static gex_Segment_t mysegment; +static gex_EP_t myep; +static gex_TM_t myteam; +static gex_Rank_t myrank, nranks; +static gex_Rank_t next, prev; + +// ------------------------------------------------------------------------------------ + +static gex_Rank_t *fp_reply_dst; +static gex_Rank_t *np_reply_dst; + +enum { + hidx_short_ping = GEX_AM_INDEX_BASE, + hidx_short_pong, + hidx_med_ping, + hidx_med_pong, + hidx_long_ping, + hidx_long_pong, + hidx_npmed_ping, + hidx_npmed_pong, + hidx_nplong_ping, + hidx_nplong_pong, +}; + +#define HANDLER_DECLS(hname) \ + static gasnett_atomic_t hname##_cntr = gasnett_atomic_init(0); \ + static gex_Rank_t hname##_from = GEX_RANK_INVALID; + +#define HANDLER_COMMON(hname) do { \ + gex_Token_Info_t info; \ + gex_TI_t rc = gex_Token_Info(token, &info, GEX_TI_SRCRANK); \ + hname##_from = info.gex_srcrank; \ + gasnett_atomic_increment(&hname##_cntr, GASNETT_ATOMIC_REL); \ +} while (0) + +HANDLER_DECLS(short_ping) +static void short_ping_handler(gex_Token_t token) { + HANDLER_COMMON(short_ping); + gex_AM_ReplyShort0(token, hidx_short_pong, 0); +} + +HANDLER_DECLS(short_pong) +static void short_pong_handler(gex_Token_t 
token) { + HANDLER_COMMON(short_pong); +} + +HANDLER_DECLS(med_ping) +static void med_ping_handler(gex_Token_t token, void *buf, size_t nbytes) { + HANDLER_COMMON(med_ping); + gex_AM_ReplyMedium0(token, hidx_med_pong, buf, nbytes, GEX_EVENT_NOW, 0); + assert(myrank == *(gex_Rank_t*)buf); +} + +HANDLER_DECLS(med_pong) +static void med_pong_handler(gex_Token_t token, void *buf, size_t nbytes) { + HANDLER_COMMON(med_pong); + assert(next == *(gex_Rank_t*)buf); +} + +HANDLER_DECLS(long_ping) +static void long_ping_handler(gex_Token_t token, void *buf, size_t nbytes) { + HANDLER_COMMON(long_ping); + gex_AM_ReplyLong0(token, hidx_long_pong, buf, nbytes, fp_reply_dst, GEX_EVENT_NOW, 0); + assert(myrank == *(gex_Rank_t*)buf); +} + +HANDLER_DECLS(long_pong) +static void long_pong_handler(gex_Token_t token, void *buf, size_t nbytes) { + HANDLER_COMMON(long_pong); + assert(next == *(gex_Rank_t*)buf); +} + +HANDLER_DECLS(npmed_ping) +static void npmed_ping_handler(gex_Token_t token, void *buf, size_t nbytes) { + HANDLER_COMMON(npmed_ping); + // Not bothering w/ the complexity of NPAM here since token is indep of TM-Pair + gex_AM_ReplyMedium0(token, hidx_npmed_pong, buf, nbytes, GEX_EVENT_NOW, 0); + assert(myrank == *(gex_Rank_t*)buf); +} + +HANDLER_DECLS(npmed_pong) +static void npmed_pong_handler(gex_Token_t token, void *buf, size_t nbytes) { + HANDLER_COMMON(npmed_pong); + assert(next == *(gex_Rank_t*)buf); +} + +HANDLER_DECLS(nplong_ping) +static void nplong_ping_handler(gex_Token_t token, void *buf, size_t nbytes) { + HANDLER_COMMON(nplong_ping); + // Not bothering w/ the complexity of NPAM here since token is indep of TM-Pair + gex_AM_ReplyLong0(token, hidx_nplong_pong, buf, nbytes, np_reply_dst, GEX_EVENT_NOW, 0); + assert(myrank == *(gex_Rank_t*)buf); +} + +HANDLER_DECLS(nplong_pong) +static void nplong_pong_handler(gex_Token_t token, void *buf, size_t nbytes) { + HANDLER_COMMON(nplong_pong); + assert(next == *(gex_Rank_t*)buf); +} + +// handler table +gex_AM_Entry_t 
htable[] = { + { hidx_short_ping, short_ping_handler, GEX_FLAG_AM_REQUEST|GEX_FLAG_AM_SHORT, 0 }, + { hidx_short_pong, short_pong_handler, GEX_FLAG_AM_REPLY |GEX_FLAG_AM_SHORT, 0 }, + { hidx_med_ping, med_ping_handler, GEX_FLAG_AM_REQUEST|GEX_FLAG_AM_MEDIUM, 0 }, + { hidx_med_pong, med_pong_handler, GEX_FLAG_AM_REPLY |GEX_FLAG_AM_MEDIUM, 0 }, + { hidx_long_ping, long_ping_handler, GEX_FLAG_AM_REQUEST|GEX_FLAG_AM_LONG, 0 }, + { hidx_long_pong, long_pong_handler, GEX_FLAG_AM_REPLY |GEX_FLAG_AM_LONG, 0 }, + { hidx_npmed_ping, npmed_ping_handler, GEX_FLAG_AM_REQUEST|GEX_FLAG_AM_MEDIUM, 0 }, + { hidx_npmed_pong, npmed_pong_handler, GEX_FLAG_AM_REPLY |GEX_FLAG_AM_MEDIUM, 0 }, + { hidx_nplong_ping, nplong_ping_handler, GEX_FLAG_AM_REQUEST|GEX_FLAG_AM_LONG, 0 }, + { hidx_nplong_pong, nplong_pong_handler, GEX_FLAG_AM_REPLY |GEX_FLAG_AM_LONG, 0 } + }; +#define HANDLER_TABLE_SIZE (sizeof(htable)/sizeof(gex_AM_Entry_t)) + +// ------------------------------------------------------------------------------------ + +int main(int argc, char **argv) +{ + GASNET_Safe(gex_Client_Init(&myclient, &myep, &myteam, "testtmpair", &argc, &argv, 0)); + + // TODO: control over how many EPs and how they are paired (loc,rem) + test_init("testpair", 0, "(test_sections)"); + if (argc > 1) TEST_SECTION_PARSE(argv[1]); + if (argc > 2) test_usage(); + + myrank = gex_TM_QueryRank(myteam); + nranks = gex_TM_QuerySize(myteam); + + GASNET_Safe(gex_Segment_Attach(&mysegment, myteam, TEST_SEGSZ)); + GASNET_Safe(gex_EP_RegisterHandlers(myep, htable, HANDLER_TABLE_SIZE)); + + next = (myrank + 1) % nranks; + prev = (myrank + nranks - 1) % nranks; + + const int num_eps = MIN(4, GASNET_MAXEPS); // TODO: command line arg to control this + gex_EP_t *eps = test_malloc(num_eps * sizeof(gex_EP_t)); + gex_Segment_t *segs = test_malloc(num_eps * sizeof(gex_Segment_t)); + gex_EP_Capabilities_t ep_caps = 0; + eps[0] = myep; + segs[0] = mysegment; + { + // Limit capabilities to those required by the enabled tests + for 
(gex_EP_Index_t idx = 0; idx < num_eps; ++idx) { + if (TEST_SECTION_BEGIN_ENABLED()) ep_caps |= GEX_EP_CAPABILITY_RMA; // Get + if (TEST_SECTION_BEGIN_ENABLED()) ep_caps |= GEX_EP_CAPABILITY_RMA; // Put + if (TEST_SECTION_BEGIN_ENABLED()) ep_caps |= GEX_EP_CAPABILITY_AM; // AM + if (TEST_SECTION_BEGIN_ENABLED()) ep_caps |= GEX_EP_CAPABILITY_VIS; // VIS + } + test_section = '\0'; + + // Limit capabilities to those currently implemented by the current conduit + #if GASNET_CONDUIT_IBV + ep_caps &= GEX_EP_CAPABILITY_RMA; + #elif GASNET_MAXEPS > 1 + MSG0("Update required in testtmpair.c for conduit-specific capabilities."); + #endif + + for (gex_EP_Index_t idx = 1; idx < num_eps; ++idx) { + GASNET_Safe(gex_EP_Create(eps+idx, myclient, ep_caps, 0)); + } + + for (gex_EP_Index_t idx = 1; idx < num_eps; ++idx) { + GASNET_Safe(gex_Segment_Create(segs+idx, myclient, NULL, TEST_SEGSZ_EXPR, GEX_MK_HOST, 0)); + gex_EP_BindSegment(eps[idx], segs[idx], 0); + } + gex_EP_PublishBoundSegment(myteam, eps+1, num_eps-1, 0); + } + + // Paranoia to prevent accidental use: + myteam = GEX_TM_INVALID; + + for (int iter = 0; iter < num_eps; ++iter) { + gex_EP_Index_t loc_idx, rem_idx; + + // TODO: "mix it up", such that non-equal local and remote indices communicate. + // However, that requires additional thought to managing two local segments if we + // are to continue using in-segment local addresses. + loc_idx = rem_idx = iter; + + gex_EP_Capabilities_t test_caps = (loc_idx || rem_idx) ? 
ep_caps : GEX_EP_CAPABILITY_ALL; + + gex_TM_t pair = gex_TM_Pair(eps[loc_idx], rem_idx); + + const size_t rank_sz = sizeof(gex_Rank_t); + + struct test_segment *loc_seg; + if (!loc_idx) { + // Required for GASNET_SEGMENT_EVERYTHING, but always correct + loc_seg = (struct test_segment *) TEST_MYSEG(); + } else { + loc_seg = gex_Segment_QueryAddr(segs[loc_idx]); + } + + struct test_segment *rem_seg; + struct test_segment *prev_seg; + if (!rem_idx) { + // Required for GASNET_SEGMENT_EVERYTHING, but always correct + rem_seg = (struct test_segment *) TEST_SEG(next); + prev_seg = (struct test_segment *) TEST_SEG(prev); + } else { + gex_Event_Wait( gex_EP_QueryBoundSegmentNB(pair, next, (void**)&rem_seg, NULL, NULL, 0) ); + gex_Event_Wait( gex_EP_QueryBoundSegmentNB(pair, prev, (void**)&prev_seg, NULL, NULL, 0) ); + } + + loc_seg->get_src = myrank; + for (int i = 0; i < 6; ++i) { + loc_seg->put_dst[i] = GEX_RANK_INVALID; + } + loc_seg->fp_req_dst = GEX_RANK_INVALID; + loc_seg->fp_rep_dst = GEX_RANK_INVALID; + loc_seg->np_req_dst = GEX_RANK_INVALID; + loc_seg->np_rep_dst = GEX_RANK_INVALID; + loc_seg->vis_src = prev; + for (int i = 0; i < 6; ++i) { + loc_seg->vis_dst[i] = GEX_RANK_INVALID; + } + + // Cannot use QueryBound in handler context + fp_reply_dst = &prev_seg->fp_rep_dst; + np_reply_dst = &prev_seg->np_rep_dst; + + BARRIER(); + + // RMA Get tests + if (!TEST_SECTION_BEGIN_ENABLED()) { + // Nothing to do + } else if (! 
(test_caps & GEX_EP_CAPABILITY_RMA)) { + MSG0("%c: Skipping RMA Get tests for pair (%d,%d) - RMA not yet supported for non-primordial EPs", + TEST_SECTION_NAME(), loc_idx, rem_idx); + } else { + MSG0("%c: Starting RMA Get tests for pair (%d,%d)", + TEST_SECTION_NAME(), loc_idx, rem_idx); + + int rc; + gex_Event_t ev; + gex_Rank_t *dst_array = loc_seg->local; + gex_Rank_t *src_addr = &rem_seg->get_src; + + for (int i = 0; i < 4; ++i) { + dst_array[i] = GEX_RANK_INVALID; + } + gex_Rank_t *dst = &dst_array[0]; + + ev = gex_RMA_GetNB(pair, dst++, next, src_addr, rank_sz, 0); + rc = gex_RMA_GetNBI(pair, dst++, next, src_addr, rank_sz, 0); + assert(!rc); + rc = gex_RMA_GetBlocking(pair, dst++, next, src_addr, rank_sz, 0); + assert(!rc); + *(dst++) = gex_RMA_GetBlockingVal(pair, next, src_addr, rank_sz, 0); + + gex_Event_Wait(ev); + gex_NBI_Wait(GEX_EC_GET, 0); + + int failed = 0; + for (int i = 0; i < 4; ++i) { + CHECK(dst_array[i] == next); + } + + if (failed) { + ERR("Tests of Get APIs FAILED %d test(s).", failed); + } + } + + // RMA Put tests + if (!TEST_SECTION_BEGIN_ENABLED()) { + // Nothing to do + } else if (! 
(test_caps & GEX_EP_CAPABILITY_RMA)) { + MSG0("%c: Skipping RMA Put tests for pair (%d,%d) - RMA not yet supported for non-primordial EPs", + TEST_SECTION_NAME(), loc_idx, rem_idx); + } else { + MSG0("%c: Starting RMA Put tests for pair (%d,%d)", + TEST_SECTION_NAME(), loc_idx, rem_idx); + + int rc; + gex_Event_t ev[2]; + gex_Rank_t *dst = &rem_seg->put_dst[0]; + gex_Rank_t *src = &loc_seg->local[0]; + *src = next; + + ev[0] = gex_RMA_PutNB(pair, next, dst++, src, rank_sz, GEX_EVENT_NOW, 0); + ev[1] = gex_RMA_PutNBVal(pair, next, dst++, next, rank_sz, 0); + rc = gex_RMA_PutNBI(pair, next, dst++, src, rank_sz, GEX_EVENT_NOW, 0); + assert(!rc); + rc = gex_RMA_PutNBIVal(pair, next, dst++, next, rank_sz, 0); + assert(!rc); + rc = gex_RMA_PutBlocking(pair, next, dst++, src, rank_sz, 0); + assert(!rc); + rc = gex_RMA_PutBlockingVal(pair, next, dst++, next, rank_sz, 0); + assert(!rc); + + gex_Event_WaitAll(ev, sizeof(ev)/sizeof(ev[0]), 0); + gex_NBI_Wait(GEX_EC_PUT, 0); + + BARRIER(); + + int failed = 0; + for (int i = 0; i < 6; ++i) { + CHECK(loc_seg->put_dst[i] == myrank); + loc_seg->put_dst[i] = myrank; // prevent cascading failure, such as in VIS + } + + if (failed) { + ERR("Tests of Put APIs FAILED %d test(s).", failed); + } + } + + // TODO: Long tests probably don't use RMA on conduits w/ "packed long" + if (!TEST_SECTION_BEGIN_ENABLED()) { // AM Tests + // Nothing to do + } else if (! 
(test_caps & GEX_EP_CAPABILITY_AM)) { + MSG0("%c: Skipping AM Request tests for pair (%d,%d) - AMs not yet supported for non-primordial EPs", + TEST_SECTION_NAME(), loc_idx, rem_idx); + } else { + MSG0("%c: Starting AM Request tests for pair (%d,%d)", + TEST_SECTION_NAME(), loc_idx, rem_idx); + + static gasnett_atomic_val_t cntr_target = 0; + + // Payload limit queries + size_t maxmedreq = gex_AM_MaxRequestMedium(pair,next,GEX_EVENT_NOW,0,0); + assert_always(maxmedreq >= gex_AM_LUBRequestMedium()); + size_t maxmedrep = gex_AM_MaxReplyMedium (pair,next,GEX_EVENT_NOW,0,0); + assert_always(maxmedrep >= gex_AM_LUBReplyMedium()); + size_t maxlongreq = gex_AM_MaxRequestLong (pair,next,GEX_EVENT_NOW,0,0); + assert_always(maxlongreq >= gex_AM_LUBRequestLong()); + size_t maxlongrep = gex_AM_MaxReplyLong (pair,next,GEX_EVENT_NOW,0,0); + assert_always(maxlongrep >= gex_AM_LUBReplyLong()); + + // FPAM injection + gex_Rank_t *fp_dst = &rem_seg->fp_req_dst; + gex_AM_RequestShort0 (pair, next, hidx_short_ping, 0); + gex_AM_RequestMedium0(pair, next, hidx_med_ping, + &next, sizeof(gex_Rank_t), GEX_EVENT_NOW, 0); + gex_AM_RequestLong0 (pair, next, hidx_long_ping, + &next, sizeof(gex_Rank_t), fp_dst, GEX_EVENT_NOW, 0); + + // NPAM injection + gex_Rank_t *np_dst = &rem_seg->np_req_dst; + gex_AM_SrcDesc_t sd; + sd = gex_AM_PrepareRequestMedium(pair, next, &next, rank_sz, rank_sz, GEX_EVENT_NOW, 1, 0); + gex_AM_CommitRequestMedium0(sd, hidx_npmed_ping, rank_sz); + sd = gex_AM_PrepareRequestLong(pair, next, &next, rank_sz, rank_sz, NULL, GEX_EVENT_NOW, 0, 0); + gex_AM_CommitRequestLong0 (sd, hidx_nplong_ping, rank_sz, np_dst); + + cntr_target += 1; + GASNET_BLOCKUNTIL((cntr_target <= gasnett_atomic_read(&short_ping_cntr, 0)) && + (cntr_target <= gasnett_atomic_read(&short_pong_cntr, 0)) && + (cntr_target <= gasnett_atomic_read(&med_ping_cntr, 0)) && + (cntr_target <= gasnett_atomic_read(&med_pong_cntr, 0)) && + (cntr_target <= gasnett_atomic_read(&long_ping_cntr, 0)) && + (cntr_target 
<= gasnett_atomic_read(&long_pong_cntr, 0)) && + (cntr_target <= gasnett_atomic_read(&npmed_ping_cntr, 0)) && + (cntr_target <= gasnett_atomic_read(&npmed_pong_cntr, 0)) && + (cntr_target <= gasnett_atomic_read(&nplong_ping_cntr, 0)) && + (cntr_target <= gasnett_atomic_read(&nplong_pong_cntr, 0))); + // Note the ACQUIRE semantics of BLOCKUNTIL + + int failed = 0; + + CHECK(cntr_target == gasnett_atomic_read(&short_ping_cntr,0)); + CHECK(short_ping_from == prev); + + CHECK(cntr_target == gasnett_atomic_read(&short_pong_cntr,0)); + CHECK(short_pong_from == next); + + CHECK(cntr_target == gasnett_atomic_read(&med_ping_cntr,0)); + CHECK(med_ping_from == prev); + + CHECK(cntr_target == gasnett_atomic_read(&med_pong_cntr,0)); + CHECK(med_pong_from == next); + + CHECK(cntr_target == gasnett_atomic_read(&long_ping_cntr,0)); + CHECK(long_ping_from == prev); + CHECK(loc_seg->fp_req_dst == myrank); + + CHECK(cntr_target == gasnett_atomic_read(&long_pong_cntr,0)); + CHECK(long_pong_from == next); + CHECK(loc_seg->fp_rep_dst == next); + + CHECK(cntr_target == gasnett_atomic_read(&npmed_ping_cntr,0)); + CHECK(npmed_ping_from == prev); + + CHECK(cntr_target == gasnett_atomic_read(&npmed_pong_cntr,0)); + CHECK(npmed_pong_from == next); + + CHECK(cntr_target == gasnett_atomic_read(&nplong_ping_cntr,0)); + CHECK(nplong_ping_from == prev); + CHECK(loc_seg->np_req_dst == myrank); + + CHECK(cntr_target == gasnett_atomic_read(&nplong_pong_cntr,0)); + CHECK(nplong_pong_from == next); + CHECK(loc_seg->np_rep_dst == next); + + if (failed) { + ERR("Tests of AM APIs FAILED %d test(s).", failed); + } + } + + if (!TEST_SECTION_BEGIN_ENABLED()) { // VIS Indexed Tests + // Nothing to do + } else if (! 
(test_caps & GEX_EP_CAPABILITY_VIS)) { + MSG0("%c: Skipping VIS Indxed tests for pair (%d,%d) - VIS not yet supported for non-primordial EPs", + TEST_SECTION_NAME(), loc_idx, rem_idx); + } else { + MSG0("%c: Starting VIS Indexed tests for pair (%d,%d)", + TEST_SECTION_NAME(), loc_idx, rem_idx); + + // gex_VIS_{Vector,Indexed,Strided}{Put,Get}{NB,NBI,Blocking} == 18 entry points + // TODO: currently we cover only Indexed, which we *hope* is representative + + gex_Rank_t S[2]; S[0] = myrank; S[1] = next; + gex_Rank_t D[6] = { GEX_RANK_INVALID, GEX_RANK_INVALID, GEX_RANK_INVALID, + GEX_RANK_INVALID, GEX_RANK_INVALID, GEX_RANK_INVALID}; + + void *loc_src_list[2]; + loc_src_list[0] = S+0; + loc_src_list[1] = S+1; + void *rem_src_list[2]; + rem_src_list[0] = &rem_seg->get_src; + rem_src_list[1] = &rem_seg->vis_src; + + int dst_idx[] = {1,0,2,3,5,4}; + void *loc_dst_list[6]; + void *rem_dst_list[6]; + for (int i = 0; i < 6; ++i) { + loc_dst_list[i] = dst_idx[i] + D; + rem_dst_list[i] = dst_idx[i] + rem_seg->vis_dst; + } + + gex_Event_t puti_ev = + gex_VIS_IndexedPutNB(pair, next, 2, rem_dst_list+0, rank_sz, + 2, loc_src_list, rank_sz, 0); + gex_VIS_IndexedPutNBI(pair, next, 2, rem_dst_list+2, rank_sz, + 2, loc_src_list, rank_sz, 0); + gex_VIS_IndexedPutBlocking(pair, next, 2, rem_dst_list+4, rank_sz, + 2, loc_src_list, rank_sz, 0); + + + gex_Event_t geti_ev = + gex_VIS_IndexedGetNB(pair, 2, loc_dst_list+0, rank_sz, + next, 2, rem_src_list, rank_sz, 0); + gex_VIS_IndexedGetNBI(pair, 2, loc_dst_list+2, rank_sz, + next, 2, rem_src_list, rank_sz, 0); + gex_VIS_IndexedGetBlocking(pair, 2, loc_dst_list+4, rank_sz, + next, 2, rem_src_list, rank_sz, 0); + + gex_Event_Wait(puti_ev); + gex_Event_Wait(geti_ev); + gex_NBI_Wait(GEX_EC_GET | GEX_EC_PUT, 0); + + BARRIER(); + + int failed = 0; + + CHECK(loc_seg->vis_dst[0] == myrank); + CHECK(loc_seg->vis_dst[1] == prev); + CHECK(loc_seg->vis_dst[2] == prev); + CHECK(loc_seg->vis_dst[3] == myrank); + CHECK(loc_seg->vis_dst[4] == myrank); 
+ CHECK(loc_seg->vis_dst[5] == prev); + CHECK(D[0] == myrank); + CHECK(D[1] == next); + CHECK(D[2] == next); + CHECK(D[3] == myrank); + CHECK(D[4] == myrank); + CHECK(D[5] == next); + + if (failed) { + ERR("Tests of VIS APIs FAILED %d test(s).", failed); + } + } + } + + BARRIER(); + MSG("done."); + + gasnet_exit(0); + return 0; +} diff --git a/third-party/gasnet/gasnet-src/tests/testvis.c b/third-party/gasnet/gasnet-src/tests/testvis.c index b8c216b8d14c..d36b42fd1156 100644 --- a/third-party/gasnet/gasnet-src/tests/testvis.c +++ b/third-party/gasnet/gasnet-src/tests/testvis.c @@ -18,8 +18,15 @@ uintptr_t segsz = (16*1024*1024); static gex_Client_t myclient; static gex_EP_t myep; static gex_TM_t myteam; +static gex_TM_t team0; +static gex_Rank_t ranks, jobrank; static gex_Segment_t mysegment; +#ifndef RENUMBER_TEAM +// can be defined to non-zero to use a renumbered team for communication (bug 4145) +#define RENUMBER_TEAM 0 +#endif + #include /* VEC_SZ sets the size/offset alignment of all data accesses @@ -1065,11 +1072,11 @@ test_pcheader_t *rand_pcheader(size_t *packetsz) { memset(pcheader, (uint8_t)sz, sz); } else { pcheader->packetsz = sz; - pcheader->srcjobrank = mynode; + pcheader->srcjobrank = jobrank; uint8_t *payload = (uint8_t*)(pcheader+1); size_t payloadsz = sz-sizeof(test_pcheader_t); for (size_t i=0; i < payloadsz; i++) { - payload[i] = PC_VALUE(mynode, i); + payload[i] = PC_VALUE(jobrank, i); } } return pcheader; @@ -1110,8 +1117,7 @@ gex_AM_Entry_t pcverify_reph_entry = 0, (void *)&pcarrival_cnt, "testvis_pcverify_reph" }; void pcverify_reph(gex_Token_t token, void *buf, size_t nbytes) { gasnett_atomic_increment(&pcarrival_cnt,0); - gex_Rank_t nranks = gex_TM_QuerySize(myteam); - gex_Rank_t mysender = (mynode + nranks - 1) % nranks; + gex_Rank_t mysender = (jobrank + ranks - 1) % ranks; // check all token properties are legit gex_Token_Info_t info; @@ -1846,7 +1852,7 @@ int main(int argc, char **argv) { int i; assert_always(VEC_SZ == sizeof(VEC_T)); 
- GASNET_Safe(gex_Client_Init(&myclient, &myep, &myteam, "testvis", &argc, &argv, 0)); + GASNET_Safe(gex_Client_Init(&myclient, &myep, &team0, "testvis", &argc, &argv, 0)); test_init_early("testvis",0, "[options] (iters) (seed)\n" " -v/-i/-s/-x/-n run vector/indexed/strided/transpositional/non-blocking tests (defaults to all)\n" " -d disable correctness verification checks\n" @@ -1903,13 +1909,23 @@ int main(int argc, char **argv) { if (i < argc) { iters = atoi(argv[i]); i++; } if (i < argc) { seedoffset = atoi(argv[i]); i++; } if (i < argc) test_usage_early(); - GASNET_Safe(gex_Segment_Attach(&mysegment, myteam, TEST_SEGSZ_REQUEST)); + GASNET_Safe(gex_Segment_Attach(&mysegment, team0, TEST_SEGSZ_REQUEST)); + + jobrank = gex_TM_QueryRank(team0); + ranks = gex_TM_QuerySize(team0); + + #if RENUMBER_TEAM + // use a non-primordial team that rotates all the rank numbers by RENUMBER_TEAM + gex_TM_Split(&myteam, team0, 0, (jobrank+RENUMBER_TEAM)%ranks, 0, 0, GEX_FLAG_TM_NO_SCRATCH); + #else + myteam = team0; + #endif areasz = TEST_SEGSZ/NUM_AREAS/VEC_SZ; /* in elem */ mynode = gex_TM_QueryRank(myteam); - myseg = TEST_SEG(mynode); - partner = (mynode + 1) % gex_TM_QuerySize(myteam); - partnerseg = TEST_SEG(partner); + myseg = TEST_SEG(jobrank); + partner = (mynode + 1) % ranks; + partnerseg = TEST_SEG((partner+ranks-(RENUMBER_TEAM%ranks))%ranks); heapseg = (VEC_T *)test_malloc(TEST_SEGSZ); assert_always(gex_EP_RegisterHandlers(myep, &pcverify_reph_entry, 1) == GASNET_OK); @@ -1937,6 +1953,10 @@ int main(int argc, char **argv) { doit(iters, runtests); test_free(heapseg); + #if RENUMBER_TEAM + gex_Memvec_t junk; + gex_TM_Destroy(myteam, &junk, GEX_FLAG_GLOBALLY_QUIESCED); + #endif MSG("done."); gasnet_exit(0); diff --git a/third-party/gasnet/gasnet-src/tests/upcr-harness/external-legion/Makefile b/third-party/gasnet/gasnet-src/tests/upcr-harness/external-legion/Makefile index 36389deb683d..e9d61e435863 100644 --- 
a/third-party/gasnet/gasnet-src/tests/upcr-harness/external-legion/Makefile +++ b/third-party/gasnet/gasnet-src/tests/upcr-harness/external-legion/Makefile @@ -35,9 +35,14 @@ endif RUNTIME_TARGETS = librealm.a liblegion.a SEED_DIR = $(LEGION_BLDDIR)/examples/circuit # NOTE: must not be an OMP or CUDA test +ifeq ($(LEGION_GASNET1),1) + LEGION_NET_ENV = USE_GASNET=1 GASNET="$(LEGION_GASNET_INST)" +else + LEGION_NET_ENV = REALM_NETWORKS=gasnetex GASNET_ROOT="$(LEGION_GASNET_INST)" +endif COMMON_ENV = \ - LG_RT_DIR="$(LEGION_BLDDIR)/runtime" USE_GASNET=1 \ - GASNET="$(LEGION_GASNET_INST)" CONDUIT=$(LEGION_CONDUIT) \ + LG_RT_DIR="$(LEGION_BLDDIR)/runtime" \ + $(LEGION_NET_ENV) CONDUIT=$(LEGION_CONDUIT) \ $(GASNET_VARS) DO_MAKE = env $(COMMON_ENV) $(LEGION_TEST_ENV) $(MAKE) $(LEGION_MAKE_ARGS) diff --git a/third-party/gasnet/gasnet-src/tests/upcr-harness/external-legion/harness.conf b/third-party/gasnet/gasnet-src/tests/upcr-harness/external-legion/harness.conf index 729b6c8b6e09..7ee9f63f4ec1 100644 --- a/third-party/gasnet/gasnet-src/tests/upcr-harness/external-legion/harness.conf +++ b/third-party/gasnet/gasnet-src/tests/upcr-harness/external-legion/harness.conf @@ -18,10 +18,9 @@ AppArgs: AppEnv: network_udp; GASNET_QUIET=1 TimeLimit: $DEFAULT$ RequireFeature: gasnet_has_par,legion -# debug: HSL issues # segment_everything: heap management # hack_for_nonupc_test: mark this suite as non-UPC -ProhibitFeature: debug,segment_everything,hack_for_nonupc_test +ProhibitFeature: segment_everything,hack_for_nonupc_test RunCmd: ./launcher -np %N %P %A WarningFilter: os_cnl ; .*?warning: Using .dlopen. in statically linked applications.*? WarningFilter: cc_intel ; .*?warning .*?: option .-Wno-strict-overflow. not supported.*? @@ -52,6 +51,8 @@ PassExpr: Test completed. TestName: realm_saxpy WarningFilter: cc_clang ; .*?warning: .*?Wenum-compare-switch.*? 
+# Output is version dependent +PassExpr: (SUCCESS!|success -) TestName: spmd_cgsolver AppArgs: -ll:cpu 4 diff --git a/third-party/gasnet/gasnet-src/tests/upcr-harness/external-upcxx/Makefile b/third-party/gasnet/gasnet-src/tests/upcr-harness/external-upcxx/Makefile index a1eb4fb3133e..36345495382a 100644 --- a/third-party/gasnet/gasnet-src/tests/upcr-harness/external-upcxx/Makefile +++ b/third-party/gasnet/gasnet-src/tests/upcr-harness/external-upcxx/Makefile @@ -5,7 +5,7 @@ UPCXX_EXTRAS_GIT_COMMIT ?= develop UPCXX_BLDDIR ?= $(HARNESS_WORKDIR)/upcxx UPCXX_INSTDIR ?= $(HARNESS_WORKDIR)/upcxx-inst UPCXX_TMPDIR ?= $(HARNESS_WORKDIR)/upcxx-tmp -UPCXX_CONDUIT ?= $(NETWORK) # Default to same conduit as the enclosing harness run +UPCXX_CONDUIT ?= $(NETWORK)# Default to same conduit as the enclosing harness run UPCXX_EXTRA_FLAGS ?=# extra flags to add to both compilers for lib and tests, may be set by pushbuild/end-user UPCXX_EXTRA_TEST_FLAGS ?=# extra flags to add to tests (but not lib), may be set by pushbuild/end-user UPCXX_TEST_FLAGS ?=# per-test flags reserved for use by harness.conf @@ -13,6 +13,11 @@ UPCXX_CONFIGURE_EXTRA ?=# extra flags to add to UPC++ configure command line UPCXX_MAKE_J ?= -j4 # concurrency of GNU Make steps UPCXX_FC ?= $(shell which gfortran) +# ensure all child invocations see the same conduit defined by UPCXX_CONDUIT, +# even if the enclosing environment contains conflicting values +export UPCXX_NETWORK := $(UPCXX_CONDUIT) +export UPCXX_GASNET_CONDUIT := $(UPCXX_CONDUIT) + # Options for providing GASNET: # 1. set UPCXX_GASNET to an existing build path to use. 
# The remaining options assume $(TOP_BUILDDIR)/gasnet is a valid build dir @@ -285,6 +290,13 @@ wrap_test: upcxx-install @set -x ; $(DO_ENV) && \ $(UPCXX_INSTDIR)/bin/upcxx -o $(TEST_EXE) $(TEST_PATH) $(UPCXX_EXTRA_TEST_FLAGS) $(UPCXX_TEST_FLAGS) +# Build a single test using the dev-tests target in the UPC++ Makefile (only valid for in-repo tests) +make_test: upcxx-install + @set -x ; $(DO_ENV) && \ + $(MAKE) -C $(UPCXX_BLDDIR) $(UPCXX_MAKE_J) dev-tests-$(GASNET_CODEMODE) NETWORKS=$(UPCXX_CONDUIT) \ + TESTS=^test-$(TEST_EXE) EXTRAFLAGS="$(UPCXX_EXTRA_TEST_FLAGS) $(UPCXX_TEST_FLAGS)" + @mv $(UPCXX_BLDDIR)/test-$(TEST_EXE)-$(GASNET_CODEMODE)-$(UPCXX_CONDUIT) $(TEST_EXE) + IGNORE_TESTS_PAT=neg-.+|getenv|promise_multiple_results|promise_reused|quiescence_failure|issue105 check-testsuite: $(MAKE) -C $(UPCXX_BLDDIR) $(UPCXX_MAKE_J) dev-tests-$(GASNET_CODEMODE) UPCXX_DRY_RUN=1 NETWORKS=$(UPCXX_CONDUIT) > check-testsuite.log @@ -317,6 +329,8 @@ VPATH = \ $(MAKE) inst_test TEST_PATH=$< TEST_EXE=$@ ; \ elif test "$$UPCXX_TESTMODE" = "wrap" ; then set -x ; \ $(MAKE) wrap_test TEST_PATH=$< TEST_EXE=$@ ; \ + elif test "$$UPCXX_TESTMODE" = "make" ; then set -x ; \ + $(MAKE) make_test TEST_EXE=$@ ; \ else \ echo "Unrecognized UPCXX_TESTMODE=$$UPCXX_TESTMODE" ; exit 100; \ fi @@ -327,7 +341,6 @@ guppie-%-seq guppie-%-par :: $(UPCXX_BLDDIR)/extras/examples/gups/upcxx upcxx-in *-par) export UPCXX_THREADMODE=par ;; esac; \ set -x ; env $(MAKE_TEST_ENV) $(MAKE) -C $< \ UPCXX_INSTALL=$(UPCXX_INSTDIR) \ - UPCXX_NETWORK=$(UPCXX_CONDUIT) \ clean guppie-$* mv $ "relocation truncated to fit: R_MIPS_GOT16" @@ -141,6 +156,18 @@ TestName: serialize_large_obj-seq TestName: segment_allocator-seq +TestName: view-seq +MakeFlags: UPCXX_TESTMODE=make + +TestName: lpc_barrier-seq +DynamicThreads: 1 +MakeFlags: UPCXX_TESTMODE=make +#MakeFlags: static_link ; UPCXX_TEST_FLAGS="-Wl,--whole-archive -lpthread -Wl,--no-whole-archive" UPCXX_TESTMODE=make # bug3813 + +TestName: lpc-stress-seq +DynamicThreads: 
1 +MakeFlags: UPCXX_TESTMODE=make + TestName: global_ptr-seq # squash erroneous warnings on PGI 19.10(only, issue #284) from upcxx_memberof on non-trivial type (non-trivial default constructor) MakeFlags: cc_pgi ; UPCXX_TEST_FLAGS="--diag_suppress1427" @@ -148,6 +175,8 @@ MakeFlags: cc_pgi ; UPCXX_TEST_FLAGS="--diag_suppress1427" TestName: rpc-ctor-trace-seq MakeFlags: cc_pgi && upcxx_issue_390 && debug ; UPCXX_TEST_FLAGS="-purge-option=-g" +TestName: verbose-ctor-trace-seq + TestName: rpc_source_cx-seq TestName: lpc-ctor-trace-seq @@ -158,6 +187,11 @@ TestName: version-seq TestName: bad-alloc-seq +TestName: bad-segment-alloc-seq +RequireFeature: upcxx_cuda + +TestName: shared-seg-query-seq + TestName: nodiscard-seq MakeFlags: cc_pgi && upcxx_issue_390 && debug ; UPCXX_TEST_FLAGS="-purge-option=-g" #KnownFailure: compile-failure ; cc_pgi && debug ; issue 390 (PGI debug symbol ICE prior to 19.4) @@ -178,13 +212,22 @@ BenchmarkResult: bw=([^,]+),.*?via="rput"(,) KnownFailure: run-crash ; os_cygwin && cc_clang ; issue 188 (bench/nebr_exchange crash on Cygwin/clang) TestName: cuda_microbenchmark-seq -AppArgs: 10 10 +DynamicThreads: 2 # want to avoid 2+ppn which increases PCI traffic and actually runs much slower +TimeLimit: 2*$DEFAULT$ +AppArgs: -t 10 -w 10 -gg -sg -gs -pg # deliberately omit host memory xfers to avoid output line limit AppEnv: UPCXX_SHARED_HEAP_SIZE=1GB BenchmarkResult: message size = 4194304 byte.*?\n\s*Local GPU -> Remote GPU:[^,]+, ([^ ]+) (GB/s) RequireFeature: upcxx_cuda +FileLimit: 128*1024 +#KnownFailure: run-all ; cc_pgi ; issue 421: upcxx::copy unsupported on PGI C++ TestName: misc_perf-seq BenchmarkResult: upcxx::progress\s*:[^s]*s\s*([^ ]+)\s*(us) +MakeFlags: cc_pgi && upcxx_issue_390 && debug ; UPCXX_TEST_FLAGS="-purge-option=-g" + +TestName: rpc_perf-seq +AppArgs: 100 10 1048576 +BenchmarkResult: 0:\s+1\s+[^s]*s\s+([^ ]+)\s*(us) # ------------------------------------------ # Regression suite: @@ -283,9 +326,12 @@ TestName: issue400-seq 
TestName: issue402-seq TestName: issue405-seq +#KnownFailure: run-all ; cc_pgi ; issue 421: upcxx::copy unsupported on PGI C++ TestName: issue407-seq +TestName: issue408-seq + TestName: issue412-seq TestName: issue412b-seq @@ -294,6 +340,30 @@ TestName: issue413-seq TestName: issue419-seq +TestName: issue421-seq +#KnownFailure: run-all ; cc_pgi ; issue 421: upcxx::copy unsupported on PGI C++ + +TestName: issue421b-seq +#KnownFailure: run-all ; cc_pgi ; issue 421: upcxx::copy unsupported on PGI C++ + +TestName: issue421c-seq + +TestName: issue427-seq + +TestName: issue427b-seq + +TestName: issue428-seq +TimeLimit: 0 + +TestName: issue440-seq + +TestName: issue447-seq + +TestName: issue450-seq +TimeLimit: 0 + +TestName: issue462-seq + TestName: spec-issue104-seq TestName: spec-issue144b-seq @@ -360,11 +430,13 @@ TestName: h-d-seq FailExpr: Failure PassExpr: Success RequireFeature: upcxx_cuda +#KnownFailure: run-all ; cc_pgi ; issue 421: upcxx::copy unsupported on PGI C++ TestName: h-d-remote-seq FailExpr: Failure PassExpr: Success RequireFeature: upcxx_cuda +#KnownFailure: run-all ; cc_pgi ; issue 421: upcxx::copy unsupported on PGI C++ TestName: serial-fields-seq @@ -446,15 +518,18 @@ AppEnv: UPCXX_SHARED_HEAP_SIZE=256MB DynamicThreads: 1, 4, 16 # Cannon alg requires a perfect square rank count BenchmarkResult: Compute:\s*(\S+)\s*(s) ProhibitFeature: missing_cblas || (os_darwin && cc_gnu) # Homebrew gcc cannot parse Apple +#KnownFailure: run-all ; cc_pgi ; issue 421: upcxx::copy unsupported on PGI C++ TestName: cannon_cuda-seq -AppArgs: 2048 +AppArgs: (_threads == 1) ; 512 # reduce matrix size for single-rank, avoid BAR1 limit on Tesla +AppArgs: (_threads > 1) ; 2048 AppEnv: UPCXX_SHARED_HEAP_SIZE=256MB DynamicThreads: 1, 4, 16 # Cannon alg requires a perfect square rank count RequireFeature: upcxx_cuda_compute_30 BenchmarkResult: Compute:\s*(\S+)\s*(s) ProhibitFeature: missing_cblas || missing_cublas || (os_darwin && cc_gnu) # Homebrew gcc cannot parse Apple 
#KnownFailure: run-match ; (_threads > 4) ; issue 381 (cannon_cuda fails to validate with 16 ranks) +#KnownFailure: run-all ; cc_pgi ; issue 421: upcxx::copy unsupported on PGI C++ # ------------------------------------------ # upcxx-extras dist_array @@ -477,6 +552,7 @@ TestName: DA-scatter-seq TestName: cuda_vecadd-seq RequireFeature: upcxx_cuda_compute_30 MakeFlags: nodebug ; CXXFLAGS=-O3 +#KnownFailure: run-all ; cc_pgi ; issue 421: upcxx::copy unsupported on PGI C++ # ------------------------------------------ # Multi-threaded tests @@ -569,6 +645,8 @@ TestName: rpc_ff_ring-par TestName: rput-par +TestName: rput-cover-par + TestName: rput_rpc_cx-par TestName: rput_thread-par @@ -585,6 +663,8 @@ TestName: completion-par TestName: local_team-par +TestName: memory_kinds-par + TestName: vis-par #KnownFailure: run-crash ; os_darwin && cc_gnu ; issue 127 (Mac+gcc+VIS breaks on lpc operation with seq and par) @@ -596,6 +676,17 @@ FileLimit: 100 + 2200 * $THREADS$ TestName: copy-par FailExpr: WARNING: UPC.. 
CUDA support +#KnownFailure: run-all ; cc_pgi ; issue 421: upcxx::copy unsupported on PGI C++ + +TestName: copy-cover-par +# **TEMPORARY** +# The following environment settings are needed avoid triggering +# Bug 4148 - ibv/GDR completion issues with multiple communication paths +# Note this relies on mlx5_0 being a valid/desirable HCA port +AppEnv: network_ibv && upcxx_cuda && has_bug_4148; GASNET_IBV_PORTS=mlx5_0 GASNET_SUPERNODE_MAXSIZE=1 +# Similar bug in GDR Puts breaks source completion and this test notices +MakeFlags: network_ibv && upcxx_cuda && has_bug_4148 ; UPCXX_TEST_FLAGS="-DSKIP_KILL" +#KnownFailure: run-all ; cc_pgi ; issue 421: upcxx::copy unsupported on PGI C++ TestName: memberof-par ProhibitFeature: debug && (cpu_mips64 || cpu_mips64el) # too big -> "relocation truncated to fit: R_MIPS_GOT16" @@ -613,10 +704,13 @@ TestName: view-par TestName: lpc_barrier-par DynamicThreads: 1 -MakeFlags: static_link ; UPCXX_TEST_FLAGS="-Wl,--whole-archive -lpthread -Wl,--no-whole-archive" +#MakeFlags: static_link ; UPCXX_TEST_FLAGS="-Wl,--whole-archive -lpthread -Wl,--no-whole-archive" # bug3813 #KnownFailure: run-crash ; os_darwin && cc_gnu ; issue 49 (uts_{threads,hybrid,omp_ranks}/lpc_barrier crash on (High)Sierra w/ g++) #KnownFailure: run-crash ; os_cnl ; Bug 3813 - Crashes using C++11 threads on Cray systems +TestName: lpc-stress-par +DynamicThreads: 1 + TestName: uts_ranks-par TestName: uts_hybrid-par @@ -629,6 +723,8 @@ MakeFlags: cc_pgi ; UPCXX_TEST_FLAGS="--diag_suppress1427" TestName: rpc-ctor-trace-par MakeFlags: cc_pgi && upcxx_issue_390 && debug ; UPCXX_TEST_FLAGS="-purge-option=-g" +TestName: verbose-ctor-trace-par + TestName: rpc_source_cx-par TestName: lpc-ctor-trace-par @@ -639,6 +735,11 @@ TestName: version-par TestName: bad-alloc-par +TestName: bad-segment-alloc-par +RequireFeature: upcxx_cuda + +TestName: shared-seg-query-par + TestName: nodiscard-par MakeFlags: cc_pgi && upcxx_issue_390 && debug ; UPCXX_TEST_FLAGS="-purge-option=-g" 
#KnownFailure: compile-failure ; cc_pgi && debug ; issue 390 (PGI debug symbol ICE prior to 19.4) @@ -658,13 +759,22 @@ BenchmarkResult: bw=([^,]+),.*?via="rput"(,) KnownFailure: run-crash ; os_cygwin && cc_clang ; issue 188 (bench/nebr_exchange crash on Cygwin/clang) TestName: cuda_microbenchmark-par -AppArgs: 10 10 +DynamicThreads: 2 # want to avoid 2+ppn which increases PCI traffic and actually runs much slower +TimeLimit: 2*$DEFAULT$ +AppArgs: -t 10 -w 10 -gg -sg -gs -pg # deliberately omit host memory xfers to avoid output line limit AppEnv: UPCXX_SHARED_HEAP_SIZE=1GB BenchmarkResult: message size = 4194304 byte.*?\n\s*Local GPU -> Remote GPU:[^,]+, ([^ ]+) (GB/s) RequireFeature: gasnet_has_par,upcxx_cuda +FileLimit: 128*1024 +#KnownFailure: run-all ; cc_pgi ; issue 421: upcxx::copy unsupported on PGI C++ TestName: misc_perf-par BenchmarkResult: upcxx::progress\s*:[^s]*s\s*([^ ]+)\s*(us) +MakeFlags: cc_pgi && upcxx_issue_390 && debug ; UPCXX_TEST_FLAGS="-purge-option=-g" + +TestName: rpc_perf-par +AppArgs: 100 10 1048576 +BenchmarkResult: 0:\s+1\s+[^s]*s\s+([^ ]+)\s*(us) # ------------------------------------------ # Regression suite: @@ -768,9 +878,12 @@ TestName: issue400-par TestName: issue402-par TestName: issue405-par +#KnownFailure: run-all ; cc_pgi ; issue 421: upcxx::copy unsupported on PGI C++ TestName: issue407-par +TestName: issue408-par + TestName: issue412-par TestName: issue412b-par @@ -779,6 +892,34 @@ TestName: issue413-par TestName: issue419-par +TestName: issue421-par +#KnownFailure: run-all ; cc_pgi ; issue 421: upcxx::copy unsupported on PGI C++ + +TestName: issue421b-par +#KnownFailure: run-all ; cc_pgi ; issue 421: upcxx::copy unsupported on PGI C++ + +TestName: issue421c-par + +TestName: issue427-par + +TestName: issue427b-par + +TestName: issue428-par +TimeLimit: 0 + +TestName: issue432-par +RequireFeature: upcxx_cuda +#KnownFailure: run-all ; cc_pgi ; issue 421: upcxx::copy unsupported on PGI C++ + +TestName: issue440-par + +TestName: 
issue447-par + +TestName: issue450-par +TimeLimit: 0 + +TestName: issue462-par + TestName: spec-issue104-par TestName: spec-issue144b-par @@ -850,7 +991,7 @@ TestName: persona-example-par DynamicThreads: 2 AppArgs: 1000 AppEnv: UPCXX_OVERSUBSCRIBED=1 -KnownFailure: run-time ; upcxx_lpc_inbox_locked ; issue 245 (persona-example deadlocks with UPCXX_LPC_INBOX=locked) +#KnownFailure: run-time ; upcxx_lpc_inbox_locked ; issue 245 (persona-example deadlocks with UPCXX_LPC_INBOX=locked) TestName: persona-example-rputs-par @@ -858,11 +999,13 @@ TestName: h-d-par FailExpr: Failure PassExpr: Success RequireFeature: gasnet_has_par,upcxx_cuda +#KnownFailure: run-all ; cc_pgi ; issue 421: upcxx::copy unsupported on PGI C++ TestName: h-d-remote-par FailExpr: Failure PassExpr: Success RequireFeature: gasnet_has_par,upcxx_cuda +#KnownFailure: run-all ; cc_pgi ; issue 421: upcxx::copy unsupported on PGI C++ TestName: serial-fields-par @@ -941,15 +1084,18 @@ AppEnv: UPCXX_SHARED_HEAP_SIZE=256MB DynamicThreads: 1, 4, 16 # Cannon alg requires a perfect square rank count BenchmarkResult: Compute:\s*(\S+)\s*(s) ProhibitFeature: missing_cblas || (os_darwin && cc_gnu) # Homebrew gcc cannot parse Apple +#KnownFailure: run-all ; cc_pgi ; issue 421: upcxx::copy unsupported on PGI C++ TestName: cannon_cuda-par -AppArgs: 2048 +AppArgs: (_threads == 1) ; 512 # reduce matrix size for single-rank, avoid BAR1 limit on Tesla +AppArgs: (_threads > 1) ; 2048 AppEnv: UPCXX_SHARED_HEAP_SIZE=256MB DynamicThreads: 1, 4, 16 # Cannon alg requires a perfect square rank count RequireFeature: upcxx_cuda_compute_30 BenchmarkResult: Compute:\s*(\S+)\s*(s) ProhibitFeature: missing_cblas || missing_cublas || (os_darwin && cc_gnu) # Homebrew gcc cannot parse Apple #KnownFailure: run-match ; (_threads > 4) ; issue 381 (cannon_cuda fails to validate with 16 ranks) +#KnownFailure: run-all ; cc_pgi ; issue 421: upcxx::copy unsupported on PGI C++ # ------------------------------------------ # upcxx-extras dist_array 
@@ -974,6 +1120,7 @@ TestName: DA-threads-par TestName: cuda_vecadd-par RequireFeature: gasnet_has_par,upcxx_cuda_compute_30 MakeFlags: nodebug ; CXXFLAGS=-O3 +#KnownFailure: run-all ; cc_pgi ; issue 421: upcxx::copy unsupported on PGI C++ # ------------------------------------------ # OpenMP interop tests @@ -1106,9 +1253,11 @@ END_DEFAULT_CONFIG TestName: jac3d-seq RequireFeature: upcxx_cuda_compute_30 +#KnownFailure: run-all ; cc_pgi ; issue 421: upcxx::copy unsupported on PGI C++ TestName: jac3d-par RequireFeature: gasnet_has_par,upcxx_cuda_compute_30 +#KnownFailure: run-all ; cc_pgi ; issue 421: upcxx::copy unsupported on PGI C++ # ------------------------------------------ # upcxx-extras : extend-add diff --git a/third-party/gasnet/gasnet-src/tests/upcr-harness/gasnet-tests/harness.conf b/third-party/gasnet/gasnet-src/tests/upcr-harness/gasnet-tests/harness.conf index a5ed941c014a..6f4d477b22e7 100644 --- a/third-party/gasnet/gasnet-src/tests/upcr-harness/gasnet-tests/harness.conf +++ b/third-party/gasnet/gasnet-src/tests/upcr-harness/gasnet-tests/harness.conf @@ -442,12 +442,26 @@ ProcPerNode: 2 # many failure modes depend on multiple ppn AppArgs: (network_udp || network_mpi); 50000 AppArgs: network_smp ; 1000000 + +TestName: testsegment-seq +AppArgs: -random-seg + +TestName: testtmpair-seq +#KnownFailure: run-all ; ibv_dynamic_connect ; Bug 4196 - ibv: dynamic connect does not support multiple endpoints + TestName: testreadonly-seq #KnownFailure: run-all ; network_ibv ; Bug 3338 - ibv_reg_mr failure (EFAULT/Bad address) on read-only data #KnownFailure: run-all ; network_ibv && ibv_odp ; Bug 4008 - testreadonly failure with ibv/odp KnownFailure: run-all ; network_ucx ; Bug 4046 - testreadonly failure with ucx KnownFailure: run-all ; network_ibv && os_solaris ; Bug 4009 - testreadonly failure with ibv on Solaris +TestName: testcudauva-seq +# **TEMPORARY** +# The following environment settings are needed avoid triggering +# Bug 4148 - ibv/GDR completion issues 
with multiple communication paths +# Note this relies on mlx5_0 being a valid/desirable HCA port +AppEnv: network_ibv && has_bug_4148; GASNET_IBV_PORTS=mlx5_0 GASNET_SUPERNODE_MAXSIZE=1 + # minimal test for working on-demand backtrace TestName: testexit_bt-seq AppArgs: 100 @@ -576,6 +590,7 @@ AppEnv: network_ibv && cpu_32 ; GASNET_DISABLE_MUNMAP=1 AppArgs: network_ibv && cpu_32 ; -m # Only Medium, since ref NPAM Long tickles bug 955/3989 # Avoid triggering kernel OOM-killer on 32-bit MIPS Malta AppArgs: (_threads > 1) && malta && cpu_32; 0 524288 +KnownFailure: run-all ; nodebug && cc_sun && os_solaris && cpu_sparc && cpu_32 ; Bug 4207 - testcore2 failure on EX-solaris_sparc32-smp-cc-pshm-opt TestName: testcore3-par DynamicThreads: 1,$DEFAULT$ # test loopback and parallel @@ -768,12 +783,25 @@ ProcPerNode: 2 # many failure modes depend on multiple ppn AppArgs: (network_udp || network_mpi); 50000 AppArgs: network_smp ; 1000000 +TestName: testsegment-par +AppArgs: -random-seg + +TestName: testtmpair-par +#KnownFailure: run-all ; ibv_dynamic_connect ; Bug 4196 - ibv: dynamic connect does not support multiple endpoints + TestName: testreadonly-par #KnownFailure: run-all ; network_ibv ; Bug 3338 - ibv_reg_mr failure (EFAULT/Bad address) on read-only data #KnownFailure: run-all ; network_ibv && ibv_odp ; Bug 4008 - testreadonly failure with ibv/odp KnownFailure: run-all ; network_ucx ; Bug 4046 - testreadonly failure with ucx KnownFailure: run-all ; network_ibv && os_solaris ; Bug 4009 - testreadonly failure with ibv on Solaris +TestName: testcudauva-par +# **TEMPORARY** +# The following environment settings are needed avoid triggering +# Bug 4148 - ibv/GDR completion issues with multiple communication paths +# Note this relies on mlx5_0 being a valid/desirable HCA port +AppEnv: network_ibv && has_bug_4148; GASNET_IBV_PORTS=mlx5_0 GASNET_SUPERNODE_MAXSIZE=1 + # minimal test for working on-demand backtrace TestName: testexit_bt-par AppArgs: 300 2 diff --git 
a/third-party/gasnet/gasnet-src/tests/upcr-harness/libgasnet/harness.conf b/third-party/gasnet/gasnet-src/tests/upcr-harness/libgasnet/harness.conf index e353be725b17..8c476ad9ebf4 100644 --- a/third-party/gasnet/gasnet-src/tests/upcr-harness/libgasnet/harness.conf +++ b/third-party/gasnet/gasnet-src/tests/upcr-harness/libgasnet/harness.conf @@ -19,6 +19,9 @@ ProhibitFeature: hack_for_nonupc_test # mark this suite as non-UPC ProcPerNode: 1 Pthreads: 0 TimeLimit: 0 +# Ignore some warnings from cuda.h +WarningFilter: all ; '.*?include/cuda.h:.*?-Wc11-extensions.*?' +WarningFilter: all ; '.*?include/cuda.h:.*?-Wpedantic.*?' END_DEFAULT_CONFIG # Callers may need to alter the commands invoked by this suite, diff --git a/third-party/gasnet/gasnet-src/ucx-conduit/Makefile.am b/third-party/gasnet/gasnet-src/ucx-conduit/Makefile.am index 9091453be2af..a44e13e849c9 100644 --- a/third-party/gasnet/gasnet-src/ucx-conduit/Makefile.am +++ b/third-party/gasnet/gasnet-src/ucx-conduit/Makefile.am @@ -89,6 +89,10 @@ CONDUIT_EXTRADEPS = $(ssh_deps) $(mpi_deps) $(pmi_deps) # forced targets in this file, and should probably use LIBINCLUDES/LIBDEFINES CONDUIT_SPECIAL_OBJS = $(mpi_special_objs) +# memory kinds supported by this conduit (space separated) +# to be included in libgasnet if support was enabled at configure time +CONDUIT_KINDS = + # the default job spawn command to be used for "make run-tests" # The following substitutions are performed: # %P = program executable name diff --git a/third-party/gasnet/gasnet-src/ucx-conduit/Makefile.in b/third-party/gasnet/gasnet-src/ucx-conduit/Makefile.in index 4f21b96c83c7..b95e96652c7e 100644 --- a/third-party/gasnet/gasnet-src/ucx-conduit/Makefile.in +++ b/third-party/gasnet/gasnet-src/ucx-conduit/Makefile.in @@ -279,6 +279,10 @@ CONFIGURE_ARGS = @CONFIGURE_ARGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CROSS_COMPILING = @CROSS_COMPILING@ +CUDA_UVA_CFLAGS = @CUDA_UVA_CFLAGS@ +CUDA_UVA_LDFLAGS = @CUDA_UVA_LDFLAGS@ +CUDA_UVA_LIBS = @CUDA_UVA_LIBS@ 
+CUDA_guess_prog = @CUDA_guess_prog@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ @@ -304,6 +308,8 @@ ENVCMD = @ENVCMD@ EXEEXT = @EXEEXT@ EXESUFFIX = @EXESUFFIX@ FH_CFLAGS = @FH_CFLAGS@ +GASNETI_HWLOC_BIND_PATH = @GASNETI_HWLOC_BIND_PATH@ +GASNETI_HWLOC_CALC_PATH = @GASNETI_HWLOC_CALC_PATH@ GASNETI_PTR_BITS = @GASNETI_PTR_BITS@ GASNET_EXTRA_DEFINES = @GASNET_EXTRA_DEFINES@ GASNET_IBV_SPAWNER_CONF = @GASNET_IBV_SPAWNER_CONF@ @@ -332,6 +338,10 @@ GSTACK_PATH = @GSTACK_PATH@ HAVE_INTTYPES_H = @HAVE_INTTYPES_H@ HAVE_STDINT_H = @HAVE_STDINT_H@ HAVE_SYS_TYPES_H = @HAVE_SYS_TYPES_H@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_guess_prog = @HWLOC_guess_prog@ IBV_CFLAGS = @IBV_CFLAGS@ IBV_LDFLAGS = @IBV_LDFLAGS@ IBV_LIBS = @IBV_LIBS@ @@ -551,6 +561,10 @@ CONDUIT_EXTRADEPS = $(ssh_deps) $(mpi_deps) $(pmi_deps) # forced targets in this file, and should probably use LIBINCLUDES/LIBDEFINES CONDUIT_SPECIAL_OBJS = $(mpi_special_objs) +# memory kinds supported by this conduit (space separated) +# to be included in libgasnet if support was enabled at configure time +CONDUIT_KINDS = + # the default job spawn command to be used for "make run-tests" # The following substitutions are performed: # %P = program executable name diff --git a/third-party/gasnet/gasnet-src/ucx-conduit/contrib/Makefile.in b/third-party/gasnet/gasnet-src/ucx-conduit/contrib/Makefile.in index b3384e97dab1..17fa9c48df28 100644 --- a/third-party/gasnet/gasnet-src/ucx-conduit/contrib/Makefile.in +++ b/third-party/gasnet/gasnet-src/ucx-conduit/contrib/Makefile.in @@ -185,6 +185,10 @@ CONFIGURE_ARGS = @CONFIGURE_ARGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CROSS_COMPILING = @CROSS_COMPILING@ +CUDA_UVA_CFLAGS = @CUDA_UVA_CFLAGS@ +CUDA_UVA_LDFLAGS = @CUDA_UVA_LDFLAGS@ +CUDA_UVA_LIBS = @CUDA_UVA_LIBS@ +CUDA_guess_prog = @CUDA_guess_prog@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ @@ -210,6 +214,8 @@ ENVCMD = @ENVCMD@ EXEEXT = @EXEEXT@ EXESUFFIX = 
@EXESUFFIX@ FH_CFLAGS = @FH_CFLAGS@ +GASNETI_HWLOC_BIND_PATH = @GASNETI_HWLOC_BIND_PATH@ +GASNETI_HWLOC_CALC_PATH = @GASNETI_HWLOC_CALC_PATH@ GASNETI_PTR_BITS = @GASNETI_PTR_BITS@ GASNET_EXTRA_DEFINES = @GASNET_EXTRA_DEFINES@ GASNET_IBV_SPAWNER_CONF = @GASNET_IBV_SPAWNER_CONF@ @@ -238,6 +244,10 @@ GSTACK_PATH = @GSTACK_PATH@ HAVE_INTTYPES_H = @HAVE_INTTYPES_H@ HAVE_STDINT_H = @HAVE_STDINT_H@ HAVE_SYS_TYPES_H = @HAVE_SYS_TYPES_H@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_guess_prog = @HWLOC_guess_prog@ IBV_CFLAGS = @IBV_CFLAGS@ IBV_LDFLAGS = @IBV_LDFLAGS@ IBV_LIBS = @IBV_LIBS@ diff --git a/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_core.c b/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_core.c index 17c10ac91ffb..82ced0ecdea8 100644 --- a/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_core.c +++ b/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_core.c @@ -63,8 +63,6 @@ static gex_TM_t gasnetc_bootstrap_tm = NULL; static double gasnetc_exittimeout = GASNETC_DEFAULT_EXITTIMEOUT_MAX; -gex_AM_Entry_t const *gasnetc_get_handlertable(void); - gex_AM_Entry_t *gasnetc_handler; // TODO-EX: will be replaced with per-EP tables gasneti_spawnerfn_t const *gasneti_spawner = NULL; @@ -77,6 +75,11 @@ size_t gasnetc_AMHeaderSize(void) return sizeof(gasnetc_sreq_hdr_t); } +size_t gasnetc_sizeof_segment_t(void) { + gasnetc_Segment_t segment; + return sizeof(*segment); +} + /* ------------------------------------------------------------------------------------ */ /* Bootstrap collectives @@ -265,20 +268,13 @@ static void gasnetc_minfo_reset(gasnetc_mem_info_t *minfo) memset(minfo, 0, sizeof(gasnetc_mem_info_t)); } -static int gasnetc_pin_segment(void *seg_start, size_t segsize, - gasneti_bootstrapExchangefn_t exchangefn) +static gasnetc_mem_info_t* +gasnetc_segment_register(void *seg_start, size_t segsize) { ucs_status_t status; - int j; - void * mem_info_buf = NULL; - size_t mem_info_len; - size_t info_offset = 0; - 
size_t rkey_max_size = 0; gasnet_ep_info_t * my_ep_info = &gasneti_ucx_module.ep_tbl[gasneti_mynode]; - gex_Rank_t i; gasnetc_mem_info_t *mem_info; gasneti_list_t mem_info_list; - size_t *rkey_sizes; gasneti_list_init(&mem_info_list); GASNETI_LIST_ITEM_ALLOC(mem_info, gasnetc_mem_info_t, gasnetc_minfo_reset); @@ -307,69 +303,135 @@ static int gasnetc_pin_segment(void *seg_start, size_t segsize, /* move added mem_info to local table */ gasneti_list_enq(&my_ep_info->mem_tbl, mem_info); - /* identify max rkey size */ - rkey_max_size = MAX(rkey_max_size, mem_info->bsize); - rkey_sizes = gasneti_calloc(gasneti_nodes, sizeof(size_t)); - (*exchangefn)(&rkey_max_size, sizeof(rkey_max_size), rkey_sizes); - for (i = 0; i < gasneti_nodes; i++) { - if (i == gasneti_mynode) { - continue; - } + return mem_info; +} + +// TBD: hoist this *into* gasneti_blockingExchange()? +static void +gasnetc_segment_exchange_helper(gex_TM_t tm, void *src, size_t len, void *dst) +{ + if (tm) { + gasneti_blockingExchange(tm, src, len, dst); + } else { + gasneti_bootstrapExchange(src, len, dst); + } +} + +// TODO: multi-ep generalizations +static int +gasnetc_segment_exchange(gasnetc_mem_info_t* mem_info, gex_TM_t tm) +{ + gex_Rank_t team_size = (tm != GEX_TM_INVALID) ? gex_TM_QuerySize(tm) : gasneti_nodes; + + // identify max rkey size + // TODO: reduce-to-all in O(log(N)) time and O(1) storage + size_t rkey_max_size = mem_info ? 
mem_info->bsize : 0; + size_t *rkey_sizes = gasneti_calloc(team_size, sizeof(size_t)); + gasnetc_segment_exchange_helper(tm, &rkey_max_size, sizeof(rkey_max_size), rkey_sizes); + for (gex_Rank_t i = 0; i < team_size; i++) { rkey_max_size = MAX(rkey_max_size, rkey_sizes[i]); } gasneti_free(rkey_sizes); /* pack my mem map info */ - mem_info_len = - /* rkey size */ sizeof(uint64_t) - + /* rkey buf */ rkey_max_size - + /* addr */ sizeof(uint64_t) - + /* len */ sizeof(uint64_t); - mem_info_buf = gasneti_calloc(1, mem_info_len); - - gasneti_mem_pack(mem_info_buf, &mem_info->bsize, sizeof(uint64_t), - 0, info_offset); - gasneti_mem_pack(mem_info_buf, mem_info->buffer, - mem_info->bsize, rkey_max_size, info_offset); - gasneti_mem_pack(mem_info_buf, &mem_info->addr, sizeof(uint64_t), - 0, info_offset); - gasneti_mem_pack(mem_info_buf, &mem_info->length, sizeof(uint64_t), - 0, info_offset); - - char * recv_buf = gasneti_malloc(mem_info_len * gasneti_nodes); + size_t mem_info_len = + /* len */ sizeof(uint64_t) + + /* addr */ sizeof(void *) + + /* rkey size */ sizeof(uint64_t) // TODO: cannot imagine multi-GB rkeys! 
+ + /* rkey buf */ rkey_max_size; + void * mem_info_buf = gasneti_calloc(1, mem_info_len); + size_t info_offset = 0; + + if (mem_info) { + gasneti_mem_pack(mem_info_buf, &mem_info->length, sizeof(uint64_t), + 0, info_offset); + gasneti_mem_pack(mem_info_buf, &mem_info->addr, sizeof(void *), + 0, info_offset); + gasneti_mem_pack(mem_info_buf, &mem_info->bsize, sizeof(uint64_t), + 0, info_offset); + gasneti_mem_pack(mem_info_buf, mem_info->buffer, mem_info->bsize, + rkey_max_size, info_offset); + } + + char * recv_buf = gasneti_malloc(mem_info_len * team_size); /* TODO: * + When using PSHM we could store rkeys just once per supernode * + When not fully connected, we could utilize sparse storage */ - (*exchangefn)(mem_info_buf, mem_info_len, recv_buf); + gasnetc_segment_exchange_helper(tm, mem_info_buf, mem_info_len, recv_buf); info_offset = 0; - for (i = 0; i < gasneti_nodes; i++) { - if (i == gasneti_mynode) { + for (gex_Rank_t i = 0; i < team_size; i++) { + gex_Rank_t jobrank; + if (tm) { + gex_EP_Location_t loc = gasneti_e_tm_rank_to_location(tm, i, 0); + if (loc.gex_ep_index) { // TODO: multi-ep support + gasneti_unreachable_error(("gex_EP_PublishBoundSegment does not yet handle non-primordial EPs")); + } + jobrank = loc.gex_rank; + } else { + jobrank = i; + } + + if (jobrank == gasneti_mynode) { info_offset += mem_info_len; continue; } - ucp_ep_h ep = GASNETC_UCX_GET_EP(i); - gasnet_ep_info_t * ep_info = &gasneti_ucx_module.ep_tbl[i]; - GASNETI_LIST_ITEM_ALLOC(mem_info, gasnetc_mem_info_t, gasnetc_minfo_reset); - gasneti_list_enq(&ep_info->mem_tbl, mem_info); + uint64_t length; + gasneti_mem_unpack(&length, recv_buf, + sizeof(uint64_t), 0, info_offset); + if (!length) { // GEX_SEGENT_INVALID + info_offset += mem_info_len - sizeof(length); + continue; + } - gasneti_mem_unpack(&mem_info->bsize, recv_buf, + void * addr; + gasneti_mem_unpack(&addr, recv_buf, + sizeof(void *), 0, info_offset); + uint64_t bsize; + gasneti_mem_unpack(&bsize, recv_buf, sizeof(uint64_t), 
0, info_offset); - mem_info->buffer = - gasneti_calloc(1, mem_info->bsize); - gasneti_mem_unpack(mem_info->buffer, recv_buf, - mem_info->bsize, rkey_max_size, + char * buffer = gasneti_calloc(1, bsize); + gasneti_mem_unpack(buffer, recv_buf, + bsize, rkey_max_size, info_offset); - gasneti_rkey_unpack(ep, mem_info->buffer, &mem_info->rkey); - gasneti_mem_unpack(&mem_info->addr, recv_buf, - sizeof(uint64_t), 0, info_offset); - gasneti_mem_unpack(&mem_info->length, recv_buf, - sizeof(uint64_t), 0, info_offset); + + gasnet_ep_info_t * ep_info = &gasneti_ucx_module.ep_tbl[jobrank]; + + // Multiple calls to Publish must not create duplicate entries + // TODO: thread safety in list traversal? + { + int found = 0; + GASNETI_LIST_FOREACH(mem_info, &ep_info->mem_tbl, gasnetc_mem_info_t) { + if ((mem_info->addr == addr ) && + (mem_info->length == length) && + (mem_info->bsize == bsize ) && + !memcmp(mem_info->buffer, buffer, bsize)) { + gasneti_free(buffer); + found = 1; + break; + } + } + if (found) { + continue; + } + } + + GASNETI_LIST_ITEM_ALLOC(mem_info, gasnetc_mem_info_t, gasnetc_minfo_reset); + + mem_info->addr = addr; + mem_info->length = length; + mem_info->bsize = bsize; + mem_info->buffer = buffer; + + ucp_ep_h ep = GASNETC_UCX_GET_EP(jobrank); + gasneti_rkey_unpack(ep, buffer, &mem_info->rkey); + + gasneti_list_enq(&ep_info->mem_tbl, mem_info); // TODO: thread safety? 
} - gasneti_assert(info_offset == mem_info_len * gasneti_nodes); + gasneti_assert(info_offset == mem_info_len * team_size); gasneti_free(mem_info_buf); gasneti_free(recv_buf); @@ -596,16 +658,16 @@ static int gasnetc_init(gex_Client_t *client_p, gex_EP_t *ep_p, // Create first Client, EP and TM *here*, for use in subsequent bootstrap collectives { // allocate the client object - gasneti_Client_t client = gasneti_alloc_client(clientName, flags, 0); + gasneti_Client_t client = gasneti_alloc_client(clientName, flags); *client_p = gasneti_export_client(client); // create the initial endpoint with internal handlers - if (gasnetc_EP_Create(ep_p, *client_p, flags)) + if (gex_EP_Create(ep_p, *client_p, GEX_EP_CAPABILITY_ALL, flags)) GASNETI_RETURN_ERRR(RESOURCE,"Error creating initial endpoint"); gasneti_EP_t ep = gasneti_import_ep(*ep_p); gasnetc_handler = ep->_amtbl; // TODO-EX: this global variable to be removed - gasneti_TM_t tm = gasneti_alloc_tm(ep, gasneti_mynode, gasneti_nodes, flags, 0); + gasneti_TM_t tm = gasneti_alloc_tm(ep, gasneti_mynode, gasneti_nodes, flags); gasnetc_bootstrap_tm = gasneti_export_tm(tm); } @@ -633,7 +695,8 @@ static int gasnetc_init(gex_Client_t *client_p, gex_EP_t *ep_p, #if GASNETC_PIN_SEGMENT /* pin the aux segment and exchange the RKeys */ - gasnetc_pin_segment(auxbase, auxsize, &gasneti_bootstrapExchange); + gasnetc_mem_info_t *mem_info = gasnetc_segment_register(auxbase, auxsize); + gasnetc_segment_exchange(mem_info, GEX_TM_INVALID); #endif if (0 == gasneti_mynode) { @@ -696,16 +759,17 @@ static int gasnetc_attach_primary(void) { static int gasnetc_attach_segment(gex_Segment_t *segment_p, gex_TM_t tm, uintptr_t segsize, - gasneti_bootstrapExchangefn_t exchangefn, gex_Flags_t flags) { /* ------------------------------------------------------------------------------------ */ /* register client segment */ - gasnet_seginfo_t myseg = gasneti_segmentAttach(segment_p, 0, tm, segsize, exchangefn, flags); + gasnet_seginfo_t myseg = 
gasneti_segmentAttach(segment_p, tm, segsize, flags); #if GASNETC_PIN_SEGMENT /* pin the segment and exchange the RKeys */ - gasnetc_pin_segment(myseg.addr, myseg.size, exchangefn); + gasnetc_Segment_t segment = (gasnetc_Segment_t) gasneti_import_segment(*segment_p); + segment->mem_info = gasnetc_segment_register(myseg.addr, myseg.size); + gasnetc_segment_exchange(segment->mem_info, tm); #endif return GASNET_OK; @@ -719,7 +783,7 @@ extern int gasnetc_attach( gex_TM_t _tm, { GASNETI_TRACE_PRINTF(C,("gasnetc_attach(table (%i entries), segsize=%"PRIuPTR")", numentries, segsize)); - gasneti_TM_t tm = gasneti_import_tm(_tm); + gasneti_TM_t tm = gasneti_import_tm_nonpair(_tm); gasneti_EP_t ep = tm->_ep; if (!gasneti_init_done) @@ -744,14 +808,13 @@ extern int gasnetc_attach( gex_TM_t _tm, #if GASNETC_PIN_SEGMENT /* register client segment */ gex_Segment_t seg; // g2ex segment is automatically saved by a hook - /* (###) may replace gasneti_defaultExchange with a conduit-specific exchange if available */ - if (GASNET_OK != gasnetc_attach_segment(&seg, _tm, segsize, gasneti_defaultExchange, GASNETI_FLAG_INIT_LEGACY)) + if (GASNET_OK != gasnetc_attach_segment(&seg, _tm, segsize, GASNETI_FLAG_INIT_LEGACY)) GASNETI_RETURN_ERRR(RESOURCE,"Error attaching segment"); #endif // GASNETC_PIN_SEGMENT /* register client handlers */ - if (table && gasneti_amregister_legacy(ep->_amtbl, table, numentries) != GASNET_OK) + if (table && gasneti_amregister_legacy(ep, table, numentries) != GASNET_OK) GASNETI_RETURN_ERRR(RESOURCE,"Error registering handlers"); /* ensure everything is initialized across all nodes */ @@ -788,18 +851,22 @@ extern int gasnetc_Client_Init( gasneti_trace_init(argc, argv); } else { // allocate the client object - gasneti_Client_t client = gasneti_alloc_client(clientName, flags, 0); + gasneti_Client_t client = gasneti_alloc_client(clientName, flags); *client_p = gasneti_export_client(client); // create the initial endpoint with internal handlers - if 
(gasnetc_EP_Create(ep_p, *client_p, flags)) + if (gex_EP_Create(ep_p, *client_p, GEX_EP_CAPABILITY_ALL, flags)) GASNETI_RETURN_ERRR(RESOURCE,"Error creating initial endpoint"); } gasneti_EP_t ep = gasneti_import_ep(*ep_p); + // Do NOT move this prior to the gasneti_trace_init() call + GASNETI_TRACE_PRINTF(O,("gex_Client_Init: name='%s' argc_p=%p argv_p=%p flags=%d", + clientName, (void *)argc, (void *)argv, flags)); + // TODO-EX: create team gasneti_TM_t tm = gasneti_init_done ? gasneti_import_tm(gasnetc_bootstrap_tm) // gasnetc_init() creates very first TM - : gasneti_alloc_tm(ep, gasneti_mynode, gasneti_nodes, flags, 0); + : gasneti_alloc_tm(ep, gasneti_mynode, gasneti_nodes, flags); *tm_p = gasneti_export_tm(tm); if (0 == (flags & GASNETI_FLAG_INIT_LEGACY)) { @@ -828,50 +895,59 @@ extern int gasnetc_Segment_Attach( #endif /* (###) add code to create a segment collectively */ - if (GASNET_OK != gasnetc_attach_segment(segment_p, tm, length, gasneti_defaultExchange, 0)) + if (GASNET_OK != gasnetc_attach_segment(segment_p, tm, length, 0)) GASNETI_RETURN_ERRR(RESOURCE,"Error attaching segment"); return GASNET_OK; } -extern int gasnetc_EP_Create(gex_EP_t *ep_p, - gex_Client_t client, - gex_Flags_t flags) { - /* (###) add code here to create an endpoint belonging to the given client */ -#if 1 // TODO-EX: This is a stub, which assumes 1 implicit call from ClientCreate - static gasneti_mutex_t lock = GASNETI_MUTEX_INITIALIZER; - gasneti_mutex_lock(&lock); - static int once = 0; - int prev = once; - once = 1; - gasneti_mutex_unlock(&lock); - if (prev) gasneti_fatalerror("Multiple endpoints are not yet implemented"); -#endif +extern int gasnetc_Segment_Create( + gex_Segment_t *segment_p, + gex_Client_t client, + gex_Addr_t address, + uintptr_t length, + gex_MK_t kind, + gex_Flags_t flags) +{ + gasneti_assert(segment_p); - gasneti_EP_t ep = gasneti_alloc_ep(gasneti_import_client(client), flags, 0); - *ep_p = gasneti_export_ep(ep); - - { /* core API handlers */ - 
gex_AM_Entry_t *ctable = (gex_AM_Entry_t *)gasnetc_get_handlertable(); - int len = 0; - int numreg = 0; - gasneti_assert(ctable); - while (ctable[len].gex_fnptr) len++; /* calc len */ - if (gasneti_amregister(ep->_amtbl, ctable, len, GASNETC_HANDLER_BASE, GASNETE_HANDLER_BASE, 0, &numreg) != GASNET_OK) - GASNETI_RETURN_ERRR(RESOURCE,"Error registering core API handlers"); - gasneti_assert_int(numreg ,==, len); - } + // Create the Segment object, allocating memory if appropriate + gasneti_Client_t i_client = gasneti_import_client(client); + int rc = gasneti_segmentCreate(segment_p, i_client, address, length, kind, flags); - { /* extended API handlers */ - gex_AM_Entry_t *etable = (gex_AM_Entry_t *)gasnete_get_handlertable(); - int len = 0; - int numreg = 0; - gasneti_assert(etable); - while (etable[len].gex_fnptr) len++; /* calc len */ - if (gasneti_amregister(ep->_amtbl, etable, len, GASNETE_HANDLER_BASE, GASNETI_CLIENT_HANDLER_BASE, 0, &numreg) != GASNET_OK) - GASNETI_RETURN_ERRR(RESOURCE,"Error registering extended API handlers"); - gasneti_assert_int(numreg ,==, len); +#if GASNETC_PIN_SEGMENT + if (rc == GASNET_OK) { + // Register the segment + gasnetc_Segment_t segment = (gasnetc_Segment_t) gasneti_import_segment(*segment_p); + segment->mem_info = gasnetc_segment_register(segment->_addr, segment->_size); } +#endif + + return rc; +} + +extern int gasnetc_EP_PublishBoundSegment( + gex_TM_t tm, + gex_EP_t *eps, + size_t num_eps, + gex_Flags_t flags) +{ + // Conduit-independent parts + int rc = gasneti_EP_PublishBoundSegment(tm, eps, num_eps, flags); + if (GASNET_OK != rc) return rc; + +#if GASNETC_PIN_SEGMENT + // Conduit-dependent parts + // TODO: merge comms into gasneti_EP_PublishBoundSegment(). + // TODO: generalize for multi-ep + gasnetc_Segment_t segment; + segment = num_eps ? (gasnetc_Segment_t) gasneti_import_ep(eps[0])->_segment : NULL; + gasnetc_segment_exchange(segment ? 
segment->mem_info : NULL, tm); +#endif + + // Avoid race in which AMRequestLong triggers AMRepyLong before exchange completes remotely + // TODO: barrier for multi-tm per-process + gex_Event_Wait(gex_Coll_BarrierNB(tm, 0)); return GASNET_OK; } @@ -879,7 +955,7 @@ extern int gasnetc_EP_Create(gex_EP_t *ep_p, extern int gasnetc_EP_RegisterHandlers(gex_EP_t ep, gex_AM_Entry_t *table, size_t numentries) { - return gasneti_amregister_client(gasneti_import_ep(ep)->_amtbl, table, numentries); + return gasneti_amregister_client(gasneti_import_ep(ep), table, numentries); } /* ------------------------------------------------------------------------------------ */ @@ -1221,6 +1297,9 @@ static void gasnetc_exit_body(void) { /* Disable processing of AMs, except core-specific ones */ gasnetc_disable_AMs(); + // prevent possible GASNETI_CHECK_INJECT() failures when we communicate + GASNETI_CHECK_INJECT_RESET(); + GASNETI_TRACE_PRINTF(C,("gasnet_exit(%i)\n", exitcode)); /* Timed MAX(exitcode) reduction to clearly distinguish collective exit */ @@ -1602,7 +1681,7 @@ extern int gasnetc_AMRequestShortM( return retval; } -#if !GASNETC_HAVE_NP_REQ_MEDIUM // (###) +#if !GASNET_NATIVE_NP_ALLOC_REQ_MEDIUM // (###) // This provides a template implementing the following two external functions: // int gasnetc_AMRequestMediumV() @@ -1669,7 +1748,7 @@ extern int gasnetc_AMRequestMediumM( return retval; } -#else // GASNETC_HAVE_NP_REQ_MEDIUM +#else // GASNET_NATIVE_NP_ALLOC_REQ_MEDIUM // This provides a template implementing the following three external functions: // int gasnetc_AMRequestMediumM() @@ -1684,7 +1763,7 @@ extern int gasnetc_AMRequestMediumM( // This example provides a specialized implementation of Negotiated-Payload // RequestMedium (by providing gasnetc_AM_PrepareRequestMedium() and // gasnetc_AM_CommitRequestMediumM()) and one must -// #define GASNETC_HAVE_NP_REQ_MEDIUM 1 +// #define GASNET_NATIVE_NP_ALLOC_REQ_MEDIUM 1 // in the conduit's gasnet_core_fwd.h to disable 
(conflicting) definitions in // the reference implementation. @@ -1782,6 +1861,8 @@ extern gex_AM_SrcDesc_t gasnetc_AM_PrepareRequestMedium( GASNETI_THREAD_FARG, unsigned int nargs) { + GASNETI_TRACE_PREP_REQUESTMEDIUM(tm,rank,client_buf,least_payload,most_payload,flags,nargs); + gasneti_AM_SrcDesc_t sd = gasneti_init_request_srcdesc(GASNETI_THREAD_PASS_ALONE); GASNETI_COMMON_PREP_REQ(sd,tm,rank,client_buf,least_payload,most_payload,NULL,lc_opt,flags,nargs,Medium); @@ -1808,6 +1889,7 @@ extern gex_AM_SrcDesc_t gasnetc_AM_PrepareRequestMedium( } GASNETI_TRACE_PREP_RETURN(REQUEST_MEDIUM, sd); + GASNETI_CHECK_SD(client_buf, least_payload, most_payload, sd); return gasneti_export_srcdesc(sd); } @@ -1835,7 +1917,7 @@ extern void gasnetc_AM_CommitRequestMediumM( gasneti_reset_srcdesc(sd); } -#endif // GASNETC_HAVE_NP_REQ_MEDIUM +#endif // GASNET_NATIVE_NP_ALLOC_REQ_MEDIUM GASNETI_INLINE(gasnetc_AMRequestLong) int gasnetc_AMRequestLong( gex_TM_t tm, gex_Rank_t rank, gex_AM_Index_t handler, @@ -1928,7 +2010,7 @@ extern int gasnetc_AMReplyShortM( return retval; } -#if !GASNETC_HAVE_NP_REP_MEDIUM // (###) +#if !GASNET_NATIVE_NP_ALLOC_REP_MEDIUM // (###) // This provides a template implementing the following two external functions: // int gasnetc_AMReplyMediumV() @@ -1985,13 +2067,13 @@ extern int gasnetc_AMReplyMediumM( return retval; } -#else // GASNETC_HAVE_NP_REP_MEDIUM +#else // GASNET_NATIVE_NP_ALLOC_REP_MEDIUM // This provides a template implementing the following three external functions: // int gasnetc_AMReplyMediumM() // int gasnetc_AM_PrepareReplyMedium() // void gasnetc_AM_CommitReplyMediumM() -// See comments with GASNETC_HAVE_NP_REQ_MEDIUM for more information. +// See comments with GASNET_NATIVE_NP_ALLOC_REQ_MEDIUM for more information. 
GASNETI_INLINE(gasnetc_prepare_rep_medium) int gasnetc_prepare_rep_medium( @@ -2083,6 +2165,8 @@ extern gex_AM_SrcDesc_t gasnetc_AM_PrepareReplyMedium( gex_Flags_t flags unsigned int nargs) { + GASNETI_TRACE_PREP_REPLYMEDIUM(token,client_buf,least_payload,most_payload,flags,nargs); + gasneti_AM_SrcDesc_t sd; flags &= ~(GEX_FLAG_AM_PREPARE_LEAST_CLIENT | GEX_FLAG_AM_PREPARE_LEAST_ALLOC); @@ -2109,6 +2193,7 @@ extern gex_AM_SrcDesc_t gasnetc_AM_PrepareReplyMedium( } GASNETI_TRACE_PREP_RETURN(REPLY_MEDIUM, sd); + GASNETI_CHECK_SD(client_buf, least_payload, most_payload, sd); return gasneti_export_srcdesc(sd); } @@ -2136,7 +2221,7 @@ extern void gasnetc_AM_CommitReplyMediumM( gasneti_reset_srcdesc(sd); } -#endif // GASNETC_HAVE_NP_REP_MEDIUM +#endif // GASNET_NATIVE_NP_ALLOC_REP_MEDIUM GASNETI_INLINE(gasnetc_AMReplyLong) int gasnetc_AMReplyLong( gex_Token_t token, gex_AM_Index_t handler, @@ -2309,9 +2394,7 @@ static void gasnetc_exit_reduce_reqh(gex_Token_t token, (for internal conduit use in bootstrapping, job management, etc.) 
*/ static gex_AM_Entry_t const gasnetc_handlers[] = { - #ifdef GASNETC_COMMON_HANDLERS - GASNETC_COMMON_HANDLERS(), - #endif + GASNETC_COMMON_HANDLERS(), /* ptr-width independent handlers */ gasneti_handler_tableentry_no_bits(gasnetc_exit_reduce_reqh,2,REQUEST,SHORT,0), diff --git a/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_core.h b/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_core.h index 6b9c05af758c..d460aa7db913 100644 --- a/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_core.h +++ b/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_core.h @@ -20,7 +20,7 @@ ============== */ -extern void gasnetc_exit(int exitcode) GASNETI_NORETURN; +extern void gasnetc_exit(int _exitcode) GASNETI_NORETURN; GASNETI_NORETURNP(gasnetc_exit) #define gasnet_exit gasnetc_exit @@ -33,31 +33,34 @@ GASNETI_NORETURNP(gasnetc_exit) #endif /* ------------------------------------------------------------------------------------ */ extern int gasnetc_Client_Init( - gex_Client_t *client_p, - gex_EP_t *ep_p, - gex_TM_t *tm_p, - const char *clientName, - int *argc, - char ***argv, - gex_Flags_t flags); + gex_Client_t *_client_p, + gex_EP_t *_ep_p, + gex_TM_t *_tm_p, + const char *_clientName, + int *_argc, + char ***_argv, + gex_Flags_t _flags); // gasnetex.h handles name-shifting of gex_Client_Init() extern int gasnetc_Segment_Attach( - gex_Segment_t *segment_p, - gex_TM_t tm, - uintptr_t length); + gex_Segment_t *_segment_p, + gex_TM_t _tm, + uintptr_t _length); #define gex_Segment_Attach gasnetc_Segment_Attach -extern int gasnetc_EP_Create( - gex_EP_t *ep_p, - gex_Client_t client, - gex_Flags_t flags); -#define gex_EP_Create gasnetc_EP_Create +extern int gasnetc_Segment_Create( + gex_Segment_t *_segment_p, + gex_Client_t _client, + gex_Addr_t _address, + uintptr_t _length, + gex_MK_t _kind, + gex_Flags_t _flags); +#define gex_Segment_Create gasnetc_Segment_Create extern int gasnetc_EP_RegisterHandlers( - gex_EP_t ep, - gex_AM_Entry_t *table, - size_t numentries); + gex_EP_t _ep, + 
gex_AM_Entry_t *_table, + size_t _numentries); #define gex_EP_RegisterHandlers gasnetc_EP_RegisterHandlers /* ------------------------------------------------------------------------------------ */ /* @@ -101,11 +104,11 @@ typedef struct { #define gex_HSL_Unlock(hsl) #define gex_HSL_Trylock(hsl) GASNET_OK #else - extern void gasnetc_hsl_init (gex_HSL_t *hsl); - extern void gasnetc_hsl_destroy(gex_HSL_t *hsl); - extern void gasnetc_hsl_lock (gex_HSL_t *hsl); - extern void gasnetc_hsl_unlock (gex_HSL_t *hsl); - extern int gasnetc_hsl_trylock(gex_HSL_t *hsl) GASNETI_WARN_UNUSED_RESULT; + extern void gasnetc_hsl_init (gex_HSL_t *_hsl); + extern void gasnetc_hsl_destroy(gex_HSL_t *_hsl); + extern void gasnetc_hsl_lock (gex_HSL_t *_hsl); + extern void gasnetc_hsl_unlock (gex_HSL_t *_hsl); + extern int gasnetc_hsl_trylock(gex_HSL_t *_hsl) GASNETI_WARN_UNUSED_RESULT; #define gex_HSL_Init gasnetc_hsl_init #define gex_HSL_Destroy gasnetc_hsl_destroy @@ -123,11 +126,8 @@ size_t gasnetc_AMHeaderSize(void); #define GASNETC_MAX_ARGS 16 #define GASNETC_UCX_HDR_SIZE gasnetc_AMHeaderSize() -#define GASNETC_MAX_MED 4096 #define GASNETC_MAX_LONG INT_MAX -#define GASNETC_MAX_MEDIUM_NBRHD GASNETC_MAX_MED - #define GASNETC_ARGS_SIZE(numargs) (sizeof(gex_AM_Arg_t) * (numargs)) #define GASNETC_MAX_MED_(nargs) \ @@ -151,38 +151,54 @@ size_t gasnetc_AMHeaderSize(void); #define gex_AM_LUBRequestLong() (GASNETC_MAX_LONG_(GASNETC_MAX_ARGS)) #define gex_AM_LUBReplyLong() (GASNETC_MAX_LONG_(GASNETC_MAX_ARGS)) -#define gasnetc_AM_MaxRequestMedium(tm,rank,lc_opt,flags,nargs) ((size_t)(GASNETC_MAX_MED_(nargs))) -#define gasnetc_AM_MaxReplyMedium(tm,rank,lc_opt,flags,nargs) ((size_t)(GASNETC_MAX_MED_(nargs))) -#define gasnetc_AM_MaxRequestLong(tm,rank,lc_opt,flags,nargs) ((size_t)(GASNETC_MAX_LONG_(nargs))) -#define gasnetc_AM_MaxReplyLong(tm,rank,lc_opt,flags,nargs) ((size_t)(GASNETC_MAX_LONG_(nargs))) -#define gasnetc_Token_MaxReplyMedium(token,lc_opt,flags,nargs) 
((size_t)(GASNETC_MAX_MED_(nargs))) -#define gasnetc_Token_MaxReplyLong(token,lc_opt,flags,nargs) ((size_t)(GASNETC_MAX_LONG_(nargs))) +#define gasnetc_AM_MaxRequestMedium(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS4(tm,rank,lc_opt,flags),(size_t)(GASNETC_MAX_MED_(nargs))) +#define gasnetc_AM_MaxReplyMedium(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS4(tm,rank,lc_opt,flags),(size_t)(GASNETC_MAX_MED_(nargs))) +#define gasnetc_Token_MaxReplyMedium(token,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS3(token,lc_opt,flags),(size_t)(GASNETC_MAX_MED_(nargs))) + +#define gasnetc_AM_MaxRequestLong(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS3(tm,rank,lc_opt), \ + ((flags) & GEX_FLAG_AM_PREPARE_LEAST_ALLOC \ + ? (GASNETI_UNUSED_ARGS1(nargs),GASNETC_REF_NPAM_MAX_ALLOC) \ + : GASNETC_MAX_LONG_(nargs))) +#define gasnetc_AM_MaxReplyLong(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS3(tm,rank,lc_opt), \ + ((flags) & GEX_FLAG_AM_PREPARE_LEAST_ALLOC \ + ? (GASNETI_UNUSED_ARGS1(nargs),GASNETC_REF_NPAM_MAX_ALLOC) \ + : GASNETC_MAX_LONG_(nargs))) +#define gasnetc_Token_MaxReplyLong(token,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS2(token,lc_opt), \ + ((flags) & GEX_FLAG_AM_PREPARE_LEAST_ALLOC \ + ? 
(GASNETI_UNUSED_ARGS1(nargs),GASNETC_REF_NPAM_MAX_ALLOC) \ + : GASNETC_MAX_LONG_(nargs))) /* Example for true functions: */ #if 0 extern GASNETI_PURE size_t gasnetc_AM_MaxRequestMedium( - gex_TM_t tm, gex_Rank_t rank, - const gex_Event_t *lc_opt, gex_Flags_t flags, unsigned int nargs); + gex_TM_t _tm, gex_Rank_t _rank, + const gex_Event_t *_lc_opt, gex_Flags_t _flags, unsigned int _nargs); GASNETI_PUREP(gasnetc_AM_MaxRequestMedium) extern GASNETI_PURE size_t gasnetc_AM_MaxReplyMedium( - gex_TM_t tm, gex_Rank_t rank, - const gex_Event_t *lc_opt, gex_Flags_t flags, unsigned int nargs); + gex_TM_t _tm, gex_Rank_t _rank, + const gex_Event_t *_lc_opt, gex_Flags_t _flags, unsigned int _nargs); GASNETI_PUREP(gasnetc_AM_MaxReplyMedium) extern GASNETI_PURE size_t gasnetc_AM_MaxRequestLong( - gex_TM_t tm, gex_Rank_t rank, - const gex_Event_t *lc_opt, gex_Flags_t flags, unsigned int nargs); + gex_TM_t _tm, gex_Rank_t _rank, + const gex_Event_t *_lc_opt, gex_Flags_t _flags, unsigned int _nargs); GASNETI_PUREP(gasnetc_AM_MaxRequestLong) extern GASNETI_PURE size_t gasnetc_AM_MaxReplyLong( - gex_TM_t tm, gex_Rank_t rank, - const gex_Event_t *lc_opt, gex_Flags_t flags, unsigned int nargs); + gex_TM_t _tm, gex_Rank_t _rank, + const gex_Event_t *_lc_opt, gex_Flags_t _flags, unsigned int _nargs); GASNETI_PUREP(gasnetc_AM_MaxReplyLong) extern GASNETI_PURE size_t gasnetc_Token_MaxReplyMedium( - gex_Token_t token, - const gex_Event_t *lc_opt, gex_Flags_t flags, unsigned int nargs); + gex_Token_t _token, + const gex_Event_t *_lc_opt, gex_Flags_t _flags, unsigned int _nargs); GASNETI_PUREP(gasnetc_Token_MaxReplyMedium) extern GASNETI_PURE size_t gasnetc_Token_MaxReplyLong( - const gex_Token_t token, - gex_Event_t *lc_opt, gex_Flags_t flags, unsigned int nargs); + const gex_Token_t _token, + gex_Event_t *_lc_opt, gex_Flags_t _flags, unsigned int _nargs); GASNETI_PUREP(gasnetc_Token_MaxReplyLong) #endif diff --git a/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_core_fwd.h 
b/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_core_fwd.h index 51826c9baf80..4e3521e66a44 100644 --- a/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_core_fwd.h +++ b/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_core_fwd.h @@ -20,6 +20,9 @@ #define GASNET_CONDUIT_NAME_STR _STRINGIFY(GASNET_CONDUIT_NAME) #define GASNET_CONDUIT_UCX 1 +#define GASNETC_MAX_MED 4096 +#define GASNETC_MAX_MEDIUM_NBRHD GASNETC_MAX_MED + /* GASNET_PSHM defined 1 if this conduit supports PSHM. leave undefined otherwise. */ #if GASNETI_PSHM_ENABLED #define GASNET_PSHM 1 @@ -40,6 +43,9 @@ #endif + // uncomment for each MK_CLASS which the conduit supports. leave commented otherwise +//#define GASNET_HAVE_MK_CLASS_CUDA_UVA GASNETI_MK_CLASS_CUDA_UVA_ENABLED + /* conduits should define GASNETI_CONDUIT_THREADS to 1 if they have one or more "private" threads which may be used to run AM handlers, even under GASNET_SEQ this ensures locking is still done correctly, etc @@ -72,12 +78,12 @@ your conduit must provide the V-suffixed functions for any of these that are not defined. */ -/* #define GASNETC_HAVE_NP_REQ_MEDIUM 1 */ -/* #define GASNETC_HAVE_NP_REP_MEDIUM 1 */ -/* #define GASNETC_HAVE_NP_REQ_LONG 1 */ -/* #define GASNETC_HAVE_NP_REP_LONG 1 */ +/* #define GASNET_NATIVE_NP_ALLOC_REQ_MEDIUM 1 */ +/* #define GASNET_NATIVE_NP_ALLOC_REP_MEDIUM 1 */ +/* #define GASNET_NATIVE_NP_ALLOC_REQ_LONG 1 */ +/* #define GASNET_NATIVE_NP_ALLOC_REP_LONG 1 */ - /* uncomment for each GASNETC_HAVE_NP_* enabled above if the Commit function + /* uncomment for each GASNET_NATIVE_NP_ALLOC_* enabled above if the Commit function has the numargs argument even in an NDEBUG build (it is always passed in DEBUG builds). */ @@ -90,10 +96,56 @@ include a call to gasneti_AMPoll (or equivalent) for progress. The preferred implementation is to Poll only in the M-suffixed calls and not the V-suffixed calls (and GASNETC_REQUESTV_POLLS undefined). - Used if (and only if) any of the GASNETC_HAVE_NP_* values above are unset. 
+ Used if (and only if) any of the GASNET_NATIVE_NP_ALLOC_* values above are unset. */ /* #define GASNETC_REQUESTV_POLLS 1 */ + // uncomment if conduit provides a gasnetc-prefixed override + // TODO: this should be a hook rather than an override +#define GASNETC_HAVE_EP_PUBLISHBOUNDSEGMENT 1 + + /* If your conduit uses conduit-specific extensions to the basic object + types, then define the corresponding SIZEOF macros below to return + the total length of the conduit-specific object, including the prefix + portion which must be the matching GASNETI_[OBJECT]_COMMON fields. + Similarly, *_HOOK macros should be defined as callbacks to perform + conduit-specific initialization and finalization tasks, if any. + If a given SIZEOF macro is defined, but the corresponding INIT_HOOK is + not, then space beyond the COMMON fields will be zero-initialized. + In all cases, GASNETC_[OBJECT]_EXTRA_DECLS provides the place to + provide necessary declarations (since this file is included very early). + */ + +//#define GASNETC_CLIENT_EXTRA_DECLS (###) +//#define GASNETC_CLIENT_INIT_HOOK(i_client) (###) +//#define GASNETC_CLIENT_FINI_HOOK(i_client) (###) +//#define GASNETC_SIZEOF_CLIENT_T() (###) + +#define GASNETC_SEGMENT_EXTRA_DECLS \ + extern size_t gasnetc_sizeof_segment_t(void); +//#define GASNETC_SEGMENT_INIT_HOOK(i_segment) (###) +//#define GASNETC_SEGMENT_FINI_HOOK(i_segment) (###) +#define GASNETC_SIZEOF_SEGMENT_T() \ + gasnetc_sizeof_segment_t() + +//#define GASNETC_TM_EXTRA_DECLS (###) +//#define GASNETC_TM_INIT_HOOK(i_tm) (###) +//#define GASNETC_TM_FINI_HOOK(i_tm) (###) +//#define GASNETC_SIZEOF_TM_T() (###) + +//#define GASNETC_EP_EXTRA_DECLS (###) +//#define GASNETC_EP_INIT_HOOK(i_ep) (###) +//#define GASNETC_EP_FINI_HOOK(i_ep) (###) +//#define GASNETC_SIZEOF_EP_T() (###) + +// If conduit supports GASNET_MAXEPS!=1, set default and (optional) max values here. 
+// Leaving GASNETC_MAXEPS_DFLT unset will result in GASNET_MAXEPS=1, independent +// of all other settings (appropriate for conduits without multi-ep support). +// If set, GASNETC_MAXEPS_MAX it is used to limit a user's --with-maxeps (and a +// global default limit is used otherwise). +//#define GASNETC_MAXEPS_DFLT ### // default num endpoints this conduit supports, undef means no multi-ep support +//#define GASNETC_MAXEPS_MAX ### // leave unset for default + /* this can be used to add conduit-specific statistical collection values (see gasnet_trace.h) */ #define GASNETC_CONDUIT_STATS(CNT,VAL,TIME) diff --git a/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_core_internal.h b/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_core_internal.h index b57786a8f53d..397050119f8f 100644 --- a/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_core_internal.h +++ b/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_core_internal.h @@ -17,7 +17,7 @@ #define GASNETC_HSL_SPINLOCK 1 /* ------------------------------------------------------------------------------------ */ -#define _hidx_gasnetc_exchg_reqh (GASNETC_HANDLER_BASE+0) +#define _hidx_gasnetc_hbarr_reqh (GASNETC_HANDLER_BASE+0) #define _hidx_gasnetc_exit_reduce_reqh (GASNETC_HANDLER_BASE+1) #define _hidx_gasnetc_exit_role_reqh (GASNETC_HANDLER_BASE+2) #define _hidx_gasnetc_exit_role_reph (GASNETC_HANDLER_BASE+3) @@ -288,6 +288,17 @@ typedef struct { #endif } gasnetc_token_t; +// Conduit-specific Segment type +typedef struct gasnetc_Segment_t_ { + GASNETI_SEGMENT_COMMON // conduit-indep part as prefix + +#if GASNETC_PIN_SEGMENT + // memory registation info + gasnetc_mem_info_t *mem_info; +#endif +} *gasnetc_Segment_t; + + typedef enum { gasnetc_rdma_op_put, gasnetc_rdma_op_get, diff --git a/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_core_sndrcv.c b/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_core_sndrcv.c index 202d00560d0c..ee2431c6be21 100644 --- 
a/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_core_sndrcv.c +++ b/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_core_sndrcv.c @@ -10,7 +10,7 @@ #include #include -GASNETI_INLINE(gasneti_ucx_progress) +GASNETI_INLINE(gasnetc_ucx_progress) int gasnetc_ucx_progress(void); GASNETI_INLINE(gasnetc_am_req_reset) @@ -19,7 +19,7 @@ void gasnetc_am_req_reset(gasnetc_am_req_t *am_req); GASNETI_INLINE(gasnetc_req_free) void gasnetc_req_free(void *req); -GASNETI_INLINE(gasnetc_am_req_release) +GASNETI_INLINE(gasnetc_rreq_release) void gasnetc_rreq_release(gasnetc_ucx_request_t *req); /* ------------------------------------------------------------------------------------ */ /* @@ -327,6 +327,7 @@ gasnetc_mem_info_t * gasnetc_find_mem_info(void *addr, int nbytes, gex_Rank_t ra // TODO-future: use UCS rcache gasnetc_mem_info_t *mem_info; + // TODO: thread safety in list traversal? GASNETI_LIST_FOREACH(mem_info, &gasneti_ucx_module.ep_tbl[rank].mem_tbl, gasnetc_mem_info_t) { if (GASNETC_ADDR_IN_RANGE(mem_info->addr, mem_info->length, addr, nbytes)) { @@ -755,7 +756,7 @@ void gasnetc_ProcessRecv(void *buf, size_t size) } } -GASNETI_INLINE(gasneti_ucx_progress) +GASNETI_INLINE(gasnetc_ucx_progress) int gasnetc_ucx_progress(void) { int status; @@ -1009,7 +1010,7 @@ void gasnetc_poll_snd(gasnetc_lock_mode_t lmode GASNETI_THREAD_FARG) #endif } -GASNETI_INLINE(gasneti_probe_recv_complete) +GASNETI_INLINE(gasneti_req_probe_complete) gasnetc_ucx_request_t *gasneti_req_probe_complete(gasneti_list_t *req_list) { gasnetc_ucx_request_t *req; diff --git a/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_ratomic.c b/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_ratomic.c index abfa7ec5d7c3..b5243b732539 100644 --- a/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_ratomic.c +++ b/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_ratomic.c @@ -569,18 +569,17 @@ GASNETE_DT_INT_APPLY(GASNETE_UCXRATOMIC_TBL) #define GASNETE_UCXRATOMIC_BADOPS_gex_dt_I64 (GASNETE_UCXRATOMIC_NO_MULT | 
GASNETE_UCXRATOMIC_NO_MINMAX) #define GASNETE_UCXRATOMIC_BADOPS_gex_dt_U64 (GASNETE_UCXRATOMIC_NO_MULT | GASNETE_UCXRATOMIC_NO_MINMAX) -void gasnete_ucxratomic_create_hook( - gasneti_AD_t real_ad, - gasneti_TM_t real_tm, - gex_DT_t dt, - gex_OP_t ops, - gex_Flags_t flags) +void gasnete_ucxratomic_init_hook(gasneti_AD_t real_ad) { + gex_DT_t dt = real_ad->_dt; + gex_OP_t ops = real_ad->_ops; + #define GASNETE_UCXRATOMIC_TBL_CASE(dtcode) \ case dtcode##_dtype: \ if (ops & GASNETE_UCXRATOMIC_BADOPS##dtcode) goto use_am; \ real_ad->_fn_tbl = (gasnete_ratomic_fn_tbl_t) &gasnete_ucxratomic##dtcode##_fn_tbl; \ real_ad->_tools_safe = 0; \ + GASNETI_TRACE_PRINTF(O,("gex_AD_Create(dt=%d, ops=0x%x) -> UCX", (int)dt, (unsigned int)ops)); \ return; switch(dt) { GASNETE_DT_INT_APPLY(GASNETE_UCXRATOMIC_TBL_CASE) @@ -589,6 +588,6 @@ void gasnete_ucxratomic_create_hook( #undef GASNETE_UCXRATOMIC_TBL_CASE use_am: - gasnete_amratomic_create_hook(real_ad, real_tm, dt, ops, flags); + gasnete_amratomic_init_hook(real_ad); return; } diff --git a/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_ratomic_fwd.h b/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_ratomic_fwd.h index 69e70807556d..eea0dd4901d6 100644 --- a/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_ratomic_fwd.h +++ b/third-party/gasnet/gasnet-src/ucx-conduit/gasnet_ratomic_fwd.h @@ -85,8 +85,6 @@ // with atomics performed by the local CPU. // **** -#define GASNETI_AD_CREATE_HOOK gasnete_ucxratomic_create_hook - // 2a. GASNETE_RATOMIC_ALWAYS_TOOLS_SAFE_gex_dt_* // // Assert that all atomics implementations possible in the current build @@ -122,4 +120,19 @@ #define GASNETE_RATOMIC_ALWAYS_TOOLS_SAFE_gex_dt_DBL 1 #endif +// 3. 
Hooks for conduit-specific extension to create and destroy +// +// These hooks are analogous to the following: +// GASNETC_CLIENT_EXTRA_DECLS +// GASNETC_CLIENT_INIT_HOOK +// GASNETC_CLIENT_FINI_HOOK +// GASNETC_SIZEOF_CLIENT_T +// which are documented in template-conduit/gasnet_core_fwd.h + +#define GASNETC_AD_EXTRA_DECLS \ + extern void gasnete_ucxratomic_init_hook(gasneti_AD_t); +#define GASNETC_AD_INIT_HOOK(i_ad) gasnete_ucxratomic_init_hook(i_ad) +//#define GASNETC_AD_FINI_HOOK(i_ad) (###) +//#define GASNETC_SIZEOF_AD_T() (###) + #endif // _GASNET_RATOMIC_FWD_H diff --git a/third-party/gasnet/gasnet-src/udp-conduit/Makefile.am b/third-party/gasnet/gasnet-src/udp-conduit/Makefile.am index 5d81dcd14758..b2c8a0cc201b 100644 --- a/third-party/gasnet/gasnet-src/udp-conduit/Makefile.am +++ b/third-party/gasnet/gasnet-src/udp-conduit/Makefile.am @@ -55,6 +55,10 @@ $(top_builddir)/other/amudp/amx_portable_platform.h: $(top_builddir)/other/amudp # forced targets in this file, and should probably use LIBINCLUDES/LIBDEFINES CONDUIT_SPECIAL_OBJS = +# memory kinds supported by this conduit (space separated) +# to be included in libgasnet if support was enabled at configure time +CONDUIT_KINDS = + # the default job spawn command to be used for "make run-tests" # The following substitutions are performed: # %P = program executable name diff --git a/third-party/gasnet/gasnet-src/udp-conduit/Makefile.in b/third-party/gasnet/gasnet-src/udp-conduit/Makefile.in index f4b6a7c0b01d..a9ac31d816f6 100644 --- a/third-party/gasnet/gasnet-src/udp-conduit/Makefile.in +++ b/third-party/gasnet/gasnet-src/udp-conduit/Makefile.in @@ -279,6 +279,10 @@ CONFIGURE_ARGS = @CONFIGURE_ARGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CROSS_COMPILING = @CROSS_COMPILING@ +CUDA_UVA_CFLAGS = @CUDA_UVA_CFLAGS@ +CUDA_UVA_LDFLAGS = @CUDA_UVA_LDFLAGS@ +CUDA_UVA_LIBS = @CUDA_UVA_LIBS@ +CUDA_guess_prog = @CUDA_guess_prog@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ @@ -304,6 +308,8 @@ ENVCMD = @ENVCMD@ 
EXEEXT = @EXEEXT@ EXESUFFIX = @EXESUFFIX@ FH_CFLAGS = @FH_CFLAGS@ +GASNETI_HWLOC_BIND_PATH = @GASNETI_HWLOC_BIND_PATH@ +GASNETI_HWLOC_CALC_PATH = @GASNETI_HWLOC_CALC_PATH@ GASNETI_PTR_BITS = @GASNETI_PTR_BITS@ GASNET_EXTRA_DEFINES = @GASNET_EXTRA_DEFINES@ GASNET_IBV_SPAWNER_CONF = @GASNET_IBV_SPAWNER_CONF@ @@ -332,6 +338,10 @@ GSTACK_PATH = @GSTACK_PATH@ HAVE_INTTYPES_H = @HAVE_INTTYPES_H@ HAVE_STDINT_H = @HAVE_STDINT_H@ HAVE_SYS_TYPES_H = @HAVE_SYS_TYPES_H@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_guess_prog = @HWLOC_guess_prog@ IBV_CFLAGS = @IBV_CFLAGS@ IBV_LDFLAGS = @IBV_LDFLAGS@ IBV_LIBS = @IBV_LIBS@ @@ -533,6 +543,10 @@ CONDUIT_EXTRADEPS = \ # forced targets in this file, and should probably use LIBINCLUDES/LIBDEFINES CONDUIT_SPECIAL_OBJS = +# memory kinds supported by this conduit (space separated) +# to be included in libgasnet if support was enabled at configure time +CONDUIT_KINDS = + # the default job spawn command to be used for "make run-tests" # The following substitutions are performed: # %P = program executable name diff --git a/third-party/gasnet/gasnet-src/udp-conduit/README b/third-party/gasnet/gasnet-src/udp-conduit/README index 16703001121d..53150ec8d6db 100644 --- a/third-party/gasnet/gasnet-src/udp-conduit/README +++ b/third-party/gasnet/gasnet-src/udp-conduit/README @@ -192,6 +192,7 @@ Similarly, one can use the srun command in SLURM: $ setenv GASNET_SPAWNFN 'C' $ setenv GASNET_CSPAWN_CMD 'srun -n %N %C' +$ setenv GASNET_WORKER_RANK "SLURM_PROCID" // optional, see docs below Recognized environment variables: --------------------------------- @@ -279,6 +280,16 @@ Recognized environment variables: identify such entries as ineligible for shared-memory communication. Default is "0" (hostnames and IP addresses are used). +* GASNET_WORKER_RANK + May be set to force a particular rank assignment for worker processes. 
+ If set by any process, then it must be set by all worker processes before + init to a disjoint set of integers in the range [0..numprocs). + It may alternatively be set to the name of another environment variable in the + worker environment from which to retrieve the assignment (e.g. "SLURM_PROCID", + "PMIX_RANK", "OMPI_COMM_WORLD_RANK", etc). + Default behavior is arbitrary rank assignment that groups co-located processes. + + Known problems: --------------- diff --git a/third-party/gasnet/gasnet-src/udp-conduit/gasnet_core.c b/third-party/gasnet/gasnet-src/udp-conduit/gasnet_core.c index 739c57574836..de9818622a8b 100644 --- a/third-party/gasnet/gasnet-src/udp-conduit/gasnet_core.c +++ b/third-party/gasnet/gasnet-src/udp-conduit/gasnet_core.c @@ -18,15 +18,8 @@ GASNETI_IDENT(gasnetc_IdentString_Version, "$GASNetCoreLibraryVersion: " GASNET_CORE_VERSION_STR " $"); GASNETI_IDENT(gasnetc_IdentString_Name, "$GASNetCoreLibraryName: " GASNET_CORE_NAME_STR " $"); -gex_AM_Entry_t const *gasnetc_get_handlertable(void); - gex_AM_Entry_t *gasnetc_handler; // TODO-EX: will be replaced with per-EP tables -// TODO-EX: This is a hack to support multiple segments w/ a single AM EP -#ifndef GASNETC_MOCK_EVERYTHING -#define GASNETC_MOCK_EVERYTHING 1 -#endif - static void gasnetc_traceoutput(int); static uint64_t gasnetc_networkpid; @@ -369,10 +362,10 @@ static int gasnetc_attach_primary(void) { gasnetc_enteringHandler_hook, gasnetc_leavingHandler_hook)); #endif - #if GASNETC_MOCK_EVERYTHING - retval = AM_SetSeg(gasnetc_endpoint, NULL, (uintptr_t)-1); - if (retval != AM_OK) INITERR(RESOURCE, "AM_SetSeg() failed"); - #endif + // register all of memory as the AMX-level segment + // this is needed for multi-segment support (aux + client at a minimum) + retval = AM_SetSeg(gasnetc_endpoint, NULL, (uintptr_t)-1); + if (retval != AM_OK) INITERR(RESOURCE, "AM_SetSeg() failed"); /* ------------------------------------------------------------------------------------ */ /* primary attach 
complete */ @@ -402,25 +395,16 @@ static int gasnetc_attach_primary(void) { static int gasnetc_attach_segment(gex_Segment_t *segment_p, gex_TM_t tm, uintptr_t segsize, - gasneti_bootstrapExchangefn_t exchangefn, - gex_Flags_t flags) { - int retval = GASNET_OK; - + gex_Flags_t flags) +{ /* ------------------------------------------------------------------------------------ */ /* register client segment */ - gasnet_seginfo_t myseg = gasneti_segmentAttach(segment_p, 0, tm, segsize, exchangefn, flags); + gasnet_seginfo_t myseg = gasneti_segmentAttach(segment_p, tm, segsize, flags); -#if !GASNETC_MOCK_EVERYTHING - /* AMUDP allows arbitrary registration with no further action */ - if (segsize) { - retval = AM_SetSeg(gasnetc_endpoint, myseg.addr, myseg.size); - if (retval != AM_OK) INITERR(RESOURCE, "AM_SetSeg() failed"); - } -#endif + // Have called AM_SetSeg() previously w/ an "everything" segment -done: - GASNETI_RETURN(retval); + return GASNET_OK; } /* ------------------------------------------------------------------------------------ */ // TODO-EX: this is a candidate for factorization (once we understand the per-conduit variations) @@ -433,7 +417,7 @@ extern int gasnetc_attach( gex_TM_t _tm, GASNETI_TRACE_PRINTF(C,("gasnetc_attach(table (%i entries), segsize=%"PRIuPTR")", numentries, segsize)); - gasneti_TM_t tm = gasneti_import_tm(_tm); + gasneti_TM_t tm = gasneti_import_tm_nonpair(_tm); gasneti_EP_t ep = tm->_ep; if (!gasneti_init_done) @@ -458,13 +442,13 @@ extern int gasnetc_attach( gex_TM_t _tm, #if GASNET_SEGMENT_FAST || GASNET_SEGMENT_LARGE /* register client segment */ gex_Segment_t seg; // g2ex segment is automatically saved by a hook - if (GASNET_OK != gasnetc_attach_segment(&seg, _tm, segsize, gasneti_defaultExchange, GASNETI_FLAG_INIT_LEGACY)) + if (GASNET_OK != gasnetc_attach_segment(&seg, _tm, segsize, GASNETI_FLAG_INIT_LEGACY)) GASNETI_RETURN_ERRR(RESOURCE,"Error attaching segment"); #endif AMLOCK(); /* register client handlers */ - if (table && 
gasneti_amregister_legacy(ep->_amtbl, table, numentries) != GASNET_OK) + if (table && gasneti_amregister_legacy(ep, table, numentries) != GASNET_OK) INITERR(RESOURCE,"Error registering handlers"); AMUNLOCK(); @@ -508,18 +492,22 @@ extern int gasnetc_Client_Init( #endif } + // Do NOT move this prior to the gasneti_trace_init() call + GASNETI_TRACE_PRINTF(O,("gex_Client_Init: name='%s' argc_p=%p argv_p=%p flags=%d", + clientName, (void *)argc, (void *)argv, flags)); + // allocate the client object - gasneti_Client_t client = gasneti_alloc_client(clientName, flags, 0); + gasneti_Client_t client = gasneti_alloc_client(clientName, flags); *client_p = gasneti_export_client(client); // create the initial endpoint with internal handlers - if (gasnetc_EP_Create(ep_p, *client_p, flags)) + if (gex_EP_Create(ep_p, *client_p, GEX_EP_CAPABILITY_ALL, flags)) GASNETI_RETURN_ERRR(RESOURCE,"Error creating initial endpoint"); gasneti_EP_t ep = gasneti_import_ep(*ep_p); gasnetc_handler = ep->_amtbl; // TODO-EX: this global variable to be removed // TODO-EX: create team - gasneti_TM_t tm = gasneti_alloc_tm(ep, gasneti_mynode, gasneti_nodes, flags, 0); + gasneti_TM_t tm = gasneti_alloc_tm(ep, gasneti_mynode, gasneti_nodes, flags); *tm_p = gasneti_export_tm(tm); if (0 == (flags & GASNETI_FLAG_INIT_LEGACY)) { @@ -554,60 +542,37 @@ extern int gasnetc_Segment_Attach( /* create a segment collectively */ // TODO-EX: this implementation only works *once* - // TODO-EX: should be using the team's exchange function if possible // TODO-EX: need to pass proper flags (e.g. 
pshm and bind) instead of 0 - if (GASNET_OK != gasnetc_attach_segment(segment_p, tm, length, gasneti_defaultExchange, 0)) + if (GASNET_OK != gasnetc_attach_segment(segment_p, tm, length, 0)) GASNETI_RETURN_ERRR(RESOURCE,"Error attaching segment"); return GASNET_OK; } -extern int gasnetc_EP_Create(gex_EP_t *ep_p, - gex_Client_t client, - gex_Flags_t flags) { - /* (###) add code here to create an endpoint belonging to the given client */ -#if 1 // TODO-EX: This is a stub, which assumes 1 implicit call from ClientCreate - static gasneti_mutex_t lock = GASNETI_MUTEX_INITIALIZER; - gasneti_mutex_lock(&lock); - static int once = 0; - int prev = once; - once = 1; - gasneti_mutex_unlock(&lock); - if (prev) gasneti_fatalerror("Multiple endpoints are not yet implemented"); -#endif +extern int gasnetc_Segment_Create( + gex_Segment_t *segment_p, + gex_Client_t client, + gex_Addr_t address, + uintptr_t length, + gex_MK_t kind, + gex_Flags_t flags) +{ + gasneti_assert(segment_p); - gasneti_EP_t ep = gasneti_alloc_ep(gasneti_import_client(client), flags, 0); - *ep_p = gasneti_export_ep(ep); - - { /* core API handlers */ - gex_AM_Entry_t *ctable = (gex_AM_Entry_t *)gasnetc_get_handlertable(); - int len = 0; - int numreg = 0; - gasneti_assert(ctable); - while (ctable[len].gex_fnptr) len++; /* calc len */ - if (gasneti_amregister(ep->_amtbl, ctable, len, GASNETC_HANDLER_BASE, GASNETE_HANDLER_BASE, 0, &numreg) != GASNET_OK) - GASNETI_RETURN_ERRR(RESOURCE,"Error registering core API handlers"); - gasneti_assert_int(numreg ,==, len); - } + // Create the Segment object, allocating memory if appropriate + gasneti_Client_t i_client = gasneti_import_client(client); + int rc = gasneti_segmentCreate(segment_p, i_client, address, length, kind, flags); - { /* extended API handlers */ - gex_AM_Entry_t *etable = (gex_AM_Entry_t *)gasnete_get_handlertable(); - int len = 0; - int numreg = 0; - gasneti_assert(etable); - while (etable[len].gex_fnptr) len++; /* calc len */ - if 
(gasneti_amregister(ep->_amtbl, etable, len, GASNETE_HANDLER_BASE, GASNETI_CLIENT_HANDLER_BASE, 0, &numreg) != GASNET_OK) - GASNETI_RETURN_ERRR(RESOURCE,"Error registering extended API handlers"); - gasneti_assert_int(numreg ,==, len); - } + // Have called AM_SetSeg() previously w/ an "everything" segment + // Thus no registration required - return GASNET_OK; + return rc; } extern int gasnetc_EP_RegisterHandlers(gex_EP_t ep, gex_AM_Entry_t *table, size_t numentries) { - return gasneti_amregister_client(gasneti_import_ep(ep)->_amtbl, table, numentries); + return gasneti_amregister_client(gasneti_import_ep(ep), table, numentries); } /* ------------------------------------------------------------------------------------ */ static int gasnetc_exitcalled = 0; @@ -617,16 +582,17 @@ static void gasnetc_traceoutput(int exitcode) { gasneti_trace_finish(); } } -extern void gasnetc_trace_finish(void) { +extern void gasnetc_stats_dump(int reset) { /* dump AMUDP statistics */ - if (GASNETI_STATS_ENABLED(C) ) { + if (GASNETI_STATS_ENABLED(C) || reset) { const char *statdump; int isglobal = 0; int retval = 0; amudp_stats_t stats = AMUDP_initial_stats; /* bug 2181 - lock state is unknown, eg we may be in handler context */ - AMLOCK_CAUTIOUS(); + int shouldunlock; + AMLOCK_CAUTIOUS(shouldunlock); if (isglobal) { /* TODO: tricky bit - if this exit is collective, we can display more interesting and useful @@ -648,8 +614,10 @@ extern void gasnetc_trace_finish(void) { } else { GASNETI_AM_SAFE_NORETURN(retval, AMUDP_GetEndpointStatistics(gasnetc_endpoint, &stats)); /* get statistics */ } + if (reset && !retval) + GASNETI_AM_SAFE_NORETURN(retval, AMUDP_ResetEndpointStatistics(gasnetc_endpoint)); - if ((gasneti_mynode == 0 || !isglobal) && !retval) { + if (GASNETI_STATS_ENABLED(C) && (gasneti_mynode == 0 || !isglobal) && !retval) { GASNETI_STATS_PRINTF(C,("--------------------------------------------------------------------------------")); GASNETI_STATS_PRINTF(C,("AMUDP Statistics:")); if 
(!isglobal) @@ -658,6 +626,7 @@ extern void gasnetc_trace_finish(void) { GASNETI_STATS_PRINTF(C,("\n%s",statdump)); /* note, dump has embedded '%' chars */ GASNETI_STATS_PRINTF(C,("--------------------------------------------------------------------------------")); } + if (shouldunlock) AMUNLOCK(); } } extern void gasnetc_fatalsignal_callback(int sig) { @@ -692,7 +661,8 @@ extern void gasnetc_exit(int exitcode) { /* bug2181: try to prevent races where we exit while other local pthreads are in AMUDP can't use a blocking lock here, because may be in a signal context */ - AMLOCK_CAUTIOUS(); + int dummy; + AMLOCK_CAUTIOUS(dummy); AMUDP_SPMDExit(exitcode); gasneti_fatalerror("AMUDP_SPMDExit failed!"); @@ -892,12 +862,7 @@ int gasnetc_AMRequestLong( gex_TM_t tm, gex_Rank_t rank, gex_AM_Index_t handler source_addr, nbytes, dest_addr, flags, numargs, argptr GASNETI_THREAD_PASS); } else { - uintptr_t dest_offset; -#if GASNETC_MOCK_EVERYTHING - dest_offset = (uintptr_t)dest_addr; -#else - dest_offset = ((uintptr_t)dest_addr) - (uintptr_t)gasneti_client_seginfo(tm, rank)->addr; -#endif + uintptr_t dest_offset = (uintptr_t)dest_addr; AMLOCK_TOSEND(); GASNETI_AM_SAFE_NORETURN(retval, @@ -1031,14 +996,7 @@ int gasnetc_AMReplyLong( gex_Token_t token, gex_AM_Index_t handler, source_addr, nbytes, dest_addr, flags, numargs, argptr); } else { - uintptr_t dest_offset; - -#if GASNETC_MOCK_EVERYTHING - dest_offset = (uintptr_t)dest_addr; -#else - gex_Rank_t dest = gasnetc_msgsource(token); - dest_offset = ((uintptr_t)dest_addr) - (uintptr_t)gasneti_client_seginfo(gasneti_THUNK_TM, dest)->addr; -#endif + uintptr_t dest_offset = (uintptr_t)dest_addr; AM_ASSERT_LOCKED(); GASNETI_AM_SAFE_NORETURN(retval, @@ -1168,6 +1126,7 @@ extern int gasnetc_hsl_trylock(gex_HSL_t *hsl) { break; default: gasneti_unreachable_error(("Unknown handler type in gasnetc_enteringHandler_hook(): 0x%x",(int)cat)); } + GASNETI_HANDLER_ENTER(isReq); } extern void gasnetc_leavingHandler_hook(amudp_category_t cat, int 
isReq) { switch (cat) { @@ -1182,6 +1141,7 @@ extern int gasnetc_hsl_trylock(gex_HSL_t *hsl) { break; default: gasneti_unreachable_error(("Unknown handler type in gasnetc_leavingHandler_hook(): 0x%x",(int)cat)); } + GASNETI_HANDLER_LEAVE(isReq); } #endif @@ -1193,9 +1153,7 @@ extern int gasnetc_hsl_trylock(gex_HSL_t *hsl) { (for internal conduit use in bootstrapping, job management, etc.) */ static gex_AM_Entry_t const gasnetc_handlers[] = { - #ifdef GASNETC_COMMON_HANDLERS - GASNETC_COMMON_HANDLERS(), - #endif + GASNETC_COMMON_HANDLERS(), /* ptr-width independent handlers */ diff --git a/third-party/gasnet/gasnet-src/udp-conduit/gasnet_core.h b/third-party/gasnet/gasnet-src/udp-conduit/gasnet_core.h index 992b0004b257..a88ace605c28 100644 --- a/third-party/gasnet/gasnet-src/udp-conduit/gasnet_core.h +++ b/third-party/gasnet/gasnet-src/udp-conduit/gasnet_core.h @@ -24,40 +24,46 @@ ============== */ -extern void gasnetc_exit(int exitcode) GASNETI_NORETURN; +extern void gasnetc_exit(int _exitcode) GASNETI_NORETURN; GASNETI_NORETURNP(gasnetc_exit) #define gasnet_exit gasnetc_exit /* Some conduits permit gasnet_init(NULL,NULL). Define to 1 if this conduit supports this extension, or to 0 otherwise. 
*/ #define GASNET_NULL_ARGV_OK 1 + +// convenience declaration +extern void AMUDP_SPMDSetProc(int); /* ------------------------------------------------------------------------------------ */ extern int gasnetc_Client_Init( - gex_Client_t *client_p, - gex_EP_t *ep_p, - gex_TM_t *tm_p, - const char *clientName, - int *argc, - char ***argv, - gex_Flags_t flags); + gex_Client_t *_client_p, + gex_EP_t *_ep_p, + gex_TM_t *_tm_p, + const char *_clientName, + int *_argc, + char ***_argv, + gex_Flags_t _flags); // gasnetex.h handles name-shifting of gex_Client_Init() extern int gasnetc_Segment_Attach( - gex_Segment_t *segment_p, - gex_TM_t tm, - uintptr_t length); + gex_Segment_t *_segment_p, + gex_TM_t _tm, + uintptr_t _length); #define gex_Segment_Attach gasnetc_Segment_Attach -extern int gasnetc_EP_Create( - gex_EP_t *ep_p, - gex_Client_t client, - gex_Flags_t flags); -#define gex_EP_Create gasnetc_EP_Create +extern int gasnetc_Segment_Create( + gex_Segment_t *_segment_p, + gex_Client_t _client, + gex_Addr_t _address, + uintptr_t _length, + gex_MK_t _kind, + gex_Flags_t _flags); +#define gex_Segment_Create gasnetc_Segment_Create extern int gasnetc_EP_RegisterHandlers( - gex_EP_t ep, - gex_AM_Entry_t *table, - size_t numentries); + gex_EP_t _ep, + gex_AM_Entry_t *_table, + size_t _numentries); #define gex_EP_RegisterHandlers gasnetc_EP_RegisterHandlers /* ------------------------------------------------------------------------------------ */ /* @@ -101,11 +107,11 @@ typedef struct { #define gex_HSL_Unlock(hsl) #define gex_HSL_Trylock(hsl) GASNET_OK #else - extern void gasnetc_hsl_init (gex_HSL_t *hsl); - extern void gasnetc_hsl_destroy(gex_HSL_t *hsl); - extern void gasnetc_hsl_lock (gex_HSL_t *hsl); - extern void gasnetc_hsl_unlock (gex_HSL_t *hsl); - extern int gasnetc_hsl_trylock(gex_HSL_t *hsl) GASNETI_WARN_UNUSED_RESULT; + extern void gasnetc_hsl_init (gex_HSL_t *_hsl); + extern void gasnetc_hsl_destroy(gex_HSL_t *_hsl); + extern void gasnetc_hsl_lock (gex_HSL_t 
*_hsl); + extern void gasnetc_hsl_unlock (gex_HSL_t *_hsl); + extern int gasnetc_hsl_trylock(gex_HSL_t *_hsl) GASNETI_WARN_UNUSED_RESULT; #define gex_HSL_Init gasnetc_hsl_init #define gex_HSL_Destroy gasnetc_hsl_destroy @@ -126,12 +132,28 @@ typedef struct { #define gex_AM_LUBReplyLong() ((size_t)AM_MaxLong()) // TODO-EX: Can these be improved upon, at least for PSHM case -#define gasnetc_AM_MaxRequestMedium(tm,rank,lc_opt,flags,nargs) gex_AM_LUBRequestMedium() -#define gasnetc_AM_MaxReplyMedium(tm,rank,lc_opt,flags,nargs) gex_AM_LUBReplyMedium() -#define gasnetc_AM_MaxRequestLong(tm,rank,lc_opt,flags,nargs) gex_AM_LUBRequestLong() -#define gasnetc_AM_MaxReplyLong(tm,rank,lc_opt,flags,nargs) gex_AM_LUBReplyLong() -#define gasnetc_Token_MaxReplyMedium(token,lc_opt,flags,nargs) gex_AM_LUBReplyMedium() -#define gasnetc_Token_MaxReplyLong(token,lc_opt,flags,nargs) gex_AM_LUBReplyLong() +#define gasnetc_AM_MaxRequestMedium(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS5(tm,rank,lc_opt,flags,nargs),gex_AM_LUBRequestMedium()) +#define gasnetc_AM_MaxReplyMedium(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS5(tm,rank,lc_opt,flags,nargs),gex_AM_LUBReplyMedium()) +#define gasnetc_Token_MaxReplyMedium(token,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS4(token,lc_opt,flags,nargs),gex_AM_LUBReplyMedium()) + +#define gasnetc_AM_MaxRequestLong(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS4(tm,rank,lc_opt,nargs), \ + ((flags) & GEX_FLAG_AM_PREPARE_LEAST_ALLOC \ + ? GASNETC_REF_NPAM_MAX_ALLOC \ + : gex_AM_LUBRequestLong())) +#define gasnetc_AM_MaxReplyLong(tm,rank,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS4(tm,rank,lc_opt,nargs), \ + ((flags) & GEX_FLAG_AM_PREPARE_LEAST_ALLOC \ + ? GASNETC_REF_NPAM_MAX_ALLOC \ + : gex_AM_LUBReplyLong())) +#define gasnetc_Token_MaxReplyLong(token,lc_opt,flags,nargs) \ + (GASNETI_UNUSED_ARGS3(token,lc_opt,nargs), \ + ((flags) & GEX_FLAG_AM_PREPARE_LEAST_ALLOC \ + ? 
GASNETC_REF_NPAM_MAX_ALLOC \ + : gex_AM_LUBReplyLong())) /* ------------------------------------------------------------------------------------ */ /* diff --git a/third-party/gasnet/gasnet-src/udp-conduit/gasnet_core_fwd.h b/third-party/gasnet/gasnet-src/udp-conduit/gasnet_core_fwd.h index 4becc504fd0c..cebafba895d6 100644 --- a/third-party/gasnet/gasnet-src/udp-conduit/gasnet_core_fwd.h +++ b/third-party/gasnet/gasnet-src/udp-conduit/gasnet_core_fwd.h @@ -45,6 +45,9 @@ #define GASNET_MAXNODES AMUDP_MAX_NUMTRANSLATIONS + // uncomment for each MK_CLASS which the conduit supports. leave commented otherwise +//#define GASNET_HAVE_MK_CLASS_CUDA_UVA GASNETI_MK_CLASS_CUDA_UVA_ENABLED + /* conduits should define GASNETI_CONDUIT_THREADS to 1 if they have one or more "private" threads which may be used to run AM handlers, even under GASNET_SEQ this ensures locking is still done correctly, etc @@ -80,12 +83,12 @@ your conduit must provide the V-suffixed functions for any of these that are not defined. */ -/* #define GASNETC_HAVE_NP_REQ_MEDIUM 1 */ -/* #define GASNETC_HAVE_NP_REP_MEDIUM 1 */ -/* #define GASNETC_HAVE_NP_REQ_LONG 1 */ -/* #define GASNETC_HAVE_NP_REP_LONG 1 */ +/* #define GASNET_NATIVE_NP_ALLOC_REQ_MEDIUM 1 */ +/* #define GASNET_NATIVE_NP_ALLOC_REP_MEDIUM 1 */ +/* #define GASNET_NATIVE_NP_ALLOC_REQ_LONG 1 */ +/* #define GASNET_NATIVE_NP_ALLOC_REP_LONG 1 */ - /* uncomment for each GASNETC_HAVE_NP_* enabled above if the Commit function + /* uncomment for each GASNET_NATIVE_NP_ALLOC_* enabled above if the Commit function has the numargs argument even in an NDEBUG build (it is always passed in DEBUG builds). */ @@ -98,21 +101,65 @@ include a call to gasneti_AMPoll (or equivalent) for progress. The preferred implementation is to Poll only in the M-suffixed calls and not the V-suffixed calls (and GASNETC_REQUESTV_POLLS undefined). - Used if (and only if) any of the GASNETC_HAVE_NP_* values above are unset. 
+ Used if (and only if) any of the GASNET_NATIVE_NP_ALLOC_* values above are unset. */ #define GASNETC_REQUESTV_POLLS 1 + // uncomment if conduit provides a gasnetc-prefixed override + // TODO: this should be a hook rather than an override +//#define GASNETC_HAVE_EP_PUBLISHBOUNDSEGMENT 1 + + /* If your conduit uses conduit-specific extensions to the basic object + types, then define the corresponding SIZEOF macros below to return + the total length of the conduit-specific object, including the prefix + portion which must be the matching GASNETI_[OBJECT]_COMMON fields. + Similarly, *_HOOK macros should be defined as callbacks to perform + conduit-specific initialization and finalization tasks, if any. + If a given SIZEOF macro is defined, but the corresponding INIT_HOOK is + not, then space beyond the COMMON fields will be zero-initialized. + In all cases, GASNETC_[OBJECT]_EXTRA_DECLS provides the place to + provide necessary declarations (since this file is included very early). + */ + +//#define GASNETC_CLIENT_EXTRA_DECLS (###) +//#define GASNETC_CLIENT_INIT_HOOK(i_client) (###) +//#define GASNETC_CLIENT_FINI_HOOK(i_client) (###) +//#define GASNETC_SIZEOF_CLIENT_T() (###) + +//#define GASNETC_SEGMENT_EXTRA_DECLS (###) +//#define GASNETC_SEGMENT_INIT_HOOK(i_segment) (###) +//#define GASNETC_SEGMENT_FINI_HOOK(i_segment) (###) +//#define GASNETC_SIZEOF_SEGMENT_T() (###) + +//#define GASNETC_TM_EXTRA_DECLS (###) +//#define GASNETC_TM_INIT_HOOK(i_tm) (###) +//#define GASNETC_TM_FINI_HOOK(i_tm) (###) +//#define GASNETC_SIZEOF_TM_T() (###) + +//#define GASNETC_EP_EXTRA_DECLS (###) +//#define GASNETC_EP_INIT_HOOK(i_ep) (###) +//#define GASNETC_EP_FINI_HOOK(i_ep) (###) +//#define GASNETC_SIZEOF_EP_T() (###) + +// If conduit supports GASNET_MAXEPS!=1, set default and (optional) max values here. +// Leaving GASNETC_MAXEPS_DFLT unset will result in GASNET_MAXEPS=1, independent +// of all other settings (appropriate for conduits without multi-ep support). 
+// If set, GASNETC_MAXEPS_MAX is used to limit a user's --with-maxeps (and a +// global default limit is used otherwise). +//#define GASNETC_MAXEPS_DFLT ### // default num endpoints this conduit supports, undef means no multi-ep support +//#define GASNETC_MAXEPS_MAX ### // leave unset for default + /* this can be used to add conduit-specific statistical collection values (see gasnet_trace.h) */ #define GASNETC_CONDUIT_STATS(CNT,VAL,TIME) -#define GASNETC_TRACE_FINISH() gasnetc_trace_finish() -extern void gasnetc_trace_finish(void); +extern void gasnetc_stats_dump(int _reset); +#define GASNETC_STATS_DUMP(reset) gasnetc_stats_dump(reset) #define GASNETC_FATALSIGNAL_CALLBACK(sig) gasnetc_fatalsignal_callback(sig) -extern void gasnetc_fatalsignal_callback(int sig); +extern void gasnetc_fatalsignal_callback(int _sig); -extern void _gasnetc_set_waitmode(int wait_mode); +extern void _gasnetc_set_waitmode(int _wait_mode); #define gasnetc_set_waitmode(wait_mode) _gasnetc_set_waitmode(wait_mode) /* udp-conduit's default spawner produces random node placements. 
diff --git a/third-party/gasnet/gasnet-src/udp-conduit/gasnet_core_internal.h b/third-party/gasnet/gasnet-src/udp-conduit/gasnet_core_internal.h index 3138b174cf1f..d36599b16529 100644 --- a/third-party/gasnet/gasnet-src/udp-conduit/gasnet_core_internal.h +++ b/third-party/gasnet/gasnet-src/udp-conduit/gasnet_core_internal.h @@ -50,13 +50,15 @@ extern volatile int gasnetc_AMLockYield; #else #define _AMLOCK_CAUTIOUS_HELPER() ((void)0) #endif -#define AMLOCK_CAUTIOUS() do { \ +#define AMLOCK_CAUTIOUS(shouldunlock) do { \ int _i; \ + shouldunlock = 0; \ gasnetc_AMLockYield = 1; \ for (_i=0; _i < 50; _i++) { \ _AMLOCK_CAUTIOUS_HELPER(); \ - if (!gasneti_mutex_trylock(&gasnetc_AMlock)) break; \ - else gasneti_sched_yield(); \ + if (!gasneti_mutex_trylock(&gasnetc_AMlock)) { \ + shouldunlock = 1; break; \ + } else gasneti_sched_yield(); \ } \ gasnetc_AMLockYield = 0; \ } while (0) @@ -110,7 +112,7 @@ const char *gasneti_AMErrorName(int errval) { } while (0) /* ------------------------------------------------------------------------------------ */ -#define _hidx_gasnetc_exchg_reqh (GASNETC_HANDLER_BASE+0) +#define _hidx_gasnetc_hbarr_reqh (GASNETC_HANDLER_BASE+0) /* add new core API handlers here and to the bottom of gasnet_core.c */ /* ------------------------------------------------------------------------------------ */ diff --git a/third-party/gasnet/gasnet-src/version.git b/third-party/gasnet/gasnet-src/version.git index 00d6de62eed1..d8d643bf5b56 100644 --- a/third-party/gasnet/gasnet-src/version.git +++ b/third-party/gasnet/gasnet-src/version.git @@ -1 +1 @@ -gex-2020.10.0 +gex-2021.3.0-0-g9280a96 diff --git a/tools/c2chapel/Makefile b/tools/c2chapel/Makefile index bf63d92afa4a..a45fa3de3186 100644 --- a/tools/c2chapel/Makefile +++ b/tools/c2chapel/Makefile @@ -35,7 +35,7 @@ link=$(bdir)/c2chapel # Note, this version is used only for the fake headers, # but it should probably match third-party/chpl-venv/c2chapel-requirements.txt -VERSION=2.17 +VERSION=2.20 
TAR=release_v$(VERSION).tar.gz RELEASE=https://github.com/eliben/pycparser/archive/$(TAR) diff --git a/tools/c2chapel/c2chapel.py b/tools/c2chapel/c2chapel.py index 7a7edcb90903..af0f072cd2c3 100755 --- a/tools/c2chapel/c2chapel.py +++ b/tools/c2chapel/c2chapel.py @@ -550,7 +550,11 @@ def findIgnores(): if line.startswith("typedef"): rhs = line.replace(";", "") rhs = rhs.replace("typedef int ", "") + rhs = rhs.replace("typedef uint32_t ", "") rhs = rhs.replace("typedef _Bool ", "") + rhs = rhs.replace("typedef void* ", "") + if "typedef struct" in rhs: + rhs = rhs.split()[-1] ret.add(rhs.strip()) diff --git a/tools/c2chapel/test/justC.chpl b/tools/c2chapel/test/justC.chpl index 5a35c9b38f22..835c8fd6f550 100644 --- a/tools/c2chapel/test/justC.chpl +++ b/tools/c2chapel/test/justC.chpl @@ -11,6 +11,37 @@ extern proc main() : c_int; /* extern type FILE = c_int; +// Opaque struct? +extern record MirBlob {}; + +// Opaque struct? +extern record MirBufferStream {}; + +// Opaque struct? +extern record MirConnection {}; + +// Opaque struct? +extern record MirDisplayConfig {}; + +extern type MirEGLNativeDisplayType = c_void_ptr; + +extern type MirEGLNativeWindowType = c_void_ptr; + +// Opaque struct? +extern record MirPersistentId {}; + +// Opaque struct? +extern record MirPromptSession {}; + +// Opaque struct? +extern record MirScreencast {}; + +// Opaque struct? +extern record MirSurface {}; + +// Opaque struct? +extern record MirSurfaceSpec {}; + extern type _LOCK_RECURSIVE_T = c_int; extern type _LOCK_T = c_int; @@ -281,6 +312,13 @@ extern type wchar_t = c_int; extern type wint_t = c_int; +// Opaque struct? 
+extern record xcb_connection_t {}; + +extern type xcb_visualid_t = uint(32); + +extern type xcb_window_t = uint(32); + extern type z_stream = c_int; */ diff --git a/tools/c2chapel/test/sysCTypes.chpl b/tools/c2chapel/test/sysCTypes.chpl index b3dd5a645924..548a1834ec43 100644 --- a/tools/c2chapel/test/sysCTypes.chpl +++ b/tools/c2chapel/test/sysCTypes.chpl @@ -32,6 +32,37 @@ extern proc unsignedWidths(a : uint(8), b : uint(16), c : uint(32), d : uint(64) /* extern type FILE = c_int; +// Opaque struct? +extern record MirBlob {}; + +// Opaque struct? +extern record MirBufferStream {}; + +// Opaque struct? +extern record MirConnection {}; + +// Opaque struct? +extern record MirDisplayConfig {}; + +extern type MirEGLNativeDisplayType = c_void_ptr; + +extern type MirEGLNativeWindowType = c_void_ptr; + +// Opaque struct? +extern record MirPersistentId {}; + +// Opaque struct? +extern record MirPromptSession {}; + +// Opaque struct? +extern record MirScreencast {}; + +// Opaque struct? +extern record MirSurface {}; + +// Opaque struct? +extern record MirSurfaceSpec {}; + extern type _LOCK_RECURSIVE_T = c_int; extern type _LOCK_T = c_int; @@ -302,6 +333,13 @@ extern type wchar_t = c_int; extern type wint_t = c_int; +// Opaque struct? +extern record xcb_connection_t {}; + +extern type xcb_visualid_t = uint(32); + +extern type xcb_window_t = uint(32); + extern type z_stream = c_int; */