Skip to content

Commit

Permalink
add detection for zen 5 (#56967)
Browse files Browse the repository at this point in the history
ref
llvm/llvm-project@149a150

---------

Co-authored-by: gbaraldi <[email protected]>
  • Loading branch information
simeonschaub and gbaraldi authored Jan 9, 2025
1 parent 11ce171 commit 4250be8
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 4 deletions.
28 changes: 27 additions & 1 deletion src/features_x86.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@
#else
#define JL_X86_64ONLY_VER(x) x
#endif
// The feature detection code mirrors LLVM's Host.cpp (linked below), so the bit positions used there can serve as a reference
// https://github.com/llvm/llvm-project/blob/3f7905733820851bc4f65cb4af693c3101cbf20d/llvm/lib/TargetParser/Host.cpp#L1257

// Each feature is identified by a bit index into the features array, which is used as a bit array.
// The index is computed as follows:
// 32*i + j, where i is the index of the word in the array and j is the bit within that word.
// The mapping from each array index to its CPUID register is documented in _get_host_cpu

// X86 features definition
// EAX=1: ECX
Expand Down Expand Up @@ -79,6 +86,7 @@ JL_FEATURE_DEF(avx512vp2intersect, 32 * 4 + 8, 0)
JL_FEATURE_DEF(serialize, 32 * 4 + 14, 110000)
JL_FEATURE_DEF(tsxldtrk, 32 * 4 + 16, 110000)
JL_FEATURE_DEF(pconfig, 32 * 4 + 18, 0)
// JL_FEATURE_DEF(ibt, 32 * 4 + 20, 0)
JL_FEATURE_DEF_NAME(amx_bf16, 32 * 4 + 22, 110000, "amx-bf16")
JL_FEATURE_DEF(avx512fp16, 32 * 4 + 23, 140000)
JL_FEATURE_DEF_NAME(amx_tile, 32 * 4 + 24, 110000, "amx-tile")
Expand Down Expand Up @@ -110,10 +118,28 @@ JL_FEATURE_DEF(clzero, 32 * 8 + 0, 0)
JL_FEATURE_DEF(wbnoinvd, 32 * 8 + 9, 0)

// EAX=7,ECX=1: EAX
JL_FEATURE_DEF(sha512, 32 * 9 + 0, 170000)
JL_FEATURE_DEF(sm3, 32 * 9 + 1, 170000)
JL_FEATURE_DEF(sm4, 32 * 9 + 2, 170000)
JL_FEATURE_DEF(raoint, 32 * 9 + 3, 170000)
JL_FEATURE_DEF(avxvnni, 32 * 9 + 4, 120000)
JL_FEATURE_DEF(avx512bf16, 32 * 9 + 5, 0)
JL_FEATURE_DEF(cmpccxadd, 32 * 9 + 7, 160000)
JL_FEATURE_DEF_NAME(amx_fp16, 32 * 9 + 21, 160000, "amx-fp16")
JL_FEATURE_DEF(hreset, 32 * 9 + 22, 160000)
JL_FEATURE_DEF(avxifma, 32 * 9 + 23, 160000)

// EAX=7,ECX=1: EBX
JL_FEATURE_DEF(avxvnniint8, 32 * 10 + 4, 160000)
JL_FEATURE_DEF(avxneconvert, 32 * 10 + 5, 160000)
JL_FEATURE_DEF_NAME(amx_complex, 32 * 10 + 8, 170000, "amx-complex")
JL_FEATURE_DEF(avxvnniint16, 32 * 10 + 10, 170000)
JL_FEATURE_DEF(prefetchi, 32 * 10 + 14, 160000)
JL_FEATURE_DEF(usermsr, 32 * 10 + 15, 170000)
// JL_FEATURE_DEF(avx10, 32 * 10 + 19, 170000) // TODO: What to do about avx10 and its mess?
// JL_FEATURE_DEF(apxf, 32 * 10 + 21, 190000)

// EAX=0x14,ECX=0: EBX
JL_FEATURE_DEF(ptwrite, 32 * 10 + 4, 0)
JL_FEATURE_DEF(ptwrite, 32 * 11 + 4, 0)

#undef JL_X86_64ONLY_VER
35 changes: 32 additions & 3 deletions src/processor_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,10 @@ enum class CPU : uint32_t {
amd_znver2,
amd_znver3,
amd_znver4,
amd_znver5,
};

static constexpr size_t feature_sz = 11;
static constexpr size_t feature_sz = 12;
static constexpr FeatureName feature_names[] = {
#define JL_FEATURE_DEF(name, bit, llvmver) {#name, bit, llvmver},
#define JL_FEATURE_DEF_NAME(name, bit, llvmver, str) {str, bit, llvmver},
Expand Down Expand Up @@ -141,6 +142,10 @@ static constexpr FeatureDep deps[] = {
{vpclmulqdq, avx},
{vpclmulqdq, pclmul},
{avxvnni, avx2},
{avxvnniint8, avx2},
{avxvnniint16, avx2},
{avxifma, avx2},
{avxneconvert, avx2},
{avx512f, avx2},
{avx512dq, avx512f},
{avx512ifma, avx512f},
Expand All @@ -159,13 +164,18 @@ static constexpr FeatureDep deps[] = {
{avx512fp16, avx512vl},
{amx_int8, amx_tile},
{amx_bf16, amx_tile},
{amx_fp16, amx_tile},
{amx_complex, amx_tile},
{sse4a, sse3},
{xop, fma4},
{fma4, avx},
{fma4, sse4a},
{xsaveopt, xsave},
{xsavec, xsave},
{xsaves, xsave},
{sha512, avx2},
{sm3, avx},
{sm4, avx2},
};

// We require cx16 on 64bit by default. This can be overwritten with `-cx16`
Expand Down Expand Up @@ -236,6 +246,7 @@ constexpr auto znver2 = znver1 | get_feature_masks(clwb, rdpid, wbnoinvd);
constexpr auto znver3 = znver2 | get_feature_masks(shstk, pku, vaes, vpclmulqdq);
constexpr auto znver4 = znver3 | get_feature_masks(avx512f, avx512cd, avx512dq, avx512bw, avx512vl, avx512ifma, avx512vbmi,
avx512vbmi2, avx512vnni, avx512bitalg, avx512vpopcntdq, avx512bf16, gfni, shstk, xsaves);
// Feature set for znver5: everything in znver4 plus the features listed below
// (each feature is listed exactly once).
constexpr auto znver5 = znver4 | get_feature_masks(avxvnni, movdiri, movdir64b, avx512vp2intersect, prefetchi);

}

Expand Down Expand Up @@ -298,6 +309,7 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
{"znver2", CPU::amd_znver2, CPU::generic, 0, Feature::znver2},
{"znver3", CPU::amd_znver3, CPU::amd_znver2, 120000, Feature::znver3},
{"znver4", CPU::amd_znver4, CPU::amd_znver3, 160000, Feature::znver4},
{"znver5", CPU::amd_znver5, CPU::amd_znver4, 190000, Feature::znver5},
};
static constexpr size_t ncpu_names = sizeof(cpus) / sizeof(cpus[0]);

Expand Down Expand Up @@ -575,6 +587,9 @@ static CPU get_amd_processor_name(uint32_t family, uint32_t model, const uint32_
return CPU::amd_znver4;
}
return CPU::amd_znver3; // fallback
case 26:
// if (model <= 0x77)
return CPU::amd_znver5;
}
}

Expand Down Expand Up @@ -660,11 +675,12 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu(void)
int32_t info7[4];
jl_cpuidex(info7, 7, 1);
features[9] = info7[0];
features[10] = info7[1];
}
if (maxleaf >= 0x14) {
int32_t info14[4];
jl_cpuidex(info14, 0x14, 0);
features[10] = info14[1];
features[11] = info14[1];
}

// Fix up AVX bits to account for OS support and match LLVM model
Expand Down Expand Up @@ -705,7 +721,20 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu(void)
else {
cpu = uint32_t(CPU::generic);
}

/* Feature bits to register map
features[0] = leaf 1 ecx
features[1] = leaf 1 edx
features[2] = leaf 7 ebx
features[3] = leaf 7 ecx
features[4] = leaf 7 edx
features[5] = leaf 0x80000001 ecx
features[6] = leaf 0x80000001 edx
features[7] = leaf 0xd subleaf 1 eax
features[8] = leaf 0x80000008 ebx
features[9] = leaf 7 subleaf 1 eax
features[10] = leaf 7 subleaf 1 ebx
features[11] = leaf 0x14 subleaf 0 ebx
*/
return std::make_pair(cpu, features);
}

Expand Down

2 comments on commit 4250be8

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Executing the daily package evaluation, I will reply here when finished:

@nanosoldier runtests(isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The package evaluation job you requested has completed - possible new issues were detected.
The full report is available.

Report summary

❗ Packages that crashed

58 packages crashed.

  • The process was aborted: 35 packages
  • Invalid LLVM IR was generated: 1 packages
  • An internal error was encountered: 3 packages
  • An unreachable instruction was executed: 3 packages
  • A segmentation fault happened: 16 packages

✖ Packages that failed

44 packages failed only on the current version.

  • Package fails to precompile: 4 packages
  • Package has test failures: 4 packages
  • Package tests unexpectedly errored: 16 packages
  • Tests became inactive: 7 packages
  • Test duration exceeded the time limit: 12 packages
  • Test log exceeded the size limit: 1 packages

3375 packages failed on the previous version too.

✔ Packages that passed tests

139 packages passed tests only on the current version.

  • Other: 139 packages

5238 packages passed tests on the previous version too.

➖ Packages that were skipped altogether

1362 packages were skipped on the previous version too.

Please sign in to comment.