diff --git a/src/orhr_col.cc b/src/orhr_col.cc index c7d54b70..5f6e2a92 100644 --- a/src/orhr_col.cc +++ b/src/orhr_col.cc @@ -40,6 +40,9 @@ int64_t orhr_col( lapack_int ldt_ = (lapack_int) ldt; lapack_int info_ = 0; + // Work around bug in LAPACK <= 3.12. See https://github.com/Reference-LAPACK/lapack/pull/1018 + nb_ = min( nb_, n ); + LAPACK_sorhr_col( &m_, &n_, &nb_, A, &lda_, @@ -159,6 +162,9 @@ int64_t orhr_col( lapack_int ldt_ = (lapack_int) ldt; lapack_int info_ = 0; + // Work around bug in LAPACK <= 3.12. See https://github.com/Reference-LAPACK/lapack/pull/1018 + nb_ = min( nb_, n ); + LAPACK_dorhr_col( &m_, &n_, &nb_, A, &lda_, diff --git a/src/unhr_col.cc b/src/unhr_col.cc index bead387d..caca900c 100644 --- a/src/unhr_col.cc +++ b/src/unhr_col.cc @@ -40,6 +40,9 @@ int64_t unhr_col( lapack_int ldt_ = (lapack_int) ldt; lapack_int info_ = 0; + // Work around bug in LAPACK <= 3.12. See https://github.com/Reference-LAPACK/lapack/pull/1018 + nb_ = min( nb_, n ); + LAPACK_cunhr_col( &m_, &n_, &nb_, (lapack_complex_float*) A, &lda_, @@ -159,6 +162,9 @@ int64_t unhr_col( lapack_int ldt_ = (lapack_int) ldt; lapack_int info_ = 0; + // Work around bug in LAPACK <= 3.12. See https://github.com/Reference-LAPACK/lapack/pull/1018 + nb_ = min( nb_, n ); + LAPACK_zunhr_col( &m_, &n_, &nb_, (lapack_complex_double*) A, &lda_, diff --git a/test/lapacke_wrappers.hh b/test/lapacke_wrappers.hh index d28e27b4..f6a73181 100644 --- a/test/lapacke_wrappers.hh +++ b/test/lapacke_wrappers.hh @@ -10008,8 +10008,8 @@ inline lapack_int LAPACKE_ungtr( } // ----------------------------------------------------------------------------- -// LAPACKE_*unhr_col only in Intel MKL, not yet Netlib LAPACK as of 3.10. -#if LAPACK_VERSION >= 30900 && defined( LAPACK_HAVE_MKL ) // >= 3.9.0 +// LAPACKE_*unhr_col only in Intel MKL until LAPACK 3.12. +#if LAPACK_VERSION >= 31200 || (LAPACK_VERSION >= 30900 && defined( LAPACK_HAVE_MKL )) inline lapack_int LAPACKE_orhr_col( lapack_int m, lapack_int n, lapack_int nb, @@ -10083,7 +10083,7 @@ inline lapack_int LAPACKE_unhr_col( (lapack_complex_double*) D ); } -#endif // 3.9.0 and LAPACK_HAVE_MKL +#endif // 3.12.0 or (3.9.0 and LAPACK_HAVE_MKL) // ----------------------------------------------------------------------------- diff --git a/test/test_orhr_col.cc b/test/test_orhr_col.cc index 7eb592ad..9e84e017 100644 --- a/test/test_orhr_col.cc +++ b/test/test_orhr_col.cc @@ -53,23 +53,28 @@ void test_orhr_col_work( Params& params, bool run ) // ---------- run test testsweeper::flush_cache( params.cache() ); double time = testsweeper::get_wtime(); - int64_t info_tst = lapack::orhr_col( m, n, nb, &A_tst[0], lda, &T_tst[0], ldt, &D_tst[0] ); + int64_t info_tst = lapack::orhr_col( + m, n, nb, &A_tst[0], lda, &T_tst[0], ldt, &D_tst[0] ); time = testsweeper::get_wtime() - time; if (info_tst != 0) { - fprintf( stderr, "lapack::orhr_col returned error %lld\n", llong( info_tst ) ); + fprintf( stderr, "lapack::orhr_col returned error %lld\n", + llong( info_tst ) ); } params.time() = time; - #ifdef LAPACK_HAVE_MKL if (params.ref() == 'y' || params.check() == 'y') { + #if LAPACK_VERSION >= 31200 || defined( LAPACK_HAVE_MKL ) // ---------- run reference testsweeper::flush_cache( params.cache() ); time = testsweeper::get_wtime(); - int64_t info_ref = LAPACKE_orhr_col( m, n, nb, &A_ref[0], lda, &T_ref[0], ldt, &D_ref[0] ); + // min works around bug in LAPACK <= 3.12 + int64_t info_ref = LAPACKE_orhr_col( + m, n, blas::min( nb, n ), &A_ref[0], lda, &T_ref[0], ldt, &D_ref[0] ); time = testsweeper::get_wtime() - time; if (info_ref != 0) { - fprintf( stderr, "LAPACKE_orhr_col returned error %lld\n", llong( info_ref ) ); + fprintf( stderr, "LAPACKE_orhr_col returned error %lld\n", + llong( info_ref ) ); } params.ref_time() = time; @@ -84,11 +89,11 @@ void test_orhr_col_work( Params& params, bool run ) error += abs_error( D_tst, D_ref ); params.error() = error; params.okay() = (error == 0); // expect lapackpp == lapacke - } #else // LAPACKE_unhr_col not yet in LAPACK - params.msg() = "check requires Intel MKL, as of LAPACK 3.11"; + params.msg() = "check requires LAPACK >= 3.12 or Intel MKL"; #endif // LAPACK_HAVE_MKL + } } #endif // LAPACK >= 3.9.0 diff --git a/test/test_unhr_col.cc b/test/test_unhr_col.cc index 6ba4f073..1268476b 100644 --- a/test/test_unhr_col.cc +++ b/test/test_unhr_col.cc @@ -53,23 +53,28 @@ void test_unhr_col_work( Params& params, bool run ) // ---------- run test testsweeper::flush_cache( params.cache() ); double time = testsweeper::get_wtime(); - int64_t info_tst = lapack::unhr_col( m, n, nb, &A_tst[0], lda, &T_tst[0], ldt, &D_tst[0] ); + int64_t info_tst = lapack::unhr_col( + m, n, nb, &A_tst[0], lda, &T_tst[0], ldt, &D_tst[0] ); time = testsweeper::get_wtime() - time; if (info_tst != 0) { - fprintf( stderr, "lapack::unhr_col returned error %lld\n", llong( info_tst ) ); + fprintf( stderr, "lapack::unhr_col returned error %lld\n", + llong( info_tst ) ); } params.time() = time; - #ifdef LAPACK_HAVE_MKL if (params.ref() == 'y' || params.check() == 'y') { + #if LAPACK_VERSION >= 31200 || defined( LAPACK_HAVE_MKL ) // ---------- run reference testsweeper::flush_cache( params.cache() ); time = testsweeper::get_wtime(); - int64_t info_ref = LAPACKE_unhr_col( m, n, nb, &A_ref[0], lda, &T_ref[0], ldt, &D_ref[0] ); + // min works around bug in LAPACK <= 3.12 + int64_t info_ref = LAPACKE_unhr_col( + m, n, blas::min( nb, n ), &A_ref[0], lda, &T_ref[0], ldt, &D_ref[0] ); time = testsweeper::get_wtime() - time; if (info_ref != 0) { - fprintf( stderr, "LAPACKE_unhr_col returned error %lld\n", llong( info_ref ) ); + fprintf( stderr, "LAPACKE_unhr_col returned error %lld\n", + llong( info_ref ) ); } params.ref_time() = time; @@ -84,11 +89,11 @@ void test_unhr_col_work( Params& params, bool run ) error += abs_error( D_tst, D_ref ); params.error() = error; params.okay() = (error == 0); // expect lapackpp == lapacke - } #else // LAPACKE_unhr_col not yet in LAPACK - params.msg() = "check requires Intel MKL, as of LAPACK 3.11"; + params.msg() = "check requires LAPACK >= 3.12 or Intel MKL"; #endif // LAPACK_HAVE_MKL + } } #endif // LAPACK >= 3.9.0