Merge ~bullwinkle-team/ubuntu/+source/hipblas:bullwinkle/llvm-21/ubuntu/devel into ubuntu/+source/hipblas:ubuntu/devel

Proposed by Bruno Bernardo de Moura
Status: Merged
Approved by: Andreas Hasenack
Approved revision: fffc9a78dc56aa47aba8388f8ca39e072a705cca
Merged at revision: fffc9a78dc56aa47aba8388f8ca39e072a705cca
Proposed branch: ~bullwinkle-team/ubuntu/+source/hipblas:bullwinkle/llvm-21/ubuntu/devel
Merge into: ubuntu/+source/hipblas:ubuntu/devel
Diff against target: 168361 lines (+37638/-84616)
308 files modified
.githooks/install (+1/-1)
.jenkins/codecov.groovy (+119/-0)
.jenkins/common.groovy (+41/-21)
.jenkins/precheckin-cuda.groovy (+2/-6)
.jenkins/precheckin.groovy (+2/-6)
.jenkins/static.groovy (+4/-8)
.jenkins/staticanalysis.groovy (+1/-3)
CHANGELOG.md (+38/-1)
CMakeLists.txt (+48/-53)
LICENSE.md (+18/-4)
README.md (+26/-5)
bump_develop_version.sh (+12/-8)
clients/CMakeLists.txt (+80/-24)
clients/benchmarks/CMakeLists.txt (+10/-48)
clients/benchmarks/client.cpp (+12/-25)
clients/cmake/build-options.cmake (+0/-2)
clients/common/cblas_interface.cpp (+726/-701)
clients/common/clients_common.cpp (+35/-33)
clients/common/hipblas_datatype2string.cpp (+21/-19)
clients/common/hipblas_gentest.py (+2/-5)
clients/common/hipblas_parse_data.cpp (+12/-8)
clients/common/near.cpp (+37/-33)
clients/common/norm.cpp (+26/-22)
clients/common/unit.cpp (+28/-28)
clients/common/utility.cpp (+32/-17)
clients/gtest/CMakeLists.txt (+15/-65)
clients/gtest/auxil/set_get_matrix_vector_gtest.cpp (+2/-2)
clients/gtest/blas1/asum_gtest.cpp (+2/-2)
clients/gtest/blas1/axpy_gtest.cpp (+2/-2)
clients/gtest/blas1/copy_gtest.cpp (+2/-2)
clients/gtest/blas1/dot_gtest.cpp (+3/-3)
clients/gtest/blas1/dot_gtest.yaml (+0/-1)
clients/gtest/blas1/iamaxmin_gtest.cpp (+2/-2)
clients/gtest/blas1/nrm2_gtest.cpp (+2/-2)
clients/gtest/blas1/rot_gtest.cpp (+36/-36)
clients/gtest/blas1/scal_gtest.cpp (+6/-9)
clients/gtest/blas1/swap_gtest.cpp (+2/-2)
clients/gtest/blas2/gbmv_gtest.cpp (+2/-2)
clients/gtest/blas2/gemv_gtest.cpp (+2/-2)
clients/gtest/blas2/ger_gtest.cpp (+3/-3)
clients/gtest/blas2/hbmv_gtest.cpp (+2/-2)
clients/gtest/blas2/hemv_gtest.cpp (+2/-2)
clients/gtest/blas2/her2_gtest.cpp (+2/-2)
clients/gtest/blas2/her_gtest.cpp (+2/-2)
clients/gtest/blas2/hpmv_gtest.cpp (+2/-2)
clients/gtest/blas2/hpr2_gtest.cpp (+2/-2)
clients/gtest/blas2/hpr_gtest.cpp (+2/-2)
clients/gtest/blas2/spr_gtest.cpp (+2/-2)
clients/gtest/blas2/symv_gtest.cpp (+2/-2)
clients/gtest/blas2/syr2_gtest.cpp (+2/-2)
clients/gtest/blas2/syr_gtest.cpp (+2/-2)
clients/gtest/blas2/tbmv_gtest.cpp (+2/-2)
clients/gtest/blas2/tbsv_gtest.cpp (+2/-2)
clients/gtest/blas2/tpmv_gtest.cpp (+2/-2)
clients/gtest/blas2/tpsv_gtest.cpp (+2/-2)
clients/gtest/blas2/trmv_gtest.cpp (+2/-2)
clients/gtest/blas2/trsv_gtest.cpp (+2/-2)
clients/gtest/blas3/dgmm_gtest.cpp (+2/-2)
clients/gtest/blas3/geam_gtest.cpp (+2/-2)
clients/gtest/blas3/gemm_gtest.cpp (+2/-2)
clients/gtest/blas3/hemm_gtest.cpp (+2/-2)
clients/gtest/blas3/her2k_gtest.cpp (+2/-2)
clients/gtest/blas3/herk_gtest.cpp (+2/-2)
clients/gtest/blas3/herkx_gtest.cpp (+2/-2)
clients/gtest/blas3/symm_gtest.cpp (+2/-2)
clients/gtest/blas3/syr2k_gtest.cpp (+2/-2)
clients/gtest/blas3/syrk_gtest.cpp (+2/-2)
clients/gtest/blas3/syrkx_gtest.cpp (+2/-2)
clients/gtest/blas3/trmm_gtest.cpp (+2/-2)
clients/gtest/blas3/trsm_gtest.cpp (+2/-2)
clients/gtest/blas3/trsm_gtest.yaml (+1/-0)
clients/gtest/blas3/trtri_gtest.cpp (+2/-2)
clients/gtest/blas_ex/axpy_ex_gtest.cpp (+22/-22)
clients/gtest/blas_ex/dot_ex_gtest.cpp (+18/-17)
clients/gtest/blas_ex/gemm_ex_gtest.cpp (+1/-9)
clients/gtest/blas_ex/nrm2_ex_gtest.cpp (+4/-3)
clients/gtest/blas_ex/rot_ex_gtest.cpp (+4/-4)
clients/gtest/blas_ex/scal_ex_gtest.cpp (+4/-4)
clients/gtest/blas_ex/trsm_ex_gtest.cpp (+2/-2)
clients/gtest/hipblas_gtest_main.cpp (+1/-14)
clients/gtest/hipblas_test.cpp (+1/-7)
clients/gtest/solver/gels_gtest.cpp (+2/-2)
clients/gtest/solver/geqrf_gtest.cpp (+2/-2)
clients/gtest/solver/getrf_gtest.cpp (+2/-2)
clients/gtest/solver/getri_gtest.cpp (+2/-2)
clients/gtest/solver/getrs_gtest.cpp (+2/-2)
clients/hipblas_clients_readme.txt (+33/-0)
clients/include/argument_model.hpp (+6/-6)
clients/include/blas1/hipblas_iamax_iamin_ref.hpp (+3/-3)
clients/include/blas1/testing_axpy.hpp (+8/-5)
clients/include/blas1/testing_axpy_batched.hpp (+15/-7)
clients/include/blas1/testing_axpy_strided_batched.hpp (+16/-5)
clients/include/blas1/testing_dot.hpp (+9/-2)
clients/include/blas1/testing_dot_batched.hpp (+2/-2)
clients/include/blas1/testing_dot_strided_batched.hpp (+12/-4)
clients/include/blas1/testing_rot.hpp (+3/-2)
clients/include/blas1/testing_rot_batched.hpp (+3/-3)
clients/include/blas1/testing_rot_strided_batched.hpp (+12/-2)
clients/include/blas1/testing_rotg.hpp (+7/-2)
clients/include/blas1/testing_rotg_batched.hpp (+8/-2)
clients/include/blas1/testing_rotg_strided_batched.hpp (+11/-2)
clients/include/blas1/testing_scal.hpp (+11/-6)
clients/include/blas1/testing_scal_batched.hpp (+11/-7)
clients/include/blas1/testing_scal_strided_batched.hpp (+10/-7)
clients/include/blas2/testing_gbmv.hpp (+26/-9)
clients/include/blas2/testing_gbmv_batched.hpp (+13/-8)
clients/include/blas2/testing_gbmv_strided_batched.hpp (+13/-8)
clients/include/blas2/testing_gemv.hpp (+23/-7)
clients/include/blas2/testing_gemv_batched.hpp (+13/-8)
clients/include/blas2/testing_gemv_strided_batched.hpp (+13/-8)
clients/include/blas2/testing_ger.hpp (+8/-5)
clients/include/blas2/testing_ger_batched.hpp (+7/-5)
clients/include/blas2/testing_ger_strided_batched.hpp (+7/-5)
clients/include/blas2/testing_hbmv.hpp (+23/-7)
clients/include/blas2/testing_hbmv_batched.hpp (+13/-8)
clients/include/blas2/testing_hbmv_strided_batched.hpp (+13/-8)
clients/include/blas2/testing_hemv.hpp (+22/-7)
clients/include/blas2/testing_hemv_batched.hpp (+13/-8)
clients/include/blas2/testing_hemv_strided_batched.hpp (+13/-8)
clients/include/blas2/testing_her.hpp (+2/-2)
clients/include/blas2/testing_her2.hpp (+9/-6)
clients/include/blas2/testing_her2_batched.hpp (+7/-5)
clients/include/blas2/testing_her2_strided_batched.hpp (+7/-5)
clients/include/blas2/testing_her_batched.hpp (+2/-2)
clients/include/blas2/testing_her_strided_batched.hpp (+2/-2)
clients/include/blas2/testing_hpmv.hpp (+21/-7)
clients/include/blas2/testing_hpmv_batched.hpp (+13/-8)
clients/include/blas2/testing_hpmv_strided_batched.hpp (+13/-8)
clients/include/blas2/testing_hpr.hpp (+2/-2)
clients/include/blas2/testing_hpr2.hpp (+8/-5)
clients/include/blas2/testing_hpr2_batched.hpp (+7/-5)
clients/include/blas2/testing_hpr2_strided_batched.hpp (+7/-5)
clients/include/blas2/testing_hpr_batched.hpp (+2/-2)
clients/include/blas2/testing_hpr_strided_batched.hpp (+2/-2)
clients/include/blas2/testing_sbmv.hpp (+23/-7)
clients/include/blas2/testing_sbmv_batched.hpp (+13/-8)
clients/include/blas2/testing_sbmv_strided_batched.hpp (+13/-8)
clients/include/blas2/testing_spmv.hpp (+24/-8)
clients/include/blas2/testing_spmv_batched.hpp (+13/-8)
clients/include/blas2/testing_spmv_strided_batched.hpp (+13/-8)
clients/include/blas2/testing_spr.hpp (+7/-5)
clients/include/blas2/testing_spr2.hpp (+8/-5)
clients/include/blas2/testing_spr2_batched.hpp (+7/-5)
clients/include/blas2/testing_spr2_strided_batched.hpp (+7/-5)
clients/include/blas2/testing_spr_batched.hpp (+7/-5)
clients/include/blas2/testing_spr_strided_batched.hpp (+16/-5)
clients/include/blas2/testing_symv.hpp (+22/-7)
clients/include/blas2/testing_symv_batched.hpp (+13/-8)
clients/include/blas2/testing_symv_strided_batched.hpp (+13/-8)
clients/include/blas2/testing_syr.hpp (+8/-5)
clients/include/blas2/testing_syr2.hpp (+8/-5)
clients/include/blas2/testing_syr2_batched.hpp (+7/-5)
clients/include/blas2/testing_syr2_strided_batched.hpp (+7/-5)
clients/include/blas2/testing_syr_batched.hpp (+7/-5)
clients/include/blas2/testing_syr_strided_batched.hpp (+17/-5)
clients/include/blas3/testing_geam.hpp (+21/-8)
clients/include/blas3/testing_geam_batched.hpp (+9/-7)
clients/include/blas3/testing_geam_strided_batched.hpp (+9/-7)
clients/include/blas3/testing_gemm.hpp (+39/-10)
clients/include/blas3/testing_gemm_batched.hpp (+20/-16)
clients/include/blas3/testing_gemm_strided_batched.hpp (+14/-10)
clients/include/blas3/testing_hemm.hpp (+23/-7)
clients/include/blas3/testing_hemm_batched.hpp (+12/-8)
clients/include/blas3/testing_hemm_strided_batched.hpp (+12/-8)
clients/include/blas3/testing_her2k.hpp (+22/-8)
clients/include/blas3/testing_her2k_batched.hpp (+12/-10)
clients/include/blas3/testing_her2k_strided_batched.hpp (+10/-8)
clients/include/blas3/testing_herk.hpp (+2/-2)
clients/include/blas3/testing_herk_batched.hpp (+2/-2)
clients/include/blas3/testing_herk_strided_batched.hpp (+2/-2)
clients/include/blas3/testing_herkx.hpp (+22/-8)
clients/include/blas3/testing_herkx_batched.hpp (+12/-10)
clients/include/blas3/testing_herkx_strided_batched.hpp (+10/-8)
clients/include/blas3/testing_symm.hpp (+23/-7)
clients/include/blas3/testing_symm_batched.hpp (+12/-8)
clients/include/blas3/testing_symm_strided_batched.hpp (+12/-8)
clients/include/blas3/testing_syr2k.hpp (+23/-7)
clients/include/blas3/testing_syr2k_batched.hpp (+12/-8)
clients/include/blas3/testing_syr2k_strided_batched.hpp (+12/-8)
clients/include/blas3/testing_syrk.hpp (+21/-7)
clients/include/blas3/testing_syrk_batched.hpp (+12/-8)
clients/include/blas3/testing_syrk_strided_batched.hpp (+12/-8)
clients/include/blas3/testing_syrkx.hpp (+24/-9)
clients/include/blas3/testing_syrkx_batched.hpp (+12/-8)
clients/include/blas3/testing_syrkx_strided_batched.hpp (+12/-8)
clients/include/blas3/testing_trmm.hpp (+21/-7)
clients/include/blas3/testing_trmm_batched.hpp (+7/-5)
clients/include/blas3/testing_trmm_strided_batched.hpp (+7/-5)
clients/include/blas3/testing_trsm.hpp (+29/-7)
clients/include/blas3/testing_trsm_batched.hpp (+7/-5)
clients/include/blas3/testing_trsm_strided_batched.hpp (+7/-5)
clients/include/blas_ex/testing_axpy_batched_ex.hpp (+15/-13)
clients/include/blas_ex/testing_axpy_ex.hpp (+25/-13)
clients/include/blas_ex/testing_axpy_strided_batched_ex.hpp (+15/-13)
clients/include/blas_ex/testing_dot_batched_ex.hpp (+9/-9)
clients/include/blas_ex/testing_dot_ex.hpp (+10/-10)
clients/include/blas_ex/testing_dot_strided_batched_ex.hpp (+9/-9)
clients/include/blas_ex/testing_gemm_batched_ex.hpp (+21/-25)
clients/include/blas_ex/testing_gemm_ex.hpp (+21/-25)
clients/include/blas_ex/testing_gemm_strided_batched_ex.hpp (+21/-25)
clients/include/blas_ex/testing_nrm2_batched_ex.hpp (+7/-7)
clients/include/blas_ex/testing_nrm2_ex.hpp (+7/-7)
clients/include/blas_ex/testing_nrm2_strided_batched_ex.hpp (+7/-7)
clients/include/blas_ex/testing_rot_batched_ex.hpp (+9/-9)
clients/include/blas_ex/testing_rot_ex.hpp (+9/-9)
clients/include/blas_ex/testing_rot_strided_batched_ex.hpp (+9/-9)
clients/include/blas_ex/testing_scal_batched_ex.hpp (+31/-14)
clients/include/blas_ex/testing_scal_ex.hpp (+30/-13)
clients/include/blas_ex/testing_scal_strided_batched_ex.hpp (+20/-10)
clients/include/blas_ex/testing_trsm_batched_ex.hpp (+9/-7)
clients/include/blas_ex/testing_trsm_ex.hpp (+9/-7)
clients/include/blas_ex/testing_trsm_strided_batched_ex.hpp (+9/-7)
clients/include/cblas_interface.h (+18/-18)
clients/include/d_vector.hpp (+12/-12)
clients/include/device_batch_matrix.hpp (+29/-24)
clients/include/device_batch_vector.hpp (+27/-22)
clients/include/device_matrix.hpp (+8/-8)
clients/include/device_strided_batch_matrix.hpp (+17/-17)
clients/include/device_strided_batch_vector.hpp (+13/-13)
clients/include/device_vector.hpp (+8/-8)
clients/include/flops.hpp (+90/-88)
clients/include/hipblas.hpp (+4187/-11240)
clients/include/hipblas_arguments.hpp (+29/-28)
clients/include/hipblas_common.yaml (+12/-56)
clients/include/hipblas_datatype2string.hpp (+22/-26)
clients/include/hipblas_fortran.h.in (+3744/-3783)
clients/include/hipblas_fortran.hpp (+231/-246)
clients/include/hipblas_fortran_blas.f90 (+97/-97)
clients/include/hipblas_fortran_blas_64.f90 (+90/-90)
clients/include/hipblas_fortran_solver.f90 (+1/-1)
clients/include/host_batch_vector.hpp (+10/-1)
clients/include/host_strided_batch_vector.hpp (+9/-1)
clients/include/host_vector.hpp (+9/-1)
clients/include/lapack_utilities.hpp (+10/-10)
clients/include/near.h (+17/-20)
clients/include/solver/testing_gels_batched.hpp (+3/-3)
clients/include/solver/testing_geqrf_batched.hpp (+3/-3)
clients/include/solver/testing_getrs_batched.hpp (+3/-3)
clients/include/syrkx_reference.hpp (+4/-4)
clients/include/type_dispatch.hpp (+55/-58)
clients/include/type_utils.h (+39/-17)
clients/include/utility.h (+35/-23)
clients/samples/CMakeLists.txt (+10/-11)
clients/samples/example_gemm_ex.cpp (+3/-11)
clients/samples/example_gemm_ex_fortran.F90 (+6/-6)
clients/samples/example_hgemm.cpp (+3/-3)
clients/samples/example_hgemm_hip_half.cpp (+3/-4)
clients/samples/example_hip_complex_her2.cpp (+0/-1)
clients/samples/example_scal_ex.cpp (+1/-6)
clients/samples/example_sscal_fortran.F90 (+1/-1)
cmake/dependencies.cmake (+3/-21)
cmake/get-rocm-cmake.cmake (+21/-0)
debian/bin/run-tests (+1/-1)
debian/changelog (+60/-0)
debian/control (+54/-20)
debian/gbp.conf (+1/-1)
debian/libhipblas3-bench.install (+1/-0)
debian/libhipblas3-bench.links (+1/-0)
debian/libhipblas3-tests-data.install (+1/-0)
debian/libhipblas3-tests.install (+4/-0)
debian/libhipblas3-tests.links (+1/-0)
debian/libhipblas3.symbols (+1854/-0)
debian/patches/0001-remove-immintrin-header.patch (+3/-11)
debian/patches/0002-Disable-omp.patch (+11/-11)
debian/patches/0003-use-generic-blas-and-lapack.patch (+6/-5)
debian/patches/0004-Use-local-mathjax.patch (+1/-1)
debian/patches/0005-Gtest-add-verbose-flag-to-prevent-timeout.patch (+22/-0)
debian/patches/0006-drop-f16c-instructions.patch (+18/-14)
debian/patches/0007-optional-git.patch (+1/-1)
debian/patches/0008-fix-ambiguous-__half-constructor.patch (+1377/-0)
debian/patches/0009-enable-changing-test-data-dir.patch (+53/-0)
debian/patches/0010-remove-readme-from-doxygen-sources.patch (+24/-0)
debian/patches/0011-fix-arm64-immintrin-include.patch (+28/-0)
debian/patches/series (+10/-11)
debian/rules (+19/-4)
deps/requirements.txt (+1/-1)
dev/null (+0/-1562)
docs/conceptual/library-source-code-organization.rst (+3/-3)
docs/doxygen/Doxyfile (+1/-1)
docs/how-to/contributing-to-hipblas.rst (+1/-1)
docs/how-to/using-hipblas-clients.rst (+2/-2)
docs/index.rst (+9/-3)
docs/install/Linux_Install_Guide.rst (+44/-4)
docs/install/Windows_Install_Guide.rst (+35/-6)
docs/install/prerequisites.rst (+3/-3)
docs/reference/data-type-support.rst (+809/-0)
docs/reference/deprecation.rst (+95/-89)
docs/reference/hipblas-api-functions.rst (+172/-88)
docs/sphinx/_toc.yml.in (+3/-1)
docs/sphinx/requirements.in (+1/-1)
docs/sphinx/requirements.txt (+140/-10)
library/CMakeLists.txt (+2/-2)
library/include/hipblas.h (+6638/-16631)
library/src/CMakeLists.txt (+13/-50)
library/src/amd_detail/hipblas.cpp (+6960/-27155)
library/src/hipblas_module.f90 (+233/-203)
library/src/nvidia_detail/hipblas.cpp (+6882/-20452)
rdeps.py (+1/-1)
rmake.py (+17/-13)
scripts/performance/blas/datagraphs.asy (+0/-2)
scripts/performance/blas/timing.py (+0/-1)
scripts/performance/multiplot/README (+0/-4)
scripts/performance/multiplot/blas2/gfx90a/gbmv.csv (+0/-1)
scripts/performance/multiplot/blas2/gfx90a/hbmv.csv (+0/-1)
scripts/performance/multiplot/blas2/gfx90a/sbmv.csv (+0/-1)
scripts/performance/multiplot/blas2/gfx90a/spmv.csv (+0/-1)
scripts/performance/multiplot/blas2/gfx90a/symv.csv (+0/-1)
toolchain-windows.cmake (+18/-0)
Reviewer Review Type Date Requested Status
Andreas Hasenack Approve
Ubuntu Sponsors Pending
Review via email: mp+499357@code.launchpad.net

Description of the change

New upstream version 7.1.0

To post a comment you must log in.
Revision history for this message
Bruno Bernardo de Moura (bruno-bdmoura) wrote :
Revision history for this message
Igor Luppi (igorluppi) wrote :

Rebasing with ubuntu/devel

Revision history for this message
Igor Luppi (igorluppi) wrote :

Rebase done. New ppa build with version 7.1.0-0ubuntu4 here: https://launchpad.net/~igorluppi/+archive/ubuntu/hipblas-lp2139242

(-proposed and all archs)

Revision history for this message
Igor Luppi (igorluppi) wrote :

$ reverse-depends --arch ppc64el src:hipblas -x
Reverse-Depends
===============
* libggml-hip (for libhipblas0)

But it only actually requires: libhipblas-dev [amd64 arm64] <!pkg.ggml.nohip>,

I have done ggml builds in a PPA for ppc64el without having any ppc64el rocm package, so it is safe to drop ppc64el for hipblas.

Revision history for this message
Andreas Hasenack (ahasenack) wrote :

What's this about in d/rules:

+LIB=libhipblas3
+LIBV=7.1.0
+REV=1
+gensymbols:
+ dpkg-deb -x ../$(LIB)_$(LIBV)-$(REV)_amd64.deb /tmp/$(LIB)
+ dpkg-gensymbols -v$(LIBV) -p$(LIB) -P/tmp/$(LIB) -Odebian/$(LIB).symbols

You are opening a built deb (amd64 only), and then doing what?

review: Needs Information
Revision history for this message
Igor Luppi (igorluppi) wrote :

I think this is outdated, we could just use

override_dh_makeshlibs:
    dh_makeshlibs -V

Revision history for this message
Andreas Hasenack (ahasenack) wrote :

Why even override it?

Revision history for this message
Igor Luppi (igorluppi) wrote :

Yeah, I actually deleted that temp. piece of code. Please take a look.

Revision history for this message
Andreas Hasenack (ahasenack) wrote :

Please also trigger a new build

Revision history for this message
Igor Luppi (igorluppi) wrote :

Here the recipe build with 3 child builds (amd64, amd64v3 and arm64):

https://code.launchpad.net/~bullwinkle-team/+archive/ubuntu/rocm-devel-21/+recipebuild/4006949

Revision history for this message
Andreas Hasenack (ahasenack) wrote :

- delta: OK
- upstream changes: OK
- debian packaging changes: OK

Debian could benefit from the fixes in the smartcard tests that you made, that will allow them to re-enable those tests (currently disabled in d/t/control in debian/sid).

+1

review: Approve
Revision history for this message
Andreas Hasenack (ahasenack) wrote (last edit ):

Bah, that was the wrong MP, sorry. I'll put it back to "needs fixing" to reset the approve state, sorry again.

Doing too many reviews in parallel :\

review: Needs Fixing
Revision history for this message
Andreas Hasenack (ahasenack) wrote :

> Here the recipe build with 3 child builds (amd64, amd64v3 and arm64):

Does that have the updated d/rules?

Revision history for this message
Igor Luppi (igorluppi) wrote :

I removed the section:

+LIB=libhipblas3
+LIBV=7.1.0
+REV=1
+gensymbols:
+ dpkg-deb -x ../$(LIB)_$(LIBV)-$(REV)_amd64.deb /tmp/$(LIB)
+ dpkg-gensymbols -v$(LIBV) -p$(LIB) -P/tmp/$(LIB) -Odebian/$(LIB).symbols

for those builds

Revision history for this message
Igor Luppi (igorluppi) wrote :
Revision history for this message
Igor Luppi (igorluppi) wrote :

Builds done successfully, please take a look.

Revision history for this message
Andreas Hasenack (ahasenack) wrote :

This depends on rocsolver 7, so it will stay in the build queue until that other upload finishes building.

+1

review: Approve
Revision history for this message
Andreas Hasenack (ahasenack) wrote :

Sponsored:

Uploading hipblas_7.1.0-0ubuntu4.dsc
Uploading hipblas_7.1.0.orig.tar.gz
Uploading hipblas_7.1.0-0ubuntu4.debian.tar.xz
Uploading hipblas_7.1.0-0ubuntu4_source.buildinfo
Uploading hipblas_7.1.0-0ubuntu4_source.changes

Revision history for this message
Igor Luppi (igorluppi) wrote :

Indeed, it'll be waiting for build-depends. Let's hope rocsolver will be published soon! Thanks a lot Andreas!!

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/.githooks/install b/.githooks/install
2index cbb0569..685ea28 100755
3--- a/.githooks/install
4+++ b/.githooks/install
5@@ -3,6 +3,6 @@
6 cd $(git rev-parse --git-dir)
7 cd hooks
8
9-echo "Installing hooks..."
10+echo "Installing hooks..."
11 ln -s ../../.githooks/pre-commit pre-commit
12 echo "Done!"
13diff --git a/.jenkins/codecov.groovy b/.jenkins/codecov.groovy
14new file mode 100644
15index 0000000..dc11b47
16--- /dev/null
17+++ b/.jenkins/codecov.groovy
18@@ -0,0 +1,119 @@
19+#!/usr/bin/env groovy
20+// This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/
21+@Library('rocJenkins@pong') _
22+
23+// This is file for internal AMD use.
24+// If you are interested in running your own Jenkins, please raise a github issue for assistance.
25+
26+import com.amd.project.*
27+import com.amd.docker.*
28+import java.nio.file.Path
29+
30+def runCI =
31+{
32+ nodeDetails, jobName, buildCommand, label->
33+
34+ def prj = new rocProject('hipBLAS', 'CodeCov')
35+
36+ if (env.BRANCH_NAME ==~ /PR-\d+/ && pullRequest.labels.contains("noSolver"))
37+ {
38+ prj.libraryDependencies = ['hipBLAS-common', 'hipBLASLt', 'rocBLAS']
39+ }
40+ else
41+ {
42+ prj.libraryDependencies = ['rocPRIM', 'hipBLAS-common', 'hipBLASLt', 'rocBLAS', 'rocSPARSE', 'rocSOLVER']
43+ }
44+
45+ if (env.BRANCH_NAME ==~ /PR-\d+/ && pullRequest.labels.contains('g++'))
46+ {
47+ buildCommand += ' --compiler=g++'
48+ }
49+ else if (env.BRANCH_NAME ==~ /PR-\d+/ && pullRequest.labels.contains('clang'))
50+ {
51+ buildCommand += ' --compiler=clang++'
52+ }
53+ else
54+ {
55+ // buildCommand += ' --compiler=amdclang++' # leave as default
56+ }
57+
58+ //customize for project
59+ prj.paths.build_command = buildCommand
60+
61+ // Define test architectures, optional rocm version argument is available
62+ def nodes = new dockerNodes(nodeDetails, jobName, prj)
63+
64+ boolean formatCheck = false
65+
66+ def commonGroovy
67+
68+ def compileCommand =
69+ {
70+ platform, project->
71+
72+ commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy"
73+ commonGroovy.runCompileCommand(platform, project, jobName)
74+ }
75+
76+ def testCommand =
77+ {
78+ platform, project->
79+
80+ commonGroovy.runCoverageCommand(platform, project, "release-debug")
81+ }
82+
83+ def packageCommand =
84+ {
85+ platform, project->
86+
87+ commonGroovy.runPackageCommand(platform, project, jobName, label, "release-debug")
88+ }
89+
90+ buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, null)
91+}
92+
93+def setupCI(urlJobName, jobNameList, buildCommand, runCI, label)
94+{
95+ jobNameList = auxiliary.appendJobNameList(jobNameList)
96+
97+ jobNameList.each
98+ {
99+ jobName, nodeDetails->
100+ if (urlJobName == jobName)
101+ stage(label + ' ' + jobName) {
102+ runCI(nodeDetails, jobName, buildCommand, label)
103+ }
104+ }
105+
106+ // For url job names that are not listed by the jobNameList i.e. compute-rocm-dkms-no-npi-1901
107+ if(!jobNameList.keySet().contains(urlJobName))
108+ {
109+ properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])]))
110+ stage(label + ' ' + urlJobName) {
111+ runCI([ubuntu18:['gfx906']], urlJobName, buildCommand, label)
112+ }
113+ }
114+
115+}
116+
117+ci: {
118+ String urlJobName = auxiliary.getTopJobName(env.BUILD_URL)
119+
120+ def propertyList = ["compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])],
121+ "rocm-docker":[]]
122+ propertyList = auxiliary.appendPropertyList(propertyList)
123+
124+ def jobNameList = ["compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]),
125+ "rocm-docker":([ubuntu18:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx906']])]
126+ jobNameList = auxiliary.appendJobNameList(jobNameList)
127+
128+ propertyList.each
129+ {
130+ jobName, property->
131+ if (urlJobName == jobName)
132+ properties(auxiliary.addCommonProperties(property))
133+ }
134+
135+ String hostBuildCommand = './install.sh -k --codecoverage -c'
136+ setupCI(urlJobName, jobNameList, hostBuildCommand, runCI, 'g++')
137+}
138diff --git a/.jenkins/common.groovy b/.jenkins/common.groovy
139index 987c1c4..f959eef 100644
140--- a/.jenkins/common.groovy
141+++ b/.jenkins/common.groovy
142@@ -3,8 +3,6 @@
143
144 def runCompileCommand(platform, project, jobName, boolean sameOrg=false)
145 {
146- project.paths.construct_build_prefix()
147-
148 def getDependenciesCommand = ""
149 if (project.installLibraryDependenciesFromCI)
150 {
151@@ -33,7 +31,7 @@ def runCompileCommand(platform, project, jobName, boolean sameOrg=false)
152 cd ${project.paths.project_build_prefix}
153 ${getDependenciesCommand}
154 ${centos}
155- LD_LIBRARY_PATH=/opt/rocm/lib ${project.paths.build_command}
156+ ${project.paths.build_command}
157 """
158 platform.runCommand(this, command)
159 }
160@@ -51,42 +49,60 @@ def runTestCommand (platform, project)
161 }
162 }
163
164- String gtestCommonEnv = "HIPBLAS_CLIENT_RAM_GB_LIMIT=95"
165+ String gtestCommonEnv = "HIPBLAS_CLIENT_RAM_GB_LIMIT=95 GTEST_LISTENER=NO_PASS_LINE_IN_LOG"
166+
167 def command = """#!/usr/bin/env bash
168 set -x
169- cd ${stagingDir}
170- ${sudo} LD_LIBRARY_PATH=/opt/rocm/lib ${gtestCommonEnv} GTEST_LISTENER=NO_PASS_LINE_IN_LOG ./hipblas-test --gtest_output=xml --gtest_color=yes
171+ pushd ${stagingDir}
172+ ${gtestCommonEnv} ./hipblas-test --gtest_output=xml --gtest_color=yes
173+ popd
174 """
175
176 platform.runCommand(this, command)
177
178- // In an upcoming release, we are replacing hipblasDatatype_t with hipDataType. We have created hipblas_v2-test to test the new
179- // interfaces while hipblasDatatype_t is deprecated. Thus, hipblas-test will be testing the old, deprecated, functions
180- // using hipblasDatatype_t, and hipblas_v2-test will be testing the upcoming interfaces.
181- def v2TestCommand = """#!/usr/bin/env bash
182- set -x
183- cd ${stagingDir}
184- ${sudo} LD_LIBRARY_PATH=/opt/rocm/lib ${gtestCommonEnv} GTEST_LISTENER=NO_PASS_LINE_IN_LOG ./hipblas_v2-test --gtest_output=xml --gtest_color=yes
185- """
186-
187- platform.runCommand(this, v2TestCommand)
188-
189 def yamlTestCommand = """#!/usr/bin/env bash
190 set -x
191- cd ${stagingDir}
192- ${sudo} LD_LIBRARY_PATH=/opt/rocm/lib ${gtestCommonEnv} GTEST_LISTENER=NO_PASS_LINE_IN_LOG ./hipblas-test --gtest_output=xml --gtest_color=yes --yaml hipblas_smoke.yaml
193+ pushd ${stagingDir}
194+ ${gtestCommonEnv} ./hipblas-test --gtest_output=xml --gtest_color=yes --yaml hipblas_smoke.yaml
195+ popd
196 """
197 platform.runCommand(this, yamlTestCommand)
198- junit "${stagingDir}/*.xml"
199 }
200
201-def runPackageCommand(platform, project, jobName, label='')
202+def runCoverageCommand (platform, project, String cmdDir = "release-debug")
203+{
204+ //Temporary workaround due to bug in container
205+ String centos7Workaround = platform.jenkinsLabel.contains('centos7') ? 'export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/opt/rocm/lib64/' : ''
206+
207+ String gtestCommonEnv = "HIPBLAS_CLIENT_RAM_GB_LIMIT=95 GTEST_LISTENER=NO_PASS_LINE_IN_LOG"
208+
209+ def command = """#!/usr/bin/env bash
210+ set -x
211+ cd ${project.paths.project_build_prefix}/build/${cmdDir}
212+ export LD_LIBRARY_PATH=/opt/rocm/lib/
213+ ${centos7Workaround}
214+ ${gtestCommonEnv} make coverage_cleanup coverage GTEST_FILTER=-*known_bug*
215+ """
216+
217+ platform.runCommand(this, command)
218+
219+ publishHTML([allowMissing: false,
220+ alwaysLinkToLastBuild: false,
221+ keepAll: false,
222+ reportDir: "${project.paths.project_build_prefix}/build/${cmdDir}/coverage-report",
223+ reportFiles: "index.html",
224+ reportName: "Code coverage report",
225+ reportTitles: "Code coverage report"])
226+}
227+
228+def runPackageCommand(platform, project, jobName, label='', buildDir='')
229 {
230 def command
231
232 label = label != '' ? '-' + label.toLowerCase() : ''
233 String ext = platform.jenkinsLabel.contains('ubuntu') ? "deb" : "rpm"
234 String dir = jobName.contains('Debug') ? "debug" : "release"
235+
236 if (env.BRANCH_NAME ==~ /PR-\d+/)
237 {
238 if (pullRequest.labels.contains("debug"))
239@@ -94,6 +110,10 @@ def runPackageCommand(platform, project, jobName, label='')
240 dir = "debug"
241 }
242 }
243+ if (buildDir != '')
244+ {
245+ dir = buildDir
246+ }
247
248 command = """
249 set -x
250diff --git a/.jenkins/precheckin-cuda.groovy b/.jenkins/precheckin-cuda.groovy
251index 771c0a0..8378953 100644
252--- a/.jenkins/precheckin-cuda.groovy
253+++ b/.jenkins/precheckin-cuda.groovy
254@@ -58,18 +58,14 @@ def setupCI(urlJobName, jobNameList, buildCommand, runCI, label)
255 {
256 jobName, nodeDetails->
257 if (urlJobName == jobName)
258- stage(label + ' ' + jobName) {
259- runCI(nodeDetails, jobName, buildCommand, label)
260- }
261+ runCI(nodeDetails, jobName, buildCommand, label)
262 }
263
264 // For url job names that are not listed by the jobNameList i.e. compute-rocm-dkms-no-npi-1901
265 if(!jobNameList.keySet().contains(urlJobName))
266 {
267 properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])]))
268- stage(label + ' ' + urlJobName) {
269- runCI(['ubuntu20-cuda11':['anycuda']], urlJobName, buildCommand, label)
270- }
271+ runCI(['ubuntu20-cuda11':['anycuda']], urlJobName, buildCommand, label)
272 }
273
274 }
275diff --git a/.jenkins/precheckin.groovy b/.jenkins/precheckin.groovy
276index 06aa474..8f5362f 100644
277--- a/.jenkins/precheckin.groovy
278+++ b/.jenkins/precheckin.groovy
279@@ -80,18 +80,14 @@ def setupCI(urlJobName, jobNameList, buildCommand, runCI, label)
280 {
281 jobName, nodeDetails->
282 if (urlJobName == jobName)
283- stage(label + ' ' + jobName) {
284- runCI(nodeDetails, jobName, buildCommand, label)
285- }
286+ runCI(nodeDetails, jobName, buildCommand, label)
287 }
288
289 // For url job names that are not listed by the jobNameList i.e. compute-rocm-dkms-no-npi-1901
290 if(!jobNameList.keySet().contains(urlJobName))
291 {
292 properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])]))
293- stage(label + ' ' + urlJobName) {
294- runCI([ubuntu18:['gfx906']], urlJobName, buildCommand, label)
295- }
296+ runCI([ubuntu18:['gfx906']], urlJobName, buildCommand, label)
297 }
298
299 }
300diff --git a/.jenkins/static.groovy b/.jenkins/static.groovy
301index e7899e8..11e0e03 100644
302--- a/.jenkins/static.groovy
303+++ b/.jenkins/static.groovy
304@@ -18,10 +18,10 @@ def runCI =
305
306 if (env.BRANCH_NAME ==~ /PR-\d+/ && pullRequest.labels.contains("noSolver"))
307 {
308- prj.libraryDependencies = ['hipBLAS-common', 'hipBLASLt', 'rocBLAS'] }
309+ prj.libraryDependencies = ['hipBLAS-common', 'rocBLAS'] }
310 else
311 {
312- prj.libraryDependencies = ['rocPRIM', 'hipBLAS-common', 'hipBLASLt', 'rocBLAS', 'rocSPARSE', 'rocSOLVER']
313+ prj.libraryDependencies = ['rocPRIM', 'hipBLAS-common', 'rocBLAS', 'rocSPARSE', 'rocSOLVER']
314 }
315
316 // Define test architectures, optional rocm version argument is available
317@@ -76,17 +76,13 @@ ci: {
318 {
319 jobName, nodeDetails->
320 if (urlJobName == jobName)
321- stage(jobName) {
322- runCI(nodeDetails, jobName)
323- }
324+ runCI(nodeDetails, jobName)
325 }
326
327 // For url job names that are not listed by the jobNameList i.e. compute-rocm-dkms-no-npi-1901
328 if(!jobNameList.keySet().contains(urlJobName))
329 {
330 properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])]))
331- stage(urlJobName) {
332- runCI([ubuntu18:['gfx906']], urlJobName)
333- }
334+ runCI([ubuntu18:['gfx906']], urlJobName)
335 }
336 }
337diff --git a/.jenkins/staticanalysis.groovy b/.jenkins/staticanalysis.groovy
338index 096e8c5..36d76d6 100644
339--- a/.jenkins/staticanalysis.groovy
340+++ b/.jenkins/staticanalysis.groovy
341@@ -45,8 +45,6 @@ ci: {
342 {
343 jobName, nodeDetails->
344 if (urlJobName == jobName)
345- stage(jobName) {
346- runCI(nodeDetails, jobName)
347- }
348+ runCI(nodeDetails, jobName)
349 }
350 }
351diff --git a/CHANGELOG.md b/CHANGELOG.md
352index 5e8395f..4501829 100644
353--- a/CHANGELOG.md
354+++ b/CHANGELOG.md
355@@ -3,7 +3,44 @@
356 Documentation for hipBLAS is available at
357 [https://rocm.docs.amd.com/projects/hipBLAS/en/latest/](https://rocm.docs.amd.com/projects/hipBLAS/en/latest/).
358
359-## hipBLAS 2.4.0 for ROCm 6.4.0
360+## hipBLAS 3.1.0 for ROCm 7.1
361+
362+### Added
363+
364+* `--clients-only` build option to only build clients against a prebuilt library.
365+* gfx1103, gfx1150, gfx1151, gfx1200, and gfx1201 support to clients.
366+* FORTRAN enabled for the Microsoft Windows build and tests.
367+* Additional reference library fallback options added.
368+
369+### Changed
370+
371+* Improve the build time for clients by removing `clients_common.cpp` from the hipblas-test build.
372+
373+## hipBLAS 3.0.0 for ROCm 7.0
374+
375+### Added
376+
377+* Added the `hipblasSetWorkspace()` API
378+* Support for code coverage tests
379+
380+### Changed
381+
382+* HIPBLAS_V2 API is now the only available API using `hipComplex` and `hipDatatype` types
383+* Documentation updates
384+* Verbose compilation for `hipblas.cpp`
385+
386+### Removed
387+
388+* `hipblasDatatype_t` type
389+* `hipComplex` and `hipDoubleComplex` types
390+* Support code for non-production gfx targets
391+
392+### Resolved issues
393+
394+* The build time `CMake` configuration for the dependency on `hipBLAS-common` is fixed
395+* Compiler warnings for unhandled enums have been resolved
396+
397+## hipBLAS 2.4.0 for ROCm 6.4.0
398
399 ### Changed
400
401diff --git a/CMakeLists.txt b/CMakeLists.txt
402index c63dfe1..6b3d0b0 100644
403--- a/CMakeLists.txt
404+++ b/CMakeLists.txt
405@@ -1,5 +1,5 @@
406 # ########################################################################
407-# Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved.
408+# Copyright (C) 2016-2025 Advanced Micro Devices, Inc. All rights reserved.
409 #
410 # Permission is hereby granted, free of charge, to any person obtaining a copy
411 # of this software and associated documentation files (the "Software"), to deal
412@@ -42,12 +42,11 @@ if( NOT DEFINED CMAKE_CONFIGURATION_TYPES AND NOT DEFINED CMAKE_BUILD_TYPE )
413 set( CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." )
414 endif()
415
416-if (NOT WIN32)
417- if ( NOT DEFINED CMAKE_Fortran_COMPILER AND NOT DEFINED ENV{FC} )
418- set( CMAKE_Fortran_COMPILER "gfortran" )
419- endif()
420- set( fortran_language "Fortran" )
421-endif( )
422+if ( NOT DEFINED CMAKE_Fortran_COMPILER AND NOT DEFINED ENV{FC} )
423+ set( CMAKE_Fortran_COMPILER "gfortran" )
424+endif()
425+
426+set( fortran_language "Fortran" )
427
428 project( hipblas LANGUAGES CXX ${fortran_language} )
429
430@@ -66,7 +65,7 @@ list( APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/lib/cmake/hip /opt/rocm /opt/rocm/ll
431 # rocm-cmake contains common cmake code for rocm projects to help setup and install
432 include(dependencies)
433
434-set ( VERSION_STRING "2.4.0" )
435+set ( VERSION_STRING "3.1.0" )
436 rocm_setup_version( VERSION ${VERSION_STRING} )
437
438 option( BUILD_VERBOSE "Output additional build information" OFF )
439@@ -114,10 +113,6 @@ if(HIP_PLATFORM STREQUAL nvidia)
440 endif()
441
442 option(BUILD_CODE_COVERAGE "Build with code coverage enabled" OFF)
443-if(BUILD_CODE_COVERAGE)
444- add_compile_options(-fprofile-arcs -ftest-coverage)
445- add_link_options(--coverage)
446-endif()
447
448 option(BUILD_ADDRESS_SANITIZER "Build with address sanitizer enabled" OFF)
449 if(BUILD_ADDRESS_SANITIZER)
450@@ -125,20 +120,10 @@ if(BUILD_ADDRESS_SANITIZER)
451 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -shared-libasan")
452 endif()
453
454-
455-# FOR HANDLING ENABLE/DISABLE OPTIONAL BACKWARD COMPATIBILITY for FILE/FOLDER REORG
456-option(BUILD_FILE_REORG_BACKWARD_COMPATIBILITY "Build with file/folder reorg with backward compatibility enabled" OFF)
457-if(BUILD_FILE_REORG_BACKWARD_COMPATIBILITY AND NOT WIN32)
458- rocm_wrap_header_dir(
459- ${CMAKE_SOURCE_DIR}/library/include
460- PATTERNS "*.h"
461- GUARDS SYMLINK WRAPPER
462- WRAPPER_LOCATIONS ${CMAKE_INSTALL_INCLUDEDIR}
463- )
464+if(NOT SKIP_LIBRARY)
465+ add_subdirectory(library)
466 endif()
467
468-add_subdirectory( library )
469-
470 include( clients/cmake/build-options.cmake )
471
472 # Build clients of the library
473@@ -151,7 +136,7 @@ if( BUILD_CLIENTS_SAMPLES OR BUILD_CLIENTS_TESTS OR BUILD_CLIENTS_BENCHMARKS )
474 message(STATUS "OS: ${CLIENTS_OS} ${CLIENTS_OS_VERSION}")
475 set(GFORTRAN_RPM "libgfortran4")
476 set(GFORTRAN_DEB "libgfortran4")
477- if(CLIENTS_OS STREQUAL "centos" OR CLIENTS_OS STREQUAL "rhel")
478+ if(CLIENTS_OS STREQUAL "centos" OR CLIENTS_OS STREQUAL "rhel" OR CLIENTS_OS STREQUAL "almalinux")
479 if(CLIENTS_OS_VERSION VERSION_GREATER_EQUAL "8")
480 set(GFORTRAN_RPM "libgfortran")
481 endif()
482@@ -204,22 +189,30 @@ endif( )
483
484 # Package specific CPACK vars
485 if(HIP_PLATFORM STREQUAL amd)
486- set(rocblas_minimum 4.4.0)
487- set(rocsolver_minimum 3.28.0)
488+ set(rocblas_minimum 5.1.0)
489+ set(rocsolver_minimum 3.31.0)
490 rocm_package_add_dependencies(SHARED_DEPENDS "rocblas >= ${rocblas_minimum}" "rocsolver >= ${rocsolver_minimum}")
491 rocm_package_add_rpm_dependencies(STATIC_DEPENDS "rocblas-static-devel >= ${rocblas_minimum}" "rocsolver-static-devel >= ${rocsolver_minimum}")
492 rocm_package_add_deb_dependencies(STATIC_DEPENDS "rocblas-static-dev >= ${rocblas_minimum}" "rocsolver-static-dev >= ${rocsolver_minimum}")
493 endif( )
494
495-set(hipblas_common_minimum 1.0.0)
496-rocm_package_add_deb_dependencies(COMPONENT devel DEPENDS "hipblas-common-dev >= ${hipblas_common_minimum}")
497-rocm_package_add_rpm_dependencies(COMPONENT devel DEPENDS "hipblas-common-devel >= ${hipblas_common_minimum}")
498+set(hipblas_common_minimum 1.3.0)
499+
500+if(BUILD_SHARED_LIBS)
501+ rocm_package_add_deb_dependencies(COMPONENT devel DEPENDS "hipblas-common-dev >= ${hipblas_common_minimum}")
502+ rocm_package_add_rpm_dependencies(COMPONENT devel DEPENDS "hipblas-common-devel >= ${hipblas_common_minimum}")
503+else()
504+ rocm_package_add_deb_dependencies(COMPONENT devel DEPENDS "hipblas-common-static-dev >= ${hipblas_common_minimum}")
505+ rocm_package_add_rpm_dependencies(COMPONENT devel DEPENDS "hipblas-common-static-devel >= ${hipblas_common_minimum}")
506+endif()
507
508 set( CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md" )
509 set( CPACK_RPM_PACKAGE_LICENSE "MIT")
510
511 if (WIN32)
512- SET( CMAKE_INSTALL_PREFIX "C:/hipSDK" CACHE PATH "Install path" FORCE )
513+ if( CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT )
514+ SET( CMAKE_INSTALL_PREFIX "C:/hipSDK" CACHE PATH "Install path" FORCE )
515+ endif()
516 SET( INSTALL_PREFIX "C:/hipSDK" )
517 SET( CPACK_SET_DESTDIR FALSE )
518 SET( CPACK_PACKAGE_INSTALL_DIRECTORY "C:/hipSDK" )
519@@ -273,35 +266,37 @@ if(BUILD_CODE_COVERAGE)
520
521 add_custom_target(coverage_analysis
522 COMMAND echo Coverage GTEST_FILTER=\${GTEST_FILTER}
523- COMMAND ${coverage_test} --gtest_filter=\"\${GTEST_FILTER}\"
524+ COMMAND ${CMAKE_COMMAND} -E make_directory ./coverage-report/profraw
525+ COMMAND ${CMAKE_COMMAND} -E env LLVM_PROFILE_FILE="./coverage-report/profraw/hipblas-coverage_%m.profraw" ${coverage_test} --gtest_filter=\"\${GTEST_FILTER}\"
526 WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
527 )
528
529 add_dependencies(coverage_analysis hipblas)
530
531- #
532- # Prepare coverage output
533- # This little script is generated because the option '--gcov-tool <program name>' of lcov cannot take arguments.
534- #
535- add_custom_target(coverage_output
536- DEPENDS coverage_analysis
537- COMMAND mkdir -p lcoverage
538- COMMAND echo "\\#!/bin/bash" > llvm-gcov.sh
539- COMMAND echo "\\# THIS FILE HAS BEEN GENERATED" >> llvm-gcov.sh
540- COMMAND printf "exec /opt/rocm/llvm/bin/llvm-cov gcov $$\\@" >> llvm-gcov.sh
541- COMMAND chmod +x llvm-gcov.sh
542- )
543+ find_program(
544+ LLVM_PROFDATA
545+ llvm-profdata
546+ REQUIRED
547+ HINTS ${ROCM_PATH}/llvm/bin
548+ PATHS /opt/rocm/llvm/bin
549+ )
550
551- #
552- # Generate coverage output.
553- #
554- add_custom_command(TARGET coverage_output
555- COMMAND lcov --directory . --base-directory . --gcov-tool ${CMAKE_BINARY_DIR}/llvm-gcov.sh --capture -o lcoverage/raw_main_coverage.info
556- COMMAND lcov --remove lcoverage/raw_main_coverage.info "'/opt/*'" "'/usr/*'" -o lcoverage/main_coverage.info
557- COMMAND genhtml lcoverage/main_coverage.info --output-directory lcoverage
558- )
559+ find_program(
560+ LLVM_COV
561+ llvm-cov
562+ REQUIRED
563+ HINTS ${ROCM_PATH}/llvm/bin
564+ PATHS /opt/rocm/llvm/bin
565+ )
566
567- add_custom_target(coverage DEPENDS coverage_output)
568+ add_custom_target(
569+ coverage
570+ DEPENDS coverage_analysis
571+ COMMAND ${LLVM_PROFDATA} merge -sparse ./coverage-report/profraw/hipblas-coverage_*.profraw -o ./coverage-report/hipblas.profdata
572+ COMMAND ${LLVM_COV} report -object ./library/src/libhipblas.so -instr-profile=./coverage-report/hipblas.profdata
573+ COMMAND ${LLVM_COV} show -object ./library/src/libhipblas.so -instr-profile=./coverage-report/hipblas.profdata -format=html -output-dir=coverage-report
574+ WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
575+ )
576
577 #
578 # Coverage cleanup
579diff --git a/LICENSE.md b/LICENSE.md
580index 588320c..1461bfb 100644
581--- a/LICENSE.md
582+++ b/LICENSE.md
583@@ -1,12 +1,26 @@
584 MIT License
585
586-Copyright (C) 2017-2025 Advanced Micro Devices, Inc. All rights reserved.
587+Copyright (C) Advanced Micro Devices, Inc.
588
589-Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
590+Permission is hereby granted, free of charge, to any person obtaining a copy
591+of this software and associated documentation files (the "Software"), to deal
592+in the Software without restriction, including without limitation the rights
593+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
594+copies of the Software, and to permit persons to whom the Software is
595+furnished to do so, subject to the following conditions:
596
597-The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
598+The above copyright notice and this permission notice shall be included in all
599+copies or substantial portions of the Software.
600
601-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
602+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
603+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
604+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
605+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
606+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
607+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
608+SOFTWARE.
609+
610+---
611
612 This product includes software from copyright holders as shown below, and distributed under their license terms as specified.
613
614diff --git a/README.md b/README.md
615index bce3811..5990bae 100644
616--- a/README.md
617+++ b/README.md
618@@ -33,22 +33,43 @@ cmake -DBUILD_DOCS=ON ...
619
620 ## Build and install
621
622-1. Download the hipBLAS source code (clone this repository):
623+1. Check out the hipBLAS code using either a sparse checkout or a full clone of the rocm-libraries repository.
624+
625+ To limit your local checkout to only the hipBLAS project, configure ``sparse-checkout`` before cloning.
626+ This uses the Git partial clone feature (``--filter=blob:none``) to reduce how much data is downloaded.
627+ Use the following commands for a sparse checkout:
628+
629+ ```bash
630+
631+ git clone --no-checkout --filter=blob:none https://github.com/ROCm/rocm-libraries.git
632+ cd rocm-libraries
633+ git sparse-checkout init --cone
634+ git sparse-checkout set projects/hipblas # add projects/rocsolver projects/rocblas projects/hipblas-common to include dependencies
635+ git checkout develop # or use the branch you want to work with
636+ ```
637+
638+ To clone the entire rocm-libraries repository, use the following commands. This process takes more time,
639+ but is recommended if you want to work with a large number of libraries.
640
641 ```bash
642- git clone https://github.com/ROCmSoftwarePlatform/hipBLAS.git
643+
644+ # Clone rocm-libraries, including hipBLAS, using Git
645+ git clone https://github.com/ROCm/rocm-libraries.git
646+
647+ # Go to hipBLAS directory
648+ cd rocm-libraries/projects/hipblas
649 ```
650
651 ```note
652 hipBLAS requires specific versions of rocBLAS and rocSOLVER. Refer to
653- [CMakeLists.txt](https://github.com/ROCmSoftwarePlatform/hipBLAS/blob/develop/library/CMakeLists.txt)
654+ [CMakeLists.txt](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hipblas/library/CMakeLists.txt)
655 for details.
656 ```
657
658-2. Build hipBLAS and install it into `/opt/rocm/hipblas`:
659+2. Build hipBLAS using the `install.sh` script and install it into `/opt/rocm/hipblas`:
660
661 ```bash
662- cd hipblas
663+ cd rocm-libraries/projects/hipblas
664 ./install.sh -i
665 ```
666
667diff --git a/bump_develop_version.sh b/bump_develop_version.sh
668index cdfcdad..b83ca98 100755
669--- a/bump_develop_version.sh
670+++ b/bump_develop_version.sh
671@@ -2,19 +2,23 @@
672
673 # For the develop branch, bump hipblas version and rocblas/rocsolver dependency versions
674
675-OLD_HIPBLAS_VERSION="2.3.0"
676-NEW_HIPBLAS_VERSION="2.4.0"
677+OLD_HIPBLAS_VERSION="3.0.0"
678+NEW_HIPBLAS_VERSION="3.1.0"
679
680-OLD_MINIMUM_ROCBLAS_VERSION="4.3.0"
681-NEW_MINIMUM_ROCBLAS_VERSION="4.4.0"
682+OLD_MINIMUM_ROCBLAS_VERSION="5.0.0"
683+NEW_MINIMUM_ROCBLAS_VERSION="5.1.0"
684
685-OLD_MINIMUM_ROCSOLVER_VERSION="3.27.0"
686-NEW_MINIMUM_ROCSOLVER_VERSION="3.28.0"
687+OLD_MINIMUM_ROCSOLVER_VERSION="3.30.0"
688+NEW_MINIMUM_ROCSOLVER_VERSION="3.31.0"
689
690-OLD_SO_VERSION="hipblas_SOVERSION 2.3"
691-NEW_SO_VERSION="hipblas_SOVERSION 2.4"
692+OLD_MINIMUM_HIPBLAS_COMMON_VERSION="1.1.0"
693+NEW_MINIMUM_HIPBLAS_COMMON_VERSION="1.3.0"
694+
695+OLD_SO_VERSION="hipblas_SOVERSION 3.0"
696+NEW_SO_VERSION="hipblas_SOVERSION 3.1"
697
698 sed -i "s/${OLD_HIPBLAS_VERSION}/${NEW_HIPBLAS_VERSION}/g" CMakeLists.txt
699 sed -i "s/${OLD_MINIMUM_ROCBLAS_VERSION}/${NEW_MINIMUM_ROCBLAS_VERSION}/g" CMakeLists.txt
700 sed -i "s/${OLD_MINIMUM_ROCSOLVER_VERSION}/${NEW_MINIMUM_ROCSOLVER_VERSION}/g" CMakeLists.txt
701+sed -i "s/${OLD_MINIMUM_HIPBLAS_COMMON_VERSION}/${NEW_MINIMUM_HIPBLAS_COMMON_VERSION}/g" CMakeLists.txt
702 sed -i "s/${OLD_SO_VERSION}/${NEW_SO_VERSION}/g" library/CMakeLists.txt
703diff --git a/clients/CMakeLists.txt b/clients/CMakeLists.txt
704index 7cad5e1..f67828e 100644
705--- a/clients/CMakeLists.txt
706+++ b/clients/CMakeLists.txt
707@@ -1,5 +1,5 @@
708 # ########################################################################
709-# Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved.
710+# Copyright (C) 2016-2025 Advanced Micro Devices, Inc. All rights reserved.
711 #
712 # Permission is hereby granted, free of charge, to any person obtaining a copy
713 # of this software and associated documentation files (the "Software"), to deal
714@@ -28,8 +28,8 @@ function( get_lapack lapack_lib lapack_inc )
715 set( inc "${BUILD_DIR}/deps/deps-install/include" )
716 set( ${cblas_inc} ${inc} PARENT_SCOPE )
717 else()
718- find_package( lapack REQUIRED CONFIG )
719- set( lib "lapack" )
720+ find_package( LAPACK REQUIRED )
721+ set( lib "${LAPACK_LIBRARIES}" )
722 endif()
723 set( ${lapack_lib} ${lib} PARENT_SCOPE )
724 endfunction( )
725@@ -52,12 +52,31 @@ function( get_cblas cblas_libs cblas_inc )
726 set( ${cblas_inc} ${inc} PARENT_SCOPE )
727 else()
728 find_package( cblas REQUIRED CONFIG )
729- set( libs cblas blas )
730+ set( libs ${CBLAS_LIBRARIES} )
731 endif()
732 endif()
733 set( ${cblas_libs} ${libs} PARENT_SCOPE )
734 endfunction( )
735
736+function( apply_omp_settings lib_target_ )
737+ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND TARGET OpenMP::OpenMP_CXX)
738+ set_target_properties( ${lib_target_} PROPERTIES
739+ BUILD_RPATH "${HIP_CLANG_ROOT}/lib"
740+ )
741+ set_target_properties( ${lib_target_} PROPERTIES
742+ INSTALL_RPATH "$ORIGIN/../llvm/lib"
743+ )
744+ elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND TARGET OpenMP::omp)
745+ set_target_properties( ${lib_target_} PROPERTIES
746+ BUILD_RPATH "${HIP_CLANG_ROOT}/${openmp_LIB_DIR}"
747+ )
748+ set_target_properties( ${lib_target_} PROPERTIES
749+ INSTALL_RPATH "$ORIGIN/../llvm/${openmp_LIB_DIR}"
750+ )
751+ endif()
752+endfunction()
753+
754+
755 # Consider removing this in the future
756 # This should appear before the project command, because it does not use FORCE
757 if( WIN32 )
758@@ -90,19 +109,34 @@ if( NOT WIN32 )
759 set(hipblas_f90_source_clients_solver
760 include/hipblas_fortran_module.f90
761 )
762+
763+ set (hipblas_f90_source
764+ ../library/src/hipblas_module.f90
765+ )
766 endif()
767
768 if( BUILD_CLIENTS_TESTS OR BUILD_CLIENTS_BENCHMARKS OR BUILD_CLIENTS_SAMPLES )
769+ # Create hipBLAS Fortran module
770+ if(NOT WIN32)
771+ # Set Fortran module output directory
772+ set(CMAKE_Fortran_MODULE_DIRECTORY ${PROJECT_BINARY_DIR}/include/hipblas)
773+ add_library(hipblas_fortran OBJECT ${hipblas_f90_source})
774+ endif()
775+
776 if( NOT WIN32 )
777 if( BUILD_WITH_SOLVER )
778- add_library(hipblas_fortran_client STATIC ${hipblas_f90_source_clients_solver})
779+ add_library(hipblas_fortran_client STATIC ${hipblas_f90_source_clients_solver} $<TARGET_OBJECTS:hipblas_fortran>)
780 else()
781- add_library(hipblas_fortran_client STATIC ${hipblas_f90_source_clients_no_solver})
782+ add_library(hipblas_fortran_client STATIC ${hipblas_f90_source_clients_no_solver} $<TARGET_OBJECTS:hipblas_fortran>)
783 endif()
784- add_dependencies(hipblas_fortran_client hipblas_fortran)
785 endif()
786- include_directories(${CMAKE_BINARY_DIR}/include/hipblas)
787- include_directories(${CMAKE_BINARY_DIR}/include)
788+
789+ if(SKIP_LIBRARY)
790+ include_directories(${HIPBLAS_LIBRARY_DIR}/include/hipblas)
791+ else()
792+ include_directories(${CMAKE_BINARY_DIR}/include/hipblas)
793+ include_directories(${CMAKE_BINARY_DIR}/include)
794+ endif()
795 endif( )
796
797 if( BUILD_CLIENTS_SAMPLES )
798@@ -114,19 +148,20 @@ if( BUILD_CLIENTS_BENCHMARKS OR BUILD_CLIENTS_TESTS)
799 set(THREADS_PREFER_PTHREAD_FLAG ON)
800 find_package(Threads REQUIRED)
801
802- # if it fails to find OpenMP compile and link flags in strange configurations it can just use non-parallel reference computation
803- # if there is no omp.h to find the client compilation will fail and this should be obvious, used to be REQUIRED
804- find_package(OpenMP)
805+ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
806+ # Look for openmp config in ROCm install to populate openmp_LIB_DIR and openmp_LIB_INSTALL_DIR
807+ find_package(OpenMP CONFIG PATHS "${HIP_CLANG_ROOT}/lib/cmake")
808+ endif()
809
810- if (TARGET OpenMP::OpenMP_CXX)
811- set( COMMON_LINK_LIBS "OpenMP::OpenMP_CXX")
812- if(HIP_PLATFORM STREQUAL amd)
813- list( APPEND COMMON_LINK_LIBS "-L\"${HIP_CLANG_ROOT}/lib\"")
814- if (NOT WIN32)
815- list( APPEND COMMON_LINK_LIBS "-Wl,-rpath=${HIP_CLANG_ROOT}/lib -lomp")
816- else()
817- list( APPEND COMMON_LINK_LIBS "libomp")
818- endif()
819+ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND TARGET OpenMP::omp)
820+ set( COMMON_LINK_LIBS "OpenMP::omp")
821+ message(STATUS "Found openmp-config.cmake at ${OpenMP_DIR}")
822+ else()
823+ # if it fails to find OpenMP compile and link flags in strange configurations it can just use non-parallel reference computation
824+ # if there is no omp.h to find the client compilation will fail and this should be obvious, used to be REQUIRED
825+ find_package(OpenMP)
826+ if (TARGET OpenMP::OpenMP_CXX)
827+ set( COMMON_LINK_LIBS "OpenMP::OpenMP_CXX")
828 endif()
829 endif()
830
831@@ -184,7 +219,7 @@ if( BUILD_CLIENTS_BENCHMARKS OR BUILD_CLIENTS_TESTS)
832 NO_DEFAULT_PATH
833 )
834 if (NOT BLAS_LIBRARY)
835- find_package( OPENBLAS CONFIG REQUIRED )
836+ find_package( OpenBLAS CONFIG REQUIRED )
837 set( BLAS_LIBRARY OpenBLAS::OpenBLAS )
838 set( BLAS_INCLUDE_DIR "" )
839 endif()
840@@ -195,8 +230,17 @@ if( BUILD_CLIENTS_BENCHMARKS OR BUILD_CLIENTS_TESTS)
841 set( BLIS_CPP ../common/blis_interface.cpp )
842 endif()
843
844+ if(EXISTS "${BUILD_DIR}/deps/deps-install/lib/libgtest.a")
845+ set( GTEST_ROOT "${BUILD_DIR}/deps/deps-install")
846+ endif()
847+ find_package( GTest REQUIRED )
848+
849 message(STATUS "Build Dir: ${BUILD_DIR}")
850- message(STATUS "Linking Ref. Libs: ${BLAS_LIBRARY}")
851+ message(STATUS "Linking Libs: ${BLAS_LIBRARY}")
852+
853+ if( NOT TARGET hipblas )
854+ find_package( hipblas REQUIRED CONFIG PATHS ${HIPBLAS_LIBRARY_DIR} )
855+ endif( )
856
857 if( BUILD_CLIENTS_TESTS )
858 add_subdirectory( gtest )
859@@ -232,8 +276,14 @@ add_custom_command( OUTPUT "${HIPBLAS_GENTEST}"
860 DEPENDS common/hipblas_gentest.py
861 WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" )
862
863+set( HIPBLAS_CLIENTS_README "${PROJECT_BINARY_DIR}/staging/hipblas_clients_readme.txt")
864+add_custom_command( OUTPUT "${HIPBLAS_CLIENTS_README}"
865+ COMMAND ${CMAKE_COMMAND} -E copy hipblas_clients_readme.txt "${HIPBLAS_CLIENTS_README}"
866+ DEPENDS hipblas_clients_readme.txt
867+ WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" )
868+
869
870-add_custom_target( hipblas-common DEPENDS "${HIPBLAS_COMMON}" "${HIPBLAS_TEMPLATE}" "${HIPBLAS_SMOKE}" "${HIPBLAS_GENTEST}" )
871+add_custom_target( hipblas-clients-common DEPENDS "${HIPBLAS_COMMON}" "${HIPBLAS_TEMPLATE}" "${HIPBLAS_SMOKE}" "${HIPBLAS_GENTEST}" "${HIPBLAS_CLIENTS_README}" )
872
873 if( BUILD_CLIENTS_TESTS OR BUILD_CLIENTS_BENCHMARKS )
874 rocm_install(
875@@ -246,4 +296,10 @@ if( BUILD_CLIENTS_TESTS OR BUILD_CLIENTS_BENCHMARKS )
876 DESTINATION "${CMAKE_INSTALL_BINDIR}"
877 COMPONENT clients-common
878 )
879+ # this readme also keeps the hipblas-clients package from being empty; dpkg may auto-remove an empty hipblas-clients package together with its non-empty child packages
880+ rocm_install(
881+ FILES ${HIPBLAS_CLIENTS_README}
882+ DESTINATION "${CMAKE_INSTALL_BINDIR}"
883+ COMPONENT clients
884+ )
885 endif()
886diff --git a/clients/benchmarks/CMakeLists.txt b/clients/benchmarks/CMakeLists.txt
887index d04d28e..5ed28e2 100644
888--- a/clients/benchmarks/CMakeLists.txt
889+++ b/clients/benchmarks/CMakeLists.txt
890@@ -1,5 +1,5 @@
891 # ########################################################################
892-# Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved.
893+# Copyright (C) 2016-2025 Advanced Micro Devices, Inc. All rights reserved.
894 #
895 # Permission is hereby granted, free of charge, to any person obtaining a copy
896 # of this software and associated documentation files (the "Software"), to deal
897@@ -27,10 +27,6 @@ enable_language( Fortran )
898
899 set(hipblas_bench_source client.cpp)
900
901-if( NOT TARGET hipblas )
902- find_package( hipblas REQUIRED CONFIG PATHS /opt/rocm/hipblas )
903-endif( )
904-
905 set( hipblas_benchmark_common
906 ../common/utility.cpp
907 ../common/cblas_interface.cpp
908@@ -43,26 +39,22 @@ set( hipblas_benchmark_common
909 ../common/near.cpp
910 ../common/arg_check.cpp
911 ../common/argument_model.cpp
912- ../common/hipblas_template_specialization.cpp
913 ../common/host_alloc.cpp
914 ${BLIS_CPP}
915 )
916
917-add_executable( hipblas-bench ${hipblas_bench_source} ${hipblas_benchmark_common} )
918-add_executable( hipblas_v2-bench ${hipblas_bench_source} ${hipblas_benchmark_common} )
919+if(NOT WIN32)
920+ add_executable( hipblas-bench ${hipblas_bench_source} ${hipblas_benchmark_common} $<TARGET_OBJECTS:hipblas_fortran_client>)
921+else()
922+ add_executable( hipblas-bench ${hipblas_bench_source} ${hipblas_benchmark_common} )
923+endif()
924
925 target_compile_features( hipblas-bench PRIVATE cxx_static_assert cxx_nullptr cxx_auto_type )
926-target_compile_features( hipblas_v2-bench PRIVATE cxx_static_assert cxx_nullptr cxx_auto_type )
927-
928 # Internal header includes
929 target_include_directories( hipblas-bench
930 PRIVATE
931 $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
932 )
933-target_include_directories( hipblas_v2-bench
934- PRIVATE
935- $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
936-)
937
938 # External header includes included as system files
939 target_include_directories( hipblas-bench
940@@ -73,47 +65,28 @@ target_include_directories( hipblas-bench
941 $<BUILD_INTERFACE:${BLIS_INCLUDE_DIR}>
942 $<BUILD_INTERFACE:${FLAME_INCLUDE_DIR}>
943 )
944-target_include_directories( hipblas_v2-bench
945- SYSTEM PRIVATE
946- $<BUILD_INTERFACE:${HIP_INCLUDE_DIRS}>
947- $<BUILD_INTERFACE:${CBLAS_INCLUDE_DIRS}>
948- $<BUILD_INTERFACE:${BLAS_INCLUDE_DIR}>
949- $<BUILD_INTERFACE:${BLIS_INCLUDE_DIR}>
950- $<BUILD_INTERFACE:${FLAME_INCLUDE_DIR}>
951-)
952-
953-target_link_libraries( hipblas-bench PRIVATE roc::hipblas )
954-target_link_libraries( hipblas_v2-bench PRIVATE roc::hipblas )
955
956-if (NOT WIN32)
957- target_link_libraries( hipblas-bench PRIVATE hipblas_fortran_client )
958- target_link_libraries( hipblas_v2-bench PRIVATE hipblas_fortran_client )
959-endif()
960+target_link_libraries( hipblas-bench PRIVATE roc::hipblas GTest::gtest GTest::gtest_main )
961
962 # need mf16c flag for float->half convertion
963 target_compile_options( hipblas-bench PRIVATE -mf16c ) # -Wno-deprecated-declarations )
964-target_compile_options( hipblas_v2-bench PRIVATE -mf16c ) # -Wno-deprecated-declarations )
965
966 target_compile_options(hipblas-bench PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${COMMON_CXX_OPTIONS}>)
967-target_compile_options(hipblas_v2-bench PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${COMMON_CXX_OPTIONS}>)
968
969 target_compile_definitions( hipblas-bench PRIVATE HIPBLAS_BENCH ${COMMON_DEFINES} ${BLIS_DEFINES} )
970-target_compile_definitions( hipblas_v2-bench PRIVATE HIPBLAS_BENCH ${COMMON_DEFINES} ${BLIS_DEFINES} HIPBLAS_V2 )
971
972 target_link_libraries( hipblas-bench PRIVATE ${BLAS_LIBRARY} ${COMMON_LINK_LIBS} )
973-target_link_libraries( hipblas_v2-bench PRIVATE ${BLAS_LIBRARY} ${COMMON_LINK_LIBS} )
974+apply_omp_settings( hipblas-bench )
975+
976 if (NOT WIN32)
977 target_link_libraries( hipblas-bench PRIVATE stdc++fs )
978- target_link_libraries( hipblas_v2-bench PRIVATE stdc++fs )
979 endif()
980
981 if(HIP_PLATFORM STREQUAL amd)
982 target_link_libraries( hipblas-bench PRIVATE hip::host )
983- target_link_libraries( hipblas_v2-bench PRIVATE hip::host )
984
985 if( CUSTOM_TARGET )
986 target_link_libraries( hipblas-bench PRIVATE hip::${CUSTOM_TARGET} )
987- target_link_libraries( hipblas_v2-bench PRIVATE hip::${CUSTOM_TARGET} )
988 endif()
989
990 else( )
991@@ -121,26 +94,15 @@ else( )
992 PRIVATE
993 $<BUILD_INTERFACE:${CUDA_INCLUDE_DIRS}>
994 )
995- target_include_directories( hipblas_v2-bench
996- PRIVATE
997- $<BUILD_INTERFACE:${CUDA_INCLUDE_DIRS}>
998- )
999
1000 target_link_libraries( hipblas-bench PRIVATE ${CUDA_LIBRARIES} )
1001- target_link_libraries( hipblas_v2-bench PRIVATE ${CUDA_LIBRARIES} )
1002 endif( )
1003
1004 set_target_properties( hipblas-bench PROPERTIES
1005 CXX_EXTENSIONS OFF
1006 RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging"
1007 )
1008-set_target_properties( hipblas_v2-bench PROPERTIES
1009- CXX_EXTENSIONS OFF
1010- RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging"
1011-)
1012
1013-add_dependencies( hipblas-bench hipblas-common )
1014-add_dependencies( hipblas_v2-bench hipblas-common )
1015+add_dependencies( hipblas-bench hipblas-clients-common )
1016
1017 rocm_install(TARGETS hipblas-bench COMPONENT benchmarks)
1018-rocm_install(TARGETS hipblas_v2-bench COMPONENT benchmarks)
1019diff --git a/clients/benchmarks/client.cpp b/clients/benchmarks/client.cpp
1020index 2aebc63..b412224 100644
1021--- a/clients/benchmarks/client.cpp
1022+++ b/clients/benchmarks/client.cpp
1023@@ -1,5 +1,5 @@
1024 /* ************************************************************************
1025- * Copyright (C) 2016-2023 Advanced Micro Devices, Inc. All rights reserved.
1026+ * Copyright (C) 2016-2025 Advanced Micro Devices, Inc. All rights reserved.
1027 *
1028 * Permission is hereby granted, free of charge, to any person obtaining a copy
1029 * of this software and associated documentation files (the "Software"), to deal
1030@@ -277,7 +277,7 @@ try
1031 "Options: h,s,d,c,z,f16_r,f32_r,f64_r,bf16_r,f32_c,f64_c,i8_r,i32_r")
1032
1033 ("compute_type_gemm",
1034- value<std::string>(&compute_type_gemm), "Precision of computation for gemm_ex with HIPBLAS_V2 define"
1035+ value<std::string>(&compute_type_gemm), "Precision of computation for gemm_ex"
1036 "Options: c16f,c16f_pedantic,c32f,c32f_pedantic,c32f_fast_16f,c32f_fast_16bf,c32f_fast_tf32,c64f,c64f_pedantic,c32i,c32i_pedantic")
1037
1038 ("initialization",
1039@@ -369,6 +369,10 @@ try
1040 value<int32_t>(&api)->default_value(0),
1041 "Use API, supercedes fortran flag (0==C, 1==C_64, ...)")
1042
1043+ ("workspace",
1044+ value<size_t>(&arg.user_allocated_workspace)->default_value(0),
1045+ "Set workspace available in handle using xxblasSetWorkspace() API after handle creation")
1046+
1047 ("help,h", "produces this help message");
1048
1049 //("version", "Prints the version number");
1050@@ -418,30 +422,13 @@ try
1051 return hipblas_bench_datafile();
1052
1053 std::transform(precision.begin(), precision.end(), precision.begin(), ::tolower);
1054- auto prec = string2hipblas_datatype(precision);
1055- if(prec == HIPBLAS_DATATYPE_INVALID)
1056- throw std::invalid_argument("Invalid value for --precision " + precision);
1057-
1058- arg.a_type = a_type == "" ? prec : string2hipblas_datatype(a_type);
1059- if(arg.a_type == HIPBLAS_DATATYPE_INVALID)
1060- throw std::invalid_argument("Invalid value for --a_type " + a_type);
1061-
1062- arg.b_type = b_type == "" ? prec : string2hipblas_datatype(b_type);
1063- if(arg.b_type == HIPBLAS_DATATYPE_INVALID)
1064- throw std::invalid_argument("Invalid value for --b_type " + b_type);
1065-
1066- arg.c_type = c_type == "" ? prec : string2hipblas_datatype(c_type);
1067- if(arg.c_type == HIPBLAS_DATATYPE_INVALID)
1068- throw std::invalid_argument("Invalid value for --c_type " + c_type);
1069-
1070- arg.d_type = d_type == "" ? prec : string2hipblas_datatype(d_type);
1071- if(arg.d_type == HIPBLAS_DATATYPE_INVALID)
1072- throw std::invalid_argument("Invalid value for --d_type " + d_type);
1073-
1074- arg.compute_type = compute_type == "" ? prec : string2hipblas_datatype(compute_type);
1075- if(arg.compute_type == HIPBLAS_DATATYPE_INVALID)
1076- throw std::invalid_argument("Invalid value for --compute_type " + compute_type);
1077+ auto prec = string2hip_datatype(precision);
1078
1079+ arg.a_type = a_type == "" ? prec : string2hip_datatype(a_type);
1080+ arg.b_type = b_type == "" ? prec : string2hip_datatype(b_type);
1081+ arg.c_type = c_type == "" ? prec : string2hip_datatype(c_type);
1082+ arg.d_type = d_type == "" ? prec : string2hip_datatype(d_type);
1083+ arg.compute_type = compute_type == "" ? prec : string2hip_datatype(compute_type);
1084 arg.compute_type_gemm = string2hipblas_computetype(compute_type_gemm);
1085
1086 arg.initialization = string2hipblas_initialization(initialization);
1087diff --git a/clients/cmake/build-options.cmake b/clients/cmake/build-options.cmake
1088index 25bb314..44e0b32 100644
1089--- a/clients/cmake/build-options.cmake
1090+++ b/clients/cmake/build-options.cmake
1091@@ -26,5 +26,3 @@ if( HIP_PLATFORM STREQUAL nvidia )
1092 else()
1093 option( LINK_BLIS "Link AOCL Blis reference library" ON )
1094 endif()
1095-
1096-
1097diff --git a/clients/common/cblas_interface.cpp b/clients/common/cblas_interface.cpp
1098index 2ccc666..f65b25c 100644
1099--- a/clients/common/cblas_interface.cpp
1100+++ b/clients/common/cblas_interface.cpp
1101@@ -1,5 +1,5 @@
1102 /* ************************************************************************
1103- * Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved.
1104+ * Copyright (C) 2016-2025 Advanced Micro Devices, Inc. All rights reserved.
1105 *
1106 * Permission is hereby granted, free of charge, to any person obtaining a copy
1107 * of this software and associated documentation files (the "Software"), to deal
1108@@ -46,14 +46,15 @@ extern "C" {
1109
1110 void spotrf_(char* uplo, int64_t* m, float* A, int64_t* lda, int64_t* info);
1111 void dpotrf_(char* uplo, int64_t* m, double* A, int64_t* lda, int64_t* info);
1112-void cpotrf_(char* uplo, int64_t* m, hipblasComplex* A, int64_t* lda, int64_t* info);
1113-void zpotrf_(char* uplo, int64_t* m, hipblasDoubleComplex* A, int64_t* lda, int64_t* info);
1114+void cpotrf_(char* uplo, int64_t* m, std::complex<float>* A, int64_t* lda, int64_t* info);
1115+void zpotrf_(char* uplo, int64_t* m, std::complex<double>* A, int64_t* lda, int64_t* info);
1116
1117 void sgetrf_(int64_t* m, int64_t* n, float* A, int64_t* lda, int64_t* ipiv, int64_t* info);
1118 void dgetrf_(int64_t* m, int64_t* n, double* A, int64_t* lda, int64_t* ipiv, int64_t* info);
1119-void cgetrf_(int64_t* m, int64_t* n, hipblasComplex* A, int64_t* lda, int64_t* ipiv, int64_t* info);
1120+void cgetrf_(
1121+ int64_t* m, int64_t* n, std::complex<float>* A, int64_t* lda, int64_t* ipiv, int64_t* info);
1122 void zgetrf_(
1123- int64_t* m, int64_t* n, hipblasDoubleComplex* A, int64_t* lda, int64_t* ipiv, int64_t* info);
1124+ int64_t* m, int64_t* n, std::complex<double>* A, int64_t* lda, int64_t* ipiv, int64_t* info);
1125
1126 void sgetrs_(char* trans,
1127 int64_t* n,
1128@@ -73,22 +74,22 @@ void dgetrs_(char* trans,
1129 double* B,
1130 int64_t* ldb,
1131 int64_t* info);
1132-void cgetrs_(char* trans,
1133- int64_t* n,
1134- int64_t* nrhs,
1135- hipblasComplex* A,
1136- int64_t* lda,
1137- int64_t* ipiv,
1138- hipblasComplex* B,
1139- int64_t* ldb,
1140- int64_t* info);
1141+void cgetrs_(char* trans,
1142+ int64_t* n,
1143+ int64_t* nrhs,
1144+ std::complex<float>* A,
1145+ int64_t* lda,
1146+ int64_t* ipiv,
1147+ std::complex<float>* B,
1148+ int64_t* ldb,
1149+ int64_t* info);
1150 void zgetrs_(char* trans,
1151 int64_t* n,
1152 int64_t* nrhs,
1153- hipblasDoubleComplex* A,
1154+ std::complex<double>* A,
1155 int64_t* lda,
1156 int64_t* ipiv,
1157- hipblasDoubleComplex* B,
1158+ std::complex<double>* B,
1159 int64_t* ldb,
1160 int64_t* info);
1161
1162@@ -101,18 +102,18 @@ void dgetri_(int64_t* n,
1163 double* work,
1164 int64_t* lwork,
1165 int64_t* info);
1166-void cgetri_(int64_t* n,
1167- hipblasComplex* A,
1168- int64_t* lda,
1169- int64_t* ipiv,
1170- hipblasComplex* work,
1171- int64_t* lwork,
1172- int64_t* info);
1173+void cgetri_(int64_t* n,
1174+ std::complex<float>* A,
1175+ int64_t* lda,
1176+ int64_t* ipiv,
1177+ std::complex<float>* work,
1178+ int64_t* lwork,
1179+ int64_t* info);
1180 void zgetri_(int64_t* n,
1181- hipblasDoubleComplex* A,
1182+ std::complex<double>* A,
1183 int64_t* lda,
1184 int64_t* ipiv,
1185- hipblasDoubleComplex* work,
1186+ std::complex<double>* work,
1187 int64_t* lwork,
1188 int64_t* info);
1189
1190@@ -132,20 +133,20 @@ void dgeqrf_(int64_t* m,
1191 double* work,
1192 int64_t* lwork,
1193 int64_t* info);
1194-void cgeqrf_(int64_t* m,
1195- int64_t* n,
1196- hipblasComplex* A,
1197- int64_t* lda,
1198- hipblasComplex* tau,
1199- hipblasComplex* work,
1200- int64_t* lwork,
1201- int64_t* info);
1202+void cgeqrf_(int64_t* m,
1203+ int64_t* n,
1204+ std::complex<float>* A,
1205+ int64_t* lda,
1206+ std::complex<float>* tau,
1207+ std::complex<float>* work,
1208+ int64_t* lwork,
1209+ int64_t* info);
1210 void zgeqrf_(int64_t* m,
1211 int64_t* n,
1212- hipblasDoubleComplex* A,
1213+ std::complex<double>* A,
1214 int64_t* lda,
1215- hipblasDoubleComplex* tau,
1216- hipblasDoubleComplex* work,
1217+ std::complex<double>* tau,
1218+ std::complex<double>* work,
1219 int64_t* lwork,
1220 int64_t* info);
1221
1222@@ -171,80 +172,80 @@ void dgels_(char* trans,
1223 double* work,
1224 int64_t* lwork,
1225 int64_t* info);
1226-void cgels_(char* trans,
1227- int64_t* m,
1228- int64_t* n,
1229- int64_t* nrhs,
1230- hipblasComplex* A,
1231- int64_t* lda,
1232- hipblasComplex* B,
1233- int64_t* ldb,
1234- hipblasComplex* work,
1235- int64_t* lwork,
1236- int64_t* info);
1237+void cgels_(char* trans,
1238+ int64_t* m,
1239+ int64_t* n,
1240+ int64_t* nrhs,
1241+ std::complex<float>* A,
1242+ int64_t* lda,
1243+ std::complex<float>* B,
1244+ int64_t* ldb,
1245+ std::complex<float>* work,
1246+ int64_t* lwork,
1247+ int64_t* info);
1248 void zgels_(char* trans,
1249 int64_t* m,
1250 int64_t* n,
1251 int64_t* nrhs,
1252- hipblasDoubleComplex* A,
1253+ std::complex<double>* A,
1254 int64_t* lda,
1255- hipblasDoubleComplex* B,
1256+ std::complex<double>* B,
1257 int64_t* ldb,
1258- hipblasDoubleComplex* work,
1259+ std::complex<double>* work,
1260 int64_t* lwork,
1261 int64_t* info);
1262
1263 /*
1264 void strtri_(char* uplo, char* diag, int64_t* n, float* A, int64_t* lda, int64_t* info);
1265 void dtrtri_(char* uplo, char* diag, int64_t* n, double* A, int64_t* lda, int64_t* info);
1266-void ctrtri_(char* uplo, char* diag, int64_t* n, hipblasComplex* A, int64_t* lda, int64_t* info);
1267-void ztrtri_(char* uplo, char* diag, int64_t* n, hipblasDoubleComplex* A, int64_t* lda, int64_t* info);
1268+void ctrtri_(char* uplo, char* diag, int64_t* n, std::complex<float>* A, int64_t* lda, int64_t* info);
1269+void ztrtri_(char* uplo, char* diag, int64_t* n, std::complex<double>* A, int64_t* lda, int64_t* info);
1270
1271 void cspr_(
1272- char* uplo, int64_t* n, hipblasComplex* alpha, hipblasComplex* x, int64_t* incx, hipblasComplex* A);
1273+ char* uplo, int64_t* n, std::complex<float>* alpha, std::complex<float>* x, int64_t* incx, std::complex<float>* A);
1274
1275 void zspr_(char* uplo,
1276 int64_t* n,
1277- hipblasDoubleComplex* alpha,
1278- hipblasDoubleComplex* x,
1279+ std::complex<double>* alpha,
1280+ std::complex<double>* x,
1281 int64_t* incx,
1282- hipblasDoubleComplex* A);
1283+ std::complex<double>* A);
1284
1285 void csyr_(char* uplo,
1286 int64_t* n,
1287- hipblasComplex* alpha,
1288- hipblasComplex* x,
1289+ std::complex<float>* alpha,
1290+ std::complex<float>* x,
1291 int64_t* incx,
1292- hipblasComplex* a,
1293+ std::complex<float>* a,
1294 int64_t* lda);
1295 void zsyr_(char* uplo,
1296 int64_t* n,
1297- hipblasDoubleComplex* alpha,
1298- hipblasDoubleComplex* x,
1299+ std::complex<double>* alpha,
1300+ std::complex<double>* x,
1301 int64_t* incx,
1302- hipblasDoubleComplex* a,
1303+ std::complex<double>* a,
1304 int64_t* lda);
1305
1306 void csymv_(char* uplo,
1307 int64_t* n,
1308- hipblasComplex* alpha,
1309- hipblasComplex* A,
1310+ std::complex<float>* alpha,
1311+ std::complex<float>* A,
1312 int64_t* lda,
1313- hipblasComplex* x,
1314+ std::complex<float>* x,
1315 int64_t* incx,
1316- hipblasComplex* beta,
1317- hipblasComplex* y,
1318+ std::complex<float>* beta,
1319+ std::complex<float>* y,
1320 int64_t* incy);
1321
1322 void zsymv_(char* uplo,
1323 int64_t* n,
1324- hipblasDoubleComplex* alpha,
1325- hipblasDoubleComplex* A,
1326+ std::complex<double>* alpha,
1327+ std::complex<double>* A,
1328 int64_t* lda,
1329- hipblasDoubleComplex* x,
1330+ std::complex<double>* x,
1331 int64_t* incx,
1332- hipblasDoubleComplex* beta,
1333- hipblasDoubleComplex* y,
1334+ std::complex<double>* beta,
1335+ std::complex<double>* y,
1336 int64_t* incy);
1337 */
1338
1339@@ -380,22 +381,22 @@ void ref_axpy<double, double>(
1340 }
1341
1342 template <>
1343-void ref_axpy<hipblasComplex, hipblasComplex>(int64_t n,
1344- const hipblasComplex alpha,
1345- const hipblasComplex* x,
1346- int64_t incx,
1347- hipblasComplex* y,
1348- int64_t incy)
1349+void ref_axpy<std::complex<float>, std::complex<float>>(int64_t n,
1350+ const std::complex<float> alpha,
1351+ const std::complex<float>* x,
1352+ int64_t incx,
1353+ std::complex<float>* y,
1354+ int64_t incy)
1355 {
1356 cblas_caxpy(n, &alpha, x, incx, y, incy);
1357 }
1358
1359 template <>
1360-void ref_axpy<hipblasDoubleComplex, hipblasDoubleComplex>(int64_t n,
1361- const hipblasDoubleComplex alpha,
1362- const hipblasDoubleComplex* x,
1363+void ref_axpy<std::complex<double>, std::complex<double>>(int64_t n,
1364+ const std::complex<double> alpha,
1365+ const std::complex<double>* x,
1366 int64_t incx,
1367- hipblasDoubleComplex* y,
1368+ std::complex<double>* y,
1369 int64_t incy)
1370 {
1371 cblas_zaxpy(n, &alpha, x, incx, y, incy);
1372@@ -515,33 +516,36 @@ void ref_scal<double>(int64_t n, const double alpha, double* x, int64_t incx)
1373 }
1374
1375 template <>
1376-void ref_scal<hipblasComplex>(int64_t n,
1377- const hipblasComplex alpha,
1378- hipblasComplex* x,
1379- int64_t incx)
1380+void ref_scal<std::complex<float>>(int64_t n,
1381+ const std::complex<float> alpha,
1382+ std::complex<float>* x,
1383+ int64_t incx)
1384 {
1385 cblas_cscal(n, &alpha, x, incx);
1386 }
1387
1388 template <>
1389-void ref_scal<hipblasComplex, float>(int64_t n, const float alpha, hipblasComplex* x, int64_t incx)
1390+void ref_scal<std::complex<float>, float>(int64_t n,
1391+ const float alpha,
1392+ std::complex<float>* x,
1393+ int64_t incx)
1394 {
1395 cblas_csscal(n, alpha, x, incx);
1396 }
1397
1398 template <>
1399-void ref_scal<hipblasDoubleComplex>(int64_t n,
1400- const hipblasDoubleComplex alpha,
1401- hipblasDoubleComplex* x,
1402+void ref_scal<std::complex<double>>(int64_t n,
1403+ const std::complex<double> alpha,
1404+ std::complex<double>* x,
1405 int64_t incx)
1406 {
1407 cblas_zscal(n, &alpha, x, incx);
1408 }
1409
1410 template <>
1411-void ref_scal<hipblasDoubleComplex, double>(int64_t n,
1412+void ref_scal<std::complex<double>, double>(int64_t n,
1413 const double alpha,
1414- hipblasDoubleComplex* x,
1415+ std::complex<double>* x,
1416 int64_t incx)
1417 {
1418 cblas_zdscal(n, alpha, x, incx);
1419@@ -561,15 +565,15 @@ void ref_copy<double>(int64_t n, double* x, int64_t incx, double* y, int64_t inc
1420 }
1421
1422 template <>
1423-void ref_copy<hipblasComplex>(
1424- int64_t n, hipblasComplex* x, int64_t incx, hipblasComplex* y, int64_t incy)
1425+void ref_copy<std::complex<float>>(
1426+ int64_t n, std::complex<float>* x, int64_t incx, std::complex<float>* y, int64_t incy)
1427 {
1428 cblas_ccopy(n, x, incx, y, incy);
1429 }
1430
1431 template <>
1432-void ref_copy<hipblasDoubleComplex>(
1433- int64_t n, hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex* y, int64_t incy)
1434+void ref_copy<std::complex<double>>(
1435+ int64_t n, std::complex<double>* x, int64_t incx, std::complex<double>* y, int64_t incy)
1436 {
1437 cblas_zcopy(n, x, incx, y, incy);
1438 }
1439@@ -588,15 +592,15 @@ void ref_swap<double>(int64_t n, double* x, int64_t incx, double* y, int64_t inc
1440 }
1441
1442 template <>
1443-void ref_swap<hipblasComplex>(
1444- int64_t n, hipblasComplex* x, int64_t incx, hipblasComplex* y, int64_t incy)
1445+void ref_swap<std::complex<float>>(
1446+ int64_t n, std::complex<float>* x, int64_t incx, std::complex<float>* y, int64_t incy)
1447 {
1448 cblas_cswap(n, x, incx, y, incy);
1449 }
1450
1451 template <>
1452-void ref_swap<hipblasDoubleComplex>(
1453- int64_t n, hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex* y, int64_t incy)
1454+void ref_swap<std::complex<double>>(
1455+ int64_t n, std::complex<double>* x, int64_t incx, std::complex<double>* y, int64_t incy)
1456 {
1457 cblas_zswap(n, x, incx, y, incy);
1458 }
1459@@ -659,23 +663,23 @@ void ref_dot<double>(
1460 }
1461
1462 template <>
1463-void ref_dot<hipblasComplex>(int64_t n,
1464- const hipblasComplex* x,
1465- int64_t incx,
1466- const hipblasComplex* y,
1467- int64_t incy,
1468- hipblasComplex* result)
1469+void ref_dot<std::complex<float>>(int64_t n,
1470+ const std::complex<float>* x,
1471+ int64_t incx,
1472+ const std::complex<float>* y,
1473+ int64_t incy,
1474+ std::complex<float>* result)
1475 {
1476 cblas_cdotu_sub(n, x, incx, y, incy, result);
1477 }
1478
1479 template <>
1480-void ref_dot<hipblasDoubleComplex>(int64_t n,
1481- const hipblasDoubleComplex* x,
1482+void ref_dot<std::complex<double>>(int64_t n,
1483+ const std::complex<double>* x,
1484 int64_t incx,
1485- const hipblasDoubleComplex* y,
1486+ const std::complex<double>* y,
1487 int64_t incy,
1488- hipblasDoubleComplex* result)
1489+ std::complex<double>* result)
1490 {
1491 cblas_zdotu_sub(n, x, incx, y, incy, result);
1492 }
1493@@ -721,23 +725,23 @@ void ref_dotc<double>(
1494 }
1495
1496 template <>
1497-void ref_dotc<hipblasComplex>(int64_t n,
1498- const hipblasComplex* x,
1499- int64_t incx,
1500- const hipblasComplex* y,
1501- int64_t incy,
1502- hipblasComplex* result)
1503+void ref_dotc<std::complex<float>>(int64_t n,
1504+ const std::complex<float>* x,
1505+ int64_t incx,
1506+ const std::complex<float>* y,
1507+ int64_t incy,
1508+ std::complex<float>* result)
1509 {
1510 cblas_cdotc_sub(n, x, incx, y, incy, result);
1511 }
1512
1513 template <>
1514-void ref_dotc<hipblasDoubleComplex>(int64_t n,
1515- const hipblasDoubleComplex* x,
1516+void ref_dotc<std::complex<double>>(int64_t n,
1517+ const std::complex<double>* x,
1518 int64_t incx,
1519- const hipblasDoubleComplex* y,
1520+ const std::complex<double>* y,
1521 int64_t incy,
1522- hipblasDoubleComplex* result)
1523+ std::complex<double>* result)
1524 {
1525 cblas_zdotc_sub(n, x, incx, y, incy, result);
1526 }
1527@@ -790,17 +794,17 @@ void ref_nrm2<double, double>(int64_t n, const double* x, int64_t incx, double*
1528 }
1529
1530 template <>
1531-void ref_nrm2<hipblasComplex, float>(int64_t n,
1532- const hipblasComplex* x,
1533- int64_t incx,
1534- float* result)
1535+void ref_nrm2<std::complex<float>, float>(int64_t n,
1536+ const std::complex<float>* x,
1537+ int64_t incx,
1538+ float* result)
1539 {
1540 *result = cblas_scnrm2(n, x, incx);
1541 }
1542
1543 template <>
1544-void ref_nrm2<hipblasDoubleComplex, double>(int64_t n,
1545- const hipblasDoubleComplex* x,
1546+void ref_nrm2<std::complex<double>, double>(int64_t n,
1547+ const std::complex<double>* x,
1548 int64_t incx,
1549 double* result)
1550 {
1551@@ -812,37 +816,37 @@ void ref_nrm2<hipblasDoubleComplex, double>(int64_t n,
1552 ///////////////////
1553 // LAPACK fortran library functionality
1554 extern "C" {
1555-void crot_(const int64_t* n,
1556- hipblasComplex* cx,
1557- const int64_t* incx,
1558- hipblasComplex* cy,
1559- const int64_t* incy,
1560- const float* c,
1561- const hipblasComplex* s);
1562-void csrot_(const int64_t* n,
1563- hipblasComplex* cx,
1564- const int64_t* incx,
1565- hipblasComplex* cy,
1566- const int64_t* incy,
1567- const float* c,
1568- const float* s);
1569+void crot_(const int64_t* n,
1570+ std::complex<float>* cx,
1571+ const int64_t* incx,
1572+ std::complex<float>* cy,
1573+ const int64_t* incy,
1574+ const float* c,
1575+ const std::complex<float>* s);
1576+void csrot_(const int64_t* n,
1577+ std::complex<float>* cx,
1578+ const int64_t* incx,
1579+ std::complex<float>* cy,
1580+ const int64_t* incy,
1581+ const float* c,
1582+ const float* s);
1583 void zrot_(const int64_t* n,
1584- hipblasDoubleComplex* cx,
1585+ std::complex<double>* cx,
1586 const int64_t* incx,
1587- hipblasDoubleComplex* cy,
1588+ std::complex<double>* cy,
1589 const int64_t* incy,
1590 const double* c,
1591- const hipblasDoubleComplex* s);
1592+ const std::complex<double>* s);
1593 void zdrot_(const int64_t* n,
1594- hipblasDoubleComplex* cx,
1595+ std::complex<double>* cx,
1596 const int64_t* incx,
1597- hipblasDoubleComplex* cy,
1598+ std::complex<double>* cy,
1599 const int64_t* incy,
1600 const double* c,
1601 const double* s);
1602
1603-void crotg_(hipblasComplex* a, hipblasComplex* b, float* c, hipblasComplex* s);
1604-void zrotg_(hipblasDoubleComplex* a, hipblasDoubleComplex* b, double* c, hipblasDoubleComplex* s);
1605+void crotg_(std::complex<float>* a, std::complex<float>* b, float* c, std::complex<float>* s);
1606+void zrotg_(std::complex<double>* a, std::complex<double>* b, double* c, std::complex<double>* s);
1607 }
1608
1609 // rot
1610@@ -936,67 +940,72 @@ void ref_rot<double>(
1611 }
1612
1613 template <>
1614-void ref_rot<hipblasComplex>(int64_t n,
1615- hipblasComplex* x,
1616- int64_t incx,
1617- hipblasComplex* y,
1618- int64_t incy,
1619- hipblasComplex c,
1620- hipblasComplex s)
1621+void ref_rot<std::complex<float>>(int64_t n,
1622+ std::complex<float>* x,
1623+ int64_t incx,
1624+ std::complex<float>* y,
1625+ int64_t incy,
1626+ std::complex<float> c,
1627+ std::complex<float> s)
1628 {
1629 float c_real = std::real(c);
1630 lapack_xrot(n, x, incx, y, incy, c_real, s);
1631 }
1632
1633 template <>
1634-void ref_rot<hipblasComplex, float>(int64_t n,
1635- hipblasComplex* x,
1636- int64_t incx,
1637- hipblasComplex* y,
1638- int64_t incy,
1639- float c,
1640- hipblasComplex s)
1641+void ref_rot<std::complex<float>, float>(int64_t n,
1642+ std::complex<float>* x,
1643+ int64_t incx,
1644+ std::complex<float>* y,
1645+ int64_t incy,
1646+ float c,
1647+ std::complex<float> s)
1648 {
1649 lapack_xrot(n, x, incx, y, incy, c, s);
1650 }
1651
1652 template <>
1653-void ref_rot<hipblasComplex, float, float>(
1654- int64_t n, hipblasComplex* x, int64_t incx, hipblasComplex* y, int64_t incy, float c, float s)
1655+void ref_rot<std::complex<float>, float, float>(int64_t n,
1656+ std::complex<float>* x,
1657+ int64_t incx,
1658+ std::complex<float>* y,
1659+ int64_t incy,
1660+ float c,
1661+ float s)
1662 {
1663 lapack_xrot(n, x, incx, y, incy, c, s);
1664 }
1665
1666 template <>
1667-void ref_rot<hipblasDoubleComplex>(int64_t n,
1668- hipblasDoubleComplex* x,
1669+void ref_rot<std::complex<double>>(int64_t n,
1670+ std::complex<double>* x,
1671 int64_t incx,
1672- hipblasDoubleComplex* y,
1673+ std::complex<double>* y,
1674 int64_t incy,
1675- hipblasDoubleComplex c,
1676- hipblasDoubleComplex s)
1677+ std::complex<double> c,
1678+ std::complex<double> s)
1679 {
1680 double c_real = std::real(c);
1681 lapack_xrot(n, x, incx, y, incy, c_real, s);
1682 }
1683
1684 template <>
1685-void ref_rot<hipblasDoubleComplex, double>(int64_t n,
1686- hipblasDoubleComplex* x,
1687+void ref_rot<std::complex<double>, double>(int64_t n,
1688+ std::complex<double>* x,
1689 int64_t incx,
1690- hipblasDoubleComplex* y,
1691+ std::complex<double>* y,
1692 int64_t incy,
1693 double c,
1694- hipblasDoubleComplex s)
1695+ std::complex<double> s)
1696 {
1697 lapack_xrot(n, x, incx, y, incy, c, s);
1698 }
1699
1700 template <>
1701-void ref_rot<hipblasDoubleComplex, double, double>(int64_t n,
1702- hipblasDoubleComplex* x,
1703+void ref_rot<std::complex<double>, double, double>(int64_t n,
1704+ std::complex<double>* x,
1705 int64_t incx,
1706- hipblasDoubleComplex* y,
1707+ std::complex<double>* y,
1708 int64_t incy,
1709 double c,
1710 double s)
1711@@ -1018,19 +1027,19 @@ void ref_rotg<double>(double* a, double* b, double* c, double* s)
1712 }
1713
1714 template <>
1715-void ref_rotg<hipblasComplex, float>(hipblasComplex* a,
1716- hipblasComplex* b,
1717- float* c,
1718- hipblasComplex* s)
1719+void ref_rotg<std::complex<float>, float>(std::complex<float>* a,
1720+ std::complex<float>* b,
1721+ float* c,
1722+ std::complex<float>* s)
1723 {
1724 lapack_xrotg(*a, *b, *c, *s);
1725 }
1726
1727 template <>
1728-void ref_rotg<hipblasDoubleComplex, double>(hipblasDoubleComplex* a,
1729- hipblasDoubleComplex* b,
1730+void ref_rotg<std::complex<double>, double>(std::complex<double>* a,
1731+ std::complex<double>* b,
1732 double* c,
1733- hipblasDoubleComplex* s)
1734+ std::complex<double>* s)
1735 {
1736 lapack_xrotg(*a, *b, *c, *s);
1737 }
1738@@ -1050,8 +1059,8 @@ void ref_asum<double, double>(int64_t n, const double* x, int64_t incx, double*
1739 }
1740
1741 template <>
1742-void ref_asum<hipblasComplex, float>(int64_t n,
1743- const hipblasComplex* x,
1744+void ref_asum<std::complex<float>, float>(int64_t n,
1745+ const std::complex<float>* x,
1746 int64_t incx,
1747 float* result)
1748 {
1749@@ -1059,8 +1068,8 @@ void ref_asum<hipblasComplex, float>(int64_t n,
1750 }
1751
1752 template <>
1753-void ref_asum<hipblasDoubleComplex, double>(int64_t n,
1754- const hipblasDoubleComplex* x,
1755+void ref_asum<std::complex<double>, double>(int64_t n,
1756+ const std::complex<double>* x,
1757 int64_t incx,
1758 double* result)
1759 {
1760@@ -1086,14 +1095,14 @@ void ref_iamax<double>(int64_t n, const double* x, int64_t incx, int64_t* result
1761 }
1762
1763 template <>
1764-void ref_iamax<hipblasComplex>(int64_t n, const hipblasComplex* x, int64_t incx, int64_t* result)
1765+void ref_iamax<std::complex<float>>(int64_t n, const std::complex<float>* x, int64_t incx, int64_t* result)
1766 {
1767 *result = (int64_t)cblas_icamax(n, x, incx);
1768 }
1769
1770 template <>
1771-void ref_iamax<hipblasDoubleComplex>(int64_t n,
1772- const hipblasDoubleComplex* x,
1773+void ref_iamax<std::complex<double>>(int64_t n,
1774+ const std::complex<double>* x,
1775 int64_t incx,
1776 int64_t* result)
1777 {
1778@@ -1110,13 +1119,13 @@ double hipblas_magnitude(T val)
1779 }
1780
1781 template <>
1782-double hipblas_magnitude(hipblasComplex val)
1783+double hipblas_magnitude(std::complex<float> val)
1784 {
1785 return std::abs(val.real()) + std::abs(val.imag());
1786 }
1787
1788 template <>
1789-double hipblas_magnitude(hipblasDoubleComplex val)
1790+double hipblas_magnitude(std::complex<double> val)
1791 {
1792 return std::abs(val.real()) + std::abs(val.imag());
1793 }
1794@@ -1155,14 +1164,14 @@ void ref_iamin<double>(int64_t n, const double* x, int64_t incx, int64_t* result
1795 }
1796
1797 template <>
1798-void ref_iamin<hipblasComplex>(int64_t n, const hipblasComplex* x, int64_t incx, int64_t* result)
1799+void ref_iamin<std::complex<float>>(int64_t n, const std::complex<float>* x, int64_t incx, int64_t* result)
1800 {
1801 *result = (int64_t)ref_iamin_helper(n, x, incx);
1802 }
1803
1804 template <>
1805-void ref_iamin<hipblasDoubleComplex>(int64_t n,
1806- const hipblasDoubleComplex* x,
1807+void ref_iamin<std::complex<double>>(int64_t n,
1808+ const std::complex<double>* x,
1809 int64_t incx,
1810 int64_t* result)
1811 {
1812@@ -1240,19 +1249,19 @@ void ref_gbmv<double>(hipblasOperation_t transA,
1813 }
1814
1815 template <>
1816-void ref_gbmv<hipblasComplex>(hipblasOperation_t transA,
1817- int64_t m,
1818- int64_t n,
1819- int64_t kl,
1820- int64_t ku,
1821- hipblasComplex alpha,
1822- hipblasComplex* A,
1823- int64_t lda,
1824- hipblasComplex* x,
1825- int64_t incx,
1826- hipblasComplex beta,
1827- hipblasComplex* y,
1828- int64_t incy)
1829+void ref_gbmv<std::complex<float>>(hipblasOperation_t transA,
1830+ int64_t m,
1831+ int64_t n,
1832+ int64_t kl,
1833+ int64_t ku,
1834+ std::complex<float> alpha,
1835+ std::complex<float>* A,
1836+ int64_t lda,
1837+ std::complex<float>* x,
1838+ int64_t incx,
1839+ std::complex<float> beta,
1840+ std::complex<float>* y,
1841+ int64_t incy)
1842 {
1843 cblas_cgbmv(CblasColMajor,
1844 (CBLAS_TRANSPOSE)transA,
1845@@ -1271,18 +1280,18 @@ void ref_gbmv<hipblasComplex>(hipblasOperation_t transA,
1846 }
1847
1848 template <>
1849-void ref_gbmv<hipblasDoubleComplex>(hipblasOperation_t transA,
1850+void ref_gbmv<std::complex<double>>(hipblasOperation_t transA,
1851 int64_t m,
1852 int64_t n,
1853 int64_t kl,
1854 int64_t ku,
1855- hipblasDoubleComplex alpha,
1856- hipblasDoubleComplex* A,
1857+ std::complex<double> alpha,
1858+ std::complex<double>* A,
1859 int64_t lda,
1860- hipblasDoubleComplex* x,
1861+ std::complex<double>* x,
1862 int64_t incx,
1863- hipblasDoubleComplex beta,
1864- hipblasDoubleComplex* y,
1865+ std::complex<double> beta,
1866+ std::complex<double>* y,
1867 int64_t incy)
1868 {
1869 cblas_zgbmv(CblasColMajor,
1870@@ -1337,33 +1346,33 @@ void ref_gemv<double>(hipblasOperation_t transA,
1871 }
1872
1873 template <>
1874-void ref_gemv<hipblasComplex>(hipblasOperation_t transA,
1875- int64_t m,
1876- int64_t n,
1877- hipblasComplex alpha,
1878- hipblasComplex* A,
1879- int64_t lda,
1880- hipblasComplex* x,
1881- int64_t incx,
1882- hipblasComplex beta,
1883- hipblasComplex* y,
1884- int64_t incy)
1885+void ref_gemv<std::complex<float>>(hipblasOperation_t transA,
1886+ int64_t m,
1887+ int64_t n,
1888+ std::complex<float> alpha,
1889+ std::complex<float>* A,
1890+ int64_t lda,
1891+ std::complex<float>* x,
1892+ int64_t incx,
1893+ std::complex<float> beta,
1894+ std::complex<float>* y,
1895+ int64_t incy)
1896 {
1897 cblas_cgemv(
1898 CblasColMajor, (CBLAS_TRANSPOSE)transA, m, n, &alpha, A, lda, x, incx, &beta, y, incy);
1899 }
1900
1901 template <>
1902-void ref_gemv<hipblasDoubleComplex>(hipblasOperation_t transA,
1903+void ref_gemv<std::complex<double>>(hipblasOperation_t transA,
1904 int64_t m,
1905 int64_t n,
1906- hipblasDoubleComplex alpha,
1907- hipblasDoubleComplex* A,
1908+ std::complex<double> alpha,
1909+ std::complex<double>* A,
1910 int64_t lda,
1911- hipblasDoubleComplex* x,
1912+ std::complex<double>* x,
1913 int64_t incx,
1914- hipblasDoubleComplex beta,
1915- hipblasDoubleComplex* y,
1916+ std::complex<double> beta,
1917+ std::complex<double>* y,
1918 int64_t incy)
1919 {
1920 cblas_zgemv(
1921@@ -1400,56 +1409,56 @@ void ref_ger<double, false>(int64_t m,
1922 }
1923
1924 template <>
1925-void ref_ger<hipblasComplex, false>(int64_t m,
1926- int64_t n,
1927- hipblasComplex alpha,
1928- hipblasComplex* x,
1929- int64_t incx,
1930- hipblasComplex* y,
1931- int64_t incy,
1932- hipblasComplex* A,
1933- int64_t lda)
1934+void ref_ger<std::complex<float>, false>(int64_t m,
1935+ int64_t n,
1936+ std::complex<float> alpha,
1937+ std::complex<float>* x,
1938+ int64_t incx,
1939+ std::complex<float>* y,
1940+ int64_t incy,
1941+ std::complex<float>* A,
1942+ int64_t lda)
1943 {
1944 cblas_cgeru(CblasColMajor, m, n, &alpha, x, incx, y, incy, A, lda);
1945 }
1946
1947 template <>
1948-void ref_ger<hipblasComplex, true>(int64_t m,
1949- int64_t n,
1950- hipblasComplex alpha,
1951- hipblasComplex* x,
1952- int64_t incx,
1953- hipblasComplex* y,
1954- int64_t incy,
1955- hipblasComplex* A,
1956- int64_t lda)
1957+void ref_ger<std::complex<float>, true>(int64_t m,
1958+ int64_t n,
1959+ std::complex<float> alpha,
1960+ std::complex<float>* x,
1961+ int64_t incx,
1962+ std::complex<float>* y,
1963+ int64_t incy,
1964+ std::complex<float>* A,
1965+ int64_t lda)
1966 {
1967 cblas_cgerc(CblasColMajor, m, n, &alpha, x, incx, y, incy, A, lda);
1968 }
1969
1970 template <>
1971-void ref_ger<hipblasDoubleComplex, false>(int64_t m,
1972+void ref_ger<std::complex<double>, false>(int64_t m,
1973 int64_t n,
1974- hipblasDoubleComplex alpha,
1975- hipblasDoubleComplex* x,
1976+ std::complex<double> alpha,
1977+ std::complex<double>* x,
1978 int64_t incx,
1979- hipblasDoubleComplex* y,
1980+ std::complex<double>* y,
1981 int64_t incy,
1982- hipblasDoubleComplex* A,
1983+ std::complex<double>* A,
1984 int64_t lda)
1985 {
1986 cblas_zgeru(CblasColMajor, m, n, &alpha, x, incx, y, incy, A, lda);
1987 }
1988
1989 template <>
1990-void ref_ger<hipblasDoubleComplex, true>(int64_t m,
1991+void ref_ger<std::complex<double>, true>(int64_t m,
1992 int64_t n,
1993- hipblasDoubleComplex alpha,
1994- hipblasDoubleComplex* x,
1995+ std::complex<double> alpha,
1996+ std::complex<double>* x,
1997 int64_t incx,
1998- hipblasDoubleComplex* y,
1999+ std::complex<double>* y,
2000 int64_t incy,
2001- hipblasDoubleComplex* A,
2002+ std::complex<double>* A,
2003 int64_t lda)
2004 {
2005 cblas_zgerc(CblasColMajor, m, n, &alpha, x, incx, y, incy, A, lda);
2006@@ -1457,32 +1466,32 @@ void ref_ger<hipblasDoubleComplex, true>(int64_t m,
2007
2008 // hbmv
2009 template <>
2010-void ref_hbmv<hipblasComplex>(hipblasFillMode_t uplo,
2011- int64_t n,
2012- int64_t k,
2013- hipblasComplex alpha,
2014- hipblasComplex* A,
2015- int64_t lda,
2016- hipblasComplex* x,
2017- int64_t incx,
2018- hipblasComplex beta,
2019- hipblasComplex* y,
2020- int64_t incy)
2021+void ref_hbmv<std::complex<float>>(hipblasFillMode_t uplo,
2022+ int64_t n,
2023+ int64_t k,
2024+ std::complex<float> alpha,
2025+ std::complex<float>* A,
2026+ int64_t lda,
2027+ std::complex<float>* x,
2028+ int64_t incx,
2029+ std::complex<float> beta,
2030+ std::complex<float>* y,
2031+ int64_t incy)
2032 {
2033 cblas_chbmv(CblasColMajor, (CBLAS_UPLO)uplo, n, k, &alpha, A, lda, x, incx, &beta, y, incy);
2034 }
2035
2036 template <>
2037-void ref_hbmv<hipblasDoubleComplex>(hipblasFillMode_t uplo,
2038+void ref_hbmv<std::complex<double>>(hipblasFillMode_t uplo,
2039 int64_t n,
2040 int64_t k,
2041- hipblasDoubleComplex alpha,
2042- hipblasDoubleComplex* A,
2043+ std::complex<double> alpha,
2044+ std::complex<double>* A,
2045 int64_t lda,
2046- hipblasDoubleComplex* x,
2047+ std::complex<double>* x,
2048 int64_t incx,
2049- hipblasDoubleComplex beta,
2050- hipblasDoubleComplex* y,
2051+ std::complex<double> beta,
2052+ std::complex<double>* y,
2053 int64_t incy)
2054 {
2055 cblas_zhbmv(CblasColMajor, (CBLAS_UPLO)uplo, n, k, &alpha, A, lda, x, incx, &beta, y, incy);
2056@@ -1490,30 +1499,30 @@ void ref_hbmv<hipblasDoubleComplex>(hipblasFillMode_t uplo,
2057
2058 // hemv
2059 template <>
2060-void ref_hemv<hipblasComplex>(hipblasFillMode_t uplo,
2061- int64_t n,
2062- hipblasComplex alpha,
2063- hipblasComplex* A,
2064- int64_t lda,
2065- hipblasComplex* x,
2066- int64_t incx,
2067- hipblasComplex beta,
2068- hipblasComplex* y,
2069- int64_t incy)
2070+void ref_hemv<std::complex<float>>(hipblasFillMode_t uplo,
2071+ int64_t n,
2072+ std::complex<float> alpha,
2073+ std::complex<float>* A,
2074+ int64_t lda,
2075+ std::complex<float>* x,
2076+ int64_t incx,
2077+ std::complex<float> beta,
2078+ std::complex<float>* y,
2079+ int64_t incy)
2080 {
2081 cblas_chemv(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, A, lda, x, incx, &beta, y, incy);
2082 }
2083
2084 template <>
2085-void ref_hemv<hipblasDoubleComplex>(hipblasFillMode_t uplo,
2086+void ref_hemv<std::complex<double>>(hipblasFillMode_t uplo,
2087 int64_t n,
2088- hipblasDoubleComplex alpha,
2089- hipblasDoubleComplex* A,
2090+ std::complex<double> alpha,
2091+ std::complex<double>* A,
2092 int64_t lda,
2093- hipblasDoubleComplex* x,
2094+ std::complex<double>* x,
2095 int64_t incx,
2096- hipblasDoubleComplex beta,
2097- hipblasDoubleComplex* y,
2098+ std::complex<double> beta,
2099+ std::complex<double>* y,
2100 int64_t incy)
2101 {
2102 cblas_zhemv(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, A, lda, x, incx, &beta, y, incy);
2103@@ -1521,24 +1530,24 @@ void ref_hemv<hipblasDoubleComplex>(hipblasFillMode_t uplo,
2104
2105 // her
2106 template <>
2107-void ref_her<hipblasComplex, float>(hipblasFillMode_t uplo,
2108- int64_t n,
2109- float alpha,
2110- hipblasComplex* x,
2111- int64_t incx,
2112- hipblasComplex* A,
2113- int64_t lda)
2114+void ref_her<std::complex<float>, float>(hipblasFillMode_t uplo,
2115+ int64_t n,
2116+ float alpha,
2117+ std::complex<float>* x,
2118+ int64_t incx,
2119+ std::complex<float>* A,
2120+ int64_t lda)
2121 {
2122 cblas_cher(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, A, lda);
2123 }
2124
2125 template <>
2126-void ref_her<hipblasDoubleComplex, double>(hipblasFillMode_t uplo,
2127+void ref_her<std::complex<double>, double>(hipblasFillMode_t uplo,
2128 int64_t n,
2129 double alpha,
2130- hipblasDoubleComplex* x,
2131+ std::complex<double>* x,
2132 int64_t incx,
2133- hipblasDoubleComplex* A,
2134+ std::complex<double>* A,
2135 int64_t lda)
2136 {
2137 cblas_zher(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, A, lda);
2138@@ -1546,28 +1555,28 @@ void ref_her<hipblasDoubleComplex, double>(hipblasFillMode_t uplo,
2139
2140 // her2
2141 template <>
2142-void ref_her2<hipblasComplex>(hipblasFillMode_t uplo,
2143- int64_t n,
2144- hipblasComplex alpha,
2145- hipblasComplex* x,
2146- int64_t incx,
2147- hipblasComplex* y,
2148- int64_t incy,
2149- hipblasComplex* A,
2150- int64_t lda)
2151+void ref_her2<std::complex<float>>(hipblasFillMode_t uplo,
2152+ int64_t n,
2153+ std::complex<float> alpha,
2154+ std::complex<float>* x,
2155+ int64_t incx,
2156+ std::complex<float>* y,
2157+ int64_t incy,
2158+ std::complex<float>* A,
2159+ int64_t lda)
2160 {
2161 cblas_cher2(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, x, incx, y, incy, A, lda);
2162 }
2163
2164 template <>
2165-void ref_her2<hipblasDoubleComplex>(hipblasFillMode_t uplo,
2166+void ref_her2<std::complex<double>>(hipblasFillMode_t uplo,
2167 int64_t n,
2168- hipblasDoubleComplex alpha,
2169- hipblasDoubleComplex* x,
2170+ std::complex<double> alpha,
2171+ std::complex<double>* x,
2172 int64_t incx,
2173- hipblasDoubleComplex* y,
2174+ std::complex<double>* y,
2175 int64_t incy,
2176- hipblasDoubleComplex* A,
2177+ std::complex<double>* A,
2178 int64_t lda)
2179 {
2180 cblas_zher2(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, x, incx, y, incy, A, lda);
2181@@ -1575,28 +1584,28 @@ void ref_her2<hipblasDoubleComplex>(hipblasFillMode_t uplo,
2182
2183 // hpmv
2184 template <>
2185-void ref_hpmv<hipblasComplex>(hipblasFillMode_t uplo,
2186- int64_t n,
2187- hipblasComplex alpha,
2188- hipblasComplex* AP,
2189- hipblasComplex* x,
2190- int64_t incx,
2191- hipblasComplex beta,
2192- hipblasComplex* y,
2193- int64_t incy)
2194+void ref_hpmv<std::complex<float>>(hipblasFillMode_t uplo,
2195+ int64_t n,
2196+ std::complex<float> alpha,
2197+ std::complex<float>* AP,
2198+ std::complex<float>* x,
2199+ int64_t incx,
2200+ std::complex<float> beta,
2201+ std::complex<float>* y,
2202+ int64_t incy)
2203 {
2204 cblas_chpmv(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, AP, x, incx, &beta, y, incy);
2205 }
2206
2207 template <>
2208-void ref_hpmv<hipblasDoubleComplex>(hipblasFillMode_t uplo,
2209+void ref_hpmv<std::complex<double>>(hipblasFillMode_t uplo,
2210 int64_t n,
2211- hipblasDoubleComplex alpha,
2212- hipblasDoubleComplex* AP,
2213- hipblasDoubleComplex* x,
2214+ std::complex<double> alpha,
2215+ std::complex<double>* AP,
2216+ std::complex<double>* x,
2217 int64_t incx,
2218- hipblasDoubleComplex beta,
2219- hipblasDoubleComplex* y,
2220+ std::complex<double> beta,
2221+ std::complex<double>* y,
2222 int64_t incy)
2223 {
2224 cblas_zhpmv(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, AP, x, incx, &beta, y, incy);
2225@@ -1604,12 +1613,12 @@ void ref_hpmv<hipblasDoubleComplex>(hipblasFillMode_t uplo,
2226
2227 // hpr
2228 template <>
2229-void ref_hpr(hipblasFillMode_t uplo,
2230- int64_t n,
2231- float alpha,
2232- hipblasComplex* x,
2233- int64_t incx,
2234- hipblasComplex* AP)
2235+void ref_hpr(hipblasFillMode_t uplo,
2236+ int64_t n,
2237+ float alpha,
2238+ std::complex<float>* x,
2239+ int64_t incx,
2240+ std::complex<float>* AP)
2241 {
2242 cblas_chpr(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, AP);
2243 }
2244@@ -1618,23 +1627,23 @@ template <>
2245 void ref_hpr(hipblasFillMode_t uplo,
2246 int64_t n,
2247 double alpha,
2248- hipblasDoubleComplex* x,
2249+ std::complex<double>* x,
2250 int64_t incx,
2251- hipblasDoubleComplex* AP)
2252+ std::complex<double>* AP)
2253 {
2254 cblas_zhpr(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, AP);
2255 }
2256
2257 // hpr2
2258 template <>
2259-void ref_hpr2(hipblasFillMode_t uplo,
2260- int64_t n,
2261- hipblasComplex alpha,
2262- hipblasComplex* x,
2263- int64_t incx,
2264- hipblasComplex* y,
2265- int64_t incy,
2266- hipblasComplex* AP)
2267+void ref_hpr2(hipblasFillMode_t uplo,
2268+ int64_t n,
2269+ std::complex<float> alpha,
2270+ std::complex<float>* x,
2271+ int64_t incx,
2272+ std::complex<float>* y,
2273+ int64_t incy,
2274+ std::complex<float>* AP)
2275 {
2276 cblas_chpr2(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, x, incx, y, incy, AP);
2277 }
2278@@ -1642,12 +1651,12 @@ void ref_hpr2(hipblasFillMode_t uplo,
2279 template <>
2280 void ref_hpr2(hipblasFillMode_t uplo,
2281 int64_t n,
2282- hipblasDoubleComplex alpha,
2283- hipblasDoubleComplex* x,
2284+ std::complex<double> alpha,
2285+ std::complex<double>* x,
2286 int64_t incx,
2287- hipblasDoubleComplex* y,
2288+ std::complex<double>* y,
2289 int64_t incy,
2290- hipblasDoubleComplex* AP)
2291+ std::complex<double>* AP)
2292 {
2293 cblas_zhpr2(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, x, incx, y, incy, AP);
2294 }
2295@@ -1728,12 +1737,12 @@ void ref_spr(hipblasFillMode_t uplo, int64_t n, double alpha, double* x, int64_t
2296 }
2297
2298 template <>
2299-void ref_spr(hipblasFillMode_t uplo,
2300- int64_t n,
2301- hipblasComplex alpha,
2302- hipblasComplex* x,
2303- int64_t incx,
2304- hipblasComplex* AP)
2305+void ref_spr(hipblasFillMode_t uplo,
2306+ int64_t n,
2307+ std::complex<float> alpha,
2308+ std::complex<float>* x,
2309+ int64_t incx,
2310+ std::complex<float>* AP)
2311 {
2312 lapack_xspr(uplo, n, alpha, x, incx, AP);
2313 }
2314@@ -1741,10 +1750,10 @@ void ref_spr(hipblasFillMode_t uplo,
2315 template <>
2316 void ref_spr(hipblasFillMode_t uplo,
2317 int64_t n,
2318- hipblasDoubleComplex alpha,
2319- hipblasDoubleComplex* x,
2320+ std::complex<double> alpha,
2321+ std::complex<double>* x,
2322 int64_t incx,
2323- hipblasDoubleComplex* AP)
2324+ std::complex<double>* AP)
2325 {
2326 lapack_xspr(uplo, n, alpha, x, incx, AP);
2327 }
2328@@ -1808,16 +1817,16 @@ void ref_symv(hipblasFillMode_t uplo,
2329 }
2330
2331 template <>
2332-void ref_symv(hipblasFillMode_t uplo,
2333- int64_t n,
2334- hipblasComplex alpha,
2335- hipblasComplex* A,
2336- int64_t lda,
2337- hipblasComplex* x,
2338- int64_t incx,
2339- hipblasComplex beta,
2340- hipblasComplex* y,
2341- int64_t incy)
2342+void ref_symv(hipblasFillMode_t uplo,
2343+ int64_t n,
2344+ std::complex<float> alpha,
2345+ std::complex<float>* A,
2346+ int64_t lda,
2347+ std::complex<float>* x,
2348+ int64_t incx,
2349+ std::complex<float> beta,
2350+ std::complex<float>* y,
2351+ int64_t incy)
2352 {
2353 lapack_xsymv(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
2354 }
2355@@ -1825,13 +1834,13 @@ void ref_symv(hipblasFillMode_t uplo,
2356 template <>
2357 void ref_symv(hipblasFillMode_t uplo,
2358 int64_t n,
2359- hipblasDoubleComplex alpha,
2360- hipblasDoubleComplex* A,
2361+ std::complex<double> alpha,
2362+ std::complex<double>* A,
2363 int64_t lda,
2364- hipblasDoubleComplex* x,
2365+ std::complex<double>* x,
2366 int64_t incx,
2367- hipblasDoubleComplex beta,
2368- hipblasDoubleComplex* y,
2369+ std::complex<double> beta,
2370+ std::complex<double>* y,
2371 int64_t incy)
2372 {
2373 lapack_xsymv(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
2374@@ -1858,13 +1867,13 @@ void ref_syr<double>(hipblasFillMode_t uplo,
2375 }
2376
2377 template <>
2378-void ref_syr(hipblasFillMode_t uplo,
2379- int64_t n,
2380- hipblasComplex alpha,
2381- hipblasComplex* xa,
2382- int64_t incx,
2383- hipblasComplex* A,
2384- int64_t lda)
2385+void ref_syr(hipblasFillMode_t uplo,
2386+ int64_t n,
2387+ std::complex<float> alpha,
2388+ std::complex<float>* xa,
2389+ int64_t incx,
2390+ std::complex<float>* A,
2391+ int64_t lda)
2392 {
2393 lapack_xsyr(uplo, n, alpha, xa, incx, A, lda);
2394 }
2395@@ -1872,10 +1881,10 @@ void ref_syr(hipblasFillMode_t uplo,
2396 template <>
2397 void ref_syr(hipblasFillMode_t uplo,
2398 int64_t n,
2399- hipblasDoubleComplex alpha,
2400- hipblasDoubleComplex* xa,
2401+ std::complex<double> alpha,
2402+ std::complex<double>* xa,
2403 int64_t incx,
2404- hipblasDoubleComplex* A,
2405+ std::complex<double>* A,
2406 int64_t lda)
2407 {
2408 lapack_xsyr(uplo, n, alpha, xa, incx, A, lda);
2409@@ -1912,15 +1921,15 @@ void ref_syr2(hipblasFillMode_t uplo,
2410 }
2411
2412 template <>
2413-void ref_syr2(hipblasFillMode_t uplo,
2414- int64_t n,
2415- hipblasComplex alpha,
2416- hipblasComplex* x,
2417- int64_t incx,
2418- hipblasComplex* y,
2419- int64_t incy,
2420- hipblasComplex* A,
2421- int64_t lda)
2422+void ref_syr2(hipblasFillMode_t uplo,
2423+ int64_t n,
2424+ std::complex<float> alpha,
2425+ std::complex<float>* x,
2426+ int64_t incx,
2427+ std::complex<float>* y,
2428+ int64_t incy,
2429+ std::complex<float>* A,
2430+ int64_t lda)
2431 {
2432 lapack_xsyr2(uplo, n, alpha, x, incx, y, incy, A, lda);
2433 }
2434@@ -1928,12 +1937,12 @@ void ref_syr2(hipblasFillMode_t uplo,
2435 template <>
2436 void ref_syr2(hipblasFillMode_t uplo,
2437 int64_t n,
2438- hipblasDoubleComplex alpha,
2439- hipblasDoubleComplex* x,
2440+ std::complex<double> alpha,
2441+ std::complex<double>* x,
2442 int64_t incx,
2443- hipblasDoubleComplex* y,
2444+ std::complex<double>* y,
2445 int64_t incy,
2446- hipblasDoubleComplex* A,
2447+ std::complex<double>* A,
2448 int64_t lda)
2449 {
2450 lapack_xsyr2(uplo, n, alpha, x, incx, y, incy, A, lda);
2451@@ -1987,15 +1996,15 @@ void ref_tbmv<double>(hipblasFillMode_t uplo,
2452 }
2453
2454 template <>
2455-void ref_tbmv<hipblasComplex>(hipblasFillMode_t uplo,
2456- hipblasOperation_t transA,
2457- hipblasDiagType_t diag,
2458- int64_t m,
2459- int64_t k,
2460- const hipblasComplex* A,
2461- int64_t lda,
2462- hipblasComplex* x,
2463- int64_t incx)
2464+void ref_tbmv<std::complex<float>>(hipblasFillMode_t uplo,
2465+ hipblasOperation_t transA,
2466+ hipblasDiagType_t diag,
2467+ int64_t m,
2468+ int64_t k,
2469+ const std::complex<float>* A,
2470+ int64_t lda,
2471+ std::complex<float>* x,
2472+ int64_t incx)
2473 {
2474 cblas_ctbmv(CblasColMajor,
2475 CBLAS_UPLO(uplo),
2476@@ -2010,14 +2019,14 @@ void ref_tbmv<hipblasComplex>(hipblasFillMode_t uplo,
2477 }
2478
2479 template <>
2480-void ref_tbmv<hipblasDoubleComplex>(hipblasFillMode_t uplo,
2481+void ref_tbmv<std::complex<double>>(hipblasFillMode_t uplo,
2482 hipblasOperation_t transA,
2483 hipblasDiagType_t diag,
2484 int64_t m,
2485 int64_t k,
2486- const hipblasDoubleComplex* A,
2487+ const std::complex<double>* A,
2488 int64_t lda,
2489- hipblasDoubleComplex* x,
2490+ std::complex<double>* x,
2491 int64_t incx)
2492 {
2493 cblas_ztbmv(CblasColMajor,
2494@@ -2080,15 +2089,15 @@ void ref_tbsv<double>(hipblasFillMode_t uplo,
2495 }
2496
2497 template <>
2498-void ref_tbsv<hipblasComplex>(hipblasFillMode_t uplo,
2499- hipblasOperation_t transA,
2500- hipblasDiagType_t diag,
2501- int64_t m,
2502- int64_t k,
2503- const hipblasComplex* A,
2504- int64_t lda,
2505- hipblasComplex* x,
2506- int64_t incx)
2507+void ref_tbsv<std::complex<float>>(hipblasFillMode_t uplo,
2508+ hipblasOperation_t transA,
2509+ hipblasDiagType_t diag,
2510+ int64_t m,
2511+ int64_t k,
2512+ const std::complex<float>* A,
2513+ int64_t lda,
2514+ std::complex<float>* x,
2515+ int64_t incx)
2516 {
2517 cblas_ctbsv(CblasColMajor,
2518 CBLAS_UPLO(uplo),
2519@@ -2103,14 +2112,14 @@ void ref_tbsv<hipblasComplex>(hipblasFillMode_t uplo,
2520 }
2521
2522 template <>
2523-void ref_tbsv<hipblasDoubleComplex>(hipblasFillMode_t uplo,
2524+void ref_tbsv<std::complex<double>>(hipblasFillMode_t uplo,
2525 hipblasOperation_t transA,
2526 hipblasDiagType_t diag,
2527 int64_t m,
2528 int64_t k,
2529- const hipblasDoubleComplex* A,
2530+ const std::complex<double>* A,
2531 int64_t lda,
2532- hipblasDoubleComplex* x,
2533+ std::complex<double>* x,
2534 int64_t incx)
2535 {
2536 cblas_ztbsv(CblasColMajor,
2537@@ -2153,13 +2162,13 @@ void ref_tpmv(hipblasFillMode_t uplo,
2538 }
2539
2540 template <>
2541-void ref_tpmv(hipblasFillMode_t uplo,
2542- hipblasOperation_t transA,
2543- hipblasDiagType_t diag,
2544- int64_t m,
2545- const hipblasComplex* A,
2546- hipblasComplex* x,
2547- int64_t incx)
2548+void ref_tpmv(hipblasFillMode_t uplo,
2549+ hipblasOperation_t transA,
2550+ hipblasDiagType_t diag,
2551+ int64_t m,
2552+ const std::complex<float>* A,
2553+ std::complex<float>* x,
2554+ int64_t incx)
2555 {
2556 cblas_ctpmv(
2557 CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, A, x, incx);
2558@@ -2170,8 +2179,8 @@ void ref_tpmv(hipblasFillMode_t uplo,
2559 hipblasOperation_t transA,
2560 hipblasDiagType_t diag,
2561 int64_t m,
2562- const hipblasDoubleComplex* A,
2563- hipblasDoubleComplex* x,
2564+ const std::complex<double>* A,
2565+ std::complex<double>* x,
2566 int64_t incx)
2567 {
2568 cblas_ztpmv(
2569@@ -2206,13 +2215,13 @@ void ref_tpsv(hipblasFillMode_t uplo,
2570 }
2571
2572 template <>
2573-void ref_tpsv(hipblasFillMode_t uplo,
2574- hipblasOperation_t transA,
2575- hipblasDiagType_t diag,
2576- int64_t n,
2577- const hipblasComplex* AP,
2578- hipblasComplex* x,
2579- int64_t incx)
2580+void ref_tpsv(hipblasFillMode_t uplo,
2581+ hipblasOperation_t transA,
2582+ hipblasDiagType_t diag,
2583+ int64_t n,
2584+ const std::complex<float>* AP,
2585+ std::complex<float>* x,
2586+ int64_t incx)
2587 {
2588 cblas_ctpsv(
2589 CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), n, AP, x, incx);
2590@@ -2223,8 +2232,8 @@ void ref_tpsv(hipblasFillMode_t uplo,
2591 hipblasOperation_t transA,
2592 hipblasDiagType_t diag,
2593 int64_t n,
2594- const hipblasDoubleComplex* AP,
2595- hipblasDoubleComplex* x,
2596+ const std::complex<double>* AP,
2597+ std::complex<double>* x,
2598 int64_t incx)
2599 {
2600 cblas_ztpsv(
2601@@ -2275,14 +2284,14 @@ void ref_trmv<double>(hipblasFillMode_t uplo,
2602 }
2603
2604 template <>
2605-void ref_trmv<hipblasComplex>(hipblasFillMode_t uplo,
2606- hipblasOperation_t transA,
2607- hipblasDiagType_t diag,
2608- int64_t m,
2609- const hipblasComplex* A,
2610- int64_t lda,
2611- hipblasComplex* x,
2612- int64_t incx)
2613+void ref_trmv<std::complex<float>>(hipblasFillMode_t uplo,
2614+ hipblasOperation_t transA,
2615+ hipblasDiagType_t diag,
2616+ int64_t m,
2617+ const std::complex<float>* A,
2618+ int64_t lda,
2619+ std::complex<float>* x,
2620+ int64_t incx)
2621 {
2622 cblas_ctrmv(CblasColMajor,
2623 CBLAS_UPLO(uplo),
2624@@ -2296,13 +2305,13 @@ void ref_trmv<hipblasComplex>(hipblasFillMode_t uplo,
2625 }
2626
2627 template <>
2628-void ref_trmv<hipblasDoubleComplex>(hipblasFillMode_t uplo,
2629+void ref_trmv<std::complex<double>>(hipblasFillMode_t uplo,
2630 hipblasOperation_t transA,
2631 hipblasDiagType_t diag,
2632 int64_t m,
2633- const hipblasDoubleComplex* A,
2634+ const std::complex<double>* A,
2635 int64_t lda,
2636- hipblasDoubleComplex* x,
2637+ std::complex<double>* x,
2638 int64_t incx)
2639 {
2640 cblas_ztrmv(CblasColMajor,
2641@@ -2362,15 +2371,15 @@ void ref_trsv<double>(hipblasHandle_t handle,
2642 }
2643
2644 template <>
2645-void ref_trsv<hipblasComplex>(hipblasHandle_t handle,
2646- hipblasFillMode_t uplo,
2647- hipblasOperation_t transA,
2648- hipblasDiagType_t diag,
2649- int64_t m,
2650- const hipblasComplex* A,
2651- int64_t lda,
2652- hipblasComplex* x,
2653- int64_t incx)
2654+void ref_trsv<std::complex<float>>(hipblasHandle_t handle,
2655+ hipblasFillMode_t uplo,
2656+ hipblasOperation_t transA,
2657+ hipblasDiagType_t diag,
2658+ int64_t m,
2659+ const std::complex<float>* A,
2660+ int64_t lda,
2661+ std::complex<float>* x,
2662+ int64_t incx)
2663 {
2664 cblas_ctrsv(CblasColMajor,
2665 CBLAS_UPLO(uplo),
2666@@ -2384,14 +2393,14 @@ void ref_trsv<hipblasComplex>(hipblasHandle_t handle,
2667 }
2668
2669 template <>
2670-void ref_trsv<hipblasDoubleComplex>(hipblasHandle_t handle,
2671+void ref_trsv<std::complex<double>>(hipblasHandle_t handle,
2672 hipblasFillMode_t uplo,
2673 hipblasOperation_t transA,
2674 hipblasDiagType_t diag,
2675 int64_t m,
2676- const hipblasDoubleComplex* A,
2677+ const std::complex<double>* A,
2678 int64_t lda,
2679- hipblasDoubleComplex* x,
2680+ std::complex<double>* x,
2681 int64_t incx)
2682 {
2683 cblas_ztrsv(CblasColMajor,
2684@@ -2436,10 +2445,13 @@ void ref_geam_helper(hipblasOperation_t transA,
2685 {
2686 T a_val = A[i * inc1_A + j * inc2_A];
2687 T b_val = B[i * inc1_B + j * inc2_B];
2688- if(transA == HIPBLAS_OP_C)
2689- a_val = std::conj(a_val);
2690- if(transB == HIPBLAS_OP_C)
2691- b_val = std::conj(b_val);
2692+ if constexpr(is_complex<T>)
2693+ {
2694+ if(transA == HIPBLAS_OP_C)
2695+ a_val = std::conj(a_val);
2696+ if(transB == HIPBLAS_OP_C)
2697+ b_val = std::conj(b_val);
2698+ }
2699 C[i + j * ldc] = alpha * a_val + beta * b_val;
2700 }
2701 }
2702@@ -2503,15 +2515,15 @@ void ref_dgmm(hipblasSideMode_t side,
2703 }
2704
2705 template <>
2706-void ref_dgmm(hipblasSideMode_t side,
2707- int64_t M,
2708- int64_t N,
2709- const hipblasComplex* A,
2710- int64_t lda,
2711- const hipblasComplex* x,
2712- int64_t incx,
2713- hipblasComplex* C,
2714- int64_t ldc)
2715+void ref_dgmm(hipblasSideMode_t side,
2716+ int64_t M,
2717+ int64_t N,
2718+ const std::complex<float>* A,
2719+ int64_t lda,
2720+ const std::complex<float>* x,
2721+ int64_t incx,
2722+ std::complex<float>* C,
2723+ int64_t ldc)
2724 {
2725 ref_dgmm_helper(side, M, N, A, lda, x, incx, C, ldc);
2726 }
2727@@ -2520,11 +2532,11 @@ template <>
2728 void ref_dgmm(hipblasSideMode_t side,
2729 int64_t M,
2730 int64_t N,
2731- const hipblasDoubleComplex* A,
2732+ const std::complex<double>* A,
2733 int64_t lda,
2734- const hipblasDoubleComplex* x,
2735+ const std::complex<double>* x,
2736 int64_t incx,
2737- hipblasDoubleComplex* C,
2738+ std::complex<double>* C,
2739 int64_t ldc)
2740 {
2741 ref_dgmm_helper(side, M, N, A, lda, x, incx, C, ldc);
2742@@ -2566,18 +2578,18 @@ void ref_geam(hipblasOperation_t transa,
2743 }
2744
2745 template <>
2746-void ref_geam(hipblasOperation_t transa,
2747- hipblasOperation_t transb,
2748- int64_t m,
2749- int64_t n,
2750- hipblasComplex* alpha,
2751- hipblasComplex* A,
2752- int64_t lda,
2753- hipblasComplex* beta,
2754- hipblasComplex* B,
2755- int64_t ldb,
2756- hipblasComplex* C,
2757- int64_t ldc)
2758+void ref_geam(hipblasOperation_t transa,
2759+ hipblasOperation_t transb,
2760+ int64_t m,
2761+ int64_t n,
2762+ std::complex<float>* alpha,
2763+ std::complex<float>* A,
2764+ int64_t lda,
2765+ std::complex<float>* beta,
2766+ std::complex<float>* B,
2767+ int64_t ldb,
2768+ std::complex<float>* C,
2769+ int64_t ldc)
2770 {
2771 return ref_geam_helper(transa, transb, m, n, *alpha, A, lda, *beta, B, ldb, C, ldc);
2772 }
2773@@ -2587,13 +2599,13 @@ void ref_geam(hipblasOperation_t transa,
2774 hipblasOperation_t transb,
2775 int64_t m,
2776 int64_t n,
2777- hipblasDoubleComplex* alpha,
2778- hipblasDoubleComplex* A,
2779+ std::complex<double>* alpha,
2780+ std::complex<double>* A,
2781 int64_t lda,
2782- hipblasDoubleComplex* beta,
2783- hipblasDoubleComplex* B,
2784+ std::complex<double>* beta,
2785+ std::complex<double>* B,
2786 int64_t ldb,
2787- hipblasDoubleComplex* C,
2788+ std::complex<double>* C,
2789 int64_t ldc)
2790 {
2791 return ref_geam_helper(transa, transb, m, n, *alpha, A, lda, *beta, B, ldb, C, ldc);
2792@@ -2955,19 +2967,19 @@ void ref_gemm<double>(hipblasOperation_t transA,
2793 }
2794
2795 template <>
2796-void ref_gemm<hipblasComplex>(hipblasOperation_t transA,
2797- hipblasOperation_t transB,
2798- int64_t m,
2799- int64_t n,
2800- int64_t k,
2801- hipblasComplex alpha,
2802- hipblasComplex* A,
2803- int64_t lda,
2804- hipblasComplex* B,
2805- int64_t ldb,
2806- hipblasComplex beta,
2807- hipblasComplex* C,
2808- int64_t ldc)
2809+void ref_gemm<std::complex<float>>(hipblasOperation_t transA,
2810+ hipblasOperation_t transB,
2811+ int64_t m,
2812+ int64_t n,
2813+ int64_t k,
2814+ std::complex<float> alpha,
2815+ std::complex<float>* A,
2816+ int64_t lda,
2817+ std::complex<float>* B,
2818+ int64_t ldb,
2819+ std::complex<float> beta,
2820+ std::complex<float>* C,
2821+ int64_t ldc)
2822 {
2823 //just directly cast, since transA, transB are integers in the enum
2824 cblas_cgemm(CblasColMajor,
2825@@ -2987,18 +2999,18 @@ void ref_gemm<hipblasComplex>(hipblasOperation_t transA,
2826 }
2827
2828 template <>
2829-void ref_gemm<hipblasDoubleComplex>(hipblasOperation_t transA,
2830+void ref_gemm<std::complex<double>>(hipblasOperation_t transA,
2831 hipblasOperation_t transB,
2832 int64_t m,
2833 int64_t n,
2834 int64_t k,
2835- hipblasDoubleComplex alpha,
2836- hipblasDoubleComplex* A,
2837+ std::complex<double> alpha,
2838+ std::complex<double>* A,
2839 int64_t lda,
2840- hipblasDoubleComplex* B,
2841+ std::complex<double>* B,
2842 int64_t ldb,
2843- hipblasDoubleComplex beta,
2844- hipblasDoubleComplex* C,
2845+ std::complex<double> beta,
2846+ std::complex<double>* C,
2847 int64_t ldc)
2848 {
2849 cblas_zgemm(CblasColMajor,
2850@@ -3077,18 +3089,18 @@ void ref_gemm<int8_t, int32_t, int32_t>(hipblasOperation_t transA,
2851
2852 // hemm
2853 template <>
2854-void ref_hemm(hipblasSideMode_t side,
2855- hipblasFillMode_t uplo,
2856- int64_t m,
2857- int64_t n,
2858- hipblasComplex alpha,
2859- hipblasComplex* A,
2860- int64_t lda,
2861- hipblasComplex* B,
2862- int64_t ldb,
2863- hipblasComplex beta,
2864- hipblasComplex* C,
2865- int64_t ldc)
2866+void ref_hemm(hipblasSideMode_t side,
2867+ hipblasFillMode_t uplo,
2868+ int64_t m,
2869+ int64_t n,
2870+ std::complex<float> alpha,
2871+ std::complex<float>* A,
2872+ int64_t lda,
2873+ std::complex<float>* B,
2874+ int64_t ldb,
2875+ std::complex<float> beta,
2876+ std::complex<float>* C,
2877+ int64_t ldc)
2878 {
2879 cblas_chemm(CblasColMajor,
2880 (CBLAS_SIDE)side,
2881@@ -3110,13 +3122,13 @@ void ref_hemm(hipblasSideMode_t side,
2882 hipblasFillMode_t uplo,
2883 int64_t m,
2884 int64_t n,
2885- hipblasDoubleComplex alpha,
2886- hipblasDoubleComplex* A,
2887+ std::complex<double> alpha,
2888+ std::complex<double>* A,
2889 int64_t lda,
2890- hipblasDoubleComplex* B,
2891+ std::complex<double>* B,
2892 int64_t ldb,
2893- hipblasDoubleComplex beta,
2894- hipblasDoubleComplex* C,
2895+ std::complex<double> beta,
2896+ std::complex<double>* C,
2897 int64_t ldc)
2898 {
2899 cblas_zhemm(CblasColMajor,
2900@@ -3136,16 +3148,16 @@ void ref_hemm(hipblasSideMode_t side,
2901
2902 // herk
2903 template <>
2904-void ref_herk(hipblasFillMode_t uplo,
2905- hipblasOperation_t transA,
2906- int64_t n,
2907- int64_t k,
2908- float alpha,
2909- hipblasComplex* A,
2910- int64_t lda,
2911- float beta,
2912- hipblasComplex* C,
2913- int64_t ldc)
2914+void ref_herk(hipblasFillMode_t uplo,
2915+ hipblasOperation_t transA,
2916+ int64_t n,
2917+ int64_t k,
2918+ float alpha,
2919+ std::complex<float>* A,
2920+ int64_t lda,
2921+ float beta,
2922+ std::complex<float>* C,
2923+ int64_t ldc)
2924 {
2925 cblas_cherk(CblasColMajor,
2926 (CBLAS_UPLO)uplo,
2927@@ -3166,10 +3178,10 @@ void ref_herk(hipblasFillMode_t uplo,
2928 int64_t n,
2929 int64_t k,
2930 double alpha,
2931- hipblasDoubleComplex* A,
2932+ std::complex<double>* A,
2933 int64_t lda,
2934 double beta,
2935- hipblasDoubleComplex* C,
2936+ std::complex<double>* C,
2937 int64_t ldc)
2938 {
2939 cblas_zherk(CblasColMajor,
2940@@ -3270,18 +3282,18 @@ void ref_herkx_local(hipblasFillMode_t uplo,
2941 }
2942
2943 template <>
2944-void ref_herkx(hipblasFillMode_t uplo,
2945- hipblasOperation_t transA,
2946- int64_t n,
2947- int64_t k,
2948- hipblasComplex alpha,
2949- hipblasComplex* A,
2950- int64_t lda,
2951- hipblasComplex* B,
2952- int64_t ldb,
2953- float beta,
2954- hipblasComplex* C,
2955- int64_t ldc)
2956+void ref_herkx(hipblasFillMode_t uplo,
2957+ hipblasOperation_t transA,
2958+ int64_t n,
2959+ int64_t k,
2960+ std::complex<float> alpha,
2961+ std::complex<float>* A,
2962+ int64_t lda,
2963+ std::complex<float>* B,
2964+ int64_t ldb,
2965+ float beta,
2966+ std::complex<float>* C,
2967+ int64_t ldc)
2968 {
2969 ref_herkx_local(uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
2970 }
2971@@ -3291,13 +3303,13 @@ void ref_herkx(hipblasFillMode_t uplo,
2972 hipblasOperation_t transA,
2973 int64_t n,
2974 int64_t k,
2975- hipblasDoubleComplex alpha,
2976- hipblasDoubleComplex* A,
2977+ std::complex<double> alpha,
2978+ std::complex<double>* A,
2979 int64_t lda,
2980- hipblasDoubleComplex* B,
2981+ std::complex<double>* B,
2982 int64_t ldb,
2983 double beta,
2984- hipblasDoubleComplex* C,
2985+ std::complex<double>* C,
2986 int64_t ldc)
2987 {
2988 ref_herkx_local(uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
2989@@ -3305,18 +3317,18 @@ void ref_herkx(hipblasFillMode_t uplo,
2990
2991 // her2k
2992 template <>
2993-void ref_her2k(hipblasFillMode_t uplo,
2994- hipblasOperation_t transA,
2995- int64_t n,
2996- int64_t k,
2997- hipblasComplex alpha,
2998- hipblasComplex* A,
2999- int64_t lda,
3000- hipblasComplex* B,
3001- int64_t ldb,
3002- float beta,
3003- hipblasComplex* C,
3004- int64_t ldc)
3005+void ref_her2k(hipblasFillMode_t uplo,
3006+ hipblasOperation_t transA,
3007+ int64_t n,
3008+ int64_t k,
3009+ std::complex<float> alpha,
3010+ std::complex<float>* A,
3011+ int64_t lda,
3012+ std::complex<float>* B,
3013+ int64_t ldb,
3014+ float beta,
3015+ std::complex<float>* C,
3016+ int64_t ldc)
3017 {
3018 cblas_cher2k(CblasColMajor,
3019 (CBLAS_UPLO)uplo,
3020@@ -3338,13 +3350,13 @@ void ref_her2k(hipblasFillMode_t uplo,
3021 hipblasOperation_t transA,
3022 int64_t n,
3023 int64_t k,
3024- hipblasDoubleComplex alpha,
3025- hipblasDoubleComplex* A,
3026+ std::complex<double> alpha,
3027+ std::complex<double>* A,
3028 int64_t lda,
3029- hipblasDoubleComplex* B,
3030+ std::complex<double>* B,
3031 int64_t ldb,
3032 double beta,
3033- hipblasDoubleComplex* C,
3034+ std::complex<double>* C,
3035 int64_t ldc)
3036 {
3037 cblas_zher2k(CblasColMajor,
3038@@ -3422,18 +3434,18 @@ void ref_symm(hipblasSideMode_t side,
3039 }
3040
3041 template <>
3042-void ref_symm(hipblasSideMode_t side,
3043- hipblasFillMode_t uplo,
3044- int64_t m,
3045- int64_t n,
3046- hipblasComplex alpha,
3047- hipblasComplex* A,
3048- int64_t lda,
3049- hipblasComplex* B,
3050- int64_t ldb,
3051- hipblasComplex beta,
3052- hipblasComplex* C,
3053- int64_t ldc)
3054+void ref_symm(hipblasSideMode_t side,
3055+ hipblasFillMode_t uplo,
3056+ int64_t m,
3057+ int64_t n,
3058+ std::complex<float> alpha,
3059+ std::complex<float>* A,
3060+ int64_t lda,
3061+ std::complex<float>* B,
3062+ int64_t ldb,
3063+ std::complex<float> beta,
3064+ std::complex<float>* C,
3065+ int64_t ldc)
3066 {
3067 cblas_csymm(CblasColMajor,
3068 (CBLAS_SIDE)side,
3069@@ -3455,13 +3467,13 @@ void ref_symm(hipblasSideMode_t side,
3070 hipblasFillMode_t uplo,
3071 int64_t m,
3072 int64_t n,
3073- hipblasDoubleComplex alpha,
3074- hipblasDoubleComplex* A,
3075+ std::complex<double> alpha,
3076+ std::complex<double>* A,
3077 int64_t lda,
3078- hipblasDoubleComplex* B,
3079+ std::complex<double>* B,
3080 int64_t ldb,
3081- hipblasDoubleComplex beta,
3082- hipblasDoubleComplex* C,
3083+ std::complex<double> beta,
3084+ std::complex<double>* C,
3085 int64_t ldc)
3086 {
3087 cblas_zsymm(CblasColMajor,
3088@@ -3531,16 +3543,16 @@ void ref_syrk(hipblasFillMode_t uplo,
3089 }
3090
3091 template <>
3092-void ref_syrk(hipblasFillMode_t uplo,
3093- hipblasOperation_t transA,
3094- int64_t n,
3095- int64_t k,
3096- hipblasComplex alpha,
3097- hipblasComplex* A,
3098- int64_t lda,
3099- hipblasComplex beta,
3100- hipblasComplex* C,
3101- int64_t ldc)
3102+void ref_syrk(hipblasFillMode_t uplo,
3103+ hipblasOperation_t transA,
3104+ int64_t n,
3105+ int64_t k,
3106+ std::complex<float> alpha,
3107+ std::complex<float>* A,
3108+ int64_t lda,
3109+ std::complex<float> beta,
3110+ std::complex<float>* C,
3111+ int64_t ldc)
3112 {
3113 cblas_csyrk(CblasColMajor,
3114 (CBLAS_UPLO)uplo,
3115@@ -3560,11 +3572,11 @@ void ref_syrk(hipblasFillMode_t uplo,
3116 hipblasOperation_t transA,
3117 int64_t n,
3118 int64_t k,
3119- hipblasDoubleComplex alpha,
3120- hipblasDoubleComplex* A,
3121+ std::complex<double> alpha,
3122+ std::complex<double>* A,
3123 int64_t lda,
3124- hipblasDoubleComplex beta,
3125- hipblasDoubleComplex* C,
3126+ std::complex<double> beta,
3127+ std::complex<double>* C,
3128 int64_t ldc)
3129 {
3130 cblas_zsyrk(CblasColMajor,
3131@@ -3640,18 +3652,18 @@ void ref_syr2k(hipblasFillMode_t uplo,
3132 }
3133
3134 template <>
3135-void ref_syr2k(hipblasFillMode_t uplo,
3136- hipblasOperation_t transA,
3137- int64_t n,
3138- int64_t k,
3139- hipblasComplex alpha,
3140- hipblasComplex* A,
3141- int64_t lda,
3142- hipblasComplex* B,
3143- int64_t ldb,
3144- hipblasComplex beta,
3145- hipblasComplex* C,
3146- int64_t ldc)
3147+void ref_syr2k(hipblasFillMode_t uplo,
3148+ hipblasOperation_t transA,
3149+ int64_t n,
3150+ int64_t k,
3151+ std::complex<float> alpha,
3152+ std::complex<float>* A,
3153+ int64_t lda,
3154+ std::complex<float>* B,
3155+ int64_t ldb,
3156+ std::complex<float> beta,
3157+ std::complex<float>* C,
3158+ int64_t ldc)
3159 {
3160 cblas_csyr2k(CblasColMajor,
3161 (CBLAS_UPLO)uplo,
3162@@ -3673,13 +3685,13 @@ void ref_syr2k(hipblasFillMode_t uplo,
3163 hipblasOperation_t transA,
3164 int64_t n,
3165 int64_t k,
3166- hipblasDoubleComplex alpha,
3167- hipblasDoubleComplex* A,
3168+ std::complex<double> alpha,
3169+ std::complex<double>* A,
3170 int64_t lda,
3171- hipblasDoubleComplex* B,
3172+ std::complex<double>* B,
3173 int64_t ldb,
3174- hipblasDoubleComplex beta,
3175- hipblasDoubleComplex* C,
3176+ std::complex<double> beta,
3177+ std::complex<double>* C,
3178 int64_t ldc)
3179 {
3180 cblas_zsyr2k(CblasColMajor,
3181@@ -3759,16 +3771,16 @@ void ref_trsm<double>(hipblasSideMode_t side,
3182 }
3183
3184 template <>
3185-void ref_trsm<hipblasComplex>(hipblasSideMode_t side,
3186+void ref_trsm<std::complex<float>>(hipblasSideMode_t side,
3187 hipblasFillMode_t uplo,
3188 hipblasOperation_t transA,
3189 hipblasDiagType_t diag,
3190 int64_t m,
3191 int64_t n,
3192- hipblasComplex alpha,
3193- const hipblasComplex* A,
3194+ std::complex<float> alpha,
3195+ const std::complex<float>* A,
3196 int64_t lda,
3197- hipblasComplex* B,
3198+ std::complex<float>* B,
3199 int64_t ldb)
3200 {
3201 cblas_ctrsm(CblasColMajor,
3202@@ -3786,16 +3798,16 @@ void ref_trsm<hipblasComplex>(hipblasSideMode_t side,
3203 }
3204
3205 template <>
3206-void ref_trsm<hipblasDoubleComplex>(hipblasSideMode_t side,
3207+void ref_trsm<std::complex<double>>(hipblasSideMode_t side,
3208 hipblasFillMode_t uplo,
3209 hipblasOperation_t transA,
3210 hipblasDiagType_t diag,
3211 int64_t m,
3212 int64_t n,
3213- hipblasDoubleComplex alpha,
3214- const hipblasDoubleComplex* A,
3215+ std::complex<double> alpha,
3216+ const std::complex<double>* A,
3217 int64_t lda,
3218- hipblasDoubleComplex* B,
3219+ std::complex<double>* B,
3220 int64_t ldb)
3221 {
3222 cblas_ztrsm(CblasColMajor,
3223@@ -3828,14 +3840,15 @@ void ref_trtri<double>(char uplo, char diag, int64_t n, double* A, int64_t lda)
3224 }
3225
3226 template <>
3227-void ref_trtri<hipblasComplex>(char uplo, char diag, int64_t n, hipblasComplex* A, int64_t lda)
3228+void ref_trtri<std::complex<float>>(
3229+ char uplo, char diag, int64_t n, std::complex<float>* A, int64_t lda)
3230 {
3231 lapack_xtrtri(uplo, diag, n, A, lda);
3232 }
3233
3234 template <>
3235-void ref_trtri<hipblasDoubleComplex>(
3236- char uplo, char diag, int64_t n, hipblasDoubleComplex* A, int64_t lda)
3237+void ref_trtri<std::complex<double>>(
3238+ char uplo, char diag, int64_t n, std::complex<double>* A, int64_t lda)
3239 {
3240 lapack_xtrtri(uplo, diag, n, A, lda);
3241 }
3242@@ -3898,17 +3911,17 @@ void ref_trmm<double>(hipblasSideMode_t side,
3243 }
3244
3245 template <>
3246-void ref_trmm<hipblasComplex>(hipblasSideMode_t side,
3247- hipblasFillMode_t uplo,
3248- hipblasOperation_t transA,
3249- hipblasDiagType_t diag,
3250- int64_t m,
3251- int64_t n,
3252- hipblasComplex alpha,
3253- const hipblasComplex* A,
3254- int64_t lda,
3255- hipblasComplex* B,
3256- int64_t ldb)
3257+void ref_trmm<std::complex<float>>(hipblasSideMode_t side,
3258+ hipblasFillMode_t uplo,
3259+ hipblasOperation_t transA,
3260+ hipblasDiagType_t diag,
3261+ int64_t m,
3262+ int64_t n,
3263+ std::complex<float> alpha,
3264+ const std::complex<float>* A,
3265+ int64_t lda,
3266+ std::complex<float>* B,
3267+ int64_t ldb)
3268 {
3269 cblas_ctrmm(CblasColMajor,
3270 (CBLAS_SIDE)side,
3271@@ -3925,16 +3938,16 @@ void ref_trmm<hipblasComplex>(hipblasSideMode_t side,
3272 }
3273
3274 template <>
3275-void ref_trmm<hipblasDoubleComplex>(hipblasSideMode_t side,
3276+void ref_trmm<std::complex<double>>(hipblasSideMode_t side,
3277 hipblasFillMode_t uplo,
3278 hipblasOperation_t transA,
3279 hipblasDiagType_t diag,
3280 int64_t m,
3281 int64_t n,
3282- hipblasDoubleComplex alpha,
3283- const hipblasDoubleComplex* A,
3284+ std::complex<double> alpha,
3285+ const std::complex<double>* A,
3286 int64_t lda,
3287- hipblasDoubleComplex* B,
3288+ std::complex<double>* B,
3289 int64_t ldb)
3290 {
3291 cblas_ztrmm(CblasColMajor,
3292@@ -3989,7 +4002,7 @@ int64_t ref_potrf(char uplo, int64_t m, double* A, int64_t lda)
3293 }
3294
3295 template <>
3296-int64_t ref_potrf(char uplo, int64_t m, hipblasComplex* A, int64_t lda)
3297+int64_t ref_potrf(char uplo, int64_t m, std::complex<float>* A, int64_t lda)
3298 {
3299 int64_t info;
3300
3301@@ -4003,7 +4016,7 @@ int64_t ref_potrf(char uplo, int64_t m, hipblasComplex* A, int64_t lda)
3302 }
3303
3304 template <>
3305-int64_t ref_potrf(char uplo, int64_t m, hipblasDoubleComplex* A, int64_t lda)
3306+int64_t ref_potrf(char uplo, int64_t m, std::complex<double>* A, int64_t lda)
3307 {
3308 int64_t info;
3309
3310@@ -4046,30 +4059,30 @@ int64_t ref_getrf<double>(int64_t m, int64_t n, double* A, int64_t lda, int64_t*
3311 }
3312
3313 template <>
3314-int64_t
3315- ref_getrf<hipblasComplex>(int64_t m, int64_t n, hipblasComplex* A, int64_t lda, int64_t* ipiv)
3316+int64_t ref_getrf<std::complex<float>>(
3317+ int64_t m, int64_t n, std::complex<float>* A, int64_t lda, int64_t* ipiv)
3318 {
3319 int64_t info;
3320
3321 #ifdef FLA_ENABLE_ILP64
3322 info = LAPACKE_cgetrf(LAPACK_COL_MAJOR, m, n, (lapack_complex_float*)A, lda, ipiv);
3323 #else
3324- cgetrf_(&m, &n, (hipblasComplex*)A, &lda, ipiv, &info);
3325+ cgetrf_(&m, &n, (std::complex<float>*)A, &lda, ipiv, &info);
3326 #endif
3327
3328 return info;
3329 }
3330
3331 template <>
3332-int64_t ref_getrf<hipblasDoubleComplex>(
3333- int64_t m, int64_t n, hipblasDoubleComplex* A, int64_t lda, int64_t* ipiv)
3334+int64_t ref_getrf<std::complex<double>>(
3335+ int64_t m, int64_t n, std::complex<double>* A, int64_t lda, int64_t* ipiv)
3336 {
3337 int64_t info;
3338
3339 #ifdef FLA_ENABLE_ILP64
3340 info = LAPACKE_zgetrf(LAPACK_COL_MAJOR, m, n, (lapack_complex_double*)A, lda, ipiv);
3341 #else
3342- zgetrf_(&m, &n, (hipblasDoubleComplex*)A, &lda, ipiv, &info);
3343+ zgetrf_(&m, &n, (std::complex<double>*)A, &lda, ipiv, &info);
3344 #endif
3345
3346 return info;
3347@@ -4119,14 +4132,14 @@ int64_t ref_getrs<double>(char trans,
3348 }
3349
3350 template <>
3351-int64_t ref_getrs<hipblasComplex>(char trans,
3352- int64_t n,
3353- int64_t nrhs,
3354- hipblasComplex* A,
3355- int64_t lda,
3356- int64_t* ipiv,
3357- hipblasComplex* B,
3358- int64_t ldb)
3359+int64_t ref_getrs<std::complex<float>>(char trans,
3360+ int64_t n,
3361+ int64_t nrhs,
3362+ std::complex<float>* A,
3363+ int64_t lda,
3364+ int64_t* ipiv,
3365+ std::complex<float>* B,
3366+ int64_t ldb)
3367 {
3368 int64_t info;
3369
3370@@ -4141,20 +4154,28 @@ int64_t ref_getrs<hipblasComplex>(char trans,
3371 (lapack_complex_float*)B,
3372 ldb);
3373 #else
3374- cgetrs_(&trans, &n, &nrhs, (hipblasComplex*)A, &lda, ipiv, (hipblasComplex*)B, &ldb, &info);
3375+ cgetrs_(&trans,
3376+ &n,
3377+ &nrhs,
3378+ (std::complex<float>*)A,
3379+ &lda,
3380+ ipiv,
3381+ (std::complex<float>*)B,
3382+ &ldb,
3383+ &info);
3384 #endif
3385
3386 return info;
3387 }
3388
3389 template <>
3390-int64_t ref_getrs<hipblasDoubleComplex>(char trans,
3391+int64_t ref_getrs<std::complex<double>>(char trans,
3392 int64_t n,
3393 int64_t nrhs,
3394- hipblasDoubleComplex* A,
3395+ std::complex<double>* A,
3396 int64_t lda,
3397 int64_t* ipiv,
3398- hipblasDoubleComplex* B,
3399+ std::complex<double>* B,
3400 int64_t ldb)
3401 {
3402
3403@@ -4174,10 +4195,10 @@ int64_t ref_getrs<hipblasDoubleComplex>(char trans,
3404 zgetrs_(&trans,
3405 &n,
3406 &nrhs,
3407- (hipblasDoubleComplex*)A,
3408+ (std::complex<double>*)A,
3409 &lda,
3410 ipiv,
3411- (hipblasDoubleComplex*)B,
3412+ (std::complex<double>*)B,
3413 &ldb,
3414 &info);
3415 #endif
3416@@ -4217,8 +4238,12 @@ int64_t
3417 }
3418
3419 template <>
3420-int64_t ref_getri<hipblasComplex>(
3421- int64_t n, hipblasComplex* A, int64_t lda, int64_t* ipiv, hipblasComplex* work, int64_t lwork)
3422+int64_t ref_getri<std::complex<float>>(int64_t n,
3423+ std::complex<float>* A,
3424+ int64_t lda,
3425+ int64_t* ipiv,
3426+ std::complex<float>* work,
3427+ int64_t lwork)
3428 {
3429 int64_t info;
3430
3431@@ -4238,11 +4263,11 @@ int64_t ref_getri<hipblasComplex>(
3432 }
3433
3434 template <>
3435-int64_t ref_getri<hipblasDoubleComplex>(int64_t n,
3436- hipblasDoubleComplex* A,
3437+int64_t ref_getri<std::complex<double>>(int64_t n,
3438+ std::complex<double>* A,
3439 int64_t lda,
3440 int64_t* ipiv,
3441- hipblasDoubleComplex* work,
3442+ std::complex<double>* work,
3443 int64_t lwork)
3444 {
3445 int64_t info;
3446@@ -4293,13 +4318,13 @@ int64_t ref_geqrf<double>(
3447 return info;
3448 }
3449 template <>
3450-int64_t ref_geqrf<hipblasComplex>(int64_t m,
3451- int64_t n,
3452- hipblasComplex* A,
3453- int64_t lda,
3454- hipblasComplex* tau,
3455- hipblasComplex* work,
3456- int64_t lwork)
3457+int64_t ref_geqrf<std::complex<float>>(int64_t m,
3458+ int64_t n,
3459+ std::complex<float>* A,
3460+ int64_t lda,
3461+ std::complex<float>* tau,
3462+ std::complex<float>* work,
3463+ int64_t lwork)
3464 {
3465 int64_t info;
3466
3467@@ -4320,12 +4345,12 @@ int64_t ref_geqrf<hipblasComplex>(int64_t m,
3468 }
3469
3470 template <>
3471-int64_t ref_geqrf<hipblasDoubleComplex>(int64_t m,
3472+int64_t ref_geqrf<std::complex<double>>(int64_t m,
3473 int64_t n,
3474- hipblasDoubleComplex* A,
3475+ std::complex<double>* A,
3476 int64_t lda,
3477- hipblasDoubleComplex* tau,
3478- hipblasDoubleComplex* work,
3479+ std::complex<double>* tau,
3480+ std::complex<double>* work,
3481 int64_t lwork)
3482 {
3483 int64_t info;
3484@@ -4394,16 +4419,16 @@ int64_t ref_gels<double>(char trans,
3485 }
3486
3487 template <>
3488-int64_t ref_gels<hipblasComplex>(char trans,
3489- int64_t m,
3490- int64_t n,
3491- int64_t nrhs,
3492- hipblasComplex* A,
3493- int64_t lda,
3494- hipblasComplex* B,
3495- int64_t ldb,
3496- hipblasComplex* work,
3497- int64_t lwork)
3498+int64_t ref_gels<std::complex<float>>(char trans,
3499+ int64_t m,
3500+ int64_t n,
3501+ int64_t nrhs,
3502+ std::complex<float>* A,
3503+ int64_t lda,
3504+ std::complex<float>* B,
3505+ int64_t ldb,
3506+ std::complex<float>* work,
3507+ int64_t lwork)
3508 {
3509 int64_t info;
3510 #ifdef FLA_ENABLE_ILP64
3511@@ -4426,15 +4451,15 @@ int64_t ref_gels<hipblasComplex>(char trans,
3512 }
3513
3514 template <>
3515-int64_t ref_gels<hipblasDoubleComplex>(char trans,
3516+int64_t ref_gels<std::complex<double>>(char trans,
3517 int64_t m,
3518 int64_t n,
3519 int64_t nrhs,
3520- hipblasDoubleComplex* A,
3521+ std::complex<double>* A,
3522 int64_t lda,
3523- hipblasDoubleComplex* B,
3524+ std::complex<double>* B,
3525 int64_t ldb,
3526- hipblasDoubleComplex* work,
3527+ std::complex<double>* work,
3528 int64_t lwork)
3529 {
3530 int64_t info;
3531diff --git a/clients/common/clients_common.cpp b/clients/common/clients_common.cpp
3532index bc84584..e62556a 100644
3533--- a/clients/common/clients_common.cpp
3534+++ b/clients/common/clients_common.cpp
3535@@ -1,5 +1,5 @@
3536 /* ************************************************************************
3537- * Copyright (C) 2016-2023 Advanced Micro Devices, Inc. All rights reserved.
3538+ * Copyright (C) 2016-2025 Advanced Micro Devices, Inc. All rights reserved.
3539 *
3540 * Permission is hereby granted, free of charge, to any person obtaining a copy
3541 * of this software and associated documentation files (the "Software"), to deal
3542@@ -265,7 +265,7 @@ void run_function(const func_map& map, const Arguments& arg, const std::string&
3543 auto match = map.find(arg.function);
3544 if(match == map.end())
3545 throw std::invalid_argument("Invalid combination --function "s + arg.function
3546- + " --a_type "s + hipblas_datatype2string(arg.a_type) + msg);
3547+ + " --a_type "s + hip_datatype2string(arg.a_type) + msg);
3548 match->second(arg);
3549 }
3550
3551@@ -743,11 +743,10 @@ struct perf_blas<T, U, std::enable_if_t<std::is_same<T, hipblasHalf>{}>> : hipbl
3552 };
3553
3554 template <typename T, typename U>
3555-struct perf_blas<
3556- T,
3557- U,
3558- std::enable_if_t<std::is_same<T, hipblasDoubleComplex>{} || std::is_same<T, hipblasComplex>{}>>
3559- : hipblas_test_valid
3560+struct perf_blas<T,
3561+ U,
3562+ std::enable_if_t<std::is_same<T, std::complex<double>>{}
3563+ || std::is_same<T, std::complex<float>>{}>> : hipblas_test_valid
3564 {
3565 void operator()(const Arguments& arg)
3566 {
3567@@ -946,10 +945,12 @@ struct perf_blas_axpy_ex<
3568 hipblasHalf> && std::is_same_v<Ta, Tx> && std::is_same_v<Tx, Ty> && std::is_same_v<Ty, Tex>)
3569 || (std::is_same_v<
3570 Ta,
3571- hipblasComplex> && std::is_same_v<Ta, Tx> && std::is_same_v<Tx, Ty> && std::is_same_v<Ty, Tex>)
3572+ std::complex<
3573+ float>> && std::is_same_v<Ta, Tx> && std::is_same_v<Tx, Ty> && std::is_same_v<Ty, Tex>)
3574 || (std::is_same_v<
3575 Ta,
3576- hipblasDoubleComplex> && std::is_same_v<Ta, Tx> && std::is_same_v<Tx, Ty> && std::is_same_v<Ty, Tex>)
3577+ std::complex<
3578+ double>> && std::is_same_v<Ta, Tx> && std::is_same_v<Tx, Ty> && std::is_same_v<Ty, Tex>)
3579 || (std::is_same_v<
3580 Ta,
3581 hipblasHalf> && std::is_same_v<Ta, Tx> && std::is_same_v<Tx, Ty> && std::is_same_v<Tex, float>)
3582@@ -992,9 +993,9 @@ struct perf_blas_dot_ex<
3583 && std::is_same<Ty, Tr>{} && std::is_same<Tr, Tex>{})
3584 || (std::is_same<Tx, hipblasHalf>{} && std::is_same<Tx, Ty>{}
3585 && std::is_same<Ty, Tr>{} && std::is_same<Tr, Tex>{})
3586- || (std::is_same<Tx, hipblasComplex>{} && std::is_same<Tx, Ty>{}
3587+ || (std::is_same<Tx, std::complex<float>>{} && std::is_same<Tx, Ty>{}
3588 && std::is_same<Ty, Tr>{} && std::is_same<Tr, Tex>{})
3589- || (std::is_same<Tx, hipblasDoubleComplex>{} && std::is_same<Tx, Ty>{}
3590+ || (std::is_same<Tx, std::complex<double>>{} && std::is_same<Tx, Ty>{}
3591 && std::is_same<Ty, Tr>{} && std::is_same<Tr, Tex>{})
3592 || (std::is_same<Tx, hipblasHalf>{} && std::is_same<Tx, Ty>{}
3593 && std::is_same<Ty, Tr>{} && std::is_same<Tex, float>{})
3594@@ -1029,9 +1030,9 @@ struct perf_blas_nrm2_ex<
3595 std::enable_if_t<
3596 (std::is_same<Tx, float>{} && std::is_same<Tx, Tr>{} && std::is_same<Tr, Tex>{})
3597 || (std::is_same<Tx, double>{} && std::is_same<Tx, Tr>{} && std::is_same<Tr, Tex>{})
3598- || (std::is_same<Tx, hipblasComplex>{} && std::is_same<Tr, float>{}
3599+ || (std::is_same<Tx, std::complex<float>>{} && std::is_same<Tr, float>{}
3600 && std::is_same<Tr, Tex>{})
3601- || (std::is_same<Tx, hipblasDoubleComplex>{} && std::is_same<Tr, double>{}
3602+ || (std::is_same<Tx, std::complex<double>>{} && std::is_same<Tr, double>{}
3603 && std::is_same<Tr, Tex>{})
3604 || (std::is_same<Tx, hipblasHalf>{} && std::is_same<Tr, Tx>{} && std::is_same<Tex, float>{})
3605 || (std::is_same<Tx, hipblasBfloat16>{} && std::is_same<Tr, Tx>{}
3606@@ -1063,15 +1064,15 @@ struct perf_blas_rot_ex<
3607 && std::is_same<Tcs, Tex>{})
3608 || (std::is_same<Tx, double>{} && std::is_same<Ty, Tx>{}
3609 && std::is_same<Ty, Tcs>{} && std::is_same<Tex, Tcs>{})
3610- || (std::is_same<Tx, hipblasComplex>{} && std::is_same<Ty, Tx>{}
3611+ || (std::is_same<Tx, std::complex<float>>{} && std::is_same<Ty, Tx>{}
3612 && std::is_same<Tcs, Ty>{} && std::is_same<Tcs, Tex>{})
3613- || (std::is_same<Tx, hipblasDoubleComplex>{} && std::is_same<Tx, Ty>{}
3614+ || (std::is_same<Tx, std::complex<double>>{} && std::is_same<Tx, Ty>{}
3615 && std::is_same<Tcs, Ty>{} && std::is_same<Tex, Tcs>{})
3616- || (std::is_same<Tx, hipblasComplex>{} && std::is_same<Ty, Tx>{}
3617- && std::is_same<Tcs, float>{} && std::is_same<Tex, hipblasComplex>{})
3618- || (std::is_same<Tx, hipblasDoubleComplex>{} && std::is_same<Tx, Ty>{}
3619+ || (std::is_same<Tx, std::complex<float>>{} && std::is_same<Ty, Tx>{}
3620+ && std::is_same<Tcs, float>{} && std::is_same<Tex, std::complex<float>>{})
3621+ || (std::is_same<Tx, std::complex<double>>{} && std::is_same<Tx, Ty>{}
3622 && std::is_same<Tcs, double>{}
3623- && std::is_same<Tex, hipblasDoubleComplex>{})
3624+ && std::is_same<Tex, std::complex<double>>{})
3625 || (std::is_same<Tx, hipblasHalf>{} && std::is_same<Ty, Tx>{}
3626 && std::is_same<Tcs, Ty>{} && std::is_same<Tex, float>{})
3627 || (std::is_same<Tx, hipblasBfloat16>{} && std::is_same<Ty, Tx>{}
3628@@ -1102,13 +1103,13 @@ struct perf_blas_rot<
3629 std::enable_if_t<(std::is_same<Ti, float>{} && std::is_same<Ti, To>{} && std::is_same<To, Tc>{})
3630 || (std::is_same<Ti, double>{} && std::is_same<Ti, To>{}
3631 && std::is_same<To, Tc>{})
3632- || (std::is_same<Ti, hipblasComplex>{} && std::is_same<To, float>{}
3633- && std::is_same<Tc, hipblasComplex>{})
3634- || (std::is_same<Ti, hipblasComplex>{} && std::is_same<To, float>{}
3635+ || (std::is_same<Ti, std::complex<float>>{} && std::is_same<To, float>{}
3636+ && std::is_same<Tc, std::complex<float>>{})
3637+ || (std::is_same<Ti, std::complex<float>>{} && std::is_same<To, float>{}
3638 && std::is_same<Tc, float>{})
3639- || (std::is_same<Ti, hipblasDoubleComplex>{} && std::is_same<To, double>{}
3640- && std::is_same<Tc, hipblasDoubleComplex>{})
3641- || (std::is_same<Ti, hipblasDoubleComplex>{} && std::is_same<To, double>{}
3642+ || (std::is_same<Ti, std::complex<double>>{} && std::is_same<To, double>{}
3643+ && std::is_same<Tc, std::complex<double>>{})
3644+ || (std::is_same<Ti, std::complex<double>>{} && std::is_same<To, double>{}
3645 && std::is_same<Tc, double>{})>> : hipblas_test_valid
3646 {
3647 void operator()(const Arguments& arg)
3648@@ -1131,12 +1132,12 @@ template <typename Ta, typename Tb>
3649 struct perf_blas_scal<
3650 Ta,
3651 Tb,
3652- std::enable_if_t<(std::is_same<Ta, double>{} && std::is_same<Tb, hipblasDoubleComplex>{})
3653- || (std::is_same<Ta, float>{} && std::is_same<Tb, hipblasComplex>{})
3654+ std::enable_if_t<(std::is_same<Ta, double>{} && std::is_same<Tb, std::complex<double>>{})
3655+ || (std::is_same<Ta, float>{} && std::is_same<Tb, std::complex<float>>{})
3656 || (std::is_same<Ta, Tb>{} && std::is_same<Ta, float>{})
3657 || (std::is_same<Ta, Tb>{} && std::is_same<Ta, double>{})
3658- || (std::is_same<Ta, Tb>{} && std::is_same<Ta, hipblasComplex>{})
3659- || (std::is_same<Ta, Tb>{} && std::is_same<Ta, hipblasDoubleComplex>{})>>
3660+ || (std::is_same<Ta, Tb>{} && std::is_same<Ta, std::complex<float>>{})
3661+ || (std::is_same<Ta, Tb>{} && std::is_same<Ta, std::complex<double>>{})>>
3662 : hipblas_test_valid
3663 {
3664 void operator()(const Arguments& arg)
3665@@ -1164,14 +1165,15 @@ struct perf_blas_scal_ex<
3666 (std::is_same<Ta, float>{} && std::is_same<Ta, Tx>{} && std::is_same<Tx, Tex>{})
3667 || (std::is_same<Ta, double>{} && std::is_same<Ta, Tx>{} && std::is_same<Tx, Tex>{})
3668 || (std::is_same<Ta, hipblasHalf>{} && std::is_same<Ta, Tx>{} && std::is_same<Tx, Tex>{})
3669- || (std::is_same<Ta, hipblasComplex>{} && std::is_same<Ta, Tx>{} && std::is_same<Tx, Tex>{})
3670- || (std::is_same<Ta, hipblasDoubleComplex>{} && std::is_same<Ta, Tx>{}
3671+ || (std::is_same<Ta, std::complex<float>>{} && std::is_same<Ta, Tx>{}
3672+ && std::is_same<Tx, Tex>{})
3673+ || (std::is_same<Ta, std::complex<double>>{} && std::is_same<Ta, Tx>{}
3674 && std::is_same<Tx, Tex>{})
3675 || (std::is_same<Ta, hipblasHalf>{} && std::is_same<Ta, Tx>{} && std::is_same<Tex, float>{})
3676 || (std::is_same<Ta, float>{} && std::is_same<Tx, hipblasHalf>{} && std::is_same<Ta, Tex>{})
3677- || (std::is_same<Ta, float>{} && std::is_same<Tx, hipblasComplex>{}
3678+ || (std::is_same<Ta, float>{} && std::is_same<Tx, std::complex<float>>{}
3679 && std::is_same<Tx, Tex>{})
3680- || (std::is_same<Ta, double>{} && std::is_same<Tx, hipblasDoubleComplex>{}
3681+ || (std::is_same<Ta, double>{} && std::is_same<Tx, std::complex<double>>{}
3682 && std::is_same<Tx, Tex>{})
3683 || (std::is_same<Ta, hipblasBfloat16>{} && std::is_same<Ta, Tx>{}
3684 && std::is_same<Tex, float>{})
3685diff --git a/clients/common/hipblas_datatype2string.cpp b/clients/common/hipblas_datatype2string.cpp
3686index 098ec70..42b0af9 100644
3687--- a/clients/common/hipblas_datatype2string.cpp
3688+++ b/clients/common/hipblas_datatype2string.cpp
3689@@ -1,5 +1,5 @@
3690 /* ************************************************************************
3691- * Copyright (C) 2016-2023 Advanced Micro Devices, Inc. All rights reserved.
3692+ * Copyright (C) 2016-2025 Advanced Micro Devices, Inc. All rights reserved.
3693 *
3694 * Permission is hereby granted, free of charge, to any person obtaining a copy
3695 * of this software and associated documentation files (the "Software"), to deal
3696@@ -165,28 +165,30 @@ hipblasSideMode_t char2hipblas_side(char value)
3697 }
3698
3699 // clang-format off
3700-hipblasDatatype_t string2hipblas_datatype(const std::string& value)
3701+/*! \brief Convert a type-name string (e.g. "f32_r", or a BLAS letter such as "s") to a hipDataType. Returns HIP_R_32F if the string is not recognized. */
3702+hipDataType string2hip_datatype(const std::string& value)
3703 {
3704 return
3705- value == "f16_r" || value == "h" ? HIPBLAS_R_16F :
3706- value == "f32_r" || value == "s" ? HIPBLAS_R_32F :
3707- value == "f64_r" || value == "d" ? HIPBLAS_R_64F :
3708- value == "bf16_r" ? HIPBLAS_R_16B :
3709- value == "f16_c" ? HIPBLAS_C_16B :
3710- value == "f32_c" || value == "c" ? HIPBLAS_C_32F :
3711- value == "f64_c" || value == "z" ? HIPBLAS_C_64F :
3712- value == "bf16_c" ? HIPBLAS_C_16B :
3713- value == "i8_r" ? HIPBLAS_R_8I :
3714- value == "i32_r" ? HIPBLAS_R_32I :
3715- value == "i8_c" ? HIPBLAS_C_8I :
3716- value == "i32_c" ? HIPBLAS_C_32I :
3717- value == "u8_r" ? HIPBLAS_R_8U :
3718- value == "u32_r" ? HIPBLAS_R_32U :
3719- value == "u8_c" ? HIPBLAS_C_8U :
3720- value == "u32_c" ? HIPBLAS_C_32U :
3721- HIPBLAS_DATATYPE_INVALID;
3722+ value == "f16_r" || value == "h" ? HIP_R_16F :
3723+ value == "f32_r" || value == "s" ? HIP_R_32F :
3724+ value == "f64_r" || value == "d" ? HIP_R_64F :
3725+ value == "bf16_r" ? HIP_R_16BF :
3726+ value == "f16_c" ? HIP_C_16F :
3727+ value == "f32_c" || value == "c" ? HIP_C_32F :
3728+ value == "f64_c" || value == "z" ? HIP_C_64F :
3729+ value == "bf16_c" ? HIP_C_16BF :
3730+ value == "i8_r" ? HIP_R_8I :
3731+ value == "i32_r" ? HIP_R_32I :
3732+ value == "i8_c" ? HIP_C_8I :
3733+ value == "i32_c" ? HIP_C_32I :
3734+ value == "u8_r" ? HIP_R_8U :
3735+ value == "u32_r" ? HIP_R_32U :
3736+ value == "u8_c" ? HIP_C_8U :
3737+ value == "u32_c" ? HIP_C_32U :
3738+ HIP_R_32F;
3739 }
3740
3741+/*! \brief Convert string to a hipblasComputeType_t. Returns HIPBLAS_COMPUTE_32F if invalid string. */
3742 hipblasComputeType_t string2hipblas_computetype(const std::string& value)
3743 {
3744 return value == "c16f" ? HIPBLAS_COMPUTE_16F :
3745diff --git a/clients/common/hipblas_gentest.py b/clients/common/hipblas_gentest.py
3746index 5e9143a..4f572e0 100755
3747--- a/clients/common/hipblas_gentest.py
3748+++ b/clients/common/hipblas_gentest.py
3749@@ -1,5 +1,5 @@
3750 #!/usr/bin/env python3
3751-"""Copyright (C) 2018-2023 Advanced Micro Devices, Inc. All rights reserved.
3752+"""Copyright (C) 2018-2025 Advanced Micro Devices, Inc. All rights reserved.
3753
3754 Permission is hereby granted, free of charge, to any person obtaining a copy
3755 of this software and associated documentation files (the "Software"), to deal
3756@@ -114,9 +114,6 @@ Expand hipBLAS YAML test data file into binary Arguments records
3757 default=[])
3758 parser.add_argument('-t', '--template',
3759 type=argparse.FileType('r'))
3760- parser.add_argument('--hipblas_v2',
3761- action='store_true',
3762- help="Uses HIPBLAS_V2 datatypes, ensure HIPBLAS_V2 is defined in your build when using this.")
3763 return parser.parse_args()
3764
3765
3766@@ -185,7 +182,7 @@ def get_datatypes(doc):
3767 for name, decl in declaration.items():
3768 if isinstance(decl, dict):
3769 # Create derived class type based on bases and attr entries
3770- decl_attr = decl.get('attr_v2') if args.get('hipblas_v2') else decl.get('attr')
3771+ decl_attr = decl.get('attr')
3772 dt[name] = type(name,
3773 tuple([eval(t, dt)
3774 for t in decl.get('bases') or ()
3775diff --git a/clients/common/hipblas_parse_data.cpp b/clients/common/hipblas_parse_data.cpp
3776index f2450d7..4b72b2a 100644
3777--- a/clients/common/hipblas_parse_data.cpp
3778+++ b/clients/common/hipblas_parse_data.cpp
3779@@ -1,5 +1,5 @@
3780 /* ************************************************************************
3781- * Copyright (C) 2019-2023 Advanced Micro Devices, Inc. All rights reserved.
3782+ * Copyright (C) 2019-2025 Advanced Micro Devices, Inc. All rights reserved.
3783 *
3784 * Permission is hereby granted, free of charge, to any person obtaining a copy
3785 * of this software and associated documentation files (the "Software"), to deal
3786@@ -35,15 +35,19 @@
3787 // Parse YAML data
3788 static std::string hipblas_parse_yaml(const std::string& yaml)
3789 {
3790+#ifdef WIN32
3791+ // Explicitly run via `python.exe`, without relying on the .py file being
3792+ // treated as an executable that should be run via the python interpreter.
3793+ std::string python_command_launcher = "python ";
3794+#else
3795+ // Rely on the shebang in the file, e.g. `#!/usr/bin/env python3`.
3796+ std::string python_command_launcher = "";
3797+#endif
3798+
3799 std::string tmp = hipblas_tempname();
3800 auto exepath = hipblas_exepath();
3801-#ifdef HIPBLAS_V2
3802- auto cmd = exepath + "hipblas_gentest.py --hipblas_v2 --template " + exepath
3803+ auto cmd = python_command_launcher + exepath + "hipblas_gentest.py --template " + exepath
3804 + "hipblas_template.yaml -o " + tmp + " " + yaml;
3805-#else
3806- auto cmd = exepath + "hipblas_gentest.py --template " + exepath + "hipblas_template.yaml -o "
3807- + tmp + " " + yaml;
3808-#endif
3809 std::cerr << cmd << std::endl;
3810
3811 #ifdef WIN32
3812@@ -51,7 +55,7 @@ static std::string hipblas_parse_yaml(const std::string& yaml)
3813 if(status == -1)
3814 exit(EXIT_FAILURE);
3815 #else
3816- int status = system(cmd.c_str());
3817+ int status = system(cmd.c_str());
3818 if(status == -1 || !WIFEXITED(status) || WEXITSTATUS(status))
3819 exit(EXIT_FAILURE);
3820 #endif
3821diff --git a/clients/common/hipblas_template_specialization.cpp b/clients/common/hipblas_template_specialization.cpp
3822deleted file mode 100644
3823index b073cdc..0000000
3824--- a/clients/common/hipblas_template_specialization.cpp
3825+++ /dev/null
3826@@ -1,14310 +0,0 @@
3827-/* ************************************************************************
3828- * Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved.
3829- *
3830- * Permission is hereby granted, free of charge, to any person obtaining a copy
3831- * of this software and associated documentation files (the "Software"), to deal
3832- * in the Software without restriction, including without limitation the rights
3833- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
3834- * copies of the Software, and to permit persons to whom the Software is
3835- * furnished to do so, subject to the following conditions:
3836- *
3837- * The above copyright notice and this permission notice shall be included in
3838- * all copies or substantial portions of the Software.
3839- *
3840- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
3841- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
3842- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
3843- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
3844- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
3845- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
3846- * SOFTWARE.
3847- *
3848- *
3849- * ************************************************************************/
3850-
3851-#include "hipblas.h"
3852-#include "hipblas.hpp"
3853-
3854-#ifndef WIN32
3855-#include "hipblas_fortran.hpp"
3856-#else
3857-#include "hipblas_no_fortran.hpp"
3858-#endif
3859-
3860-#include <typeinfo>
3861-
3862-// This file's purpose is now only for casting hipblasComplex -> hipComplex when necessary.
3863-// When we finish transitioning to hipComplex, this file can be deleted.
3864-
3865-/*
3866- * ===========================================================================
3867- * level 1 BLAS
3868- * ===========================================================================
3869- */
3870-
3871-#ifdef HIPBLAS_V2
3872-// axpy
3873-hipblasStatus_t hipblasCaxpyCast(hipblasHandle_t handle,
3874- int n,
3875- const hipblasComplex* alpha,
3876- const hipblasComplex* x,
3877- int incx,
3878- hipblasComplex* y,
3879- int incy)
3880-{
3881- return hipblasCaxpy(
3882- handle, n, (const hipComplex*)alpha, (const hipComplex*)x, incx, (hipComplex*)y, incy);
3883-}
3884-
3885-hipblasStatus_t hipblasZaxpyCast(hipblasHandle_t handle,
3886- int n,
3887- const hipblasDoubleComplex* alpha,
3888- const hipblasDoubleComplex* x,
3889- int incx,
3890- hipblasDoubleComplex* y,
3891- int incy)
3892-{
3893- return hipblasZaxpy(handle,
3894- n,
3895- (const hipDoubleComplex*)alpha,
3896- (const hipDoubleComplex*)x,
3897- incx,
3898- (hipDoubleComplex*)y,
3899- incy);
3900-}
3901-
3902-hipblasStatus_t hipblasCaxpyCast_64(hipblasHandle_t handle,
3903- int64_t n,
3904- const hipblasComplex* alpha,
3905- const hipblasComplex* x,
3906- int64_t incx,
3907- hipblasComplex* y,
3908- int64_t incy)
3909-{
3910- return hipblasCaxpy_64(
3911- handle, n, (const hipComplex*)alpha, (const hipComplex*)x, incx, (hipComplex*)y, incy);
3912-}
3913-
3914-hipblasStatus_t hipblasZaxpyCast_64(hipblasHandle_t handle,
3915- int64_t n,
3916- const hipblasDoubleComplex* alpha,
3917- const hipblasDoubleComplex* x,
3918- int64_t incx,
3919- hipblasDoubleComplex* y,
3920- int64_t incy)
3921-{
3922- return hipblasZaxpy_64(handle,
3923- n,
3924- (const hipDoubleComplex*)alpha,
3925- (const hipDoubleComplex*)x,
3926- incx,
3927- (hipDoubleComplex*)y,
3928- incy);
3929-}
3930-
3931-// axpy_batched
3932-hipblasStatus_t hipblasCaxpyBatchedCast(hipblasHandle_t handle,
3933- int n,
3934- const hipblasComplex* alpha,
3935- const hipblasComplex* const x[],
3936- int incx,
3937- hipblasComplex* const y[],
3938- int incy,
3939- int batch_count)
3940-{
3941- return hipblasCaxpyBatched(handle,
3942- n,
3943- (const hipComplex*)alpha,
3944- (const hipComplex* const*)x,
3945- incx,
3946- (hipComplex* const*)y,
3947- incy,
3948- batch_count);
3949-}
3950-
3951-hipblasStatus_t hipblasZaxpyBatchedCast(hipblasHandle_t handle,
3952- int n,
3953- const hipblasDoubleComplex* alpha,
3954- const hipblasDoubleComplex* const x[],
3955- int incx,
3956- hipblasDoubleComplex* const y[],
3957- int incy,
3958- int batch_count)
3959-{
3960- return hipblasZaxpyBatched(handle,
3961- n,
3962- (const hipDoubleComplex*)alpha,
3963- (const hipDoubleComplex* const*)x,
3964- incx,
3965- (hipDoubleComplex* const*)y,
3966- incy,
3967- batch_count);
3968-}
3969-
3970-hipblasStatus_t hipblasCaxpyBatchedCast_64(hipblasHandle_t handle,
3971- int64_t n,
3972- const hipblasComplex* alpha,
3973- const hipblasComplex* const x[],
3974- int64_t incx,
3975- hipblasComplex* const y[],
3976- int64_t incy,
3977- int64_t batch_count)
3978-{
3979- return hipblasCaxpyBatched_64(handle,
3980- n,
3981- (const hipComplex*)alpha,
3982- (const hipComplex* const*)x,
3983- incx,
3984- (hipComplex* const*)y,
3985- incy,
3986- batch_count);
3987-}
3988-
3989-hipblasStatus_t hipblasZaxpyBatchedCast_64(hipblasHandle_t handle,
3990- int64_t n,
3991- const hipblasDoubleComplex* alpha,
3992- const hipblasDoubleComplex* const x[],
3993- int64_t incx,
3994- hipblasDoubleComplex* const y[],
3995- int64_t incy,
3996- int64_t batch_count)
3997-{
3998- return hipblasZaxpyBatched_64(handle,
3999- n,
4000- (const hipDoubleComplex*)alpha,
4001- (const hipDoubleComplex* const*)x,
4002- incx,
4003- (hipDoubleComplex* const*)y,
4004- incy,
4005- batch_count);
4006-}
4007-
4008-// axpy_strided_batched
4009-hipblasStatus_t hipblasCaxpyStridedBatchedCast(hipblasHandle_t handle,
4010- int n,
4011- const hipblasComplex* alpha,
4012- const hipblasComplex* x,
4013- int incx,
4014- hipblasStride stridex,
4015- hipblasComplex* y,
4016- int incy,
4017- hipblasStride stridey,
4018- int batch_count)
4019-{
4020- return hipblasCaxpyStridedBatched(handle,
4021- n,
4022- (const hipComplex*)alpha,
4023- (const hipComplex*)x,
4024- incx,
4025- stridex,
4026- (hipComplex*)y,
4027- incy,
4028- stridey,
4029- batch_count);
4030-}
4031-
4032-hipblasStatus_t hipblasZaxpyStridedBatchedCast(hipblasHandle_t handle,
4033- int n,
4034- const hipblasDoubleComplex* alpha,
4035- const hipblasDoubleComplex* x,
4036- int incx,
4037- hipblasStride stridex,
4038- hipblasDoubleComplex* y,
4039- int incy,
4040- hipblasStride stridey,
4041- int batch_count)
4042-{
4043- return hipblasZaxpyStridedBatched(handle,
4044- n,
4045- (const hipDoubleComplex*)alpha,
4046- (const hipDoubleComplex*)x,
4047- incx,
4048- stridex,
4049- (hipDoubleComplex*)y,
4050- incy,
4051- stridey,
4052- batch_count);
4053-}
4054-
4055-hipblasStatus_t hipblasCaxpyStridedBatchedCast_64(hipblasHandle_t handle,
4056- int64_t n,
4057- const hipblasComplex* alpha,
4058- const hipblasComplex* x,
4059- int64_t incx,
4060- hipblasStride stridex,
4061- hipblasComplex* y,
4062- int64_t incy,
4063- hipblasStride stridey,
4064- int64_t batch_count)
4065-{
4066- return hipblasCaxpyStridedBatched_64(handle,
4067- n,
4068- (const hipComplex*)alpha,
4069- (const hipComplex*)x,
4070- incx,
4071- stridex,
4072- (hipComplex*)y,
4073- incy,
4074- stridey,
4075- batch_count);
4076-}
4077-
4078-hipblasStatus_t hipblasZaxpyStridedBatchedCast_64(hipblasHandle_t handle,
4079- int64_t n,
4080- const hipblasDoubleComplex* alpha,
4081- const hipblasDoubleComplex* x,
4082- int64_t incx,
4083- hipblasStride stridex,
4084- hipblasDoubleComplex* y,
4085- int64_t incy,
4086- hipblasStride stridey,
4087- int64_t batch_count)
4088-{
4089- return hipblasZaxpyStridedBatched_64(handle,
4090- n,
4091- (const hipDoubleComplex*)alpha,
4092- (const hipDoubleComplex*)x,
4093- incx,
4094- stridex,
4095- (hipDoubleComplex*)y,
4096- incy,
4097- stridey,
4098- batch_count);
4099-}
4100-
4101-// swap
4102-hipblasStatus_t hipblasCswapCast(
4103- hipblasHandle_t handle, int n, hipblasComplex* x, int incx, hipblasComplex* y, int incy)
4104-{
4105- return hipblasCswap(handle, n, (hipComplex*)x, incx, (hipComplex*)y, incy);
4106-}
4107-
4108-hipblasStatus_t hipblasZswapCast(hipblasHandle_t handle,
4109- int n,
4110- hipblasDoubleComplex* x,
4111- int incx,
4112- hipblasDoubleComplex* y,
4113- int incy)
4114-{
4115- return hipblasZswap(handle, n, (hipDoubleComplex*)x, incx, (hipDoubleComplex*)y, incy);
4116-}
4117-
4118-// swap_64
4119-hipblasStatus_t hipblasCswapCast_64(hipblasHandle_t handle,
4120- int64_t n,
4121- hipblasComplex* x,
4122- int64_t incx,
4123- hipblasComplex* y,
4124- int64_t incy)
4125-{
4126- return hipblasCswap_64(handle, n, (hipComplex*)x, incx, (hipComplex*)y, incy);
4127-}
4128-
4129-hipblasStatus_t hipblasZswapCast_64(hipblasHandle_t handle,
4130- int64_t n,
4131- hipblasDoubleComplex* x,
4132- int64_t incx,
4133- hipblasDoubleComplex* y,
4134- int64_t incy)
4135-{
4136- return hipblasZswap_64(handle, n, (hipDoubleComplex*)x, incx, (hipDoubleComplex*)y, incy);
4137-}
4138-
4139-// swap_batched
4140-hipblasStatus_t hipblasCswapBatchedCast(hipblasHandle_t handle,
4141- int n,
4142- hipblasComplex* const x[],
4143- int incx,
4144- hipblasComplex* const y[],
4145- int incy,
4146- int batch_count)
4147-{
4148- return hipblasCswapBatched(
4149- handle, n, (hipComplex* const*)x, incx, (hipComplex* const*)y, incy, batch_count);
4150-}
4151-
4152-hipblasStatus_t hipblasZswapBatchedCast(hipblasHandle_t handle,
4153- int n,
4154- hipblasDoubleComplex* const x[],
4155- int incx,
4156- hipblasDoubleComplex* const y[],
4157- int incy,
4158- int batch_count)
4159-{
4160- return hipblasZswapBatched(handle,
4161- n,
4162- (hipDoubleComplex* const*)x,
4163- incx,
4164- (hipDoubleComplex* const*)y,
4165- incy,
4166- batch_count);
4167-}
4168-
4169-// swap_batched_64
4170-hipblasStatus_t hipblasCswapBatchedCast_64(hipblasHandle_t handle,
4171- int64_t n,
4172- hipblasComplex* const x[],
4173- int64_t incx,
4174- hipblasComplex* const y[],
4175- int64_t incy,
4176- int64_t batch_count)
4177-{
4178- return hipblasCswapBatched_64(
4179- handle, n, (hipComplex* const*)x, incx, (hipComplex* const*)y, incy, batch_count);
4180-}
4181-
4182-hipblasStatus_t hipblasZswapBatchedCast_64(hipblasHandle_t handle,
4183- int64_t n,
4184- hipblasDoubleComplex* const x[],
4185- int64_t incx,
4186- hipblasDoubleComplex* const y[],
4187- int64_t incy,
4188- int64_t batch_count)
4189-{
4190- return hipblasZswapBatched_64(handle,
4191- n,
4192- (hipDoubleComplex* const*)x,
4193- incx,
4194- (hipDoubleComplex* const*)y,
4195- incy,
4196- batch_count);
4197-}
4198-
4199-// swap_strided_batched
4200-hipblasStatus_t hipblasCswapStridedBatchedCast(hipblasHandle_t handle,
4201- int n,
4202- hipblasComplex* x,
4203- int incx,
4204- hipblasStride stridex,
4205- hipblasComplex* y,
4206- int incy,
4207- hipblasStride stridey,
4208- int batch_count)
4209-{
4210- return hipblasCswapStridedBatched(
4211- handle, n, (hipComplex*)x, incx, stridex, (hipComplex*)y, incy, stridey, batch_count);
4212-}
4213-
4214-hipblasStatus_t hipblasZswapStridedBatchedCast(hipblasHandle_t handle,
4215- int n,
4216- hipblasDoubleComplex* x,
4217- int incx,
4218- hipblasStride stridex,
4219- hipblasDoubleComplex* y,
4220- int incy,
4221- hipblasStride stridey,
4222- int batch_count)
4223-{
4224- return hipblasZswapStridedBatched(handle,
4225- n,
4226- (hipDoubleComplex*)x,
4227- incx,
4228- stridex,
4229- (hipDoubleComplex*)y,
4230- incy,
4231- stridey,
4232- batch_count);
4233-}
4234-
4235-// swap_strided_batched_64
4236-hipblasStatus_t hipblasCswapStridedBatchedCast_64(hipblasHandle_t handle,
4237- int64_t n,
4238- hipblasComplex* x,
4239- int64_t incx,
4240- hipblasStride stridex,
4241- hipblasComplex* y,
4242- int64_t incy,
4243- hipblasStride stridey,
4244- int64_t batch_count)
4245-{
4246- return hipblasCswapStridedBatched_64(
4247- handle, n, (hipComplex*)x, incx, stridex, (hipComplex*)y, incy, stridey, batch_count);
4248-}
4249-
4250-hipblasStatus_t hipblasZswapStridedBatchedCast_64(hipblasHandle_t handle,
4251- int64_t n,
4252- hipblasDoubleComplex* x,
4253- int64_t incx,
4254- hipblasStride stridex,
4255- hipblasDoubleComplex* y,
4256- int64_t incy,
4257- hipblasStride stridey,
4258- int64_t batch_count)
4259-{
4260- return hipblasZswapStridedBatched_64(handle,
4261- n,
4262- (hipDoubleComplex*)x,
4263- incx,
4264- stridex,
4265- (hipDoubleComplex*)y,
4266- incy,
4267- stridey,
4268- batch_count);
4269-}
4270-
4271-// copy
4272-hipblasStatus_t hipblasCcopyCast(
4273- hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasComplex* y, int incy)
4274-{
4275- return hipblasCcopy(handle, n, (const hipComplex*)x, incx, (hipComplex*)y, incy);
4276-}
4277-
4278-hipblasStatus_t hipblasZcopyCast(hipblasHandle_t handle,
4279- int n,
4280- const hipblasDoubleComplex* x,
4281- int incx,
4282- hipblasDoubleComplex* y,
4283- int incy)
4284-{
4285- return hipblasZcopy(handle, n, (const hipDoubleComplex*)x, incx, (hipDoubleComplex*)y, incy);
4286-}
4287-
4288-hipblasStatus_t hipblasCcopyCast_64(hipblasHandle_t handle,
4289- int64_t n,
4290- const hipblasComplex* x,
4291- int64_t incx,
4292- hipblasComplex* y,
4293- int64_t incy)
4294-{
4295- return hipblasCcopy_64(handle, n, (const hipComplex*)x, incx, (hipComplex*)y, incy);
4296-}
4297-
4298-hipblasStatus_t hipblasZcopyCast_64(hipblasHandle_t handle,
4299- int64_t n,
4300- const hipblasDoubleComplex* x,
4301- int64_t incx,
4302- hipblasDoubleComplex* y,
4303- int64_t incy)
4304-{
4305- return hipblasZcopy_64(handle, n, (const hipDoubleComplex*)x, incx, (hipDoubleComplex*)y, incy);
4306-}
4307-
4308-// batched
4309-hipblasStatus_t hipblasCcopyBatchedCast(hipblasHandle_t handle,
4310- int n,
4311- const hipblasComplex* const x[],
4312- int incx,
4313- hipblasComplex* const y[],
4314- int incy,
4315- int batch_count)
4316-{
4317- return hipblasCcopyBatched(
4318- handle, n, (const hipComplex* const*)x, incx, (hipComplex* const*)y, incy, batch_count);
4319-}
4320-
4321-hipblasStatus_t hipblasZcopyBatchedCast(hipblasHandle_t handle,
4322- int n,
4323- const hipblasDoubleComplex* const x[],
4324- int incx,
4325- hipblasDoubleComplex* const y[],
4326- int incy,
4327- int batch_count)
4328-{
4329- return hipblasZcopyBatched(handle,
4330- n,
4331- (const hipDoubleComplex* const*)x,
4332- incx,
4333- (hipDoubleComplex* const*)y,
4334- incy,
4335- batch_count);
4336-}
4337-
4338-hipblasStatus_t hipblasCcopyBatchedCast_64(hipblasHandle_t handle,
4339- int64_t n,
4340- const hipblasComplex* const x[],
4341- int64_t incx,
4342- hipblasComplex* const y[],
4343- int64_t incy,
4344- int64_t batch_count)
4345-{
4346- return hipblasCcopyBatched_64(
4347- handle, n, (const hipComplex* const*)x, incx, (hipComplex* const*)y, incy, batch_count);
4348-}
4349-
4350-hipblasStatus_t hipblasZcopyBatchedCast_64(hipblasHandle_t handle,
4351- int64_t n,
4352- const hipblasDoubleComplex* const x[],
4353- int64_t incx,
4354- hipblasDoubleComplex* const y[],
4355- int64_t incy,
4356- int64_t batch_count)
4357-{
4358- return hipblasZcopyBatched_64(handle,
4359- n,
4360- (const hipDoubleComplex* const*)x,
4361- incx,
4362- (hipDoubleComplex* const*)y,
4363- incy,
4364- batch_count);
4365-}
4366-
4367-// strided_batched
4368-hipblasStatus_t hipblasCcopyStridedBatchedCast(hipblasHandle_t handle,
4369- int n,
4370- const hipblasComplex* x,
4371- int incx,
4372- hipblasStride stridex,
4373- hipblasComplex* y,
4374- int incy,
4375- hipblasStride stridey,
4376- int batch_count)
4377-{
4378- return hipblasCcopyStridedBatched(
4379- handle, n, (const hipComplex*)x, incx, stridex, (hipComplex*)y, incy, stridey, batch_count);
4380-}
4381-
4382-hipblasStatus_t hipblasZcopyStridedBatchedCast(hipblasHandle_t handle,
4383- int n,
4384- const hipblasDoubleComplex* x,
4385- int incx,
4386- hipblasStride stridex,
4387- hipblasDoubleComplex* y,
4388- int incy,
4389- hipblasStride stridey,
4390- int batch_count)
4391-{
4392- return hipblasZcopyStridedBatched(handle,
4393- n,
4394- (const hipDoubleComplex*)x,
4395- incx,
4396- stridex,
4397- (hipDoubleComplex*)y,
4398- incy,
4399- stridey,
4400- batch_count);
4401-}
4402-
4403-hipblasStatus_t hipblasCcopyStridedBatchedCast_64(hipblasHandle_t handle,
4404- int64_t n,
4405- const hipblasComplex* x,
4406- int64_t incx,
4407- hipblasStride stridex,
4408- hipblasComplex* y,
4409- int64_t incy,
4410- hipblasStride stridey,
4411- int64_t batch_count)
4412-{
4413- return hipblasCcopyStridedBatched_64(
4414- handle, n, (const hipComplex*)x, incx, stridex, (hipComplex*)y, incy, stridey, batch_count);
4415-}
4416-
4417-hipblasStatus_t hipblasZcopyStridedBatchedCast_64(hipblasHandle_t handle,
4418- int64_t n,
4419- const hipblasDoubleComplex* x,
4420- int64_t incx,
4421- hipblasStride stridex,
4422- hipblasDoubleComplex* y,
4423- int64_t incy,
4424- hipblasStride stridey,
4425- int64_t batch_count)
4426-{
4427- return hipblasZcopyStridedBatched_64(handle,
4428- n,
4429- (const hipDoubleComplex*)x,
4430- incx,
4431- stridex,
4432- (hipDoubleComplex*)y,
4433- incy,
4434- stridey,
4435- batch_count);
4436-}
4437-
4438-// dot
4439-hipblasStatus_t hipblasCdotuCast(hipblasHandle_t handle,
4440- int n,
4441- const hipblasComplex* x,
4442- int incx,
4443- const hipblasComplex* y,
4444- int incy,
4445- hipblasComplex* result)
4446-{
4447- return hipblasCdotu(
4448- handle, n, (const hipComplex*)x, incx, (const hipComplex*)y, incy, (hipComplex*)result);
4449-}
4450-
4451-hipblasStatus_t hipblasZdotuCast(hipblasHandle_t handle,
4452- int n,
4453- const hipblasDoubleComplex* x,
4454- int incx,
4455- const hipblasDoubleComplex* y,
4456- int incy,
4457- hipblasDoubleComplex* result)
4458-{
4459- return hipblasZdotu(handle,
4460- n,
4461- (const hipDoubleComplex*)x,
4462- incx,
4463- (const hipDoubleComplex*)y,
4464- incy,
4465- (hipDoubleComplex*)result);
4466-}
4467-
4468-hipblasStatus_t hipblasCdotcCast(hipblasHandle_t handle,
4469- int n,
4470- const hipblasComplex* x,
4471- int incx,
4472- const hipblasComplex* y,
4473- int incy,
4474- hipblasComplex* result)
4475-{
4476- return hipblasCdotc(
4477- handle, n, (const hipComplex*)x, incx, (const hipComplex*)y, incy, (hipComplex*)result);
4478-}
4479-
4480-hipblasStatus_t hipblasZdotcCast(hipblasHandle_t handle,
4481- int n,
4482- const hipblasDoubleComplex* x,
4483- int incx,
4484- const hipblasDoubleComplex* y,
4485- int incy,
4486- hipblasDoubleComplex* result)
4487-{
4488- return hipblasZdotc(handle,
4489- n,
4490- (const hipDoubleComplex*)x,
4491- incx,
4492- (const hipDoubleComplex*)y,
4493- incy,
4494- (hipDoubleComplex*)result);
4495-}
4496-
4497-hipblasStatus_t hipblasCdotuCast_64(hipblasHandle_t handle,
4498- int64_t n,
4499- const hipblasComplex* x,
4500- int64_t incx,
4501- const hipblasComplex* y,
4502- int64_t incy,
4503- hipblasComplex* result)
4504-{
4505- return hipblasCdotu_64(
4506- handle, n, (const hipComplex*)x, incx, (const hipComplex*)y, incy, (hipComplex*)result);
4507-}
4508-
4509-hipblasStatus_t hipblasZdotuCast_64(hipblasHandle_t handle,
4510- int64_t n,
4511- const hipblasDoubleComplex* x,
4512- int64_t incx,
4513- const hipblasDoubleComplex* y,
4514- int64_t incy,
4515- hipblasDoubleComplex* result)
4516-{
4517- return hipblasZdotu_64(handle,
4518- n,
4519- (const hipDoubleComplex*)x,
4520- incx,
4521- (const hipDoubleComplex*)y,
4522- incy,
4523- (hipDoubleComplex*)result);
4524-}
4525-
4526-hipblasStatus_t hipblasCdotcCast_64(hipblasHandle_t handle,
4527- int64_t n,
4528- const hipblasComplex* x,
4529- int64_t incx,
4530- const hipblasComplex* y,
4531- int64_t incy,
4532- hipblasComplex* result)
4533-{
4534- return hipblasCdotc_64(
4535- handle, n, (const hipComplex*)x, incx, (const hipComplex*)y, incy, (hipComplex*)result);
4536-}
4537-
4538-hipblasStatus_t hipblasZdotcCast_64(hipblasHandle_t handle,
4539- int64_t n,
4540- const hipblasDoubleComplex* x,
4541- int64_t incx,
4542- const hipblasDoubleComplex* y,
4543- int64_t incy,
4544- hipblasDoubleComplex* result)
4545-{
4546- return hipblasZdotc_64(handle,
4547- n,
4548- (const hipDoubleComplex*)x,
4549- incx,
4550- (const hipDoubleComplex*)y,
4551- incy,
4552- (hipDoubleComplex*)result);
4553-}
4554-
4555-// dot_batched
4556-hipblasStatus_t hipblasCdotuBatchedCast(hipblasHandle_t handle,
4557- int n,
4558- const hipblasComplex* const x[],
4559- int incx,
4560- const hipblasComplex* const y[],
4561- int incy,
4562- int batch_count,
4563- hipblasComplex* result)
4564-{
4565- return hipblasCdotuBatched(handle,
4566- n,
4567- (const hipComplex* const*)x,
4568- incx,
4569- (const hipComplex* const*)y,
4570- incy,
4571- batch_count,
4572- (hipComplex*)result);
4573-}
4574-
4575-hipblasStatus_t hipblasCdotcBatchedCast(hipblasHandle_t handle,
4576- int n,
4577- const hipblasComplex* const x[],
4578- int incx,
4579- const hipblasComplex* const y[],
4580- int incy,
4581- int batch_count,
4582- hipblasComplex* result)
4583-{
4584- return hipblasCdotcBatched(handle,
4585- n,
4586- (const hipComplex* const*)x,
4587- incx,
4588- (const hipComplex* const*)y,
4589- incy,
4590- batch_count,
4591- (hipComplex*)result);
4592-}
4593-
4594-hipblasStatus_t hipblasZdotuBatchedCast(hipblasHandle_t handle,
4595- int n,
4596- const hipblasDoubleComplex* const x[],
4597- int incx,
4598- const hipblasDoubleComplex* const y[],
4599- int incy,
4600- int batch_count,
4601- hipblasDoubleComplex* result)
4602-{
4603- return hipblasZdotuBatched(handle,
4604- n,
4605- (const hipDoubleComplex* const*)x,
4606- incx,
4607- (const hipDoubleComplex* const*)y,
4608- incy,
4609- batch_count,
4610- (hipDoubleComplex*)result);
4611-}
4612-
4613-hipblasStatus_t hipblasZdotcBatchedCast(hipblasHandle_t handle,
4614- int n,
4615- const hipblasDoubleComplex* const x[],
4616- int incx,
4617- const hipblasDoubleComplex* const y[],
4618- int incy,
4619- int batch_count,
4620- hipblasDoubleComplex* result)
4621-{
4622- return hipblasZdotcBatched(handle,
4623- n,
4624- (const hipDoubleComplex* const*)x,
4625- incx,
4626- (const hipDoubleComplex* const*)y,
4627- incy,
4628- batch_count,
4629- (hipDoubleComplex*)result);
4630-}
4631-
4632-hipblasStatus_t hipblasCdotuBatchedCast_64(hipblasHandle_t handle,
4633- int64_t n,
4634- const hipblasComplex* const x[],
4635- int64_t incx,
4636- const hipblasComplex* const y[],
4637- int64_t incy,
4638- int64_t batch_count,
4639- hipblasComplex* result)
4640-{
4641- return hipblasCdotuBatched_64(handle,
4642- n,
4643- (const hipComplex* const*)x,
4644- incx,
4645- (const hipComplex* const*)y,
4646- incy,
4647- batch_count,
4648- (hipComplex*)result);
4649-}
4650-
4651-hipblasStatus_t hipblasCdotcBatchedCast_64(hipblasHandle_t handle,
4652- int64_t n,
4653- const hipblasComplex* const x[],
4654- int64_t incx,
4655- const hipblasComplex* const y[],
4656- int64_t incy,
4657- int64_t batch_count,
4658- hipblasComplex* result)
4659-{
4660- return hipblasCdotcBatched_64(handle,
4661- n,
4662- (const hipComplex* const*)x,
4663- incx,
4664- (const hipComplex* const*)y,
4665- incy,
4666- batch_count,
4667- (hipComplex*)result);
4668-}
4669-
4670-hipblasStatus_t hipblasZdotuBatchedCast_64(hipblasHandle_t handle,
4671- int64_t n,
4672- const hipblasDoubleComplex* const x[],
4673- int64_t incx,
4674- const hipblasDoubleComplex* const y[],
4675- int64_t incy,
4676- int64_t batch_count,
4677- hipblasDoubleComplex* result)
4678-{
4679- return hipblasZdotuBatched_64(handle,
4680- n,
4681- (const hipDoubleComplex* const*)x,
4682- incx,
4683- (const hipDoubleComplex* const*)y,
4684- incy,
4685- batch_count,
4686- (hipDoubleComplex*)result);
4687-}
4688-
4689-hipblasStatus_t hipblasZdotcBatchedCast_64(hipblasHandle_t handle,
4690- int64_t n,
4691- const hipblasDoubleComplex* const x[],
4692- int64_t incx,
4693- const hipblasDoubleComplex* const y[],
4694- int64_t incy,
4695- int64_t batch_count,
4696- hipblasDoubleComplex* result)
4697-{
4698- return hipblasZdotcBatched_64(handle,
4699- n,
4700- (const hipDoubleComplex* const*)x,
4701- incx,
4702- (const hipDoubleComplex* const*)y,
4703- incy,
4704- batch_count,
4705- (hipDoubleComplex*)result);
4706-}
4707-
4708-// dot_strided_batched
4709-hipblasStatus_t hipblasCdotuStridedBatchedCast(hipblasHandle_t handle,
4710- int n,
4711- const hipblasComplex* x,
4712- int incx,
4713- hipblasStride stridex,
4714- const hipblasComplex* y,
4715- int incy,
4716- hipblasStride stridey,
4717- int batch_count,
4718- hipblasComplex* result)
4719-{
4720- return hipblasCdotuStridedBatched(handle,
4721- n,
4722- (const hipComplex*)x,
4723- incx,
4724- stridex,
4725- (const hipComplex*)y,
4726- incy,
4727- stridey,
4728- batch_count,
4729- (hipComplex*)result);
4730-}
4731-
4732-hipblasStatus_t hipblasCdotcStridedBatchedCast(hipblasHandle_t handle,
4733- int n,
4734- const hipblasComplex* x,
4735- int incx,
4736- hipblasStride stridex,
4737- const hipblasComplex* y,
4738- int incy,
4739- hipblasStride stridey,
4740- int batch_count,
4741- hipblasComplex* result)
4742-{
4743- return hipblasCdotcStridedBatched(handle,
4744- n,
4745- (const hipComplex*)x,
4746- incx,
4747- stridex,
4748- (const hipComplex*)y,
4749- incy,
4750- stridey,
4751- batch_count,
4752- (hipComplex*)result);
4753-}
4754-
4755-hipblasStatus_t hipblasZdotuStridedBatchedCast(hipblasHandle_t handle,
4756- int n,
4757- const hipblasDoubleComplex* x,
4758- int incx,
4759- hipblasStride stridex,
4760- const hipblasDoubleComplex* y,
4761- int incy,
4762- hipblasStride stridey,
4763- int batch_count,
4764- hipblasDoubleComplex* result)
4765-{
4766- return hipblasZdotuStridedBatched(handle,
4767- n,
4768- (const hipDoubleComplex*)x,
4769- incx,
4770- stridex,
4771- (const hipDoubleComplex*)y,
4772- incy,
4773- stridey,
4774- batch_count,
4775- (hipDoubleComplex*)result);
4776-}
4777-
4778-hipblasStatus_t hipblasZdotcStridedBatchedCast(hipblasHandle_t handle,
4779- int n,
4780- const hipblasDoubleComplex* x,
4781- int incx,
4782- hipblasStride stridex,
4783- const hipblasDoubleComplex* y,
4784- int incy,
4785- hipblasStride stridey,
4786- int batch_count,
4787- hipblasDoubleComplex* result)
4788-{
4789- return hipblasZdotcStridedBatched(handle,
4790- n,
4791- (const hipDoubleComplex*)x,
4792- incx,
4793- stridex,
4794- (const hipDoubleComplex*)y,
4795- incy,
4796- stridey,
4797- batch_count,
4798- (hipDoubleComplex*)result);
4799-}
4800-
4801-hipblasStatus_t hipblasCdotuStridedBatchedCast_64(hipblasHandle_t handle,
4802- int64_t n,
4803- const hipblasComplex* x,
4804- int64_t incx,
4805- hipblasStride stridex,
4806- const hipblasComplex* y,
4807- int64_t incy,
4808- hipblasStride stridey,
4809- int64_t batch_count,
4810- hipblasComplex* result)
4811-{
4812- return hipblasCdotuStridedBatched_64(handle,
4813- n,
4814- (const hipComplex*)x,
4815- incx,
4816- stridex,
4817- (const hipComplex*)y,
4818- incy,
4819- stridey,
4820- batch_count,
4821- (hipComplex*)result);
4822-}
4823-
4824-hipblasStatus_t hipblasCdotcStridedBatchedCast_64(hipblasHandle_t handle,
4825- int64_t n,
4826- const hipblasComplex* x,
4827- int64_t incx,
4828- hipblasStride stridex,
4829- const hipblasComplex* y,
4830- int64_t incy,
4831- hipblasStride stridey,
4832- int64_t batch_count,
4833- hipblasComplex* result)
4834-{
4835- return hipblasCdotcStridedBatched_64(handle,
4836- n,
4837- (const hipComplex*)x,
4838- incx,
4839- stridex,
4840- (const hipComplex*)y,
4841- incy,
4842- stridey,
4843- batch_count,
4844- (hipComplex*)result);
4845-}
4846-
4847-hipblasStatus_t hipblasZdotuStridedBatchedCast_64(hipblasHandle_t handle,
4848- int64_t n,
4849- const hipblasDoubleComplex* x,
4850- int64_t incx,
4851- hipblasStride stridex,
4852- const hipblasDoubleComplex* y,
4853- int64_t incy,
4854- hipblasStride stridey,
4855- int64_t batch_count,
4856- hipblasDoubleComplex* result)
4857-{
4858- return hipblasZdotuStridedBatched_64(handle,
4859- n,
4860- (const hipDoubleComplex*)x,
4861- incx,
4862- stridex,
4863- (const hipDoubleComplex*)y,
4864- incy,
4865- stridey,
4866- batch_count,
4867- (hipDoubleComplex*)result);
4868-}
4869-
4870-hipblasStatus_t hipblasZdotcStridedBatchedCast_64(hipblasHandle_t handle,
4871- int64_t n,
4872- const hipblasDoubleComplex* x,
4873- int64_t incx,
4874- hipblasStride stridex,
4875- const hipblasDoubleComplex* y,
4876- int64_t incy,
4877- hipblasStride stridey,
4878- int64_t batch_count,
4879- hipblasDoubleComplex* result)
4880-{
4881- return hipblasZdotcStridedBatched_64(handle,
4882- n,
4883- (const hipDoubleComplex*)x,
4884- incx,
4885- stridex,
4886- (const hipDoubleComplex*)y,
4887- incy,
4888- stridey,
4889- batch_count,
4890- (hipDoubleComplex*)result);
4891-}
4892-
4893-// asum
4894-hipblasStatus_t hipblasScasumCast(
4895- hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, float* result)
4896-{
4897- return hipblasScasum(handle, n, (const hipComplex*)x, incx, result);
4898-}
4899-
4900-hipblasStatus_t hipblasDzasumCast(
4901- hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, double* result)
4902-{
4903- return hipblasDzasum(handle, n, (const hipDoubleComplex*)x, incx, result);
4904-}
4905-
4906-hipblasStatus_t hipblasScasumCast_64(
4907- hipblasHandle_t handle, int64_t n, const hipblasComplex* x, int64_t incx, float* result)
4908-{
4909- return hipblasScasum_64(handle, n, (const hipComplex*)x, incx, result);
4910-}
4911-
4912-hipblasStatus_t hipblasDzasumCast_64(
4913- hipblasHandle_t handle, int64_t n, const hipblasDoubleComplex* x, int64_t incx, double* result)
4914-{
4915- return hipblasDzasum_64(handle, n, (const hipDoubleComplex*)x, incx, result);
4916-}
4917-
4918-// asum_batched
4919-hipblasStatus_t hipblasScasumBatchedCast(hipblasHandle_t handle,
4920- int n,
4921- const hipblasComplex* const x[],
4922- int incx,
4923- int batch_count,
4924- float* result)
4925-{
4926- return hipblasScasumBatched(handle, n, (const hipComplex* const*)x, incx, batch_count, result);
4927-}
4928-
4929-hipblasStatus_t hipblasDzasumBatchedCast(hipblasHandle_t handle,
4930- int n,
4931- const hipblasDoubleComplex* const x[],
4932- int incx,
4933- int batch_count,
4934- double* result)
4935-{
4936- return hipblasDzasumBatched(
4937- handle, n, (const hipDoubleComplex* const*)x, incx, batch_count, result);
4938-}
4939-
4940-hipblasStatus_t hipblasScasumBatchedCast_64(hipblasHandle_t handle,
4941- int64_t n,
4942- const hipblasComplex* const x[],
4943- int64_t incx,
4944- int64_t batch_count,
4945- float* result)
4946-{
4947- return hipblasScasumBatched_64(
4948- handle, n, (const hipComplex* const*)x, incx, batch_count, result);
4949-}
4950-
4951-hipblasStatus_t hipblasDzasumBatchedCast_64(hipblasHandle_t handle,
4952- int64_t n,
4953- const hipblasDoubleComplex* const x[],
4954- int64_t incx,
4955- int64_t batch_count,
4956- double* result)
4957-{
4958- return hipblasDzasumBatched_64(
4959- handle, n, (const hipDoubleComplex* const*)x, incx, batch_count, result);
4960-}
4961-
4962-// asum_strided_batched
4963-hipblasStatus_t hipblasScasumStridedBatchedCast(hipblasHandle_t handle,
4964- int n,
4965- const hipblasComplex* x,
4966- int incx,
4967- hipblasStride stridex,
4968- int batch_count,
4969- float* result)
4970-{
4971- return hipblasScasumStridedBatched(
4972- handle, n, (const hipComplex*)x, incx, stridex, batch_count, result);
4973-}
4974-
4975-hipblasStatus_t hipblasDzasumStridedBatchedCast(hipblasHandle_t handle,
4976- int n,
4977- const hipblasDoubleComplex* x,
4978- int incx,
4979- hipblasStride stridex,
4980- int batch_count,
4981- double* result)
4982-{
4983- return hipblasDzasumStridedBatched(
4984- handle, n, (const hipDoubleComplex*)x, incx, stridex, batch_count, result);
4985-}
4986-
4987-hipblasStatus_t hipblasScasumStridedBatchedCast_64(hipblasHandle_t handle,
4988- int64_t n,
4989- const hipblasComplex* x,
4990- int64_t incx,
4991- hipblasStride stridex,
4992- int64_t batch_count,
4993- float* result)
4994-{
4995- return hipblasScasumStridedBatched_64(
4996- handle, n, (const hipComplex*)x, incx, stridex, batch_count, result);
4997-}
4998-
4999-hipblasStatus_t hipblasDzasumStridedBatchedCast_64(hipblasHandle_t handle,
5000- int64_t n,
The diff has been truncated for viewing.

Subscribers

People subscribed via source and target branches