Merge ~bullwinkle-team/ubuntu/+source/hipblas:bullwinkle/llvm-21/ubuntu/devel into ubuntu/+source/hipblas:ubuntu/devel
- Git
- lp:~bullwinkle-team/ubuntu/+source/hipblas
- bullwinkle/llvm-21/ubuntu/devel
- Merge into ubuntu/devel
| Status: | Merged | ||||
|---|---|---|---|---|---|
| Approved by: | Andreas Hasenack | ||||
| Approved revision: | fffc9a78dc56aa47aba8388f8ca39e072a705cca | ||||
| Merged at revision: | fffc9a78dc56aa47aba8388f8ca39e072a705cca | ||||
| Proposed branch: | ~bullwinkle-team/ubuntu/+source/hipblas:bullwinkle/llvm-21/ubuntu/devel | ||||
| Merge into: | ubuntu/+source/hipblas:ubuntu/devel | ||||
| Diff against target: |
168361 lines (+37638/-84616) 308 files modified
.githooks/install (+1/-1) .jenkins/codecov.groovy (+119/-0) .jenkins/common.groovy (+41/-21) .jenkins/precheckin-cuda.groovy (+2/-6) .jenkins/precheckin.groovy (+2/-6) .jenkins/static.groovy (+4/-8) .jenkins/staticanalysis.groovy (+1/-3) CHANGELOG.md (+38/-1) CMakeLists.txt (+48/-53) LICENSE.md (+18/-4) README.md (+26/-5) bump_develop_version.sh (+12/-8) clients/CMakeLists.txt (+80/-24) clients/benchmarks/CMakeLists.txt (+10/-48) clients/benchmarks/client.cpp (+12/-25) clients/cmake/build-options.cmake (+0/-2) clients/common/cblas_interface.cpp (+726/-701) clients/common/clients_common.cpp (+35/-33) clients/common/hipblas_datatype2string.cpp (+21/-19) clients/common/hipblas_gentest.py (+2/-5) clients/common/hipblas_parse_data.cpp (+12/-8) clients/common/near.cpp (+37/-33) clients/common/norm.cpp (+26/-22) clients/common/unit.cpp (+28/-28) clients/common/utility.cpp (+32/-17) clients/gtest/CMakeLists.txt (+15/-65) clients/gtest/auxil/set_get_matrix_vector_gtest.cpp (+2/-2) clients/gtest/blas1/asum_gtest.cpp (+2/-2) clients/gtest/blas1/axpy_gtest.cpp (+2/-2) clients/gtest/blas1/copy_gtest.cpp (+2/-2) clients/gtest/blas1/dot_gtest.cpp (+3/-3) clients/gtest/blas1/dot_gtest.yaml (+0/-1) clients/gtest/blas1/iamaxmin_gtest.cpp (+2/-2) clients/gtest/blas1/nrm2_gtest.cpp (+2/-2) clients/gtest/blas1/rot_gtest.cpp (+36/-36) clients/gtest/blas1/scal_gtest.cpp (+6/-9) clients/gtest/blas1/swap_gtest.cpp (+2/-2) clients/gtest/blas2/gbmv_gtest.cpp (+2/-2) clients/gtest/blas2/gemv_gtest.cpp (+2/-2) clients/gtest/blas2/ger_gtest.cpp (+3/-3) clients/gtest/blas2/hbmv_gtest.cpp (+2/-2) clients/gtest/blas2/hemv_gtest.cpp (+2/-2) clients/gtest/blas2/her2_gtest.cpp (+2/-2) clients/gtest/blas2/her_gtest.cpp (+2/-2) clients/gtest/blas2/hpmv_gtest.cpp (+2/-2) clients/gtest/blas2/hpr2_gtest.cpp (+2/-2) clients/gtest/blas2/hpr_gtest.cpp (+2/-2) clients/gtest/blas2/spr_gtest.cpp (+2/-2) clients/gtest/blas2/symv_gtest.cpp (+2/-2) clients/gtest/blas2/syr2_gtest.cpp (+2/-2) 
clients/gtest/blas2/syr_gtest.cpp (+2/-2) clients/gtest/blas2/tbmv_gtest.cpp (+2/-2) clients/gtest/blas2/tbsv_gtest.cpp (+2/-2) clients/gtest/blas2/tpmv_gtest.cpp (+2/-2) clients/gtest/blas2/tpsv_gtest.cpp (+2/-2) clients/gtest/blas2/trmv_gtest.cpp (+2/-2) clients/gtest/blas2/trsv_gtest.cpp (+2/-2) clients/gtest/blas3/dgmm_gtest.cpp (+2/-2) clients/gtest/blas3/geam_gtest.cpp (+2/-2) clients/gtest/blas3/gemm_gtest.cpp (+2/-2) clients/gtest/blas3/hemm_gtest.cpp (+2/-2) clients/gtest/blas3/her2k_gtest.cpp (+2/-2) clients/gtest/blas3/herk_gtest.cpp (+2/-2) clients/gtest/blas3/herkx_gtest.cpp (+2/-2) clients/gtest/blas3/symm_gtest.cpp (+2/-2) clients/gtest/blas3/syr2k_gtest.cpp (+2/-2) clients/gtest/blas3/syrk_gtest.cpp (+2/-2) clients/gtest/blas3/syrkx_gtest.cpp (+2/-2) clients/gtest/blas3/trmm_gtest.cpp (+2/-2) clients/gtest/blas3/trsm_gtest.cpp (+2/-2) clients/gtest/blas3/trsm_gtest.yaml (+1/-0) clients/gtest/blas3/trtri_gtest.cpp (+2/-2) clients/gtest/blas_ex/axpy_ex_gtest.cpp (+22/-22) clients/gtest/blas_ex/dot_ex_gtest.cpp (+18/-17) clients/gtest/blas_ex/gemm_ex_gtest.cpp (+1/-9) clients/gtest/blas_ex/nrm2_ex_gtest.cpp (+4/-3) clients/gtest/blas_ex/rot_ex_gtest.cpp (+4/-4) clients/gtest/blas_ex/scal_ex_gtest.cpp (+4/-4) clients/gtest/blas_ex/trsm_ex_gtest.cpp (+2/-2) clients/gtest/hipblas_gtest_main.cpp (+1/-14) clients/gtest/hipblas_test.cpp (+1/-7) clients/gtest/solver/gels_gtest.cpp (+2/-2) clients/gtest/solver/geqrf_gtest.cpp (+2/-2) clients/gtest/solver/getrf_gtest.cpp (+2/-2) clients/gtest/solver/getri_gtest.cpp (+2/-2) clients/gtest/solver/getrs_gtest.cpp (+2/-2) clients/hipblas_clients_readme.txt (+33/-0) clients/include/argument_model.hpp (+6/-6) clients/include/blas1/hipblas_iamax_iamin_ref.hpp (+3/-3) clients/include/blas1/testing_axpy.hpp (+8/-5) clients/include/blas1/testing_axpy_batched.hpp (+15/-7) clients/include/blas1/testing_axpy_strided_batched.hpp (+16/-5) clients/include/blas1/testing_dot.hpp (+9/-2) 
clients/include/blas1/testing_dot_batched.hpp (+2/-2) clients/include/blas1/testing_dot_strided_batched.hpp (+12/-4) clients/include/blas1/testing_rot.hpp (+3/-2) clients/include/blas1/testing_rot_batched.hpp (+3/-3) clients/include/blas1/testing_rot_strided_batched.hpp (+12/-2) clients/include/blas1/testing_rotg.hpp (+7/-2) clients/include/blas1/testing_rotg_batched.hpp (+8/-2) clients/include/blas1/testing_rotg_strided_batched.hpp (+11/-2) clients/include/blas1/testing_scal.hpp (+11/-6) clients/include/blas1/testing_scal_batched.hpp (+11/-7) clients/include/blas1/testing_scal_strided_batched.hpp (+10/-7) clients/include/blas2/testing_gbmv.hpp (+26/-9) clients/include/blas2/testing_gbmv_batched.hpp (+13/-8) clients/include/blas2/testing_gbmv_strided_batched.hpp (+13/-8) clients/include/blas2/testing_gemv.hpp (+23/-7) clients/include/blas2/testing_gemv_batched.hpp (+13/-8) clients/include/blas2/testing_gemv_strided_batched.hpp (+13/-8) clients/include/blas2/testing_ger.hpp (+8/-5) clients/include/blas2/testing_ger_batched.hpp (+7/-5) clients/include/blas2/testing_ger_strided_batched.hpp (+7/-5) clients/include/blas2/testing_hbmv.hpp (+23/-7) clients/include/blas2/testing_hbmv_batched.hpp (+13/-8) clients/include/blas2/testing_hbmv_strided_batched.hpp (+13/-8) clients/include/blas2/testing_hemv.hpp (+22/-7) clients/include/blas2/testing_hemv_batched.hpp (+13/-8) clients/include/blas2/testing_hemv_strided_batched.hpp (+13/-8) clients/include/blas2/testing_her.hpp (+2/-2) clients/include/blas2/testing_her2.hpp (+9/-6) clients/include/blas2/testing_her2_batched.hpp (+7/-5) clients/include/blas2/testing_her2_strided_batched.hpp (+7/-5) clients/include/blas2/testing_her_batched.hpp (+2/-2) clients/include/blas2/testing_her_strided_batched.hpp (+2/-2) clients/include/blas2/testing_hpmv.hpp (+21/-7) clients/include/blas2/testing_hpmv_batched.hpp (+13/-8) clients/include/blas2/testing_hpmv_strided_batched.hpp (+13/-8) clients/include/blas2/testing_hpr.hpp (+2/-2) 
clients/include/blas2/testing_hpr2.hpp (+8/-5) clients/include/blas2/testing_hpr2_batched.hpp (+7/-5) clients/include/blas2/testing_hpr2_strided_batched.hpp (+7/-5) clients/include/blas2/testing_hpr_batched.hpp (+2/-2) clients/include/blas2/testing_hpr_strided_batched.hpp (+2/-2) clients/include/blas2/testing_sbmv.hpp (+23/-7) clients/include/blas2/testing_sbmv_batched.hpp (+13/-8) clients/include/blas2/testing_sbmv_strided_batched.hpp (+13/-8) clients/include/blas2/testing_spmv.hpp (+24/-8) clients/include/blas2/testing_spmv_batched.hpp (+13/-8) clients/include/blas2/testing_spmv_strided_batched.hpp (+13/-8) clients/include/blas2/testing_spr.hpp (+7/-5) clients/include/blas2/testing_spr2.hpp (+8/-5) clients/include/blas2/testing_spr2_batched.hpp (+7/-5) clients/include/blas2/testing_spr2_strided_batched.hpp (+7/-5) clients/include/blas2/testing_spr_batched.hpp (+7/-5) clients/include/blas2/testing_spr_strided_batched.hpp (+16/-5) clients/include/blas2/testing_symv.hpp (+22/-7) clients/include/blas2/testing_symv_batched.hpp (+13/-8) clients/include/blas2/testing_symv_strided_batched.hpp (+13/-8) clients/include/blas2/testing_syr.hpp (+8/-5) clients/include/blas2/testing_syr2.hpp (+8/-5) clients/include/blas2/testing_syr2_batched.hpp (+7/-5) clients/include/blas2/testing_syr2_strided_batched.hpp (+7/-5) clients/include/blas2/testing_syr_batched.hpp (+7/-5) clients/include/blas2/testing_syr_strided_batched.hpp (+17/-5) clients/include/blas3/testing_geam.hpp (+21/-8) clients/include/blas3/testing_geam_batched.hpp (+9/-7) clients/include/blas3/testing_geam_strided_batched.hpp (+9/-7) clients/include/blas3/testing_gemm.hpp (+39/-10) clients/include/blas3/testing_gemm_batched.hpp (+20/-16) clients/include/blas3/testing_gemm_strided_batched.hpp (+14/-10) clients/include/blas3/testing_hemm.hpp (+23/-7) clients/include/blas3/testing_hemm_batched.hpp (+12/-8) clients/include/blas3/testing_hemm_strided_batched.hpp (+12/-8) clients/include/blas3/testing_her2k.hpp (+22/-8) 
clients/include/blas3/testing_her2k_batched.hpp (+12/-10) clients/include/blas3/testing_her2k_strided_batched.hpp (+10/-8) clients/include/blas3/testing_herk.hpp (+2/-2) clients/include/blas3/testing_herk_batched.hpp (+2/-2) clients/include/blas3/testing_herk_strided_batched.hpp (+2/-2) clients/include/blas3/testing_herkx.hpp (+22/-8) clients/include/blas3/testing_herkx_batched.hpp (+12/-10) clients/include/blas3/testing_herkx_strided_batched.hpp (+10/-8) clients/include/blas3/testing_symm.hpp (+23/-7) clients/include/blas3/testing_symm_batched.hpp (+12/-8) clients/include/blas3/testing_symm_strided_batched.hpp (+12/-8) clients/include/blas3/testing_syr2k.hpp (+23/-7) clients/include/blas3/testing_syr2k_batched.hpp (+12/-8) clients/include/blas3/testing_syr2k_strided_batched.hpp (+12/-8) clients/include/blas3/testing_syrk.hpp (+21/-7) clients/include/blas3/testing_syrk_batched.hpp (+12/-8) clients/include/blas3/testing_syrk_strided_batched.hpp (+12/-8) clients/include/blas3/testing_syrkx.hpp (+24/-9) clients/include/blas3/testing_syrkx_batched.hpp (+12/-8) clients/include/blas3/testing_syrkx_strided_batched.hpp (+12/-8) clients/include/blas3/testing_trmm.hpp (+21/-7) clients/include/blas3/testing_trmm_batched.hpp (+7/-5) clients/include/blas3/testing_trmm_strided_batched.hpp (+7/-5) clients/include/blas3/testing_trsm.hpp (+29/-7) clients/include/blas3/testing_trsm_batched.hpp (+7/-5) clients/include/blas3/testing_trsm_strided_batched.hpp (+7/-5) clients/include/blas_ex/testing_axpy_batched_ex.hpp (+15/-13) clients/include/blas_ex/testing_axpy_ex.hpp (+25/-13) clients/include/blas_ex/testing_axpy_strided_batched_ex.hpp (+15/-13) clients/include/blas_ex/testing_dot_batched_ex.hpp (+9/-9) clients/include/blas_ex/testing_dot_ex.hpp (+10/-10) clients/include/blas_ex/testing_dot_strided_batched_ex.hpp (+9/-9) clients/include/blas_ex/testing_gemm_batched_ex.hpp (+21/-25) clients/include/blas_ex/testing_gemm_ex.hpp (+21/-25) 
clients/include/blas_ex/testing_gemm_strided_batched_ex.hpp (+21/-25) clients/include/blas_ex/testing_nrm2_batched_ex.hpp (+7/-7) clients/include/blas_ex/testing_nrm2_ex.hpp (+7/-7) clients/include/blas_ex/testing_nrm2_strided_batched_ex.hpp (+7/-7) clients/include/blas_ex/testing_rot_batched_ex.hpp (+9/-9) clients/include/blas_ex/testing_rot_ex.hpp (+9/-9) clients/include/blas_ex/testing_rot_strided_batched_ex.hpp (+9/-9) clients/include/blas_ex/testing_scal_batched_ex.hpp (+31/-14) clients/include/blas_ex/testing_scal_ex.hpp (+30/-13) clients/include/blas_ex/testing_scal_strided_batched_ex.hpp (+20/-10) clients/include/blas_ex/testing_trsm_batched_ex.hpp (+9/-7) clients/include/blas_ex/testing_trsm_ex.hpp (+9/-7) clients/include/blas_ex/testing_trsm_strided_batched_ex.hpp (+9/-7) clients/include/cblas_interface.h (+18/-18) clients/include/d_vector.hpp (+12/-12) clients/include/device_batch_matrix.hpp (+29/-24) clients/include/device_batch_vector.hpp (+27/-22) clients/include/device_matrix.hpp (+8/-8) clients/include/device_strided_batch_matrix.hpp (+17/-17) clients/include/device_strided_batch_vector.hpp (+13/-13) clients/include/device_vector.hpp (+8/-8) clients/include/flops.hpp (+90/-88) clients/include/hipblas.hpp (+4187/-11240) clients/include/hipblas_arguments.hpp (+29/-28) clients/include/hipblas_common.yaml (+12/-56) clients/include/hipblas_datatype2string.hpp (+22/-26) clients/include/hipblas_fortran.h.in (+3744/-3783) clients/include/hipblas_fortran.hpp (+231/-246) clients/include/hipblas_fortran_blas.f90 (+97/-97) clients/include/hipblas_fortran_blas_64.f90 (+90/-90) clients/include/hipblas_fortran_solver.f90 (+1/-1) clients/include/host_batch_vector.hpp (+10/-1) clients/include/host_strided_batch_vector.hpp (+9/-1) clients/include/host_vector.hpp (+9/-1) clients/include/lapack_utilities.hpp (+10/-10) clients/include/near.h (+17/-20) clients/include/solver/testing_gels_batched.hpp (+3/-3) clients/include/solver/testing_geqrf_batched.hpp (+3/-3) 
clients/include/solver/testing_getrs_batched.hpp (+3/-3) clients/include/syrkx_reference.hpp (+4/-4) clients/include/type_dispatch.hpp (+55/-58) clients/include/type_utils.h (+39/-17) clients/include/utility.h (+35/-23) clients/samples/CMakeLists.txt (+10/-11) clients/samples/example_gemm_ex.cpp (+3/-11) clients/samples/example_gemm_ex_fortran.F90 (+6/-6) clients/samples/example_hgemm.cpp (+3/-3) clients/samples/example_hgemm_hip_half.cpp (+3/-4) clients/samples/example_hip_complex_her2.cpp (+0/-1) clients/samples/example_scal_ex.cpp (+1/-6) clients/samples/example_sscal_fortran.F90 (+1/-1) cmake/dependencies.cmake (+3/-21) cmake/get-rocm-cmake.cmake (+21/-0) debian/bin/run-tests (+1/-1) debian/changelog (+60/-0) debian/control (+54/-20) debian/gbp.conf (+1/-1) debian/libhipblas3-bench.install (+1/-0) debian/libhipblas3-bench.links (+1/-0) debian/libhipblas3-tests-data.install (+1/-0) debian/libhipblas3-tests.install (+4/-0) debian/libhipblas3-tests.links (+1/-0) debian/libhipblas3.symbols (+1854/-0) debian/patches/0001-remove-immintrin-header.patch (+3/-11) debian/patches/0002-Disable-omp.patch (+11/-11) debian/patches/0003-use-generic-blas-and-lapack.patch (+6/-5) debian/patches/0004-Use-local-mathjax.patch (+1/-1) debian/patches/0005-Gtest-add-verbose-flag-to-prevent-timeout.patch (+22/-0) debian/patches/0006-drop-f16c-instructions.patch (+18/-14) debian/patches/0007-optional-git.patch (+1/-1) debian/patches/0008-fix-ambiguous-__half-constructor.patch (+1377/-0) debian/patches/0009-enable-changing-test-data-dir.patch (+53/-0) debian/patches/0010-remove-readme-from-doxygen-sources.patch (+24/-0) debian/patches/0011-fix-arm64-immintrin-include.patch (+28/-0) debian/patches/series (+10/-11) debian/rules (+19/-4) deps/requirements.txt (+1/-1) dev/null (+0/-1562) docs/conceptual/library-source-code-organization.rst (+3/-3) docs/doxygen/Doxyfile (+1/-1) docs/how-to/contributing-to-hipblas.rst (+1/-1) docs/how-to/using-hipblas-clients.rst (+2/-2) docs/index.rst (+9/-3) 
docs/install/Linux_Install_Guide.rst (+44/-4) docs/install/Windows_Install_Guide.rst (+35/-6) docs/install/prerequisites.rst (+3/-3) docs/reference/data-type-support.rst (+809/-0) docs/reference/deprecation.rst (+95/-89) docs/reference/hipblas-api-functions.rst (+172/-88) docs/sphinx/_toc.yml.in (+3/-1) docs/sphinx/requirements.in (+1/-1) docs/sphinx/requirements.txt (+140/-10) library/CMakeLists.txt (+2/-2) library/include/hipblas.h (+6638/-16631) library/src/CMakeLists.txt (+13/-50) library/src/amd_detail/hipblas.cpp (+6960/-27155) library/src/hipblas_module.f90 (+233/-203) library/src/nvidia_detail/hipblas.cpp (+6882/-20452) rdeps.py (+1/-1) rmake.py (+17/-13) scripts/performance/blas/datagraphs.asy (+0/-2) scripts/performance/blas/timing.py (+0/-1) scripts/performance/multiplot/README (+0/-4) scripts/performance/multiplot/blas2/gfx90a/gbmv.csv (+0/-1) scripts/performance/multiplot/blas2/gfx90a/hbmv.csv (+0/-1) scripts/performance/multiplot/blas2/gfx90a/sbmv.csv (+0/-1) scripts/performance/multiplot/blas2/gfx90a/spmv.csv (+0/-1) scripts/performance/multiplot/blas2/gfx90a/symv.csv (+0/-1) toolchain-windows.cmake (+18/-0) |
||||
| Related bugs: |
|
| Reviewer | Review Type | Date Requested | Status |
|---|---|---|---|
| Andreas Hasenack | Approve | ||
| Ubuntu Sponsors | Pending | ||
|
Review via email:
|
|||
Commit message
Description of the change
New upstream version 7.1.0
| Bruno Bernardo de Moura (bruno-bdmoura) wrote : | # |
| Igor Luppi (igorluppi) wrote : | # |
Rebasing with ubuntu/devel
| Igor Luppi (igorluppi) wrote : | # |
Rebase done. New ppa build with version 7.1.0-0ubuntu4 here: https:/
(-proposed and all archs)
| Igor Luppi (igorluppi) wrote : | # |
$ reverse-depends --arch ppc64el src:hipblas -x
Reverse-Depends
===============
* libggml-hip (for libhipblas0)
But it only actually requires: libhipblas-dev [amd64 arm64] <!pkg.ggml.nohip>,
I have done ggml builds in a PPA for ppc64el without having any ppc64el rocm package, so it is safe to drop ppc64el for hipblas.
| Andreas Hasenack (ahasenack) wrote : | # |
What's this about in d/rules:
+LIB=libhipblas3
+LIBV=7.1.0
+REV=1
+gensymbols:
+ dpkg-deb -x ../$(LIB)
+ dpkg-gensymbols -v$(LIBV) -p$(LIB) -P/tmp/$(LIB) -Odebian/
You are opening a built deb (amd64 only), and then doing what?
| Igor Luppi (igorluppi) wrote : | # |
I think this is outdated, we could just use
override_
dh_makeshlibs -V
| Andreas Hasenack (ahasenack) wrote : | # |
Why even override it?
| Igor Luppi (igorluppi) wrote : | # |
Yeah, I actually deleted that temp. piece of code. Please take a look.
| Andreas Hasenack (ahasenack) wrote : | # |
Please also trigger a new build
| Igor Luppi (igorluppi) wrote : | # |
Here the recipe build with 3 child builds (amd64, amd64v3 and arm64):
https:/
| Andreas Hasenack (ahasenack) wrote : | # |
- delta: OK
- upstream changes: OK
- debian packaging changes: OK
Debian could benefit from the fixes in the smartcard tests that you made, that will allow them to re-enable those tests (currently disabled in d/t/control in debian/sid).
+1
| Andreas Hasenack (ahasenack) wrote (last edit ): | # |
Bah, that was the wrong MP, sorry. I'll put it back to "needs fixing" to reset the approve state, sorry again.
Doing too many reviews in parallel :\
| Andreas Hasenack (ahasenack) wrote : | # |
> Here the recipe build with 3 child builds (amd64, amd64v3 and arm64):
Does that have the updated d/rules?
| Igor Luppi (igorluppi) wrote : | # |
I removed the section:
+LIB=libhipblas3
+LIBV=7.1.0
+REV=1
+gensymbols:
+ dpkg-deb -x ../$(LIB)
+ dpkg-gensymbols -v$(LIBV) -p$(LIB) -P/tmp/$(LIB) -Odebian/
for those builds
| Igor Luppi (igorluppi) wrote : | # |
Here the diff log: https:/
(Also removed the annoying extra whitespace from the *.links files)
| Igor Luppi (igorluppi) wrote : | # |
Builds done successfully, please take a look.
| Andreas Hasenack (ahasenack) wrote : | # |
This depends on rocsolver 7, so it will stay in the build queue until that other upload finishes building.
+1
| Andreas Hasenack (ahasenack) wrote : | # |
Sponsored:
Uploading hipblas_
Uploading hipblas_
Uploading hipblas_
Uploading hipblas_
Uploading hipblas_
| Igor Luppi (igorluppi) wrote : | # |
Indeed, it'll be waiting for its build-depends. Let's hope rocsolver will be published soon! Thanks a lot, Andreas!!
Preview Diff
| 1 | diff --git a/.githooks/install b/.githooks/install |
| 2 | index cbb0569..685ea28 100755 |
| 3 | --- a/.githooks/install |
| 4 | +++ b/.githooks/install |
| 5 | @@ -3,6 +3,6 @@ |
| 6 | cd $(git rev-parse --git-dir) |
| 7 | cd hooks |
| 8 | |
| 9 | -echo "Installing hooks..." |
| 10 | +echo "Installing hooks..." |
| 11 | ln -s ../../.githooks/pre-commit pre-commit |
| 12 | echo "Done!" |
| 13 | diff --git a/.jenkins/codecov.groovy b/.jenkins/codecov.groovy |
| 14 | new file mode 100644 |
| 15 | index 0000000..dc11b47 |
| 16 | --- /dev/null |
| 17 | +++ b/.jenkins/codecov.groovy |
| 18 | @@ -0,0 +1,119 @@ |
| 19 | +#!/usr/bin/env groovy |
| 20 | +// This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ |
| 21 | +@Library('rocJenkins@pong') _ |
| 22 | + |
| 23 | +// This is file for internal AMD use. |
| 24 | +// If you are interested in running your own Jenkins, please raise a github issue for assistance. |
| 25 | + |
| 26 | +import com.amd.project.* |
| 27 | +import com.amd.docker.* |
| 28 | +import java.nio.file.Path |
| 29 | + |
| 30 | +def runCI = |
| 31 | +{ |
| 32 | + nodeDetails, jobName, buildCommand, label-> |
| 33 | + |
| 34 | + def prj = new rocProject('hipBLAS', 'CodeCov') |
| 35 | + |
| 36 | + if (env.BRANCH_NAME ==~ /PR-\d+/ && pullRequest.labels.contains("noSolver")) |
| 37 | + { |
| 38 | + prj.libraryDependencies = ['hipBLAS-common', 'hipBLASLt', 'rocBLAS'] |
| 39 | + } |
| 40 | + else |
| 41 | + { |
| 42 | + prj.libraryDependencies = ['rocPRIM', 'hipBLAS-common', 'hipBLASLt', 'rocBLAS', 'rocSPARSE', 'rocSOLVER'] |
| 43 | + } |
| 44 | + |
| 45 | + if (env.BRANCH_NAME ==~ /PR-\d+/ && pullRequest.labels.contains('g++')) |
| 46 | + { |
| 47 | + buildCommand += ' --compiler=g++' |
| 48 | + } |
| 49 | + else if (env.BRANCH_NAME ==~ /PR-\d+/ && pullRequest.labels.contains('clang')) |
| 50 | + { |
| 51 | + buildCommand += ' --compiler=clang++' |
| 52 | + } |
| 53 | + else |
| 54 | + { |
| 55 | + // buildCommand += ' --compiler=amdclang++' # leave as default |
| 56 | + } |
| 57 | + |
| 58 | + //customize for project |
| 59 | + prj.paths.build_command = buildCommand |
| 60 | + |
| 61 | + // Define test architectures, optional rocm version argument is available |
| 62 | + def nodes = new dockerNodes(nodeDetails, jobName, prj) |
| 63 | + |
| 64 | + boolean formatCheck = false |
| 65 | + |
| 66 | + def commonGroovy |
| 67 | + |
| 68 | + def compileCommand = |
| 69 | + { |
| 70 | + platform, project-> |
| 71 | + |
| 72 | + commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" |
| 73 | + commonGroovy.runCompileCommand(platform, project, jobName) |
| 74 | + } |
| 75 | + |
| 76 | + def testCommand = |
| 77 | + { |
| 78 | + platform, project-> |
| 79 | + |
| 80 | + commonGroovy.runCoverageCommand(platform, project, "release-debug") |
| 81 | + } |
| 82 | + |
| 83 | + def packageCommand = |
| 84 | + { |
| 85 | + platform, project-> |
| 86 | + |
| 87 | + commonGroovy.runPackageCommand(platform, project, jobName, label, "release-debug") |
| 88 | + } |
| 89 | + |
| 90 | + buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, null) |
| 91 | +} |
| 92 | + |
| 93 | +def setupCI(urlJobName, jobNameList, buildCommand, runCI, label) |
| 94 | +{ |
| 95 | + jobNameList = auxiliary.appendJobNameList(jobNameList) |
| 96 | + |
| 97 | + jobNameList.each |
| 98 | + { |
| 99 | + jobName, nodeDetails-> |
| 100 | + if (urlJobName == jobName) |
| 101 | + stage(label + ' ' + jobName) { |
| 102 | + runCI(nodeDetails, jobName, buildCommand, label) |
| 103 | + } |
| 104 | + } |
| 105 | + |
| 106 | + // For url job names that are not listed by the jobNameList i.e. compute-rocm-dkms-no-npi-1901 |
| 107 | + if(!jobNameList.keySet().contains(urlJobName)) |
| 108 | + { |
| 109 | + properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) |
| 110 | + stage(label + ' ' + urlJobName) { |
| 111 | + runCI([ubuntu18:['gfx906']], urlJobName, buildCommand, label) |
| 112 | + } |
| 113 | + } |
| 114 | + |
| 115 | +} |
| 116 | + |
| 117 | +ci: { |
| 118 | + String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) |
| 119 | + |
| 120 | + def propertyList = ["compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])], |
| 121 | + "rocm-docker":[]] |
| 122 | + propertyList = auxiliary.appendPropertyList(propertyList) |
| 123 | + |
| 124 | + def jobNameList = ["compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]), |
| 125 | + "rocm-docker":([ubuntu18:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx906']])] |
| 126 | + jobNameList = auxiliary.appendJobNameList(jobNameList) |
| 127 | + |
| 128 | + propertyList.each |
| 129 | + { |
| 130 | + jobName, property-> |
| 131 | + if (urlJobName == jobName) |
| 132 | + properties(auxiliary.addCommonProperties(property)) |
| 133 | + } |
| 134 | + |
| 135 | + String hostBuildCommand = './install.sh -k --codecoverage -c' |
| 136 | + setupCI(urlJobName, jobNameList, hostBuildCommand, runCI, 'g++') |
| 137 | +} |
| 138 | diff --git a/.jenkins/common.groovy b/.jenkins/common.groovy |
| 139 | index 987c1c4..f959eef 100644 |
| 140 | --- a/.jenkins/common.groovy |
| 141 | +++ b/.jenkins/common.groovy |
| 142 | @@ -3,8 +3,6 @@ |
| 143 | |
| 144 | def runCompileCommand(platform, project, jobName, boolean sameOrg=false) |
| 145 | { |
| 146 | - project.paths.construct_build_prefix() |
| 147 | - |
| 148 | def getDependenciesCommand = "" |
| 149 | if (project.installLibraryDependenciesFromCI) |
| 150 | { |
| 151 | @@ -33,7 +31,7 @@ def runCompileCommand(platform, project, jobName, boolean sameOrg=false) |
| 152 | cd ${project.paths.project_build_prefix} |
| 153 | ${getDependenciesCommand} |
| 154 | ${centos} |
| 155 | - LD_LIBRARY_PATH=/opt/rocm/lib ${project.paths.build_command} |
| 156 | + ${project.paths.build_command} |
| 157 | """ |
| 158 | platform.runCommand(this, command) |
| 159 | } |
| 160 | @@ -51,42 +49,60 @@ def runTestCommand (platform, project) |
| 161 | } |
| 162 | } |
| 163 | |
| 164 | - String gtestCommonEnv = "HIPBLAS_CLIENT_RAM_GB_LIMIT=95" |
| 165 | + String gtestCommonEnv = "HIPBLAS_CLIENT_RAM_GB_LIMIT=95 GTEST_LISTENER=NO_PASS_LINE_IN_LOG" |
| 166 | + |
| 167 | def command = """#!/usr/bin/env bash |
| 168 | set -x |
| 169 | - cd ${stagingDir} |
| 170 | - ${sudo} LD_LIBRARY_PATH=/opt/rocm/lib ${gtestCommonEnv} GTEST_LISTENER=NO_PASS_LINE_IN_LOG ./hipblas-test --gtest_output=xml --gtest_color=yes |
| 171 | + pushd ${stagingDir} |
| 172 | + ${gtestCommonEnv} ./hipblas-test --gtest_output=xml --gtest_color=yes |
| 173 | + popd |
| 174 | """ |
| 175 | |
| 176 | platform.runCommand(this, command) |
| 177 | |
| 178 | - // In an upcoming release, we are replacing hipblasDatatype_t with hipDataType. We have created hipblas_v2-test to test the new |
| 179 | - // interfaces while hipblasDatatype_t is deprecated. Thus, hipblas-test will be testing the old, deprecated, functions |
| 180 | - // using hipblasDatatype_t, and hipblas_v2-test will be testing the upcoming interfaces. |
| 181 | - def v2TestCommand = """#!/usr/bin/env bash |
| 182 | - set -x |
| 183 | - cd ${stagingDir} |
| 184 | - ${sudo} LD_LIBRARY_PATH=/opt/rocm/lib ${gtestCommonEnv} GTEST_LISTENER=NO_PASS_LINE_IN_LOG ./hipblas_v2-test --gtest_output=xml --gtest_color=yes |
| 185 | - """ |
| 186 | - |
| 187 | - platform.runCommand(this, v2TestCommand) |
| 188 | - |
| 189 | def yamlTestCommand = """#!/usr/bin/env bash |
| 190 | set -x |
| 191 | - cd ${stagingDir} |
| 192 | - ${sudo} LD_LIBRARY_PATH=/opt/rocm/lib ${gtestCommonEnv} GTEST_LISTENER=NO_PASS_LINE_IN_LOG ./hipblas-test --gtest_output=xml --gtest_color=yes --yaml hipblas_smoke.yaml |
| 193 | + pushd ${stagingDir} |
| 194 | + ${gtestCommonEnv} ./hipblas-test --gtest_output=xml --gtest_color=yes --yaml hipblas_smoke.yaml |
| 195 | + popd |
| 196 | """ |
| 197 | platform.runCommand(this, yamlTestCommand) |
| 198 | - junit "${stagingDir}/*.xml" |
| 199 | } |
| 200 | |
| 201 | -def runPackageCommand(platform, project, jobName, label='') |
| 202 | +def runCoverageCommand (platform, project, String cmdDir = "release-debug") |
| 203 | +{ |
| 204 | + //Temporary workaround due to bug in container |
| 205 | + String centos7Workaround = platform.jenkinsLabel.contains('centos7') ? 'export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/opt/rocm/lib64/' : '' |
| 206 | + |
| 207 | + String gtestCommonEnv = "HIPBLAS_CLIENT_RAM_GB_LIMIT=95 GTEST_LISTENER=NO_PASS_LINE_IN_LOG" |
| 208 | + |
| 209 | + def command = """#!/usr/bin/env bash |
| 210 | + set -x |
| 211 | + cd ${project.paths.project_build_prefix}/build/${cmdDir} |
| 212 | + export LD_LIBRARY_PATH=/opt/rocm/lib/ |
| 213 | + ${centos7Workaround} |
| 214 | + ${gtestCommonEnv} make coverage_cleanup coverage GTEST_FILTER=-*known_bug* |
| 215 | + """ |
| 216 | + |
| 217 | + platform.runCommand(this, command) |
| 218 | + |
| 219 | + publishHTML([allowMissing: false, |
| 220 | + alwaysLinkToLastBuild: false, |
| 221 | + keepAll: false, |
| 222 | + reportDir: "${project.paths.project_build_prefix}/build/${cmdDir}/coverage-report", |
| 223 | + reportFiles: "index.html", |
| 224 | + reportName: "Code coverage report", |
| 225 | + reportTitles: "Code coverage report"]) |
| 226 | +} |
| 227 | + |
| 228 | +def runPackageCommand(platform, project, jobName, label='', buildDir='') |
| 229 | { |
| 230 | def command |
| 231 | |
| 232 | label = label != '' ? '-' + label.toLowerCase() : '' |
| 233 | String ext = platform.jenkinsLabel.contains('ubuntu') ? "deb" : "rpm" |
| 234 | String dir = jobName.contains('Debug') ? "debug" : "release" |
| 235 | + |
| 236 | if (env.BRANCH_NAME ==~ /PR-\d+/) |
| 237 | { |
| 238 | if (pullRequest.labels.contains("debug")) |
| 239 | @@ -94,6 +110,10 @@ def runPackageCommand(platform, project, jobName, label='') |
| 240 | dir = "debug" |
| 241 | } |
| 242 | } |
| 243 | + if (buildDir != '') |
| 244 | + { |
| 245 | + dir = buildDir |
| 246 | + } |
| 247 | |
| 248 | command = """ |
| 249 | set -x |
| 250 | diff --git a/.jenkins/precheckin-cuda.groovy b/.jenkins/precheckin-cuda.groovy |
| 251 | index 771c0a0..8378953 100644 |
| 252 | --- a/.jenkins/precheckin-cuda.groovy |
| 253 | +++ b/.jenkins/precheckin-cuda.groovy |
| 254 | @@ -58,18 +58,14 @@ def setupCI(urlJobName, jobNameList, buildCommand, runCI, label) |
| 255 | { |
| 256 | jobName, nodeDetails-> |
| 257 | if (urlJobName == jobName) |
| 258 | - stage(label + ' ' + jobName) { |
| 259 | - runCI(nodeDetails, jobName, buildCommand, label) |
| 260 | - } |
| 261 | + runCI(nodeDetails, jobName, buildCommand, label) |
| 262 | } |
| 263 | |
| 264 | // For url job names that are not listed by the jobNameList i.e. compute-rocm-dkms-no-npi-1901 |
| 265 | if(!jobNameList.keySet().contains(urlJobName)) |
| 266 | { |
| 267 | properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) |
| 268 | - stage(label + ' ' + urlJobName) { |
| 269 | - runCI(['ubuntu20-cuda11':['anycuda']], urlJobName, buildCommand, label) |
| 270 | - } |
| 271 | + runCI(['ubuntu20-cuda11':['anycuda']], urlJobName, buildCommand, label) |
| 272 | } |
| 273 | |
| 274 | } |
| 275 | diff --git a/.jenkins/precheckin.groovy b/.jenkins/precheckin.groovy |
| 276 | index 06aa474..8f5362f 100644 |
| 277 | --- a/.jenkins/precheckin.groovy |
| 278 | +++ b/.jenkins/precheckin.groovy |
| 279 | @@ -80,18 +80,14 @@ def setupCI(urlJobName, jobNameList, buildCommand, runCI, label) |
| 280 | { |
| 281 | jobName, nodeDetails-> |
| 282 | if (urlJobName == jobName) |
| 283 | - stage(label + ' ' + jobName) { |
| 284 | - runCI(nodeDetails, jobName, buildCommand, label) |
| 285 | - } |
| 286 | + runCI(nodeDetails, jobName, buildCommand, label) |
| 287 | } |
| 288 | |
| 289 | // For url job names that are not listed by the jobNameList i.e. compute-rocm-dkms-no-npi-1901 |
| 290 | if(!jobNameList.keySet().contains(urlJobName)) |
| 291 | { |
| 292 | properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) |
| 293 | - stage(label + ' ' + urlJobName) { |
| 294 | - runCI([ubuntu18:['gfx906']], urlJobName, buildCommand, label) |
| 295 | - } |
| 296 | + runCI([ubuntu18:['gfx906']], urlJobName, buildCommand, label) |
| 297 | } |
| 298 | |
| 299 | } |
| 300 | diff --git a/.jenkins/static.groovy b/.jenkins/static.groovy |
| 301 | index e7899e8..11e0e03 100644 |
| 302 | --- a/.jenkins/static.groovy |
| 303 | +++ b/.jenkins/static.groovy |
| 304 | @@ -18,10 +18,10 @@ def runCI = |
| 305 | |
| 306 | if (env.BRANCH_NAME ==~ /PR-\d+/ && pullRequest.labels.contains("noSolver")) |
| 307 | { |
| 308 | - prj.libraryDependencies = ['hipBLAS-common', 'hipBLASLt', 'rocBLAS'] } |
| 309 | + prj.libraryDependencies = ['hipBLAS-common', 'rocBLAS'] } |
| 310 | else |
| 311 | { |
| 312 | - prj.libraryDependencies = ['rocPRIM', 'hipBLAS-common', 'hipBLASLt', 'rocBLAS', 'rocSPARSE', 'rocSOLVER'] |
| 313 | + prj.libraryDependencies = ['rocPRIM', 'hipBLAS-common', 'rocBLAS', 'rocSPARSE', 'rocSOLVER'] |
| 314 | } |
| 315 | |
| 316 | // Define test architectures, optional rocm version argument is available |
| 317 | @@ -76,17 +76,13 @@ ci: { |
| 318 | { |
| 319 | jobName, nodeDetails-> |
| 320 | if (urlJobName == jobName) |
| 321 | - stage(jobName) { |
| 322 | - runCI(nodeDetails, jobName) |
| 323 | - } |
| 324 | + runCI(nodeDetails, jobName) |
| 325 | } |
| 326 | |
| 327 | // For url job names that are not listed by the jobNameList i.e. compute-rocm-dkms-no-npi-1901 |
| 328 | if(!jobNameList.keySet().contains(urlJobName)) |
| 329 | { |
| 330 | properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) |
| 331 | - stage(urlJobName) { |
| 332 | - runCI([ubuntu18:['gfx906']], urlJobName) |
| 333 | - } |
| 334 | + runCI([ubuntu18:['gfx906']], urlJobName) |
| 335 | } |
| 336 | } |
| 337 | diff --git a/.jenkins/staticanalysis.groovy b/.jenkins/staticanalysis.groovy |
| 338 | index 096e8c5..36d76d6 100644 |
| 339 | --- a/.jenkins/staticanalysis.groovy |
| 340 | +++ b/.jenkins/staticanalysis.groovy |
| 341 | @@ -45,8 +45,6 @@ ci: { |
| 342 | { |
| 343 | jobName, nodeDetails-> |
| 344 | if (urlJobName == jobName) |
| 345 | - stage(jobName) { |
| 346 | - runCI(nodeDetails, jobName) |
| 347 | - } |
| 348 | + runCI(nodeDetails, jobName) |
| 349 | } |
| 350 | } |
| 351 | diff --git a/CHANGELOG.md b/CHANGELOG.md |
| 352 | index 5e8395f..4501829 100644 |
| 353 | --- a/CHANGELOG.md |
| 354 | +++ b/CHANGELOG.md |
| 355 | @@ -3,7 +3,44 @@ |
| 356 | Documentation for hipBLAS is available at |
| 357 | [https://rocm.docs.amd.com/projects/hipBLAS/en/latest/](https://rocm.docs.amd.com/projects/hipBLAS/en/latest/). |
| 358 | |
| 359 | -## hipBLAS 2.4.0 for ROCm 6.4.0 |
| 360 | +## hipBLAS 3.1.0 for ROCm 7.1 |
| 361 | + |
| 362 | +### Added |
| 363 | + |
| 364 | +* `--clients-only` build option to only build clients against a prebuilt library. |
| 365 | +* gfx1103, gfx1150, gfx1151, gfx1200, and gfx1201 support to clients. |
| 366 | +* FORTRAN enabled for the Microsoft Windows build and tests. |
| 367 | +* Additional reference library fallback options added. |
| 368 | + |
| 369 | +### Changed |
| 370 | + |
| 371 | +* Improve the build time for clients by removing `clients_common.cpp` from the hipblas-test build. |
| 372 | + |
| 373 | +## hipBLAS 3.0.0 for ROCm 7.0 |
| 374 | + |
| 375 | +### Added |
| 376 | + |
| 377 | +* Added the `hipblasSetWorkspace()` API |
| 378 | +* Support for code coverage tests |
| 379 | + |
| 380 | +### Changed |
| 381 | + |
| 382 | +* HIPBLAS_V2 API is now the only available API using `hipComplex` and `hipDatatype` types |
| 383 | +* Documentation updates |
| 384 | +* Verbose compilation for `hipblas.cpp` |
| 385 | + |
| 386 | +### Removed |
| 387 | + |
| 388 | +* `hipblasDatatype_t` type |
| 389 | +* `hipComplex` and `hipDoubleComplex` types |
| 390 | +* Support code for non-production gfx targets |
| 391 | + |
| 392 | +### Resolved issues |
| 393 | + |
| 394 | +* The build time `CMake` configuration for the dependency on `hipBLAS-common` is fixed |
| 395 | +* Compiler warnings for unhandled enums have been resolved |
| 396 | + |
| 397 | +## hipBLAS 2.4.0 for ROCm 6.4.0 |
| 398 | |
| 399 | ### Changed |
| 400 | |
| 401 | diff --git a/CMakeLists.txt b/CMakeLists.txt |
| 402 | index c63dfe1..6b3d0b0 100644 |
| 403 | --- a/CMakeLists.txt |
| 404 | +++ b/CMakeLists.txt |
| 405 | @@ -1,5 +1,5 @@ |
| 406 | # ######################################################################## |
| 407 | -# Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved. |
| 408 | +# Copyright (C) 2016-2025 Advanced Micro Devices, Inc. All rights reserved. |
| 409 | # |
| 410 | # Permission is hereby granted, free of charge, to any person obtaining a copy |
| 411 | # of this software and associated documentation files (the "Software"), to deal |
| 412 | @@ -42,12 +42,11 @@ if( NOT DEFINED CMAKE_CONFIGURATION_TYPES AND NOT DEFINED CMAKE_BUILD_TYPE ) |
| 413 | set( CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." ) |
| 414 | endif() |
| 415 | |
| 416 | -if (NOT WIN32) |
| 417 | - if ( NOT DEFINED CMAKE_Fortran_COMPILER AND NOT DEFINED ENV{FC} ) |
| 418 | - set( CMAKE_Fortran_COMPILER "gfortran" ) |
| 419 | - endif() |
| 420 | - set( fortran_language "Fortran" ) |
| 421 | -endif( ) |
| 422 | +if ( NOT DEFINED CMAKE_Fortran_COMPILER AND NOT DEFINED ENV{FC} ) |
| 423 | + set( CMAKE_Fortran_COMPILER "gfortran" ) |
| 424 | +endif() |
| 425 | + |
| 426 | +set( fortran_language "Fortran" ) |
| 427 | |
| 428 | project( hipblas LANGUAGES CXX ${fortran_language} ) |
| 429 | |
| 430 | @@ -66,7 +65,7 @@ list( APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/lib/cmake/hip /opt/rocm /opt/rocm/ll |
| 431 | # rocm-cmake contains common cmake code for rocm projects to help setup and install |
| 432 | include(dependencies) |
| 433 | |
| 434 | -set ( VERSION_STRING "2.4.0" ) |
| 435 | +set ( VERSION_STRING "3.1.0" ) |
| 436 | rocm_setup_version( VERSION ${VERSION_STRING} ) |
| 437 | |
| 438 | option( BUILD_VERBOSE "Output additional build information" OFF ) |
| 439 | @@ -114,10 +113,6 @@ if(HIP_PLATFORM STREQUAL nvidia) |
| 440 | endif() |
| 441 | |
| 442 | option(BUILD_CODE_COVERAGE "Build with code coverage enabled" OFF) |
| 443 | -if(BUILD_CODE_COVERAGE) |
| 444 | - add_compile_options(-fprofile-arcs -ftest-coverage) |
| 445 | - add_link_options(--coverage) |
| 446 | -endif() |
| 447 | |
| 448 | option(BUILD_ADDRESS_SANITIZER "Build with address sanitizer enabled" OFF) |
| 449 | if(BUILD_ADDRESS_SANITIZER) |
| 450 | @@ -125,20 +120,10 @@ if(BUILD_ADDRESS_SANITIZER) |
| 451 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -shared-libasan") |
| 452 | endif() |
| 453 | |
| 454 | - |
| 455 | -# FOR HANDLING ENABLE/DISABLE OPTIONAL BACKWARD COMPATIBILITY for FILE/FOLDER REORG |
| 456 | -option(BUILD_FILE_REORG_BACKWARD_COMPATIBILITY "Build with file/folder reorg with backward compatibility enabled" OFF) |
| 457 | -if(BUILD_FILE_REORG_BACKWARD_COMPATIBILITY AND NOT WIN32) |
| 458 | - rocm_wrap_header_dir( |
| 459 | - ${CMAKE_SOURCE_DIR}/library/include |
| 460 | - PATTERNS "*.h" |
| 461 | - GUARDS SYMLINK WRAPPER |
| 462 | - WRAPPER_LOCATIONS ${CMAKE_INSTALL_INCLUDEDIR} |
| 463 | - ) |
| 464 | +if(NOT SKIP_LIBRARY) |
| 465 | + add_subdirectory(library) |
| 466 | endif() |
| 467 | |
| 468 | -add_subdirectory( library ) |
| 469 | - |
| 470 | include( clients/cmake/build-options.cmake ) |
| 471 | |
| 472 | # Build clients of the library |
| 473 | @@ -151,7 +136,7 @@ if( BUILD_CLIENTS_SAMPLES OR BUILD_CLIENTS_TESTS OR BUILD_CLIENTS_BENCHMARKS ) |
| 474 | message(STATUS "OS: ${CLIENTS_OS} ${CLIENTS_OS_VERSION}") |
| 475 | set(GFORTRAN_RPM "libgfortran4") |
| 476 | set(GFORTRAN_DEB "libgfortran4") |
| 477 | - if(CLIENTS_OS STREQUAL "centos" OR CLIENTS_OS STREQUAL "rhel") |
| 478 | + if(CLIENTS_OS STREQUAL "centos" OR CLIENTS_OS STREQUAL "rhel" OR CLIENTS_OS STREQUAL "almalinux") |
| 479 | if(CLIENTS_OS_VERSION VERSION_GREATER_EQUAL "8") |
| 480 | set(GFORTRAN_RPM "libgfortran") |
| 481 | endif() |
| 482 | @@ -204,22 +189,30 @@ endif( ) |
| 483 | |
| 484 | # Package specific CPACK vars |
| 485 | if(HIP_PLATFORM STREQUAL amd) |
| 486 | - set(rocblas_minimum 4.4.0) |
| 487 | - set(rocsolver_minimum 3.28.0) |
| 488 | + set(rocblas_minimum 5.1.0) |
| 489 | + set(rocsolver_minimum 3.31.0) |
| 490 | rocm_package_add_dependencies(SHARED_DEPENDS "rocblas >= ${rocblas_minimum}" "rocsolver >= ${rocsolver_minimum}") |
| 491 | rocm_package_add_rpm_dependencies(STATIC_DEPENDS "rocblas-static-devel >= ${rocblas_minimum}" "rocsolver-static-devel >= ${rocsolver_minimum}") |
| 492 | rocm_package_add_deb_dependencies(STATIC_DEPENDS "rocblas-static-dev >= ${rocblas_minimum}" "rocsolver-static-dev >= ${rocsolver_minimum}") |
| 493 | endif( ) |
| 494 | |
| 495 | -set(hipblas_common_minimum 1.0.0) |
| 496 | -rocm_package_add_deb_dependencies(COMPONENT devel DEPENDS "hipblas-common-dev >= ${hipblas_common_minimum}") |
| 497 | -rocm_package_add_rpm_dependencies(COMPONENT devel DEPENDS "hipblas-common-devel >= ${hipblas_common_minimum}") |
| 498 | +set(hipblas_common_minimum 1.3.0) |
| 499 | + |
| 500 | +if(BUILD_SHARED_LIBS) |
| 501 | + rocm_package_add_deb_dependencies(COMPONENT devel DEPENDS "hipblas-common-dev >= ${hipblas_common_minimum}") |
| 502 | + rocm_package_add_rpm_dependencies(COMPONENT devel DEPENDS "hipblas-common-devel >= ${hipblas_common_minimum}") |
| 503 | +else() |
| 504 | + rocm_package_add_deb_dependencies(COMPONENT devel DEPENDS "hipblas-common-static-dev >= ${hipblas_common_minimum}") |
| 505 | + rocm_package_add_rpm_dependencies(COMPONENT devel DEPENDS "hipblas-common-static-devel >= ${hipblas_common_minimum}") |
| 506 | +endif() |
| 507 | |
| 508 | set( CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md" ) |
| 509 | set( CPACK_RPM_PACKAGE_LICENSE "MIT") |
| 510 | |
| 511 | if (WIN32) |
| 512 | - SET( CMAKE_INSTALL_PREFIX "C:/hipSDK" CACHE PATH "Install path" FORCE ) |
| 513 | + if( CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT ) |
| 514 | + SET( CMAKE_INSTALL_PREFIX "C:/hipSDK" CACHE PATH "Install path" FORCE ) |
| 515 | + endif() |
| 516 | SET( INSTALL_PREFIX "C:/hipSDK" ) |
| 517 | SET( CPACK_SET_DESTDIR FALSE ) |
| 518 | SET( CPACK_PACKAGE_INSTALL_DIRECTORY "C:/hipSDK" ) |
| 519 | @@ -273,35 +266,37 @@ if(BUILD_CODE_COVERAGE) |
| 520 | |
| 521 | add_custom_target(coverage_analysis |
| 522 | COMMAND echo Coverage GTEST_FILTER=\${GTEST_FILTER} |
| 523 | - COMMAND ${coverage_test} --gtest_filter=\"\${GTEST_FILTER}\" |
| 524 | + COMMAND ${CMAKE_COMMAND} -E make_directory ./coverage/profraw |
| 525 | + COMMAND ${CMAKE_COMMAND} -E env LLVM_PROFILE_FILE="./coverage-report/profraw/hipblas-coverage_%m.profraw" ${coverage_test} --gtest_filter=\"\${GTEST_FILTER}\" |
| 526 | WORKING_DIRECTORY ${CMAKE_BINARY_DIR} |
| 527 | ) |
| 528 | |
| 529 | add_dependencies(coverage_analysis hipblas) |
| 530 | |
| 531 | - # |
| 532 | - # Prepare coverage output |
| 533 | - # This little script is generated because the option '--gcov-tool <program name>' of lcov cannot take arguments. |
| 534 | - # |
| 535 | - add_custom_target(coverage_output |
| 536 | - DEPENDS coverage_analysis |
| 537 | - COMMAND mkdir -p lcoverage |
| 538 | - COMMAND echo "\\#!/bin/bash" > llvm-gcov.sh |
| 539 | - COMMAND echo "\\# THIS FILE HAS BEEN GENERATED" >> llvm-gcov.sh |
| 540 | - COMMAND printf "exec /opt/rocm/llvm/bin/llvm-cov gcov $$\\@" >> llvm-gcov.sh |
| 541 | - COMMAND chmod +x llvm-gcov.sh |
| 542 | - ) |
| 543 | + find_program( |
| 544 | + LLVM_PROFDATA |
| 545 | + llvm-profdata |
| 546 | + REQUIRED |
| 547 | + HINTS ${ROCM_PATH}/llvm/bin |
| 548 | + PATHS /opt/rocm/llvm/bin |
| 549 | + ) |
| 550 | |
| 551 | - # |
| 552 | - # Generate coverage output. |
| 553 | - # |
| 554 | - add_custom_command(TARGET coverage_output |
| 555 | - COMMAND lcov --directory . --base-directory . --gcov-tool ${CMAKE_BINARY_DIR}/llvm-gcov.sh --capture -o lcoverage/raw_main_coverage.info |
| 556 | - COMMAND lcov --remove lcoverage/raw_main_coverage.info "'/opt/*'" "'/usr/*'" -o lcoverage/main_coverage.info |
| 557 | - COMMAND genhtml lcoverage/main_coverage.info --output-directory lcoverage |
| 558 | - ) |
| 559 | + find_program( |
| 560 | + LLVM_COV |
| 561 | + llvm-cov |
| 562 | + REQUIRED |
| 563 | + HINTS ${ROCM_PATH}/llvm/bin |
| 564 | + PATHS /opt/rocm/llvm/bin |
| 565 | + ) |
| 566 | |
| 567 | - add_custom_target(coverage DEPENDS coverage_output) |
| 568 | + add_custom_target( |
| 569 | + coverage |
| 570 | + DEPENDS coverage_analysis |
| 571 | + COMMAND ${LLVM_PROFDATA} merge -sparse ./coverage-report/profraw/hipblas-coverage_*.profraw -o ./coverage-report/hipblas.profdata |
| 572 | + COMMAND ${LLVM_COV} report -object ./library/src/libhipblas.so -instr-profile=./coverage-report/hipblas.profdata |
| 573 | + COMMAND ${LLVM_COV} show -object ./library/src/libhipblas.so -instr-profile=./coverage-report/hipblas.profdata -format=html -output-dir=coverage-report |
| 574 | + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} |
| 575 | + ) |
| 576 | |
| 577 | # |
| 578 | # Coverage cleanup |
| 579 | diff --git a/LICENSE.md b/LICENSE.md |
| 580 | index 588320c..1461bfb 100644 |
| 581 | --- a/LICENSE.md |
| 582 | +++ b/LICENSE.md |
| 583 | @@ -1,12 +1,26 @@ |
| 584 | MIT License |
| 585 | |
| 586 | -Copyright (C) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. |
| 587 | +Copyright (C) Advanced Micro Devices, Inc. |
| 588 | |
| 589 | -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: |
| 590 | +Permission is hereby granted, free of charge, to any person obtaining a copy |
| 591 | +of this software and associated documentation files (the "Software"), to deal |
| 592 | +in the Software without restriction, including without limitation the rights |
| 593 | +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 594 | +copies of the Software, and to permit persons to whom the Software is |
| 595 | +furnished to do so, subject to the following conditions: |
| 596 | |
| 597 | -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. |
| 598 | +The above copyright notice and this permission notice shall be included in all |
| 599 | +copies or substantial portions of the Software. |
| 600 | |
| 601 | -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| 602 | +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 603 | +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 604 | +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 605 | +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 606 | +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 607 | +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 608 | +SOFTWARE. |
| 609 | + |
| 610 | +--- |
| 611 | |
| 612 | This product includes software from copyright holders as shown below, and distributed under their license terms as specified. |
| 613 | |
| 614 | diff --git a/README.md b/README.md |
| 615 | index bce3811..5990bae 100644 |
| 616 | --- a/README.md |
| 617 | +++ b/README.md |
| 618 | @@ -33,22 +33,43 @@ cmake -DBUILD_DOCS=ON ... |
| 619 | |
| 620 | ## Build and install |
| 621 | |
| 622 | -1. Download the hipBLAS source code (clone this repository): |
| 623 | +1. Check out the hipBLAS code using either a sparse checkout or a full clone of the rocm-libraries repository. |
| 624 | + |
| 625 | + To limit your local checkout to only the hipBLAS project, configure ``sparse-checkout`` before cloning. |
| 626 | + This uses the Git partial clone feature (``--filter=blob:none``) to reduce how much data is downloaded. |
| 627 | + Use the following commands for a sparse checkout: |
| 628 | + |
| 629 | + ```bash |
| 630 | + |
| 631 | + git clone --no-checkout --filter=blob:none https://github.com/ROCm/rocm-libraries.git |
| 632 | + cd rocm-libraries |
| 633 | + git sparse-checkout init --cone |
| 634 | + git sparse-checkout set projects/hipblas # add projects/rocsolver projects/rocblas projects/hipblas-common to include dependencies |
| 635 | + git checkout develop # or use the branch you want to work with |
| 636 | + ``` |
| 637 | + |
| 638 | + To clone the entire rocm-libraries repository, use the following commands. This process takes more time, |
| 639 | + but is recommended if you want to work with a large number of libraries. |
| 640 | |
| 641 | ```bash |
| 642 | - git clone https://github.com/ROCmSoftwarePlatform/hipBLAS.git |
| 643 | + |
| 644 | + # Clone rocm-libraries, including hipBLAS, using Git |
| 645 | + git clone https://github.com/ROCm/rocm-libraries.git |
| 646 | + |
| 647 | + # Go to hipBLAS directory |
| 648 | + cd rocm-libraries/projects/hipblas |
| 649 | ``` |
| 650 | |
| 651 | ```note |
| 652 | hipBLAS requires specific versions of rocBLAS and rocSOLVER. Refer to |
| 653 | - [CMakeLists.txt](https://github.com/ROCmSoftwarePlatform/hipBLAS/blob/develop/library/CMakeLists.txt) |
| 654 | + [CMakeLists.txt](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hipblas/library/CMakeLists.txt) |
| 655 | for details. |
| 656 | ``` |
| 657 | |
| 658 | -2. Build hipBLAS and install it into `/opt/rocm/hipblas`: |
| 659 | +2. Build hipBLAS using the `install.sh` script and install it into `/opt/rocm/hipblas`: |
| 660 | |
| 661 | ```bash |
| 662 | - cd hipblas |
| 663 | + cd rocm-libraries/projects/hipblas |
| 664 | ./install.sh -i |
| 665 | ``` |
| 666 | |
| 667 | diff --git a/bump_develop_version.sh b/bump_develop_version.sh |
| 668 | index cdfcdad..b83ca98 100755 |
| 669 | --- a/bump_develop_version.sh |
| 670 | +++ b/bump_develop_version.sh |
| 671 | @@ -2,19 +2,23 @@ |
| 672 | |
| 673 | # For the develop branch, bump hipblas version and rocblas/rocsolver dependency versions |
| 674 | |
| 675 | -OLD_HIPBLAS_VERSION="2.3.0" |
| 676 | -NEW_HIPBLAS_VERSION="2.4.0" |
| 677 | +OLD_HIPBLAS_VERSION="3.0.0" |
| 678 | +NEW_HIPBLAS_VERSION="3.1.0" |
| 679 | |
| 680 | -OLD_MINIMUM_ROCBLAS_VERSION="4.3.0" |
| 681 | -NEW_MINIMUM_ROCBLAS_VERSION="4.4.0" |
| 682 | +OLD_MINIMUM_ROCBLAS_VERSION="5.0.0" |
| 683 | +NEW_MINIMUM_ROCBLAS_VERSION="5.1.0" |
| 684 | |
| 685 | -OLD_MINIMUM_ROCSOLVER_VERSION="3.27.0" |
| 686 | -NEW_MINIMUM_ROCSOLVER_VERSION="3.28.0" |
| 687 | +OLD_MINIMUM_ROCSOLVER_VERSION="3.30.0" |
| 688 | +NEW_MINIMUM_ROCSOLVER_VERSION="3.31.0" |
| 689 | |
| 690 | -OLD_SO_VERSION="hipblas_SOVERSION 2.3" |
| 691 | -NEW_SO_VERSION="hipblas_SOVERSION 2.4" |
| 692 | +OLD_MINIMUM_HIPBLAS_COMMON_VERSION="1.1.0" |
| 693 | +NEW_MINIMUM_HIPBLAS_COMMON_VERSION="1.3.0" |
| 694 | + |
| 695 | +OLD_SO_VERSION="hipblas_SOVERSION 3.0" |
| 696 | +NEW_SO_VERSION="hipblas_SOVERSION 3.1" |
| 697 | |
| 698 | sed -i "s/${OLD_HIPBLAS_VERSION}/${NEW_HIPBLAS_VERSION}/g" CMakeLists.txt |
| 699 | sed -i "s/${OLD_MINIMUM_ROCBLAS_VERSION}/${NEW_MINIMUM_ROCBLAS_VERSION}/g" CMakeLists.txt |
| 700 | sed -i "s/${OLD_MINIMUM_ROCSOLVER_VERSION}/${NEW_MINIMUM_ROCSOLVER_VERSION}/g" CMakeLists.txt |
| 701 | +sed -i "s/${OLD_MINIMUM_HIPBLAS_COMMON_VERSION}/${NEW_MINIMUM_HIPBLAS_COMMON_VERSION}/g" CMakeLists.txt |
| 702 | sed -i "s/${OLD_SO_VERSION}/${NEW_SO_VERSION}/g" library/CMakeLists.txt |
| 703 | diff --git a/clients/CMakeLists.txt b/clients/CMakeLists.txt |
| 704 | index 7cad5e1..f67828e 100644 |
| 705 | --- a/clients/CMakeLists.txt |
| 706 | +++ b/clients/CMakeLists.txt |
| 707 | @@ -1,5 +1,5 @@ |
| 708 | # ######################################################################## |
| 709 | -# Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved. |
| 710 | +# Copyright (C) 2016-2025 Advanced Micro Devices, Inc. All rights reserved. |
| 711 | # |
| 712 | # Permission is hereby granted, free of charge, to any person obtaining a copy |
| 713 | # of this software and associated documentation files (the "Software"), to deal |
| 714 | @@ -28,8 +28,8 @@ function( get_lapack lapack_lib lapack_inc ) |
| 715 | set( inc "${BUILD_DIR}/deps/deps-install/include" ) |
| 716 | set( ${cblas_inc} ${inc} PARENT_SCOPE ) |
| 717 | else() |
| 718 | - find_package( lapack REQUIRED CONFIG ) |
| 719 | - set( lib "lapack" ) |
| 720 | + find_package( LAPACK REQUIRED ) |
| 721 | + set( lib "${LAPACK_LIBRARIES}" ) |
| 722 | endif() |
| 723 | set( ${lapack_lib} ${lib} PARENT_SCOPE ) |
| 724 | endfunction( ) |
| 725 | @@ -52,12 +52,31 @@ function( get_cblas cblas_libs cblas_inc ) |
| 726 | set( ${cblas_inc} ${inc} PARENT_SCOPE ) |
| 727 | else() |
| 728 | find_package( cblas REQUIRED CONFIG ) |
| 729 | - set( libs cblas blas ) |
| 730 | + set( libs ${CBLAS_LIBRARIES} ) |
| 731 | endif() |
| 732 | endif() |
| 733 | set( ${cblas_libs} ${libs} PARENT_SCOPE ) |
| 734 | endfunction( ) |
| 735 | |
| 736 | +function( apply_omp_settings lib_target_ ) |
| 737 | + if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND TARGET OpenMP::OpenMP_CXX) |
| 738 | + set_target_properties( ${lib_target_} PROPERTIES |
| 739 | + BUILD_RPATH "${HIP_CLANG_ROOT}/lib" |
| 740 | + ) |
| 741 | + set_target_properties( ${lib_target_} PROPERTIES |
| 742 | + INSTALL_RPATH "$ORIGIN/../llvm/lib" |
| 743 | + ) |
| 744 | + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND TARGET OpenMP::omp) |
| 745 | + set_target_properties( ${lib_target_} PROPERTIES |
| 746 | + BUILD_RPATH "${HIP_CLANG_ROOT}/${openmp_LIB_DIR}" |
| 747 | + ) |
| 748 | + set_target_properties( ${lib_target_} PROPERTIES |
| 749 | + INSTALL_RPATH "$ORIGIN/../llvm/${openmp_LIB_DIR}" |
| 750 | + ) |
| 751 | + endif() |
| 752 | +endfunction() |
| 753 | + |
| 754 | + |
| 755 | # Consider removing this in the future |
| 756 | # This should appear before the project command, because it does not use FORCE |
| 757 | if( WIN32 ) |
| 758 | @@ -90,19 +109,34 @@ if( NOT WIN32 ) |
| 759 | set(hipblas_f90_source_clients_solver |
| 760 | include/hipblas_fortran_module.f90 |
| 761 | ) |
| 762 | + |
| 763 | + set (hipblas_f90_source |
| 764 | + ../library/src/hipblas_module.f90 |
| 765 | + ) |
| 766 | endif() |
| 767 | |
| 768 | if( BUILD_CLIENTS_TESTS OR BUILD_CLIENTS_BENCHMARKS OR BUILD_CLIENTS_SAMPLES ) |
| 769 | + # Create hipBLAS Fortran module |
| 770 | + if(NOT WIN32) |
| 771 | + # Set Fortran module output directory |
| 772 | + set(CMAKE_Fortran_MODULE_DIRECTORY ${PROJECT_BINARY_DIR}/include/hipblas) |
| 773 | + add_library(hipblas_fortran OBJECT ${hipblas_f90_source}) |
| 774 | + endif() |
| 775 | + |
| 776 | if( NOT WIN32 ) |
| 777 | if( BUILD_WITH_SOLVER ) |
| 778 | - add_library(hipblas_fortran_client STATIC ${hipblas_f90_source_clients_solver}) |
| 779 | + add_library(hipblas_fortran_client STATIC ${hipblas_f90_source_clients_solver} $<TARGET_OBJECTS:hipblas_fortran>) |
| 780 | else() |
| 781 | - add_library(hipblas_fortran_client STATIC ${hipblas_f90_source_clients_no_solver}) |
| 782 | + add_library(hipblas_fortran_client STATIC ${hipblas_f90_source_clients_no_solver} $<TARGET_OBJECTS:hipblas_fortran>) |
| 783 | endif() |
| 784 | - add_dependencies(hipblas_fortran_client hipblas_fortran) |
| 785 | endif() |
| 786 | - include_directories(${CMAKE_BINARY_DIR}/include/hipblas) |
| 787 | - include_directories(${CMAKE_BINARY_DIR}/include) |
| 788 | + |
| 789 | + if(SKIP_LIBRARY) |
| 790 | + include_directories(${HIPBLAS_LIBRARY_DIR}/include/hipblas) |
| 791 | + else() |
| 792 | + include_directories(${CMAKE_BINARY_DIR}/include/hipblas) |
| 793 | + include_directories(${CMAKE_BINARY_DIR}/include) |
| 794 | + endif() |
| 795 | endif( ) |
| 796 | |
| 797 | if( BUILD_CLIENTS_SAMPLES ) |
| 798 | @@ -114,19 +148,20 @@ if( BUILD_CLIENTS_BENCHMARKS OR BUILD_CLIENTS_TESTS) |
| 799 | set(THREADS_PREFER_PTHREAD_FLAG ON) |
| 800 | find_package(Threads REQUIRED) |
| 801 | |
| 802 | - # if it fails to find OpenMP compile and link flags in strange configurations it can just use non-parallel reference computation |
| 803 | - # if there is no omp.h to find the client compilation will fail and this should be obvious, used to be REQUIRED |
| 804 | - find_package(OpenMP) |
| 805 | + if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") |
| 806 | + # Look for openmp config in ROCm install to populate openmp_LIB_DIR and openmp_LIB_INSTALL_DIR |
| 807 | + find_package(OpenMP CONFIG PATHS "${HIP_CLANG_ROOT}/lib/cmake") |
| 808 | + endif() |
| 809 | |
| 810 | - if (TARGET OpenMP::OpenMP_CXX) |
| 811 | - set( COMMON_LINK_LIBS "OpenMP::OpenMP_CXX") |
| 812 | - if(HIP_PLATFORM STREQUAL amd) |
| 813 | - list( APPEND COMMON_LINK_LIBS "-L\"${HIP_CLANG_ROOT}/lib\"") |
| 814 | - if (NOT WIN32) |
| 815 | - list( APPEND COMMON_LINK_LIBS "-Wl,-rpath=${HIP_CLANG_ROOT}/lib -lomp") |
| 816 | - else() |
| 817 | - list( APPEND COMMON_LINK_LIBS "libomp") |
| 818 | - endif() |
| 819 | + if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND TARGET OpenMP::omp) |
| 820 | + set( COMMON_LINK_LIBS "OpenMP::omp") |
| 821 | + message(STATUS "Found openmp-config.cmake at ${OpenMP_DIR}") |
| 822 | + else() |
| 823 | + # if it fails to find OpenMP compile and link flags in strange configurations it can just use non-parallel reference computation |
| 824 | + # if there is no omp.h to find the client compilation will fail and this should be obvious, used to be REQUIRED |
| 825 | + find_package(OpenMP) |
| 826 | + if (TARGET OpenMP::OpenMP_CXX) |
| 827 | + set( COMMON_LINK_LIBS "OpenMP::OpenMP_CXX") |
| 828 | endif() |
| 829 | endif() |
| 830 | |
| 831 | @@ -184,7 +219,7 @@ if( BUILD_CLIENTS_BENCHMARKS OR BUILD_CLIENTS_TESTS) |
| 832 | NO_DEFAULT_PATH |
| 833 | ) |
| 834 | if (NOT BLAS_LIBRARY) |
| 835 | - find_package( OPENBLAS CONFIG REQUIRED ) |
| 836 | + find_package( OpenBLAS CONFIG REQUIRED ) |
| 837 | set( BLAS_LIBRARY OpenBLAS::OpenBLAS ) |
| 838 | set( BLAS_INCLUDE_DIR "" ) |
| 839 | endif() |
| 840 | @@ -195,8 +230,17 @@ if( BUILD_CLIENTS_BENCHMARKS OR BUILD_CLIENTS_TESTS) |
| 841 | set( BLIS_CPP ../common/blis_interface.cpp ) |
| 842 | endif() |
| 843 | |
| 844 | + if(EXISTS "${BUILD_DIR}/deps/deps-install/lib/libgtest.a") |
| 845 | + set( GTEST_ROOT "${BUILD_DIR}/deps/deps-install") |
| 846 | + endif() |
| 847 | + find_package( GTest REQUIRED ) |
| 848 | + |
| 849 | message(STATUS "Build Dir: ${BUILD_DIR}") |
| 850 | - message(STATUS "Linking Ref. Libs: ${BLAS_LIBRARY}") |
| 851 | + message(STATUS "Linking Libs: ${BLAS_LIBRARY}") |
| 852 | + |
| 853 | + if( NOT TARGET hipblas ) |
| 854 | + find_package( hipblas REQUIRED CONFIG PATHS ${HIPBLAS_LIBRARY_DIR} ) |
| 855 | + endif( ) |
| 856 | |
| 857 | if( BUILD_CLIENTS_TESTS ) |
| 858 | add_subdirectory( gtest ) |
| 859 | @@ -232,8 +276,14 @@ add_custom_command( OUTPUT "${HIPBLAS_GENTEST}" |
| 860 | DEPENDS common/hipblas_gentest.py |
| 861 | WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" ) |
| 862 | |
| 863 | +set( HIPBLAS_CLIENTS_README "${PROJECT_BINARY_DIR}/staging/hipblas_clients_readme.txt") |
| 864 | +add_custom_command( OUTPUT "${HIPBLAS_CLIENTS_README}" |
| 865 | + COMMAND ${CMAKE_COMMAND} -E copy hipblas_clients_readme.txt "${HIPBLAS_CLIENTS_README}" |
| 866 | + DEPENDS hipblas_clients_readme.txt |
| 867 | + WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" ) |
| 868 | + |
| 869 | |
| 870 | -add_custom_target( hipblas-common DEPENDS "${HIPBLAS_COMMON}" "${HIPBLAS_TEMPLATE}" "${HIPBLAS_SMOKE}" "${HIPBLAS_GENTEST}" ) |
| 871 | +add_custom_target( hipblas-clients-common DEPENDS "${HIPBLAS_COMMON}" "${HIPBLAS_TEMPLATE}" "${HIPBLAS_SMOKE}" "${HIPBLAS_GENTEST}" "${HIPBLAS_CLIENTS_README}" ) |
| 872 | |
| 873 | if( BUILD_CLIENTS_TESTS OR BUILD_CLIENTS_BENCHMARKS ) |
| 874 | rocm_install( |
| 875 | @@ -246,4 +296,10 @@ if( BUILD_CLIENTS_TESTS OR BUILD_CLIENTS_BENCHMARKS ) |
| 876 | DESTINATION "${CMAKE_INSTALL_BINDIR}" |
| 877 | COMPONENT clients-common |
| 878 | ) |
| 879 | + # this readme also keeps the hipblas-clients package from being empty; dpkg may otherwise auto-remove hipblas-clients together with its non-empty children |
| 880 | + rocm_install( |
| 881 | + FILES ${HIPBLAS_CLIENTS_README} |
| 882 | + DESTINATION "${CMAKE_INSTALL_BINDIR}" |
| 883 | + COMPONENT clients |
| 884 | + ) |
| 885 | endif() |
| 886 | diff --git a/clients/benchmarks/CMakeLists.txt b/clients/benchmarks/CMakeLists.txt |
| 887 | index d04d28e..5ed28e2 100644 |
| 888 | --- a/clients/benchmarks/CMakeLists.txt |
| 889 | +++ b/clients/benchmarks/CMakeLists.txt |
| 890 | @@ -1,5 +1,5 @@ |
| 891 | # ######################################################################## |
| 892 | -# Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved. |
| 893 | +# Copyright (C) 2016-2025 Advanced Micro Devices, Inc. All rights reserved. |
| 894 | # |
| 895 | # Permission is hereby granted, free of charge, to any person obtaining a copy |
| 896 | # of this software and associated documentation files (the "Software"), to deal |
| 897 | @@ -27,10 +27,6 @@ enable_language( Fortran ) |
| 898 | |
| 899 | set(hipblas_bench_source client.cpp) |
| 900 | |
| 901 | -if( NOT TARGET hipblas ) |
| 902 | - find_package( hipblas REQUIRED CONFIG PATHS /opt/rocm/hipblas ) |
| 903 | -endif( ) |
| 904 | - |
| 905 | set( hipblas_benchmark_common |
| 906 | ../common/utility.cpp |
| 907 | ../common/cblas_interface.cpp |
| 908 | @@ -43,26 +39,22 @@ set( hipblas_benchmark_common |
| 909 | ../common/near.cpp |
| 910 | ../common/arg_check.cpp |
| 911 | ../common/argument_model.cpp |
| 912 | - ../common/hipblas_template_specialization.cpp |
| 913 | ../common/host_alloc.cpp |
| 914 | ${BLIS_CPP} |
| 915 | ) |
| 916 | |
| 917 | -add_executable( hipblas-bench ${hipblas_bench_source} ${hipblas_benchmark_common} ) |
| 918 | -add_executable( hipblas_v2-bench ${hipblas_bench_source} ${hipblas_benchmark_common} ) |
| 919 | +if(NOT WIN32) |
| 920 | + add_executable( hipblas-bench ${hipblas_bench_source} ${hipblas_benchmark_common} $<TARGET_OBJECTS:hipblas_fortran_client>) |
| 921 | +else() |
| 922 | + add_executable( hipblas-bench ${hipblas_bench_source} ${hipblas_benchmark_common} ) |
| 923 | +endif() |
| 924 | |
| 925 | target_compile_features( hipblas-bench PRIVATE cxx_static_assert cxx_nullptr cxx_auto_type ) |
| 926 | -target_compile_features( hipblas_v2-bench PRIVATE cxx_static_assert cxx_nullptr cxx_auto_type ) |
| 927 | - |
| 928 | # Internal header includes |
| 929 | target_include_directories( hipblas-bench |
| 930 | PRIVATE |
| 931 | $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> |
| 932 | ) |
| 933 | -target_include_directories( hipblas_v2-bench |
| 934 | - PRIVATE |
| 935 | - $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> |
| 936 | -) |
| 937 | |
| 938 | # External header includes included as system files |
| 939 | target_include_directories( hipblas-bench |
| 940 | @@ -73,47 +65,28 @@ target_include_directories( hipblas-bench |
| 941 | $<BUILD_INTERFACE:${BLIS_INCLUDE_DIR}> |
| 942 | $<BUILD_INTERFACE:${FLAME_INCLUDE_DIR}> |
| 943 | ) |
| 944 | -target_include_directories( hipblas_v2-bench |
| 945 | - SYSTEM PRIVATE |
| 946 | - $<BUILD_INTERFACE:${HIP_INCLUDE_DIRS}> |
| 947 | - $<BUILD_INTERFACE:${CBLAS_INCLUDE_DIRS}> |
| 948 | - $<BUILD_INTERFACE:${BLAS_INCLUDE_DIR}> |
| 949 | - $<BUILD_INTERFACE:${BLIS_INCLUDE_DIR}> |
| 950 | - $<BUILD_INTERFACE:${FLAME_INCLUDE_DIR}> |
| 951 | -) |
| 952 | - |
| 953 | -target_link_libraries( hipblas-bench PRIVATE roc::hipblas ) |
| 954 | -target_link_libraries( hipblas_v2-bench PRIVATE roc::hipblas ) |
| 955 | |
| 956 | -if (NOT WIN32) |
| 957 | - target_link_libraries( hipblas-bench PRIVATE hipblas_fortran_client ) |
| 958 | - target_link_libraries( hipblas_v2-bench PRIVATE hipblas_fortran_client ) |
| 959 | -endif() |
| 960 | +target_link_libraries( hipblas-bench PRIVATE roc::hipblas GTest::gtest GTest::gtest_main ) |
| 961 | |
| 962 | # need mf16c flag for float->half conversion |
| 963 | target_compile_options( hipblas-bench PRIVATE -mf16c ) # -Wno-deprecated-declarations ) |
| 964 | -target_compile_options( hipblas_v2-bench PRIVATE -mf16c ) # -Wno-deprecated-declarations ) |
| 965 | |
| 966 | target_compile_options(hipblas-bench PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${COMMON_CXX_OPTIONS}>) |
| 967 | -target_compile_options(hipblas_v2-bench PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${COMMON_CXX_OPTIONS}>) |
| 968 | |
| 969 | target_compile_definitions( hipblas-bench PRIVATE HIPBLAS_BENCH ${COMMON_DEFINES} ${BLIS_DEFINES} ) |
| 970 | -target_compile_definitions( hipblas_v2-bench PRIVATE HIPBLAS_BENCH ${COMMON_DEFINES} ${BLIS_DEFINES} HIPBLAS_V2 ) |
| 971 | |
| 972 | target_link_libraries( hipblas-bench PRIVATE ${BLAS_LIBRARY} ${COMMON_LINK_LIBS} ) |
| 973 | -target_link_libraries( hipblas_v2-bench PRIVATE ${BLAS_LIBRARY} ${COMMON_LINK_LIBS} ) |
| 974 | +apply_omp_settings( hipblas-bench ) |
| 975 | + |
| 976 | if (NOT WIN32) |
| 977 | target_link_libraries( hipblas-bench PRIVATE stdc++fs ) |
| 978 | - target_link_libraries( hipblas_v2-bench PRIVATE stdc++fs ) |
| 979 | endif() |
| 980 | |
| 981 | if(HIP_PLATFORM STREQUAL amd) |
| 982 | target_link_libraries( hipblas-bench PRIVATE hip::host ) |
| 983 | - target_link_libraries( hipblas_v2-bench PRIVATE hip::host ) |
| 984 | |
| 985 | if( CUSTOM_TARGET ) |
| 986 | target_link_libraries( hipblas-bench PRIVATE hip::${CUSTOM_TARGET} ) |
| 987 | - target_link_libraries( hipblas_v2-bench PRIVATE hip::${CUSTOM_TARGET} ) |
| 988 | endif() |
| 989 | |
| 990 | else( ) |
| 991 | @@ -121,26 +94,15 @@ else( ) |
| 992 | PRIVATE |
| 993 | $<BUILD_INTERFACE:${CUDA_INCLUDE_DIRS}> |
| 994 | ) |
| 995 | - target_include_directories( hipblas_v2-bench |
| 996 | - PRIVATE |
| 997 | - $<BUILD_INTERFACE:${CUDA_INCLUDE_DIRS}> |
| 998 | - ) |
| 999 | |
| 1000 | target_link_libraries( hipblas-bench PRIVATE ${CUDA_LIBRARIES} ) |
| 1001 | - target_link_libraries( hipblas_v2-bench PRIVATE ${CUDA_LIBRARIES} ) |
| 1002 | endif( ) |
| 1003 | |
| 1004 | set_target_properties( hipblas-bench PROPERTIES |
| 1005 | CXX_EXTENSIONS OFF |
| 1006 | RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" |
| 1007 | ) |
| 1008 | -set_target_properties( hipblas_v2-bench PROPERTIES |
| 1009 | - CXX_EXTENSIONS OFF |
| 1010 | - RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" |
| 1011 | -) |
| 1012 | |
| 1013 | -add_dependencies( hipblas-bench hipblas-common ) |
| 1014 | -add_dependencies( hipblas_v2-bench hipblas-common ) |
| 1015 | +add_dependencies( hipblas-bench hipblas-clients-common ) |
| 1016 | |
| 1017 | rocm_install(TARGETS hipblas-bench COMPONENT benchmarks) |
| 1018 | -rocm_install(TARGETS hipblas_v2-bench COMPONENT benchmarks) |
| 1019 | diff --git a/clients/benchmarks/client.cpp b/clients/benchmarks/client.cpp |
| 1020 | index 2aebc63..b412224 100644 |
| 1021 | --- a/clients/benchmarks/client.cpp |
| 1022 | +++ b/clients/benchmarks/client.cpp |
| 1023 | @@ -1,5 +1,5 @@ |
| 1024 | /* ************************************************************************ |
| 1025 | - * Copyright (C) 2016-2023 Advanced Micro Devices, Inc. All rights reserved. |
| 1026 | + * Copyright (C) 2016-2025 Advanced Micro Devices, Inc. All rights reserved. |
| 1027 | * |
| 1028 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
| 1029 | * of this software and associated documentation files (the "Software"), to deal |
| 1030 | @@ -277,7 +277,7 @@ try |
| 1031 | "Options: h,s,d,c,z,f16_r,f32_r,f64_r,bf16_r,f32_c,f64_c,i8_r,i32_r") |
| 1032 | |
| 1033 | ("compute_type_gemm", |
| 1034 | - value<std::string>(&compute_type_gemm), "Precision of computation for gemm_ex with HIPBLAS_V2 define" |
| 1035 | + value<std::string>(&compute_type_gemm), "Precision of computation for gemm_ex" |
| 1036 | "Options: c16f,c16f_pedantic,c32f,c32f_pedantic,c32f_fast_16f,c32f_fast_16bf,c32f_fast_tf32,c64f,c64f_pedantic,c32i,c32i_pedantic") |
| 1037 | |
| 1038 | ("initialization", |
| 1039 | @@ -369,6 +369,10 @@ try |
| 1040 | value<int32_t>(&api)->default_value(0), |
| 1041 | "Use API, supercedes fortran flag (0==C, 1==C_64, ...)") |
| 1042 | |
| 1043 | + ("workspace", |
| 1044 | + value<size_t>(&arg.user_allocated_workspace)->default_value(0), |
| 1045 | + "Set workspace available in handle using xxblasSetWorkspace() API after handle creation") |
| 1046 | + |
| 1047 | ("help,h", "produces this help message"); |
| 1048 | |
| 1049 | //("version", "Prints the version number"); |
| 1050 | @@ -418,30 +422,13 @@ try |
| 1051 | return hipblas_bench_datafile(); |
| 1052 | |
| 1053 | std::transform(precision.begin(), precision.end(), precision.begin(), ::tolower); |
| 1054 | - auto prec = string2hipblas_datatype(precision); |
| 1055 | - if(prec == HIPBLAS_DATATYPE_INVALID) |
| 1056 | - throw std::invalid_argument("Invalid value for --precision " + precision); |
| 1057 | - |
| 1058 | - arg.a_type = a_type == "" ? prec : string2hipblas_datatype(a_type); |
| 1059 | - if(arg.a_type == HIPBLAS_DATATYPE_INVALID) |
| 1060 | - throw std::invalid_argument("Invalid value for --a_type " + a_type); |
| 1061 | - |
| 1062 | - arg.b_type = b_type == "" ? prec : string2hipblas_datatype(b_type); |
| 1063 | - if(arg.b_type == HIPBLAS_DATATYPE_INVALID) |
| 1064 | - throw std::invalid_argument("Invalid value for --b_type " + b_type); |
| 1065 | - |
| 1066 | - arg.c_type = c_type == "" ? prec : string2hipblas_datatype(c_type); |
| 1067 | - if(arg.c_type == HIPBLAS_DATATYPE_INVALID) |
| 1068 | - throw std::invalid_argument("Invalid value for --c_type " + c_type); |
| 1069 | - |
| 1070 | - arg.d_type = d_type == "" ? prec : string2hipblas_datatype(d_type); |
| 1071 | - if(arg.d_type == HIPBLAS_DATATYPE_INVALID) |
| 1072 | - throw std::invalid_argument("Invalid value for --d_type " + d_type); |
| 1073 | - |
| 1074 | - arg.compute_type = compute_type == "" ? prec : string2hipblas_datatype(compute_type); |
| 1075 | - if(arg.compute_type == HIPBLAS_DATATYPE_INVALID) |
| 1076 | - throw std::invalid_argument("Invalid value for --compute_type " + compute_type); |
| 1077 | + auto prec = string2hip_datatype(precision); |
| 1078 | |
| 1079 | + arg.a_type = a_type == "" ? prec : string2hip_datatype(a_type); |
| 1080 | + arg.b_type = b_type == "" ? prec : string2hip_datatype(b_type); |
| 1081 | + arg.c_type = c_type == "" ? prec : string2hip_datatype(c_type); |
| 1082 | + arg.d_type = d_type == "" ? prec : string2hip_datatype(d_type); |
| 1083 | + arg.compute_type = compute_type == "" ? prec : string2hip_datatype(compute_type); |
| 1084 | arg.compute_type_gemm = string2hipblas_computetype(compute_type_gemm); |
| 1085 | |
| 1086 | arg.initialization = string2hipblas_initialization(initialization); |
| 1087 | diff --git a/clients/cmake/build-options.cmake b/clients/cmake/build-options.cmake |
| 1088 | index 25bb314..44e0b32 100644 |
| 1089 | --- a/clients/cmake/build-options.cmake |
| 1090 | +++ b/clients/cmake/build-options.cmake |
| 1091 | @@ -26,5 +26,3 @@ if( HIP_PLATFORM STREQUAL nvidia ) |
| 1092 | else() |
| 1093 | option( LINK_BLIS "Link AOCL Blis reference library" ON ) |
| 1094 | endif() |
| 1095 | - |
| 1096 | - |
| 1097 | diff --git a/clients/common/cblas_interface.cpp b/clients/common/cblas_interface.cpp |
| 1098 | index 2ccc666..f65b25c 100644 |
| 1099 | --- a/clients/common/cblas_interface.cpp |
| 1100 | +++ b/clients/common/cblas_interface.cpp |
| 1101 | @@ -1,5 +1,5 @@ |
| 1102 | /* ************************************************************************ |
| 1103 | - * Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved. |
| 1104 | + * Copyright (C) 2016-2025 Advanced Micro Devices, Inc. All rights reserved. |
| 1105 | * |
| 1106 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
| 1107 | * of this software and associated documentation files (the "Software"), to deal |
| 1108 | @@ -46,14 +46,15 @@ extern "C" { |
| 1109 | |
| 1110 | void spotrf_(char* uplo, int64_t* m, float* A, int64_t* lda, int64_t* info); |
| 1111 | void dpotrf_(char* uplo, int64_t* m, double* A, int64_t* lda, int64_t* info); |
| 1112 | -void cpotrf_(char* uplo, int64_t* m, hipblasComplex* A, int64_t* lda, int64_t* info); |
| 1113 | -void zpotrf_(char* uplo, int64_t* m, hipblasDoubleComplex* A, int64_t* lda, int64_t* info); |
| 1114 | +void cpotrf_(char* uplo, int64_t* m, std::complex<float>* A, int64_t* lda, int64_t* info); |
| 1115 | +void zpotrf_(char* uplo, int64_t* m, std::complex<double>* A, int64_t* lda, int64_t* info); |
| 1116 | |
| 1117 | void sgetrf_(int64_t* m, int64_t* n, float* A, int64_t* lda, int64_t* ipiv, int64_t* info); |
| 1118 | void dgetrf_(int64_t* m, int64_t* n, double* A, int64_t* lda, int64_t* ipiv, int64_t* info); |
| 1119 | -void cgetrf_(int64_t* m, int64_t* n, hipblasComplex* A, int64_t* lda, int64_t* ipiv, int64_t* info); |
| 1120 | +void cgetrf_( |
| 1121 | + int64_t* m, int64_t* n, std::complex<float>* A, int64_t* lda, int64_t* ipiv, int64_t* info); |
| 1122 | void zgetrf_( |
| 1123 | - int64_t* m, int64_t* n, hipblasDoubleComplex* A, int64_t* lda, int64_t* ipiv, int64_t* info); |
| 1124 | + int64_t* m, int64_t* n, std::complex<double>* A, int64_t* lda, int64_t* ipiv, int64_t* info); |
| 1125 | |
| 1126 | void sgetrs_(char* trans, |
| 1127 | int64_t* n, |
| 1128 | @@ -73,22 +74,22 @@ void dgetrs_(char* trans, |
| 1129 | double* B, |
| 1130 | int64_t* ldb, |
| 1131 | int64_t* info); |
| 1132 | -void cgetrs_(char* trans, |
| 1133 | - int64_t* n, |
| 1134 | - int64_t* nrhs, |
| 1135 | - hipblasComplex* A, |
| 1136 | - int64_t* lda, |
| 1137 | - int64_t* ipiv, |
| 1138 | - hipblasComplex* B, |
| 1139 | - int64_t* ldb, |
| 1140 | - int64_t* info); |
| 1141 | +void cgetrs_(char* trans, |
| 1142 | + int64_t* n, |
| 1143 | + int64_t* nrhs, |
| 1144 | + std::complex<float>* A, |
| 1145 | + int64_t* lda, |
| 1146 | + int64_t* ipiv, |
| 1147 | + std::complex<float>* B, |
| 1148 | + int64_t* ldb, |
| 1149 | + int64_t* info); |
| 1150 | void zgetrs_(char* trans, |
| 1151 | int64_t* n, |
| 1152 | int64_t* nrhs, |
| 1153 | - hipblasDoubleComplex* A, |
| 1154 | + std::complex<double>* A, |
| 1155 | int64_t* lda, |
| 1156 | int64_t* ipiv, |
| 1157 | - hipblasDoubleComplex* B, |
| 1158 | + std::complex<double>* B, |
| 1159 | int64_t* ldb, |
| 1160 | int64_t* info); |
| 1161 | |
| 1162 | @@ -101,18 +102,18 @@ void dgetri_(int64_t* n, |
| 1163 | double* work, |
| 1164 | int64_t* lwork, |
| 1165 | int64_t* info); |
| 1166 | -void cgetri_(int64_t* n, |
| 1167 | - hipblasComplex* A, |
| 1168 | - int64_t* lda, |
| 1169 | - int64_t* ipiv, |
| 1170 | - hipblasComplex* work, |
| 1171 | - int64_t* lwork, |
| 1172 | - int64_t* info); |
| 1173 | +void cgetri_(int64_t* n, |
| 1174 | + std::complex<float>* A, |
| 1175 | + int64_t* lda, |
| 1176 | + int64_t* ipiv, |
| 1177 | + std::complex<float>* work, |
| 1178 | + int64_t* lwork, |
| 1179 | + int64_t* info); |
| 1180 | void zgetri_(int64_t* n, |
| 1181 | - hipblasDoubleComplex* A, |
| 1182 | + std::complex<double>* A, |
| 1183 | int64_t* lda, |
| 1184 | int64_t* ipiv, |
| 1185 | - hipblasDoubleComplex* work, |
| 1186 | + std::complex<double>* work, |
| 1187 | int64_t* lwork, |
| 1188 | int64_t* info); |
| 1189 | |
| 1190 | @@ -132,20 +133,20 @@ void dgeqrf_(int64_t* m, |
| 1191 | double* work, |
| 1192 | int64_t* lwork, |
| 1193 | int64_t* info); |
| 1194 | -void cgeqrf_(int64_t* m, |
| 1195 | - int64_t* n, |
| 1196 | - hipblasComplex* A, |
| 1197 | - int64_t* lda, |
| 1198 | - hipblasComplex* tau, |
| 1199 | - hipblasComplex* work, |
| 1200 | - int64_t* lwork, |
| 1201 | - int64_t* info); |
| 1202 | +void cgeqrf_(int64_t* m, |
| 1203 | + int64_t* n, |
| 1204 | + std::complex<float>* A, |
| 1205 | + int64_t* lda, |
| 1206 | + std::complex<float>* tau, |
| 1207 | + std::complex<float>* work, |
| 1208 | + int64_t* lwork, |
| 1209 | + int64_t* info); |
| 1210 | void zgeqrf_(int64_t* m, |
| 1211 | int64_t* n, |
| 1212 | - hipblasDoubleComplex* A, |
| 1213 | + std::complex<double>* A, |
| 1214 | int64_t* lda, |
| 1215 | - hipblasDoubleComplex* tau, |
| 1216 | - hipblasDoubleComplex* work, |
| 1217 | + std::complex<double>* tau, |
| 1218 | + std::complex<double>* work, |
| 1219 | int64_t* lwork, |
| 1220 | int64_t* info); |
| 1221 | |
| 1222 | @@ -171,80 +172,80 @@ void dgels_(char* trans, |
| 1223 | double* work, |
| 1224 | int64_t* lwork, |
| 1225 | int64_t* info); |
| 1226 | -void cgels_(char* trans, |
| 1227 | - int64_t* m, |
| 1228 | - int64_t* n, |
| 1229 | - int64_t* nrhs, |
| 1230 | - hipblasComplex* A, |
| 1231 | - int64_t* lda, |
| 1232 | - hipblasComplex* B, |
| 1233 | - int64_t* ldb, |
| 1234 | - hipblasComplex* work, |
| 1235 | - int64_t* lwork, |
| 1236 | - int64_t* info); |
| 1237 | +void cgels_(char* trans, |
| 1238 | + int64_t* m, |
| 1239 | + int64_t* n, |
| 1240 | + int64_t* nrhs, |
| 1241 | + std::complex<float>* A, |
| 1242 | + int64_t* lda, |
| 1243 | + std::complex<float>* B, |
| 1244 | + int64_t* ldb, |
| 1245 | + std::complex<float>* work, |
| 1246 | + int64_t* lwork, |
| 1247 | + int64_t* info); |
| 1248 | void zgels_(char* trans, |
| 1249 | int64_t* m, |
| 1250 | int64_t* n, |
| 1251 | int64_t* nrhs, |
| 1252 | - hipblasDoubleComplex* A, |
| 1253 | + std::complex<double>* A, |
| 1254 | int64_t* lda, |
| 1255 | - hipblasDoubleComplex* B, |
| 1256 | + std::complex<double>* B, |
| 1257 | int64_t* ldb, |
| 1258 | - hipblasDoubleComplex* work, |
| 1259 | + std::complex<double>* work, |
| 1260 | int64_t* lwork, |
| 1261 | int64_t* info); |
| 1262 | |
| 1263 | /* |
| 1264 | void strtri_(char* uplo, char* diag, int64_t* n, float* A, int64_t* lda, int64_t* info); |
| 1265 | void dtrtri_(char* uplo, char* diag, int64_t* n, double* A, int64_t* lda, int64_t* info); |
| 1266 | -void ctrtri_(char* uplo, char* diag, int64_t* n, hipblasComplex* A, int64_t* lda, int64_t* info); |
| 1267 | -void ztrtri_(char* uplo, char* diag, int64_t* n, hipblasDoubleComplex* A, int64_t* lda, int64_t* info); |
| 1268 | +void ctrtri_(char* uplo, char* diag, int64_t* n, std::complex<float>* A, int64_t* lda, int64_t* info); |
| 1269 | +void ztrtri_(char* uplo, char* diag, int64_t* n, std::complex<double>* A, int64_t* lda, int64_t* info); |
| 1270 | |
| 1271 | void cspr_( |
| 1272 | - char* uplo, int64_t* n, hipblasComplex* alpha, hipblasComplex* x, int64_t* incx, hipblasComplex* A); |
| 1273 | + char* uplo, int64_t* n, std::complex<float>* alpha, std::complex<float>* x, int64_t* incx, std::complex<float>* A); |
| 1274 | |
| 1275 | void zspr_(char* uplo, |
| 1276 | int64_t* n, |
| 1277 | - hipblasDoubleComplex* alpha, |
| 1278 | - hipblasDoubleComplex* x, |
| 1279 | + std::complex<double>* alpha, |
| 1280 | + std::complex<double>* x, |
| 1281 | int64_t* incx, |
| 1282 | - hipblasDoubleComplex* A); |
| 1283 | + std::complex<double>* A); |
| 1284 | |
| 1285 | void csyr_(char* uplo, |
| 1286 | int64_t* n, |
| 1287 | - hipblasComplex* alpha, |
| 1288 | - hipblasComplex* x, |
| 1289 | + std::complex<float>* alpha, |
| 1290 | + std::complex<float>* x, |
| 1291 | int64_t* incx, |
| 1292 | - hipblasComplex* a, |
| 1293 | + std::complex<float>* a, |
| 1294 | int64_t* lda); |
| 1295 | void zsyr_(char* uplo, |
| 1296 | int64_t* n, |
| 1297 | - hipblasDoubleComplex* alpha, |
| 1298 | - hipblasDoubleComplex* x, |
| 1299 | + std::complex<double>* alpha, |
| 1300 | + std::complex<double>* x, |
| 1301 | int64_t* incx, |
| 1302 | - hipblasDoubleComplex* a, |
| 1303 | + std::complex<double>* a, |
| 1304 | int64_t* lda); |
| 1305 | |
| 1306 | void csymv_(char* uplo, |
| 1307 | int64_t* n, |
| 1308 | - hipblasComplex* alpha, |
| 1309 | - hipblasComplex* A, |
| 1310 | + std::complex<float>* alpha, |
| 1311 | + std::complex<float>* A, |
| 1312 | int64_t* lda, |
| 1313 | - hipblasComplex* x, |
| 1314 | + std::complex<float>* x, |
| 1315 | int64_t* incx, |
| 1316 | - hipblasComplex* beta, |
| 1317 | - hipblasComplex* y, |
| 1318 | + std::complex<float>* beta, |
| 1319 | + std::complex<float>* y, |
| 1320 | int64_t* incy); |
| 1321 | |
| 1322 | void zsymv_(char* uplo, |
| 1323 | int64_t* n, |
| 1324 | - hipblasDoubleComplex* alpha, |
| 1325 | - hipblasDoubleComplex* A, |
| 1326 | + std::complex<double>* alpha, |
| 1327 | + std::complex<double>* A, |
| 1328 | int64_t* lda, |
| 1329 | - hipblasDoubleComplex* x, |
| 1330 | + std::complex<double>* x, |
| 1331 | int64_t* incx, |
| 1332 | - hipblasDoubleComplex* beta, |
| 1333 | - hipblasDoubleComplex* y, |
| 1334 | + std::complex<double>* beta, |
| 1335 | + std::complex<double>* y, |
| 1336 | int64_t* incy); |
| 1337 | */ |
| 1338 | |
| 1339 | @@ -380,22 +381,22 @@ void ref_axpy<double, double>( |
| 1340 | } |
| 1341 | |
| 1342 | template <> |
| 1343 | -void ref_axpy<hipblasComplex, hipblasComplex>(int64_t n, |
| 1344 | - const hipblasComplex alpha, |
| 1345 | - const hipblasComplex* x, |
| 1346 | - int64_t incx, |
| 1347 | - hipblasComplex* y, |
| 1348 | - int64_t incy) |
| 1349 | +void ref_axpy<std::complex<float>, std::complex<float>>(int64_t n, |
| 1350 | + const std::complex<float> alpha, |
| 1351 | + const std::complex<float>* x, |
| 1352 | + int64_t incx, |
| 1353 | + std::complex<float>* y, |
| 1354 | + int64_t incy) |
| 1355 | { |
| 1356 | cblas_caxpy(n, &alpha, x, incx, y, incy); |
| 1357 | } |
| 1358 | |
| 1359 | template <> |
| 1360 | -void ref_axpy<hipblasDoubleComplex, hipblasDoubleComplex>(int64_t n, |
| 1361 | - const hipblasDoubleComplex alpha, |
| 1362 | - const hipblasDoubleComplex* x, |
| 1363 | +void ref_axpy<std::complex<double>, std::complex<double>>(int64_t n, |
| 1364 | + const std::complex<double> alpha, |
| 1365 | + const std::complex<double>* x, |
| 1366 | int64_t incx, |
| 1367 | - hipblasDoubleComplex* y, |
| 1368 | + std::complex<double>* y, |
| 1369 | int64_t incy) |
| 1370 | { |
| 1371 | cblas_zaxpy(n, &alpha, x, incx, y, incy); |
| 1372 | @@ -515,33 +516,36 @@ void ref_scal<double>(int64_t n, const double alpha, double* x, int64_t incx) |
| 1373 | } |
| 1374 | |
| 1375 | template <> |
| 1376 | -void ref_scal<hipblasComplex>(int64_t n, |
| 1377 | - const hipblasComplex alpha, |
| 1378 | - hipblasComplex* x, |
| 1379 | - int64_t incx) |
| 1380 | +void ref_scal<std::complex<float>>(int64_t n, |
| 1381 | + const std::complex<float> alpha, |
| 1382 | + std::complex<float>* x, |
| 1383 | + int64_t incx) |
| 1384 | { |
| 1385 | cblas_cscal(n, &alpha, x, incx); |
| 1386 | } |
| 1387 | |
| 1388 | template <> |
| 1389 | -void ref_scal<hipblasComplex, float>(int64_t n, const float alpha, hipblasComplex* x, int64_t incx) |
| 1390 | +void ref_scal<std::complex<float>, float>(int64_t n, |
| 1391 | + const float alpha, |
| 1392 | + std::complex<float>* x, |
| 1393 | + int64_t incx) |
| 1394 | { |
| 1395 | cblas_csscal(n, alpha, x, incx); |
| 1396 | } |
| 1397 | |
| 1398 | template <> |
| 1399 | -void ref_scal<hipblasDoubleComplex>(int64_t n, |
| 1400 | - const hipblasDoubleComplex alpha, |
| 1401 | - hipblasDoubleComplex* x, |
| 1402 | +void ref_scal<std::complex<double>>(int64_t n, |
| 1403 | + const std::complex<double> alpha, |
| 1404 | + std::complex<double>* x, |
| 1405 | int64_t incx) |
| 1406 | { |
| 1407 | cblas_zscal(n, &alpha, x, incx); |
| 1408 | } |
| 1409 | |
| 1410 | template <> |
| 1411 | -void ref_scal<hipblasDoubleComplex, double>(int64_t n, |
| 1412 | +void ref_scal<std::complex<double>, double>(int64_t n, |
| 1413 | const double alpha, |
| 1414 | - hipblasDoubleComplex* x, |
| 1415 | + std::complex<double>* x, |
| 1416 | int64_t incx) |
| 1417 | { |
| 1418 | cblas_zdscal(n, alpha, x, incx); |
| 1419 | @@ -561,15 +565,15 @@ void ref_copy<double>(int64_t n, double* x, int64_t incx, double* y, int64_t inc |
| 1420 | } |
| 1421 | |
| 1422 | template <> |
| 1423 | -void ref_copy<hipblasComplex>( |
| 1424 | - int64_t n, hipblasComplex* x, int64_t incx, hipblasComplex* y, int64_t incy) |
| 1425 | +void ref_copy<std::complex<float>>( |
| 1426 | + int64_t n, std::complex<float>* x, int64_t incx, std::complex<float>* y, int64_t incy) |
| 1427 | { |
| 1428 | cblas_ccopy(n, x, incx, y, incy); |
| 1429 | } |
| 1430 | |
| 1431 | template <> |
| 1432 | -void ref_copy<hipblasDoubleComplex>( |
| 1433 | - int64_t n, hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex* y, int64_t incy) |
| 1434 | +void ref_copy<std::complex<double>>( |
| 1435 | + int64_t n, std::complex<double>* x, int64_t incx, std::complex<double>* y, int64_t incy) |
| 1436 | { |
| 1437 | cblas_zcopy(n, x, incx, y, incy); |
| 1438 | } |
| 1439 | @@ -588,15 +592,15 @@ void ref_swap<double>(int64_t n, double* x, int64_t incx, double* y, int64_t inc |
| 1440 | } |
| 1441 | |
| 1442 | template <> |
| 1443 | -void ref_swap<hipblasComplex>( |
| 1444 | - int64_t n, hipblasComplex* x, int64_t incx, hipblasComplex* y, int64_t incy) |
| 1445 | +void ref_swap<std::complex<float>>( |
| 1446 | + int64_t n, std::complex<float>* x, int64_t incx, std::complex<float>* y, int64_t incy) |
| 1447 | { |
| 1448 | cblas_cswap(n, x, incx, y, incy); |
| 1449 | } |
| 1450 | |
| 1451 | template <> |
| 1452 | -void ref_swap<hipblasDoubleComplex>( |
| 1453 | - int64_t n, hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex* y, int64_t incy) |
| 1454 | +void ref_swap<std::complex<double>>( |
| 1455 | + int64_t n, std::complex<double>* x, int64_t incx, std::complex<double>* y, int64_t incy) |
| 1456 | { |
| 1457 | cblas_zswap(n, x, incx, y, incy); |
| 1458 | } |
| 1459 | @@ -659,23 +663,23 @@ void ref_dot<double>( |
| 1460 | } |
| 1461 | |
| 1462 | template <> |
| 1463 | -void ref_dot<hipblasComplex>(int64_t n, |
| 1464 | - const hipblasComplex* x, |
| 1465 | - int64_t incx, |
| 1466 | - const hipblasComplex* y, |
| 1467 | - int64_t incy, |
| 1468 | - hipblasComplex* result) |
| 1469 | +void ref_dot<std::complex<float>>(int64_t n, |
| 1470 | + const std::complex<float>* x, |
| 1471 | + int64_t incx, |
| 1472 | + const std::complex<float>* y, |
| 1473 | + int64_t incy, |
| 1474 | + std::complex<float>* result) |
| 1475 | { |
| 1476 | cblas_cdotu_sub(n, x, incx, y, incy, result); |
| 1477 | } |
| 1478 | |
| 1479 | template <> |
| 1480 | -void ref_dot<hipblasDoubleComplex>(int64_t n, |
| 1481 | - const hipblasDoubleComplex* x, |
| 1482 | +void ref_dot<std::complex<double>>(int64_t n, |
| 1483 | + const std::complex<double>* x, |
| 1484 | int64_t incx, |
| 1485 | - const hipblasDoubleComplex* y, |
| 1486 | + const std::complex<double>* y, |
| 1487 | int64_t incy, |
| 1488 | - hipblasDoubleComplex* result) |
| 1489 | + std::complex<double>* result) |
| 1490 | { |
| 1491 | cblas_zdotu_sub(n, x, incx, y, incy, result); |
| 1492 | } |
| 1493 | @@ -721,23 +725,23 @@ void ref_dotc<double>( |
| 1494 | } |
| 1495 | |
| 1496 | template <> |
| 1497 | -void ref_dotc<hipblasComplex>(int64_t n, |
| 1498 | - const hipblasComplex* x, |
| 1499 | - int64_t incx, |
| 1500 | - const hipblasComplex* y, |
| 1501 | - int64_t incy, |
| 1502 | - hipblasComplex* result) |
| 1503 | +void ref_dotc<std::complex<float>>(int64_t n, |
| 1504 | + const std::complex<float>* x, |
| 1505 | + int64_t incx, |
| 1506 | + const std::complex<float>* y, |
| 1507 | + int64_t incy, |
| 1508 | + std::complex<float>* result) |
| 1509 | { |
| 1510 | cblas_cdotc_sub(n, x, incx, y, incy, result); |
| 1511 | } |
| 1512 | |
| 1513 | template <> |
| 1514 | -void ref_dotc<hipblasDoubleComplex>(int64_t n, |
| 1515 | - const hipblasDoubleComplex* x, |
| 1516 | +void ref_dotc<std::complex<double>>(int64_t n, |
| 1517 | + const std::complex<double>* x, |
| 1518 | int64_t incx, |
| 1519 | - const hipblasDoubleComplex* y, |
| 1520 | + const std::complex<double>* y, |
| 1521 | int64_t incy, |
| 1522 | - hipblasDoubleComplex* result) |
| 1523 | + std::complex<double>* result) |
| 1524 | { |
| 1525 | cblas_zdotc_sub(n, x, incx, y, incy, result); |
| 1526 | } |
| 1527 | @@ -790,17 +794,17 @@ void ref_nrm2<double, double>(int64_t n, const double* x, int64_t incx, double* |
| 1528 | } |
| 1529 | |
| 1530 | template <> |
| 1531 | -void ref_nrm2<hipblasComplex, float>(int64_t n, |
| 1532 | - const hipblasComplex* x, |
| 1533 | - int64_t incx, |
| 1534 | - float* result) |
| 1535 | +void ref_nrm2<std::complex<float>, float>(int64_t n, |
| 1536 | + const std::complex<float>* x, |
| 1537 | + int64_t incx, |
| 1538 | + float* result) |
| 1539 | { |
| 1540 | *result = cblas_scnrm2(n, x, incx); |
| 1541 | } |
| 1542 | |
| 1543 | template <> |
| 1544 | -void ref_nrm2<hipblasDoubleComplex, double>(int64_t n, |
| 1545 | - const hipblasDoubleComplex* x, |
| 1546 | +void ref_nrm2<std::complex<double>, double>(int64_t n, |
| 1547 | + const std::complex<double>* x, |
| 1548 | int64_t incx, |
| 1549 | double* result) |
| 1550 | { |
| 1551 | @@ -812,37 +816,37 @@ void ref_nrm2<hipblasDoubleComplex, double>(int64_t n, |
| 1552 | /////////////////// |
| 1553 | // LAPACK fortran library functionality |
| 1554 | extern "C" { |
| 1555 | -void crot_(const int64_t* n, |
| 1556 | - hipblasComplex* cx, |
| 1557 | - const int64_t* incx, |
| 1558 | - hipblasComplex* cy, |
| 1559 | - const int64_t* incy, |
| 1560 | - const float* c, |
| 1561 | - const hipblasComplex* s); |
| 1562 | -void csrot_(const int64_t* n, |
| 1563 | - hipblasComplex* cx, |
| 1564 | - const int64_t* incx, |
| 1565 | - hipblasComplex* cy, |
| 1566 | - const int64_t* incy, |
| 1567 | - const float* c, |
| 1568 | - const float* s); |
| 1569 | +void crot_(const int64_t* n, |
| 1570 | + std::complex<float>* cx, |
| 1571 | + const int64_t* incx, |
| 1572 | + std::complex<float>* cy, |
| 1573 | + const int64_t* incy, |
| 1574 | + const float* c, |
| 1575 | + const std::complex<float>* s); |
| 1576 | +void csrot_(const int64_t* n, |
| 1577 | + std::complex<float>* cx, |
| 1578 | + const int64_t* incx, |
| 1579 | + std::complex<float>* cy, |
| 1580 | + const int64_t* incy, |
| 1581 | + const float* c, |
| 1582 | + const float* s); |
| 1583 | void zrot_(const int64_t* n, |
| 1584 | - hipblasDoubleComplex* cx, |
| 1585 | + std::complex<double>* cx, |
| 1586 | const int64_t* incx, |
| 1587 | - hipblasDoubleComplex* cy, |
| 1588 | + std::complex<double>* cy, |
| 1589 | const int64_t* incy, |
| 1590 | const double* c, |
| 1591 | - const hipblasDoubleComplex* s); |
| 1592 | + const std::complex<double>* s); |
| 1593 | void zdrot_(const int64_t* n, |
| 1594 | - hipblasDoubleComplex* cx, |
| 1595 | + std::complex<double>* cx, |
| 1596 | const int64_t* incx, |
| 1597 | - hipblasDoubleComplex* cy, |
| 1598 | + std::complex<double>* cy, |
| 1599 | const int64_t* incy, |
| 1600 | const double* c, |
| 1601 | const double* s); |
| 1602 | |
| 1603 | -void crotg_(hipblasComplex* a, hipblasComplex* b, float* c, hipblasComplex* s); |
| 1604 | -void zrotg_(hipblasDoubleComplex* a, hipblasDoubleComplex* b, double* c, hipblasDoubleComplex* s); |
| 1605 | +void crotg_(std::complex<float>* a, std::complex<float>* b, float* c, std::complex<float>* s); |
| 1606 | +void zrotg_(std::complex<double>* a, std::complex<double>* b, double* c, std::complex<double>* s); |
| 1607 | } |
| 1608 | |
| 1609 | // rot |
| 1610 | @@ -936,67 +940,72 @@ void ref_rot<double>( |
| 1611 | } |
| 1612 | |
| 1613 | template <> |
| 1614 | -void ref_rot<hipblasComplex>(int64_t n, |
| 1615 | - hipblasComplex* x, |
| 1616 | - int64_t incx, |
| 1617 | - hipblasComplex* y, |
| 1618 | - int64_t incy, |
| 1619 | - hipblasComplex c, |
| 1620 | - hipblasComplex s) |
| 1621 | +void ref_rot<std::complex<float>>(int64_t n, |
| 1622 | + std::complex<float>* x, |
| 1623 | + int64_t incx, |
| 1624 | + std::complex<float>* y, |
| 1625 | + int64_t incy, |
| 1626 | + std::complex<float> c, |
| 1627 | + std::complex<float> s) |
| 1628 | { |
| 1629 | float c_real = std::real(c); |
| 1630 | lapack_xrot(n, x, incx, y, incy, c_real, s); |
| 1631 | } |
| 1632 | |
| 1633 | template <> |
| 1634 | -void ref_rot<hipblasComplex, float>(int64_t n, |
| 1635 | - hipblasComplex* x, |
| 1636 | - int64_t incx, |
| 1637 | - hipblasComplex* y, |
| 1638 | - int64_t incy, |
| 1639 | - float c, |
| 1640 | - hipblasComplex s) |
| 1641 | +void ref_rot<std::complex<float>, float>(int64_t n, |
| 1642 | + std::complex<float>* x, |
| 1643 | + int64_t incx, |
| 1644 | + std::complex<float>* y, |
| 1645 | + int64_t incy, |
| 1646 | + float c, |
| 1647 | + std::complex<float> s) |
| 1648 | { |
| 1649 | lapack_xrot(n, x, incx, y, incy, c, s); |
| 1650 | } |
| 1651 | |
| 1652 | template <> |
| 1653 | -void ref_rot<hipblasComplex, float, float>( |
| 1654 | - int64_t n, hipblasComplex* x, int64_t incx, hipblasComplex* y, int64_t incy, float c, float s) |
| 1655 | +void ref_rot<std::complex<float>, float, float>(int64_t n, |
| 1656 | + std::complex<float>* x, |
| 1657 | + int64_t incx, |
| 1658 | + std::complex<float>* y, |
| 1659 | + int64_t incy, |
| 1660 | + float c, |
| 1661 | + float s) |
| 1662 | { |
| 1663 | lapack_xrot(n, x, incx, y, incy, c, s); |
| 1664 | } |
| 1665 | |
| 1666 | template <> |
| 1667 | -void ref_rot<hipblasDoubleComplex>(int64_t n, |
| 1668 | - hipblasDoubleComplex* x, |
| 1669 | +void ref_rot<std::complex<double>>(int64_t n, |
| 1670 | + std::complex<double>* x, |
| 1671 | int64_t incx, |
| 1672 | - hipblasDoubleComplex* y, |
| 1673 | + std::complex<double>* y, |
| 1674 | int64_t incy, |
| 1675 | - hipblasDoubleComplex c, |
| 1676 | - hipblasDoubleComplex s) |
| 1677 | + std::complex<double> c, |
| 1678 | + std::complex<double> s) |
| 1679 | { |
| 1680 | double c_real = std::real(c); |
| 1681 | lapack_xrot(n, x, incx, y, incy, c_real, s); |
| 1682 | } |
| 1683 | |
| 1684 | template <> |
| 1685 | -void ref_rot<hipblasDoubleComplex, double>(int64_t n, |
| 1686 | - hipblasDoubleComplex* x, |
| 1687 | +void ref_rot<std::complex<double>, double>(int64_t n, |
| 1688 | + std::complex<double>* x, |
| 1689 | int64_t incx, |
| 1690 | - hipblasDoubleComplex* y, |
| 1691 | + std::complex<double>* y, |
| 1692 | int64_t incy, |
| 1693 | double c, |
| 1694 | - hipblasDoubleComplex s) |
| 1695 | + std::complex<double> s) |
| 1696 | { |
| 1697 | lapack_xrot(n, x, incx, y, incy, c, s); |
| 1698 | } |
| 1699 | |
| 1700 | template <> |
| 1701 | -void ref_rot<hipblasDoubleComplex, double, double>(int64_t n, |
| 1702 | - hipblasDoubleComplex* x, |
| 1703 | +void ref_rot<std::complex<double>, double, double>(int64_t n, |
| 1704 | + std::complex<double>* x, |
| 1705 | int64_t incx, |
| 1706 | - hipblasDoubleComplex* y, |
| 1707 | + std::complex<double>* y, |
| 1708 | int64_t incy, |
| 1709 | double c, |
| 1710 | double s) |
| 1711 | @@ -1018,19 +1027,19 @@ void ref_rotg<double>(double* a, double* b, double* c, double* s) |
| 1712 | } |
| 1713 | |
| 1714 | template <> |
| 1715 | -void ref_rotg<hipblasComplex, float>(hipblasComplex* a, |
| 1716 | - hipblasComplex* b, |
| 1717 | - float* c, |
| 1718 | - hipblasComplex* s) |
| 1719 | +void ref_rotg<std::complex<float>, float>(std::complex<float>* a, |
| 1720 | + std::complex<float>* b, |
| 1721 | + float* c, |
| 1722 | + std::complex<float>* s) |
| 1723 | { |
| 1724 | lapack_xrotg(*a, *b, *c, *s); |
| 1725 | } |
| 1726 | |
| 1727 | template <> |
| 1728 | -void ref_rotg<hipblasDoubleComplex, double>(hipblasDoubleComplex* a, |
| 1729 | - hipblasDoubleComplex* b, |
| 1730 | +void ref_rotg<std::complex<double>, double>(std::complex<double>* a, |
| 1731 | + std::complex<double>* b, |
| 1732 | double* c, |
| 1733 | - hipblasDoubleComplex* s) |
| 1734 | + std::complex<double>* s) |
| 1735 | { |
| 1736 | lapack_xrotg(*a, *b, *c, *s); |
| 1737 | } |
| 1738 | @@ -1050,8 +1059,8 @@ void ref_asum<double, double>(int64_t n, const double* x, int64_t incx, double* |
| 1739 | } |
| 1740 | |
| 1741 | template <> |
| 1742 | -void ref_asum<hipblasComplex, float>(int64_t n, |
| 1743 | - const hipblasComplex* x, |
| 1744 | +void ref_asum<std::complex<float>, float>(int64_t n, |
| 1745 | + const std::complex<float>* x, |
| 1746 | int64_t incx, |
| 1747 | float* result) |
| 1748 | { |
| 1749 | @@ -1059,8 +1068,8 @@ void ref_asum<hipblasComplex, float>(int64_t n, |
| 1750 | } |
| 1751 | |
| 1752 | template <> |
| 1753 | -void ref_asum<hipblasDoubleComplex, double>(int64_t n, |
| 1754 | - const hipblasDoubleComplex* x, |
| 1755 | +void ref_asum<std::complex<double>, double>(int64_t n, |
| 1756 | + const std::complex<double>* x, |
| 1757 | int64_t incx, |
| 1758 | double* result) |
| 1759 | { |
| 1760 | @@ -1086,14 +1095,14 @@ void ref_iamax<double>(int64_t n, const double* x, int64_t incx, int64_t* result |
| 1761 | } |
| 1762 | |
| 1763 | template <> |
| 1764 | -void ref_iamax<hipblasComplex>(int64_t n, const hipblasComplex* x, int64_t incx, int64_t* result) |
| 1765 | +void ref_iamax<std::complex<float>>(int64_t n, const std::complex<float>* x, int64_t incx, int64_t* result) |
| 1766 | { |
| 1767 | *result = (int64_t)cblas_icamax(n, x, incx); |
| 1768 | } |
| 1769 | |
| 1770 | template <> |
| 1771 | -void ref_iamax<hipblasDoubleComplex>(int64_t n, |
| 1772 | - const hipblasDoubleComplex* x, |
| 1773 | +void ref_iamax<std::complex<double>>(int64_t n, |
| 1774 | + const std::complex<double>* x, |
| 1775 | int64_t incx, |
| 1776 | int64_t* result) |
| 1777 | { |
| 1778 | @@ -1110,13 +1119,13 @@ double hipblas_magnitude(T val) |
| 1779 | } |
| 1780 | |
| 1781 | template <> |
| 1782 | -double hipblas_magnitude(hipblasComplex val) |
| 1783 | +double hipblas_magnitude(std::complex<float> val) |
| 1784 | { |
| 1785 | return std::abs(val.real()) + std::abs(val.imag()); |
| 1786 | } |
| 1787 | |
| 1788 | template <> |
| 1789 | -double hipblas_magnitude(hipblasDoubleComplex val) |
| 1790 | +double hipblas_magnitude(std::complex<double> val) |
| 1791 | { |
| 1792 | return std::abs(val.real()) + std::abs(val.imag()); |
| 1793 | } |
| 1794 | @@ -1155,14 +1164,14 @@ void ref_iamin<double>(int64_t n, const double* x, int64_t incx, int64_t* result |
| 1795 | } |
| 1796 | |
| 1797 | template <> |
| 1798 | -void ref_iamin<hipblasComplex>(int64_t n, const hipblasComplex* x, int64_t incx, int64_t* result) |
| 1799 | +void ref_iamin<std::complex<float>>(int64_t n, const std::complex<float>* x, int64_t incx, int64_t* result) |
| 1800 | { |
| 1801 | *result = (int64_t)ref_iamin_helper(n, x, incx); |
| 1802 | } |
| 1803 | |
| 1804 | template <> |
| 1805 | -void ref_iamin<hipblasDoubleComplex>(int64_t n, |
| 1806 | - const hipblasDoubleComplex* x, |
| 1807 | +void ref_iamin<std::complex<double>>(int64_t n, |
| 1808 | + const std::complex<double>* x, |
| 1809 | int64_t incx, |
| 1810 | int64_t* result) |
| 1811 | { |
| 1812 | @@ -1240,19 +1249,19 @@ void ref_gbmv<double>(hipblasOperation_t transA, |
| 1813 | } |
| 1814 | |
| 1815 | template <> |
| 1816 | -void ref_gbmv<hipblasComplex>(hipblasOperation_t transA, |
| 1817 | - int64_t m, |
| 1818 | - int64_t n, |
| 1819 | - int64_t kl, |
| 1820 | - int64_t ku, |
| 1821 | - hipblasComplex alpha, |
| 1822 | - hipblasComplex* A, |
| 1823 | - int64_t lda, |
| 1824 | - hipblasComplex* x, |
| 1825 | - int64_t incx, |
| 1826 | - hipblasComplex beta, |
| 1827 | - hipblasComplex* y, |
| 1828 | - int64_t incy) |
| 1829 | +void ref_gbmv<std::complex<float>>(hipblasOperation_t transA, |
| 1830 | + int64_t m, |
| 1831 | + int64_t n, |
| 1832 | + int64_t kl, |
| 1833 | + int64_t ku, |
| 1834 | + std::complex<float> alpha, |
| 1835 | + std::complex<float>* A, |
| 1836 | + int64_t lda, |
| 1837 | + std::complex<float>* x, |
| 1838 | + int64_t incx, |
| 1839 | + std::complex<float> beta, |
| 1840 | + std::complex<float>* y, |
| 1841 | + int64_t incy) |
| 1842 | { |
| 1843 | cblas_cgbmv(CblasColMajor, |
| 1844 | (CBLAS_TRANSPOSE)transA, |
| 1845 | @@ -1271,18 +1280,18 @@ void ref_gbmv<hipblasComplex>(hipblasOperation_t transA, |
| 1846 | } |
| 1847 | |
| 1848 | template <> |
| 1849 | -void ref_gbmv<hipblasDoubleComplex>(hipblasOperation_t transA, |
| 1850 | +void ref_gbmv<std::complex<double>>(hipblasOperation_t transA, |
| 1851 | int64_t m, |
| 1852 | int64_t n, |
| 1853 | int64_t kl, |
| 1854 | int64_t ku, |
| 1855 | - hipblasDoubleComplex alpha, |
| 1856 | - hipblasDoubleComplex* A, |
| 1857 | + std::complex<double> alpha, |
| 1858 | + std::complex<double>* A, |
| 1859 | int64_t lda, |
| 1860 | - hipblasDoubleComplex* x, |
| 1861 | + std::complex<double>* x, |
| 1862 | int64_t incx, |
| 1863 | - hipblasDoubleComplex beta, |
| 1864 | - hipblasDoubleComplex* y, |
| 1865 | + std::complex<double> beta, |
| 1866 | + std::complex<double>* y, |
| 1867 | int64_t incy) |
| 1868 | { |
| 1869 | cblas_zgbmv(CblasColMajor, |
| 1870 | @@ -1337,33 +1346,33 @@ void ref_gemv<double>(hipblasOperation_t transA, |
| 1871 | } |
| 1872 | |
| 1873 | template <> |
| 1874 | -void ref_gemv<hipblasComplex>(hipblasOperation_t transA, |
| 1875 | - int64_t m, |
| 1876 | - int64_t n, |
| 1877 | - hipblasComplex alpha, |
| 1878 | - hipblasComplex* A, |
| 1879 | - int64_t lda, |
| 1880 | - hipblasComplex* x, |
| 1881 | - int64_t incx, |
| 1882 | - hipblasComplex beta, |
| 1883 | - hipblasComplex* y, |
| 1884 | - int64_t incy) |
| 1885 | +void ref_gemv<std::complex<float>>(hipblasOperation_t transA, |
| 1886 | + int64_t m, |
| 1887 | + int64_t n, |
| 1888 | + std::complex<float> alpha, |
| 1889 | + std::complex<float>* A, |
| 1890 | + int64_t lda, |
| 1891 | + std::complex<float>* x, |
| 1892 | + int64_t incx, |
| 1893 | + std::complex<float> beta, |
| 1894 | + std::complex<float>* y, |
| 1895 | + int64_t incy) |
| 1896 | { |
| 1897 | cblas_cgemv( |
| 1898 | CblasColMajor, (CBLAS_TRANSPOSE)transA, m, n, &alpha, A, lda, x, incx, &beta, y, incy); |
| 1899 | } |
| 1900 | |
| 1901 | template <> |
| 1902 | -void ref_gemv<hipblasDoubleComplex>(hipblasOperation_t transA, |
| 1903 | +void ref_gemv<std::complex<double>>(hipblasOperation_t transA, |
| 1904 | int64_t m, |
| 1905 | int64_t n, |
| 1906 | - hipblasDoubleComplex alpha, |
| 1907 | - hipblasDoubleComplex* A, |
| 1908 | + std::complex<double> alpha, |
| 1909 | + std::complex<double>* A, |
| 1910 | int64_t lda, |
| 1911 | - hipblasDoubleComplex* x, |
| 1912 | + std::complex<double>* x, |
| 1913 | int64_t incx, |
| 1914 | - hipblasDoubleComplex beta, |
| 1915 | - hipblasDoubleComplex* y, |
| 1916 | + std::complex<double> beta, |
| 1917 | + std::complex<double>* y, |
| 1918 | int64_t incy) |
| 1919 | { |
| 1920 | cblas_zgemv( |
| 1921 | @@ -1400,56 +1409,56 @@ void ref_ger<double, false>(int64_t m, |
| 1922 | } |
| 1923 | |
| 1924 | template <> |
| 1925 | -void ref_ger<hipblasComplex, false>(int64_t m, |
| 1926 | - int64_t n, |
| 1927 | - hipblasComplex alpha, |
| 1928 | - hipblasComplex* x, |
| 1929 | - int64_t incx, |
| 1930 | - hipblasComplex* y, |
| 1931 | - int64_t incy, |
| 1932 | - hipblasComplex* A, |
| 1933 | - int64_t lda) |
| 1934 | +void ref_ger<std::complex<float>, false>(int64_t m, |
| 1935 | + int64_t n, |
| 1936 | + std::complex<float> alpha, |
| 1937 | + std::complex<float>* x, |
| 1938 | + int64_t incx, |
| 1939 | + std::complex<float>* y, |
| 1940 | + int64_t incy, |
| 1941 | + std::complex<float>* A, |
| 1942 | + int64_t lda) |
| 1943 | { |
| 1944 | cblas_cgeru(CblasColMajor, m, n, &alpha, x, incx, y, incy, A, lda); |
| 1945 | } |
| 1946 | |
| 1947 | template <> |
| 1948 | -void ref_ger<hipblasComplex, true>(int64_t m, |
| 1949 | - int64_t n, |
| 1950 | - hipblasComplex alpha, |
| 1951 | - hipblasComplex* x, |
| 1952 | - int64_t incx, |
| 1953 | - hipblasComplex* y, |
| 1954 | - int64_t incy, |
| 1955 | - hipblasComplex* A, |
| 1956 | - int64_t lda) |
| 1957 | +void ref_ger<std::complex<float>, true>(int64_t m, |
| 1958 | + int64_t n, |
| 1959 | + std::complex<float> alpha, |
| 1960 | + std::complex<float>* x, |
| 1961 | + int64_t incx, |
| 1962 | + std::complex<float>* y, |
| 1963 | + int64_t incy, |
| 1964 | + std::complex<float>* A, |
| 1965 | + int64_t lda) |
| 1966 | { |
| 1967 | cblas_cgerc(CblasColMajor, m, n, &alpha, x, incx, y, incy, A, lda); |
| 1968 | } |
| 1969 | |
| 1970 | template <> |
| 1971 | -void ref_ger<hipblasDoubleComplex, false>(int64_t m, |
| 1972 | +void ref_ger<std::complex<double>, false>(int64_t m, |
| 1973 | int64_t n, |
| 1974 | - hipblasDoubleComplex alpha, |
| 1975 | - hipblasDoubleComplex* x, |
| 1976 | + std::complex<double> alpha, |
| 1977 | + std::complex<double>* x, |
| 1978 | int64_t incx, |
| 1979 | - hipblasDoubleComplex* y, |
| 1980 | + std::complex<double>* y, |
| 1981 | int64_t incy, |
| 1982 | - hipblasDoubleComplex* A, |
| 1983 | + std::complex<double>* A, |
| 1984 | int64_t lda) |
| 1985 | { |
| 1986 | cblas_zgeru(CblasColMajor, m, n, &alpha, x, incx, y, incy, A, lda); |
| 1987 | } |
| 1988 | |
| 1989 | template <> |
| 1990 | -void ref_ger<hipblasDoubleComplex, true>(int64_t m, |
| 1991 | +void ref_ger<std::complex<double>, true>(int64_t m, |
| 1992 | int64_t n, |
| 1993 | - hipblasDoubleComplex alpha, |
| 1994 | - hipblasDoubleComplex* x, |
| 1995 | + std::complex<double> alpha, |
| 1996 | + std::complex<double>* x, |
| 1997 | int64_t incx, |
| 1998 | - hipblasDoubleComplex* y, |
| 1999 | + std::complex<double>* y, |
| 2000 | int64_t incy, |
| 2001 | - hipblasDoubleComplex* A, |
| 2002 | + std::complex<double>* A, |
| 2003 | int64_t lda) |
| 2004 | { |
| 2005 | cblas_zgerc(CblasColMajor, m, n, &alpha, x, incx, y, incy, A, lda); |
| 2006 | @@ -1457,32 +1466,32 @@ void ref_ger<hipblasDoubleComplex, true>(int64_t m, |
| 2007 | |
| 2008 | // hbmv |
| 2009 | template <> |
| 2010 | -void ref_hbmv<hipblasComplex>(hipblasFillMode_t uplo, |
| 2011 | - int64_t n, |
| 2012 | - int64_t k, |
| 2013 | - hipblasComplex alpha, |
| 2014 | - hipblasComplex* A, |
| 2015 | - int64_t lda, |
| 2016 | - hipblasComplex* x, |
| 2017 | - int64_t incx, |
| 2018 | - hipblasComplex beta, |
| 2019 | - hipblasComplex* y, |
| 2020 | - int64_t incy) |
| 2021 | +void ref_hbmv<std::complex<float>>(hipblasFillMode_t uplo, |
| 2022 | + int64_t n, |
| 2023 | + int64_t k, |
| 2024 | + std::complex<float> alpha, |
| 2025 | + std::complex<float>* A, |
| 2026 | + int64_t lda, |
| 2027 | + std::complex<float>* x, |
| 2028 | + int64_t incx, |
| 2029 | + std::complex<float> beta, |
| 2030 | + std::complex<float>* y, |
| 2031 | + int64_t incy) |
| 2032 | { |
| 2033 | cblas_chbmv(CblasColMajor, (CBLAS_UPLO)uplo, n, k, &alpha, A, lda, x, incx, &beta, y, incy); |
| 2034 | } |
| 2035 | |
| 2036 | template <> |
| 2037 | -void ref_hbmv<hipblasDoubleComplex>(hipblasFillMode_t uplo, |
| 2038 | +void ref_hbmv<std::complex<double>>(hipblasFillMode_t uplo, |
| 2039 | int64_t n, |
| 2040 | int64_t k, |
| 2041 | - hipblasDoubleComplex alpha, |
| 2042 | - hipblasDoubleComplex* A, |
| 2043 | + std::complex<double> alpha, |
| 2044 | + std::complex<double>* A, |
| 2045 | int64_t lda, |
| 2046 | - hipblasDoubleComplex* x, |
| 2047 | + std::complex<double>* x, |
| 2048 | int64_t incx, |
| 2049 | - hipblasDoubleComplex beta, |
| 2050 | - hipblasDoubleComplex* y, |
| 2051 | + std::complex<double> beta, |
| 2052 | + std::complex<double>* y, |
| 2053 | int64_t incy) |
| 2054 | { |
| 2055 | cblas_zhbmv(CblasColMajor, (CBLAS_UPLO)uplo, n, k, &alpha, A, lda, x, incx, &beta, y, incy); |
| 2056 | @@ -1490,30 +1499,30 @@ void ref_hbmv<hipblasDoubleComplex>(hipblasFillMode_t uplo, |
| 2057 | |
| 2058 | // hemv |
| 2059 | template <> |
| 2060 | -void ref_hemv<hipblasComplex>(hipblasFillMode_t uplo, |
| 2061 | - int64_t n, |
| 2062 | - hipblasComplex alpha, |
| 2063 | - hipblasComplex* A, |
| 2064 | - int64_t lda, |
| 2065 | - hipblasComplex* x, |
| 2066 | - int64_t incx, |
| 2067 | - hipblasComplex beta, |
| 2068 | - hipblasComplex* y, |
| 2069 | - int64_t incy) |
| 2070 | +void ref_hemv<std::complex<float>>(hipblasFillMode_t uplo, |
| 2071 | + int64_t n, |
| 2072 | + std::complex<float> alpha, |
| 2073 | + std::complex<float>* A, |
| 2074 | + int64_t lda, |
| 2075 | + std::complex<float>* x, |
| 2076 | + int64_t incx, |
| 2077 | + std::complex<float> beta, |
| 2078 | + std::complex<float>* y, |
| 2079 | + int64_t incy) |
| 2080 | { |
| 2081 | cblas_chemv(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, A, lda, x, incx, &beta, y, incy); |
| 2082 | } |
| 2083 | |
| 2084 | template <> |
| 2085 | -void ref_hemv<hipblasDoubleComplex>(hipblasFillMode_t uplo, |
| 2086 | +void ref_hemv<std::complex<double>>(hipblasFillMode_t uplo, |
| 2087 | int64_t n, |
| 2088 | - hipblasDoubleComplex alpha, |
| 2089 | - hipblasDoubleComplex* A, |
| 2090 | + std::complex<double> alpha, |
| 2091 | + std::complex<double>* A, |
| 2092 | int64_t lda, |
| 2093 | - hipblasDoubleComplex* x, |
| 2094 | + std::complex<double>* x, |
| 2095 | int64_t incx, |
| 2096 | - hipblasDoubleComplex beta, |
| 2097 | - hipblasDoubleComplex* y, |
| 2098 | + std::complex<double> beta, |
| 2099 | + std::complex<double>* y, |
| 2100 | int64_t incy) |
| 2101 | { |
| 2102 | cblas_zhemv(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, A, lda, x, incx, &beta, y, incy); |
| 2103 | @@ -1521,24 +1530,24 @@ void ref_hemv<hipblasDoubleComplex>(hipblasFillMode_t uplo, |
| 2104 | |
| 2105 | // her |
| 2106 | template <> |
| 2107 | -void ref_her<hipblasComplex, float>(hipblasFillMode_t uplo, |
| 2108 | - int64_t n, |
| 2109 | - float alpha, |
| 2110 | - hipblasComplex* x, |
| 2111 | - int64_t incx, |
| 2112 | - hipblasComplex* A, |
| 2113 | - int64_t lda) |
| 2114 | +void ref_her<std::complex<float>, float>(hipblasFillMode_t uplo, |
| 2115 | + int64_t n, |
| 2116 | + float alpha, |
| 2117 | + std::complex<float>* x, |
| 2118 | + int64_t incx, |
| 2119 | + std::complex<float>* A, |
| 2120 | + int64_t lda) |
| 2121 | { |
| 2122 | cblas_cher(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, A, lda); |
| 2123 | } |
| 2124 | |
| 2125 | template <> |
| 2126 | -void ref_her<hipblasDoubleComplex, double>(hipblasFillMode_t uplo, |
| 2127 | +void ref_her<std::complex<double>, double>(hipblasFillMode_t uplo, |
| 2128 | int64_t n, |
| 2129 | double alpha, |
| 2130 | - hipblasDoubleComplex* x, |
| 2131 | + std::complex<double>* x, |
| 2132 | int64_t incx, |
| 2133 | - hipblasDoubleComplex* A, |
| 2134 | + std::complex<double>* A, |
| 2135 | int64_t lda) |
| 2136 | { |
| 2137 | cblas_zher(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, A, lda); |
| 2138 | @@ -1546,28 +1555,28 @@ void ref_her<hipblasDoubleComplex, double>(hipblasFillMode_t uplo, |
| 2139 | |
| 2140 | // her2 |
| 2141 | template <> |
| 2142 | -void ref_her2<hipblasComplex>(hipblasFillMode_t uplo, |
| 2143 | - int64_t n, |
| 2144 | - hipblasComplex alpha, |
| 2145 | - hipblasComplex* x, |
| 2146 | - int64_t incx, |
| 2147 | - hipblasComplex* y, |
| 2148 | - int64_t incy, |
| 2149 | - hipblasComplex* A, |
| 2150 | - int64_t lda) |
| 2151 | +void ref_her2<std::complex<float>>(hipblasFillMode_t uplo, |
| 2152 | + int64_t n, |
| 2153 | + std::complex<float> alpha, |
| 2154 | + std::complex<float>* x, |
| 2155 | + int64_t incx, |
| 2156 | + std::complex<float>* y, |
| 2157 | + int64_t incy, |
| 2158 | + std::complex<float>* A, |
| 2159 | + int64_t lda) |
| 2160 | { |
| 2161 | cblas_cher2(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, x, incx, y, incy, A, lda); |
| 2162 | } |
| 2163 | |
| 2164 | template <> |
| 2165 | -void ref_her2<hipblasDoubleComplex>(hipblasFillMode_t uplo, |
| 2166 | +void ref_her2<std::complex<double>>(hipblasFillMode_t uplo, |
| 2167 | int64_t n, |
| 2168 | - hipblasDoubleComplex alpha, |
| 2169 | - hipblasDoubleComplex* x, |
| 2170 | + std::complex<double> alpha, |
| 2171 | + std::complex<double>* x, |
| 2172 | int64_t incx, |
| 2173 | - hipblasDoubleComplex* y, |
| 2174 | + std::complex<double>* y, |
| 2175 | int64_t incy, |
| 2176 | - hipblasDoubleComplex* A, |
| 2177 | + std::complex<double>* A, |
| 2178 | int64_t lda) |
| 2179 | { |
| 2180 | cblas_zher2(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, x, incx, y, incy, A, lda); |
| 2181 | @@ -1575,28 +1584,28 @@ void ref_her2<hipblasDoubleComplex>(hipblasFillMode_t uplo, |
| 2182 | |
| 2183 | // hpmv |
| 2184 | template <> |
| 2185 | -void ref_hpmv<hipblasComplex>(hipblasFillMode_t uplo, |
| 2186 | - int64_t n, |
| 2187 | - hipblasComplex alpha, |
| 2188 | - hipblasComplex* AP, |
| 2189 | - hipblasComplex* x, |
| 2190 | - int64_t incx, |
| 2191 | - hipblasComplex beta, |
| 2192 | - hipblasComplex* y, |
| 2193 | - int64_t incy) |
| 2194 | +void ref_hpmv<std::complex<float>>(hipblasFillMode_t uplo, |
| 2195 | + int64_t n, |
| 2196 | + std::complex<float> alpha, |
| 2197 | + std::complex<float>* AP, |
| 2198 | + std::complex<float>* x, |
| 2199 | + int64_t incx, |
| 2200 | + std::complex<float> beta, |
| 2201 | + std::complex<float>* y, |
| 2202 | + int64_t incy) |
| 2203 | { |
| 2204 | cblas_chpmv(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, AP, x, incx, &beta, y, incy); |
| 2205 | } |
| 2206 | |
| 2207 | template <> |
| 2208 | -void ref_hpmv<hipblasDoubleComplex>(hipblasFillMode_t uplo, |
| 2209 | +void ref_hpmv<std::complex<double>>(hipblasFillMode_t uplo, |
| 2210 | int64_t n, |
| 2211 | - hipblasDoubleComplex alpha, |
| 2212 | - hipblasDoubleComplex* AP, |
| 2213 | - hipblasDoubleComplex* x, |
| 2214 | + std::complex<double> alpha, |
| 2215 | + std::complex<double>* AP, |
| 2216 | + std::complex<double>* x, |
| 2217 | int64_t incx, |
| 2218 | - hipblasDoubleComplex beta, |
| 2219 | - hipblasDoubleComplex* y, |
| 2220 | + std::complex<double> beta, |
| 2221 | + std::complex<double>* y, |
| 2222 | int64_t incy) |
| 2223 | { |
| 2224 | cblas_zhpmv(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, AP, x, incx, &beta, y, incy); |
| 2225 | @@ -1604,12 +1613,12 @@ void ref_hpmv<hipblasDoubleComplex>(hipblasFillMode_t uplo, |
| 2226 | |
| 2227 | // hpr |
| 2228 | template <> |
| 2229 | -void ref_hpr(hipblasFillMode_t uplo, |
| 2230 | - int64_t n, |
| 2231 | - float alpha, |
| 2232 | - hipblasComplex* x, |
| 2233 | - int64_t incx, |
| 2234 | - hipblasComplex* AP) |
| 2235 | +void ref_hpr(hipblasFillMode_t uplo, |
| 2236 | + int64_t n, |
| 2237 | + float alpha, |
| 2238 | + std::complex<float>* x, |
| 2239 | + int64_t incx, |
| 2240 | + std::complex<float>* AP) |
| 2241 | { |
| 2242 | cblas_chpr(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, AP); |
| 2243 | } |
| 2244 | @@ -1618,23 +1627,23 @@ template <> |
| 2245 | void ref_hpr(hipblasFillMode_t uplo, |
| 2246 | int64_t n, |
| 2247 | double alpha, |
| 2248 | - hipblasDoubleComplex* x, |
| 2249 | + std::complex<double>* x, |
| 2250 | int64_t incx, |
| 2251 | - hipblasDoubleComplex* AP) |
| 2252 | + std::complex<double>* AP) |
| 2253 | { |
| 2254 | cblas_zhpr(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, AP); |
| 2255 | } |
| 2256 | |
| 2257 | // hpr2 |
| 2258 | template <> |
| 2259 | -void ref_hpr2(hipblasFillMode_t uplo, |
| 2260 | - int64_t n, |
| 2261 | - hipblasComplex alpha, |
| 2262 | - hipblasComplex* x, |
| 2263 | - int64_t incx, |
| 2264 | - hipblasComplex* y, |
| 2265 | - int64_t incy, |
| 2266 | - hipblasComplex* AP) |
| 2267 | +void ref_hpr2(hipblasFillMode_t uplo, |
| 2268 | + int64_t n, |
| 2269 | + std::complex<float> alpha, |
| 2270 | + std::complex<float>* x, |
| 2271 | + int64_t incx, |
| 2272 | + std::complex<float>* y, |
| 2273 | + int64_t incy, |
| 2274 | + std::complex<float>* AP) |
| 2275 | { |
| 2276 | cblas_chpr2(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, x, incx, y, incy, AP); |
| 2277 | } |
| 2278 | @@ -1642,12 +1651,12 @@ void ref_hpr2(hipblasFillMode_t uplo, |
| 2279 | template <> |
| 2280 | void ref_hpr2(hipblasFillMode_t uplo, |
| 2281 | int64_t n, |
| 2282 | - hipblasDoubleComplex alpha, |
| 2283 | - hipblasDoubleComplex* x, |
| 2284 | + std::complex<double> alpha, |
| 2285 | + std::complex<double>* x, |
| 2286 | int64_t incx, |
| 2287 | - hipblasDoubleComplex* y, |
| 2288 | + std::complex<double>* y, |
| 2289 | int64_t incy, |
| 2290 | - hipblasDoubleComplex* AP) |
| 2291 | + std::complex<double>* AP) |
| 2292 | { |
| 2293 | cblas_zhpr2(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, x, incx, y, incy, AP); |
| 2294 | } |
| 2295 | @@ -1728,12 +1737,12 @@ void ref_spr(hipblasFillMode_t uplo, int64_t n, double alpha, double* x, int64_t |
| 2296 | } |
| 2297 | |
| 2298 | template <> |
| 2299 | -void ref_spr(hipblasFillMode_t uplo, |
| 2300 | - int64_t n, |
| 2301 | - hipblasComplex alpha, |
| 2302 | - hipblasComplex* x, |
| 2303 | - int64_t incx, |
| 2304 | - hipblasComplex* AP) |
| 2305 | +void ref_spr(hipblasFillMode_t uplo, |
| 2306 | + int64_t n, |
| 2307 | + std::complex<float> alpha, |
| 2308 | + std::complex<float>* x, |
| 2309 | + int64_t incx, |
| 2310 | + std::complex<float>* AP) |
| 2311 | { |
| 2312 | lapack_xspr(uplo, n, alpha, x, incx, AP); |
| 2313 | } |
| 2314 | @@ -1741,10 +1750,10 @@ void ref_spr(hipblasFillMode_t uplo, |
| 2315 | template <> |
| 2316 | void ref_spr(hipblasFillMode_t uplo, |
| 2317 | int64_t n, |
| 2318 | - hipblasDoubleComplex alpha, |
| 2319 | - hipblasDoubleComplex* x, |
| 2320 | + std::complex<double> alpha, |
| 2321 | + std::complex<double>* x, |
| 2322 | int64_t incx, |
| 2323 | - hipblasDoubleComplex* AP) |
| 2324 | + std::complex<double>* AP) |
| 2325 | { |
| 2326 | lapack_xspr(uplo, n, alpha, x, incx, AP); |
| 2327 | } |
| 2328 | @@ -1808,16 +1817,16 @@ void ref_symv(hipblasFillMode_t uplo, |
| 2329 | } |
| 2330 | |
| 2331 | template <> |
| 2332 | -void ref_symv(hipblasFillMode_t uplo, |
| 2333 | - int64_t n, |
| 2334 | - hipblasComplex alpha, |
| 2335 | - hipblasComplex* A, |
| 2336 | - int64_t lda, |
| 2337 | - hipblasComplex* x, |
| 2338 | - int64_t incx, |
| 2339 | - hipblasComplex beta, |
| 2340 | - hipblasComplex* y, |
| 2341 | - int64_t incy) |
| 2342 | +void ref_symv(hipblasFillMode_t uplo, |
| 2343 | + int64_t n, |
| 2344 | + std::complex<float> alpha, |
| 2345 | + std::complex<float>* A, |
| 2346 | + int64_t lda, |
| 2347 | + std::complex<float>* x, |
| 2348 | + int64_t incx, |
| 2349 | + std::complex<float> beta, |
| 2350 | + std::complex<float>* y, |
| 2351 | + int64_t incy) |
| 2352 | { |
| 2353 | lapack_xsymv(uplo, n, alpha, A, lda, x, incx, beta, y, incy); |
| 2354 | } |
| 2355 | @@ -1825,13 +1834,13 @@ void ref_symv(hipblasFillMode_t uplo, |
| 2356 | template <> |
| 2357 | void ref_symv(hipblasFillMode_t uplo, |
| 2358 | int64_t n, |
| 2359 | - hipblasDoubleComplex alpha, |
| 2360 | - hipblasDoubleComplex* A, |
| 2361 | + std::complex<double> alpha, |
| 2362 | + std::complex<double>* A, |
| 2363 | int64_t lda, |
| 2364 | - hipblasDoubleComplex* x, |
| 2365 | + std::complex<double>* x, |
| 2366 | int64_t incx, |
| 2367 | - hipblasDoubleComplex beta, |
| 2368 | - hipblasDoubleComplex* y, |
| 2369 | + std::complex<double> beta, |
| 2370 | + std::complex<double>* y, |
| 2371 | int64_t incy) |
| 2372 | { |
| 2373 | lapack_xsymv(uplo, n, alpha, A, lda, x, incx, beta, y, incy); |
| 2374 | @@ -1858,13 +1867,13 @@ void ref_syr<double>(hipblasFillMode_t uplo, |
| 2375 | } |
| 2376 | |
| 2377 | template <> |
| 2378 | -void ref_syr(hipblasFillMode_t uplo, |
| 2379 | - int64_t n, |
| 2380 | - hipblasComplex alpha, |
| 2381 | - hipblasComplex* xa, |
| 2382 | - int64_t incx, |
| 2383 | - hipblasComplex* A, |
| 2384 | - int64_t lda) |
| 2385 | +void ref_syr(hipblasFillMode_t uplo, |
| 2386 | + int64_t n, |
| 2387 | + std::complex<float> alpha, |
| 2388 | + std::complex<float>* xa, |
| 2389 | + int64_t incx, |
| 2390 | + std::complex<float>* A, |
| 2391 | + int64_t lda) |
| 2392 | { |
| 2393 | lapack_xsyr(uplo, n, alpha, xa, incx, A, lda); |
| 2394 | } |
| 2395 | @@ -1872,10 +1881,10 @@ void ref_syr(hipblasFillMode_t uplo, |
| 2396 | template <> |
| 2397 | void ref_syr(hipblasFillMode_t uplo, |
| 2398 | int64_t n, |
| 2399 | - hipblasDoubleComplex alpha, |
| 2400 | - hipblasDoubleComplex* xa, |
| 2401 | + std::complex<double> alpha, |
| 2402 | + std::complex<double>* xa, |
| 2403 | int64_t incx, |
| 2404 | - hipblasDoubleComplex* A, |
| 2405 | + std::complex<double>* A, |
| 2406 | int64_t lda) |
| 2407 | { |
| 2408 | lapack_xsyr(uplo, n, alpha, xa, incx, A, lda); |
| 2409 | @@ -1912,15 +1921,15 @@ void ref_syr2(hipblasFillMode_t uplo, |
| 2410 | } |
| 2411 | |
| 2412 | template <> |
| 2413 | -void ref_syr2(hipblasFillMode_t uplo, |
| 2414 | - int64_t n, |
| 2415 | - hipblasComplex alpha, |
| 2416 | - hipblasComplex* x, |
| 2417 | - int64_t incx, |
| 2418 | - hipblasComplex* y, |
| 2419 | - int64_t incy, |
| 2420 | - hipblasComplex* A, |
| 2421 | - int64_t lda) |
| 2422 | +void ref_syr2(hipblasFillMode_t uplo, |
| 2423 | + int64_t n, |
| 2424 | + std::complex<float> alpha, |
| 2425 | + std::complex<float>* x, |
| 2426 | + int64_t incx, |
| 2427 | + std::complex<float>* y, |
| 2428 | + int64_t incy, |
| 2429 | + std::complex<float>* A, |
| 2430 | + int64_t lda) |
| 2431 | { |
| 2432 | lapack_xsyr2(uplo, n, alpha, x, incx, y, incy, A, lda); |
| 2433 | } |
| 2434 | @@ -1928,12 +1937,12 @@ void ref_syr2(hipblasFillMode_t uplo, |
| 2435 | template <> |
| 2436 | void ref_syr2(hipblasFillMode_t uplo, |
| 2437 | int64_t n, |
| 2438 | - hipblasDoubleComplex alpha, |
| 2439 | - hipblasDoubleComplex* x, |
| 2440 | + std::complex<double> alpha, |
| 2441 | + std::complex<double>* x, |
| 2442 | int64_t incx, |
| 2443 | - hipblasDoubleComplex* y, |
| 2444 | + std::complex<double>* y, |
| 2445 | int64_t incy, |
| 2446 | - hipblasDoubleComplex* A, |
| 2447 | + std::complex<double>* A, |
| 2448 | int64_t lda) |
| 2449 | { |
| 2450 | lapack_xsyr2(uplo, n, alpha, x, incx, y, incy, A, lda); |
| 2451 | @@ -1987,15 +1996,15 @@ void ref_tbmv<double>(hipblasFillMode_t uplo, |
| 2452 | } |
| 2453 | |
| 2454 | template <> |
| 2455 | -void ref_tbmv<hipblasComplex>(hipblasFillMode_t uplo, |
| 2456 | - hipblasOperation_t transA, |
| 2457 | - hipblasDiagType_t diag, |
| 2458 | - int64_t m, |
| 2459 | - int64_t k, |
| 2460 | - const hipblasComplex* A, |
| 2461 | - int64_t lda, |
| 2462 | - hipblasComplex* x, |
| 2463 | - int64_t incx) |
| 2464 | +void ref_tbmv<std::complex<float>>(hipblasFillMode_t uplo, |
| 2465 | + hipblasOperation_t transA, |
| 2466 | + hipblasDiagType_t diag, |
| 2467 | + int64_t m, |
| 2468 | + int64_t k, |
| 2469 | + const std::complex<float>* A, |
| 2470 | + int64_t lda, |
| 2471 | + std::complex<float>* x, |
| 2472 | + int64_t incx) |
| 2473 | { |
| 2474 | cblas_ctbmv(CblasColMajor, |
| 2475 | CBLAS_UPLO(uplo), |
| 2476 | @@ -2010,14 +2019,14 @@ void ref_tbmv<hipblasComplex>(hipblasFillMode_t uplo, |
| 2477 | } |
| 2478 | |
| 2479 | template <> |
| 2480 | -void ref_tbmv<hipblasDoubleComplex>(hipblasFillMode_t uplo, |
| 2481 | +void ref_tbmv<std::complex<double>>(hipblasFillMode_t uplo, |
| 2482 | hipblasOperation_t transA, |
| 2483 | hipblasDiagType_t diag, |
| 2484 | int64_t m, |
| 2485 | int64_t k, |
| 2486 | - const hipblasDoubleComplex* A, |
| 2487 | + const std::complex<double>* A, |
| 2488 | int64_t lda, |
| 2489 | - hipblasDoubleComplex* x, |
| 2490 | + std::complex<double>* x, |
| 2491 | int64_t incx) |
| 2492 | { |
| 2493 | cblas_ztbmv(CblasColMajor, |
| 2494 | @@ -2080,15 +2089,15 @@ void ref_tbsv<double>(hipblasFillMode_t uplo, |
| 2495 | } |
| 2496 | |
| 2497 | template <> |
| 2498 | -void ref_tbsv<hipblasComplex>(hipblasFillMode_t uplo, |
| 2499 | - hipblasOperation_t transA, |
| 2500 | - hipblasDiagType_t diag, |
| 2501 | - int64_t m, |
| 2502 | - int64_t k, |
| 2503 | - const hipblasComplex* A, |
| 2504 | - int64_t lda, |
| 2505 | - hipblasComplex* x, |
| 2506 | - int64_t incx) |
| 2507 | +void ref_tbsv<std::complex<float>>(hipblasFillMode_t uplo, |
| 2508 | + hipblasOperation_t transA, |
| 2509 | + hipblasDiagType_t diag, |
| 2510 | + int64_t m, |
| 2511 | + int64_t k, |
| 2512 | + const std::complex<float>* A, |
| 2513 | + int64_t lda, |
| 2514 | + std::complex<float>* x, |
| 2515 | + int64_t incx) |
| 2516 | { |
| 2517 | cblas_ctbsv(CblasColMajor, |
| 2518 | CBLAS_UPLO(uplo), |
| 2519 | @@ -2103,14 +2112,14 @@ void ref_tbsv<hipblasComplex>(hipblasFillMode_t uplo, |
| 2520 | } |
| 2521 | |
| 2522 | template <> |
| 2523 | -void ref_tbsv<hipblasDoubleComplex>(hipblasFillMode_t uplo, |
| 2524 | +void ref_tbsv<std::complex<double>>(hipblasFillMode_t uplo, |
| 2525 | hipblasOperation_t transA, |
| 2526 | hipblasDiagType_t diag, |
| 2527 | int64_t m, |
| 2528 | int64_t k, |
| 2529 | - const hipblasDoubleComplex* A, |
| 2530 | + const std::complex<double>* A, |
| 2531 | int64_t lda, |
| 2532 | - hipblasDoubleComplex* x, |
| 2533 | + std::complex<double>* x, |
| 2534 | int64_t incx) |
| 2535 | { |
| 2536 | cblas_ztbsv(CblasColMajor, |
| 2537 | @@ -2153,13 +2162,13 @@ void ref_tpmv(hipblasFillMode_t uplo, |
| 2538 | } |
| 2539 | |
| 2540 | template <> |
| 2541 | -void ref_tpmv(hipblasFillMode_t uplo, |
| 2542 | - hipblasOperation_t transA, |
| 2543 | - hipblasDiagType_t diag, |
| 2544 | - int64_t m, |
| 2545 | - const hipblasComplex* A, |
| 2546 | - hipblasComplex* x, |
| 2547 | - int64_t incx) |
| 2548 | +void ref_tpmv(hipblasFillMode_t uplo, |
| 2549 | + hipblasOperation_t transA, |
| 2550 | + hipblasDiagType_t diag, |
| 2551 | + int64_t m, |
| 2552 | + const std::complex<float>* A, |
| 2553 | + std::complex<float>* x, |
| 2554 | + int64_t incx) |
| 2555 | { |
| 2556 | cblas_ctpmv( |
| 2557 | CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, A, x, incx); |
| 2558 | @@ -2170,8 +2179,8 @@ void ref_tpmv(hipblasFillMode_t uplo, |
| 2559 | hipblasOperation_t transA, |
| 2560 | hipblasDiagType_t diag, |
| 2561 | int64_t m, |
| 2562 | - const hipblasDoubleComplex* A, |
| 2563 | - hipblasDoubleComplex* x, |
| 2564 | + const std::complex<double>* A, |
| 2565 | + std::complex<double>* x, |
| 2566 | int64_t incx) |
| 2567 | { |
| 2568 | cblas_ztpmv( |
| 2569 | @@ -2206,13 +2215,13 @@ void ref_tpsv(hipblasFillMode_t uplo, |
| 2570 | } |
| 2571 | |
| 2572 | template <> |
| 2573 | -void ref_tpsv(hipblasFillMode_t uplo, |
| 2574 | - hipblasOperation_t transA, |
| 2575 | - hipblasDiagType_t diag, |
| 2576 | - int64_t n, |
| 2577 | - const hipblasComplex* AP, |
| 2578 | - hipblasComplex* x, |
| 2579 | - int64_t incx) |
| 2580 | +void ref_tpsv(hipblasFillMode_t uplo, |
| 2581 | + hipblasOperation_t transA, |
| 2582 | + hipblasDiagType_t diag, |
| 2583 | + int64_t n, |
| 2584 | + const std::complex<float>* AP, |
| 2585 | + std::complex<float>* x, |
| 2586 | + int64_t incx) |
| 2587 | { |
| 2588 | cblas_ctpsv( |
| 2589 | CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), n, AP, x, incx); |
| 2590 | @@ -2223,8 +2232,8 @@ void ref_tpsv(hipblasFillMode_t uplo, |
| 2591 | hipblasOperation_t transA, |
| 2592 | hipblasDiagType_t diag, |
| 2593 | int64_t n, |
| 2594 | - const hipblasDoubleComplex* AP, |
| 2595 | - hipblasDoubleComplex* x, |
| 2596 | + const std::complex<double>* AP, |
| 2597 | + std::complex<double>* x, |
| 2598 | int64_t incx) |
| 2599 | { |
| 2600 | cblas_ztpsv( |
| 2601 | @@ -2275,14 +2284,14 @@ void ref_trmv<double>(hipblasFillMode_t uplo, |
| 2602 | } |
| 2603 | |
| 2604 | template <> |
| 2605 | -void ref_trmv<hipblasComplex>(hipblasFillMode_t uplo, |
| 2606 | - hipblasOperation_t transA, |
| 2607 | - hipblasDiagType_t diag, |
| 2608 | - int64_t m, |
| 2609 | - const hipblasComplex* A, |
| 2610 | - int64_t lda, |
| 2611 | - hipblasComplex* x, |
| 2612 | - int64_t incx) |
| 2613 | +void ref_trmv<std::complex<float>>(hipblasFillMode_t uplo, |
| 2614 | + hipblasOperation_t transA, |
| 2615 | + hipblasDiagType_t diag, |
| 2616 | + int64_t m, |
| 2617 | + const std::complex<float>* A, |
| 2618 | + int64_t lda, |
| 2619 | + std::complex<float>* x, |
| 2620 | + int64_t incx) |
| 2621 | { |
| 2622 | cblas_ctrmv(CblasColMajor, |
| 2623 | CBLAS_UPLO(uplo), |
| 2624 | @@ -2296,13 +2305,13 @@ void ref_trmv<hipblasComplex>(hipblasFillMode_t uplo, |
| 2625 | } |
| 2626 | |
| 2627 | template <> |
| 2628 | -void ref_trmv<hipblasDoubleComplex>(hipblasFillMode_t uplo, |
| 2629 | +void ref_trmv<std::complex<double>>(hipblasFillMode_t uplo, |
| 2630 | hipblasOperation_t transA, |
| 2631 | hipblasDiagType_t diag, |
| 2632 | int64_t m, |
| 2633 | - const hipblasDoubleComplex* A, |
| 2634 | + const std::complex<double>* A, |
| 2635 | int64_t lda, |
| 2636 | - hipblasDoubleComplex* x, |
| 2637 | + std::complex<double>* x, |
| 2638 | int64_t incx) |
| 2639 | { |
| 2640 | cblas_ztrmv(CblasColMajor, |
| 2641 | @@ -2362,15 +2371,15 @@ void ref_trsv<double>(hipblasHandle_t handle, |
| 2642 | } |
| 2643 | |
| 2644 | template <> |
| 2645 | -void ref_trsv<hipblasComplex>(hipblasHandle_t handle, |
| 2646 | - hipblasFillMode_t uplo, |
| 2647 | - hipblasOperation_t transA, |
| 2648 | - hipblasDiagType_t diag, |
| 2649 | - int64_t m, |
| 2650 | - const hipblasComplex* A, |
| 2651 | - int64_t lda, |
| 2652 | - hipblasComplex* x, |
| 2653 | - int64_t incx) |
| 2654 | +void ref_trsv<std::complex<float>>(hipblasHandle_t handle, |
| 2655 | + hipblasFillMode_t uplo, |
| 2656 | + hipblasOperation_t transA, |
| 2657 | + hipblasDiagType_t diag, |
| 2658 | + int64_t m, |
| 2659 | + const std::complex<float>* A, |
| 2660 | + int64_t lda, |
| 2661 | + std::complex<float>* x, |
| 2662 | + int64_t incx) |
| 2663 | { |
| 2664 | cblas_ctrsv(CblasColMajor, |
| 2665 | CBLAS_UPLO(uplo), |
| 2666 | @@ -2384,14 +2393,14 @@ void ref_trsv<hipblasComplex>(hipblasHandle_t handle, |
| 2667 | } |
| 2668 | |
| 2669 | template <> |
| 2670 | -void ref_trsv<hipblasDoubleComplex>(hipblasHandle_t handle, |
| 2671 | +void ref_trsv<std::complex<double>>(hipblasHandle_t handle, |
| 2672 | hipblasFillMode_t uplo, |
| 2673 | hipblasOperation_t transA, |
| 2674 | hipblasDiagType_t diag, |
| 2675 | int64_t m, |
| 2676 | - const hipblasDoubleComplex* A, |
| 2677 | + const std::complex<double>* A, |
| 2678 | int64_t lda, |
| 2679 | - hipblasDoubleComplex* x, |
| 2680 | + std::complex<double>* x, |
| 2681 | int64_t incx) |
| 2682 | { |
| 2683 | cblas_ztrsv(CblasColMajor, |
| 2684 | @@ -2436,10 +2445,13 @@ void ref_geam_helper(hipblasOperation_t transA, |
| 2685 | { |
| 2686 | T a_val = A[i * inc1_A + j * inc2_A]; |
| 2687 | T b_val = B[i * inc1_B + j * inc2_B]; |
| 2688 | - if(transA == HIPBLAS_OP_C) |
| 2689 | - a_val = std::conj(a_val); |
| 2690 | - if(transB == HIPBLAS_OP_C) |
| 2691 | - b_val = std::conj(b_val); |
| 2692 | + if constexpr(is_complex<T>) |
| 2693 | + { |
| 2694 | + if(transA == HIPBLAS_OP_C) |
| 2695 | + a_val = std::conj(a_val); |
| 2696 | + if(transB == HIPBLAS_OP_C) |
| 2697 | + b_val = std::conj(b_val); |
| 2698 | + } |
| 2699 | C[i + j * ldc] = alpha * a_val + beta * b_val; |
| 2700 | } |
| 2701 | } |
| 2702 | @@ -2503,15 +2515,15 @@ void ref_dgmm(hipblasSideMode_t side, |
| 2703 | } |
| 2704 | |
| 2705 | template <> |
| 2706 | -void ref_dgmm(hipblasSideMode_t side, |
| 2707 | - int64_t M, |
| 2708 | - int64_t N, |
| 2709 | - const hipblasComplex* A, |
| 2710 | - int64_t lda, |
| 2711 | - const hipblasComplex* x, |
| 2712 | - int64_t incx, |
| 2713 | - hipblasComplex* C, |
| 2714 | - int64_t ldc) |
| 2715 | +void ref_dgmm(hipblasSideMode_t side, |
| 2716 | + int64_t M, |
| 2717 | + int64_t N, |
| 2718 | + const std::complex<float>* A, |
| 2719 | + int64_t lda, |
| 2720 | + const std::complex<float>* x, |
| 2721 | + int64_t incx, |
| 2722 | + std::complex<float>* C, |
| 2723 | + int64_t ldc) |
| 2724 | { |
| 2725 | ref_dgmm_helper(side, M, N, A, lda, x, incx, C, ldc); |
| 2726 | } |
| 2727 | @@ -2520,11 +2532,11 @@ template <> |
| 2728 | void ref_dgmm(hipblasSideMode_t side, |
| 2729 | int64_t M, |
| 2730 | int64_t N, |
| 2731 | - const hipblasDoubleComplex* A, |
| 2732 | + const std::complex<double>* A, |
| 2733 | int64_t lda, |
| 2734 | - const hipblasDoubleComplex* x, |
| 2735 | + const std::complex<double>* x, |
| 2736 | int64_t incx, |
| 2737 | - hipblasDoubleComplex* C, |
| 2738 | + std::complex<double>* C, |
| 2739 | int64_t ldc) |
| 2740 | { |
| 2741 | ref_dgmm_helper(side, M, N, A, lda, x, incx, C, ldc); |
| 2742 | @@ -2566,18 +2578,18 @@ void ref_geam(hipblasOperation_t transa, |
| 2743 | } |
| 2744 | |
| 2745 | template <> |
| 2746 | -void ref_geam(hipblasOperation_t transa, |
| 2747 | - hipblasOperation_t transb, |
| 2748 | - int64_t m, |
| 2749 | - int64_t n, |
| 2750 | - hipblasComplex* alpha, |
| 2751 | - hipblasComplex* A, |
| 2752 | - int64_t lda, |
| 2753 | - hipblasComplex* beta, |
| 2754 | - hipblasComplex* B, |
| 2755 | - int64_t ldb, |
| 2756 | - hipblasComplex* C, |
| 2757 | - int64_t ldc) |
| 2758 | +void ref_geam(hipblasOperation_t transa, |
| 2759 | + hipblasOperation_t transb, |
| 2760 | + int64_t m, |
| 2761 | + int64_t n, |
| 2762 | + std::complex<float>* alpha, |
| 2763 | + std::complex<float>* A, |
| 2764 | + int64_t lda, |
| 2765 | + std::complex<float>* beta, |
| 2766 | + std::complex<float>* B, |
| 2767 | + int64_t ldb, |
| 2768 | + std::complex<float>* C, |
| 2769 | + int64_t ldc) |
| 2770 | { |
| 2771 | return ref_geam_helper(transa, transb, m, n, *alpha, A, lda, *beta, B, ldb, C, ldc); |
| 2772 | } |
| 2773 | @@ -2587,13 +2599,13 @@ void ref_geam(hipblasOperation_t transa, |
| 2774 | hipblasOperation_t transb, |
| 2775 | int64_t m, |
| 2776 | int64_t n, |
| 2777 | - hipblasDoubleComplex* alpha, |
| 2778 | - hipblasDoubleComplex* A, |
| 2779 | + std::complex<double>* alpha, |
| 2780 | + std::complex<double>* A, |
| 2781 | int64_t lda, |
| 2782 | - hipblasDoubleComplex* beta, |
| 2783 | - hipblasDoubleComplex* B, |
| 2784 | + std::complex<double>* beta, |
| 2785 | + std::complex<double>* B, |
| 2786 | int64_t ldb, |
| 2787 | - hipblasDoubleComplex* C, |
| 2788 | + std::complex<double>* C, |
| 2789 | int64_t ldc) |
| 2790 | { |
| 2791 | return ref_geam_helper(transa, transb, m, n, *alpha, A, lda, *beta, B, ldb, C, ldc); |
| 2792 | @@ -2955,19 +2967,19 @@ void ref_gemm<double>(hipblasOperation_t transA, |
| 2793 | } |
| 2794 | |
| 2795 | template <> |
| 2796 | -void ref_gemm<hipblasComplex>(hipblasOperation_t transA, |
| 2797 | - hipblasOperation_t transB, |
| 2798 | - int64_t m, |
| 2799 | - int64_t n, |
| 2800 | - int64_t k, |
| 2801 | - hipblasComplex alpha, |
| 2802 | - hipblasComplex* A, |
| 2803 | - int64_t lda, |
| 2804 | - hipblasComplex* B, |
| 2805 | - int64_t ldb, |
| 2806 | - hipblasComplex beta, |
| 2807 | - hipblasComplex* C, |
| 2808 | - int64_t ldc) |
| 2809 | +void ref_gemm<std::complex<float>>(hipblasOperation_t transA, |
| 2810 | + hipblasOperation_t transB, |
| 2811 | + int64_t m, |
| 2812 | + int64_t n, |
| 2813 | + int64_t k, |
| 2814 | + std::complex<float> alpha, |
| 2815 | + std::complex<float>* A, |
| 2816 | + int64_t lda, |
| 2817 | + std::complex<float>* B, |
| 2818 | + int64_t ldb, |
| 2819 | + std::complex<float> beta, |
| 2820 | + std::complex<float>* C, |
| 2821 | + int64_t ldc) |
| 2822 | { |
| 2823 | //just directly cast, since transA, transB are integers in the enum |
| 2824 | cblas_cgemm(CblasColMajor, |
| 2825 | @@ -2987,18 +2999,18 @@ void ref_gemm<hipblasComplex>(hipblasOperation_t transA, |
| 2826 | } |
| 2827 | |
| 2828 | template <> |
| 2829 | -void ref_gemm<hipblasDoubleComplex>(hipblasOperation_t transA, |
| 2830 | +void ref_gemm<std::complex<double>>(hipblasOperation_t transA, |
| 2831 | hipblasOperation_t transB, |
| 2832 | int64_t m, |
| 2833 | int64_t n, |
| 2834 | int64_t k, |
| 2835 | - hipblasDoubleComplex alpha, |
| 2836 | - hipblasDoubleComplex* A, |
| 2837 | + std::complex<double> alpha, |
| 2838 | + std::complex<double>* A, |
| 2839 | int64_t lda, |
| 2840 | - hipblasDoubleComplex* B, |
| 2841 | + std::complex<double>* B, |
| 2842 | int64_t ldb, |
| 2843 | - hipblasDoubleComplex beta, |
| 2844 | - hipblasDoubleComplex* C, |
| 2845 | + std::complex<double> beta, |
| 2846 | + std::complex<double>* C, |
| 2847 | int64_t ldc) |
| 2848 | { |
| 2849 | cblas_zgemm(CblasColMajor, |
| 2850 | @@ -3077,18 +3089,18 @@ void ref_gemm<int8_t, int32_t, int32_t>(hipblasOperation_t transA, |
| 2851 | |
| 2852 | // hemm |
| 2853 | template <> |
| 2854 | -void ref_hemm(hipblasSideMode_t side, |
| 2855 | - hipblasFillMode_t uplo, |
| 2856 | - int64_t m, |
| 2857 | - int64_t n, |
| 2858 | - hipblasComplex alpha, |
| 2859 | - hipblasComplex* A, |
| 2860 | - int64_t lda, |
| 2861 | - hipblasComplex* B, |
| 2862 | - int64_t ldb, |
| 2863 | - hipblasComplex beta, |
| 2864 | - hipblasComplex* C, |
| 2865 | - int64_t ldc) |
| 2866 | +void ref_hemm(hipblasSideMode_t side, |
| 2867 | + hipblasFillMode_t uplo, |
| 2868 | + int64_t m, |
| 2869 | + int64_t n, |
| 2870 | + std::complex<float> alpha, |
| 2871 | + std::complex<float>* A, |
| 2872 | + int64_t lda, |
| 2873 | + std::complex<float>* B, |
| 2874 | + int64_t ldb, |
| 2875 | + std::complex<float> beta, |
| 2876 | + std::complex<float>* C, |
| 2877 | + int64_t ldc) |
| 2878 | { |
| 2879 | cblas_chemm(CblasColMajor, |
| 2880 | (CBLAS_SIDE)side, |
| 2881 | @@ -3110,13 +3122,13 @@ void ref_hemm(hipblasSideMode_t side, |
| 2882 | hipblasFillMode_t uplo, |
| 2883 | int64_t m, |
| 2884 | int64_t n, |
| 2885 | - hipblasDoubleComplex alpha, |
| 2886 | - hipblasDoubleComplex* A, |
| 2887 | + std::complex<double> alpha, |
| 2888 | + std::complex<double>* A, |
| 2889 | int64_t lda, |
| 2890 | - hipblasDoubleComplex* B, |
| 2891 | + std::complex<double>* B, |
| 2892 | int64_t ldb, |
| 2893 | - hipblasDoubleComplex beta, |
| 2894 | - hipblasDoubleComplex* C, |
| 2895 | + std::complex<double> beta, |
| 2896 | + std::complex<double>* C, |
| 2897 | int64_t ldc) |
| 2898 | { |
| 2899 | cblas_zhemm(CblasColMajor, |
| 2900 | @@ -3136,16 +3148,16 @@ void ref_hemm(hipblasSideMode_t side, |
| 2901 | |
| 2902 | // herk |
| 2903 | template <> |
| 2904 | -void ref_herk(hipblasFillMode_t uplo, |
| 2905 | - hipblasOperation_t transA, |
| 2906 | - int64_t n, |
| 2907 | - int64_t k, |
| 2908 | - float alpha, |
| 2909 | - hipblasComplex* A, |
| 2910 | - int64_t lda, |
| 2911 | - float beta, |
| 2912 | - hipblasComplex* C, |
| 2913 | - int64_t ldc) |
| 2914 | +void ref_herk(hipblasFillMode_t uplo, |
| 2915 | + hipblasOperation_t transA, |
| 2916 | + int64_t n, |
| 2917 | + int64_t k, |
| 2918 | + float alpha, |
| 2919 | + std::complex<float>* A, |
| 2920 | + int64_t lda, |
| 2921 | + float beta, |
| 2922 | + std::complex<float>* C, |
| 2923 | + int64_t ldc) |
| 2924 | { |
| 2925 | cblas_cherk(CblasColMajor, |
| 2926 | (CBLAS_UPLO)uplo, |
| 2927 | @@ -3166,10 +3178,10 @@ void ref_herk(hipblasFillMode_t uplo, |
| 2928 | int64_t n, |
| 2929 | int64_t k, |
| 2930 | double alpha, |
| 2931 | - hipblasDoubleComplex* A, |
| 2932 | + std::complex<double>* A, |
| 2933 | int64_t lda, |
| 2934 | double beta, |
| 2935 | - hipblasDoubleComplex* C, |
| 2936 | + std::complex<double>* C, |
| 2937 | int64_t ldc) |
| 2938 | { |
| 2939 | cblas_zherk(CblasColMajor, |
| 2940 | @@ -3270,18 +3282,18 @@ void ref_herkx_local(hipblasFillMode_t uplo, |
| 2941 | } |
| 2942 | |
| 2943 | template <> |
| 2944 | -void ref_herkx(hipblasFillMode_t uplo, |
| 2945 | - hipblasOperation_t transA, |
| 2946 | - int64_t n, |
| 2947 | - int64_t k, |
| 2948 | - hipblasComplex alpha, |
| 2949 | - hipblasComplex* A, |
| 2950 | - int64_t lda, |
| 2951 | - hipblasComplex* B, |
| 2952 | - int64_t ldb, |
| 2953 | - float beta, |
| 2954 | - hipblasComplex* C, |
| 2955 | - int64_t ldc) |
| 2956 | +void ref_herkx(hipblasFillMode_t uplo, |
| 2957 | + hipblasOperation_t transA, |
| 2958 | + int64_t n, |
| 2959 | + int64_t k, |
| 2960 | + std::complex<float> alpha, |
| 2961 | + std::complex<float>* A, |
| 2962 | + int64_t lda, |
| 2963 | + std::complex<float>* B, |
| 2964 | + int64_t ldb, |
| 2965 | + float beta, |
| 2966 | + std::complex<float>* C, |
| 2967 | + int64_t ldc) |
| 2968 | { |
| 2969 | ref_herkx_local(uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); |
| 2970 | } |
| 2971 | @@ -3291,13 +3303,13 @@ void ref_herkx(hipblasFillMode_t uplo, |
| 2972 | hipblasOperation_t transA, |
| 2973 | int64_t n, |
| 2974 | int64_t k, |
| 2975 | - hipblasDoubleComplex alpha, |
| 2976 | - hipblasDoubleComplex* A, |
| 2977 | + std::complex<double> alpha, |
| 2978 | + std::complex<double>* A, |
| 2979 | int64_t lda, |
| 2980 | - hipblasDoubleComplex* B, |
| 2981 | + std::complex<double>* B, |
| 2982 | int64_t ldb, |
| 2983 | double beta, |
| 2984 | - hipblasDoubleComplex* C, |
| 2985 | + std::complex<double>* C, |
| 2986 | int64_t ldc) |
| 2987 | { |
| 2988 | ref_herkx_local(uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); |
| 2989 | @@ -3305,18 +3317,18 @@ void ref_herkx(hipblasFillMode_t uplo, |
| 2990 | |
| 2991 | // her2k |
| 2992 | template <> |
| 2993 | -void ref_her2k(hipblasFillMode_t uplo, |
| 2994 | - hipblasOperation_t transA, |
| 2995 | - int64_t n, |
| 2996 | - int64_t k, |
| 2997 | - hipblasComplex alpha, |
| 2998 | - hipblasComplex* A, |
| 2999 | - int64_t lda, |
| 3000 | - hipblasComplex* B, |
| 3001 | - int64_t ldb, |
| 3002 | - float beta, |
| 3003 | - hipblasComplex* C, |
| 3004 | - int64_t ldc) |
| 3005 | +void ref_her2k(hipblasFillMode_t uplo, |
| 3006 | + hipblasOperation_t transA, |
| 3007 | + int64_t n, |
| 3008 | + int64_t k, |
| 3009 | + std::complex<float> alpha, |
| 3010 | + std::complex<float>* A, |
| 3011 | + int64_t lda, |
| 3012 | + std::complex<float>* B, |
| 3013 | + int64_t ldb, |
| 3014 | + float beta, |
| 3015 | + std::complex<float>* C, |
| 3016 | + int64_t ldc) |
| 3017 | { |
| 3018 | cblas_cher2k(CblasColMajor, |
| 3019 | (CBLAS_UPLO)uplo, |
| 3020 | @@ -3338,13 +3350,13 @@ void ref_her2k(hipblasFillMode_t uplo, |
| 3021 | hipblasOperation_t transA, |
| 3022 | int64_t n, |
| 3023 | int64_t k, |
| 3024 | - hipblasDoubleComplex alpha, |
| 3025 | - hipblasDoubleComplex* A, |
| 3026 | + std::complex<double> alpha, |
| 3027 | + std::complex<double>* A, |
| 3028 | int64_t lda, |
| 3029 | - hipblasDoubleComplex* B, |
| 3030 | + std::complex<double>* B, |
| 3031 | int64_t ldb, |
| 3032 | double beta, |
| 3033 | - hipblasDoubleComplex* C, |
| 3034 | + std::complex<double>* C, |
| 3035 | int64_t ldc) |
| 3036 | { |
| 3037 | cblas_zher2k(CblasColMajor, |
| 3038 | @@ -3422,18 +3434,18 @@ void ref_symm(hipblasSideMode_t side, |
| 3039 | } |
| 3040 | |
| 3041 | template <> |
| 3042 | -void ref_symm(hipblasSideMode_t side, |
| 3043 | - hipblasFillMode_t uplo, |
| 3044 | - int64_t m, |
| 3045 | - int64_t n, |
| 3046 | - hipblasComplex alpha, |
| 3047 | - hipblasComplex* A, |
| 3048 | - int64_t lda, |
| 3049 | - hipblasComplex* B, |
| 3050 | - int64_t ldb, |
| 3051 | - hipblasComplex beta, |
| 3052 | - hipblasComplex* C, |
| 3053 | - int64_t ldc) |
| 3054 | +void ref_symm(hipblasSideMode_t side, |
| 3055 | + hipblasFillMode_t uplo, |
| 3056 | + int64_t m, |
| 3057 | + int64_t n, |
| 3058 | + std::complex<float> alpha, |
| 3059 | + std::complex<float>* A, |
| 3060 | + int64_t lda, |
| 3061 | + std::complex<float>* B, |
| 3062 | + int64_t ldb, |
| 3063 | + std::complex<float> beta, |
| 3064 | + std::complex<float>* C, |
| 3065 | + int64_t ldc) |
| 3066 | { |
| 3067 | cblas_csymm(CblasColMajor, |
| 3068 | (CBLAS_SIDE)side, |
| 3069 | @@ -3455,13 +3467,13 @@ void ref_symm(hipblasSideMode_t side, |
| 3070 | hipblasFillMode_t uplo, |
| 3071 | int64_t m, |
| 3072 | int64_t n, |
| 3073 | - hipblasDoubleComplex alpha, |
| 3074 | - hipblasDoubleComplex* A, |
| 3075 | + std::complex<double> alpha, |
| 3076 | + std::complex<double>* A, |
| 3077 | int64_t lda, |
| 3078 | - hipblasDoubleComplex* B, |
| 3079 | + std::complex<double>* B, |
| 3080 | int64_t ldb, |
| 3081 | - hipblasDoubleComplex beta, |
| 3082 | - hipblasDoubleComplex* C, |
| 3083 | + std::complex<double> beta, |
| 3084 | + std::complex<double>* C, |
| 3085 | int64_t ldc) |
| 3086 | { |
| 3087 | cblas_zsymm(CblasColMajor, |
| 3088 | @@ -3531,16 +3543,16 @@ void ref_syrk(hipblasFillMode_t uplo, |
| 3089 | } |
| 3090 | |
| 3091 | template <> |
| 3092 | -void ref_syrk(hipblasFillMode_t uplo, |
| 3093 | - hipblasOperation_t transA, |
| 3094 | - int64_t n, |
| 3095 | - int64_t k, |
| 3096 | - hipblasComplex alpha, |
| 3097 | - hipblasComplex* A, |
| 3098 | - int64_t lda, |
| 3099 | - hipblasComplex beta, |
| 3100 | - hipblasComplex* C, |
| 3101 | - int64_t ldc) |
| 3102 | +void ref_syrk(hipblasFillMode_t uplo, |
| 3103 | + hipblasOperation_t transA, |
| 3104 | + int64_t n, |
| 3105 | + int64_t k, |
| 3106 | + std::complex<float> alpha, |
| 3107 | + std::complex<float>* A, |
| 3108 | + int64_t lda, |
| 3109 | + std::complex<float> beta, |
| 3110 | + std::complex<float>* C, |
| 3111 | + int64_t ldc) |
| 3112 | { |
| 3113 | cblas_csyrk(CblasColMajor, |
| 3114 | (CBLAS_UPLO)uplo, |
| 3115 | @@ -3560,11 +3572,11 @@ void ref_syrk(hipblasFillMode_t uplo, |
| 3116 | hipblasOperation_t transA, |
| 3117 | int64_t n, |
| 3118 | int64_t k, |
| 3119 | - hipblasDoubleComplex alpha, |
| 3120 | - hipblasDoubleComplex* A, |
| 3121 | + std::complex<double> alpha, |
| 3122 | + std::complex<double>* A, |
| 3123 | int64_t lda, |
| 3124 | - hipblasDoubleComplex beta, |
| 3125 | - hipblasDoubleComplex* C, |
| 3126 | + std::complex<double> beta, |
| 3127 | + std::complex<double>* C, |
| 3128 | int64_t ldc) |
| 3129 | { |
| 3130 | cblas_zsyrk(CblasColMajor, |
| 3131 | @@ -3640,18 +3652,18 @@ void ref_syr2k(hipblasFillMode_t uplo, |
| 3132 | } |
| 3133 | |
| 3134 | template <> |
| 3135 | -void ref_syr2k(hipblasFillMode_t uplo, |
| 3136 | - hipblasOperation_t transA, |
| 3137 | - int64_t n, |
| 3138 | - int64_t k, |
| 3139 | - hipblasComplex alpha, |
| 3140 | - hipblasComplex* A, |
| 3141 | - int64_t lda, |
| 3142 | - hipblasComplex* B, |
| 3143 | - int64_t ldb, |
| 3144 | - hipblasComplex beta, |
| 3145 | - hipblasComplex* C, |
| 3146 | - int64_t ldc) |
| 3147 | +void ref_syr2k(hipblasFillMode_t uplo, |
| 3148 | + hipblasOperation_t transA, |
| 3149 | + int64_t n, |
| 3150 | + int64_t k, |
| 3151 | + std::complex<float> alpha, |
| 3152 | + std::complex<float>* A, |
| 3153 | + int64_t lda, |
| 3154 | + std::complex<float>* B, |
| 3155 | + int64_t ldb, |
| 3156 | + std::complex<float> beta, |
| 3157 | + std::complex<float>* C, |
| 3158 | + int64_t ldc) |
| 3159 | { |
| 3160 | cblas_csyr2k(CblasColMajor, |
| 3161 | (CBLAS_UPLO)uplo, |
| 3162 | @@ -3673,13 +3685,13 @@ void ref_syr2k(hipblasFillMode_t uplo, |
| 3163 | hipblasOperation_t transA, |
| 3164 | int64_t n, |
| 3165 | int64_t k, |
| 3166 | - hipblasDoubleComplex alpha, |
| 3167 | - hipblasDoubleComplex* A, |
| 3168 | + std::complex<double> alpha, |
| 3169 | + std::complex<double>* A, |
| 3170 | int64_t lda, |
| 3171 | - hipblasDoubleComplex* B, |
| 3172 | + std::complex<double>* B, |
| 3173 | int64_t ldb, |
| 3174 | - hipblasDoubleComplex beta, |
| 3175 | - hipblasDoubleComplex* C, |
| 3176 | + std::complex<double> beta, |
| 3177 | + std::complex<double>* C, |
| 3178 | int64_t ldc) |
| 3179 | { |
| 3180 | cblas_zsyr2k(CblasColMajor, |
| 3181 | @@ -3759,16 +3771,16 @@ void ref_trsm<double>(hipblasSideMode_t side, |
| 3182 | } |
| 3183 | |
| 3184 | template <> |
| 3185 | -void ref_trsm<hipblasComplex>(hipblasSideMode_t side, |
| 3186 | +void ref_trsm<std::complex<float>>(hipblasSideMode_t side, |
| 3187 | hipblasFillMode_t uplo, |
| 3188 | hipblasOperation_t transA, |
| 3189 | hipblasDiagType_t diag, |
| 3190 | int64_t m, |
| 3191 | int64_t n, |
| 3192 | - hipblasComplex alpha, |
| 3193 | - const hipblasComplex* A, |
| 3194 | + std::complex<float> alpha, |
| 3195 | + const std::complex<float>* A, |
| 3196 | int64_t lda, |
| 3197 | - hipblasComplex* B, |
| 3198 | + std::complex<float>* B, |
| 3199 | int64_t ldb) |
| 3200 | { |
| 3201 | cblas_ctrsm(CblasColMajor, |
| 3202 | @@ -3786,16 +3798,16 @@ void ref_trsm<hipblasComplex>(hipblasSideMode_t side, |
| 3203 | } |
| 3204 | |
| 3205 | template <> |
| 3206 | -void ref_trsm<hipblasDoubleComplex>(hipblasSideMode_t side, |
| 3207 | +void ref_trsm<std::complex<double>>(hipblasSideMode_t side, |
| 3208 | hipblasFillMode_t uplo, |
| 3209 | hipblasOperation_t transA, |
| 3210 | hipblasDiagType_t diag, |
| 3211 | int64_t m, |
| 3212 | int64_t n, |
| 3213 | - hipblasDoubleComplex alpha, |
| 3214 | - const hipblasDoubleComplex* A, |
| 3215 | + std::complex<double> alpha, |
| 3216 | + const std::complex<double>* A, |
| 3217 | int64_t lda, |
| 3218 | - hipblasDoubleComplex* B, |
| 3219 | + std::complex<double>* B, |
| 3220 | int64_t ldb) |
| 3221 | { |
| 3222 | cblas_ztrsm(CblasColMajor, |
| 3223 | @@ -3828,14 +3840,15 @@ void ref_trtri<double>(char uplo, char diag, int64_t n, double* A, int64_t lda) |
| 3224 | } |
| 3225 | |
| 3226 | template <> |
| 3227 | -void ref_trtri<hipblasComplex>(char uplo, char diag, int64_t n, hipblasComplex* A, int64_t lda) |
| 3228 | +void ref_trtri<std::complex<float>>( |
| 3229 | + char uplo, char diag, int64_t n, std::complex<float>* A, int64_t lda) |
| 3230 | { |
| 3231 | lapack_xtrtri(uplo, diag, n, A, lda); |
| 3232 | } |
| 3233 | |
| 3234 | template <> |
| 3235 | -void ref_trtri<hipblasDoubleComplex>( |
| 3236 | - char uplo, char diag, int64_t n, hipblasDoubleComplex* A, int64_t lda) |
| 3237 | +void ref_trtri<std::complex<double>>( |
| 3238 | + char uplo, char diag, int64_t n, std::complex<double>* A, int64_t lda) |
| 3239 | { |
| 3240 | lapack_xtrtri(uplo, diag, n, A, lda); |
| 3241 | } |
| 3242 | @@ -3898,17 +3911,17 @@ void ref_trmm<double>(hipblasSideMode_t side, |
| 3243 | } |
| 3244 | |
| 3245 | template <> |
| 3246 | -void ref_trmm<hipblasComplex>(hipblasSideMode_t side, |
| 3247 | - hipblasFillMode_t uplo, |
| 3248 | - hipblasOperation_t transA, |
| 3249 | - hipblasDiagType_t diag, |
| 3250 | - int64_t m, |
| 3251 | - int64_t n, |
| 3252 | - hipblasComplex alpha, |
| 3253 | - const hipblasComplex* A, |
| 3254 | - int64_t lda, |
| 3255 | - hipblasComplex* B, |
| 3256 | - int64_t ldb) |
| 3257 | +void ref_trmm<std::complex<float>>(hipblasSideMode_t side, |
| 3258 | + hipblasFillMode_t uplo, |
| 3259 | + hipblasOperation_t transA, |
| 3260 | + hipblasDiagType_t diag, |
| 3261 | + int64_t m, |
| 3262 | + int64_t n, |
| 3263 | + std::complex<float> alpha, |
| 3264 | + const std::complex<float>* A, |
| 3265 | + int64_t lda, |
| 3266 | + std::complex<float>* B, |
| 3267 | + int64_t ldb) |
| 3268 | { |
| 3269 | cblas_ctrmm(CblasColMajor, |
| 3270 | (CBLAS_SIDE)side, |
| 3271 | @@ -3925,16 +3938,16 @@ void ref_trmm<hipblasComplex>(hipblasSideMode_t side, |
| 3272 | } |
| 3273 | |
| 3274 | template <> |
| 3275 | -void ref_trmm<hipblasDoubleComplex>(hipblasSideMode_t side, |
| 3276 | +void ref_trmm<std::complex<double>>(hipblasSideMode_t side, |
| 3277 | hipblasFillMode_t uplo, |
| 3278 | hipblasOperation_t transA, |
| 3279 | hipblasDiagType_t diag, |
| 3280 | int64_t m, |
| 3281 | int64_t n, |
| 3282 | - hipblasDoubleComplex alpha, |
| 3283 | - const hipblasDoubleComplex* A, |
| 3284 | + std::complex<double> alpha, |
| 3285 | + const std::complex<double>* A, |
| 3286 | int64_t lda, |
| 3287 | - hipblasDoubleComplex* B, |
| 3288 | + std::complex<double>* B, |
| 3289 | int64_t ldb) |
| 3290 | { |
| 3291 | cblas_ztrmm(CblasColMajor, |
| 3292 | @@ -3989,7 +4002,7 @@ int64_t ref_potrf(char uplo, int64_t m, double* A, int64_t lda) |
| 3293 | } |
| 3294 | |
| 3295 | template <> |
| 3296 | -int64_t ref_potrf(char uplo, int64_t m, hipblasComplex* A, int64_t lda) |
| 3297 | +int64_t ref_potrf(char uplo, int64_t m, std::complex<float>* A, int64_t lda) |
| 3298 | { |
| 3299 | int64_t info; |
| 3300 | |
| 3301 | @@ -4003,7 +4016,7 @@ int64_t ref_potrf(char uplo, int64_t m, hipblasComplex* A, int64_t lda) |
| 3302 | } |
| 3303 | |
| 3304 | template <> |
| 3305 | -int64_t ref_potrf(char uplo, int64_t m, hipblasDoubleComplex* A, int64_t lda) |
| 3306 | +int64_t ref_potrf(char uplo, int64_t m, std::complex<double>* A, int64_t lda) |
| 3307 | { |
| 3308 | int64_t info; |
| 3309 | |
| 3310 | @@ -4046,30 +4059,30 @@ int64_t ref_getrf<double>(int64_t m, int64_t n, double* A, int64_t lda, int64_t* |
| 3311 | } |
| 3312 | |
| 3313 | template <> |
| 3314 | -int64_t |
| 3315 | - ref_getrf<hipblasComplex>(int64_t m, int64_t n, hipblasComplex* A, int64_t lda, int64_t* ipiv) |
| 3316 | +int64_t ref_getrf<std::complex<float>>( |
| 3317 | + int64_t m, int64_t n, std::complex<float>* A, int64_t lda, int64_t* ipiv) |
| 3318 | { |
| 3319 | int64_t info; |
| 3320 | |
| 3321 | #ifdef FLA_ENABLE_ILP64 |
| 3322 | info = LAPACKE_cgetrf(LAPACK_COL_MAJOR, m, n, (lapack_complex_float*)A, lda, ipiv); |
| 3323 | #else |
| 3324 | - cgetrf_(&m, &n, (hipblasComplex*)A, &lda, ipiv, &info); |
| 3325 | + cgetrf_(&m, &n, (std::complex<float>*)A, &lda, ipiv, &info); |
| 3326 | #endif |
| 3327 | |
| 3328 | return info; |
| 3329 | } |
| 3330 | |
| 3331 | template <> |
| 3332 | -int64_t ref_getrf<hipblasDoubleComplex>( |
| 3333 | - int64_t m, int64_t n, hipblasDoubleComplex* A, int64_t lda, int64_t* ipiv) |
| 3334 | +int64_t ref_getrf<std::complex<double>>( |
| 3335 | + int64_t m, int64_t n, std::complex<double>* A, int64_t lda, int64_t* ipiv) |
| 3336 | { |
| 3337 | int64_t info; |
| 3338 | |
| 3339 | #ifdef FLA_ENABLE_ILP64 |
| 3340 | info = LAPACKE_zgetrf(LAPACK_COL_MAJOR, m, n, (lapack_complex_double*)A, lda, ipiv); |
| 3341 | #else |
| 3342 | - zgetrf_(&m, &n, (hipblasDoubleComplex*)A, &lda, ipiv, &info); |
| 3343 | + zgetrf_(&m, &n, (std::complex<double>*)A, &lda, ipiv, &info); |
| 3344 | #endif |
| 3345 | |
| 3346 | return info; |
| 3347 | @@ -4119,14 +4132,14 @@ int64_t ref_getrs<double>(char trans, |
| 3348 | } |
| 3349 | |
| 3350 | template <> |
| 3351 | -int64_t ref_getrs<hipblasComplex>(char trans, |
| 3352 | - int64_t n, |
| 3353 | - int64_t nrhs, |
| 3354 | - hipblasComplex* A, |
| 3355 | - int64_t lda, |
| 3356 | - int64_t* ipiv, |
| 3357 | - hipblasComplex* B, |
| 3358 | - int64_t ldb) |
| 3359 | +int64_t ref_getrs<std::complex<float>>(char trans, |
| 3360 | + int64_t n, |
| 3361 | + int64_t nrhs, |
| 3362 | + std::complex<float>* A, |
| 3363 | + int64_t lda, |
| 3364 | + int64_t* ipiv, |
| 3365 | + std::complex<float>* B, |
| 3366 | + int64_t ldb) |
| 3367 | { |
| 3368 | int64_t info; |
| 3369 | |
| 3370 | @@ -4141,20 +4154,28 @@ int64_t ref_getrs<hipblasComplex>(char trans, |
| 3371 | (lapack_complex_float*)B, |
| 3372 | ldb); |
| 3373 | #else |
| 3374 | - cgetrs_(&trans, &n, &nrhs, (hipblasComplex*)A, &lda, ipiv, (hipblasComplex*)B, &ldb, &info); |
| 3375 | + cgetrs_(&trans, |
| 3376 | + &n, |
| 3377 | + &nrhs, |
| 3378 | + (std::complex<float>*)A, |
| 3379 | + &lda, |
| 3380 | + ipiv, |
| 3381 | + (std::complex<float>*)B, |
| 3382 | + &ldb, |
| 3383 | + &info); |
| 3384 | #endif |
| 3385 | |
| 3386 | return info; |
| 3387 | } |
| 3388 | |
| 3389 | template <> |
| 3390 | -int64_t ref_getrs<hipblasDoubleComplex>(char trans, |
| 3391 | +int64_t ref_getrs<std::complex<double>>(char trans, |
| 3392 | int64_t n, |
| 3393 | int64_t nrhs, |
| 3394 | - hipblasDoubleComplex* A, |
| 3395 | + std::complex<double>* A, |
| 3396 | int64_t lda, |
| 3397 | int64_t* ipiv, |
| 3398 | - hipblasDoubleComplex* B, |
| 3399 | + std::complex<double>* B, |
| 3400 | int64_t ldb) |
| 3401 | { |
| 3402 | |
| 3403 | @@ -4174,10 +4195,10 @@ int64_t ref_getrs<hipblasDoubleComplex>(char trans, |
| 3404 | zgetrs_(&trans, |
| 3405 | &n, |
| 3406 | &nrhs, |
| 3407 | - (hipblasDoubleComplex*)A, |
| 3408 | + (std::complex<double>*)A, |
| 3409 | &lda, |
| 3410 | ipiv, |
| 3411 | - (hipblasDoubleComplex*)B, |
| 3412 | + (std::complex<double>*)B, |
| 3413 | &ldb, |
| 3414 | &info); |
| 3415 | #endif |
| 3416 | @@ -4217,8 +4238,12 @@ int64_t |
| 3417 | } |
| 3418 | |
| 3419 | template <> |
| 3420 | -int64_t ref_getri<hipblasComplex>( |
| 3421 | - int64_t n, hipblasComplex* A, int64_t lda, int64_t* ipiv, hipblasComplex* work, int64_t lwork) |
| 3422 | +int64_t ref_getri<std::complex<float>>(int64_t n, |
| 3423 | + std::complex<float>* A, |
| 3424 | + int64_t lda, |
| 3425 | + int64_t* ipiv, |
| 3426 | + std::complex<float>* work, |
| 3427 | + int64_t lwork) |
| 3428 | { |
| 3429 | int64_t info; |
| 3430 | |
| 3431 | @@ -4238,11 +4263,11 @@ int64_t ref_getri<hipblasComplex>( |
| 3432 | } |
| 3433 | |
| 3434 | template <> |
| 3435 | -int64_t ref_getri<hipblasDoubleComplex>(int64_t n, |
| 3436 | - hipblasDoubleComplex* A, |
| 3437 | +int64_t ref_getri<std::complex<double>>(int64_t n, |
| 3438 | + std::complex<double>* A, |
| 3439 | int64_t lda, |
| 3440 | int64_t* ipiv, |
| 3441 | - hipblasDoubleComplex* work, |
| 3442 | + std::complex<double>* work, |
| 3443 | int64_t lwork) |
| 3444 | { |
| 3445 | int64_t info; |
| 3446 | @@ -4293,13 +4318,13 @@ int64_t ref_geqrf<double>( |
| 3447 | return info; |
| 3448 | } |
| 3449 | template <> |
| 3450 | -int64_t ref_geqrf<hipblasComplex>(int64_t m, |
| 3451 | - int64_t n, |
| 3452 | - hipblasComplex* A, |
| 3453 | - int64_t lda, |
| 3454 | - hipblasComplex* tau, |
| 3455 | - hipblasComplex* work, |
| 3456 | - int64_t lwork) |
| 3457 | +int64_t ref_geqrf<std::complex<float>>(int64_t m, |
| 3458 | + int64_t n, |
| 3459 | + std::complex<float>* A, |
| 3460 | + int64_t lda, |
| 3461 | + std::complex<float>* tau, |
| 3462 | + std::complex<float>* work, |
| 3463 | + int64_t lwork) |
| 3464 | { |
| 3465 | int64_t info; |
| 3466 | |
| 3467 | @@ -4320,12 +4345,12 @@ int64_t ref_geqrf<hipblasComplex>(int64_t m, |
| 3468 | } |
| 3469 | |
| 3470 | template <> |
| 3471 | -int64_t ref_geqrf<hipblasDoubleComplex>(int64_t m, |
| 3472 | +int64_t ref_geqrf<std::complex<double>>(int64_t m, |
| 3473 | int64_t n, |
| 3474 | - hipblasDoubleComplex* A, |
| 3475 | + std::complex<double>* A, |
| 3476 | int64_t lda, |
| 3477 | - hipblasDoubleComplex* tau, |
| 3478 | - hipblasDoubleComplex* work, |
| 3479 | + std::complex<double>* tau, |
| 3480 | + std::complex<double>* work, |
| 3481 | int64_t lwork) |
| 3482 | { |
| 3483 | int64_t info; |
| 3484 | @@ -4394,16 +4419,16 @@ int64_t ref_gels<double>(char trans, |
| 3485 | } |
| 3486 | |
| 3487 | template <> |
| 3488 | -int64_t ref_gels<hipblasComplex>(char trans, |
| 3489 | - int64_t m, |
| 3490 | - int64_t n, |
| 3491 | - int64_t nrhs, |
| 3492 | - hipblasComplex* A, |
| 3493 | - int64_t lda, |
| 3494 | - hipblasComplex* B, |
| 3495 | - int64_t ldb, |
| 3496 | - hipblasComplex* work, |
| 3497 | - int64_t lwork) |
| 3498 | +int64_t ref_gels<std::complex<float>>(char trans, |
| 3499 | + int64_t m, |
| 3500 | + int64_t n, |
| 3501 | + int64_t nrhs, |
| 3502 | + std::complex<float>* A, |
| 3503 | + int64_t lda, |
| 3504 | + std::complex<float>* B, |
| 3505 | + int64_t ldb, |
| 3506 | + std::complex<float>* work, |
| 3507 | + int64_t lwork) |
| 3508 | { |
| 3509 | int64_t info; |
| 3510 | #ifdef FLA_ENABLE_ILP64 |
| 3511 | @@ -4426,15 +4451,15 @@ int64_t ref_gels<hipblasComplex>(char trans, |
| 3512 | } |
| 3513 | |
| 3514 | template <> |
| 3515 | -int64_t ref_gels<hipblasDoubleComplex>(char trans, |
| 3516 | +int64_t ref_gels<std::complex<double>>(char trans, |
| 3517 | int64_t m, |
| 3518 | int64_t n, |
| 3519 | int64_t nrhs, |
| 3520 | - hipblasDoubleComplex* A, |
| 3521 | + std::complex<double>* A, |
| 3522 | int64_t lda, |
| 3523 | - hipblasDoubleComplex* B, |
| 3524 | + std::complex<double>* B, |
| 3525 | int64_t ldb, |
| 3526 | - hipblasDoubleComplex* work, |
| 3527 | + std::complex<double>* work, |
| 3528 | int64_t lwork) |
| 3529 | { |
| 3530 | int64_t info; |
| 3531 | diff --git a/clients/common/clients_common.cpp b/clients/common/clients_common.cpp |
| 3532 | index bc84584..e62556a 100644 |
| 3533 | --- a/clients/common/clients_common.cpp |
| 3534 | +++ b/clients/common/clients_common.cpp |
| 3535 | @@ -1,5 +1,5 @@ |
| 3536 | /* ************************************************************************ |
| 3537 | - * Copyright (C) 2016-2023 Advanced Micro Devices, Inc. All rights reserved. |
| 3538 | + * Copyright (C) 2016-2025 Advanced Micro Devices, Inc. All rights reserved. |
| 3539 | * |
| 3540 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
| 3541 | * of this software and associated documentation files (the "Software"), to deal |
| 3542 | @@ -265,7 +265,7 @@ void run_function(const func_map& map, const Arguments& arg, const std::string& |
| 3543 | auto match = map.find(arg.function); |
| 3544 | if(match == map.end()) |
| 3545 | throw std::invalid_argument("Invalid combination --function "s + arg.function |
| 3546 | - + " --a_type "s + hipblas_datatype2string(arg.a_type) + msg); |
| 3547 | + + " --a_type "s + hip_datatype2string(arg.a_type) + msg); |
| 3548 | match->second(arg); |
| 3549 | } |
| 3550 | |
| 3551 | @@ -743,11 +743,10 @@ struct perf_blas<T, U, std::enable_if_t<std::is_same<T, hipblasHalf>{}>> : hipbl |
| 3552 | }; |
| 3553 | |
| 3554 | template <typename T, typename U> |
| 3555 | -struct perf_blas< |
| 3556 | - T, |
| 3557 | - U, |
| 3558 | - std::enable_if_t<std::is_same<T, hipblasDoubleComplex>{} || std::is_same<T, hipblasComplex>{}>> |
| 3559 | - : hipblas_test_valid |
| 3560 | +struct perf_blas<T, |
| 3561 | + U, |
| 3562 | + std::enable_if_t<std::is_same<T, std::complex<double>>{} |
| 3563 | + || std::is_same<T, std::complex<float>>{}>> : hipblas_test_valid |
| 3564 | { |
| 3565 | void operator()(const Arguments& arg) |
| 3566 | { |
| 3567 | @@ -946,10 +945,12 @@ struct perf_blas_axpy_ex< |
| 3568 | hipblasHalf> && std::is_same_v<Ta, Tx> && std::is_same_v<Tx, Ty> && std::is_same_v<Ty, Tex>) |
| 3569 | || (std::is_same_v< |
| 3570 | Ta, |
| 3571 | - hipblasComplex> && std::is_same_v<Ta, Tx> && std::is_same_v<Tx, Ty> && std::is_same_v<Ty, Tex>) |
| 3572 | + std::complex< |
| 3573 | + float>> && std::is_same_v<Ta, Tx> && std::is_same_v<Tx, Ty> && std::is_same_v<Ty, Tex>) |
| 3574 | || (std::is_same_v< |
| 3575 | Ta, |
| 3576 | - hipblasDoubleComplex> && std::is_same_v<Ta, Tx> && std::is_same_v<Tx, Ty> && std::is_same_v<Ty, Tex>) |
| 3577 | + std::complex< |
| 3578 | + double>> && std::is_same_v<Ta, Tx> && std::is_same_v<Tx, Ty> && std::is_same_v<Ty, Tex>) |
| 3579 | || (std::is_same_v< |
| 3580 | Ta, |
| 3581 | hipblasHalf> && std::is_same_v<Ta, Tx> && std::is_same_v<Tx, Ty> && std::is_same_v<Tex, float>) |
| 3582 | @@ -992,9 +993,9 @@ struct perf_blas_dot_ex< |
| 3583 | && std::is_same<Ty, Tr>{} && std::is_same<Tr, Tex>{}) |
| 3584 | || (std::is_same<Tx, hipblasHalf>{} && std::is_same<Tx, Ty>{} |
| 3585 | && std::is_same<Ty, Tr>{} && std::is_same<Tr, Tex>{}) |
| 3586 | - || (std::is_same<Tx, hipblasComplex>{} && std::is_same<Tx, Ty>{} |
| 3587 | + || (std::is_same<Tx, std::complex<float>>{} && std::is_same<Tx, Ty>{} |
| 3588 | && std::is_same<Ty, Tr>{} && std::is_same<Tr, Tex>{}) |
| 3589 | - || (std::is_same<Tx, hipblasDoubleComplex>{} && std::is_same<Tx, Ty>{} |
| 3590 | + || (std::is_same<Tx, std::complex<double>>{} && std::is_same<Tx, Ty>{} |
| 3591 | && std::is_same<Ty, Tr>{} && std::is_same<Tr, Tex>{}) |
| 3592 | || (std::is_same<Tx, hipblasHalf>{} && std::is_same<Tx, Ty>{} |
| 3593 | && std::is_same<Ty, Tr>{} && std::is_same<Tex, float>{}) |
| 3594 | @@ -1029,9 +1030,9 @@ struct perf_blas_nrm2_ex< |
| 3595 | std::enable_if_t< |
| 3596 | (std::is_same<Tx, float>{} && std::is_same<Tx, Tr>{} && std::is_same<Tr, Tex>{}) |
| 3597 | || (std::is_same<Tx, double>{} && std::is_same<Tx, Tr>{} && std::is_same<Tr, Tex>{}) |
| 3598 | - || (std::is_same<Tx, hipblasComplex>{} && std::is_same<Tr, float>{} |
| 3599 | + || (std::is_same<Tx, std::complex<float>>{} && std::is_same<Tr, float>{} |
| 3600 | && std::is_same<Tr, Tex>{}) |
| 3601 | - || (std::is_same<Tx, hipblasDoubleComplex>{} && std::is_same<Tr, double>{} |
| 3602 | + || (std::is_same<Tx, std::complex<double>>{} && std::is_same<Tr, double>{} |
| 3603 | && std::is_same<Tr, Tex>{}) |
| 3604 | || (std::is_same<Tx, hipblasHalf>{} && std::is_same<Tr, Tx>{} && std::is_same<Tex, float>{}) |
| 3605 | || (std::is_same<Tx, hipblasBfloat16>{} && std::is_same<Tr, Tx>{} |
| 3606 | @@ -1063,15 +1064,15 @@ struct perf_blas_rot_ex< |
| 3607 | && std::is_same<Tcs, Tex>{}) |
| 3608 | || (std::is_same<Tx, double>{} && std::is_same<Ty, Tx>{} |
| 3609 | && std::is_same<Ty, Tcs>{} && std::is_same<Tex, Tcs>{}) |
| 3610 | - || (std::is_same<Tx, hipblasComplex>{} && std::is_same<Ty, Tx>{} |
| 3611 | + || (std::is_same<Tx, std::complex<float>>{} && std::is_same<Ty, Tx>{} |
| 3612 | && std::is_same<Tcs, Ty>{} && std::is_same<Tcs, Tex>{}) |
| 3613 | - || (std::is_same<Tx, hipblasDoubleComplex>{} && std::is_same<Tx, Ty>{} |
| 3614 | + || (std::is_same<Tx, std::complex<double>>{} && std::is_same<Tx, Ty>{} |
| 3615 | && std::is_same<Tcs, Ty>{} && std::is_same<Tex, Tcs>{}) |
| 3616 | - || (std::is_same<Tx, hipblasComplex>{} && std::is_same<Ty, Tx>{} |
| 3617 | - && std::is_same<Tcs, float>{} && std::is_same<Tex, hipblasComplex>{}) |
| 3618 | - || (std::is_same<Tx, hipblasDoubleComplex>{} && std::is_same<Tx, Ty>{} |
| 3619 | + || (std::is_same<Tx, std::complex<float>>{} && std::is_same<Ty, Tx>{} |
| 3620 | + && std::is_same<Tcs, float>{} && std::is_same<Tex, std::complex<float>>{}) |
| 3621 | + || (std::is_same<Tx, std::complex<double>>{} && std::is_same<Tx, Ty>{} |
| 3622 | && std::is_same<Tcs, double>{} |
| 3623 | - && std::is_same<Tex, hipblasDoubleComplex>{}) |
| 3624 | + && std::is_same<Tex, std::complex<double>>{}) |
| 3625 | || (std::is_same<Tx, hipblasHalf>{} && std::is_same<Ty, Tx>{} |
| 3626 | && std::is_same<Tcs, Ty>{} && std::is_same<Tex, float>{}) |
| 3627 | || (std::is_same<Tx, hipblasBfloat16>{} && std::is_same<Ty, Tx>{} |
| 3628 | @@ -1102,13 +1103,13 @@ struct perf_blas_rot< |
| 3629 | std::enable_if_t<(std::is_same<Ti, float>{} && std::is_same<Ti, To>{} && std::is_same<To, Tc>{}) |
| 3630 | || (std::is_same<Ti, double>{} && std::is_same<Ti, To>{} |
| 3631 | && std::is_same<To, Tc>{}) |
| 3632 | - || (std::is_same<Ti, hipblasComplex>{} && std::is_same<To, float>{} |
| 3633 | - && std::is_same<Tc, hipblasComplex>{}) |
| 3634 | - || (std::is_same<Ti, hipblasComplex>{} && std::is_same<To, float>{} |
| 3635 | + || (std::is_same<Ti, std::complex<float>>{} && std::is_same<To, float>{} |
| 3636 | + && std::is_same<Tc, std::complex<float>>{}) |
| 3637 | + || (std::is_same<Ti, std::complex<float>>{} && std::is_same<To, float>{} |
| 3638 | && std::is_same<Tc, float>{}) |
| 3639 | - || (std::is_same<Ti, hipblasDoubleComplex>{} && std::is_same<To, double>{} |
| 3640 | - && std::is_same<Tc, hipblasDoubleComplex>{}) |
| 3641 | - || (std::is_same<Ti, hipblasDoubleComplex>{} && std::is_same<To, double>{} |
| 3642 | + || (std::is_same<Ti, std::complex<double>>{} && std::is_same<To, double>{} |
| 3643 | + && std::is_same<Tc, std::complex<double>>{}) |
| 3644 | + || (std::is_same<Ti, std::complex<double>>{} && std::is_same<To, double>{} |
| 3645 | && std::is_same<Tc, double>{})>> : hipblas_test_valid |
| 3646 | { |
| 3647 | void operator()(const Arguments& arg) |
| 3648 | @@ -1131,12 +1132,12 @@ template <typename Ta, typename Tb> |
| 3649 | struct perf_blas_scal< |
| 3650 | Ta, |
| 3651 | Tb, |
| 3652 | - std::enable_if_t<(std::is_same<Ta, double>{} && std::is_same<Tb, hipblasDoubleComplex>{}) |
| 3653 | - || (std::is_same<Ta, float>{} && std::is_same<Tb, hipblasComplex>{}) |
| 3654 | + std::enable_if_t<(std::is_same<Ta, double>{} && std::is_same<Tb, std::complex<double>>{}) |
| 3655 | + || (std::is_same<Ta, float>{} && std::is_same<Tb, std::complex<float>>{}) |
| 3656 | || (std::is_same<Ta, Tb>{} && std::is_same<Ta, float>{}) |
| 3657 | || (std::is_same<Ta, Tb>{} && std::is_same<Ta, double>{}) |
| 3658 | - || (std::is_same<Ta, Tb>{} && std::is_same<Ta, hipblasComplex>{}) |
| 3659 | - || (std::is_same<Ta, Tb>{} && std::is_same<Ta, hipblasDoubleComplex>{})>> |
| 3660 | + || (std::is_same<Ta, Tb>{} && std::is_same<Ta, std::complex<float>>{}) |
| 3661 | + || (std::is_same<Ta, Tb>{} && std::is_same<Ta, std::complex<double>>{})>> |
| 3662 | : hipblas_test_valid |
| 3663 | { |
| 3664 | void operator()(const Arguments& arg) |
| 3665 | @@ -1164,14 +1165,15 @@ struct perf_blas_scal_ex< |
| 3666 | (std::is_same<Ta, float>{} && std::is_same<Ta, Tx>{} && std::is_same<Tx, Tex>{}) |
| 3667 | || (std::is_same<Ta, double>{} && std::is_same<Ta, Tx>{} && std::is_same<Tx, Tex>{}) |
| 3668 | || (std::is_same<Ta, hipblasHalf>{} && std::is_same<Ta, Tx>{} && std::is_same<Tx, Tex>{}) |
| 3669 | - || (std::is_same<Ta, hipblasComplex>{} && std::is_same<Ta, Tx>{} && std::is_same<Tx, Tex>{}) |
| 3670 | - || (std::is_same<Ta, hipblasDoubleComplex>{} && std::is_same<Ta, Tx>{} |
| 3671 | + || (std::is_same<Ta, std::complex<float>>{} && std::is_same<Ta, Tx>{} |
| 3672 | + && std::is_same<Tx, Tex>{}) |
| 3673 | + || (std::is_same<Ta, std::complex<double>>{} && std::is_same<Ta, Tx>{} |
| 3674 | && std::is_same<Tx, Tex>{}) |
| 3675 | || (std::is_same<Ta, hipblasHalf>{} && std::is_same<Ta, Tx>{} && std::is_same<Tex, float>{}) |
| 3676 | || (std::is_same<Ta, float>{} && std::is_same<Tx, hipblasHalf>{} && std::is_same<Ta, Tex>{}) |
| 3677 | - || (std::is_same<Ta, float>{} && std::is_same<Tx, hipblasComplex>{} |
| 3678 | + || (std::is_same<Ta, float>{} && std::is_same<Tx, std::complex<float>>{} |
| 3679 | && std::is_same<Tx, Tex>{}) |
| 3680 | - || (std::is_same<Ta, double>{} && std::is_same<Tx, hipblasDoubleComplex>{} |
| 3681 | + || (std::is_same<Ta, double>{} && std::is_same<Tx, std::complex<double>>{} |
| 3682 | && std::is_same<Tx, Tex>{}) |
| 3683 | || (std::is_same<Ta, hipblasBfloat16>{} && std::is_same<Ta, Tx>{} |
| 3684 | && std::is_same<Tex, float>{}) |
| 3685 | diff --git a/clients/common/hipblas_datatype2string.cpp b/clients/common/hipblas_datatype2string.cpp |
| 3686 | index 098ec70..42b0af9 100644 |
| 3687 | --- a/clients/common/hipblas_datatype2string.cpp |
| 3688 | +++ b/clients/common/hipblas_datatype2string.cpp |
| 3689 | @@ -1,5 +1,5 @@ |
| 3690 | /* ************************************************************************ |
| 3691 | - * Copyright (C) 2016-2023 Advanced Micro Devices, Inc. All rights reserved. |
| 3692 | + * Copyright (C) 2016-2025 Advanced Micro Devices, Inc. All rights reserved. |
| 3693 | * |
| 3694 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
| 3695 | * of this software and associated documentation files (the "Software"), to deal |
| 3696 | @@ -165,28 +165,30 @@ hipblasSideMode_t char2hipblas_side(char value) |
| 3697 | } |
| 3698 | |
| 3699 | // clang-format off |
| 3700 | -hipblasDatatype_t string2hipblas_datatype(const std::string& value) |
| 3701 | +/*! \brief Convert string to a hipDataType. Returns HIP_R_32F if invalid string. */ |
| 3702 | +hipDataType string2hip_datatype(const std::string& value) |
| 3703 | { |
| 3704 | return |
| 3705 | - value == "f16_r" || value == "h" ? HIPBLAS_R_16F : |
| 3706 | - value == "f32_r" || value == "s" ? HIPBLAS_R_32F : |
| 3707 | - value == "f64_r" || value == "d" ? HIPBLAS_R_64F : |
| 3708 | - value == "bf16_r" ? HIPBLAS_R_16B : |
| 3709 | - value == "f16_c" ? HIPBLAS_C_16B : |
| 3710 | - value == "f32_c" || value == "c" ? HIPBLAS_C_32F : |
| 3711 | - value == "f64_c" || value == "z" ? HIPBLAS_C_64F : |
| 3712 | - value == "bf16_c" ? HIPBLAS_C_16B : |
| 3713 | - value == "i8_r" ? HIPBLAS_R_8I : |
| 3714 | - value == "i32_r" ? HIPBLAS_R_32I : |
| 3715 | - value == "i8_c" ? HIPBLAS_C_8I : |
| 3716 | - value == "i32_c" ? HIPBLAS_C_32I : |
| 3717 | - value == "u8_r" ? HIPBLAS_R_8U : |
| 3718 | - value == "u32_r" ? HIPBLAS_R_32U : |
| 3719 | - value == "u8_c" ? HIPBLAS_C_8U : |
| 3720 | - value == "u32_c" ? HIPBLAS_C_32U : |
| 3721 | - HIPBLAS_DATATYPE_INVALID; |
| 3722 | + value == "f16_r" || value == "h" ? HIP_R_16F : |
| 3723 | + value == "f32_r" || value == "s" ? HIP_R_32F : |
| 3724 | + value == "f64_r" || value == "d" ? HIP_R_64F : |
| 3725 | + value == "bf16_r" ? HIP_R_16BF : |
| 3726 | + value == "f16_c" ? HIP_C_16BF : |
| 3727 | + value == "f32_c" || value == "c" ? HIP_C_32F : |
| 3728 | + value == "f64_c" || value == "z" ? HIP_C_64F : |
| 3729 | + value == "bf16_c" ? HIP_C_16BF : |
| 3730 | + value == "i8_r" ? HIP_R_8I : |
| 3731 | + value == "i32_r" ? HIP_R_32I : |
| 3732 | + value == "i8_c" ? HIP_C_8I : |
| 3733 | + value == "i32_c" ? HIP_C_32I : |
| 3734 | + value == "u8_r" ? HIP_R_8U : |
| 3735 | + value == "u32_r" ? HIP_R_32U : |
| 3736 | + value == "u8_c" ? HIP_C_8U : |
| 3737 | + value == "u32_c" ? HIP_C_32U : |
| 3738 | + HIP_R_32F; |
| 3739 | } |
| 3740 | |
| 3741 | +/*! \brief Convert string to a hipblasComputeType_t. Returns HIPBLAS_COMPUTE_32F if invalid string. */ |
| 3742 | hipblasComputeType_t string2hipblas_computetype(const std::string& value) |
| 3743 | { |
| 3744 | return value == "c16f" ? HIPBLAS_COMPUTE_16F : |
| 3745 | diff --git a/clients/common/hipblas_gentest.py b/clients/common/hipblas_gentest.py |
| 3746 | index 5e9143a..4f572e0 100755 |
| 3747 | --- a/clients/common/hipblas_gentest.py |
| 3748 | +++ b/clients/common/hipblas_gentest.py |
| 3749 | @@ -1,5 +1,5 @@ |
| 3750 | #!/usr/bin/env python3 |
| 3751 | -"""Copyright (C) 2018-2023 Advanced Micro Devices, Inc. All rights reserved. |
| 3752 | +"""Copyright (C) 2018-2025 Advanced Micro Devices, Inc. All rights reserved. |
| 3753 | |
| 3754 | Permission is hereby granted, free of charge, to any person obtaining a copy |
| 3755 | of this software and associated documentation files (the "Software"), to deal |
| 3756 | @@ -114,9 +114,6 @@ Expand hipBLAS YAML test data file into binary Arguments records |
| 3757 | default=[]) |
| 3758 | parser.add_argument('-t', '--template', |
| 3759 | type=argparse.FileType('r')) |
| 3760 | - parser.add_argument('--hipblas_v2', |
| 3761 | - action='store_true', |
| 3762 | - help="Uses HIPBLAS_V2 datatypes, ensure HIPBLAS_V2 is defined in your build when using this.") |
| 3763 | return parser.parse_args() |
| 3764 | |
| 3765 | |
| 3766 | @@ -185,7 +182,7 @@ def get_datatypes(doc): |
| 3767 | for name, decl in declaration.items(): |
| 3768 | if isinstance(decl, dict): |
| 3769 | # Create derived class type based on bases and attr entries |
| 3770 | - decl_attr = decl.get('attr_v2') if args.get('hipblas_v2') else decl.get('attr') |
| 3771 | + decl_attr = decl.get('attr') |
| 3772 | dt[name] = type(name, |
| 3773 | tuple([eval(t, dt) |
| 3774 | for t in decl.get('bases') or () |
| 3775 | diff --git a/clients/common/hipblas_parse_data.cpp b/clients/common/hipblas_parse_data.cpp |
| 3776 | index f2450d7..4b72b2a 100644 |
| 3777 | --- a/clients/common/hipblas_parse_data.cpp |
| 3778 | +++ b/clients/common/hipblas_parse_data.cpp |
| 3779 | @@ -1,5 +1,5 @@ |
| 3780 | /* ************************************************************************ |
| 3781 | - * Copyright (C) 2019-2023 Advanced Micro Devices, Inc. All rights reserved. |
| 3782 | + * Copyright (C) 2019-2025 Advanced Micro Devices, Inc. All rights reserved. |
| 3783 | * |
| 3784 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
| 3785 | * of this software and associated documentation files (the "Software"), to deal |
| 3786 | @@ -35,15 +35,19 @@ |
| 3787 | // Parse YAML data |
| 3788 | static std::string hipblas_parse_yaml(const std::string& yaml) |
| 3789 | { |
| 3790 | +#ifdef WIN32 |
| 3791 | + // Explicitly run via `python.exe`, without relying on the .py file being |
| 3792 | + // treated as an executable that should be run via the python interpreter. |
| 3793 | + std::string python_command_launcher = "python "; |
| 3794 | +#else |
| 3795 | + // Rely on the shebang in the file, e.g. `#!/usr/bin/env python3`. |
| 3796 | + std::string python_command_launcher = ""; |
| 3797 | +#endif |
| 3798 | + |
| 3799 | std::string tmp = hipblas_tempname(); |
| 3800 | auto exepath = hipblas_exepath(); |
| 3801 | -#ifdef HIPBLAS_V2 |
| 3802 | - auto cmd = exepath + "hipblas_gentest.py --hipblas_v2 --template " + exepath |
| 3803 | + auto cmd = python_command_launcher + exepath + "hipblas_gentest.py --template " + exepath |
| 3804 | + "hipblas_template.yaml -o " + tmp + " " + yaml; |
| 3805 | -#else |
| 3806 | - auto cmd = exepath + "hipblas_gentest.py --template " + exepath + "hipblas_template.yaml -o " |
| 3807 | - + tmp + " " + yaml; |
| 3808 | -#endif |
| 3809 | std::cerr << cmd << std::endl; |
| 3810 | |
| 3811 | #ifdef WIN32 |
| 3812 | @@ -51,7 +55,7 @@ static std::string hipblas_parse_yaml(const std::string& yaml) |
| 3813 | if(status == -1) |
| 3814 | exit(EXIT_FAILURE); |
| 3815 | #else |
| 3816 | - int status = system(cmd.c_str()); |
| 3817 | + int status = system(cmd.c_str()); |
| 3818 | if(status == -1 || !WIFEXITED(status) || WEXITSTATUS(status)) |
| 3819 | exit(EXIT_FAILURE); |
| 3820 | #endif |
| 3821 | diff --git a/clients/common/hipblas_template_specialization.cpp b/clients/common/hipblas_template_specialization.cpp |
| 3822 | deleted file mode 100644 |
| 3823 | index b073cdc..0000000 |
| 3824 | --- a/clients/common/hipblas_template_specialization.cpp |
| 3825 | +++ /dev/null |
| 3826 | @@ -1,14310 +0,0 @@ |
| 3827 | -/* ************************************************************************ |
| 3828 | - * Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved. |
| 3829 | - * |
| 3830 | - * Permission is hereby granted, free of charge, to any person obtaining a copy |
| 3831 | - * of this software and associated documentation files (the "Software"), to deal |
| 3832 | - * in the Software without restriction, including without limitation the rights |
| 3833 | - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 3834 | - * copies of the Software, and to permit persons to whom the Software is |
| 3835 | - * furnished to do so, subject to the following conditions: |
| 3836 | - * |
| 3837 | - * The above copyright notice and this permission notice shall be included in |
| 3838 | - * all copies or substantial portions of the Software. |
| 3839 | - * |
| 3840 | - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 3841 | - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 3842 | - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 3843 | - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 3844 | - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 3845 | - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 3846 | - * SOFTWARE. |
| 3847 | - * |
| 3848 | - * |
| 3849 | - * ************************************************************************/ |
| 3850 | - |
| 3851 | -#include "hipblas.h" |
| 3852 | -#include "hipblas.hpp" |
| 3853 | - |
| 3854 | -#ifndef WIN32 |
| 3855 | -#include "hipblas_fortran.hpp" |
| 3856 | -#else |
| 3857 | -#include "hipblas_no_fortran.hpp" |
| 3858 | -#endif |
| 3859 | - |
| 3860 | -#include <typeinfo> |
| 3861 | - |
| 3862 | -// This file's purpose is now only for casting hipblasComplex -> hipComplex when necessary. |
| 3863 | -// When we finish transitioning to hipComplex, this file can be deleted. |
| 3864 | - |
| 3865 | -/* |
| 3866 | - * =========================================================================== |
| 3867 | - * level 1 BLAS |
| 3868 | - * =========================================================================== |
| 3869 | - */ |
| 3870 | - |
| 3871 | -#ifdef HIPBLAS_V2 |
| 3872 | -// axpy |
| 3873 | -hipblasStatus_t hipblasCaxpyCast(hipblasHandle_t handle, |
| 3874 | - int n, |
| 3875 | - const hipblasComplex* alpha, |
| 3876 | - const hipblasComplex* x, |
| 3877 | - int incx, |
| 3878 | - hipblasComplex* y, |
| 3879 | - int incy) |
| 3880 | -{ |
| 3881 | - return hipblasCaxpy( |
| 3882 | - handle, n, (const hipComplex*)alpha, (const hipComplex*)x, incx, (hipComplex*)y, incy); |
| 3883 | -} |
| 3884 | - |
| 3885 | -hipblasStatus_t hipblasZaxpyCast(hipblasHandle_t handle, |
| 3886 | - int n, |
| 3887 | - const hipblasDoubleComplex* alpha, |
| 3888 | - const hipblasDoubleComplex* x, |
| 3889 | - int incx, |
| 3890 | - hipblasDoubleComplex* y, |
| 3891 | - int incy) |
| 3892 | -{ |
| 3893 | - return hipblasZaxpy(handle, |
| 3894 | - n, |
| 3895 | - (const hipDoubleComplex*)alpha, |
| 3896 | - (const hipDoubleComplex*)x, |
| 3897 | - incx, |
| 3898 | - (hipDoubleComplex*)y, |
| 3899 | - incy); |
| 3900 | -} |
| 3901 | - |
| 3902 | -hipblasStatus_t hipblasCaxpyCast_64(hipblasHandle_t handle, |
| 3903 | - int64_t n, |
| 3904 | - const hipblasComplex* alpha, |
| 3905 | - const hipblasComplex* x, |
| 3906 | - int64_t incx, |
| 3907 | - hipblasComplex* y, |
| 3908 | - int64_t incy) |
| 3909 | -{ |
| 3910 | - return hipblasCaxpy_64( |
| 3911 | - handle, n, (const hipComplex*)alpha, (const hipComplex*)x, incx, (hipComplex*)y, incy); |
| 3912 | -} |
| 3913 | - |
| 3914 | -hipblasStatus_t hipblasZaxpyCast_64(hipblasHandle_t handle, |
| 3915 | - int64_t n, |
| 3916 | - const hipblasDoubleComplex* alpha, |
| 3917 | - const hipblasDoubleComplex* x, |
| 3918 | - int64_t incx, |
| 3919 | - hipblasDoubleComplex* y, |
| 3920 | - int64_t incy) |
| 3921 | -{ |
| 3922 | - return hipblasZaxpy_64(handle, |
| 3923 | - n, |
| 3924 | - (const hipDoubleComplex*)alpha, |
| 3925 | - (const hipDoubleComplex*)x, |
| 3926 | - incx, |
| 3927 | - (hipDoubleComplex*)y, |
| 3928 | - incy); |
| 3929 | -} |
| 3930 | - |
| 3931 | -// axpy_batched |
| 3932 | -hipblasStatus_t hipblasCaxpyBatchedCast(hipblasHandle_t handle, |
| 3933 | - int n, |
| 3934 | - const hipblasComplex* alpha, |
| 3935 | - const hipblasComplex* const x[], |
| 3936 | - int incx, |
| 3937 | - hipblasComplex* const y[], |
| 3938 | - int incy, |
| 3939 | - int batch_count) |
| 3940 | -{ |
| 3941 | - return hipblasCaxpyBatched(handle, |
| 3942 | - n, |
| 3943 | - (const hipComplex*)alpha, |
| 3944 | - (const hipComplex* const*)x, |
| 3945 | - incx, |
| 3946 | - (hipComplex* const*)y, |
| 3947 | - incy, |
| 3948 | - batch_count); |
| 3949 | -} |
| 3950 | - |
| 3951 | -hipblasStatus_t hipblasZaxpyBatchedCast(hipblasHandle_t handle, |
| 3952 | - int n, |
| 3953 | - const hipblasDoubleComplex* alpha, |
| 3954 | - const hipblasDoubleComplex* const x[], |
| 3955 | - int incx, |
| 3956 | - hipblasDoubleComplex* const y[], |
| 3957 | - int incy, |
| 3958 | - int batch_count) |
| 3959 | -{ |
| 3960 | - return hipblasZaxpyBatched(handle, |
| 3961 | - n, |
| 3962 | - (const hipDoubleComplex*)alpha, |
| 3963 | - (const hipDoubleComplex* const*)x, |
| 3964 | - incx, |
| 3965 | - (hipDoubleComplex* const*)y, |
| 3966 | - incy, |
| 3967 | - batch_count); |
| 3968 | -} |
| 3969 | - |
| 3970 | -hipblasStatus_t hipblasCaxpyBatchedCast_64(hipblasHandle_t handle, |
| 3971 | - int64_t n, |
| 3972 | - const hipblasComplex* alpha, |
| 3973 | - const hipblasComplex* const x[], |
| 3974 | - int64_t incx, |
| 3975 | - hipblasComplex* const y[], |
| 3976 | - int64_t incy, |
| 3977 | - int64_t batch_count) |
| 3978 | -{ |
| 3979 | - return hipblasCaxpyBatched_64(handle, |
| 3980 | - n, |
| 3981 | - (const hipComplex*)alpha, |
| 3982 | - (const hipComplex* const*)x, |
| 3983 | - incx, |
| 3984 | - (hipComplex* const*)y, |
| 3985 | - incy, |
| 3986 | - batch_count); |
| 3987 | -} |
| 3988 | - |
| 3989 | -hipblasStatus_t hipblasZaxpyBatchedCast_64(hipblasHandle_t handle, |
| 3990 | - int64_t n, |
| 3991 | - const hipblasDoubleComplex* alpha, |
| 3992 | - const hipblasDoubleComplex* const x[], |
| 3993 | - int64_t incx, |
| 3994 | - hipblasDoubleComplex* const y[], |
| 3995 | - int64_t incy, |
| 3996 | - int64_t batch_count) |
| 3997 | -{ |
| 3998 | - return hipblasZaxpyBatched_64(handle, |
| 3999 | - n, |
| 4000 | - (const hipDoubleComplex*)alpha, |
| 4001 | - (const hipDoubleComplex* const*)x, |
| 4002 | - incx, |
| 4003 | - (hipDoubleComplex* const*)y, |
| 4004 | - incy, |
| 4005 | - batch_count); |
| 4006 | -} |
| 4007 | - |
| 4008 | -// axpy_strided_batched |
| 4009 | -hipblasStatus_t hipblasCaxpyStridedBatchedCast(hipblasHandle_t handle, |
| 4010 | - int n, |
| 4011 | - const hipblasComplex* alpha, |
| 4012 | - const hipblasComplex* x, |
| 4013 | - int incx, |
| 4014 | - hipblasStride stridex, |
| 4015 | - hipblasComplex* y, |
| 4016 | - int incy, |
| 4017 | - hipblasStride stridey, |
| 4018 | - int batch_count) |
| 4019 | -{ |
| 4020 | - return hipblasCaxpyStridedBatched(handle, |
| 4021 | - n, |
| 4022 | - (const hipComplex*)alpha, |
| 4023 | - (const hipComplex*)x, |
| 4024 | - incx, |
| 4025 | - stridex, |
| 4026 | - (hipComplex*)y, |
| 4027 | - incy, |
| 4028 | - stridey, |
| 4029 | - batch_count); |
| 4030 | -} |
| 4031 | - |
| 4032 | -hipblasStatus_t hipblasZaxpyStridedBatchedCast(hipblasHandle_t handle, |
| 4033 | - int n, |
| 4034 | - const hipblasDoubleComplex* alpha, |
| 4035 | - const hipblasDoubleComplex* x, |
| 4036 | - int incx, |
| 4037 | - hipblasStride stridex, |
| 4038 | - hipblasDoubleComplex* y, |
| 4039 | - int incy, |
| 4040 | - hipblasStride stridey, |
| 4041 | - int batch_count) |
| 4042 | -{ |
| 4043 | - return hipblasZaxpyStridedBatched(handle, |
| 4044 | - n, |
| 4045 | - (const hipDoubleComplex*)alpha, |
| 4046 | - (const hipDoubleComplex*)x, |
| 4047 | - incx, |
| 4048 | - stridex, |
| 4049 | - (hipDoubleComplex*)y, |
| 4050 | - incy, |
| 4051 | - stridey, |
| 4052 | - batch_count); |
| 4053 | -} |
| 4054 | - |
| 4055 | -hipblasStatus_t hipblasCaxpyStridedBatchedCast_64(hipblasHandle_t handle, |
| 4056 | - int64_t n, |
| 4057 | - const hipblasComplex* alpha, |
| 4058 | - const hipblasComplex* x, |
| 4059 | - int64_t incx, |
| 4060 | - hipblasStride stridex, |
| 4061 | - hipblasComplex* y, |
| 4062 | - int64_t incy, |
| 4063 | - hipblasStride stridey, |
| 4064 | - int64_t batch_count) |
| 4065 | -{ |
| 4066 | - return hipblasCaxpyStridedBatched_64(handle, |
| 4067 | - n, |
| 4068 | - (const hipComplex*)alpha, |
| 4069 | - (const hipComplex*)x, |
| 4070 | - incx, |
| 4071 | - stridex, |
| 4072 | - (hipComplex*)y, |
| 4073 | - incy, |
| 4074 | - stridey, |
| 4075 | - batch_count); |
| 4076 | -} |
| 4077 | - |
| 4078 | -hipblasStatus_t hipblasZaxpyStridedBatchedCast_64(hipblasHandle_t handle, |
| 4079 | - int64_t n, |
| 4080 | - const hipblasDoubleComplex* alpha, |
| 4081 | - const hipblasDoubleComplex* x, |
| 4082 | - int64_t incx, |
| 4083 | - hipblasStride stridex, |
| 4084 | - hipblasDoubleComplex* y, |
| 4085 | - int64_t incy, |
| 4086 | - hipblasStride stridey, |
| 4087 | - int64_t batch_count) |
| 4088 | -{ |
| 4089 | - return hipblasZaxpyStridedBatched_64(handle, |
| 4090 | - n, |
| 4091 | - (const hipDoubleComplex*)alpha, |
| 4092 | - (const hipDoubleComplex*)x, |
| 4093 | - incx, |
| 4094 | - stridex, |
| 4095 | - (hipDoubleComplex*)y, |
| 4096 | - incy, |
| 4097 | - stridey, |
| 4098 | - batch_count); |
| 4099 | -} |
| 4100 | - |
| 4101 | -// swap |
| 4102 | -hipblasStatus_t hipblasCswapCast( |
| 4103 | - hipblasHandle_t handle, int n, hipblasComplex* x, int incx, hipblasComplex* y, int incy) |
| 4104 | -{ |
| 4105 | - return hipblasCswap(handle, n, (hipComplex*)x, incx, (hipComplex*)y, incy); |
| 4106 | -} |
| 4107 | - |
| 4108 | -hipblasStatus_t hipblasZswapCast(hipblasHandle_t handle, |
| 4109 | - int n, |
| 4110 | - hipblasDoubleComplex* x, |
| 4111 | - int incx, |
| 4112 | - hipblasDoubleComplex* y, |
| 4113 | - int incy) |
| 4114 | -{ |
| 4115 | - return hipblasZswap(handle, n, (hipDoubleComplex*)x, incx, (hipDoubleComplex*)y, incy); |
| 4116 | -} |
| 4117 | - |
| 4118 | -// swap_64 |
| 4119 | -hipblasStatus_t hipblasCswapCast_64(hipblasHandle_t handle, |
| 4120 | - int64_t n, |
| 4121 | - hipblasComplex* x, |
| 4122 | - int64_t incx, |
| 4123 | - hipblasComplex* y, |
| 4124 | - int64_t incy) |
| 4125 | -{ |
| 4126 | - return hipblasCswap_64(handle, n, (hipComplex*)x, incx, (hipComplex*)y, incy); |
| 4127 | -} |
| 4128 | - |
| 4129 | -hipblasStatus_t hipblasZswapCast_64(hipblasHandle_t handle, |
| 4130 | - int64_t n, |
| 4131 | - hipblasDoubleComplex* x, |
| 4132 | - int64_t incx, |
| 4133 | - hipblasDoubleComplex* y, |
| 4134 | - int64_t incy) |
| 4135 | -{ |
| 4136 | - return hipblasZswap_64(handle, n, (hipDoubleComplex*)x, incx, (hipDoubleComplex*)y, incy); |
| 4137 | -} |
| 4138 | - |
| 4139 | -// swap_batched |
| 4140 | -hipblasStatus_t hipblasCswapBatchedCast(hipblasHandle_t handle, |
| 4141 | - int n, |
| 4142 | - hipblasComplex* const x[], |
| 4143 | - int incx, |
| 4144 | - hipblasComplex* const y[], |
| 4145 | - int incy, |
| 4146 | - int batch_count) |
| 4147 | -{ |
| 4148 | - return hipblasCswapBatched( |
| 4149 | - handle, n, (hipComplex* const*)x, incx, (hipComplex* const*)y, incy, batch_count); |
| 4150 | -} |
| 4151 | - |
| 4152 | -hipblasStatus_t hipblasZswapBatchedCast(hipblasHandle_t handle, |
| 4153 | - int n, |
| 4154 | - hipblasDoubleComplex* const x[], |
| 4155 | - int incx, |
| 4156 | - hipblasDoubleComplex* const y[], |
| 4157 | - int incy, |
| 4158 | - int batch_count) |
| 4159 | -{ |
| 4160 | - return hipblasZswapBatched(handle, |
| 4161 | - n, |
| 4162 | - (hipDoubleComplex* const*)x, |
| 4163 | - incx, |
| 4164 | - (hipDoubleComplex* const*)y, |
| 4165 | - incy, |
| 4166 | - batch_count); |
| 4167 | -} |
| 4168 | - |
| 4169 | -// swap_batched_64 |
| 4170 | -hipblasStatus_t hipblasCswapBatchedCast_64(hipblasHandle_t handle, |
| 4171 | - int64_t n, |
| 4172 | - hipblasComplex* const x[], |
| 4173 | - int64_t incx, |
| 4174 | - hipblasComplex* const y[], |
| 4175 | - int64_t incy, |
| 4176 | - int64_t batch_count) |
| 4177 | -{ |
| 4178 | - return hipblasCswapBatched_64( |
| 4179 | - handle, n, (hipComplex* const*)x, incx, (hipComplex* const*)y, incy, batch_count); |
| 4180 | -} |
| 4181 | - |
| 4182 | -hipblasStatus_t hipblasZswapBatchedCast_64(hipblasHandle_t handle, |
| 4183 | - int64_t n, |
| 4184 | - hipblasDoubleComplex* const x[], |
| 4185 | - int64_t incx, |
| 4186 | - hipblasDoubleComplex* const y[], |
| 4187 | - int64_t incy, |
| 4188 | - int64_t batch_count) |
| 4189 | -{ |
| 4190 | - return hipblasZswapBatched_64(handle, |
| 4191 | - n, |
| 4192 | - (hipDoubleComplex* const*)x, |
| 4193 | - incx, |
| 4194 | - (hipDoubleComplex* const*)y, |
| 4195 | - incy, |
| 4196 | - batch_count); |
| 4197 | -} |
| 4198 | - |
| 4199 | -// swap_strided_batched |
| 4200 | -hipblasStatus_t hipblasCswapStridedBatchedCast(hipblasHandle_t handle, |
| 4201 | - int n, |
| 4202 | - hipblasComplex* x, |
| 4203 | - int incx, |
| 4204 | - hipblasStride stridex, |
| 4205 | - hipblasComplex* y, |
| 4206 | - int incy, |
| 4207 | - hipblasStride stridey, |
| 4208 | - int batch_count) |
| 4209 | -{ |
| 4210 | - return hipblasCswapStridedBatched( |
| 4211 | - handle, n, (hipComplex*)x, incx, stridex, (hipComplex*)y, incy, stridey, batch_count); |
| 4212 | -} |
| 4213 | - |
| 4214 | -hipblasStatus_t hipblasZswapStridedBatchedCast(hipblasHandle_t handle, |
| 4215 | - int n, |
| 4216 | - hipblasDoubleComplex* x, |
| 4217 | - int incx, |
| 4218 | - hipblasStride stridex, |
| 4219 | - hipblasDoubleComplex* y, |
| 4220 | - int incy, |
| 4221 | - hipblasStride stridey, |
| 4222 | - int batch_count) |
| 4223 | -{ |
| 4224 | - return hipblasZswapStridedBatched(handle, |
| 4225 | - n, |
| 4226 | - (hipDoubleComplex*)x, |
| 4227 | - incx, |
| 4228 | - stridex, |
| 4229 | - (hipDoubleComplex*)y, |
| 4230 | - incy, |
| 4231 | - stridey, |
| 4232 | - batch_count); |
| 4233 | -} |
| 4234 | - |
| 4235 | -// swap_strided_batched_64 |
| 4236 | -hipblasStatus_t hipblasCswapStridedBatchedCast_64(hipblasHandle_t handle, |
| 4237 | - int64_t n, |
| 4238 | - hipblasComplex* x, |
| 4239 | - int64_t incx, |
| 4240 | - hipblasStride stridex, |
| 4241 | - hipblasComplex* y, |
| 4242 | - int64_t incy, |
| 4243 | - hipblasStride stridey, |
| 4244 | - int64_t batch_count) |
| 4245 | -{ |
| 4246 | - return hipblasCswapStridedBatched_64( |
| 4247 | - handle, n, (hipComplex*)x, incx, stridex, (hipComplex*)y, incy, stridey, batch_count); |
| 4248 | -} |
| 4249 | - |
| 4250 | -hipblasStatus_t hipblasZswapStridedBatchedCast_64(hipblasHandle_t handle, |
| 4251 | - int64_t n, |
| 4252 | - hipblasDoubleComplex* x, |
| 4253 | - int64_t incx, |
| 4254 | - hipblasStride stridex, |
| 4255 | - hipblasDoubleComplex* y, |
| 4256 | - int64_t incy, |
| 4257 | - hipblasStride stridey, |
| 4258 | - int64_t batch_count) |
| 4259 | -{ |
| 4260 | - return hipblasZswapStridedBatched_64(handle, |
| 4261 | - n, |
| 4262 | - (hipDoubleComplex*)x, |
| 4263 | - incx, |
| 4264 | - stridex, |
| 4265 | - (hipDoubleComplex*)y, |
| 4266 | - incy, |
| 4267 | - stridey, |
| 4268 | - batch_count); |
| 4269 | -} |
| 4270 | - |
| 4271 | -// copy |
| 4272 | -hipblasStatus_t hipblasCcopyCast( |
| 4273 | - hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasComplex* y, int incy) |
| 4274 | -{ |
| 4275 | - return hipblasCcopy(handle, n, (const hipComplex*)x, incx, (hipComplex*)y, incy); |
| 4276 | -} |
| 4277 | - |
| 4278 | -hipblasStatus_t hipblasZcopyCast(hipblasHandle_t handle, |
| 4279 | - int n, |
| 4280 | - const hipblasDoubleComplex* x, |
| 4281 | - int incx, |
| 4282 | - hipblasDoubleComplex* y, |
| 4283 | - int incy) |
| 4284 | -{ |
| 4285 | - return hipblasZcopy(handle, n, (const hipDoubleComplex*)x, incx, (hipDoubleComplex*)y, incy); |
| 4286 | -} |
| 4287 | - |
| 4288 | -hipblasStatus_t hipblasCcopyCast_64(hipblasHandle_t handle, |
| 4289 | - int64_t n, |
| 4290 | - const hipblasComplex* x, |
| 4291 | - int64_t incx, |
| 4292 | - hipblasComplex* y, |
| 4293 | - int64_t incy) |
| 4294 | -{ |
| 4295 | - return hipblasCcopy_64(handle, n, (const hipComplex*)x, incx, (hipComplex*)y, incy); |
| 4296 | -} |
| 4297 | - |
| 4298 | -hipblasStatus_t hipblasZcopyCast_64(hipblasHandle_t handle, |
| 4299 | - int64_t n, |
| 4300 | - const hipblasDoubleComplex* x, |
| 4301 | - int64_t incx, |
| 4302 | - hipblasDoubleComplex* y, |
| 4303 | - int64_t incy) |
| 4304 | -{ |
| 4305 | - return hipblasZcopy_64(handle, n, (const hipDoubleComplex*)x, incx, (hipDoubleComplex*)y, incy); |
| 4306 | -} |
| 4307 | - |
| 4308 | -// batched |
| 4309 | -hipblasStatus_t hipblasCcopyBatchedCast(hipblasHandle_t handle, |
| 4310 | - int n, |
| 4311 | - const hipblasComplex* const x[], |
| 4312 | - int incx, |
| 4313 | - hipblasComplex* const y[], |
| 4314 | - int incy, |
| 4315 | - int batch_count) |
| 4316 | -{ |
| 4317 | - return hipblasCcopyBatched( |
| 4318 | - handle, n, (const hipComplex* const*)x, incx, (hipComplex* const*)y, incy, batch_count); |
| 4319 | -} |
| 4320 | - |
| 4321 | -hipblasStatus_t hipblasZcopyBatchedCast(hipblasHandle_t handle, |
| 4322 | - int n, |
| 4323 | - const hipblasDoubleComplex* const x[], |
| 4324 | - int incx, |
| 4325 | - hipblasDoubleComplex* const y[], |
| 4326 | - int incy, |
| 4327 | - int batch_count) |
| 4328 | -{ |
| 4329 | - return hipblasZcopyBatched(handle, |
| 4330 | - n, |
| 4331 | - (const hipDoubleComplex* const*)x, |
| 4332 | - incx, |
| 4333 | - (hipDoubleComplex* const*)y, |
| 4334 | - incy, |
| 4335 | - batch_count); |
| 4336 | -} |
| 4337 | - |
| 4338 | -hipblasStatus_t hipblasCcopyBatchedCast_64(hipblasHandle_t handle, |
| 4339 | - int64_t n, |
| 4340 | - const hipblasComplex* const x[], |
| 4341 | - int64_t incx, |
| 4342 | - hipblasComplex* const y[], |
| 4343 | - int64_t incy, |
| 4344 | - int64_t batch_count) |
| 4345 | -{ |
| 4346 | - return hipblasCcopyBatched_64( |
| 4347 | - handle, n, (const hipComplex* const*)x, incx, (hipComplex* const*)y, incy, batch_count); |
| 4348 | -} |
| 4349 | - |
| 4350 | -hipblasStatus_t hipblasZcopyBatchedCast_64(hipblasHandle_t handle, |
| 4351 | - int64_t n, |
| 4352 | - const hipblasDoubleComplex* const x[], |
| 4353 | - int64_t incx, |
| 4354 | - hipblasDoubleComplex* const y[], |
| 4355 | - int64_t incy, |
| 4356 | - int64_t batch_count) |
| 4357 | -{ |
| 4358 | - return hipblasZcopyBatched_64(handle, |
| 4359 | - n, |
| 4360 | - (const hipDoubleComplex* const*)x, |
| 4361 | - incx, |
| 4362 | - (hipDoubleComplex* const*)y, |
| 4363 | - incy, |
| 4364 | - batch_count); |
| 4365 | -} |
| 4366 | - |
| 4367 | -// strided_batched |
| 4368 | -hipblasStatus_t hipblasCcopyStridedBatchedCast(hipblasHandle_t handle, |
| 4369 | - int n, |
| 4370 | - const hipblasComplex* x, |
| 4371 | - int incx, |
| 4372 | - hipblasStride stridex, |
| 4373 | - hipblasComplex* y, |
| 4374 | - int incy, |
| 4375 | - hipblasStride stridey, |
| 4376 | - int batch_count) |
| 4377 | -{ |
| 4378 | - return hipblasCcopyStridedBatched( |
| 4379 | - handle, n, (const hipComplex*)x, incx, stridex, (hipComplex*)y, incy, stridey, batch_count); |
| 4380 | -} |
| 4381 | - |
| 4382 | -hipblasStatus_t hipblasZcopyStridedBatchedCast(hipblasHandle_t handle, |
| 4383 | - int n, |
| 4384 | - const hipblasDoubleComplex* x, |
| 4385 | - int incx, |
| 4386 | - hipblasStride stridex, |
| 4387 | - hipblasDoubleComplex* y, |
| 4388 | - int incy, |
| 4389 | - hipblasStride stridey, |
| 4390 | - int batch_count) |
| 4391 | -{ |
| 4392 | - return hipblasZcopyStridedBatched(handle, |
| 4393 | - n, |
| 4394 | - (const hipDoubleComplex*)x, |
| 4395 | - incx, |
| 4396 | - stridex, |
| 4397 | - (hipDoubleComplex*)y, |
| 4398 | - incy, |
| 4399 | - stridey, |
| 4400 | - batch_count); |
| 4401 | -} |
| 4402 | - |
| 4403 | -hipblasStatus_t hipblasCcopyStridedBatchedCast_64(hipblasHandle_t handle, |
| 4404 | - int64_t n, |
| 4405 | - const hipblasComplex* x, |
| 4406 | - int64_t incx, |
| 4407 | - hipblasStride stridex, |
| 4408 | - hipblasComplex* y, |
| 4409 | - int64_t incy, |
| 4410 | - hipblasStride stridey, |
| 4411 | - int64_t batch_count) |
| 4412 | -{ |
| 4413 | - return hipblasCcopyStridedBatched_64( |
| 4414 | - handle, n, (const hipComplex*)x, incx, stridex, (hipComplex*)y, incy, stridey, batch_count); |
| 4415 | -} |
| 4416 | - |
| 4417 | -hipblasStatus_t hipblasZcopyStridedBatchedCast_64(hipblasHandle_t handle, |
| 4418 | - int64_t n, |
| 4419 | - const hipblasDoubleComplex* x, |
| 4420 | - int64_t incx, |
| 4421 | - hipblasStride stridex, |
| 4422 | - hipblasDoubleComplex* y, |
| 4423 | - int64_t incy, |
| 4424 | - hipblasStride stridey, |
| 4425 | - int64_t batch_count) |
| 4426 | -{ |
| 4427 | - return hipblasZcopyStridedBatched_64(handle, |
| 4428 | - n, |
| 4429 | - (const hipDoubleComplex*)x, |
| 4430 | - incx, |
| 4431 | - stridex, |
| 4432 | - (hipDoubleComplex*)y, |
| 4433 | - incy, |
| 4434 | - stridey, |
| 4435 | - batch_count); |
| 4436 | -} |
| 4437 | - |
| 4438 | -// dot |
| 4439 | -hipblasStatus_t hipblasCdotuCast(hipblasHandle_t handle, |
| 4440 | - int n, |
| 4441 | - const hipblasComplex* x, |
| 4442 | - int incx, |
| 4443 | - const hipblasComplex* y, |
| 4444 | - int incy, |
| 4445 | - hipblasComplex* result) |
| 4446 | -{ |
| 4447 | - return hipblasCdotu( |
| 4448 | - handle, n, (const hipComplex*)x, incx, (const hipComplex*)y, incy, (hipComplex*)result); |
| 4449 | -} |
| 4450 | - |
| 4451 | -hipblasStatus_t hipblasZdotuCast(hipblasHandle_t handle, |
| 4452 | - int n, |
| 4453 | - const hipblasDoubleComplex* x, |
| 4454 | - int incx, |
| 4455 | - const hipblasDoubleComplex* y, |
| 4456 | - int incy, |
| 4457 | - hipblasDoubleComplex* result) |
| 4458 | -{ |
| 4459 | - return hipblasZdotu(handle, |
| 4460 | - n, |
| 4461 | - (const hipDoubleComplex*)x, |
| 4462 | - incx, |
| 4463 | - (const hipDoubleComplex*)y, |
| 4464 | - incy, |
| 4465 | - (hipDoubleComplex*)result); |
| 4466 | -} |
| 4467 | - |
| 4468 | -hipblasStatus_t hipblasCdotcCast(hipblasHandle_t handle, |
| 4469 | - int n, |
| 4470 | - const hipblasComplex* x, |
| 4471 | - int incx, |
| 4472 | - const hipblasComplex* y, |
| 4473 | - int incy, |
| 4474 | - hipblasComplex* result) |
| 4475 | -{ |
| 4476 | - return hipblasCdotc( |
| 4477 | - handle, n, (const hipComplex*)x, incx, (const hipComplex*)y, incy, (hipComplex*)result); |
| 4478 | -} |
| 4479 | - |
| 4480 | -hipblasStatus_t hipblasZdotcCast(hipblasHandle_t handle, |
| 4481 | - int n, |
| 4482 | - const hipblasDoubleComplex* x, |
| 4483 | - int incx, |
| 4484 | - const hipblasDoubleComplex* y, |
| 4485 | - int incy, |
| 4486 | - hipblasDoubleComplex* result) |
| 4487 | -{ |
| 4488 | - return hipblasZdotc(handle, |
| 4489 | - n, |
| 4490 | - (const hipDoubleComplex*)x, |
| 4491 | - incx, |
| 4492 | - (const hipDoubleComplex*)y, |
| 4493 | - incy, |
| 4494 | - (hipDoubleComplex*)result); |
| 4495 | -} |
| 4496 | - |
| 4497 | -hipblasStatus_t hipblasCdotuCast_64(hipblasHandle_t handle, |
| 4498 | - int64_t n, |
| 4499 | - const hipblasComplex* x, |
| 4500 | - int64_t incx, |
| 4501 | - const hipblasComplex* y, |
| 4502 | - int64_t incy, |
| 4503 | - hipblasComplex* result) |
| 4504 | -{ |
| 4505 | - return hipblasCdotu_64( |
| 4506 | - handle, n, (const hipComplex*)x, incx, (const hipComplex*)y, incy, (hipComplex*)result); |
| 4507 | -} |
| 4508 | - |
| 4509 | -hipblasStatus_t hipblasZdotuCast_64(hipblasHandle_t handle, |
| 4510 | - int64_t n, |
| 4511 | - const hipblasDoubleComplex* x, |
| 4512 | - int64_t incx, |
| 4513 | - const hipblasDoubleComplex* y, |
| 4514 | - int64_t incy, |
| 4515 | - hipblasDoubleComplex* result) |
| 4516 | -{ |
| 4517 | - return hipblasZdotu_64(handle, |
| 4518 | - n, |
| 4519 | - (const hipDoubleComplex*)x, |
| 4520 | - incx, |
| 4521 | - (const hipDoubleComplex*)y, |
| 4522 | - incy, |
| 4523 | - (hipDoubleComplex*)result); |
| 4524 | -} |
| 4525 | - |
| 4526 | -hipblasStatus_t hipblasCdotcCast_64(hipblasHandle_t handle, |
| 4527 | - int64_t n, |
| 4528 | - const hipblasComplex* x, |
| 4529 | - int64_t incx, |
| 4530 | - const hipblasComplex* y, |
| 4531 | - int64_t incy, |
| 4532 | - hipblasComplex* result) |
| 4533 | -{ |
| 4534 | - return hipblasCdotc_64( |
| 4535 | - handle, n, (const hipComplex*)x, incx, (const hipComplex*)y, incy, (hipComplex*)result); |
| 4536 | -} |
| 4537 | - |
| 4538 | -hipblasStatus_t hipblasZdotcCast_64(hipblasHandle_t handle, |
| 4539 | - int64_t n, |
| 4540 | - const hipblasDoubleComplex* x, |
| 4541 | - int64_t incx, |
| 4542 | - const hipblasDoubleComplex* y, |
| 4543 | - int64_t incy, |
| 4544 | - hipblasDoubleComplex* result) |
| 4545 | -{ |
| 4546 | - return hipblasZdotc_64(handle, |
| 4547 | - n, |
| 4548 | - (const hipDoubleComplex*)x, |
| 4549 | - incx, |
| 4550 | - (const hipDoubleComplex*)y, |
| 4551 | - incy, |
| 4552 | - (hipDoubleComplex*)result); |
| 4553 | -} |
| 4554 | - |
| 4555 | -// dot_batched |
| 4556 | -hipblasStatus_t hipblasCdotuBatchedCast(hipblasHandle_t handle, |
| 4557 | - int n, |
| 4558 | - const hipblasComplex* const x[], |
| 4559 | - int incx, |
| 4560 | - const hipblasComplex* const y[], |
| 4561 | - int incy, |
| 4562 | - int batch_count, |
| 4563 | - hipblasComplex* result) |
| 4564 | -{ |
| 4565 | - return hipblasCdotuBatched(handle, |
| 4566 | - n, |
| 4567 | - (const hipComplex* const*)x, |
| 4568 | - incx, |
| 4569 | - (const hipComplex* const*)y, |
| 4570 | - incy, |
| 4571 | - batch_count, |
| 4572 | - (hipComplex*)result); |
| 4573 | -} |
| 4574 | - |
| 4575 | -hipblasStatus_t hipblasCdotcBatchedCast(hipblasHandle_t handle, |
| 4576 | - int n, |
| 4577 | - const hipblasComplex* const x[], |
| 4578 | - int incx, |
| 4579 | - const hipblasComplex* const y[], |
| 4580 | - int incy, |
| 4581 | - int batch_count, |
| 4582 | - hipblasComplex* result) |
| 4583 | -{ |
| 4584 | - return hipblasCdotcBatched(handle, |
| 4585 | - n, |
| 4586 | - (const hipComplex* const*)x, |
| 4587 | - incx, |
| 4588 | - (const hipComplex* const*)y, |
| 4589 | - incy, |
| 4590 | - batch_count, |
| 4591 | - (hipComplex*)result); |
| 4592 | -} |
| 4593 | - |
| 4594 | -hipblasStatus_t hipblasZdotuBatchedCast(hipblasHandle_t handle, |
| 4595 | - int n, |
| 4596 | - const hipblasDoubleComplex* const x[], |
| 4597 | - int incx, |
| 4598 | - const hipblasDoubleComplex* const y[], |
| 4599 | - int incy, |
| 4600 | - int batch_count, |
| 4601 | - hipblasDoubleComplex* result) |
| 4602 | -{ |
| 4603 | - return hipblasZdotuBatched(handle, |
| 4604 | - n, |
| 4605 | - (const hipDoubleComplex* const*)x, |
| 4606 | - incx, |
| 4607 | - (const hipDoubleComplex* const*)y, |
| 4608 | - incy, |
| 4609 | - batch_count, |
| 4610 | - (hipDoubleComplex*)result); |
| 4611 | -} |
| 4612 | - |
| 4613 | -hipblasStatus_t hipblasZdotcBatchedCast(hipblasHandle_t handle, |
| 4614 | - int n, |
| 4615 | - const hipblasDoubleComplex* const x[], |
| 4616 | - int incx, |
| 4617 | - const hipblasDoubleComplex* const y[], |
| 4618 | - int incy, |
| 4619 | - int batch_count, |
| 4620 | - hipblasDoubleComplex* result) |
| 4621 | -{ |
| 4622 | - return hipblasZdotcBatched(handle, |
| 4623 | - n, |
| 4624 | - (const hipDoubleComplex* const*)x, |
| 4625 | - incx, |
| 4626 | - (const hipDoubleComplex* const*)y, |
| 4627 | - incy, |
| 4628 | - batch_count, |
| 4629 | - (hipDoubleComplex*)result); |
| 4630 | -} |
| 4631 | - |
| 4632 | -hipblasStatus_t hipblasCdotuBatchedCast_64(hipblasHandle_t handle, |
| 4633 | - int64_t n, |
| 4634 | - const hipblasComplex* const x[], |
| 4635 | - int64_t incx, |
| 4636 | - const hipblasComplex* const y[], |
| 4637 | - int64_t incy, |
| 4638 | - int64_t batch_count, |
| 4639 | - hipblasComplex* result) |
| 4640 | -{ |
| 4641 | - return hipblasCdotuBatched_64(handle, |
| 4642 | - n, |
| 4643 | - (const hipComplex* const*)x, |
| 4644 | - incx, |
| 4645 | - (const hipComplex* const*)y, |
| 4646 | - incy, |
| 4647 | - batch_count, |
| 4648 | - (hipComplex*)result); |
| 4649 | -} |
| 4650 | - |
| 4651 | -hipblasStatus_t hipblasCdotcBatchedCast_64(hipblasHandle_t handle, |
| 4652 | - int64_t n, |
| 4653 | - const hipblasComplex* const x[], |
| 4654 | - int64_t incx, |
| 4655 | - const hipblasComplex* const y[], |
| 4656 | - int64_t incy, |
| 4657 | - int64_t batch_count, |
| 4658 | - hipblasComplex* result) |
| 4659 | -{ |
| 4660 | - return hipblasCdotcBatched_64(handle, |
| 4661 | - n, |
| 4662 | - (const hipComplex* const*)x, |
| 4663 | - incx, |
| 4664 | - (const hipComplex* const*)y, |
| 4665 | - incy, |
| 4666 | - batch_count, |
| 4667 | - (hipComplex*)result); |
| 4668 | -} |
| 4669 | - |
| 4670 | -hipblasStatus_t hipblasZdotuBatchedCast_64(hipblasHandle_t handle, |
| 4671 | - int64_t n, |
| 4672 | - const hipblasDoubleComplex* const x[], |
| 4673 | - int64_t incx, |
| 4674 | - const hipblasDoubleComplex* const y[], |
| 4675 | - int64_t incy, |
| 4676 | - int64_t batch_count, |
| 4677 | - hipblasDoubleComplex* result) |
| 4678 | -{ |
| 4679 | - return hipblasZdotuBatched_64(handle, |
| 4680 | - n, |
| 4681 | - (const hipDoubleComplex* const*)x, |
| 4682 | - incx, |
| 4683 | - (const hipDoubleComplex* const*)y, |
| 4684 | - incy, |
| 4685 | - batch_count, |
| 4686 | - (hipDoubleComplex*)result); |
| 4687 | -} |
| 4688 | - |
| 4689 | -hipblasStatus_t hipblasZdotcBatchedCast_64(hipblasHandle_t handle, |
| 4690 | - int64_t n, |
| 4691 | - const hipblasDoubleComplex* const x[], |
| 4692 | - int64_t incx, |
| 4693 | - const hipblasDoubleComplex* const y[], |
| 4694 | - int64_t incy, |
| 4695 | - int64_t batch_count, |
| 4696 | - hipblasDoubleComplex* result) |
| 4697 | -{ |
| 4698 | - return hipblasZdotcBatched_64(handle, |
| 4699 | - n, |
| 4700 | - (const hipDoubleComplex* const*)x, |
| 4701 | - incx, |
| 4702 | - (const hipDoubleComplex* const*)y, |
| 4703 | - incy, |
| 4704 | - batch_count, |
| 4705 | - (hipDoubleComplex*)result); |
| 4706 | -} |
| 4707 | - |
| 4708 | -// dot_strided_batched |
| 4709 | -hipblasStatus_t hipblasCdotuStridedBatchedCast(hipblasHandle_t handle, |
| 4710 | - int n, |
| 4711 | - const hipblasComplex* x, |
| 4712 | - int incx, |
| 4713 | - hipblasStride stridex, |
| 4714 | - const hipblasComplex* y, |
| 4715 | - int incy, |
| 4716 | - hipblasStride stridey, |
| 4717 | - int batch_count, |
| 4718 | - hipblasComplex* result) |
| 4719 | -{ |
| 4720 | - return hipblasCdotuStridedBatched(handle, |
| 4721 | - n, |
| 4722 | - (const hipComplex*)x, |
| 4723 | - incx, |
| 4724 | - stridex, |
| 4725 | - (const hipComplex*)y, |
| 4726 | - incy, |
| 4727 | - stridey, |
| 4728 | - batch_count, |
| 4729 | - (hipComplex*)result); |
| 4730 | -} |
| 4731 | - |
| 4732 | -hipblasStatus_t hipblasCdotcStridedBatchedCast(hipblasHandle_t handle, |
| 4733 | - int n, |
| 4734 | - const hipblasComplex* x, |
| 4735 | - int incx, |
| 4736 | - hipblasStride stridex, |
| 4737 | - const hipblasComplex* y, |
| 4738 | - int incy, |
| 4739 | - hipblasStride stridey, |
| 4740 | - int batch_count, |
| 4741 | - hipblasComplex* result) |
| 4742 | -{ |
| 4743 | - return hipblasCdotcStridedBatched(handle, |
| 4744 | - n, |
| 4745 | - (const hipComplex*)x, |
| 4746 | - incx, |
| 4747 | - stridex, |
| 4748 | - (const hipComplex*)y, |
| 4749 | - incy, |
| 4750 | - stridey, |
| 4751 | - batch_count, |
| 4752 | - (hipComplex*)result); |
| 4753 | -} |
| 4754 | - |
| 4755 | -hipblasStatus_t hipblasZdotuStridedBatchedCast(hipblasHandle_t handle, |
| 4756 | - int n, |
| 4757 | - const hipblasDoubleComplex* x, |
| 4758 | - int incx, |
| 4759 | - hipblasStride stridex, |
| 4760 | - const hipblasDoubleComplex* y, |
| 4761 | - int incy, |
| 4762 | - hipblasStride stridey, |
| 4763 | - int batch_count, |
| 4764 | - hipblasDoubleComplex* result) |
| 4765 | -{ |
| 4766 | - return hipblasZdotuStridedBatched(handle, |
| 4767 | - n, |
| 4768 | - (const hipDoubleComplex*)x, |
| 4769 | - incx, |
| 4770 | - stridex, |
| 4771 | - (const hipDoubleComplex*)y, |
| 4772 | - incy, |
| 4773 | - stridey, |
| 4774 | - batch_count, |
| 4775 | - (hipDoubleComplex*)result); |
| 4776 | -} |
| 4777 | - |
| 4778 | -hipblasStatus_t hipblasZdotcStridedBatchedCast(hipblasHandle_t handle, |
| 4779 | - int n, |
| 4780 | - const hipblasDoubleComplex* x, |
| 4781 | - int incx, |
| 4782 | - hipblasStride stridex, |
| 4783 | - const hipblasDoubleComplex* y, |
| 4784 | - int incy, |
| 4785 | - hipblasStride stridey, |
| 4786 | - int batch_count, |
| 4787 | - hipblasDoubleComplex* result) |
| 4788 | -{ |
| 4789 | - return hipblasZdotcStridedBatched(handle, |
| 4790 | - n, |
| 4791 | - (const hipDoubleComplex*)x, |
| 4792 | - incx, |
| 4793 | - stridex, |
| 4794 | - (const hipDoubleComplex*)y, |
| 4795 | - incy, |
| 4796 | - stridey, |
| 4797 | - batch_count, |
| 4798 | - (hipDoubleComplex*)result); |
| 4799 | -} |
| 4800 | - |
| 4801 | -hipblasStatus_t hipblasCdotuStridedBatchedCast_64(hipblasHandle_t handle, |
| 4802 | - int64_t n, |
| 4803 | - const hipblasComplex* x, |
| 4804 | - int64_t incx, |
| 4805 | - hipblasStride stridex, |
| 4806 | - const hipblasComplex* y, |
| 4807 | - int64_t incy, |
| 4808 | - hipblasStride stridey, |
| 4809 | - int64_t batch_count, |
| 4810 | - hipblasComplex* result) |
| 4811 | -{ |
| 4812 | - return hipblasCdotuStridedBatched_64(handle, |
| 4813 | - n, |
| 4814 | - (const hipComplex*)x, |
| 4815 | - incx, |
| 4816 | - stridex, |
| 4817 | - (const hipComplex*)y, |
| 4818 | - incy, |
| 4819 | - stridey, |
| 4820 | - batch_count, |
| 4821 | - (hipComplex*)result); |
| 4822 | -} |
| 4823 | - |
| 4824 | -hipblasStatus_t hipblasCdotcStridedBatchedCast_64(hipblasHandle_t handle, |
| 4825 | - int64_t n, |
| 4826 | - const hipblasComplex* x, |
| 4827 | - int64_t incx, |
| 4828 | - hipblasStride stridex, |
| 4829 | - const hipblasComplex* y, |
| 4830 | - int64_t incy, |
| 4831 | - hipblasStride stridey, |
| 4832 | - int64_t batch_count, |
| 4833 | - hipblasComplex* result) |
| 4834 | -{ |
| 4835 | - return hipblasCdotcStridedBatched_64(handle, |
| 4836 | - n, |
| 4837 | - (const hipComplex*)x, |
| 4838 | - incx, |
| 4839 | - stridex, |
| 4840 | - (const hipComplex*)y, |
| 4841 | - incy, |
| 4842 | - stridey, |
| 4843 | - batch_count, |
| 4844 | - (hipComplex*)result); |
| 4845 | -} |
| 4846 | - |
| 4847 | -hipblasStatus_t hipblasZdotuStridedBatchedCast_64(hipblasHandle_t handle, |
| 4848 | - int64_t n, |
| 4849 | - const hipblasDoubleComplex* x, |
| 4850 | - int64_t incx, |
| 4851 | - hipblasStride stridex, |
| 4852 | - const hipblasDoubleComplex* y, |
| 4853 | - int64_t incy, |
| 4854 | - hipblasStride stridey, |
| 4855 | - int64_t batch_count, |
| 4856 | - hipblasDoubleComplex* result) |
| 4857 | -{ |
| 4858 | - return hipblasZdotuStridedBatched_64(handle, |
| 4859 | - n, |
| 4860 | - (const hipDoubleComplex*)x, |
| 4861 | - incx, |
| 4862 | - stridex, |
| 4863 | - (const hipDoubleComplex*)y, |
| 4864 | - incy, |
| 4865 | - stridey, |
| 4866 | - batch_count, |
| 4867 | - (hipDoubleComplex*)result); |
| 4868 | -} |
| 4869 | - |
| 4870 | -hipblasStatus_t hipblasZdotcStridedBatchedCast_64(hipblasHandle_t handle, |
| 4871 | - int64_t n, |
| 4872 | - const hipblasDoubleComplex* x, |
| 4873 | - int64_t incx, |
| 4874 | - hipblasStride stridex, |
| 4875 | - const hipblasDoubleComplex* y, |
| 4876 | - int64_t incy, |
| 4877 | - hipblasStride stridey, |
| 4878 | - int64_t batch_count, |
| 4879 | - hipblasDoubleComplex* result) |
| 4880 | -{ |
| 4881 | - return hipblasZdotcStridedBatched_64(handle, |
| 4882 | - n, |
| 4883 | - (const hipDoubleComplex*)x, |
| 4884 | - incx, |
| 4885 | - stridex, |
| 4886 | - (const hipDoubleComplex*)y, |
| 4887 | - incy, |
| 4888 | - stridey, |
| 4889 | - batch_count, |
| 4890 | - (hipDoubleComplex*)result); |
| 4891 | -} |
| 4892 | - |
| 4893 | -// asum |
| 4894 | -hipblasStatus_t hipblasScasumCast( |
| 4895 | - hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, float* result) |
| 4896 | -{ |
| 4897 | - return hipblasScasum(handle, n, (const hipComplex*)x, incx, result); |
| 4898 | -} |
| 4899 | - |
| 4900 | -hipblasStatus_t hipblasDzasumCast( |
| 4901 | - hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, double* result) |
| 4902 | -{ |
| 4903 | - return hipblasDzasum(handle, n, (const hipDoubleComplex*)x, incx, result); |
| 4904 | -} |
| 4905 | - |
| 4906 | -hipblasStatus_t hipblasScasumCast_64( |
| 4907 | - hipblasHandle_t handle, int64_t n, const hipblasComplex* x, int64_t incx, float* result) |
| 4908 | -{ |
| 4909 | - return hipblasScasum_64(handle, n, (const hipComplex*)x, incx, result); |
| 4910 | -} |
| 4911 | - |
| 4912 | -hipblasStatus_t hipblasDzasumCast_64( |
| 4913 | - hipblasHandle_t handle, int64_t n, const hipblasDoubleComplex* x, int64_t incx, double* result) |
| 4914 | -{ |
| 4915 | - return hipblasDzasum_64(handle, n, (const hipDoubleComplex*)x, incx, result); |
| 4916 | -} |
| 4917 | - |
| 4918 | -// asum_batched |
| 4919 | -hipblasStatus_t hipblasScasumBatchedCast(hipblasHandle_t handle, |
| 4920 | - int n, |
| 4921 | - const hipblasComplex* const x[], |
| 4922 | - int incx, |
| 4923 | - int batch_count, |
| 4924 | - float* result) |
| 4925 | -{ |
| 4926 | - return hipblasScasumBatched(handle, n, (const hipComplex* const*)x, incx, batch_count, result); |
| 4927 | -} |
| 4928 | - |
| 4929 | -hipblasStatus_t hipblasDzasumBatchedCast(hipblasHandle_t handle, |
| 4930 | - int n, |
| 4931 | - const hipblasDoubleComplex* const x[], |
| 4932 | - int incx, |
| 4933 | - int batch_count, |
| 4934 | - double* result) |
| 4935 | -{ |
| 4936 | - return hipblasDzasumBatched( |
| 4937 | - handle, n, (const hipDoubleComplex* const*)x, incx, batch_count, result); |
| 4938 | -} |
| 4939 | - |
| 4940 | -hipblasStatus_t hipblasScasumBatchedCast_64(hipblasHandle_t handle, |
| 4941 | - int64_t n, |
| 4942 | - const hipblasComplex* const x[], |
| 4943 | - int64_t incx, |
| 4944 | - int64_t batch_count, |
| 4945 | - float* result) |
| 4946 | -{ |
| 4947 | - return hipblasScasumBatched_64( |
| 4948 | - handle, n, (const hipComplex* const*)x, incx, batch_count, result); |
| 4949 | -} |
| 4950 | - |
| 4951 | -hipblasStatus_t hipblasDzasumBatchedCast_64(hipblasHandle_t handle, |
| 4952 | - int64_t n, |
| 4953 | - const hipblasDoubleComplex* const x[], |
| 4954 | - int64_t incx, |
| 4955 | - int64_t batch_count, |
| 4956 | - double* result) |
| 4957 | -{ |
| 4958 | - return hipblasDzasumBatched_64( |
| 4959 | - handle, n, (const hipDoubleComplex* const*)x, incx, batch_count, result); |
| 4960 | -} |
| 4961 | - |
| 4962 | -// asum_strided_batched |
| 4963 | -hipblasStatus_t hipblasScasumStridedBatchedCast(hipblasHandle_t handle, |
| 4964 | - int n, |
| 4965 | - const hipblasComplex* x, |
| 4966 | - int incx, |
| 4967 | - hipblasStride stridex, |
| 4968 | - int batch_count, |
| 4969 | - float* result) |
| 4970 | -{ |
| 4971 | - return hipblasScasumStridedBatched( |
| 4972 | - handle, n, (const hipComplex*)x, incx, stridex, batch_count, result); |
| 4973 | -} |
| 4974 | - |
| 4975 | -hipblasStatus_t hipblasDzasumStridedBatchedCast(hipblasHandle_t handle, |
| 4976 | - int n, |
| 4977 | - const hipblasDoubleComplex* x, |
| 4978 | - int incx, |
| 4979 | - hipblasStride stridex, |
| 4980 | - int batch_count, |
| 4981 | - double* result) |
| 4982 | -{ |
| 4983 | - return hipblasDzasumStridedBatched( |
| 4984 | - handle, n, (const hipDoubleComplex*)x, incx, stridex, batch_count, result); |
| 4985 | -} |
| 4986 | - |
| 4987 | -hipblasStatus_t hipblasScasumStridedBatchedCast_64(hipblasHandle_t handle, |
| 4988 | - int64_t n, |
| 4989 | - const hipblasComplex* x, |
| 4990 | - int64_t incx, |
| 4991 | - hipblasStride stridex, |
| 4992 | - int64_t batch_count, |
| 4993 | - float* result) |
| 4994 | -{ |
| 4995 | - return hipblasScasumStridedBatched_64( |
| 4996 | - handle, n, (const hipComplex*)x, incx, stridex, batch_count, result); |
| 4997 | -} |
| 4998 | - |
| 4999 | -hipblasStatus_t hipblasDzasumStridedBatchedCast_64(hipblasHandle_t handle, |
| 5000 | - int64_t n, |

Build in PPA: https://launchpad.net/~bruno-bdmoura/+archive/ubuntu/lp-2139242-hipblas
(~proposed and target archs)