8#include <hipblas/hipblas.h>
9#include <hip/hip_runtime.h>
16 hipError_t rc = hipGetDeviceCount(&ngpus);
17 if (rc != hipSuccess) {
25 fprintf(stderr,
"gpu_set_device: invalid device index %d (must be >= 0)\n", igpu);
30 fprintf(stderr,
"gpu_set_device: invalid device index %d (only %d devices available)\n", igpu, ngpus);
33 hipError_t rc = hipSetDevice((
int)igpu);
34 if (rc != hipSuccess) {
35 fprintf(stderr,
"hipSetDevice(%d) failed: %s\n", igpu, hipGetErrorString(rc));
36 assert (rc == hipSuccess);
41 hipError_t rc = hipMemGetInfo( free, total );
42 if (rc != hipSuccess) {
52 fprintf(stderr,
"gpu_allocate: ptr argument is NULL\n");
57 hipError_t rc = hipMemGetInfo( &free, &total );
58 if (rc != hipSuccess) {
62 rc = hipMalloc(ptr, size);
64 if (rc != hipSuccess) {
65 fprintf(stderr,
"hipMalloc failed: %s\n", hipGetErrorString(rc));
66 assert (rc == hipSuccess);
71 if (ptr == NULL || *ptr == NULL) {
74 hipError_t rc = hipFree(*ptr);
75 if (rc != hipSuccess) {
76 fprintf(stderr,
"hipFree failed: %s\n", hipGetErrorString(rc));
77 assert (rc == hipSuccess);
89void gpu_upload(
const void* cpu_ptr,
void* gpu_ptr,
const int64_t n) {
90 if (cpu_ptr == NULL || gpu_ptr == NULL) {
91 fprintf(stderr,
"gpu_upload: NULL pointer argument\n");
94 hipError_t rc = hipMemcpy (gpu_ptr, cpu_ptr, n, hipMemcpyHostToDevice);
95 if (rc != hipSuccess) {
96 fprintf(stderr,
"hipMemcpy (upload) failed: %s\n", hipGetErrorString(rc));
97 assert (rc == hipSuccess);
101void gpu_download(
const void* gpu_ptr,
void* cpu_ptr,
const int64_t n) {
102 if (gpu_ptr == NULL || cpu_ptr == NULL) {
103 fprintf(stderr,
"gpu_download: NULL pointer argument\n");
106 hipError_t rc = hipMemcpy (cpu_ptr, gpu_ptr, n, hipMemcpyDeviceToHost);
107 if (rc != hipSuccess) {
108 fprintf(stderr,
"hipMemcpy (download) failed: %s\n", hipGetErrorString(rc));
109 assert (rc == hipSuccess);
113void gpu_copy(
const void* gpu_ptr_src,
void* gpu_ptr_dest,
const int64_t n) {
114 if (gpu_ptr_src == NULL || gpu_ptr_dest == NULL) {
115 fprintf(stderr,
"gpu_copy: NULL pointer argument\n");
118 hipError_t rc = hipMemcpy (gpu_ptr_dest, gpu_ptr_src, n, hipMemcpyDeviceToDevice);
119 if (rc != hipSuccess) {
120 fprintf(stderr,
"hipMemcpy (copy) failed: %s\n", hipGetErrorString(rc));
121 assert (rc == hipSuccess);
129 hipError_t rc = hipStreamCreate(ptr);
130 if (rc != hipSuccess) {
131 fprintf(stderr,
"hipStreamCreate failed: %s\n", hipGetErrorString(rc));
132 assert (rc == hipSuccess);
137 assert (ptr != NULL);
138 hipError_t rc = hipStreamDestroy(*ptr);
139 if (rc != hipSuccess) {
140 fprintf(stderr,
"hipStreamDestroy failed: %s\n", hipGetErrorString(rc));
141 assert (rc == hipSuccess);
147 hipblasStatus_t rc = hipblasSetStream(handle, stream);
148 if (rc != HIPBLAS_STATUS_SUCCESS) {
149 fprintf(stderr,
"hipblasSetStream failed\n");
150 assert (rc == HIPBLAS_STATUS_SUCCESS);
155 hipError_t rc = hipDeviceSynchronize();
156 if (rc != hipSuccess) {
157 fprintf(stderr,
"hipDeviceSynchronize failed: %s\n", hipGetErrorString(rc));
158 assert (rc == hipSuccess);
163 hipError_t rc = hipStreamSynchronize(stream);
164 if (rc != hipSuccess) {
165 fprintf(stderr,
"hipStreamSynchronize failed: %s\n", hipGetErrorString(rc));
166 assert (rc == hipSuccess);
175 fprintf(stderr,
"gpu_blas_create: ptr argument is NULL\n");
178 hipblasStatus_t rc = hipblasCreate(ptr);
179 if (rc != HIPBLAS_STATUS_SUCCESS) {
180 fprintf(stderr,
"hipblasCreate failed\n");
181 assert (rc == HIPBLAS_STATUS_SUCCESS);
187 assert (ptr != NULL);
188 hipblasStatus_t rc = hipblasDestroy(*ptr);
189 if (rc != HIPBLAS_STATUS_SUCCESS) {
190 fprintf(stderr,
"hipblasDestroy failed\n");
192 assert (rc == HIPBLAS_STATUS_SUCCESS);
197void gpu_ddot(hipblasHandle_t handle,
const int64_t n,
const double* x,
const int64_t incx,
const double* y,
const int64_t incy,
double* result) {
198 assert (handle != NULL);
200 int n_, incx_, incy_;
206 assert ( (int64_t) n_ == n );
207 assert ( (int64_t) incx_ == incx);
208 assert ( (int64_t) incy_ == incy);
210 hipblasStatus_t rc = hipblasDdot(handle, n_, x, incx_, y, incy_, result);
211 assert (rc == HIPBLAS_STATUS_SUCCESS);
216void gpu_sdot(hipblasHandle_t handle,
const int64_t n,
const float* x,
const int64_t incx,
const float* y,
const int64_t incy,
float* result) {
217 assert (handle != NULL);
220 int n_, incx_, incy_;
227 assert ( (int64_t) n_ == n );
228 assert ( (int64_t) incx_ == incx);
229 assert ( (int64_t) incy_ == incy);
231 hipblasStatus_t rc = hipblasSdot(handle, n_, x, incx_, y, incy_, result);
232 assert (rc == HIPBLAS_STATUS_SUCCESS);
237void gpu_dgemv(hipblasHandle_t handle,
const char* transa,
const int64_t m,
const int64_t n,
const double* alpha,
238 const double* a,
const int64_t lda,
const double* x,
const int64_t incx,
const double* beta,
double* y,
const int64_t incy) {
240 assert (handle != NULL);
243 int m_, n_, lda_, incx_, incy_;
252 assert ( (int64_t) m_ == m );
253 assert ( (int64_t) n_ == n );
254 assert ( (int64_t) lda_ == lda );
255 assert ( (int64_t) incx_ == incx);
256 assert ( (int64_t) incy_ == incy);
258 hipblasOperation_t transa_ = HIPBLAS_OP_N;
259 if (*transa ==
'T' || *transa ==
't') transa_ = HIPBLAS_OP_T;
261 hipblasStatus_t rc = hipblasDgemv(handle, transa_, m_, n_, alpha, a, lda_, x, incx_, beta, y, incy_);
262 assert (rc == HIPBLAS_STATUS_SUCCESS);
267void gpu_sgemv(hipblasHandle_t handle,
const char* transa,
const int64_t m,
const int64_t n,
const float* alpha,
268 const float* a,
const int64_t lda,
const float* x,
const int64_t incx,
const float* beta,
float* y,
const int64_t incy) {
270 assert (handle != NULL);
273 int m_, n_, lda_, incx_, incy_;
282 assert ( (int64_t) m_ == m );
283 assert ( (int64_t) n_ == n );
284 assert ( (int64_t) lda_ == lda );
285 assert ( (int64_t) incx_ == incx);
286 assert ( (int64_t) incy_ == incy);
288 hipblasOperation_t transa_ = HIPBLAS_OP_N;
289 if (*transa ==
'T' || *transa ==
't') transa_ = HIPBLAS_OP_T;
291 hipblasStatus_t rc = hipblasSgemv(handle, transa_, m_, n_, alpha, a, lda_, x, incx_, beta, y, incy_);
292 assert (rc == HIPBLAS_STATUS_SUCCESS);
296void gpu_dgemm(hipblasHandle_t handle,
const char* transa,
const char* transb,
const int64_t m,
const int64_t n,
const int64_t k,
const double* alpha,
297 const double* a,
const int64_t lda,
const double* b,
const int64_t ldb,
const double* beta,
double* c,
const int64_t ldc) {
299 assert (handle != NULL);
302 int m_, n_, k_, lda_, ldb_, ldc_;
312 assert ( (int64_t) m_ == m );
313 assert ( (int64_t) n_ == n );
314 assert ( (int64_t) k_ == k );
315 assert ( (int64_t) lda_ == lda);
316 assert ( (int64_t) ldb_ == ldb);
317 assert ( (int64_t) ldc_ == ldc);
319 hipblasOperation_t transa_ = HIPBLAS_OP_N;
320 hipblasOperation_t transb_ = HIPBLAS_OP_N;
321 if (*transa ==
'T' || *transa ==
't') transa_ = HIPBLAS_OP_T;
322 if (*transb ==
'T' || *transb ==
't') transb_ = HIPBLAS_OP_T;
324 hipblasStatus_t rc = hipblasDgemm(handle, transa_, transb_, m_, n_, k_, alpha, a, lda_, b, ldb_, beta, c, ldc_);
325 assert (rc == HIPBLAS_STATUS_SUCCESS);
330void gpu_sgemm(hipblasHandle_t handle,
const char* transa,
const char* transb,
const int64_t m,
const int64_t n,
const int64_t k,
const float* alpha,
331 const float* a,
const int64_t lda,
const float* b,
const int64_t ldb,
const float* beta,
float* c,
const int64_t ldc) {
333 assert (handle != NULL);
336 int m_, n_, k_, lda_, ldb_, ldc_;
346 assert ( (int64_t) m_ == m );
347 assert ( (int64_t) n_ == n );
348 assert ( (int64_t) k_ == k );
349 assert ( (int64_t) lda_ == lda);
350 assert ( (int64_t) ldb_ == ldb);
351 assert ( (int64_t) ldc_ == ldc);
353 hipblasOperation_t transa_ = HIPBLAS_OP_N;
354 hipblasOperation_t transb_ = HIPBLAS_OP_N;
355 if (*transa ==
'T' || *transa ==
't') transa_ = HIPBLAS_OP_T;
356 if (*transb ==
'T' || *transb ==
't') transb_ = HIPBLAS_OP_T;
358 hipblasStatus_t rc = hipblasSgemm(handle, transa_, transb_, m_, n_, k_, alpha, a, lda_, b, ldb_, beta, c, ldc_);
359 assert (rc == HIPBLAS_STATUS_SUCCESS);
363void gpu_dgeam(hipblasHandle_t handle,
const char* transa,
const char* transb,
const int64_t m,
const int64_t n,
const double* alpha,
364 const double* a,
const int64_t lda,
const double* beta,
const double* b,
const int64_t ldb,
double* c,
const int64_t ldc) {
365 assert (handle != NULL);
368 int m_, n_, lda_, ldb_, ldc_;
377 assert ( (int64_t) m_ == m );
378 assert ( (int64_t) n_ == n );
379 assert ( (int64_t) lda_ == lda);
380 assert ( (int64_t) ldb_ == ldb);
381 assert ( (int64_t) ldc_ == ldc);
383 hipblasOperation_t transa_ = HIPBLAS_OP_N;
384 hipblasOperation_t transb_ = HIPBLAS_OP_N;
385 if (*transa ==
'T' || *transa ==
't') transa_ = HIPBLAS_OP_T;
386 if (*transb ==
'T' || *transb ==
't') transb_ = HIPBLAS_OP_T;
388 hipblasStatus_t rc = hipblasDgeam(handle, transa_, transb_, m_, n_, alpha, a, lda_, beta, b, ldb_, c, ldc_);
389 assert (rc == HIPBLAS_STATUS_SUCCESS);
394void gpu_sgeam(hipblasHandle_t handle,
const char* transa,
const char* transb,
const int64_t m,
const int64_t n,
const float* alpha,
395 const float* a,
const int64_t lda,
const float* beta,
const float* b,
const int64_t ldb,
float* c,
const int64_t ldc) {
396 assert (handle != NULL);
399 int m_, n_, lda_, ldb_, ldc_;
408 assert ( (int64_t) m_ == m );
409 assert ( (int64_t) n_ == n );
410 assert ( (int64_t) lda_ == lda);
411 assert ( (int64_t) ldb_ == ldb);
412 assert ( (int64_t) ldc_ == ldc);
414 hipblasOperation_t transa_ = HIPBLAS_OP_N;
415 hipblasOperation_t transb_ = HIPBLAS_OP_N;
416 if (*transa ==
'T' || *transa ==
't') transa_ = HIPBLAS_OP_T;
417 if (*transb ==
'T' || *transb ==
't') transb_ = HIPBLAS_OP_T;
419 hipblasStatus_t rc = hipblasSgeam(handle, transa_, transb_, m_, n_, alpha, a, lda_, beta, b, ldb_, c, ldc_);
420 assert (rc == HIPBLAS_STATUS_SUCCESS);
void gpu_sgeam(hipblasHandle_t handle, const char *transa, const char *transb, const int64_t m, const int64_t n, const float *alpha, const float *a, const int64_t lda, const float *beta, const float *b, const int64_t ldb, float *c, const int64_t ldc)
void gpu_dgemv(hipblasHandle_t handle, const char *transa, const int64_t m, const int64_t n, const double *alpha, const double *a, const int64_t lda, const double *x, const int64_t incx, const double *beta, double *y, const int64_t incy)
void gpu_sgemm(hipblasHandle_t handle, const char *transa, const char *transb, const int64_t m, const int64_t n, const int64_t k, const float *alpha, const float *a, const int64_t lda, const float *b, const int64_t ldb, const float *beta, float *c, const int64_t ldc)
void gpu_free(void **ptr)
void gpu_copy(const void *gpu_ptr_src, void *gpu_ptr_dest, const int64_t n)
void gpu_upload(const void *cpu_ptr, void *gpu_ptr, const int64_t n)
void gpu_get_memory(size_t *free, size_t *total)
void gpu_stream_create(hipStream_t *ptr)
void gpu_blas_destroy(hipblasHandle_t *ptr)
void gpu_blas_create(hipblasHandle_t *ptr)
void gpu_deallocate(void **ptr)
void gpu_stream_synchronize(void *stream)
void gpu_allocate(void **ptr, const int64_t size)
void gpu_set_stream(hipblasHandle_t handle, hipStream_t stream)
void gpu_download(const void *gpu_ptr, void *cpu_ptr, const int64_t n)
void gpu_dgeam(hipblasHandle_t handle, const char *transa, const char *transb, const int64_t m, const int64_t n, const double *alpha, const double *a, const int64_t lda, const double *beta, const double *b, const int64_t ldb, double *c, const int64_t ldc)
void gpu_dgemm(hipblasHandle_t handle, const char *transa, const char *transb, const int64_t m, const int64_t n, const int64_t k, const double *alpha, const double *a, const int64_t lda, const double *b, const int64_t ldb, const double *beta, double *c, const int64_t ldc)
void gpu_ddot(hipblasHandle_t handle, const int64_t n, const double *x, const int64_t incx, const double *y, const int64_t incy, double *result)
void gpu_sdot(hipblasHandle_t handle, const int64_t n, const float *x, const int64_t incx, const float *y, const int64_t incy, float *result)
void gpu_stream_destroy(hipStream_t *ptr)
void gpu_sgemv(hipblasHandle_t handle, const char *transa, const int64_t m, const int64_t n, const float *alpha, const float *a, const int64_t lda, const float *x, const int64_t incx, const float *beta, float *y, const int64_t incy)
void gpu_set_device(int32_t igpu)