Simple GPU 1.0
Fortran GPU Computing Library with transparent CPU/GPU support
Loading...
Searching...
No Matches
gpu_amd.c
Go to the documentation of this file.
1#include <stdint.h>
2#include <stdio.h>
3#include <stdbool.h>
4#include <stdlib.h>
5#include <string.h>
6#include <assert.h>
7
8#include <hipblas/hipblas.h>
9#include <hip/hip_runtime.h>
10
11
12/* Generic functions */
13
15 int ngpus;
16 hipError_t rc = hipGetDeviceCount(&ngpus);
17 if (rc != hipSuccess) {
18 return 0;
19 }
20 return ngpus;
21}
22
23void gpu_set_device(int32_t igpu) {
24 if (igpu < 0) {
25 fprintf(stderr, "gpu_set_device: invalid device index %d (must be >= 0)\n", igpu);
26 return;
27 }
28 int ngpus = gpu_ndevices();
29 if (igpu >= ngpus) {
30 fprintf(stderr, "gpu_set_device: invalid device index %d (only %d devices available)\n", igpu, ngpus);
31 return;
32 }
33 hipError_t rc = hipSetDevice((int)igpu);
34 if (rc != hipSuccess) {
35 fprintf(stderr,"hipSetDevice(%d) failed: %s\n", igpu, hipGetErrorString(rc));
36 assert (rc == hipSuccess);
37 }
38}
39
40void gpu_get_memory(size_t* free, size_t* total) {
41 hipError_t rc = hipMemGetInfo( free, total );
42 if (rc != hipSuccess) {
43 *free = 0;
44 *total = 0;
45 }
46}
47
48/* Allocation functions */
49
50void gpu_allocate(void** ptr, const int64_t size) {
51 if (ptr == NULL) {
52 fprintf(stderr, "gpu_allocate: ptr argument is NULL\n");
53 return;
54 }
55
56 size_t free, total;
57 hipError_t rc = hipMemGetInfo( &free, &total );
58 if (rc != hipSuccess) {
59 free = INT64_MAX;
60 }
61
62 rc = hipMalloc(ptr, size);
63
64 if (rc != hipSuccess) {
65 fprintf(stderr,"hipMalloc failed: %s\n", hipGetErrorString(rc));
66 assert (rc == hipSuccess);
67 }
68}
69
70void gpu_deallocate(void** ptr) {
71 if (ptr == NULL || *ptr == NULL) {
72 return;
73 }
74 hipError_t rc = hipFree(*ptr);
75 if (rc != hipSuccess) {
76 fprintf(stderr,"hipFree failed: %s\n", hipGetErrorString(rc));
77 assert (rc == hipSuccess);
78 }
79 *ptr = NULL;
80}
81
/**
 * Alternative name for gpu_deallocate(); forwards ptr unchanged.
 *
 * @param ptr  Address of the pointer to release.
 */
void gpu_free(void** ptr)
{
    gpu_deallocate(ptr);
}
85
86
87/* Memory transfer functions */
88
89void gpu_upload(const void* cpu_ptr, void* gpu_ptr, const int64_t n) {
90 if (cpu_ptr == NULL || gpu_ptr == NULL) {
91 fprintf(stderr, "gpu_upload: NULL pointer argument\n");
92 return;
93 }
94 hipError_t rc = hipMemcpy (gpu_ptr, cpu_ptr, n, hipMemcpyHostToDevice);
95 if (rc != hipSuccess) {
96 fprintf(stderr,"hipMemcpy (upload) failed: %s\n", hipGetErrorString(rc));
97 assert (rc == hipSuccess);
98 }
99}
100
101void gpu_download(const void* gpu_ptr, void* cpu_ptr, const int64_t n) {
102 if (gpu_ptr == NULL || cpu_ptr == NULL) {
103 fprintf(stderr, "gpu_download: NULL pointer argument\n");
104 return;
105 }
106 hipError_t rc = hipMemcpy (cpu_ptr, gpu_ptr, n, hipMemcpyDeviceToHost);
107 if (rc != hipSuccess) {
108 fprintf(stderr,"hipMemcpy (download) failed: %s\n", hipGetErrorString(rc));
109 assert (rc == hipSuccess);
110 }
111}
112
113void gpu_copy(const void* gpu_ptr_src, void* gpu_ptr_dest, const int64_t n) {
114 if (gpu_ptr_src == NULL || gpu_ptr_dest == NULL) {
115 fprintf(stderr, "gpu_copy: NULL pointer argument\n");
116 return;
117 }
118 hipError_t rc = hipMemcpy (gpu_ptr_dest, gpu_ptr_src, n, hipMemcpyDeviceToDevice);
119 if (rc != hipSuccess) {
120 fprintf(stderr,"hipMemcpy (copy) failed: %s\n", hipGetErrorString(rc));
121 assert (rc == hipSuccess);
122 }
123}
124
125
126/* Streams */
127
128void gpu_stream_create(hipStream_t* ptr) {
129 hipError_t rc = hipStreamCreate(ptr);
130 if (rc != hipSuccess) {
131 fprintf(stderr,"hipStreamCreate failed: %s\n", hipGetErrorString(rc));
132 assert (rc == hipSuccess);
133 }
134}
135
136void gpu_stream_destroy(hipStream_t* ptr) {
137 assert (ptr != NULL);
138 hipError_t rc = hipStreamDestroy(*ptr);
139 if (rc != hipSuccess) {
140 fprintf(stderr,"hipStreamDestroy failed: %s\n", hipGetErrorString(rc));
141 assert (rc == hipSuccess);
142 }
143 *ptr = NULL;
144}
145
146void gpu_set_stream(hipblasHandle_t handle, hipStream_t stream) {
147 hipblasStatus_t rc = hipblasSetStream(handle, stream);
148 if (rc != HIPBLAS_STATUS_SUCCESS) {
149 fprintf(stderr,"hipblasSetStream failed\n");
150 assert (rc == HIPBLAS_STATUS_SUCCESS);
151 }
152}
153
155 hipError_t rc = hipDeviceSynchronize();
156 if (rc != hipSuccess) {
157 fprintf(stderr,"hipDeviceSynchronize failed: %s\n", hipGetErrorString(rc));
158 assert (rc == hipSuccess);
159 }
160}
161
162void gpu_stream_synchronize(void* stream) {
163 hipError_t rc = hipStreamSynchronize(stream);
164 if (rc != hipSuccess) {
165 fprintf(stderr,"hipStreamSynchronize failed: %s\n", hipGetErrorString(rc));
166 assert (rc == hipSuccess);
167 }
168}
169
170
171/* BLAS functions */
172
173void gpu_blas_create(hipblasHandle_t* ptr) {
174 if (ptr == NULL) {
175 fprintf(stderr, "gpu_blas_create: ptr argument is NULL\n");
176 return;
177 }
178 hipblasStatus_t rc = hipblasCreate(ptr);
179 if (rc != HIPBLAS_STATUS_SUCCESS) {
180 fprintf(stderr,"hipblasCreate failed\n");
181 assert (rc == HIPBLAS_STATUS_SUCCESS);
182 }
183}
184
185
186void gpu_blas_destroy(hipblasHandle_t* ptr) {
187 assert (ptr != NULL);
188 hipblasStatus_t rc = hipblasDestroy(*ptr);
189 if (rc != HIPBLAS_STATUS_SUCCESS) {
190 fprintf(stderr,"hipblasDestroy failed\n");
191 }
192 assert (rc == HIPBLAS_STATUS_SUCCESS);
193 *ptr = NULL;
194}
195
196
197void gpu_ddot(hipblasHandle_t handle, const int64_t n, const double* x, const int64_t incx, const double* y, const int64_t incy, double* result) {
198 assert (handle != NULL);
199 /* Convert to int */
200 int n_, incx_, incy_;
201
202 n_ = (int)n;
203 incx_ = (int)incx;
204 incy_ = (int)incy;
205
206 assert ( (int64_t) n_ == n );
207 assert ( (int64_t) incx_ == incx);
208 assert ( (int64_t) incy_ == incy);
209
210 hipblasStatus_t rc = hipblasDdot(handle, n_, x, incx_, y, incy_, result);
211 assert (rc == HIPBLAS_STATUS_SUCCESS);
212}
213
214
215
216void gpu_sdot(hipblasHandle_t handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result) {
217 assert (handle != NULL);
218
219 /* Convert to int */
220 int n_, incx_, incy_;
221
222 n_ = (int)n;
223 incx_ = (int)incx;
224 incy_ = (int)incy;
225
226 /* Check for integer overflows */
227 assert ( (int64_t) n_ == n );
228 assert ( (int64_t) incx_ == incx);
229 assert ( (int64_t) incy_ == incy);
230
231 hipblasStatus_t rc = hipblasSdot(handle, n_, x, incx_, y, incy_, result);
232 assert (rc == HIPBLAS_STATUS_SUCCESS);
233}
234
235
236
237void gpu_dgemv(hipblasHandle_t handle, const char* transa, const int64_t m, const int64_t n, const double* alpha,
238 const double* a, const int64_t lda, const double* x, const int64_t incx, const double* beta, double* y, const int64_t incy) {
239
240 assert (handle != NULL);
241
242 /* Convert to int */
243 int m_, n_, lda_, incx_, incy_;
244
245 m_ = (int)m;
246 n_ = (int)n;
247 lda_ = (int)lda;
248 incx_ = (int)incx;
249 incy_ = (int)incy;
250
251 /* Check for integer overflows */
252 assert ( (int64_t) m_ == m );
253 assert ( (int64_t) n_ == n );
254 assert ( (int64_t) lda_ == lda );
255 assert ( (int64_t) incx_ == incx);
256 assert ( (int64_t) incy_ == incy);
257
258 hipblasOperation_t transa_ = HIPBLAS_OP_N;
259 if (*transa == 'T' || *transa == 't') transa_ = HIPBLAS_OP_T;
260
261 hipblasStatus_t rc = hipblasDgemv(handle, transa_, m_, n_, alpha, a, lda_, x, incx_, beta, y, incy_);
262 assert (rc == HIPBLAS_STATUS_SUCCESS);
263}
264
265
266
267void gpu_sgemv(hipblasHandle_t handle, const char* transa, const int64_t m, const int64_t n, const float* alpha,
268 const float* a, const int64_t lda, const float* x, const int64_t incx, const float* beta, float* y, const int64_t incy) {
269
270 assert (handle != NULL);
271
272 /* Convert to int */
273 int m_, n_, lda_, incx_, incy_;
274
275 m_ = (int)m;
276 n_ = (int)n;
277 lda_ = (int)lda;
278 incx_ = (int)incx;
279 incy_ = (int)incy;
280
281 /* Check for integer overflows */
282 assert ( (int64_t) m_ == m );
283 assert ( (int64_t) n_ == n );
284 assert ( (int64_t) lda_ == lda );
285 assert ( (int64_t) incx_ == incx);
286 assert ( (int64_t) incy_ == incy);
287
288 hipblasOperation_t transa_ = HIPBLAS_OP_N;
289 if (*transa == 'T' || *transa == 't') transa_ = HIPBLAS_OP_T;
290
291 hipblasStatus_t rc = hipblasSgemv(handle, transa_, m_, n_, alpha, a, lda_, x, incx_, beta, y, incy_);
292 assert (rc == HIPBLAS_STATUS_SUCCESS);
293}
294
295
296void gpu_dgemm(hipblasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const double* alpha,
297 const double* a, const int64_t lda, const double* b, const int64_t ldb, const double* beta, double* c, const int64_t ldc) {
298
299 assert (handle != NULL);
300
301 /* Convert to int */
302 int m_, n_, k_, lda_, ldb_, ldc_;
303
304 m_ = (int)m;
305 n_ = (int)n;
306 k_ = (int)k;
307 lda_ = (int)lda;
308 ldb_ = (int)ldb;
309 ldc_ = (int)ldc;
310
311 /* Check for integer overflows */
312 assert ( (int64_t) m_ == m );
313 assert ( (int64_t) n_ == n );
314 assert ( (int64_t) k_ == k );
315 assert ( (int64_t) lda_ == lda);
316 assert ( (int64_t) ldb_ == ldb);
317 assert ( (int64_t) ldc_ == ldc);
318
319 hipblasOperation_t transa_ = HIPBLAS_OP_N;
320 hipblasOperation_t transb_ = HIPBLAS_OP_N;
321 if (*transa == 'T' || *transa == 't') transa_ = HIPBLAS_OP_T;
322 if (*transb == 'T' || *transb == 't') transb_ = HIPBLAS_OP_T;
323
324 hipblasStatus_t rc = hipblasDgemm(handle, transa_, transb_, m_, n_, k_, alpha, a, lda_, b, ldb_, beta, c, ldc_);
325 assert (rc == HIPBLAS_STATUS_SUCCESS);
326}
327
328
329
330void gpu_sgemm(hipblasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const float* alpha,
331 const float* a, const int64_t lda, const float* b, const int64_t ldb, const float* beta, float* c, const int64_t ldc) {
332
333 assert (handle != NULL);
334
335 /* Convert to int */
336 int m_, n_, k_, lda_, ldb_, ldc_;
337
338 m_ = (int)m;
339 n_ = (int)n;
340 k_ = (int)k;
341 lda_ = (int)lda;
342 ldb_ = (int)ldb;
343 ldc_ = (int)ldc;
344
345 /* Check for integer overflows */
346 assert ( (int64_t) m_ == m );
347 assert ( (int64_t) n_ == n );
348 assert ( (int64_t) k_ == k );
349 assert ( (int64_t) lda_ == lda);
350 assert ( (int64_t) ldb_ == ldb);
351 assert ( (int64_t) ldc_ == ldc);
352
353 hipblasOperation_t transa_ = HIPBLAS_OP_N;
354 hipblasOperation_t transb_ = HIPBLAS_OP_N;
355 if (*transa == 'T' || *transa == 't') transa_ = HIPBLAS_OP_T;
356 if (*transb == 'T' || *transb == 't') transb_ = HIPBLAS_OP_T;
357
358 hipblasStatus_t rc = hipblasSgemm(handle, transa_, transb_, m_, n_, k_, alpha, a, lda_, b, ldb_, beta, c, ldc_);
359 assert (rc == HIPBLAS_STATUS_SUCCESS);
360}
361
362
363void gpu_dgeam(hipblasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const double* alpha,
364 const double* a, const int64_t lda, const double* beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) {
365 assert (handle != NULL);
366
367 /* Convert to int */
368 int m_, n_, lda_, ldb_, ldc_;
369
370 m_ = (int)m;
371 n_ = (int)n;
372 lda_ = (int)lda;
373 ldb_ = (int)ldb;
374 ldc_ = (int)ldc;
375
376 /* Check for integer overflows */
377 assert ( (int64_t) m_ == m );
378 assert ( (int64_t) n_ == n );
379 assert ( (int64_t) lda_ == lda);
380 assert ( (int64_t) ldb_ == ldb);
381 assert ( (int64_t) ldc_ == ldc);
382
383 hipblasOperation_t transa_ = HIPBLAS_OP_N;
384 hipblasOperation_t transb_ = HIPBLAS_OP_N;
385 if (*transa == 'T' || *transa == 't') transa_ = HIPBLAS_OP_T;
386 if (*transb == 'T' || *transb == 't') transb_ = HIPBLAS_OP_T;
387
388 hipblasStatus_t rc = hipblasDgeam(handle, transa_, transb_, m_, n_, alpha, a, lda_, beta, b, ldb_, c, ldc_);
389 assert (rc == HIPBLAS_STATUS_SUCCESS);
390
391}
392
393
394void gpu_sgeam(hipblasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const float* alpha,
395 const float* a, const int64_t lda, const float* beta, const float* b, const int64_t ldb, float* c, const int64_t ldc) {
396 assert (handle != NULL);
397
398 /* Convert to int */
399 int m_, n_, lda_, ldb_, ldc_;
400
401 m_ = (int)m;
402 n_ = (int)n;
403 lda_ = (int)lda;
404 ldb_ = (int)ldb;
405 ldc_ = (int)ldc;
406
407 /* Check for integer overflows */
408 assert ( (int64_t) m_ == m );
409 assert ( (int64_t) n_ == n );
410 assert ( (int64_t) lda_ == lda);
411 assert ( (int64_t) ldb_ == ldb);
412 assert ( (int64_t) ldc_ == ldc);
413
414 hipblasOperation_t transa_ = HIPBLAS_OP_N;
415 hipblasOperation_t transb_ = HIPBLAS_OP_N;
416 if (*transa == 'T' || *transa == 't') transa_ = HIPBLAS_OP_T;
417 if (*transb == 'T' || *transb == 't') transb_ = HIPBLAS_OP_T;
418
419 hipblasStatus_t rc = hipblasSgeam(handle, transa_, transb_, m_, n_, alpha, a, lda_, beta, b, ldb_, c, ldc_);
420 assert (rc == HIPBLAS_STATUS_SUCCESS);
421
422}
void gpu_sgeam(hipblasHandle_t handle, const char *transa, const char *transb, const int64_t m, const int64_t n, const float *alpha, const float *a, const int64_t lda, const float *beta, const float *b, const int64_t ldb, float *c, const int64_t ldc)
Definition gpu_amd.c:394
void gpu_dgemv(hipblasHandle_t handle, const char *transa, const int64_t m, const int64_t n, const double *alpha, const double *a, const int64_t lda, const double *x, const int64_t incx, const double *beta, double *y, const int64_t incy)
Definition gpu_amd.c:237
void gpu_sgemm(hipblasHandle_t handle, const char *transa, const char *transb, const int64_t m, const int64_t n, const int64_t k, const float *alpha, const float *a, const int64_t lda, const float *b, const int64_t ldb, const float *beta, float *c, const int64_t ldc)
Definition gpu_amd.c:330
void gpu_free(void **ptr)
Definition gpu_amd.c:82
void gpu_copy(const void *gpu_ptr_src, void *gpu_ptr_dest, const int64_t n)
Definition gpu_amd.c:113
void gpu_upload(const void *cpu_ptr, void *gpu_ptr, const int64_t n)
Definition gpu_amd.c:89
void gpu_get_memory(size_t *free, size_t *total)
Definition gpu_amd.c:40
void gpu_stream_create(hipStream_t *ptr)
Definition gpu_amd.c:128
void gpu_blas_destroy(hipblasHandle_t *ptr)
Definition gpu_amd.c:186
void gpu_blas_create(hipblasHandle_t *ptr)
Definition gpu_amd.c:173
void gpu_deallocate(void **ptr)
Definition gpu_amd.c:70
void gpu_stream_synchronize(void *stream)
Definition gpu_amd.c:162
int gpu_ndevices()
Definition gpu_amd.c:14
void gpu_allocate(void **ptr, const int64_t size)
Definition gpu_amd.c:50
void gpu_set_stream(hipblasHandle_t handle, hipStream_t stream)
Definition gpu_amd.c:146
void gpu_download(const void *gpu_ptr, void *cpu_ptr, const int64_t n)
Definition gpu_amd.c:101
void gpu_dgeam(hipblasHandle_t handle, const char *transa, const char *transb, const int64_t m, const int64_t n, const double *alpha, const double *a, const int64_t lda, const double *beta, const double *b, const int64_t ldb, double *c, const int64_t ldc)
Definition gpu_amd.c:363
void gpu_dgemm(hipblasHandle_t handle, const char *transa, const char *transb, const int64_t m, const int64_t n, const int64_t k, const double *alpha, const double *a, const int64_t lda, const double *b, const int64_t ldb, const double *beta, double *c, const int64_t ldc)
Definition gpu_amd.c:296
void gpu_ddot(hipblasHandle_t handle, const int64_t n, const double *x, const int64_t incx, const double *y, const int64_t incy, double *result)
Definition gpu_amd.c:197
void gpu_synchronize()
Definition gpu_amd.c:154
void gpu_sdot(hipblasHandle_t handle, const int64_t n, const float *x, const int64_t incx, const float *y, const int64_t incy, float *result)
Definition gpu_amd.c:216
void gpu_stream_destroy(hipStream_t *ptr)
Definition gpu_amd.c:136
void gpu_sgemv(hipblasHandle_t handle, const char *transa, const int64_t m, const int64_t n, const float *alpha, const float *a, const int64_t lda, const float *x, const int64_t incx, const float *beta, float *y, const int64_t incy)
Definition gpu_amd.c:267
void gpu_set_device(int32_t igpu)
Definition gpu_amd.c:23