Simple GPU 1.0
Fortran GPU Computing Library with transparent CPU/GPU support
Loading...
Searching...
No Matches
gpu_nvidia.c
Go to the documentation of this file.
1#include <stdint.h>
2#include <stdio.h>
3#include <stdbool.h>
4#include <stdlib.h>
5#include <string.h>
6#include <assert.h>
7
8#include <cublas_v2.h>
9#include <cuda_runtime.h>
10
11
/* Generic functions */
13
15 int ngpus;
16 cudaGetDeviceCount(&ngpus);
17 return ngpus;
18}
19
20void gpu_set_device(int32_t igpu) {
21 if (igpu < 0) {
22 fprintf(stderr, "gpu_set_device: invalid device index %d (must be >= 0)\n", igpu);
23 return;
24 }
25 int ngpus = gpu_ndevices();
26 if (igpu >= ngpus) {
27 fprintf(stderr, "gpu_set_device: invalid device index %d (only %d devices available)\n", igpu, ngpus);
28 return;
29 }
30 cudaError_t rc = cudaSetDevice((int) igpu);
31 if (rc != cudaSuccess) {
32 fprintf(stderr,"cudaSetDevice(%d) failed: %s\n", igpu, cudaGetErrorString(rc));
33 assert (rc == cudaSuccess);
34 }
35}
36
37void gpu_get_memory(size_t* free, size_t* total) {
38 cudaError_t rc = cudaMemGetInfo( free, total );
39 if (rc != cudaSuccess) {
40 *free = 0;
41 *total = 0;
42 }
43}
44
/* Allocation functions */
46
47void gpu_allocate(void** ptr, const int64_t size) {
48 if (ptr == NULL) {
49 fprintf(stderr, "gpu_allocate: ptr argument is NULL\n");
50 return;
51 }
52
53 size_t free, total;
54 cudaError_t rc = cudaMemGetInfo( &free, &total );
55 if (rc != cudaSuccess) {
56 free = INT64_MAX;
57 }
58
59 rc = cudaMalloc(ptr, size);
60
61 if (rc != cudaSuccess) {
62 fprintf(stderr,"cudaMalloc failed: %s\n", cudaGetErrorString(rc));
63 assert (rc == cudaSuccess);
64 }
65}
66
67void gpu_deallocate(void** ptr) {
68 if (ptr == NULL || *ptr == NULL) {
69 return;
70 }
71 cudaFree(*ptr);
72 *ptr = NULL;
73}
74
/* Alias of gpu_deallocate: forwards `ptr` unchanged. */
void gpu_free(void** ptr)
{
  gpu_deallocate(ptr);
}
78
79
/* Memory transfer functions */
81
82void gpu_upload(const void* cpu_ptr, void* gpu_ptr, const int64_t n) {
83 if (cpu_ptr == NULL || gpu_ptr == NULL) {
84 fprintf(stderr, "gpu_upload: NULL pointer argument\n");
85 return;
86 }
87 cudaError_t rc = cudaMemcpy (gpu_ptr, cpu_ptr, n, cudaMemcpyHostToDevice);
88 if (rc != cudaSuccess) {
89 fprintf(stderr,"cudaMemcpy (upload) failed: %s\n", cudaGetErrorString(rc));
90 assert (rc == cudaSuccess);
91 }
92}
93
94void gpu_download(const void* gpu_ptr, void* cpu_ptr, const int64_t n) {
95 if (gpu_ptr == NULL || cpu_ptr == NULL) {
96 fprintf(stderr, "gpu_download: NULL pointer argument\n");
97 return;
98 }
99 cudaError_t rc = cudaMemcpy (cpu_ptr, gpu_ptr, n, cudaMemcpyDeviceToHost);
100 if (rc != cudaSuccess) {
101 fprintf(stderr,"cudaMemcpy (download) failed: %s\n", cudaGetErrorString(rc));
102 assert (rc == cudaSuccess);
103 }
104}
105
106void gpu_copy(const void* gpu_ptr_src, void* gpu_ptr_dest, const int64_t n) {
107 if (gpu_ptr_src == NULL || gpu_ptr_dest == NULL) {
108 fprintf(stderr, "gpu_copy: NULL pointer argument\n");
109 return;
110 }
111 cudaError_t rc = cudaMemcpy (gpu_ptr_dest, gpu_ptr_src, n, cudaMemcpyDeviceToDevice);
112 if (rc != cudaSuccess) {
113 fprintf(stderr,"cudaMemcpy (copy) failed: %s\n", cudaGetErrorString(rc));
114 assert (rc == cudaSuccess);
115 }
116}
117
118
/* Streams */
120
121void gpu_stream_create(cudaStream_t* ptr) {
122 cudaError_t rc = cudaStreamCreate(ptr);
123 if (rc != cudaSuccess) {
124 fprintf(stderr,"cudaStreamCreate failed: %s\n", cudaGetErrorString(rc));
125 assert (rc == cudaSuccess);
126 }
127}
128
129void gpu_stream_destroy(cudaStream_t* ptr) {
130 assert (ptr != NULL);
131 cudaError_t rc = cudaStreamDestroy(*ptr);
132 if (rc != cudaSuccess) {
133 fprintf(stderr,"cudaStreamDestroy failed: %s\n", cudaGetErrorString(rc));
134 assert (rc == cudaSuccess);
135 }
136 *ptr = NULL;
137}
138
139void gpu_set_stream(cublasHandle_t handle, cudaStream_t stream) {
140 cublasStatus_t rc = cublasSetStream(handle, stream);
141 if (rc != CUBLAS_STATUS_SUCCESS) {
142 fprintf(stderr,"cublasSetStream failed\n");
143 assert (rc == CUBLAS_STATUS_SUCCESS);
144 }
145}
146
148 cudaError_t rc = cudaDeviceSynchronize();
149 if (rc != cudaSuccess) {
150 fprintf(stderr,"cudaDeviceSynchronize failed: %s\n", cudaGetErrorString(rc));
151 assert (rc == cudaSuccess);
152 }
153}
154
155void gpu_stream_synchronize(void* stream) {
156 cudaError_t rc = cudaStreamSynchronize(stream);
157 if (rc != cudaSuccess) {
158 fprintf(stderr,"cudaStreamSynchronize failed: %s\n", cudaGetErrorString(rc));
159 assert (rc == cudaSuccess);
160 }
161}
162
163
/* BLAS functions */
165
166void gpu_blas_create(cublasHandle_t* ptr) {
167 cublasStatus_t rc = cublasCreate(ptr);
168 assert (rc == CUBLAS_STATUS_SUCCESS);
169}
170
171
172void gpu_blas_destroy(cublasHandle_t* ptr) {
173 assert (ptr != NULL);
174 cublasStatus_t rc = cublasDestroy(*ptr);
175 assert (rc == CUBLAS_STATUS_SUCCESS);
176 *ptr = NULL;
177}
178
179
180void gpu_ddot(cublasHandle_t handle, const int64_t n, const double* x, const int64_t incx, const double* y, const int64_t incy, double* result) {
181 assert (handle != NULL);
182 /* Convert to int */
183 int n_, incx_, incy_;
184
185 n_ = (int) n;
186 incx_ = (int) incx;
187 incy_ = (int) incy;
188
189 assert ( (int64_t) n_ == n );
190 assert ( (int64_t) incx_ == incx);
191 assert ( (int64_t) incy_ == incy);
192
193 cublasStatus_t rc = cublasDdot(handle, n_, x, incx_, y, incy_, result);
194 assert (rc == CUBLAS_STATUS_SUCCESS);
195}
196
197
198
199void gpu_sdot(cublasHandle_t handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result) {
200 assert (handle != NULL);
201
202 /* Convert to int */
203 int n_, incx_, incy_;
204
205 n_ = (int) n;
206 incx_ = (int) incx;
207 incy_ = (int) incy;
208
209 /* Check for integer overflows */
210 assert ( (int64_t) n_ == n );
211 assert ( (int64_t) incx_ == incx);
212 assert ( (int64_t) incy_ == incy);
213
214 float result_ = 0.;
215 cublasStatus_t rc = cublasSdot(handle, n_, x, incx_, y, incy_, &result_);
216 assert (rc == CUBLAS_STATUS_SUCCESS);
217 *result = result_;
218}
219
220
221
222void gpu_dgemv(cublasHandle_t handle, const char* transa, const int64_t m, const int64_t n, const double* alpha,
223 const double* a, const int64_t lda, const double* x, const int64_t incx, const double* beta, double* y, const int64_t incy) {
224
225 assert (handle != NULL);
226
227 /* Convert to int */
228 int m_, n_, lda_, incx_, incy_;
229
230 m_ = (int) m;
231 n_ = (int) n;
232 lda_ = (int) lda;
233 incx_ = (int) incx;
234 incy_ = (int) incy;
235
236 /* Check for integer overflows */
237 assert ( (int64_t) m_ == m );
238 assert ( (int64_t) n_ == n );
239 assert ( (int64_t) lda_ == lda );
240 assert ( (int64_t) incx_ == incx);
241 assert ( (int64_t) incy_ == incy);
242
243 cublasOperation_t transa_ = CUBLAS_OP_N;
244 if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T;
245
246 cublasDgemv(handle, transa_, m_, n_, alpha, a, lda_, x, incx_, beta, y, incy_);
247}
248
249
250
251void gpu_sgemv(cublasHandle_t handle, const char* transa, const int64_t m, const int64_t n, const float* alpha,
252 const float* a, const int64_t lda, const float* x, const int64_t incx, const float* beta, float* y, const int64_t incy) {
253
254 assert (handle != NULL);
255
256 /* Convert to int */
257 int m_, n_, lda_, incx_, incy_;
258
259 m_ = (int) m;
260 n_ = (int) n;
261 lda_ = (int) lda;
262 incx_ = (int) incx;
263 incy_ = (int) incy;
264
265 /* Check for integer overflows */
266 assert ( (int64_t) m_ == m );
267 assert ( (int64_t) n_ == n );
268 assert ( (int64_t) lda_ == lda );
269 assert ( (int64_t) incx_ == incx);
270 assert ( (int64_t) incy_ == incy);
271
272 cublasOperation_t transa_ = CUBLAS_OP_N;
273 if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T;
274
275 cublasSgemv(handle, transa_, m_, n_, alpha, a, lda_, x, incx_, beta, y, incy_);
276}
277
278
279void gpu_dgemm(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const double* alpha,
280 const double* a, const int64_t lda, const double* b, const int64_t ldb, const double* beta, double* c, const int64_t ldc) {
281
282 assert (handle != NULL);
283
284 /* Convert to int */
285 int m_, n_, k_, lda_, ldb_, ldc_;
286
287 m_ = (int) m;
288 n_ = (int) n;
289 k_ = (int) k;
290 lda_ = (int) lda;
291 ldb_ = (int) ldb;
292 ldc_ = (int) ldc;
293
294 /* Check for integer overflows */
295 assert ( (int64_t) m_ == m );
296 assert ( (int64_t) n_ == n );
297 assert ( (int64_t) k_ == k );
298 assert ( (int64_t) lda_ == lda);
299 assert ( (int64_t) ldb_ == ldb);
300 assert ( (int64_t) ldc_ == ldc);
301
302 cublasOperation_t transa_ = CUBLAS_OP_N;
303 cublasOperation_t transb_ = CUBLAS_OP_N;
304 if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T;
305 if (*transb == 'T' || *transb == 't') transb_ = CUBLAS_OP_T;
306
307 cublasDgemm(handle, transa_, transb_, m_, n_, k_, alpha, a, lda_, b, ldb_, beta, c, ldc_);
308}
309
310
311
312void gpu_sgemm(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const float* alpha,
313 const float* a, const int64_t lda, const float* b, const int64_t ldb, const float* beta, float* c, const int64_t ldc) {
314
315 assert (handle != NULL);
316
317 /* Convert to int */
318 int m_, n_, k_, lda_, ldb_, ldc_;
319
320 m_ = (int) m;
321 n_ = (int) n;
322 k_ = (int) k;
323 lda_ = (int) lda;
324 ldb_ = (int) ldb;
325 ldc_ = (int) ldc;
326
327 /* Check for integer overflows */
328 assert ( (int64_t) m_ == m );
329 assert ( (int64_t) n_ == n );
330 assert ( (int64_t) k_ == k );
331 assert ( (int64_t) lda_ == lda);
332 assert ( (int64_t) ldb_ == ldb);
333 assert ( (int64_t) ldc_ == ldc);
334
335 cublasOperation_t transa_ = CUBLAS_OP_N;
336 cublasOperation_t transb_ = CUBLAS_OP_N;
337 if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T;
338 if (*transb == 'T' || *transb == 't') transb_ = CUBLAS_OP_T;
339
340 cublasSgemm(handle, transa_, transb_, m_, n_, k_, alpha, a, lda_, b, ldb_, beta, c, ldc_);
341}
342
343
344void gpu_dgeam(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const double* alpha,
345 const double* a, const int64_t lda, const double* beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) {
346 assert (handle != NULL);
347
348 /* Convert to int */
349 int m_, n_, lda_, ldb_, ldc_;
350
351 m_ = (int) m;
352 n_ = (int) n;
353 lda_ = (int) lda;
354 ldb_ = (int) ldb;
355 ldc_ = (int) ldc;
356
357 /* Check for integer overflows */
358 assert ( (int64_t) m_ == m );
359 assert ( (int64_t) n_ == n );
360 assert ( (int64_t) lda_ == lda);
361 assert ( (int64_t) ldb_ == ldb);
362 assert ( (int64_t) ldc_ == ldc);
363
364 cublasOperation_t transa_ = CUBLAS_OP_N;
365 cublasOperation_t transb_ = CUBLAS_OP_N;
366 if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T;
367 if (*transb == 'T' || *transb == 't') transb_ = CUBLAS_OP_T;
368
369 cublasDgeam(handle, transa_, transb_, m_, n_, alpha, a, lda_, beta, b, ldb_, c, ldc_);
370
371}
372
373
374void gpu_sgeam(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const float* alpha,
375 const float* a, const int64_t lda, const float* beta, const float* b, const int64_t ldb, float* c, const int64_t ldc) {
376 assert (handle != NULL);
377
378 /* Convert to int */
379 int m_, n_, lda_, ldb_, ldc_;
380
381 m_ = (int) m;
382 n_ = (int) n;
383 lda_ = (int) lda;
384 ldb_ = (int) ldb;
385 ldc_ = (int) ldc;
386
387 /* Check for integer overflows */
388 assert ( (int64_t) m_ == m );
389 assert ( (int64_t) n_ == n );
390 assert ( (int64_t) lda_ == lda);
391 assert ( (int64_t) ldb_ == ldb);
392 assert ( (int64_t) ldc_ == ldc);
393
394 cublasOperation_t transa_ = CUBLAS_OP_N;
395 cublasOperation_t transb_ = CUBLAS_OP_N;
396 if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T;
397 if (*transb == 'T' || *transb == 't') transb_ = CUBLAS_OP_T;
398
399 cublasSgeam(handle, transa_, transb_, m_, n_, alpha, a, lda_, beta, b, ldb_, c, ldc_);
400
401}
void gpu_ddot(cublasHandle_t handle, const int64_t n, const double *x, const int64_t incx, const double *y, const int64_t incy, double *result)
Definition gpu_nvidia.c:180
void gpu_dgemv(cublasHandle_t handle, const char *transa, const int64_t m, const int64_t n, const double *alpha, const double *a, const int64_t lda, const double *x, const int64_t incx, const double *beta, double *y, const int64_t incy)
Definition gpu_nvidia.c:222
void gpu_stream_create(cudaStream_t *ptr)
Definition gpu_nvidia.c:121
void gpu_free(void **ptr)
Definition gpu_nvidia.c:75
void gpu_copy(const void *gpu_ptr_src, void *gpu_ptr_dest, const int64_t n)
Definition gpu_nvidia.c:106
void gpu_upload(const void *cpu_ptr, void *gpu_ptr, const int64_t n)
Definition gpu_nvidia.c:82
void gpu_get_memory(size_t *free, size_t *total)
Definition gpu_nvidia.c:37
void gpu_dgeam(cublasHandle_t handle, const char *transa, const char *transb, const int64_t m, const int64_t n, const double *alpha, const double *a, const int64_t lda, const double *beta, const double *b, const int64_t ldb, double *c, const int64_t ldc)
Definition gpu_nvidia.c:344
void gpu_deallocate(void **ptr)
Definition gpu_nvidia.c:67
void gpu_set_stream(cublasHandle_t handle, cudaStream_t stream)
Definition gpu_nvidia.c:139
void gpu_blas_create(cublasHandle_t *ptr)
Definition gpu_nvidia.c:166
void gpu_sgeam(cublasHandle_t handle, const char *transa, const char *transb, const int64_t m, const int64_t n, const float *alpha, const float *a, const int64_t lda, const float *beta, const float *b, const int64_t ldb, float *c, const int64_t ldc)
Definition gpu_nvidia.c:374
void gpu_stream_synchronize(void *stream)
Definition gpu_nvidia.c:155
int gpu_ndevices()
Definition gpu_nvidia.c:14
void gpu_allocate(void **ptr, const int64_t size)
Definition gpu_nvidia.c:47
void gpu_blas_destroy(cublasHandle_t *ptr)
Definition gpu_nvidia.c:172
void gpu_stream_destroy(cudaStream_t *ptr)
Definition gpu_nvidia.c:129
void gpu_download(const void *gpu_ptr, void *cpu_ptr, const int64_t n)
Definition gpu_nvidia.c:94
void gpu_sdot(cublasHandle_t handle, const int64_t n, const float *x, const int64_t incx, const float *y, const int64_t incy, float *result)
Definition gpu_nvidia.c:199
void gpu_sgemm(cublasHandle_t handle, const char *transa, const char *transb, const int64_t m, const int64_t n, const int64_t k, const float *alpha, const float *a, const int64_t lda, const float *b, const int64_t ldb, const float *beta, float *c, const int64_t ldc)
Definition gpu_nvidia.c:312
void gpu_sgemv(cublasHandle_t handle, const char *transa, const int64_t m, const int64_t n, const float *alpha, const float *a, const int64_t lda, const float *x, const int64_t incx, const float *beta, float *y, const int64_t incy)
Definition gpu_nvidia.c:251
void gpu_synchronize()
Definition gpu_nvidia.c:147
void gpu_dgemm(cublasHandle_t handle, const char *transa, const char *transb, const int64_t m, const int64_t n, const int64_t k, const double *alpha, const double *a, const int64_t lda, const double *b, const int64_t ldb, const double *beta, double *c, const int64_t ldc)
Definition gpu_nvidia.c:279
void gpu_set_device(int32_t igpu)
Definition gpu_nvidia.c:20