Simple GPU 1.0
Fortran GPU Computing Library with transparent CPU/GPU support
Loading...
Searching...
No Matches
gpu_cpu.c
Go to the documentation of this file.
1#include <stdint.h>
2#include <stdio.h>
3#include <stdlib.h>
4#include <string.h>
5#include <stdbool.h>
6#include <assert.h>
7
8/* Generic functions */
9
/* Report the number of GPU devices. This implementation always returns 0. */
int gpu_ndevices(void) {
  return 0;
}
13
/* Select the active device. This implementation ignores the index and
 * does nothing. */
void gpu_set_device(int32_t i) {
  (void) i;  /* intentionally unused */
}
17
/* Query free and total device memory, in bytes.
 *
 * This implementation reports 0 for both values. Either output pointer may
 * be NULL, in which case that value is simply not written.
 */
void gpu_get_memory(size_t* free, size_t* total) {
  if (free != NULL) {
    *free = 0;
  }
  if (total != NULL) {
    *total = 0;
  }
}
22
23
24/* Allocation functions */
25
/* Allocate n bytes with malloc() and store the address in *ptr.
 *
 * ptr : output location for the new pointer; a NULL ptr is reported on
 *       stderr and ignored.
 * n   : size in bytes. A negative size (or one that does not fit in size_t)
 *       is reported on stderr and leaves *ptr == NULL.
 *
 * A failed allocation of a non-zero size is reported with perror() and then
 * trips an assert. A NULL result for n == 0 is not treated as a failure,
 * since malloc(0) is allowed to return NULL.
 */
void gpu_allocate(void** ptr, const int64_t n) {
  if (ptr == NULL) {
    fprintf(stderr, "gpu_allocate: ptr argument is NULL\n");
    return;
  }

  *ptr = NULL;

  /* Reject sizes that would be mangled by the conversion to size_t. */
  if (n < 0 || (uint64_t) n > SIZE_MAX) {
    fprintf(stderr, "gpu_allocate: invalid size %lld\n", (long long) n);
    return;
  }

  *ptr = malloc((size_t) n);
  if (*ptr == NULL && n > 0) {
    perror("gpu_allocate: malloc failed");
    assert(*ptr != NULL);
  }
}
37
/* Release the buffer referenced by *ptr and reset *ptr to NULL.
 * Does nothing when ptr or *ptr is NULL. */
void gpu_deallocate(void** ptr) {
  if (ptr != NULL && *ptr != NULL) {
    void* buffer = *ptr;
    *ptr = NULL;
    free(buffer);
  }
}
45
/* Alternate name for gpu_deallocate(): forwards ptr unchanged. */
void gpu_free(void** ptr) {
  gpu_deallocate(ptr);
}
49
50
51/* Memory transfer functions */
52
/* Copy n bytes from cpu_ptr to gpu_ptr.
 *
 * A NULL pointer or a negative byte count is reported on stderr and nothing
 * is copied. The two regions must not overlap (memcpy semantics).
 */
void gpu_upload(const void* cpu_ptr, void* gpu_ptr, const int64_t n) {
  if (cpu_ptr == NULL || gpu_ptr == NULL) {
    fprintf(stderr, "gpu_upload: NULL pointer argument\n");
    return;
  }
  /* A negative count would silently become a huge size_t in memcpy. */
  if (n < 0) {
    fprintf(stderr, "gpu_upload: negative byte count %lld\n", (long long) n);
    return;
  }
  memcpy(gpu_ptr, cpu_ptr, (size_t) n);
}
60
/* Copy n bytes from gpu_ptr to cpu_ptr.
 *
 * A NULL pointer or a negative byte count is reported on stderr and nothing
 * is copied. The two regions must not overlap (memcpy semantics).
 */
void gpu_download(const void* gpu_ptr, void* cpu_ptr, const int64_t n) {
  if (gpu_ptr == NULL || cpu_ptr == NULL) {
    fprintf(stderr, "gpu_download: NULL pointer argument\n");
    return;
  }
  /* A negative count would silently become a huge size_t in memcpy. */
  if (n < 0) {
    fprintf(stderr, "gpu_download: negative byte count %lld\n", (long long) n);
    return;
  }
  memcpy(cpu_ptr, gpu_ptr, (size_t) n);
}
68
/* Copy n bytes from gpu_ptr_src to gpu_ptr_dest.
 *
 * A NULL pointer or a negative byte count is reported on stderr and nothing
 * is copied. memmove() is used so that source and destination ranges taken
 * from the same buffer may overlap.
 */
void gpu_copy(const void* gpu_ptr_src, void* gpu_ptr_dest, const int64_t n) {
  if (gpu_ptr_src == NULL || gpu_ptr_dest == NULL) {
    fprintf(stderr, "gpu_copy: NULL pointer argument\n");
    return;
  }
  /* A negative count would silently become a huge size_t. */
  if (n < 0) {
    fprintf(stderr, "gpu_copy: negative byte count %lld\n", (long long) n);
    return;
  }
  memmove(gpu_ptr_dest, gpu_ptr_src, (size_t) n);
}
76
77
78/* Streams */
79
/* Create a stream handle and store it in *ptr.
 *
 * The handle is a one-byte heap allocation whose only purpose is to be a
 * distinct non-NULL pointer; release it with gpu_stream_destroy().
 * A NULL ptr is reported on stderr and ignored. Allocation failure is
 * reported with perror() and then trips an assert, as in gpu_allocate().
 */
void gpu_stream_create(void** ptr) {
  if (ptr == NULL) {
    fprintf(stderr, "gpu_stream_create: ptr argument is NULL\n");
    return;
  }
  *ptr = malloc(sizeof(char));
  if (*ptr == NULL) {
    perror("gpu_stream_create: malloc failed");
    assert(*ptr != NULL);
  }
}
83
/* Free the stream handle stored in *ptr and reset *ptr to NULL.
 * A NULL ptr is ignored; a NULL *ptr is harmless because free(NULL) is a
 * no-op. */
void gpu_stream_destroy(void** ptr) {
  if (ptr == NULL) {
    return;
  }
  free(*ptr);
  *ptr = NULL;
}
88
/* Attach a stream to a BLAS handle. This implementation ignores both
 * arguments and does nothing. */
void gpu_set_stream(void* handle, void* stream) {
  (void) handle;  /* intentionally unused */
  (void) stream;  /* intentionally unused */
}
92
/* Wait for outstanding device work. This implementation returns
 * immediately. */
void gpu_synchronize(void) {
  return;
}
96
/* Wait for the work queued on a stream. This implementation ignores the
 * stream and returns immediately. */
void gpu_stream_synchronize(void* stream) {
  (void) stream;  /* intentionally unused */
}
100
101
102/* BLAS functions */
103
/* Tell whether an int64_t value falls outside the int32_t range.
 *
 * value : the number to test.
 * name  : label printed in the diagnostic.
 *
 * Returns true (and prints a message on stderr) when value does not fit in
 * an int32_t, false otherwise.
 */
static inline bool check_int32_overflow(int64_t value, const char* name) {
  const bool fits = (value >= INT32_MIN) && (value <= INT32_MAX);
  if (!fits) {
    fprintf(stderr, "Integer overflow: %s value %lld exceeds int32_t range\n", name, (long long)value);
  }
  return !fits;
}
117
/* Create a BLAS handle and store it in *handle.
 *
 * The handle is a one-byte heap allocation whose only purpose is to be a
 * distinct non-NULL pointer; release it with gpu_blas_destroy().
 * A NULL handle argument is reported on stderr and ignored. Allocation
 * failure is reported with perror() and then trips an assert, as in
 * gpu_allocate().
 */
void gpu_blas_create(void** handle) {
  if (handle == NULL) {
    fprintf(stderr, "gpu_blas_create: handle argument is NULL\n");
    return;
  }
  *handle = malloc(sizeof(char));
  if (*handle == NULL) {
    perror("gpu_blas_create: malloc failed");
    assert(*handle != NULL);
  }
}
121
122
/* Free the BLAS handle stored in *handle and reset *handle to NULL.
 * A NULL handle argument is ignored; a NULL *handle is harmless because
 * free(NULL) is a no-op. */
void gpu_blas_destroy(void** handle) {
  if (handle == NULL) {
    return;
  }
  free(*handle);
  *handle = NULL;
}
127
128
129double ddot_(const int32_t* n, const double* x, const int32_t* incx, const double* y, const int32_t* incy);
130
131void gpu_ddot(void* handle, const int64_t n, const double* x, const int64_t incx, const double* y, const int64_t incy, double* result) {
132 assert (handle != NULL);
133
134 /* Convert to int32_t */
135 int32_t n_, incx_, incy_;
136
137 n_ = (int32_t) n;
138 incx_ = (int32_t) incx;
139 incy_ = (int32_t) incy;
140
141 /* Check for integer overflows */
142 if (check_int32_overflow(n, "n") ||
143 check_int32_overflow(incx, "incx") ||
144 check_int32_overflow(incy, "incy")) {
145 *result = 0.0;
146 return;
147 }
148
149 *result = ddot_(&n_, x, &incx_, y, &incy_);
150}
151
152
153float sdot_(const int32_t* n, const float* x, const int32_t* incx, const float* y, const int32_t* incy);
154
155void gpu_sdot(void* handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result) {
156 assert (handle != NULL);
157
158 /* Convert to int32_t */
159 int32_t n_, incx_, incy_;
160
161 n_ = (int32_t) n;
162 incx_ = (int32_t) incx;
163 incy_ = (int32_t) incy;
164
165 /* Check for integer overflows */
166 assert ( (int64_t) n_ == n );
167 assert ( (int64_t) incx_ == incx);
168 assert ( (int64_t) incy_ == incy);
169
170 *result = sdot_(&n_, x, &incx_, y, &incy_);
171}
172
173
174void dgemv_(const char* transa, const int32_t* m, const int32_t* n, const double* alpha,
175 const double* a, const int32_t* lda, const double* x, const int32_t* incx, const double* beta, double* y, const int32_t* incy);
176
177void gpu_dgemv(void* handle, const char* transa, const int64_t m, const int64_t n, const double* alpha,
178 const double* a, const int64_t lda, const double* x, const int64_t incx, const double* beta, double* y, const int64_t incy) {
179
180 assert (handle != NULL);
181
182 /* Convert to int32_t */
183 int32_t m_, n_, lda_, incx_, incy_;
184
185 m_ = (int32_t) m;
186 n_ = (int32_t) n;
187 lda_ = (int32_t) lda;
188 incx_ = (int32_t) incx;
189 incy_ = (int32_t) incy;
190
191 /* Check for integer overflows */
192 assert ( (int64_t) m_ == m );
193 assert ( (int64_t) n_ == n );
194 assert ( (int64_t) lda_ == lda );
195 assert ( (int64_t) incx_ == incx);
196 assert ( (int64_t) incy_ == incy);
197
198 dgemv_(transa, &m_, &n_, alpha, a, &lda_, x, &incx_, beta, y, &incy_);
199}
200
201
202void sgemv_(const char* transa, const int32_t* m, const int32_t* n, const float* alpha,
203 const float* a, const int32_t* lda, const float* x, const int32_t* incx, const float* beta, float* y, const int32_t* incy);
204
205void gpu_sgemv(void* handle, const char* transa, const int64_t m, const int64_t n, const float* alpha,
206 const float* a, const int64_t lda, const float* x, const int64_t incx, const float* beta, float* y, const int64_t incy) {
207
208 assert (handle != NULL);
209
210 /* Convert to int32_t */
211 int32_t m_, n_, lda_, incx_, incy_;
212
213 m_ = (int32_t) m;
214 n_ = (int32_t) n;
215 lda_ = (int32_t) lda;
216 incx_ = (int32_t) incx;
217 incy_ = (int32_t) incy;
218
219 /* Check for integer overflows */
220 assert ( (int64_t) m_ == m );
221 assert ( (int64_t) n_ == n );
222 assert ( (int64_t) lda_ == lda );
223 assert ( (int64_t) incx_ == incx);
224 assert ( (int64_t) incy_ == incy);
225
226 sgemv_(transa, &m_, &n_, alpha, a, &lda_, x, &incx_, beta, y, &incy_);
227}
228
229
230void dgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const double* alpha,
231 const double* a, const int32_t* lda, const double* b, const int32_t* ldb, const double* beta, double* c, const int32_t* ldc);
232
233void gpu_dgemm(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const double* alpha,
234 const double* a, const int64_t lda, const double* b, const int64_t ldb, const double* beta, double* c, const int64_t ldc) {
235
236 assert (handle != NULL);
237
238 /* Convert to int32_t */
239 int32_t m_, n_, k_, lda_, ldb_, ldc_;
240
241 m_ = (int32_t) m;
242 n_ = (int32_t) n;
243 k_ = (int32_t) k;
244 lda_ = (int32_t) lda;
245 ldb_ = (int32_t) ldb;
246 ldc_ = (int32_t) ldc;
247
248 /* Check for integer overflows */
249 assert ( (int64_t) m_ == m );
250 assert ( (int64_t) n_ == n );
251 assert ( (int64_t) k_ == k );
252 assert ( (int64_t) lda_ == lda);
253 assert ( (int64_t) ldb_ == ldb);
254 assert ( (int64_t) ldc_ == ldc);
255
256 dgemm_(transa, transb, &m_, &n_, &k_, alpha, a, &lda_, b, &ldb_, beta, c, &ldc_);
257}
258
259
260
261void sgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const float* alpha,
262 const float* a, const int32_t* lda, const float* b, const int32_t* ldb, const float* beta, float* c, const int32_t* ldc);
263
264void gpu_sgemm(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const float* alpha,
265 const float* a, const int64_t lda, const float* b, const int64_t ldb, const float* beta, float* c, const int64_t ldc) {
266
267 assert (handle != NULL);
268
269 /* Convert to int32_t */
270 int32_t m_, n_, k_, lda_, ldb_, ldc_;
271
272 m_ = (int32_t) m;
273 n_ = (int32_t) n;
274 k_ = (int32_t) k;
275 lda_ = (int32_t) lda;
276 ldb_ = (int32_t) ldb;
277 ldc_ = (int32_t) ldc;
278
279 /* Check for integer overflows */
280 assert ( (int64_t) m_ == m );
281 assert ( (int64_t) n_ == n );
282 assert ( (int64_t) k_ == k );
283 assert ( (int64_t) lda_ == lda);
284 assert ( (int64_t) ldb_ == ldb);
285 assert ( (int64_t) ldc_ == ldc);
286
287 sgemm_(transa, transb, &m_, &n_, &k_, alpha, a, &lda_, b, &ldb_, beta, c, &ldc_);
288}
289
290
/* Decode a BLAS-style operation character for gpu_dgeam().
 * Sets *transposed and returns true for 'N'/'n' (as stored) and for
 * 'T'/'t'/'C'/'c' (transposed; for real matrices the conjugate transpose is
 * the plain transpose). Returns false for anything else. */
static bool gpu_dgeam_parse_op(const char* op, bool* transposed) {
  switch (*op) {
    case 'N': case 'n':
      *transposed = false;
      return true;
    case 'T': case 't':
    case 'C': case 'c':
      *transposed = true;
      return true;
    default:
      return false;
  }
}

/* Double-precision matrix addition: C = alpha * op(A) + beta * op(B).
 *
 * handle         : must be non-NULL (asserted); otherwise unused.
 * transa, transb : operation applied to A and B: 'N'/'n' uses the matrix as
 *                  stored, 'T'/'t'/'C'/'c' uses its transpose.
 * m, n           : number of rows and columns of C (and of op(A), op(B)).
 * alpha, beta    : scalar factors, passed by pointer.
 * a, b, c        : column-major matrices with leading dimensions lda, ldb
 *                  and ldc.
 *
 * When *alpha == 0, A is never read and C = beta * op(B). Otherwise, when
 * *beta == 0, B is never read and C = alpha * op(A).
 * An unsupported operation character is reported on stderr and C is left
 * untouched.
 */
void gpu_dgeam(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const double* alpha,
               const double* a, const int64_t lda, const double* beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) {
  assert (handle != NULL);

  bool a_transposed = false;
  bool b_transposed = false;
  if (!gpu_dgeam_parse_op(transa, &a_transposed) ||
      !gpu_dgeam_parse_op(transb, &b_transposed)) {
    fprintf(stderr, "gpu_dgeam: unsupported transa='%c' transb='%c'\n", *transa, *transb);
    return;
  }

  /* Element (i,j) of op(X) is stored at X[i*row_stride + j*col_stride]:
   * strides are (1, ld) for a matrix used as stored and (ld, 1) for its
   * transpose. */
  const int64_t a_row = a_transposed ? lda : 1;
  const int64_t a_col = a_transposed ? 1 : lda;
  const int64_t b_row = b_transposed ? ldb : 1;
  const int64_t b_col = b_transposed ? 1 : ldb;

  if (*alpha == 0.) {

    /* A does not contribute and is not dereferenced. */
    for (int64_t j=0 ; j<n ; ++j) {
      for (int64_t i=0 ; i<m ; ++i) {
        c[j*ldc+i] = *beta * b[i*b_row + j*b_col];
      }
    }

  } else if (*beta == 0.) {

    /* B does not contribute and is not dereferenced. */
    for (int64_t j=0 ; j<n ; ++j) {
      for (int64_t i=0 ; i<m ; ++i) {
        c[j*ldc+i] = *alpha * a[i*a_row + j*a_col];
      }
    }

  } else {

    for (int64_t j=0 ; j<n ; ++j) {
      for (int64_t i=0 ; i<m ; ++i) {
        c[j*ldc+i] = *alpha * a[i*a_row + j*a_col] + *beta * b[i*b_row + j*b_col];
      }
    }

  }
}
421
422
/* Decode a BLAS-style operation character for gpu_sgeam().
 * Sets *transposed and returns true for 'N'/'n' (as stored) and for
 * 'T'/'t'/'C'/'c' (transposed; for real matrices the conjugate transpose is
 * the plain transpose). Returns false for anything else. */
static bool gpu_sgeam_parse_op(const char* op, bool* transposed) {
  switch (*op) {
    case 'N': case 'n':
      *transposed = false;
      return true;
    case 'T': case 't':
    case 'C': case 'c':
      *transposed = true;
      return true;
    default:
      return false;
  }
}

/* Single-precision matrix addition: C = alpha * op(A) + beta * op(B).
 *
 * handle         : must be non-NULL (asserted); otherwise unused.
 * transa, transb : operation applied to A and B: 'N'/'n' uses the matrix as
 *                  stored, 'T'/'t'/'C'/'c' uses its transpose.
 * m, n           : number of rows and columns of C (and of op(A), op(B)).
 * alpha, beta    : scalar factors, passed by pointer.
 * a, b, c        : column-major matrices with leading dimensions lda, ldb
 *                  and ldc.
 *
 * When *alpha == 0, A is never read and C = beta * op(B). Otherwise, when
 * *beta == 0, B is never read and C = alpha * op(A).
 * An unsupported operation character is reported on stderr and C is left
 * untouched.
 */
void gpu_sgeam(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const float* alpha,
               const float* a, const int64_t lda, const float* beta, const float* b, const int64_t ldb, float* c, const int64_t ldc) {
  assert (handle != NULL);

  bool a_transposed = false;
  bool b_transposed = false;
  if (!gpu_sgeam_parse_op(transa, &a_transposed) ||
      !gpu_sgeam_parse_op(transb, &b_transposed)) {
    fprintf(stderr, "gpu_sgeam: unsupported transa='%c' transb='%c'\n", *transa, *transb);
    return;
  }

  /* Element (i,j) of op(X) is stored at X[i*row_stride + j*col_stride]:
   * strides are (1, ld) for a matrix used as stored and (ld, 1) for its
   * transpose. */
  const int64_t a_row = a_transposed ? lda : 1;
  const int64_t a_col = a_transposed ? 1 : lda;
  const int64_t b_row = b_transposed ? ldb : 1;
  const int64_t b_col = b_transposed ? 1 : ldb;

  if (*alpha == 0.) {

    /* A does not contribute and is not dereferenced. */
    for (int64_t j=0 ; j<n ; ++j) {
      for (int64_t i=0 ; i<m ; ++i) {
        c[j*ldc+i] = *beta * b[i*b_row + j*b_col];
      }
    }

  } else if (*beta == 0.) {

    /* B does not contribute and is not dereferenced. */
    for (int64_t j=0 ; j<n ; ++j) {
      for (int64_t i=0 ; i<m ; ++i) {
        c[j*ldc+i] = *alpha * a[i*a_row + j*a_col];
      }
    }

  } else {

    for (int64_t j=0 ; j<n ; ++j) {
      for (int64_t i=0 ; i<m ; ++i) {
        c[j*ldc+i] = *alpha * a[i*a_row + j*a_col] + *beta * b[i*b_row + j*b_col];
      }
    }

  }
}
void gpu_set_stream(void *handle, void *stream)
Definition gpu_cpu.c:89
void gpu_blas_destroy(void **handle)
Definition gpu_cpu.c:123
void gpu_stream_create(void **ptr)
Definition gpu_cpu.c:80
void gpu_allocate(void **ptr, const int64_t n)
Definition gpu_cpu.c:26
void gpu_sgeam(void *handle, const char *transa, const char *transb, const int64_t m, const int64_t n, const float *alpha, const float *a, const int64_t lda, const float *beta, const float *b, const int64_t ldb, float *c, const int64_t ldc)
Definition gpu_cpu.c:423
void dgemm_(const char *transa, const char *transb, const int32_t *m, const int32_t *n, const int32_t *k, const double *alpha, const double *a, const int32_t *lda, const double *b, const int32_t *ldb, const double *beta, double *c, const int32_t *ldc)
void gpu_free(void **ptr)
Definition gpu_cpu.c:46
void gpu_copy(const void *gpu_ptr_src, void *gpu_ptr_dest, const int64_t n)
Definition gpu_cpu.c:69
double ddot_(const int32_t *n, const double *x, const int32_t *incx, const double *y, const int32_t *incy)
void gpu_set_device(int32_t i)
Definition gpu_cpu.c:14
void gpu_upload(const void *cpu_ptr, void *gpu_ptr, const int64_t n)
Definition gpu_cpu.c:53
void gpu_dgemm(void *handle, const char *transa, const char *transb, const int64_t m, const int64_t n, const int64_t k, const double *alpha, const double *a, const int64_t lda, const double *b, const int64_t ldb, const double *beta, double *c, const int64_t ldc)
Definition gpu_cpu.c:233
void gpu_get_memory(size_t *free, size_t *total)
Definition gpu_cpu.c:18
void gpu_sgemv(void *handle, const char *transa, const int64_t m, const int64_t n, const float *alpha, const float *a, const int64_t lda, const float *x, const int64_t incx, const float *beta, float *y, const int64_t incy)
Definition gpu_cpu.c:205
void gpu_sdot(void *handle, const int64_t n, const float *x, const int64_t incx, const float *y, const int64_t incy, float *result)
Definition gpu_cpu.c:155
void gpu_deallocate(void **ptr)
Definition gpu_cpu.c:38
void gpu_sgemm(void *handle, const char *transa, const char *transb, const int64_t m, const int64_t n, const int64_t k, const float *alpha, const float *a, const int64_t lda, const float *b, const int64_t ldb, const float *beta, float *c, const int64_t ldc)
Definition gpu_cpu.c:264
void gpu_dgeam(void *handle, const char *transa, const char *transb, const int64_t m, const int64_t n, const double *alpha, const double *a, const int64_t lda, const double *beta, const double *b, const int64_t ldb, double *c, const int64_t ldc)
Definition gpu_cpu.c:291
void gpu_stream_synchronize(void *stream)
Definition gpu_cpu.c:97
int gpu_ndevices()
Definition gpu_cpu.c:10
void gpu_dgemv(void *handle, const char *transa, const int64_t m, const int64_t n, const double *alpha, const double *a, const int64_t lda, const double *x, const int64_t incx, const double *beta, double *y, const int64_t incy)
Definition gpu_cpu.c:177
void sgemv_(const char *transa, const int32_t *m, const int32_t *n, const float *alpha, const float *a, const int32_t *lda, const float *x, const int32_t *incx, const float *beta, float *y, const int32_t *incy)
void gpu_download(const void *gpu_ptr, void *cpu_ptr, const int64_t n)
Definition gpu_cpu.c:61
void gpu_ddot(void *handle, const int64_t n, const double *x, const int64_t incx, const double *y, const int64_t incy, double *result)
Definition gpu_cpu.c:131
void gpu_blas_create(void **handle)
Definition gpu_cpu.c:118
float sdot_(const int32_t *n, const float *x, const int32_t *incx, const float *y, const int32_t *incy)
void gpu_synchronize()
Definition gpu_cpu.c:93
static bool check_int32_overflow(int64_t value, const char *name)
Check if an int64_t value can be safely converted to int32_t.
Definition gpu_cpu.c:110
void gpu_stream_destroy(void **ptr)
Definition gpu_cpu.c:84
void sgemm_(const char *transa, const char *transb, const int32_t *m, const int32_t *n, const int32_t *k, const float *alpha, const float *a, const int32_t *lda, const float *b, const int32_t *ldb, const float *beta, float *c, const int32_t *ldc)
void dgemv_(const char *transa, const int32_t *m, const int32_t *n, const double *alpha, const double *a, const int32_t *lda, const double *x, const int32_t *incx, const double *beta, double *y, const int32_t *incy)