Simple GPU 1.0
Fortran GPU Computing Library with transparent CPU/GPU support
Loading...
Searching...
No Matches
simple_gpu.h
Go to the documentation of this file.
#ifndef SIMPLE_GPU_H
#define SIMPLE_GPU_H

/*
 * simple_gpu.h - C interface of the Simple GPU library.
 *
 * Declares device, memory, stream and BLAS entry points. Only declarations
 * live here; the definitions are provided by the backend source files.
 */

#include <stddef.h>   /* size_t (used by gpu_get_memory) */
#include <stdint.h>   /* int32_t, int64_t */

/* =========================================================================
 * GPU Device Management
 * ========================================================================= */

/**
 * Returns the number of available GPU devices on the system.
 *
 * @return Device count.
 */
int gpu_ndevices(void);

/**
 * Sets the active GPU device to the device at index i.
 *
 * @param i Device index.
 *          NOTE(review): presumably 0-based and smaller than gpu_ndevices();
 *          confirm against the backends.
 */
void gpu_set_device(int32_t i);

/**
 * Queries the memory status of the current GPU device.
 *
 * @param free  Receives the amount of free memory, in bytes.
 * @param total Receives the total amount of memory, in bytes.
 */
void gpu_get_memory(size_t* free, size_t* total);


/* =========================================================================
 * GPU Memory Management
 * ========================================================================= */

/**
 * Allocates n bytes of memory on the GPU and writes the pointer to *ptr.
 *
 * @param ptr Receives the address of the new allocation.
 * @param n   Size of the allocation, in bytes.
 */
void gpu_allocate(void** ptr, const int64_t n);

/**
 * Frees GPU memory at *ptr and sets *ptr to NULL.
 *
 * @param ptr Address of the pointer to release.
 */
void gpu_free(void** ptr);


/* =========================================================================
 * GPU Memory Transfer
 * ========================================================================= */

/**
 * Copies n bytes from CPU memory (cpu_ptr) to GPU memory (gpu_ptr).
 *
 * @param cpu_ptr Source buffer in CPU memory.
 * @param gpu_ptr Destination buffer in GPU memory.
 * @param n       Number of bytes to copy.
 */
void gpu_upload(const void* cpu_ptr, void* gpu_ptr, const int64_t n);

/**
 * Copies n bytes from GPU memory (gpu_ptr) to CPU memory (cpu_ptr).
 *
 * @param gpu_ptr Source buffer in GPU memory.
 * @param cpu_ptr Destination buffer in CPU memory.
 * @param n       Number of bytes to copy.
 */
void gpu_download(const void* gpu_ptr, void* cpu_ptr, const int64_t n);

/**
 * Copies n bytes between two GPU memory regions (device-to-device).
 *
 * @param gpu_ptr_src  Source region in GPU memory.
 * @param gpu_ptr_dest Destination region in GPU memory.
 * @param n            Number of bytes to copy.
 */
void gpu_copy(const void* gpu_ptr_src, void* gpu_ptr_dest, const int64_t n);


/* =========================================================================
 * GPU Stream Management
 * ========================================================================= */

/**
 * Creates a GPU stream and writes its handle to *ptr.
 * Streams allow asynchronous, potentially concurrent kernel execution.
 *
 * @param ptr Receives the new stream handle.
 */
void gpu_stream_create(void** ptr);

/**
 * Destroys the GPU stream at *ptr and sets *ptr to NULL.
 *
 * @param ptr Address of the stream handle to destroy.
 */
void gpu_stream_destroy(void** ptr);

/**
 * Associates a stream with a BLAS handle so that subsequent BLAS operations
 * are submitted to the given stream rather than the default stream.
 *
 * @param handle BLAS handle (see gpu_blas_create).
 * @param stream Stream handle (see gpu_stream_create).
 */
void gpu_set_stream(void* handle, void* stream);

/**
 * Blocks the calling CPU thread until all pending GPU operations complete.
 */
void gpu_synchronize(void);


/* =========================================================================
 * GPU BLAS Handle Management
 * ========================================================================= */

/**
 * Creates a GPU BLAS library handle and writes it to *handle.
 * The handle must be passed to all subsequent BLAS calls.
 *
 * @param handle Receives the new BLAS handle.
 */
void gpu_blas_create(void** handle);

/**
 * Destroys a GPU BLAS library handle and sets *handle to NULL.
 *
 * @param handle Address of the BLAS handle to destroy.
 */
void gpu_blas_destroy(void** handle);


/* =========================================================================
 * GPU BLAS Operations
 *
 * Conventions:
 * - 'inc*' parameters are element strides (1 = contiguous).
 * - 'ld*' parameters are leading dimensions of matrices in memory.
 * - 'trans' parameters control transposition: 'N' = no transpose,
 *   'T' = transpose, 'C' = conjugate transpose.
 * - All matrix/vector pointers refer to GPU (device) memory.
 *
 * NOTE(review): this header does not state whether the scalar pointers
 * (alpha, beta, and the dot-product 'result') refer to CPU or GPU memory;
 * confirm against the backends before relying on either.
 * ========================================================================= */

/**
 * Double-precision dot product: result = x^T * y
 * Computes the inner product of two n-element vectors x and y.
 *
 * @param handle BLAS handle (see gpu_blas_create).
 * @param n      Number of elements in x and y.
 * @param x      First vector.
 * @param incx   Element stride of x.
 * @param y      Second vector.
 * @param incy   Element stride of y.
 * @param result Receives the dot product.
 */
void gpu_ddot(const void* handle, const int64_t n,
              const double* x, const int64_t incx,
              const double* y, const int64_t incy,
              double* result);

/**
 * Single-precision dot product: result = x^T * y
 * Computes the inner product of two n-element vectors x and y.
 *
 * @param handle BLAS handle (see gpu_blas_create).
 * @param n      Number of elements in x and y.
 * @param x      First vector.
 * @param incx   Element stride of x.
 * @param y      Second vector.
 * @param incy   Element stride of y.
 * @param result Receives the dot product.
 */
void gpu_sdot(const void* handle, const int64_t n,
              const float* x, const int64_t incx,
              const float* y, const int64_t incy,
              float* result);

/**
 * Double-precision matrix-vector multiply: y = alpha * op(A) * x + beta * y
 * op(A) is an m x n matrix (after applying transa), x is a vector of length n,
 * and y is a vector of length m.
 *
 * NOTE(review): reference BLAS gemv defines m and n as the dimensions of A
 * itself, not of op(A); confirm which convention the backends implement.
 *
 * @param handle BLAS handle (see gpu_blas_create).
 * @param transa Transposition applied to A ('N', 'T' or 'C').
 * @param m      Row count (see the dimension note above).
 * @param n      Column count (see the dimension note above).
 * @param alpha  Pointer to the scalar alpha.
 * @param a      Matrix A.
 * @param lda    Leading dimension of A.
 * @param x      Input vector x.
 * @param incx   Element stride of x.
 * @param beta   Pointer to the scalar beta.
 * @param y      Input/output vector y.
 * @param incy   Element stride of y.
 */
void gpu_dgemv(const void* handle, const char transa,
               const int64_t m, const int64_t n,
               const double* alpha,
               const double* a, const int64_t lda,
               const double* x, const int64_t incx,
               const double* beta, double* y, const int64_t incy);

/**
 * Single-precision matrix-vector multiply: y = alpha * op(A) * x + beta * y
 * op(A) is an m x n matrix (after applying transa), x is a vector of length n,
 * and y is a vector of length m.
 *
 * NOTE(review): reference BLAS gemv defines m and n as the dimensions of A
 * itself, not of op(A); confirm which convention the backends implement.
 *
 * @param handle BLAS handle (see gpu_blas_create).
 * @param transa Transposition applied to A ('N', 'T' or 'C').
 * @param m      Row count (see the dimension note above).
 * @param n      Column count (see the dimension note above).
 * @param alpha  Pointer to the scalar alpha.
 * @param a      Matrix A.
 * @param lda    Leading dimension of A.
 * @param x      Input vector x.
 * @param incx   Element stride of x.
 * @param beta   Pointer to the scalar beta.
 * @param y      Input/output vector y.
 * @param incy   Element stride of y.
 */
void gpu_sgemv(const void* handle, const char transa,
               const int64_t m, const int64_t n,
               const float* alpha,
               const float* a, const int64_t lda,
               const float* x, const int64_t incx,
               const float* beta, float* y, const int64_t incy);

/**
 * Double-precision matrix-matrix multiply: C = alpha * op(A) * op(B) + beta * C
 * op(A) is m x k, op(B) is k x n, and C is m x n.
 *
 * @param handle BLAS handle (see gpu_blas_create).
 * @param transa Transposition applied to A ('N', 'T' or 'C').
 * @param transb Transposition applied to B ('N', 'T' or 'C').
 * @param m      Rows of op(A) and of C.
 * @param n      Columns of op(B) and of C.
 * @param k      Columns of op(A) and rows of op(B).
 * @param alpha  Pointer to the scalar alpha.
 * @param a      Matrix A.
 * @param lda    Leading dimension of A.
 * @param b      Matrix B.
 * @param ldb    Leading dimension of B.
 * @param beta   Pointer to the scalar beta.
 * @param c      Input/output matrix C.
 * @param ldc    Leading dimension of C.
 */
void gpu_dgemm(const void* handle, const char transa, const char transb,
               const int64_t m, const int64_t n, const int64_t k,
               const double* alpha,
               const double* a, const int64_t lda,
               const double* b, const int64_t ldb,
               const double* beta, double* c, const int64_t ldc);

/**
 * Single-precision matrix-matrix multiply: C = alpha * op(A) * op(B) + beta * C
 * op(A) is m x k, op(B) is k x n, and C is m x n.
 *
 * @param handle BLAS handle (see gpu_blas_create).
 * @param transa Transposition applied to A ('N', 'T' or 'C').
 * @param transb Transposition applied to B ('N', 'T' or 'C').
 * @param m      Rows of op(A) and of C.
 * @param n      Columns of op(B) and of C.
 * @param k      Columns of op(A) and rows of op(B).
 * @param alpha  Pointer to the scalar alpha.
 * @param a      Matrix A.
 * @param lda    Leading dimension of A.
 * @param b      Matrix B.
 * @param ldb    Leading dimension of B.
 * @param beta   Pointer to the scalar beta.
 * @param c      Input/output matrix C.
 * @param ldc    Leading dimension of C.
 */
void gpu_sgemm(const void* handle, const char transa, const char transb,
               const int64_t m, const int64_t n, const int64_t k,
               const float* alpha,
               const float* a, const int64_t lda,
               const float* b, const int64_t ldb,
               const float* beta, float* c, const int64_t ldc);

/**
 * Double-precision matrix addition: C = alpha * op(A) + beta * op(B)
 * All matrices are m x n. op() applies the transposition specified by
 * transa and transb respectively.
 *
 * NOTE(review): presumably "m x n" describes op(A), op(B) and C (i.e. the
 * shapes after transposition); confirm against the backends.
 *
 * @param handle BLAS handle (see gpu_blas_create).
 * @param transa Transposition applied to A ('N', 'T' or 'C').
 * @param transb Transposition applied to B ('N', 'T' or 'C').
 * @param m      Row count.
 * @param n      Column count.
 * @param alpha  Pointer to the scalar alpha.
 * @param a      Matrix A.
 * @param lda    Leading dimension of A.
 * @param beta   Pointer to the scalar beta.
 * @param b      Matrix B.
 * @param ldb    Leading dimension of B.
 * @param c      Output matrix C.
 * @param ldc    Leading dimension of C.
 */
void gpu_dgeam(const void* handle, const char transa, const char transb,
               const int64_t m, const int64_t n,
               const double* alpha,
               const double* a, const int64_t lda,
               const double* beta,
               const double* b, const int64_t ldb,
               double* c, const int64_t ldc);

/**
 * Single-precision matrix addition: C = alpha * op(A) + beta * op(B)
 * All matrices are m x n. op() applies the transposition specified by
 * transa and transb respectively.
 *
 * NOTE(review): presumably "m x n" describes op(A), op(B) and C (i.e. the
 * shapes after transposition); confirm against the backends.
 *
 * @param handle BLAS handle (see gpu_blas_create).
 * @param transa Transposition applied to A ('N', 'T' or 'C').
 * @param transb Transposition applied to B ('N', 'T' or 'C').
 * @param m      Row count.
 * @param n      Column count.
 * @param alpha  Pointer to the scalar alpha.
 * @param a      Matrix A.
 * @param lda    Leading dimension of A.
 * @param beta   Pointer to the scalar beta.
 * @param b      Matrix B.
 * @param ldb    Leading dimension of B.
 * @param c      Output matrix C.
 * @param ldc    Leading dimension of C.
 */
void gpu_sgeam(const void* handle, const char transa, const char transb,
               const int64_t m, const int64_t n,
               const float* alpha,
               const float* a, const int64_t lda,
               const float* beta,
               const float* b, const int64_t ldb,
               float* c, const int64_t ldc);

#endif /* SIMPLE_GPU_H */
void gpu_set_stream(void *handle, void *stream)
Definition gpu_cpu.c:89
void gpu_blas_destroy(void **handle)
Definition gpu_cpu.c:123
void gpu_stream_create(void **ptr)
Definition gpu_cpu.c:80
void gpu_sdot(const void *handle, const int64_t n, const float *x, const int64_t incx, const float *y, const int64_t incy, float *result)
void gpu_allocate(void **ptr, const int64_t n)
Definition gpu_amd.c:50
void gpu_free(void **ptr)
Definition gpu_amd.c:82
void gpu_copy(const void *gpu_ptr_src, void *gpu_ptr_dest, const int64_t n)
Definition gpu_amd.c:113
void gpu_set_device(int32_t i)
Definition gpu_amd.c:23
void gpu_upload(const void *cpu_ptr, void *gpu_ptr, const int64_t n)
Definition gpu_amd.c:89
void gpu_get_memory(size_t *free, size_t *total)
Definition gpu_amd.c:40
void gpu_ddot(const void *handle, const int64_t n, const double *x, const int64_t incx, const double *y, const int64_t incy, double *result)
void gpu_dgeam(const void *handle, const char transa, const char transb, const int64_t m, const int64_t n, const double *alpha, const double *a, const int64_t lda, const double *beta, const double *b, const int64_t ldb, double *c, const int64_t ldc)
int gpu_ndevices()
Definition gpu_amd.c:14
void gpu_sgemm(const void *handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const float *alpha, const float *a, const int64_t lda, const float *b, const int64_t ldb, const float *beta, float *c, const int64_t ldc)
void gpu_download(const void *gpu_ptr, void *cpu_ptr, const int64_t n)
Definition gpu_amd.c:101
void gpu_dgemm(const void *handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const double *alpha, const double *a, const int64_t lda, const double *b, const int64_t ldb, const double *beta, double *c, const int64_t ldc)
void gpu_blas_create(void **handle)
Definition gpu_cpu.c:118
void gpu_dgemv(const void *handle, const char transa, const int64_t m, const int64_t n, const double *alpha, const double *a, const int64_t lda, const double *x, const int64_t incx, const double *beta, double *y, const int64_t incy)
void gpu_sgeam(const void *handle, const char transa, const char transb, const int64_t m, const int64_t n, const float *alpha, const float *a, const int64_t lda, const float *beta, const float *b, const int64_t ldb, float *c, const int64_t ldc)
void gpu_synchronize()
Definition gpu_amd.c:154
void gpu_sgemv(const void *handle, const char transa, const int64_t m, const int64_t n, const float *alpha, const float *a, const int64_t lda, const float *x, const int64_t incx, const float *beta, float *y, const int64_t incy)
void gpu_stream_destroy(void **ptr)
Definition gpu_cpu.c:84