18 use,
intrinsic :: iso_c_binding
48 double precision,
pointer :: f(:)
62 double precision,
pointer :: f(:,:)
68 double precision,
pointer :: f(:,:,:)
74 double precision,
pointer :: f(:,:,:,:)
80 double precision,
pointer :: f(:,:,:,:,:)
86 double precision,
pointer :: f(:,:,:,:,:,:)
102 real,
pointer :: f(:,:)
108 real,
pointer :: f(:,:,:)
114 real,
pointer :: f(:,:,:,:)
120 real,
pointer :: f(:,:,:,:,:)
126 real,
pointer :: f(:,:,:,:,:,:)
179 integer(c_int32_t),
value :: id
185 integer(c_size_t) :: free, total
192 integer(c_int64_t),
value :: n
202 subroutine gpu_upload_c(cpu_ptr, gpu_ptr, n) bind(C, name='gpu_upload')
204 type(c_ptr),
value :: cpu_ptr
205 type(c_ptr),
value :: gpu_ptr
206 integer(c_int64_t),
value :: n
211 type(c_ptr),
value :: gpu_ptr
212 type(c_ptr),
value :: cpu_ptr
213 integer(c_int64_t),
value :: n
216 subroutine gpu_copy_c(gpu_ptr_src, gpu_ptr_dest, n) bind(C, name='gpu_copy')
218 type(c_ptr),
value :: gpu_ptr_src
219 type(c_ptr),
value :: gpu_ptr_dest
220 integer(c_int64_t),
value :: n
225 type(c_ptr) :: stream
230 type(c_ptr) :: stream
235 type(c_ptr),
value :: handle, stream
240 type(c_ptr),
value :: stream
249 type(c_ptr) :: handle
254 type(c_ptr) :: handle
257 subroutine gpu_ddot_c(handle, n, dx, incx, dy, incy, res) bind(C, name='gpu_ddot')
259 type(c_ptr),
value,
intent(in) :: handle
260 integer(c_int64_t),
value :: n, incx, incy
261 type(c_ptr),
value :: dx, dy
262 real(c_double),
intent(out) :: res
265 subroutine gpu_sdot_c(handle, n, dx, incx, dy, incy, res) bind(C, name='gpu_sdot')
267 type(c_ptr),
value,
intent(in) :: handle
268 integer(c_int64_t),
value :: n, incx, incy
269 type(c_ptr),
intent(in),
value :: dx, dy
270 real(c_float),
intent(out) :: res
273 subroutine gpu_dgeam_c(handle, transa, transb, m, n, alpha, a, lda, beta, &
274 b, ldb, c, ldc) bind(C, name='gpu_dgeam')
276 type(c_ptr),
value,
intent(in) :: handle
277 character(c_char),
intent(in) :: transa, transb
278 integer(c_int64_t),
intent(in),
value :: m, n, lda, ldb, ldc
279 real(c_double),
intent(in) :: alpha, beta
280 real(c_double) :: a, b, c
283 subroutine gpu_sgeam_c(handle, transa, transb, m, n, alpha, a, lda, beta, &
284 b, ldb, c, ldc) bind(C, name='gpu_sgeam')
286 type(c_ptr),
value,
intent(in) :: handle
287 character(c_char),
intent(in) :: transa, transb
288 integer(c_int64_t),
intent(in),
value :: m, n, lda, ldb, ldc
289 real(c_float),
intent(in) :: alpha, beta
290 real(c_float) :: a, b, c
294 x, incx, beta, y, incy) bind(C, name='gpu_dgemv')
296 type(c_ptr),
value,
intent(in) :: handle
297 character(c_char),
intent(in) :: transa
298 integer(c_int64_t),
intent(in),
value :: m, n, lda, incx, incy
299 real(c_double),
intent(in) :: alpha, beta
300 real(c_double) :: a, x, y
304 x, incx, beta, y, incy) bind(C, name='gpu_sgemv')
306 type(c_ptr),
value,
intent(in) :: handle
307 character(c_char),
intent(in) :: transa
308 integer(c_int64_t),
intent(in),
value :: m, n, lda, incx, incy
309 real(c_float),
intent(in) :: alpha, beta
310 real(c_float) :: a, x, y
314 subroutine gpu_dgemm_c(handle, transa, transb, m, n, k, alpha, a, lda, &
315 b, ldb, beta, c, ldc) bind(C, name='gpu_dgemm')
317 type(c_ptr),
value,
intent(in) :: handle
318 character(c_char),
intent(in) :: transa, transb
319 integer(c_int64_t),
intent(in),
value :: m, n, k, lda, ldb, ldc
320 real(c_double),
intent(in) :: alpha, beta
321 real(c_double) :: a, b, c
324 subroutine gpu_sgemm_c(handle, transa, transb, m, n, k, alpha, a, lda, &
325 b, ldb, beta, c, ldc) bind(C, name='gpu_sgemm')
327 type(c_ptr),
value,
intent(in) :: handle
328 character(c_char),
intent(in) :: transa, transb
329 integer(c_int64_t),
intent(in),
value :: m, n, k, lda, ldb, ldc
330 real(c_float),
intent(in) :: alpha, beta
331 real(c_float) :: a, b, c
372 procedure gpu_allocate_double1 &
373 ,gpu_allocate_double2 &
374 ,gpu_allocate_double3 &
375 ,gpu_allocate_double4 &
376 ,gpu_allocate_double5 &
377 ,gpu_allocate_double6 &
378 ,gpu_allocate_double1_64 &
379 ,gpu_allocate_double2_64 &
380 ,gpu_allocate_double3_64 &
381 ,gpu_allocate_double4_64 &
382 ,gpu_allocate_double5_64 &
383 ,gpu_allocate_double6_64 &
384 ,gpu_allocate_real1 &
385 ,gpu_allocate_real2 &
386 ,gpu_allocate_real3 &
387 ,gpu_allocate_real4 &
388 ,gpu_allocate_real5 &
389 ,gpu_allocate_real6 &
390 ,gpu_allocate_real1_64 &
391 ,gpu_allocate_real2_64 &
392 ,gpu_allocate_real3_64 &
393 ,gpu_allocate_real4_64 &
394 ,gpu_allocate_real5_64 &
395 ,gpu_allocate_real6_64
409 procedure gpu_deallocate_double1 &
410 ,gpu_deallocate_double2 &
411 ,gpu_deallocate_double3 &
412 ,gpu_deallocate_double4 &
413 ,gpu_deallocate_double5 &
414 ,gpu_deallocate_double6 &
415 ,gpu_deallocate_real1 &
416 ,gpu_deallocate_real2 &
417 ,gpu_deallocate_real3 &
418 ,gpu_deallocate_real4 &
419 ,gpu_deallocate_real5 &
420 ,gpu_deallocate_real6
439 procedure gpu_upload_double0 &
440 ,gpu_upload_double1 &
441 ,gpu_upload_double2 &
442 ,gpu_upload_double3 &
443 ,gpu_upload_double4 &
444 ,gpu_upload_double5 &
445 ,gpu_upload_double6 &
469 procedure gpu_download_double0 &
470 ,gpu_download_double1 &
471 ,gpu_download_double2 &
472 ,gpu_download_double3 &
473 ,gpu_download_double4 &
474 ,gpu_download_double5 &
475 ,gpu_download_double6 &
476 ,gpu_download_real0 &
477 ,gpu_download_real1 &
478 ,gpu_download_real2 &
479 ,gpu_download_real3 &
480 ,gpu_download_real4 &
481 ,gpu_download_real5 &
501 procedure gpu_copy_double0 &
533 integer,
intent(in) :: s
540 call c_f_pointer(ptr%c, ptr%f, (/ s /))
546 integer,
intent(in) :: s1, s2
547 integer*8 :: s1_8, s2_8, n
551 n = s1_8 * s2_8 * 8_8
554 call c_f_pointer(ptr%c, ptr%f, (/ s1, s2 /))
560 integer,
intent(in) :: s1, s2, s3
561 integer*8 :: s1_8, s2_8, s3_8, n
566 n = s1_8 * s2_8 * s3_8 * 8_8
569 call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3 /))
575 integer,
intent(in) :: s1, s2, s3, s4
576 integer*8 :: s1_8, s2_8, s3_8, s4_8, n
582 n = s1_8 * s2_8 * s3_8 * s4_8 * 8_8
585 call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4 /))
591 integer,
intent(in) :: s1, s2, s3, s4, s5
592 integer*8 :: s1_8, s2_8, s3_8, s4_8, s5_8, n
599 n = s1_8 * s2_8 * s3_8 * s4_8 * s5_8 * 8_8
602 call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4, s5 /))
608 integer,
intent(in) :: s1, s2, s3, s4, s5, s6
609 integer*8 :: s1_8, s2_8, s3_8, s4_8, s5_8, s6_8, n
617 n = s1_8 * s2_8 * s3_8 * s4_8 * s5_8 * s6_8 * 8_8
620 call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4, s5, s6 /))
627 integer*8,
intent(in) :: s
630 call c_f_pointer(ptr%c, ptr%f, (/ s /))
636 integer*8,
intent(in) :: s1, s2
639 call c_f_pointer(ptr%c, ptr%f, (/ s1, s2 /))
645 integer*8,
intent(in) :: s1, s2, s3
648 call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3 /))
654 integer*8,
intent(in) :: s1, s2, s3, s4
657 call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4 /))
663 integer*8,
intent(in) :: s1, s2, s3, s4, s5
666 call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4, s5 /))
672 integer*8,
intent(in) :: s1, s2, s3, s4, s5, s6
675 call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4, s5, s6 /))
681 integer,
intent(in) :: s
688 call c_f_pointer(ptr%c, ptr%f, (/ s /))
694 integer,
intent(in) :: s1, s2
695 integer*8 :: s1_8, s2_8, n
699 n = s1_8 * s2_8 * 4_8
702 call c_f_pointer(ptr%c, ptr%f, (/ s1, s2 /))
708 integer,
intent(in) :: s1, s2, s3
709 integer*8 :: s1_8, s2_8, s3_8, n
714 n = s1_8 * s2_8 * s3_8 * 4_8
717 call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3 /))
723 integer,
intent(in) :: s1, s2, s3, s4
724 integer*8 :: s1_8, s2_8, s3_8, s4_8, n
730 n = s1_8 * s2_8 * s3_8 * s4_8 * 4_8
733 call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4 /))
739 integer,
intent(in) :: s1, s2, s3, s4, s5
740 integer*8 :: s1_8, s2_8, s3_8, s4_8, s5_8, n
747 n = s1_8 * s2_8 * s3_8 * s4_8 * s5_8 * 4_8
750 call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4, s5 /))
756 integer,
intent(in) :: s1, s2, s3, s4, s5, s6
757 integer*8 :: s1_8, s2_8, s3_8, s4_8, s5_8, s6_8, n
765 n = s1_8 * s2_8 * s3_8 * s4_8 * s5_8 * s6_8 * 4_8
768 call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4, s5, s6 /))
775 integer*8,
intent(in) :: s
778 call c_f_pointer(ptr%c, ptr%f, (/ s /))
784 integer*8,
intent(in) :: s1, s2
787 call c_f_pointer(ptr%c, ptr%f, (/ s1, s2 /))
793 integer*8,
intent(in) :: s1, s2, s3
796 call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3 /))
802 integer*8,
intent(in) :: s1, s2, s3, s4
805 call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4 /))
811 integer*8,
intent(in) :: s1, s2, s3, s4, s5
814 call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4, s5 /))
820 integer*8,
intent(in) :: s1, s2, s3, s4, s5, s6
823 call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4, s5, s6 /))
920 double precision,
target,
intent(in) :: cpu_ptr
921 double precision,
target,
intent(in) :: gpu_ptr
922 integer,
intent(in) :: n
923 call gpu_upload_c(c_loc(cpu_ptr), c_loc(gpu_ptr), 8_8*n)
928 double precision,
target,
intent(in) :: cpu_ptr(*)
930 call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, 8_8*
size(gpu_ptr%f))
935 double precision,
target,
intent(in) :: cpu_ptr(:,:)
937 call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8)
942 double precision,
target,
intent(in) :: cpu_ptr(:,:,:)
944 call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8)
949 double precision,
target,
intent(in) :: cpu_ptr(:,:,:,:)
951 call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8)
956 double precision,
target,
intent(in) :: cpu_ptr(:,:,:,:,:)
958 call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8)
963 double precision,
target,
intent(in) :: cpu_ptr(:,:,:,:,:,:)
965 call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8)
971 real,
target,
intent(in) :: cpu_ptr
972 real,
target,
intent(in) :: gpu_ptr
973 integer,
intent(in) :: n
974 call gpu_upload_c(c_loc(cpu_ptr), c_loc(gpu_ptr), 4_8*n)
979 real,
target,
intent(in) :: cpu_ptr(*)
981 call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, 4_8*
size(gpu_ptr%f))
986 real,
target,
intent(in) :: cpu_ptr(:,:)
988 call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*4_8)
993 real,
target,
intent(in) :: cpu_ptr(:,:,:)
995 call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*4_8)
1000 real,
target,
intent(in) :: cpu_ptr(:,:,:,:)
1002 call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*4_8)
1007 real,
target,
intent(in) :: cpu_ptr(:,:,:,:,:)
1009 call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*4_8)
1014 real,
target,
intent(in) :: cpu_ptr(:,:,:,:,:,:)
1016 call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*4_8)
1025 double precision,
target,
intent(in) :: gpu_ptr
1026 double precision,
target,
intent(in) :: cpu_ptr
1027 integer,
intent(in) :: n
1034 double precision,
target,
intent(in) :: cpu_ptr(:)
1035 call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*
size(gpu_ptr%f))
1041 double precision,
target,
intent(in) :: cpu_ptr(:,:)
1042 call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8))
1048 double precision,
target,
intent(in) :: cpu_ptr(:,:,:)
1049 call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8))
1055 double precision,
target,
intent(in) :: cpu_ptr(:,:,:,:)
1056 call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8))
1062 double precision,
target,
intent(in) :: cpu_ptr(:,:,:,:,:)
1063 call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8))
1069 double precision,
target,
intent(in) :: cpu_ptr(:,:,:,:,:,:)
1070 call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8))
1075 real,
target,
intent(in) :: gpu_ptr
1076 real,
target,
intent(in) :: cpu_ptr
1077 integer,
intent(in) :: n
1084 real,
target,
intent(in) :: cpu_ptr(:)
1085 call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 4_8*
size(gpu_ptr%f))
1091 real,
target,
intent(in) :: cpu_ptr(:,:)
1092 call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 4_8*product(shape(gpu_ptr%f)*1_8))
1098 real,
target,
intent(in) :: cpu_ptr(:,:,:)
1099 call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 4_8*product(shape(gpu_ptr%f)*1_8))
1105 real,
target,
intent(in) :: cpu_ptr(:,:,:,:)
1106 call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 4_8*product(shape(gpu_ptr%f)*1_8))
1112 real,
target,
intent(in) :: cpu_ptr(:,:,:,:,:)
1113 call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 4_8*product(shape(gpu_ptr%f)*1_8))
1119 real,
target,
intent(in) :: cpu_ptr(:,:,:,:,:,:)
1120 call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 4_8*product(shape(gpu_ptr%f)*1_8))
1128 double precision,
target,
intent(in) :: gpu_ptr_src
1129 double precision,
target,
intent(in) :: gpu_ptr_dest
1130 integer,
intent(in) :: n
1131 call gpu_copy_c(c_loc(gpu_ptr_src), c_loc(gpu_ptr_dest), 8_8*n)
1138 call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*
size(gpu_ptr_dest%f))
1145 call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*product(shape(gpu_ptr_dest%f)*1_8))
1152 call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*product(shape(gpu_ptr_dest%f)*1_8))
1159 call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*product(shape(gpu_ptr_dest%f)*1_8))
1166 call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*product(shape(gpu_ptr_dest%f)*1_8))
1173 call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*product(shape(gpu_ptr_dest%f)*1_8))
1178 real,
target,
intent(in) :: gpu_ptr_src
1179 real,
target,
intent(in) :: gpu_ptr_dest
1180 integer,
intent(in) :: n
1181 call gpu_copy_c(c_loc(gpu_ptr_src), c_loc(gpu_ptr_dest), 4_8*n)
1186 type(
gpu_real1),
intent(in) :: gpu_ptr_src
1187 type(
gpu_real1),
intent(in) :: gpu_ptr_dest
1188 call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 4_8*
size(gpu_ptr_dest%f))
1193 type(
gpu_real2),
intent(in) :: gpu_ptr_src
1194 type(
gpu_real2),
intent(in) :: gpu_ptr_dest
1195 call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 4_8*product(shape(gpu_ptr_dest%f)*1_8))
1200 type(
gpu_real3),
intent(in) :: gpu_ptr_src
1201 type(
gpu_real3),
intent(in) :: gpu_ptr_dest
1202 call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 4_8*product(shape(gpu_ptr_dest%f)*1_8))
1207 type(
gpu_real4),
intent(in) :: gpu_ptr_src
1208 type(
gpu_real4),
intent(in) :: gpu_ptr_dest
1209 call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 4_8*product(shape(gpu_ptr_dest%f)*1_8))
1214 type(
gpu_real5),
intent(in) :: gpu_ptr_src
1215 type(
gpu_real5),
intent(in) :: gpu_ptr_dest
1216 call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 4_8*product(shape(gpu_ptr_dest%f)*1_8))
1221 type(
gpu_real6),
intent(in) :: gpu_ptr_src
1222 type(
gpu_real6),
intent(in) :: gpu_ptr_dest
1223 call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 4_8*product(shape(gpu_ptr_dest%f)*1_8))
1294 type(
gpu_blas),
intent(in) :: handle
1295 integer*4 :: n, incx, incy
1296 double precision,
target :: dx, dy
1297 double precision,
intent(out) :: res
1298 call gpu_ddot_c(handle%c, int(n,c_int64_t), c_loc(dx), int(incx,c_int64_t), c_loc(dy), int(incy,c_int64_t), res)
1315 type(
gpu_blas),
intent(in) :: handle
1316 integer*8 :: n, incx, incy
1317 double precision,
target :: dx, dy
1318 double precision,
intent(out) :: res
1319 call gpu_ddot_c(handle%c, n, c_loc(dx), incx, c_loc(dy), incy, res)
1324 type(
gpu_blas),
intent(in) :: handle
1325 integer*4 :: n, incx, incy
1326 real,
target :: dx, dy
1327 real,
intent(out) :: res
1328 call gpu_sdot_c(handle%c, int(n,c_int64_t), c_loc(dx), int(incx,c_int64_t), c_loc(dy), int(incy,c_int64_t), res)
1333 type(
gpu_blas),
intent(in) :: handle
1334 integer*8 :: n, incx, incy
1335 real,
target :: dx, dy
1336 real,
intent(out) :: res
1337 call gpu_sdot_c(handle%c, n, c_loc(dx), incx, c_loc(dy), incy, res)
1344subroutine gpu_dgeam(handle, transa, transb, m, n, alpha, a, lda, beta, &
1346 type(
gpu_blas),
intent(in) :: handle
1347 character,
intent(in) :: transa, transb
1348 integer*4,
intent(in) :: m, n, lda, ldb, ldc
1349 double precision,
intent(in) :: alpha, beta
1350 double precision :: a, b, c
1351 call gpu_dgeam_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), alpha, a, int(lda,c_int64_t), beta, &
1352 b, int(ldb,c_int64_t), c, int(ldc,c_int64_t))
1356subroutine gpu_dgeam_64(handle, transa, transb, m, n, alpha, a, lda, beta, &
1358 type(
gpu_blas),
intent(in) :: handle
1359 character,
intent(in) :: transa, transb
1360 integer*8,
intent(in) :: m, n, lda, ldb, ldc
1361 double precision,
intent(in) :: alpha, beta
1362 double precision :: a, b, c
1363 call gpu_dgeam_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), alpha, a, int(lda,c_int64_t), beta, &
1364 b, int(ldb,c_int64_t), c, int(ldc,c_int64_t))
1368subroutine gpu_sgeam(handle, transa, transb, m, n, alpha, a, lda, beta, &
1370 type(
gpu_blas),
intent(in) :: handle
1371 character,
intent(in) :: transa, transb
1372 integer*4,
intent(in) :: m, n, lda, ldb, ldc
1373 real,
intent(in) :: alpha, beta
1375 call gpu_sgeam_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), alpha, a, int(lda,c_int64_t), beta, &
1376 b, int(ldb,c_int64_t), c, int(ldc,c_int64_t))
1380subroutine gpu_sgeam_64(handle, transa, transb, m, n, alpha, a, lda, beta, &
1382 type(
gpu_blas),
intent(in) :: handle
1383 character,
intent(in) :: transa, transb
1384 integer*8,
intent(in) :: m, n, lda, ldb, ldc
1385 real,
intent(in) :: alpha, beta
1387 call gpu_sgeam_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), alpha, a, int(lda,c_int64_t), beta, &
1388 b, int(ldb,c_int64_t), c, int(ldc,c_int64_t))
1396 x, incx, beta, y, incy)
1397 type(
gpu_blas),
intent(in) :: handle
1398 character,
intent(in) :: transa
1399 integer*4,
intent(in) :: m, n, lda, incx, incy
1400 double precision,
intent(in) :: alpha, beta
1401 double precision :: a, x, y
1402 call gpu_dgemv_c(handle%c, transa, int(m,c_int64_t), int(n,c_int64_t), &
1403 alpha, a, int(lda,c_int64_t), &
1404 x, int(incx,c_int64_t), beta, y, int(incy,c_int64_t))
1408 x, incx, beta, y, incy)
1409 type(
gpu_blas),
intent(in) :: handle
1410 character,
intent(in) :: transa
1411 integer*8,
intent(in) :: m, n, lda, incx, incy
1412 double precision,
intent(in) :: alpha, beta
1413 double precision :: a, x, y
1414 call gpu_dgemv_c(handle%c, transa, int(m,c_int64_t), int(n,c_int64_t), &
1415 alpha, a, int(lda,c_int64_t), &
1416 x, int(incx,c_int64_t), beta, y, int(incy,c_int64_t))
1421 x, incx, beta, y, incy)
1422 type(
gpu_blas),
intent(in) :: handle
1423 character,
intent(in) :: transa
1424 integer*4,
intent(in) :: m, n, lda, incx, incy
1425 real,
intent(in) :: alpha, beta
1427 call gpu_sgemv_c(handle%c, transa, int(m,c_int64_t), int(n,c_int64_t), &
1428 alpha, a, int(lda,c_int64_t), &
1429 x, int(incx,c_int64_t), beta, y, int(incy,c_int64_t))
1433 x, incx, beta, y, incy)
1434 type(
gpu_blas),
intent(in) :: handle
1435 character,
intent(in) :: transa
1436 integer*8,
intent(in) :: m, n, lda, incx, incy
1437 real,
intent(in) :: alpha, beta
1439 call gpu_sgemv_c(handle%c, transa, int(m,c_int64_t), int(n,c_int64_t), &
1440 alpha, a, int(lda,c_int64_t), &
1441 x, int(incx,c_int64_t), beta, y, int(incy,c_int64_t))
1448subroutine gpu_dgemm(handle, transa, transb, m, n, k, alpha, a, lda, &
1449 b, ldb, beta, c, ldc)
1450 type(
gpu_blas),
intent(in) :: handle
1451 character,
intent(in) :: transa, transb
1452 integer*4,
intent(in) :: m, n, k, lda, ldb, ldc
1453 double precision,
intent(in) :: alpha, beta
1454 double precision :: a, b, c
1455 call gpu_dgemm_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), int(k,c_int64_t), &
1456 alpha, a, int(lda,c_int64_t), &
1457 b, int(ldb,c_int64_t), beta, c, int(ldc,c_int64_t))
1461 b, ldb, beta, c, ldc)
1462 type(
gpu_blas),
intent(in) :: handle
1463 character,
intent(in) :: transa, transb
1464 integer*8,
intent(in) :: m, n, k, lda, ldb, ldc
1465 double precision,
intent(in) :: alpha, beta
1466 double precision :: a, b, c
1467 call gpu_dgemm_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), int(k,c_int64_t), &
1468 alpha, a, int(lda,c_int64_t), b, int(ldb,c_int64_t), beta, c, int(ldc,c_int64_t))
1471subroutine gpu_sgemm(handle, transa, transb, m, n, k, alpha, a, lda, &
1472 b, ldb, beta, c, ldc)
1473 type(
gpu_blas),
intent(in) :: handle
1474 character,
intent(in) :: transa, transb
1475 integer*4,
intent(in) :: m, n, k, lda, ldb, ldc
1476 real,
intent(in) :: alpha, beta
1478 call gpu_sgemm_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), int(k,c_int64_t), &
1479 alpha, a, int(lda,c_int64_t), &
1480 b, int(ldb,c_int64_t), beta, c, int(ldc,c_int64_t))
1484 b, ldb, beta, c, ldc)
1485 type(
gpu_blas),
intent(in) :: handle
1486 character,
intent(in) :: transa, transb
1487 integer*8,
intent(in) :: m, n, k, lda, ldb, ldc
1488 real,
intent(in) :: alpha, beta
1490 call gpu_sgemm_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), int(k,c_int64_t), &
1491 alpha, a, int(lda,c_int64_t), b, int(ldb,c_int64_t), beta, c, int(ldc,c_int64_t))
Allocate GPU/CPU memory (C binding)
Allocate GPU/CPU memory for arrays.
Copy data between GPU arrays.
Free GPU/CPU memory (C binding)
Download data from device (GPU) to host (CPU)
Query GPU memory usage (C binding)
Get number of GPU devices (C binding)
Set active GPU device (C binding)
Upload data to GPU (C binding)
Upload data from host (CPU) to device (GPU)
Simple GPU - Fortran GPU Computing Library.
subroutine gpu_deallocate_double5(ptr)
subroutine gpu_allocate_double1(ptr, s)
Allocate 1D double precision array (32-bit dimensions)
subroutine gpu_upload_real6(cpu_ptr, gpu_ptr)
subroutine gpu_sgeam(handle, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc)
subroutine gpu_download_real1(gpu_ptr, cpu_ptr)
subroutine gpu_upload_real3(cpu_ptr, gpu_ptr)
subroutine gpu_copy_real4(gpu_ptr_src, gpu_ptr_dest)
subroutine gpu_blas_destroy(handle)
Destroy a BLAS handle.
subroutine gpu_dgemv_64(handle, transa, m, n, alpha, a, lda, x, incx, beta, y, incy)
subroutine gpu_allocate_real5_64(ptr, s1, s2, s3, s4, s5)
subroutine gpu_download_real2(gpu_ptr, cpu_ptr)
subroutine gpu_copy_real5(gpu_ptr_src, gpu_ptr_dest)
subroutine gpu_download_double2(gpu_ptr, cpu_ptr)
subroutine gpu_copy_real0(gpu_ptr_src, gpu_ptr_dest, n)
subroutine gpu_deallocate_real1(ptr)
subroutine gpu_sgemv(handle, transa, m, n, alpha, a, lda, x, incx, beta, y, incy)
subroutine gpu_deallocate_real3(ptr)
subroutine gpu_deallocate_double3(ptr)
subroutine gpu_copy_real3(gpu_ptr_src, gpu_ptr_dest)
subroutine gpu_download_double3(gpu_ptr, cpu_ptr)
subroutine gpu_download_real4(gpu_ptr, cpu_ptr)
subroutine gpu_allocate_real2_64(ptr, s1, s2)
subroutine gpu_allocate_double3(ptr, s1, s2, s3)
subroutine gpu_upload_double0(cpu_ptr, gpu_ptr, n)
subroutine gpu_upload_real5(cpu_ptr, gpu_ptr)
subroutine gpu_download_double1(gpu_ptr, cpu_ptr)
subroutine gpu_allocate_real1(ptr, s)
subroutine gpu_upload_double6(cpu_ptr, gpu_ptr)
subroutine gpu_allocate_double5_64(ptr, s1, s2, s3, s4, s5)
subroutine gpu_allocate_double2(ptr, s1, s2)
subroutine gpu_upload_double4(cpu_ptr, gpu_ptr)
subroutine gpu_ddot(handle, n, dx, incx, dy, incy, res)
Double precision dot product (32-bit dimensions)
subroutine gpu_allocate_real4_64(ptr, s1, s2, s3, s4)
subroutine gpu_copy_double2(gpu_ptr_src, gpu_ptr_dest)
subroutine gpu_download_double6(gpu_ptr, cpu_ptr)
subroutine gpu_copy_real1(gpu_ptr_src, gpu_ptr_dest)
subroutine gpu_set_stream(handle, stream)
subroutine gpu_download_real5(gpu_ptr, cpu_ptr)
subroutine gpu_allocate_double4_64(ptr, s1, s2, s3, s4)
subroutine gpu_allocate_real4(ptr, s1, s2, s3, s4)
subroutine gpu_upload_double5(cpu_ptr, gpu_ptr)
subroutine gpu_deallocate_real4(ptr)
subroutine gpu_sgeam_64(handle, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc)
subroutine gpu_allocate_double1_64(ptr, s)
subroutine gpu_allocate_double2_64(ptr, s1, s2)
subroutine gpu_stream_create(stream)
subroutine gpu_allocate_real6(ptr, s1, s2, s3, s4, s5, s6)
subroutine gpu_download_real0(gpu_ptr, cpu_ptr, n)
subroutine gpu_sgemm_64(handle, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
subroutine gpu_download_real3(gpu_ptr, cpu_ptr)
subroutine gpu_dgemm(handle, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
subroutine gpu_dgemv(handle, transa, m, n, alpha, a, lda, x, incx, beta, y, incy)
subroutine gpu_download_double4(gpu_ptr, cpu_ptr)
subroutine gpu_allocate_real6_64(ptr, s1, s2, s3, s4, s5, s6)
subroutine gpu_download_double5(gpu_ptr, cpu_ptr)
subroutine gpu_allocate_real2(ptr, s1, s2)
subroutine gpu_copy_double0(gpu_ptr_src, gpu_ptr_dest, n)
subroutine gpu_dgemm_64(handle, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
subroutine gpu_sdot_64(handle, n, dx, incx, dy, incy, res)
subroutine gpu_deallocate_real5(ptr)
subroutine gpu_dgeam(handle, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc)
subroutine gpu_deallocate_real6(ptr)
subroutine gpu_ddot_64(handle, n, dx, incx, dy, incy, res)
Double precision dot product (64-bit dimensions)
subroutine gpu_upload_double2(cpu_ptr, gpu_ptr)
subroutine gpu_upload_real0(cpu_ptr, gpu_ptr, n)
subroutine gpu_deallocate_real2(ptr)
subroutine gpu_copy_real6(gpu_ptr_src, gpu_ptr_dest)
subroutine gpu_allocate_real3(ptr, s1, s2, s3)
subroutine gpu_download_double0(gpu_ptr, cpu_ptr, n)
subroutine gpu_allocate_double4(ptr, s1, s2, s3, s4)
subroutine gpu_upload_double3(cpu_ptr, gpu_ptr)
subroutine gpu_deallocate_double1(ptr)
subroutine gpu_deallocate_double2(ptr)
subroutine gpu_deallocate_double4(ptr)
subroutine gpu_sdot(handle, n, dx, incx, dy, incy, res)
subroutine gpu_sgemv_64(handle, transa, m, n, alpha, a, lda, x, incx, beta, y, incy)
subroutine gpu_upload_real1(cpu_ptr, gpu_ptr)
subroutine gpu_allocate_double6_64(ptr, s1, s2, s3, s4, s5, s6)
subroutine gpu_allocate_real3_64(ptr, s1, s2, s3)
subroutine gpu_copy_double5(gpu_ptr_src, gpu_ptr_dest)
subroutine gpu_allocate_double6(ptr, s1, s2, s3, s4, s5, s6)
subroutine gpu_copy_double4(gpu_ptr_src, gpu_ptr_dest)
subroutine gpu_deallocate_double6(ptr)
subroutine gpu_allocate_double5(ptr, s1, s2, s3, s4, s5)
subroutine gpu_copy_real2(gpu_ptr_src, gpu_ptr_dest)
subroutine gpu_allocate_real5(ptr, s1, s2, s3, s4, s5)
subroutine gpu_download_real6(gpu_ptr, cpu_ptr)
subroutine gpu_sgemm(handle, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
subroutine gpu_copy_double3(gpu_ptr_src, gpu_ptr_dest)
subroutine gpu_upload_double1(cpu_ptr, gpu_ptr)
subroutine gpu_dgeam_64(handle, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc)
subroutine gpu_copy_double6(gpu_ptr_src, gpu_ptr_dest)
subroutine gpu_upload_real2(cpu_ptr, gpu_ptr)
subroutine gpu_upload_real4(cpu_ptr, gpu_ptr)
subroutine gpu_stream_destroy(stream)
subroutine gpu_allocate_real1_64(ptr, s)
subroutine gpu_blas_create(handle)
Create a BLAS handle.
subroutine gpu_allocate_double3_64(ptr, s1, s2, s3)
subroutine gpu_copy_double1(gpu_ptr_src, gpu_ptr_dest)
Handle for BLAS operations.
1-dimensional array of double precision values
2-dimensional array of double precision values
3-dimensional array of double precision values
4-dimensional array of double precision values
5-dimensional array of double precision values
6-dimensional array of double precision values
1-dimensional array of single precision values
2-dimensional array of single precision values
3-dimensional array of single precision values
4-dimensional array of single precision values
5-dimensional array of single precision values
6-dimensional array of single precision values