Simple GPU 1.0
Fortran GPU Computing Library with transparent CPU/GPU support
Loading...
Searching...
No Matches
gpu Module Reference

Simple GPU - Fortran GPU Computing Library. More...

Data Types

interface  gpu_allocate
 Allocate GPU/CPU memory for arrays. More...
 
interface  gpu_allocate_c
 Allocate GPU/CPU memory (C binding) More...
 
type  gpu_blas
 Handle for BLAS operations. More...
 
interface  gpu_blas_create_c
 
interface  gpu_blas_destroy_c
 
interface  gpu_copy
 Copy data between GPU arrays. More...
 
interface  gpu_copy_c
 
interface  gpu_ddot_c
 
interface  gpu_deallocate
 Free GPU/CPU memory. More...
 
interface  gpu_deallocate_c
 Free GPU/CPU memory (C binding) More...
 
interface  gpu_dgeam_c
 
interface  gpu_dgemm_c
 
interface  gpu_dgemv_c
 
type  gpu_double1
 1-dimensional array of double precision values More...
 
type  gpu_double2
 2-dimensional array of double precision values More...
 
type  gpu_double3
 3-dimensional array of double precision values More...
 
type  gpu_double4
 4-dimensional array of double precision values More...
 
type  gpu_double5
 5-dimensional array of double precision values More...
 
type  gpu_double6
 6-dimensional array of double precision values More...
 
interface  gpu_download
 Download data from device (GPU) to host (CPU) More...
 
interface  gpu_download_c
 
interface  gpu_get_memory
 Query GPU memory usage (C binding) More...
 
interface  gpu_ndevices
 Get number of GPU devices (C binding) More...
 
type  gpu_real1
 1-dimensional array of single precision values More...
 
type  gpu_real2
 2-dimensional array of single precision values More...
 
type  gpu_real3
 3-dimensional array of single precision values More...
 
type  gpu_real4
 4-dimensional array of single precision values More...
 
type  gpu_real5
 5-dimensional array of single precision values More...
 
type  gpu_real6
 6-dimensional array of single precision values More...
 
interface  gpu_sdot_c
 
interface  gpu_set_device
 Set active GPU device (C binding) More...
 
interface  gpu_set_stream_c
 
interface  gpu_sgeam_c
 
interface  gpu_sgemm_c
 
interface  gpu_sgemv_c
 
type  gpu_stream
 Handle for CUDA streams. More...
 
interface  gpu_stream_create_c
 
interface  gpu_stream_destroy_c
 
interface  gpu_stream_synchronize
 
interface  gpu_synchronize
 
interface  gpu_upload
 Upload data from host (CPU) to device (GPU) More...
 
interface  gpu_upload_c
 Upload data to GPU (C binding) More...
 

Functions/Subroutines

subroutine gpu_allocate_double1 (ptr, s)
 Allocate 1D double precision array (32-bit dimensions)
 
subroutine gpu_allocate_double2 (ptr, s1, s2)
 
subroutine gpu_allocate_double3 (ptr, s1, s2, s3)
 
subroutine gpu_allocate_double4 (ptr, s1, s2, s3, s4)
 
subroutine gpu_allocate_double5 (ptr, s1, s2, s3, s4, s5)
 
subroutine gpu_allocate_double6 (ptr, s1, s2, s3, s4, s5, s6)
 
subroutine gpu_allocate_double1_64 (ptr, s)
 
subroutine gpu_allocate_double2_64 (ptr, s1, s2)
 
subroutine gpu_allocate_double3_64 (ptr, s1, s2, s3)
 
subroutine gpu_allocate_double4_64 (ptr, s1, s2, s3, s4)
 
subroutine gpu_allocate_double5_64 (ptr, s1, s2, s3, s4, s5)
 
subroutine gpu_allocate_double6_64 (ptr, s1, s2, s3, s4, s5, s6)
 
subroutine gpu_allocate_real1 (ptr, s)
 
subroutine gpu_allocate_real2 (ptr, s1, s2)
 
subroutine gpu_allocate_real3 (ptr, s1, s2, s3)
 
subroutine gpu_allocate_real4 (ptr, s1, s2, s3, s4)
 
subroutine gpu_allocate_real5 (ptr, s1, s2, s3, s4, s5)
 
subroutine gpu_allocate_real6 (ptr, s1, s2, s3, s4, s5, s6)
 
subroutine gpu_allocate_real1_64 (ptr, s)
 
subroutine gpu_allocate_real2_64 (ptr, s1, s2)
 
subroutine gpu_allocate_real3_64 (ptr, s1, s2, s3)
 
subroutine gpu_allocate_real4_64 (ptr, s1, s2, s3, s4)
 
subroutine gpu_allocate_real5_64 (ptr, s1, s2, s3, s4, s5)
 
subroutine gpu_allocate_real6_64 (ptr, s1, s2, s3, s4, s5, s6)
 
subroutine gpu_deallocate_double1 (ptr)
 
subroutine gpu_deallocate_double2 (ptr)
 
subroutine gpu_deallocate_double3 (ptr)
 
subroutine gpu_deallocate_double4 (ptr)
 
subroutine gpu_deallocate_double5 (ptr)
 
subroutine gpu_deallocate_double6 (ptr)
 
subroutine gpu_deallocate_real1 (ptr)
 
subroutine gpu_deallocate_real2 (ptr)
 
subroutine gpu_deallocate_real3 (ptr)
 
subroutine gpu_deallocate_real4 (ptr)
 
subroutine gpu_deallocate_real5 (ptr)
 
subroutine gpu_deallocate_real6 (ptr)
 
subroutine gpu_upload_double0 (cpu_ptr, gpu_ptr, n)
 
subroutine gpu_upload_double1 (cpu_ptr, gpu_ptr)
 
subroutine gpu_upload_double2 (cpu_ptr, gpu_ptr)
 
subroutine gpu_upload_double3 (cpu_ptr, gpu_ptr)
 
subroutine gpu_upload_double4 (cpu_ptr, gpu_ptr)
 
subroutine gpu_upload_double5 (cpu_ptr, gpu_ptr)
 
subroutine gpu_upload_double6 (cpu_ptr, gpu_ptr)
 
subroutine gpu_upload_real0 (cpu_ptr, gpu_ptr, n)
 
subroutine gpu_upload_real1 (cpu_ptr, gpu_ptr)
 
subroutine gpu_upload_real2 (cpu_ptr, gpu_ptr)
 
subroutine gpu_upload_real3 (cpu_ptr, gpu_ptr)
 
subroutine gpu_upload_real4 (cpu_ptr, gpu_ptr)
 
subroutine gpu_upload_real5 (cpu_ptr, gpu_ptr)
 
subroutine gpu_upload_real6 (cpu_ptr, gpu_ptr)
 
subroutine gpu_download_double0 (gpu_ptr, cpu_ptr, n)
 
subroutine gpu_download_double1 (gpu_ptr, cpu_ptr)
 
subroutine gpu_download_double2 (gpu_ptr, cpu_ptr)
 
subroutine gpu_download_double3 (gpu_ptr, cpu_ptr)
 
subroutine gpu_download_double4 (gpu_ptr, cpu_ptr)
 
subroutine gpu_download_double5 (gpu_ptr, cpu_ptr)
 
subroutine gpu_download_double6 (gpu_ptr, cpu_ptr)
 
subroutine gpu_download_real0 (gpu_ptr, cpu_ptr, n)
 
subroutine gpu_download_real1 (gpu_ptr, cpu_ptr)
 
subroutine gpu_download_real2 (gpu_ptr, cpu_ptr)
 
subroutine gpu_download_real3 (gpu_ptr, cpu_ptr)
 
subroutine gpu_download_real4 (gpu_ptr, cpu_ptr)
 
subroutine gpu_download_real5 (gpu_ptr, cpu_ptr)
 
subroutine gpu_download_real6 (gpu_ptr, cpu_ptr)
 
subroutine gpu_copy_double0 (gpu_ptr_src, gpu_ptr_dest, n)
 
subroutine gpu_copy_double1 (gpu_ptr_src, gpu_ptr_dest)
 
subroutine gpu_copy_double2 (gpu_ptr_src, gpu_ptr_dest)
 
subroutine gpu_copy_double3 (gpu_ptr_src, gpu_ptr_dest)
 
subroutine gpu_copy_double4 (gpu_ptr_src, gpu_ptr_dest)
 
subroutine gpu_copy_double5 (gpu_ptr_src, gpu_ptr_dest)
 
subroutine gpu_copy_double6 (gpu_ptr_src, gpu_ptr_dest)
 
subroutine gpu_copy_real0 (gpu_ptr_src, gpu_ptr_dest, n)
 
subroutine gpu_copy_real1 (gpu_ptr_src, gpu_ptr_dest)
 
subroutine gpu_copy_real2 (gpu_ptr_src, gpu_ptr_dest)
 
subroutine gpu_copy_real3 (gpu_ptr_src, gpu_ptr_dest)
 
subroutine gpu_copy_real4 (gpu_ptr_src, gpu_ptr_dest)
 
subroutine gpu_copy_real5 (gpu_ptr_src, gpu_ptr_dest)
 
subroutine gpu_copy_real6 (gpu_ptr_src, gpu_ptr_dest)
 
subroutine gpu_stream_create (stream)
 
subroutine gpu_stream_destroy (stream)
 
subroutine gpu_set_stream (handle, stream)
 
subroutine gpu_blas_create (handle)
 Create a BLAS handle.
 
subroutine gpu_blas_destroy (handle)
 Destroy a BLAS handle.
 
subroutine gpu_ddot (handle, n, dx, incx, dy, incy, res)
 Double precision dot product (32-bit dimensions)
 
subroutine gpu_ddot_64 (handle, n, dx, incx, dy, incy, res)
 Double precision dot product (64-bit dimensions)
 
subroutine gpu_sdot (handle, n, dx, incx, dy, incy, res)
 
subroutine gpu_sdot_64 (handle, n, dx, incx, dy, incy, res)
 
subroutine gpu_dgeam (handle, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc)
 
subroutine gpu_dgeam_64 (handle, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc)
 
subroutine gpu_sgeam (handle, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc)
 
subroutine gpu_sgeam_64 (handle, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc)
 
subroutine gpu_dgemv (handle, transa, m, n, alpha, a, lda, x, incx, beta, y, incy)
 
subroutine gpu_dgemv_64 (handle, transa, m, n, alpha, a, lda, x, incx, beta, y, incy)
 
subroutine gpu_sgemv (handle, transa, m, n, alpha, a, lda, x, incx, beta, y, incy)
 
subroutine gpu_sgemv_64 (handle, transa, m, n, alpha, a, lda, x, incx, beta, y, incy)
 
subroutine gpu_dgemm (handle, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
 
subroutine gpu_dgemm_64 (handle, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
 
subroutine gpu_sgemm (handle, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
 
subroutine gpu_sgemm_64 (handle, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
 

Detailed Description

Simple GPU - Fortran GPU Computing Library.

This module provides a Fortran interface for GPU computing with transparent support for both CPU (using standard BLAS) and NVIDIA GPU (using cuBLAS).

The library provides:

  • GPU memory management (allocate, free, upload, download, copy)
  • BLAS operations (Level 1, 2, and 3) for single and double precision
  • Support for multidimensional arrays (1D through 6D)
  • CUDA stream management for asynchronous operations
  • Transparent API that works with both CPU and GPU backends
Author
Anthony Scemama
Date
2026

Function/Subroutine Documentation

◆ gpu_allocate_double1()

subroutine gpu::gpu_allocate_double1 ( type(gpu_double1), intent(inout)  ptr,
integer, intent(in)  s 
)

Allocate 1D double precision array (32-bit dimensions)

Definition at line 530 of file simple_gpu.F90.

◆ gpu_allocate_double1_64()

subroutine gpu::gpu_allocate_double1_64 ( type(gpu_double1), intent(inout)  ptr,
integer*8, intent(in)  s 
)

Definition at line 624 of file simple_gpu.F90.

◆ gpu_allocate_double2()

subroutine gpu::gpu_allocate_double2 ( type(gpu_double2), intent(inout)  ptr,
integer, intent(in)  s1,
integer, intent(in)  s2 
)

Definition at line 543 of file simple_gpu.F90.

◆ gpu_allocate_double2_64()

subroutine gpu::gpu_allocate_double2_64 ( type(gpu_double2), intent(inout)  ptr,
integer*8, intent(in)  s1,
integer*8, intent(in)  s2 
)

Definition at line 633 of file simple_gpu.F90.

◆ gpu_allocate_double3()

subroutine gpu::gpu_allocate_double3 ( type(gpu_double3), intent(inout)  ptr,
integer, intent(in)  s1,
integer, intent(in)  s2,
integer, intent(in)  s3 
)

Definition at line 557 of file simple_gpu.F90.

◆ gpu_allocate_double3_64()

subroutine gpu::gpu_allocate_double3_64 ( type(gpu_double3), intent(inout)  ptr,
integer*8, intent(in)  s1,
integer*8, intent(in)  s2,
integer*8, intent(in)  s3 
)

Definition at line 642 of file simple_gpu.F90.

◆ gpu_allocate_double4()

subroutine gpu::gpu_allocate_double4 ( type(gpu_double4), intent(inout)  ptr,
integer, intent(in)  s1,
integer, intent(in)  s2,
integer, intent(in)  s3,
integer, intent(in)  s4 
)

Definition at line 572 of file simple_gpu.F90.

◆ gpu_allocate_double4_64()

subroutine gpu::gpu_allocate_double4_64 ( type(gpu_double4), intent(inout)  ptr,
integer*8, intent(in)  s1,
integer*8, intent(in)  s2,
integer*8, intent(in)  s3,
integer*8, intent(in)  s4 
)

Definition at line 651 of file simple_gpu.F90.

◆ gpu_allocate_double5()

subroutine gpu::gpu_allocate_double5 ( type(gpu_double5), intent(inout)  ptr,
integer, intent(in)  s1,
integer, intent(in)  s2,
integer, intent(in)  s3,
integer, intent(in)  s4,
integer, intent(in)  s5 
)

Definition at line 588 of file simple_gpu.F90.

◆ gpu_allocate_double5_64()

subroutine gpu::gpu_allocate_double5_64 ( type(gpu_double5), intent(inout)  ptr,
integer*8, intent(in)  s1,
integer*8, intent(in)  s2,
integer*8, intent(in)  s3,
integer*8, intent(in)  s4,
integer*8, intent(in)  s5 
)

Definition at line 660 of file simple_gpu.F90.

◆ gpu_allocate_double6()

subroutine gpu::gpu_allocate_double6 ( type(gpu_double6), intent(inout)  ptr,
integer, intent(in)  s1,
integer, intent(in)  s2,
integer, intent(in)  s3,
integer, intent(in)  s4,
integer, intent(in)  s5,
integer, intent(in)  s6 
)

Definition at line 605 of file simple_gpu.F90.

◆ gpu_allocate_double6_64()

subroutine gpu::gpu_allocate_double6_64 ( type(gpu_double6), intent(inout)  ptr,
integer*8, intent(in)  s1,
integer*8, intent(in)  s2,
integer*8, intent(in)  s3,
integer*8, intent(in)  s4,
integer*8, intent(in)  s5,
integer*8, intent(in)  s6 
)

Definition at line 669 of file simple_gpu.F90.

◆ gpu_allocate_real1()

subroutine gpu::gpu_allocate_real1 ( type(gpu_real1), intent(inout)  ptr,
integer, intent(in)  s 
)

Definition at line 678 of file simple_gpu.F90.

◆ gpu_allocate_real1_64()

subroutine gpu::gpu_allocate_real1_64 ( type(gpu_real1), intent(inout)  ptr,
integer*8, intent(in)  s 
)

Definition at line 772 of file simple_gpu.F90.

◆ gpu_allocate_real2()

subroutine gpu::gpu_allocate_real2 ( type(gpu_real2), intent(inout)  ptr,
integer, intent(in)  s1,
integer, intent(in)  s2 
)

Definition at line 691 of file simple_gpu.F90.

◆ gpu_allocate_real2_64()

subroutine gpu::gpu_allocate_real2_64 ( type(gpu_real2), intent(inout)  ptr,
integer*8, intent(in)  s1,
integer*8, intent(in)  s2 
)

Definition at line 781 of file simple_gpu.F90.

◆ gpu_allocate_real3()

subroutine gpu::gpu_allocate_real3 ( type(gpu_real3), intent(inout)  ptr,
integer, intent(in)  s1,
integer, intent(in)  s2,
integer, intent(in)  s3 
)

Definition at line 705 of file simple_gpu.F90.

◆ gpu_allocate_real3_64()

subroutine gpu::gpu_allocate_real3_64 ( type(gpu_real3), intent(inout)  ptr,
integer*8, intent(in)  s1,
integer*8, intent(in)  s2,
integer*8, intent(in)  s3 
)

Definition at line 790 of file simple_gpu.F90.

◆ gpu_allocate_real4()

subroutine gpu::gpu_allocate_real4 ( type(gpu_real4), intent(inout)  ptr,
integer, intent(in)  s1,
integer, intent(in)  s2,
integer, intent(in)  s3,
integer, intent(in)  s4 
)

Definition at line 720 of file simple_gpu.F90.

◆ gpu_allocate_real4_64()

subroutine gpu::gpu_allocate_real4_64 ( type(gpu_real4), intent(inout)  ptr,
integer*8, intent(in)  s1,
integer*8, intent(in)  s2,
integer*8, intent(in)  s3,
integer*8, intent(in)  s4 
)

Definition at line 799 of file simple_gpu.F90.

◆ gpu_allocate_real5()

subroutine gpu::gpu_allocate_real5 ( type(gpu_real5), intent(inout)  ptr,
integer, intent(in)  s1,
integer, intent(in)  s2,
integer, intent(in)  s3,
integer, intent(in)  s4,
integer, intent(in)  s5 
)

Definition at line 736 of file simple_gpu.F90.

◆ gpu_allocate_real5_64()

subroutine gpu::gpu_allocate_real5_64 ( type(gpu_real5), intent(inout)  ptr,
integer*8, intent(in)  s1,
integer*8, intent(in)  s2,
integer*8, intent(in)  s3,
integer*8, intent(in)  s4,
integer*8, intent(in)  s5 
)

Definition at line 808 of file simple_gpu.F90.

◆ gpu_allocate_real6()

subroutine gpu::gpu_allocate_real6 ( type(gpu_real6), intent(inout)  ptr,
integer, intent(in)  s1,
integer, intent(in)  s2,
integer, intent(in)  s3,
integer, intent(in)  s4,
integer, intent(in)  s5,
integer, intent(in)  s6 
)

Definition at line 753 of file simple_gpu.F90.

◆ gpu_allocate_real6_64()

subroutine gpu::gpu_allocate_real6_64 ( type(gpu_real6), intent(inout)  ptr,
integer*8, intent(in)  s1,
integer*8, intent(in)  s2,
integer*8, intent(in)  s3,
integer*8, intent(in)  s4,
integer*8, intent(in)  s5,
integer*8, intent(in)  s6 
)

Definition at line 817 of file simple_gpu.F90.

◆ gpu_blas_create()

subroutine gpu::gpu_blas_create ( type(gpu_blas) handle)

Create a BLAS handle.

Initializes a BLAS handle for performing BLAS operations. Must be called before any BLAS functions. The handle should be destroyed with gpu_blas_destroy when no longer needed.

Parameters
[out] handle: BLAS handle to create

Definition at line 1258 of file simple_gpu.F90.

◆ gpu_blas_destroy()

subroutine gpu::gpu_blas_destroy ( type(gpu_blas) handle)

Destroy a BLAS handle.

Frees resources associated with a BLAS handle. The handle should not be used after calling this.

Parameters
[in,out] handle: BLAS handle to destroy

Definition at line 1269 of file simple_gpu.F90.

◆ gpu_copy_double0()

subroutine gpu::gpu_copy_double0 ( double precision, intent(in), target  gpu_ptr_src,
double precision, intent(in), target  gpu_ptr_dest,
integer, intent(in)  n 
)

Definition at line 1126 of file simple_gpu.F90.

◆ gpu_copy_double1()

subroutine gpu::gpu_copy_double1 ( type(gpu_double1), intent(in)  gpu_ptr_src,
type(gpu_double1), intent(in)  gpu_ptr_dest 
)

Definition at line 1134 of file simple_gpu.F90.

◆ gpu_copy_double2()

subroutine gpu::gpu_copy_double2 ( type(gpu_double2), intent(in)  gpu_ptr_src,
type(gpu_double2), intent(in)  gpu_ptr_dest 
)

Definition at line 1141 of file simple_gpu.F90.

◆ gpu_copy_double3()

subroutine gpu::gpu_copy_double3 ( type(gpu_double3), intent(in)  gpu_ptr_src,
type(gpu_double3), intent(in)  gpu_ptr_dest 
)

Definition at line 1148 of file simple_gpu.F90.

◆ gpu_copy_double4()

subroutine gpu::gpu_copy_double4 ( type(gpu_double4), intent(in)  gpu_ptr_src,
type(gpu_double4), intent(in)  gpu_ptr_dest 
)

Definition at line 1155 of file simple_gpu.F90.

◆ gpu_copy_double5()

subroutine gpu::gpu_copy_double5 ( type(gpu_double5), intent(in)  gpu_ptr_src,
type(gpu_double5), intent(in)  gpu_ptr_dest 
)

Definition at line 1162 of file simple_gpu.F90.

◆ gpu_copy_double6()

subroutine gpu::gpu_copy_double6 ( type(gpu_double6), intent(in)  gpu_ptr_src,
type(gpu_double6), intent(in)  gpu_ptr_dest 
)

Definition at line 1169 of file simple_gpu.F90.

◆ gpu_copy_real0()

subroutine gpu::gpu_copy_real0 ( real, intent(in), target  gpu_ptr_src,
real, intent(in), target  gpu_ptr_dest,
integer, intent(in)  n 
)

Definition at line 1176 of file simple_gpu.F90.

◆ gpu_copy_real1()

subroutine gpu::gpu_copy_real1 ( type(gpu_real1), intent(in)  gpu_ptr_src,
type(gpu_real1), intent(in)  gpu_ptr_dest 
)

Definition at line 1184 of file simple_gpu.F90.

◆ gpu_copy_real2()

subroutine gpu::gpu_copy_real2 ( type(gpu_real2), intent(in)  gpu_ptr_src,
type(gpu_real2), intent(in)  gpu_ptr_dest 
)

Definition at line 1191 of file simple_gpu.F90.

◆ gpu_copy_real3()

subroutine gpu::gpu_copy_real3 ( type(gpu_real3), intent(in)  gpu_ptr_src,
type(gpu_real3), intent(in)  gpu_ptr_dest 
)

Definition at line 1198 of file simple_gpu.F90.

◆ gpu_copy_real4()

subroutine gpu::gpu_copy_real4 ( type(gpu_real4), intent(in)  gpu_ptr_src,
type(gpu_real4), intent(in)  gpu_ptr_dest 
)

Definition at line 1205 of file simple_gpu.F90.

◆ gpu_copy_real5()

subroutine gpu::gpu_copy_real5 ( type(gpu_real5), intent(in)  gpu_ptr_src,
type(gpu_real5), intent(in)  gpu_ptr_dest 
)

Definition at line 1212 of file simple_gpu.F90.

◆ gpu_copy_real6()

subroutine gpu::gpu_copy_real6 ( type(gpu_real6), intent(in)  gpu_ptr_src,
type(gpu_real6), intent(in)  gpu_ptr_dest 
)

Definition at line 1219 of file simple_gpu.F90.

◆ gpu_ddot()

subroutine gpu::gpu_ddot ( type(gpu_blas), intent(in)  handle,
integer*4  n,
double precision, target  dx,
integer*4  incx,
double precision, target  dy,
integer*4  incy,
double precision, intent(out)  res 
)

Double precision dot product (32-bit dimensions)

Computes the dot product of two vectors: result = x^T * y

Parameters
[in] handle: BLAS handle
[in] n: Number of elements
[in] dx: First element of vector x
[in] incx: Stride for vector x
[in] dy: First element of vector y
[in] incy: Stride for vector y
[out] res: Resulting dot product
Note
Pass the first element of the array: x%f(1), not x

Definition at line 1293 of file simple_gpu.F90.

◆ gpu_ddot_64()

subroutine gpu::gpu_ddot_64 ( type(gpu_blas), intent(in)  handle,
integer*8  n,
double precision, target  dx,
integer*8  incx,
double precision, target  dy,
integer*8  incy,
double precision, intent(out)  res 
)

Double precision dot product (64-bit dimensions)

Computes the dot product of two vectors: result = x^T * y. This variant accepts 64-bit integers for dimensions.

Parameters
[in] handle: BLAS handle
[in] n: Number of elements (64-bit)
[in] dx: First element of vector x
[in] incx: Stride for vector x (64-bit)
[in] dy: First element of vector y
[in] incy: Stride for vector y (64-bit)
[out] res: Resulting dot product

Definition at line 1314 of file simple_gpu.F90.

◆ gpu_deallocate_double1()

subroutine gpu::gpu_deallocate_double1 ( type(gpu_double1), intent(inout)  ptr)

Definition at line 829 of file simple_gpu.F90.

◆ gpu_deallocate_double2()

subroutine gpu::gpu_deallocate_double2 ( type(gpu_double2), intent(inout)  ptr)

Definition at line 836 of file simple_gpu.F90.

◆ gpu_deallocate_double3()

subroutine gpu::gpu_deallocate_double3 ( type(gpu_double3), intent(inout)  ptr)

Definition at line 843 of file simple_gpu.F90.

◆ gpu_deallocate_double4()

subroutine gpu::gpu_deallocate_double4 ( type(gpu_double4), intent(inout)  ptr)

Definition at line 850 of file simple_gpu.F90.

◆ gpu_deallocate_double5()

subroutine gpu::gpu_deallocate_double5 ( type(gpu_double5), intent(inout)  ptr)

Definition at line 857 of file simple_gpu.F90.

◆ gpu_deallocate_double6()

subroutine gpu::gpu_deallocate_double6 ( type(gpu_double6), intent(inout)  ptr)

Definition at line 864 of file simple_gpu.F90.

◆ gpu_deallocate_real1()

subroutine gpu::gpu_deallocate_real1 ( type(gpu_real1), intent(inout)  ptr)

Definition at line 872 of file simple_gpu.F90.

◆ gpu_deallocate_real2()

subroutine gpu::gpu_deallocate_real2 ( type(gpu_real2), intent(inout)  ptr)

Definition at line 879 of file simple_gpu.F90.

◆ gpu_deallocate_real3()

subroutine gpu::gpu_deallocate_real3 ( type(gpu_real3), intent(inout)  ptr)

Definition at line 886 of file simple_gpu.F90.

◆ gpu_deallocate_real4()

subroutine gpu::gpu_deallocate_real4 ( type(gpu_real4), intent(inout)  ptr)

Definition at line 893 of file simple_gpu.F90.

◆ gpu_deallocate_real5()

subroutine gpu::gpu_deallocate_real5 ( type(gpu_real5), intent(inout)  ptr)

Definition at line 900 of file simple_gpu.F90.

◆ gpu_deallocate_real6()

subroutine gpu::gpu_deallocate_real6 ( type(gpu_real6), intent(inout)  ptr)

Definition at line 907 of file simple_gpu.F90.

◆ gpu_dgeam()

subroutine gpu::gpu_dgeam ( type(gpu_blas), intent(in)  handle,
character, intent(in)  transa,
character, intent(in)  transb,
integer*4, intent(in)  m,
integer*4, intent(in)  n,
double precision, intent(in)  alpha,
double precision  a,
integer*4, intent(in)  lda,
double precision, intent(in)  beta,
double precision  b,
integer*4, intent(in)  ldb,
double precision  c,
integer*4, intent(in)  ldc 
)

Definition at line 1344 of file simple_gpu.F90.

◆ gpu_dgeam_64()

subroutine gpu::gpu_dgeam_64 ( type(gpu_blas), intent(in)  handle,
character, intent(in)  transa,
character, intent(in)  transb,
integer*8, intent(in)  m,
integer*8, intent(in)  n,
double precision, intent(in)  alpha,
double precision  a,
integer*8, intent(in)  lda,
double precision, intent(in)  beta,
double precision  b,
integer*8, intent(in)  ldb,
double precision  c,
integer*8, intent(in)  ldc 
)

Definition at line 1356 of file simple_gpu.F90.

◆ gpu_dgemm()

subroutine gpu::gpu_dgemm ( type(gpu_blas), intent(in)  handle,
character, intent(in)  transa,
character, intent(in)  transb,
integer*4, intent(in)  m,
integer*4, intent(in)  n,
integer*4, intent(in)  k,
double precision, intent(in)  alpha,
double precision  a,
integer*4, intent(in)  lda,
double precision  b,
integer*4, intent(in)  ldb,
double precision, intent(in)  beta,
double precision  c,
integer*4, intent(in)  ldc 
)

Definition at line 1448 of file simple_gpu.F90.

◆ gpu_dgemm_64()

subroutine gpu::gpu_dgemm_64 ( type(gpu_blas), intent(in)  handle,
character, intent(in)  transa,
character, intent(in)  transb,
integer*8, intent(in)  m,
integer*8, intent(in)  n,
integer*8, intent(in)  k,
double precision, intent(in)  alpha,
double precision  a,
integer*8, intent(in)  lda,
double precision  b,
integer*8, intent(in)  ldb,
double precision, intent(in)  beta,
double precision  c,
integer*8, intent(in)  ldc 
)

Definition at line 1460 of file simple_gpu.F90.

◆ gpu_dgemv()

subroutine gpu::gpu_dgemv ( type(gpu_blas), intent(in)  handle,
character, intent(in)  transa,
integer*4, intent(in)  m,
integer*4, intent(in)  n,
double precision, intent(in)  alpha,
double precision  a,
integer*4, intent(in)  lda,
double precision  x,
integer*4, intent(in)  incx,
double precision, intent(in)  beta,
double precision  y,
integer*4, intent(in)  incy 
)

Definition at line 1395 of file simple_gpu.F90.

◆ gpu_dgemv_64()

subroutine gpu::gpu_dgemv_64 ( type(gpu_blas), intent(in)  handle,
character, intent(in)  transa,
integer*8, intent(in)  m,
integer*8, intent(in)  n,
double precision, intent(in)  alpha,
double precision  a,
integer*8, intent(in)  lda,
double precision  x,
integer*8, intent(in)  incx,
double precision, intent(in)  beta,
double precision  y,
integer*8, intent(in)  incy 
)

Definition at line 1407 of file simple_gpu.F90.

◆ gpu_download_double0()

subroutine gpu::gpu_download_double0 ( double precision, intent(in), target  gpu_ptr,
double precision, intent(in), target  cpu_ptr,
integer, intent(in)  n 
)

Definition at line 1023 of file simple_gpu.F90.

◆ gpu_download_double1()

subroutine gpu::gpu_download_double1 ( type(gpu_double1), intent(in)  gpu_ptr,
double precision, dimension(:), intent(in), target  cpu_ptr 
)

Definition at line 1031 of file simple_gpu.F90.

◆ gpu_download_double2()

subroutine gpu::gpu_download_double2 ( type(gpu_double2), intent(in)  gpu_ptr,
double precision, dimension(:,:), intent(in), target  cpu_ptr 
)

Definition at line 1038 of file simple_gpu.F90.

◆ gpu_download_double3()

subroutine gpu::gpu_download_double3 ( type(gpu_double3), intent(in)  gpu_ptr,
double precision, dimension(:,:,:), intent(in), target  cpu_ptr 
)

Definition at line 1045 of file simple_gpu.F90.

◆ gpu_download_double4()

subroutine gpu::gpu_download_double4 ( type(gpu_double4), intent(in)  gpu_ptr,
double precision, dimension(:,:,:,:), intent(in), target  cpu_ptr 
)

Definition at line 1052 of file simple_gpu.F90.

◆ gpu_download_double5()

subroutine gpu::gpu_download_double5 ( type(gpu_double5), intent(in)  gpu_ptr,
double precision, dimension(:,:,:,:,:), intent(in), target  cpu_ptr 
)

Definition at line 1059 of file simple_gpu.F90.

◆ gpu_download_double6()

subroutine gpu::gpu_download_double6 ( type(gpu_double6), intent(in)  gpu_ptr,
double precision, dimension(:,:,:,:,:,:), intent(in), target  cpu_ptr 
)

Definition at line 1066 of file simple_gpu.F90.

◆ gpu_download_real0()

subroutine gpu::gpu_download_real0 ( real, intent(in), target  gpu_ptr,
real, intent(in), target  cpu_ptr,
integer, intent(in)  n 
)

Definition at line 1073 of file simple_gpu.F90.

◆ gpu_download_real1()

subroutine gpu::gpu_download_real1 ( type(gpu_real1), intent(in)  gpu_ptr,
real, dimension(:), intent(in), target  cpu_ptr 
)

Definition at line 1081 of file simple_gpu.F90.

◆ gpu_download_real2()

subroutine gpu::gpu_download_real2 ( type(gpu_real2), intent(in)  gpu_ptr,
real, dimension(:,:), intent(in), target  cpu_ptr 
)

Definition at line 1088 of file simple_gpu.F90.

◆ gpu_download_real3()

subroutine gpu::gpu_download_real3 ( type(gpu_real3), intent(in)  gpu_ptr,
real, dimension(:,:,:), intent(in), target  cpu_ptr 
)

Definition at line 1095 of file simple_gpu.F90.

◆ gpu_download_real4()

subroutine gpu::gpu_download_real4 ( type(gpu_real4), intent(in)  gpu_ptr,
real, dimension(:,:,:,:), intent(in), target  cpu_ptr 
)

Definition at line 1102 of file simple_gpu.F90.

◆ gpu_download_real5()

subroutine gpu::gpu_download_real5 ( type(gpu_real5), intent(in)  gpu_ptr,
real, dimension(:,:,:,:,:), intent(in), target  cpu_ptr 
)

Definition at line 1109 of file simple_gpu.F90.

◆ gpu_download_real6()

subroutine gpu::gpu_download_real6 ( type(gpu_real6), intent(in)  gpu_ptr,
real, dimension(:,:,:,:,:,:), intent(in), target  cpu_ptr 
)

Definition at line 1116 of file simple_gpu.F90.

◆ gpu_sdot()

subroutine gpu::gpu_sdot ( type(gpu_blas), intent(in)  handle,
integer*4  n,
real, target  dx,
integer*4  incx,
real, target  dy,
integer*4  incy,
real, intent(out)  res 
)

Definition at line 1323 of file simple_gpu.F90.

◆ gpu_sdot_64()

subroutine gpu::gpu_sdot_64 ( type(gpu_blas), intent(in)  handle,
integer*8  n,
real, target  dx,
integer*8  incx,
real, target  dy,
integer*8  incy,
real, intent(out)  res 
)

Definition at line 1332 of file simple_gpu.F90.

◆ gpu_set_stream()

subroutine gpu::gpu_set_stream ( type(gpu_blas) handle,
type(gpu_stream) stream 
)

Definition at line 1240 of file simple_gpu.F90.

◆ gpu_sgeam()

subroutine gpu::gpu_sgeam ( type(gpu_blas), intent(in)  handle,
character, intent(in)  transa,
character, intent(in)  transb,
integer*4, intent(in)  m,
integer*4, intent(in)  n,
real, intent(in)  alpha,
real  a,
integer*4, intent(in)  lda,
real, intent(in)  beta,
real  b,
integer*4, intent(in)  ldb,
real  c,
integer*4, intent(in)  ldc 
)

Definition at line 1368 of file simple_gpu.F90.

◆ gpu_sgeam_64()

subroutine gpu::gpu_sgeam_64 ( type(gpu_blas), intent(in)  handle,
character, intent(in)  transa,
character, intent(in)  transb,
integer*8, intent(in)  m,
integer*8, intent(in)  n,
real, intent(in)  alpha,
real  a,
integer*8, intent(in)  lda,
real, intent(in)  beta,
real  b,
integer*8, intent(in)  ldb,
real  c,
integer*8, intent(in)  ldc 
)

Definition at line 1380 of file simple_gpu.F90.

◆ gpu_sgemm()

subroutine gpu::gpu_sgemm ( type(gpu_blas), intent(in)  handle,
character, intent(in)  transa,
character, intent(in)  transb,
integer*4, intent(in)  m,
integer*4, intent(in)  n,
integer*4, intent(in)  k,
real, intent(in)  alpha,
real  a,
integer*4, intent(in)  lda,
real  b,
integer*4, intent(in)  ldb,
real, intent(in)  beta,
real  c,
integer*4, intent(in)  ldc 
)

Definition at line 1471 of file simple_gpu.F90.

◆ gpu_sgemm_64()

subroutine gpu::gpu_sgemm_64 ( type(gpu_blas), intent(in)  handle,
character, intent(in)  transa,
character, intent(in)  transb,
integer*8, intent(in)  m,
integer*8, intent(in)  n,
integer*8, intent(in)  k,
real, intent(in)  alpha,
real  a,
integer*8, intent(in)  lda,
real  b,
integer*8, intent(in)  ldb,
real, intent(in)  beta,
real  c,
integer*8, intent(in)  ldc 
)

Definition at line 1483 of file simple_gpu.F90.

◆ gpu_sgemv()

subroutine gpu::gpu_sgemv ( type(gpu_blas), intent(in)  handle,
character, intent(in)  transa,
integer*4, intent(in)  m,
integer*4, intent(in)  n,
real, intent(in)  alpha,
real  a,
integer*4, intent(in)  lda,
real  x,
integer*4, intent(in)  incx,
real, intent(in)  beta,
real  y,
integer*4, intent(in)  incy 
)

Definition at line 1420 of file simple_gpu.F90.

◆ gpu_sgemv_64()

subroutine gpu::gpu_sgemv_64 ( type(gpu_blas), intent(in)  handle,
character, intent(in)  transa,
integer*8, intent(in)  m,
integer*8, intent(in)  n,
real, intent(in)  alpha,
real  a,
integer*8, intent(in)  lda,
real  x,
integer*8, intent(in)  incx,
real, intent(in)  beta,
real  y,
integer*8, intent(in)  incy 
)

Definition at line 1432 of file simple_gpu.F90.

◆ gpu_stream_create()

subroutine gpu::gpu_stream_create ( type(gpu_stream) stream)

Definition at line 1230 of file simple_gpu.F90.

◆ gpu_stream_destroy()

subroutine gpu::gpu_stream_destroy ( type(gpu_stream) stream)

Definition at line 1235 of file simple_gpu.F90.

◆ gpu_upload_double0()

subroutine gpu::gpu_upload_double0 ( double precision, intent(in), target  cpu_ptr,
double precision, intent(in), target  gpu_ptr,
integer, intent(in)  n 
)

Definition at line 918 of file simple_gpu.F90.

◆ gpu_upload_double1()

subroutine gpu::gpu_upload_double1 ( double precision, dimension(*), intent(in), target  cpu_ptr,
type(gpu_double1), intent(in)  gpu_ptr 
)

Definition at line 926 of file simple_gpu.F90.

◆ gpu_upload_double2()

subroutine gpu::gpu_upload_double2 ( double precision, dimension(:,:), intent(in), target  cpu_ptr,
type(gpu_double2), intent(in)  gpu_ptr 
)

Definition at line 933 of file simple_gpu.F90.

◆ gpu_upload_double3()

subroutine gpu::gpu_upload_double3 ( double precision, dimension(:,:,:), intent(in), target  cpu_ptr,
type(gpu_double3), intent(in)  gpu_ptr 
)

Definition at line 940 of file simple_gpu.F90.

◆ gpu_upload_double4()

subroutine gpu::gpu_upload_double4 ( double precision, dimension(:,:,:,:), intent(in), target  cpu_ptr,
type(gpu_double4), intent(in)  gpu_ptr 
)

Definition at line 947 of file simple_gpu.F90.

◆ gpu_upload_double5()

subroutine gpu::gpu_upload_double5 ( double precision, dimension(:,:,:,:,:), intent(in), target  cpu_ptr,
type(gpu_double5), intent(in)  gpu_ptr 
)

Definition at line 954 of file simple_gpu.F90.

◆ gpu_upload_double6()

subroutine gpu::gpu_upload_double6 ( double precision, dimension(:,:,:,:,:,:), intent(in), target  cpu_ptr,
type(gpu_double6), intent(in)  gpu_ptr 
)

Definition at line 961 of file simple_gpu.F90.

◆ gpu_upload_real0()

subroutine gpu::gpu_upload_real0 ( real, intent(in), target  cpu_ptr,
real, intent(in), target  gpu_ptr,
integer, intent(in)  n 
)

Definition at line 969 of file simple_gpu.F90.

◆ gpu_upload_real1()

subroutine gpu::gpu_upload_real1 ( real, dimension(*), intent(in), target  cpu_ptr,
type(gpu_real1), intent(in)  gpu_ptr 
)

Definition at line 977 of file simple_gpu.F90.

◆ gpu_upload_real2()

subroutine gpu::gpu_upload_real2 ( real, dimension(:,:), intent(in), target  cpu_ptr,
type(gpu_real2), intent(in)  gpu_ptr 
)

Definition at line 984 of file simple_gpu.F90.

◆ gpu_upload_real3()

subroutine gpu::gpu_upload_real3 ( real, dimension(:,:,:), intent(in), target  cpu_ptr,
type(gpu_real3), intent(in)  gpu_ptr 
)

Definition at line 991 of file simple_gpu.F90.

◆ gpu_upload_real4()

subroutine gpu::gpu_upload_real4 ( real, dimension(:,:,:,:), intent(in), target  cpu_ptr,
type(gpu_real4), intent(in)  gpu_ptr 
)

Definition at line 998 of file simple_gpu.F90.

◆ gpu_upload_real5()

subroutine gpu::gpu_upload_real5 ( real, dimension(:,:,:,:,:), intent(in), target  cpu_ptr,
type(gpu_real5), intent(in)  gpu_ptr 
)

Definition at line 1005 of file simple_gpu.F90.

◆ gpu_upload_real6()

subroutine gpu::gpu_upload_real6 ( real, dimension(:,:,:,:,:,:), intent(in), target  cpu_ptr,
type(gpu_real6), intent(in)  gpu_ptr 
)

Definition at line 1012 of file simple_gpu.F90.