|
Simple GPU 1.0
Fortran GPU Computing Library with transparent CPU/GPU support
|
Simple GPU - Fortran GPU Computing Library. More...
Data Types | |
| interface | gpu_allocate |
| Allocate GPU/CPU memory for arrays. More... | |
| interface | gpu_allocate_c |
| Allocate GPU/CPU memory (C binding) More... | |
| type | gpu_blas |
| Handle for BLAS operations. More... | |
| interface | gpu_blas_create_c |
| interface | gpu_blas_destroy_c |
| interface | gpu_copy |
| Copy data between GPU arrays. More... | |
| interface | gpu_copy_c |
| interface | gpu_ddot_c |
| interface | gpu_deallocate |
| Free GPU/CPU memory. More... | |
| interface | gpu_deallocate_c |
| Free GPU/CPU memory (C binding) More... | |
| interface | gpu_dgeam_c |
| interface | gpu_dgemm_c |
| interface | gpu_dgemv_c |
| type | gpu_double1 |
| 1-dimensional array of double precision values More... | |
| type | gpu_double2 |
| 2-dimensional array of double precision values More... | |
| type | gpu_double3 |
| 3-dimensional array of double precision values More... | |
| type | gpu_double4 |
| 4-dimensional array of double precision values More... | |
| type | gpu_double5 |
| 5-dimensional array of double precision values More... | |
| type | gpu_double6 |
| 6-dimensional array of double precision values More... | |
| interface | gpu_download |
| Download data from device (GPU) to host (CPU) More... | |
| interface | gpu_download_c |
| interface | gpu_get_memory |
| Query GPU memory usage (C binding) More... | |
| interface | gpu_ndevices |
| Get number of GPU devices (C binding) More... | |
| type | gpu_real1 |
| 1-dimensional array of single precision values More... | |
| type | gpu_real2 |
| 2-dimensional array of single precision values More... | |
| type | gpu_real3 |
| 3-dimensional array of single precision values More... | |
| type | gpu_real4 |
| 4-dimensional array of single precision values More... | |
| type | gpu_real5 |
| 5-dimensional array of single precision values More... | |
| type | gpu_real6 |
| 6-dimensional array of single precision values More... | |
| interface | gpu_sdot_c |
| interface | gpu_set_device |
| Set active GPU device (C binding) More... | |
| interface | gpu_set_stream_c |
| interface | gpu_sgeam_c |
| interface | gpu_sgemm_c |
| interface | gpu_sgemv_c |
| type | gpu_stream |
| Handle for CUDA streams. More... | |
| interface | gpu_stream_create_c |
| interface | gpu_stream_destroy_c |
| interface | gpu_stream_synchronize |
| interface | gpu_synchronize |
| interface | gpu_upload |
| Upload data from host (CPU) to device (GPU) More... | |
| interface | gpu_upload_c |
| Upload data to GPU (C binding) More... | |
Functions/Subroutines | |
| subroutine | gpu_allocate_double1 (ptr, s) |
| Allocate 1D double precision array (32-bit dimensions) | |
| subroutine | gpu_allocate_double2 (ptr, s1, s2) |
| subroutine | gpu_allocate_double3 (ptr, s1, s2, s3) |
| subroutine | gpu_allocate_double4 (ptr, s1, s2, s3, s4) |
| subroutine | gpu_allocate_double5 (ptr, s1, s2, s3, s4, s5) |
| subroutine | gpu_allocate_double6 (ptr, s1, s2, s3, s4, s5, s6) |
| subroutine | gpu_allocate_double1_64 (ptr, s) |
| subroutine | gpu_allocate_double2_64 (ptr, s1, s2) |
| subroutine | gpu_allocate_double3_64 (ptr, s1, s2, s3) |
| subroutine | gpu_allocate_double4_64 (ptr, s1, s2, s3, s4) |
| subroutine | gpu_allocate_double5_64 (ptr, s1, s2, s3, s4, s5) |
| subroutine | gpu_allocate_double6_64 (ptr, s1, s2, s3, s4, s5, s6) |
| subroutine | gpu_allocate_real1 (ptr, s) |
| subroutine | gpu_allocate_real2 (ptr, s1, s2) |
| subroutine | gpu_allocate_real3 (ptr, s1, s2, s3) |
| subroutine | gpu_allocate_real4 (ptr, s1, s2, s3, s4) |
| subroutine | gpu_allocate_real5 (ptr, s1, s2, s3, s4, s5) |
| subroutine | gpu_allocate_real6 (ptr, s1, s2, s3, s4, s5, s6) |
| subroutine | gpu_allocate_real1_64 (ptr, s) |
| subroutine | gpu_allocate_real2_64 (ptr, s1, s2) |
| subroutine | gpu_allocate_real3_64 (ptr, s1, s2, s3) |
| subroutine | gpu_allocate_real4_64 (ptr, s1, s2, s3, s4) |
| subroutine | gpu_allocate_real5_64 (ptr, s1, s2, s3, s4, s5) |
| subroutine | gpu_allocate_real6_64 (ptr, s1, s2, s3, s4, s5, s6) |
| subroutine | gpu_deallocate_double1 (ptr) |
| subroutine | gpu_deallocate_double2 (ptr) |
| subroutine | gpu_deallocate_double3 (ptr) |
| subroutine | gpu_deallocate_double4 (ptr) |
| subroutine | gpu_deallocate_double5 (ptr) |
| subroutine | gpu_deallocate_double6 (ptr) |
| subroutine | gpu_deallocate_real1 (ptr) |
| subroutine | gpu_deallocate_real2 (ptr) |
| subroutine | gpu_deallocate_real3 (ptr) |
| subroutine | gpu_deallocate_real4 (ptr) |
| subroutine | gpu_deallocate_real5 (ptr) |
| subroutine | gpu_deallocate_real6 (ptr) |
| subroutine | gpu_upload_double0 (cpu_ptr, gpu_ptr, n) |
| subroutine | gpu_upload_double1 (cpu_ptr, gpu_ptr) |
| subroutine | gpu_upload_double2 (cpu_ptr, gpu_ptr) |
| subroutine | gpu_upload_double3 (cpu_ptr, gpu_ptr) |
| subroutine | gpu_upload_double4 (cpu_ptr, gpu_ptr) |
| subroutine | gpu_upload_double5 (cpu_ptr, gpu_ptr) |
| subroutine | gpu_upload_double6 (cpu_ptr, gpu_ptr) |
| subroutine | gpu_upload_real0 (cpu_ptr, gpu_ptr, n) |
| subroutine | gpu_upload_real1 (cpu_ptr, gpu_ptr) |
| subroutine | gpu_upload_real2 (cpu_ptr, gpu_ptr) |
| subroutine | gpu_upload_real3 (cpu_ptr, gpu_ptr) |
| subroutine | gpu_upload_real4 (cpu_ptr, gpu_ptr) |
| subroutine | gpu_upload_real5 (cpu_ptr, gpu_ptr) |
| subroutine | gpu_upload_real6 (cpu_ptr, gpu_ptr) |
| subroutine | gpu_download_double0 (gpu_ptr, cpu_ptr, n) |
| subroutine | gpu_download_double1 (gpu_ptr, cpu_ptr) |
| subroutine | gpu_download_double2 (gpu_ptr, cpu_ptr) |
| subroutine | gpu_download_double3 (gpu_ptr, cpu_ptr) |
| subroutine | gpu_download_double4 (gpu_ptr, cpu_ptr) |
| subroutine | gpu_download_double5 (gpu_ptr, cpu_ptr) |
| subroutine | gpu_download_double6 (gpu_ptr, cpu_ptr) |
| subroutine | gpu_download_real0 (gpu_ptr, cpu_ptr, n) |
| subroutine | gpu_download_real1 (gpu_ptr, cpu_ptr) |
| subroutine | gpu_download_real2 (gpu_ptr, cpu_ptr) |
| subroutine | gpu_download_real3 (gpu_ptr, cpu_ptr) |
| subroutine | gpu_download_real4 (gpu_ptr, cpu_ptr) |
| subroutine | gpu_download_real5 (gpu_ptr, cpu_ptr) |
| subroutine | gpu_download_real6 (gpu_ptr, cpu_ptr) |
| subroutine | gpu_copy_double0 (gpu_ptr_src, gpu_ptr_dest, n) |
| subroutine | gpu_copy_double1 (gpu_ptr_src, gpu_ptr_dest) |
| subroutine | gpu_copy_double2 (gpu_ptr_src, gpu_ptr_dest) |
| subroutine | gpu_copy_double3 (gpu_ptr_src, gpu_ptr_dest) |
| subroutine | gpu_copy_double4 (gpu_ptr_src, gpu_ptr_dest) |
| subroutine | gpu_copy_double5 (gpu_ptr_src, gpu_ptr_dest) |
| subroutine | gpu_copy_double6 (gpu_ptr_src, gpu_ptr_dest) |
| subroutine | gpu_copy_real0 (gpu_ptr_src, gpu_ptr_dest, n) |
| subroutine | gpu_copy_real1 (gpu_ptr_src, gpu_ptr_dest) |
| subroutine | gpu_copy_real2 (gpu_ptr_src, gpu_ptr_dest) |
| subroutine | gpu_copy_real3 (gpu_ptr_src, gpu_ptr_dest) |
| subroutine | gpu_copy_real4 (gpu_ptr_src, gpu_ptr_dest) |
| subroutine | gpu_copy_real5 (gpu_ptr_src, gpu_ptr_dest) |
| subroutine | gpu_copy_real6 (gpu_ptr_src, gpu_ptr_dest) |
| subroutine | gpu_stream_create (stream) |
| subroutine | gpu_stream_destroy (stream) |
| subroutine | gpu_set_stream (handle, stream) |
| subroutine | gpu_blas_create (handle) |
| Create a BLAS handle. | |
| subroutine | gpu_blas_destroy (handle) |
| Destroy a BLAS handle. | |
| subroutine | gpu_ddot (handle, n, dx, incx, dy, incy, res) |
| Double precision dot product (32-bit dimensions) | |
| subroutine | gpu_ddot_64 (handle, n, dx, incx, dy, incy, res) |
| Double precision dot product (64-bit dimensions) | |
| subroutine | gpu_sdot (handle, n, dx, incx, dy, incy, res) |
| subroutine | gpu_sdot_64 (handle, n, dx, incx, dy, incy, res) |
| subroutine | gpu_dgeam (handle, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc) |
| subroutine | gpu_dgeam_64 (handle, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc) |
| subroutine | gpu_sgeam (handle, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc) |
| subroutine | gpu_sgeam_64 (handle, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc) |
| subroutine | gpu_dgemv (handle, transa, m, n, alpha, a, lda, x, incx, beta, y, incy) |
| subroutine | gpu_dgemv_64 (handle, transa, m, n, alpha, a, lda, x, incx, beta, y, incy) |
| subroutine | gpu_sgemv (handle, transa, m, n, alpha, a, lda, x, incx, beta, y, incy) |
| subroutine | gpu_sgemv_64 (handle, transa, m, n, alpha, a, lda, x, incx, beta, y, incy) |
| subroutine | gpu_dgemm (handle, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) |
| subroutine | gpu_dgemm_64 (handle, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) |
| subroutine | gpu_sgemm (handle, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) |
| subroutine | gpu_sgemm_64 (handle, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) |
Simple GPU - Fortran GPU Computing Library.
This module provides a Fortran interface for GPU computing with transparent support for both CPU (using standard BLAS) and NVIDIA GPU (using cuBLAS).
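The library provides:
- Memory management: gpu_allocate and gpu_deallocate for 1- to 6-dimensional single precision (gpu_real1 ... gpu_real6) and double precision (gpu_double1 ... gpu_double6) arrays; allocation accepts 32-bit or 64-bit dimensions.
- Data transfer: gpu_upload (host to device), gpu_download (device to host) and gpu_copy (between GPU arrays).
- BLAS operations through a gpu_blas handle: the dot, geam, gemv and gemm routines in single and double precision, each with 32-bit and 64-bit integer variants.
- Stream and device management: gpu_stream_create, gpu_stream_destroy, gpu_set_stream, gpu_synchronize, gpu_stream_synchronize, gpu_ndevices, gpu_set_device and gpu_get_memory.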
| subroutine gpu::gpu_allocate_double1 | ( | type(gpu_double1), intent(inout) | ptr, |
| integer, intent(in) | s | ||
| ) |
Allocate 1D double precision array (32-bit dimensions)
Definition at line 530 of file simple_gpu.F90.
| subroutine gpu::gpu_allocate_double1_64 | ( | type(gpu_double1), intent(inout) | ptr, |
| integer*8, intent(in) | s | ||
| ) |
Definition at line 624 of file simple_gpu.F90.
| subroutine gpu::gpu_allocate_double2 | ( | type(gpu_double2), intent(inout) | ptr, |
| integer, intent(in) | s1, | ||
| integer, intent(in) | s2 | ||
| ) |
Definition at line 543 of file simple_gpu.F90.
| subroutine gpu::gpu_allocate_double2_64 | ( | type(gpu_double2), intent(inout) | ptr, |
| integer*8, intent(in) | s1, | ||
| integer*8, intent(in) | s2 | ||
| ) |
Definition at line 633 of file simple_gpu.F90.
| subroutine gpu::gpu_allocate_double3 | ( | type(gpu_double3), intent(inout) | ptr, |
| integer, intent(in) | s1, | ||
| integer, intent(in) | s2, | ||
| integer, intent(in) | s3 | ||
| ) |
Definition at line 557 of file simple_gpu.F90.
| subroutine gpu::gpu_allocate_double3_64 | ( | type(gpu_double3), intent(inout) | ptr, |
| integer*8, intent(in) | s1, | ||
| integer*8, intent(in) | s2, | ||
| integer*8, intent(in) | s3 | ||
| ) |
Definition at line 642 of file simple_gpu.F90.
| subroutine gpu::gpu_allocate_double4 | ( | type(gpu_double4), intent(inout) | ptr, |
| integer, intent(in) | s1, | ||
| integer, intent(in) | s2, | ||
| integer, intent(in) | s3, | ||
| integer, intent(in) | s4 | ||
| ) |
Definition at line 572 of file simple_gpu.F90.
| subroutine gpu::gpu_allocate_double4_64 | ( | type(gpu_double4), intent(inout) | ptr, |
| integer*8, intent(in) | s1, | ||
| integer*8, intent(in) | s2, | ||
| integer*8, intent(in) | s3, | ||
| integer*8, intent(in) | s4 | ||
| ) |
Definition at line 651 of file simple_gpu.F90.
| subroutine gpu::gpu_allocate_double5 | ( | type(gpu_double5), intent(inout) | ptr, |
| integer, intent(in) | s1, | ||
| integer, intent(in) | s2, | ||
| integer, intent(in) | s3, | ||
| integer, intent(in) | s4, | ||
| integer, intent(in) | s5 | ||
| ) |
Definition at line 588 of file simple_gpu.F90.
| subroutine gpu::gpu_allocate_double5_64 | ( | type(gpu_double5), intent(inout) | ptr, |
| integer*8, intent(in) | s1, | ||
| integer*8, intent(in) | s2, | ||
| integer*8, intent(in) | s3, | ||
| integer*8, intent(in) | s4, | ||
| integer*8, intent(in) | s5 | ||
| ) |
Definition at line 660 of file simple_gpu.F90.
| subroutine gpu::gpu_allocate_double6 | ( | type(gpu_double6), intent(inout) | ptr, |
| integer, intent(in) | s1, | ||
| integer, intent(in) | s2, | ||
| integer, intent(in) | s3, | ||
| integer, intent(in) | s4, | ||
| integer, intent(in) | s5, | ||
| integer, intent(in) | s6 | ||
| ) |
Definition at line 605 of file simple_gpu.F90.
| subroutine gpu::gpu_allocate_double6_64 | ( | type(gpu_double6), intent(inout) | ptr, |
| integer*8, intent(in) | s1, | ||
| integer*8, intent(in) | s2, | ||
| integer*8, intent(in) | s3, | ||
| integer*8, intent(in) | s4, | ||
| integer*8, intent(in) | s5, | ||
| integer*8, intent(in) | s6 | ||
| ) |
Definition at line 669 of file simple_gpu.F90.
| subroutine gpu::gpu_allocate_real1 | ( | type(gpu_real1), intent(inout) | ptr, |
| integer, intent(in) | s | ||
| ) |
Definition at line 678 of file simple_gpu.F90.
| subroutine gpu::gpu_allocate_real1_64 | ( | type(gpu_real1), intent(inout) | ptr, |
| integer*8, intent(in) | s | ||
| ) |
Definition at line 772 of file simple_gpu.F90.
| subroutine gpu::gpu_allocate_real2 | ( | type(gpu_real2), intent(inout) | ptr, |
| integer, intent(in) | s1, | ||
| integer, intent(in) | s2 | ||
| ) |
Definition at line 691 of file simple_gpu.F90.
| subroutine gpu::gpu_allocate_real2_64 | ( | type(gpu_real2), intent(inout) | ptr, |
| integer*8, intent(in) | s1, | ||
| integer*8, intent(in) | s2 | ||
| ) |
Definition at line 781 of file simple_gpu.F90.
| subroutine gpu::gpu_allocate_real3 | ( | type(gpu_real3), intent(inout) | ptr, |
| integer, intent(in) | s1, | ||
| integer, intent(in) | s2, | ||
| integer, intent(in) | s3 | ||
| ) |
Definition at line 705 of file simple_gpu.F90.
| subroutine gpu::gpu_allocate_real3_64 | ( | type(gpu_real3), intent(inout) | ptr, |
| integer*8, intent(in) | s1, | ||
| integer*8, intent(in) | s2, | ||
| integer*8, intent(in) | s3 | ||
| ) |
Definition at line 790 of file simple_gpu.F90.
| subroutine gpu::gpu_allocate_real4 | ( | type(gpu_real4), intent(inout) | ptr, |
| integer, intent(in) | s1, | ||
| integer, intent(in) | s2, | ||
| integer, intent(in) | s3, | ||
| integer, intent(in) | s4 | ||
| ) |
Definition at line 720 of file simple_gpu.F90.
| subroutine gpu::gpu_allocate_real4_64 | ( | type(gpu_real4), intent(inout) | ptr, |
| integer*8, intent(in) | s1, | ||
| integer*8, intent(in) | s2, | ||
| integer*8, intent(in) | s3, | ||
| integer*8, intent(in) | s4 | ||
| ) |
Definition at line 799 of file simple_gpu.F90.
| subroutine gpu::gpu_allocate_real5 | ( | type(gpu_real5), intent(inout) | ptr, |
| integer, intent(in) | s1, | ||
| integer, intent(in) | s2, | ||
| integer, intent(in) | s3, | ||
| integer, intent(in) | s4, | ||
| integer, intent(in) | s5 | ||
| ) |
Definition at line 736 of file simple_gpu.F90.
| subroutine gpu::gpu_allocate_real5_64 | ( | type(gpu_real5), intent(inout) | ptr, |
| integer*8, intent(in) | s1, | ||
| integer*8, intent(in) | s2, | ||
| integer*8, intent(in) | s3, | ||
| integer*8, intent(in) | s4, | ||
| integer*8, intent(in) | s5 | ||
| ) |
Definition at line 808 of file simple_gpu.F90.
| subroutine gpu::gpu_allocate_real6 | ( | type(gpu_real6), intent(inout) | ptr, |
| integer, intent(in) | s1, | ||
| integer, intent(in) | s2, | ||
| integer, intent(in) | s3, | ||
| integer, intent(in) | s4, | ||
| integer, intent(in) | s5, | ||
| integer, intent(in) | s6 | ||
| ) |
Definition at line 753 of file simple_gpu.F90.
| subroutine gpu::gpu_allocate_real6_64 | ( | type(gpu_real6), intent(inout) | ptr, |
| integer*8, intent(in) | s1, | ||
| integer*8, intent(in) | s2, | ||
| integer*8, intent(in) | s3, | ||
| integer*8, intent(in) | s4, | ||
| integer*8, intent(in) | s5, | ||
| integer*8, intent(in) | s6 | ||
| ) |
Definition at line 817 of file simple_gpu.F90.
| subroutine gpu::gpu_blas_create | ( | type(gpu_blas) | handle | ) |
Create a BLAS handle.
Initializes a BLAS handle for performing BLAS operations. This routine must be called before the handle is passed to any BLAS routine. The handle should be destroyed with gpu_blas_destroy when it is no longer needed.
| [out] | handle | BLAS handle to create |
Definition at line 1258 of file simple_gpu.F90.
| subroutine gpu::gpu_blas_destroy | ( | type(gpu_blas) | handle | ) |
Destroy a BLAS handle.
Frees resources associated with a BLAS handle. The handle should not be used after calling this.
| [in,out] | handle | BLAS handle to destroy |
Definition at line 1269 of file simple_gpu.F90.
| subroutine gpu::gpu_copy_double0 | ( | double precision, intent(in), target | gpu_ptr_src, |
| double precision, intent(in), target | gpu_ptr_dest, | ||
| integer, intent(in) | n | ||
| ) |
Definition at line 1126 of file simple_gpu.F90.
| subroutine gpu::gpu_copy_double1 | ( | type(gpu_double1), intent(in) | gpu_ptr_src, |
| type(gpu_double1), intent(in) | gpu_ptr_dest | ||
| ) |
Definition at line 1134 of file simple_gpu.F90.
| subroutine gpu::gpu_copy_double2 | ( | type(gpu_double2), intent(in) | gpu_ptr_src, |
| type(gpu_double2), intent(in) | gpu_ptr_dest | ||
| ) |
Definition at line 1141 of file simple_gpu.F90.
| subroutine gpu::gpu_copy_double3 | ( | type(gpu_double3), intent(in) | gpu_ptr_src, |
| type(gpu_double3), intent(in) | gpu_ptr_dest | ||
| ) |
Definition at line 1148 of file simple_gpu.F90.
| subroutine gpu::gpu_copy_double4 | ( | type(gpu_double4), intent(in) | gpu_ptr_src, |
| type(gpu_double4), intent(in) | gpu_ptr_dest | ||
| ) |
Definition at line 1155 of file simple_gpu.F90.
| subroutine gpu::gpu_copy_double5 | ( | type(gpu_double5), intent(in) | gpu_ptr_src, |
| type(gpu_double5), intent(in) | gpu_ptr_dest | ||
| ) |
Definition at line 1162 of file simple_gpu.F90.
| subroutine gpu::gpu_copy_double6 | ( | type(gpu_double6), intent(in) | gpu_ptr_src, |
| type(gpu_double6), intent(in) | gpu_ptr_dest | ||
| ) |
Definition at line 1169 of file simple_gpu.F90.
| subroutine gpu::gpu_copy_real0 | ( | real, intent(in), target | gpu_ptr_src, |
| real, intent(in), target | gpu_ptr_dest, | ||
| integer, intent(in) | n | ||
| ) |
Definition at line 1176 of file simple_gpu.F90.
| subroutine gpu::gpu_copy_real1 | ( | type(gpu_real1), intent(in) | gpu_ptr_src, |
| type(gpu_real1), intent(in) | gpu_ptr_dest | ||
| ) |
Definition at line 1184 of file simple_gpu.F90.
| subroutine gpu::gpu_copy_real2 | ( | type(gpu_real2), intent(in) | gpu_ptr_src, |
| type(gpu_real2), intent(in) | gpu_ptr_dest | ||
| ) |
Definition at line 1191 of file simple_gpu.F90.
| subroutine gpu::gpu_copy_real3 | ( | type(gpu_real3), intent(in) | gpu_ptr_src, |
| type(gpu_real3), intent(in) | gpu_ptr_dest | ||
| ) |
Definition at line 1198 of file simple_gpu.F90.
| subroutine gpu::gpu_copy_real4 | ( | type(gpu_real4), intent(in) | gpu_ptr_src, |
| type(gpu_real4), intent(in) | gpu_ptr_dest | ||
| ) |
Definition at line 1205 of file simple_gpu.F90.
| subroutine gpu::gpu_copy_real5 | ( | type(gpu_real5), intent(in) | gpu_ptr_src, |
| type(gpu_real5), intent(in) | gpu_ptr_dest | ||
| ) |
Definition at line 1212 of file simple_gpu.F90.
| subroutine gpu::gpu_copy_real6 | ( | type(gpu_real6), intent(in) | gpu_ptr_src, |
| type(gpu_real6), intent(in) | gpu_ptr_dest | ||
| ) |
Definition at line 1219 of file simple_gpu.F90.
| subroutine gpu::gpu_ddot | ( | type(gpu_blas), intent(in) | handle, |
| integer*4 | n, | ||
| double precision, target | dx, | ||
| integer*4 | incx, | ||
| double precision, target | dy, | ||
| integer*4 | incy, | ||
| double precision, intent(out) | res | ||
| ) |
Double precision dot product (32-bit dimensions)
Computes the dot product of two vectors: result = x^T * y
| [in] | handle | BLAS handle |
| [in] | n | Number of elements |
| [in] | dx | First element of vector x |
| [in] | incx | Stride for vector x |
| [in] | dy | First element of vector y |
| [in] | incy | Stride for vector y |
| [out] | res | Resulting dot product |
Definition at line 1293 of file simple_gpu.F90.
| subroutine gpu::gpu_ddot_64 | ( | type(gpu_blas), intent(in) | handle, |
| integer*8 | n, | ||
| double precision, target | dx, | ||
| integer*8 | incx, | ||
| double precision, target | dy, | ||
| integer*8 | incy, | ||
| double precision, intent(out) | res | ||
| ) |
Double precision dot product (64-bit dimensions)
Computes the dot product of two vectors: result = x^T * y. This variant accepts 64-bit integers for the dimensions and strides.
| [in] | handle | BLAS handle |
| [in] | n | Number of elements (64-bit) |
| [in] | dx | First element of vector x |
| [in] | incx | Stride for vector x (64-bit) |
| [in] | dy | First element of vector y |
| [in] | incy | Stride for vector y (64-bit) |
| [out] | res | Resulting dot product |
Definition at line 1314 of file simple_gpu.F90.
| subroutine gpu::gpu_deallocate_double1 | ( | type(gpu_double1), intent(inout) | ptr | ) |
Definition at line 829 of file simple_gpu.F90.
| subroutine gpu::gpu_deallocate_double2 | ( | type(gpu_double2), intent(inout) | ptr | ) |
Definition at line 836 of file simple_gpu.F90.
| subroutine gpu::gpu_deallocate_double3 | ( | type(gpu_double3), intent(inout) | ptr | ) |
Definition at line 843 of file simple_gpu.F90.
| subroutine gpu::gpu_deallocate_double4 | ( | type(gpu_double4), intent(inout) | ptr | ) |
Definition at line 850 of file simple_gpu.F90.
| subroutine gpu::gpu_deallocate_double5 | ( | type(gpu_double5), intent(inout) | ptr | ) |
Definition at line 857 of file simple_gpu.F90.
| subroutine gpu::gpu_deallocate_double6 | ( | type(gpu_double6), intent(inout) | ptr | ) |
Definition at line 864 of file simple_gpu.F90.
| subroutine gpu::gpu_deallocate_real1 | ( | type(gpu_real1), intent(inout) | ptr | ) |
Definition at line 872 of file simple_gpu.F90.
| subroutine gpu::gpu_deallocate_real2 | ( | type(gpu_real2), intent(inout) | ptr | ) |
Definition at line 879 of file simple_gpu.F90.
| subroutine gpu::gpu_deallocate_real3 | ( | type(gpu_real3), intent(inout) | ptr | ) |
Definition at line 886 of file simple_gpu.F90.
| subroutine gpu::gpu_deallocate_real4 | ( | type(gpu_real4), intent(inout) | ptr | ) |
Definition at line 893 of file simple_gpu.F90.
| subroutine gpu::gpu_deallocate_real5 | ( | type(gpu_real5), intent(inout) | ptr | ) |
Definition at line 900 of file simple_gpu.F90.
| subroutine gpu::gpu_deallocate_real6 | ( | type(gpu_real6), intent(inout) | ptr | ) |
Definition at line 907 of file simple_gpu.F90.
| subroutine gpu::gpu_dgeam | ( | type(gpu_blas), intent(in) | handle, |
| character, intent(in) | transa, | ||
| character, intent(in) | transb, | ||
| integer*4, intent(in) | m, | ||
| integer*4, intent(in) | n, | ||
| double precision, intent(in) | alpha, | ||
| double precision | a, | ||
| integer*4, intent(in) | lda, | ||
| double precision, intent(in) | beta, | ||
| double precision | b, | ||
| integer*4, intent(in) | ldb, | ||
| double precision | c, | ||
| integer*4, intent(in) | ldc | ||
| ) |
Definition at line 1344 of file simple_gpu.F90.
| subroutine gpu::gpu_dgeam_64 | ( | type(gpu_blas), intent(in) | handle, |
| character, intent(in) | transa, | ||
| character, intent(in) | transb, | ||
| integer*8, intent(in) | m, | ||
| integer*8, intent(in) | n, | ||
| double precision, intent(in) | alpha, | ||
| double precision | a, | ||
| integer*8, intent(in) | lda, | ||
| double precision, intent(in) | beta, | ||
| double precision | b, | ||
| integer*8, intent(in) | ldb, | ||
| double precision | c, | ||
| integer*8, intent(in) | ldc | ||
| ) |
Definition at line 1356 of file simple_gpu.F90.
| subroutine gpu::gpu_dgemm | ( | type(gpu_blas), intent(in) | handle, |
| character, intent(in) | transa, | ||
| character, intent(in) | transb, | ||
| integer*4, intent(in) | m, | ||
| integer*4, intent(in) | n, | ||
| integer*4, intent(in) | k, | ||
| double precision, intent(in) | alpha, | ||
| double precision | a, | ||
| integer*4, intent(in) | lda, | ||
| double precision | b, | ||
| integer*4, intent(in) | ldb, | ||
| double precision, intent(in) | beta, | ||
| double precision | c, | ||
| integer*4, intent(in) | ldc | ||
| ) |
Definition at line 1448 of file simple_gpu.F90.
| subroutine gpu::gpu_dgemm_64 | ( | type(gpu_blas), intent(in) | handle, |
| character, intent(in) | transa, | ||
| character, intent(in) | transb, | ||
| integer*8, intent(in) | m, | ||
| integer*8, intent(in) | n, | ||
| integer*8, intent(in) | k, | ||
| double precision, intent(in) | alpha, | ||
| double precision | a, | ||
| integer*8, intent(in) | lda, | ||
| double precision | b, | ||
| integer*8, intent(in) | ldb, | ||
| double precision, intent(in) | beta, | ||
| double precision | c, | ||
| integer*8, intent(in) | ldc | ||
| ) |
Definition at line 1460 of file simple_gpu.F90.
| subroutine gpu::gpu_dgemv | ( | type(gpu_blas), intent(in) | handle, |
| character, intent(in) | transa, | ||
| integer*4, intent(in) | m, | ||
| integer*4, intent(in) | n, | ||
| double precision, intent(in) | alpha, | ||
| double precision | a, | ||
| integer*4, intent(in) | lda, | ||
| double precision | x, | ||
| integer*4, intent(in) | incx, | ||
| double precision, intent(in) | beta, | ||
| double precision | y, | ||
| integer*4, intent(in) | incy | ||
| ) |
Definition at line 1395 of file simple_gpu.F90.
| subroutine gpu::gpu_dgemv_64 | ( | type(gpu_blas), intent(in) | handle, |
| character, intent(in) | transa, | ||
| integer*8, intent(in) | m, | ||
| integer*8, intent(in) | n, | ||
| double precision, intent(in) | alpha, | ||
| double precision | a, | ||
| integer*8, intent(in) | lda, | ||
| double precision | x, | ||
| integer*8, intent(in) | incx, | ||
| double precision, intent(in) | beta, | ||
| double precision | y, | ||
| integer*8, intent(in) | incy | ||
| ) |
Definition at line 1407 of file simple_gpu.F90.
| subroutine gpu::gpu_download_double0 | ( | double precision, intent(in), target | gpu_ptr, |
| double precision, intent(in), target | cpu_ptr, | ||
| integer, intent(in) | n | ||
| ) |
Definition at line 1023 of file simple_gpu.F90.
| subroutine gpu::gpu_download_double1 | ( | type(gpu_double1), intent(in) | gpu_ptr, |
| double precision, dimension(:), intent(in), target | cpu_ptr | ||
| ) |
Definition at line 1031 of file simple_gpu.F90.
| subroutine gpu::gpu_download_double2 | ( | type(gpu_double2), intent(in) | gpu_ptr, |
| double precision, dimension(:,:), intent(in), target | cpu_ptr | ||
| ) |
Definition at line 1038 of file simple_gpu.F90.
| subroutine gpu::gpu_download_double3 | ( | type(gpu_double3), intent(in) | gpu_ptr, |
| double precision, dimension(:,:,:), intent(in), target | cpu_ptr | ||
| ) |
Definition at line 1045 of file simple_gpu.F90.
| subroutine gpu::gpu_download_double4 | ( | type(gpu_double4), intent(in) | gpu_ptr, |
| double precision, dimension(:,:,:,:), intent(in), target | cpu_ptr | ||
| ) |
Definition at line 1052 of file simple_gpu.F90.
| subroutine gpu::gpu_download_double5 | ( | type(gpu_double5), intent(in) | gpu_ptr, |
| double precision, dimension(:,:,:,:,:), intent(in), target | cpu_ptr | ||
| ) |
Definition at line 1059 of file simple_gpu.F90.
| subroutine gpu::gpu_download_double6 | ( | type(gpu_double6), intent(in) | gpu_ptr, |
| double precision, dimension(:,:,:,:,:,:), intent(in), target | cpu_ptr | ||
| ) |
Definition at line 1066 of file simple_gpu.F90.
| subroutine gpu::gpu_download_real0 | ( | real, intent(in), target | gpu_ptr, |
| real, intent(in), target | cpu_ptr, | ||
| integer, intent(in) | n | ||
| ) |
Definition at line 1073 of file simple_gpu.F90.
| subroutine gpu::gpu_download_real1 | ( | type(gpu_real1), intent(in) | gpu_ptr, |
| real, dimension(:), intent(in), target | cpu_ptr | ||
| ) |
Definition at line 1081 of file simple_gpu.F90.
| subroutine gpu::gpu_download_real2 | ( | type(gpu_real2), intent(in) | gpu_ptr, |
| real, dimension(:,:), intent(in), target | cpu_ptr | ||
| ) |
Definition at line 1088 of file simple_gpu.F90.
| subroutine gpu::gpu_download_real3 | ( | type(gpu_real3), intent(in) | gpu_ptr, |
| real, dimension(:,:,:), intent(in), target | cpu_ptr | ||
| ) |
Definition at line 1095 of file simple_gpu.F90.
| subroutine gpu::gpu_download_real4 | ( | type(gpu_real4), intent(in) | gpu_ptr, |
| real, dimension(:,:,:,:), intent(in), target | cpu_ptr | ||
| ) |
Definition at line 1102 of file simple_gpu.F90.
| subroutine gpu::gpu_download_real5 | ( | type(gpu_real5), intent(in) | gpu_ptr, |
| real, dimension(:,:,:,:,:), intent(in), target | cpu_ptr | ||
| ) |
Definition at line 1109 of file simple_gpu.F90.
| subroutine gpu::gpu_download_real6 | ( | type(gpu_real6), intent(in) | gpu_ptr, |
| real, dimension(:,:,:,:,:,:), intent(in), target | cpu_ptr | ||
| ) |
Definition at line 1116 of file simple_gpu.F90.
| subroutine gpu::gpu_sdot | ( | type(gpu_blas), intent(in) | handle, |
| integer*4 | n, | ||
| real, target | dx, | ||
| integer*4 | incx, | ||
| real, target | dy, | ||
| integer*4 | incy, | ||
| real, intent(out) | res | ||
| ) |
Definition at line 1323 of file simple_gpu.F90.
| subroutine gpu::gpu_sdot_64 | ( | type(gpu_blas), intent(in) | handle, |
| integer*8 | n, | ||
| real, target | dx, | ||
| integer*8 | incx, | ||
| real, target | dy, | ||
| integer*8 | incy, | ||
| real, intent(out) | res | ||
| ) |
Definition at line 1332 of file simple_gpu.F90.
| subroutine gpu::gpu_set_stream | ( | type(gpu_blas) | handle, |
| type(gpu_stream) | stream | ||
| ) |
Definition at line 1240 of file simple_gpu.F90.
| subroutine gpu::gpu_sgeam | ( | type(gpu_blas), intent(in) | handle, |
| character, intent(in) | transa, | ||
| character, intent(in) | transb, | ||
| integer*4, intent(in) | m, | ||
| integer*4, intent(in) | n, | ||
| real, intent(in) | alpha, | ||
| real | a, | ||
| integer*4, intent(in) | lda, | ||
| real, intent(in) | beta, | ||
| real | b, | ||
| integer*4, intent(in) | ldb, | ||
| real | c, | ||
| integer*4, intent(in) | ldc | ||
| ) |
Definition at line 1368 of file simple_gpu.F90.
| subroutine gpu::gpu_sgeam_64 | ( | type(gpu_blas), intent(in) | handle, |
| character, intent(in) | transa, | ||
| character, intent(in) | transb, | ||
| integer*8, intent(in) | m, | ||
| integer*8, intent(in) | n, | ||
| real, intent(in) | alpha, | ||
| real | a, | ||
| integer*8, intent(in) | lda, | ||
| real, intent(in) | beta, | ||
| real | b, | ||
| integer*8, intent(in) | ldb, | ||
| real | c, | ||
| integer*8, intent(in) | ldc | ||
| ) |
Definition at line 1380 of file simple_gpu.F90.
| subroutine gpu::gpu_sgemm | ( | type(gpu_blas), intent(in) | handle, |
| character, intent(in) | transa, | ||
| character, intent(in) | transb, | ||
| integer*4, intent(in) | m, | ||
| integer*4, intent(in) | n, | ||
| integer*4, intent(in) | k, | ||
| real, intent(in) | alpha, | ||
| real | a, | ||
| integer*4, intent(in) | lda, | ||
| real | b, | ||
| integer*4, intent(in) | ldb, | ||
| real, intent(in) | beta, | ||
| real | c, | ||
| integer*4, intent(in) | ldc | ||
| ) |
Definition at line 1471 of file simple_gpu.F90.
| subroutine gpu::gpu_sgemm_64 | ( | type(gpu_blas), intent(in) | handle, |
| character, intent(in) | transa, | ||
| character, intent(in) | transb, | ||
| integer*8, intent(in) | m, | ||
| integer*8, intent(in) | n, | ||
| integer*8, intent(in) | k, | ||
| real, intent(in) | alpha, | ||
| real | a, | ||
| integer*8, intent(in) | lda, | ||
| real | b, | ||
| integer*8, intent(in) | ldb, | ||
| real, intent(in) | beta, | ||
| real | c, | ||
| integer*8, intent(in) | ldc | ||
| ) |
Definition at line 1483 of file simple_gpu.F90.
| subroutine gpu::gpu_sgemv | ( | type(gpu_blas), intent(in) | handle, |
| character, intent(in) | transa, | ||
| integer*4, intent(in) | m, | ||
| integer*4, intent(in) | n, | ||
| real, intent(in) | alpha, | ||
| real | a, | ||
| integer*4, intent(in) | lda, | ||
| real | x, | ||
| integer*4, intent(in) | incx, | ||
| real, intent(in) | beta, | ||
| real | y, | ||
| integer*4, intent(in) | incy | ||
| ) |
Definition at line 1420 of file simple_gpu.F90.
| subroutine gpu::gpu_sgemv_64 | ( | type(gpu_blas), intent(in) | handle, |
| character, intent(in) | transa, | ||
| integer*8, intent(in) | m, | ||
| integer*8, intent(in) | n, | ||
| real, intent(in) | alpha, | ||
| real | a, | ||
| integer*8, intent(in) | lda, | ||
| real | x, | ||
| integer*8, intent(in) | incx, | ||
| real, intent(in) | beta, | ||
| real | y, | ||
| integer*8, intent(in) | incy | ||
| ) |
Definition at line 1432 of file simple_gpu.F90.
| subroutine gpu::gpu_stream_create | ( | type(gpu_stream) | stream | ) |
Definition at line 1230 of file simple_gpu.F90.
| subroutine gpu::gpu_stream_destroy | ( | type(gpu_stream) | stream | ) |
Definition at line 1235 of file simple_gpu.F90.
| subroutine gpu::gpu_upload_double0 | ( | double precision, intent(in), target | cpu_ptr, |
| double precision, intent(in), target | gpu_ptr, | ||
| integer, intent(in) | n | ||
| ) |
Definition at line 918 of file simple_gpu.F90.
| subroutine gpu::gpu_upload_double1 | ( | double precision, dimension(*), intent(in), target | cpu_ptr, |
| type(gpu_double1), intent(in) | gpu_ptr | ||
| ) |
Definition at line 926 of file simple_gpu.F90.
| subroutine gpu::gpu_upload_double2 | ( | double precision, dimension(:,:), intent(in), target | cpu_ptr, |
| type(gpu_double2), intent(in) | gpu_ptr | ||
| ) |
Definition at line 933 of file simple_gpu.F90.
| subroutine gpu::gpu_upload_double3 | ( | double precision, dimension(:,:,:), intent(in), target | cpu_ptr, |
| type(gpu_double3), intent(in) | gpu_ptr | ||
| ) |
Definition at line 940 of file simple_gpu.F90.
| subroutine gpu::gpu_upload_double4 | ( | double precision, dimension(:,:,:,:), intent(in), target | cpu_ptr, |
| type(gpu_double4), intent(in) | gpu_ptr | ||
| ) |
Definition at line 947 of file simple_gpu.F90.
| subroutine gpu::gpu_upload_double5 | ( | double precision, dimension(:,:,:,:,:), intent(in), target | cpu_ptr, |
| type(gpu_double5), intent(in) | gpu_ptr | ||
| ) |
Definition at line 954 of file simple_gpu.F90.
| subroutine gpu::gpu_upload_double6 | ( | double precision, dimension(:,:,:,:,:,:), intent(in), target | cpu_ptr, |
| type(gpu_double6), intent(in) | gpu_ptr | ||
| ) |
Definition at line 961 of file simple_gpu.F90.
| subroutine gpu::gpu_upload_real0 | ( | real, intent(in), target | cpu_ptr, |
| real, intent(in), target | gpu_ptr, | ||
| integer, intent(in) | n | ||
| ) |
Definition at line 969 of file simple_gpu.F90.
| subroutine gpu::gpu_upload_real1 | ( | real, dimension(*), intent(in), target | cpu_ptr, |
| type(gpu_real1), intent(in) | gpu_ptr | ||
| ) |
Definition at line 977 of file simple_gpu.F90.
| subroutine gpu::gpu_upload_real2 | ( | real, dimension(:,:), intent(in), target | cpu_ptr, |
| type(gpu_real2), intent(in) | gpu_ptr | ||
| ) |
Definition at line 984 of file simple_gpu.F90.
| subroutine gpu::gpu_upload_real3 | ( | real, dimension(:,:,:), intent(in), target | cpu_ptr, |
| type(gpu_real3), intent(in) | gpu_ptr | ||
| ) |
Definition at line 991 of file simple_gpu.F90.
| subroutine gpu::gpu_upload_real4 | ( | real, dimension(:,:,:,:), intent(in), target | cpu_ptr, |
| type(gpu_real4), intent(in) | gpu_ptr | ||
| ) |
Definition at line 998 of file simple_gpu.F90.
| subroutine gpu::gpu_upload_real5 | ( | real, dimension(:,:,:,:,:), intent(in), target | cpu_ptr, |
| type(gpu_real5), intent(in) | gpu_ptr | ||
| ) |
Definition at line 1005 of file simple_gpu.F90.
| subroutine gpu::gpu_upload_real6 | ( | real, dimension(:,:,:,:,:,:), intent(in), target | cpu_ptr, |
| type(gpu_real6), intent(in) | gpu_ptr | ||
| ) |
Definition at line 1012 of file simple_gpu.F90.