! -*- mode: F90 -*-

!> Device kernel together with the host and device arrays it works on.
module my_kernels
  use cudafor
  implicit none

  !> Array declared with the device attribute; filled by mykernel.
  real, dimension(:), allocatable, device :: d_array
  !> Host-side array that receives a copy of d_array.
  real, dimension(:), allocatable :: h_array

contains

  !> Store each thread's 1-based global index in the matching element of
  !> d_array. Threads whose index lies beyond the end of d_array do nothing,
  !> so the kernel may be launched with more threads than elements.
  attributes(global) subroutine mykernel()
    integer :: i

    ! blockIdx and threadIdx are 1-based in CUDA Fortran
    i = blockDim%x * (blockIdx%x - 1) + threadIdx%x

    ! Guard against out-of-bounds writes from surplus threads
    if (i <= size(d_array)) d_array(i) = real(i)
  end subroutine mykernel

end module my_kernels

!> Fill a small device array on the GPU, copy it to the host and print it.
program cuda
  use my_kernels
  use, intrinsic :: iso_fortran_env, only: error_unit
  implicit none

  integer, parameter :: n = 9                  ! number of array elements
  integer, parameter :: threads_per_block = 128
  integer :: n_blocks, i, istat

  allocate(h_array(n), d_array(n))

  ! Round up so every element is covered by a thread; the kernel's bounds
  ! check makes the surplus threads harmless.
  n_blocks = (n + threads_per_block - 1) / threads_per_block
  call mykernel<<<n_blocks, threads_per_block>>>()

  ! Launch-configuration errors are reported by cudaGetLastError
  istat = cudaGetLastError()
  if (istat /= cudaSuccess) then
    write(error_unit, *) "kernel launch failed: ", trim(cudaGetErrorString(istat))
    error stop 1
  end if

  ! Errors raised while the kernel runs surface at the next synchronization
  istat = cudaDeviceSynchronize()
  if (istat /= cudaSuccess) then
    write(error_unit, *) "kernel execution failed: ", trim(cudaGetErrorString(istat))
    error stop 1
  end if

  ! Copy device array to host
  h_array = d_array

  do i = 1, n
    print*,"i = ",i," array(i) = ",h_array(i)
  end do

  deallocate(h_array, d_array)
end program cuda