# cuda-memcheck ./nvgpu_lapack 10000 ========= CUDA-MEMCHECK Found 8 GPUs [NVIDIA A100-SXM4-40GB][Ampere 8.0] [Sun Jan 09 18:26:47 2022] Matrix size = 10000(SP) loop=1 loop=2 loop=3 . . . . . . . . . loop=70 loop=71 loop=72 ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (15,0,0) in block (39,0,0) ========= Address 0x7f9089d8203c is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (14,0,0) in block (39,0,0) ========= Address 0x7f9089d82038 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (13,0,0) in block (39,0,0) ========= Address 0x7f9089d82034 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (12,0,0) in block (39,0,0) ========= Address 0x7f9089d82030 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (11,0,0) in block (39,0,0) ========= Address 0x7f9089d8202c is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (10,0,0) in block (39,0,0) ========= Address 0x7f9089d82028 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (9,0,0) in block (39,0,0) ========= Address 0x7f9089d82024 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (8,0,0) in block (39,0,0) ========= Address 0x7f9089d82020 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (7,0,0) in block (39,0,0) ========= Address 0x7f9089d8201c is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (6,0,0) in block (39,0,0) ========= Address 0x7f9089d82018 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (5,0,0) in block (39,0,0) ========= Address 0x7f9089d82014 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (4,0,0) in block (39,0,0) ========= Address 0x7f9089d82010 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (3,0,0) in block (39,0,0) ========= Address 0x7f9089d8200c is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (2,0,0) in block (39,0,0) ========= Address 0x7f9089d82008 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (1,0,0) in block (39,0,0) ========= Address 0x7f9089d82004 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (0,0,0) in block (39,0,0) ========= Address 0x7f9089d82000 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (223,0,0) in block (34,0,0) ========= Address 0x7f9089d80f7c is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (222,0,0) in block (34,0,0) ========= Address 0x7f9089d80f78 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (221,0,0) in block (34,0,0) ========= Address 0x7f9089d80f74 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (220,0,0) in block (34,0,0) ========= Address 0x7f9089d80f70 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (219,0,0) in block (34,0,0) ========= Address 0x7f9089d80f6c is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (218,0,0) in block (34,0,0) ========= Address 0x7f9089d80f68 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (217,0,0) in block (34,0,0) ========= Address 0x7f9089d80f64 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (216,0,0) in block (34,0,0) ========= Address 0x7f9089d80f60 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (215,0,0) in block (34,0,0) ========= Address 0x7f9089d80f5c is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (214,0,0) in block (34,0,0) ========= Address 0x7f9089d80f58 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (213,0,0) in block (34,0,0) ========= Address 0x7f9089d80f54 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (212,0,0) in block (34,0,0) ========= Address 0x7f9089d80f50 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (211,0,0) in block (34,0,0) ========= Address 0x7f9089d80f4c is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (210,0,0) in block (34,0,0) ========= Address 0x7f9089d80f48 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (209,0,0) in block (34,0,0) ========= Address 0x7f9089d80f44 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (208,0,0) in block (34,0,0) ========= Address 0x7f9089d80f40 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (207,0,0) in block (34,0,0) ========= Address 0x7f9089d80f3c is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (206,0,0) in block (34,0,0) ========= Address 0x7f9089d80f38 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (205,0,0) in block (34,0,0) ========= Address 0x7f9089d80f34 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (204,0,0) in block (34,0,0) ========= Address 0x7f9089d80f30 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (203,0,0) in block (34,0,0) ========= Address 0x7f9089d80f2c is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (202,0,0) in block (34,0,0) ========= Address 0x7f9089d80f28 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (201,0,0) in block (34,0,0) ========= Address 0x7f9089d80f24 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (200,0,0) in block (34,0,0) ========= Address 0x7f9089d80f20 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (199,0,0) in block (34,0,0) ========= Address 0x7f9089d80f1c is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (198,0,0) in block (34,0,0) ========= Address 0x7f9089d80f18 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (197,0,0) in block (34,0,0) ========= Address 0x7f9089d80f14 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (196,0,0) in block (34,0,0) ========= Address 0x7f9089d80f10 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (195,0,0) in block (34,0,0) ========= Address 0x7f9089d80f0c is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (194,0,0) in block (34,0,0) ========= Address 0x7f9089d80f08 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (193,0,0) in block (34,0,0) ========= Address 0x7f9089d80f04 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (192,0,0) in block (34,0,0) ========= Address 0x7f9089d80f00 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (223,0,0) in block (28,0,0) ========= Address 0x7f9089d7f77c is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (222,0,0) in block (28,0,0) ========= Address 0x7f9089d7f778 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (221,0,0) in block (28,0,0) ========= Address 0x7f9089d7f774 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (220,0,0) in block (28,0,0) ========= Address 0x7f9089d7f770 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (219,0,0) in block (28,0,0) ========= Address 0x7f9089d7f76c is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (218,0,0) in block (28,0,0) ========= Address 0x7f9089d7f768 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (217,0,0) in block (28,0,0) ========= Address 0x7f9089d7f764 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (216,0,0) in block (28,0,0) ========= Address 0x7f9089d7f760 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (215,0,0) in block (28,0,0) ========= Address 0x7f9089d7f75c is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (214,0,0) in block (28,0,0) ========= Address 0x7f9089d7f758 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (213,0,0) in block (28,0,0) ========= Address 0x7f9089d7f754 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (212,0,0) in block (28,0,0) ========= Address 0x7f9089d7f750 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (211,0,0) in block (28,0,0) ========= Address 0x7f9089d7f74c is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (210,0,0) in block (28,0,0) ========= Address 0x7f9089d7f748 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (209,0,0) in block (28,0,0) ========= Address 0x7f9089d7f744 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (208,0,0) in block (28,0,0) ========= Address 0x7f9089d7f740 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (207,0,0) in block (28,0,0) ========= Address 0x7f9089d7f73c is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (206,0,0) in block (28,0,0) ========= Address 0x7f9089d7f738 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (205,0,0) in block (28,0,0) ========= Address 0x7f9089d7f734 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (204,0,0) in block (28,0,0) ========= Address 0x7f9089d7f730 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (203,0,0) in block (28,0,0) ========= Address 0x7f9089d7f72c is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (202,0,0) in block (28,0,0) ========= Address 0x7f9089d7f728 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (201,0,0) in block (28,0,0) ========= Address 0x7f9089d7f724 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (200,0,0) in block (28,0,0) ========= Address 0x7f9089d7f720 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (199,0,0) in block (28,0,0) ========= Address 0x7f9089d7f71c is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (198,0,0) in block (28,0,0) ========= Address 0x7f9089d7f718 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (197,0,0) in block (28,0,0) ========= Address 0x7f9089d7f714 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (196,0,0) in block (28,0,0) ========= Address 0x7f9089d7f710 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (195,0,0) in block (28,0,0) ========= Address 0x7f9089d7f70c is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (194,0,0) in block (28,0,0) ========= Address 0x7f9089d7f708 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (193,0,0) in block (28,0,0) ========= Address 0x7f9089d7f704 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (192,0,0) in block (28,0,0) ========= Address 0x7f9089d7f700 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (255,0,0) in block (21,0,0) ========= Address 0x7f9089d7dbfc is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (254,0,0) in block (21,0,0) ========= Address 0x7f9089d7dbf8 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (253,0,0) in block (21,0,0) ========= Address 0x7f9089d7dbf4 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (252,0,0) in block (21,0,0) ========= Address 0x7f9089d7dbf0 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (251,0,0) in block (21,0,0) ========= Address 0x7f9089d7dbec is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (250,0,0) in block (21,0,0) ========= Address 0x7f9089d7dbe8 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (249,0,0) in block (21,0,0) ========= Address 0x7f9089d7dbe4 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (248,0,0) in block (21,0,0) ========= Address 0x7f9089d7dbe0 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (247,0,0) in block (21,0,0) ========= Address 0x7f9089d7dbdc is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (246,0,0) in block (21,0,0) ========= Address 0x7f9089d7dbd8 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (245,0,0) in block (21,0,0) ========= Address 0x7f9089d7dbd4 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (244,0,0) in block (21,0,0) ========= Address 0x7f9089d7dbd0 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (243,0,0) in block (21,0,0) ========= Address 0x7f9089d7dbcc is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (242,0,0) in block (21,0,0) ========= Address 0x7f9089d7dbc8 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (241,0,0) in block (21,0,0) ========= Address 0x7f9089d7dbc4 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (240,0,0) in block (21,0,0) ========= Address 0x7f9089d7dbc0 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (239,0,0) in block (21,0,0) ========= Address 0x7f9089d7dbbc is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (238,0,0) in block (21,0,0) ========= Address 0x7f9089d7dbb8 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (237,0,0) in block (21,0,0) ========= Address 0x7f9089d7dbb4 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (236,0,0) in block (21,0,0) ========= Address 0x7f9089d7dbb0 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (235,0,0) in block (21,0,0) ========= Address 0x7f9089d7dbac is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (234,0,0) in block (21,0,0) ========= Address 0x7f9089d7dba8 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (233,0,0) in block (21,0,0) ========= Address 0x7f9089d7dba4 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (232,0,0) in block (21,0,0) ========= Address 0x7f9089d7dba0 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (231,0,0) in block (21,0,0) ========= Address 0x7f9089d7db9c is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (230,0,0) in block (21,0,0) ========= Address 0x7f9089d7db98 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (229,0,0) in block (21,0,0) ========= Address 0x7f9089d7db94 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (228,0,0) in block (21,0,0) ========= Address 0x7f9089d7db90 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (227,0,0) in block (21,0,0) ========= Address 0x7f9089d7db8c is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (226,0,0) in block (21,0,0) ========= Address 0x7f9089d7db88 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (225,0,0) in block (21,0,0) ========= Address 0x7f9089d7db84 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= ========= Invalid __global__ read of size 4 ========= at 0x000007f0 in void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) ========= by thread (224,0,0) in block (21,0,0) ========= Address 0x7f9089d7db80 is out of bounds ========= Device Frame:void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) (void getrf3_swap_alg1(int, int, int, float*, int, int const *, int, int) : 0x7f0) ========= Saved host backtrace up to driver entry point at kernel launch time ========= Host Frame:/lib64/libcuda.so [0x20d6ea] ========= Host Frame:./nvgpu_lapack [0x20af0b] ========= Host Frame:./nvgpu_lapack [0x261548] ========= Host Frame:./nvgpu_lapack [0x5f524] ========= Host Frame:./nvgpu_lapack [0x611d1] ========= Host Frame:./nvgpu_lapack [0x52fb3] ========= Host Frame:./nvgpu_lapack [0x4bc30] ========= Host Frame:./nvgpu_lapack [0x4d141] ========= Host Frame:./nvgpu_lapack [0xd64f] ========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22555] ========= Host Frame:./nvgpu_lapack [0x4b2b9] ========= nvgpu_lapack.cu:282 cudaDeviceSynchronize failed with error code 719; (cudaErrorLaunchFailure) unspecified launch failure ========= ERROR SUMMARY: 112 errors #