#ifndef _TMP_KERNEL_H_
#define _TMP_KERNEL_H_

// NOTE(review): the header name after #include was missing in the source as
// received; <stdio.h> is assumed -- confirm against the original file.
#include <stdio.h>

/**
 * Returns sample `index` out of `count` evenly spaced values covering
 * [lo, hi] (index 0 -> lo, index count-1 -> hi).
 *
 * An axis with a single sample (count <= 1) collapses to `lo`; the plain
 * step formula would otherwise evaluate (hi - lo) / 0 * 0, which is NaN.
 */
static __host__ __device__ inline double fit_grid_point(double lo, double hi,
                                                        int count, int index)
{
    if (count <= 1) {
        return lo;
    }
    return lo + (hi - lo) / (count - 1) * index;
}

/**
 * Error metric for one parameter combination.
 *
 * Placeholder implementation: every argument is ignored and the constant
 * 0.01 is returned.
 */
__device__ double calculate_error(double Ri, double Ra, double u, double f,
                                  double F, double amax, double OBS_actual[])
{
    return 0.01;  // dummy double value
}

/**
 * Evaluates calculate_error() for the parameter combination selected by the
 * calling thread's block/thread indices and stores the result in best_fit.
 *
 * Each of the five parameters (Ri, Ra, u, f, F) is sampled on an evenly
 * spaced axis of n_X points from a_X to b_X.
 *
 * Index mapping:
 *   blockIdx.x  -> packed pair: i_Ri = blockIdx.x % n_Ri,
 *                               i_u  = blockIdx.x / n_Ri
 *   blockIdx.y  -> i_Ra
 *   threadIdx.x -> i_f
 *   threadIdx.y -> i_F
 * NOTE(review): this presumably corresponds to a launch with
 * grid = (n_Ri * n_u, n_Ra) and block = (n_f, n_F) -- confirm at the call
 * site. No bounds check against n_u / n_Ra / n_f / n_F is performed here.
 *
 * @param OBS_actual  forwarded unchanged to calculate_error().
 * @param amax        forwarded unchanged to calculate_error().
 * @param best_fit    output; elements [0, 18) are written.
 *                    NOTE(review): must be memory the device can write with
 *                    room for at least 18 doubles -- confirm at the caller.
 */
__global__ void compute_fit(int n_Ri, double a_Ri, double b_Ri,
                            int n_Ra, double a_Ra, double b_Ra,
                            int n_u, double a_u, double b_u,
                            int n_f, double a_f, double b_f,
                            int n_F, double a_F, double b_F,
                            double OBS_actual[], double amax, double* best_fit)
{
    // n_Ri is used as an integer divisor below; nothing sensible can be
    // computed for an empty Ri axis.
    if (n_Ri <= 0) {
        return;
    }

    // Unpack blockIdx.x with plain integer arithmetic. The quotient must be
    // truncated (not rounded up): rounding up would give i_u == 1 for
    // blockIdx.x == 1 and push the last block's i_u past the end of the axis.
    const unsigned int ri_count = static_cast<unsigned int>(n_Ri);
    const int i_Ri = static_cast<int>(blockIdx.x % ri_count);
    const int i_u  = static_cast<int>(blockIdx.x / ri_count);
    const int i_Ra = static_cast<int>(blockIdx.y);
    const int i_f  = static_cast<int>(threadIdx.x);
    const int i_F  = static_cast<int>(threadIdx.y);

    const double Ri = fit_grid_point(a_Ri, b_Ri, n_Ri, i_Ri);
    const double Ra = fit_grid_point(a_Ra, b_Ra, n_Ra, i_Ra);
    const double u  = fit_grid_point(a_u,  b_u,  n_u,  i_u);
    const double f  = fit_grid_point(a_f,  b_f,  n_f,  i_f);
    const double F  = fit_grid_point(a_F,  b_F,  n_F,  i_F);

    // The error does not depend on the output slot, so evaluate it once.
    const double err = calculate_error(Ri, Ra, u, f, F, amax, OBS_actual);

    // Number of best_fit slots filled by this kernel.
    const int kNumOutputs = 18;

    // Original author's note: enabling this store caused an nv4_disp error.
    // NOTE(review): every thread of every block writes the same 18 slots, so
    // once calculate_error() stops being constant the stored values will
    // depend on which thread writes last.
    for (int slot = 0; slot < kNumOutputs; ++slot) {
        best_fit[slot] = err;
    }
}

#endif  // #ifndef _TMP_KERNEL_H_