The code bellow is giving the following error:
"No registered converter was able to produce a C++ rvalue of type unsigned int from this Python object of type numpy.int32" in the callf_tri(data3_gpu, data2_gpu, data1_gpu, a_gpu,PointsZ,PointsY,PointsX, size1[0], block=(256,1,1),grid=grid1)
I do not know in the call the variable that needs to be uint32.The CUDA code compiles Ok.
Can someone help me?
Thanks,
Luis Gonçalves
import pycuda.gpuarray as gpuarrayimport pycuda.driver as cudaimport pycuda.autoinitfrom pycuda.compiler import SourceModuleimport numpy as npmod = SourceModule("""__global__ void Function(int *zs,int *ys,int *xs, unsigned char *a_gpu, int maxz, int maxy, int maxx, int nrtri){ //some code}""")def main20(): cuda.init() PointsX=np.int32(1000) PointsY=np.int32(1000) PointsZ=np.int32(1000) size1=np.asarray(315645) Step1=0.5 minx=-100.0 miny=-121.0 minz=-111.0 PointsS=(size1[0])*3 global data data=np.full((PointsS,3),np.float32(0.0)) #fill "data" with values f_tri = mod.get_function("Function") arrayXYZ=np.full((PointsZ*PointsY*PointsX),np.uint8(0)) a_gpu = gpuarray.zeros((PointsZ*PointsY*PointsX), dtype=np.uint8) data1=np.int_((np.array(data[0::1,0])-np.float32(minx))*np.float32(1/Step1)+np.float32(0.5)) data2=np.int_((np.array(data[0::1,1])-np.float32(miny))*np.float32(1/Step1)+np.float32(0.5)) data3=np.int_((np.array(data[0::1,2])-np.float32(minz))*np.float32(1/Step1)+np.float32(0.5)) data3_gpu = gpuarray.to_gpu(data3) data2_gpu = gpuarray.to_gpu(data2) data1_gpu = gpuarray.to_gpu(data1) grid1=((size1[0] >> 8)+1,1) f_tri(data3_gpu, data2_gpu, data1_gpu, a_gpu,PointsZ,PointsY,PointsX, size1[0], block=(256,1,1),grid=grid1) cuda.memcpy_dtoh(arrayXYZ, a_gpu) #continuesif __name__ == '__main__': main20()