我的代码中有两个函数 Mmult 和 MmultV,它们都试图调用 kernel。尽管 MmultV 成功调用并打印了所需的值,但 Mmult 却没有,并且没有打印任何内容。两个函数之间的主要区别是线程块的大小:分别为 (m, n) 和 (m, 1)。
问题出在哪里?

// Forward declaration: kernel is defined further down in this file, but it is
// launched here, so it must be declared before this point.
__global__ void kernel(double* A, double* B);

/*
 * Copies A and B to the device and launches `kernel` with a single block of
 * (m, n) = (3, 2) threads.
 *
 * A, B: host pointers; 6 doubles are read from EACH of them by cudaMemcpy, so
 *       both buffers must hold at least 6 doubles.
 *
 * Every CUDA runtime call and the kernel launch are checked. On the first
 * failure the remaining steps are skipped and the error string is written to
 * stderr, so a failed launch is no longer silent. Device buffers are always
 * released before returning.
 */
void Mmult(double* A, double* B)
{
    const int m = 3;
    const int n = 2;
    const size_t bytes = 6 * sizeof(double);
    double* d_A = NULL;
    double* d_B = NULL;
    const dim3 Block(m, n);
    cudaError_t err;

    err = cudaMalloc(&d_A, bytes);
    if (err == cudaSuccess)
        err = cudaMemcpy(d_A, A, bytes, cudaMemcpyHostToDevice);
    if (err == cudaSuccess)
        err = cudaMalloc(&d_B, bytes);
    if (err == cudaSuccess)
        err = cudaMemcpy(d_B, B, bytes, cudaMemcpyHostToDevice);

    if (err == cudaSuccess) {
        kernel<<<1, Block>>>(d_A, d_B);
        // A launch does not return a status; configuration errors are only
        // visible through cudaGetLastError().
        err = cudaGetLastError();
    }
    // Execution errors (and the flush of device-side printf output) surface at
    // the next synchronizing call.
    if (err == cudaSuccess)
        err = cudaDeviceSynchronize();

    if (err != cudaSuccess)
        fprintf(stderr, "Mmult: CUDA error: %s\n", cudaGetErrorString(err));

    // Pointers start as NULL, so freeing after an early failure is harmless.
    cudaFree(d_A);
    cudaFree(d_B);
}
// Forward declaration: kernel is defined further down in this file, but it is
// launched here, so it must be declared before this point.
__global__ void kernel(double* A, double* B);

/*
 * Copies A and B to the device and launches `kernel` with a single block of
 * (m, 1) = (3, 1) threads.
 *
 * A: host pointer; 6 doubles are read from it.
 * B: host pointer; 3 doubles are read from it.
 *
 * Every CUDA runtime call and the kernel launch are checked. On the first
 * failure the remaining steps are skipped and the error string is written to
 * stderr. Device buffers are always released before returning.
 */
void MmultV(double* A, double* B)
{
    const int m = 3;
    const size_t bytesA = 6 * sizeof(double);
    const size_t bytesB = 3 * sizeof(double);
    double* d_A = NULL;
    double* d_B = NULL;
    const dim3 Block(m, 1);
    cudaError_t err;

    err = cudaMalloc(&d_A, bytesA);
    if (err == cudaSuccess)
        err = cudaMemcpy(d_A, A, bytesA, cudaMemcpyHostToDevice);
    if (err == cudaSuccess)
        err = cudaMalloc(&d_B, bytesB);
    if (err == cudaSuccess)
        err = cudaMemcpy(d_B, B, bytesB, cudaMemcpyHostToDevice);

    if (err == cudaSuccess) {
        kernel<<<1, Block>>>(d_A, d_B);
        // A launch does not return a status; configuration errors are only
        // visible through cudaGetLastError().
        err = cudaGetLastError();
    }
    // Execution errors (and the flush of device-side printf output) surface at
    // the next synchronizing call.
    if (err == cudaSuccess)
        err = cudaDeviceSynchronize();

    if (err != cudaSuccess)
        fprintf(stderr, "MmultV: CUDA error: %s\n", cudaGetErrorString(err));

    // Pointers start as NULL, so freeing after an early failure is harmless.
    cudaFree(d_A);
    cudaFree(d_B);
}
/*
 * Debug kernel: each thread prints its threadIdx.x and then its threadIdx.y,
 * one value per line, using two separate printf calls.
 *
 * A, B: accepted but not read or written by this kernel.
 */
__global__ void kernel(double* A, double* B)
{
    const int row = threadIdx.x;
    const int col = threadIdx.y;

    printf("%i\n", row);
    printf("%i\n", col);
}
0 个答案:
没有答案