欢迎您访问 最编程 本站为您分享编程语言代码,编程技术文章!
您现在的位置是: 首页

HIP编程

最编程 2024-04-13 15:08:45
...
1 #include <stdio.h> 2 #include <stdlib.h> 3 4 #include <hip/hip_runtime.h> 5 #include <hip/hip_runtime_api.h> 6 7 #define M 4 8 #define K 4 9 #define N 4 10 11 void initial(double* list,int row,int col) 12 { 13 double *num = list; 14 for (int i=0; i<row*col; i++) 15 { 16 num[i] = rand()%10; 17 } 18 } 19 20 void CpuMatrix(double *A,double *B,double *C) 21 { 22 int i,j,k; 23 24 for( i=0; i<M; i++) 25 { 26 for(j=0; j<N; j++) 27 { 28 double sum = 0; 29 for(int k=0; k<K; k++) 30 { 31 sum += A[i*K + k] * B[k * N + j]; 32 } 33 C[i * N + j] = sum; 34 } 35 } 36 } 37 38 __global__ void GpuMatrix(double *dev_A,double *dev_B,double *dev_C) 39 { 40 int ix = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x; 41 int iy = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y; 42 if(ix<K && iy<M) 43 { 44 double sum = 0; 45 for( int k = 0; k < K;k++) 46 { 47 sum += dev_A[iy*K + k] * dev_B[k*N + ix]; 48 } 49 dev_C[iy * N + ix] = sum; 50 } 51 } 52 53 void printMatrix(double *list,int row,int col) 54 { 55 double *p = list; 56 for(int i=0; i<row; i++) 57 { 58 for(int j=0; j<col; j++) 59 { 60 printf("%10lf",p[j]); 61 } 62 p = p + col; 63 printf("\n"); 64 } 65 } 66 int main(int argc,char **argv) 67 { 68 int Axy = M*K; 69 int Abytes = Axy * sizeof(double); 70 71 int Bxy = K*N; 72 int Bbytes = Bxy * sizeof(double); 73 74 int nxy = M*N; 75 int nbytes = nxy * sizeof(double); 76 77 float time_cpu,time_gpu; 78 79 clock_t start_cpu,stop_cpu; 80 81 hipEvent_t start_GPU,stop_GPU; 82 83 double *host_A, *host_B, *host_C, *c_CPU; 84 host_A = (double*)malloc(Abytes); 85 host_B = (double*)malloc(Bbytes); 86 host_C = (double*)malloc(nbytes); 87 c_CPU = (double*)malloc(nbytes); 88 89 90 initial(host_A,M,K); 91 92 printf("A:(%d,%d):\n",M,K); 93 printMatrix(host_A,M,K); 94 95 initial(host_B,K,N); 96 97 printf("B:(%d,%d):\n",K,N); 98 printMatrix(host_B,K,N); 99 100 // start_cpu = clock(); 101 CpuMatrix(host_A,host_B,host_C); 102 // stop_cpu = clock(); 103 104 printf("Host_C:(%d,%d):\n",M,N); 105 // printf("\nCPU time is %f(ms)\n",(float)(stop_cpu-start_cpu)/CLOCKS_PER_SEC); 106 printMatrix(host_C,M,N); 107 double *dev_A,*dev_B,*dev_C; 108 hipMalloc(&dev_A,Axy*sizeof(double)); 109 hipMalloc(&dev_B,Bxy*sizeof(double)); 110 hipMalloc(&dev_C,nxy*sizeof(double)); 111 112 dim3 block(1024,1); 113 dim3 grid(64,64); 114 115 hipMemcpy(dev_A,host_A,Abytes,hipMemcpyDeviceToHost); 116 hipMemcpy(dev_B,host_B,Bbytes,hipMemcpyDeviceToHost); 117 118 hipEventCreate(&start_GPU); 119 hipEventCreate(&stop_GPU); 120 hipEventRecord(start_GPU,0); 121 hipLaunchKernelGGL(GpuMatrix,grid,block,0,0,dev_A,dev_B,dev_C); 122 hipEventRecord(stop_GPU,0); 123 hipEventSynchronize(start_GPU); 124 hipEventSynchronize(stop_GPU); 125 hipEventElapsedTime(&time_gpu, start_GPU,stop_GPU); 126 printf("\nThe time from GPU:\t%f(ms)\n", time_GPU/1000); 127 hipDeviceSynchronize(); 128 hipEventDestroy(start_GPU); 129 hipEventDestroy(stop_GPU); 130 131 hipMemcpy(c_CPU,dev_C,nbytes,hipMemcpyDeviceToHost); 132 printf("device_C:(%d,%d):\n",M,N); 133 printMatrix(c_CPU,M,N); 134 135 136 hipFree(dev_A); 137 hipFree(dev_B); 138 hipFree(dev_C); 139 free(host_A); 140 free(host_B); 141 free(host_C); 142 free(c_CPU); 143 144 return 0; 145 }