HIP编程
最编程
2024-04-13 15:08:45
...
1 #include <stdio.h>
2 #include <stdlib.h>
3
4 #include <hip/hip_runtime.h>
5 #include <hip/hip_runtime_api.h>
6
/*
 * Problem dimensions used throughout this file:
 * A is M x K, B is K x N, and the product C is M x N.
 */
#define M (4)
#define K (4)
#define N (4)
10
/*
 * Fill the row * col contiguous doubles starting at `list` with
 * pseudo-random values; each element is assigned rand() % 10.
 *
 * list: destination buffer holding at least row * col doubles.
 * row:  number of rows.
 * col:  number of columns.
 */
void initial(double* list,int row,int col)
{
    double *cursor = list;

    /* Walk the buffer front to back so rand() is consumed in element order. */
    for (int remaining = row * col; remaining > 0; remaining--)
    {
        *cursor++ = rand() % 10;
    }
}
19
20 void CpuMatrix(double *A,double *B,double *C)
21 {
22 int i,j,k;
23
24 for( i=0; i<M; i++)
25 {
26 for(j=0; j<N; j++)
27 {
28 double sum = 0;
29 for(int k=0; k<K; k++)
30 {
31 sum += A[i*K + k] * B[k * N + j];
32 }
33 C[i * N + j] = sum;
34 }
35 }
36 }
37
38 __global__ void GpuMatrix(double *dev_A,double *dev_B,double *dev_C)
39 {
40 int ix = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
41 int iy = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y;
42 if(ix<K && iy<M)
43 {
44 double sum = 0;
45 for( int k = 0; k < K;k++)
46 {
47 sum += dev_A[iy*K + k] * dev_B[k*N + ix];
48 }
49 dev_C[iy * N + ix] = sum;
50 }
51 }
52
/*
 * Print a row x col matrix stored contiguously, row after row, at `list`.
 * Every element is written with the "%10lf" format and each row is
 * terminated by a newline.
 *
 * list: first element of the matrix.
 * row:  number of rows to print.
 * col:  number of elements per row.
 */
void printMatrix(double *list,int row,int col)
{
    double *line = list;

    /* `line` always points at the first element of the row being printed. */
    for (int r = 0; r < row; r++, line += col)
    {
        int c = 0;
        while (c < col)
        {
            printf("%10lf", line[c]);
            c++;
        }
        printf("\n");
    }
}
66 int main(int argc,char **argv)
67 {
68 int Axy = M*K;
69 int Abytes = Axy * sizeof(double);
70
71 int Bxy = K*N;
72 int Bbytes = Bxy * sizeof(double);
73
74 int nxy = M*N;
75 int nbytes = nxy * sizeof(double);
76
77 float time_cpu,time_gpu;
78
79 clock_t start_cpu,stop_cpu;
80
81 hipEvent_t start_GPU,stop_GPU;
82
83 double *host_A, *host_B, *host_C, *c_CPU;
84 host_A = (double*)malloc(Abytes);
85 host_B = (double*)malloc(Bbytes);
86 host_C = (double*)malloc(nbytes);
87 c_CPU = (double*)malloc(nbytes);
88
89
90 initial(host_A,M,K);
91
92 printf("A:(%d,%d):\n",M,K);
93 printMatrix(host_A,M,K);
94
95 initial(host_B,K,N);
96
97 printf("B:(%d,%d):\n",K,N);
98 printMatrix(host_B,K,N);
99
100 // start_cpu = clock();
101 CpuMatrix(host_A,host_B,host_C);
102 // stop_cpu = clock();
103
104 printf("Host_C:(%d,%d):\n",M,N);
105 // printf("\nCPU time is %f(ms)\n",(float)(stop_cpu-start_cpu)/CLOCKS_PER_SEC);
106 printMatrix(host_C,M,N);
107 double *dev_A,*dev_B,*dev_C;
108 hipMalloc(&dev_A,Axy*sizeof(double));
109 hipMalloc(&dev_B,Bxy*sizeof(double));
110 hipMalloc(&dev_C,nxy*sizeof(double));
111
112 dim3 block(1024,1);
113 dim3 grid(64,64);
114
115 hipMemcpy(dev_A,host_A,Abytes,hipMemcpyDeviceToHost);
116 hipMemcpy(dev_B,host_B,Bbytes,hipMemcpyDeviceToHost);
117
118 hipEventCreate(&start_GPU);
119 hipEventCreate(&stop_GPU);
120 hipEventRecord(start_GPU,0);
121 hipLaunchKernelGGL(GpuMatrix,grid,block,0,0,dev_A,dev_B,dev_C);
122 hipEventRecord(stop_GPU,0);
123 hipEventSynchronize(start_GPU);
124 hipEventSynchronize(stop_GPU);
125 hipEventElapsedTime(&time_gpu, start_GPU,stop_GPU);
126 printf("\nThe time from GPU:\t%f(ms)\n", time_GPU/1000);
127 hipDeviceSynchronize();
128 hipEventDestroy(start_GPU);
129 hipEventDestroy(stop_GPU);
130
131 hipMemcpy(c_CPU,dev_C,nbytes,hipMemcpyDeviceToHost);
132 printf("device_C:(%d,%d):\n",M,N);
133 printMatrix(c_CPU,M,N);
134
135
136 hipFree(dev_A);
137 hipFree(dev_B);
138 hipFree(dev_C);
139 free(host_A);
140 free(host_B);
141 free(host_C);
142 free(c_CPU);
143
144 return 0;
145 }
上一篇: Eigen 中的保守大小调整和调整操作
下一篇: 独立键盘和矩阵键盘驱动器原理