欢迎您访问 最编程 本站为您分享编程语言代码,编程技术文章!
您现在的位置是: 首页

C++ opencv2 错误查找

最编程 2024-03-19 07:42:34
...

opencv2 找错:原来是参数位置没有对齐。

#include "windows.h"
#include<iostream>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cuda_runtime.h>
#include <cuda.h>
#include <cuComplex.h>
#include <time.h>
#include<math.h>
#include <opencv2/opencv.hpp>

#include<fstream>
#include <sstream>
#include <string>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <stdlib.h>
#include <CL/cl.h>
#endif
#include <clFFT.h>
using namespace std;
using namespace cv;

int main()
{
	Mat src1W = imread("none2.bmp", 0);
	if (src1W.empty())
	{
		cout << "读取图片错误,请确定目录下是否有imread函数指定图片存在~!\n" << endl;
		return -1;
	}
	imshow("无样品", src1W);

	//waitKey(0);
	Mat src3W = src1W(Rect(463, 169, 256, 256));
	src3W.convertTo(src3W, CV_64FC1);
	imshow("无样品1", src3W / 255);
	//waitKey(0);

	Mat src6;
	src6.convertTo(src6, CV_64FC1);
	cv::copyMakeBorder(src3W, src6, 0, 0, 0, 0, BORDER_CONSTANT, Scalar::all(0));
	imshow("ROItest1", src6 / 255);

	//waitKey(0);
	Mat src1D = imread("celiang2.bmp", 0);
	if (src1D.empty())
	{
		cout << "读取图片错误,请确定目录下是否有imread函数指定图片存在~!\n" << endl;
		return -1;
	}
	imshow("有样品", src1D);
	Mat src4W = src1D(Rect(463, 169, 256, 256));
	src4W.convertTo(src4W, CV_64FC1);
	imshow("有样品1", src4W / 255);

	Mat src6W;
	src6W.convertTo(src6W, CV_64FC1);
	cv::copyMakeBorder(src4W, src6W, 0, 0, 0, 0, BORDER_CONSTANT, Scalar::all(0));
	imshow("ROItest2", src6W / 255);
	Mat frame(src6.size(), CV_64FC1);
	int Width = frame.cols;
	int Height = frame.rows;
	Mat C1(frame.size(), CV_64FC2);
	Mat F(frame.size(), CV_64FC1);
	Mat F_re1(frame.size(), CV_64FC1);
	Mat dest(frame.size(), CV_64FC2);
	Mat B_im1(frame.size(), CV_64FC1, Scalar::all(0));
	Mat B_re1(frame.size(), CV_64FC1);
	Mat B_re2(frame.size(), CV_64FC1);
	Mat B_im2(frame.size(), CV_64FC1, Scalar::all(0));
	B_re1 = src6.clone();
	Mat value[2] = { B_re1,B_im1 };
	Mat B1;
	merge(value, 2, B1);
	B_re2 = src6W.clone();
	Mat value1[2] = { B_re2,B_im2 };
	Mat B2;
	merge(value1, 2, B2);
	cl_int err;
	cl_platform_id platform = 0;
	cl_device_id device = 0;
	cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
	cl_context ctx = 0;
	cl_command_queue queue = 0;
	//cl_mem bufX;
	err = clGetPlatformIDs(1, &platform, NULL);
	err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
	props[1] = (cl_context_properties)platform;
	ctx = clCreateContext(props, 1, &device, NULL, NULL, &err);
	queue = clCreateCommandQueue(ctx, device, 0, &err);

	clfftSetupData fftSetup;
	err = clfftInitSetupData(&fftSetup);
	err = clfftSetup(&fftSetup);
	cl_mem B1_dev = NULL;  
	cl_mem A1_dev = NULL;
	B1_dev = clCreateBuffer(ctx, CL_MEM_READ_ONLY, frame.rows * frame.cols * sizeof(cl_double2), NULL, NULL);
	A1_dev = clCreateBuffer(ctx, CL_MEM_READ_WRITE, frame.rows * frame.cols * sizeof(cl_double2), NULL, NULL);
	err = clEnqueueWriteBuffer(queue, B1_dev, CL_TRUE, 0, frame.rows * frame.cols * sizeof(cl_double2), B1.data, 0, NULL, NULL);
	clfftPlanHandle planHandle;
	clfftDim dim = CLFFT_2D;
	size_t clLengths[2] = { (size_t)B1.cols,(size_t)B1.rows };
	err = clfftCreateDefaultPlan(&planHandle, ctx, dim, clLengths);
	err = clfftSetPlanPrecision(planHandle, CLFFT_DOUBLE);
	err = clfftSetLayout(planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED);
	err = clfftSetResultLocation(planHandle, CLFFT_OUTOFPLACE);
	err = clfftBakePlan(planHandle, 1, &queue, NULL, NULL);
	err = clfftEnqueueTransform(planHandle, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &B1_dev, &A1_dev, NULL);
	Mat result(frame.size(), CV_64FC2);
	clEnqueueReadBuffer(queue, A1_dev, CL_TRUE, 0, frame.rows * frame.cols * sizeof(cl_double2), result.data, 0, NULL, NULL);
	cl_mem B2_dev = NULL;  
	cl_mem A2_dev = NULL;
	B2_dev = clCreateBuffer(ctx, CL_MEM_READ_ONLY, frame.rows * frame.cols * sizeof(cl_double2), NULL, NULL);
	A2_dev = clCreateBuffer(ctx, CL_MEM_READ_WRITE, frame.rows * frame.cols * sizeof(cl_double2), NULL, NULL);
	err = clEnqueueWriteBuffer(queue, B2_dev, CL_TRUE, 0, frame.rows * frame.cols * sizeof(cl_double2), B2.data, 0, NULL, NULL);
	double time1 = static_cast<double>(getTickCount());
	err = clfftEnqueueTransform(planHandle, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &B2_dev, &A2_dev, NULL);
	time1 = ((double)getTickCount() - time1) * 1000 / getTickFrequency();
	cout << "此方法运行时间为:" << time1 << "ms" << endl;
	Mat result1(frame.size(), CV_64FC2);
	clEnqueueReadBuffer(queue, A2_dev, CL_TRUE, 0, frame.rows * frame.cols * sizeof(cl_double2), result1.data, 0, NULL, NULL);
	
	std::ifstream kernelFile("sourse.cl", std::ios::in);
	std::ostringstream oss;
	oss << kernelFile.rdbuf();
	std::string srcStdStr = oss.str();
	const char* srcStr = srcStdStr.c_str();
	cl_program program = clCreateProgramWithSource(ctx, 1, (const char**)&srcStr, NULL, &err);
	err = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
	cl_kernel kernel = clCreateKernel(program, "GPU_Zhengjiao", NULL);
	cl_mem des = clCreateBuffer(ctx, CL_MEM_READ_WRITE, frame.rows * frame.cols * sizeof(cl_double2), NULL, NULL);
	cl_mem src = clCreateBuffer(ctx, CL_MEM_READ_ONLY, frame.rows * frame.cols * sizeof(cl_double2), NULL, NULL);
	err = clEnqueueWriteBuffer(queue, src, CL_TRUE, 0, frame.rows * frame.cols * sizeof(cl_double2), result.data, 0, NULL, NULL);
	clSetKernelArg(kernel, 0, sizeof(cl_mem), &des);
	clSetKernelArg(kernel, 1, sizeof(cl_mem), &src);
	clSetKernelArg(kernel, 2, sizeof(int), (void*)&Height);
	clSetKernelArg(kernel, 3, sizeof(int), (void*)&Width);
	size_t globalWorkSize[2] = { frame.rows, frame.cols };
	clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL);
	Mat A11(frame.size(), CV_64FC2);
	err = clEnqueueReadBuffer(queue, des, CL_TRUE, 0, frame.rows * frame.cols * sizeof(cl_double2), A11.data, 0, NULL, NULL);

	cl_kernel kernel1 = clCreateKernel(program, "GPU_Zhengjiao", NULL);
	cl_mem des22 = clCreateBuffer(ctx, CL_MEM_READ_WRITE, frame.rows * frame.cols * sizeof(cl_double2), NULL, NULL);
	cl_mem src22 = clCreateBuffer(ctx, CL_MEM_READ_ONLY, frame.rows * frame.cols * sizeof(cl_double2), NULL, NULL);
	err = clEnqueueWriteBuffer(queue, src22, CL_TRUE, 0, frame.rows * frame.cols * sizeof(cl_double2), result1.data, 0, NULL, NULL);
	clSetKernelArg(kernel1, 0, sizeof(cl_mem), &des22);
	clSetKernelArg(kernel1, 1, sizeof(cl_mem), &src22);
	clSetKernelArg(kernel1, 2, sizeof(int), (void*)&Height);
	clSetKernelArg(kernel1, 3, sizeof(int), (void*)&Width);
	clEnqueueNDRangeKernel(queue, kernel1, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL);
	Mat A22(frame.size(), CV_64FC2);
	err = clEnqueueReadBuffer(queue, des22, CL_TRUE, 0, frame.rows * frame.cols * sizeof(cl_double2), A22.data, 0, NULL, NULL);

	for (int i = 0; i < frame.rows; i++)
	{
		for (int j = 0; j < frame.cols; j++)
		{
			if ((i >= 97 && i < 160) && (j >= 147 && j < 190))
			{
				F_re1.at<double>(i, j) = 1;
			}
			else
			{
				F_re1.at<double>(i, j) = 0;
			}
		}
	}
	//F1圆形滤波器
	F = F_re1;
	imshow("矩形滤波器", F);
	
	err = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
	cl_kernel kernel2 = clCreateKernel(program, "GPU_JvZhenDianCheng", NULL);
	cl_mem des_src = clCreateBuffer(ctx, CL_MEM_READ_WRITE, frame.rows * frame.cols * sizeof(cl_double2), NULL, NULL);
	cl_mem src1 = clCreateBuffer(ctx, CL_MEM_READ_ONLY, frame.rows * frame.cols * sizeof(cl_double2), NULL, NULL);
	cl_mem src2 = clCreateBuffer(ctx, CL_MEM_READ_ONLY, frame.rows * frame.cols * sizeof(double), NULL, NULL);
	clEnqueueWriteBuffer(queue, src1, CL_TRUE, 0, frame.rows * frame.cols * sizeof(cl_double2), A11.data, 0, NULL, NULL);
	clEnqueueWriteBuffer(queue, src2, CL_TRUE, 0, frame.rows * frame.cols * sizeof(double), F.data, 0, NULL, NULL);
	clSetKernelArg(kernel2, 0, sizeof(cl_mem), &des_src);
	clSetKernelArg(kernel2, 1, sizeof(cl_mem), &src1);
	clSetKernelArg(kernel2, 2, sizeof(cl_mem), &src2);
	clSetKernelArg(kernel2, 3, sizeof(int), &frame.cols);
	clSetKernelArg(kernel2, 4, sizeof(int), &frame.rows);
	clEnqueueNDRangeKernel(queue, kernel2, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL);
	Mat SPA1(frame.size(), CV_64FC2);
	err = clEnqueueReadBuffer(queue, des_src, CL_TRUE, 0, frame.rows * frame.cols * sizeof(cl_double2), SPA1.data, 0, NULL, NULL);

	cl_kernel kernel3 = clCreateKernel(program, "GPU_JvZhenDianCheng", NULL);
	cl_mem des_src1 = clCreateBuffer(ctx, CL_MEM_READ_WRITE, frame.rows * frame.cols * sizeof(cl_double2), NULL, NULL);
	cl_mem src15 = clCreateBuffer(ctx, CL_MEM_READ_ONLY, frame.rows * frame.cols * sizeof(cl_double2), NULL, NULL);
	cl_mem src25 = clCreateBuffer(ctx, CL_MEM_READ_ONLY, frame.rows * frame.cols * sizeof(double), NULL, NULL);
	clEnqueueWriteBuffer(queue, src15, CL_TRUE, 0, frame.rows * frame.cols * sizeof(cl_double2), A22.data, 0, NULL, NULL);
	clEnqueueWriteBuffer(queue, src25, CL_TRUE, 0, frame.rows * frame.cols * sizeof(double), F.data, 0, NULL, NULL);
	clSetKernelArg(kernel3, 0, sizeof(cl_mem), &des_src1);
	clSetKernelArg(kernel3, 1, sizeof(cl_mem), &src15);
	clSetKernelArg(kernel3, 2, sizeof(cl_mem), &src25);
	clSetKernelArg(kernel3, 3, sizeof(int), &frame.cols);
	clSetKernelArg(kernel3, 4, sizeof(int), &frame.rows);
	clEnqueueNDRangeKernel(queue, kernel3, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL);
	Mat SPB1(frame.size(), CV_64FC2);
	err = clEnqueueReadBuffer(queue, des_src1, CL_TRUE, 0, frame.rows * frame.cols * sizeof(cl_double2), SPB1.data, 0, NULL, NULL);

	cl_kernel kernel4 = clCreateKernel(program, "GPU_Zhengjiao", NULL);
	cl_mem dre = clCreateBuffer(ctx, CL_MEM_READ_WRITE, frame.rows * frame.cols * sizeof(cl_double2), NULL, NULL);
	cl_mem src11 = clCreateBuffer(ctx, CL_MEM_READ_ONLY, frame.rows * frame.cols * sizeof(cl_double2), NULL, NULL);
	err = clEnqueueWriteBuffer(queue, src11, CL_TRUE, 0, frame.rows * frame.cols * sizeof(cl_double2), SPA1.data, 0, NULL, NULL);
	clSetKernelArg(kernel4, 0, sizeof(cl_mem), &dre);
	clSetKernelArg(kernel4, 1, sizeof(cl_mem), &src11);
	clSetKernelArg(kernel4, 2, sizeof(int), (void*)&Height);
	clSetKernelArg(kernel4, 3, sizeof(int), (void*)&Width);
	clEnqueueNDRangeKernel(queue, kernel4, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL);
	Mat A14(frame.size(), CV_64FC2);
	err = clEnqueueReadBuffer(queue, dre, CL_TRUE, 0, frame.rows * frame.cols * sizeof(cl_double2), A14.data, 0, NULL, NULL);

	cl_kernel kernel5 = clCreateKernel(program, "GPU_Zhengjiao", NULL);
	cl_mem dre33 = clCreateBuffer(ctx, CL_MEM_READ_WRITE, frame.rows * frame.cols * sizeof(cl_double2), NULL, NULL);
	cl_mem src33 = clCreateBuffer(ctx, CL_MEM_READ_ONLY, frame.rows * frame.cols * sizeof(cl_double2), NULL, NULL);
	err = clEnqueueWriteBuffer(queue, src33, CL_TRUE, 0, frame.rows * frame.cols * sizeof(cl_double2), SPB1.data, 0, NULL, NULL);
	clSetKernelArg(kernel5, 0, sizeof(cl_mem), &dre33);
	clSetKernelArg(kernel5, 1, sizeof(cl_mem), &src33);
	clSetKernelArg(kernel5, 2, sizeof(int), (void*)&Height);
	clSetKernelArg(kernel5, 3, sizeof(int), (void*)&Width);
	clEnqueueNDRangeKernel(queue, kernel5, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL);
	Mat A44(frame.size(), CV_64FC2);
	err = clEnqueueReadBuffer(queue, dre33, CL_TRUE, 0, frame.rows * frame.cols * sizeof(cl_double2), A44.data, 0, NULL, NULL);

	cl_mem imageA1_dev = NULL;  
	cl_mem A11_dev = NULL;
	imageA1_dev = clCreateBuffer(ctx, CL_MEM_READ_WRITE, frame.rows * frame.cols * sizeof(cl_double2), NULL, NULL);
	A11_dev = clCreateBuffer(ctx, CL_MEM_READ_ONLY, frame.rows * frame.cols * sizeof(cl_double2), NULL, NULL);
	err = clEnqueueWriteBuffer(queue, A11_dev, CL_TRUE, 0, frame.rows * frame.cols * sizeof(cl_double2), A14.data, 0, NULL, NULL);
	err = clfftEnqueueTransform(planHandle, CLFFT_BACKWARD, 1, &queue, 0, NULL, NULL, &A11_dev, &imageA1_dev, NULL);
	Mat imageA1(frame.size(), CV_64FC2);
	clEnqueueReadBuffer(queue, imageA1_dev, CL_TRUE, 0, frame.rows * frame.cols * sizeof(cl_double2), imageA1.data, 0, NULL, NULL);
	
	cl_mem imageB1_dev = NULL;  
	cl_mem A55_dev = NULL;
	imageB1_dev = clCreateBuffer(ctx, CL_MEM_READ_WRITE, frame.rows * frame.cols * sizeof(cl_double2), NULL, NULL);
	A55_dev = clCreateBuffer(ctx, CL_MEM_READ_ONLY, frame.rows * frame.cols * sizeof(cl_double2), NULL, NULL);
	err = clEnqueueWriteBuffer(queue, A55_dev, CL_TRUE, 0, frame.rows * frame.cols * sizeof(cl_double2), A44.data, 0, NULL, NULL);
	err = clfftEnqueueTransform(planHandle, CLFFT_BACKWARD, 1, &queue, 0, NULL, NULL, &A55_dev, &imageB1_dev, NULL);
	Mat imageB1(frame.size(), CV_64FC2);
	clEnqueueReadBuffer(queue, imageB1_dev, CL_TRUE, 0, frame.rows * frame.cols * sizeof(cl_double2), imageB1.data, 0, NULL, NULL);
	err = clfftDestroyPlan(&planHandle);
	clfftTeardown();
	clReleaseMemObject(B1_dev);
	clReleaseMemObject(A1_dev);
	clReleaseMemObject(B2_dev);
	clReleaseMemObject(A2_dev);
	clReleaseMemObject(A11_dev);
	clReleaseMemObject(imageA1_dev);
	clReleaseMemObject(A55_dev);
	clReleaseMemObject(imageB1_dev);
	
	cl_kernel kernel6 = clCreateKernel(program, "GPU_DianChu", NULL);
	cl_mem des_src11 = clCreateBuffer(ctx, CL_MEM_READ_WRITE, frame.rows * frame.cols * sizeof(cl_double2), NULL, NULL);
	cl_mem src16 = clCreateBuffer(ctx, CL_MEM_READ_ONLY, frame.rows * frame.cols * sizeof(cl_double2), NULL, NULL);
	cl_mem src26 = clCreateBuffer(ctx, CL_MEM_READ_ONLY, frame.rows * frame.cols * sizeof(cl_double2), NULL, NULL);
	clEnqueueWriteBuffer(queue, src16, CL_TRUE, 0, frame.rows * frame.cols * sizeof(cl_double2), imageB1.data, 0, NULL, NULL);
	clEnqueueWriteBuffer(queue, src26, CL_TRUE, 0, frame.rows * frame.cols * sizeof(cl_double2), imageA1.data, 0, NULL, NULL);
	clSetKernelArg(kernel6, 0, sizeof(cl_mem), &des_src11);
	clSetKernelArg(kernel6, 1, sizeof(cl_mem), &src16);
	clSetKernelArg(kernel6, 2, sizeof(cl_mem), &src26);
	clSetKernelArg(kernel6, 3, sizeof(int), &frame.cols);
	clSetKernelArg(kernel6, 4, sizeof(int), &frame.rows);
	clEnqueueNDRangeKernel(queue, kernel6, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL);
	Mat res1(frame.size(), CV_64FC2);
	err = clEnqueueReadBuffer(queue, des_src11, CL_TRUE, 0, frame.rows * frame.cols * sizeof(cl_double2), res1.data, 0, NULL, NULL);

	cl_kernel kernel7 = clCreateKernel(program, "GPU_atan", NULL);
	cl_mem res1_phase_dev = clCreateBuffer(ctx, CL_MEM_READ_WRITE, frame.rows * frame.cols * sizeof(double), NULL, NULL);
	cl_mem src88 = clCreateBuffer(ctx, CL_MEM_READ_ONLY, frame.rows * frame.cols * sizeof(cl_double2), NULL, NULL);
	err = clEnqueueWriteBuffer(queue, src88, CL_TRUE, 0, frame.rows * frame.cols * sizeof(cl_double2), res1.data, 0, NULL, NULL);
	clSetKernelArg(kernel7, 0, sizeof(cl_mem), &res1_phase_dev);
	clSetKernelArg(kernel7, 1, sizeof(cl_mem), &src88);
	clSetKernelArg(kernel7, 2, sizeof(int), (void*)&Height);
	clSetKernelArg(kernel7, 3, sizeof(int), (void*)&Width);
	clEnqueueNDRangeKernel(queue, kernel7, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL);
	Mat phase(frame.size(), CV_64FC1);
	err = clEnqueueReadBuffer(queue, res1_phase_dev, CL_TRUE, 0, frame.rows * frame.cols * sizeof(double), phase.data, 0, NULL, NULL);
	imshow("wrappedphase", phase);

	cl_kernel kernel8 = clCreateKernel(program, "dxanddy_OpenCL", NULL);
	cl_mem dstx_dev = clCreateBuffer(ctx, CL_MEM_READ_WRITE, frame.rows * frame.cols * sizeof(double), NULL, NULL);
	cl_mem dsty_dev = clCreateBuffer(ctx, CL_MEM_READ_WRITE, frame.rows * frame.cols * sizeof(double), NULL, NULL);
	cl_mem src_dev = clCreateBuffer(ctx, CL_MEM_READ_ONLY, frame.rows * frame.cols * sizeof(double), NULL, NULL);
	err = clEnqueueWriteBuffer(queue, src_dev, CL_TRUE, 0, frame.rows * frame.cols * sizeof(double), phase.data, 0, NULL, NULL);
	clSetKernelArg(kernel8, 0, sizeof(cl_mem), &dstx_dev);
	clSetKernelArg(kernel8, 1, sizeof(cl_mem), &dsty_dev);
	clSetKernelArg(kernel8, 2, sizeof(cl_mem), &src_dev);
	clSetKernelArg(kernel8, 3, sizeof(int), (void*)&Height);
	clSetKernelArg(kernel8, 4, sizeof(int), (void*)&Width);
	clEnqueueNDRangeKernel(queue, kernel8, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL);
	Mat dstx(frame.size(), CV_64FC1);
	Mat dsty(frame.size(), CV_64FC1);
	err = clEnqueueReadBuffer(queue, dstx_dev, CL_TRUE, 0, frame.rows * frame.cols * sizeof(double), dstx.data, 0, NULL, NULL);
	err = clEnqueueReadBuffer(queue, dsty_dev, CL_TRUE, 0, frame.rows * frame.cols * sizeof(double), dsty.data, 0, NULL, NULL);

	cl_kernel kernel9 = clCreateKernel(program, "tmpsndderivative_OpenCL", NULL);
	cl_mem dstxx_dev = clCreateBuffer(ctx, CL_MEM_READ_WRITE, frame.rows * frame.cols * sizeof(double), NULL, NULL);
	cl_mem dstyy_dev = clCreateBuffer(ctx, CL_MEM_READ_WRITE, frame.rows * frame.cols * sizeof(double), NULL, NULL);
	cl_mem srcx_dev = clCreateBuffer(ctx, CL_MEM_READ_ONLY, frame.rows * frame.cols * sizeof(double), NULL, NULL);
	cl_mem srcy_dev = clCreateBuffer(ctx, CL_MEM_READ_ONLY, frame.rows * frame.cols * sizeof(double), NULL, NULL);
	err = clEnqueueWriteBuffer(queue, srcx_dev, CL_TRUE, 0, frame.rows * frame.cols * sizeof(double), dstx.data, 0, NULL, NULL);
	err = clEnqueueWriteBuffer(queue, srcy_dev, CL_TRUE, 0, frame.rows * frame.cols * sizeof(double), dsty.data, 0, NULL, NULL);
	clSetKernelArg(kernel9, 0, sizeof(cl_mem), &dstxx_dev);
	clSetKernelArg(kernel9, 1, sizeof(cl_mem), &dstyy_dev);
	clSetKernelArg(kernel9, 2, sizeof(cl_mem), &srcx_dev);
	clSetKernelArg(kernel9, 3, sizeof(cl_mem), &srcy_dev);
	clSetKernelArg(kernel9, 4, sizeof(int), (void*)&Height);
	clSetKernelArg(kernel9, 5, sizeof(int), (void*)&Width);
	clEnqueueNDRangeKernel(queue, kernel9, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL);
	Mat dstxx(frame.size(), CV_64FC1);
	Mat dstyy(frame.size(), CV_64FC1);
	err = clEnqueueReadBuffer(queue, dstxx_dev, CL_TRUE, 0, frame.rows * frame.cols * sizeof(double), dstxx.data, 0, NULL, NULL);
	err = clEnqueueReadBuffer(queue, dstyy_dev, CL_TRUE, 0, frame.rows * frame.cols * sizeof(double), dstyy.data, 0, NULL, NULL);
	
	cl_kernel kernel10 = clCreateKernel(program, "gmpsndderivative_OpenCL", NULL);
	cl_mem dstxx4_dev = clCreateBuffer(ctx, CL_MEM_READ_ONLY, frame.rows * frame.cols * sizeof(double), NULL, NULL);
	cl_mem dstyy4_dev = clCreateBuffer(ctx, CL_MEM_READ_ONLY, frame.rows * frame.cols * sizeof(double), NULL, NULL);
	cl_mem srcxx_dev = clCreateBuffer(ctx, CL_MEM_READ_ONLY, frame.rows * frame.cols * sizeof(double), NULL, NULL);
	cl_mem srcyy_dev = clCreateBuffer(ctx, CL_MEM_READ_ONLY, frame.rows * frame.cols * sizeof(double), NULL, NULL);
	cl_mem dstp_dev = clCreateBuffer(ctx, CL_MEM_READ_WRITE, frame.rows * frame.cols * sizeof(double), NULL, NULL);
	err = clEnqueueWriteBuffer(queue, dstxx4_dev, CL_TRUE, 0, frame.rows * frame.cols * sizeof(double), dstxx.data, 0, NULL, NULL);
	err = clEnqueueWriteBuffer(queue, dstyy4_dev, CL_TRUE, 0, frame.rows * frame.cols * sizeof(double), dstyy.data, 0, NULL, NULL);
	err = clEnqueueWriteBuffer(queue, srcxx_dev, CL_TRUE, 0, frame.rows * frame.cols * sizeof(double), dstx.data, 0, NULL, NULL);
	err = clEnqueueWriteBuffer(queue, srcyy_dev, CL_TRUE, 0, frame.rows * frame.cols * sizeof(double), dsty.data, 0, NULL, NULL);
	clSetKernelArg(kernel10, 0, sizeof(cl_mem), &dstxx4_dev);
	clSetKernelArg(kernel10, 1, sizeof(cl_mem), &dstyy4_dev);
	clSetKernelArg(kernel10, 0, sizeof(cl_mem), &srcxx_dev);
	clSetKernelArg(kernel10, 1, sizeof(cl_mem), &srcyy_dev);
	clSetKernelArg(kernel10, 2, sizeof(cl_mem), &dstp_dev);
	clSetKernelArg(kernel10, 3, sizeof(int), (void*)&Height);
	clSetKernelArg(kernel10, 4, sizeof(int), (void*)&Width);
	clEnqueueNDRangeKernel(queue, kernel10, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL);
	Mat dst(frame.size(), CV_64FC1);
	err = clEnqueueReadBuffer(queue, dstp_dev, CL_TRUE, 0, frame.rows * frame.cols * sizeof(double), dst.data, 0, NULL, NULL);

	for (int i = 0; i < phase.rows; i++) {
		for (int j = 0; j < phase.cols; j++) {
			cout << "result(" << i << "," << j << "): " << dstyy.ptr<double>(i)[j] << endl;
		}
	}
	
	

	/*cl_kernel kernel11 = clCreateKernel(program, "GetAMAndAMT_OpenCL", NULL);
	cl_mem A_dev = clCreateBuffer(ctx, CL_MEM_READ_WRITE, frame.rows * frame.cols * sizeof(double), NULL, NULL);
	cl_mem AT_dev = clCreateBuffer(ctx, CL_MEM_READ_WRITE, frame.rows * frame.cols * sizeof(double), NULL, NULL);
	clSetKernelArg(kernel11, 0, sizeof(cl_mem), &A_dev);
	clSetKernelArg(kernel11, 1, sizeof(cl_mem), &AT_dev);
	clSetKernelArg(kernel11, 2, sizeof(int), (void*)&Height);
	clSetKernelArg(kernel11, 3, sizeof(int), (void*)&Width);
	clEnqueueNDRangeKernel(queue, kernel11, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL);
	Mat A(frame.size(), CV_64FC1);
	Mat AT(frame.size(), CV_64FC1);
	err = clEnqueueReadBuffer(queue, A_dev, CL_TRUE, 0, frame.rows * frame.cols * sizeof(double), A.data, 0, NULL, NULL);
	err = clEnqueueReadBuffer(queue, AT_dev, CL_TRUE, 0, frame.rows * frame.cols * sizeof(double), AT.data, 0, NULL, NULL);
	
	cl_kernel kernel12 = clCreateKernel(program, "LINETRANS_OpenCL", NULL);
	cl_mem dst44_dev = clCreateBuffer(ctx, CL_MEM_READ_WRITE, frame.rows * frame.cols * sizeof(double), NULL, NULL);
	cl_mem src55_dev = clCreateBuffer(ctx, CL_MEM_READ_ONLY, frame.rows * frame.cols * sizeof(double), NULL, NULL);
	err = clEnqueueWriteBuffer(queue, src55_dev, CL_TRUE, 0, frame.rows * frame.cols * sizeof(double), dstxx.data, 0, NULL, NULL);
	clSetKernelArg(kernel12, 0, sizeof(cl_mem), &dst44_dev);
	clSetKernelArg(kernel12, 1, sizeof(cl_mem), &src55_dev);
	clSetKernelArg(kernel12, 2, sizeof(int), (void*)&Height);
	clSetKernelArg(kernel12, 3, sizeof(int), (void*)&Width);
	clEnqueueNDRangeKernel(queue, kernel12, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL);
	Mat D(frame.size(), CV_64FC1);
	err = clEnqueueReadBuffer(queue, dst44_dev, CL_TRUE, 0, frame.rows * frame.cols * sizeof(double),D.data, 0, NULL, NULL);*/



	/*cl_kernel kernel13 = clCreateKernel(program, "matrixMulOpenCL", NULL);
	cl_mem des_src18 = clCreateBuffer(ctx, CL_MEM_READ_WRITE, frame.rows * frame.cols * sizeof(double), NULL, NULL);
	cl_mem src17 = clCreateBuffer(ctx, CL_MEM_READ_ONLY, frame.rows * frame.cols * sizeof(double), NULL, NULL);
	cl_mem src27 = clCreateBuffer(ctx, CL_MEM_READ_ONLY, frame.rows * frame.cols * sizeof(double), NULL, NULL);
	clEnqueueWriteBuffer(queue, src17, CL_TRUE, 0, frame.rows * frame.cols * sizeof(double), A.data, 0, NULL, NULL);
	clEnqueueWriteBuffer(queue, src27, CL_TRUE, 0, frame.rows * frame.cols * sizeof(double), D.data, 0, NULL, NULL);
	clSetKernelArg(kernel13, 0, sizeof(cl_mem), &des_src18);
	clSetKernelArg(kernel13, 1, sizeof(cl_mem), &src17);
	clSetKernelArg(kernel13, 2, sizeof(cl_mem), &src27);
	clSetKernelArg(kernel13, 3, sizeof(int), &frame.cols);
	clSetKernelArg(kernel13, 4, sizeof(int), &frame.rows);
	clEnqueueNDRangeKernel(queue, kernel13, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL);
	Mat res145(frame.size(), CV_64FC1);
	err = clEnqueueReadBuffer(queue, des_src18, CL_TRUE, 0, frame.rows * frame.cols * sizeof(double), res145.data, 0, NULL, NULL);*/
	

	
	/*for (int i = 0; i < A11.rows; i++) {
		for (int j = 0; j < A11.cols; j++) {
			cout << "result(" << i << "," << j << "): " << res1.ptr<cv::Vec2d>(i)[j][0] << " + " << res1.ptr<cv::Vec2d>(i)[j][1] << "i" << endl;
		}
	}*/
	/*ofstream Fs("D:\\test10.xls");
		if (!Fs.is_open())
		{
			cout << "error!" << endl;
			return 0;
		}

		int height = F.rows;
		int width =F.cols;
		for (int i = 0; i < height; i++)
		{
			for (int j = 0; j < width; j++)
			{
				Fs << AT.ptr<double>(i)[j] << '\t';
			}
			Fs << endl;
		}
		Fs.close();*/
	/*ofstream Fs("D:\\test9.xls");
	if (!Fs.is_open())
	{
		cout << "error!" << endl;
		return 0;
	}

	int height = result.rows;
	int width = result.cols;
	for (int i = 0; i < height; i++)
	{
		for (int j = 0; j < width; j++)
		{
			Fs << result.ptr<cv::Vec2d>(i)[j][0] << " + " << result.ptr<cv::Vec2d>(i)[j][1] << "i" << '\t';
		}
		Fs << endl;
	}
	Fs.close();*/
	waitKey(0);
	return 0;
}