如何实现 NEON 优化的 YUV420 转 RGB24 汇编代码：iOS/Android 可用的具体操作步骤
最编程
2024-08-15 16:33:05
...
/*
 * Fixed frame dimensions hard-coded for convertYUV420toRGB24 below.
 * In that function WIDTH * HEIGHT bounds the main loop and is the offset
 * at which the u data is read; the v data is read a further
 * (WIDTH * HEIGHT) / 4 bytes on, and WIDTH alone is added to the index for
 * the second luma load (y1).
 * NOTE(review): presumably pixel counts for a 640x480 frame, with WIDTH
 * doubling as the row stride (no padding) -- confirm against the caller.
 */
#define WIDTH 640
#define HEIGHT 480
void convertYUV420toRGB24(uint8_t* yuv, uint8_t* rgb) {
int i, j;
uint8x8_t y0, y1, u, v;
int16x8_t r, g, b;
uint8x8x3_t rgb0, rgb1;
for (i = 0, j = 0; i < (WIDTH * HEIGHT); i += 8, j += 12) {
y0 = vld1_u8(yuv + i);
y1 = vld1_u8(yuv + i + WIDTH);
u = vld1_u8(yuv + i + WIDTH * HEIGHT);
v = vld1_u8(yuv + i + WIDTH * HEIGHT + (WIDTH * HEIGHT) / 4);
r = vmovl_s8(vqsub_s8(y0, 16));
g = vmlal_s8(vmovl_s8(vqsub_s8(y1, 16)), vget_high_s8(y0), 16);
b = vmovl_s8(vqsub_s8(y1, 16));
rgb0.val[0] = vqmovun_s16(vcombine_s16(vrshrq_n_s16(vaddq_s16(vmulq_lane_s16(r, vget_low_s16(u), 0), vmovl_s8(u), 7), 6), vrshrq_n_s16(vaddq_s16(vmulq_lane_s16(g, vget_low_s16(u), 1), vmovl_s8(u), 7), 6)));
rgb0.val[1] = vqmovun_s16(vcombine_s16(vrshrq_n_s16(vsubq_s16(vmulq_lane_s16(g, vget_high_s16(u), 0), vmovl_s8(vshl_n_s8(u, 1)), 7), 6), vrshrq_n_s16(vsubq_s16(vmulq_lane_s16(b, vget_low_s16(v), 0), vmovl_s8(vshl_n_s8(v, 1)), 7), 6)));
rgb0.val[2] = vqmovun_s16(vcombine_s16(vrshrq_n_s16(vaddq_s16(vmulq_lane_s16(r, vget_high_s16(v), 1), vmovl_s8(vshl_n_s8(v, 1)), 7), 6), vrshrq_n