// This file is auto-generated. Do not edit!
//
// Embedded OpenCL C program sources for three programs: brute_force_match,
// fast and orb. Each program is stored as a ProgramEntry whose initializer
// holds, in order, the program name, the kernel text (adjacent string
// literals, one per source line) and a 32-hex-digit string (presumably a
// digest of the kernel text -- confirm against the ProgramEntry declaration).
// A ProgramSource object is then constructed from each entry's programStr.
//
// The comments below sit between string literals, so they are not part of the
// kernel text. They will be lost if this file is regenerated; lasting changes
// belong in the generator's input.
#include "precomp.hpp"
#include "opencl_kernels_features2d.hpp"
namespace cv
{
namespace ocl
{
namespace features2d
{
// ---------------------------------------------------------------------------
// brute_force_match: descriptor matching kernels (best match, radius match,
// two-best match). The text uses kercn and TN without defining them, so they
// have to be supplied when the program is built; T, BLOCK_SIZE, MAX_DESC_LEN,
// SHARED_MEM_SZ and DIST_TYPE have defaults below.
// ---------------------------------------------------------------------------
const struct ProgramEntry brute_force_match={"brute_force_match",
"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable\n"
"#define MAX_FLOAT 3.40282e+038f\n"
"#ifndef T\n"
"#define T float\n"
"#endif\n"
"#ifndef BLOCK_SIZE\n"
"#define BLOCK_SIZE 16\n"
"#endif\n"
"#ifndef MAX_DESC_LEN\n"
"#define MAX_DESC_LEN 64\n"
"#endif\n"
// BLOCK_SIZE_ODD is the row stride used for the BLOCK_SIZE x BLOCK_SIZE tiles
// in local memory (one element longer than the tile width).
"#define BLOCK_SIZE_ODD (BLOCK_SIZE + 1)\n"
"#ifndef SHARED_MEM_SZ\n"
"# if (BLOCK_SIZE < MAX_DESC_LEN)\n"
"# define SHARED_MEM_SZ (kercn * (BLOCK_SIZE * MAX_DESC_LEN + BLOCK_SIZE * BLOCK_SIZE))\n"
"# else\n"
"# define SHARED_MEM_SZ (kercn * 2 * BLOCK_SIZE_ODD * BLOCK_SIZE)\n"
"# endif\n"
"#endif\n"
"#ifndef DIST_TYPE\n"
"#define DIST_TYPE 2\n"
"#endif\n"
// DIST(x, y) accumulates into a local variable named result; DIST_RES(x) is
// applied once to the accumulated value.
// DIST_TYPE == 2: sum of absolute differences (float or int, per T_FLOAT).
"#if (DIST_TYPE == 2)\n"
"# ifdef T_FLOAT\n"
"typedef float result_type;\n"
"# if (8 == kercn)\n"
"typedef float8 value_type;\n"
"# define DIST(x, y) {value_type d = fabs((x) - (y)); result += d.s0 + d.s1 + d.s2 + d.s3 + d.s4 + d.s5 + d.s6 + d.s7;}\n"
"# elif (4 == kercn)\n"
"typedef float4 value_type;\n"
"# define DIST(x, y) {value_type d = fabs((x) - (y)); result += d.s0 + d.s1 + d.s2 + d.s3;}\n"
"# else\n"
"typedef float value_type;\n"
"# define DIST(x, y) result += fabs((x) - (y))\n"
"# endif\n"
"# else\n"
"typedef int result_type;\n"
"# if (8 == kercn)\n"
"typedef int8 value_type;\n"
"# define DIST(x, y) {value_type d = abs((x) - (y)); result += d.s0 + d.s1 + d.s2 + d.s3 + d.s4 + d.s5 + d.s6 + d.s7;}\n"
"# elif (4 == kercn)\n"
"typedef int4 value_type;\n"
"# define DIST(x, y) {value_type d = abs((x) - (y)); result += d.s0 + d.s1 + d.s2 + d.s3;}\n"
"# else\n"
"typedef int value_type;\n"
"# define DIST(x, y) result += abs((x) - (y))\n"
"# endif\n"
"# endif\n"
"# define DIST_RES(x) (x)\n"
// DIST_TYPE == 4: sum of squared differences, with sqrt applied by DIST_RES.
"#elif (DIST_TYPE == 4)\n"
"typedef float result_type;\n"
"# if (8 == kercn)\n"
"typedef float8 value_type;\n"
"# define DIST(x, y) {value_type d = ((x) - (y)); result += dot(d.s0123, d.s0123) + dot(d.s4567, d.s4567);}\n"
"# elif (4 == kercn)\n"
"typedef float4 value_type;\n"
"# define DIST(x, y) {value_type d = ((x) - (y)); result += dot(d, d);}\n"
"# else\n"
"typedef float value_type;\n"
"# define DIST(x, y) {value_type d = ((x) - (y)); result = mad(d, d, result);}\n"
"# endif\n"
"# define DIST_RES(x) sqrt(x)\n"
// DIST_TYPE == 6: population count of the XOR of the two values.
// NOTE(review): result_type is a scalar int here while value_type may be
// int4/int8; confirm which kercn values are actually used with this type.
"#elif (DIST_TYPE == 6)\n"
"# if (8 == kercn)\n"
"typedef int8 value_type;\n"
"# elif (4 == kercn)\n"
"typedef int4 value_type;\n"
"# else\n"
"typedef int value_type;\n"
"# endif\n"
"typedef int result_type;\n"
"# define DIST(x, y) result += popcount( (x) ^ (y) )\n"
"# define DIST_RES(x) (x)\n"
"#endif\n"
// reduce_block: accumulates DIST over BLOCK_SIZE element pairs taken from row
// lidy of s_query and column lidx of s_train (both with stride BLOCK_SIZE_ODD)
// and returns DIST_RES of the sum.
"inline result_type reduce_block(\n"
"__local value_type *s_query,\n"
"__local value_type *s_train,\n"
"int lidx,\n"
"int lidy\n"
")\n"
"{\n"
"result_type result = 0;\n"
"#pragma unroll\n"
"for (int j = 0 ; j < BLOCK_SIZE ; j++)\n"
"{\n"
"DIST(s_query[lidy * BLOCK_SIZE_ODD + j], s_train[j * BLOCK_SIZE_ODD + lidx]);\n"
"}\n"
"return DIST_RES(result);\n"
"}\n"
// reduce_block_match: same accumulation as reduce_block but returns the raw
// sum; the calling kernels apply DIST_RES after summing all blocks.
"inline result_type reduce_block_match(\n"
"__local value_type *s_query,\n"
"__local value_type *s_train,\n"
"int lidx,\n"
"int lidy\n"
")\n"
"{\n"
"result_type result = 0;\n"
"#pragma unroll\n"
"for (int j = 0 ; j < BLOCK_SIZE ; j++)\n"
"{\n"
"DIST(s_query[lidy * BLOCK_SIZE_ODD + j], s_train[j * BLOCK_SIZE_ODD + lidx]);\n"
"}\n"
"return result;\n"
"}\n"
// reduce_multi_block: s_query rows have stride MAX_DESC_LEN and are read at
// offset block_index * BLOCK_SIZE; s_train has stride BLOCK_SIZE. Returns the
// raw sum (no DIST_RES).
"inline result_type reduce_multi_block(\n"
"__local value_type *s_query,\n"
"__local value_type *s_train,\n"
"int block_index,\n"
"int lidx,\n"
"int lidy\n"
")\n"
"{\n"
"result_type result = 0;\n"
"#pragma unroll\n"
"for (int j = 0 ; j < BLOCK_SIZE ; j++)\n"
"{\n"
"DIST(s_query[lidy * MAX_DESC_LEN + block_index * BLOCK_SIZE + j], s_train[j * BLOCK_SIZE + lidx]);\n"
"}\n"
"return result;\n"
"}\n"
// BruteForceMatch_Match: for each query row (queryIdx = BLOCK_SIZE * group id
// + local id 1) finds the train row with the smallest distance and writes its
// index and distance to bestTrainIdx[queryIdx] / bestDistance[queryIdx].
"__kernel void BruteForceMatch_Match(\n"
"__global T *query,\n"
"__global T *train,\n"
"__global int *bestTrainIdx,\n"
"__global float *bestDistance,\n"
"int query_rows,\n"
"int query_cols,\n"
"int train_rows,\n"
"int train_cols,\n"
"int step\n"
")\n"
"{\n"
"const int lidx = get_local_id(0);\n"
"const int lidy = get_local_id(1);\n"
"const int groupidx = get_group_id(0);\n"
"const int queryIdx = mad24(BLOCK_SIZE, groupidx, lidy);\n"
// The row index is clamped to the last row so out-of-range work-items still
// read valid memory; their results are discarded by the range checks later.
"const int queryOffset = min(queryIdx, query_rows - 1) * step;\n"
"__global TN *query_vec = (__global TN *)(query + queryOffset);\n"
// From here on query_cols counts TN vectors rather than T elements.
"query_cols /= kercn;\n"
"__local float sharebuffer[SHARED_MEM_SZ];\n"
"__local value_type *s_query = (__local value_type *)sharebuffer;\n"
// MAX_DESC_LEN > 0: the query rows are loaded into local memory once, zero
// padded up to MAX_DESC_LEN. Otherwise query and train are tiled together
// inside the train loop.
"#if 0 < MAX_DESC_LEN\n"
"__local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * MAX_DESC_LEN;\n"
"#pragma unroll\n"
"for (int i = 0; i < MAX_DESC_LEN / BLOCK_SIZE; i++)\n"
"{\n"
"const int loadx = mad24(BLOCK_SIZE, i, lidx);\n"
"s_query[mad24(MAX_DESC_LEN, lidy, loadx)] = loadx < query_cols ? query_vec[loadx] : 0;\n"
"}\n"
"#else\n"
"__local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE_ODD * BLOCK_SIZE;\n"
"const int s_query_i = mad24(BLOCK_SIZE_ODD, lidy, lidx);\n"
"const int s_train_i = mad24(BLOCK_SIZE_ODD, lidx, lidy);\n"
"#endif\n"
"float myBestDistance = MAX_FLOAT;\n"
"int myBestTrainIdx = -1;\n"
// Walk the train rows in tiles of BLOCK_SIZE rows.
"for (int t = 0, endt = (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; t++)\n"
"{\n"
"result_type result = 0;\n"
"const int trainOffset = min(mad24(BLOCK_SIZE, t, lidy), train_rows - 1) * step;\n"
"__global TN *train_vec = (__global TN *)(train + trainOffset);\n"
"#if 0 < MAX_DESC_LEN\n"
"#pragma unroll\n"
"for (int i = 0; i < MAX_DESC_LEN / BLOCK_SIZE; i++)\n"
"{\n"
"const int loadx = mad24(BLOCK_SIZE, i, lidx);\n"
// NOTE(review): this bound uses train_cols as passed in, whereas query_cols
// was divided by kercn above; confirm the intended behaviour for kercn > 1.
"s_train[mad24(BLOCK_SIZE, lidx, lidy)] = loadx < train_cols ? train_vec[loadx] : 0;\n"
"barrier(CLK_LOCAL_MEM_FENCE);\n"
"result += reduce_multi_block(s_query, s_train, i, lidx, lidy);\n"
"barrier(CLK_LOCAL_MEM_FENCE);\n"
"}\n"
"#else\n"
"for (int i = 0, endq = (query_cols + BLOCK_SIZE - 1) / BLOCK_SIZE; i < endq; i++)\n"
"{\n"
"const int loadx = mad24(i, BLOCK_SIZE, lidx);\n"
"if (loadx < query_cols)\n"
"{\n"
"s_query[s_query_i] = query_vec[loadx];\n"
"s_train[s_train_i] = train_vec[loadx];\n"
"}\n"
"else\n"
"{\n"
"s_query[s_query_i] = 0;\n"
"s_train[s_train_i] = 0;\n"
"}\n"
"barrier(CLK_LOCAL_MEM_FENCE);\n"
"result += reduce_block_match(s_query, s_train, lidx, lidy);\n"
"barrier(CLK_LOCAL_MEM_FENCE);\n"
"}\n"
"#endif\n"
"result = DIST_RES(result);\n"
"const int trainIdx = mad24(BLOCK_SIZE, t, lidx);\n"
"if (queryIdx < query_rows && trainIdx < train_rows && result < myBestDistance )\n"
"{\n"
"myBestDistance = result;\n"
"myBestTrainIdx = trainIdx;\n"
"}\n"
"}\n"
// The local buffer is reused to exchange each work-item's best candidate;
// every work-item then scans the BLOCK_SIZE candidates of its own row.
"barrier(CLK_LOCAL_MEM_FENCE);\n"
"__local float *s_distance = (__local float *)sharebuffer;\n"
"__local int *s_trainIdx = (__local int *)(sharebuffer + BLOCK_SIZE_ODD * BLOCK_SIZE);\n"
"s_distance += lidy * BLOCK_SIZE_ODD;\n"
"s_trainIdx += lidy * BLOCK_SIZE_ODD;\n"
"s_distance[lidx] = myBestDistance;\n"
"s_trainIdx[lidx] = myBestTrainIdx;\n"
"barrier(CLK_LOCAL_MEM_FENCE);\n"
"#pragma unroll\n"
"for (int k = 0 ; k < BLOCK_SIZE; k++)\n"
"{\n"
"if (myBestDistance > s_distance[k])\n"
"{\n"
"myBestDistance = s_distance[k];\n"
"myBestTrainIdx = s_trainIdx[k];\n"
"}\n"
"}\n"
// Only the work-item with lidx == 0 writes the row's result.
"if (queryIdx < query_rows && lidx == 0)\n"
"{\n"
"bestTrainIdx[queryIdx] = myBestTrainIdx;\n"
"bestDistance[queryIdx] = myBestDistance;\n"
"}\n"
"}\n"
// BruteForceMatch_RadiusMatch: each work-item evaluates one (query row, train
// row) pair. Pairs whose distance is below maxDistance are appended to the
// query's output row via an atomic counter in nMatches[queryIdx]; entries at
// or beyond bestTrainIdx_cols are counted but not stored.
"__kernel void BruteForceMatch_RadiusMatch(\n"
"__global T *query,\n"
"__global T *train,\n"
"float maxDistance,\n"
"__global int *bestTrainIdx,\n"
"__global float *bestDistance,\n"
"__global int *nMatches,\n"
"int query_rows,\n"
"int query_cols,\n"
"int train_rows,\n"
"int train_cols,\n"
"int bestTrainIdx_cols,\n"
"int step,\n"
"int ostep\n"
")\n"
"{\n"
"const int lidx = get_local_id(0);\n"
"const int lidy = get_local_id(1);\n"
"const int groupidx = get_group_id(0);\n"
"const int groupidy = get_group_id(1);\n"
"const int queryIdx = mad24(BLOCK_SIZE, groupidy, lidy);\n"
"const int queryOffset = min(queryIdx, query_rows - 1) * step;\n"
"__global TN *query_vec = (__global TN *)(query + queryOffset);\n"
// trainIdx (indexed by lidx) is the row this work-item scores; trainOffset
// (indexed by lidy) is the row this work-item loads into the shared tile.
"const int trainIdx = mad24(BLOCK_SIZE, groupidx, lidx);\n"
"const int trainOffset = min(mad24(BLOCK_SIZE, groupidx, lidy), train_rows - 1) * step;\n"
"__global TN *train_vec = (__global TN *)(train + trainOffset);\n"
"query_cols /= kercn;\n"
"__local float sharebuffer[SHARED_MEM_SZ];\n"
"__local value_type *s_query = (__local value_type *)sharebuffer;\n"
"__local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE_ODD * BLOCK_SIZE;\n"
"result_type result = 0;\n"
"const int s_query_i = mad24(BLOCK_SIZE_ODD, lidy, lidx);\n"
"const int s_train_i = mad24(BLOCK_SIZE_ODD, lidx, lidy);\n"
"for (int i = 0 ; i < (query_cols + BLOCK_SIZE - 1) / BLOCK_SIZE ; ++i)\n"
"{\n"
"const int loadx = mad24(BLOCK_SIZE, i, lidx);\n"
"if (loadx < query_cols)\n"
"{\n"
"s_query[s_query_i] = query_vec[loadx];\n"
"s_train[s_train_i] = train_vec[loadx];\n"
"}\n"
"else\n"
"{\n"
"s_query[s_query_i] = 0;\n"
"s_train[s_train_i] = 0;\n"
"}\n"
"barrier(CLK_LOCAL_MEM_FENCE);\n"
// Unlike the other two kernels, reduce_block applies DIST_RES to each block's
// partial sum before it is added to result.
"result += reduce_block(s_query, s_train, lidx, lidy);\n"
"barrier(CLK_LOCAL_MEM_FENCE);\n"
"}\n"
"if (queryIdx < query_rows && trainIdx < train_rows && convert_float(result) < maxDistance)\n"
"{\n"
"int ind = atom_inc(nMatches + queryIdx);\n"
"if(ind < bestTrainIdx_cols)\n"
"{\n"
"bestTrainIdx[mad24(queryIdx, ostep, ind)] = trainIdx;\n"
"bestDistance[mad24(queryIdx, ostep, ind)] = result;\n"
"}\n"
"}\n"
"}\n"
// BruteForceMatch_knnMatch: like BruteForceMatch_Match but keeps the two
// smallest distances per query row and writes them as int2 / float2 pairs.
"__kernel void BruteForceMatch_knnMatch(\n"
"__global T *query,\n"
"__global T *train,\n"
"__global int2 *bestTrainIdx,\n"
"__global float2 *bestDistance,\n"
"int query_rows,\n"
"int query_cols,\n"
"int train_rows,\n"
"int train_cols,\n"
"int step\n"
")\n"
"{\n"
"const int lidx = get_local_id(0);\n"
"const int lidy = get_local_id(1);\n"
"const int groupidx = get_group_id(0);\n"
"const int queryIdx = mad24(BLOCK_SIZE, groupidx, lidy);\n"
"const int queryOffset = min(queryIdx, query_rows - 1) * step;\n"
"__global TN *query_vec = (__global TN *)(query + queryOffset);\n"
"query_cols /= kercn;\n"
"__local float sharebuffer[SHARED_MEM_SZ];\n"
"__local value_type *s_query = (__local value_type *)sharebuffer;\n"
"#if 0 < MAX_DESC_LEN\n"
"__local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * MAX_DESC_LEN;\n"
"#pragma unroll\n"
"for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE; i ++)\n"
"{\n"
"int loadx = mad24(BLOCK_SIZE, i, lidx);\n"
"s_query[mad24(MAX_DESC_LEN, lidy, loadx)] = loadx < query_cols ? query_vec[loadx] : 0;\n"
"}\n"
"#else\n"
"__local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE_ODD * BLOCK_SIZE;\n"
"const int s_query_i = mad24(BLOCK_SIZE_ODD, lidy, lidx);\n"
"const int s_train_i = mad24(BLOCK_SIZE_ODD, lidx, lidy);\n"
"#endif\n"
"float myBestDistance1 = MAX_FLOAT;\n"
"float myBestDistance2 = MAX_FLOAT;\n"
"int myBestTrainIdx1 = -1;\n"
"int myBestTrainIdx2 = -1;\n"
"for (int t = 0, endt = (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt ; t++)\n"
"{\n"
"result_type result = 0;\n"
"int trainOffset = min(mad24(BLOCK_SIZE, t, lidy), train_rows - 1) * step;\n"
"__global TN *train_vec = (__global TN *)(train + trainOffset);\n"
"#if 0 < MAX_DESC_LEN\n"
"#pragma unroll\n"
"for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE ; i++)\n"
"{\n"
"const int loadx = mad24(BLOCK_SIZE, i, lidx);\n"
// NOTE(review): this bound uses train_cols as passed in, whereas query_cols
// was divided by kercn above; confirm the intended behaviour for kercn > 1.
"s_train[mad24(BLOCK_SIZE, lidx, lidy)] = loadx < train_cols ? train_vec[loadx] : 0;\n"
"barrier(CLK_LOCAL_MEM_FENCE);\n"
"result += reduce_multi_block(s_query, s_train, i, lidx, lidy);\n"
"barrier(CLK_LOCAL_MEM_FENCE);\n"
"}\n"
"#else\n"
"for (int i = 0, endq = (query_cols + BLOCK_SIZE -1) / BLOCK_SIZE; i < endq ; i++)\n"
"{\n"
"const int loadx = mad24(BLOCK_SIZE, i, lidx);\n"
"if (loadx < query_cols)\n"
"{\n"
"s_query[s_query_i] = query_vec[loadx];\n"
"s_train[s_train_i] = train_vec[loadx];\n"
"}\n"
"else\n"
"{\n"
"s_query[s_query_i] = 0;\n"
"s_train[s_train_i] = 0;\n"
"}\n"
"barrier(CLK_LOCAL_MEM_FENCE);\n"
"result += reduce_block_match(s_query, s_train, lidx, lidy);\n"
"barrier(CLK_LOCAL_MEM_FENCE);\n"
"}\n"
"#endif\n"
"result = DIST_RES(result);\n"
"const int trainIdx = mad24(BLOCK_SIZE, t, lidx);\n"
"if (queryIdx < query_rows && trainIdx < train_rows)\n"
"{\n"
// Maintain this work-item's running best and second-best candidates.
"if (result < myBestDistance1)\n"
"{\n"
"myBestDistance2 = myBestDistance1;\n"
"myBestTrainIdx2 = myBestTrainIdx1;\n"
"myBestDistance1 = result;\n"
"myBestTrainIdx1 = trainIdx;\n"
"}\n"
"else if (result < myBestDistance2)\n"
"{\n"
"myBestDistance2 = result;\n"
"myBestTrainIdx2 = trainIdx;\n"
"}\n"
"}\n"
"}\n"
// Row-wide reduction in two passes, both performed by the lidx == 0 work-item:
// pass 1 scans every work-item's best candidate, pass 2 scans every
// work-item's second-best candidate to refine the row's second-best.
"barrier(CLK_LOCAL_MEM_FENCE);\n"
"__local float *s_distance = (__local float *)sharebuffer;\n"
"__local int *s_trainIdx = (__local int *)(sharebuffer + BLOCK_SIZE_ODD * BLOCK_SIZE);\n"
"s_distance += lidy * BLOCK_SIZE_ODD;\n"
"s_trainIdx += lidy * BLOCK_SIZE_ODD;\n"
"s_distance[lidx] = myBestDistance1;\n"
"s_trainIdx[lidx] = myBestTrainIdx1;\n"
"float bestDistance1 = MAX_FLOAT;\n"
"float bestDistance2 = MAX_FLOAT;\n"
"int bestTrainIdx1 = -1;\n"
"int bestTrainIdx2 = -1;\n"
"barrier(CLK_LOCAL_MEM_FENCE);\n"
"if (lidx == 0)\n"
"{\n"
"for (int i = 0 ; i < BLOCK_SIZE ; i++)\n"
"{\n"
"float val = s_distance[i];\n"
"if (val < bestDistance1)\n"
"{\n"
"bestDistance2 = bestDistance1;\n"
"bestTrainIdx2 = bestTrainIdx1;\n"
"bestDistance1 = val;\n"
"bestTrainIdx1 = s_trainIdx[i];\n"
"}\n"
"else if (val < bestDistance2)\n"
"{\n"
"bestDistance2 = val;\n"
"bestTrainIdx2 = s_trainIdx[i];\n"
"}\n"
"}\n"
"}\n"
"barrier(CLK_LOCAL_MEM_FENCE);\n"
"s_distance[lidx] = myBestDistance2;\n"
"s_trainIdx[lidx] = myBestTrainIdx2;\n"
"barrier(CLK_LOCAL_MEM_FENCE);\n"
"if (lidx == 0)\n"
"{\n"
"for (int i = 0 ; i < BLOCK_SIZE ; i++)\n"
"{\n"
"float val = s_distance[i];\n"
"if (val < bestDistance2)\n"
"{\n"
"bestDistance2 = val;\n"
"bestTrainIdx2 = s_trainIdx[i];\n"
"}\n"
"}\n"
"}\n"
"myBestDistance1 = bestDistance1;\n"
"myBestDistance2 = bestDistance2;\n"
"myBestTrainIdx1 = bestTrainIdx1;\n"
"myBestTrainIdx2 = bestTrainIdx2;\n"
"if (queryIdx < query_rows && lidx == 0)\n"
"{\n"
"bestTrainIdx[queryIdx] = (int2)(myBestTrainIdx1, myBestTrainIdx2);\n"
"bestDistance[queryIdx] = (float2)(myBestDistance1, myBestDistance2);\n"
"}\n"
"}\n"
, "35c3a1e231d446e4088561e3604fb94f"};
ProgramSource brute_force_match_oclsrc(brute_force_match.programStr);
// ---------------------------------------------------------------------------
// fast: corner scoring, keypoint detection and non-maximum suppression on an
// 8-bit image.
// ---------------------------------------------------------------------------
const struct ProgramEntry fast={"fast",
// cornerScore: d[0..15] hold the differences between the centre pixel and 16
// surrounding pixels (eight offsets and their negations). The two loops take
// min/max over windows of nine cyclically consecutive entries; the function
// returns -b0-1.
"inline int cornerScore(__global const uchar* img, int step)\n"
"{\n"
"int k, tofs, v = img[0], a0 = 0, b0;\n"
"int d[16];\n"
"#define LOAD2(idx, ofs) \\\n"
"tofs = ofs; d[idx] = (short)(v - img[tofs]); d[idx+8] = (short)(v - img[-tofs])\n"
"LOAD2(0, 3);\n"
"LOAD2(1, -step+3);\n"
"LOAD2(2, -step*2+2);\n"
"LOAD2(3, -step*3+1);\n"
"LOAD2(4, -step*3);\n"
"LOAD2(5, -step*3-1);\n"
"LOAD2(6, -step*2-2);\n"
"LOAD2(7, -step-3);\n"
"#pragma unroll\n"
"for( k = 0; k < 16; k += 2 )\n"
"{\n"
"int a = min((int)d[(k+1)&15], (int)d[(k+2)&15]);\n"
"a = min(a, (int)d[(k+3)&15]);\n"
"a = min(a, (int)d[(k+4)&15]);\n"
"a = min(a, (int)d[(k+5)&15]);\n"
"a = min(a, (int)d[(k+6)&15]);\n"
"a = min(a, (int)d[(k+7)&15]);\n"
"a = min(a, (int)d[(k+8)&15]);\n"
"a0 = max(a0, min(a, (int)d[k&15]));\n"
"a0 = max(a0, min(a, (int)d[(k+9)&15]));\n"
"}\n"
"b0 = -a0;\n"
"#pragma unroll\n"
"for( k = 0; k < 16; k += 2 )\n"
"{\n"
"int b = max((int)d[(k+1)&15], (int)d[(k+2)&15]);\n"
"b = max(b, (int)d[(k+3)&15]);\n"
"b = max(b, (int)d[(k+4)&15]);\n"
"b = max(b, (int)d[(k+5)&15]);\n"
"b = max(b, (int)d[(k+6)&15]);\n"
"b = max(b, (int)d[(k+7)&15]);\n"
"b = max(b, (int)d[(k+8)&15]);\n"
// k only takes even values 0..14 here, so d[k] stays in range without the
// mask used in the first loop.
"b0 = min(b0, max(b, (int)d[k]));\n"
"b0 = min(b0, max(b, (int)d[(k+9)&15]));\n"
"}\n"
"return -b0-1;\n"
"}\n"
// FAST_findKeypoints: one work-item per pixel, skipping a 3-pixel border.
// m0 / m1 collect one bit per surrounding pixel that is darker than
// v - threshold / brighter than v + threshold. Accepted pixels are appended
// to kp_loc as (j, i) pairs starting at index 1; kp_loc[0] is the atomic
// counter, which keeps counting even once max_keypoints is reached.
"__kernel\n"
"void FAST_findKeypoints(\n"
"__global const uchar * _img, int step, int img_offset,\n"
"int img_rows, int img_cols,\n"
"volatile __global int* kp_loc,\n"
"int max_keypoints, int threshold )\n"
"{\n"
"int j = get_global_id(0) + 3;\n"
"int i = get_global_id(1) + 3;\n"
"if (i < img_rows - 3 && j < img_cols - 3)\n"
"{\n"
"__global const uchar* img = _img + mad24(i, step, j + img_offset);\n"
"int v = img[0], t0 = v - threshold, t1 = v + threshold;\n"
"int k, tofs, v0, v1;\n"
"int m0 = 0, m1 = 0;\n"
"#define UPDATE_MASK(idx, ofs) \\\n"
"tofs = ofs; v0 = img[tofs]; v1 = img[-tofs]; \\\n"
"m0 |= ((v0 < t0) << idx) | ((v1 < t0) << (8 + idx)); \\\n"
"m1 |= ((v0 > t1) << idx) | ((v1 > t1) << (8 + idx))\n"
// Early-out tests on a growing subset of the 16 pixels before the full check.
"UPDATE_MASK(0, 3);\n"
"if( (m0 | m1) == 0 )\n"
"return;\n"
"UPDATE_MASK(2, -step*2+2);\n"
"UPDATE_MASK(4, -step*3);\n"
"UPDATE_MASK(6, -step*2-2);\n"
"#define EVEN_MASK (1+4+16+64)\n"
"if( ((m0 | (m0 >> 8)) & EVEN_MASK) != EVEN_MASK &&\n"
"((m1 | (m1 >> 8)) & EVEN_MASK) != EVEN_MASK )\n"
"return;\n"
"UPDATE_MASK(1, -step+3);\n"
"UPDATE_MASK(3, -step*3+1);\n"
"UPDATE_MASK(5, -step*3-1);\n"
"UPDATE_MASK(7, -step-3);\n"
"if( ((m0 | (m0 >> 8)) & 255) != 255 &&\n"
"((m1 | (m1 >> 8)) & 255) != 255 )\n"
"return;\n"
// Duplicate the low 16 bits into the high half so runs that wrap around the
// 16-entry ring can be tested with a plain shift. 511 is nine set bits, so
// CHECKn(i) tests for nine consecutive set bits starting at bit i.
"m0 |= m0 << 16;\n"
"m1 |= m1 << 16;\n"
"#define CHECK0(i) ((m0 & (511 << i)) == (511 << i))\n"
"#define CHECK1(i) ((m1 & (511 << i)) == (511 << i))\n"
"if( CHECK0(0) + CHECK0(1) + CHECK0(2) + CHECK0(3) +\n"
"CHECK0(4) + CHECK0(5) + CHECK0(6) + CHECK0(7) +\n"
"CHECK0(8) + CHECK0(9) + CHECK0(10) + CHECK0(11) +\n"
"CHECK0(12) + CHECK0(13) + CHECK0(14) + CHECK0(15) +\n"
"CHECK1(0) + CHECK1(1) + CHECK1(2) + CHECK1(3) +\n"
"CHECK1(4) + CHECK1(5) + CHECK1(6) + CHECK1(7) +\n"
"CHECK1(8) + CHECK1(9) + CHECK1(10) + CHECK1(11) +\n"
"CHECK1(12) + CHECK1(13) + CHECK1(14) + CHECK1(15) == 0 )\n"
"return;\n"
"{\n"
"int idx = atomic_inc(kp_loc);\n"
"if( idx < max_keypoints )\n"
"{\n"
"kp_loc[1 + 2*idx] = j;\n"
"kp_loc[2 + 2*idx] = i;\n"
"}\n"
"}\n"
"}\n"
"}\n"
// FAST_nonmaxSupression: one work-item per input keypoint (x, y read from
// kp_in at 1 + 2*idx and 2 + 2*idx). A keypoint is kept only when its
// cornerScore is strictly greater than that of each of its eight neighbours;
// a neighbour comparison is skipped when x or y is within 4 pixels of the
// corresponding border. Survivors are appended to kp_out as (x, y, score)
// triples starting at index 1, with kp_out[0] as the atomic counter.
"__kernel\n"
"void FAST_nonmaxSupression(\n"
"__global const int* kp_in, volatile __global int* kp_out,\n"
"__global const uchar * _img, int step, int img_offset,\n"
"int rows, int cols, int counter, int max_keypoints)\n"
"{\n"
"const int idx = get_global_id(0);\n"
"if (idx < counter)\n"
"{\n"
"int x = kp_in[1 + 2*idx];\n"
"int y = kp_in[2 + 2*idx];\n"
"__global const uchar* img = _img + mad24(y, step, x + img_offset);\n"
"int s = cornerScore(img, step);\n"
"if( (x < 4 || s > cornerScore(img-1, step)) +\n"
"(y < 4 || s > cornerScore(img-step, step)) != 2 )\n"
"return;\n"
"if( (x >= cols - 4 || s > cornerScore(img+1, step)) +\n"
"(y >= rows - 4 || s > cornerScore(img+step, step)) +\n"
"(x < 4 || y < 4 || s > cornerScore(img-step-1, step)) +\n"
"(x >= cols - 4 || y < 4 || s > cornerScore(img-step+1, step)) +\n"
"(x < 4 || y >= rows - 4 || s > cornerScore(img+step-1, step)) +\n"
"(x >= cols - 4 || y >= rows - 4 || s > cornerScore(img+step+1, step)) == 6)\n"
"{\n"
"int new_idx = atomic_inc(kp_out);\n"
"if( new_idx < max_keypoints )\n"
"{\n"
"kp_out[1 + 3*new_idx] = x;\n"
"kp_out[2 + 3*new_idx] = y;\n"
"kp_out[3 + 3*new_idx] = s;\n"
"}\n"
"}\n"
"}\n"
"}\n"
, "f5e6f463f21a7ed77bd4d2c753478305"};
ProgramSource fast_oclsrc(fast.programStr);
// ---------------------------------------------------------------------------
// orb: three kernels, each compiled only when its guard macro is defined
// (ORB_RESPONSES, ORB_ANGLES, ORB_DESCRIPTORS). Keypoints are records of ints:
// KEYPOINT_SIZE (3) ints per keypoint for the first two kernels and
// ORIENTED_KEYPOINT_SIZE (4) for the descriptor kernel. The Z field selects
// an entry of layerinfo, which supplies a byte offset into the image buffer.
// ---------------------------------------------------------------------------
const struct ProgramEntry orb={"orb",
"#define LAYERINFO_SIZE 1\n"
"#define LAYERINFO_OFS 0\n"
"#define KEYPOINT_SIZE 3\n"
"#define ORIENTED_KEYPOINT_SIZE 4\n"
"#define KEYPOINT_X 0\n"
"#define KEYPOINT_Y 1\n"
"#define KEYPOINT_Z 2\n"
"#define KEYPOINT_ANGLE 3\n"
// ORB_HarrisResponses: per keypoint, accumulates a = sum(Ix*Ix),
// b = sum(Iy*Iy), c = sum(Ix*Iy) over a blockSize x blockSize window whose
// top-left corner is (x - blockSize/2, y - blockSize/2), then stores
// (a*b - c*c - HARRIS_K*(a+b)^2) * scale_sq_sq in responses[idx].
// blockSize, HARRIS_K and scale_sq_sq are not defined in this text and have
// to be supplied when the program is built.
"#ifdef ORB_RESPONSES\n"
"__kernel void\n"
"ORB_HarrisResponses(__global const uchar* imgbuf, int imgstep, int imgoffset0,\n"
"__global const int* layerinfo, __global const int* keypoints,\n"
"__global float* responses, int nkeypoints )\n"
"{\n"
"int idx = get_global_id(0);\n"
"if( idx < nkeypoints )\n"
"{\n"
"__global const int* kpt = keypoints + idx*KEYPOINT_SIZE;\n"
"__global const int* layer = layerinfo + kpt[KEYPOINT_Z]*LAYERINFO_SIZE;\n"
"__global const uchar* img = imgbuf + imgoffset0 + layer[LAYERINFO_OFS] +\n"
"(kpt[KEYPOINT_Y] - blockSize/2)*imgstep + (kpt[KEYPOINT_X] - blockSize/2);\n"
"int i, j;\n"
"int a = 0, b = 0, c = 0;\n"
// The inner loop advances img by blockSize, so the outer increment adds
// imgstep - blockSize to land on the start of the next window row.
"for( i = 0; i < blockSize; i++, img += imgstep-blockSize )\n"
"{\n"
"for( j = 0; j < blockSize; j++, img++ )\n"
"{\n"
"int Ix = (img[1] - img[-1])*2 + img[-imgstep+1] - img[-imgstep-1] + img[imgstep+1] - img[imgstep-1];\n"
"int Iy = (img[imgstep] - img[-imgstep])*2 + img[imgstep-1] - img[-imgstep-1] + img[imgstep+1] - img[-imgstep+1];\n"
"a += Ix*Ix;\n"
"b += Iy*Iy;\n"
"c += Ix*Iy;\n"
"}\n"
"}\n"
"responses[idx] = ((float)a * b - (float)c * c - HARRIS_K * (float)(a + b) * (a + b))*scale_sq_sq;\n"
"}\n"
"}\n"
"#endif\n"
"#ifdef ORB_ANGLES\n"
"#define _DBL_EPSILON 2.2204460492503131e-16f\n"
"#define atan2_p1 (0.9997878412794807f*57.29577951308232f)\n"
"#define atan2_p3 (-0.3258083974640975f*57.29577951308232f)\n"
"#define atan2_p5 (0.1555786518463281f*57.29577951308232f)\n"
"#define atan2_p7 (-0.04432655554792128f*57.29577951308232f)\n"
// fastAtan2: odd polynomial in the ratio of the smaller to the larger of
// |x| and |y|, with coefficients pre-multiplied by 57.29577951308232, followed
// by folding with 90, 180 and 360. _DBL_EPSILON is added to the denominator
// before dividing.
"inline float fastAtan2( float y, float x )\n"
"{\n"
"float ax = fabs(x), ay = fabs(y);\n"
"float a, c, c2;\n"
"if( ax >= ay )\n"
"{\n"
"c = ay/(ax + _DBL_EPSILON);\n"
"c2 = c*c;\n"
"a = (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;\n"
"}\n"
"else\n"
"{\n"
"c = ax/(ay + _DBL_EPSILON);\n"
"c2 = c*c;\n"
"a = 90.f - (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;\n"
"}\n"
"if( x < 0 )\n"
"a = 180.f - a;\n"
"if( y < 0 )\n"
"a = 360.f - a;\n"
"return a;\n"
"}\n"
// ORB_ICAngle: per keypoint, accumulates the first-order moments m_10
// (weighted by u) and m_01 (weighted by v) of the pixels around the keypoint.
// Row v = 0 spans -half_k..half_k; rows +/-v span -u_max[v]..u_max[v].
// Stores fastAtan2(m_01, m_10) in responses[idx].
"__kernel void\n"
"ORB_ICAngle(__global const uchar* imgbuf, int imgstep, int imgoffset0,\n"
"__global const int* layerinfo, __global const int* keypoints,\n"
"__global float* responses, const __global int* u_max,\n"
"int nkeypoints, int half_k )\n"
"{\n"
"int idx = get_global_id(0);\n"
"if( idx < nkeypoints )\n"
"{\n"
"__global const int* kpt = keypoints + idx*KEYPOINT_SIZE;\n"
"__global const int* layer = layerinfo + kpt[KEYPOINT_Z]*LAYERINFO_SIZE;\n"
"__global const uchar* center = imgbuf + imgoffset0 + layer[LAYERINFO_OFS] +\n"
"kpt[KEYPOINT_Y]*imgstep + kpt[KEYPOINT_X];\n"
"int u, v, m_01 = 0, m_10 = 0;\n"
"for( u = -half_k; u <= half_k; u++ )\n"
"m_10 += u * center[u];\n"
"for( v = 1; v <= half_k; v++ )\n"
"{\n"
"int v_sum = 0;\n"
"int d = u_max[v];\n"
"for( u = -d; u <= d; u++ )\n"
"{\n"
"int val_plus = center[u + v*imgstep], val_minus = center[u - v*imgstep];\n"
"v_sum += (val_plus - val_minus);\n"
"m_10 += u * (val_plus + val_minus);\n"
"}\n"
"m_01 += v * v_sum;\n"
"}\n"
"responses[idx] = fastAtan2((float)m_01, (float)m_10);\n"
"}\n"
"}\n"
"#endif\n"
// ORB_computeDescriptor: per keypoint, writes dsize descriptor bytes to
// _desc + idx*dsize. The keypoint angle is stored as float bits in the int
// record (read with as_float) and scaled by 0.01745329251994329547 before
// sincos. GET_VALUE(idx) reads the image at the pattern point idx rotated by
// that angle and rounded to the nearest integer offset. WTA_K is not defined
// in this text and must be 2, 3 or 4.
"#ifdef ORB_DESCRIPTORS\n"
"__kernel void\n"
"ORB_computeDescriptor(__global const uchar* imgbuf, int imgstep, int imgoffset0,\n"
"__global const int* layerinfo, __global const int* keypoints,\n"
"__global uchar* _desc, const __global int* pattern,\n"
"int nkeypoints, int dsize )\n"
"{\n"
"int idx = get_global_id(0);\n"
"if( idx < nkeypoints )\n"
"{\n"
"int i;\n"
"__global const int* kpt = keypoints + idx*ORIENTED_KEYPOINT_SIZE;\n"
"__global const int* layer = layerinfo + kpt[KEYPOINT_Z]*LAYERINFO_SIZE;\n"
"__global const uchar* center = imgbuf + imgoffset0 + layer[LAYERINFO_OFS] +\n"
"kpt[KEYPOINT_Y]*imgstep + kpt[KEYPOINT_X];\n"
"float angle = as_float(kpt[KEYPOINT_ANGLE]);\n"
"angle *= 0.01745329251994329547f;\n"
"float cosa;\n"
"float sina = sincos(angle, &cosa);\n"
"__global uchar* desc = _desc + idx*dsize;\n"
"#define GET_VALUE(idx) \\\n"
"center[mad24(convert_int_rte(pattern[(idx)*2] * sina + pattern[(idx)*2+1] * cosa), imgstep, \\\n"
"convert_int_rte(pattern[(idx)*2] * cosa - pattern[(idx)*2+1] * sina))]\n"
"for( i = 0; i < dsize; i++ )\n"
"{\n"
"int val;\n"
// WTA_K == 2: eight pairwise comparisons, one bit each; consumes 16 points.
"#if WTA_K == 2\n"
"int t0, t1;\n"
"t0 = GET_VALUE(0); t1 = GET_VALUE(1);\n"
"val = t0 < t1;\n"
"t0 = GET_VALUE(2); t1 = GET_VALUE(3);\n"
"val |= (t0 < t1) << 1;\n"
"t0 = GET_VALUE(4); t1 = GET_VALUE(5);\n"
"val |= (t0 < t1) << 2;\n"
"t0 = GET_VALUE(6); t1 = GET_VALUE(7);\n"
"val |= (t0 < t1) << 3;\n"
"t0 = GET_VALUE(8); t1 = GET_VALUE(9);\n"
"val |= (t0 < t1) << 4;\n"
"t0 = GET_VALUE(10); t1 = GET_VALUE(11);\n"
"val |= (t0 < t1) << 5;\n"
"t0 = GET_VALUE(12); t1 = GET_VALUE(13);\n"
"val |= (t0 < t1) << 6;\n"
"t0 = GET_VALUE(14); t1 = GET_VALUE(15);\n"
"val |= (t0 < t1) << 7;\n"
"pattern += 16*2;\n"
// WTA_K == 3: four groups of three points, two bits each; consumes 12 points.
"#elif WTA_K == 3\n"
"int t0, t1, t2;\n"
"t0 = GET_VALUE(0); t1 = GET_VALUE(1); t2 = GET_VALUE(2);\n"
"val = t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0);\n"
"t0 = GET_VALUE(3); t1 = GET_VALUE(4); t2 = GET_VALUE(5);\n"
"val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 2;\n"
"t0 = GET_VALUE(6); t1 = GET_VALUE(7); t2 = GET_VALUE(8);\n"
"val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 4;\n"
"t0 = GET_VALUE(9); t1 = GET_VALUE(10); t2 = GET_VALUE(11);\n"
"val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 6;\n"
"pattern += 12*2;\n"
// WTA_K == 4: four groups of four points, two bits each; consumes 16 points.
"#elif WTA_K == 4\n"
"int t0, t1, t2, t3, k;\n"
"int a, b;\n"
"t0 = GET_VALUE(0); t1 = GET_VALUE(1);\n"
"t2 = GET_VALUE(2); t3 = GET_VALUE(3);\n"
"a = 0, b = 2;\n"
"if( t1 > t0 ) t0 = t1, a = 1;\n"
"if( t3 > t2 ) t2 = t3, b = 3;\n"
"k = t0 > t2 ? a : b;\n"
"val = k;\n"
"t0 = GET_VALUE(4); t1 = GET_VALUE(5);\n"
"t2 = GET_VALUE(6); t3 = GET_VALUE(7);\n"
"a = 0, b = 2;\n"
"if( t1 > t0 ) t0 = t1, a = 1;\n"
"if( t3 > t2 ) t2 = t3, b = 3;\n"
"k = t0 > t2 ? a : b;\n"
"val |= k << 2;\n"
"t0 = GET_VALUE(8); t1 = GET_VALUE(9);\n"
"t2 = GET_VALUE(10); t3 = GET_VALUE(11);\n"
"a = 0, b = 2;\n"
"if( t1 > t0 ) t0 = t1, a = 1;\n"
"if( t3 > t2 ) t2 = t3, b = 3;\n"
"k = t0 > t2 ? a : b;\n"
"val |= k << 4;\n"
"t0 = GET_VALUE(12); t1 = GET_VALUE(13);\n"
"t2 = GET_VALUE(14); t3 = GET_VALUE(15);\n"
"a = 0, b = 2;\n"
"if( t1 > t0 ) t0 = t1, a = 1;\n"
"if( t3 > t2 ) t2 = t3, b = 3;\n"
"k = t0 > t2 ? a : b;\n"
"val |= k << 6;\n"
"pattern += 16*2;\n"
"#else\n"
"#error \"unknown/undefined WTA_K value; should be 2, 3 or 4\"\n"
"#endif\n"
"desc[i] = (uchar)val;\n"
"}\n"
"}\n"
"}\n"
"#endif\n"
, "a7c2cfaeda19907b637211b1cc91d253"};
ProgramSource orb_oclsrc(orb.programStr);
}
}}