/* * Copyright 2018 Google Inc. * * Use of this source code is governed by a BSD-style license that can be * found in the LICENSE file. * */ // // // #include "transpose.h" #include "common/macros.h" // // Rows must be an even number. This is enforced elsewhere. // // The transpose requires (cols_log2 * rows/2) row-pair blends. // void hsg_transpose(uint32_t const cols_log2, uint32_t const rows, void (*pfn_blend)(uint32_t const cols_log2, uint32_t const row_ll, // lower-left uint32_t const row_ur, // upper-right void * blend), void * blend, void (*pfn_remap)(uint32_t const row_from, uint32_t const row_to, void * remap), void * remap) { // get mapping array uint32_t * map_curr = ALLOCA_MACRO(rows * sizeof(*map_curr)); uint32_t * map_next = ALLOCA_MACRO(rows * sizeof(*map_next)); // init the mapping array for (uint32_t ii=0; ii> cols_log2,remap); } // // test it! // #ifdef HS_TRANSPOSE_DEBUG #include static uint32_t cols; // implicit on SIMD/GPU static void hsg_debug_blend(uint32_t const cols_log2, uint32_t const row_ll, // lower-left uint32_t const row_ur, // upper-right uint32_t * b) { fprintf(stdout,"BLEND( %u, %3u, %3u )\n",cols_log2,row_ll,row_ur); uint32_t * const ll = ALLOCA_MACRO(cols * sizeof(*b)); uint32_t * const ur = ALLOCA_MACRO(cols * sizeof(*b)); memcpy(ll,b+row_ll*cols,cols * sizeof(*b)); memcpy(ur,b+row_ur*cols,cols * sizeof(*b)); for (uint32_t ii=0; ii> cols_log2-1) & 1) ? ll[ii] : ur[ii^(1<> cols_log2-1) & 1) ? ll[ii^(1<