1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file Scatter.cpp
24 *
25 * @brief Shader support library implementation for scatter emulation
26 *
27 * Notes:
28 *
29 ******************************************************************************/
30 #include <stdarg.h>
31 #include "common/os.h"
32 #include "common/simdlib.hpp"
33
ScatterPS_256(uint8_t * pBase,SIMD256::Integer vIndices,SIMD256::Float vSrc,uint8_t mask,uint32_t scale)34 extern "C" void ScatterPS_256(uint8_t* pBase, SIMD256::Integer vIndices, SIMD256::Float vSrc, uint8_t mask, uint32_t scale)
35 {
36 OSALIGN(float, 32) src[8];
37 OSALIGN(uint32_t, 32) indices[8];
38
39 SIMD256::store_ps(src, vSrc);
40 SIMD256::store_si((SIMD256::Integer*)indices, vIndices);
41
42 unsigned long index;
43 while (_BitScanForward(&index, mask))
44 {
45 mask &= ~(1 << index);
46
47 *(float*)(pBase + indices[index] * scale) = src[index];
48 }
49 }
50