//******************************************************************************
//*
//* Copyright (C) 2015 The Android Open Source Project
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************
//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/

//******************************************************************************
//*
//*
//* @brief
//*  This file contains definitions of routines for variance calculation
//*
//* @author
//*  Ittiam
//*
//* @par List of Functions:
//*  - icv_variance_8x4_av8()
//*
//* @remarks
//*  None
//*
//*******************************************************************************


//******************************************************************************
//*
//*  @brief computes variance of a 8x4 block
//*
//*
//*  @par   Description
//*   This function computes the variance of a 8x4 block of 8-bit samples as
//*       (32 * SumOfSquares - Sum * Sum) >> 10
//*   i.e. (N * sum(x^2) - sum(x)^2) / N^2 with N = 8 * 4 = 32, truncated.
//*
//*  @param[in] pu1_src
//*   UWORD8 pointer to the source                      (x0)
//*
//*  @param[in] src_strd
//*   integer source stride                             (w1)
//*   NOTE(review): assumed to be a 32-bit C int - confirm against the C
//*   prototype. It is sign-extended on entry because AAPCS64 leaves the
//*   upper 32 bits of a register holding a 32-bit argument unspecified.
//*
//*  @param[in] wd
//*   Width (assumed to be 8)                           (not read)
//*
//*  @param[in] ht
//*   Height (assumed to be 4)                          (not read)
//*
//*  @returns
//*   variance value in x0
//*
//*  @remarks
//*   Syntax : GNU as, AArch64
//*   ABI    : AAPCS64, leaf function, no stack usage
//*   Clobbers: x0, x1, v0-v4, v6, v20-v22, v24-v26
//*             (v8-v15 are not touched)
//*
//******************************************************************************

    .text
    .p2align 2

    .global icv_variance_8x4_av8

icv_variance_8x4_av8:

    // The stride is used as a 64-bit post-index register below, so widen it
    // first instead of trusting the unspecified upper half of x1.
    sxtw    x1, w1

    // Load 8x4 source: one 8-byte row per register, advancing by the stride.
    // The pointer is not needed after the last row, so no post-increment there.
    ld1     {v0.8b}, [x0], x1
    ld1     {v1.8b}, [x0], x1
    ld1     {v2.8b}, [x0], x1
    ld1     {v3.8b}, [x0]

    // Calculate Sum(values)
    // Column sums widened to 16 bits (at most 4 * 255 per lane).
    uaddl   v4.8h, v0.8b, v1.8b
    uaddl   v6.8h, v2.8b, v3.8b
    add     v4.8h, v4.8h, v6.8h

    // Pairwise reduction 8 -> 4 -> 2 -> 1; lane 0 ends up holding the total
    // (at most 32 * 255 = 8160, which fits a signed 16-bit lane).
    addp    v4.8h, v4.8h, v4.8h
    addp    v4.4h, v4.4h, v4.4h
    addp    v4.4h, v4.4h, v4.4h

    // Calculate SumOfSquares
    // Per-sample squares; 255 * 255 = 65025 still fits an unsigned 16-bit lane.
    umull   v20.8h, v0.8b, v0.8b
    umull   v22.8h, v1.8b, v1.8b
    umull   v24.8h, v2.8b, v2.8b
    umull   v26.8h, v3.8b, v3.8b

    // Widen to 32 bits while adding rows pairwise. The low halves are consumed
    // before uaddl2 overwrites v20/v24 with the high-half sums.
    uaddl   v21.4s, v20.4h, v22.4h
    uaddl   v25.4s, v24.4h, v26.4h
    uaddl2  v20.4s, v20.8h, v22.8h
    uaddl2  v24.4s, v24.8h, v26.8h

    add     v20.4s, v20.4s, v21.4s
    add     v22.4s, v24.4s, v25.4s
    add     v20.4s, v20.4s, v22.4s

    // Pairwise reduction 4 -> 2 -> 1; lane 0 holds the total.
    addp    v20.4s, v20.4s, v20.4s
    addp    v20.2s, v20.2s, v20.2s

    // Sum(values)
    smov    x0, v4.h[0]

    // SumOfSquares
    smov    x1, v20.s[0]

    // SumOfSquares * 8 * 4 - SquareOfSums
    lsl     x1, x1, #5
    msub    x0, x0, x0, x1          // x0 = x1 - x0 * x0

    // Divide by 32 * 32
    asr     x0, x0, #10
    ret