1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.math.stat.correlation; 18 19 import org.apache.commons.math.MathRuntimeException; 20 import org.apache.commons.math.exception.util.LocalizedFormats; 21 import org.apache.commons.math.linear.RealMatrix; 22 import org.apache.commons.math.linear.BlockRealMatrix; 23 import org.apache.commons.math.stat.descriptive.moment.Mean; 24 import org.apache.commons.math.stat.descriptive.moment.Variance; 25 26 /** 27 * Computes covariances for pairs of arrays or columns of a matrix. 28 * 29 * <p>The constructors that take <code>RealMatrix</code> or 30 * <code>double[][]</code> arguments generate covariance matrices. The 31 * columns of the input matrices are assumed to represent variable values.</p> 32 * 33 * <p>The constructor argument <code>biasCorrected</code> determines whether or 34 * not computed covariances are bias-corrected.</p> 35 * 36 * <p>Unbiased covariances are given by the formula</p> 37 * <code>cov(X, Y) = Σ[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code> 38 * where <code>E(X)</code> is the mean of <code>X</code> and <code>E(Y)</code> 39 * is the mean of the <code>Y</code> values. 40 * 41 * <p>Non-bias-corrected estimates use <code>n</code> in place of <code>n - 1</code> 42 * 43 * @version $Revision: 983921 $ $Date: 2010-08-10 12:46:06 +0200 (mar. 10 août 2010) $ 44 * @since 2.0 45 */ 46 public class Covariance { 47 48 /** covariance matrix */ 49 private final RealMatrix covarianceMatrix; 50 51 /** 52 * Create an empty covariance matrix. 53 */ 54 /** Number of observations (length of covariate vectors) */ 55 private final int n; 56 57 /** 58 * Create a Covariance with no data 59 */ Covariance()60 public Covariance() { 61 super(); 62 covarianceMatrix = null; 63 n = 0; 64 } 65 66 /** 67 * Create a Covariance matrix from a rectangular array 68 * whose columns represent covariates. 69 * 70 * <p>The <code>biasCorrected</code> parameter determines whether or not 71 * covariance estimates are bias-corrected.</p> 72 * 73 * <p>The input array must be rectangular with at least two columns 74 * and two rows.</p> 75 * 76 * @param data rectangular array with columns representing covariates 77 * @param biasCorrected true means covariances are bias-corrected 78 * @throws IllegalArgumentException if the input data array is not 79 * rectangular with at least two rows and two columns. 80 */ Covariance(double[][] data, boolean biasCorrected)81 public Covariance(double[][] data, boolean biasCorrected) { 82 this(new BlockRealMatrix(data), biasCorrected); 83 } 84 85 /** 86 * Create a Covariance matrix from a rectangular array 87 * whose columns represent covariates. 88 * 89 * <p>The input array must be rectangular with at least two columns 90 * and two rows</p> 91 * 92 * @param data rectangular array with columns representing covariates 93 * @throws IllegalArgumentException if the input data array is not 94 * rectangular with at least two rows and two columns. 95 */ Covariance(double[][] data)96 public Covariance(double[][] data) { 97 this(data, true); 98 } 99 100 /** 101 * Create a covariance matrix from a matrix whose columns 102 * represent covariates. 103 * 104 * <p>The <code>biasCorrected</code> parameter determines whether or not 105 * covariance estimates are bias-corrected.</p> 106 * 107 * <p>The matrix must have at least two columns and two rows</p> 108 * 109 * @param matrix matrix with columns representing covariates 110 * @param biasCorrected true means covariances are bias-corrected 111 * @throws IllegalArgumentException if the input matrix does not have 112 * at least two rows and two columns 113 */ Covariance(RealMatrix matrix, boolean biasCorrected)114 public Covariance(RealMatrix matrix, boolean biasCorrected) { 115 checkSufficientData(matrix); 116 n = matrix.getRowDimension(); 117 covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected); 118 } 119 120 /** 121 * Create a covariance matrix from a matrix whose columns 122 * represent covariates. 123 * 124 * <p>The matrix must have at least two columns and two rows</p> 125 * 126 * @param matrix matrix with columns representing covariates 127 * @throws IllegalArgumentException if the input matrix does not have 128 * at least two rows and two columns 129 */ Covariance(RealMatrix matrix)130 public Covariance(RealMatrix matrix) { 131 this(matrix, true); 132 } 133 134 /** 135 * Returns the covariance matrix 136 * 137 * @return covariance matrix 138 */ getCovarianceMatrix()139 public RealMatrix getCovarianceMatrix() { 140 return covarianceMatrix; 141 } 142 143 /** 144 * Returns the number of observations (length of covariate vectors) 145 * 146 * @return number of observations 147 */ 148 getN()149 public int getN() { 150 return n; 151 } 152 153 /** 154 * Compute a covariance matrix from a matrix whose columns represent 155 * covariates. 156 * @param matrix input matrix (must have at least two columns and two rows) 157 * @param biasCorrected determines whether or not covariance estimates are bias-corrected 158 * @return covariance matrix 159 */ computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected)160 protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected) { 161 int dimension = matrix.getColumnDimension(); 162 Variance variance = new Variance(biasCorrected); 163 RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension); 164 for (int i = 0; i < dimension; i++) { 165 for (int j = 0; j < i; j++) { 166 double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected); 167 outMatrix.setEntry(i, j, cov); 168 outMatrix.setEntry(j, i, cov); 169 } 170 outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i))); 171 } 172 return outMatrix; 173 } 174 175 /** 176 * Create a covariance matrix from a matrix whose columns represent 177 * covariates. Covariances are computed using the bias-corrected formula. 178 * @param matrix input matrix (must have at least two columns and two rows) 179 * @return covariance matrix 180 * @see #Covariance 181 */ computeCovarianceMatrix(RealMatrix matrix)182 protected RealMatrix computeCovarianceMatrix(RealMatrix matrix) { 183 return computeCovarianceMatrix(matrix, true); 184 } 185 186 /** 187 * Compute a covariance matrix from a rectangular array whose columns represent 188 * covariates. 189 * @param data input array (must have at least two columns and two rows) 190 * @param biasCorrected determines whether or not covariance estimates are bias-corrected 191 * @return covariance matrix 192 */ computeCovarianceMatrix(double[][] data, boolean biasCorrected)193 protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected) { 194 return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected); 195 } 196 197 /** 198 * Create a covariance matrix from a rectangual array whose columns represent 199 * covariates. Covariances are computed using the bias-corrected formula. 200 * @param data input array (must have at least two columns and two rows) 201 * @return covariance matrix 202 * @see #Covariance 203 */ computeCovarianceMatrix(double[][] data)204 protected RealMatrix computeCovarianceMatrix(double[][] data) { 205 return computeCovarianceMatrix(data, true); 206 } 207 208 /** 209 * Computes the covariance between the two arrays. 210 * 211 * <p>Array lengths must match and the common length must be at least 2.</p> 212 * 213 * @param xArray first data array 214 * @param yArray second data array 215 * @param biasCorrected if true, returned value will be bias-corrected 216 * @return returns the covariance for the two arrays 217 * @throws IllegalArgumentException if the arrays lengths do not match or 218 * there is insufficient data 219 */ covariance(final double[] xArray, final double[] yArray, boolean biasCorrected)220 public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected) 221 throws IllegalArgumentException { 222 Mean mean = new Mean(); 223 double result = 0d; 224 int length = xArray.length; 225 if (length != yArray.length) { 226 throw MathRuntimeException.createIllegalArgumentException( 227 LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, length, yArray.length); 228 } else if (length < 2) { 229 throw MathRuntimeException.createIllegalArgumentException( 230 LocalizedFormats.INSUFFICIENT_DIMENSION, length, 2); 231 } else { 232 double xMean = mean.evaluate(xArray); 233 double yMean = mean.evaluate(yArray); 234 for (int i = 0; i < length; i++) { 235 double xDev = xArray[i] - xMean; 236 double yDev = yArray[i] - yMean; 237 result += (xDev * yDev - result) / (i + 1); 238 } 239 } 240 return biasCorrected ? result * ((double) length / (double)(length - 1)) : result; 241 } 242 243 /** 244 * Computes the covariance between the two arrays, using the bias-corrected 245 * formula. 246 * 247 * <p>Array lengths must match and the common length must be at least 2.</p> 248 * 249 * @param xArray first data array 250 * @param yArray second data array 251 * @return returns the covariance for the two arrays 252 * @throws IllegalArgumentException if the arrays lengths do not match or 253 * there is insufficient data 254 */ covariance(final double[] xArray, final double[] yArray)255 public double covariance(final double[] xArray, final double[] yArray) 256 throws IllegalArgumentException { 257 return covariance(xArray, yArray, true); 258 } 259 260 /** 261 * Throws IllegalArgumentException of the matrix does not have at least 262 * two columns and two rows 263 * @param matrix matrix to check 264 */ checkSufficientData(final RealMatrix matrix)265 private void checkSufficientData(final RealMatrix matrix) { 266 int nRows = matrix.getRowDimension(); 267 int nCols = matrix.getColumnDimension(); 268 if (nRows < 2 || nCols < 2) { 269 throw MathRuntimeException.createIllegalArgumentException( 270 LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS, 271 nRows, nCols); 272 } 273 } 274 } 275