Source for org.jfree.data.statistics.Statistics

   1: /* ===========================================================
   2:  * JFreeChart : a free chart library for the Java(tm) platform
   3:  * ===========================================================
   4:  *
   5:  * (C) Copyright 2000-2006, by Object Refinery Limited and Contributors.
   6:  *
   7:  * Project Info:  http://www.jfree.org/jfreechart/index.html
   8:  *
   9:  * This library is free software; you can redistribute it and/or modify it 
  10:  * under the terms of the GNU Lesser General Public License as published by 
  11:  * the Free Software Foundation; either version 2.1 of the License, or 
  12:  * (at your option) any later version.
  13:  *
  14:  * This library is distributed in the hope that it will be useful, but 
  15:  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 
  16:  * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 
  17:  * License for more details.
  18:  *
  19:  * You should have received a copy of the GNU Lesser General Public
  20:  * License along with this library; if not, write to the Free Software
  21:  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, 
  22:  * USA.  
  23:  *
  24:  * [Java is a trademark or registered trademark of Sun Microsystems, Inc. 
  25:  * in the United States and other countries.]
  26:  *
  27:  * ---------------
  28:  * Statistics.java
  29:  * ---------------
  30:  * (C) Copyright 2000-2006, by Matthew Wright and Contributors.
  31:  *
  32:  * Original Author:  Matthew Wright;
  33:  * Contributor(s):   David Gilbert (for Object Refinery Limited);
  34:  *
  35:  * $Id: Statistics.java,v 1.5.2.2 2006/11/16 11:19:47 mungady Exp $
  36:  *
  37:  * Changes (from 08-Nov-2001)
  38:  * --------------------------
  39:  * 08-Nov-2001 : Added standard header and tidied Javadoc comments (DG);
  40:  *               Moved from JFreeChart to package com.jrefinery.data.* in 
  41:  *               JCommon class library (DG);
  42:  * 24-Jun-2002 : Removed unnecessary local variable (DG);
  43:  * 07-Oct-2002 : Fixed errors reported by Checkstyle (DG);
  44:  * 26-May-2004 : Moved calculateMean() method from BoxAndWhiskerCalculator (DG);
  45:  * 02-Jun-2004 : Fixed bug in calculateMedian() method (DG);
  46:  * 11-Jan-2005 : Removed deprecated code in preparation for the 1.0.0 
  47:  *               release (DG);
  48:  *
  49:  */
  50: 
  51: package org.jfree.data.statistics;
  52: 
  53: import java.util.ArrayList;
  54: import java.util.Collection;
  55: import java.util.Collections;
  56: import java.util.Iterator;
  57: import java.util.List;
  58: 
  /**
   * A utility class that provides some common statistical functions.  All
   * methods are static; the class holds no state.
   */
  public abstract class Statistics {

      /**
       * Returns the mean of an array of numbers.  This is equivalent to calling
       * <code>calculateMean(values, true)</code>.
       *
       * @param values  the values (<code>null</code> not permitted).
       *
       * @return The mean (<code>Double.NaN</code> if the array is empty or 
       *     contains a <code>null</code> or <code>Double.NaN</code> item).
       *
       * @throws IllegalArgumentException if <code>values</code> is 
       *     <code>null</code>.
       */
      public static double calculateMean(Number[] values) {
          return calculateMean(values, true);
      }

      /**
       * Returns the mean of an array of numbers.
       *
       * @param values  the values (<code>null</code> not permitted).
       * @param includeNullAndNaN  a flag that controls whether or not 
       *     <code>null</code> and <code>Double.NaN</code> values are included
       *     in the calculation (if either is present in the array, the result is 
       *     {@link Double#NaN}).
       *
       * @return The mean (<code>Double.NaN</code> if no items are counted).
       *
       * @throws IllegalArgumentException if <code>values</code> is 
       *     <code>null</code>.
       * 
       * @since 1.0.3
       */
      public static double calculateMean(Number[] values, 
              boolean includeNullAndNaN) {

          if (values == null) {
              throw new IllegalArgumentException("Null 'values' argument.");
          }
          double sum = 0.0;
          int counter = 0;
          for (int i = 0; i < values.length; i++) {
              // treat nulls the same as NaNs
              double current = (values[i] != null) 
                      ? values[i].doubleValue() : Double.NaN;
              // a NaN that is included poisons the sum, which gives the 
              // documented NaN result
              if (includeNullAndNaN || !Double.isNaN(current)) {
                  sum += current;
                  counter++;
              }
          }
          // when nothing was counted this is 0.0 / 0, i.e. NaN
          return sum / counter;
      }

      /**
       * Returns the mean of a collection of <code>Number</code> objects.  This
       * is equivalent to calling <code>calculateMean(values, true)</code>.
       * 
       * @param values  the values (<code>null</code> not permitted).
       * 
       * @return The mean.
       *
       * @throws IllegalArgumentException if <code>values</code> is 
       *     <code>null</code>.
       */
      public static double calculateMean(Collection values) {
          return calculateMean(values, true);
      }

      /**
       * Returns the mean of a collection of <code>Number</code> objects.  
       * Items in the collection that are neither <code>null</code> nor 
       * instances of <code>Number</code> are ignored.
       * 
       * @param values  the values (<code>null</code> not permitted).
       * @param includeNullAndNaN  a flag that controls whether or not 
       *     <code>null</code> and <code>Double.NaN</code> values are included
       *     in the calculation (if either is present in the collection, the 
       *     result is {@link Double#NaN}).
       * 
       * @return The mean (<code>Double.NaN</code> if no items are counted).
       *
       * @throws IllegalArgumentException if <code>values</code> is 
       *     <code>null</code>.
       * 
       * @since 1.0.3
       */
      public static double calculateMean(Collection values, 
              boolean includeNullAndNaN) {

          if (values == null) {
              throw new IllegalArgumentException("Null 'values' argument.");
          }
          int count = 0;
          double total = 0.0;
          Iterator iterator = values.iterator();
          while (iterator.hasNext()) {
              Object object = iterator.next();
              double value;
              if (object == null) {
                  // treat nulls the same as NaNs
                  value = Double.NaN;
              }
              else if (object instanceof Number) {
                  value = ((Number) object).doubleValue();
              }
              else {
                  // items that are not numbers take no part in the mean
                  continue;
              }
              if (Double.isNaN(value)) {
                  if (includeNullAndNaN) {
                      // one included null/NaN decides the result
                      return Double.NaN;
                  }
              }
              else {
                  total += value;
                  count++;
              }
          }
          // when nothing was counted this is 0.0 / 0, i.e. NaN
          return total / count;
      }

      /**
       * Calculates the median for a list of values (<code>Number</code> objects).
       * The list of values will be copied, and the copy sorted, before 
       * calculating the median.  To avoid this step (if your list of values
       * is already sorted), use the {@link #calculateMedian(List, boolean)} 
       * method.
       * 
       * @param values  the values (<code>null</code> permitted).
       * 
       * @return The median (<code>Double.NaN</code> if the list is 
       *     <code>null</code> or empty).
       */
      public static double calculateMedian(List values) {
          return calculateMedian(values, true);
      }

      /**
       * Calculates the median for a list of values (<code>Number</code> objects).
       * If <code>copyAndSort</code> is <code>false</code>, the list is assumed
       * to be presorted in ascending order by value.  The list passed in is
       * never modified.
       * 
       * @param values  the values (<code>null</code> permitted).
       * @param copyAndSort  a flag that controls whether the list of values is
       *                     copied and sorted.
       * 
       * @return The median (<code>Double.NaN</code> if the list is 
       *     <code>null</code> or empty).
       */
      public static double calculateMedian(List values, boolean copyAndSort) {

          if (values == null) {
              return Double.NaN;
          }
          List sorted = values;
          if (copyAndSort) {
              // sort a copy so that the caller's list is left untouched
              sorted = new ArrayList(values);
              Collections.sort(sorted);
          }
          return medianOfSortedRange(sorted, 0, sorted.size());
      }

      /**
       * Calculates the median for a sublist within a list of values 
       * (<code>Number</code> objects).  The items in the range are copied and 
       * the copy is sorted before the median is calculated.
       * 
       * @param values  the values, in any order (<code>null</code> not 
       *                permitted).
       * @param start  the start index (inclusive).
       * @param end  the end index (inclusive).
       * 
       * @return The median (<code>Double.NaN</code> if the range is empty).
       */
      public static double calculateMedian(List values, int start, int end) {
          return calculateMedian(values, start, end, true);
      }

      /**
       * Calculates the median for a sublist within a list of values 
       * (<code>Number</code> objects).  If <code>copyAndSort</code> is 
       * <code>true</code>, the items from <code>start</code> to 
       * <code>end</code> are copied and the copy is sorted; otherwise the 
       * items in that range are assumed to be in ascending order already.  
       * The list passed in is never modified.
       * 
       * @param values  the values (<code>null</code> not permitted).
       * @param start  the start index (inclusive).
       * @param end  the end index (inclusive).
       * @param copyAndSort  a flag that controls whether the items in the 
       *                     range are copied and sorted.
       * 
       * @return The median (<code>Double.NaN</code> if the range is empty).
       */
      public static double calculateMedian(List values, int start, int end,
                                           boolean copyAndSort) {

          if (!copyAndSort) {
              return medianOfSortedRange(values, start, end - start + 1);
          }
          List working = new ArrayList(end - start + 1);
          for (int i = start; i <= end; i++) {
              working.add(values.get(i));
          }
          Collections.sort(working);
          return medianOfSortedRange(working, 0, working.size());

      }

      /**
       * Returns the median of <code>count</code> consecutive items of a list,
       * beginning at index <code>start</code>.  The items in that range must
       * be <code>Number</code> objects in ascending order.
       *
       * @param sorted  the list containing the sorted range.
       * @param start  the index of the first item in the range.
       * @param count  the number of items in the range.
       *
       * @return The median, or <code>Double.NaN</code> if <code>count</code>
       *     is less than one.
       */
      private static double medianOfSortedRange(List sorted, int start, 
              int count) {

          if (count <= 0) {
              return Double.NaN;
          }
          int middle = start + count / 2;
          if (count % 2 == 1) {
              // odd number of items: the middle one is the median
              Number value = (Number) sorted.get(middle);
              return value.doubleValue();
          }
          // even number of items: average the two central ones
          Number value1 = (Number) sorted.get(middle - 1);
          Number value2 = (Number) sorted.get(middle);
          return (value1.doubleValue() + value2.doubleValue()) / 2.0;

      }

      /**
       * Returns the standard deviation of a set of numbers.  The sum of the
       * squared deviations from the mean is divided by <code>n - 1</code>, so
       * an array with a single item gives <code>Double.NaN</code>.
       *
       * @param data  the data (<code>null</code> or zero length array not 
       *     permitted).
       *
       * @return The standard deviation of a set of numbers.
       *
       * @throws IllegalArgumentException if <code>data</code> is 
       *     <code>null</code> or has zero length.
       */
      public static double getStdDev(Number[] data) {
          if (data == null) {
              throw new IllegalArgumentException("Null 'data' array.");
          }
          if (data.length == 0) {
              throw new IllegalArgumentException("Zero length 'data' array.");
          }
          double avg = calculateMean(data);
          double sum = 0.0;

          for (int counter = 0; counter < data.length; counter++) {
              double diff = data[counter].doubleValue() - avg;
              sum += diff * diff;
          }
          return Math.sqrt(sum / (data.length - 1));
      }

      /**
       * Fits a straight line to a set of (x, y) data, returning the slope and
       * intercept.
       *
       * @param xData  the x-data (<code>null</code> not permitted).
       * @param yData  the y-data (<code>null</code> not permitted).
       *
       * @return A double array with the intercept in [0] and the slope in [1].
       *
       * @throws IllegalArgumentException if either array is <code>null</code>
       *     or the arrays have different lengths.
       */
      public static double[] getLinearFit(Number[] xData, Number[] yData) {

          if (xData == null) { 
              throw new IllegalArgumentException("Null 'xData' argument.");
          }
          if (yData == null) { 
              throw new IllegalArgumentException("Null 'yData' argument.");
          }
          if (xData.length != yData.length) {
              throw new IllegalArgumentException(
                  "Statistics.getLinearFit(): array lengths must be equal.");
          }

          double slope = getSlope(xData, yData);
          // the fitted line passes through (xbar, ybar): a = ybar - b * xbar
          double intercept = calculateMean(yData) - slope * calculateMean(xData);
          return new double[] {intercept, slope};

      }

      /**
       * Finds the slope of a regression line using least squares.
       *
       * @param xData  the x-values (<code>null</code> not permitted).
       * @param yData  the y-values (<code>null</code> not permitted).
       *
       * @return The slope.
       *
       * @throws IllegalArgumentException if either array is <code>null</code>
       *     or the arrays have different lengths.
       */
      public static double getSlope(Number[] xData, Number[] yData) {

          if (xData == null) { 
              throw new IllegalArgumentException("Null 'xData' argument.");
          }
          if (yData == null) { 
              throw new IllegalArgumentException("Null 'yData' argument.");
          }
          if (xData.length != yData.length) {
              throw new IllegalArgumentException("Array lengths must be equal.");
          }

          // ********* stat function for linear slope ********
          // y = a + bx
          // a = ybar - b * xbar
          //     sum(x * y) - (sum(x) * sum(y)) / n
          // b = -----------------------------------
          //     sum(x^2) - (sum(x)^2) / n
          // *************************************************

          // sum of x, x^2, x * y, y
          double sx = 0.0, sxx = 0.0, sxy = 0.0, sy = 0.0;
          int n = xData.length;
          for (int i = 0; i < n; i++) {
              double x = xData[i].doubleValue();
              double y = yData[i].doubleValue();
              sx += x;
              sxx += x * x;
              sxy += y * x;
              sy += y;
          }
          return (sxy - (sx * sy) / n) / (sxx - (sx * sx) / n);

      }

      /**
       * Calculates the correlation between two datasets.  Both arrays should 
       * contain the same number of items.  Null values are treated as zero.
       * <P>
       * Information about the correlation calculation was obtained from:
       * 
       * http://trochim.human.cornell.edu/kb/statcorr.htm
       * 
       * @param data1  the first dataset (<code>null</code> not permitted).
       * @param data2  the second dataset (<code>null</code> not permitted).
       * 
       * @return The correlation.
       *
       * @throws IllegalArgumentException if either array is <code>null</code>
       *     or the arrays have different lengths.
       */
      public static double getCorrelation(Number[] data1, Number[] data2) {
          if (data1 == null) {
              throw new IllegalArgumentException("Null 'data1' argument.");
          }
          if (data2 == null) {
              throw new IllegalArgumentException("Null 'data2' argument.");
          }
          if (data1.length != data2.length) {
              throw new IllegalArgumentException(
                  "'data1' and 'data2' arrays must have same length."
              );   
          }
          int n = data1.length;
          double sumX = 0.0;
          double sumY = 0.0;
          double sumX2 = 0.0;
          double sumY2 = 0.0;
          double sumXY = 0.0;
          for (int i = 0; i < n; i++) {
              // null items count as zero
              double x = (data1[i] != null) ? data1[i].doubleValue() : 0.0;
              double y = (data2[i] != null) ? data2[i].doubleValue() : 0.0;
              sumX += x;
              sumY += y;
              sumXY += x * y;
              sumX2 += x * x;
              sumY2 += y * y;
          }
          double numerator = n * sumXY - sumX * sumY;
          double denominator = Math.sqrt((n * sumX2 - sumX * sumX) 
                  * (n * sumY2 - sumY * sumY));
          return numerator / denominator;
      }

      /**
       * Returns a data set for a moving average on the data set passed in.
       * Row <code>i</code> of the result holds the x-value 
       * <code>xData[i + period]</code> and the mean of the 
       * <code>period</code> y-values <code>yData[i]</code> to 
       * <code>yData[i + period - 1]</code>.  A <code>period</code> of zero 
       * gives <code>Double.NaN</code> for every y-value.
       *
       * @param xData  an array of the x data (<code>null</code> not permitted).
       * @param yData  an array of the y data (<code>null</code> not permitted).
       * @param period  the number of data points to average (must not be
       *     negative or greater than the array length).
       *
       * @return A double[][] with <code>xData.length - period</code> rows in 
       *         the first dimension, with two doubles for x and y in the second
       *         dimension.
       *
       * @throws IllegalArgumentException if either array is <code>null</code>,
       *     the arrays have different lengths, or <code>period</code> is 
       *     negative or greater than the array length.
       */
      public static double[][] getMovingAverage(Number[] xData, 
                                                Number[] yData, 
                                                int period) {

          // check arguments...
          if (xData == null) { 
              throw new IllegalArgumentException("Null 'xData' argument.");
          }
          if (yData == null) { 
              throw new IllegalArgumentException("Null 'yData' argument.");
          }
          if (xData.length != yData.length) {
              throw new IllegalArgumentException("Array lengths must be equal.");
          }
          if (period < 0) {
              throw new IllegalArgumentException("Negative 'period' argument.");
          }
          if (period > xData.length) {
              throw new IllegalArgumentException(
                  "Period can't be longer than dataset."
              );
          }

          double[][] result = new double[xData.length - period][2];
          for (int i = 0; i < result.length; i++) {
              result[i][0] = xData[i + period].doubleValue();
              // holds the moving average sum
              double sum = 0.0;
              for (int j = 0; j < period; j++) {
                  sum += yData[i + j].doubleValue();
              }
              result[i][1] = sum / period;
          }
          return result;

      }

  }