Bugzilla 54356 - Support of statistical function SLOPE

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1435633 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yegor Kozlov 2013-01-19 18:33:34 +00:00
parent eae929673f
commit 3680179e47
5 changed files with 431 additions and 182 deletions

View File

@ -28,7 +28,7 @@ import java.util.TreeSet;
/**
* @author Amol S. Deshmukh < amolweb at ya hoo dot com >
* @author Johan Karlsteen - added Intercept
* @author Johan Karlsteen - added Intercept and Slope
*/
public final class FunctionEval {
/**
@ -210,6 +210,7 @@ public final class FunctionEval {
retval[305] = new Sumx2py2();
retval[311] = new Intercept();
retval[315] = new Slope();
retval[318] = AggregateFunction.DEVSQ;

View File

@ -19,13 +19,8 @@
package org.apache.poi.ss.formula.functions;
import org.apache.poi.ss.formula.TwoDEval;
import org.apache.poi.ss.formula.eval.ErrorEval;
import org.apache.poi.ss.formula.eval.EvaluationException;
import org.apache.poi.ss.formula.eval.NumberEval;
import org.apache.poi.ss.formula.eval.RefEval;
import org.apache.poi.ss.formula.eval.ValueEval;
import org.apache.poi.ss.formula.functions.LookupUtils.ValueVector;
import org.apache.poi.ss.formula.functions.LinearRegressionFunction.FUNCTION;
/**
* Implementation of Excel function INTERCEPT()<p/>
@ -40,184 +35,15 @@ import org.apache.poi.ss.formula.functions.LookupUtils.ValueVector;
*/
public final class Intercept extends Fixed2ArgFunction {
private static abstract class ValueArray implements ValueVector {
private final int _size;
protected ValueArray(int size) {
_size = size;
private final LinearRegressionFunction func;
public Intercept() {
func = new LinearRegressionFunction(FUNCTION.INTERCEPT);
}
public ValueEval getItem(int index) {
if (index < 0 || index > _size) {
throw new IllegalArgumentException("Specified index " + index
+ " is outside range (0.." + (_size - 1) + ")");
}
return getItemInternal(index);
}
protected abstract ValueEval getItemInternal(int index);
public final int getSize() {
return _size;
}
}
private static final class SingleCellValueArray extends ValueArray {
private final ValueEval _value;
public SingleCellValueArray(ValueEval value) {
super(1);
_value = value;
}
@Override
protected ValueEval getItemInternal(int index) {
return _value;
}
}
private static final class RefValueArray extends ValueArray {
private final RefEval _ref;
public RefValueArray(RefEval ref) {
super(1);
_ref = ref;
}
@Override
protected ValueEval getItemInternal(int index) {
return _ref.getInnerValueEval();
}
}
private static final class AreaValueArray extends ValueArray {
private final TwoDEval _ae;
private final int _width;
public AreaValueArray(TwoDEval ae) {
super(ae.getWidth() * ae.getHeight());
_ae = ae;
_width = ae.getWidth();
}
@Override
protected ValueEval getItemInternal(int index) {
int rowIx = index / _width;
int colIx = index % _width;
return _ae.getValue(rowIx, colIx);
}
}
public ValueEval evaluate(int srcRowIndex, int srcColumnIndex,
ValueEval arg0, ValueEval arg1) {
double result;
try {
ValueVector vvX = createValueVector(arg0);
ValueVector vvY = createValueVector(arg1);
int size = vvX.getSize();
if (size == 0 || vvY.getSize() != size) {
return ErrorEval.NA;
}
result = evaluateInternal(vvX, vvY, size);
} catch (EvaluationException e) {
return e.getErrorEval();
}
if (Double.isNaN(result) || Double.isInfinite(result)) {
return ErrorEval.NUM_ERROR;
}
return new NumberEval(result);
}
private double evaluateInternal(ValueVector x, ValueVector y, int size)
throws EvaluationException {
// error handling is as if the x is fully evaluated before y
ErrorEval firstXerr = null;
ErrorEval firstYerr = null;
boolean accumlatedSome = false;
double result = 0.0;
// first pass: read in data, compute xbar and ybar
double sumx = 0.0, sumy = 0.0;
for (int i = 0; i < size; i++) {
ValueEval vx = x.getItem(i);
ValueEval vy = y.getItem(i);
if (vx instanceof ErrorEval) {
if (firstXerr == null) {
firstXerr = (ErrorEval) vx;
continue;
}
}
if (vy instanceof ErrorEval) {
if (firstYerr == null) {
firstYerr = (ErrorEval) vy;
continue;
}
}
// only count pairs if both elements are numbers
if (vx instanceof NumberEval && vy instanceof NumberEval) {
accumlatedSome = true;
NumberEval nx = (NumberEval) vx;
NumberEval ny = (NumberEval) vy;
sumx += nx.getNumberValue();
sumy += ny.getNumberValue();
} else {
// all other combinations of value types are silently ignored
}
}
double xbar = sumx / size;
double ybar = sumy / size;
// second pass: compute summary statistics
double xxbar = 0.0, xybar = 0.0;
for (int i = 0; i < size; i++) {
ValueEval vx = x.getItem(i);
ValueEval vy = y.getItem(i);
if (vx instanceof ErrorEval) {
if (firstXerr == null) {
firstXerr = (ErrorEval) vx;
continue;
}
}
if (vy instanceof ErrorEval) {
if (firstYerr == null) {
firstYerr = (ErrorEval) vy;
continue;
return func.evaluate(srcRowIndex, srcColumnIndex, arg0, arg1);
}
}
// only count pairs if both elements are numbers
if (vx instanceof NumberEval && vy instanceof NumberEval) {
NumberEval nx = (NumberEval) vx;
NumberEval ny = (NumberEval) vy;
xxbar += (nx.getNumberValue() - xbar) * (nx.getNumberValue() - xbar);
xybar += (nx.getNumberValue() - xbar) * (ny.getNumberValue() - ybar);
} else {
// all other combinations of value types are silently ignored
}
}
double beta1 = xybar / xxbar;
double beta0 = ybar - beta1 * xbar;
if (firstXerr != null) {
throw new EvaluationException(firstXerr);
}
if (firstYerr != null) {
throw new EvaluationException(firstYerr);
}
if (!accumlatedSome) {
throw new EvaluationException(ErrorEval.DIV_ZERO);
}
result = beta0;
return result;
}
private static ValueVector createValueVector(ValueEval arg) throws EvaluationException {
if (arg instanceof ErrorEval) {
throw new EvaluationException((ErrorEval) arg);
}
if (arg instanceof TwoDEval) {
return new AreaValueArray((TwoDEval) arg);
}
if (arg instanceof RefEval) {
return new RefValueArray((RefEval) arg);
}
return new SingleCellValueArray(arg);
}
}

View File

@ -0,0 +1,236 @@
/*
* ====================================================================
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.poi.ss.formula.functions;
import org.apache.poi.ss.formula.TwoDEval;
import org.apache.poi.ss.formula.eval.ErrorEval;
import org.apache.poi.ss.formula.eval.EvaluationException;
import org.apache.poi.ss.formula.eval.NumberEval;
import org.apache.poi.ss.formula.eval.RefEval;
import org.apache.poi.ss.formula.eval.ValueEval;
import org.apache.poi.ss.formula.functions.LookupUtils.ValueVector;
/**
* Base class for linear regression functions.
*
* Calculates the linear regression line that is used to predict y values from x values<br/>
* (http://introcs.cs.princeton.edu/java/97data/LinearRegression.java.html)
* <b>Syntax</b>:<br/>
* <b>INTERCEPT</b>(<b>arrayX</b>, <b>arrayY</b>)<p/>
* or
* <b>SLOPE</b>(<b>arrayX</b>, <b>arrayY</b>)<p/>
*
*
* @author Johan Karlsteen
*/
public final class LinearRegressionFunction extends Fixed2ArgFunction {
private static abstract class ValueArray implements ValueVector {
private final int _size;
protected ValueArray(int size) {
_size = size;
}
@Override
public ValueEval getItem(int index) {
if (index < 0 || index > _size) {
throw new IllegalArgumentException("Specified index " + index
+ " is outside range (0.." + (_size - 1) + ")");
}
return getItemInternal(index);
}
protected abstract ValueEval getItemInternal(int index);
@Override
public final int getSize() {
return _size;
}
}
private static final class SingleCellValueArray extends ValueArray {
private final ValueEval _value;
public SingleCellValueArray(ValueEval value) {
super(1);
_value = value;
}
@Override
protected ValueEval getItemInternal(int index) {
return _value;
}
}
private static final class RefValueArray extends ValueArray {
private final RefEval _ref;
public RefValueArray(RefEval ref) {
super(1);
_ref = ref;
}
@Override
protected ValueEval getItemInternal(int index) {
return _ref.getInnerValueEval();
}
}
private static final class AreaValueArray extends ValueArray {
private final TwoDEval _ae;
private final int _width;
public AreaValueArray(TwoDEval ae) {
super(ae.getWidth() * ae.getHeight());
_ae = ae;
_width = ae.getWidth();
}
@Override
protected ValueEval getItemInternal(int index) {
int rowIx = index / _width;
int colIx = index % _width;
return _ae.getValue(rowIx, colIx);
}
}
public enum FUNCTION {INTERCEPT, SLOPE};
public FUNCTION function;
public LinearRegressionFunction(FUNCTION function) {
this.function = function;
}
@Override
public ValueEval evaluate(int srcRowIndex, int srcColumnIndex,
ValueEval arg0, ValueEval arg1) {
double result;
try {
ValueVector vvX = createValueVector(arg0);
ValueVector vvY = createValueVector(arg1);
int size = vvX.getSize();
if (size == 0 || vvY.getSize() != size) {
return ErrorEval.NA;
}
result = evaluateInternal(vvX, vvY, size);
} catch (EvaluationException e) {
return e.getErrorEval();
}
if (Double.isNaN(result) || Double.isInfinite(result)) {
return ErrorEval.NUM_ERROR;
}
return new NumberEval(result);
}
private double evaluateInternal(ValueVector x, ValueVector y, int size)
throws EvaluationException {
// error handling is as if the x is fully evaluated before y
ErrorEval firstXerr = null;
ErrorEval firstYerr = null;
boolean accumlatedSome = false;
double result = 0.0;
// first pass: read in data, compute xbar and ybar
double sumx = 0.0, sumy = 0.0;
for (int i = 0; i < size; i++) {
ValueEval vx = x.getItem(i);
ValueEval vy = y.getItem(i);
if (vx instanceof ErrorEval) {
if (firstXerr == null) {
firstXerr = (ErrorEval) vx;
continue;
}
}
if (vy instanceof ErrorEval) {
if (firstYerr == null) {
firstYerr = (ErrorEval) vy;
continue;
}
}
// only count pairs if both elements are numbers
if (vx instanceof NumberEval && vy instanceof NumberEval) {
accumlatedSome = true;
NumberEval nx = (NumberEval) vx;
NumberEval ny = (NumberEval) vy;
sumx += nx.getNumberValue();
sumy += ny.getNumberValue();
} else {
// all other combinations of value types are silently ignored
}
}
double xbar = sumx / size;
double ybar = sumy / size;
// second pass: compute summary statistics
double xxbar = 0.0, xybar = 0.0;
for (int i = 0; i < size; i++) {
ValueEval vx = x.getItem(i);
ValueEval vy = y.getItem(i);
if (vx instanceof ErrorEval) {
if (firstXerr == null) {
firstXerr = (ErrorEval) vx;
continue;
}
}
if (vy instanceof ErrorEval) {
if (firstYerr == null) {
firstYerr = (ErrorEval) vy;
continue;
}
}
// only count pairs if both elements are numbers
if (vx instanceof NumberEval && vy instanceof NumberEval) {
NumberEval nx = (NumberEval) vx;
NumberEval ny = (NumberEval) vy;
xxbar += (nx.getNumberValue() - xbar) * (nx.getNumberValue() - xbar);
xybar += (nx.getNumberValue() - xbar) * (ny.getNumberValue() - ybar);
} else {
// all other combinations of value types are silently ignored
}
}
double beta1 = xybar / xxbar;
double beta0 = ybar - beta1 * xbar;
if (firstXerr != null) {
throw new EvaluationException(firstXerr);
}
if (firstYerr != null) {
throw new EvaluationException(firstYerr);
}
if (!accumlatedSome) {
throw new EvaluationException(ErrorEval.DIV_ZERO);
}
if(function == FUNCTION.INTERCEPT) {
return beta0;
} else {
return beta1;
}
}
private static ValueVector createValueVector(ValueEval arg) throws EvaluationException {
if (arg instanceof ErrorEval) {
throw new EvaluationException((ErrorEval) arg);
}
if (arg instanceof TwoDEval) {
return new AreaValueArray((TwoDEval) arg);
}
if (arg instanceof RefEval) {
return new RefValueArray((RefEval) arg);
}
return new SingleCellValueArray(arg);
}
}

View File

@ -0,0 +1,49 @@
/*
* ====================================================================
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.poi.ss.formula.functions;
import org.apache.poi.ss.formula.eval.ValueEval;
import org.apache.poi.ss.formula.functions.LinearRegressionFunction.FUNCTION;
/**
* Implementation of Excel function SLOPE()<p/>
*
* Calculates the SLOPE of the linear regression line that is used to predict y values from x values<br/>
* (http://introcs.cs.princeton.edu/java/97data/LinearRegression.java.html)
* <b>Syntax</b>:<br/>
* <b>SLOPE</b>(<b>arrayX</b>, <b>arrayY</b>)<p/>
*
*
* @author Johan Karlsteen
*/
public final class Slope extends Fixed2ArgFunction {
private final LinearRegressionFunction func;
public Slope() {
func = new LinearRegressionFunction(FUNCTION.SLOPE);
}
@Override
public ValueEval evaluate(int srcRowIndex, int srcColumnIndex,
ValueEval arg0, ValueEval arg1) {
return func.evaluate(srcRowIndex, srcColumnIndex, arg0, arg1);
}
}

View File

@ -0,0 +1,137 @@
/*
* ====================================================================
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.poi.ss.formula.functions;
import junit.framework.TestCase;
import org.apache.poi.ss.formula.eval.ErrorEval;
import org.apache.poi.ss.formula.eval.NumberEval;
import org.apache.poi.ss.formula.eval.ValueEval;
/**
* Test for Excel function SLOPE()
*
* @author Johan Karlsteen
*/
public final class TestSlope extends TestCase {
private static final Function SLOPE = new Slope();
private static ValueEval invoke(Function function, ValueEval xArray, ValueEval yArray) {
ValueEval[] args = new ValueEval[] { xArray, yArray, };
return function.evaluate(args, -1, (short)-1);
}
private void confirm(Function function, ValueEval xArray, ValueEval yArray, double expected) {
ValueEval result = invoke(function, xArray, yArray);
assertEquals(NumberEval.class, result.getClass());
assertEquals(expected, ((NumberEval)result).getNumberValue(), 0);
}
private void confirmError(Function function, ValueEval xArray, ValueEval yArray, ErrorEval expectedError) {
ValueEval result = invoke(function, xArray, yArray);
assertEquals(ErrorEval.class, result.getClass());
assertEquals(expectedError.getErrorCode(), ((ErrorEval)result).getErrorCode());
}
private void confirmError(ValueEval xArray, ValueEval yArray, ErrorEval expectedError) {
confirmError(SLOPE, xArray, yArray, expectedError);
}
public void testBasic() {
Double exp = Math.pow(10, 7.5);
ValueEval[] xValues = {
new NumberEval(3+exp),
new NumberEval(4+exp),
new NumberEval(2+exp),
new NumberEval(5+exp),
new NumberEval(4+exp),
new NumberEval(7+exp),
};
ValueEval areaEvalX = createAreaEval(xValues);
ValueEval[] yValues = {
new NumberEval(1),
new NumberEval(2),
new NumberEval(3),
new NumberEval(4),
new NumberEval(5),
new NumberEval(6),
};
ValueEval areaEvalY = createAreaEval(yValues);
confirm(SLOPE, areaEvalX, areaEvalY, 0.7752808988764045);
// Excel 2010 gives 0.775280898876405
}
/**
* number of items in array is not limited to 30
*/
public void testLargeArrays() {
ValueEval[] xValues = createMockNumberArray(100, 3); // [1,2,0,1,2,0,...,0,1]
xValues[0] = new NumberEval(2.0); // Changes first element to 2
ValueEval[] yValues = createMockNumberArray(100, 101); // [1,2,3,4,...,99,100]
confirm(SLOPE, createAreaEval(xValues), createAreaEval(yValues), -1.231527093596059);
// Excel 2010 gives -1.23152709359606
}
private ValueEval[] createMockNumberArray(int size, double value) {
ValueEval[] result = new ValueEval[size];
for (int i = 0; i < result.length; i++) {
result[i] = new NumberEval((i+1)%value);
}
return result;
}
private static ValueEval createAreaEval(ValueEval[] values) {
String refStr = "A1:A" + values.length;
return EvalFactory.createAreaEval(refStr, values);
}
public void testErrors() {
ValueEval[] xValues = {
ErrorEval.REF_INVALID,
new NumberEval(2),
};
ValueEval areaEvalX = createAreaEval(xValues);
ValueEval[] yValues = {
new NumberEval(2),
ErrorEval.NULL_INTERSECTION,
};
ValueEval areaEvalY = createAreaEval(yValues);
ValueEval[] zValues = { // wrong size
new NumberEval(2),
};
ValueEval areaEvalZ = createAreaEval(zValues);
// if either arg is an error, that error propagates
confirmError(ErrorEval.REF_INVALID, ErrorEval.NAME_INVALID, ErrorEval.REF_INVALID);
confirmError(areaEvalX, ErrorEval.NAME_INVALID, ErrorEval.NAME_INVALID);
confirmError(ErrorEval.NAME_INVALID, areaEvalX, ErrorEval.NAME_INVALID);
// array sizes must match
confirmError(areaEvalX, areaEvalZ, ErrorEval.NA);
confirmError(areaEvalZ, areaEvalY, ErrorEval.NA);
// any error in an array item propagates up
confirmError(areaEvalX, areaEvalX, ErrorEval.REF_INVALID);
// search for errors array by array, not pair by pair
confirmError(areaEvalX, areaEvalY, ErrorEval.REF_INVALID);
confirmError(areaEvalY, areaEvalX, ErrorEval.NULL_INTERSECTION);
}
}