From 3680179e479ca2e24cc6b9aa4750aca4a62c680e Mon Sep 17 00:00:00 2001 From: Yegor Kozlov Date: Sat, 19 Jan 2013 18:33:34 +0000 Subject: [PATCH] Bugzilla 54356 - Support of statistical function SLOPE git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1435633 13f79535-47bb-0310-9956-ffa450edef68 --- .../poi/ss/formula/eval/FunctionEval.java | 3 +- .../poi/ss/formula/functions/Intercept.java | 188 +------------- .../functions/LinearRegressionFunction.java | 236 ++++++++++++++++++ .../poi/ss/formula/functions/Slope.java | 49 ++++ .../poi/ss/formula/functions/TestSlope.java | 137 ++++++++++ 5 files changed, 431 insertions(+), 182 deletions(-) create mode 100644 src/java/org/apache/poi/ss/formula/functions/LinearRegressionFunction.java create mode 100644 src/java/org/apache/poi/ss/formula/functions/Slope.java create mode 100644 src/testcases/org/apache/poi/ss/formula/functions/TestSlope.java diff --git a/src/java/org/apache/poi/ss/formula/eval/FunctionEval.java b/src/java/org/apache/poi/ss/formula/eval/FunctionEval.java index 021d6980f..733b14b26 100644 --- a/src/java/org/apache/poi/ss/formula/eval/FunctionEval.java +++ b/src/java/org/apache/poi/ss/formula/eval/FunctionEval.java @@ -28,7 +28,7 @@ import java.util.TreeSet; /** * @author Amol S. 
Deshmukh < amolweb at ya hoo dot com > - * @author Johan Karlsteen - added Intercept + * @author Johan Karlsteen - added Intercept and Slope */ public final class FunctionEval { /** @@ -210,6 +210,7 @@ public final class FunctionEval { retval[305] = new Sumx2py2(); retval[311] = new Intercept(); + retval[315] = new Slope(); retval[318] = AggregateFunction.DEVSQ; diff --git a/src/java/org/apache/poi/ss/formula/functions/Intercept.java b/src/java/org/apache/poi/ss/formula/functions/Intercept.java index 06bb6f97d..cf76cc588 100644 --- a/src/java/org/apache/poi/ss/formula/functions/Intercept.java +++ b/src/java/org/apache/poi/ss/formula/functions/Intercept.java @@ -19,13 +19,8 @@ package org.apache.poi.ss.formula.functions; -import org.apache.poi.ss.formula.TwoDEval; -import org.apache.poi.ss.formula.eval.ErrorEval; -import org.apache.poi.ss.formula.eval.EvaluationException; -import org.apache.poi.ss.formula.eval.NumberEval; -import org.apache.poi.ss.formula.eval.RefEval; import org.apache.poi.ss.formula.eval.ValueEval; -import org.apache.poi.ss.formula.functions.LookupUtils.ValueVector; +import org.apache.poi.ss.formula.functions.LinearRegressionFunction.FUNCTION; /** * Implementation of Excel function INTERCEPT()

@@ -40,184 +35,15 @@ import org.apache.poi.ss.formula.functions.LookupUtils.ValueVector; */ public final class Intercept extends Fixed2ArgFunction { - private static abstract class ValueArray implements ValueVector { - private final int _size; - protected ValueArray(int size) { - _size = size; - } - - public ValueEval getItem(int index) { - if (index < 0 || index > _size) { - throw new IllegalArgumentException("Specified index " + index - + " is outside range (0.." + (_size - 1) + ")"); - } - return getItemInternal(index); - } - protected abstract ValueEval getItemInternal(int index); - - public final int getSize() { - return _size; - } + private final LinearRegressionFunction func; + public Intercept() { + func = new LinearRegressionFunction(FUNCTION.INTERCEPT); } - - private static final class SingleCellValueArray extends ValueArray { - private final ValueEval _value; - public SingleCellValueArray(ValueEval value) { - super(1); - _value = value; - } - @Override - protected ValueEval getItemInternal(int index) { - return _value; - } - } - - private static final class RefValueArray extends ValueArray { - private final RefEval _ref; - public RefValueArray(RefEval ref) { - super(1); - _ref = ref; - } - @Override - protected ValueEval getItemInternal(int index) { - return _ref.getInnerValueEval(); - } - } - - private static final class AreaValueArray extends ValueArray { - private final TwoDEval _ae; - private final int _width; - - public AreaValueArray(TwoDEval ae) { - super(ae.getWidth() * ae.getHeight()); - _ae = ae; - _width = ae.getWidth(); - } - @Override - protected ValueEval getItemInternal(int index) { - int rowIx = index / _width; - int colIx = index % _width; - return _ae.getValue(rowIx, colIx); - } - } - + @Override public ValueEval evaluate(int srcRowIndex, int srcColumnIndex, ValueEval arg0, ValueEval arg1) { - double result; - try { - ValueVector vvX = createValueVector(arg0); - ValueVector vvY = createValueVector(arg1); - int size = vvX.getSize(); - if 
(size == 0 || vvY.getSize() != size) { - return ErrorEval.NA; - } - result = evaluateInternal(vvX, vvY, size); - } catch (EvaluationException e) { - return e.getErrorEval(); - } - if (Double.isNaN(result) || Double.isInfinite(result)) { - return ErrorEval.NUM_ERROR; - } - return new NumberEval(result); + return func.evaluate(srcRowIndex, srcColumnIndex, arg0, arg1); } - - private double evaluateInternal(ValueVector x, ValueVector y, int size) - throws EvaluationException { +} - // error handling is as if the x is fully evaluated before y - ErrorEval firstXerr = null; - ErrorEval firstYerr = null; - boolean accumlatedSome = false; - double result = 0.0; - // first pass: read in data, compute xbar and ybar - double sumx = 0.0, sumy = 0.0; - - for (int i = 0; i < size; i++) { - ValueEval vx = x.getItem(i); - ValueEval vy = y.getItem(i); - if (vx instanceof ErrorEval) { - if (firstXerr == null) { - firstXerr = (ErrorEval) vx; - continue; - } - } - if (vy instanceof ErrorEval) { - if (firstYerr == null) { - firstYerr = (ErrorEval) vy; - continue; - } - } - // only count pairs if both elements are numbers - if (vx instanceof NumberEval && vy instanceof NumberEval) { - accumlatedSome = true; - NumberEval nx = (NumberEval) vx; - NumberEval ny = (NumberEval) vy; - sumx += nx.getNumberValue(); - sumy += ny.getNumberValue(); - } else { - // all other combinations of value types are silently ignored - } - } - double xbar = sumx / size; - double ybar = sumy / size; - - // second pass: compute summary statistics - double xxbar = 0.0, xybar = 0.0; - for (int i = 0; i < size; i++) { - ValueEval vx = x.getItem(i); - ValueEval vy = y.getItem(i); - - if (vx instanceof ErrorEval) { - if (firstXerr == null) { - firstXerr = (ErrorEval) vx; - continue; - } - } - if (vy instanceof ErrorEval) { - if (firstYerr == null) { - firstYerr = (ErrorEval) vy; - continue; - } - } - - // only count pairs if both elements are numbers - if (vx instanceof NumberEval && vy instanceof NumberEval) { - 
NumberEval nx = (NumberEval) vx; - NumberEval ny = (NumberEval) vy; - xxbar += (nx.getNumberValue() - xbar) * (nx.getNumberValue() - xbar); - xybar += (nx.getNumberValue() - xbar) * (ny.getNumberValue() - ybar); - } else { - // all other combinations of value types are silently ignored - } - } - double beta1 = xybar / xxbar; - double beta0 = ybar - beta1 * xbar; - - if (firstXerr != null) { - throw new EvaluationException(firstXerr); - } - if (firstYerr != null) { - throw new EvaluationException(firstYerr); - } - if (!accumlatedSome) { - throw new EvaluationException(ErrorEval.DIV_ZERO); - } - - result = beta0; - return result; - } - - private static ValueVector createValueVector(ValueEval arg) throws EvaluationException { - if (arg instanceof ErrorEval) { - throw new EvaluationException((ErrorEval) arg); - } - if (arg instanceof TwoDEval) { - return new AreaValueArray((TwoDEval) arg); - } - if (arg instanceof RefEval) { - return new RefValueArray((RefEval) arg); - } - return new SingleCellValueArray(arg); - } -} \ No newline at end of file diff --git a/src/java/org/apache/poi/ss/formula/functions/LinearRegressionFunction.java b/src/java/org/apache/poi/ss/formula/functions/LinearRegressionFunction.java new file mode 100644 index 000000000..740fd0507 --- /dev/null +++ b/src/java/org/apache/poi/ss/formula/functions/LinearRegressionFunction.java @@ -0,0 +1,236 @@ +/* + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + */ + +package org.apache.poi.ss.formula.functions; + +import org.apache.poi.ss.formula.TwoDEval; +import org.apache.poi.ss.formula.eval.ErrorEval; +import org.apache.poi.ss.formula.eval.EvaluationException; +import org.apache.poi.ss.formula.eval.NumberEval; +import org.apache.poi.ss.formula.eval.RefEval; +import org.apache.poi.ss.formula.eval.ValueEval; +import org.apache.poi.ss.formula.functions.LookupUtils.ValueVector; + +/** + * Base class for linear regression functions. + * + * Calculates the linear regression line that is used to predict y values from x values
+ * (http://introcs.cs.princeton.edu/java/97data/LinearRegression.java.html) + * Syntax:
+ * INTERCEPT(arrayX, arrayY)

+ * or + * SLOPE(arrayX, arrayY)

+ * + * + * @author Johan Karlsteen + */ +public final class LinearRegressionFunction extends Fixed2ArgFunction { + + private static abstract class ValueArray implements ValueVector { + private final int _size; + protected ValueArray(int size) { + _size = size; + } + @Override + public ValueEval getItem(int index) { + if (index < 0 || index >= _size) { /* valid indices are 0 .. (_size - 1), matching the message below */ + throw new IllegalArgumentException("Specified index " + index + + " is outside range (0.." + (_size - 1) + ")"); + } + return getItemInternal(index); + } + protected abstract ValueEval getItemInternal(int index); + @Override + public final int getSize() { + return _size; + } + } + + private static final class SingleCellValueArray extends ValueArray { + private final ValueEval _value; + public SingleCellValueArray(ValueEval value) { + super(1); + _value = value; + } + @Override + protected ValueEval getItemInternal(int index) { + return _value; + } + } + + private static final class RefValueArray extends ValueArray { + private final RefEval _ref; + public RefValueArray(RefEval ref) { + super(1); + _ref = ref; + } + @Override + protected ValueEval getItemInternal(int index) { + return _ref.getInnerValueEval(); + } + } + + private static final class AreaValueArray extends ValueArray { + private final TwoDEval _ae; + private final int _width; + + public AreaValueArray(TwoDEval ae) { + super(ae.getWidth() * ae.getHeight()); + _ae = ae; + _width = ae.getWidth(); + } + @Override + protected ValueEval getItemInternal(int index) { + int rowIx = index / _width; + int colIx = index % _width; + return _ae.getValue(rowIx, colIx); + } + } + + public enum FUNCTION {INTERCEPT, SLOPE}; + public FUNCTION function; + + public LinearRegressionFunction(FUNCTION function) { + this.function = function; + } + + @Override + public ValueEval evaluate(int srcRowIndex, int srcColumnIndex, + ValueEval arg0, ValueEval arg1) { + double result; + try { + ValueVector vvX = createValueVector(arg0); + ValueVector vvY = createValueVector(arg1); + int 
size = vvX.getSize(); + if (size == 0 || vvY.getSize() != size) { + return ErrorEval.NA; + } + result = evaluateInternal(vvX, vvY, size); + } catch (EvaluationException e) { + return e.getErrorEval(); + } + if (Double.isNaN(result) || Double.isInfinite(result)) { + return ErrorEval.NUM_ERROR; + } + return new NumberEval(result); + } + + private double evaluateInternal(ValueVector x, ValueVector y, int size) + throws EvaluationException { + + // error handling is as if the x is fully evaluated before y + ErrorEval firstXerr = null; + ErrorEval firstYerr = null; + boolean accumlatedSome = false; + double result = 0.0; + // first pass: read in data, compute xbar and ybar + double sumx = 0.0, sumy = 0.0; + + for (int i = 0; i < size; i++) { + ValueEval vx = x.getItem(i); + ValueEval vy = y.getItem(i); + if (vx instanceof ErrorEval) { + if (firstXerr == null) { + firstXerr = (ErrorEval) vx; + continue; + } + } + if (vy instanceof ErrorEval) { + if (firstYerr == null) { + firstYerr = (ErrorEval) vy; + continue; + } + } + // only count pairs if both elements are numbers + if (vx instanceof NumberEval && vy instanceof NumberEval) { + accumlatedSome = true; + NumberEval nx = (NumberEval) vx; + NumberEval ny = (NumberEval) vy; + sumx += nx.getNumberValue(); + sumy += ny.getNumberValue(); + } else { + // all other combinations of value types are silently ignored + } + } + double xbar = sumx / size; + double ybar = sumy / size; + + // second pass: compute summary statistics + double xxbar = 0.0, xybar = 0.0; + for (int i = 0; i < size; i++) { + ValueEval vx = x.getItem(i); + ValueEval vy = y.getItem(i); + + if (vx instanceof ErrorEval) { + if (firstXerr == null) { + firstXerr = (ErrorEval) vx; + continue; + } + } + if (vy instanceof ErrorEval) { + if (firstYerr == null) { + firstYerr = (ErrorEval) vy; + continue; + } + } + + // only count pairs if both elements are numbers + if (vx instanceof NumberEval && vy instanceof NumberEval) { + NumberEval nx = (NumberEval) vx; + 
NumberEval ny = (NumberEval) vy; + xxbar += (nx.getNumberValue() - xbar) * (nx.getNumberValue() - xbar); + xybar += (nx.getNumberValue() - xbar) * (ny.getNumberValue() - ybar); + } else { + // all other combinations of value types are silently ignored + } + } + double beta1 = xybar / xxbar; + double beta0 = ybar - beta1 * xbar; + + if (firstXerr != null) { + throw new EvaluationException(firstXerr); + } + if (firstYerr != null) { + throw new EvaluationException(firstYerr); + } + if (!accumlatedSome) { + throw new EvaluationException(ErrorEval.DIV_ZERO); + } + + if(function == FUNCTION.INTERCEPT) { + return beta0; + } else { + return beta1; + } + } + + private static ValueVector createValueVector(ValueEval arg) throws EvaluationException { + if (arg instanceof ErrorEval) { + throw new EvaluationException((ErrorEval) arg); + } + if (arg instanceof TwoDEval) { + return new AreaValueArray((TwoDEval) arg); + } + if (arg instanceof RefEval) { + return new RefValueArray((RefEval) arg); + } + return new SingleCellValueArray(arg); + } +} + diff --git a/src/java/org/apache/poi/ss/formula/functions/Slope.java b/src/java/org/apache/poi/ss/formula/functions/Slope.java new file mode 100644 index 000000000..ec7210214 --- /dev/null +++ b/src/java/org/apache/poi/ss/formula/functions/Slope.java @@ -0,0 +1,49 @@ +/* + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + */ + +package org.apache.poi.ss.formula.functions; + +import org.apache.poi.ss.formula.eval.ValueEval; +import org.apache.poi.ss.formula.functions.LinearRegressionFunction.FUNCTION; + +/** + * Implementation of Excel function SLOPE()

+ * + * Calculates the SLOPE of the linear regression line that is used to predict y values from x values
+ * (http://introcs.cs.princeton.edu/java/97data/LinearRegression.java.html) + * Syntax:
+ * SLOPE(arrayX, arrayY)

+ * + * + * @author Johan Karlsteen + */ +public final class Slope extends Fixed2ArgFunction { + + private final LinearRegressionFunction func; + public Slope() { + func = new LinearRegressionFunction(FUNCTION.SLOPE); + } + + @Override + public ValueEval evaluate(int srcRowIndex, int srcColumnIndex, + ValueEval arg0, ValueEval arg1) { + return func.evaluate(srcRowIndex, srcColumnIndex, arg0, arg1); + } +} + diff --git a/src/testcases/org/apache/poi/ss/formula/functions/TestSlope.java b/src/testcases/org/apache/poi/ss/formula/functions/TestSlope.java new file mode 100644 index 000000000..2ea0332ed --- /dev/null +++ b/src/testcases/org/apache/poi/ss/formula/functions/TestSlope.java @@ -0,0 +1,137 @@ +/* + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ==================================================================== + */ + +package org.apache.poi.ss.formula.functions; + +import junit.framework.TestCase; + +import org.apache.poi.ss.formula.eval.ErrorEval; +import org.apache.poi.ss.formula.eval.NumberEval; +import org.apache.poi.ss.formula.eval.ValueEval; +/** + * Test for Excel function SLOPE() + * + * @author Johan Karlsteen + */ +public final class TestSlope extends TestCase { + private static final Function SLOPE = new Slope(); + + private static ValueEval invoke(Function function, ValueEval xArray, ValueEval yArray) { + ValueEval[] args = new ValueEval[] { xArray, yArray, }; + return function.evaluate(args, -1, (short)-1); + } + + private void confirm(Function function, ValueEval xArray, ValueEval yArray, double expected) { + ValueEval result = invoke(function, xArray, yArray); + assertEquals(NumberEval.class, result.getClass()); + assertEquals(expected, ((NumberEval)result).getNumberValue(), 0); + } + private void confirmError(Function function, ValueEval xArray, ValueEval yArray, ErrorEval expectedError) { + ValueEval result = invoke(function, xArray, yArray); + assertEquals(ErrorEval.class, result.getClass()); + assertEquals(expectedError.getErrorCode(), ((ErrorEval)result).getErrorCode()); + } + + private void confirmError(ValueEval xArray, ValueEval yArray, ErrorEval expectedError) { + confirmError(SLOPE, xArray, yArray, expectedError); + } + + public void testBasic() { + Double exp = Math.pow(10, 7.5); + ValueEval[] xValues = { + new NumberEval(3+exp), + new NumberEval(4+exp), + new NumberEval(2+exp), + new NumberEval(5+exp), + new NumberEval(4+exp), + new NumberEval(7+exp), + }; + ValueEval areaEvalX = createAreaEval(xValues); + + ValueEval[] yValues = { + new NumberEval(1), + new NumberEval(2), + new NumberEval(3), + new NumberEval(4), + new NumberEval(5), + new NumberEval(6), + }; + ValueEval areaEvalY = createAreaEval(yValues); + confirm(SLOPE, areaEvalX, areaEvalY, 0.7752808988764045); + // 
Excel 2010 gives 0.775280898876405 + } + + /** + * number of items in array is not limited to 30 + */ + public void testLargeArrays() { + ValueEval[] xValues = createMockNumberArray(100, 3); // [1,2,0,1,2,0,...,0,1] + xValues[0] = new NumberEval(2.0); // Changes first element to 2 + ValueEval[] yValues = createMockNumberArray(100, 101); // [1,2,3,4,...,99,100] + + confirm(SLOPE, createAreaEval(xValues), createAreaEval(yValues), -1.231527093596059); + // Excel 2010 gives -1.23152709359606 + } + + private ValueEval[] createMockNumberArray(int size, double value) { + ValueEval[] result = new ValueEval[size]; + for (int i = 0; i < result.length; i++) { + result[i] = new NumberEval((i+1)%value); + } + return result; + } + + private static ValueEval createAreaEval(ValueEval[] values) { + String refStr = "A1:A" + values.length; + return EvalFactory.createAreaEval(refStr, values); + } + + public void testErrors() { + ValueEval[] xValues = { + ErrorEval.REF_INVALID, + new NumberEval(2), + }; + ValueEval areaEvalX = createAreaEval(xValues); + ValueEval[] yValues = { + new NumberEval(2), + ErrorEval.NULL_INTERSECTION, + }; + ValueEval areaEvalY = createAreaEval(yValues); + ValueEval[] zValues = { // wrong size + new NumberEval(2), + }; + ValueEval areaEvalZ = createAreaEval(zValues); + + // if either arg is an error, that error propagates + confirmError(ErrorEval.REF_INVALID, ErrorEval.NAME_INVALID, ErrorEval.REF_INVALID); + confirmError(areaEvalX, ErrorEval.NAME_INVALID, ErrorEval.NAME_INVALID); + confirmError(ErrorEval.NAME_INVALID, areaEvalX, ErrorEval.NAME_INVALID); + + // array sizes must match + confirmError(areaEvalX, areaEvalZ, ErrorEval.NA); + confirmError(areaEvalZ, areaEvalY, ErrorEval.NA); + + // any error in an array item propagates up + confirmError(areaEvalX, areaEvalX, ErrorEval.REF_INVALID); + + // search for errors array by array, not pair by pair + confirmError(areaEvalX, areaEvalY, ErrorEval.REF_INVALID); + confirmError(areaEvalY, areaEvalX, 
ErrorEval.NULL_INTERSECTION); + } +}