Bugzilla 54356: Support of statistical function INTERCEPT
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1426485 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
876fea8596
commit
0023138765
@ -34,6 +34,7 @@
|
||||
|
||||
<changes>
|
||||
<release version="4.0-beta1" date="2013-??-??">
|
||||
<action dev="poi-developers" type="add">54356 - Support for INTERCEPT function</action>
|
||||
<action dev="poi-developers" type="fix">54282 - Improve the performance of ColumnHelper addCleanColIntoCols, speeds up some .xlsx file loading</action>
|
||||
<action dev="poi-developers" type="fix">53650 - Prevent unreadable content and disalow to overwrite rows from input template in SXSSF</action>
|
||||
<action dev="poi-developers" type="fix">54228,53672 - Fixed XSSF to read cells with missing R attribute</action>
|
||||
|
@ -28,6 +28,7 @@ import java.util.TreeSet;
|
||||
|
||||
/**
|
||||
* @author Amol S. Deshmukh < amolweb at ya hoo dot com >
|
||||
* @author Johan Karlsteen - added Intercept
|
||||
*/
|
||||
public final class FunctionEval {
|
||||
/**
|
||||
@ -208,6 +209,8 @@ public final class FunctionEval {
|
||||
retval[304] = new Sumx2my2();
|
||||
retval[305] = new Sumx2py2();
|
||||
|
||||
retval[311] = new Intercept();
|
||||
|
||||
retval[318] = AggregateFunction.DEVSQ;
|
||||
|
||||
retval[321] = AggregateFunction.SUMSQ;
|
||||
|
223
src/java/org/apache/poi/ss/formula/functions/Intercept.java
Normal file
223
src/java/org/apache/poi/ss/formula/functions/Intercept.java
Normal file
@ -0,0 +1,223 @@
|
||||
/*
|
||||
* ====================================================================
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
* ====================================================================
|
||||
*/
|
||||
|
||||
package org.apache.poi.ss.formula.functions;
|
||||
|
||||
import org.apache.poi.ss.formula.TwoDEval;
|
||||
import org.apache.poi.ss.formula.eval.ErrorEval;
|
||||
import org.apache.poi.ss.formula.eval.EvaluationException;
|
||||
import org.apache.poi.ss.formula.eval.NumberEval;
|
||||
import org.apache.poi.ss.formula.eval.RefEval;
|
||||
import org.apache.poi.ss.formula.eval.ValueEval;
|
||||
import org.apache.poi.ss.formula.functions.LookupUtils.ValueVector;
|
||||
|
||||
/**
|
||||
* Implementation of Excel function INTERCEPT()<p/>
|
||||
*
|
||||
* Calculates the INTERCEPT of the linear regression line that is used to predict y values from x values<br/>
|
||||
* (http://introcs.cs.princeton.edu/java/97data/LinearRegression.java.html)
|
||||
* <b>Syntax</b>:<br/>
|
||||
* <b>INTERCEPT</b>(<b>arrayX</b>, <b>arrayY</b>)<p/>
|
||||
*
|
||||
*
|
||||
* @author Johan Karlsteen
|
||||
*/
|
||||
public final class Intercept extends Fixed2ArgFunction {
|
||||
|
||||
private static abstract class ValueArray implements ValueVector {
|
||||
private final int _size;
|
||||
protected ValueArray(int size) {
|
||||
_size = size;
|
||||
}
|
||||
|
||||
public ValueEval getItem(int index) {
|
||||
if (index < 0 || index > _size) {
|
||||
throw new IllegalArgumentException("Specified index " + index
|
||||
+ " is outside range (0.." + (_size - 1) + ")");
|
||||
}
|
||||
return getItemInternal(index);
|
||||
}
|
||||
protected abstract ValueEval getItemInternal(int index);
|
||||
|
||||
public final int getSize() {
|
||||
return _size;
|
||||
}
|
||||
}
|
||||
|
||||
private static final class SingleCellValueArray extends ValueArray {
|
||||
private final ValueEval _value;
|
||||
public SingleCellValueArray(ValueEval value) {
|
||||
super(1);
|
||||
_value = value;
|
||||
}
|
||||
@Override
|
||||
protected ValueEval getItemInternal(int index) {
|
||||
return _value;
|
||||
}
|
||||
}
|
||||
|
||||
private static final class RefValueArray extends ValueArray {
|
||||
private final RefEval _ref;
|
||||
public RefValueArray(RefEval ref) {
|
||||
super(1);
|
||||
_ref = ref;
|
||||
}
|
||||
@Override
|
||||
protected ValueEval getItemInternal(int index) {
|
||||
return _ref.getInnerValueEval();
|
||||
}
|
||||
}
|
||||
|
||||
private static final class AreaValueArray extends ValueArray {
|
||||
private final TwoDEval _ae;
|
||||
private final int _width;
|
||||
|
||||
public AreaValueArray(TwoDEval ae) {
|
||||
super(ae.getWidth() * ae.getHeight());
|
||||
_ae = ae;
|
||||
_width = ae.getWidth();
|
||||
}
|
||||
@Override
|
||||
protected ValueEval getItemInternal(int index) {
|
||||
int rowIx = index / _width;
|
||||
int colIx = index % _width;
|
||||
return _ae.getValue(rowIx, colIx);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public ValueEval evaluate(int srcRowIndex, int srcColumnIndex,
|
||||
ValueEval arg0, ValueEval arg1) {
|
||||
double result;
|
||||
try {
|
||||
ValueVector vvX = createValueVector(arg0);
|
||||
ValueVector vvY = createValueVector(arg1);
|
||||
int size = vvX.getSize();
|
||||
if (size == 0 || vvY.getSize() != size) {
|
||||
return ErrorEval.NA;
|
||||
}
|
||||
result = evaluateInternal(vvX, vvY, size);
|
||||
} catch (EvaluationException e) {
|
||||
return e.getErrorEval();
|
||||
}
|
||||
if (Double.isNaN(result) || Double.isInfinite(result)) {
|
||||
return ErrorEval.NUM_ERROR;
|
||||
}
|
||||
return new NumberEval(result);
|
||||
}
|
||||
|
||||
private double evaluateInternal(ValueVector x, ValueVector y, int size)
|
||||
throws EvaluationException {
|
||||
|
||||
// error handling is as if the x is fully evaluated before y
|
||||
ErrorEval firstXerr = null;
|
||||
ErrorEval firstYerr = null;
|
||||
boolean accumlatedSome = false;
|
||||
double result = 0.0;
|
||||
// first pass: read in data, compute xbar and ybar
|
||||
double sumx = 0.0, sumy = 0.0;
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
ValueEval vx = x.getItem(i);
|
||||
ValueEval vy = y.getItem(i);
|
||||
if (vx instanceof ErrorEval) {
|
||||
if (firstXerr == null) {
|
||||
firstXerr = (ErrorEval) vx;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (vy instanceof ErrorEval) {
|
||||
if (firstYerr == null) {
|
||||
firstYerr = (ErrorEval) vy;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// only count pairs if both elements are numbers
|
||||
if (vx instanceof NumberEval && vy instanceof NumberEval) {
|
||||
accumlatedSome = true;
|
||||
NumberEval nx = (NumberEval) vx;
|
||||
NumberEval ny = (NumberEval) vy;
|
||||
sumx += nx.getNumberValue();
|
||||
sumy += ny.getNumberValue();
|
||||
} else {
|
||||
// all other combinations of value types are silently ignored
|
||||
}
|
||||
}
|
||||
double xbar = sumx / size;
|
||||
double ybar = sumy / size;
|
||||
|
||||
// second pass: compute summary statistics
|
||||
double xxbar = 0.0, xybar = 0.0;
|
||||
for (int i = 0; i < size; i++) {
|
||||
ValueEval vx = x.getItem(i);
|
||||
ValueEval vy = y.getItem(i);
|
||||
|
||||
if (vx instanceof ErrorEval) {
|
||||
if (firstXerr == null) {
|
||||
firstXerr = (ErrorEval) vx;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (vy instanceof ErrorEval) {
|
||||
if (firstYerr == null) {
|
||||
firstYerr = (ErrorEval) vy;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// only count pairs if both elements are numbers
|
||||
if (vx instanceof NumberEval && vy instanceof NumberEval) {
|
||||
NumberEval nx = (NumberEval) vx;
|
||||
NumberEval ny = (NumberEval) vy;
|
||||
xxbar += (nx.getNumberValue() - xbar) * (nx.getNumberValue() - xbar);
|
||||
xybar += (nx.getNumberValue() - xbar) * (ny.getNumberValue() - ybar);
|
||||
} else {
|
||||
// all other combinations of value types are silently ignored
|
||||
}
|
||||
}
|
||||
double beta1 = xybar / xxbar;
|
||||
double beta0 = ybar - beta1 * xbar;
|
||||
|
||||
if (firstXerr != null) {
|
||||
throw new EvaluationException(firstXerr);
|
||||
}
|
||||
if (firstYerr != null) {
|
||||
throw new EvaluationException(firstYerr);
|
||||
}
|
||||
if (!accumlatedSome) {
|
||||
throw new EvaluationException(ErrorEval.DIV_ZERO);
|
||||
}
|
||||
|
||||
result = beta0;
|
||||
return result;
|
||||
}
|
||||
|
||||
private static ValueVector createValueVector(ValueEval arg) throws EvaluationException {
|
||||
if (arg instanceof ErrorEval) {
|
||||
throw new EvaluationException((ErrorEval) arg);
|
||||
}
|
||||
if (arg instanceof TwoDEval) {
|
||||
return new AreaValueArray((TwoDEval) arg);
|
||||
}
|
||||
if (arg instanceof RefEval) {
|
||||
return new RefValueArray((RefEval) arg);
|
||||
}
|
||||
return new SingleCellValueArray(arg);
|
||||
}
|
||||
}
|
@ -0,0 +1,159 @@
|
||||
/*
|
||||
* ====================================================================
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
* ====================================================================
|
||||
*/
|
||||
|
||||
package org.apache.poi.ss.formula.functions;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import org.apache.poi.hssf.HSSFTestDataSamples;
|
||||
import org.apache.poi.hssf.usermodel.HSSFCell;
|
||||
import org.apache.poi.hssf.usermodel.HSSFFormulaEvaluator;
|
||||
import org.apache.poi.hssf.usermodel.HSSFSheet;
|
||||
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
|
||||
import org.apache.poi.ss.formula.eval.ErrorEval;
|
||||
import org.apache.poi.ss.formula.eval.NumberEval;
|
||||
import org.apache.poi.ss.formula.eval.ValueEval;
|
||||
/**
|
||||
* Test for Excel function INTERCEPT()
|
||||
*
|
||||
* @author Johan Karlsteen
|
||||
*/
|
||||
public final class TestIntercept extends TestCase {
|
||||
private static final Function INTERCEPT = new Intercept();
|
||||
|
||||
private static ValueEval invoke(Function function, ValueEval xArray, ValueEval yArray) {
|
||||
ValueEval[] args = new ValueEval[] { xArray, yArray, };
|
||||
return function.evaluate(args, -1, (short)-1);
|
||||
}
|
||||
|
||||
private void confirm(Function function, ValueEval xArray, ValueEval yArray, double expected) {
|
||||
ValueEval result = invoke(function, xArray, yArray);
|
||||
assertEquals(NumberEval.class, result.getClass());
|
||||
assertEquals(expected, ((NumberEval)result).getNumberValue(), 0);
|
||||
}
|
||||
private void confirmError(Function function, ValueEval xArray, ValueEval yArray, ErrorEval expectedError) {
|
||||
ValueEval result = invoke(function, xArray, yArray);
|
||||
assertEquals(ErrorEval.class, result.getClass());
|
||||
assertEquals(expectedError.getErrorCode(), ((ErrorEval)result).getErrorCode());
|
||||
}
|
||||
|
||||
private void confirmError(ValueEval xArray, ValueEval yArray, ErrorEval expectedError) {
|
||||
confirmError(INTERCEPT, xArray, yArray, expectedError);
|
||||
}
|
||||
|
||||
public void testBasic() {
|
||||
Double exp = Math.pow(10, 7.5);
|
||||
ValueEval[] xValues = {
|
||||
new NumberEval(3+exp),
|
||||
new NumberEval(4+exp),
|
||||
new NumberEval(2+exp),
|
||||
new NumberEval(5+exp),
|
||||
new NumberEval(4+exp),
|
||||
new NumberEval(7+exp),
|
||||
};
|
||||
ValueEval areaEvalX = createAreaEval(xValues);
|
||||
|
||||
ValueEval[] yValues = {
|
||||
new NumberEval(1),
|
||||
new NumberEval(2),
|
||||
new NumberEval(3),
|
||||
new NumberEval(4),
|
||||
new NumberEval(5),
|
||||
new NumberEval(6),
|
||||
};
|
||||
ValueEval areaEvalY = createAreaEval(yValues);
|
||||
confirm(INTERCEPT, areaEvalX, areaEvalY, -24516534.39905822);
|
||||
// Excel 2010 gives -24516534.3990583
|
||||
}
|
||||
|
||||
/**
|
||||
* number of items in array is not limited to 30
|
||||
*/
|
||||
public void testLargeArrays() {
|
||||
ValueEval[] xValues = createMockNumberArray(100, 3); // [1,2,0,1,2,0,...,0,1]
|
||||
xValues[0] = new NumberEval(2.0); // Changes first element to 2
|
||||
ValueEval[] yValues = createMockNumberArray(100, 101); // [1,2,3,4,...,99,100]
|
||||
|
||||
confirm(INTERCEPT, createAreaEval(xValues), createAreaEval(yValues), 51.74384236453202);
|
||||
// Excel 2010 gives 51.74384236453200
|
||||
}
|
||||
|
||||
private ValueEval[] createMockNumberArray(int size, double value) {
|
||||
ValueEval[] result = new ValueEval[size];
|
||||
for (int i = 0; i < result.length; i++) {
|
||||
result[i] = new NumberEval((i+1)%value);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private static ValueEval createAreaEval(ValueEval[] values) {
|
||||
String refStr = "A1:A" + values.length;
|
||||
return EvalFactory.createAreaEval(refStr, values);
|
||||
}
|
||||
|
||||
public void testErrors() {
|
||||
ValueEval[] xValues = {
|
||||
ErrorEval.REF_INVALID,
|
||||
new NumberEval(2),
|
||||
};
|
||||
ValueEval areaEvalX = createAreaEval(xValues);
|
||||
ValueEval[] yValues = {
|
||||
new NumberEval(2),
|
||||
ErrorEval.NULL_INTERSECTION,
|
||||
};
|
||||
ValueEval areaEvalY = createAreaEval(yValues);
|
||||
ValueEval[] zValues = { // wrong size
|
||||
new NumberEval(2),
|
||||
};
|
||||
ValueEval areaEvalZ = createAreaEval(zValues);
|
||||
|
||||
// if either arg is an error, that error propagates
|
||||
confirmError(ErrorEval.REF_INVALID, ErrorEval.NAME_INVALID, ErrorEval.REF_INVALID);
|
||||
confirmError(areaEvalX, ErrorEval.NAME_INVALID, ErrorEval.NAME_INVALID);
|
||||
confirmError(ErrorEval.NAME_INVALID, areaEvalX, ErrorEval.NAME_INVALID);
|
||||
|
||||
// array sizes must match
|
||||
confirmError(areaEvalX, areaEvalZ, ErrorEval.NA);
|
||||
confirmError(areaEvalZ, areaEvalY, ErrorEval.NA);
|
||||
|
||||
// any error in an array item propagates up
|
||||
confirmError(areaEvalX, areaEvalX, ErrorEval.REF_INVALID);
|
||||
|
||||
// search for errors array by array, not pair by pair
|
||||
confirmError(areaEvalX, areaEvalY, ErrorEval.REF_INVALID);
|
||||
confirmError(areaEvalY, areaEvalX, ErrorEval.NULL_INTERSECTION);
|
||||
}
|
||||
|
||||
/**
|
||||
* Example from
|
||||
* http://office.microsoft.com/en-us/excel-help/intercept-function-HP010062512.aspx?CTT=5&origin=HA010277524
|
||||
*/
|
||||
public void testFromFile() {
|
||||
|
||||
HSSFWorkbook wb = HSSFTestDataSamples.openSampleWorkbook("intercept.xls");
|
||||
HSSFFormulaEvaluator fe = new HSSFFormulaEvaluator(wb);
|
||||
|
||||
HSSFSheet example1 = wb.getSheet("Example 1");
|
||||
HSSFCell a8 = example1.getRow(7).getCell(0);
|
||||
assertEquals("INTERCEPT(A2:A6,B2:B6)", a8.getCellFormula());
|
||||
fe.evaluate(a8);
|
||||
assertEquals(0.048387097, a8.getNumericCellValue(), 0.000000001);
|
||||
|
||||
}
|
||||
}
|
BIN
test-data/spreadsheet/intercept.xls
Normal file
BIN
test-data/spreadsheet/intercept.xls
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user