diff --git a/src/java/org/apache/poi/ss/formula/eval/FunctionEval.java b/src/java/org/apache/poi/ss/formula/eval/FunctionEval.java index 0c55327a4..8442f5832 100644 --- a/src/java/org/apache/poi/ss/formula/eval/FunctionEval.java +++ b/src/java/org/apache/poi/ss/formula/eval/FunctionEval.java @@ -243,7 +243,8 @@ public final class FunctionEval { // 247: DB // 252: FEQUENCY - + retval[252] = Frequency.instance; + retval[FunctionID.EXTERNAL_FUNC] = null; // ExternalFunction is a FreeRefFunction, nominally 255 retval[261] = new Errortype(); diff --git a/src/java/org/apache/poi/ss/formula/functions/Frequency.java b/src/java/org/apache/poi/ss/formula/functions/Frequency.java new file mode 100644 index 000000000..9df832366 --- /dev/null +++ b/src/java/org/apache/poi/ss/formula/functions/Frequency.java @@ -0,0 +1,81 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.ss.formula.functions; + +import org.apache.poi.ss.formula.CacheAreaEval; +import org.apache.poi.ss.formula.eval.EvaluationException; +import org.apache.poi.ss.formula.eval.NumberEval; +import org.apache.poi.ss.formula.eval.ValueEval; + +import java.util.Arrays; + +/** + * Implementation of Excel 'Analysis ToolPak' function FREQUENCY()
+ * Returns a frequency distribution as a vertical array

+ *

+ * Syntax
+ * FREQUENCY(data_array, bins_array)

+ *

+ * data_array Required. An array of or reference to a set of values for which you want to count frequencies. + * If data_array contains no values, FREQUENCY returns an array of zeros.
+ * bins_array Required. An array of or reference to intervals into which you want to group the values in data_array. + * If bins_array contains no values, FREQUENCY returns the number of elements in data_array.
+ * + * @author Yegor Kozlov + */ +public class Frequency extends Fixed2ArgFunction { + public static final Function instance = new Frequency(); + + private Frequency() { + // enforce singleton + } + + public ValueEval evaluate(int srcRowIndex, int srcColumnIndex, ValueEval arg0, ValueEval arg1) { + MatrixFunction.MutableValueCollector collector = new MatrixFunction.MutableValueCollector(false, false); + + double[] values; + double[] bins; + try { + values = collector.collectValues(arg0); + bins = collector.collectValues(arg1); + } catch (EvaluationException e) { + return e.getErrorEval(); + } + + // can bins be not sorted? + //bins = Arrays.stream(bins).sorted().distinct().toArray(); + + int[] histogram = histogram(values, bins); + NumberEval[] result = Arrays.stream(histogram).boxed().map(NumberEval::new).toArray(NumberEval[]::new); + return new CacheAreaEval(srcRowIndex, srcColumnIndex, + srcRowIndex + result.length - 1, srcColumnIndex, result); + } + + static int findBin(double value, double[] bins) { + int idx = Arrays.binarySearch(bins, value); + return idx >= 0 ? idx + 1 : -idx; + } + + static int[] histogram(double[] values, double[] bins) { + int[] histogram = new int[bins.length + 1]; + for (double val : values) { + histogram[findBin(val, bins) - 1]++; + } + return histogram; + } +} diff --git a/src/testcases/org/apache/poi/ss/formula/functions/TestFrequency.java b/src/testcases/org/apache/poi/ss/formula/functions/TestFrequency.java new file mode 100644 index 000000000..c441c82ad --- /dev/null +++ b/src/testcases/org/apache/poi/ss/formula/functions/TestFrequency.java @@ -0,0 +1,91 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.ss.formula.functions; + +import org.apache.poi.hssf.usermodel.HSSFWorkbook; +import org.apache.poi.ss.usermodel.*; +import org.apache.poi.ss.util.CellRangeAddress; +import org.junit.Test; + +import static org.apache.poi.ss.formula.functions.Frequency.histogram; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +/** + * Testcase for the function FREQUENCY(data, bins) + * + * @author Yegor Kozlov + */ +public class TestFrequency { + + @Test + public void testHistogram() { + assertArrayEquals(new int[]{3, 2, 2, 0, 1, 1}, + histogram( + new double[]{11, 12, 13, 21, 29, 36, 40, 58, 69}, + new double[]{20, 30, 40, 50, 60}) + ); + + assertArrayEquals(new int[]{1, 1, 1, 1, 1, 0}, + histogram( + new double[]{20, 30, 40, 50, 60}, + new double[]{20, 30, 40, 50, 60}) + + ); + + assertArrayEquals(new int[]{2, 3}, + histogram( + new double[]{20, 30, 40, 50, 60}, + new double[]{30}) + + ); + } + + @Test + public void testEvaluate() { + Workbook wb = new HSSFWorkbook(); + FormulaEvaluator evaluator = wb.getCreationHelper().createFormulaEvaluator(); + + int[] data = {1, 1, 2, 3, 4, 4, 5, 7, 8, 9, 9, 11, 3, 5, 8}; + int[] bins = {3, 6, 9}; + Sheet sheet = wb.createSheet(); + Row dataRow = sheet.createRow(0); // A1:O1 + for (int i = 0; i < data.length; i++) { + dataRow.createCell(i).setCellValue(data[i]); + } + Row binsRow = sheet.createRow(1); + for (int i = 0; i < bins.length; i++) { // A2:C2 + binsRow.createCell(i).setCellValue(bins[i]); + } + Row fmlaRow = sheet.createRow(2); + CellRange arrayFmla = sheet.setArrayFormula("FREQUENCY(A1:O1,A2:C2)", CellRangeAddress.valueOf("A3:A6")); + Cell b3 = fmlaRow.createCell(1); // B3 + b3.setCellFormula("COUNT(FREQUENCY(A1:O1,A2:C2))"); // frequency returns a vertical array of bins+1 + + Cell c3 = fmlaRow.createCell(2); + c3.setCellFormula("SUM(FREQUENCY(A1:O1,A2:C2))"); // sum of the frequency bins should add up to the number of data values + + assertEquals(5, (int) evaluator.evaluate(arrayFmla.getFlattenedCells()[0]).getNumberValue()); + assertEquals(4, (int) evaluator.evaluate(arrayFmla.getFlattenedCells()[1]).getNumberValue()); + assertEquals(5, (int) evaluator.evaluate(arrayFmla.getFlattenedCells()[2]).getNumberValue()); + assertEquals(1, (int) evaluator.evaluate(arrayFmla.getFlattenedCells()[3]).getNumberValue()); + + assertEquals(4, (int) evaluator.evaluate(b3).getNumberValue()); + assertEquals(15, (int) evaluator.evaluate(c3).getNumberValue()); + + } +}