From 9e46ee937ca353ef7a5f3e9809e9d17100fdcd55 Mon Sep 17 00:00:00 2001 From: Tim Allison Date: Thu, 18 Feb 2016 01:49:59 +0000 Subject: [PATCH] 59021 -- fix content extraction from namespaced elements in XSSFEventBasedExcelExtractor git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1730992 13f79535-47bb-0310-9956-ffa450edef68 --- .../eventusermodel/XSSFSheetXMLHandler.java | 65 ++++++++++-------- .../TestXSSFEventBasedExcelExtractor.java | 9 +++ test-data/spreadsheet/59021.xlsx | Bin 0 -> 1933 bytes 3 files changed, 47 insertions(+), 27 deletions(-) create mode 100644 test-data/spreadsheet/59021.xlsx diff --git a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java index 24a0564d1..6ad897c3f 100644 --- a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java +++ b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java @@ -41,7 +41,7 @@ import org.xml.sax.helpers.DefaultHandler; */ public class XSSFSheetXMLHandler extends DefaultHandler { private static final POILogger logger = POILogFactory.getLogger(XSSFSheetXMLHandler.class); - + static final String SPREADSHEETML_NS = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"; /** * These are the different kinds of cells we support. * We keep track of the current one between @@ -186,17 +186,21 @@ public class XSSFSheetXMLHandler extends DefaultHandler { @Override @SuppressWarnings("unused") - public void startElement(String uri, String localName, String name, + public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { - if (isTextTag(name)) { + if (uri != null && ! uri.equals(SPREADSHEETML_NS)) { + return; + } + + if (isTextTag(localName)) { vIsOpen = true; // Clear contents cache value.setLength(0); - } else if ("is".equals(name)) { + } else if ("is".equals(localName)) { // Inline string outer tag isIsOpen = true; - } else if ("f".equals(name)) { + } else if ("f".equals(localName)) { // Clear contents cache formula.setLength(0); @@ -231,14 +235,14 @@ public class XSSFSheetXMLHandler extends DefaultHandler { fIsOpen = true; } } - else if("oddHeader".equals(name) || "evenHeader".equals(name) || - "firstHeader".equals(name) || "firstFooter".equals(name) || - "oddFooter".equals(name) || "evenFooter".equals(name)) { + else if("oddHeader".equals(localName) || "evenHeader".equals(localName) || + "firstHeader".equals(localName) || "firstFooter".equals(localName) || + "oddFooter".equals(localName) || "evenFooter".equals(localName)) { hfIsOpen = true; // Clear contents cache headerFooter.setLength(0); } - else if("row".equals(name)) { + else if("row".equals(localName)) { String rowNumStr = attributes.getValue("r"); if(rowNumStr != null) { rowNum = Integer.parseInt(rowNumStr) - 1; @@ -248,7 +252,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler { output.startRow(rowNum); } // c => cell - else if ("c".equals(name)) { + else if ("c".equals(localName)) { // Set up defaults. this.nextDataType = xssfDataType.NUMBER; this.formatIndex = -1; @@ -269,11 +273,13 @@ public class XSSFSheetXMLHandler extends DefaultHandler { else { // Number, but almost certainly with a special style or format XSSFCellStyle style = null; - if (cellStyleStr != null) { - int styleIndex = Integer.parseInt(cellStyleStr); - style = stylesTable.getStyleAt(styleIndex); - } else if (stylesTable.getNumCellStyles() > 0) { - style = stylesTable.getStyleAt(0); + if (stylesTable != null) { + if (cellStyleStr != null) { + int styleIndex = Integer.parseInt(cellStyleStr); + style = stylesTable.getStyleAt(styleIndex); + } else if (stylesTable.getNumCellStyles() > 0) { + style = stylesTable.getStyleAt(0); + } } if (style != null) { this.formatIndex = style.getDataFormat(); @@ -286,12 +292,17 @@ public class XSSFSheetXMLHandler extends DefaultHandler { } @Override - public void endElement(String uri, String localName, String name) + public void endElement(String uri, String localName, String qName) throws SAXException { + + if (uri != null && ! uri.equals(SPREADSHEETML_NS)) { + return; + } + String thisStr = null; // v => contents of a cell - if (isTextTag(name)) { + if (isTextTag(localName)) { vIsOpen = false; // Process the value contents as required, now we have it all @@ -364,11 +375,11 @@ public class XSSFSheetXMLHandler extends DefaultHandler { // Output output.cell(cellRef, thisStr, comment); - } else if ("f".equals(name)) { + } else if ("f".equals(localName)) { fIsOpen = false; - } else if ("is".equals(name)) { + } else if ("is".equals(localName)) { isIsOpen = false; - } else if ("row".equals(name)) { + } else if ("row".equals(localName)) { // Handle any "missing" cells which had comments attached checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_ROW); @@ -377,19 +388,19 @@ public class XSSFSheetXMLHandler extends DefaultHandler { // some sheets do not have rowNum set in the XML, Excel can read them so we should try to read them as well nextRowNum = rowNum + 1; - } else if ("sheetData".equals(name)) { + } else if ("sheetData".equals(localName)) { // Handle any "missing" cells which had comments attached checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_SHEET_DATA); } - else if("oddHeader".equals(name) || "evenHeader".equals(name) || - "firstHeader".equals(name)) { + else if("oddHeader".equals(localName) || "evenHeader".equals(localName) || + "firstHeader".equals(localName)) { hfIsOpen = false; - output.headerFooter(headerFooter.toString(), true, name); + output.headerFooter(headerFooter.toString(), true, localName); } - else if("oddFooter".equals(name) || "evenFooter".equals(name) || - "firstFooter".equals(name)) { + else if("oddFooter".equals(localName) || "evenFooter".equals(localName) || + "firstFooter".equals(localName)) { hfIsOpen = false; - output.headerFooter(headerFooter.toString(), false, name); + output.headerFooter(headerFooter.toString(), false, localName); } } diff --git a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java index b56b3791f..2be777d6f 100644 --- a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java @@ -337,4 +337,13 @@ public class TestXSSFEventBasedExcelExtractor extends TestCase { extractor.close(); } } + + public void test59021() throws Exception { + XSSFEventBasedExcelExtractor ex = + new XSSFEventBasedExcelExtractor( + XSSFTestDataSamples.openSamplePackage("59021.xlsx")); + String text = ex.getText(); + assertTrue("can't find Abhkazia", text.contains("Abkhazia - Fixed")); + assertTrue("can't find 10/02/2016", text.contains("10/02/2016")); + } } diff --git a/test-data/spreadsheet/59021.xlsx b/test-data/spreadsheet/59021.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..0f485d270e687e927dc0ea4abe98aa4ff12adb09 GIT binary patch literal 1933 zcmWIWW@Zs#VBp|jNcHvd;F|yL)ky{h1}#Pg27Vx2k)vOpUzDAcpP#K)k(<-oZqJ@ zz3ZrSWYgKyDa{vEQs(-(Se|*da+R4{g#5>%_q(pzvtDG&`dE9t{=m7kpo$HOdk<(^ z2eo7`D`ypO%!+UMvhIa+&mZ=-;}`kA96PKTaK>C};mv13G3xVAiC;E-=9l(v>F)Rd z6kngYb>V_8(8n4;%nkH)d{JsnvA!ONWYl21{_p+n{WkVM?<KQ00 zq~;~(r)1`(+bWfor0H5H+1uF!rRF4-096!cWEK=F0JY>5+bU(0loVL$>lY_yq~<0T z>*W`u<^g5W@{4j4OMu*>bp3+FD)zw9MpG zm;B_?+|;}hEC%bNs!&o0Ni0fFEwNS7M~ibM1)r65BylQaXv6hlMH6f={QR7)iV zeLEX{6#v>mLY0YugF$6ppvR4?Cd)g3AzI18z#xuDOT`(fsU^kwU<#C`=1%eUJ8U4( zwmvD9l{-s)&#D#tQqfR`T?Xx7}iTS=}FT9bsH}YGaE+w}xE(e;=Fu z$K>o#vfaOa~@JDU`;1Z9X)k z)@P+3x5mzQD{N0x>4!%Y1?v|&f0b)Y?_G1}V#h}7s3Jw?*&905?iB2HC{>D>o?jO9 z+meep=~tuKjU#={EL#L8NiR(4ztvE?-2RVqw|I^FF)e0!+p?GHc?~f(`jfro=k55h z<3SPgt>b<%Ws{yykSb)EWa66Eubn?J(_3S6yoz!7rU{2`6zeZxwD~mG-z0yo>?P); z+#T_5($(i;x`1d-%;K-x9;|PgtZgJJni97<=3?#lw#wM9eT>&Q*XQTWtljg?Wv$r~ zj+o}_^5T4wse-)`70*kL9^GrVrKn2!{qj3@!Rcm4+_YM9npOntW>n&TDpin~ld>c{ zK+@XHWxv9cGOeWBxxd_R>hz@lpK$hY#Xr@>Yws^h4?poh7&S}CngmpC07jY|Fao6@ zkq5~Y$i)FThxDE|I+BzV?<{;3OwYX=Y&Dx#$FR zHlEzH|F4Su`~F2y%O=L2Xul`LcUyAb=)nD$jt?|8fN9M0`)Y2+Xb?xV!K7SY= zU;U+6;vyKKyl2@7#_1o8`F`j{HZ@+=Wslz;oe&|mkR`2O~3-sR|pg)Cy zbhLARUI{SA#Dg+gaV#i-uAOM>cf>%Tb$!xK*Nx1qP8ui09Jobd5}x=7W&c$_pTe2G z_<=_ZV}4VRf|QEZ>3QGpnH&GB`JchpUH+(LW)9EVJt12~Bz)fFN<>?J4!C>5`TCwc zsw#m?8lx-?mq)KX>GPs7#7Dv_wc{r9^lZ6Ors5XCuc5K=*}}}p54z4vZpwaF*l=@W zMZ~GkZMtQe@67H>UiI=g8Oa=T>X*nb`3YKl&89cpjejQvPBC)w7gnlXel56i!^0%r zYx|9it$s;=sn!aev${=fc80snRMoWGi!0jB^gr;P7dNM__va2Xm#WvV>(lQ({%-f> zSNL7=0B=Sn5eD290x*b>09sLjt{J^tMd