diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index 2323f3a4c..2c5adef95 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,7 +37,8 @@ - 45622 - Support stripping HWPF fields (eg macros) out of text, via Range.stripFields(text) + 45623 - Support stripping HSSF header and footer fields (eg page number) out of header and footer text if required + 45622 - Support stripping HWPF fields (eg macros) out of text, via Range.stripFields(text) New HPSF based TextExtractor for document metadata, org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor Properly update the array of Slide's text runs in HSLF when new text shapes are added 45590 - Fix for Header/footer extraction for .ppt files saved in Office 2007 diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 8b9c2353a..9d64f5feb 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,7 +34,8 @@ - 45622 - Support stripping HWPF fields (eg macros) out of text, via Range.stripFields(text) + 45623 - Support stripping HSSF header and footer fields (eg page number) out of header and footer text if required + 45622 - Support stripping HWPF fields (eg macros) out of text, via Range.stripFields(text) New HPSF based TextExtractor for document metadata, org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor Properly update the array of Slide's text runs in HSLF when new text shapes are added 45590 - Fix for Header/footer extraction for .ppt files saved in Office 2007 diff --git a/src/java/org/apache/poi/hssf/usermodel/HeaderFooter.java b/src/java/org/apache/poi/hssf/usermodel/HeaderFooter.java index 6120e6d53..f4c416bbf 100644 --- a/src/java/org/apache/poi/hssf/usermodel/HeaderFooter.java +++ b/src/java/org/apache/poi/hssf/usermodel/HeaderFooter.java @@ -16,6 +16,8 @@ 
==================================================================== */ package org.apache.poi.hssf.usermodel; +import java.util.ArrayList; + /** * Common class for {@link HSSFHeader} and * {@link HSSFFooter}. @@ -25,6 +27,8 @@ public abstract class HeaderFooter { protected String center; protected String right; + private boolean stripFields = false; + protected HeaderFooter(String text) { while (text != null && text.length() > 1) { int pos = text.length(); @@ -70,6 +74,8 @@ public abstract class HeaderFooter { * @return The string representing the left side. */ public String getLeft() { + if(stripFields) + return stripFields(left); return left; } public abstract void setLeft( String newLeft ); @@ -79,6 +85,8 @@ public abstract class HeaderFooter { * @return The string representing the center. */ public String getCenter() { + if(stripFields) + return stripFields(center); return center; } public abstract void setCenter( String newCenter ); @@ -88,11 +96,14 @@ public abstract class HeaderFooter { * @return The string representing the right side. */ public String getRight() { + if(stripFields) + return stripFields(right); return right; } public abstract void setRight( String newRight ); + /** * Returns the string that represents the change in font size. 
@@ -122,9 +133,8 @@ public abstract class HeaderFooter { * * @return The special string for page number */ - public static String page() - { - return "&P"; + public static String page() { + return PAGE_FIELD.sequence; } /** @@ -132,9 +142,8 @@ public abstract class HeaderFooter { * * @return The special string for the number of pages */ - public static String numPages() - { - return "&N"; + public static String numPages() { + return NUM_PAGES_FIELD.sequence; } /** @@ -142,9 +151,8 @@ public abstract class HeaderFooter { * * @return The special string for the date */ - public static String date() - { - return "&D"; + public static String date() { + return DATE_FIELD.sequence; } /** @@ -152,9 +160,8 @@ public abstract class HeaderFooter { * * @return The special string for the time */ - public static String time() - { - return "&T"; + public static String time() { + return TIME_FIELD.sequence; } /** @@ -162,9 +169,8 @@ public abstract class HeaderFooter { * * @return The special string for the file name */ - public static String file() - { - return "&F"; + public static String file() { + return FILE_FIELD.sequence; } /** @@ -172,9 +178,8 @@ public abstract class HeaderFooter { * * @return The special string for tab name */ - public static String tab() - { - return "&A"; + public static String tab() { + return TAB_FIELD.sequence; } /** @@ -182,9 +187,8 @@ public abstract class HeaderFooter { * * @return The special string for start underline */ - public static String startUnderline() - { - return "&U"; + public static String startUnderline() { + return UNDERLINE_FIELD.sequence; } /** @@ -192,9 +196,8 @@ public abstract class HeaderFooter { * * @return The special string for end underline */ - public static String endUnderline() - { - return "&U"; + public static String endUnderline() { + return UNDERLINE_FIELD.sequence; } /** @@ -202,9 +205,8 @@ public abstract class HeaderFooter { * * @return The special string for start double underline */ - public static String 
startDoubleUnderline() - { - return "&E"; + public static String startDoubleUnderline() { + return DOUBLE_UNDERLINE_FIELD.sequence; } /** @@ -212,8 +214,80 @@ public abstract class HeaderFooter { * * @return The special string for end double underline */ - public static String endDoubleUnderline() - { - return "&E"; + public static String endDoubleUnderline() { + return DOUBLE_UNDERLINE_FIELD.sequence; + } + + + /** + * Removes any fields (eg macros, page markers etc) + * from the string. A null or empty string is returned unchanged. + * Normally used to make some text suitable for showing + * to humans, and the resultant text should not normally + * be saved back into the document! + */ + public static String stripFields(String text) { + // Nothing to strip from a missing or empty string + if(text == null || text.length() == 0) return text; + int pos; + + // Firstly, do the easy ones which are static + for(int i=0; i<Field.ALL_FIELDS.size(); i++) { + String seq = ((Field)Field.ALL_FIELDS.get(i)).sequence; + while((pos = text.indexOf(seq)) > -1) { + text = text.substring(0, pos) + + text.substring(pos+seq.length()); + } + } + + // Now do the tricky, dynamic ones + text = text.replaceAll("\\&\\d+", ""); + text = text.replaceAll("\\&\".*?,.*?\"", ""); + + // All done + return text; + } + + + /** + * Are fields currently being stripped from + * the text that this {@link HeaderFooter} returns? + * Default is false, but can be changed + */ + public boolean areFieldsStripped() { + return stripFields; + } + /** + * Should fields (eg macros) be stripped from + * the text that this class returns? + * Default is not to strip. 
+ * @param stripFields true to strip fields from the text returned by the getters, false to return it unmodified + */ + public void setAreFieldsStripped(boolean stripFields) { + this.stripFields = stripFields; + } + + + public static final Field TAB_FIELD = new Field("&A"); + public static final Field DATE_FIELD = new Field("&D"); + public static final Field FILE_FIELD = new Field("&F"); + public static final Field PAGE_FIELD = new Field("&P"); + public static final Field TIME_FIELD = new Field("&T"); + public static final Field NUM_PAGES_FIELD = new Field("&N"); + public static final Field UNDERLINE_FIELD = new Field("&U"); + public static final Field DOUBLE_UNDERLINE_FIELD = new Field("&E"); + + /** + * Represents a special field in a header or footer, + * eg the page number. Each instance is recorded in ALL_FIELDS on construction. + */ + public static class Field { + private static final ArrayList ALL_FIELDS = new ArrayList(); + /** The character sequence that marks this field */ + public final String sequence; + private Field(String sequence) { + this.sequence = sequence; + ALL_FIELDS.add(this); + } } } diff --git a/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFHeaderFooter.java b/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFHeaderFooter.java index ddc953555..59a1d1f81 100644 --- a/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFHeaderFooter.java +++ b/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFHeaderFooter.java @@ -17,11 +17,6 @@ package org.apache.poi.hssf.usermodel; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileInputStream; - import junit.framework.TestCase; import org.apache.poi.hssf.HSSFTestDataSamples; @@ -49,6 +44,46 @@ public final class TestHSSFHeaderFooter extends TestCase { assertEquals("Top Center", head.getCenter()); assertEquals("Top Right", head.getRight()); } + + public void testSpecialChars() { + assertEquals("&U", HSSFHeader.startUnderline()); + assertEquals("&U", HSSFHeader.endUnderline()); + assertEquals("&P", HSSFHeader.page()); + + assertEquals("&22", HSSFFooter.fontSize((short)22)); + 
assertEquals("&\"Arial,bold\"", HSSFFooter.font("Arial", "bold")); + } + + public void testStripFields() { + String simple = "I am a test header"; + String withPage = "I am a&P test header"; + String withLots = "I&A am&N a&P test&T header&U"; + String withFont = "I&22 am a&\"Arial,bold\" test header"; + String withOtherAnds = "I am a&P test header&Z"; + + assertEquals(simple, HSSFHeader.stripFields(simple)); + assertEquals(simple, HSSFHeader.stripFields(withPage)); + assertEquals(simple, HSSFHeader.stripFields(withLots)); + assertEquals(simple, HSSFHeader.stripFields(withFont)); + assertEquals(simple + "&Z", HSSFHeader.stripFields(withOtherAnds)); + + // Now test the default strip flag + HSSFWorkbook wb = HSSFTestDataSamples.openSampleWorkbook("EmbeddedChartHeaderTest.xls"); + HSSFSheet s = wb.getSheetAt( 0 ); + HSSFHeader head = s.getHeader(); + + assertEquals("Top Left", head.getLeft()); + assertEquals("Top Center", head.getCenter()); + assertEquals("Top Right", head.getRight()); + + head.setLeft("Top &P&F&D Left"); + assertEquals("Top &P&F&D Left", head.getLeft()); + assertFalse(head.areFieldsStripped()); + + head.setAreFieldsStripped(true); + assertEquals("Top Left", head.getLeft()); + assertTrue(head.areFieldsStripped()); + } /** * Tests that get header retreives the proper values.