From ee4ba764b7cfec9086e2070c6b235d3785942022 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Tue, 20 May 2008 16:57:20 +0000 Subject: [PATCH] Patch from bug #45001 - Partial fix for HWPF Range.insertBefore() and Range.delete() with unicode characters git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@658349 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/changes.xml | 1 + src/documentation/content/xdocs/status.xml | 1 + .../poi/hwpf/model/FileInformationBlock.java | 10 ++++ .../org/apache/poi/hwpf/model/TextPiece.java | 8 +++ .../org/apache/poi/hwpf/usermodel/Range.java | 56 +++++++++++++++++++ 5 files changed, 76 insertions(+) diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index ebb063f27..824713479 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,6 +37,7 @@ + 45001 - Partial fix for HWPF Range.insertBefore() and Range.delete() with unicode characters 44977 - Support for AM/PM in excel date formats Support for specifying a policy to HSSF on missing / blank cells when fetching 44937 - Partial support for extracting Escher images from HWPF files diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 2167a4257..12bdd0dd9 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 45001 - Partial fix for HWPF Range.insertBefore() and Range.delete() with unicode characters 44977 - Support for AM/PM in excel date formats Support for specifying a policy to HSSF on missing / blank cells when fetching 44937 - Partial support for extracting Escher images from HWPF files diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/FileInformationBlock.java b/src/scratchpad/src/org/apache/poi/hwpf/model/FileInformationBlock.java index 48e6d78b3..887e13d82 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/FileInformationBlock.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/FileInformationBlock.java @@ -294,6 +294,16 @@ public class FileInformationBlock extends FIBAbstractType _longHandler.setLong(FIBLongHandler.CBMAC, cbMac); } + public int getCcpText() + { + return _longHandler.getLong(FIBLongHandler.CCPTEXT); + } + + public void setCcpText(int ccpText) + { + _longHandler.setLong(FIBLongHandler.CCPTEXT, ccpText); + } + public void clearOffsetsSizes() { _fieldHandler.clearFields(); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java index 67c634d9f..bc33954df 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java @@ -90,12 +90,20 @@ public class TextPiece extends PropertyNode implements Comparable public void adjustForDelete(int start, int length) { + + if (usesUnicode()) { + + start /= 2; + length /= 2; + } + int myStart = getStart(); int myEnd = getEnd(); int end = start + length; /* do we have to delete from this text piece? */ if (start <= myEnd && end >= myStart) { + /* find where the deleted area overlaps with this text piece */ int overlapStart = Math.max(myStart, start); int overlapEnd = Math.min(myEnd, end); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java index f2d9a615f..85592a92a 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java @@ -226,6 +226,25 @@ public class Range } } + /** + * Does any TextPiece in this Range use unicode? + * + * @return true if it does and false if it doesn't + */ + public boolean usesUnicode() { + + initText(); + + for (int i = _textStart; i < _textEnd; i++) + { + TextPiece piece = (TextPiece)_text.get(i); + if (piece.usesUnicode()) + return true; + } + + return false; + } + /** * Gets the text that this Range contains. * @@ -306,13 +325,19 @@ public class Range // Since this is the first item in our list, it is safe to assume that // _start >= tp.getStart() int insertIndex = _start - tp.getStart(); + if (tp.usesUnicode()) + insertIndex /= 2; sb.insert(insertIndex, text); + int adjustedLength = _doc.getTextTable().adjustForInsert(_textStart, text.length()); _doc.getCharacterTable().adjustForInsert(_charStart, adjustedLength); _doc.getParagraphTable().adjustForInsert(_parStart, adjustedLength); _doc.getSectionTable().adjustForInsert(_sectionStart, adjustedLength); adjustForInsert(text.length()); + // update the FIB.CCPText field + adjustFIB(text.length()); + return getCharacterRun(0); } @@ -489,6 +514,7 @@ public class Range public void delete() { + initAll(); int numSections = _sections.size(); @@ -519,6 +545,12 @@ public class Range TextPiece piece = (TextPiece)_text.get(x); piece.adjustForDelete(_start, _end - _start); } + + // update the FIB.CCPText field + if (usesUnicode()) + adjustFIB(-((_end - _start) / 2)); + else + adjustFIB(-(_end - _start)); } /** @@ -827,6 +859,19 @@ public class Range _sectionRangeFound = false; } + /** + * Adjust the value of FIB.CCPText after an insert or a delete... + * + * @param adjustment The (signed) value that should be added to FIB.CCPText + */ + protected void adjustFIB(int adjustment) { + + // update the FIB.CCPText field (this should happen once per adjustment, so we don't want it in + // adjustForInsert() or it would get updated multiple times if the range has a parent) + // without this, OpenOffice.org (v. 2.2.x) does not see all the text in the document + _doc.getFileInformationBlock().setCcpText(_doc.getFileInformationBlock().getCcpText() + adjustment); + } + /** * adjust this range after an insert happens. * @param length the length to adjust for @@ -834,6 +879,7 @@ public class Range private void adjustForInsert(int length) { _end += length; + reset(); Range parent = (Range)_parent.get(); if (parent != null) @@ -842,4 +888,14 @@ public class Range } } + + public int getStartOffset() { + + return _start; + } + + public int getEndOffset() { + + return _end; + } }