Get most of the hwpf tests passing again
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@684322 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3ce7ac40ac
commit
567db8a3dc
@ -128,7 +128,7 @@ public class WordExtractor extends POIOLE2TextExtractor {
|
|||||||
TextPiece piece = (TextPiece) textPieces.next();
|
TextPiece piece = (TextPiece) textPieces.next();
|
||||||
|
|
||||||
String encoding = "Cp1252";
|
String encoding = "Cp1252";
|
||||||
if (piece.usesUnicode()) {
|
if (piece.isUnicode()) {
|
||||||
encoding = "UTF-16LE";
|
encoding = "UTF-16LE";
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
|
@ -104,7 +104,6 @@ public class SectionTable
|
|||||||
}
|
}
|
||||||
int FC = TP.getPieceDescriptor().getFilePosition();
|
int FC = TP.getPieceDescriptor().getFilePosition();
|
||||||
int offset = CP - TP.getCP();
|
int offset = CP - TP.getCP();
|
||||||
if(TP.usesUnicode()) offset*=2;
|
|
||||||
FC = FC+offset-((TextPiece)_text.get(0)).getPieceDescriptor().getFilePosition();
|
FC = FC+offset-((TextPiece)_text.get(0)).getPieceDescriptor().getFilePosition();
|
||||||
return FC;
|
return FC;
|
||||||
}
|
}
|
||||||
@ -120,12 +119,12 @@ public class SectionTable
|
|||||||
|
|
||||||
if (fc <= piece.getEnd())
|
if (fc <= piece.getEnd())
|
||||||
{
|
{
|
||||||
cp += ((fc - piece.getStart())/ (piece.usesUnicode() ? 2 : 1));
|
cp += (fc - piece.getStart());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
cp += ((piece.getEnd() - piece.getStart())/ (piece.usesUnicode() ? 2 : 1));
|
cp += (piece.getEnd() - piece.getStart());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return cp;
|
return cp;
|
||||||
|
@ -73,9 +73,9 @@ public class TextPiece extends PropertyNode implements Comparable
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return If this text piece uses unicode
|
* @return If this text piece is unicode
|
||||||
*/
|
*/
|
||||||
public boolean usesUnicode()
|
public boolean isUnicode()
|
||||||
{
|
{
|
||||||
return _usesUnicode;
|
return _usesUnicode;
|
||||||
}
|
}
|
||||||
|
@ -239,7 +239,7 @@ public class Range
|
|||||||
for (int i = _textStart; i < _textEnd; i++)
|
for (int i = _textStart; i < _textEnd; i++)
|
||||||
{
|
{
|
||||||
TextPiece piece = (TextPiece)_text.get(i);
|
TextPiece piece = (TextPiece)_text.get(i);
|
||||||
if (piece.usesUnicode())
|
if (piece.isUnicode())
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -332,8 +332,6 @@ public class Range
|
|||||||
// Since this is the first item in our list, it is safe to assume that
|
// Since this is the first item in our list, it is safe to assume that
|
||||||
// _start >= tp.getStart()
|
// _start >= tp.getStart()
|
||||||
int insertIndex = _start - tp.getStart();
|
int insertIndex = _start - tp.getStart();
|
||||||
if (tp.usesUnicode())
|
|
||||||
insertIndex /= 2;
|
|
||||||
sb.insert(insertIndex, text);
|
sb.insert(insertIndex, text);
|
||||||
|
|
||||||
int adjustedLength = _doc.getTextTable().adjustForInsert(_textStart, text.length());
|
int adjustedLength = _doc.getTextTable().adjustForInsert(_textStart, text.length());
|
||||||
|
@ -87,7 +87,7 @@ public class TestDifferentRoutes extends TestCase {
|
|||||||
TextPiece piece = (TextPiece) textPieces.next();
|
TextPiece piece = (TextPiece) textPieces.next();
|
||||||
|
|
||||||
String encoding = "Cp1252";
|
String encoding = "Cp1252";
|
||||||
if (piece.usesUnicode()) {
|
if (piece.isUnicode()) {
|
||||||
encoding = "UTF-16LE";
|
encoding = "UTF-16LE";
|
||||||
}
|
}
|
||||||
String text = new String(piece.getRawBytes(), encoding);
|
String text = new String(piece.getRawBytes(), encoding);
|
||||||
|
@ -118,9 +118,9 @@ public class TestTextPieceTable extends TestCase {
|
|||||||
TextPiece tpB = (TextPiece)tbl.getTextPieces().get(1);
|
TextPiece tpB = (TextPiece)tbl.getTextPieces().get(1);
|
||||||
TextPiece tpC = (TextPiece)tbl.getTextPieces().get(2);
|
TextPiece tpC = (TextPiece)tbl.getTextPieces().get(2);
|
||||||
|
|
||||||
assertTrue(tpA.usesUnicode());
|
assertTrue(tpA.isUnicode());
|
||||||
assertTrue(tpB.usesUnicode());
|
assertTrue(tpB.isUnicode());
|
||||||
assertTrue(tpC.usesUnicode());
|
assertTrue(tpC.isUnicode());
|
||||||
|
|
||||||
assertEquals(256, tpA.characterLength());
|
assertEquals(256, tpA.characterLength());
|
||||||
assertEquals(256, tpB.characterLength());
|
assertEquals(256, tpB.characterLength());
|
||||||
@ -147,9 +147,9 @@ public class TestTextPieceTable extends TestCase {
|
|||||||
tpB = (TextPiece)tbl.getTextPieces().get(1);
|
tpB = (TextPiece)tbl.getTextPieces().get(1);
|
||||||
tpC = (TextPiece)tbl.getTextPieces().get(2);
|
tpC = (TextPiece)tbl.getTextPieces().get(2);
|
||||||
|
|
||||||
assertTrue(tpA.usesUnicode());
|
assertTrue(tpA.isUnicode());
|
||||||
assertTrue(tpB.usesUnicode());
|
assertTrue(tpB.isUnicode());
|
||||||
assertTrue(tpC.usesUnicode());
|
assertTrue(tpC.isUnicode());
|
||||||
|
|
||||||
assertEquals(256, tpA.characterLength());
|
assertEquals(256, tpA.characterLength());
|
||||||
assertEquals(256, tpB.characterLength());
|
assertEquals(256, tpB.characterLength());
|
||||||
|
@ -0,0 +1,82 @@
|
|||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
package org.apache.poi.hwpf.usermodel;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
|
||||||
|
import org.apache.poi.hwpf.HWPFDocFixture;
|
||||||
|
import org.apache.poi.hwpf.HWPFDocument;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests to ensure that our ranges end up with
|
||||||
|
* the right text in them, and the right font/styling
|
||||||
|
* properties applied to them.
|
||||||
|
*/
|
||||||
|
public class TestRangeProperties extends TestCase {
|
||||||
|
private static final char page_break = (char)12;
|
||||||
|
|
||||||
|
private static final String u_page_1 =
|
||||||
|
"This is a fairly simple word document, over two pages, with headers and footers.\r" +
|
||||||
|
"The trick with this one is that it contains some Unicode based strings in it.\r" +
|
||||||
|
"Firstly, some currency symbols:\r" +
|
||||||
|
"\tGBP - \u00a3\r" +
|
||||||
|
"\tEUR - \u20ac\r" +
|
||||||
|
"Now, we\u2019ll have some French text, in bold and big:\r" +
|
||||||
|
"\tMoli\u00e8re\r" +
|
||||||
|
"And some normal French text:\r" +
|
||||||
|
"\tL'Avare ou l'\u00c9cole du mensonge\r" +
|
||||||
|
"That\u2019s it for page one\r"
|
||||||
|
;
|
||||||
|
private static final String u_page_2 =
|
||||||
|
"This is page two. Les Pr\u00e9cieuses ridicules. The end.\r"
|
||||||
|
;
|
||||||
|
|
||||||
|
private HWPFDocument u;
|
||||||
|
// TODO - a non unicode document too
|
||||||
|
|
||||||
|
private String dirname;
|
||||||
|
|
||||||
|
protected void setUp() throws Exception {
|
||||||
|
dirname = System.getProperty("HWPF.testdata.path");
|
||||||
|
u = new HWPFDocument(
|
||||||
|
new FileInputStream(new File(dirname, "HeaderFooterUnicode.doc"))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testUnicodeTextParagraphs() throws Exception {
|
||||||
|
Range r = u.getRange();
|
||||||
|
assertEquals(
|
||||||
|
u_page_1 +
|
||||||
|
page_break + "\r" +
|
||||||
|
u_page_2,
|
||||||
|
r.text()
|
||||||
|
);
|
||||||
|
|
||||||
|
assertEquals(
|
||||||
|
5,
|
||||||
|
r.numParagraphs()
|
||||||
|
);
|
||||||
|
|
||||||
|
System.out.println(r.getParagraph(2).text());
|
||||||
|
}
|
||||||
|
public void testUnicodeStyling() throws Exception {
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user