From af51ea4c1059148ea43c51604efea0f9351467ba Mon Sep 17 00:00:00 2001
From: Tim Allison <tallison@apache.org>
Date: Tue, 4 Apr 2017 02:06:46 +0000
Subject: [PATCH] bug 50955 -- word 6.0 charset fix

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1790061 13f79535-47bb-0310-9956-ffa450edef68
---
 .../org/apache/poi/TestAllFiles.java          |   3 +
 .../org/apache/poi/util/CodePageUtil.java     |  10 ++
 .../poi/util/LittleEndianBig5Stream.java      | 107 ++++++++++++
 src/java/org/apache/poi/util/StringUtil.java  |  32 +++-
 .../org/apache/poi/hwmf/record/HwmfFont.java  |   2 +-
 .../org/apache/poi/hwpf/HWPFOldDocument.java  | 110 ++++++++++--
 .../poi/hwpf/model/ComplexFileTable.java      |  21 ++-
 .../apache/poi/hwpf/model/OldCHPBinTable.java |   2 +-
 .../poi/hwpf/model/OldComplexFileTable.java   |  42 +++++
 .../src/org/apache/poi/hwpf/model/OldFfn.java | 161 ++++++++++++++++++
 .../apache/poi/hwpf/model/OldFontTable.java   |  84 +++++++++
 .../apache/poi/hwpf/model/OldTextPiece.java   | 120 +++++++++++++
 .../poi/hwpf/model/OldTextPieceTable.java     | 119 +++++++++++++
 .../apache/poi/hwpf/model/PAPBinTable.java    |   2 +-
 .../poi/hwpf/model/PieceDescriptor.java       |  72 +++++---
 .../org/apache/poi/hwpf/model/TextPiece.java  |  10 +-
 .../apache/poi/hwpf/model/TextPieceTable.java |  14 +-
 .../poi/hwpf/usermodel/CharacterRun.java      |   5 +
 .../converter/TestWordToConverterSuite.java   |  21 ++-
 .../apache/poi/hwpf/usermodel/TestBugs.java   |   8 +-
 .../hwpf/usermodel/TestHWPFOldDocument.java   |  90 +++++++++-
 test-data/document/Bug60936.doc               | Bin 0 -> 6656 bytes
 test-data/document/Bug60942.doc               | Bin 0 -> 20480 bytes
 test-data/document/Bug60942b.doc              | Bin 0 -> 6144 bytes
 24 files changed, 971 insertions(+), 64 deletions(-)
 create mode 100644 src/java/org/apache/poi/util/LittleEndianBig5Stream.java
 create mode 100644 src/scratchpad/src/org/apache/poi/hwpf/model/OldComplexFileTable.java
 create mode 100644 src/scratchpad/src/org/apache/poi/hwpf/model/OldFfn.java
 create mode 100644 src/scratchpad/src/org/apache/poi/hwpf/model/OldFontTable.java
 create mode 100644 src/scratchpad/src/org/apache/poi/hwpf/model/OldTextPiece.java
 create mode 100644 src/scratchpad/src/org/apache/poi/hwpf/model/OldTextPieceTable.java
 create mode 100644 test-data/document/Bug60936.doc
 create mode 100644 test-data/document/Bug60942.doc
 create mode 100644 test-data/document/Bug60942b.doc
diff --git a/src/integrationtest/org/apache/poi/TestAllFiles.java b/src/integrationtest/org/apache/poi/TestAllFiles.java
index 25ac41a9b..19edc1455 100644
--- a/src/integrationtest/org/apache/poi/TestAllFiles.java
+++ b/src/integrationtest/org/apache/poi/TestAllFiles.java
@@ -218,6 +218,9 @@ public class TestAllFiles {
         "document/Word6_sections2.doc",
         "document/Word95.doc",
         "document/word95err.doc",
+        "document/Bug60936.doc",
+        "document/Bug60942.doc",
+        "document/Bug60942b.doc",
         "hpsf/TestMickey.doc",
         "document/52117.doc"
     );
diff --git a/src/java/org/apache/poi/util/CodePageUtil.java b/src/java/org/apache/poi/util/CodePageUtil.java
index 145929182..5be1c5077 100644
--- a/src/java/org/apache/poi/util/CodePageUtil.java
+++ b/src/java/org/apache/poi/util/CodePageUtil.java
@@ -18,6 +18,9 @@
 package org.apache.poi.util;
 
 import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
+import java.util.HashSet;
+import java.util.Set;
 
 /**
  * Utilities for working with Microsoft CodePages.
@@ -27,6 +30,13 @@ import java.io.UnsupportedEncodingException;
  */
 public class CodePageUtil
 {
+
+    public static final Set<Charset> VARIABLE_BYTE_CHARSETS = new HashSet<Charset>();
+    static {
+        //others?
+        VARIABLE_BYTE_CHARSETS.add(StringUtil.BIG5);
+    }
+
     /** <p>Codepage 037, a special case</p> */
     public static final int CP_037 = 37;
 
diff --git a/src/java/org/apache/poi/util/LittleEndianBig5Stream.java b/src/java/org/apache/poi/util/LittleEndianBig5Stream.java
new file mode 100644
index 000000000..f68b1cdb9
--- /dev/null
+++ b/src/java/org/apache/poi/util/LittleEndianBig5Stream.java
@@ -0,0 +1,107 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.util;
+
+import java.io.ByteArrayInputStream;
+
+/**
+ * Stream that converts MSOffice's way of storing Big5 (little-endian byte
+ * pairs, with ASCII padded by a zero byte) into plain Big5 byte order.
+ */
+@Internal
+public class LittleEndianBig5Stream extends ByteArrayInputStream {
+    private static final int EOF = -1;
+    private static final int INVALID_PAIR = -2;
+    private static final int EMPTY_TRAILING = -3;
+
+    //the char that is logically trailing in Big5 encoding
+    //however in LittleEndian order, this is the first encountered.
+    int trailing = EMPTY_TRAILING;
+    public LittleEndianBig5Stream(byte[] buf) {
+        super(buf);
+    }
+
+    public LittleEndianBig5Stream(byte[] buf, int offset, int length) {
+        super(buf, offset, length);
+    }
+
+    @Override
+    public int read() {
+        //hand out a pending trailing byte before touching the buffer again
+        if (trailing != EMPTY_TRAILING) {
+            int tmp = trailing;
+            trailing = EMPTY_TRAILING;
+            return tmp;
+        }
+        int leading = readNext();
+        while (leading == INVALID_PAIR) {
+            leading = readNext();
+        }
+
+        if (leading == EOF) {
+            return EOF;
+        }
+        return leading;
+    }
+
+    //returns leading, sets trailing appropriately
+    //returns -1 if it hits the end of the stream; no byte is left pending then
+    //returns -2 for an invalid big5 code pair (not produced at the moment)
+    private final int readNext() {
+        trailing = super.read();
+        if (trailing == EOF) {
+            trailing = EMPTY_TRAILING;
+            return EOF;
+        }
+        int leading = super.read();
+        if (leading == EOF) {
+            //odd byte at the very end: drop it here, otherwise a later
+            //call to read() would return it after EOF was already reported
+            trailing = EMPTY_TRAILING;
+            return EOF;
+        }
+        if (leading > 0x80) {
+            //double byte char: lead byte now, trailing on the next read()
+            return leading;
+        }
+        //leading == 0 is the zero padding of a single byte char; any other
+        //low value cannot start a pair and is handled the same way for now
+        //(candidate for INVALID_PAIR): only the first byte is passed on
+        int ret = trailing;
+        trailing = EMPTY_TRAILING;
+        return ret;
+    }
+
+    @Override
+    public int read(byte[] buff, int off, int len) {
+        int bytesRead = 0;
+        for (int i = off; i < off+len; i++) {
+            int b = read();
+            if (b == -1) {
+                if (bytesRead == 0) {
+                    return -1;
+                } else {
+                    return bytesRead;
+                }
+            }
+            bytesRead++;
+            buff[i] = (byte)b;
+        }
+        return bytesRead;
+    }
+}
diff --git a/src/java/org/apache/poi/util/StringUtil.java b/src/java/org/apache/poi/util/StringUtil.java
index 20a6824c9..5d09dff56 100644
--- a/src/java/org/apache/poi/util/StringUtil.java
+++ b/src/java/org/apache/poi/util/StringUtil.java
@@ -17,6 +17,8 @@
 
 package org.apache.poi.util;
 
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
 import java.nio.charset.Charset;
 import java.util.HashMap;
 import java.util.Iterator;
@@ -27,9 +29,14 @@ import java.util.Map;
  */
 @Internal
 public class StringUtil {
+
+    private static final POILogger logger = POILogFactory
+            .getLogger(StringUtil.class);
     protected static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");
-    protected static final Charset UTF16LE = Charset.forName("UTF-16LE");
+    public static final Charset UTF16LE = Charset.forName("UTF-16LE");
     public static final Charset UTF8 = Charset.forName("UTF-8");
+    public static final Charset WIN_1252 = Charset.forName("cp1252");
+    public static final Charset BIG5 = Charset.forName("Big5");
 
     private static Map<Integer,Integer> msCodepointToUnicode;
 
@@ -573,7 +580,28 @@ public class StringUtil {
        9133, // 0xf0fe bracerightbt
        ' ', // 0xf0ff not defined
    };
-   
+
+    /**
+     * This tries to convert a LE byte array in Big5 to a String.
+     * We know MS zero-padded ascii, and we drop those.
+     * However, there may be areas for improvement in this.
+     *
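+     * @param data byte array holding the little-endian Big5 text
+     * @param offset index in data at which to start reading
+     * @param lengthInBytes number of bytes to read, starting at offset
+     * @return the decoded String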
+     */
+   public static String littleEndianBig5Stream(byte[] data, int offset, int lengthInBytes) {
+       ByteArrayOutputStream os = new ByteArrayOutputStream();
+       try {
+           IOUtils.copy(new LittleEndianBig5Stream(data, offset, lengthInBytes), os);
+       } catch (IOException e) {
+           logger.log(POILogger.WARN,
+                   "IOException while copying a byte array stream to a byte array stream?!");
+       }
+       return new String(os.toByteArray(), BIG5);
+   }
+
    // Could be replaced with org.apache.commons.lang3.StringUtils#join
    @Internal
    public static String join(Object[] array, String separator) {
diff --git a/src/scratchpad/src/org/apache/poi/hwmf/record/HwmfFont.java b/src/scratchpad/src/org/apache/poi/hwmf/record/HwmfFont.java
index 703faa153..f6e256381 100644
--- a/src/scratchpad/src/org/apache/poi/hwmf/record/HwmfFont.java
+++ b/src/scratchpad/src/org/apache/poi/hwmf/record/HwmfFont.java
@@ -108,7 +108,7 @@ public class HwmfFont {
             return charset;
         }
 
-        static WmfCharset valueOf(int flag) {
+        public static WmfCharset valueOf(int flag) {
             for (WmfCharset cs : values()) {
                 if (cs.flag == flag) return cs;
             }
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java
index 6ff9f29bc..505789e2c 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java
@@ -19,27 +19,43 @@ package org.apache.poi.hwpf;
 import java.io.File;
 import java.io.IOException;
 import java.io.OutputStream;
+import java.nio.charset.Charset;
 
+import org.apache.poi.hwmf.record.HwmfFont;
 import org.apache.poi.hwpf.model.ComplexFileTable;
+import org.apache.poi.hwpf.model.FontTable;
 import org.apache.poi.hwpf.model.OldCHPBinTable;
+import org.apache.poi.hwpf.model.OldComplexFileTable;
+import org.apache.poi.hwpf.model.OldFfn;
+import org.apache.poi.hwpf.model.OldFontTable;
 import org.apache.poi.hwpf.model.OldPAPBinTable;
 import org.apache.poi.hwpf.model.OldSectionTable;
+import org.apache.poi.hwpf.model.OldTextPieceTable;
 import org.apache.poi.hwpf.model.PieceDescriptor;
 import org.apache.poi.hwpf.model.TextPiece;
 import org.apache.poi.hwpf.model.TextPieceTable;
 import org.apache.poi.hwpf.usermodel.Range;
 import org.apache.poi.poifs.filesystem.DirectoryNode;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.util.CodePageUtil;
 import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.NotImplemented;
+import org.apache.poi.util.StringUtil;
 
 /**
  * Provides very simple support for old (Word 6 / Word 95)
  *  files.
  */
 public class HWPFOldDocument extends HWPFDocumentCore {
-    private TextPieceTable tpt;
+
+    private final static Charset DEFAULT_CHARSET = StringUtil.WIN_1252;
+
+    private OldTextPieceTable tpt;
     
     private StringBuilder _text;
+
+    private final OldFontTable fontTable;
+    private final Charset guessedCharset;
     
     public HWPFOldDocument(POIFSFileSystem fs) throws IOException {
         this(fs.getRoot());
@@ -56,45 +72,52 @@ public class HWPFOldDocument extends HWPFDocumentCore {
         int chpTableSize   = LittleEndian.getInt(_mainStream, 0xbc);
         int papTableOffset = LittleEndian.getInt(_mainStream, 0xc0);
         int papTableSize   = LittleEndian.getInt(_mainStream, 0xc4);
-        //int shfTableOffset = LittleEndian.getInt(_mainStream, 0x60);
-        //int shfTableSize   = LittleEndian.getInt(_mainStream, 0x64);
+        int fontTableOffset = LittleEndian.getInt(_mainStream, 0xd0);
+        int fontTableSize = LittleEndian.getInt(_mainStream, 0xd4);
+
+        fontTable = new OldFontTable(_mainStream, fontTableOffset, fontTableSize);
+        //TODO: figure out how to map runs/text pieces to fonts
+        //for now, if there's a non standard codepage in one of the fonts
+        //assume that the doc is in that codepage.
+        guessedCharset = guessCodePage(fontTable);
+
         int complexTableOffset = LittleEndian.getInt(_mainStream, 0x160);
         
         // We need to get hold of the text that makes up the
         //  document, which might be regular or fast-saved
         ComplexFileTable cft = null;
-        StringBuffer text = new StringBuffer();
         if(_fib.getFibBase().isFComplex()) {
-            cft = new ComplexFileTable(
+            cft = new OldComplexFileTable(
                     _mainStream, _mainStream,
-                    complexTableOffset, _fib.getFibBase().getFcMin()
+                    complexTableOffset, _fib.getFibBase().getFcMin(), guessedCharset
             );
-            tpt = cft.getTextPieceTable();
+            tpt = (OldTextPieceTable)cft.getTextPieceTable();
             
-            for(TextPiece tp : tpt.getTextPieces()) {
-                text.append( tp.getStringBuilder() );
-            }
         } else {
             // TODO Discover if these older documents can ever hold Unicode Strings?
             //  (We think not, because they seem to lack a Piece table)
             // TODO Build the Piece Descriptor properly
             //  (We have to fake it, as they don't seem to have a proper Piece table)
-            PieceDescriptor pd = new PieceDescriptor(new byte[] {0,0, 0,0,0,127, 0,0}, 0);
+            PieceDescriptor pd = new PieceDescriptor(new byte[] {0,0, 0,0,0,127, 0,0}, 0, guessedCharset);
             pd.setFilePosition(_fib.getFibBase().getFcMin());
 
             // Generate a single Text Piece Table, with a single Text Piece
             //  which covers all the (8 bit only) text in the file
-            tpt = new TextPieceTable();
+            tpt = new OldTextPieceTable();
             byte[] textData = new byte[_fib.getFibBase().getFcMac()-_fib.getFibBase().getFcMin()];
             System.arraycopy(_mainStream, _fib.getFibBase().getFcMin(), textData, 0, textData.length);
+
+            int numChars = textData.length;
+            if (CodePageUtil.VARIABLE_BYTE_CHARSETS.contains(guessedCharset)) {
+                numChars /= 2;
+            }
+
             TextPiece tp = new TextPiece(
-                    0, textData.length, textData, pd
+                    0, numChars, textData, pd
             );
             tpt.add(tp);
             
-            text.append(tp.getStringBuilder());
         }
-        
         _text = tpt.getText();
 
         // Now we can fetch the character and paragraph properties
@@ -133,12 +156,54 @@ public class HWPFOldDocument extends HWPFDocumentCore {
         }
     }
 
+
+    /**
+     * Take the first codepage that is not default, ansi or symbol.
+     * Ideally, we'd want to track fonts with runs, but we don't yet
+     * know how to do that.
+     *
+     * Consider throwing an exception if > 1 unique codepage that is not default, symbol or ansi
+     * appears here.
+     *
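+     * @param fontTable font table whose entries are scanned for a charset hint
+     * @return charset of the first such font, or the cp1252 default if there is none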
+     */
+    private Charset guessCodePage(OldFontTable fontTable) {
+
+        for (OldFfn oldFfn : fontTable.getFontNames()) {
+            HwmfFont.WmfCharset wmfCharset = HwmfFont.WmfCharset.valueOf(oldFfn.getChs()& 0xff);
+            if (wmfCharset != null &&
+                    wmfCharset != HwmfFont.WmfCharset.ANSI_CHARSET &&
+                    wmfCharset != HwmfFont.WmfCharset.DEFAULT_CHARSET &&
+                    wmfCharset != HwmfFont.WmfCharset.SYMBOL_CHARSET ) {
+                return wmfCharset.getCharset();
+            }
+        }
+        return DEFAULT_CHARSET;
+    }
+
     public Range getOverallRange()
     {
         // Life is easy when we have no footers, headers or unicode!
         return new Range( 0, _fib.getFibBase().getFcMac() - _fib.getFibBase().getFcMin(), this );
     }
 
+    /**
+     * Use {@link #getOldFontTable()} instead!!!
+     * This always throws an UnsupportedOperationException.
+     *
+     * @return nothing
+     * @throws UnsupportedOperationException
+     */
+    @Override
+    @NotImplemented
+    public FontTable getFontTable() {
+        throw new UnsupportedOperationException("Use getOldFontTable instead.");
+    }
+
+    public OldFontTable getOldFontTable() {
+        return fontTable;
+    }
     public Range getRange()
     {
         return getOverallRange();
@@ -167,4 +232,19 @@ public class HWPFOldDocument extends HWPFDocumentCore {
     public void write(OutputStream out) throws IOException {
         throw new IllegalStateException("Writing is not available for the older file formats");
     }
+
+    /**
+     * As a rough heuristic (total hack), read through the font table
+     * and take the first non-default, non-ansi, non-symbol
+     * font's charset and return that.
+     *
+     * Once we figure out how to link a font to a text piece, we should
+     * use the font information per text piece.
+     *
+     * @return charset
+     */
+    public Charset getGuessedCharset() {
+        return guessedCharset;
+    }
+
 }
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/ComplexFileTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/ComplexFileTable.java
index 42a2fb987..dc530bd64 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/ComplexFileTable.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/ComplexFileTable.java
@@ -18,6 +18,7 @@
 package org.apache.poi.hwpf.model;
 
 import java.io.IOException;
+import java.nio.charset.Charset;
 import java.util.LinkedList;
 import java.util.List;
 
@@ -26,9 +27,10 @@ import org.apache.poi.hwpf.model.io.HWPFOutputStream;
 import org.apache.poi.hwpf.sprm.SprmBuffer;
 import org.apache.poi.util.Internal;
 import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.StringUtil;
 
 @Internal
-public final class ComplexFileTable {
+public class ComplexFileTable {
     private static final byte GRPPRL_TYPE = 1;
     private static final byte TEXT_PIECE_TABLE_TYPE = 2;
 
@@ -40,7 +42,8 @@ public final class ComplexFileTable {
         _tpt = new TextPieceTable();
     }
 
-    public ComplexFileTable(byte[] documentStream, byte[] tableStream, int offset, int fcMin) throws IOException {
+    protected ComplexFileTable(byte[] documentStream, byte[] tableStream, int offset, int fcMin,
+                               Charset charset) throws IOException {
         //skips through the prms before we reach the piece table. These contain data
         //for actual fast saved files
         List<SprmBuffer> sprmBuffers = new LinkedList<SprmBuffer>();
@@ -61,7 +64,12 @@ public final class ComplexFileTable {
         }
         int pieceTableSize = LittleEndian.getInt(tableStream, ++offset);
         offset += LittleEndian.INT_SIZE;
-        _tpt = new TextPieceTable(documentStream, tableStream, offset, pieceTableSize, fcMin);
+        _tpt = newTextPieceTable(documentStream, tableStream, offset, pieceTableSize, fcMin, charset);
+
+    }
+
+    public ComplexFileTable(byte[] documentStream, byte[] tableStream, int offset, int fcMin) throws IOException {
+        this(documentStream, tableStream, offset, fcMin, StringUtil.WIN_1252);
     }
 
     public TextPieceTable getTextPieceTable() {
@@ -92,4 +100,11 @@ public final class ComplexFileTable {
         tableStream.write(table);
     }
 
+    protected TextPieceTable newTextPieceTable(byte[] documentStream,
+                                               byte[] tableStream, int offset, int pieceTableSize, int fcMin,
+                                               Charset charset) {
+        return new TextPieceTable(documentStream, tableStream, offset, pieceTableSize, fcMin);
+    }
+
+
 }
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/OldCHPBinTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/OldCHPBinTable.java
index bc3f4869b..45061ad65 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/OldCHPBinTable.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/OldCHPBinTable.java
@@ -44,7 +44,7 @@ public final class OldCHPBinTable extends CHPBinTable
    * @param fcMin
    */
   public OldCHPBinTable(byte[] documentStream, int offset,
-                     int size, int fcMin, TextPieceTable tpt)
+                     int size, int fcMin, OldTextPieceTable tpt)
   {
     PlexOfCps binTable = new PlexOfCps(documentStream, offset, size, 2);
 
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/OldComplexFileTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/OldComplexFileTable.java
new file mode 100644
index 000000000..25510c89e
--- /dev/null
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/OldComplexFileTable.java
@@ -0,0 +1,42 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hwpf.model;
+
+import java.io.IOException;
+import java.nio.charset.Charset;
+
+import org.apache.poi.util.Internal;
+
+@Internal
+public final class OldComplexFileTable extends ComplexFileTable {
+    /** Passes all arguments, charset included, straight to the ComplexFileTable constructor. */
+    public OldComplexFileTable(byte[] documentStream, byte[] tableStream,
+                               int offset, int fcMin, Charset charset) throws IOException {
+        super(documentStream, tableStream, offset, fcMin, charset);
+    }
+
+    /** Returns an OldTextPieceTable built from these arguments instead of a plain TextPieceTable. */
+    @Override
+    protected TextPieceTable newTextPieceTable(byte[] documentStream,
+                                               byte[] tableStream, int offset,
+                                               int pieceTableSize, int fcMin, Charset charset) {
+        return new OldTextPieceTable(documentStream, tableStream, offset, pieceTableSize, fcMin, charset);
+    }
+
+
+}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/OldFfn.java b/src/scratchpad/src/org/apache/poi/hwpf/model/OldFfn.java
new file mode 100644
index 000000000..d50ac4ec0
--- /dev/null
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/OldFfn.java
@@ -0,0 +1,161 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hwpf.model;
+
+import java.nio.charset.Charset;
+
+import org.apache.poi.hwmf.record.HwmfFont;
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+import org.apache.poi.util.StringUtil;
+
+/**
+ * Word 6.0 Font information
+ */
+@Internal
+public final class OldFfn {
+
+    private static final POILogger logger = POILogFactory.getLogger(OldFfn.class);
+
+    private final byte _chs;// character set identifier
+
+    private final String fontName;
+    private final String altFontName;
+
+    private final int length; //length in bytes for this record
+
+    /**
+     * try to read an OldFfn starting at offset; read no farther than end
+     *
+     * @param buf          buffer from which to read
+     * @param offset       offset at which to start
+     * @param fontTableEnd read no farther than this
+     * @return an OldFfn, or null if asked to read beyond end or if the font name has no zero terminator
+     */
+    static OldFfn build(byte[] buf, int offset, int fontTableEnd) {
+        int start = offset;
+        //preliminary bytes
+        if (offset + 6 > fontTableEnd) {
+            return null;
+        }
+        //first byte: declared record length; a byte is signed in Java, so mask it
+        int fontDescriptionLength = buf[offset] & 0xff;
+        offset += 1;
+        if (offset + fontDescriptionLength > fontTableEnd) {
+            logger.log(POILogger.WARN, "Asked to read beyond font table end. Skipping font");
+            return null;
+        }
+
+        //no idea what these 3 bytes do
+        offset += 3;
+        byte chs = buf[offset];
+        Charset charset = null;
+        HwmfFont.WmfCharset wmfCharset = HwmfFont.WmfCharset.valueOf(chs & 0xff);
+        if (wmfCharset == null) {
+            logger.log(POILogger.WARN, "Couldn't find font for type: " + (chs & 0xff));
+        } else {
+            charset = wmfCharset.getCharset();
+        }
+        charset = charset == null ? StringUtil.WIN_1252 : charset;
+        offset += LittleEndian.BYTE_SIZE;
+        //if this byte here == 7, it _may_ signify existence of
+        //an alternate font name
+
+        //not sure what the byte after the _chs does
+        offset += LittleEndian.BYTE_SIZE;
+        int fontNameLength = -1;
+        for (int i = offset; i < fontTableEnd; i++) {
+            if (buf[i] == 0) {
+                fontNameLength = i - offset;
+                break;
+            }
+        }
+        if (fontNameLength == -1) {
+            logger.log(POILogger.WARN, "Couldn't find the zero-byte delimited font name length");
+            return null;
+        }
+        String fontName = new String(buf, offset, fontNameLength, charset);
+        String altFontName = null;
+        int altFontNameLength = -1;
+        offset += fontNameLength + 1;
+        if (offset - start < fontDescriptionLength) {
+            for (int i = offset; i <= start + fontDescriptionLength; i++) {
+                if (buf[i] == 0) {
+                    altFontNameLength = i - offset;
+                    break;
+                }
+            }
+            if (altFontNameLength > -1) {
+                altFontName = new String(buf, offset, altFontNameLength, charset);
+            }
+        }
+        //reset to 0 for length calculation
+        altFontNameLength = (altFontNameLength < 0) ? 0 : altFontNameLength + 1;//add one for zero byte
+
+        int len = LittleEndian.INT_SIZE + LittleEndian.BYTE_SIZE + LittleEndian.BYTE_SIZE +//6 starting bytes
+                fontNameLength + altFontNameLength + 1;//+1 is for the zero byte
+        //this len should == fontDescriptionLength
+        return new OldFfn(chs, fontName, altFontName, len);
+    }
+
+    /** Plain value constructor; {@code length} is the size of the record in bytes. */
+    public OldFfn(byte charsetIdentifier, String fontName, String altFontName, int length) {
+        this._chs = charsetIdentifier;
+        this.fontName = fontName;
+        this.altFontName = altFontName;
+        this.length = length;
+    }
+
+    /** @return the character set identifier byte */
+    public byte getChs() {
+        return _chs;
+    }
+
+    /** @return the (main) font name */
+    public String getMainFontName() {
+        return fontName;
+    }
+
+    /**
+     * @return altFontName if it exists, null otherwise
+     */
+    public String getAltFontName() {
+        return altFontName;
+    }
+
+    /**
+     * @return length in bytes for this record
+     */
+    public int getLength() {
+        return length;
+    }
+
+    @Override
+    public String toString() {
+        return "OldFfn{" +
+                "_chs=" + (_chs & 0xff) +
+                ", fontName='" + fontName + '\'' +
+                ", altFontName='" + altFontName + '\'' +
+                ", length=" + length +
+                '}';
+    }
+}
+
+
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/OldFontTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/OldFontTable.java
new file mode 100644
index 000000000..dfe1f95e0
--- /dev/null
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/OldFontTable.java
@@ -0,0 +1,84 @@
+/* ====================================================================
+     Licensed to the Apache Software Foundation (ASF) under one or more
+     contributor license agreements.    See the NOTICE file distributed with
+     this work for additional information regarding copyright ownership.
+     The ASF licenses this file to You under the Apache License, Version 2.0
+     (the "License"); you may not use this file except in compliance with
+     the License.    You may obtain a copy of the License at
+
+             http://www.apache.org/licenses/LICENSE-2.0
+
+     Unless required by applicable law or agreed to in writing, software
+     distributed under the License is distributed on an "AS IS" BASIS,
+     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     See the License for the specific language governing permissions and
+     limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hwpf.model;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+
+/**
+ * Font table for Word 6.0
+ */
+@Internal
+public final class OldFontTable {
+    private final static POILogger _logger = POILogFactory.getLogger(OldFontTable.class);
+
+    // added extra facilitator members
+    // FFN structure containing strings of font names
+    private final OldFfn[] _fontNames;
+
+    /** Reads font records from buf, starting at offset and going no farther than offset + length. */
+    public OldFontTable(byte[] buf, int offset, int length) {
+        //length is stored at the index section in the table
+        //and it is recorded in the first short (read below, but not otherwise used).
+
+        List<OldFfn> ffns = new ArrayList<OldFfn>();
+        int fontTableLength = LittleEndian.getShort(buf, offset);
+
+        int endOfTableOffset = offset + length;
+        int startOffset = offset + LittleEndian.SHORT_SIZE;//first short should == length!
+
+        //OldFfn.build returns null once no further record can be read
+        while (true) {
+            OldFfn oldFfn = OldFfn.build(buf, startOffset, endOfTableOffset);
+            if (oldFfn == null) {
+                break;
+            }
+            ffns.add(oldFfn);
+            startOffset += oldFfn.getLength();
+        }
+        _fontNames = ffns.toArray(new OldFfn[ffns.size()]);
+    }
+
+    /** @return the font records in the order they were read (the internal array, not a copy) */
+    public OldFfn[] getFontNames() {
+        return _fontNames;
+    }
+
+    /** @return main font name for index chpFtc, or null if the index is outside the table */
+    public String getMainFont(int chpFtc) {
+        if (chpFtc < 0 || chpFtc >= _fontNames.length) {
+            _logger.log(POILogger.INFO, "Mismatch in chpFtc with stringCount");
+            return null;
+        }
+
+        return _fontNames[chpFtc].getMainFontName();
+    }
+
+    @Override
+    public String toString() {
+        return "OldFontTable{" +
+                "_fontNames=" + Arrays.toString(_fontNames) +
+                '}';
+    }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/OldTextPiece.java b/src/scratchpad/src/org/apache/poi/hwpf/model/OldTextPiece.java
new file mode 100644
index 000000000..c82635bc3
--- /dev/null
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/OldTextPiece.java
@@ -0,0 +1,120 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hwpf.model;
+
+
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.NotImplemented;
+
+/**
+ * Text piece of a Word 6.0 document.
+ * Besides the decoded text held by {@link TextPiece}, it keeps the raw bytes
+ * it was built from. Start and end offsets are in characters, not bytes.
+ * {@link #isUnicode()}, {@link #substring(int, int)} and
+ * {@link #adjustForDelete(int, int)} always throw UnsupportedOperationException.
+ */
+@Internal
+public class OldTextPiece extends TextPiece {
+
+    private final byte[] rawBytes;
+
+    /**
+     * @param start Beginning offset in main document stream, in characters.
+     * @param end   Ending offset in main document stream, in characters.
+     * @param text  The raw bytes of our text
+     * @param pd    The piece descriptor, passed on to {@link TextPiece}
+     * @throws IllegalStateException if end is before start
+     */
+    public OldTextPiece(int start, int end, byte[] text, PieceDescriptor pd) {
+        super(start, end, text, pd);
+        this.rawBytes = text;
+        if (end < start) {
+            throw new IllegalStateException("Told we're of negative size! start=" + start + " end=" + end);
+        }
+    }
+
+    /**
+     * Not implemented for OldTextPiece.
+     * @return never returns normally
+     * @throws UnsupportedOperationException always
+     */
+    @NotImplemented
+    @Override
+    public boolean isUnicode() {
+        throw new UnsupportedOperationException();
+    }
+
+    /** @return the text buffer of this piece, as a StringBuilder */
+    public StringBuilder getStringBuilder() {
+        return (StringBuilder) _buf;
+    }
+
+    /** @return a fresh copy of the raw bytes this piece was built from */
+    @Override
+    public byte[] getRawBytes() {
+        return rawBytes.clone();
+    }
+
+    /**
+     * Not implemented for OldTextPiece.
+     * @param start Local start position, in characters
+     * @param end   Local end position, in characters
+     * @throws UnsupportedOperationException always
+     */
+    @Deprecated
+    @NotImplemented
+    public String substring(int start, int end) {
+        throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Not implemented for OldTextPiece.
+     * @throws UnsupportedOperationException always
+     */
+    @Deprecated
+    @NotImplemented
+    public void adjustForDelete(int start, int length) {
+        throw new UnsupportedOperationException();
+    }
+
+    /** Returns the length, in bytes */
+    public int bytesLength() {
+        return rawBytes.length;
+    }
+
+    /**
+     * Not designed for hashing: fails an assertion when assertions are
+     * enabled, otherwise returns the same constant for every instance.
+     */
+    @Override
+    public int hashCode() {
+        assert false : "hashCode not designed";
+        return 42; // any arbitrary constant will do
+    }
+
+    /** Returns the character position we start at. */
+    public int getCP() {
+        return getStart();
+    }
+
+    @Override
+    public String toString() {
+        return "OldTextPiece from " + getStart() + " to " + getEnd() + " ("
+                + getPieceDescriptor() + ")";
+    }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/OldTextPieceTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/OldTextPieceTable.java
new file mode 100644
index 000000000..3fd34ade0
--- /dev/null
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/OldTextPieceTable.java
@@ -0,0 +1,119 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf.model;
+
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Collections;
+
+import org.apache.poi.util.CodePageUtil;
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+
+
+@Internal
+public class OldTextPieceTable extends TextPieceTable {
+    private static final POILogger logger = POILogFactory
+            .getLogger(OldTextPieceTable.class);
+
+    public OldTextPieceTable() {
+        super();
+    }
+
+    /**
+     * Builds the text pieces of a Word 6.0 document from its piece descriptors.
+     * @param documentStream bytes the text of each piece is copied from
+     * @param tableStream    bytes holding the plex of piece descriptors
+     * @param offset         offset handed to PlexOfCps for reading tableStream
+     * @param size           size handed to PlexOfCps for reading tableStream
+     * @param fcMin          subtracted from each file position to find the minimum cp
+     * @param charset        charset handed to every {@link PieceDescriptor}; may be null
+     */
+    public OldTextPieceTable(byte[] documentStream, byte[] tableStream,
+                             int offset, int size, int fcMin, Charset charset) {
+        // get our plex of PieceDescriptors
+        PlexOfCps pieceTable = new PlexOfCps(tableStream, offset, size,
+                PieceDescriptor.getSizeInBytes());
+
+        int length = pieceTable.length();
+        PieceDescriptor[] pieces = new PieceDescriptor[length];
+
+        // turn the raw bytes of each plex entry into a PieceDescriptor
+        for (int x = 0; x < length; x++) {
+            GenericPropertyNode node = pieceTable.getProperty(x);
+            pieces[x] = new PieceDescriptor(node.getBytes(), 0, charset);
+        }
+
+        // Figure out the cp of the earliest text piece
+        // Note that text pieces don't have to be stored in order!
+        _cpMin = pieces[0].getFilePosition() - fcMin;
+        for (PieceDescriptor piece : pieces) {
+            int start = piece.getFilePosition() - fcMin;
+            if (start < _cpMin) {
+                _cpMin = start;
+            }
+        }
+
+        // using the PieceDescriptors, build our list of TextPieces.
+        for (int x = 0; x < pieces.length; x++) {
+            int start = pieces[x].getFilePosition();
+            GenericPropertyNode node = pieceTable.getProperty(x);
+
+            // Grab the start and end, which are in characters
+            int nodeStartChars = node.getStart();
+            int nodeEndChars = node.getEnd();
+
+            // Figure out the length, in bytes and chars
+            int textSizeChars = (nodeEndChars - nodeStartChars);
+            int textSizeBytes = textSizeChars * bytesPerChar(pieces[x].isUnicode(), charset);
+
+            // Grab the data that makes up the piece
+            byte[] buf = new byte[textSizeBytes];
+            System.arraycopy(documentStream, start, buf, 0, textSizeBytes);
+
+            // And now build the piece
+            _textPieces.add(newTextPiece(nodeStartChars, nodeEndChars, buf, pieces[x]));
+        }
+
+        // In the interest of our sanity, now sort the text pieces into order, if they're not already
+        Collections.sort(_textPieces);
+        _textPiecesFCOrder = new ArrayList<TextPiece>(_textPieces);
+        Collections.sort(_textPiecesFCOrder, new FCComparator());
+    }
+
+    @Override
+    protected TextPiece newTextPiece(int nodeStartChars, int nodeEndChars, byte[] buf, PieceDescriptor pd) {
+        return new OldTextPiece(nodeStartChars, nodeEndChars, buf, pd);
+    }
+
+    /**
+     * @return bytes per character of the piece, by the same rule the constructor sizes pieces
+     * with; asks the descriptor for the unicode flag because OldTextPiece.isUnicode() throws
+     */
+    @Override
+    protected int getEncodingMultiplier(TextPiece textPiece) {
+        PieceDescriptor pd = textPiece.getPieceDescriptor();
+        return bytesPerChar(pd.isUnicode(), pd.getCharset());
+    }
+
+    // 2 for Unicode pieces and for charsets listed in VARIABLE_BYTE_CHARSETS, otherwise 1
+    private static int bytesPerChar(boolean unicode, Charset charset) {
+        boolean wide = unicode || (charset != null && CodePageUtil.VARIABLE_BYTE_CHARSETS.contains(charset));
+        return wide ? 2 : 1;
+    }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java
index 34c29511c..3979009f2 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java
@@ -260,7 +260,7 @@ public class PAPBinTable
             SprmBuffer sprmBuffer = null;
             for ( PAPX papx : papxs )
             {
-                if ( papx.getGrpprl() == null || papx.getGrpprl().length == 0 )
+                if ( papx.getGrpprl() == null || papx.getGrpprl().length <= 2 )
                     continue;
 
                 if ( sprmBuffer == null ) {
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PieceDescriptor.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PieceDescriptor.java
index a190f1db0..53dcc1745 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/PieceDescriptor.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PieceDescriptor.java
@@ -17,10 +17,13 @@
 
 package org.apache.poi.hwpf.model;
 
+import java.nio.charset.Charset;
+
 import org.apache.poi.util.BitField;
 import org.apache.poi.util.BitFieldFactory;
 import org.apache.poi.util.Internal;
 import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.StringUtil;
 
 @Internal
 public final class PieceDescriptor
@@ -32,29 +35,51 @@ public final class PieceDescriptor
    private static BitField fCopied = BitFieldFactory.getInstance(0x04);
   int fc;
   PropertyModifier prm;
-  boolean unicode;
+  boolean unicode = false;
+  private final Charset charset;
 
 
-  public PieceDescriptor(byte[] buf, int offset)
-  {
-    descriptor = LittleEndian.getShort(buf, offset);
-    offset += LittleEndian.SHORT_SIZE;
-    fc = LittleEndian.getInt(buf, offset);
-    offset += LittleEndian.INT_SIZE;
-    prm = new PropertyModifier( LittleEndian.getShort(buf, offset));
-
-    // see if this piece uses unicode.
-    if ((fc & 0x40000000) == 0)
-    {
-        unicode = true;
-    }
-    else
-    {
-        unicode = false;
-        fc &= ~(0x40000000);//gives me FC in doc stream
-        fc /= 2;
+    public PieceDescriptor(byte[] buf, int offset) {
+        this(buf, offset, null);
     }
 
+    /**
+     * Reads a piece descriptor from buf, with an optional explicit charset.
+     * This initializer should only be used for HWPFOldDocuments.
+     *
+     * @param buf buffer holding the descriptor
+     * @param offset offset of the descriptor within buf
+     * @param charset which charset to use if this is not unicode; when null the
+     *                encoding is taken from the second most significant bit of fc
+     */
+  public PieceDescriptor(byte[] buf, int offset, Charset charset) {
+      descriptor = LittleEndian.getShort(buf, offset);
+      offset += LittleEndian.SHORT_SIZE;
+      fc = LittleEndian.getInt(buf, offset);
+      offset += LittleEndian.INT_SIZE;
+      prm = new PropertyModifier(LittleEndian.getShort(buf, offset));
+      if (charset == null) {
+          // see if this piece uses unicode.
+          //From the documentation: If the second most significant bit
+          //is clear, then this indicates the actual file offset of the Unicode character (two bytes). If the
+          //second most significant bit is set, then the actual address of the codepage-1252
+          //compressed version of the Unicode character (one byte), is actually at the offset indicated
+          //by clearing this bit and dividing by two.
+          if ((fc & 0x40000000) == 0) {
+              unicode = true;
+              this.charset = null;
+          } else {
+              unicode = false;
+              fc &= ~(0x40000000);//gives me FC in doc stream
+              fc /= 2;
+              this.charset = StringUtil.WIN_1252;
+          }
+      } else {
+          // compare by value, not identity, so any equal UTF-16LE Charset instance counts
+          unicode = StringUtil.UTF16LE.equals(charset);
+          this.charset = charset;
+      }
+
   }
 
   public int getFilePosition()
@@ -72,6 +97,15 @@ public final class PieceDescriptor
     return unicode;
   }
 
+    /**
+     *
+     * @return charset to use if this is not a Unicode PieceDescriptor
+     * this can be <code>null</code>
+     */
+  public Charset getCharset() {
+    return charset;
+  }
+
     public PropertyModifier getPrm()
     {
         return prm;
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java
index d432f35b6..2a63bda16 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java
@@ -21,6 +21,7 @@ package org.apache.poi.hwpf.model;
 import java.nio.charset.Charset;
 
 import org.apache.poi.util.Internal;
+import org.apache.poi.util.StringUtil;
 
 /**
  * Lightweight representation of a text piece.
@@ -40,7 +41,6 @@ public class TextPiece extends PropertyNode<TextPiece> {
      * @param start Beginning offset in main document stream, in characters.
      * @param end   Ending offset in main document stream, in characters.
      * @param text  The raw bytes of our text
-     * @deprecated Use {@link #TextPiece(int, int, byte[], PieceDescriptor)}
      * instead
      */
     public TextPiece(int start, int end, byte[] text, PieceDescriptor pd,
@@ -72,8 +72,13 @@ public class TextPiece extends PropertyNode<TextPiece> {
      * Create the StringBuilder from the text and unicode flag
      */
     private static StringBuilder buildInitSB(byte[] text, PieceDescriptor pd) {
-        String str = new String(text, Charset.forName(pd.isUnicode() ? "UTF-16LE" : "Cp1252"));
+        byte[] textBuffer = text;
+        if (StringUtil.BIG5.equals(pd.getCharset())) {
+            String txt = new StringBuilder(StringUtil.littleEndianBig5Stream(text, 0, text.length)).toString();
+            return new StringBuilder(txt);
+        }
 
+        String str = new String(textBuffer, 0, textBuffer.length, (pd.isUnicode()) ? StringUtil.UTF16LE : pd.getCharset());
         return new StringBuilder(str);
     }
 
@@ -207,4 +212,5 @@ public class TextPiece extends PropertyNode<TextPiece> {
         return "TextPiece from " + getStart() + " to " + getEnd() + " ("
                 + getPieceDescriptor() + ")";
     }
+
 }
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java
index 0108877c7..bbddd8645 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java
@@ -101,7 +101,7 @@ public class TextPieceTable implements CharIndexTranslator {
             System.arraycopy(documentStream, start, buf, 0, textSizeBytes);
 
             // And now build the piece
-            final TextPiece newTextPiece = new TextPiece(nodeStartChars, nodeEndChars, buf,
+            final TextPiece newTextPiece = newTextPiece(nodeStartChars, nodeEndChars, buf,
                     pieces[x]);
 
             _textPieces.add(newTextPiece);
@@ -114,6 +114,10 @@ public class TextPieceTable implements CharIndexTranslator {
         Collections.sort(_textPiecesFCOrder, new FCComparator());
     }
 
+    protected TextPiece newTextPiece(int nodeStartChars, int nodeEndChars, byte[] buf, PieceDescriptor pd) {
+        return new TextPiece(nodeStartChars, nodeEndChars, buf, pd);
+    }
+
     public void add(TextPiece piece) {
         _textPieces.add(piece);
         _textPiecesFCOrder.add(piece);
@@ -249,7 +253,7 @@ public class TextPieceTable implements CharIndexTranslator {
             if (rangeStartBytes > rangeEndBytes)
                 continue;
 
-            final int encodingMultiplier = textPiece.isUnicode() ? 2 : 1;
+            final int encodingMultiplier = getEncodingMultiplier(textPiece);
 
             final int rangeStartCp = textPiece.getStart()
                     + (rangeStartBytes - tpStart) / encodingMultiplier;
@@ -262,6 +266,10 @@ public class TextPieceTable implements CharIndexTranslator {
         return result.toArray(new int[result.size()][]);
     }
 
+    protected int getEncodingMultiplier(TextPiece textPiece) {
+        return textPiece.isUnicode() ? 2 : 1;
+    }
+
     public int getCpMin() {
         return _cpMin;
     }
@@ -439,7 +447,7 @@ public class TextPieceTable implements CharIndexTranslator {
         return textPlex.toByteArray();
     }
 
-    private static class FCComparator implements Comparator<TextPiece>, Serializable {
+    protected static class FCComparator implements Comparator<TextPiece>, Serializable {
         public int compare(TextPiece textPiece, TextPiece textPiece1) {
             if (textPiece.getPieceDescriptor().fc > textPiece1
                     .getPieceDescriptor().fc) {
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/CharacterRun.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/CharacterRun.java
index 730133319..5c2dc4749 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/CharacterRun.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/CharacterRun.java
@@ -18,6 +18,7 @@
 package org.apache.poi.hwpf.usermodel;
 
 import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.HWPFOldDocument;
 import org.apache.poi.hwpf.model.CHPX;
 import org.apache.poi.hwpf.model.FFData;
 import org.apache.poi.hwpf.model.Ffn;
@@ -438,6 +439,10 @@ public final class CharacterRun extends Range
 
   public String getFontName()
   {
+    if (_doc instanceof HWPFOldDocument) {
+      return ((HWPFOldDocument) _doc).getOldFontTable().getMainFont(_props.getFtcAscii());
+    }
+
     if (_doc.getFontTable() == null)
       // old word format
       return null;
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToConverterSuite.java b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToConverterSuite.java
index f3194bf2f..5a3bc6e38 100644
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToConverterSuite.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToConverterSuite.java
@@ -16,18 +16,19 @@
 ==================================================================== */
 package org.apache.poi.hwpf.converter;
 
-import java.io.File;
-import java.io.FilenameFilter;
-import java.io.StringWriter;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
+import static org.junit.Assert.assertNotNull;
 
 import javax.xml.transform.OutputKeys;
 import javax.xml.transform.Transformer;
 import javax.xml.transform.TransformerFactory;
 import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.stream.StreamResult;
+import java.io.File;
+import java.io.FilenameFilter;
+import java.io.StringWriter;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
 
 import org.apache.poi.POIDataSamples;
 import org.apache.poi.hwpf.HWPFDocumentCore;
@@ -36,8 +37,6 @@ import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
-import static org.junit.Assert.assertNotNull;
-
 @RunWith(Parameterized.class)
 public class TestWordToConverterSuite
 {
@@ -45,7 +44,11 @@ public class TestWordToConverterSuite
      * YK: a quick hack to exclude failing documents from the suite.
      */
     private static List<String> failingFiles = Arrays
-            .asList( "ProblemExtracting.doc" );
+            .asList( "ProblemExtracting.doc",
+                    "Bug50955.doc" //basic extraction works,
+                                    // but these extractors modify the document,
+                                    // which is a no-go for this Word 6.0 file
+            );
 
     @Parameterized.Parameters(name="{index}: {0}")
     public static Iterable<Object[]> files() {
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestBugs.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestBugs.java
index b1e02f35c..1ff7abd25 100644
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestBugs.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestBugs.java
@@ -57,6 +57,7 @@ import junit.framework.TestCase;
  *  against HWPF
  */
 public class TestBugs{
+
     private static final POILogger logger = POILogFactory.getLogger(TestBugs.class);
 
     public static void assertEqualsIgnoreNewline(String expected, String actual )
@@ -536,13 +537,6 @@ public class TestBugs{
         hwpfDocument.getPicturesTable().getAllPictures();
     }
 
-    /**
-     * [FAILING] Bug 50955 - error while retrieving the text file
-     */
-    @Test(expected=IllegalStateException.class)
-    public void test50955() throws IOException {
-        getTextOldFile("Bug50955.doc");
-    }
 
     /**
      * [RESOLVED FIXED] Bug 51604 - replace text fails for doc (poi 3.8 beta
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestHWPFOldDocument.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestHWPFOldDocument.java
index 47017dbf7..bfe22605a 100644
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestHWPFOldDocument.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestHWPFOldDocument.java
@@ -17,14 +17,19 @@
 
 package org.apache.poi.hwpf.usermodel;
 
+import static org.apache.poi.POITestCase.assertContains;
 import static org.junit.Assert.assertEquals;
 
 import java.io.IOException;
+import java.nio.charset.Charset;
 
 import org.apache.poi.OldFileFormatException;
+import org.apache.poi.hwmf.record.HwmfFont;
 import org.apache.poi.hwpf.HWPFOldDocument;
 import org.apache.poi.hwpf.HWPFTestCase;
 import org.apache.poi.hwpf.HWPFTestDataSamples;
+import org.apache.poi.hwpf.extractor.Word6Extractor;
+import org.apache.poi.hwpf.model.OldFontTable;
 import org.junit.Test;
 
 /**
@@ -98,7 +103,7 @@ public final class TestHWPFOldDocument extends HWPFTestCase {
         assertEquals(1, doc.getRange().getParagraph(5).numCharacterRuns());
         // Normal, superscript for 4th, normal
         assertEquals(3, doc.getRange().getParagraph(6).numCharacterRuns());
-        
+
         doc.close();
     }
 
@@ -143,4 +148,87 @@ public final class TestHWPFOldDocument extends HWPFTestCase {
                 doc.getRange().getParagraph(1).text());
         doc.close();
     }
+
+    @Test
+    public void testDefaultCodePageEncoding() throws IOException {
+        HWPFOldDocument doc = HWPFTestDataSamples.openOldSampleFile("Bug60942.doc");
+        Word6Extractor ex = new Word6Extractor(doc);
+        String txt = ex.getText();
+        assertContains(txt, "BERTHOD");
+        assertContains(txt, "APPLICOLOR");
+        assertContains(txt, "les meilleurs");
+        assertContains(txt, "GUY LECOLE");
+        doc.close();//release the sample document once the checks are done
+    }
+
+    @Test
+    public void testCodePageBug50955() throws IOException {
+        //windows 1251
+        HWPFOldDocument doc = HWPFTestDataSamples.openOldSampleFile("Bug50955.doc");
+        Word6Extractor ex = new Word6Extractor(doc);
+        StringBuilder sb = new StringBuilder();
+        for (String p : ex.getParagraphText()) {
+            sb.append(p);
+        }
+        assertContains(sb.toString(), "\u043F\u0440\u0438\u0432\u0435\u0442");//Greetings!
+        doc.close();//release the sample document once the checks are done
+    }
+
+    @Test
+    public void testCodePageBug60936() throws IOException {
+        //windows 1250 -- this test file was generated with OpenOffice
+        //see https://bz.apache.org/ooo/show_bug.cgi?id=12445 for the inspiration
+        HWPFOldDocument doc = HWPFTestDataSamples.openOldSampleFile("Bug60936.doc");
+        Word6Extractor ex = new Word6Extractor(doc);
+        StringBuilder sb = new StringBuilder();
+        for (String p : ex.getParagraphText()) {
+            sb.append(p);
+        }
+        //the expected sentence contains accented letters outside ASCII
+        assertContains(sb.toString(), "4 sk\u00f3re a p\u0159ed 7 lety");
+        doc.close();//release the sample document once the checks are done
+    }
+
+    @Test
+    public void testOldFontTableEncoding() throws IOException {
+        HWPFOldDocument doc = HWPFTestDataSamples.openOldSampleFile("Bug51944.doc");
+        OldFontTable oldFontTable = doc.getOldFontTable();
+        assertEquals(5, oldFontTable.getFontNames().length);
+        assertEquals("\u7D30\u660E\u9AD4", oldFontTable.getFontNames()[0].getMainFontName());
+        //expected value goes first so a failure message labels the two sides correctly
+        assertEquals(Charset.forName("Big5"), HwmfFont.WmfCharset.CHINESEBIG5_CHARSET.getCharset());
+        assertEquals("Times New Roman", oldFontTable.getFontNames()[1].getMainFontName());
+        doc.close();
+    }
+
+    @Test
+    public void testOldFontTableAltName() throws IOException {
+        HWPFOldDocument doc = HWPFTestDataSamples.openOldSampleFile("Bug60942b.doc");
+        OldFontTable oldFontTable = doc.getOldFontTable();
+        assertEquals(5, oldFontTable.getFontNames().length);
+        assertEquals("Roboto", oldFontTable.getFontNames()[3].getMainFontName());
+        assertEquals("arial", oldFontTable.getFontNames()[3].getAltFontName());
+        assertEquals("Roboto", oldFontTable.getFontNames()[4].getMainFontName());
+        assertEquals("arial", oldFontTable.getFontNames()[4].getAltFontName());
+        doc.close();//release the sample document once the checks are done
+    }
+
+    @Test
+    public void test51944() throws IOException {
+        HWPFOldDocument doc = HWPFTestDataSamples.openOldSampleFile("Bug51944.doc");
+        Word6Extractor ex = new Word6Extractor(doc);
+        StringBuilder sb = new StringBuilder();
+        for (String p : ex.getParagraphText()) {
+            sb.append(p.replaceAll("[\r\n]+", "\n"));
+        }
+        String txt = sb.toString();
+        assertContains(txt, "Post and Fax");
+        assertContains(txt, "also maintain");//this is at a critical juncture
+        assertContains(txt, "which are available for");//this too
+        doc.close();//release the sample document once the checks are done
+        //TODO: figure out why these two aren't passing
+//        assertContains(txt, "\u2019\u0078 block2");//make sure smart quote is extracted correctly
+//        assertContains(txt, "We are able to");//not sure if we can get this easily?
+    }
+
 }
diff --git a/test-data/document/Bug60936.doc b/test-data/document/Bug60936.doc
new file mode 100644
index 0000000000000000000000000000000000000000..e7e397d568b80815761bb5e4e9b08a812b736dbe
GIT binary patch
literal 6656
zcmeHL&u<$=6n<l`8@owdC)g%U16>NFfyPduP%0s${4j`$mPSoz5fIU}y=j)%yU}{1
zI*3Ch#1RS90~aJzi9!(w3GoNe3r9qSR3Rh|;erB42ucoAqA}l_wTWXAdu=!*lArZ?
zX5Y-bdGqGY%$vFXb<gtU&-#C$8hV`kv{nkyt}4Djes|?NLgYi9d26LoslrCKO*_Z}
zx68Wn94eT^0KjwJ0R({%z-67~bO#Nst*QqLoF)rDhg5o!Or$m~VWz!P93E5MjhB|y
z`-PYKjq0@fU$;ObTXA`{{VV%-_4?<0_4?;MpVxmE&<%uvJphgX>H&IzJAfz<1LDA)
zKp(Id=m+)z1HfIt-N1f81?~Y30QUm-0rvxgz(HULcmQ}1I0Os>2_OlK0IcUj<#gQw
z?OZb!5U`>j6U=QW#3wq@HTh3Z@qC>$GPYH)W*zmcWoOl+$-{}1HNTkGO-IH8C#RU7
zvNF;eIG@CaEKeSF<<$L$|GAykt+jRG8X^y|i3s-V<u~5DdF#|%<m0!4bm-vc-yH_;
zHGt1IE<XnF`8Eo$kDmb8&z}d_*Rufo`vSm9{&d^*UCy(tQJ_Mjl?+nOUp$vie{uC2
z!TEah{S}=O*Ft>sl3rcWk2M5LvSKyFYpWsQ@SfyBR?-FyCSeF(zg{Ve`mCB+PAlz-
z_<p(a;UWDN^?46jLCQ8?MapqUtMnJzPq)<Ahlp25zi6p>yHQIPctkDC|6%K@rY`)c
zXVu5lobD`jcSq^{>myf|pZLI3J~Wl9rt*=gTxzL<?cNA$yc6gGVgRrC1hBfYN<VIQ
z-P`(Yuz+t-Q2YgYk<ubFOy`KSfO3YOqZ#b(oAUh#o-x;Pfam>ZSK7)3q3w?yWdS~N
zV^N9@AV>(%5rMM}VKSm)1jdMqM@S&LMQ}UhhPJ*i3W7Lwo{<r?jCCWwmwbqMJ5u_r
zwwQC=c;yHUi!h~WhA7iVT1c7ao@!~3#zt?JE+c~8JC-hJ&Z~(jM>DgUoh@YMY-85Z
zY{*-uL0a37=m|P?JgsY4!#vNY`W}k?M)4vA#V(rhQD%Tt8jo;2l|s{c(NWV@7T3fe
zhQ`AxLSU3eB0}_vC@Ck5f`fR8Pjy%R0hT|ZEjWf{vix3(6}eSd1b8rK1a1nf$HStJ
z0>{m)4uJ}67RC^hvSeH&sb|vGXDvIgISvjAR)aN&mJJI{AuK9~#_^FNS5!H!#oFRM
z6;o1PX@0~gvG`k%KW*gog8Hm}K|O8dHIur)_0iN)K5gZwbC9cy*@l**ZiVxPIj7r(
zBfozA=d0&YE}O6xZ9}(N7zQ79E+k;?^SYC8q^|CrNGgaw)^QwKir|}lL|;O+Ify{+
zmnqDFOOhd@RlZfZv57*jb6+)Fw<a75<7<w#il3nn`EGo5<K5)G$fb7>KM&vfGzDIu
zhW8S97=VnMSPM8i+gZlSTk%ocrfp(@|MUGvdST5Sa<gHwlV0RtA0%i3?@xK$u})!k
zd&Olo@!tz?V}E0r@($~<ZMs}fpx@lao0mrkZnfR$z52fGr7`R4?o@n6Owl67pC?Ve
zy&cCZ+$=0&OO8VZ^4K)Dq#^Xrmq&iL^*VmG)hA_%XJubmN+HciO?3GVST1gu^tUj6
qzJFKlwSj;2+?+j*wa`D`-8USA4M$nOLbLN<{r*;8vbWOz(*NJ;MswHz

literal 0
HcmV?d00001

diff --git a/test-data/document/Bug60942.doc b/test-data/document/Bug60942.doc
new file mode 100644
index 0000000000000000000000000000000000000000..fe64e67bc1a7a9d9ad4d8e1a03a4857faf631a9f
GIT binary patch
literal 20480
zcmeHP3v650dH&D6BClkNre)cxtvJ!8CCgSK(Rx{mlK~P<(T-w0IFWv}UJ;t2Ejkph
z%12gj-9)deH>7qGyTPyxXp$Cdmtbw?bpw_S=%RE1Hgv$+b;HnMMaLm&R~74^WC5nS
z3c2sU51FE=R~*N2%Hw?ZKhOU^Z=Um?U-{0rYqDSbv-<y}BKI)Kl*?C<y&zt|ztswF
zCi)2c9GA=I^BlbmfVb@Kq8@k$bydtUtN^tEJm!^V9FOe=_%{O8fCJbBH~~xnssXkD
zwE*WWySu9g#z@1z6shE+IB0^-V<gw^>M&!h7Ry|$O419@=wCWy5aW9>Kk~b8+P@+6
zd_1Y(1bHEUlBQ^sPSZIfavk3-BneAC)3p{oZDu?U=?NOA0njj|QIm+?4{PO9Z_4Mj
zvsU?4$a|ePOE{y|5y(!FA9c_uW+bffTSr3h9?ZSIocxR5z4v#x-z{3m+S<Rl?EB&F
zh4A};20#T|z%HN>XaaTv&A=X@yr10u5fSKR8EcUx6|dc$7=r;VoSuxtQ|fRuoY0cm
znUs25ON^^M?snoOJRIO4a_PSY>~~v5+&xGqTJ9vlGjHknfy1}I#bpVFJs?xnf=n+R
zbmlWc7Bf<A*Yuoyu0n{6XsDQRP0uwsb5h?nJzSY}_02nS5{U(=PmrV_|Fp<#UT{te
z+AHU!X*mZbnI`9gOXO`%2Z?#fDdw<4WkjE{V9;h{?Ax4iH7rz!E=5r&k593ipXX?v
z5Cu+AtmvZbh%Sn`Yl25SLC?_1fX@Lp;0gNFha3Z;R79!O1E>1cbLaLA5AO?wwsv&x
z!+!^Qx9M&3s6%@^!Juy>I5;|@4sf}krVb~Ps+IszhTZBx-&pXG(E*2^e{}4yXJjxi
z%o)Af<WwkrUI}R7=xl0M_4xfmgWl1h(J>?M$Temd^bPsEqy9nP(J_=c6#As9#;+;u
z-D>Xv^}#N+vt8}pZ{+uT4*LSF>Pgi*I_w`C45(h;(2%dqJBV`r(Ah|`RgK2OvGjN}
zepXFQL{Z0hMCs7$1wlg|)$bV#vP>3~&?c3`>GLW}AMz;*_l(0ati_YjNIKC9e;^%A
zMbxYs*HVdynwSmq{uI{YsZcnjre<TaQxjS|`iXR;RXw*S6jfspbt)E$CZh>;Ji-~z
zL=%&dgjGHgACLT6+Y?$ksg9@BjsxmAI8L4h>#2kmo(M(bkkzq&qP<-m55<$}6IxP@
zY2i@%lg#dzb~c7sx5M#h1W8~rm6%PUMv(-I)-WP0G?GG<6SL`fREwiRNv;OEB#EFd
z7KPfzqoK2LEt!gj)nqCWO3j|-Mu0P>1acWXLSO{uXxgH<Xhav0(4q;%FZL8_9orL1
zCew*fJgjG#gj}v#ly`JA0>Q~hG!{d@asv!kb@=E>v-1(r(b1|V(h>D=8u_&JxZ0-n
z9B6OvQio1r-nFU0*_c~>NNw+S_jbB_9(1?!e~9P#QW)Lk?&)y^+#a_YN~__}R5TTe
zajp0F^r-FV*+XLiw>swbx<UM*1m=i(Q0>B0;|#sM9bC71`hyLB^lA5(|K9VJCvAWH
zr0s=({Nj_gOM)&7`p!D<vBs|tpD~E|_pjJ-diFEVz4f!viOqj<rIH@l^XLE8j<{<$
zXz~4r_om+m_}uk<fX{z?8RmOa6DZ%69s%WhQw)^vO;3aJz3DGN`R?>RP!_!a%6BN$
z<UIh&XT%9mzE@oWE!?w9`T0z>7w|rGYB(@@=+L0ock1}y$nnv!fm6ey16a#}Q-?e!
zc+}hjqe0{}oQ(qdsdth>`q_UFr{6RBnV4fz6oo%-WiCPYq6Z4=QzFYvQGBk*qD}9W
zTd(~0O0LD{!y=#O5YId7H_2*WA>}+gC;f@YnMfiL8CSjOWJ<$!dL!3b^7JG7#lQRd
z<9si<QF5t|^Q`9jkzqAHeBb0d#Y&FzyuMNfr1h_o&dFBWOB|OMOS7zH8*S!dKD<Oa
z&2TBgd{%R-?Tg?4a)kEJRO{PUq>veyw?YwQ-BSybo;BC*<-*8fKQ_sO%+;g^d4q#n
ziL!1nX69Ro^4|F=<m0{bA}EKg{5df3ApCoXuLJ$6$#1>14wIalxt%9_5#{}>8}a`N
zx&iW-W<hz(`f#rgf_@(KEGXBfl(YK5su%x)*f1-kn`&HK+A3jDOM(@)I9t?;FXqEN
z#Lh}zju!JN4s%bHa>u}*e+@ql%D<Rax#g%O-{QyNb0}xUmy&Voan@eP=Zo_ByL|pG
zpTFPT`5PNo2gx#Bloo`uF+V37pdKL67lv2%WGF8|yCC|EZ*$+gP{f?9xH2TN;Vezh
z2?d#^_1vs_Xvxn(LE!yOlJC!Fh0@kA?^5!tpb&k{|ChxFu?=Fz1@6uka1)9m=DYGU
z)p-f(Pze?JZCBqM%E(#alykcp=Dk_s3QfB}pnTv8Fxd^!vd@_#s5)e>pi>Y^HfGTv
zmvcdak^*9!IT_lEjWaGe??8SuO)s0D5e+t^3n!#zT}rNM4kBi3vdC=_4pEhp>2cR|
zR>&2s6H%Ct)Yq6J^bmiz5n^`(lqnU=PFj#y<)Y+l3OHnhQZX-Q6zEPW#D>0RE}&$j
z`3fTm+M1T>Y?W~3Gqo4?$vIg8=r^PpA*2RDO<?D&$VdyjoeS~;8X_;)XDa6*fRkh}
z=Ef=ZJf8We3aAEZfsX*&fgL~#a6ixqbOGJKe&9ji2yh%Y0gM9?U;;=1zYaVDya4<u
za1EFNUIktQvcS)P>j0Eb^bWums2X4!PzShxT|gsn)kgFM;91}~;CbMuKnu<%4*+~l
zc?4LX>+{$Dm-ydx_Uhk7j}hbq#Lv;({}BE6t^a;}P&r6xL2`I2*^XYow=yjT(2;P`
zu?r*}y`abc5+5(o_dNa^fQ^7P4|on(<J|*)FJO&3k9iod#{CTZX8~*cKL!6&z|+7L
z;41KWz?u_0A6@~hIq^;S{}!<3M&Z1`q^3OZa}raHFG=%E9AYVXo5b6jwhEm%#hWBv
ztaXSV8bptDl!`1aaJQ5z*)WVOG!5Iu4)O9K`YZ7_l_dRj{$b}-qkf9rj=6xRT^ST-
zTAR#^%_i4;ZGl?2Z>_agyzZKiZ8y*i>;d)yoj@1R4SWmu2cQXSaW~Kmd=|I_Tn0V|
zJOw-rTmg6;ms-R37p!B97mwbYF{C<iik~iyC8zy_=YUiEh>2UgVa$m25Q(SIVu7V0
zeR{n?B_qwWrGyt1naqB`b=fNZU5`Vg?#$5lmPDFqW&tB<O<&1LHjLtC{-QgbHWJNz
z&*A$dhH`Hej@-r=<6AK9%gm0(O{C(KF6fA^nIeR!|INGvY9G-Olc5-MgI3^IBnnkm
zgtsGble>>R{cq*Ji2F~CUyDV<P@IOKYMnh3N~a1lH<Q{g=q=pEtNqc)III7rpdN;<
z%t=nn*ni%lH42DpqK3!NM$tryFW#2=Nh-oO2sBydTrby5>A5~iIZe<O@lmpeL(oJ#
zfEVl<<Z*1Q*(qw@$kokJb$4Fy9faq&pnJHF;Nk{Z>AMrWYs4<GlPbMB?~ZQt!xk_K
z<MMY*m)t3ik^M~U$Ki<x9fVFqmwb<?{b8=O*z8II^|X3wdF)H%5%sibBt1Es&=z|L
zw<ulanD{ttm`bCE6B@MEXz{iJxdkoerrv1rUs`VYDm(`M9pbcjjH++&S37Z+uA`g!
zbDi9cUNgokf$_4jgf`p=VL_z#TCY^lYemeLud<$E@UGX#>$lKz_CV@<EYg9|+HQ{4
z3BBj8iW8)ypdfGS=w!(|R+WqjuHweP`4i$xV$9&2YD1BSsh(;AvNZ*kHfD8})T-C^
z>!Oux!NK5|kF^{3V6it>Rqm;(#PV*bs#NIcg^yhbtGrTp4M?DssVE*pI{bssitI$&
zYF-qz$8!~WP8GRc#lljl+Q=2ei$HP;<?c26i2ILo)SG-d!AHV+mgr*_?nBx?iMPak
zX6eqw(hYh#cTy`>m|Z`I8JcQAc2^#2&q~uh`iFOuOTbecLMcAC;U@$>{w3m*oDH0K
zn`r?sR66h4hOPN+nRB$|X0@XVjrXFW+jDem&^I!2N`KLUx*Mu)@4Cu(b@jM6-|zDW
z@V=q?{6lT~_qVsxkU!Y*5c&L%_>beeG!2zp;&7Id2k0s}dwDOeqCec&T{({BsL%-s
z_XGWVP_khww;acw72H~O|0VNF!)J6q_t!pfc~sQVAZ9{o_uUE^dDvOX7<43fGDl++
zIBhvMIKut?VQbX}eHO_XK~JM=7|=pxD8oK>Pl9@ATOGg31o0x4R7WDes*Y)sp*U3|
zR-(Z9$<tblHa2q>Pa=vRL#Le+j^iPdA2XA*3zJutFKs_o{TkT4sQ2}MoZk5r1`9D{
zHN*EhU<^eCeVUV&aVI6H4fm|^8K=@xUx8VPd0rywhEzi{{Jz?MUE6W_9q9Xapb{CZ
zc6Psr_Ft?&9yU)>ciR4|@a@X---_K8pGDcGlzA|4M%C9L?_3;StNm9!@*{iM-R2&+
z6YsygDQ1nG5x!e@g<W*(+}6+GZkEqKNxUw3JH|PT@02)y8TJkQdc*I*{LWTr|IPW!
z?!(aotG&Ok_O{-SxpDExkjB63@RoEuxA+LOlH)wDuap64{ha<UUweC5K}deD-`~4f
z<}!}!yqa6>{oOji-KqD-J8l1ax7c6FS?T9MUY-N}w#_N$l}yzuQVvftc`?(h&%3>t
z3~M~Nmz*{{ci(UOoA>*!xt3d*^s?DK_k*?qtT)*Q@U@t)v|qsCfsIg}1D*%=VkKIS
zRS&?=>InRb>FZP2S;c^hhP@Htb|LJKn&D<MJfw%o&d*$M_7OA8s%j4NlM~bDO}f`i
z=jSJmXZ1Bxeu}z{SIPrn<60@}))dVU^*ZnYyi%6`nD8z-8r-n|+KRt?DsWf~tc|no
zsRb$e(&O;u!pLGjlr8VZ{_7u21y*be(P>O>D_ls~-s=a*$LG=?gL2qP|7HA9st4hD
z#PeNrlWBjoUiwv(!_Go4ndJ*9-`~H5c((g`9(G<#pMWslzpp}m5cEf&Z2$EP;=ch}
z>?|uNt6qHi##;_#S}_(aE@{PElqImU!WL(XTJc7%8(;1rc2@dww3ttEn0u;}Td)0>
zCEw!5;b#l#S4zgM$KC1kSNZQR66L?Y_z?X4MMq~x=Yif8|3sr~|5djCTD0-G9rj;*
zu;;@(AK*uU$ANce6L$9-hON>5tCufHW&1BSD=OQ6eGu%wc-{U&+JKoBVXK{J-(&l)
zrn3E4sU9!ef9dvRW&5vElVdTiZ2!gfVcZ>t{TJ`umHMmw^7dcWJ0dF|*aTuf&;vXO
z909@r>!Hp76F>@JTd^y^Rp1%m1>hCnRp6VzYd{vb4!i^809K6F0Cm7Fpb=oppDVys
zKvxmNTK6UV-z?Z1*swR|Fa7{TH$i4U$Mf0XgOtj)&s=A>{^h#q_2)M04vkx4VaKOn
syzJtf$B(=`M$22+JzL6(=(bqcl`u_OUQXF6Z_y6X9OOlEtG0&!53+H`f&c&j

literal 0
HcmV?d00001

diff --git a/test-data/document/Bug60942b.doc b/test-data/document/Bug60942b.doc
new file mode 100644
index 0000000000000000000000000000000000000000..7ca3b9839dd0e9327eaa65af7d16ea13ad7b56c3
GIT binary patch
literal 6144
zcmeHL&2Jk;6o2EaYrFY!-8i^up-U-Fp@|bwR8>Ntq&27&EtTTXARrQJdqdpDyOpyM
zDdK>HI3gA0zy+zQL@8fFLi_{0aBD?{R3Q)t_YjFH9NHj+3N+^TW_ROFXzYzaqE$TB
z&olex<ITKzGxM9d`t6qT`LBAe)2i?kwa`)}LYq8tfpo<EjuHisX4z7uQgOvdHp6{n
zfjdrLc@7m!Vi4dt4*_9-<(r-OJ{nfDt3E9-LMDC|sq`!vh)Z+^v+X~0!(+Uv{M7XB
z7k<juE7R0pwLm>valY67?*8pv|D1N$KkxUv{#$`IpdIJ{IsqI3v;~L(TY)Yh4s-(#
z0103l&;x7-b^s3o4*@#?6?hof1v~=m1|9{HKrgTdcno+P*bDRl{Q&oGz=^9PZ>Act
zfPghyFu`1xB7C9~U6TLw9M9LWLat;^n-i8gW|s2m{?uT9+B`K=)D27e#jy;_(`HV3
z1E*8?kol?ob~`oy;eWQ$nx)pxTmthDcWHwCy8Q0B#oNaxqo2PYroFvi|1b#ITL7PL
zoPPjd{~iL^$BzQ+=Pv>5>v@3v{WQRxym80&T~2dbLx3BG+yr7yzx`w9-0si56P&IU
ze!8Gj|D^~Yy`*~!wqV@=zb+H5Y9_Czx&eo`%7b*{Iu%st;q~h`Jc;sbvuB*xt#=sT
zWA4ZQ5TAqo(T4N}z<7o9i-xK<D>Y<+*6-&R|GYV`UN0{!l+~ZrTl2sCr7p}bs_k9$
z(ba(q<)=S3l#7P)iJ^RID02<zxTh;&jSC<Ign@RT1Ly>zKo<}P5&-W9g8==u;QZ*u
zJw3J!*}1@spsg>{E0h(vK6;Jl1ieV(%3Wlb$~J5|a$bMmkha=rgT`STq<sRU8QhsE
zv*^I78^)>gykyjp>qc)?0^q!%v_7HDOj$Nx*++dSo27B0T!OTSa*7Z4)gej4LyML3
zV9?u!v(uXOW`D-gjJ#ILPv<5}g$YY5p}lHI($Y?_Cy}JkVO`4?jMw>8@1*!m>Yk;r
z*hJ$2%IzSP4o110N|8~0)>5-(9@oS$MghK$lFS_jqC&((7b!;z(-!y=-wf>bcX9iN
zw9{6>G`Rg3#b>!zhY0duP6*r-xF1i51O<;6c^wU6y&&K2Df1mgt`di*V$JYA2p;CF
zo)&P9c=uV9A1xI1Y4rvD4RypUY6i7I8lcRXV%D6Z)+84hE)}#XiYaHs<<N+kH7%30
zWf83u9fV>LelJ1v4YW<l4xRiXbJ6aYO(X3`6Tax!^g*F_SdDy2sY+#4NPE#`Pgsk;
zLK_9HUAgvQYI}6<1Mt_r+h3+33$BvA3K@DQc?+w6U$I>zS6xev?B(!Z-v8wgSKT3&
z6?qcU0!*+G1RcWrLlGSNIK2C$EnLTc45Q`oa+~rF<FjtoDJP(JY9;gb*@9hcGqii}
zU4ANae_gH}qzuhq{6*5_+shHW!c4#-B`L9}fHW4(nqd$0^W}lx?fj0PZS{rI;+WKx
zr!?Z6?1?Vlft>8RQBMQo=lhp?uL=G`^ZM*@paK0U=wGo7)*WT-0_&ZB@B2&bmi^`a
H8~uL++M0I+

literal 0
HcmV?d00001