204 lines
6.9 KiB
Java
204 lines
6.9 KiB
Java
/* ====================================================================
|
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
|
contributor license agreements. See the NOTICE file distributed with
|
|
this work for additional information regarding copyright ownership.
|
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
|
(the "License"); you may not use this file except in compliance with
|
|
the License. You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
==================================================================== */
|
|
|
|
package org.apache.poi.util;
|
|
|
|
import java.io.FilterInputStream;
|
|
import java.io.IOException;
|
|
import java.io.InputStream;
|
|
import java.nio.charset.Charset;
|
|
import java.util.Arrays;
|
|
|
|
/**
|
|
* Simple FilterInputStream that can replace occurrences of bytes with something else.
|
|
*
|
|
* This has been taken from inbot-utils. (MIT licensed)
|
|
*
|
|
* @see <a href="https://github.com/Inbot/inbot-utils">inbot-utils</a>
|
|
*/
|
|
@Internal
|
|
public class ReplacingInputStream extends FilterInputStream {
|
|
|
|
// while matching, this is where the bytes go.
|
|
final int[] buf;
|
|
private int matchedIndex=0;
|
|
private int unbufferIndex=0;
|
|
private int replacedIndex=0;
|
|
|
|
private final byte[] pattern;
|
|
private final byte[] replacement;
|
|
private State state=State.NOT_MATCHED;
|
|
|
|
// simple state machine for keeping track of what we are doing
|
|
private enum State {
|
|
NOT_MATCHED,
|
|
MATCHING,
|
|
REPLACING,
|
|
UNBUFFER
|
|
}
|
|
|
|
private static final Charset UTF8 = Charset.forName("UTF-8");
|
|
|
|
/**
|
|
* Replace occurrences of pattern in the input. Note: input is assumed to be UTF-8 encoded. If not the case use byte[] based pattern and replacement.
|
|
* @param in input
|
|
* @param pattern pattern to replace.
|
|
* @param replacement the replacement or null
|
|
*/
|
|
public ReplacingInputStream(InputStream in, String pattern, String replacement) {
|
|
this(in, pattern.getBytes(UTF8), replacement==null ? null : replacement.getBytes(UTF8));
|
|
}
|
|
|
|
/**
|
|
* Replace occurrences of pattern in the input.<p>
|
|
*
|
|
* If you want to normalize line endings DOS/MAC (\n\r | \r) to UNIX (\n), you can call the following:<br/>
|
|
* {@code new ReplacingInputStream(new ReplacingInputStream(is, "\n\r", "\n"), "\r", "\n")}
|
|
*
|
|
* @param in input
|
|
* @param pattern pattern to replace
|
|
* @param replacement the replacement or null
|
|
*/
|
|
public ReplacingInputStream(InputStream in, byte[] pattern, byte[] replacement) {
|
|
super(in);
|
|
if (pattern == null || pattern.length == 0) {
|
|
throw new IllegalArgumentException("pattern length should be > 0");
|
|
}
|
|
this.pattern = pattern;
|
|
this.replacement = replacement;
|
|
// we will never match more than the pattern length
|
|
buf = new int[pattern.length];
|
|
}
|
|
|
|
@Override
|
|
public int read(byte[] b, int off, int len) throws IOException {
|
|
// copy of parent logic; we need to call our own read() instead of super.read(), which delegates instead of calling our read
|
|
if (b == null) {
|
|
throw new NullPointerException();
|
|
} else if (off < 0 || len < 0 || len > b.length - off) {
|
|
throw new IndexOutOfBoundsException();
|
|
} else if (len == 0) {
|
|
return 0;
|
|
}
|
|
|
|
int c = read();
|
|
if (c == -1) {
|
|
return -1;
|
|
}
|
|
b[off] = (byte)c;
|
|
|
|
int i = 1;
|
|
for (; i < len ; i++) {
|
|
c = read();
|
|
if (c == -1) {
|
|
break;
|
|
}
|
|
b[off + i] = (byte)c;
|
|
}
|
|
return i;
|
|
|
|
}
|
|
|
|
@Override
|
|
public int read(byte[] b) throws IOException {
|
|
// call our own read
|
|
return read(b, 0, b.length);
|
|
}
|
|
|
|
@Override
|
|
public int read() throws IOException {
|
|
// use a simple state machine to figure out what we are doing
|
|
int next;
|
|
switch (state) {
|
|
default:
|
|
case NOT_MATCHED:
|
|
// we are not currently matching, replacing, or unbuffering
|
|
next=super.read();
|
|
if (pattern[0] != next) {
|
|
return next;
|
|
}
|
|
|
|
// clear whatever was there
|
|
Arrays.fill(buf, 0);
|
|
// make sure we start at 0
|
|
matchedIndex=0;
|
|
|
|
buf[matchedIndex++]=next;
|
|
if (pattern.length == 1) {
|
|
// edge-case when the pattern length is 1 we go straight to replacing
|
|
state=State.REPLACING;
|
|
// reset replace counter
|
|
replacedIndex=0;
|
|
} else {
|
|
// pattern of length 1
|
|
state=State.MATCHING;
|
|
}
|
|
// recurse to continue matching
|
|
return read();
|
|
|
|
case MATCHING:
|
|
// the previous bytes matched part of the pattern
|
|
next=super.read();
|
|
if (pattern[matchedIndex]==next) {
|
|
buf[matchedIndex++]=next;
|
|
if (matchedIndex==pattern.length) {
|
|
// we've found a full match!
|
|
if (replacement==null || replacement.length==0) {
|
|
// the replacement is empty, go straight to NOT_MATCHED
|
|
state=State.NOT_MATCHED;
|
|
matchedIndex=0;
|
|
} else {
|
|
// start replacing
|
|
state=State.REPLACING;
|
|
replacedIndex=0;
|
|
}
|
|
}
|
|
} else {
|
|
// mismatch -> unbuffer
|
|
buf[matchedIndex++]=next;
|
|
state=State.UNBUFFER;
|
|
unbufferIndex=0;
|
|
}
|
|
return read();
|
|
|
|
case REPLACING:
|
|
// we've fully matched the pattern and are returning bytes from the replacement
|
|
next=replacement[replacedIndex++];
|
|
if (replacedIndex==replacement.length) {
|
|
state=State.NOT_MATCHED;
|
|
replacedIndex=0;
|
|
}
|
|
return next;
|
|
|
|
case UNBUFFER:
|
|
// we partially matched the pattern before encountering a non matching byte
|
|
// we need to serve up the buffered bytes before we go back to NOT_MATCHED
|
|
next=buf[unbufferIndex++];
|
|
if (unbufferIndex==matchedIndex) {
|
|
state=State.NOT_MATCHED;
|
|
matchedIndex=0;
|
|
}
|
|
return next;
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public String toString() {
|
|
return state.name() + " " + matchedIndex + " " + replacedIndex + " " + unbufferIndex;
|
|
}
|
|
|
|
} |