001package org.gbif.utils.file;
002
003import java.io.IOException;
004import java.io.InputStream;
005
006/**
007 * A wrapper for an input stream that removes UTF8 BOM sequences at the start of the file.
008 * UTF8 BOMs can cause XML parser to fall over with a "Content is not allowed in prolog" Exception.
009 * See:
010 * <ul>
011 *  <li>http://bugs.java.com/bugdatabase/view_bug.do?bug_id=4508058</li>
012 *  <li>https://de.wikipedia.org/wiki/Byte_Order_Mark</li>
013 * </ul>
014 *
015 * @deprecated use org.apache.commons.io.input.BOMInputStream instead
016 */
017@Deprecated
018public class BomSafeInputStreamWrapper extends InputStream {
019
020  private static final int BUFFER_SIZE = 4;
021  private final InputStream stream;
022  private final byte[] buffer = new byte[BUFFER_SIZE];
023  private int pointer = 0;
024
025  public BomSafeInputStreamWrapper(InputStream stream) {
026    this.stream = stream;
027    skipBom();
028  }
029
030  @Override
031  public int read() throws IOException {
032    if (pointer < BUFFER_SIZE) {
033      pointer++;
034      return buffer[pointer - 1];
035    } else {
036      return stream.read();
037    }
038  }
039
040  private void skipBom() {
041    try {
042      stream.read(buffer, 0, BUFFER_SIZE);
043      if (CharsetDetection.hasUTF16BEBom(buffer) || CharsetDetection.hasUTF16LEBom(buffer)) {
044        // SQX Parser handles UTF16 BOMs fine
045        pointer = 2;
046      } else if (CharsetDetection.hasUTF8Bom(buffer)) {
047        pointer = 3;
048      }
049    } catch (IOException ignored) {
050    }
051  }
052
053}