001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.utils.file;
015
016import java.io.IOException;
017import java.io.InputStream;
018
019/**
020 * A wrapper for an input stream that removes UTF8 BOM sequences at the start of the file.
021 * UTF8 BOMs can cause XML parser to fall over with a "Content is not allowed in prolog" Exception.
022 * See:
023 * <ul>
024 *  <li>http://bugs.java.com/bugdatabase/view_bug.do?bug_id=4508058</li>
025 *  <li>https://de.wikipedia.org/wiki/Byte_Order_Mark</li>
026 * </ul>
027 *
028 * @deprecated use org.apache.commons.io.input.BOMInputStream instead
029 */
030@Deprecated
031public class BomSafeInputStreamWrapper extends InputStream {
032
033  private static final int BUFFER_SIZE = 4;
034  private final InputStream stream;
035  private final byte[] buffer = new byte[BUFFER_SIZE];
036  private int pointer = 0;
037
038  public BomSafeInputStreamWrapper(InputStream stream) {
039    this.stream = stream;
040    skipBom();
041  }
042
043  @Override
044  public int read() throws IOException {
045    if (pointer < BUFFER_SIZE) {
046      pointer++;
047      return buffer[pointer - 1];
048    } else {
049      return stream.read();
050    }
051  }
052
053  private void skipBom() {
054    try {
055      stream.read(buffer, 0, BUFFER_SIZE);
056      if (CharsetDetection.hasUTF16BEBom(buffer) || CharsetDetection.hasUTF16LEBom(buffer)) {
057        // SQX Parser handles UTF16 BOMs fine
058        pointer = 2;
059      } else if (CharsetDetection.hasUTF8Bom(buffer)) {
060        pointer = 3;
061      }
062    } catch (IOException ignored) {
063    }
064  }
065}