/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gbif.utils.file;

import java.io.IOException;
import java.io.InputStream;

/**
 * A wrapper for an input stream that removes a leading byte order mark (BOM) from the stream.
 * UTF-8 BOMs can cause XML parsers to fall over with a "Content is not allowed in prolog"
 * exception. A leading 2-byte UTF-16 BOM (big or little endian) is skipped as well.
 * See:
 * <ul>
 *   <li>http://bugs.java.com/bugdatabase/view_bug.do?bug_id=4508058</li>
 *   <li>https://de.wikipedia.org/wiki/Byte_Order_Mark</li>
 * </ul>
 *
 * <p>The first {@value #BUFFER_SIZE} bytes of the wrapped stream are read eagerly in the
 * constructor. If that look-ahead fails with an {@link IOException}, the exception is kept and
 * rethrown by {@link #read()} once the bytes that were read successfully have been returned.
 *
 * @deprecated use org.apache.commons.io.input.BOMInputStream instead
 */
@Deprecated
public class BomSafeInputStreamWrapper extends InputStream {

  /** Number of bytes looked at to detect a BOM. */
  private static final int BUFFER_SIZE = 4;

  private final InputStream stream;

  /** Look-ahead buffer holding the first bytes of the wrapped stream. */
  private final byte[] buffer = new byte[BUFFER_SIZE];

  /** Number of bytes in {@link #buffer} that were actually read from the wrapped stream. */
  private int buffered = 0;

  /** Index of the next byte of {@link #buffer} to hand out; starts after the BOM, if any. */
  private int pointer = 0;

  /** Failure raised while filling the look-ahead buffer, reported by the next unbuffered read. */
  private IOException pendingError;

  /**
   * Wraps the given stream and immediately consumes its first bytes to detect a BOM.
   *
   * @param stream the stream to read from
   */
  public BomSafeInputStreamWrapper(InputStream stream) {
    this.stream = stream;
    skipBom();
  }

  /**
   * Returns the next byte after any skipped BOM.
   *
   * @return the next byte as a value in the range 0..255, or -1 at the end of the stream
   * @throws IOException if the wrapped stream fails, including a failure that happened while
   *         the constructor was looking for a BOM
   */
  @Override
  public int read() throws IOException {
    if (pointer < buffered) {
      // Mask to an unsigned value: a raw (signed) byte >= 0x80 would be returned as a negative
      // int, and 0xFF would be indistinguishable from the end-of-stream marker -1.
      return buffer[pointer++] & 0xFF;
    }
    if (pendingError != null) {
      IOException error = pendingError;
      pendingError = null;
      throw error;
    }
    return stream.read();
  }

  /**
   * Closes the wrapped stream.
   *
   * @throws IOException if closing the wrapped stream fails
   */
  @Override
  public void close() throws IOException {
    stream.close();
  }

  /**
   * Fills the look-ahead buffer and positions {@link #pointer} behind a leading BOM, if present.
   */
  private void skipBom() {
    try {
      // A single read() may legally return fewer bytes than requested, so keep reading until
      // the buffer is full or the stream ends.
      while (buffered < BUFFER_SIZE) {
        int count = stream.read(buffer, buffered, BUFFER_SIZE - buffered);
        if (count <= 0) {
          break;
        }
        buffered += count;
      }
    } catch (IOException e) {
      // The constructor cannot throw a checked exception without breaking callers,
      // so the failure is surfaced by read() instead of being dropped.
      pendingError = e;
    }

    // Only consult bytes that were really read; unread buffer slots are still zero.
    if (buffered >= 2
        && (CharsetDetection.hasUTF16BEBom(buffer) || CharsetDetection.hasUTF16LEBom(buffer))) {
      // 2-byte UTF-16 BOM. An earlier note here said the parser handles UTF-16 BOMs fine,
      // but this wrapper has always skipped them, so that behavior is kept.
      pointer = 2;
    } else if (buffered >= 3 && CharsetDetection.hasUTF8Bom(buffer)) {
      // 3-byte UTF-8 BOM.
      pointer = 3;
    }
  }
}