001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.compressors.lz4; 020 021import java.io.IOException; 022import java.io.InputStream; 023import java.util.Arrays; 024 025import org.apache.commons.compress.compressors.CompressorInputStream; 026import org.apache.commons.compress.utils.BoundedInputStream; 027import org.apache.commons.compress.utils.ByteUtils; 028import org.apache.commons.compress.utils.ChecksumCalculatingInputStream; 029import org.apache.commons.compress.utils.IOUtils; 030 031/** 032 * CompressorInputStream for the LZ4 frame format. 033 * 034 * <p>Based on the "spec" in the version "1.5.1 (31/03/2015)"</p> 035 * 036 * @see <a href="http://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a> 037 * @since 1.14 038 * @NotThreadSafe 039 */ 040public class FramedLZ4CompressorInputStream extends CompressorInputStream { 041 042 // used by FramedLZ4CompressorOutputStream as well 043 static final byte[] LZ4_SIGNATURE = new byte[] { //NOSONAR 044 4, 0x22, 0x4d, 0x18 045 }; 046 private static final byte[] SKIPPABLE_FRAME_TRAILER = new byte[] { 047 0x2a, 0x4d, 0x18 048 }; 049 private static final byte SKIPPABLE_FRAME_PREFIX_BYTE_MASK = 0x50; 050 051 static final int VERSION_MASK = 0xC0; 052 static final int SUPPORTED_VERSION = 0x40; 053 static final int BLOCK_INDEPENDENCE_MASK = 0x20; 054 static final int BLOCK_CHECKSUM_MASK = 0x10; 055 static final int CONTENT_SIZE_MASK = 0x08; 056 static final int CONTENT_CHECKSUM_MASK = 0x04; 057 static final int BLOCK_MAX_SIZE_MASK = 0x70; 058 static final int UNCOMPRESSED_FLAG_MASK = 0x80000000; 059 060 // used in no-arg read method 061 private final byte[] oneByte = new byte[1]; 062 063 private final ByteUtils.ByteSupplier supplier = new ByteUtils.ByteSupplier() { 064 @Override 065 public int getAsByte() throws IOException { 066 return readOneByte(); 067 } 068 }; 069 070 private final InputStream in; 071 private final boolean decompressConcatenated; 072 073 private boolean expectBlockChecksum; 074 private boolean expectBlockDependency; 075 private boolean expectContentSize; 076 private boolean expectContentChecksum; 077 078 private InputStream currentBlock; 079 private boolean endReached, inUncompressed; 080 081 // used for frame header checksum and content checksum, if present 082 private final XXHash32 contentHash = new XXHash32(); 083 084 // used for block checksum, if present 085 private final XXHash32 blockHash = new XXHash32(); 086 087 // only created if the frame doesn't set the block independence flag 088 private byte[] blockDependencyBuffer; 089 090 /** 091 * Creates a new input stream that decompresses streams compressed 092 * using the LZ4 frame format and stops after decompressing the 093 * first frame. 094 * @param in the InputStream from which to read the compressed data 095 * @throws IOException if reading fails 096 */ 097 public FramedLZ4CompressorInputStream(InputStream in) throws IOException { 098 this(in, false); 099 } 100 101 /** 102 * Creates a new input stream that decompresses streams compressed 103 * using the LZ4 frame format. 104 * @param in the InputStream from which to read the compressed data 105 * @param decompressConcatenated if true, decompress until the end 106 * of the input; if false, stop after the first LZ4 frame 107 * and leave the input position to point to the next byte 108 * after the frame stream 109 * @throws IOException if reading fails 110 */ 111 public FramedLZ4CompressorInputStream(InputStream in, boolean decompressConcatenated) throws IOException { 112 this.in = in; 113 this.decompressConcatenated = decompressConcatenated; 114 init(true); 115 } 116 117 /** {@inheritDoc} */ 118 @Override 119 public int read() throws IOException { 120 return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF; 121 } 122 123 /** {@inheritDoc} */ 124 @Override 125 public void close() throws IOException { 126 if (currentBlock != null) { 127 currentBlock.close(); 128 currentBlock = null; 129 } 130 in.close(); 131 } 132 133 /** {@inheritDoc} */ 134 @Override 135 public int read(final byte[] b, final int off, final int len) throws IOException { 136 if (endReached) { 137 return -1; 138 } 139 int r = readOnce(b, off, len); 140 if (r == -1) { 141 nextBlock(); 142 if (!endReached) { 143 r = readOnce(b, off, len); 144 } 145 } 146 if (r != -1) { 147 if (expectBlockDependency) { 148 appendToBlockDependencyBuffer(b, off, r); 149 } 150 if (expectContentChecksum) { 151 contentHash.update(b, off, r); 152 } 153 } 154 return r; 155 } 156 157 private void init(boolean firstFrame) throws IOException { 158 if (readSignature(firstFrame)) { 159 readFrameDescriptor(); 160 nextBlock(); 161 } 162 } 163 164 private boolean readSignature(boolean firstFrame) throws IOException { 165 String garbageMessage = firstFrame ? "Not a LZ4 frame stream" : "LZ4 frame stream followed by garbage"; 166 final byte[] b = new byte[4]; 167 int read = IOUtils.readFully(in, b); 168 count(read); 169 if (0 == read && !firstFrame) { 170 // good LZ4 frame and nothing after it 171 endReached = true; 172 return false; 173 } 174 if (4 != read) { 175 throw new IOException(garbageMessage); 176 } 177 178 read = skipSkippableFrame(b); 179 if (0 == read && !firstFrame) { 180 // good LZ4 frame with only some skippable frames after it 181 endReached = true; 182 return false; 183 } 184 if (4 != read || !matches(b, 4)) { 185 throw new IOException(garbageMessage); 186 } 187 return true; 188 } 189 190 private void readFrameDescriptor() throws IOException { 191 int flags = readOneByte(); 192 if (flags == -1) { 193 throw new IOException("Premature end of stream while reading frame flags"); 194 } 195 contentHash.update(flags); 196 if ((flags & VERSION_MASK) != SUPPORTED_VERSION) { 197 throw new IOException("Unsupported version " + (flags >> 6)); 198 } 199 expectBlockDependency = (flags & BLOCK_INDEPENDENCE_MASK) == 0; 200 if (expectBlockDependency) { 201 if (blockDependencyBuffer == null) { 202 blockDependencyBuffer = new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE]; 203 } 204 } else { 205 blockDependencyBuffer = null; 206 } 207 expectBlockChecksum = (flags & BLOCK_CHECKSUM_MASK) != 0; 208 expectContentSize = (flags & CONTENT_SIZE_MASK) != 0; 209 expectContentChecksum = (flags & CONTENT_CHECKSUM_MASK) != 0; 210 int bdByte = readOneByte(); 211 if (bdByte == -1) { // max size is irrelevant for this implementation 212 throw new IOException("Premature end of stream while reading frame BD byte"); 213 } 214 contentHash.update(bdByte); 215 if (expectContentSize) { // for now we don't care, contains the uncompressed size 216 byte[] contentSize = new byte[8]; 217 int skipped = IOUtils.readFully(in, contentSize); 218 count(skipped); 219 if (8 != skipped) { 220 throw new IOException("Premature end of stream while reading content size"); 221 } 222 contentHash.update(contentSize, 0, contentSize.length); 223 } 224 int headerHash = readOneByte(); 225 if (headerHash == -1) { // partial hash of header. 226 throw new IOException("Premature end of stream while reading frame header checksum"); 227 } 228 int expectedHash = (int) ((contentHash.getValue() >> 8) & 0xff); 229 contentHash.reset(); 230 if (headerHash != expectedHash) { 231 throw new IOException("frame header checksum mismatch."); 232 } 233 } 234 235 private void nextBlock() throws IOException { 236 maybeFinishCurrentBlock(); 237 long len = ByteUtils.fromLittleEndian(supplier, 4); 238 boolean uncompressed = (len & UNCOMPRESSED_FLAG_MASK) != 0; 239 int realLen = (int) (len & (~UNCOMPRESSED_FLAG_MASK)); 240 if (realLen == 0) { 241 verifyContentChecksum(); 242 if (!decompressConcatenated) { 243 endReached = true; 244 } else { 245 init(false); 246 } 247 return; 248 } 249 InputStream capped = new BoundedInputStream(in, realLen); 250 if (expectBlockChecksum) { 251 capped = new ChecksumCalculatingInputStream(blockHash, capped); 252 } 253 if (uncompressed) { 254 inUncompressed = true; 255 currentBlock = capped; 256 } else { 257 inUncompressed = false; 258 BlockLZ4CompressorInputStream s = new BlockLZ4CompressorInputStream(capped); 259 if (expectBlockDependency) { 260 s.prefill(blockDependencyBuffer); 261 } 262 currentBlock = s; 263 } 264 } 265 266 private void maybeFinishCurrentBlock() throws IOException { 267 if (currentBlock != null) { 268 currentBlock.close(); 269 currentBlock = null; 270 if (expectBlockChecksum) { 271 verifyChecksum(blockHash, "block"); 272 blockHash.reset(); 273 } 274 } 275 } 276 277 private void verifyContentChecksum() throws IOException { 278 if (expectContentChecksum) { 279 verifyChecksum(contentHash, "content"); 280 } 281 contentHash.reset(); 282 } 283 284 private void verifyChecksum(XXHash32 hash, String kind) throws IOException { 285 byte[] checksum = new byte[4]; 286 int read = IOUtils.readFully(in, checksum); 287 count(read); 288 if (4 != read) { 289 throw new IOException("Premature end of stream while reading " + kind + " checksum"); 290 } 291 long expectedHash = hash.getValue(); 292 if (expectedHash != ByteUtils.fromLittleEndian(checksum)) { 293 throw new IOException(kind + " checksum mismatch."); 294 } 295 } 296 297 private int readOneByte() throws IOException { 298 final int b = in.read(); 299 if (b != -1) { 300 count(1); 301 return b & 0xFF; 302 } 303 return -1; 304 } 305 306 private int readOnce(byte[] b, int off, int len) throws IOException { 307 if (inUncompressed) { 308 int cnt = currentBlock.read(b, off, len); 309 count(cnt); 310 return cnt; 311 } 312 BlockLZ4CompressorInputStream l = (BlockLZ4CompressorInputStream) currentBlock; 313 long before = l.getBytesRead(); 314 int cnt = currentBlock.read(b, off, len); 315 count(l.getBytesRead() - before); 316 return cnt; 317 } 318 319 private static boolean isSkippableFrameSignature(byte[] b) { 320 if ((b[0] & SKIPPABLE_FRAME_PREFIX_BYTE_MASK) != SKIPPABLE_FRAME_PREFIX_BYTE_MASK) { 321 return false; 322 } 323 for (int i = 1; i < 4; i++) { 324 if (b[i] != SKIPPABLE_FRAME_TRAILER[i - 1]) { 325 return false; 326 } 327 } 328 return true; 329 } 330 331 /** 332 * Skips over the contents of a skippable frame as well as 333 * skippable frames following it. 334 * 335 * <p>It then tries to read four more bytes which are supposed to 336 * hold an LZ4 signature and returns the number of bytes read 337 * while storing the bytes in the given array.</p> 338 */ 339 private int skipSkippableFrame(byte[] b) throws IOException { 340 int read = 4; 341 while (read == 4 && isSkippableFrameSignature(b)) { 342 long len = ByteUtils.fromLittleEndian(supplier, 4); 343 long skipped = IOUtils.skip(in, len); 344 count(skipped); 345 if (len != skipped) { 346 throw new IOException("Premature end of stream while skipping frame"); 347 } 348 read = IOUtils.readFully(in, b); 349 count(read); 350 } 351 return read; 352 } 353 354 private void appendToBlockDependencyBuffer(final byte[] b, final int off, int len) { 355 len = Math.min(len, blockDependencyBuffer.length); 356 if (len > 0) { 357 int keep = blockDependencyBuffer.length - len; 358 if (keep > 0) { 359 // move last keep bytes towards the start of the buffer 360 System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep); 361 } 362 // append new data 363 System.arraycopy(b, off, blockDependencyBuffer, keep, len); 364 } 365 } 366 367 /** 368 * Checks if the signature matches what is expected for a .lz4 file. 369 * 370 * <p>.lz4 files start with a four byte signature.</p> 371 * 372 * @param signature the bytes to check 373 * @param length the number of bytes to check 374 * @return true if this is a .sz stream, false otherwise 375 */ 376 public static boolean matches(final byte[] signature, final int length) { 377 378 if (length < LZ4_SIGNATURE.length) { 379 return false; 380 } 381 382 byte[] shortenedSig = signature; 383 if (signature.length > LZ4_SIGNATURE.length) { 384 shortenedSig = new byte[LZ4_SIGNATURE.length]; 385 System.arraycopy(signature, 0, shortenedSig, 0, LZ4_SIGNATURE.length); 386 } 387 388 return Arrays.equals(shortenedSig, LZ4_SIGNATURE); 389 } 390}