001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.lz4;
020
021import java.io.IOException;
022import java.io.InputStream;
023import java.util.Arrays;
024
025import org.apache.commons.compress.compressors.CompressorInputStream;
026import org.apache.commons.compress.utils.BoundedInputStream;
027import org.apache.commons.compress.utils.ByteUtils;
028import org.apache.commons.compress.utils.ChecksumCalculatingInputStream;
029import org.apache.commons.compress.utils.IOUtils;
030
031/**
032 * CompressorInputStream for the LZ4 frame format.
033 *
034 * <p>Based on the "spec" in the version "1.5.1 (31/03/2015)"</p>
035 *
036 * @see <a href="http://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a>
037 * @since 1.14
038 * @NotThreadSafe
039 */
040public class FramedLZ4CompressorInputStream extends CompressorInputStream {
041
042    // used by FramedLZ4CompressorOutputStream as well
043    static final byte[] LZ4_SIGNATURE = new byte[] { //NOSONAR
044        4, 0x22, 0x4d, 0x18
045    };
046    private static final byte[] SKIPPABLE_FRAME_TRAILER = new byte[] {
047        0x2a, 0x4d, 0x18
048    };
049    private static final byte SKIPPABLE_FRAME_PREFIX_BYTE_MASK = 0x50;
050
051    static final int VERSION_MASK = 0xC0;
052    static final int SUPPORTED_VERSION = 0x40;
053    static final int BLOCK_INDEPENDENCE_MASK = 0x20;
054    static final int BLOCK_CHECKSUM_MASK = 0x10;
055    static final int CONTENT_SIZE_MASK = 0x08;
056    static final int CONTENT_CHECKSUM_MASK = 0x04;
057    static final int BLOCK_MAX_SIZE_MASK = 0x70;
058    static final int UNCOMPRESSED_FLAG_MASK = 0x80000000;
059
060    // used in no-arg read method
061    private final byte[] oneByte = new byte[1];
062
063    private final ByteUtils.ByteSupplier supplier = new ByteUtils.ByteSupplier() {
064        @Override
065        public int getAsByte() throws IOException {
066            return readOneByte();
067        }
068    };
069
070    private final InputStream in;
071    private final boolean decompressConcatenated;
072
073    private boolean expectBlockChecksum;
074    private boolean expectBlockDependency;
075    private boolean expectContentSize;
076    private boolean expectContentChecksum;
077
078    private InputStream currentBlock;
079    private boolean endReached, inUncompressed;
080
081    // used for frame header checksum and content checksum, if present
082    private final XXHash32 contentHash = new XXHash32();
083
084    // used for block checksum, if present
085    private final XXHash32 blockHash = new XXHash32();
086
087    // only created if the frame doesn't set the block independence flag
088    private byte[] blockDependencyBuffer;
089
090    /**
091     * Creates a new input stream that decompresses streams compressed
092     * using the LZ4 frame format and stops after decompressing the
093     * first frame.
094     * @param in  the InputStream from which to read the compressed data
095     * @throws IOException if reading fails
096     */
097    public FramedLZ4CompressorInputStream(InputStream in) throws IOException {
098        this(in, false);
099    }
100
101    /**
102     * Creates a new input stream that decompresses streams compressed
103     * using the LZ4 frame format.
104     * @param in  the InputStream from which to read the compressed data
105     * @param decompressConcatenated if true, decompress until the end
106     *          of the input; if false, stop after the first LZ4 frame
107     *          and leave the input position to point to the next byte
108     *          after the frame stream
109     * @throws IOException if reading fails
110     */
111    public FramedLZ4CompressorInputStream(InputStream in, boolean decompressConcatenated) throws IOException {
112        this.in = in;
113        this.decompressConcatenated = decompressConcatenated;
114        init(true);
115    }
116
117    /** {@inheritDoc} */
118    @Override
119    public int read() throws IOException {
120        return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF;
121    }
122
123    /** {@inheritDoc} */
124    @Override
125    public void close() throws IOException {
126        if (currentBlock != null) {
127            currentBlock.close();
128            currentBlock = null;
129        }
130        in.close();
131    }
132
133    /** {@inheritDoc} */
134    @Override
135    public int read(final byte[] b, final int off, final int len) throws IOException {
136        if (endReached) {
137            return -1;
138        }
139        int r = readOnce(b, off, len);
140        if (r == -1) {
141            nextBlock();
142            if (!endReached) {
143                r = readOnce(b, off, len);
144            }
145        }
146        if (r != -1) {
147            if (expectBlockDependency) {
148                appendToBlockDependencyBuffer(b, off, r);
149            }
150            if (expectContentChecksum) {
151                contentHash.update(b, off, r);
152            }
153        }
154        return r;
155    }
156
157    private void init(boolean firstFrame) throws IOException {
158        if (readSignature(firstFrame)) {
159            readFrameDescriptor();
160            nextBlock();
161        }
162    }
163
164    private boolean readSignature(boolean firstFrame) throws IOException {
165        String garbageMessage = firstFrame ? "Not a LZ4 frame stream" : "LZ4 frame stream followed by garbage";
166        final byte[] b = new byte[4];
167        int read = IOUtils.readFully(in, b);
168        count(read);
169        if (0 == read && !firstFrame) {
170            // good LZ4 frame and nothing after it
171            endReached = true;
172            return false;
173        }
174        if (4 != read) {
175            throw new IOException(garbageMessage);
176        }
177
178        read = skipSkippableFrame(b);
179        if (0 == read && !firstFrame) {
180            // good LZ4 frame with only some skippable frames after it
181            endReached = true;
182            return false;
183        }
184        if (4 != read || !matches(b, 4)) {
185            throw new IOException(garbageMessage);
186        }
187        return true;
188    }
189
190    private void readFrameDescriptor() throws IOException {
191        int flags = readOneByte();
192        if (flags == -1) {
193            throw new IOException("Premature end of stream while reading frame flags");
194        }
195        contentHash.update(flags);
196        if ((flags & VERSION_MASK) != SUPPORTED_VERSION) {
197            throw new IOException("Unsupported version " + (flags >> 6));
198        }
199        expectBlockDependency = (flags & BLOCK_INDEPENDENCE_MASK) == 0;
200        if (expectBlockDependency) {
201            if (blockDependencyBuffer == null) {
202                blockDependencyBuffer = new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE];
203            }
204        } else {
205            blockDependencyBuffer = null;
206        }
207        expectBlockChecksum = (flags & BLOCK_CHECKSUM_MASK) != 0;
208        expectContentSize = (flags & CONTENT_SIZE_MASK) != 0;
209        expectContentChecksum = (flags & CONTENT_CHECKSUM_MASK) != 0;
210        int bdByte = readOneByte();
211        if (bdByte == -1) { // max size is irrelevant for this implementation
212            throw new IOException("Premature end of stream while reading frame BD byte");
213        }
214        contentHash.update(bdByte);
215        if (expectContentSize) { // for now we don't care, contains the uncompressed size
216            byte[] contentSize = new byte[8];
217            int skipped = IOUtils.readFully(in, contentSize);
218            count(skipped);
219            if (8 != skipped) {
220                throw new IOException("Premature end of stream while reading content size");
221            }
222            contentHash.update(contentSize, 0, contentSize.length);
223        }
224        int headerHash = readOneByte();
225        if (headerHash == -1) { // partial hash of header.
226            throw new IOException("Premature end of stream while reading frame header checksum");
227        }
228        int expectedHash = (int) ((contentHash.getValue() >> 8) & 0xff);
229        contentHash.reset();
230        if (headerHash != expectedHash) {
231            throw new IOException("frame header checksum mismatch.");
232        }
233    }
234
235    private void nextBlock() throws IOException {
236        maybeFinishCurrentBlock();
237        long len = ByteUtils.fromLittleEndian(supplier, 4);
238        boolean uncompressed = (len & UNCOMPRESSED_FLAG_MASK) != 0;
239        int realLen = (int) (len & (~UNCOMPRESSED_FLAG_MASK));
240        if (realLen == 0) {
241            verifyContentChecksum();
242            if (!decompressConcatenated) {
243                endReached = true;
244            } else {
245                init(false);
246            }
247            return;
248        }
249        InputStream capped = new BoundedInputStream(in, realLen);
250        if (expectBlockChecksum) {
251            capped = new ChecksumCalculatingInputStream(blockHash, capped);
252        }
253        if (uncompressed) {
254            inUncompressed = true;
255            currentBlock = capped;
256        } else {
257            inUncompressed = false;
258            BlockLZ4CompressorInputStream s = new BlockLZ4CompressorInputStream(capped);
259            if (expectBlockDependency) {
260                s.prefill(blockDependencyBuffer);
261            }
262            currentBlock = s;
263        }
264    }
265
266    private void maybeFinishCurrentBlock() throws IOException {
267        if (currentBlock != null) {
268            currentBlock.close();
269            currentBlock = null;
270            if (expectBlockChecksum) {
271                verifyChecksum(blockHash, "block");
272                blockHash.reset();
273            }
274        }
275    }
276
277    private void verifyContentChecksum() throws IOException {
278        if (expectContentChecksum) {
279            verifyChecksum(contentHash, "content");
280        }
281        contentHash.reset();
282    }
283
284    private void verifyChecksum(XXHash32 hash, String kind) throws IOException {
285        byte[] checksum = new byte[4];
286        int read = IOUtils.readFully(in, checksum);
287        count(read);
288        if (4 != read) {
289            throw new IOException("Premature end of stream while reading " + kind + " checksum");
290        }
291        long expectedHash = hash.getValue();
292        if (expectedHash != ByteUtils.fromLittleEndian(checksum)) {
293            throw new IOException(kind + " checksum mismatch.");
294        }
295    }
296
297    private int readOneByte() throws IOException {
298        final int b = in.read();
299        if (b != -1) {
300            count(1);
301            return b & 0xFF;
302        }
303        return -1;
304    }
305
306    private int readOnce(byte[] b, int off, int len) throws IOException {
307        if (inUncompressed) {
308            int cnt = currentBlock.read(b, off, len);
309            count(cnt);
310            return cnt;
311        }
312        BlockLZ4CompressorInputStream l = (BlockLZ4CompressorInputStream) currentBlock;
313        long before = l.getBytesRead();
314        int cnt = currentBlock.read(b, off, len);
315        count(l.getBytesRead() - before);
316        return cnt;
317    }
318
319    private static boolean isSkippableFrameSignature(byte[] b) {
320        if ((b[0] & SKIPPABLE_FRAME_PREFIX_BYTE_MASK) != SKIPPABLE_FRAME_PREFIX_BYTE_MASK) {
321            return false;
322        }
323        for (int i = 1; i < 4; i++) {
324            if (b[i] != SKIPPABLE_FRAME_TRAILER[i - 1]) {
325                return false;
326            }
327        }
328        return true;
329    }
330
331    /**
332     * Skips over the contents of a skippable frame as well as
333     * skippable frames following it.
334     *
335     * <p>It then tries to read four more bytes which are supposed to
336     * hold an LZ4 signature and returns the number of bytes read
337     * while storing the bytes in the given array.</p>
338     */
339    private int skipSkippableFrame(byte[] b) throws IOException {
340        int read = 4;
341        while (read == 4 && isSkippableFrameSignature(b)) {
342            long len = ByteUtils.fromLittleEndian(supplier, 4);
343            long skipped = IOUtils.skip(in, len);
344            count(skipped);
345            if (len != skipped) {
346                throw new IOException("Premature end of stream while skipping frame");
347            }
348            read = IOUtils.readFully(in, b);
349            count(read);
350        }
351        return read;
352    }
353
354    private void appendToBlockDependencyBuffer(final byte[] b, final int off, int len) {
355        len = Math.min(len, blockDependencyBuffer.length);
356        if (len > 0) {
357            int keep = blockDependencyBuffer.length - len;
358            if (keep > 0) {
359                // move last keep bytes towards the start of the buffer
360                System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep);
361            }
362            // append new data
363            System.arraycopy(b, off, blockDependencyBuffer, keep, len);
364        }
365    }
366
367    /**
368     * Checks if the signature matches what is expected for a .lz4 file.
369     *
370     * <p>.lz4 files start with a four byte signature.</p>
371     *
372     * @param signature the bytes to check
373     * @param length    the number of bytes to check
374     * @return          true if this is a .sz stream, false otherwise
375     */
376    public static boolean matches(final byte[] signature, final int length) {
377
378        if (length < LZ4_SIGNATURE.length) {
379            return false;
380        }
381
382        byte[] shortenedSig = signature;
383        if (signature.length > LZ4_SIGNATURE.length) {
384            shortenedSig = new byte[LZ4_SIGNATURE.length];
385            System.arraycopy(signature, 0, shortenedSig, 0, LZ4_SIGNATURE.length);
386        }
387
388        return Arrays.equals(shortenedSig, LZ4_SIGNATURE);
389    }
390}