001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.compressors.lz4; 020 021import java.io.IOException; 022import java.io.InputStream; 023import java.util.Arrays; 024import java.util.zip.CheckedInputStream; 025 026import org.apache.commons.compress.compressors.CompressorInputStream; 027import org.apache.commons.compress.utils.BoundedInputStream; 028import org.apache.commons.compress.utils.ByteUtils; 029import org.apache.commons.compress.utils.IOUtils; 030import org.apache.commons.compress.utils.InputStreamStatistics; 031import org.apache.commons.io.input.CountingInputStream; 032 033/** 034 * CompressorInputStream for the LZ4 frame format. 035 * 036 * <p> 037 * Based on the "spec" in the version "1.5.1 (31/03/2015)" 038 * </p> 039 * 040 * @see <a href="https://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a> 041 * @since 1.14 042 * @NotThreadSafe 043 */ 044public class FramedLZ4CompressorInputStream extends CompressorInputStream implements InputStreamStatistics { 045 046 /** Used by FramedLZ4CompressorOutputStream as well. */ 047 static final byte[] LZ4_SIGNATURE = { 4, 0x22, 0x4d, 0x18 }; 048 private static final byte[] SKIPPABLE_FRAME_TRAILER = { 0x2a, 0x4d, 0x18 }; 049 private static final byte SKIPPABLE_FRAME_PREFIX_BYTE_MASK = 0x50; 050 051 static final int VERSION_MASK = 0xC0; 052 static final int SUPPORTED_VERSION = 0x40; 053 static final int BLOCK_INDEPENDENCE_MASK = 0x20; 054 static final int BLOCK_CHECKSUM_MASK = 0x10; 055 static final int CONTENT_SIZE_MASK = 0x08; 056 static final int CONTENT_CHECKSUM_MASK = 0x04; 057 static final int BLOCK_MAX_SIZE_MASK = 0x70; 058 static final int UNCOMPRESSED_FLAG_MASK = 0x80000000; 059 060 private static boolean isSkippableFrameSignature(final byte[] b) { 061 if ((b[0] & SKIPPABLE_FRAME_PREFIX_BYTE_MASK) != SKIPPABLE_FRAME_PREFIX_BYTE_MASK) { 062 return false; 063 } 064 for (int i = 1; i < 4; i++) { 065 if (b[i] != SKIPPABLE_FRAME_TRAILER[i - 1]) { 066 return false; 067 } 068 } 069 return true; 070 } 071 072 /** 073 * Checks if the signature matches what is expected for a .lz4 file. 074 * <p> 075 * .lz4 files start with a four byte signature. 076 * </p> 077 * 078 * @param signature the bytes to check 079 * @param length the number of bytes to check 080 * @return true if this is a .sz stream, false otherwise 081 */ 082 public static boolean matches(final byte[] signature, final int length) { 083 084 if (length < LZ4_SIGNATURE.length) { 085 return false; 086 } 087 088 byte[] shortenedSig = signature; 089 if (signature.length > LZ4_SIGNATURE.length) { 090 shortenedSig = Arrays.copyOf(signature, LZ4_SIGNATURE.length); 091 } 092 093 return Arrays.equals(shortenedSig, LZ4_SIGNATURE); 094 } 095 096 /** Used in no-arg read method. */ 097 private final byte[] oneByte = new byte[1]; 098 private final ByteUtils.ByteSupplier supplier = this::readOneByte; 099 100 private final CountingInputStream inputStream; 101 private final boolean decompressConcatenated; 102 private boolean expectBlockChecksum; 103 private boolean expectBlockDependency; 104 105 private boolean expectContentSize; 106 private boolean expectContentChecksum; 107 108 private InputStream currentBlock; 109 110 private boolean endReached, inUncompressed; 111 112 /** Used for frame header checksum and content checksum, if present. */ 113 private final org.apache.commons.codec.digest.XXHash32 contentHash = new org.apache.commons.codec.digest.XXHash32(); 114 115 /** Used for block checksum, if present. */ 116 private final org.apache.commons.codec.digest.XXHash32 blockHash = new org.apache.commons.codec.digest.XXHash32(); 117 118 /** Only created if the frame doesn't set the block independence flag. */ 119 private byte[] blockDependencyBuffer; 120 121 /** 122 * Creates a new input stream that decompresses streams compressed using the LZ4 frame format and stops after decompressing the first frame. 123 * 124 * @param in the InputStream from which to read the compressed data 125 * @throws IOException if reading fails 126 */ 127 public FramedLZ4CompressorInputStream(final InputStream in) throws IOException { 128 this(in, false); 129 } 130 131 /** 132 * Creates a new input stream that decompresses streams compressed using the LZ4 frame format. 133 * 134 * @param in the InputStream from which to read the compressed data 135 * @param decompressConcatenated if true, decompress until the end of the input; if false, stop after the first LZ4 frame and leave the input position to 136 * point to the next byte after the frame stream 137 * @throws IOException if reading fails 138 */ 139 public FramedLZ4CompressorInputStream(final InputStream in, final boolean decompressConcatenated) throws IOException { 140 this.inputStream = new CountingInputStream(in); 141 this.decompressConcatenated = decompressConcatenated; 142 init(true); 143 } 144 145 private void appendToBlockDependencyBuffer(final byte[] b, final int off, int len) { 146 len = Math.min(len, blockDependencyBuffer.length); 147 if (len > 0) { 148 final int keep = blockDependencyBuffer.length - len; 149 if (keep > 0) { 150 // move last keep bytes towards the start of the buffer 151 System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep); 152 } 153 // append new data 154 System.arraycopy(b, off, blockDependencyBuffer, keep, len); 155 } 156 } 157 158 /** {@inheritDoc} */ 159 @Override 160 public void close() throws IOException { 161 try { 162 if (currentBlock != null) { 163 currentBlock.close(); 164 currentBlock = null; 165 } 166 } finally { 167 inputStream.close(); 168 } 169 } 170 171 /** 172 * @since 1.17 173 */ 174 @Override 175 public long getCompressedCount() { 176 return inputStream.getByteCount(); 177 } 178 179 private void init(final boolean firstFrame) throws IOException { 180 if (readSignature(firstFrame)) { 181 readFrameDescriptor(); 182 nextBlock(); 183 } 184 } 185 186 private void maybeFinishCurrentBlock() throws IOException { 187 if (currentBlock != null) { 188 currentBlock.close(); 189 currentBlock = null; 190 if (expectBlockChecksum) { 191 verifyChecksum(blockHash, "block"); 192 blockHash.reset(); 193 } 194 } 195 } 196 197 private void nextBlock() throws IOException { 198 maybeFinishCurrentBlock(); 199 final long len = ByteUtils.fromLittleEndian(supplier, 4); 200 final boolean uncompressed = (len & UNCOMPRESSED_FLAG_MASK) != 0; 201 final int realLen = (int) (len & ~UNCOMPRESSED_FLAG_MASK); 202 if (realLen == 0) { 203 verifyContentChecksum(); 204 if (!decompressConcatenated) { 205 endReached = true; 206 } else { 207 init(false); 208 } 209 return; 210 } 211 InputStream capped = new BoundedInputStream(inputStream, realLen); 212 if (expectBlockChecksum) { 213 capped = new CheckedInputStream(capped, blockHash); 214 } 215 if (uncompressed) { 216 inUncompressed = true; 217 currentBlock = capped; 218 } else { 219 inUncompressed = false; 220 final BlockLZ4CompressorInputStream s = new BlockLZ4CompressorInputStream(capped); 221 if (expectBlockDependency) { 222 s.prefill(blockDependencyBuffer); 223 } 224 currentBlock = s; 225 } 226 } 227 228 /** {@inheritDoc} */ 229 @Override 230 public int read() throws IOException { 231 return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF; 232 } 233 234 /** {@inheritDoc} */ 235 @Override 236 public int read(final byte[] b, final int off, final int len) throws IOException { 237 if (len == 0) { 238 return 0; 239 } 240 if (endReached) { 241 return -1; 242 } 243 int r = readOnce(b, off, len); 244 if (r == -1) { 245 nextBlock(); 246 if (!endReached) { 247 r = readOnce(b, off, len); 248 } 249 } 250 if (r != -1) { 251 if (expectBlockDependency) { 252 appendToBlockDependencyBuffer(b, off, r); 253 } 254 if (expectContentChecksum) { 255 contentHash.update(b, off, r); 256 } 257 } 258 return r; 259 } 260 261 private void readFrameDescriptor() throws IOException { 262 final int flags = readOneByte(); 263 if (flags == -1) { 264 throw new IOException("Premature end of stream while reading frame flags"); 265 } 266 contentHash.update(flags); 267 if ((flags & VERSION_MASK) != SUPPORTED_VERSION) { 268 throw new IOException("Unsupported version " + (flags >> 6)); 269 } 270 expectBlockDependency = (flags & BLOCK_INDEPENDENCE_MASK) == 0; 271 if (expectBlockDependency) { 272 if (blockDependencyBuffer == null) { 273 blockDependencyBuffer = new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE]; 274 } 275 } else { 276 blockDependencyBuffer = null; 277 } 278 expectBlockChecksum = (flags & BLOCK_CHECKSUM_MASK) != 0; 279 expectContentSize = (flags & CONTENT_SIZE_MASK) != 0; 280 expectContentChecksum = (flags & CONTENT_CHECKSUM_MASK) != 0; 281 final int bdByte = readOneByte(); 282 if (bdByte == -1) { // max size is irrelevant for this implementation 283 throw new IOException("Premature end of stream while reading frame BD byte"); 284 } 285 contentHash.update(bdByte); 286 if (expectContentSize) { // for now, we don't care, contains the uncompressed size 287 final byte[] contentSize = new byte[8]; 288 final int skipped = IOUtils.readFully(inputStream, contentSize); 289 count(skipped); 290 if (8 != skipped) { 291 throw new IOException("Premature end of stream while reading content size"); 292 } 293 contentHash.update(contentSize, 0, contentSize.length); 294 } 295 final int headerHash = readOneByte(); 296 if (headerHash == -1) { // partial hash of header. 297 throw new IOException("Premature end of stream while reading frame header checksum"); 298 } 299 final int expectedHash = (int) (contentHash.getValue() >> 8 & 0xff); 300 contentHash.reset(); 301 if (headerHash != expectedHash) { 302 throw new IOException("Frame header checksum mismatch"); 303 } 304 } 305 306 private int readOnce(final byte[] b, final int off, final int len) throws IOException { 307 if (inUncompressed) { 308 final int cnt = currentBlock.read(b, off, len); 309 count(cnt); 310 return cnt; 311 } 312 final BlockLZ4CompressorInputStream l = (BlockLZ4CompressorInputStream) currentBlock; 313 final long before = l.getBytesRead(); 314 final int cnt = currentBlock.read(b, off, len); 315 count(l.getBytesRead() - before); 316 return cnt; 317 } 318 319 private int readOneByte() throws IOException { 320 final int b = inputStream.read(); 321 if (b != -1) { 322 count(1); 323 return b & 0xFF; 324 } 325 return -1; 326 } 327 328 private boolean readSignature(final boolean firstFrame) throws IOException { 329 final String garbageMessage = firstFrame ? "Not a LZ4 frame stream" : "LZ4 frame stream followed by garbage"; 330 final byte[] b = new byte[4]; 331 int read = IOUtils.readFully(inputStream, b); 332 count(read); 333 if (0 == read && !firstFrame) { 334 // good LZ4 frame and nothing after it 335 endReached = true; 336 return false; 337 } 338 if (4 != read) { 339 throw new IOException(garbageMessage); 340 } 341 342 read = skipSkippableFrame(b); 343 if (0 == read && !firstFrame) { 344 // good LZ4 frame with only some skippable frames after it 345 endReached = true; 346 return false; 347 } 348 if (4 != read || !matches(b, 4)) { 349 throw new IOException(garbageMessage); 350 } 351 return true; 352 } 353 354 /** 355 * Skips over the contents of a skippable frame as well as skippable frames following it. 356 * <p> 357 * It then tries to read four more bytes which are supposed to hold an LZ4 signature and returns the number of bytes read while storing the bytes in the 358 * given array. 359 * </p> 360 */ 361 private int skipSkippableFrame(final byte[] b) throws IOException { 362 int read = 4; 363 while (read == 4 && isSkippableFrameSignature(b)) { 364 final long len = ByteUtils.fromLittleEndian(supplier, 4); 365 if (len < 0) { 366 throw new IOException("Found illegal skippable frame with negative size"); 367 } 368 final long skipped = org.apache.commons.io.IOUtils.skip(inputStream, len); 369 count(skipped); 370 if (len != skipped) { 371 throw new IOException("Premature end of stream while skipping frame"); 372 } 373 read = IOUtils.readFully(inputStream, b); 374 count(read); 375 } 376 return read; 377 } 378 379 private void verifyChecksum(final org.apache.commons.codec.digest.XXHash32 hash, final String kind) throws IOException { 380 final byte[] checksum = new byte[4]; 381 final int read = IOUtils.readFully(inputStream, checksum); 382 count(read); 383 if (4 != read) { 384 throw new IOException("Premature end of stream while reading " + kind + " checksum"); 385 } 386 final long expectedHash = hash.getValue(); 387 if (expectedHash != ByteUtils.fromLittleEndian(checksum)) { 388 throw new IOException(kind + " checksum mismatch."); 389 } 390 } 391 392 private void verifyContentChecksum() throws IOException { 393 if (expectContentChecksum) { 394 verifyChecksum(contentHash, "content"); 395 } 396 contentHash.reset(); 397 } 398}