Skip to content

Commit

Permalink
Added tests, added skipRaw, simplified ensureAvailable
Browse files Browse the repository at this point in the history
  • Loading branch information
markrileybot committed Dec 8, 2016
1 parent b86ad77 commit 345362d
Show file tree
Hide file tree
Showing 4 changed files with 217 additions and 29 deletions.
76 changes: 47 additions & 29 deletions src/main/java/heatshrink/HsInputStream.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,20 @@
import java.io.EOFException;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;

/**
*
*/
public class HsInputStream extends FilterInputStream {

private static final double LN2 = Math.log(2);

/**
* State machine states. Don't really need this but it's nice
* to match the c code.
*/
enum State {
TAG_BIT, /* tag bit */
YIELD_LITERAL, /* ready to yield literal byte */
BACKREF_INDEX, /* ready to yield backref index */
BACKREF_COUNT, /* ready to yield backref count */
BACKREF_BOUNDS, /* ready to yield backref bounds */
YIELD_BACKREF, /* ready to yield back-reference */
BUFFER_EMPTY, /* Not enough data to continue */
}
Expand Down Expand Up @@ -87,17 +83,17 @@ enum State {
* this instance is to be created without an underlying stream.
*/
public HsInputStream(java.io.InputStream in) {
this(in, 2 << 11, 11, 4);
this(in, 11, 4);
}


public HsInputStream(java.io.InputStream in, int windowSize, int lookaheadSize) {
this(in, 2 << windowSize, windowSize, lookaheadSize);
this(in, bestInputBufferSize(0, windowSize), windowSize, lookaheadSize);
}

public HsInputStream(java.io.InputStream in, int bufferSize, int windowSize, int lookaheadSize) {
super(in);
this.inputBuffer = new byte[Math.max(2 << windowSize, bufferSize)];
this.inputBuffer = new byte[bestInputBufferSize(bufferSize, windowSize)];
this.window = new byte[1 << windowSize];
this.windowSize = windowSize;
this.lookaheadSize = lookaheadSize;
Expand Down Expand Up @@ -185,17 +181,16 @@ public int read(byte b[], int off, int len) throws IOException {
case YIELD_LITERAL:
state = readLiteral(rr);
break;
case BACKREF_INDEX:
state = readBackrefIndex();
break;
case BACKREF_COUNT:
state = readBackrefCount();
case BACKREF_BOUNDS:
state = readBackrefBounds();
break;
case YIELD_BACKREF:
state = readBackref(rr);
break;
case BUFFER_EMPTY:
break;
default:
break;
}

if(state == State.BUFFER_EMPTY) {
Expand All @@ -216,18 +211,15 @@ private State readTagBit() throws IOException {
return State.YIELD_LITERAL;
}
outputCount = outputIndex = 0;
return State.BACKREF_INDEX;
return State.BACKREF_BOUNDS;
}

private State readBackrefIndex() throws IOException {
private State readBackrefBounds() throws IOException {
int bits = getBits(windowSize);
if(bits == -1) return State.BUFFER_EMPTY;
outputIndex = bits + 1;
return State.BACKREF_COUNT;
}

private State readBackrefCount() throws IOException {
int bits = getBits(lookaheadSize);
bits = getBits(lookaheadSize);
if(bits == -1) return State.BUFFER_EMPTY;
outputCount = bits + 1;
return State.YIELD_BACKREF;
Expand Down Expand Up @@ -264,13 +256,14 @@ private State readLiteral(ReadResult rr) throws IOException {
}

/**
* Skips bytes (decompressing first). This means that <code>n</code>
* and the return value will be relative to the uncompressed bytes.
*
* Skips over and discards <code>n</code> bytes of data from the
* input stream. The <code>skip</code> method may, for a variety of
* reasons, end up skipping over some smaller number of bytes,
* possibly <code>0</code>. The actual number of bytes skipped is
* returned.
* <p>
* This method simply performs <code>in.skip(n)</code>.
*
* @param n the number of bytes to be skipped.
* @return the actual number of bytes skipped.
Expand All @@ -285,6 +278,25 @@ public long skip(long n) throws IOException {
return r;
}

/**
 * Like {@link #skip(long)}, but counts and skips <em>compressed</em> bytes
 * instead of decompressed ones.
 * <p>
 * Compressed bytes that are already sitting in the internal input buffer are
 * dropped first; whatever is still outstanding is delegated to the underlying
 * stream's <code>skip</code>. The bit position inside the current byte is
 * reset, so decoding resumes on a byte boundary after the skipped region.
 * <p>
 * A negative <code>n</code> is treated as <code>0</code>: nothing is skipped
 * and the buffer position is never moved backwards.
 *
 * @param n the number of compressed bytes to skip
 * @return The number of compressed bytes actually skipped (never negative);
 *         may be smaller than <code>n</code> if the underlying stream skips less
 * @throws IOException If the underlying stream fails while skipping
 * @see #skip(long)
 */
public long skipRaw(long n) throws IOException {
    // Clamp negative requests; otherwise the buffer position would be rewound
    // and a negative count returned.
    long remaining = Math.max(0L, n);

    // Consume what is already buffered before touching the underlying stream.
    int buffered = inputBufferLen - inputBufferPos;
    int fromBuffer = (int) Math.min(remaining, (long) buffered);

    // Forget any partially consumed byte so the next read starts byte-aligned.
    currentBytePos = 0;
    inputBufferPos += fromBuffer;

    long skipped = fromBuffer;
    remaining -= fromBuffer;
    if (remaining > 0) {
        skipped += in.skip(remaining);
    }
    return skipped;
}

/**
* Returns an estimate of the number of bytes that can be read (or
* skipped over) from this input stream without blocking by the next
Expand Down Expand Up @@ -384,15 +396,17 @@ public void clear() {
state = State.TAG_BIT;
outputCount = outputIndex = 0;
inputBufferPos = inputBufferLen = 0;
currentBytePos = 0;
inputExhausted = false;
currentBytePos = 0;
windowPos = 0;
}

private boolean ensureAvailable(int bitsRequired) throws IOException {
// exposed for testing
boolean ensureAvailable(int bitsRequired) throws IOException {
int bytesRemaining = inputBufferLen - inputBufferPos;
int bitsAvailable = bytesRemaining * 8;

bitsRequired -= (currentBytePos > 0 ? 8 - ((Math.log(currentBytePos) / LN2) + 1) : 0);
bitsRequired -= currentBytePos;
if(bitsRequired > bitsAvailable) {
if(bytesRemaining > 0) {
// lame buffer shift won't happen often
Expand All @@ -417,26 +431,30 @@ private int getBits(int numBits) throws IOException {
if(!ensureAvailable(numBits)) {
return -1;
}
for(; numBits > 0; numBits--, currentBytePos >>= 1) {
for(; numBits > 0; numBits--, currentBytePos--) {
if(currentBytePos == 0) {
currentByte = inputBuffer[inputBufferPos++];
currentBytePos = 0x80;
currentBytePos = 8;
}

ret <<= 1;
if (currentBytePos == 0x80 && numBits >= 8) {
if (currentBytePos == 8 && numBits >= 8) {
ret <<= 7;
ret |= currentByte & 0xff;
numBits -= 7;
currentBytePos = 0;
} else if ((currentByte & currentBytePos) != 0) {
currentBytePos = 1;
} else if ((currentByte & (1 << (currentBytePos - 1))) != 0) {
ret |= 0x01;
}
}

return ret;
}

/**
 * Chooses the capacity of the input buffer.
 *
 * @param bufferSize the requested buffer size in bytes
 * @param windowSize the window size exponent; the window holds 2^windowSize bytes
 * @return the larger of <code>bufferSize</code> and <code>1 &lt;&lt; windowSize</code>
 */
private static int bestInputBufferSize(int bufferSize, int windowSize) {
    final int windowBytes = 1 << windowSize;
    return bufferSize > windowBytes ? bufferSize : windowBytes;
}

private static final class ReadResult {
int off;
int len;
Expand Down
32 changes: 32 additions & 0 deletions src/test/java/heatshrink/HsInputStreamTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,36 @@ public void testAvailable() throws IOException {
Assert.assertEquals(3, hsi.available());
}
}

@Test
public void testSkip() throws IOException {
    // Ask to skip more than the stream can deliver; only 3 bytes are expected to be reported.
    byte[] raw = {1, 2, 3};
    try (HsInputStream stream = new HsInputStream(new ByteArrayInputStream(raw))) {
        long skipped = stream.skip(10);
        Assert.assertEquals(3, skipped);
    }
}

@Test
public void testSkipRaw() throws IOException {
    // Each scenario calls ensureAvailable(1) before skipping raw (compressed) bytes.

    // Empty input: nothing to skip.
    byte[] empty = {};
    try (HsInputStream stream = new HsInputStream(new ByteArrayInputStream(empty))) {
        stream.ensureAvailable(1);
        long skipped = stream.skipRaw(10);
        Assert.assertEquals(0, skipped);
    }

    // Fewer bytes than requested: only the 3 present are skipped.
    byte[] three = {1, 2, 3};
    try (HsInputStream stream = new HsInputStream(new ByteArrayInputStream(three))) {
        stream.ensureAvailable(1);
        long skipped = stream.skipRaw(10);
        Assert.assertEquals(3, skipped);
    }

    // Exactly as many bytes as requested.
    byte[] tenExact = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
    try (HsInputStream stream = new HsInputStream(new ByteArrayInputStream(tenExact))) {
        stream.ensureAvailable(1);
        long skipped = stream.skipRaw(10);
        Assert.assertEquals(10, skipped);
    }

    // More bytes than requested: the skip stops at the requested count.
    byte[] tenPartial = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
    try (HsInputStream stream = new HsInputStream(new ByteArrayInputStream(tenPartial))) {
        stream.ensureAvailable(1);
        long skipped = stream.skipRaw(5);
        Assert.assertEquals(5, skipped);
    }

    // 1024 bytes with window size 5 and lookahead 4: two skips of 513 yield 513, then the remaining 511.
    byte[] large = new byte[1024];
    try (HsInputStream stream = new HsInputStream(new ByteArrayInputStream(large), 5, 4)) {
        stream.ensureAvailable(1);
        long first = stream.skipRaw(513);
        Assert.assertEquals(513, first);
        long second = stream.skipRaw(513);
        Assert.assertEquals(511, second);
    }
}
}
138 changes: 138 additions & 0 deletions src/test/resources/testfiles/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
# heatshrink

A data compression/decompression library for embedded/real-time systems.


## Key Features:

- **Low memory usage (as low as 50 bytes)**
It is useful for some cases with less than 50 bytes, and useful
for many general cases with < 300 bytes.
- **Incremental, bounded CPU use**
You can chew on input data in arbitrarily tiny bites.
This is a useful property in hard real-time environments.
- **Can use either static or dynamic memory allocation**
The library doesn't impose any constraints on memory management.
- **ISC license**
You can use it freely, even for commercial purposes.


## Getting Started:

There is a standalone command-line program, `heatshrink`, but the
encoder and decoder can also be used as libraries, independent of each
other. To do so, copy `heatshrink_common.h`, `heatshrink_config.h`, and
either `heatshrink_encoder.c` or `heatshrink_decoder.c` (and their
respective header) into your project. For projects that use both,
static libraries are built that use static and dynamic allocation.

Dynamic allocation is used by default, but in an embedded context, you
probably want to statically allocate the encoder/decoder. Set
`HEATSHRINK_DYNAMIC_ALLOC` to 0 in `heatshrink_config.h`.


### Basic Usage

1. Allocate a `heatshrink_encoder` or `heatshrink_decoder` state machine
using their `alloc` function, or statically allocate one and call their
`reset` function to initialize them. (See below for configuration
options.)

2. Use `sink` to sink an input buffer into the state machine. The
`input_size` pointer argument will be set to indicate how many bytes of
the input buffer were actually consumed. (If 0 bytes were consumed, the
buffer is full.)

3. Use `poll` to move output from the state machine into an output
buffer. The `output_size` pointer argument will be set to indicate how
many bytes were output, and the function return value will indicate
whether further output is available. (The state machine may not output
any data until it has received enough input.)

Repeat steps 2 and 3 to stream data through the state machine. Since
it's doing data compression, the input and output sizes can vary
significantly. Looping will be necessary to buffer the input and output
as the data is processed.

4. When the end of the input stream is reached, call `finish` to notify
the state machine that no more input is available. The return value from
`finish` will indicate whether any output remains. If so, call `poll` to
get more.

Continue calling `finish` and `poll`ing to flush remaining output until
`finish` indicates that the output has been exhausted.

Sinking more data after `finish` has been called will not work without
calling `reset` on the state machine.


## Configuration

heatshrink has a couple of configuration options, which impact its resource
usage and how effectively it can compress data. These are set when
dynamically allocating an encoder or decoder, or in `heatshrink_config.h`
if they are statically allocated.

- `window_sz2`, `-w` in the CLI: Set the window size to 2^W bytes.

The window size determines how far back in the input can be searched for
repeated patterns. A `window_sz2` of 8 will only use 256 bytes (2^8),
while a `window_sz2` of 10 will use 1024 bytes (2^10). The latter uses
more memory, but may also compress more effectively by detecting more
repetition.

The `window_sz2` setting currently must be between 4 and 15.

- `lookahead_sz2`, `-l` in the CLI: Set the lookahead size to 2^L bytes.

The lookahead size determines the max length for repeated patterns that
are found. If the `lookahead_sz2` is 4, a 50-byte run of 'a' characters
will be represented as several repeated 16-byte patterns (2^4 is 16),
whereas a larger `lookahead_sz2` may be able to represent it all at
once. The number of bits used for the lookahead size is fixed, so an
overly large lookahead size can reduce compression by adding unused
size bits to small patterns.

The `lookahead_sz2` setting currently must be between 3 and the
`window_sz2` - 1.

- `input_buffer_size` - How large an input buffer to use for the
decoder. This impacts how much work the decoder can do in a single
step, and a larger buffer will use more memory. An extremely small
buffer (say, 1 byte) will add overhead due to lots of suspend/resume
function calls, but should not change how well data compresses.


### Recommended Defaults

For embedded/low memory contexts, a `window_sz2` in the 8 to 10 range is
probably a good default, depending on how tight memory is. Smaller or
larger window sizes may make better trade-offs in specific
circumstances, but should be checked with representative data.

The `lookahead_sz2` should probably start near the `window_sz2`/2, e.g.
-w 8 -l 4 or -w 10 -l 5. The command-line program can be used to measure
how well test data works with different settings.


## More Information and Benchmarks:

heatshrink is based on [LZSS], since it's particularly suitable for
compression in small amounts of memory. It can use an optional, small
[index] to make compression significantly faster, but otherwise can run
in under 100 bytes of memory. The index currently adds 2^(window size+1)
bytes to memory usage for compression, and temporarily allocates 512
bytes on the stack during index construction (if the index is enabled).

For more information, see the [blog post] for an overview, and the
`heatshrink_encoder.h` / `heatshrink_decoder.h` header files for API
documentation.

[blog post]: http://spin.atomicobject.com/2013/03/14/heatshrink-embedded-data-compression/
[index]: http://spin.atomicobject.com/2014/01/13/lightweight-indexing-for-embedded-systems/
[LZSS]: http://en.wikipedia.org/wiki/Lempel-Ziv-Storer-Szymanski


## Build Status

[![Build Status](https://travis-ci.org/atomicobject/heatshrink.png)](http://travis-ci.org/atomicobject/heatshrink)
Binary file added src/test/resources/testfiles/README.md.hs.9.8
Binary file not shown.

0 comments on commit 345362d

Please sign in to comment.