From 345362d3844ac4b098fe3ef68db0dce9c5d41937 Mon Sep 17 00:00:00 2001 From: Mark Riley Date: Thu, 8 Dec 2016 02:03:56 -0500 Subject: [PATCH] Added tests, added skipRaw, simplified ensureAvailable --- src/main/java/heatshrink/HsInputStream.java | 76 ++++++---- .../java/heatshrink/HsInputStreamTest.java | 32 ++++ src/test/resources/testfiles/README.md | 138 ++++++++++++++++++ src/test/resources/testfiles/README.md.hs.9.8 | Bin 0 -> 4008 bytes 4 files changed, 217 insertions(+), 29 deletions(-) create mode 100644 src/test/resources/testfiles/README.md create mode 100644 src/test/resources/testfiles/README.md.hs.9.8 diff --git a/src/main/java/heatshrink/HsInputStream.java b/src/main/java/heatshrink/HsInputStream.java index 1aef6d2..723a7c0 100644 --- a/src/main/java/heatshrink/HsInputStream.java +++ b/src/main/java/heatshrink/HsInputStream.java @@ -3,15 +3,12 @@ import java.io.EOFException; import java.io.FilterInputStream; import java.io.IOException; -import java.io.InputStream; /** * */ public class HsInputStream extends FilterInputStream { - private static final double LN2 = Math.log(2); - /** * State machine states. Don't really need this but it's nice * to match the c code. @@ -19,8 +16,7 @@ public class HsInputStream extends FilterInputStream { enum State { TAG_BIT, /* tag bit */ YIELD_LITERAL, /* ready to yield literal byte */ - BACKREF_INDEX, /* ready to yield backref index */ - BACKREF_COUNT, /* ready to yield backref count */ + BACKREF_BOUNDS, /* ready to yield backref bounds */ YIELD_BACKREF, /* ready to yield back-reference */ BUFFER_EMPTY, /* Not enough data to continue */ } @@ -87,17 +83,17 @@ enum State { * this instance is to be created without an underlying stream. */ public HsInputStream(java.io.InputStream in) { - this(in, 2 << 11, 11, 4); + this(in, 11, 4); } public HsInputStream(java.io.InputStream in, int windowSize, int lookaheadSize) { - this(in, 2 << windowSize, windowSize, lookaheadSize); + this(in, bestInputBufferSize(0, windowSize), windowSize, lookaheadSize); } public HsInputStream(java.io.InputStream in, int bufferSize, int windowSize, int lookaheadSize) { super(in); - this.inputBuffer = new byte[Math.max(2 << windowSize, bufferSize)]; + this.inputBuffer = new byte[bestInputBufferSize(bufferSize, windowSize)]; this.window = new byte[1 << windowSize]; this.windowSize = windowSize; this.lookaheadSize = lookaheadSize; @@ -185,17 +181,16 @@ public int read(byte b[], int off, int len) throws IOException { case YIELD_LITERAL: state = readLiteral(rr); break; - case BACKREF_INDEX: - state = readBackrefIndex(); - break; - case BACKREF_COUNT: - state = readBackrefCount(); + case BACKREF_BOUNDS: + state = readBackrefBounds(); break; case YIELD_BACKREF: state = readBackref(rr); break; case BUFFER_EMPTY: break; + default: + break; } if(state == State.BUFFER_EMPTY) { @@ -216,18 +211,15 @@ private State readTagBit() throws IOException { return State.YIELD_LITERAL; } outputCount = outputIndex = 0; - return State.BACKREF_INDEX; + return State.BACKREF_BOUNDS; } - private State readBackrefIndex() throws IOException { + private State readBackrefBounds() throws IOException { int bits = getBits(windowSize); if(bits == -1) return State.BUFFER_EMPTY; outputIndex = bits + 1; - return State.BACKREF_COUNT; - } - private State readBackrefCount() throws IOException { - int bits = getBits(lookaheadSize); + bits = getBits(lookaheadSize); if(bits == -1) return State.BUFFER_EMPTY; outputCount = bits + 1; return State.YIELD_BACKREF; @@ -264,13 +256,14 @@ private State readLiteral(ReadResult rr) throws IOException { } /** + * Skips bytes (decompressing first). This means that n + * and the return value will be relative to the uncompressed bytes. + * * Skips over and discards n bytes of data from the * input stream. The skip method may, for a variety of * reasons, end up skipping over some smaller number of bytes, * possibly 0. The actual number of bytes skipped is * returned. - *

- * This method simply performs in.skip(n). * * @param n the number of bytes to be skipped. * @return the actual number of bytes skipped. @@ -285,6 +278,25 @@ public long skip(long n) throws IOException { return r; } + /** + * Like skip but skips compressed bytes + * + * @param n the number of compressed bytes to skip + * @return The number of bytes skipped + * @throws IOException If something breaks + * @see #skip(long) + */ + public long skipRaw(long n) throws IOException { + long toSkip = Math.min(n, inputBufferLen - inputBufferPos); + currentBytePos = 0; + inputBufferPos += toSkip; + n -= toSkip; + if(n > 0) { + toSkip += in.skip(n); + } + return toSkip; + } + /** * Returns an estimate of the number of bytes that can be read (or * skipped over) from this input stream without blocking by the next @@ -384,15 +396,17 @@ public void clear() { state = State.TAG_BIT; outputCount = outputIndex = 0; inputBufferPos = inputBufferLen = 0; - currentBytePos = 0; inputExhausted = false; + currentBytePos = 0; + windowPos = 0; } - private boolean ensureAvailable(int bitsRequired) throws IOException { + // exposed for testing + boolean ensureAvailable(int bitsRequired) throws IOException { int bytesRemaining = inputBufferLen - inputBufferPos; int bitsAvailable = bytesRemaining * 8; - bitsRequired -= (currentBytePos > 0 ? 8 - ((Math.log(currentBytePos) / LN2) + 1) : 0); + bitsRequired -= currentBytePos; if(bitsRequired > bitsAvailable) { if(bytesRemaining > 0) { // lame buffer shift won't happen often @@ -417,19 +431,19 @@ private int getBits(int numBits) throws IOException { if(!ensureAvailable(numBits)) { return -1; } - for(; numBits > 0; numBits--, currentBytePos >>= 1) { + for(; numBits > 0; numBits--, currentBytePos--) { if(currentBytePos == 0) { currentByte = inputBuffer[inputBufferPos++]; - currentBytePos = 0x80; + currentBytePos = 8; } ret <<= 1; - if (currentBytePos == 0x80 && numBits >= 8) { + if (currentBytePos == 8 && numBits >= 8) { ret <<= 7; ret |= currentByte & 0xff; numBits -= 7; - currentBytePos = 0; - } else if ((currentByte & currentBytePos) != 0) { + currentBytePos = 1; + } else if ((currentByte & (1 << (currentBytePos - 1))) != 0) { ret |= 0x01; } } @@ -437,6 +451,10 @@ private int getBits(int numBits) throws IOException { return ret; } + private static int bestInputBufferSize(int bufferSize, int windowSize) { + return Math.max(1 << windowSize, bufferSize); + } + private static final class ReadResult { int off; int len; diff --git a/src/test/java/heatshrink/HsInputStreamTest.java b/src/test/java/heatshrink/HsInputStreamTest.java index 34c4984..e71cb7f 100644 --- a/src/test/java/heatshrink/HsInputStreamTest.java +++ b/src/test/java/heatshrink/HsInputStreamTest.java @@ -41,4 +41,36 @@ public void testAvailable() throws IOException { Assert.assertEquals(3, hsi.available()); } } + + @Test + public void testSkip() throws IOException { + try (HsInputStream hsi = new HsInputStream(new ByteArrayInputStream(new byte[]{1,2,3}))) { + Assert.assertEquals(3, hsi.skip(10)); + } + } + + @Test + public void testSkipRaw() throws IOException { + try(HsInputStream hsi = new HsInputStream(new ByteArrayInputStream(new byte[] {}))) { + hsi.ensureAvailable(1); + Assert.assertEquals(0, hsi.skipRaw(10)); + } + try(HsInputStream hsi = new HsInputStream(new ByteArrayInputStream(new byte[] {1,2,3}))) { + hsi.ensureAvailable(1); + Assert.assertEquals(3, hsi.skipRaw(10)); + } + try(HsInputStream hsi = new HsInputStream(new ByteArrayInputStream(new byte[] {0,1,2,3,4,5,6,7,8,9}))) { + hsi.ensureAvailable(1); + Assert.assertEquals(10, hsi.skipRaw(10)); + } + try(HsInputStream hsi = new HsInputStream(new ByteArrayInputStream(new byte[] {0,1,2,3,4,5,6,7,8,9}))) { + hsi.ensureAvailable(1); + Assert.assertEquals(5, hsi.skipRaw(5)); + } + try(HsInputStream hsi = new HsInputStream(new ByteArrayInputStream(new byte[1024]), 5, 4)) { + hsi.ensureAvailable(1); + Assert.assertEquals(513, hsi.skipRaw(513)); + Assert.assertEquals(511, hsi.skipRaw(513)); + } + } } diff --git a/src/test/resources/testfiles/README.md b/src/test/resources/testfiles/README.md new file mode 100644 index 0000000..617c64b --- /dev/null +++ b/src/test/resources/testfiles/README.md @@ -0,0 +1,138 @@ +# heatshrink + +A data compression/decompression library for embedded/real-time systems. + + +## Key Features: + +- **Low memory usage (as low as 50 bytes)** + It is useful for some cases with less than 50 bytes, and useful + for many general cases with < 300 bytes. +- **Incremental, bounded CPU use** + You can chew on input data in arbitrarily tiny bites. + This is a useful property in hard real-time environments. +- **Can use either static or dynamic memory allocation** + The library doesn't impose any constraints on memory management. +- **ISC license** + You can use it freely, even for commercial purposes. + + +## Getting Started: + +There is a standalone command-line program, `heatshrink`, but the +encoder and decoder can also be used as libraries, independent of each +other. To do so, copy `heatshrink_common.h`, `heatshrink_config.h`, and +either `heatshrink_encoder.c` or `heatshrink_decoder.c` (and their +respective header) into your project. For projects that use both, +static libraries are built that use static and dynamic allocation. + +Dynamic allocation is used by default, but in an embedded context, you +probably want to statically allocate the encoder/decoder. Set +`HEATSHRINK_DYNAMIC_ALLOC` to 0 in `heatshrink_config.h`. + + +### Basic Usage + +1. Allocate a `heatshrink_encoder` or `heatshrink_decoder` state machine +using their `alloc` function, or statically allocate one and call their +`reset` function to initialize them. (See below for configuration +options.) + +2. Use `sink` to sink an input buffer into the state machine. The +`input_size` pointer argument will be set to indicate how many bytes of +the input buffer were actually consumed. (If 0 bytes were conusmed, the +buffer is full.) + +3. Use `poll` to move output from the state machine into an output +buffer. The `output_size` pointer argument will be set to indicate how +many bytes were output, and the function return value will indicate +whether further output is available. (The state machine may not output +any data until it has received enough input.) + +Repeat steps 2 and 3 to stream data through the state machine. Since +it's doing data compression, the input and output sizes can vary +significantly. Looping will be necessary to buffer the input and output +as the data is processed. + +4. When the end of the input stream is reached, call `finish` to notify +the state machine that no more input is available. The return value from +`finish` will indicate whether any output remains. if so, call `poll` to +get more. + +Continue calling `finish` and `poll`ing to flush remaining output until +`finish` indicates that the output has been exhausted. + +Sinking more data after `finish` has been called will not work without +calling `reset` on the state machine. + + +## Configuration + +heatshrink has a couple configuration options, which impact its resource +usage and how effectively it can compress data. These are set when +dynamically allocating an encoder or decoder, or in `heatshrink_config.h` +if they are statically allocated. + +- `window_sz2`, `-w` in the CLI: Set the window size to 2^W bytes. + +The window size determines how far back in the input can be searched for +repeated patterns. A `window_sz2` of 8 will only use 256 bytes (2^8), +while a `window_sz2` of 10 will use 1024 bytes (2^10). The latter uses +more memory, but may also compress more effectively by detecting more +repetition. + +The `window_sz2` setting currently must be between 4 and 15. + +- `lookahead_sz2`, `-l` in the CLI: Set the lookahead size to 2^L bytes. + +The lookahead size determines the max length for repeated patterns that +are found. If the `lookahead_sz2` is 4, a 50-byte run of 'a' characters +will be represented as several repeated 16-byte patterns (2^4 is 16), +whereas a larger `lookahead_sz2` may be able to represent it all at +once. The number of bits used for the lookahead size is fixed, so an +overly large lookahead size can reduce compression by adding unused +size bits to small patterns. + +The `lookahead_sz2` setting currently must be between 3 and the +`window_sz2` - 1. + +- `input_buffer_size` - How large an input buffer to use for the +decoder. This impacts how much work the decoder can do in a single +step, and a larger buffer will use more memory. An extremely small +buffer (say, 1 byte) will add overhead due to lots of suspend/resume +function calls, but should not change how well data compresses. + + +### Recommended Defaults + +For embedded/low memory contexts, a `window_sz2` in the 8 to 10 range is +probably a good default, depending on how tight memory is. Smaller or +larger window sizes may make better trade-offs in specific +circumstances, but should be checked with representative data. + +The `lookahead_sz2` should probably start near the `window_sz2`/2, e.g. +-w 8 -l 4 or -w 10 -l 5. The command-line program can be used to measure +how well test data works with different settings. + + +## More Information and Benchmarks: + +heatshrink is based on [LZSS], since it's particularly suitable for +compression in small amounts of memory. It can use an optional, small +[index] to make compression significantly faster, but otherwise can run +in under 100 bytes of memory. The index currently adds 2^(window size+1) +bytes to memory usage for compression, and temporarily allocates 512 +bytes on the stack during index construction (if the index is enabled). + +For more information, see the [blog post] for an overview, and the +`heatshrink_encoder.h` / `heatshrink_decoder.h` header files for API +documentation. + +[blog post]: http://spin.atomicobject.com/2013/03/14/heatshrink-embedded-data-compression/ +[index]: http://spin.atomicobject.com/2014/01/13/lightweight-indexing-for-embedded-systems/ +[LZSS]: http://en.wikipedia.org/wiki/Lempel-Ziv-Storer-Szymanski + + +## Build Status + + [![Build Status](https://travis-ci.org/atomicobject/heatshrink.png)](http://travis-ci.org/atomicobject/heatshrink) diff --git a/src/test/resources/testfiles/README.md.hs.9.8 b/src/test/resources/testfiles/README.md.hs.9.8 new file mode 100644 index 0000000000000000000000000000000000000000..f0dc58a164dfe636119203f5f578f577ee605dee GIT binary patch literal 4008 zcmXAsYgiNKzQre#420ogz$Ac3PpF_sE(I^Br9C@A)Kp40MTmreO@vt3BmucvFFlhG zqNN^oL}%V+Iz2WcasV9#RCUVjBDdt@dytS*$chVnd34JL{FG zQa2;Pd~cWECp{EL)TDl8+nZ*+SBAP|*h0I;$RDC zgEJ+4LdE`6d#P`s(QRme!q$%{ZssTiD~G8*X~gt<@eDW2lX3t*$ ze<+MDu0AGx&5#TeEs7^;1MX~M7Mm^@d-r|`>RxhGX2Pyg^H*ZNF|z)t!lGjc&gjVx ziNUgFncZtjc zi9kCx!Ve=gDuv5}2pP%~n}o7$sV$dhip)8Sfng!9NGFxa25cd6!2UJiFGy8tEFJzt zIVo>SIxI|J7{DahcmL3dF&Vd4+a*>c5;`vUaFP1JH&?AiqDrq5^I82 zw{9&ggfozC-No?f)eNHn5iq+tCYuShjN4(GWPV?mv*r5iNYq)t zG6^{68~+Y>vM2f9Fm2u2STO@f2f?fE#l&N*I9fszKX%=z4{KXje|~2X(#Pm{a^vI@ z{bHq#R%w}%PgEJ{JaGbn72>~j!tEr8mK?VO0{*eu>^hkqVSATbd=akuwzkX=7e~7{u1c^7kI2lPX}YA4{0n4tH&ggo170G*J>xi^87JlJuo+N) zLLyYWhX){uxk2NKzAa+>4=+5BS_lZ&9^&>X)TFl2Qg`WSVwlH#ov;YsX7q^$^$Z(4 zRMzW}V}fV!Z$)gIfoG#N-X)n_hLdE98Y)~5bYT&dv%G`-9+r6rH1GfpMecD&Zt12^ zD6^NFGE*qGK}#K1u(J?8d#Kb_Q<{F7vIexeZ|gToQhZ^Sa=8egp_7M|qYvYYYX_XN z?YG(33W`FN)X&w=`=eNv;bbs_Jk*pTxZZ z{Tp?2Ir;AUzq`Lb(A@41*VHR%(eJCu2()K`k8wx)mEX65ZS3FWpoG3OG9y=2~= z!uSFbQJK9&7eyN{g74T2^x`z@c)2Otw{+`ic>GZRpq57n^W%bM(KOw4>;-9q3R8BS zk9Zz_$Rb)Yw#pbYf2#bPauupJ2|x4hSsTdmu?Xc@M4EMuqs@^y{9F1j`1XmGbc5m` z+UKap*SJ#)pDZ~6bPbUyDL#7TX{8?{tN`{WG}{El(uui3x>CtVFs%8NK@j;F8?qz! z6L)a`Zj0Bi8M~p(He5w$y59?p9!2#oZO9dzTHo8Y#0J-rjLvS2nDPx}jhYHniK1S? z1|^^KYNgQ-2wHO>Z|tW>aRay)&8Z~S6BpQhyQYvDw`ZO99fqJd?VE)%qlcZif*-p= zbW+ijw9Bpm*C<(={38bzO0g&}FFsfGw3L7>m)$YKG)S-;x0%NvY9C72r=V>XYHe^=!WK3_LU?g0-2 zOJmn!W<3(@ie6wFBa=XLr+2qLtO}BvvzK0_WW!I(v_h6XJ=*9`G`ZU7L}2}ZP&Lv; zC+}M({g;xgPkz3JOp5cl&S-E#s^tlSX!)NjEx$^|IzB7Jb@QmtMOFIzx#k~K9jOEY zB8g?G=8A3!3qn(mtE306*;)IW5b{ni-lp`9#5CRnu`cocEtHT461)`%A|ed}Uz4jl zG?PXD{-|UPKeAC+#uwj*#J>V`-=It|X zJU1arQQ2cw$G!YzAFFmVl4hr=w^PoK61C}LLHY5`_#tgRJ(%&miqEd;VhbMAOYzqfWWQ?|+t|0`13#y3*O`(~=vL2lUK!Il;2xUCD^X z7I%KqhZObTeEOHZa^sU|`Uuqd_AM^VM(Q2PIaQpyIzQ2uIecWFm9Dp1932Gh=0t8|xVym+;X&v+B+B`-6n-Ho`_g54ExA89{1v);b>Qx_1 zD`6v-^zTwB%vC*;@*x|oVIdzxx?VYept&Nmt694d(%dh=`SZfRtM6~bS_rAJM`b|e z*jd}>ok>W5;^f<;=n>z1tGj!*J{W-HR`>ErB(iR#;-*uE&67qifoK>|dJTFqu0Tc0 zKigOsFM*1*qyEsJU5okZ{}0E0I}k4mI#Y79nQ_NXN%muqc^@I0I{m1=wL%t_bfRIT z?62I%05c8qE)#O6>Bt*g!Jet;&|@ctC%yqmUOPXg;mJl^_x+o^uv1!_yQ~wbjcugT zK_6rBt#$y6;4Wb+S~4UhAlEm|Lp~k``h?TOWMjis%4kHZ?_cw2x$%j#!LP0xsz8T# zj%6K(xueP+u`adi+;>DwG|bp=3ZAR9xN(L@->L_#E}8M#)(KC(b(5_sfe+8IqJZuO z|CY;7D=}d-ec^vmQrEW0jHaft zFAP;;d7RSDtZ)v9_f>(2!Ghky*69z48DByQYdEsAkfz~2@`(mMGOqPrC!QaT4vkt2 z885HRh7e{DbQG`FbXVXs5xn6npflNyXDjK07yxM{S#!JlP)#HmeHJo8d9C*wl6Hv@?Giw23@4s;%{ z`P?Q<$|pXOqqu+SY*oh^HB$=&QC)me*Qg%M;&x%8z=sQE=wJ)*b-Qk^qLQb;VctD^ z5OlH=i?`~g4-)vCLSslj=_QfB5nE~0@jfyOyIwPk21piVpf>}i={)%+ku2)Z!LrTP zjZ=MiTyf*NEyJItt|t1~u2l^G_{o(#9Y?>HOjV6noW;iou@A?)71i(YMox@Z@VEw) zPzX~C2(d_Y0uoqDEJdd25%K}N1K?g=+1ww<(Pk(#&!aHXP9>lH8cYcql7#~`!1gTp zUqJK;^GfHsL7%u<_fq1qa-*-roabv}Zq>~Gukw8x_RmFlrrK1;O&*IJUQpCh82dks zhNm&Y$I*1=u$aWFy$z&%M2GTR%jJry7!P`w%cEsml58+KMxRv-K@ zxyH5hQ&RizLjLEx82ZKdPso8_k5?cnv-=jDv)D2vZn6gS1>9csGk8QAz++Z1r2~)f z%C}s-cRrbXUFk8HnKwE{s$+Rw^z1C$X-5Sgf`bi2tj~x9BwBqBsIPIY*-}E)23Cp3=IDI0+3+*U%(}r_{$o~PhLiZN{ literal 0 HcmV?d00001