Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add optimized crc32 for Power 8+ processors #857

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
*.gcno
*.gcov

/crc32_test
/crc32_test64
/crc32_testsh
/example
/example64
/examplesh
Expand Down
76 changes: 76 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ project(zlib C)

set(VERSION "1.3")

# Switch that gates the Power-specific probes and sources further below.
# No initial value is given, so CMake defaults it to OFF.
option(POWER "Enable building power implementation")

set(INSTALL_BIN_DIR "${CMAKE_INSTALL_PREFIX}/bin" CACHE PATH "Installation directory for executables")
set(INSTALL_LIB_DIR "${CMAKE_INSTALL_PREFIX}/lib" CACHE PATH "Installation directory for libraries")
set(INSTALL_INC_DIR "${CMAKE_INSTALL_PREFIX}/include" CACHE PATH "Installation directory for headers")
Expand Down Expand Up @@ -126,6 +128,76 @@ if(NOT MINGW)
)
endif()

if(CMAKE_COMPILER_IS_GNUCC)

    # Test whether a GNU indirect function (ifunc) can be used to detect and
    # load optimized code at runtime.
    check_c_source_compiles("
static int test_ifunc_native(void)
{
return 1;
}
static int (*(check_ifunc_native(void)))(void)
{
return test_ifunc_native;
}
int test_ifunc(void) __attribute__ ((ifunc (\"check_ifunc_native\")));
int main(void)
{
return 0;
}
" HAS_C_ATTR_IFUNC)

    if(HAS_C_ATTR_IFUNC)
        add_definitions(-DHAVE_IFUNC)
        set(ZLIB_PRIVATE_HDRS ${ZLIB_PRIVATE_HDRS} contrib/gcc/zifunc.h)
    endif()

    if(POWER)
        # Test whether the Power optimizations can be used: the target must be
        # Power and the compiler must provide __builtin_cpu_supports().
        # NOTE(review): the equivalent probe in the configure script also
        # rejects non-64-bit targets; confirm whether that check is needed here.
        check_c_source_compiles("
#ifndef _ARCH_PPC
#error \"Target is not Power\"
#endif
#ifndef __BUILTIN_CPU_SUPPORTS__
#error \"Target doesn't support __builtin_cpu_supports()\"
#endif
int main() { return 0; }
" HAS_POWER_SUPPORT)

        # The optimized routines are selected at runtime through ifunc, so
        # both probes have to succeed.
        if(HAS_POWER_SUPPORT AND HAS_C_ATTR_IFUNC)
            add_definitions(-DZ_POWER_OPT)

            # CMAKE_REQUIRED_FLAGS is read by every check_* call that follows
            # in this file. Remember the current value so the -mcpu probes
            # below do not leak into unrelated checks.
            set(zlib_saved_required_flags "${CMAKE_REQUIRED_FLAGS}")

            # Does the compiler accept -mcpu=power8?
            set(CMAKE_REQUIRED_FLAGS -mcpu=power8)
            check_c_source_compiles("int main(void){return 0;}" POWER8)

            if(POWER8)
                add_definitions(-DZ_POWER8)
                set(ZLIB_POWER8
                    contrib/power/crc32_z_power8.c)

                # Only the Power8 sources get -mcpu=power8; the rest of the
                # library keeps the default flags.
                set_source_files_properties(
                    ${ZLIB_POWER8}
                    PROPERTIES COMPILE_FLAGS -mcpu=power8)
            endif()

            # Does the compiler accept -mcpu=power9?
            set(CMAKE_REQUIRED_FLAGS -mcpu=power9)
            check_c_source_compiles("int main(void){return 0;}" POWER9)

            # Both probes are done: restore the caller's flags.
            set(CMAKE_REQUIRED_FLAGS "${zlib_saved_required_flags}")
            unset(zlib_saved_required_flags)

            if(POWER9)
                add_definitions(-DZ_POWER9)
                # No Power9-specific sources are listed yet.
                set(ZLIB_POWER9 )

                # Skip the property call while the list is empty.
                if(ZLIB_POWER9)
                    set_source_files_properties(
                        ${ZLIB_POWER9}
                        PROPERTIES COMPILE_FLAGS -mcpu=power9)
                endif()
            endif()

            set(ZLIB_PRIVATE_HDRS ${ZLIB_PRIVATE_HDRS} contrib/power/power.h)
            set(ZLIB_SRCS ${ZLIB_SRCS} ${ZLIB_POWER8} ${ZLIB_POWER9})
        endif()
    endif()
endif()

# parse the full version number from zlib.h and include in ZLIB_FULL_VERSION
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/zlib.h _zlib_h_contents)
string(REGEX REPLACE ".*#define[ \t]+ZLIB_VERSION[ \t]+\"([-0-9A-Za-z.]+)\".*"
Expand Down Expand Up @@ -198,6 +270,10 @@ add_executable(example test/example.c)
target_link_libraries(example zlib)
add_test(example example)

# Build the CRC-32 test program from test/crc32_test.c, link it against the
# zlib target and register it as a test named crc32_test.
add_executable(crc32_test test/crc32_test.c)
target_link_libraries(crc32_test zlib)
add_test(crc32_test crc32_test)

add_executable(minigzip test/minigzip.c)
target_link_libraries(minigzip zlib)

Expand Down
43 changes: 33 additions & 10 deletions Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -71,19 +71,19 @@ PIC_OBJS = $(PIC_OBJC) $(PIC_OBJA)

all: static shared

static: example$(EXE) minigzip$(EXE)
static: crc32_test$(EXE) example$(EXE) minigzip$(EXE)

shared: examplesh$(EXE) minigzipsh$(EXE)
shared: crc32_testsh$(EXE) examplesh$(EXE) minigzipsh$(EXE)

all64: example64$(EXE) minigzip64$(EXE)
all64: crc32_test64$(EXE) example64$(EXE) minigzip64$(EXE)

check: test

test: all teststatic testshared

teststatic: static
@TMPST=tmpst_$$; \
if echo hello world | ${QEMU_RUN} ./minigzip | ${QEMU_RUN} ./minigzip -d && ${QEMU_RUN} ./example $$TMPST ; then \
if echo hello world | ${QEMU_RUN} ./minigzip | ${QEMU_RUN} ./minigzip -d && ${QEMU_RUN} ./example $$TMPST && ${QEMU_RUN} ./crc32_test; then \
echo ' *** zlib test OK ***'; \
else \
echo ' *** zlib test FAILED ***'; false; \
Expand All @@ -96,7 +96,7 @@ testshared: shared
DYLD_LIBRARY_PATH=`pwd`:$(DYLD_LIBRARY_PATH) ; export DYLD_LIBRARY_PATH; \
SHLIB_PATH=`pwd`:$(SHLIB_PATH) ; export SHLIB_PATH; \
TMPSH=tmpsh_$$; \
if echo hello world | ${QEMU_RUN} ./minigzipsh | ${QEMU_RUN} ./minigzipsh -d && ${QEMU_RUN} ./examplesh $$TMPSH; then \
if echo hello world | ${QEMU_RUN} ./minigzipsh | ${QEMU_RUN} ./minigzipsh -d && ${QEMU_RUN} ./examplesh $$TMPSH && ${QEMU_RUN} ./crc32_testsh; then \
echo ' *** zlib shared test OK ***'; \
else \
echo ' *** zlib shared test FAILED ***'; false; \
Expand All @@ -105,7 +105,7 @@ testshared: shared

test64: all64
@TMP64=tmp64_$$; \
if echo hello world | ${QEMU_RUN} ./minigzip64 | ${QEMU_RUN} ./minigzip64 -d && ${QEMU_RUN} ./example64 $$TMP64; then \
if echo hello world | ${QEMU_RUN} ./minigzip64 | ${QEMU_RUN} ./minigzip64 -d && ${QEMU_RUN} ./example64 $$TMP64 && ${QEMU_RUN} ./crc32_test64; then \
echo ' *** zlib 64-bit test OK ***'; \
else \
echo ' *** zlib 64-bit test FAILED ***'; false; \
Expand Down Expand Up @@ -139,12 +139,18 @@ match.lo: match.S
mv _match.o match.lo
rm -f _match.s

crc32_test.o: $(SRCDIR)test/crc32_test.c $(SRCDIR)zlib.h zconf.h
$(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/crc32_test.c

example.o: $(SRCDIR)test/example.c $(SRCDIR)zlib.h zconf.h
$(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/example.c

minigzip.o: $(SRCDIR)test/minigzip.c $(SRCDIR)zlib.h zconf.h
$(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/minigzip.c

crc32_test64.o: $(SRCDIR)test/crc32_test.c $(SRCDIR)zlib.h zconf.h
$(CC) $(CFLAGS) $(ZINCOUT) -D_FILE_OFFSET_BITS=64 -c -o $@ $(SRCDIR)test/crc32_test.c

example64.o: $(SRCDIR)test/example.c $(SRCDIR)zlib.h zconf.h
$(CC) $(CFLAGS) $(ZINCOUT) -D_FILE_OFFSET_BITS=64 -c -o $@ $(SRCDIR)test/example.c

Expand All @@ -158,6 +164,9 @@ adler32.o: $(SRCDIR)adler32.c
crc32.o: $(SRCDIR)crc32.c
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c

# Static object for the Power8 CRC-32 source. Unlike the other objects, it is
# compiled with -mcpu=power8 added to the regular $(CFLAGS).
crc32_z_power8.o: $(SRCDIR)contrib/power/crc32_z_power8.c
$(CC) $(CFLAGS) -mcpu=power8 $(ZINC) -c -o $@ $(SRCDIR)contrib/power/crc32_z_power8.c

deflate.o: $(SRCDIR)deflate.c
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c

Expand Down Expand Up @@ -208,6 +217,11 @@ crc32.lo: $(SRCDIR)crc32.c
$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c
-@mv objs/crc32.o $@

# PIC object for the Power8 CRC-32 source: compiled with $(SFLAGS), -DPIC and
# -mcpu=power8 into objs/, then moved to the .lo target name.
crc32_z_power8.lo: $(SRCDIR)contrib/power/crc32_z_power8.c
-@mkdir objs 2>/dev/null || test -d objs
$(CC) $(SFLAGS) -mcpu=power8 $(ZINC) -DPIC -c -o objs/crc32_z_power8.o $(SRCDIR)contrib/power/crc32_z_power8.c
-@mv objs/crc32_z_power8.o $@

deflate.lo: $(SRCDIR)deflate.c
-@mkdir objs 2>/dev/null || test -d objs
$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c
Expand Down Expand Up @@ -281,18 +295,27 @@ placebo $(SHAREDLIBV): $(PIC_OBJS) libz.a
ln -s $@ $(SHAREDLIBM)
-@rmdir objs

crc32_test$(EXE): crc32_test.o $(STATICLIB)
$(CC) $(CFLAGS) -o $@ crc32_test.o $(TEST_LDFLAGS)

example$(EXE): example.o $(STATICLIB)
$(CC) $(CFLAGS) -o $@ example.o $(TEST_LDFLAGS)

minigzip$(EXE): minigzip.o $(STATICLIB)
$(CC) $(CFLAGS) -o $@ minigzip.o $(TEST_LDFLAGS)

# Link the CRC-32 test program against the shared library. $(LDFLAGS) is
# passed here as in the other shared-library test programs, so user-supplied
# linker flags are not dropped for this binary.
crc32_testsh$(EXE): crc32_test.o $(SHAREDLIBV)
	$(CC) $(CFLAGS) -o $@ crc32_test.o $(LDFLAGS) -L. $(SHAREDLIBV)

examplesh$(EXE): example.o $(SHAREDLIBV)
$(CC) $(CFLAGS) -o $@ example.o $(LDFLAGS) -L. $(SHAREDLIBV)

minigzipsh$(EXE): minigzip.o $(SHAREDLIBV)
$(CC) $(CFLAGS) -o $@ minigzip.o $(LDFLAGS) -L. $(SHAREDLIBV)

crc32_test64$(EXE): crc32_test64.o $(STATICLIB)
$(CC) $(CFLAGS) -o $@ crc32_test64.o $(TEST_LDFLAGS)

example64$(EXE): example64.o $(STATICLIB)
$(CC) $(CFLAGS) -o $@ example64.o $(TEST_LDFLAGS)

Expand Down Expand Up @@ -368,8 +391,8 @@ minizip-clean:
mostlyclean: clean
clean: minizip-clean
rm -f *.o *.lo *~ \
example$(EXE) minigzip$(EXE) examplesh$(EXE) minigzipsh$(EXE) \
example64$(EXE) minigzip64$(EXE) \
crc32_test$(EXE) example$(EXE) minigzip$(EXE) crc32_testsh$(EXE) examplesh$(EXE) minigzipsh$(EXE) \
crc32_test64$(EXE) example64$(EXE) minigzip64$(EXE) \
infcover \
libz.* foo.gz so_locations \
_match.s maketree contrib/infback9/*.o
Expand All @@ -391,7 +414,7 @@ tags:

adler32.o zutil.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h
gzclose.o gzlib.o gzread.o gzwrite.o: $(SRCDIR)zlib.h zconf.h $(SRCDIR)gzguts.h
compress.o example.o minigzip.o uncompr.o: $(SRCDIR)zlib.h zconf.h
compress.o crc32_test.o example.o minigzip.o uncompr.o: $(SRCDIR)zlib.h zconf.h
crc32.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)crc32.h
deflate.o: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h
infback.o inflate.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h $(SRCDIR)inflate.h $(SRCDIR)inffast.h $(SRCDIR)inffixed.h
Expand All @@ -401,7 +424,7 @@ trees.o: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)tr

adler32.lo zutil.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h
gzclose.lo gzlib.lo gzread.lo gzwrite.lo: $(SRCDIR)zlib.h zconf.h $(SRCDIR)gzguts.h
compress.lo example.lo minigzip.lo uncompr.lo: $(SRCDIR)zlib.h zconf.h
compress.lo crc32_test.lo example.lo minigzip.lo uncompr.lo: $(SRCDIR)zlib.h zconf.h
crc32.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)crc32.h
deflate.lo: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h
infback.lo inflate.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h $(SRCDIR)inflate.h $(SRCDIR)inffast.h $(SRCDIR)inffixed.h
Expand Down
69 changes: 69 additions & 0 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -834,6 +834,75 @@ EOF
fi
fi

# Test whether a GNU indirect function (ifunc) can be used to detect and load
# optimized code at runtime. The probe below is written to $test.c and is only
# compiled (-c), so it does not need a main().
echo >> configure.log
cat > $test.c <<EOF
static int test_ifunc_native(void)
{
return 1;
}

static int (*(check_ifunc_native(void)))(void)
{
return test_ifunc_native;
}

int test_ifunc(void) __attribute__ ((ifunc ("check_ifunc_native")));
EOF

# If the probe compiles, append -DHAVE_IFUNC to both SFLAGS and CFLAGS; the
# result is reported on stdout and in configure.log either way.
if tryboth $CC -c $CFLAGS $test.c; then
SFLAGS="${SFLAGS} -DHAVE_IFUNC"
CFLAGS="${CFLAGS} -DHAVE_IFUNC"
echo "Checking for attribute(ifunc) support... Yes." | tee -a configure.log
else
echo "Checking for attribute(ifunc) support... No." | tee -a configure.log
fi

# Test to see if we can use the optimizations for Power.
# The probe compiles only when the target is 64-bit Power, HAVE_IFUNC is
# defined through $CFLAGS, and the compiler provides __builtin_cpu_supports().
echo >> configure.log
cat > $test.c <<EOF
#ifndef _ARCH_PPC
  #error "Target is not Power"
#endif
#if !(defined(__PPC64__) || defined(__powerpc64__))
  #error "Target is not 64 bits"
#endif
#ifndef HAVE_IFUNC
  #error "Target doesn't support ifunc"
#endif
#ifndef __BUILTIN_CPU_SUPPORTS__
  #error "Target doesn't support __builtin_cpu_supports()"
#endif
EOF

# Start from empty values so that POWER8/POWER9 variables inherited from the
# environment cannot end up in SFLAGS/CFLAGS when a -mcpu probe fails.
POWER8=
POWER9=

if tryboth $CC -c $CFLAGS $test.c; then
  # Reuse $test.c as a trivial program for the -mcpu probes.
  echo "int main(void){return 0;}" > $test.c

  if tryboth $CC -c $CFLAGS -mcpu=power8 $test.c; then
    POWER8="-DZ_POWER8"
    # Add the Power8 CRC-32 objects to the shared (PIC) and static lists.
    PIC_OBJC="${PIC_OBJC} crc32_z_power8.lo"
    OBJC="${OBJC} crc32_z_power8.o"
    echo "Checking for -mcpu=power8 support... Yes." | tee -a configure.log
  else
    echo "Checking for -mcpu=power8 support... No." | tee -a configure.log
  fi

  if tryboth $CC -c $CFLAGS -mcpu=power9 $test.c; then
    # No Power9-specific objects are added; only the define is set.
    POWER9="-DZ_POWER9"
    echo "Checking for -mcpu=power9 support... Yes." | tee -a configure.log
  else
    echo "Checking for -mcpu=power9 support... No." | tee -a configure.log
  fi

  SFLAGS="${SFLAGS} ${POWER8} ${POWER9} -DZ_POWER_OPT"
  CFLAGS="${CFLAGS} ${POWER8} ${POWER9} -DZ_POWER_OPT"
  echo "Checking for Power optimizations support... Yes." | tee -a configure.log
else
  echo "Checking for Power optimizations support... No." | tee -a configure.log
fi

# show the results in the log
echo >> configure.log
echo ALL = $ALL >> configure.log
Expand Down
9 changes: 9 additions & 0 deletions contrib/README.contrib
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ ada/ by Dmitriy Anisimkov <[email protected]>
blast/ by Mark Adler <[email protected]>
Decompressor for output of PKWare Data Compression Library (DCL)

gcc/ by Matheus Castanho <[email protected]>
and Rogerio Alves <[email protected]>
Optimization helpers using GCC-specific extensions

delphi/ by Cosmin Truta <[email protected]>
Support for Delphi and C++ Builder

Expand Down Expand Up @@ -42,6 +46,11 @@ minizip/ by Gilles Vollant <[email protected]>
pascal/ by Bob Dellaca <[email protected]> et al.
Support for Pascal

power/ by Daniel Black <[email protected]>
Matheus Castanho <[email protected]>
and Rogerio Alves <[email protected]>
Optimized functions for Power processors

puff/ by Mark Adler <[email protected]>
Small, low memory usage inflate. Also serves to provide an
unambiguous description of the deflate format.
Expand Down
Loading