From 9504fa1af1a4a2e612f3206a371b20c73252f0e8 Mon Sep 17 00:00:00 2001 From: hanshenrik Date: Sun, 4 Feb 2024 10:01:24 +0100 Subject: [PATCH] php patches + optimized checkout patches to specifically address a gcc -Werror=logical-op issue explained in https://github.com/BLAKE3-team/BLAKE3/pull/380 and a gcc -Wunused-function issue explained in https://github.com/BLAKE3-team/BLAKE3/pull/382 and optimized upstream git checkout to only fetch the files we want. --- ext/hash/blake3/fetch_upstream_blake3.sh | 15 +++-- ext/hash/blake3/patches.diff | 56 +++++++++++++++++++ ext/hash/blake3/upstream_blake3/c/blake3.c | 16 +++--- .../upstream_blake3/c/blake3_dispatch.c | 3 +- 4 files changed, 77 insertions(+), 13 deletions(-) create mode 100644 ext/hash/blake3/patches.diff diff --git a/ext/hash/blake3/fetch_upstream_blake3.sh b/ext/hash/blake3/fetch_upstream_blake3.sh index 7f68c74613b7d..e8d9445d133c7 100755 --- a/ext/hash/blake3/fetch_upstream_blake3.sh +++ b/ext/hash/blake3/fetch_upstream_blake3.sh @@ -2,12 +2,17 @@ # afaik the PHP project doesn't allow git submodules, so we do this fetcher script instead. cd "$(dirname "$0")" rm -rf upstream_blake3 -# fancy way of just fetching the "c" folder (the only thing we want) +# fancy way of fetching only the files we want git clone --branch '1.5.0' -n --depth=1 --filter=tree:0 'https://github.com/BLAKE3-team/BLAKE3.git' 'upstream_blake3' cd upstream_blake3 -git sparse-checkout set --no-cone c LICENSE +git sparse-checkout set --no-cone LICENSE c/blake3.c c/blake3.h c/blake3_avx2.c c/blake3_avx2_x86-64_unix.S \ + c/blake3_avx2_x86-64_windows_gnu.S c/blake3_avx2_x86-64_windows_msvc.asm c/blake3_avx512.c \ + c/blake3_avx512_x86-64_unix.S c/blake3_avx512_x86-64_windows_gnu.S c/blake3_avx512_x86-64_windows_msvc.asm \ + c/blake3_dispatch.c c/blake3_impl.h c/blake3_neon.c c/blake3_portable.c c/blake3_sse2.c \ + c/blake3_sse2_x86-64_unix.S c/blake3_sse2_x86-64_windows_gnu.S c/blake3_sse2_x86-64_windows_msvc.asm \ + c/blake3_sse41.c c/blake3_sse41_x86-64_unix.S c/blake3_sse41_x86-64_windows_gnu.S \ + c/blake3_sse41_x86-64_windows_msvc.asm git checkout rm -rf .git -cd c -# some stuff we don't need -rm -rf blake3_c_rust_bindings test.py example.c main.c Makefile.testing CMakeLists.txt blake3-config.cmake.in README.md .gitignore libblake3.pc.in +cd .. +git apply patches.diff diff --git a/ext/hash/blake3/patches.diff b/ext/hash/blake3/patches.diff new file mode 100644 index 0000000000000..d27e1870c46c0 --- /dev/null +++ b/ext/hash/blake3/patches.diff @@ -0,0 +1,56 @@ +diff --git a/ext/hash/blake3/upstream_blake3/c/blake3.c b/ext/hash/blake3/upstream_blake3/c/blake3.c +index 692f4b0216..3591ba245a 100644 +--- a/ext/hash/blake3/upstream_blake3/c/blake3.c ++++ b/ext/hash/blake3/upstream_blake3/c/blake3.c +@@ -341,21 +341,23 @@ INLINE void compress_subtree_to_parent_node( + size_t num_cvs = blake3_compress_subtree_wide(input, input_len, key, + chunk_counter, flags, cv_array); + assert(num_cvs <= MAX_SIMD_DEGREE_OR_2); +- +- // If MAX_SIMD_DEGREE is greater than 2 and there's enough input, +- // compress_subtree_wide() returns more than 2 chaining values. Condense +- // them into 2 by forming parent nodes repeatedly. +- uint8_t out_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2]; +- // The second half of this loop condition is always true, and we just ++ // https://github.com/BLAKE3-team/BLAKE3/pull/380 ++ // This condition is always true, and we just + // asserted it above. But GCC can't tell that it's always true, and if NDEBUG + // is set on platforms where MAX_SIMD_DEGREE_OR_2 == 2, GCC emits spurious + // warnings here. GCC 8.5 is particularly sensitive, so if you're changing + // this code, test it against that version. +- while (num_cvs > 2 && num_cvs <= MAX_SIMD_DEGREE_OR_2) { ++#if MAX_SIMD_DEGREE_OR_2 > 2 ++ // If MAX_SIMD_DEGREE_OR_2 is greater than 2 and there's enough input, ++ // compress_subtree_wide() returns more than 2 chaining values. Condense ++ // them into 2 by forming parent nodes repeatedly. ++ uint8_t out_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2]; ++ while (num_cvs > 2) { + num_cvs = + compress_parents_parallel(cv_array, num_cvs, key, flags, out_array); + memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN); + } ++#endif + memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN); + } + +diff --git a/ext/hash/blake3/upstream_blake3/c/blake3_dispatch.c b/ext/hash/blake3/upstream_blake3/c/blake3_dispatch.c +index af6c3dadc7..af3bf17bbe 100644 +--- a/ext/hash/blake3/upstream_blake3/c/blake3_dispatch.c ++++ b/ext/hash/blake3/upstream_blake3/c/blake3_dispatch.c +@@ -86,7 +86,6 @@ static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) { + #endif + } + +-#endif + + enum cpu_feature { + SSE2 = 1 << 0, +@@ -161,6 +160,8 @@ static + #endif + } + } ++// https://github.com/BLAKE3-team/BLAKE3/pull/382 ++#endif + + void blake3_compress_in_place(uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], diff --git a/ext/hash/blake3/upstream_blake3/c/blake3.c b/ext/hash/blake3/upstream_blake3/c/blake3.c index 692f4b0216485..3591ba245a3b2 100644 --- a/ext/hash/blake3/upstream_blake3/c/blake3.c +++ b/ext/hash/blake3/upstream_blake3/c/blake3.c @@ -341,21 +341,23 @@ INLINE void compress_subtree_to_parent_node( size_t num_cvs = blake3_compress_subtree_wide(input, input_len, key, chunk_counter, flags, cv_array); assert(num_cvs <= MAX_SIMD_DEGREE_OR_2); - - // If MAX_SIMD_DEGREE is greater than 2 and there's enough input, - // compress_subtree_wide() returns more than 2 chaining values. Condense - // them into 2 by forming parent nodes repeatedly. - uint8_t out_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2]; - // The second half of this loop condition is always true, and we just + // https://github.com/BLAKE3-team/BLAKE3/pull/380 + // This condition is always true, and we just // asserted it above. But GCC can't tell that it's always true, and if NDEBUG // is set on platforms where MAX_SIMD_DEGREE_OR_2 == 2, GCC emits spurious // warnings here. GCC 8.5 is particularly sensitive, so if you're changing // this code, test it against that version. - while (num_cvs > 2 && num_cvs <= MAX_SIMD_DEGREE_OR_2) { +#if MAX_SIMD_DEGREE_OR_2 > 2 + // If MAX_SIMD_DEGREE_OR_2 is greater than 2 and there's enough input, + // compress_subtree_wide() returns more than 2 chaining values. Condense + // them into 2 by forming parent nodes repeatedly. + uint8_t out_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2]; + while (num_cvs > 2) { num_cvs = compress_parents_parallel(cv_array, num_cvs, key, flags, out_array); memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN); } +#endif memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN); } diff --git a/ext/hash/blake3/upstream_blake3/c/blake3_dispatch.c b/ext/hash/blake3/upstream_blake3/c/blake3_dispatch.c index af6c3dadc7bbf..af3bf17bbe770 100644 --- a/ext/hash/blake3/upstream_blake3/c/blake3_dispatch.c +++ b/ext/hash/blake3/upstream_blake3/c/blake3_dispatch.c @@ -86,7 +86,6 @@ static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) { #endif } -#endif enum cpu_feature { SSE2 = 1 << 0, @@ -161,6 +160,8 @@ static #endif } } +// https://github.com/BLAKE3-team/BLAKE3/pull/382 +#endif void blake3_compress_in_place(uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN],