From c86802e9da424bfba3511d93a5c6344de4f2ab2f Mon Sep 17 00:00:00 2001 From: Dylan Modesitt Date: Sat, 17 Sep 2022 19:57:17 -0400 Subject: [PATCH 01/19] CityHash128 (wip) --- src/CityHash128.jl | 317 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 317 insertions(+) create mode 100644 src/CityHash128.jl diff --git a/src/CityHash128.jl b/src/CityHash128.jl new file mode 100644 index 0000000..161f71b --- /dev/null +++ b/src/CityHash128.jl @@ -0,0 +1,317 @@ +""" +A (pure Julia) implementation of the version of CityHash used by ClickHouse. +ClickHouse server comes built-in with an old version of this algorithm - so +the implementation below is not a port of the currently ordained CityHash, but +rather the one required for the transport-compression protocol(s) in ClickHouse. + +This is a fairly literal translation of the C source used in +[clickhouse-cityhash](https://github.com/xzkostyan/clickhouse-cityhash). +""" + +# Some primes between 2^63 and 2^64 for various uses. +const k0::UInt64 = 0xc3a5c85c97cb3127 +const k1::UInt64 = 0xb492b66fbe98f273 +const k2::UInt64 = 0x9ae16a3b2f90404f +const k3::UInt64 = 0xc949d7c7509e6557 + +u8(val) = val % UInt8 +u32(val) = val % UInt32 +u64(val) = val % UInt64 +u128(val) = val % UInt128 + +# Avoid shifting by 64: doing so yields an undefined result. +@inline function rotate(val::UInt64, shift::Int) + return shift == 0 ? val : ((val >> shift) | (val << (64 - shift))) +end + +## Equivalent to Rotate(), but requires the second arg to be non-zero. +@inline function rotate_by_at_least1(val::UInt64, shift::Int) + return (val >> shift) | (val << (64 - shift)) +end + +@inline function shift_mix(val::UInt64) + return val ⊻ (val >> 47) +end + +@inline low64(x::UInt128)::UInt64 = u64(x) +@inline high64(x::UInt128)::UInt64 = u64(x >> 64) +@inline UInt128(x::UInt64, y::UInt64) = (u128(y) << 64) + u128(x) + +""" +Hash 128 input bits down to 64 bits of output. +This is intended to be a reasonably good hash function. +""" +@inline function hash_128_to_64(x::UInt128) + ## Murmur-inspired hashing. + kMul::UInt64 = 0x9ddfea08eb382d69 + a = (low64(x) ⊻ high64(x)) * kMul + a ⊻= (a >> 47) + b = (high64(x) ⊻ a) * kMul + b ⊻= (b >> 47) + b *= kMul + return b +end + +hash_len_16(u::UInt64, v::UInt64)::UInt64 = hash_128_to_64(UInt128(u, v)) +reinterpret_first(type, A) = reinterpret(type, A)[begin] + +@views function fetch64(s::AbstractArray{})::UInt64 + reinterpret_first(UInt64, s[begin:8]) +end + +@views function fetch64(s::AbstractArray{}, start::Integer)::UInt64 + reinterpret_first(UInt64, s[start:start+7]) +end + +@views function fetch32(s::AbstractArray{})::UInt32 + reinterpret_first(UInt32, s[begin:4]) +end + +@views function fetch32(s::AbstractArray{}, start::Integer)::UInt32 + reinterpret_first(UInt32, s[start:start+3]) +end + +@views function hash_len_0_to_16(s::Vector{UInt8}, len::UInt)::UInt64 + if len > 8 + a = fetch64(s) + b = fetch64(s, len - 7) + return hash_len_16(a, rotate_by_at_least1(b + u64(len), len)) ⊻ b + end + if len >= 4 + a = u64(fetch32(s)) + b = u64(fetch32(s, len - 3)) + return hash_len_16(len + (a << 3), b) + end + if len > 0 + a = s[1] + b = s[len>>1+1] + c = s[len] + y = u32(a) + u32(b) << 8 + z = len + u32(c) << 2 + return shift_mix(y * k2 ⊻ z * k3) * k2 + end +end + +function hash_len_17_to_32(s::Vector{UInt8}, len::UInt)::UInt64 + a = fetch64(s) * k1 + b = fetch64(s, 9) + c = fetch64(s, len - 7) * k2 + d = fetch64(s, len - 15) * k0 + return hash_len_16(rotate(a - b, 43) + rotate(c, 30) + d, + a + rotate(b ⊻ k3, 20) - c + len) +end + +function weak_hash_len32_with_seeds(w::UInt64, x::UInt64, y::UInt64, + z::UInt64, a::UInt64, b::UInt64) + a += w + b = rotate(b + a + z, 21) + c = a + a += x + a += y + b += rotate(a, 44) + return (a + z, b + c) +end + +function weak_hash_len32_with_seeds(s::AbstractArray{}, a::UInt64, b::UInt64) + return weak_hash_len32_with_seeds( + fetch64(s), + fetch64(s, 9), + fetch64(s, 17), + fetch64(s, 25), + a, + b + ) +end + +@views function hash_len_33_to_64(s::Vector{UInt8}, len::UInt) + z = fetch64(s, 25) + a = fetch64(s) + (len + fetch64(s, len - 15)) * k0 + b = rotate(a + z, 52) + c = rotate(a, 37) + a += fetch64(s, 9) + c += rotate(a, 7) + vf = a + z + vs = b + rotate(a, 31) + c + a = fetch64(s, 17) + fetch64(s, len - 31) + z = fetch64(s, len - 7) + b = rotate(a + z, 52) + c = rotate(a, 37) + a += fetch64(s, len - 23) + c += rotate(a, 7) + a += fetch64(s, len - 15) + wf = a + z + ws = b + rotate(a, 31) + c + r = shift_mix((vf + ws) * k2 + (wf + vs) * k0)l + return shift_mix(r * k0 + vs) * k2 +end + +@views function city_hash_64(s::Vector{UInt8}, len::UInt) + if (len <= 32) + if len < 16 + return hash_len_0_to_16(s, len) + else + return hash_len_17_to_32(s, len) + end + elseif (len <= 64) + return hash_len_33_to_64(s, len) + end + + ## For strings over 64 bytes we hash the end first, and then as we + ## loop we keep 56 bytes of state: v, w, x, y, and z. + x = fetch64(s) + y = fetch64(s, len - 15) ⊻ k1 + z = fetch64(s, len - 55) ⊻ k0 + v = weak_hash_len32_with_seeds(s[len-63:len], len, y) + w = weak_hash_len32_with_seeds(s[len-31:len], len * k1, k0) + z += shift_mix(v[2]) * k1 + x = rotate(z + x, 39) * k1 + y = rotate(y, 33) * k1 + + ## Decrease len to the nearest multiple of 64 and operate on 64-byte chunks. + len = (len - 1) & ~UInt(63) + while true + x = rotate(x + y + v[1] + fetch64(s, 17), 37) * k1 + y = rotate(y + v[2] + fetch64(s, 49), 42) * k1 + x ⊻= w[2] + y ⊻= v[1] + z = rotate(z ⊻ w[1], 33) + v = weak_hash_len32_with_seeds(s, v[2] * k1, x + w[1]) + w = weak_hash_len32_with_seeds(s[33:end], z + w[2], y) + + x, z = z, x + s = s[65:end] + len -= 64 + len != 0 || break + end + return hash_len_16(hash_len_16(v[1], w[1]) + shift_mix(y) * k1 + z, + hash_len_16(v[2], w[2]) + x) +end + +function city_hash_64(s::Vector{UInt8}, len::UInt, seed::UInt) + return city_hash_64(s, len, k2, seed) +end + +function city_hash_64(s::Vector{UInt8}, len::UInt, seed0::UInt, seed1::UInt) + return hash_len_16(city_hash_64(s, len) - seed0, seed1) +end + +""" +A subroutine for CityHash128(). Returns a decent 128-bit hash for strings +of any length representable in signed long. Based on City and Murmur. +""" +@views function city_murmor(s::Vector{UInt8}, len::UInt, seed::UInt128)::UInt128 + a = low64(seed) + b = high64(seed) + c = 0 + d = 0 + l = Int64(len - 16) + if l <= 0 + a = shift_mix(a * k1) * k1 + c = b * k1 + hash_len_0_to_16(s, len) + d = shift_mix(a + (len >= 8 ? fetch64(s) : c)) + else # len > 16 + c = hash_len_16(fetch64(s, len - 7) + k1, a) + d = hash_len_16(b + len, c + fetch64(s, len - 15)) + a += d + while true + a ⊻= shift_mix(fetch64(s) * k1) * k1 + a *= k1 + b ⊻= b + c ⊻= shift_mix(fetch64(s, 9) * k1) * k1 + c *= k1 + d ⊻= c + s += 16 + l -= 16 + l > 0 || break + end + end + a = hash_len_16(a, c) + b = hash_len_16(d, b) + return UInt128(a ⊻ b, hash_len_16(b, a)) +end + +@views function city_hash_128_with_seed(s::Vector{UInt8}, len::UInt, seed::UInt128):::UInt128 + if len < 128 + return city_murmor(s, len, seed) + end + s_og, len_og = s, length(s) # backtracking can occur + + # We expect len >= 128 to be the common case. Keep 56 bytes of state: + # v, w, x, y, and z. + v = [u64(0), u64(0)] + w = [u64(0), u64(0)] + x = low64(seed) + y = high64(seed) + z = len * k1 + v[1] = rotate(y ⊻ k1, 49) * k1 + fetch64(s) + v[2] = rotate(v[1], 42) * k1 + fetch64(s, 9) + w[1] = rotate(y + z, 35) * k1 + x + w[2] = rotate(x + fetch64(s, 89), 53) * k1 + + # This is the same inner loop as CityHash64, manually unrolled + while true + x = rotate(x + y + v[1] + fetch64(s, 17), 37) * k1 + y = rotate(y + v[2] + fetch64(s, 49), 42) * k1 + x ⊻= w[2] + y ⊻= v[1] + z = rotate(z ⊻ w[1], 33) + v = weak_hash_len32_with_seeds(s, v[2] * k1, x + w[1]) + w = weak_hash_len32_with_seeds(s[33:end], z + w[2], y) + x, z = z, x + s = s[65:end] + + x = rotate(x + y + v[1] + fetch64(s, 17), 37) * k1 + y = rotate(y + v[2] + fetch64(s, 49), 42) * k1 + x ⊻= w[2] + y ⊻= v[1] + z = rotate(z ⊻ w[1], 33) + v = weak_hash_len32_with_seeds(s, v[2] * k1, x + w[1]) + w = weak_hash_len32_with_seeds(s[33:end], z + w[2], y) + x, z = z, x + s = s[65:end] + + len -= 128 + len >= 128 || break + end + y += rotate(w[1], 37) * k0 + z + x += rotate(v[1] + z, 49) * k0 + + # If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s. + tail_done = 0 + while tail_done < len + tail_done += 32 + y = rotate(y - x, 42) * k0 + v[2] + w = (w[1] + fetch64(s_og, len_og - tail_done + 17), w[2]) + x = rotate(x, 49) * k0 + w[1] + w = (w[1] + v[1], w[2]) + v = weak_hash_len32_with_seeds(s_og[len_og-tail_done+1:end], v[1], v[2]) + end + + # At this point our 48 bytes of state should contain more than + # enough information for a strong 128-bit hash. We use two + # different 48-byte-to-8-byte hashes to get a 16-byte final result. + x = hash_len_16(x, v[1]) + y = hash_len_16(y, w[1]) + return UInt128(hash_len_16(x + v[2], w[2]) + y, + hash_len_16(x + w[2], y + v[2])) +end + +@views function city_hash_128(s::Vector{UInt8}, len::UInt)::UInt128 + if len >= 16 + return city_hash_128_with_seed(s[17:end], + len - 16, + UInt128(fetch64(s) ⊻ k3, fetch64(s[9:16])) + ) + elseif len >= 8 + return city_hash_128_with_seed( + [], 0, UInt128(fetch64(s) ⊻ (len * k0), fetch64(s[len-7:len]) ⊻ k1) + ) + else + return city_hash_128_with_seed(s, len, UInt128(k0, k1)) + end +end + +function city_hash_128(s::String)::UInt128 + data = Vector{UInt8}(s) + city_hash_128(data, length(data)) +end From 74d93a61c4a9146b853ee728af66e577f51916ca Mon Sep 17 00:00:00 2001 From: Dylan Modesitt Date: Mon, 19 Sep 2022 17:17:06 -0400 Subject: [PATCH 02/19] add cityhash tests (and fix resulting bugs) --- src/CityHash128.jl | 51 +- test/ch/fw_ch128_key.txt | 1706 ++++++++++++++++++++++++++++++++++++++ test/cityhash.jl | 66 ++ 3 files changed, 1802 insertions(+), 21 deletions(-) create mode 100644 test/ch/fw_ch128_key.txt create mode 100644 test/cityhash.jl diff --git a/src/CityHash128.jl b/src/CityHash128.jl index 161f71b..4c4aa44 100644 --- a/src/CityHash128.jl +++ b/src/CityHash128.jl @@ -25,17 +25,17 @@ u128(val) = val % UInt128 end ## Equivalent to Rotate(), but requires the second arg to be non-zero. -@inline function rotate_by_at_least1(val::UInt64, shift::Int) +@inline function rotate_by_at_least1(val::UInt64, shift::Integer) return (val >> shift) | (val << (64 - shift)) end -@inline function shift_mix(val::UInt64) +@inline function shift_mix(val::UInt64)::UInt64 return val ⊻ (val >> 47) end @inline low64(x::UInt128)::UInt64 = u64(x) @inline high64(x::UInt128)::UInt64 = u64(x >> 64) -@inline UInt128(x::UInt64, y::UInt64) = (u128(y) << 64) + u128(x) +@inline u128_from_pair(x::UInt64, y::UInt64)::UInt128 = (u128(y) << 64) + u128(x) """ Hash 128 input bits down to 64 bits of output. @@ -52,7 +52,7 @@ This is intended to be a reasonably good hash function. return b end -hash_len_16(u::UInt64, v::UInt64)::UInt64 = hash_128_to_64(UInt128(u, v)) +hash_len_16(u::UInt64, v::UInt64)::UInt64 = hash_128_to_64(u128_from_pair(u, v)) reinterpret_first(type, A) = reinterpret(type, A)[begin] @views function fetch64(s::AbstractArray{})::UInt64 @@ -71,7 +71,7 @@ end reinterpret_first(UInt32, s[start:start+3]) end -@views function hash_len_0_to_16(s::Vector{UInt8}, len::UInt)::UInt64 +@views function hash_len_0_to_16(s::AbstractArray{}, len::UInt)::UInt64 if len > 8 a = fetch64(s) b = fetch64(s, len - 7) @@ -90,9 +90,10 @@ end z = len + u32(c) << 2 return shift_mix(y * k2 ⊻ z * k3) * k2 end + k2 end -function hash_len_17_to_32(s::Vector{UInt8}, len::UInt)::UInt64 +function hash_len_17_to_32(s::AbstractArray{}, len::UInt)::UInt64 a = fetch64(s) * k1 b = fetch64(s, 9) c = fetch64(s, len - 7) * k2 @@ -123,13 +124,14 @@ function weak_hash_len32_with_seeds(s::AbstractArray{}, a::UInt64, b::UInt64) ) end -@views function hash_len_33_to_64(s::Vector{UInt8}, len::UInt) +@views function hash_len_33_to_64(s::AbstractArray{}, len::UInt) z = fetch64(s, 25) a = fetch64(s) + (len + fetch64(s, len - 15)) * k0 b = rotate(a + z, 52) c = rotate(a, 37) a += fetch64(s, 9) c += rotate(a, 7) + a += fetch64(s, 17) vf = a + z vs = b + rotate(a, 31) + c a = fetch64(s, 17) + fetch64(s, len - 31) @@ -141,11 +143,11 @@ end a += fetch64(s, len - 15) wf = a + z ws = b + rotate(a, 31) + c - r = shift_mix((vf + ws) * k2 + (wf + vs) * k0)l + r = shift_mix((vf + ws) * k2 + (wf + vs) * k0) return shift_mix(r * k0 + vs) * k2 end -@views function city_hash_64(s::Vector{UInt8}, len::UInt) +@views function city_hash_64(s::AbstractArray{}, len::UInt) if (len <= 32) if len < 16 return hash_len_0_to_16(s, len) @@ -195,16 +197,21 @@ function city_hash_64(s::Vector{UInt8}, len::UInt, seed0::UInt, seed1::UInt) return hash_len_16(city_hash_64(s, len) - seed0, seed1) end +function city_hash_64(s::AbstractString)::UInt64 + data = Vector{UInt8}(s) + return city_hash_64(data, UInt(length(data))) +end + """ A subroutine for CityHash128(). Returns a decent 128-bit hash for strings of any length representable in signed long. Based on City and Murmur. """ -@views function city_murmor(s::Vector{UInt8}, len::UInt, seed::UInt128)::UInt128 +@views function city_murmor(s::AbstractArray{}, len::UInt, seed::UInt128)::UInt128 a = low64(seed) b = high64(seed) c = 0 d = 0 - l = Int64(len - 16) + l = Int(len) - 16 if l <= 0 a = shift_mix(a * k1) * k1 c = b * k1 + hash_len_0_to_16(s, len) @@ -216,21 +223,21 @@ of any length representable in signed long. Based on City and Murmur. while true a ⊻= shift_mix(fetch64(s) * k1) * k1 a *= k1 - b ⊻= b + b ⊻= a c ⊻= shift_mix(fetch64(s, 9) * k1) * k1 c *= k1 d ⊻= c - s += 16 + s = s[17:end] l -= 16 l > 0 || break end end a = hash_len_16(a, c) b = hash_len_16(d, b) - return UInt128(a ⊻ b, hash_len_16(b, a)) + return u128_from_pair(a ⊻ b, hash_len_16(b, a)) end -@views function city_hash_128_with_seed(s::Vector{UInt8}, len::UInt, seed::UInt128):::UInt128 +@views function city_hash_128_with_seed(s::AbstractArray{}, len::UInt, seed::UInt128)::UInt128 if len < 128 return city_murmor(s, len, seed) end @@ -292,26 +299,28 @@ end # different 48-byte-to-8-byte hashes to get a 16-byte final result. x = hash_len_16(x, v[1]) y = hash_len_16(y, w[1]) - return UInt128(hash_len_16(x + v[2], w[2]) + y, + + return u128_from_pair(hash_len_16(x + v[2], w[2]) + y, hash_len_16(x + w[2], y + v[2])) + return t end @views function city_hash_128(s::Vector{UInt8}, len::UInt)::UInt128 if len >= 16 return city_hash_128_with_seed(s[17:end], len - 16, - UInt128(fetch64(s) ⊻ k3, fetch64(s[9:16])) + u128_from_pair(fetch64(s) ⊻ k3, fetch64(s[9:16])) ) elseif len >= 8 return city_hash_128_with_seed( - [], 0, UInt128(fetch64(s) ⊻ (len * k0), fetch64(s[len-7:len]) ⊻ k1) + Vector{UInt8}([]), UInt(0), u128_from_pair(fetch64(s) ⊻ (len * k0), fetch64(s[len-7:len]) ⊻ k1) ) else - return city_hash_128_with_seed(s, len, UInt128(k0, k1)) + return city_hash_128_with_seed(s, len, u128_from_pair(k0, k1)) end end -function city_hash_128(s::String)::UInt128 +function city_hash_128(s::AbstractString)::UInt128 data = Vector{UInt8}(s) - city_hash_128(data, length(data)) + return city_hash_128(data, UInt(length(data))) end diff --git a/test/ch/fw_ch128_key.txt b/test/ch/fw_ch128_key.txt new file mode 100644 index 0000000..5a005e4 --- /dev/null +++ b/test/ch/fw_ch128_key.txt @@ -0,0 +1,1706 @@ +4463240938071824939 +4374473821787594281 +7053907627585791678 +5387711300376061230 +17448920336343069622 +8783146992399324990 +855377389729829915 +1254411480194272554 +14596158469467002544 +5106762334925022266 +16369256012973784743 +3502334598754678128 +12543511968287337081 +15848880841708507816 +4602887543637889858 +18224469886079618893 +14964565147054796461 +16936994045072363290 +4463348257335035248 +16752442588187219629 +14609974622188200806 +11674717294784560406 +7572293062532965360 +8279462154784386050 +16334831990901063437 +13117014728749694982 +11303556460246080721 +5915605696776307538 +7099509739770695646 +9892258943508100002 +3889041731939997311 +3487894887444346828 +3266826996716336086 +12705083165671477143 +4410289230173716478 +650185838659020011 +316969900207750486 +15253606126131690946 +17272310913738213738 +10468236941597790086 +7285563370090357318 +10816109891633168401 +11168268792345582912 +6882717494489102370 +9671633896543615349 +14255411114306090086 +1374481376608514272 +2837292061530612078 +3638051728752914733 +311897013365839352 +6430201897705070740 +5761704081615130064 +10585684289499222310 +9790676770635020039 +2057833514920652169 +3798835665998919946 +16683060882456657353 +2461064443446431768 +745658207846226392 +17894784951961272024 +10158453748100150473 +15192815882491277896 +15987422632105282656 +3600136419968793779 +9643982283125122549 +6450302067072687170 +16974702583869454020 +10593284782581823296 +14969325397439904960 +7057753327702417379 +4102364261739926028 +11942831615417318371 +3533517166747755551 +18241149993068210491 +9759280147433594109 +3388036542104294250 +1426024724050988679 +14055088123683174341 +15951613073029232334 +13615458584017072500 +13996780673699313987 +3802257138318081077 +252360170077863979 +17446947084104203971 +10222944487670744072 +4967988309340032334 +209323914493615778 +2120579155633039595 +3876531354182966811 +9315916419817568706 +778083358996873830 +6849895336443750349 +14171027579861028958 +2453318776239434345 +15472128915639619704 +10038126310429092939 +18327579702583444338 +5877310430287401726 +17743446023165621996 +10991619045443879579 +1253472569560167869 +17920838515294856023 +15832167874601481831 +5031359430817221361 +7906198261896673648 +15112278412447802892 +8126969485953235798 +12329511350654531391 +6137032832409122991 +13242058244384512753 +9105106261105426365 +18041702001106384886 +10446314833287452737 +13370185175305497353 +4113271819496520550 +6849103853398182633 +9147876613595865272 +1882850494948178637 +16454907706637403612 +10729126504852034286 +2782324417212988278 +11768897783235466838 +1684109276911719605 +1539206951793571834 +606718634558674778 +6600359069872686984 +8749404258328540693 +7002447372564732959 +17215815863554617619 +2617341189086133067 +3517842320024920113 +6740675236303914492 +1570415932730136049 +2931255401596662259 +3809787777492214863 +11183581668019915760 +8789061214261019161 +1772475684607294352 +4640821916497698555 +11618848366701723139 +4648312431619731151 +5070722024101567056 +9565197192122827201 +11655614646362524051 +7037980233398558432 +4116933406138847958 +298290587684895860 +8767000584008831230 +13037699518395526027 +14888053878219037350 +1555317596061094434 +8034381985909173289 +1529492179108039534 +3754454738685785022 +10852693680304986508 +17481322528875733960 +12049970723204513408 +11970390947596141499 +6671522592648719775 +9919945305122250445 +6576475842575181741 +8370769092248699813 +6365097304136638089 +8401832087512345699 +16757883294943869514 +6872069513828645579 +3427732517441673778 +3532192825769827810 +1974159305721197628 +16420912314194252726 +6095856223403657666 +3606431192978707050 +14265537480353923574 +3074528962276250972 +12439225247110181233 +10339727337623144981 +2302332854452152604 +13380058800782820145 +17566275743593193276 +10802733043336301858 +923212651209656761 +974390238017707853 +8738355181330744734 +17997839371622959068 +221331754221938073 +6777450783040046232 +17018245620529786773 +7184171372558968788 +6319163069222938505 +8706648749034404904 +9622106191021496548 +7559606029483476455 +6609676852744667018 +4249634764144064961 +15014510959878729966 +17595535126296643606 +13423438225384478697 +6800909020659834251 +13087462800865228660 +1906176738234004361 +14799278099044812669 +11159307889513303284 +5154226553374512037 +17007128023235002407 +14596254979108333997 +4736367974720441882 +10843571017830072431 +14571717103469755078 +11260915952317375665 +6488395702177871066 +11655591499900639660 +18385333252634897524 +5204868695714301239 +15071434657933237217 +3765401973494003568 +7650585762093330634 +17139425158130502643 +13344751519501818494 +9867188188607316138 +6420529897328093629 +2223457448774132200 +16864203172353269416 +5280816799757141596 +3704090878603390536 +6916706850330743531 +12610688546159098857 +9174035515017021632 +1456427201701195048 +10336877651260931897 +12355955531472554471 +13119263089916347636 +3721483646648628700 +11556436983590819486 +8097516019107150800 +6606983078430748899 +10594336222683267144 +1909798015315854530 +9967397445419026136 +5231174573238433382 +3390111920445281562 +2071478387666043031 +3448316916181086703 +6374295463017162102 +11756234663723448588 +15261799283082032076 +4233402939758264927 +12356804186151433736 +3198001997502384936 +5673872754769833233 +7211492608966450687 +2533418862767249793 +17862190869675538842 +11884556283474678674 +13796326125554010372 +14904173199317855398 +2817049287292923874 +4598312868271861925 +16923786991201666875 +10514754682043136230 +13766981215495794951 +12419116212769359579 +15621549188632761805 +11327314834049725714 +9090721637360106615 +15895305311453328243 +13689681626214386545 +13238319130474698598 +9651733082688531087 +11629628957400637154 +7792165966641936195 +8189017209434928626 +7282893529482814203 +16347133291118027127 +4952155852111160166 +13509406455307025827 +3879157108512905802 +9474320365853132639 +13482079924115930831 +4783119654032740231 +2475798146869753580 +12157935486858301103 +14199499016262425454 +1854436736171370606 +7076322494861860817 +11287421925773226321 +13774325743140696538 +10747278071433231531 +8917314713495784827 +5636954128817496544 +2766027281143215584 +10434542924203170879 +4260512984679726908 +17255039685938548618 +6122764254432666344 +254370327020925318 +11312787822352911605 +12985809617610117683 +12650470362854176861 +553656276524624584 +6026884874962380586 +3988170723331778612 +14221423358677232809 +3476933195351329130 +16734323205162230736 +4974260648737260794 +3221402445262672040 +13210922193171385225 +5720149770013102829 +4403227827733590626 +7856294057087981738 +10099146980504822562 +7560398609762280314 +5113468127817763242 +3720473816032569069 +3585336926746648516 +10972091994874699058 +9707818449116806932 +3084063925924874820 +17870256797371101324 +6773589755042087040 +14526112768173853102 +407524138430394257 +14910066649756719721 +14457222019439952811 +627810625264822901 +2257860133218302807 +14160738157903814384 +3400484430370178630 +6053866213107096347 +16387141660477905707 +4896479635854575631 +10688978395651593887 +14961928532037895006 +5648470673536835748 +15635474076145527290 +4592829388005689752 +6296642391635369357 +2709535651028870436 +4711649996552871799 +6626853578303445437 +8408031351067128292 +11890242617538043545 +17322259414134603996 +14025246650379376654 +10422269005733851970 +8335485162818970766 +16693297451798619133 +10460828258039236212 +16778408619967448833 +16372605717792611831 +999769814713911381 +9352552126899775394 +1415167767157458574 +17472015672515048832 +10426591664767036126 +2420226874705061981 +13158333852411058466 +5973342309899293850 +13000902032524687259 +12763072379006467429 +4473910306848462750 +15315831837454763761 +15485569602610843731 +12719255427563501678 +9740887197086169530 +16306485516562134365 +16741745045507272598 +18312172787092859583 +10791418903025267671 +12989581003458120908 +5712265515329314309 +2547876845854504054 +4011723788239192375 +3577162674304841398 +10445681847952431336 +2676747676596038779 +11208688592909503105 +17064667133734968407 +17132686886100030262 +860531454272492379 +6683559209365095989 +11011206272967497245 +5139317714757752372 +17625030891837506363 +5191610316539325750 +380052826852285934 +3659535242855017021 +15930826341546292645 +5961429455257840440 +11296107025691265916 +732350910983046549 +12056228042691487582 +12681781104206154848 +10026569286277440550 +16378595526017855741 +13684376082106336209 +5676466631598134026 +8084863009686535523 +54016578132788994 +13218337400200942919 +7209779424566282930 +7682120753272418892 +15868926172677645301 +5454294229279376752 +9747332912690565913 +15441769645665158962 +7975876874905155087 +6565308080888788898 +6027757540405586062 +1892336456108486166 +17393524540935043457 +10224919343438089983 +12570803546902536994 +11457648271934786858 +11921828635154716134 +5669227572700962008 +18246699372743054798 +1671357099234441376 +13078866633795957227 +14987571337804681599 +177706930586287149 +11022181215453040134 +11652116918362346596 +10062321075491625274 +14740816404757844669 +8495729496147307913 +6406521396285902945 +14981293986482988224 +3358083202080385858 +12342440036207372586 +11932369165724033219 +4964400178045591429 +4527956087133794410 +13239816786044064360 +2084947433184958910 +540602645637412899 +1036476460984734291 +7388964595178080102 +8629874262814071504 +11645939498478910879 +14793554100631455942 +7083339739405982039 +2741289392352812899 +11615201334138532284 +5893346560431758798 +18313276551654505285 +2306685550157429116 +9410405780931561066 +7312656785120234314 +7042133941803861785 +2568878256772302101 +25449835764654186 +15932695488933419162 +11820420573418649886 +4289586757860408328 +14083577127879501362 +12251696865851134658 +13360693572488735621 +12974510359878345731 +10228933409802111233 +4232551770855678527 +15056105755247221087 +17867226383554688976 +12377793119652995798 +15852911537731976607 +8953011816241225584 +4266813769515285423 +9852982524703829173 +6885877612017388707 +11320164151583572481 +1684659438850705654 +191923096209447179 +7875865561999198662 +3677384157072286765 +10376703829563114607 +1068458999701214207 +12786969987126574653 +12977795429304784684 +11195927989901167071 +13465423869083472283 +17273216192516161109 +10703594371965162024 +531791539178552269 +18235473991910759118 +10139050865535737742 +2476154837598999923 +5169725386605448882 +11541017473142635567 +12728962833380271515 +7473282870566483702 +11232290042610801834 +729774359297830016 +3584631898700213342 +7946631243973008125 +4983997922458314471 +16373830568357439266 +59629460522671379 +1785981007954678405 +17465374826919316036 +6155599741453243029 +7786836455057256137 +16847872141390414370 +14903411072992719099 +12283538193424759414 +9006175546849839816 +12491144562245667795 +9407428041825133108 +4870984516684239220 +8673439656990721542 +8084531201794688790 +11954809713824878910 +6506278865841176253 +16058147614874018100 +3275529459955634210 +15728905305798474744 +11114653844976877927 +11909450478711173386 +17216233017721537099 +1274855101428091984 +3353678061718664175 +13623802719755123699 +6776666422968020761 +8791913441056025802 +4056774253795473552 +7102942914523759154 +9996589555417029741 +17824879181436928433 +6524553255536016681 +8015272535563178468 +338066109818547563 +1249168581078388031 +14221358737699342986 +4716649056558638426 +7102363805202433723 +5485603663770431129 +15181987271883537506 +9132091984419001947 +10279725032480934960 +2074705985648984308 +14541152688069502908 +10907683854987488327 +16541537511559033726 +12625328779221999262 +5001684503397897050 +8620051079612739713 +7513826376760932653 +17103494673571912287 +7723058800728805695 +5615288538281529121 +13482236639997728544 +12944277533079846962 +10696910201442768175 +7988850597880379189 +12383601620496773949 +17302589539212950513 +12389983514938977638 +13027729251428106058 +5299015226093755790 +8797541212804618563 +15199799474513444080 +841633295294881203 +3120620239312910188 +778376613600052485 +12045411681559138910 +5831478043378812023 +3012924210106900863 +11590512752176711814 +102637340993839743 +9121916341311406956 +11884362057779302567 +6591024367046829137 +5409271614300196658 +4243223658261839468 +18421271242073559464 +10269176099531269613 +8202222738685511467 +15318829885445699125 +3824904857326533903 +14783062174672584413 +11680031698056500629 +5621277219708775197 +7405077897209700463 +4888175629306519824 +5471546613231028559 +2844457607038964617 +18010628757011727013 +13266397614569497571 +12224134256366430247 +5293645390092579618 +9544097254890865081 +17258714407324865704 +8769314401550668413 +9771993184038254285 +11646269344757690385 +10968561056404191072 +1184479521633533907 +4468725874170836814 +5662837711353823922 +15249502868616390111 +9382776260639363829 +560187148930254500 +1255347281194688348 +2885021141032297642 +18081420337593680150 +15241161860396820002 +5909434267251552604 +10635039380631413501 +10967391554524849733 +16652897039592459591 +3554714681007422647 +10466934083989788783 +5762397147098926541 +18332436663584975471 +12647981631440381718 +2067221081910997828 +14927381192569082356 +1971191080626755311 +18076635996241379713 +13394545802991659614 +18258911607216509998 +17042784035667082087 +8511333536425797300 +10457545078235699814 +11211771526863308692 +10117985039145408324 +16956484705238408308 +15311914818840094183 +12973646550572082505 +1902522962622202782 +4361463014629245097 +4719188092686969965 +10534969514382507653 +9640111882245070513 +12763031976522167603 +15073528270017529084 +1130377417719887843 +1529856767405836927 +8403099865430726954 +2164040510589214037 +11529505982647288332 +7224926182893217098 +8836635697931723014 +7409565783147632522 +4327107031092371000 +9573272143663195934 +576002281442087540 +16428763407163191566 +11377145486426070633 +16178696002000962577 +9923880720081718298 +17296096111032243985 +4668437763295393142 +17588491052911755653 +12363044881011084673 +7939980372112001930 +3580503895457622788 +13837974557611798009 +8930307587912831601 +6948614322976797645 +9738416483820446870 +5463367399574550971 +8974687267498046588 +3171280669785863377 +5038165352116074493 +12642013485560785696 +4775495659023821108 +13313425083802172639 +9226532640985648252 +9784106431495453845 +14648742719141114691 +7521602362029437297 +3795938198841442389 +8575847801144514829 +5458819399579996440 +9196726142531574664 +51026466832854101 +3120182318946765744 +2871157824720511371 +2928146786945864158 +12639704155312427758 +14181326864662683644 +7297761886015463747 +1619240548939357112 +10409222392894998180 +5107591698267765323 +13540021433732344633 +11697536237493625545 +1913537609399348646 +2744371572840948082 +9600320453104372256 +4311586477660748825 +5511427784311136217 +3213403298260361095 +11714738427042426920 +8858958259867038454 +4463363665951810199 +13816275494299009499 +5674560804062971444 +17054960467931297039 +7208471637587082089 +16403039699250652241 +4636579085548623170 +17938974275879587065 +2976853194180195919 +15451785533556582640 +9292800538885499479 +16587512986444568119 +12783322438406141527 +13388977670924661114 +10215943377238684091 +8829200137148465095 +2403538038092867115 +3389128475374109488 +6247005886954150074 +18051304556350618331 +5481705430324620912 +12238572769097900092 +15107340264712266320 +7417534522666137731 +12813786101685070350 +1358104501041743870 +12172317996801033904 +7092024126334169686 +16929508318559719825 +4464849007399022784 +18054848193801792485 +16992339261309178403 +13065673982059484133 +5734245720887457185 +4329920397566013537 +13811846057924504411 +5625536005283433294 +14454808886016792840 +15812137776496776763 +8389575483091903420 +1255993074213543635 +11217612583060464928 +14558087800133053661 +13062031013041714112 +12500578064691613477 +1927176512137704673 +4887994862915958636 +5575006327558471669 +12441472495608784904 +4481172748342541702 +13647106995080650887 +448829656136786869 +13167724639574042246 +11565268144434646069 +16472702713137095316 +3911650477641754595 +7961500952854885764 +2585052468887211430 +17610221618075377299 +14217854927554762075 +8505079069193132125 +3657612461204920368 +16494732748977673039 +12382783470293118493 +17925475771222387823 +8440479882262819544 +9674788076453772182 +5326521257972196 +2431434427090294057 +18403412740266085674 +5327847863390340954 +10500892257698310411 +6682960568218428254 +13713381259690126690 +604581477064930665 +10411861708271758374 +6731117388164779767 +32976906391320368 +12974108655105077480 +2500131692822253563 +7521619379915002736 +619913538601248393 +8845981851740023472 +7251122094493306549 +2690070308798586666 +10499397381053100423 +8906457194256183221 +4264763590919282648 +5478125227610590208 +2973985151184521102 +708303174501102986 +13132707516941997886 +11381196474518791942 +1606105804498993339 +7261545309497908996 +17040667424219867939 +18445983354609557732 +7416333468447041761 +10309627329225826064 +17224354686450865504 +5399645710733742991 +12793739064822631273 +12846304133706106789 +1808809729448078349 +17990315504890750398 +6808879911580655908 +7661534753884023426 +9087358359319887943 +11641300441063658822 +8567993453203611654 +18350390521461015053 +5836668416730583685 +17669562431997443741 +9537667104799528491 +16352642740589741162 +10130471303621497942 +15745873153453756595 +3749335082255862790 +15619958434901786913 +14850225601182539697 +14007660545995364276 +1639960886591558523 +15124549102211217108 +9800690442460508055 +9269174698267338365 +7736695267512216148 +7548771427801480982 +8078280197378999423 +10055611507846217255 +17156473309779951104 +2193557554641497905 +9162927268126825992 +18345234481194029220 +1314144399734289196 +2167728381252121892 +4679114088520918547 +7752407336000827961 +14008484999775484810 +5175287290671694713 +13122048225477045789 +6980549240846786677 +7971807131774142009 +4721392510071200610 +12734874372820986567 +4243595887018109345 +16854355484151858975 +17824179734558184441 +16575015955374163975 +11166072895359788251 +4672793412310595979 +11662546881149105250 +12538166544933777435 +11722210424178360283 +1902194734705606765 +266686581463175661 +16074839640020454939 +15610428338711345163 +9375103138090391670 +17843133346217035257 +9376447468692705174 +12348514360643133585 +7092183650900606612 +7381215085367261040 +1025566684643822133 +2708356584101210201 +6966475830774284782 +5737618151194088 +16816798862489869782 +9623512148755793970 +14696063528935499280 +17922303468508763300 +3463131064664665558 +10465389732375196281 +2927949035455020871 +8715002928666343443 +6352216148907768729 +15014990112144496670 +151492256457492990 +14672168770998046504 +9198546391288650458 +4717764190349933682 +2384889042485943729 +1275573531338280674 +13723257417777193210 +17499607644343274311 +13482788580441737048 +451366214124872455 +2704176625334388720 +8711758432296364613 +1743530822193510813 +10861865177793144621 +361274931772094380 +13568286686974248061 +14861556124459317622 +6909222356600930741 +607676752504049721 +10259111739125082093 +9822315908896451773 +2265857296951320150 +1550651589192204149 +6945346911255698290 +9600339620756968838 +4479830930170517773 +1210832166732647046 +12095000562951351030 +811946562278436937 +1117482976802521978 +17240483200809764630 +4380097279851823775 +516931220871028440 +10428708131254582613 +14129787489717357036 +10328720574036373405 +8146876242072047218 +7222369055013741541 +13024664779031638579 +15943047487577034006 +362413165636374421 +3261245193222193749 +11621310117978089282 +15549497242015160964 +13291520089188517160 +11671104737927796055 +11135069841661177575 +3650764710119866526 +7839745228827868989 +3157286853404593525 +2455109419895529807 +11414816602741973625 +17346698351529666101 +3918716191747070040 +10597282261754539209 +10179074980843412971 +12098710892186580551 +16758789573774396811 +3665358782076936080 +5845109775786629845 +1476258799297415348 +8739970607004250939 +2830293699793713539 +6095910703456974492 +10953472343596448364 +17274795811748208517 +1725245671366670437 +3375108735023551703 +2233397411684274890 +2453444860450500778 +12634657687420451779 +2784907074970399101 +3635034659931953434 +3042164568691140188 +4340879949540761184 +1864139470075917099 +16202655634977194645 +18178330625170322115 +4172112296397778784 +5517762869024057721 +7167368615970693146 +17514281323421660912 +1809784681923541601 +3882241840403341980 +15032659973033228138 +75806323760747216 +8138504882119159390 +7127120030880977955 +5737331778203917441 +4393186212231951253 +5364946801465698334 +4804551465397875256 +12433866337301245767 +1386117858520588697 +2895584012162304180 +7848270236029184769 +3569026730226013894 +1417362519360335938 +15969179098585396427 +14238077263127059248 +9751549263662471475 +4037338678038761786 +1804013018567407046 +13457998065235455118 +10140063732153922203 +17656661976643228106 +3720743467758294877 +2870825844397404926 +9587177189767209906 +10438595910771325312 +17408664646399403265 +8713530339034185493 +8606889736453311463 +12389102058524798149 +9066461448360331459 +9315806523386499354 +602208109489543619 +14237574329016677252 +1404822544835157692 +3204304976457042129 +16748580854221101915 +6097234164520443853 +9696947528332974447 +3470496262818306799 +13609208421308386710 +11253612619346528539 +3153101215946654475 +6028540266207431221 +15825328615014668612 +9059727884937860652 +6125130893958454073 +40046782929237202 +10609688785591961749 +7766472292891291486 +5254940967886639942 +474976430800597621 +16727529846999063119 +12571719785354339975 +7948312970608637162 +17650945605274347755 +7896164873215621577 +9174353876421987377 +7738740492070263136 +3678102111746261545 +17713364541053957907 +16853976982461110165 +15124311396150907006 +12518608450327922493 +4500691136685536177 +16085747271303886528 +13249559416938871933 +2797885249165091990 +1612107073384607720 +1469622000625590692 +8728995836403765866 +4983530247007603126 +5186818979009286302 +13121525670433996715 +16298295277343631668 +6799231993924959188 +4087489174016204558 +12383509282853806001 +9037230528883624718 +13951262222910418052 +670503974497833315 +13433094156556130463 +9299432395902598273 +2021072000413091101 +14603884042524710050 +12516093515714363149 +15893334968644532257 +5696765208438067602 +5267194477194015035 +9570009798829323280 +18308287724311476366 +3205339228647710355 +746255473472505726 +3665716382630981771 +6828369592644033499 +956305507613709845 +126357214402659695 +14836592626658562698 +6743670665873833149 +3945795285814364296 +777583248197347548 +4182387369963973594 +11760298295102248744 +3319985052948601734 +4924566410842343866 +5377199827187539336 +4494250652839597278 +12279912848310139411 +13232937367523590677 +13544595547797855873 +39825052807405201 +1329271246090066523 +9948030671800011682 +2691649421870037179 +3220528211311085705 +2351879416812597700 +12490116559530771772 +17907282061602413474 +16018273326922491034 +6084263969774472225 +14053490404202387321 +6740591850722498102 +2535003159784572826 +6397621555212308060 +6194406788654613665 +5441445297006231366 +4178524783272515962 +3774534924308920500 +12119543693312806913 +13619202904345541099 +15100784748751303647 +16956685835645438553 +9549393604806673809 +14346669114126130152 +9946998172751016265 +157065615153165458 +7138497376696127394 +5451452507587091276 +4134790102616069899 +10310686331069833649 +14056519399604594595 +8500985978751272413 +8516266365230735367 +762769955269582344 +8724025445718242313 +2044304504947551323 +9631245551564445703 +11659421814115840457 +11809266806814803123 +15377356609167882739 +11694506275358569864 +17615609854937782350 +790351869843545922 +883052528779541447 +5623836459300677847 +3570439442510970202 +4312478898801067729 +857309034837418070 +15600162650321143133 +4242894555680109784 +11322152854717094330 +11090080340914891101 +12253266208423313443 +11406767161137034447 +16602148492278092287 +17421004376186190347 +6536477517592063672 +8436251690507677221 +6121836912575577302 +7800359204463079916 +694269232845562808 +10715789643428431676 +10788231227813363308 +848384638454566060 +4727982358932254997 +13184504046794564705 +13750155190943714717 +13991491645776223454 +2354086058066578721 +1194433219219249865 +13512868931303953009 +5998969340103664118 +1168317444156948528 +18240243192146950133 +1020815024689062091 +4986527234175350579 +2964839490955489338 +3369870798342310151 +5627557579020036797 +8428910422101497037 +5370547072872604517 +11897814979685118191 +17367409734696224156 +17039534569496388965 +16502132836651310723 +13794632198757117092 +9974997848195300119 +16672993654329612018 +13866366436735364868 +5787908918887943743 +16680782387736726032 +12518030341885517639 +16949208116132498778 +13624856407313894384 +724690348523040483 +7691093015859897712 +17045286496227471751 +8456839444871222706 +7276489053691194742 +13301300181970965721 +6749379642227642116 +8028346909762667136 +10706179543082621333 +3914972782726861115 +8888429757612197162 +7923703801710869095 +10723108453986194977 +11129511637016039264 +15463764330142754754 +7407150199860806354 +10999403738324431442 +14403847177914799090 +6199911289460010732 +17255314834163720002 +9923407944802509246 +2307400847742167058 +10443564959760893013 +16793214757275112941 +2125717472221290645 +7622451517991298515 +1138936309782878568 +1472925750418937830 +4125161401613747076 +12948740161842266442 +9961373503514186535 +14079582375870286611 +1925965943589895706 +13464875181133358289 +2997895004591619102 +18124057953775235473 +7040875257063670256 +893876746595571928 +9419831274018068770 +18124796234921203631 +16936135384846868081 +8640727609187450012 +15795366104307280377 +12385485943112793195 +7176283546560411545 +8662559250643184540 +3198566401880634127 +3609879722652704527 +5385644151025851564 +5488749228899779990 +18319884306383238407 +7231164259268988479 +2336339218965464516 +4644486039252364531 +17297668829603702837 +3327637973763105036 +11096645604493543350 +3537051743146286545 +7188013625689201043 +9890696072751143777 +1007306500871742686 +10678349684957093011 +15529156978783803948 +11818893863965111650 +7330498660416045027 +6530492653355812218 +2418434787264645953 +6561376226365904088 +11325525227211596296 +5485701159777230269 +12555419094748438701 +11469759192788925904 +2365974219192832938 +8497343639240804048 +8984386879693882411 +14501463500034558004 +9118470939569871228 +17179592302874339368 +1638892979162014305 +3561128775778382596 +14651497542111386683 +16310187422289014119 +10448718554036259927 +1490851250227666825 +4592221683008360854 +11899703434278514930 +645675638074005086 +12054235398550235077 +11580256222861746154 +14176029271872813933 +7749167674103536468 +236771692192121153 +3702499144204600418 +3390785520669952788 +9961637361768770885 +11725311724184646244 +14060076233721840135 +17763413079630395316 +10717396052826531779 +3867599184669540196 +16344276345204461355 +9823148548855755457 +6353962033514365348 +10727304960227660244 +5037036879355646884 +4066644695468007393 +15698659219821682562 +17590152392379601734 +8065035266586922993 +15051878826125581128 +7804736997269626743 +15540207945801666432 +17052712224436235178 +12328056066685494985 +9998629110604197228 +3963756363474491450 +10221801650441587570 +268080158107027111 +8995769160772888743 +2535543740433179696 +10594412852009400677 +8482872930067867325 +16364730312101125870 +13581870310121943887 +5774932137117529750 +2002220083302564001 +10830005012720981636 +265420956020226102 +4466934330239418671 +9716380479912779116 +13769275090841699879 +14293201537316188676 +642542107733392026 +13518046156561781888 +13647729904966700089 +5539617345991756690 +6429206141563005414 +6725127094448021792 +8858738687609325331 +13357970590328563008 +9001236612908988113 +15562512223694971481 +8495957782487848413 +3711339541843813795 +10298548790884935842 +10201033711455914368 +8572953541290654094 +10424947555565636230 +7721269080464803165 +12241223658505824854 +2110023417246522730 +13115534447250309237 +17742384517227967054 +12088578056785921232 +11697109414684130284 +18011797748292602309 +4665321917206980314 +11757172444507973835 +2060556456719917718 +7762522970899829711 +4836394696666418718 +18147329150759676341 +12222218749115058807 +8594938192289274249 +17769090428126536190 +4029117041813328525 +16810929230547642587 +14914898842132798009 +948314100515757393 +11908528049755882112 +1723616829251678387 +6118504805068468335 +17460309694493863759 +15657258614059819016 +13683856607739877309 +11768077934277764370 +7534977965215846753 +6526232549886152538 +12156436041086457587 +6655421576613076786 +7512327434614526553 +15838360773660180869 +14787254480717453100 +3396654986126865972 +4357756686937769361 +1975960509304797250 +14999487786469824142 +12124261609090098430 +8591824195064630389 +14960253475839307870 +4972707619253705345 +7144076413649384117 +11282766207863807190 +2680079068849593395 +9234616385570408302 +2753987330696668115 +3824348610037961064 +7815323855828707646 +18347856919390078603 +10955106789917251505 +5256044242423507395 +2806735554833576658 +5015794505417923938 +5281839736169662258 +6954875887078425658 +5156856221828052564 +5304942836443554117 +14553372930505520654 +18097775279862385185 +10890677188457308874 +10473466465483594223 +9434102961265920698 +7919725889244772205 +2638623867936123793 +13812391017564510467 +16673599384603980078 +6564958585081812683 +2454497426303312063 +4122102016745923333 +7143678331339267298 +15924981510411998392 +560063250528431645 +4496395773499366213 +4541702123867764134 +755997748023618032 +12852392212867788221 +9532923950028562548 +11631157450484277349 +4922355153338157116 +4157958950040437499 +15532707892995802316 +3437958335911926699 +16164569173133699491 +13354127630827743832 +13523911097966125551 +403791498826292272 +10197981641069554167 +7159513973024413175 +8870001787647554543 +8406624180974201097 +10097689899432758960 +11004165897919219986 +11232401318497222967 +6406722594226356017 +12754432273599679642 +7637480801120864704 +10422522340209730226 +2492766686664504644 +12038413078864228929 +12073274723224653011 +6475042328040258433 +6782752214477272881 +14679037231491925753 +16567915082978193718 +12085010846037209521 +4118841150025972359 +9229129281008161387 +17344601510053255572 +12380556265641414410 +3096639924436395019 +7965191195961952116 +5109118381727697752 +7152331846788255993 +295914791786348132 +8776391015391600822 +17516770343467473001 +11487013637283499237 +3129249599490630443 +5792826833090002111 +5181605918778466500 +564260202891141670 +13594335007478245700 +7968567261537832647 +11786662718928914860 +13915006957760619865 +13735389068222665226 +6733494230998356373 +13623310531330974755 +14445449377193340858 +16860627908034187808 +18120260251226767533 +16257947705394086374 +6474851759952078241 +11236615277303254198 +17804550593341443373 +9509866670582357105 +11842562592565603286 +6274602276324483887 +17395645206463006993 +9274758382659909118 +16786217888483697372 +10021536345216661242 +9598611395849734745 +10128456149186959601 +10308628832782076080 +11099354825228739905 +16641435143682762904 +14519686365083772700 +4817696562257483925 +12001808669904294613 +7386517284762403820 +14827844167953121014 +7305770844172395624 +13101984771022075062 +13418081493726424880 +3105815288523742248 +5942848856275452851 +16696920254771145939 +3528381782439003288 +12237492756054572065 +4353977860326249258 +12748879607457864993 +5876673475287094411 +4649048907820451495 +18308575514867184483 +3714735451219033578 +11918032555283317230 +13283843342905540876 +8786479526004947512 +5517949967049014914 +449720041204007627 +1958607848676649341 +9180316695381978751 +2983928066147321932 +15981703582555846390 +11732335170030413041 +10130366131537172015 +16186409951658365933 +17078357154606106912 +10171675355595599317 +2006093289667857382 +17356155612128019340 +1658905635850369998 +7420080700041180938 +14155571250652237832 +17671700882261358903 +14360903145404122609 +7790756002864472242 +7190377415306855656 +3671290089367032951 +12874319777948281933 +18091519150529814515 +14721541259410270663 +18349030196226448897 +15404914658216331974 +11200785903660030889 +2591391468054275260 +9148462101794037522 +16409385237481694076 +16202927447520697681 +12461884265086008651 +13039544234635425355 +2035306369654392761 +16978578401013244377 +6253134325636002935 +16826408511357414064 +6224353264275578177 +4215236095240485278 +4677119553447190557 +1510953764591642467 +10203064520588451797 +11385960355542738201 +12317543763020062805 +2582979884753476059 +13681517850453691584 +13504568649835475920 +3983035804329517471 +17406149782255561597 +11565450612614761144 +17732942685097605410 +11596470391962784369 +15352130401556879943 +8421722898698241957 +9981917889162167466 +16113485222072940221 +12952835170715525045 +6426220784809647532 +11920251228567293416 +4353697044331137620 +10323958267104413011 +3345016079793402001 +1050789679752792912 +10785870631454199061 +11754108920729779134 +8675222355845156173 +11921769203406063396 +15214270696534249650 +4499380633987712275 +10762510094395267610 +13661192721759023728 +12550239913435127467 +6871440594261128193 +16427135110662244570 +9818793003328990332 +71778455455430491 +9270492984685023865 +7362853139030942450 +15118932982439025260 +5768927476616553736 +5753295134902353894 +9377379923759531269 +6917367340536772641 +6078373075865657692 +17441518178866798306 +4600272646876261503 +15835596113352012966 +6997264724615703207 +7368113585755592156 +17422811833318351243 +748920056273657291 +2969175901429135591 +12047786239116392081 +13039716583719505563 +16104557680630537908 +7039005040878403180 +36300520651392208 +2470494804931986405 +6597729064239790943 +17974305239141220513 +4228489990123921695 +17580144536918644310 +13984653964188477418 +9671355922918861813 +11540919791986370480 +7821308086899945909 +17222849710595307879 +16783738834762952936 +13650501703260897985 +7727464251774911190 +14707773778295667432 +859213829440206550 +10138425228537049123 +14409205833023900905 +2097270506640962281 +7238525563483336469 +5494164564336640977 +15073384955213014286 +5762387850254038714 +2936993165018763527 +4479046328905527001 +6700431391554858667 +6670445864194769749 +16530572498260845358 +283643494799686663 +18322696208965864934 +15287075276187210454 +13103521669029394936 +9412970228661647169 +17506235843268138620 +16352397132123499088 +8607418086799707498 +14461753306762933703 +6096492355585726309 +12081138807914186555 +12064737845702003624 +15612569284043876295 +12695712940098678409 +3290542994592767717 +2709703614291402414 +8797747310607864794 +11984307524574040894 +5941388926340431049 +18029996918909847146 +13506459773967849898 +18266384440457397890 +16507928690078159606 +14975981821963128293 +13064018147813409743 +10262370344689570942 +14560935371037282379 +15664960121951110901 +15104740105914189555 +10516768123345126842 +12990814514942266067 +2842113797571577890 +11459497692011770115 +12100036863451011429 +8708933595930994784 +12137087144322832212 +16324026911898435857 +6003348259300173230 +6928815495447995485 +663161988840521992 +12001076887408293194 +16611534658521739705 +1657195900583564379 +9261712685264712533 +2195048041882536297 +3450229806928721975 +2172759156654983921 +9996595988668875405 +2006877847146210400 +5276900246750457017 +13259020647197816128 +6262162109662289600 +2458245056098212519 +3050664171353298084 +7144385097503810964 +4635498932729631844 +15168041629751675714 +15816537237102031830 +2281164924410291070 +13819504007848352525 +4562257165877688273 +14963643192256733125 +1265818814357317489 +13926442581679541303 +17180682799400244173 +17197230886967893826 +7556959403997580330 +17857262572331656511 +16006347220941918866 +4060848512164529141 +14018389065325244927 +16978382253066253382 +12984148333835958342 +2174596803080821887 +14236749295087575963 +6674737544057987411 +6410309726683755614 +2461642179072915229 +10134848087575501482 +5012491244188366290 +5394166363993877349 +871082274808830762 +15216751078536331992 +12039081855132906456 +16387028070432891903 +16848686817979203843 +15193612151842966742 +13590488944960381794 +14270638504406766869 +6365073028953699338 +2262895935444952513 +1376344433913859090 +16690700222978805320 +14471473094485731365 +7774939215132675579 +13953494299625679002 +15154497862824493470 +8041890444470766131 +14699410355325925225 +9729025890544229009 +1298732817764412937 +9171023127826383901 +17902936651870499906 \ No newline at end of file diff --git a/test/cityhash.jl b/test/cityhash.jl new file mode 100644 index 0000000..36aa336 --- /dev/null +++ b/test/cityhash.jl @@ -0,0 +1,66 @@ +using DelimitedFiles +using Test +using ClickHouse: city_hash_128, city_hash_64, low64, high64 + + +text = """riverrun, past Eve and Adam's, from swerve of shore to bend +of bay, brings us by a commodius vicus of recirculation back to +Howth Castle and Environs. + Sir Tristram, violer d'amores, fr'over the short sea, had passen- +core rearrived from North Armorica on this side the scraggy +isthmus of Europe Minor to wielderfight his penisolate war: nor +had topsawyer's rocks by the stream Oconee exaggerated themselse +to Laurens County's gorgios while they went doublin their mumper +all the time: nor avoice from afire bellowsed mishe mishe to +tauftauf thuartpeatrick: not yet, though venissoon after, had a +kidscad buttended a bland old isaac: not yet, though all's fair in +vanessy, were sosie sesthers wroth with twone nathandjoe. Rot a +peck of pa's malt had Jhem or Shen brewed by arclight and rory +end to the regginbrow was to be seen ringsome on the aquaface. +""" + +@testset "CityHash128 known hash value comparisson" begin + """ + answer key generated from C++ clickhouse CityHash128:: + + char* x = "..."; + for (int i = 0; i <= strlen(x); i++) { + auto y = CityHash128_2(x, i); + std::cout << Uint128Low64(y) << std::endl; + std::cout << Uint128High64(y) << std::endl; + } + + where x is the above book string (with whitespace replaced by " ") and stripped + as is done below. + """ + + answer_key = readdlm("ch/fw_ch128_key.txt", UInt64)[:, 1] + t = strip(replace(text, r"\s+" => " ")) + + for i in 0:length(t) + tt = String(t[begin:i]) + h = city_hash_128(tt) + x̂, ŷ = low64(h), high64(h) + + x, y = answer_key[2 * i + 1], answer_key[2 * i + 2] + @test x == x̂ + @test y == ŷ + end +end + +@testset "CityHash64" begin + # examples given https://clickhouse.com/docs/en/native-protocol/hash/#implementations + @test city_hash_64("Moscow") == UInt64(12507901496292878638) + @test city_hash_64("How can you write a big system without C++? -Paul Glick") == UInt64(6237945311650045625) + @test city_hash_64("A: Use Julia") == city_hash_64("A: Use Julia") +end + +@testset "CityHash128 Unicode" begin + # SELECT cityHash64('some unicode ϵ Σ ∱') + # Query id: 8545fa6f-2a23-479c-8400-f17631e1f6f4 + # ┌─cityHash64('some unicode ϵ Σ ∱')─┐ + # │ 15571479198080573106 │ + # └──────────────────────────────────┘ + # 1 rows in set. Elapsed: 0.004 sec. + @test city_hash_64("some unicode ϵ Σ ∱") == UInt64(15571479198080573106) +end From d7eb39bac062bba3e2b95422a4eb1936cbdeaaf1 Mon Sep 17 00:00:00 2001 From: Dylan Modesitt Date: Mon, 19 Sep 2022 17:20:37 -0400 Subject: [PATCH 03/19] clarify comment --- test/cityhash.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/cityhash.jl b/test/cityhash.jl index 36aa336..a24a95a 100644 --- a/test/cityhash.jl +++ b/test/cityhash.jl @@ -30,8 +30,8 @@ end to the regginbrow was to be seen ringsome on the aquaface. std::cout << Uint128High64(y) << std::endl; } - where x is the above book string (with whitespace replaced by " ") and stripped - as is done below. + where x is the above `text`` string (with whitespace replaced by " ") and + stripped as is done below. """ answer_key = readdlm("ch/fw_ch128_key.txt", UInt64)[:, 1] From a3756f49b38144437883e1c312e88c0bac10653e Mon Sep 17 00:00:00 2001 From: Dylan Modesitt Date: Sun, 2 Oct 2022 12:02:51 -0400 Subject: [PATCH 04/19] implement transport compression --- .gitignore | 3 ++ Project.toml | 1 + docs/src/index.md | 1 - src/Connect.jl | 6 ++- src/Exceptions.jl | 5 ++- src/Query.jl | 3 +- src/{ => tcp}/CityHash128.jl | 8 +++- src/tcp/ClickHouseSock.jl | 6 +++ src/tcp/Compression.jl | 33 ++++++++++++++ src/tcp/DataBlocks.jl | 87 +++++++++++++++++++++++++++++++----- src/tcp/tcp.jl | 2 + test/runtests.jl | 12 +++-- 12 files changed, 145 insertions(+), 22 deletions(-) rename src/{ => tcp}/CityHash128.jl (98%) create mode 100644 src/tcp/Compression.jl diff --git a/.gitignore b/.gitignore index a0d0a61..5ed895e 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,6 @@ docs/build docs/Manifest.toml Manifest.toml + +# editor(s) +.vscode \ No newline at end of file diff --git a/Project.toml b/Project.toml index c660923..9be4399 100644 --- a/Project.toml +++ b/Project.toml @@ -5,6 +5,7 @@ version = "0.2.2" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" +CodecLz4 = "5ba52731-8f18-5e0d-9241-30f10d1ec561" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" DecFP = "55939f99-70c6-5e9b-8bb0-5071ed7d61fd" diff --git a/docs/src/index.md b/docs/src/index.md index 7baf86e..5b24e83 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -31,7 +31,6 @@ Usage examples can be found on the [usage page](@ref Usage). ## Limitations -- Transfer compression is currently not implemented - Timezone conversion of `DateTime` / `DateTime64` for columns that have a timezone assigned in ClickHouse doesn't happen automatically. All DateTime objects are naive, meaning they aren't timezone aware. For reasoning, see diff --git a/src/Connect.jl b/src/Connect.jl index defc2b0..65387a2 100644 --- a/src/Connect.jl +++ b/src/Connect.jl @@ -95,7 +95,8 @@ function connect( password::AbstractString = "", connection_timeout = DBMS_DEFAULT_CONNECT_TIMEOUT, max_insert_block_size = DBMS_DEFAULT_MAX_INSERT_BLOCK, - send_buffer_size = DBMS_DEFAULT_BUFFER_SIZE + send_buffer_size = DBMS_DEFAULT_BUFFER_SIZE, + compression::Union{Bool, AbstractString, UInt8} = false )::ClickHouseSock sock = ClickHouseSock( nothing, @@ -107,7 +108,8 @@ function connect( password = password, connection_timeout = connection_timeout, max_insert_block_size = max_insert_block_size, - send_buffer_size = send_buffer_size + send_buffer_size = send_buffer_size, + compression = Compression(compression), ) ) diff --git a/src/Exceptions.jl b/src/Exceptions.jl index 32d6c4a..13318ab 100644 --- a/src/Exceptions.jl +++ b/src/Exceptions.jl @@ -3,4 +3,7 @@ struct ClickHouseServerException <: Exception code::Int name::String message::String -end \ No newline at end of file +end + +"""checksum (compressed block hash values) don't match""" +struct ChecksumError <: Exception end \ No newline at end of file diff --git a/src/Query.jl b/src/Query.jl index 31c155b..5c715fa 100644 --- a/src/Query.jl +++ b/src/Query.jl @@ -8,7 +8,8 @@ import Sockets # ============================================================================ # function write_query(sock::ClickHouseSock, query::AbstractString)::Nothing - query = ClientQuery("", ClientInfo(), "", 2, 0, query) + compression = compression_enabled(sock.settings) + query = ClientQuery("", ClientInfo(), "", 2, compression, query) write_packet(sock, query) write_packet(sock, make_block()) nothing diff --git a/src/CityHash128.jl b/src/tcp/CityHash128.jl similarity index 98% rename from src/CityHash128.jl rename to src/tcp/CityHash128.jl index 4c4aa44..5bf1c0d 100644 --- a/src/CityHash128.jl +++ b/src/tcp/CityHash128.jl @@ -1,7 +1,7 @@ """ A (pure Julia) implementation of the version of CityHash used by ClickHouse. -ClickHouse server comes built-in with an old version of this algorithm - so -the implementation below is not a port of the currently ordained CityHash, but +ClickHouse server comes built-in with an old version of this algorithm - so the +implementation below is not a port of the currently ordained CityHash, but rather the one required for the transport-compression protocol(s) in ClickHouse. This is a fairly literal translation of the C source used in @@ -320,6 +320,10 @@ end end end +function city_hash_128(s::Vector{UInt8})::UInt128 + return city_hash_128(s, UInt(length(s))) +end + function city_hash_128(s::AbstractString)::UInt128 data = Vector{UInt8}(s) return city_hash_128(data, UInt(length(data))) diff --git a/src/tcp/ClickHouseSock.jl b/src/tcp/ClickHouseSock.jl index 398aaef..8d1be35 100644 --- a/src/tcp/ClickHouseSock.jl +++ b/src/tcp/ClickHouseSock.jl @@ -7,8 +7,11 @@ Base.@kwdef struct CHSettings connection_timeout::Int = DBMS_DEFAULT_CONNECT_TIMEOUT max_insert_block_size::Int = DBMS_DEFAULT_MAX_INSERT_BLOCK send_buffer_size::Int = DBMS_DEFAULT_BUFFER_SIZE + compression::Compression = COMPRESSION_NONE end +"""is compression enabled in these settings?""" +compression_enabled(settings::CHSettings) = settings.compression != COMPRESSION_NONE mutable struct ClickHouseSock io ::Union{IO, Nothing} @@ -30,6 +33,9 @@ mutable struct ClickHouseSock end end +"""is compression enabled on this socket?""" +compression_enabled(sock::ClickHouseSock) = compression_enabled(sock.settings) + """ @guarded(sock::ClickHouseSock, expr) diff --git a/src/tcp/Compression.jl b/src/tcp/Compression.jl new file mode 100644 index 0000000..58c0fbf --- /dev/null +++ b/src/tcp/Compression.jl @@ -0,0 +1,33 @@ + +@enum Compression::UInt8 begin + COMPRESSION_NONE = 0 + COMPRESSION_DRY = 0x02 + COMPRESSEION_LZ4 = 0x82 +end + +Compression(flag::Bool)::Compression = flag ? COMPRESSEION_LZ4 : COMPRESSION_NONE + +function Compression(name::String)::Compression + if lowercase(name) == "lz4" + return COMPRESSEION_LZ4 + end + error("unkown compression mode: $(name)") +end + +"""compress data according to the compression mode""" +function compress(mode::Compression, data::Vector{UInt8})::Vector{UInt8} + return if mode == COMPRESSION_NONE || mode == COMPRESSION_DRY + data + elseif mode == COMPRESSION_LZ4 + lz4_compress(data) + end +end + +"""decompress data according to the compression mode""" +function decompress(mode::Compression, data::Vector{UInt8})::Vector{UInt8} + return if mode == COMPRESSION_NONE || mode == COMPRESSION_DRY + data + elseif mode == COMPRESSION_LZ4 + lz4_decompress(data) + end +end diff --git a/src/tcp/DataBlocks.jl b/src/tcp/DataBlocks.jl index 0f4e7f1..8501933 100644 --- a/src/tcp/DataBlocks.jl +++ b/src/tcp/DataBlocks.jl @@ -1,7 +1,13 @@ +using CodecLz4 + const BLOCK_INFO_FIELD_STOP = UInt64(0) const BLOCK_INFO_FIELD_OVERFLOWS = UInt64(1) const BLOCK_INFO_FIELD_BUCKET_NUM = UInt64(2) +# UInt32 || UInt32 || UInt8 = (4 + 4 + 1) +const HEADER_SIZE_W_COMPRESSION = UInt32(9) + + struct BlockInfo is_overflows::Bool bucket_num::Int32 @@ -96,19 +102,78 @@ end function chread(sock::ClickHouseSock, ::Type{Block})::Block temp_table = chread(sock, String) - block_info = chread(sock, BlockInfo) - num_columns = chread(sock, VarUInt) - num_rows = chread(sock, VarUInt) - columns = [read_col(sock, num_rows) for _ ∈ 1:UInt64(num_columns)] - Block(temp_table, block_info, num_columns, num_rows, columns) + main_io = sock.io + try + if compression_enabled(sock.settings) + hash = chread(sock, UInt128) + method = Compression(chread(sock, UInt8)) + compressed = chread(sock, UInt32) + original = chread(sock, UInt32) + comp_data = chread(sock, Vector{UInt8}, VarUInt(compressed - 9)) + decomp_data = decompress(COMPRESSION_LZ4, comp_data) + sock.io = IOBuffer(decomp_data) + end + + block_info = chread(sock, BlockInfo) + num_columns = chread(sock, VarUInt) + num_rows = chread(sock, VarUInt) + columns = [read_col(sock, num_rows) for _ ∈ 1:UInt64(num_columns)] + return Block(temp_table, block_info, num_columns, num_rows, columns) + finally + sock.io = main_io + end end function chwrite(sock::ClickHouseSock, x::Block) - chwrite(sock, x.temp_table) - chwrite(sock, x.block_info) - chwrite(sock, x.num_columns) - chwrite(sock, x.num_rows) - for x ∈ x.columns - chwrite(sock, x) + main_io = sock.io + try + if compression_enabled(sock) + sock.io = IOBuffer(read = true, write = true) + else + # tmp table's aren't written in the compression block, so they are + # only written here if we aren't compressing what's about to be on + # sock.io + chwrite(sock, x.temp_table) + end + + chwrite(sock, x.block_info) + chwrite(sock, x.num_columns) + chwrite(sock, x.num_rows) + for x ∈ x.columns + chwrite(sock, x) + end + + if compression_enabled(sock) + # packet: + # checksum(packet-inner) :: UInt128 (1) + # packet-inner: + # compression method ∈ Compression :: UInt8 (2) + # |C(D)| + |header| :: UInt32 (3) + # |D| :: UInt32 (4) + # C(D) :: UInt8[] (5) + + data = take!(sock.io) + compressed = compress(sock.settings.compression, data) + if length(data) > typemax(UInt32) || + length(comp_data) > typemax(UInt32) + throw(DomainError("Block too big")) + end + + sock.io = IOBuffer(read = true, write = true) + chwrite(sock, sock.settings.compression) # (2) + chwrite(sock, UInt32(length(compressed) + HEADER_SIZE_W_COMPRESSION)) # (3) + chwrite(sock, UInt32(length(data))) # (4) + chwrite(sock, compressed) # (5) + + block_data = take!(sock.io) # unroll (2:5) for (1) + hash = city_hash_128(block_data) # checksum(packet-inner) + sock.io = main_io + chwrite(sock, x.temp_table) + chwrite(sock, hash) # (1) + chwrite(sock, block_data) # (2:5) + end + + finally + sock.io = main_io end end \ No newline at end of file diff --git a/src/tcp/tcp.jl b/src/tcp/tcp.jl index cf9bfea..eae161d 100644 --- a/src/tcp/tcp.jl +++ b/src/tcp/tcp.jl @@ -1,4 +1,6 @@ include("VarUInt.jl") +include("CityHash128.jl") +include("Compression.jl") include("ClickHouseSock.jl") include("BasicIO.jl") include("Macro.jl") diff --git a/test/runtests.jl b/test/runtests.jl index 0f3c5b5..b42d7b0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -34,7 +34,9 @@ end include("defines.jl") include("tcp.jl") include("columns_io.jl") - +include("cityhash.jl") +using CategoricalArrays +using Sockets: IPv4, IPv6 function miss_or_equal(a, b) return (ismissing(a) && ismissing(b)) || @@ -55,7 +57,7 @@ end @testset "Decode & re-encode client packets (SELECT 1)" begin # This .bin file was extracted from a tcpdump captured from a session - # with the official ClickHouse command line client. + # with the official ClickHouse mand line client. data = read(open("select1/client-query.bin"), 100_000, all = true) sock = data |> IOBuffer |> ClickHouseSock sock.server_rev = ClickHouse.DBMS_VER_REV @@ -351,8 +353,10 @@ end # Multi block insert. insert(sock, table, repeat([data], 100)) - # SELECT -> Dict + SELECT -> Dict proj = ClickHouse.select(sock, "SELECT * FROM $(table) LIMIT 4") + @show proj + @test proj[:lul] == UInt64[42, 1337, 123, 42] @test proj[:oof] == Float32[0., ℯ, π, 0.] @test proj[:foo] == String["aa", "bb", "cc", "aa"] @@ -468,4 +472,4 @@ end # Clean up. execute(sock, "DROP TABLE $(table)") -end \ No newline at end of file +end From 3010ab7230981a239ead25806d357851cf7d1b7a Mon Sep 17 00:00:00 2001 From: Dylan Modesitt Date: Sun, 2 Oct 2022 12:11:15 -0400 Subject: [PATCH 05/19] test both modes + bugfixes --- src/tcp/Compression.jl | 14 +++++++++++--- src/tcp/DataBlocks.jl | 2 +- test/runtests.jl | 15 +++++++++++---- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/src/tcp/Compression.jl b/src/tcp/Compression.jl index 58c0fbf..0507192 100644 --- a/src/tcp/Compression.jl +++ b/src/tcp/Compression.jl @@ -2,14 +2,16 @@ @enum Compression::UInt8 begin COMPRESSION_NONE = 0 COMPRESSION_DRY = 0x02 - COMPRESSEION_LZ4 = 0x82 + COMPRESSION_LZ4 = 0x82 end -Compression(flag::Bool)::Compression = flag ? COMPRESSEION_LZ4 : COMPRESSION_NONE +Compression(flag::Bool)::Compression = flag ? COMPRESSION_LZ4 : COMPRESSION_NONE function Compression(name::String)::Compression if lowercase(name) == "lz4" - return COMPRESSEION_LZ4 + return COMPRESSION_LZ4 + elseif lowercase(name) == "dry" + return COMPRESSION_DRY end error("unkown compression mode: $(name)") end @@ -19,6 +21,7 @@ function compress(mode::Compression, data::Vector{UInt8})::Vector{UInt8} return if mode == COMPRESSION_NONE || mode == COMPRESSION_DRY data elseif mode == COMPRESSION_LZ4 + println("DECOMPRESSING :)") lz4_compress(data) end end @@ -28,6 +31,11 @@ function decompress(mode::Compression, data::Vector{UInt8})::Vector{UInt8} return if mode == COMPRESSION_NONE || mode == COMPRESSION_DRY data elseif mode == COMPRESSION_LZ4 + println("COMPRESSING :)") lz4_decompress(data) end end + +function chwrite(sock::ClickHouseSock, compression::ClickHouse.Compression) + chwrite(sock, UInt8(compression)) +end diff --git a/src/tcp/DataBlocks.jl b/src/tcp/DataBlocks.jl index 8501933..d621ed8 100644 --- a/src/tcp/DataBlocks.jl +++ b/src/tcp/DataBlocks.jl @@ -155,7 +155,7 @@ function chwrite(sock::ClickHouseSock, x::Block) data = take!(sock.io) compressed = compress(sock.settings.compression, data) if length(data) > typemax(UInt32) || - length(comp_data) > typemax(UInt32) + length(compressed) > typemax(UInt32) throw(DomainError("Block too big")) end diff --git a/test/runtests.jl b/test/runtests.jl index b42d7b0..b8cdf55 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -234,9 +234,8 @@ end @test reencoded_data == data end -@testset "Queries on localhost DB" begin +function test_queries(sock) table = "ClickHouseJL_Test" - sock = connect() try execute(sock, """ @@ -353,9 +352,9 @@ end # Multi block insert. insert(sock, table, repeat([data], 100)) - SELECT -> Dict + # SELECT -> Dict proj = ClickHouse.select(sock, "SELECT * FROM $(table) LIMIT 4") - @show proj + # @show proj @test proj[:lul] == UInt64[42, 1337, 123, 42] @test proj[:oof] == Float32[0., ℯ, π, 0.] @@ -473,3 +472,11 @@ end # Clean up. execute(sock, "DROP TABLE $(table)") end + +@testset "Queries on localhost DB" begin + test_queries(connect()) +end + +@testset "Queries on localhost DB + compression (lz4)" begin + test_queries(connect(compression="lz4")) +end From 899ec831d74c5a7ef2be28b9cb4c1069f418c2a5 Mon Sep 17 00:00:00 2001 From: Dylan Modesitt Date: Sun, 2 Oct 2022 12:11:45 -0400 Subject: [PATCH 06/19] removing logging --- src/tcp/Compression.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/tcp/Compression.jl b/src/tcp/Compression.jl index 0507192..6926b7c 100644 --- a/src/tcp/Compression.jl +++ b/src/tcp/Compression.jl @@ -21,7 +21,6 @@ function compress(mode::Compression, data::Vector{UInt8})::Vector{UInt8} return if mode == COMPRESSION_NONE || mode == COMPRESSION_DRY data elseif mode == COMPRESSION_LZ4 - println("DECOMPRESSING :)") lz4_compress(data) end end @@ -31,7 +30,6 @@ function decompress(mode::Compression, data::Vector{UInt8})::Vector{UInt8} return if mode == COMPRESSION_NONE || mode == COMPRESSION_DRY data elseif mode == COMPRESSION_LZ4 - println("COMPRESSING :)") lz4_decompress(data) end end From 500302f81c201e17b10bac51ef435303639bae7f Mon Sep 17 00:00:00 2001 From: Dylan Modesitt Date: Sun, 2 Oct 2022 12:20:23 -0400 Subject: [PATCH 07/19] various bugfixes --- src/tcp/BasicIO.jl | 13 ++++++++++++- src/tcp/Compression.jl | 12 ++++++------ src/tcp/DataBlocks.jl | 6 +++--- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/src/tcp/BasicIO.jl b/src/tcp/BasicIO.jl index 9b72cec..03606c2 100644 --- a/src/tcp/BasicIO.jl +++ b/src/tcp/BasicIO.jl @@ -71,4 +71,15 @@ chwrite(sock::ClickHouseSock, x::AbstractVector{T}) where T <: Number = write(sock.io, x) chwrite(sock::ClickHouseSock, x::AbstractVector{String}) = - foreach(x -> chwrite(sock, x), x) \ No newline at end of file + foreach(x -> chwrite(sock, x), x) + + +# Compression bytes + +function chwrite(sock::ClickHouseSock, compression::ClickHouse.Compression) + chwrite(sock, UInt8(compression)) +end + +function chread(sock::ClickHouseSock, ::Type{ClickHouse.Compression}) + Compression(chread(sock, UInt8)) +end diff --git a/src/tcp/Compression.jl b/src/tcp/Compression.jl index 6926b7c..bb3d449 100644 --- a/src/tcp/Compression.jl +++ b/src/tcp/Compression.jl @@ -26,14 +26,14 @@ function compress(mode::Compression, data::Vector{UInt8})::Vector{UInt8} end """decompress data according to the compression mode""" -function decompress(mode::Compression, data::Vector{UInt8})::Vector{UInt8} +function decompress( + mode::Compression, + data::Vector{UInt8}, + uncompressed_size::Integer = length(data) * 2 +)::Vector{UInt8} return if mode == COMPRESSION_NONE || mode == COMPRESSION_DRY data elseif mode == COMPRESSION_LZ4 - lz4_decompress(data) + lz4_decompress(data, uncompressed_size) end end - -function chwrite(sock::ClickHouseSock, compression::ClickHouse.Compression) - chwrite(sock, UInt8(compression)) -end diff --git a/src/tcp/DataBlocks.jl b/src/tcp/DataBlocks.jl index d621ed8..d7a82d4 100644 --- a/src/tcp/DataBlocks.jl +++ b/src/tcp/DataBlocks.jl @@ -106,11 +106,11 @@ function chread(sock::ClickHouseSock, ::Type{Block})::Block try if compression_enabled(sock.settings) hash = chread(sock, UInt128) - method = Compression(chread(sock, UInt8)) + method = chread(sock, Compression) compressed = chread(sock, UInt32) - original = chread(sock, UInt32) + original = chread(sock, UInt32) # TODO, not needed? comp_data = chread(sock, Vector{UInt8}, VarUInt(compressed - 9)) - decomp_data = decompress(COMPRESSION_LZ4, comp_data) + decomp_data = decompress(method, comp_data, original) sock.io = IOBuffer(decomp_data) end From 2e647ee8fd2767a4204d1aa788fff364def0f3c4 Mon Sep 17 00:00:00 2001 From: Dylan Modesitt Date: Sun, 2 Oct 2022 12:28:22 -0400 Subject: [PATCH 08/19] check checksum on read --- src/tcp/DataBlocks.jl | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/tcp/DataBlocks.jl b/src/tcp/DataBlocks.jl index d7a82d4..f8bed27 100644 --- a/src/tcp/DataBlocks.jl +++ b/src/tcp/DataBlocks.jl @@ -107,11 +107,24 @@ function chread(sock::ClickHouseSock, ::Type{Block})::Block if compression_enabled(sock.settings) hash = chread(sock, UInt128) method = chread(sock, Compression) - compressed = chread(sock, UInt32) - original = chread(sock, UInt32) # TODO, not needed? - comp_data = chread(sock, Vector{UInt8}, VarUInt(compressed - 9)) - decomp_data = decompress(method, comp_data, original) - sock.io = IOBuffer(decomp_data) + raw_len = chread(sock, UInt32) + data_len = chread(sock, UInt32) + compressed_len = VarUInt(raw_len - HEADER_SIZE_W_COMPRESSION) + compressed = chread(sock, Vector{UInt8}, compressed_len) + + # check packet checksum + packet = [ + UInt8(method); + reinterpret(UInt8, [raw_len]) |> Vector{UInt8}; + reinterpret(UInt8, [data_len]) |> Vector{UInt8}; + compressed + ] + if city_hash_128(packet) != hash + throw(ChecksumError()) + end + + data = decompress(method, compressed, data_len) + sock.io = IOBuffer(data) end block_info = chread(sock, BlockInfo) From a4c85af2b422d1a95ec3bc0ad392109e3f3dc697 Mon Sep 17 00:00:00 2001 From: Dylan Modesitt Date: Sun, 2 Oct 2022 18:31:55 -0400 Subject: [PATCH 09/19] address comments --- src/tcp/CityHash128.jl | 9 ++++----- test/runtests.jl | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/tcp/CityHash128.jl b/src/tcp/CityHash128.jl index 5bf1c0d..7422029 100644 --- a/src/tcp/CityHash128.jl +++ b/src/tcp/CityHash128.jl @@ -302,21 +302,20 @@ end return u128_from_pair(hash_len_16(x + v[2], w[2]) + y, hash_len_16(x + w[2], y + v[2])) - return t end @views function city_hash_128(s::Vector{UInt8}, len::UInt)::UInt128 - if len >= 16 - return city_hash_128_with_seed(s[17:end], + return if len >= 16 + city_hash_128_with_seed(s[17:end], len - 16, u128_from_pair(fetch64(s) ⊻ k3, fetch64(s[9:16])) ) elseif len >= 8 - return city_hash_128_with_seed( + city_hash_128_with_seed( Vector{UInt8}([]), UInt(0), u128_from_pair(fetch64(s) ⊻ (len * k0), fetch64(s[len-7:len]) ⊻ k1) ) else - return city_hash_128_with_seed(s, len, u128_from_pair(k0, k1)) + city_hash_128_with_seed(s, len, u128_from_pair(k0, k1)) end end diff --git a/test/runtests.jl b/test/runtests.jl index b8cdf55..6c2d127 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -57,7 +57,7 @@ end @testset "Decode & re-encode client packets (SELECT 1)" begin # This .bin file was extracted from a tcpdump captured from a session - # with the official ClickHouse mand line client. + # with the official ClickHouse command line client. data = read(open("select1/client-query.bin"), 100_000, all = true) sock = data |> IOBuffer |> ClickHouseSock sock.server_rev = ClickHouse.DBMS_VER_REV From 03def1ec7dac9a1567cfc4715159ba2edffde7d9 Mon Sep 17 00:00:00 2001 From: Dylan Modesitt Date: Sun, 2 Oct 2022 18:33:18 -0400 Subject: [PATCH 10/19] avoid evangelizing --- test/cityhash.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/test/cityhash.jl b/test/cityhash.jl index a24a95a..849c759 100644 --- a/test/cityhash.jl +++ b/test/cityhash.jl @@ -52,7 +52,6 @@ end # examples given https://clickhouse.com/docs/en/native-protocol/hash/#implementations @test city_hash_64("Moscow") == UInt64(12507901496292878638) @test city_hash_64("How can you write a big system without C++? -Paul Glick") == UInt64(6237945311650045625) - @test city_hash_64("A: Use Julia") == city_hash_64("A: Use Julia") end @testset "CityHash128 Unicode" begin From 69495a7d51569f215c7e499e0158469c169f5830 Mon Sep 17 00:00:00 2001 From: Dylan Modesitt Date: Sun, 2 Oct 2022 20:50:38 -0400 Subject: [PATCH 11/19] give a better name to 0x02 compression mode --- src/tcp/Compression.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tcp/Compression.jl b/src/tcp/Compression.jl index bb3d449..17732a6 100644 --- a/src/tcp/Compression.jl +++ b/src/tcp/Compression.jl @@ -1,7 +1,7 @@ @enum Compression::UInt8 begin COMPRESSION_NONE = 0 - COMPRESSION_DRY = 0x02 + COMPRESSION_CHECKSUM_ONLY = 0x02 COMPRESSION_LZ4 = 0x82 end @@ -11,14 +11,14 @@ function Compression(name::String)::Compression if lowercase(name) == "lz4" return COMPRESSION_LZ4 elseif lowercase(name) == "dry" - return COMPRESSION_DRY + returnCOMPRESSION_CHECKSUM_ONLY end error("unkown compression mode: $(name)") end """compress data according to the compression mode""" function compress(mode::Compression, data::Vector{UInt8})::Vector{UInt8} - return if mode == COMPRESSION_NONE || mode == COMPRESSION_DRY + return if mode == COMPRESSION_NONE || mode ==COMPRESSION_CHECKSUM_ONLY data elseif mode == COMPRESSION_LZ4 lz4_compress(data) @@ -31,7 +31,7 @@ function decompress( data::Vector{UInt8}, uncompressed_size::Integer = length(data) * 2 )::Vector{UInt8} - return if mode == COMPRESSION_NONE || mode == COMPRESSION_DRY + return if mode == COMPRESSION_NONE || mode ==COMPRESSION_CHECKSUM_ONLY data elseif mode == COMPRESSION_LZ4 lz4_decompress(data, uncompressed_size) From 2f38ea9b54ae8b8f8944e2de154e41b52b69da2c Mon Sep 17 00:00:00 2001 From: Dylan Modesitt Date: Sun, 2 Oct 2022 20:56:47 -0400 Subject: [PATCH 12/19] expose Compression enum --- src/ClickHouse.jl | 1 + src/Connect.jl | 2 +- src/tcp/Compression.jl | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/ClickHouse.jl b/src/ClickHouse.jl index e49bb95..fdd5d3b 100644 --- a/src/ClickHouse.jl +++ b/src/ClickHouse.jl @@ -8,6 +8,7 @@ include("columns/columns.jl") include("Connect.jl") include("Query.jl") +export Compression export ClickHouseSock export Block export select diff --git a/src/Connect.jl b/src/Connect.jl index 65387a2..08bc254 100644 --- a/src/Connect.jl +++ b/src/Connect.jl @@ -96,7 +96,7 @@ function connect( connection_timeout = DBMS_DEFAULT_CONNECT_TIMEOUT, max_insert_block_size = DBMS_DEFAULT_MAX_INSERT_BLOCK, send_buffer_size = DBMS_DEFAULT_BUFFER_SIZE, - compression::Union{Bool, AbstractString, UInt8} = false + compression::Union{Bool, AbstractString, UInt8, Compression} = false )::ClickHouseSock sock = ClickHouseSock( nothing, diff --git a/src/tcp/Compression.jl b/src/tcp/Compression.jl index 17732a6..bdfb166 100644 --- a/src/tcp/Compression.jl +++ b/src/tcp/Compression.jl @@ -10,8 +10,8 @@ Compression(flag::Bool)::Compression = flag ? COMPRESSION_LZ4 : COMPRESSION_NONE function Compression(name::String)::Compression if lowercase(name) == "lz4" return COMPRESSION_LZ4 - elseif lowercase(name) == "dry" - returnCOMPRESSION_CHECKSUM_ONLY + elseif lowercase(name) == "checksum_only" + return COMPRESSION_CHECKSUM_ONLY end error("unkown compression mode: $(name)") end From 7590448c88bec8b5d4fe81459e5c5475302af20a Mon Sep 17 00:00:00 2001 From: Dylan Modesitt Date: Sat, 8 Oct 2022 16:50:56 -0400 Subject: [PATCH 13/19] remove uncessary allocations in city_hash --- src/tcp/CityHash128.jl | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/tcp/CityHash128.jl b/src/tcp/CityHash128.jl index 7422029..9e07cf6 100644 --- a/src/tcp/CityHash128.jl +++ b/src/tcp/CityHash128.jl @@ -41,9 +41,10 @@ end Hash 128 input bits down to 64 bits of output. This is intended to be a reasonably good hash function. """ +const kMul::UInt64 = 0x9ddfea08eb382d69 + @inline function hash_128_to_64(x::UInt128) ## Murmur-inspired hashing. - kMul::UInt64 = 0x9ddfea08eb382d69 a = (low64(x) ⊻ high64(x)) * kMul a ⊻= (a >> 47) b = (high64(x) ⊻ a) * kMul @@ -53,7 +54,10 @@ This is intended to be a reasonably good hash function. end hash_len_16(u::UInt64, v::UInt64)::UInt64 = hash_128_to_64(u128_from_pair(u, v)) -reinterpret_first(type, A) = reinterpret(type, A)[begin] + +@inline function reinterpret_first(type, A) + reinterpret(type, A)[begin] +end @views function fetch64(s::AbstractArray{})::UInt64 reinterpret_first(UInt64, s[begin:8]) @@ -312,7 +316,9 @@ end ) elseif len >= 8 city_hash_128_with_seed( - Vector{UInt8}([]), UInt(0), u128_from_pair(fetch64(s) ⊻ (len * k0), fetch64(s[len-7:len]) ⊻ k1) + Vector{UInt8}([]), + UInt(0), + u128_from_pair(fetch64(s) ⊻ (len * k0), fetch64(s[len-7:len]) ⊻ k1) ) else city_hash_128_with_seed(s, len, u128_from_pair(k0, k1)) From 2bc148a62a4699b96dc9bd684ba27946aff2d0e6 Mon Sep 17 00:00:00 2001 From: Dylan Modesitt Date: Sat, 8 Oct 2022 16:51:07 -0400 Subject: [PATCH 14/19] allow decompressing views --- src/tcp/Compression.jl | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/tcp/Compression.jl b/src/tcp/Compression.jl index bdfb166..31041fc 100644 --- a/src/tcp/Compression.jl +++ b/src/tcp/Compression.jl @@ -1,3 +1,4 @@ +using CodecLz4 @enum Compression::UInt8 begin COMPRESSION_NONE = 0 @@ -17,7 +18,7 @@ function Compression(name::String)::Compression end """compress data according to the compression mode""" -function compress(mode::Compression, data::Vector{UInt8})::Vector{UInt8} +function compress(mode::Compression, data)::Vector{UInt8} return if mode == COMPRESSION_NONE || mode ==COMPRESSION_CHECKSUM_ONLY data elseif mode == COMPRESSION_LZ4 @@ -25,10 +26,24 @@ function compress(mode::Compression, data::Vector{UInt8})::Vector{UInt8} end end +function lz4_decompress( + input::AbstractArray{UInt8}, + expected_size::Integer=length(input) * 2 +) + out_buffer = Vector{UInt8}(undef, expected_size) + out_size = CodecLz4.LZ4_decompress_safe( + pointer(input), + pointer(out_buffer), + length(input), + expected_size + ) + resize!(out_buffer, out_size) +end + """decompress data according to the compression mode""" function decompress( mode::Compression, - data::Vector{UInt8}, + data::AbstractArray{UInt8}, uncompressed_size::Integer = length(data) * 2 )::Vector{UInt8} return if mode == COMPRESSION_NONE || mode ==COMPRESSION_CHECKSUM_ONLY From 20009f7a6fcc1f1f7ce19f50e442008dad4f4078 Mon Sep 17 00:00:00 2001 From: Dylan Modesitt Date: Sat, 8 Oct 2022 16:51:16 -0400 Subject: [PATCH 15/19] remove alloc heavy packet formation --- src/tcp/DataBlocks.jl | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/tcp/DataBlocks.jl b/src/tcp/DataBlocks.jl index f8bed27..abfd20f 100644 --- a/src/tcp/DataBlocks.jl +++ b/src/tcp/DataBlocks.jl @@ -1,4 +1,3 @@ -using CodecLz4 const BLOCK_INFO_FIELD_STOP = UInt64(0) const BLOCK_INFO_FIELD_OVERFLOWS = UInt64(1) @@ -109,20 +108,18 @@ function chread(sock::ClickHouseSock, ::Type{Block})::Block method = chread(sock, Compression) raw_len = chread(sock, UInt32) data_len = chread(sock, UInt32) - compressed_len = VarUInt(raw_len - HEADER_SIZE_W_COMPRESSION) - compressed = chread(sock, Vector{UInt8}, compressed_len) - - # check packet checksum - packet = [ - UInt8(method); - reinterpret(UInt8, [raw_len]) |> Vector{UInt8}; - reinterpret(UInt8, [data_len]) |> Vector{UInt8}; - compressed - ] + + # form the packet with header and compressed data for the purpose + # computing the checksum + packet = Vector{UInt8}(undef, raw_len) + packet[1] = UInt8(method) + packet[2:5] = reinterpret(UInt8, [raw_len]) + packet[6:9] = reinterpret(UInt8, [data_len]) + compressed = @view packet[HEADER_SIZE_W_COMPRESSION+1:end] + read!(sock.io, compressed) if city_hash_128(packet) != hash throw(ChecksumError()) end - data = decompress(method, compressed, data_len) sock.io = IOBuffer(data) end From 73e3c59eaf1a62b4650a16e892415a1c41460ec4 Mon Sep 17 00:00:00 2001 From: Dylan Modesitt Date: Sun, 16 Oct 2022 17:56:15 -0400 Subject: [PATCH 16/19] avoid DelimitedFiles --- test/cityhash.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/cityhash.jl b/test/cityhash.jl index 849c759..3d3967b 100644 --- a/test/cityhash.jl +++ b/test/cityhash.jl @@ -1,4 +1,3 @@ -using DelimitedFiles using Test using ClickHouse: city_hash_128, city_hash_64, low64, high64 @@ -34,7 +33,7 @@ end to the regginbrow was to be seen ringsome on the aquaface. stripped as is done below. """ - answer_key = readdlm("ch/fw_ch128_key.txt", UInt64)[:, 1] + answer_key = parse.(UInt64, readlines("ch/fw_ch128_key.txt")) t = strip(replace(text, r"\s+" => " ")) for i in 0:length(t) From 96cdbebc6f63bb7734c5812f256a0ae8338cd545 Mon Sep 17 00:00:00 2001 From: Dylan Modesitt Date: Sun, 16 Oct 2022 17:57:37 -0400 Subject: [PATCH 17/19] improve Compression safety --- src/ClickHouse.jl | 3 +++ src/tcp/Compression.jl | 41 +++++++++++++++++------------------------ test/runtests.jl | 2 +- 3 files changed, 21 insertions(+), 25 deletions(-) diff --git a/src/ClickHouse.jl b/src/ClickHouse.jl index fdd5d3b..c729690 100644 --- a/src/ClickHouse.jl +++ b/src/ClickHouse.jl @@ -9,6 +9,9 @@ include("Connect.jl") include("Query.jl") export Compression +export COMPRESSION_NONE +export COMPRESSION_LZ4 +export COMPRESSION_CHECKSUM_ONLY export ClickHouseSock export Block export select diff --git a/src/tcp/Compression.jl b/src/tcp/Compression.jl index 31041fc..9ae3a6f 100644 --- a/src/tcp/Compression.jl +++ b/src/tcp/Compression.jl @@ -6,20 +6,9 @@ using CodecLz4 COMPRESSION_LZ4 = 0x82 end -Compression(flag::Bool)::Compression = flag ? COMPRESSION_LZ4 : COMPRESSION_NONE - -function Compression(name::String)::Compression - if lowercase(name) == "lz4" - return COMPRESSION_LZ4 - elseif lowercase(name) == "checksum_only" - return COMPRESSION_CHECKSUM_ONLY - end - error("unkown compression mode: $(name)") -end - """compress data according to the compression mode""" -function compress(mode::Compression, data)::Vector{UInt8} - return if mode == COMPRESSION_NONE || mode ==COMPRESSION_CHECKSUM_ONLY +function compress(mode::Compression, data::Vector{UInt8})::Vector{UInt8} + return if mode == COMPRESSION_NONE || mode == COMPRESSION_CHECKSUM_ONLY data elseif mode == COMPRESSION_LZ4 lz4_compress(data) @@ -30,25 +19,29 @@ function lz4_decompress( input::AbstractArray{UInt8}, expected_size::Integer=length(input) * 2 ) - out_buffer = Vector{UInt8}(undef, expected_size) - out_size = CodecLz4.LZ4_decompress_safe( - pointer(input), - pointer(out_buffer), - length(input), - expected_size - ) - resize!(out_buffer, out_size) + # mark the input variable here because it's not used again later and the + # call to pointer erases the GC's knowledge of the binding + GC.@preserve input begin + out_buffer = Vector{UInt8}(undef, expected_size) + out_size = CodecLz4.LZ4_decompress_safe( + pointer(input), + pointer(out_buffer), + length(input), + expected_size + ) + resize!(out_buffer, out_size) + end end """decompress data according to the compression mode""" function decompress( mode::Compression, data::AbstractArray{UInt8}, - uncompressed_size::Integer = length(data) * 2 + uncompressed_size::Integer=length(data) * 2 )::Vector{UInt8} - return if mode == COMPRESSION_NONE || mode ==COMPRESSION_CHECKSUM_ONLY + return if mode == COMPRESSION_NONE || mode == COMPRESSION_CHECKSUM_ONLY data elseif mode == COMPRESSION_LZ4 - lz4_decompress(data, uncompressed_size) + GC.@preserve data lz4_decompress(data, uncompressed_size) end end diff --git a/test/runtests.jl b/test/runtests.jl index 6c2d127..f335802 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -478,5 +478,5 @@ end end @testset "Queries on localhost DB + compression (lz4)" begin - test_queries(connect(compression="lz4")) + test_queries(connect(compression=COMPRESSION_LZ4)) end From b3c2063d33134eaf9983e35c60cb867e3f24b9b1 Mon Sep 17 00:00:00 2001 From: Dylan Modesitt Date: Sun, 16 Oct 2022 18:00:27 -0400 Subject: [PATCH 18/19] remove compression optionality in connect() --- src/Connect.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Connect.jl b/src/Connect.jl index 08bc254..02f7bc4 100644 --- a/src/Connect.jl +++ b/src/Connect.jl @@ -96,7 +96,7 @@ function connect( connection_timeout = DBMS_DEFAULT_CONNECT_TIMEOUT, max_insert_block_size = DBMS_DEFAULT_MAX_INSERT_BLOCK, send_buffer_size = DBMS_DEFAULT_BUFFER_SIZE, - compression::Union{Bool, AbstractString, UInt8, Compression} = false + compression::Compression = COMPRESSION_NONE )::ClickHouseSock sock = ClickHouseSock( nothing, @@ -109,7 +109,7 @@ function connect( connection_timeout = connection_timeout, max_insert_block_size = max_insert_block_size, send_buffer_size = send_buffer_size, - compression = Compression(compression), + compression = compression, ) ) From f901b8b803e628049c5433bb963205b84ef2a4ed Mon Sep 17 00:00:00 2001 From: Dylan Modesitt Date: Sun, 16 Oct 2022 18:07:39 -0400 Subject: [PATCH 19/19] annotate lz4_decompress --- src/tcp/Compression.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tcp/Compression.jl b/src/tcp/Compression.jl index 9ae3a6f..e1dda11 100644 --- a/src/tcp/Compression.jl +++ b/src/tcp/Compression.jl @@ -18,7 +18,7 @@ end function lz4_decompress( input::AbstractArray{UInt8}, expected_size::Integer=length(input) * 2 -) +)::Vector{UInt8} # mark the input variable here because it's not used again later and the # call to pointer erases the GC's knowledge of the binding GC.@preserve input begin