From 0fc1de7153a9e8170d6e832fcd908a1a82e2228f Mon Sep 17 00:00:00 2001 From: Jamie Gaskins Date: Sat, 30 Nov 2024 19:34:16 -0600 Subject: [PATCH 1/4] Encode query params in binary instead of text --- spec/pg/encoder_spec.cr | 20 ++- src/pg/decoder.cr | 3 +- src/pq/param.cr | 316 ++++++++++++++++++++++++++++------------ 3 files changed, 240 insertions(+), 99 deletions(-) diff --git a/spec/pg/encoder_spec.cr b/spec/pg/encoder_spec.cr index 2dd50a01..4d1cc2ec 100644 --- a/spec/pg/encoder_spec.cr +++ b/spec/pg/encoder_spec.cr @@ -6,7 +6,7 @@ enum EncoderSpec::Status end private def test_insert_and_read(datatype, value, file = __FILE__, line = __LINE__) - it "inserts #{datatype}", file, line do + it "inserts #{value.inspect} as #{datatype}", file, line do PG_DB.exec "drop table if exists test_table" PG_DB.exec "create table test_table (v #{datatype})" PG_DB.exec "insert into test_table values ($1)", args: [value] @@ -18,11 +18,18 @@ private def test_insert_and_read(datatype, value, file = __FILE__, line = __LINE end describe PG::Driver, "encoder" do - test_insert_and_read "int4", 123 + test_insert_and_read "boolean", true + test_insert_and_read "boolean", false - test_insert_and_read "float", 12.34 + test_insert_and_read "int2", 123i16 + test_insert_and_read "int4", 123i32 + test_insert_and_read "int8", 123i64 + + test_insert_and_read "float4", 12.34f32 + test_insert_and_read "float8", 12.34f64 test_insert_and_read "varchar", "hello world" + test_insert_and_read "text", "hello world" test_insert_and_read "timestamp", Time.utc(2015, 2, 3, 17, 15, nanosecond: 13_000_000) test_insert_and_read "timestamp", Time.utc(2015, 2, 3, 17, 15, 13, nanosecond: 11_000_000) @@ -32,18 +39,23 @@ describe PG::Driver, "encoder" do test_insert_and_read "int4", EncoderSpec::Status::Open test_insert_and_read "int4", EncoderSpec::Status::Closed + test_insert_and_read "bool[]", [] of Bool + test_insert_and_read "bool[]", [true] + test_insert_and_read "bool[]", [false] test_insert_and_read "bool[]", [true, false, true] test_insert_and_read "float[]", [1.2, 3.4, 5.6] test_insert_and_read "integer[]", [] of Int32 test_insert_and_read "integer[]", [1, 2, 3] - test_insert_and_read "integer[]", [[1, 2], [3, 4]] + test_insert_and_read "integer[][]", [[1, 2], [3, 4]] + test_insert_and_read "integer[][][]", [[[1, 2], [3, 4]], [[5, 6], [7, 8]]] test_insert_and_read "text[]", ["t", "f", "t"] test_insert_and_read "text[]", [%("a), %(\\b~), %(c\\"d), %(\uFF8F)] test_insert_and_read "text[]", ["baz, bar"] test_insert_and_read "text[]", ["foo}"] + test_insert_and_read "text[][]", [["foo", "bar"], ["baz", "quux"]] test_insert_and_read "interval", PG::Interval.new test_insert_and_read "interval", PG::Interval.new(days: 400, microseconds: 5000000) diff --git a/src/pg/decoder.cr b/src/pg/decoder.cr index 990d8fc7..88280ce6 100644 --- a/src/pg/decoder.cr +++ b/src/pg/decoder.cr @@ -501,7 +501,8 @@ module PG end end - @@decoders = Hash(Int32, PG::Decoders::Decoder).new(ByteaDecoder.new) + # :nodoc: + class_getter decoders = Hash(Int32, PG::Decoders::Decoder).new(ByteaDecoder.new) def self.from_oid(oid) @@decoders[oid] diff --git a/src/pq/param.cr b/src/pq/param.cr index 5232d817..1659b267 100644 --- a/src/pq/param.cr +++ b/src/pq/param.cr @@ -1,164 +1,292 @@ +require "../pg/interval" require "../pg/geo" module PQ # :nodoc: - record Param, slice : Slice(UInt8), size : Int32, format : Int16 do + record Param, slice : Slice(UInt8), size : Int32, format : Format do + enum Format : Int16 + None = -1 + Text = 0 + Binary = 1 + end delegate to_unsafe, to: slice # Internal wrapper to represent an encoded parameter def self.encode(val : Nil) - binary Pointer(UInt8).null.to_slice(0), -1 + binary Bytes.empty, -1 + end + + def self.encode(val : Bool, into slice : Bytes = Bytes.new(1)) + slice[0] = val ? 1u8 : 0u8 + binary slice + end + + def self.encode(val : String) + encode val.to_slice + end + + def self.encode(val : String, into slice : Bytes) + encode val.to_slice, into: slice end def self.encode(val : Slice) - binary val, val.size + binary val end - def self.encode(val : Array) - text encode_array(val) + def self.encode(val : Slice, into slice : Bytes) + val.copy_to slice + + binary slice + end + + def self.encode(val : Array(T)) forall T + bytes = ArrayEncoder.new(val).to_slice + + binary bytes end def self.encode(val : Time) - text Time::Format::RFC_3339.format(val, fraction_digits: 9) + # text Time::Format::RFC_3339.format(val, fraction_digits: 9) + encode ((val - 30.years).to_unix_ns // 1_000).to_i64 end + {% for type in %w[Int16 Int32 Int64 Float32 Float64] %} + def self.encode(val : {{type.id}}, into slice : Bytes = Bytes.new(sizeof(typeof(val)))) + IO::ByteFormat::NetworkEndian.encode val, slice + # pp value: val, type: typeof(val), slice: slice + binary slice + end + {% end %} + def self.encode(val : Enum) encode val.value end - def self.encode(val : PG::Geo::Point) - text "(#{val.x},#{val.y})" + def self.encode(val : UUID) + bytes = Bytes.new(16) + val.bytes.to_slice.copy_to bytes + binary bytes + end + + def self.encode(val : PG::Geo::Point, into slice : Bytes = Bytes.new(sizeof(PG::Geo::Point))) + encode val.x, into: slice + encode val.y, into: slice + sizeof(Float64) + + binary slice end def self.encode(val : PG::Geo::Line) - text "{#{val.a},#{val.b},#{val.c}}" + slice = Bytes.new(sizeof(PG::Geo::Line)) + encode val.a, into: slice + encode val.b, into: slice + sizeof(Float64) + encode val.c, into: slice + sizeof(Float64) * 2 + + binary slice end def self.encode(val : PG::Geo::Circle) - text "<(#{val.x},#{val.y}),#{val.radius}>" - end + slice = Bytes.new(sizeof(PG::Geo::Circle)) + encode val.x, into: slice + encode val.y, into: slice + sizeof(Float64) + encode val.radius, into: slice + sizeof(Float64) * 2 - def self.encode(val : PG::Geo::LineSegment) - text "((#{val.x1},#{val.y1}),(#{val.x2},#{val.y2}))" + binary slice end - def self.encode(val : PG::Geo::Box) - text "((#{val.x1},#{val.y1}),(#{val.x2},#{val.y2}))" + def self.encode(val : PG::Geo::LineSegment | PG::Geo::Box) + slice = Bytes.new(sizeof(PG::Geo::LineSegment)) + encode val.x1, into: slice + encode val.y1, into: slice + sizeof(Float64) + encode val.x2, into: slice + sizeof(Float64) * 2 + encode val.y2, into: slice + sizeof(Float64) * 3 + + binary slice end def self.encode(val : PG::Geo::Path) - if val.closed? - encode_points "(", val.points, ")" - else - encode_points "[", val.points, "]" - end + slice = Bytes.new( + sizeof(UInt8) + # closed flag + sizeof(UInt32) + # Size + sizeof(PG::Geo::Point) * val.points.size + # point data + 0 + ) + + slice[0] = val.closed? ? 1u8 : 0u8 + + encode_points(val, into: slice + 1) + binary slice end def self.encode(val : PG::Geo::Polygon) - encode_points "(", val.points, ")" + slice = Bytes.new( + sizeof(UInt32) + # Size + sizeof(PG::Geo::Point) * val.points.size + # point data + 0 + ) + + encode_points val, into: slice end - private def self.encode_points(left, points, right) - string = String.build do |io| - io << left - points.each_with_index do |point, i| - io << "," if i > 0 - io << "(" << point.x << "," << point.y << ")" - end - io << right + private def self.encode_points(val, into slice : Bytes) + encode val.points.size, into: slice + data = slice + sizeof(UInt32) + val.points.each_with_index do |point, index| + encode point, into: data + index * sizeof(PG::Geo::Point) end - text string + binary slice end def self.encode(val : PG::Interval) - # https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-INTERVAL-INPUT - text "#{val.months} months #{val.days} days #{val.microseconds} microseconds" - end + slice = Bytes.new(sizeof(PG::Interval)) - def self.encode(val) - text val.to_s + encode val.microseconds, into: slice + encode val.days, into: slice + sizeof(Int64) + encode val.months, into: slice + sizeof(Int64) + sizeof(Int32) + + binary slice end - def self.binary(slice, size) - new slice, size, 1_i16 + def self.binary(slice, size = slice.bytesize) + new slice, size, :binary end - def self.text(string : String) - text string.to_slice + # Types taken from src/pg/decoder.cr + private OID_MAP = { + Bool.name => 16, # boolean + Bytes.name => 17, # bytea + Char.name => 18, # char + Int16.name => 21, # int2 + Int32.name => 23, # int4 + Int64.name => 20, # int8 + String.name => 25, # text + Float32.name => 700, # float4 + Float64.name => 701, # float8 + UUID.name => 2950, # uuid + PG::Geo::Point.name => 600, # point + PG::Geo::Path.name => 602, # path + PG::Geo::Polygon.name => 604, # polygon + PG::Geo::Box.name => 603, # box + PG::Geo::LineSegment.name => 601, # lseg + PG::Geo::Line.name => 628, # line + PG::Geo::Circle.name => 718, # circle + JSON::Any.name => 3802, # jsonb + Time.name => 1184, # timestamptz + Time::Span.name => 1186, # interval + PG::Interval.name => 1186, # interval + } of String => Int32 + + protected def self.oid_for(type : T.class) forall T + OID_MAP[type.name] end - def self.text(slice : Bytes) - new slice, slice.size, 0_i16 + protected def self.oid_for(type : Array(T).class) forall T + oid_for(T) end - def self.encode_array(array) - String.build(array.size + 2) do |io| - encode_array(io, array) + record ArrayEncoder(T), array : Array(T) do + # Count array dimensions at compile time. This will generate an arithmetic + # expression like `1 + 1 + 1 + 0` for a 3-dimensional array, which will be + # inlined into the numeric literal `3` at compile time. + macro dimension_count(type) + {% if type.resolve < Array %} + 1 + dimension_count({{type.resolve.type_vars.first}}) + {% else %} + 0 + {% end %} end - end - def self.encode_array(io, value : Array) - io << "{" - value.join(io, ",") do |item| - encode_array(io, item) + def to_slice + # puts + dimensions = dimension_count(Array(T)) + nilable = {{T.nilable?}} + oid = Param.oid_for(T) + flat_data = array.flatten # TODO: Avoid allocating this + data_size = total_element_count * 4 + flat_data.sum { |element| size_for(element) } + # pp total_element_count: total_element_count + + bytes = Bytes.new( + 4 + # dimension count + 4 + # nulls flag (why is this 32-bit?) + 4 + # element OID + 8 * dimensions + # dimension length and lower bound + data_size + # 32-bit size prefix + 0 + ) + format = IO::ByteFormat::NetworkEndian + format.encode dimensions, bytes + format.encode nilable ? 1 : 0, bytes + 4 + format.encode oid, bytes + 8 + + dimensions_offset = 12 + collect_dimensions.each_with_index do |size, index| + entry_offset = dimensions_offset + 8 * index + format.encode size, bytes + entry_offset + format.encode 1, bytes + entry_offset + 4 + end + + # pp collect_dimensions: collect_dimensions, data_size: { + # from_size_prefixes: total_element_count * 4, + # from_data: flat_data.sum { |e| size_for e }, + # total: data_size, + # } + data_offset = dimensions_offset + 8 * collect_dimensions.size + flat_data.each do |element| + # pp encoding: element, into: data_offset + size = size_for(element) + Param.encode size, into: bytes + data_offset + Param.encode element, into: bytes + data_offset + 4 + data_offset += size + 4 + end + + # pp bytes.to_a.map_with_index { |byte, index| {index, byte.chr} }.to_h + + bytes end - io << "}" - end + private SIZE_MAP = { + Bool.name => 1, + Int16.name => sizeof(Int16), + Int32.name => sizeof(Int32), + Int64.name => sizeof(Int64), + Float64.name => sizeof(Float64), + UUID.name => sizeof(UUID), + } + + def size_for(value : T) forall T + SIZE_MAP.fetch(T.name) do + raise "Could not determine encoding size for #{T}" + end + end - def self.encode_array(io, value) - io << value - end + def size_for(value : Bool) + 1 + end - def self.encode_array(io, value : Nil) - io << "NULL" - end + def size_for(data : String | Bytes) + data.bytesize + end - def self.encode_array(io, value : Bool) - io << (value ? 't' : 'f') - end + def total_element_count(value : Array = array) + value.sum { |element| total_element_count(element) } + end - def self.encode_array(io, value : Bytes) - io << %{"\\\\x} - value.each do |byte| - byte.to_s io, base: 16, precision: 2 + def total_element_count(value) + 1 end - io << '"' - end - - def self.encode_array(io, value : String) - io << '"' - if value.ascii_only? - special_chars = {'"'.ord.to_u8, '\\'.ord.to_u8} - last_index = 0 - value.to_slice.each_with_index do |byte, index| - if special_chars.includes?(byte) - io.write value.unsafe_byte_slice(last_index, index - last_index) - last_index = index - io << '\\' - end - end - io.write value.unsafe_byte_slice(last_index) - else - last_index = 0 - reader = Char::Reader.new(value) - while reader.has_next? - char = reader.current_char - if {'"', '\\'}.includes?(char) - io.write value.unsafe_byte_slice(last_index, reader.pos - last_index) - last_index = reader.pos - io << '\\' - end - reader.next_char + getter collect_dimensions : Array(Int32) do + dimensions = Array(Int32).new(dimension_count(Array(T))) + dimensions << array.size + dimension = array + + while dimension = dimension.first?.try &.as?(Array) + dimensions << dimension.size end - io.write value.unsafe_byte_slice(last_index) + dimensions end - - io << '"' end end end From c8bc7a7fec5e72c57ef956b4b32f1c20059a79b7 Mon Sep 17 00:00:00 2001 From: Jamie Gaskins Date: Sat, 30 Nov 2024 20:20:53 -0600 Subject: [PATCH 2/4] Remove unnecessary code --- src/pq/param.cr | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/pq/param.cr b/src/pq/param.cr index 1659b267..cb341900 100644 --- a/src/pq/param.cr +++ b/src/pq/param.cr @@ -5,7 +5,6 @@ module PQ # :nodoc: record Param, slice : Slice(UInt8), size : Int32, format : Format do enum Format : Int16 - None = -1 Text = 0 Binary = 1 end @@ -54,7 +53,6 @@ module PQ {% for type in %w[Int16 Int32 Int64 Float32 Float64] %} def self.encode(val : {{type.id}}, into slice : Bytes = Bytes.new(sizeof(typeof(val)))) IO::ByteFormat::NetworkEndian.encode val, slice - # pp value: val, type: typeof(val), slice: slice binary slice end {% end %} From f636bbfed7439872522b5c9c94dde72dd4f58b64 Mon Sep 17 00:00:00 2001 From: Jamie Gaskins Date: Sat, 30 Nov 2024 21:14:59 -0600 Subject: [PATCH 3/4] Handle nil values inside arrays --- spec/pg/encoder_spec.cr | 1 + src/pq/param.cr | 38 ++++++++++++++++++++++++++++++-------- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/spec/pg/encoder_spec.cr b/spec/pg/encoder_spec.cr index 4d1cc2ec..c0ee3aca 100644 --- a/spec/pg/encoder_spec.cr +++ b/spec/pg/encoder_spec.cr @@ -52,6 +52,7 @@ describe PG::Driver, "encoder" do test_insert_and_read "integer[][][]", [[[1, 2], [3, 4]], [[5, 6], [7, 8]]] test_insert_and_read "text[]", ["t", "f", "t"] + test_insert_and_read "text[]", ["non-nil value", nil] test_insert_and_read "text[]", [%("a), %(\\b~), %(c\\"d), %(\uFF8F)] test_insert_and_read "text[]", ["baz, bar"] test_insert_and_read "text[]", ["foo}"] diff --git a/src/pq/param.cr b/src/pq/param.cr index cb341900..56f2e65c 100644 --- a/src/pq/param.cr +++ b/src/pq/param.cr @@ -13,7 +13,11 @@ module PQ # Internal wrapper to represent an encoded parameter def self.encode(val : Nil) - binary Bytes.empty, -1 + encode val, into: Bytes.empty + end + + def self.encode(val : Nil, into slice : Bytes) + binary slice, -1 end def self.encode(val : Bool, into slice : Bytes = Bytes.new(1)) @@ -176,7 +180,11 @@ module PQ } of String => Int32 protected def self.oid_for(type : T.class) forall T - OID_MAP[type.name] + {% if T.union? %} + oid_for({{T.union_types.reject(&.nilable?).first}}) + {% else %} + OID_MAP[type.name] + {% end %} end protected def self.oid_for(type : Array(T).class) forall T @@ -188,7 +196,9 @@ module PQ # expression like `1 + 1 + 1 + 0` for a 3-dimensional array, which will be # inlined into the numeric literal `3` at compile time. macro dimension_count(type) - {% if type.resolve < Array %} + {% if type.is_a? Expressions %} + ::PQ::Param::ArrayEncoder.dimension_count(Union({{type}})) + {% elsif type.resolve < Array %} 1 + dimension_count({{type.resolve.type_vars.first}}) {% else %} 0 @@ -232,7 +242,11 @@ module PQ data_offset = dimensions_offset + 8 * collect_dimensions.size flat_data.each do |element| # pp encoding: element, into: data_offset - size = size_for(element) + if element.nil? + size = -1 + else + size = size_for(element) + end Param.encode size, into: bytes + data_offset Param.encode element, into: bytes + data_offset + 4 data_offset += size + 4 @@ -249,13 +263,21 @@ module PQ Int32.name => sizeof(Int32), Int64.name => sizeof(Int64), Float64.name => sizeof(Float64), - UUID.name => sizeof(UUID), + UUID.name => sizeof(UUID), } def size_for(value : T) forall T - SIZE_MAP.fetch(T.name) do - raise "Could not determine encoding size for #{T}" - end + {% if T.union? %} + if value.nil? + 0 + else + size_for(value.as({{T.union_types.reject(&.nilable?).first}})) + end + {% else %} + SIZE_MAP.fetch(value.class.name) do + raise "Could not determine encoding size for #{T}" + end + {% end %} end def size_for(value : Bool) From d05b98569c0c9b6244aff9ae3aee37f8af5903ab Mon Sep 17 00:00:00 2001 From: Jamie Gaskins Date: Sat, 30 Nov 2024 21:17:04 -0600 Subject: [PATCH 4/4] Remove unnecessary specs This file tests functionality that no lonfer applies --- spec/pq/param_spec.cr | 25 ------------------------- 1 file changed, 25 deletions(-) delete mode 100644 spec/pq/param_spec.cr diff --git a/spec/pq/param_spec.cr b/spec/pq/param_spec.cr deleted file mode 100644 index 54831247..00000000 --- a/spec/pq/param_spec.cr +++ /dev/null @@ -1,25 +0,0 @@ -require "spec" -require "../../src/pq/param" - -private def it_encodes_array(value, encoded) - it "encodes #{value.class}" do - PQ::Param.encode_array(value).should eq encoded - end -end - -describe PQ::Param do - describe "encoders" do - describe "#encode_array" do - it_encodes_array([] of String, "{}") - it_encodes_array([true, false, true], "{t,f,t}") - it_encodes_array(["t", "f", "t"], %({"t","f","t"})) - it_encodes_array([1, 2, 3], "{1,2,3}") - it_encodes_array([1.2, 3.4, 5.6], "{1.2,3.4,5.6}") - it_encodes_array([%(a), %(\\b~), %(c\\"d), %(\uFF8F)], %({"a","\\\\b~","c\\\\\\"d","\uFF8F"})) - it_encodes_array([%(this is a "slice").to_slice], %({"\\\\x7468697320697320612022736c69636522"})) - it_encodes_array(["baz, bar"], %({"baz, bar"})) - it_encodes_array(["foo}"], %({"foo}"})) - it_encodes_array([nil, nil], %({NULL,NULL})) - end - end -end