Skip to content

Commit

Permalink
[RNTuple] write multiple fields (#349)
Browse files Browse the repository at this point in the history
* [RNTuple] write multiple cols

* add cvmfs ci

* add C++ ROOT readback test

* increase compat

* bump julia compat and CI

* bump HTTP compat
  • Loading branch information
Moelf authored Oct 16, 2024
1 parent 10599f9 commit 0110b5f
Show file tree
Hide file tree
Showing 25 changed files with 257 additions and 86 deletions.
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@ test/samples/RNTuple/* linguist-detectable=false

# JOSS paper
paper/** linguist-detectable=false
# GitHub syntax highlighting
pixi.lock linguist-language=YAML linguist-generated=true
3 changes: 1 addition & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ jobs:
fail-fast: false
matrix:
version:
- '1.7'
- '1.8'
- 'lts'
- '1' # Leave this line unchanged. '1' will automatically expand to the latest stable 1.x release of Julia.
- 'pre'
os: [ubuntu-latest]
Expand Down
36 changes: 36 additions & 0 deletions .github/workflows/cvmfs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
name: Test C++ ROOT read back
on:
pull_request:
branches:
- main
push:
branches:
- main
tags: '*'
jobs:
test:
name: C++ ROOT read back rntuple files ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
runs-on: ${{ matrix.os }}
continue-on-error: ${{ matrix.allow_failure }}
strategy:
fail-fast: true
matrix:
os: [ubuntu-22.04]
arch: [x64]
allow_failure: [false]
steps:
- uses: actions/checkout@v2
- uses: julia-actions/setup-julia@v1
with:
version: '1'
arch: ${{ matrix.arch }}
- uses: julia-actions/cache@v1
- uses: julia-actions/julia-buildpkg@v1
- name: Generate root file
run: |
julia --project ./test/RNTupleWriting/output_sample.jl test1.root
- uses: cvmfs-contrib/github-action-cvmfs@v4
- name: Read root file in C++
run: |
source /cvmfs/sft.cern.ch/lcg/views/dev3/latest/x86_64-ubuntu2204-gcc11-opt/setup.sh
python ./test/RNTupleWriting/test1.py test1.root
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,6 @@
*__pycache__*
/.benchmarkci
/benchmark/*.json
# pixi environments
.pixi
*.egg-info
6 changes: 3 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ CodecXz = "^0.7"
CodecZstd = "^0.8"
DataFrames = "^1.5"
FHist = "^0.10, ^0.11"
HTTP = "^1"
HTTP = "^1.10"
InteractiveUtils = "^1.0"
IterTools = "^1"
LRUCache = "^1.3.0"
Expand All @@ -59,14 +59,14 @@ PrettyTables = "^2.1"
Random = "^1.0"
SHA = "^0.7, ^1.0"
SentinelArrays = "^1.3"
StaticArrays = "^1"
StaticArrays = "^1.5"
StructArrays = "0.6"
TOML = "^1.0"
Tables = "^1.9"
Test = "^1.0"
XRootD = "^0.1"
XXHashNative = "^1.0.1"
julia = "^1.7"
julia = "^1.10"

[extras]
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
Expand Down
8 changes: 5 additions & 3 deletions src/RNTuple/Writing/Stubs.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
module Stubs
using ..UnROOT

const WRITE_TIME = 0x7670F8CD
const WRITE_TIME = 0x768A676E
const WRITE_TIME_ary = reverse(reinterpret(UInt8, [WRITE_TIME]))

const file_preamble = [
0x72, 0x6F, 0x6F, 0x74, 0x00, 0x00, 0xF7, 0x45,
Expand All @@ -23,7 +24,8 @@ const dummy_padding2 = [
]


const RBlob1 = UnROOT.RBlob(0x00DC, 0x0004, 0x000000BA, WRITE_TIME, 0x0022, 0x0001, 244, 100, "RBlob", "", "")
const RBlob1 = UnROOT.RBlob(; fNbytes = 0x00DC, fVersion = 0x0004, fObjLen = 0x000000BA, fDatime = WRITE_TIME, fKeyLen = 0x0022,
fCycle = 0x0001, fSeekKey = 244, fSeekPdir = 100, fClassName = "RBlob", fName = "", fTitle = "")
const rnt_header = UnROOT.RNTupleHeader(zero(UInt64), "myntuple", "", "ROOT v6.33.01", [
UnROOT.FieldRecord(zero(UInt32), zero(UInt32), zero(UInt32), zero(UInt16), zero(UInt16), 0, -1, -1, "one_uint", "std::uint32_t", "", ""),
], [UnROOT.ColumnRecord(0x14, 0x20, zero(UInt32), 0x00, 0x00, 0),], UnROOT.AliasRecord[], UnROOT.ExtraTypeInfo[])
Expand Down Expand Up @@ -90,7 +92,7 @@ const tsreamerinfo_compressed = [
]

const tfile_end = [
0x00, 0x00, 0x00, 0x3F, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0A, 0x76, 0x70, 0xF8, 0xCD, 0x00, 0x35,
0x00, 0x00, 0x00, 0x3F, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0A, WRITE_TIME_ary..., 0x00, 0x35,
0x00, 0x01, 0x00, 0x00, 0x05, 0xF9, 0x00, 0x00, 0x00, 0x64, 0x00, 0x18, 0x74, 0x65, 0x73, 0x74,
0x5F, 0x6E, 0x74, 0x75, 0x70, 0x6C, 0x65, 0x5F, 0x6D, 0x69, 0x6E, 0x69, 0x6D, 0x61, 0x6C, 0x2E,
0x72, 0x6F, 0x6F, 0x74, 0x00, 0x00, 0x01, 0x00, 0x00, 0x06, 0x38, 0x77, 0x35, 0x94, 0x00,
Expand Down
82 changes: 56 additions & 26 deletions src/RNTuple/Writing/TFileWriter.jl
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ function rnt_write(io::IO, x::UnROOT.ROOTDirectoryHeader32)
rnt_write(io, x.fSeekKeys; legacy=true)
end

struct RBlob
Base.@kwdef struct RBlob
fNbytes::Int32
fVersion::Int16
fObjLen::Int32
Expand Down Expand Up @@ -237,7 +237,7 @@ function _checksum(x::UnROOT.RNTupleHeader)
envelope_size = temp_io.size + sizeof(Int64) + sizeof(UInt64)
id_type = 0x0001

id_length = (UInt64(envelope_size & 0xff) << 16) | id_type
id_length = (UInt64(envelope_size) << 16) | id_type

payload_ary = take!(temp_io)
prepend!(payload_ary, reinterpret(UInt8, [id_length]))
Expand All @@ -260,7 +260,7 @@ function rnt_write(io::IO, x::UnROOT.RNTupleHeader; envelope=true)
envelope_size = temp_io.size + sizeof(Int64) + sizeof(UInt64)
id_type = 0x0001

id_length = (UInt64(envelope_size & 0xff) << 16) | id_type
id_length = (UInt64(envelope_size) << 16) | id_type

payload_ary = take!(temp_io)

Expand Down Expand Up @@ -473,19 +473,45 @@ function rnt_write_observe(io::IO, x::T) where T
WriteObservable(io, pos, len, x)
end

"""
    add_field_column_record!(field_records, column_records, input_T, NAME; parent_field_id)

Build one `UnROOT.FieldRecord` and one `UnROOT.ColumnRecord` for a column whose
element type is `input_T` and whose name is `NAME`, then append them to
`field_records` and `column_records` respectively.

The C++ type name is looked up in `RNTUPLE_WRITE_TYPE_CPPNAME_DICT` and the
leading column-record arguments are looked up in `RNTUPLE_WRITE_TYPE_IDX_DICT`.
`parent_field_id` is stored in the field record and is also passed to the
column record as the argument following the splatted type information.

Both records are fully constructed before either collection is mutated.
Returns `nothing`.
"""
function add_field_column_record!(field_records, column_records, input_T::Type{<:Real}, NAME; parent_field_id)
    field_name = string(NAME)
    cpp_type = RNTUPLE_WRITE_TYPE_CPPNAME_DICT[input_T]
    new_field = UnROOT.FieldRecord(
        zero(UInt32), zero(UInt32), parent_field_id, zero(UInt16), zero(UInt16),
        0, -1, -1, field_name, cpp_type, "", "",
    )

    column_type_info = RNTUPLE_WRITE_TYPE_IDX_DICT[input_T]
    new_column = UnROOT.ColumnRecord(column_type_info..., parent_field_id, 0x00, 0x00, 0)

    push!(field_records, new_field)
    push!(column_records, new_column)
    return nothing
end

"""
    schema_to_field_column_records(table)

Read the schema of `table` and create one field record and one column record
per column, in schema order.

Each column is registered through `add_field_column_record!`, with
`parent_field_id` set to the number of field records collected so far.

Returns the tuple `(field_records, column_records)`.
"""
function schema_to_field_column_records(table)
    table_schema = schema(table)
    column_types = table_schema.types
    column_names = table_schema.names

    field_records = UnROOT.FieldRecord[]
    column_records = UnROOT.ColumnRecord[]
    for (column_type, column_name) in zip(column_types, column_names)
        # Number of fields already collected, evaluated before the push below.
        next_field_id = length(field_records)
        add_field_column_record!(
            field_records, column_records, column_type, column_name;
            parent_field_id=next_field_id,
        )
    end
    return field_records, column_records
end

"""
    generate_page_links(column_records, pages_obses, Nitems)

Build the nested page-location lists for a set of written pages.

`column_records` and `pages_obses` are walked in lockstep. For each pair, an
inner list holding a single `PageDescription` of `Nitems` elements is appended
to one outer list. The locator of that page uses a byte size of
`nbits * Nitems / 8` rounded up, and the `position` recorded on the page
observable.

Returns an `RNTuplePageTopList` wrapping that single outer list.
"""
function generate_page_links(column_records, pages_obses, Nitems)
    cluster_pages = RNTuplePageOuterList{RNTuplePageInnerList{PageDescription}}([])
    for (column, observed_page) in zip(column_records, pages_obses)
        # Page size in bytes, rounding partial bytes up.
        page_nbytes = div(column.nbits * Nitems, 8, RoundUp)
        page_locator = Locator(page_nbytes, observed_page.position)
        column_pages = RNTuplePageInnerList([PageDescription(Nitems, page_locator)])
        push!(cluster_pages, column_pages)
    end
    return RNTuplePageTopList([cluster_pages])
end

function write_rntuple(file::IO, table; file_name="test_ntuple_minimal.root", rntuple_name="myntuple")
if !istable(table)
error("RNTuple writing accepts object compatible with Tables.jl interface, got type $(typeof(table))")
end

input_schema = schema(table)
input_Ncols = length(input_schema.names)
if input_Ncols != 1
error("Currently, RNTuple writing only supports a single, UInt32 column, got $input_Ncols columns")
end
input_T = only(input_schema.types)
input_col = only(columntable(table))
input_length = length(input_col)
input_cols = columntable(table)
input_length = length(input_cols[begin])
if input_length > 65535
error("Input too long: RNTuple writing currently only supports a single page (65535 elements)")
end
Expand All @@ -505,30 +531,33 @@ function write_rntuple(file::IO, table; file_name="test_ntuple_minimal.root", rn
tdirectory32_obs = rnt_write_observe(file, Stubs.tdirectory32)
dummy_padding2_obs = rnt_write_observe(file, Stubs.dummy_padding2)

RBlob1_obs = rnt_write_observe(file, Stubs.RBlob1)
rntAnchor_update[:fSeekHeader] = UInt32(position(file))
rnt_header = UnROOT.RNTupleHeader(zero(UInt64), rntuple_name, "", "ROOT v6.33.01", [
UnROOT.FieldRecord(zero(UInt32), zero(UInt32), zero(UInt32), zero(UInt16), zero(UInt16), 0, -1, -1, string(only(input_schema.names)), RNTUPLE_WRITE_TYPE_CPPNAME_DICT[input_T], "", ""),
], [UnROOT.ColumnRecord(RNTUPLE_WRITE_TYPE_IDX_DICT[input_T]..., zero(UInt32), 0x00, 0x00, 0),], UnROOT.AliasRecord[], UnROOT.ExtraTypeInfo[])
RBlob1 = UnROOT.RBlob(; fNbytes = 0x00DC, fVersion = 0x0004, fObjLen = 0x000000BA, fDatime = Stubs.WRITE_TIME, fKeyLen = 34,
fCycle = 0x0001, fSeekKey = position(file), fSeekPdir = 100, fClassName = "RBlob", fName = "", fTitle = "")
RBlob1_update = Dict{Symbol, Any}()
RBlob1_obs = rnt_write_observe(file, RBlob1)
field_records, col_records = schema_to_field_column_records(table)
rnt_header = UnROOT.RNTupleHeader(
zero(UInt64), rntuple_name, "", "ROOT v6.33.01",
field_records, col_records,
UnROOT.AliasRecord[], UnROOT.ExtraTypeInfo[]
)

rntAnchor_update[:fSeekHeader] = UInt32(position(file))
rnt_header_obs = rnt_write_observe(file, rnt_header)
rntAnchor_update[:fNBytesHeader] = rnt_header_obs.len
rntAnchor_update[:fLenHeader] = rnt_header_obs.len
RBlob1_update[:fObjLen] = rnt_header_obs.len
RBlob1_update[:fNbytes] = rnt_header_obs.len + 34

Base.setindex!(RBlob1_obs, RBlob1_update)

RBlob2_obs = rnt_write_observe(file, Stubs.RBlob2)
page1 = rnt_ary_to_page(input_col)
page1_obs = rnt_write_observe(file, page1)
pages = [rnt_ary_to_page(col, cr) for (col, cr) in zip(input_cols, col_records)]
pages_obses = [rnt_write_observe(file, page) for page in pages]

RBlob3_obs = rnt_write_observe(file, Stubs.RBlob3)
cluster_summary = Write_RNTupleListFrame([ClusterSummary(0, input_length)])
nested_page_locations =
UnROOT.RNTuplePageTopList([
UnROOT.RNTuplePageOuterList([
UnROOT.RNTuplePageInnerList([
PageDescription(input_length, UnROOT.Locator(sizeof(input_T) * input_length, page1_obs.position, )),
]),
]),
])
nested_page_locations = generate_page_links(col_records, pages_obses, input_length)

pagelink = UnROOT.PageLink(_checksum(rnt_header_obs.object), cluster_summary.payload, nested_page_locations)
pagelink_obs = rnt_write_observe(file, pagelink)
Expand Down Expand Up @@ -565,6 +594,7 @@ function write_rntuple(file::IO, table; file_name="test_ntuple_minimal.root", rn
tfile_end_obs = rnt_write_observe(file, Stubs.tfile_end)
fileheader_obs[:fEND] = UInt32(position(file))

flush!(RBlob1_obs)
flush!(tkey32_anchor_obs1)
flush!(tkey32_anchor_obs2)
flush!(tkey32_tfile_obs)
Expand Down
10 changes: 5 additions & 5 deletions src/RNTuple/Writing/constants.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ const RNTUPLE_WRITE_TYPE_IDX_DICT = Dict(
Float64 => (0x10, sizeof(UInt64) * 8),
Float32 => (0x11, sizeof(UInt32) * 8),
Float16 => (0x12, sizeof(UInt16) * 8),
UInt64 => (0x13, sizeof(UInt64) * 8),
UInt32 => (0x14, sizeof(UInt32) * 8),
UInt16 => (0x15, sizeof(UInt16) * 8),
UInt64 => (0x0A, sizeof(UInt64) * 8),
UInt32 => (0x0B, sizeof(UInt32) * 8),
UInt16 => (0x0C, sizeof(UInt16) * 8),
Int64 => (0x16, sizeof(Int64) * 8),
Int32 => (0x17, sizeof(Int32) * 8),
Int16 => (0x18, sizeof(Int16) * 8),
Expand All @@ -13,8 +13,8 @@ const RNTUPLE_WRITE_TYPE_IDX_DICT = Dict(

const RNTUPLE_WRITE_TYPE_CPPNAME_DICT = Dict(
Float16 => "std::float16_t",
Float32 => "std::float32_t",
Float64 => "std::float64_t",
Float32 => "float",
Float64 => "double",
Int8 => "std::int8_t",
Int16 => "std::int16_t",
Int32 => "std::int32_t",
Expand Down
70 changes: 52 additions & 18 deletions src/RNTuple/Writing/page_writing.jl
Original file line number Diff line number Diff line change
@@ -1,48 +1,82 @@
"""
    rnt_ary_to_page(ary::AbstractVector)
    rnt_ary_to_page(ary::AbstractVector, cr::ColumnRecord)

Turns an AbstractVector into a page of an RNTuple. The element type must be primitive for this to work.
"""
function rnt_ary_to_page(ary::AbstractVector) end
function rnt_ary_to_page(ary::AbstractVector, cr::ColumnRecord) end

function rnt_ary_to_page(ary::AbstractVector{Float64})
Page_write(split8_encode(reinterpret(UInt8, ary)))
function rnt_ary_to_page(ary::AbstractVector{Float64}, cr::ColumnRecord)
(;split, zigzag, delta) = _detect_encoding(cr.type)
if split
Page_write(split8_encode(reinterpret(UInt8, ary)))
else
Page_write(reinterpret(UInt8, ary))
end
end

function rnt_ary_to_page(ary::AbstractVector{Float32})
Page_write(split4_encode(reinterpret(UInt8, ary)))
function rnt_ary_to_page(ary::AbstractVector{Float32}, cr::ColumnRecord)
(;split, zigzag, delta) = _detect_encoding(cr.type)
if split
Page_write(split4_encode(reinterpret(UInt8, ary)))
else
Page_write(reinterpret(UInt8, ary))
end
end

function rnt_ary_to_page(ary::AbstractVector{Float16})
Page_write(split2_encode(reinterpret(UInt8, ary)))
function rnt_ary_to_page(ary::AbstractVector{Float16}, cr::ColumnRecord)
(;split, zigzag, delta) = _detect_encoding(cr.type)
if split
Page_write(split2_encode(reinterpret(UInt8, ary)))
else
Page_write(reinterpret(UInt8, ary))
end
end

function rnt_ary_to_page(ary::AbstractVector{UInt64})
Page_write(split8_encode(reinterpret(UInt8, ary)))
function rnt_ary_to_page(ary::AbstractVector{UInt64}, cr::ColumnRecord)
(;split, zigzag, delta) = _detect_encoding(cr.type)
if split
Page_write(split8_encode(reinterpret(UInt8, ary)))
else
Page_write(reinterpret(UInt8, ary))
end
end

function rnt_ary_to_page(ary::AbstractVector{UInt32})
Page_write(split4_encode(reinterpret(UInt8, ary)))
function rnt_ary_to_page(ary::AbstractVector{UInt32}, cr::ColumnRecord)
(;split, zigzag, delta) = _detect_encoding(cr.type)
if split
Page_write(split4_encode(reinterpret(UInt8, ary)))
else
Page_write(reinterpret(UInt8, ary))
end
end

function rnt_ary_to_page(ary::AbstractVector{UInt16})
Page_write(split2_encode(reinterpret(UInt8, ary)))
function rnt_ary_to_page(ary::AbstractVector{UInt16}, cr::ColumnRecord)
(;split, zigzag, delta) = _detect_encoding(cr.type)
if split
Page_write(split2_encode(reinterpret(UInt8, ary)))
else
Page_write(reinterpret(UInt8, ary))
end
end

function rnt_ary_to_page(ary::AbstractVector{Int64})
function rnt_ary_to_page(ary::AbstractVector{Int64}, cr::ColumnRecord)
(;split, zigzag, delta) = _detect_encoding(cr.type)
Page_write(reinterpret(UInt8, ary))
end

function rnt_ary_to_page(ary::AbstractVector{Int32})
function rnt_ary_to_page(ary::AbstractVector{Int32}, cr::ColumnRecord)
(;split, zigzag, delta) = _detect_encoding(cr.type)
Page_write(reinterpret(UInt8, ary))
end

function rnt_ary_to_page(ary::AbstractVector{Int16})
function rnt_ary_to_page(ary::AbstractVector{Int16}, cr::ColumnRecord)
(;split, zigzag, delta) = _detect_encoding(cr.type)
Page_write(reinterpret(UInt8, ary))
end

function rnt_ary_to_page(ary::AbstractVector{Int8})
function rnt_ary_to_page(ary::AbstractVector{Int8}, cr::ColumnRecord)
(;split, zigzag, delta) = _detect_encoding(cr.type)
Page_write(reinterpret(UInt8, ary))
end

Expand Down
2 changes: 2 additions & 0 deletions src/RNTuple/footer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ for x in (:RNTuplePageTopList, :RNTuplePageOuterList, :RNTuplePageInnerList)
Base.size(r::$x) = size(r.payload)
Base.getindex(r::$x, i) = r.payload[i]
Base.setindex!(r::$x, v, i) = (r.payload[i] = v)
Base.push!(r::$x, v) = push!(r.payload, v)
Base.append!(r::$x, v) = append!(r.payload, v)

end
end
Expand Down
2 changes: 1 addition & 1 deletion src/RNTuple/header.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
struct FieldRecord
Base.@kwdef struct FieldRecord
field_version::UInt32
type_version::UInt32
parent_field_id::UInt32
Expand Down
Loading

0 comments on commit 0110b5f

Please sign in to comment.