Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RNTuple] write multiple fields #349

Merged
merged 22 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@ test/samples/RNTuple/* linguist-detectable=false

# JOSS paper
paper/** linguist-detectable=false
# GitHub syntax highlighting
pixi.lock linguist-language=YAML linguist-generated=true
3 changes: 1 addition & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ jobs:
fail-fast: false
matrix:
version:
- '1.7'
- '1.8'
- 'lts'
- '1' # Leave this line unchanged. '1' will automatically expand to the latest stable 1.x release of Julia.
- 'pre'
os: [ubuntu-latest]
Expand Down
36 changes: 36 additions & 0 deletions .github/workflows/cvmfs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Workflow: write an RNTuple file with the Julia package, then read it back with
# C++ ROOT (through its Python bindings) from a CVMFS-provided LCG view.
name: Test C++ ROOT read back
on:
  pull_request:
    branches:
      - main
  push:
    branches:
      - main
    tags: '*'
jobs:
  test:
    # NOTE(review): `matrix.version` is not defined in the matrix below, so it
    # presumably renders as an empty string in the job name -- confirm before
    # relying on this name for required status checks.
    name: C++ ROOT read back rntuple files ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
    runs-on: ${{ matrix.os }}
    continue-on-error: ${{ matrix.allow_failure }}
    strategy:
      fail-fast: true
      matrix:
        # Pinned to Ubuntu 22.04 to match the `x86_64-ubuntu2204-gcc11-opt` view sourced below.
        os: [ubuntu-22.04]
        arch: [x64]
        allow_failure: [false]
    steps:
      # v2 of the checkout action targets a retired Node runtime; use the current major.
      - uses: actions/checkout@v4
      - uses: julia-actions/setup-julia@v1
        with:
          version: '1'
          arch: ${{ matrix.arch }}
      - uses: julia-actions/cache@v1
      - uses: julia-actions/julia-buildpkg@v1
      # Step 1: produce the sample file with the Julia writer.
      - name: Generate root file
        run: |
          julia --project ./test/RNTupleWriting/output_sample.jl test1.root
      # Step 2: mount CVMFS so the LCG software view is available.
      - uses: cvmfs-contrib/github-action-cvmfs@v4
      # Step 3: read the file back using the ROOT installation from the LCG view.
      - name: Read root file in C++
        run: |
          source /cvmfs/sft.cern.ch/lcg/views/dev3/latest/x86_64-ubuntu2204-gcc11-opt/setup.sh
          python ./test/RNTupleWriting/test1.py test1.root
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,6 @@
*__pycache__*
/.benchmarkci
/benchmark/*.json
# pixi environments
.pixi
*.egg-info
6 changes: 3 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ CodecXz = "^0.7"
CodecZstd = "^0.8"
DataFrames = "^1.5"
FHist = "^0.10, ^0.11"
HTTP = "^1"
HTTP = "^1.10"
InteractiveUtils = "^1.0"
IterTools = "^1"
LRUCache = "^1.3.0"
Expand All @@ -59,14 +59,14 @@ PrettyTables = "^2.1"
Random = "^1.0"
SHA = "^0.7, ^1.0"
SentinelArrays = "^1.3"
StaticArrays = "^1"
StaticArrays = "^1.5"
StructArrays = "0.6"
TOML = "^1.0"
Tables = "^1.9"
Test = "^1.0"
XRootD = "^0.1"
XXHashNative = "^1.0.1"
julia = "^1.7"
julia = "^1.10"

[extras]
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
Expand Down
8 changes: 5 additions & 3 deletions src/RNTuple/Writing/Stubs.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
module Stubs
using ..UnROOT

const WRITE_TIME = 0x7670F8CD
const WRITE_TIME = 0x768A676E
const WRITE_TIME_ary = reverse(reinterpret(UInt8, [WRITE_TIME]))

const file_preamble = [
0x72, 0x6F, 0x6F, 0x74, 0x00, 0x00, 0xF7, 0x45,
Expand All @@ -23,7 +24,8 @@ const dummy_padding2 = [
]


const RBlob1 = UnROOT.RBlob(0x00DC, 0x0004, 0x000000BA, WRITE_TIME, 0x0022, 0x0001, 244, 100, "RBlob", "", "")
const RBlob1 = UnROOT.RBlob(; fNbytes = 0x00DC, fVersion = 0x0004, fObjLen = 0x000000BA, fDatime = WRITE_TIME, fKeyLen = 0x0022,
fCycle = 0x0001, fSeekKey = 244, fSeekPdir = 100, fClassName = "RBlob", fName = "", fTitle = "")
const rnt_header = UnROOT.RNTupleHeader(zero(UInt64), "myntuple", "", "ROOT v6.33.01", [
UnROOT.FieldRecord(zero(UInt32), zero(UInt32), zero(UInt32), zero(UInt16), zero(UInt16), 0, -1, -1, "one_uint", "std::uint32_t", "", ""),
], [UnROOT.ColumnRecord(0x14, 0x20, zero(UInt32), 0x00, 0x00, 0),], UnROOT.AliasRecord[], UnROOT.ExtraTypeInfo[])
Expand Down Expand Up @@ -90,7 +92,7 @@ const tsreamerinfo_compressed = [
]

const tfile_end = [
0x00, 0x00, 0x00, 0x3F, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0A, 0x76, 0x70, 0xF8, 0xCD, 0x00, 0x35,
0x00, 0x00, 0x00, 0x3F, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0A, WRITE_TIME_ary..., 0x00, 0x35,
0x00, 0x01, 0x00, 0x00, 0x05, 0xF9, 0x00, 0x00, 0x00, 0x64, 0x00, 0x18, 0x74, 0x65, 0x73, 0x74,
0x5F, 0x6E, 0x74, 0x75, 0x70, 0x6C, 0x65, 0x5F, 0x6D, 0x69, 0x6E, 0x69, 0x6D, 0x61, 0x6C, 0x2E,
0x72, 0x6F, 0x6F, 0x74, 0x00, 0x00, 0x01, 0x00, 0x00, 0x06, 0x38, 0x77, 0x35, 0x94, 0x00,
Expand Down
82 changes: 56 additions & 26 deletions src/RNTuple/Writing/TFileWriter.jl
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@
rnt_write(io, x.fSeekKeys; legacy=true)
end

struct RBlob
Base.@kwdef struct RBlob
fNbytes::Int32
fVersion::Int16
fObjLen::Int32
Expand Down Expand Up @@ -237,7 +237,7 @@
envelope_size = temp_io.size + sizeof(Int64) + sizeof(UInt64)
id_type = 0x0001

id_length = (UInt64(envelope_size & 0xff) << 16) | id_type
id_length = (UInt64(envelope_size) << 16) | id_type

payload_ary = take!(temp_io)
prepend!(payload_ary, reinterpret(UInt8, [id_length]))
Expand All @@ -260,7 +260,7 @@
envelope_size = temp_io.size + sizeof(Int64) + sizeof(UInt64)
id_type = 0x0001

id_length = (UInt64(envelope_size & 0xff) << 16) | id_type
id_length = (UInt64(envelope_size) << 16) | id_type

payload_ary = take!(temp_io)

Expand Down Expand Up @@ -473,19 +473,45 @@
WriteObservable(io, pos, len, x)
end

"""
    add_field_column_record!(field_records, column_records, input_T::Type{<:Real}, NAME; parent_field_id)

Append one `UnROOT.FieldRecord` to `field_records` and one `UnROOT.ColumnRecord` to
`column_records`, describing a column named `NAME` whose element type is `input_T`.

The field's C++ type name is looked up in `RNTUPLE_WRITE_TYPE_CPPNAME_DICT`, and the
leading `ColumnRecord` arguments are splatted from `RNTUPLE_WRITE_TYPE_IDX_DICT`, so
`input_T` must be a key of both dictionaries (a missing key throws `KeyError`).

`parent_field_id` is a required keyword; it is stored in the field record and is also
passed to the `ColumnRecord` constructor right after the splatted dictionary entry.

Both vectors are mutated in place; the function returns `nothing`.
"""
function add_field_column_record!(field_records, column_records, input_T::Type{<:Real}, NAME; parent_field_id)
# `NAME` is converted with `string`, so a `Symbol` column name is accepted as well.
fr = UnROOT.FieldRecord(zero(UInt32), zero(UInt32), parent_field_id, zero(UInt16), zero(UInt16), 0, -1, -1, string(NAME), RNTUPLE_WRITE_TYPE_CPPNAME_DICT[input_T], "", "")
# The dictionary entry is splatted into the first constructor arguments.
cr = UnROOT.ColumnRecord(RNTUPLE_WRITE_TYPE_IDX_DICT[input_T]..., parent_field_id, 0x00, 0x00, 0)
push!(field_records, fr)
push!(column_records, cr)
nothing

Check warning on line 481 in src/RNTuple/Writing/TFileWriter.jl

View check run for this annotation

Codecov / codecov/patch

src/RNTuple/Writing/TFileWriter.jl#L481

Added line #L481 was not covered by tests
end

"""
    schema_to_field_column_records(table)

Build the RNTuple header records for every column of `table`.

Queries `schema(table)` and, for each `(type, name)` pair in schema order, calls
`add_field_column_record!`, which appends one `UnROOT.FieldRecord` and one
`UnROOT.ColumnRecord`. The number of field records collected so far is passed as the
`parent_field_id` of each new column.

Returns the tuple `(field_records, column_records)`.

Throws an error when the schema, or its column types, are unknown (`nothing`),
because the record lookup is keyed on each column's element type.
"""
function schema_to_field_column_records(table)
    input_schema = schema(table)
    # Tables.jl documents that `schema` may return `nothing` and that a schema may
    # carry `nothing` for its types; fail with a readable message instead of an
    # opaque error raised while accessing or iterating `nothing`.
    if isnothing(input_schema) || isnothing(input_schema.types)
        error("RNTuple writing requires a table with known column names and types, got type $(typeof(table))")
    end
    field_records = UnROOT.FieldRecord[]
    column_records = UnROOT.ColumnRecord[]

    for (input_T, input_name) in zip(input_schema.types, input_schema.names)
        add_field_column_record!(field_records, column_records, input_T, input_name;
                                 parent_field_id=length(field_records))
    end
    return field_records, column_records
end

"""
    generate_page_links(column_records, pages_obses, Nitems)

Assemble the nested page-location lists for a single group of pages.

`column_records` and `pages_obses` are walked in lockstep. Each pair yields one
`RNTuplePageInnerList` holding a single `PageDescription` of `Nitems` items, whose
`Locator` is built from the page size in bytes and the `position` of the page's write
observable. All inner lists are collected into one `RNTuplePageOuterList`, which is
wrapped in an `RNTuplePageTopList` and returned.
"""
function generate_page_links(column_records, pages_obses, Nitems)
    per_column = RNTuplePageOuterList{RNTuplePageInnerList{PageDescription}}([])
    for (col_rec, obs) in zip(column_records, pages_obses)
        # Bit width per item times item count, rounded up to whole bytes.
        nbytes = div(col_rec.nbits * Nitems, 8, RoundUp)
        locator = Locator(nbytes, obs.position)
        push!(per_column, RNTuplePageInnerList([PageDescription(Nitems, locator)]))
    end
    return RNTuplePageTopList([per_column])
end

function write_rntuple(file::IO, table; file_name="test_ntuple_minimal.root", rntuple_name="myntuple")
if !istable(table)
error("RNTuple writing accepts object compatible with Tables.jl interface, got type $(typeof(table))")
end

input_schema = schema(table)
input_Ncols = length(input_schema.names)
if input_Ncols != 1
error("Currently, RNTuple writing only supports a single, UInt32 column, got $input_Ncols columns")
end
input_T = only(input_schema.types)
input_col = only(columntable(table))
input_length = length(input_col)
input_cols = columntable(table)
input_length = length(input_cols[begin])
if input_length > 65535
error("Input too long: RNTuple writing currently only supports a single page (65535 elements)")
end
Expand All @@ -505,30 +531,33 @@
tdirectory32_obs = rnt_write_observe(file, Stubs.tdirectory32)
dummy_padding2_obs = rnt_write_observe(file, Stubs.dummy_padding2)

RBlob1_obs = rnt_write_observe(file, Stubs.RBlob1)
rntAnchor_update[:fSeekHeader] = UInt32(position(file))
rnt_header = UnROOT.RNTupleHeader(zero(UInt64), rntuple_name, "", "ROOT v6.33.01", [
UnROOT.FieldRecord(zero(UInt32), zero(UInt32), zero(UInt32), zero(UInt16), zero(UInt16), 0, -1, -1, string(only(input_schema.names)), RNTUPLE_WRITE_TYPE_CPPNAME_DICT[input_T], "", ""),
], [UnROOT.ColumnRecord(RNTUPLE_WRITE_TYPE_IDX_DICT[input_T]..., zero(UInt32), 0x00, 0x00, 0),], UnROOT.AliasRecord[], UnROOT.ExtraTypeInfo[])
RBlob1 = UnROOT.RBlob(; fNbytes = 0x00DC, fVersion = 0x0004, fObjLen = 0x000000BA, fDatime = Stubs.WRITE_TIME, fKeyLen = 34,
fCycle = 0x0001, fSeekKey = position(file), fSeekPdir = 100, fClassName = "RBlob", fName = "", fTitle = "")
RBlob1_update = Dict{Symbol, Any}()
RBlob1_obs = rnt_write_observe(file, RBlob1)
field_records, col_records = schema_to_field_column_records(table)
rnt_header = UnROOT.RNTupleHeader(
tamasgal marked this conversation as resolved.
Show resolved Hide resolved
zero(UInt64), rntuple_name, "", "ROOT v6.33.01",
field_records, col_records,
UnROOT.AliasRecord[], UnROOT.ExtraTypeInfo[]
)

rntAnchor_update[:fSeekHeader] = UInt32(position(file))
rnt_header_obs = rnt_write_observe(file, rnt_header)
rntAnchor_update[:fNBytesHeader] = rnt_header_obs.len
rntAnchor_update[:fLenHeader] = rnt_header_obs.len
RBlob1_update[:fObjLen] = rnt_header_obs.len
RBlob1_update[:fNbytes] = rnt_header_obs.len + 34

Base.setindex!(RBlob1_obs, RBlob1_update)

RBlob2_obs = rnt_write_observe(file, Stubs.RBlob2)
page1 = rnt_ary_to_page(input_col)
page1_obs = rnt_write_observe(file, page1)
pages = [rnt_ary_to_page(col, cr) for (col, cr) in zip(input_cols, col_records)]
pages_obses = [rnt_write_observe(file, page) for page in pages]

RBlob3_obs = rnt_write_observe(file, Stubs.RBlob3)
cluster_summary = Write_RNTupleListFrame([ClusterSummary(0, input_length)])
nested_page_locations =
UnROOT.RNTuplePageTopList([
UnROOT.RNTuplePageOuterList([
UnROOT.RNTuplePageInnerList([
PageDescription(input_length, UnROOT.Locator(sizeof(input_T) * input_length, page1_obs.position, )),
]),
]),
])
nested_page_locations = generate_page_links(col_records, pages_obses, input_length)

pagelink = UnROOT.PageLink(_checksum(rnt_header_obs.object), cluster_summary.payload, nested_page_locations)
pagelink_obs = rnt_write_observe(file, pagelink)
Expand Down Expand Up @@ -565,6 +594,7 @@
tfile_end_obs = rnt_write_observe(file, Stubs.tfile_end)
fileheader_obs[:fEND] = UInt32(position(file))

flush!(RBlob1_obs)
flush!(tkey32_anchor_obs1)
flush!(tkey32_anchor_obs2)
flush!(tkey32_tfile_obs)
Expand Down
10 changes: 5 additions & 5 deletions src/RNTuple/Writing/constants.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ const RNTUPLE_WRITE_TYPE_IDX_DICT = Dict(
Float64 => (0x10, sizeof(UInt64) * 8),
Float32 => (0x11, sizeof(UInt32) * 8),
Float16 => (0x12, sizeof(UInt16) * 8),
UInt64 => (0x13, sizeof(UInt64) * 8),
UInt32 => (0x14, sizeof(UInt32) * 8),
UInt16 => (0x15, sizeof(UInt16) * 8),
UInt64 => (0x0A, sizeof(UInt64) * 8),
UInt32 => (0x0B, sizeof(UInt32) * 8),
UInt16 => (0x0C, sizeof(UInt16) * 8),
Int64 => (0x16, sizeof(Int64) * 8),
Int32 => (0x17, sizeof(Int32) * 8),
Int16 => (0x18, sizeof(Int16) * 8),
Expand All @@ -13,8 +13,8 @@ const RNTUPLE_WRITE_TYPE_IDX_DICT = Dict(

const RNTUPLE_WRITE_TYPE_CPPNAME_DICT = Dict(
Float16 => "std::float16_t",
Float32 => "std::float32_t",
Float64 => "std::float64_t",
Float32 => "float",
Float64 => "double",
Int8 => "std::int8_t",
Int16 => "std::int16_t",
Int32 => "std::int32_t",
Expand Down
70 changes: 52 additions & 18 deletions src/RNTuple/Writing/page_writing.jl
Original file line number Diff line number Diff line change
@@ -1,48 +1,82 @@
"""
rnt_ary_to_page(ary::AbstractVector) end
rnt_ary_to_page(ary::AbstractVector, cr::ColumnRecord)

Turns an AbstractVector into a page of an RNTuple. The element type must be primitive for this to work.

"""
function rnt_ary_to_page(ary::AbstractVector) end
function rnt_ary_to_page(ary::AbstractVector, cr::ColumnRecord) end

Check warning on line 7 in src/RNTuple/Writing/page_writing.jl

View check run for this annotation

Codecov / codecov/patch

src/RNTuple/Writing/page_writing.jl#L7

Added line #L7 was not covered by tests

function rnt_ary_to_page(ary::AbstractVector{Float64})
Page_write(split8_encode(reinterpret(UInt8, ary)))
function rnt_ary_to_page(ary::AbstractVector{Float64}, cr::ColumnRecord)
(;split, zigzag, delta) = _detect_encoding(cr.type)
if split
Page_write(split8_encode(reinterpret(UInt8, ary)))
else
Page_write(reinterpret(UInt8, ary))

Check warning on line 14 in src/RNTuple/Writing/page_writing.jl

View check run for this annotation

Codecov / codecov/patch

src/RNTuple/Writing/page_writing.jl#L14

Added line #L14 was not covered by tests
end
end

function rnt_ary_to_page(ary::AbstractVector{Float32})
Page_write(split4_encode(reinterpret(UInt8, ary)))
function rnt_ary_to_page(ary::AbstractVector{Float32}, cr::ColumnRecord)
(;split, zigzag, delta) = _detect_encoding(cr.type)
if split
Page_write(split4_encode(reinterpret(UInt8, ary)))
else
Page_write(reinterpret(UInt8, ary))

Check warning on line 23 in src/RNTuple/Writing/page_writing.jl

View check run for this annotation

Codecov / codecov/patch

src/RNTuple/Writing/page_writing.jl#L23

Added line #L23 was not covered by tests
end
end

function rnt_ary_to_page(ary::AbstractVector{Float16})
Page_write(split2_encode(reinterpret(UInt8, ary)))
function rnt_ary_to_page(ary::AbstractVector{Float16}, cr::ColumnRecord)
(;split, zigzag, delta) = _detect_encoding(cr.type)
if split
Page_write(split2_encode(reinterpret(UInt8, ary)))
else
Page_write(reinterpret(UInt8, ary))

Check warning on line 32 in src/RNTuple/Writing/page_writing.jl

View check run for this annotation

Codecov / codecov/patch

src/RNTuple/Writing/page_writing.jl#L32

Added line #L32 was not covered by tests
end
end

function rnt_ary_to_page(ary::AbstractVector{UInt64})
Page_write(split8_encode(reinterpret(UInt8, ary)))
function rnt_ary_to_page(ary::AbstractVector{UInt64}, cr::ColumnRecord)
(;split, zigzag, delta) = _detect_encoding(cr.type)
if split
Page_write(split8_encode(reinterpret(UInt8, ary)))

Check warning on line 39 in src/RNTuple/Writing/page_writing.jl

View check run for this annotation

Codecov / codecov/patch

src/RNTuple/Writing/page_writing.jl#L39

Added line #L39 was not covered by tests
else
Page_write(reinterpret(UInt8, ary))
end
end

function rnt_ary_to_page(ary::AbstractVector{UInt32})
Page_write(split4_encode(reinterpret(UInt8, ary)))
function rnt_ary_to_page(ary::AbstractVector{UInt32}, cr::ColumnRecord)
(;split, zigzag, delta) = _detect_encoding(cr.type)
if split
Page_write(split4_encode(reinterpret(UInt8, ary)))

Check warning on line 48 in src/RNTuple/Writing/page_writing.jl

View check run for this annotation

Codecov / codecov/patch

src/RNTuple/Writing/page_writing.jl#L48

Added line #L48 was not covered by tests
else
Page_write(reinterpret(UInt8, ary))
end
end

function rnt_ary_to_page(ary::AbstractVector{UInt16})
Page_write(split2_encode(reinterpret(UInt8, ary)))
function rnt_ary_to_page(ary::AbstractVector{UInt16}, cr::ColumnRecord)
(;split, zigzag, delta) = _detect_encoding(cr.type)
if split
Page_write(split2_encode(reinterpret(UInt8, ary)))

Check warning on line 57 in src/RNTuple/Writing/page_writing.jl

View check run for this annotation

Codecov / codecov/patch

src/RNTuple/Writing/page_writing.jl#L57

Added line #L57 was not covered by tests
else
Page_write(reinterpret(UInt8, ary))
end
end

function rnt_ary_to_page(ary::AbstractVector{Int64})
function rnt_ary_to_page(ary::AbstractVector{Int64}, cr::ColumnRecord)
(;split, zigzag, delta) = _detect_encoding(cr.type)
Page_write(reinterpret(UInt8, ary))
end

function rnt_ary_to_page(ary::AbstractVector{Int32})
function rnt_ary_to_page(ary::AbstractVector{Int32}, cr::ColumnRecord)
(;split, zigzag, delta) = _detect_encoding(cr.type)
Page_write(reinterpret(UInt8, ary))
end

function rnt_ary_to_page(ary::AbstractVector{Int16})
function rnt_ary_to_page(ary::AbstractVector{Int16}, cr::ColumnRecord)
(;split, zigzag, delta) = _detect_encoding(cr.type)
Page_write(reinterpret(UInt8, ary))
end

function rnt_ary_to_page(ary::AbstractVector{Int8})
function rnt_ary_to_page(ary::AbstractVector{Int8}, cr::ColumnRecord)
(;split, zigzag, delta) = _detect_encoding(cr.type)
Page_write(reinterpret(UInt8, ary))
Moelf marked this conversation as resolved.
Show resolved Hide resolved
end

Expand Down
2 changes: 2 additions & 0 deletions src/RNTuple/footer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@
Base.size(r::$x) = size(r.payload)
Base.getindex(r::$x, i) = r.payload[i]
Base.setindex!(r::$x, v, i) = (r.payload[i] = v)
Base.push!(r::$x, v) = push!(r.payload, v)
Base.append!(r::$x, v) = append!(r.payload, v)

Check warning on line 124 in src/RNTuple/footer.jl

View check run for this annotation

Codecov / codecov/patch

src/RNTuple/footer.jl#L124

Added line #L124 was not covered by tests

end
end
Expand Down
2 changes: 1 addition & 1 deletion src/RNTuple/header.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
struct FieldRecord
Base.@kwdef struct FieldRecord
field_version::UInt32
type_version::UInt32
parent_field_id::UInt32
Expand Down
Loading
Loading