From ffa8337995fc798f6d55f651e15cd357393e9151 Mon Sep 17 00:00:00 2001 From: Florian Henkes Date: Wed, 20 Nov 2024 01:38:41 +0100 Subject: [PATCH 01/19] Move is_analysis_run to specific category --- src/dataprod_config.jl | 34 +++++++++++++++++++--------------- src/exposure.jl | 16 +++++----------- 2 files changed, 24 insertions(+), 26 deletions(-) diff --git a/src/dataprod_config.jl b/src/dataprod_config.jl index 7cfdf31d..fc122591 100644 --- a/src/dataprod_config.jl +++ b/src/dataprod_config.jl @@ -245,24 +245,31 @@ function runinfo(data::LegendData) # load runinfo rinfo = PropDict(data.metadata.dataprod.runinfo) parts_default = merge(values(pydataprod_config(data).partitions.default)...) - nttype = @NamedTuple{startkey::MaybeFileKey, livetime::typeof(1.0u"s")} + nttype = @NamedTuple{startkey::MaybeFileKey, livetime::typeof(1.0u"s"), is_analysis_run::Bool} function make_row(p, r, ri) period::DataPeriod = DataPeriod(p) run::DataRun = DataRun(r) function get_cat_entry(cat) if haskey(ri, cat) fk = ifelse(haskey(ri[cat], :start_key), FileKey(data.name, period, run, cat, Timestamp(get(ri[cat], :start_key, 1))), missing) - nttype((fk, get(ri[cat], :livetime_in_s, NaN)*u"s")) + is_ana_run = if cat == :phy + (; period, run) in analysis_runs(data) && !ismissing(fk) + elseif cat == :cal + "$run" in get(parts_default, period, []) + else + false + end + nttype((fk, get(ri[cat], :livetime_in_s, NaN)*u"s", Bool(is_ana_run))) else - nttype((missing, NaN*u"s")) + nttype((missing, NaN*u"s", Bool(false))) end end - is_ana_phy_run = (; period, run) in analysis_runs(data) && !ismissing(get_cat_entry(:phy).startkey) - is_ana_cal_run = "$run" in get(parts_default, period, []) - @NamedTuple{period::DataPeriod, run::DataRun, is_analysis_cal_run::Bool, is_analysis_phy_run::Bool, cal::nttype, phy::nttype, fft::nttype}((period, run, Bool(is_ana_cal_run), Bool(is_ana_phy_run), get_cat_entry(:cal), get_cat_entry(:phy), get_cat_entry(:fft))) + # is_ana_phy_run = (; period, run) in 
analysis_runs(data) && !ismissing(get_cat_entry(:phy).startkey) + # is_ana_cal_run = "$run" in get(parts_default, period, []) + @NamedTuple{period::DataPeriod, run::DataRun, cal::nttype, phy::nttype, fft::nttype}((period, run, get_cat_entry(:cal), get_cat_entry(:phy), get_cat_entry(:fft))) end periods_and_runs = [[make_row(p, r, ri) for (r, ri) in rs] for (p, rs) in rinfo] - flat_pr = sort(StructArray(vcat(periods_and_runs...)::Vector{@NamedTuple{period::DataPeriod, run::DataRun, is_analysis_cal_run::Bool, is_analysis_phy_run::Bool, cal::nttype, phy::nttype, fft::nttype}})) + flat_pr = sort(StructArray(vcat(periods_and_runs...)::Vector{@NamedTuple{period::DataPeriod, run::DataRun, cal::nttype, phy::nttype, fft::nttype}})) Table(merge(columns(flat_pr), (cal = Table(StructArray(flat_pr.cal)), phy = Table(StructArray(flat_pr.phy)), fft = Table(StructArray(flat_pr.fft))))) end end @@ -335,7 +342,7 @@ export is_lrun Return `true` if `run` is an analysis run for `data` in `period`. # ATTENTION: This is only valid for `phy` runs. """ -is_analysis_phy_run(data::LegendData, runsel::RunSelLike) = runinfo(data, runsel).is_analysis_phy_run +is_analysis_phy_run(data::LegendData, runsel::RunSelLike) = runinfo(data, runsel).phy.is_analysis_run """ is_analysis_cal_run(data::LegendData, (period::DataPeriodLike, run::DataRunLike)) @@ -343,7 +350,7 @@ is_analysis_phy_run(data::LegendData, runsel::RunSelLike) = runinfo(data, runsel Return `true` if `run` is an analysis run for `data` in `period`. # ATTENTION: This is only valid for `cal` runs. 
""" -is_analysis_cal_run(data::LegendData, runsel::RunSelLike) = runinfo(data, runsel).is_analysis_cal_run +is_analysis_cal_run(data::LegendData, runsel::RunSelLike) = runinfo(data, runsel).cal.is_analysis_run """ is_analysis_run(data::LegendData, (period::DataPeriodLike, run::DataRunLike, cat::DataCategoryLike)) @@ -360,13 +367,10 @@ function is_analysis_run(data::LegendData, runsel::RunCategorySelLike) # unpack runsel period, run, category = runsel period, run, category = DataPeriod(period), DataRun(run), DataCategory(category) - if category == DataCategory(:cal) - is_analysis_cal_run(data, (period, run)) - elseif category == DataCategory(:phy) - is_analysis_phy_run(data, (period, run)) - else - throw(ArgumentError("Invalid category $(runs.category) for analysis run")) + if !(hasproperty(runinfo(data), Symbol(category))) + throw(ArgumentError("Invalid category $category for analysis run")) end + runinfo(data, runsel).is_analysis_run end is_analysis_run(data::LegendData, fk::FileKey) = is_analysis_run(data, (fk.period, fk.run, fk.category)) is_analysis_run(data::LegendData, selectors...) 
= is_analysis_run(data, selectors) diff --git a/src/exposure.jl b/src/exposure.jl index 86e69bc9..c7c9cb59 100644 --- a/src/exposure.jl +++ b/src/exposure.jl @@ -33,7 +33,7 @@ get_exposure(l200, :V00050A, DataPartition(1)) """ function get_exposure(data::LegendData, det::DetectorIdLike, period::DataPeriodLike, run::DataRunLike; is_analysis_run::Bool=true, cat::DataCategoryLike=:phy) rinfo = runinfo(data, period, run) - _get_exposure(data, det, Table([rinfo]), is_analysis_run, cat) + _get_exposure(data, det, rinfo, is_analysis_run, cat) end function get_exposure(data::LegendData, det::DetectorIdLike, period::DataPeriod; is_analysis_run::Bool=true, cat::DataCategoryLike=:phy) @@ -56,7 +56,7 @@ function get_exposure(data::LegendData, det::DetectorIdLike, sel::Union{Abstract selectors = (DataPartition, DataPeriod) for SEL in selectors if _can_convert_to(SEL, sel) - return _get_exposure(data, det, SEL(sel); kwargs...) + return _get_exposure(data, det, SEL(sel); kwargs...) end end throw(ArgumentError("The selector $(sel) cannot be converted to type: $(selectors)")) @@ -70,20 +70,14 @@ function _get_exposure(data::LegendData, det::DetectorIdLike, rinfo::Table, is_a if !(_can_convert_to(DataCategory, cat) && hasproperty(rinfo, DataCategory(cat).label)) throw(ArgumentError("Data category `$(cat)`` is invalid")) end - cat_label::Symbol = DataCategory(cat).label + cat_label::Symbol = Symbol(DataCategory(cat)) # determine livetime rinfo_cat = getproperty(rinfo, cat_label) livetimes = getproperty.(rinfo_cat, :livetime) - # if is_analysis_run == true: - # check that the analysis flag is valid and apply it - analysis_flag = Symbol("is_analysis_$(cat_label)_run") - if is_analysis_run - if !hasproperty(rinfo, analysis_flag) - throw(ArgumentError("No column `$(analysis_flag)` found. 
Please set `is_analysis_run = false` in `get_exposure`")) - end - livetimes = livetimes .* getproperty(rinfo, analysis_flag) + if is_analysis_run + livetimes = livetimes .* getproperty(rinfo_cat, :is_analysis_run) end # sum up all livetimes (excluding NaN values) livetime = !isempty(livetimes) ? sum((livetimes .* .!isnan.(livetimes))) : 0.0u"s" From 4fb56a62ab702d0d853396bc48d8cc57fa6fed1a Mon Sep 17 00:00:00 2001 From: Florian Henkes Date: Thu, 21 Nov 2024 01:43:28 +0100 Subject: [PATCH 02/19] search_disk can search for DataSets on disk including pre-cache and saving outputs on disk in a save_tier --- Project.toml | 2 ++ src/LegendDataManagement.jl | 1 + src/legend_data.jl | 45 +++++++++++++++++++++++++++++++++++-- 3 files changed, 46 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index ab611a68..6849800e 100644 --- a/Project.toml +++ b/Project.toml @@ -14,6 +14,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MIMEs = "6c6e2e6c-3030-632d-7369-2d6c69616d65" Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" Measurements = "eff96d63-e80a-5855-80a2-b1b0885c5ab7" +OhMyThreads = "67456a42-1dca-4109-a031-0a68de7e3ad5" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" @@ -55,6 +56,7 @@ LinearAlgebra = "<0.0.1, 1" MIMEs = "0.1, 1" Markdown = "<0.0.1, 1" Measurements = "2.2.1" +OhMyThreads = "0.5, 0.6, 0.7" Pkg = "1" Plots = "<0.0.1, 1" Printf = "<0.0.1, 1" diff --git a/src/LegendDataManagement.jl b/src/LegendDataManagement.jl index 9710ae06..928a31e7 100644 --- a/src/LegendDataManagement.jl +++ b/src/LegendDataManagement.jl @@ -24,6 +24,7 @@ using Printf: @printf using IntervalSets: AbstractInterval, ClosedInterval, leftendpoint, rightendpoint using LRUCache: LRU +using OhMyThreads: @tasks, tmapreduce using ProgressMeter: @showprogress using PropertyFunctions: PropertyFunction, @pf, filterby, props2varsyms, PropSelFunction using 
StaticStrings: StaticString diff --git a/src/legend_data.jl b/src/legend_data.jl index 910a5522..77303f94 100644 --- a/src/legend_data.jl +++ b/src/legend_data.jl @@ -212,8 +212,16 @@ end """ search_disk(::Type{<:DataSelector}, path::AbstractString) + search_disk(::Type{DataSet}, data::LegendData; search_categories::Vector{<:DataCategoryLike} = DataCategory.([:cal, :phy]), search_tier::DataTierLike = DataTier(:raw), only_analysis_runs::Bool=true, save_filekeys::Bool=true, ignore_save_tier::Bool=false, save_tier::DataTierLike=DataTier(:jlfks)) -Search on-disk data for data categories, periods, runs, and filekeys. +Search on-disk data for data categories, periods, runs, and filekeys or whole datasets +If you want to search for a whole `DataSet`, you have the following keyword options: + - `search_categories` (default: `[:cal, :phy]`): The categories to search on disk. + - `search_tier` (default: `DataTier(:raw)`): The tier to search on disk. + - `only_analysis_runs` (default: `true`): Only include for analysis runs as defined in the metadata + - `save_filekeys` (default: `true`): Save the filekeys to a file in the `save_tier` directory. + - `ignore_save_tier` (default: `false`): Ignore the `save_tier` and do not save the filekeys. + - `save_tier` (default: `DataTier(:jlfks)`): The tier to save the filekeys to. Examples: @@ -224,17 +232,50 @@ search_disk(DataCategory, l200.tier[:raw]) search_disk(DataPeriod, l200.tier[:raw, :cal]) search_disk(DataRun, l200.tier[:raw, :cal, "p02"]) search_disk(FileKey, l200.tier[DataTier(:raw), :cal, DataPeriod(2), "r006"]) +search_disk(DataSet, l200) ``` """ function search_disk end export search_disk -function search_disk(::Type{DT}, path::AbstractString) where DT<:DataSelector +function search_disk(::Type{DT}, path::AbstractString; kwargs...) 
where DT<:DataSelector all_files = readdir(path) valid_files = filter(filename -> _can_convert_to(DT, filename), all_files) return unique(sort(DT.(valid_files))) end +const _cached_dataset = LRU{Tuple{UInt, Vector{<:DataCategoryLike}, DataTierLike, DataTierLike}, DataSet}(maxsize = 10^3) + +function search_disk(::Type{DataSet}, data::LegendData; search_categories::Vector{<:DataCategoryLike} = DataCategory.([:cal, :phy]), search_tier::DataTierLike = DataTier(:raw), only_analysis_runs::Bool=true, save_filekeys::Bool=true, ignore_save_tier::Bool=false, save_tier::DataTierLike=DataTier(:jlfks)) + key = (objectid(data), search_categories, search_tier, save_tier) + if ignore_save_tier + delete!(key, ignore_save_tier) + end + get!(_cached_dataset, key) do + DataSet(let rinfo = runinfo(data) + sort(tmapreduce(vcat, rinfo) do ri + vcat([let keylist_filename = joinpath(data.tier[save_tier, cat, ri.period, ri.run], "filekeys.txt"), search_path = data.tier[search_tier, cat, ri.period, ri.run] + if !ispath(search_path) + Vector{FileKey}() + elseif only_analysis_runs && !is_analysis_run(data, ri.period, ri.run, cat) + Vector{FileKey}() + elseif isfile(keylist_filename) && !ignore_save_tier + read_filekeys(keylist_filename) + else + let fks = search_disk(FileKey, search_path) + if save_filekeys + mkpath(dirname(keylist_filename)) + write_filekeys(keylist_filename, fks) + end + fks + end + end + end for cat in search_categories]...) 
+ end) + end) + end +end + const _cached_channelinfo = LRU{Tuple{UInt, AnyValiditySelection}, StructVector}(maxsize = 10^3) From 26ef4418df2778a718b5f21174ae3ab3ec5b5153 Mon Sep 17 00:00:00 2001 From: Florian Henkes Date: Thu, 21 Nov 2024 01:44:01 +0100 Subject: [PATCH 03/19] write_validity incorporates ReentrantLock on worker 1 to avoid to processes writing a validity at the same time --- src/utils/pars_utils.jl | 50 +++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/src/utils/pars_utils.jl b/src/utils/pars_utils.jl index d3202564..5f26c048 100644 --- a/src/utils/pars_utils.jl +++ b/src/utils/pars_utils.jl @@ -38,29 +38,35 @@ Write validity for a given filekey. function writevalidity end export writevalidity function writevalidity(props_db::LegendDataManagement.MaybePropsDB, filekey::FileKey, apply::Vector{String}; category::DataCategoryLike=:all) + remotecall_fetch(_writevalidity_impl, 1, props_db, filekey, apply; category=category) +end +const _writevalidity_lock = ReentrantLock() +function _writevalidity_impl(props_db::LegendDataManagement.MaybePropsDB, filekey::FileKey, apply::Vector{String}; category::DataCategoryLike=:all) # write validity - # get timestamp from filekey - pars_validTimeStamp = string(filekey.time) - # get validity filename and check if exists - validity_filename = joinpath(data_path(props_db), "validity.jsonl") - mkpath(dirname(validity_filename)) - touch(validity_filename) - # check if validity already written - validity_lines = readlines(validity_filename) - # check if given validity already exists - is_validity = findall(x -> contains(x, "$pars_validTimeStamp") && contains(x, "$(string(category))"), validity_lines) - if isempty(is_validity) - @info "Write new validity for $pars_validTimeStamp" - push!(validity_lines, "{\"valid_from\":\"$pars_validTimeStamp\", \"category\":\"$(string(category))\", \"apply\":[\"$(join(sort(apply), "\", \""))\"]}") - elseif length(is_validity) == 1 - 
@info "Merge old $pars_validTimeStamp $(string(category)) validity entry" - apply = unique(append!(Vector{String}(JSON.parse(validity_lines[first(is_validity)])["apply"]), apply)) - validity_lines[first(is_validity)] = "{\"valid_from\":\"$pars_validTimeStamp\", \"category\":\"$(string(category))\", \"apply\":[\"$(join(sort(apply), "\", \""))\"]}" - end - # write validity - open(validity_filename, "w") do io - for line in sort(validity_lines) - println(io, line) + @lock _writevalidity_lock begin + # get timestamp from filekey + pars_validTimeStamp = string(filekey.time) + # get validity filename and check if exists + validity_filename = joinpath(data_path(props_db), "validity.jsonl") + mkpath(dirname(validity_filename)) + touch(validity_filename) + # check if validity already written + validity_lines = readlines(validity_filename) + # check if given validity already exists + is_validity = findall(x -> contains(x, "$pars_validTimeStamp") && contains(x, "$(string(category))"), validity_lines) + if isempty(is_validity) + @info "Write new validity for $pars_validTimeStamp" + push!(validity_lines, "{\"valid_from\":\"$pars_validTimeStamp\", \"category\":\"$(string(category))\", \"apply\":[\"$(join(sort(apply), "\", \""))\"]}") + elseif length(is_validity) == 1 + @info "Merge old $pars_validTimeStamp $(string(category)) validity entry" + apply = unique(append!(Vector{String}(JSON.parse(validity_lines[first(is_validity)])["apply"]), apply)) + validity_lines[first(is_validity)] = "{\"valid_from\":\"$pars_validTimeStamp\", \"category\":\"$(string(category))\", \"apply\":[\"$(join(sort(apply), "\", \""))\"]}" + end + # write validity + open(validity_filename, "w") do io + for line in sort(validity_lines) + println(io, line) + end end end end From f9310450b4fd9688ab016f94e763d5f05ff466f8 Mon Sep 17 00:00:00 2001 From: Florian Henkes Date: Tue, 26 Nov 2024 02:49:00 +0100 Subject: [PATCH 04/19] Allow string parsing --- src/ljl_expressions.jl | 1 + 1 file changed, 1 insertion(+) 
diff --git a/src/ljl_expressions.jl b/src/ljl_expressions.jl index 5f0d1764..8011a374 100644 --- a/src/ljl_expressions.jl +++ b/src/ljl_expressions.jl @@ -48,6 +48,7 @@ const ljl_expr_allowed_funcs = Set([ :value, :uncertainty, :stdscore, :weightedmean, :±, :(:), :Symbol, :String, :Int, :Float64, :Bool, + :string, :parse, :DetectorId, :ChannelId ]) From 9768018844871b1b65c08747bc12f81ee06a5a9d Mon Sep 17 00:00:00 2001 From: Florian Henkes Date: Tue, 26 Nov 2024 02:49:59 +0100 Subject: [PATCH 05/19] LegendTimeStamps can be Unitful.Time objects to allow parsing of timestamps --- src/filekey.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/filekey.jl b/src/filekey.jl index 9c086e93..fae85965 100644 --- a/src/filekey.jl +++ b/src/filekey.jl @@ -411,9 +411,12 @@ export Timestamp Dates.DateTime(timestamp::Timestamp) = Dates.unix2datetime(timestamp.unixtime) Timestamp(datetime::Dates.DateTime) = Timestamp(round(Int, Dates.datetime2unix(datetime))) +Timestamp(ts::Unitful.Time{<:Real}) = Timestamp(Dates.unix2datetime(ustrip(u"s", ts))) _can_convert_to(::Type{Timestamp}, s::AbstractString) = _is_timestamp_string(s) || _is_filekey_string(s) _can_convert_to(::Type{Timestamp}, s::Integer) = true +_can_convert_to(::Type{Timestamp}, s::Dates.DateTime) = true +_can_convert_to(::Type{Timestamp}, s::Unitful.Time{<:Real}) = true _can_convert_to(::Type{Timestamp}, s::Timestamp) = true _can_convert_to(::Type{Timestamp}, s) = false @@ -429,6 +432,7 @@ end Base.convert(::Type{Timestamp}, s::AbstractString) = Timestamp(s) Base.convert(::Type{Timestamp}, datetime::DateTime) = Timestamp(datetime) +Base.convert(::Type{Timestamp}, ts::Unitful.Time{<:Real}) = Timestamp(ts) Base.:(==)(a::Timestamp, b::Timestamp) = a.unixtime == b.unixtime @@ -457,7 +461,7 @@ _timestamp_from_string(s::AbstractString) = DateTime(Timestamp(s)) Anything that can represent a timestamp, like `Timestamp("20221226T200846Z")` or "20221226T200846Z". 
""" -const TimestampLike = Union{Timestamp, AbstractString, Integer} +const TimestampLike = Union{Timestamp, AbstractString, Integer, Unitful.Time{<:Real}} export TimestampLike From 3f7fa441429fd7f3d605d13b0cea981cee11fad7 Mon Sep 17 00:00:00 2001 From: Florian Henkes Date: Tue, 26 Nov 2024 02:50:40 +0100 Subject: [PATCH 06/19] Check if channel keys are valid ChannelId Or DetectorId to not parse ORCAHeader etc. --- ext/LegendDataManagementLegendHDF5IOExt.jl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ext/LegendDataManagementLegendHDF5IOExt.jl b/ext/LegendDataManagementLegendHDF5IOExt.jl index 3282478d..0fc2465c 100644 --- a/ext/LegendDataManagementLegendHDF5IOExt.jl +++ b/ext/LegendDataManagementLegendHDF5IOExt.jl @@ -12,6 +12,17 @@ const ChannelOrDetectorIdLike = Union{ChannelIdLike, DetectorIdLike} const AbstractDataSelectorLike = Union{AbstractString, Symbol, DataTierLike, DataCategoryLike, DataPeriodLike, DataRunLike, DataPartitionLike, ChannelOrDetectorIdLike} const PossibleDataSelectors = [DataTier, DataCategory, DataPeriod, DataRun, DataPartition, ChannelId, DetectorId] +function _is_valid_channel(data::LegendData, rsel::Union{AnyValiditySelection, RunCategorySelLike}, det::ChannelOrDetectorIdLike) + if LegendDataManagement._can_convert_to(ChannelId, det) + true + elseif LegendDataManagement._can_convert_to(DetectorId, det) + detector2channel(data, rsel, det) isa ChannelId + else + @warn "Skipped $det since it is neither a valid ChannelId nor DetectorId" + false + end +end + function _get_channelid(data::LegendData, rsel::Union{AnyValiditySelection, RunCategorySelLike}, det::ChannelOrDetectorIdLike) if LegendDataManagement._can_convert_to(ChannelId, det) ChannelId(det) @@ -143,6 +154,7 @@ function LegendDataManagement.read_ldata(f::Base.Callable, data::LegendData, rse ch_keys = _lh5_data_open(data, rsel[1], rsel[2], "") do h keys(h) end + filter!(x -> _is_valid_channel(data, rsel[2], x), ch_keys) @debug "Found keys: $ch_keys" if 
length(ch_keys) == 1 if string(only(ch_keys)) == string(rsel[1]) From 4cff1b5d5da63e9294c7be90ba47edf168a5a551 Mon Sep 17 00:00:00 2001 From: Florian Henkes Date: Tue, 26 Nov 2024 02:51:26 +0100 Subject: [PATCH 07/19] Find filekey to determine closest FileKey + channelinfo sort and usability functions --- src/legend_data.jl | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/src/legend_data.jl b/src/legend_data.jl index 77303f94..bf9f22a9 100644 --- a/src/legend_data.jl +++ b/src/legend_data.jl @@ -276,17 +276,34 @@ function search_disk(::Type{DataSet}, data::LegendData; search_categories::Vecto end end +""" + find_filekey(ds::DataSet, ts::TimestampLike) + find_filekey(data::LegendData, ts::TimestampLike; kwargs...) +Find the filekey in a dataset that is closest to a given timestamp. +The kwargs are passed to `search_disk` to generate the `DataSet`. +""" +function find_filekey end +export find_filekey + +function find_filekey(ds::DataSet, ts::TimestampLike) + last(filter(fk -> fk.time < Timestamp(ts), ds.keys)) +end + +function find_filekey(data::LegendData, ts; kwargs...) + find_filekey(search_disk(DataSet, data; kwargs...), ts) +end + const _cached_channelinfo = LRU{Tuple{UInt, AnyValiditySelection}, StructVector}(maxsize = 10^3) """ - channelinfo(data::LegendData, sel::AnyValiditySelection; system::Symbol = :all, only_processable::Bool = false) - channelinfo(data::LegendData, sel::RunCategorySelLike; system::Symbol = :all, only_processable::Bool = false) + channelinfo(data::LegendData, sel::AnyValiditySelection; system::Symbol = :all, only_processable::Bool = false, only_usability::Symbol = :all, extended::Bool = false) + channelinfo(data::LegendData, sel::RunCategorySelLike; system::Symbol = :all, only_processable::Bool = false, only_usability::Symbol = :all, extended::Bool = false) Get all channel information for the given [`LegendData`](@ref) and [`ValiditySelection`](@ref). 
""" -function channelinfo(data::LegendData, sel::AnyValiditySelection; system::Symbol = :all, only_processable::Bool = false, extended::Bool = false, verbose::Bool = true) +function channelinfo(data::LegendData, sel::AnyValiditySelection; system::Symbol = :all, only_processable::Bool = false, only_usability::Symbol = :all, sort_by::Symbol=:detector, extended::Bool = false, verbose::Bool = true) key = (objectid(data), sel) chinfo = get!(_cached_channelinfo, key) do chmap = data.metadata(sel).hardware.configuration.channelmaps @@ -375,12 +392,22 @@ function channelinfo(data::LegendData, sel::AnyValiditySelection; system::Symbol StructVector(make_row.(channel_keys)) end + # apply filters and masks if !(system == :all) chinfo = chinfo |> filterby(@pf $system .== system) end if only_processable chinfo = chinfo |> filterby(@pf $processable .== true) end + if !(only_usability == :all) + chinfo = chinfo |> filterby(@pf $usability .== usability) + end + # apply sorting + if sort_by == :detector + chinfo = chinfo |> sortby(@pf string($detector)) + elseif sort_by == :channel + chinfo = chinfo |> sortby(@pf string($channel)) + end return Table(chinfo) end export channelinfo From c74f11fb5c19d2d811cc81a5617ac528fe6b4ba2 Mon Sep 17 00:00:00 2001 From: Florian Henkes Date: Tue, 26 Nov 2024 02:51:50 +0100 Subject: [PATCH 08/19] Event plotting recipes to allow to plot waveforms for a given timestamp --- ext/LegendDataManagementPlotsExt.jl | 74 ++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/ext/LegendDataManagementPlotsExt.jl b/ext/LegendDataManagementPlotsExt.jl index 0bd0dcaa..8de85242 100644 --- a/ext/LegendDataManagementPlotsExt.jl +++ b/ext/LegendDataManagementPlotsExt.jl @@ -8,8 +8,10 @@ using PropDicts using Statistics using TypedTables using Unitful +using Dates using Format using Measurements: value, uncertainty, weightedmean +using LegendDataManagement @recipe function f( chinfo::Table, @@ -131,7 +133,77 @@ using Measurements: 
value, uncertainty, weightedmean seriescolor := c markerstrokecolor := c xvalues, value.(yvalues) - end + end +end + +@recipe function f(data::LegendData, fk::FileKey, ts::Unitful.Time{<:Real}, ch::ChannelIdLike; plot_tier=DataTier(:raw), plot_waveform=:waveform_presummed) + framestyle := :box + margins := (1, :mm) + yformatter := :plain + raw = read_ldata(data, plot_tier, fk, ch) + idx = findfirst(isequal(ts), raw.timestamp) + if isnothing(idx) + throw(ArgumentError("Timestamp $ts not found in the data")) + end + plot_title := "$(channelinfo(data, fk, ch).detector) ($(ch)) - Evt $(ts)" + @series begin + label := "$(Dates.unix2datetime(ustrip.(u"s", raw.timestamp[idx])))" + xunit := u"µs" + getproperty(raw, plot_waveform)[idx] + end +end + +@recipe function f(data::LegendData, ts::Unitful.Time{<:Real}, ch::ChannelIdLike; plot_tier=DataTier(:raw), plot_waveform=:waveform_presummed) + fk = find_filekey(data, ts) + @series begin + plot_tier := plot_tier + plot_waveform := plot_waveform + data, fk, ts, ch + end +end + +@recipe function f(data::LegendData, ts::Unitful.Time{<:Real}; plot_tier=DataTier(:raw), plot_waveform=:waveform_presummed, system=[:geds], only_processable=true) + fk = find_filekey(data, ts) + framestyle := :box + margins := (1, :mm) + yformatter := :plain + if fk.category == DataCategory(:cal) + @debug "Got $(fk.category) event, looking for raw event" + timestamps = read_ldata(:timestamp, data, DataTier(:raw), fk) + ch_ts = "" + for ch in keys(timestamps) + if any(ts .== timestamps[ch].timestamp) + ch_ts = string(ch) + @debug "Found event $ts in channel $ch" + break + end + end + if isempty(ch_ts) + throw(ArgumentError("Timestamp $ts not found in the data")) + end + ch = ChannelId(ch_ts) + @series begin + plot_tier := plot_tier + plot_waveform := plot_waveform + data, fk, ts, ch + end + elseif fk.category == DataCategory(:phy) + raw = read_ldata(data, plot_tier, fk) + sys = system[1] + chinfo = channelinfo(data, fk; system=sys, 
only_processable=only_processable) + idx = findfirst(isequal(ts), raw[first(chinfo.channel)].timestamp) + if isnothing(idx) + throw(ArgumentError("Timestamp $ts not found in the data")) + end + for chinfo_ch in chinfo + plot_title := "$(channelinfo(data, fk, ch).detector) ($(ch))" + @series begin + label := "$(chinfo_ch.detector) ($(chinfo_ch.channel))" + xunit := u"µs" + getproperty(raw, plot_waveform)[idx] + end + end + end end From f6889dce3432ca37d74961363d18faa395e1583c Mon Sep 17 00:00:00 2001 From: Florian Henkes Date: Tue, 26 Nov 2024 10:21:14 -0800 Subject: [PATCH 09/19] Allow sortby string and all keys of table --- src/LegendDataManagement.jl | 2 +- src/legend_data.jl | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/LegendDataManagement.jl b/src/LegendDataManagement.jl index 928a31e7..062acac9 100644 --- a/src/LegendDataManagement.jl +++ b/src/LegendDataManagement.jl @@ -26,7 +26,7 @@ using IntervalSets: AbstractInterval, ClosedInterval, leftendpoint, rightendpoin using LRUCache: LRU using OhMyThreads: @tasks, tmapreduce using ProgressMeter: @showprogress -using PropertyFunctions: PropertyFunction, @pf, filterby, props2varsyms, PropSelFunction +using PropertyFunctions: PropertyFunction, @pf, filterby, sortby, props2varsyms, PropSelFunction using StaticStrings: StaticString import Tables using Tables: columns diff --git a/src/legend_data.jl b/src/legend_data.jl index bf9f22a9..fdade584 100644 --- a/src/legend_data.jl +++ b/src/legend_data.jl @@ -400,13 +400,13 @@ function channelinfo(data::LegendData, sel::AnyValiditySelection; system::Symbol chinfo = chinfo |> filterby(@pf $processable .== true) end if !(only_usability == :all) - chinfo = chinfo |> filterby(@pf $usability .== usability) + chinfo = chinfo |> filterby(@pf $usability .== only_usability) end # apply sorting - if sort_by == :detector - chinfo = chinfo |> sortby(@pf string($detector)) - elseif sort_by == :channel - chinfo = chinfo |> sortby(@pf string($channel)) 
+ if sort_by == :string + chinfo = chinfo |> sortby(@pf $detstring * maximum(chinfo.position) + $position) + elseif hasproperty(chinfo, sort_by) + chinfo = chinfo |> sortby(ljl_propfunc("string($sort_by)")) end return Table(chinfo) end From 0a78a9da21ef0b9b9956c903bfc4a214bdb0fa41 Mon Sep 17 00:00:00 2001 From: Florian Henkes Date: Tue, 3 Dec 2024 12:01:43 +0100 Subject: [PATCH 10/19] Valid key check also allows DataTiers --- ext/LegendDataManagementLegendHDF5IOExt.jl | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/ext/LegendDataManagementLegendHDF5IOExt.jl b/ext/LegendDataManagementLegendHDF5IOExt.jl index 0fc2465c..017d09ef 100644 --- a/ext/LegendDataManagementLegendHDF5IOExt.jl +++ b/ext/LegendDataManagementLegendHDF5IOExt.jl @@ -12,15 +12,13 @@ const ChannelOrDetectorIdLike = Union{ChannelIdLike, DetectorIdLike} const AbstractDataSelectorLike = Union{AbstractString, Symbol, DataTierLike, DataCategoryLike, DataPeriodLike, DataRunLike, DataPartitionLike, ChannelOrDetectorIdLike} const PossibleDataSelectors = [DataTier, DataCategory, DataPeriod, DataRun, DataPartition, ChannelId, DetectorId] -function _is_valid_channel(data::LegendData, rsel::Union{AnyValiditySelection, RunCategorySelLike}, det::ChannelOrDetectorIdLike) - if LegendDataManagement._can_convert_to(ChannelId, det) - true - elseif LegendDataManagement._can_convert_to(DetectorId, det) - detector2channel(data, rsel, det) isa ChannelId - else - @warn "Skipped $det since it is neither a valid ChannelId nor DetectorId" - false - end +function _is_valid_channel_or_tier(data::LegendData, rsel::Union{AnyValiditySelection, RunCategorySelLike}, det::ChannelOrDetectorIdLike) + if LegendDataManagement._can_convert_to(ChannelId, det) || LegendDataManagement._can_convert_to(DetectorId, det) || LegendDataManagement._can_convert_to(DataTier, det) + true + else + @warn "Skipped $det since it is neither a valid `ChannelId`, `DetectorId` nor a `DataTier`" + false + end end function 
_get_channelid(data::LegendData, rsel::Union{AnyValiditySelection, RunCategorySelLike}, det::ChannelOrDetectorIdLike) @@ -154,7 +152,7 @@ function LegendDataManagement.read_ldata(f::Base.Callable, data::LegendData, rse ch_keys = _lh5_data_open(data, rsel[1], rsel[2], "") do h keys(h) end - filter!(x -> _is_valid_channel(data, rsel[2], x), ch_keys) + filter!(x -> _is_valid_channel_or_tier(data, rsel[2], x), ch_keys) @debug "Found keys: $ch_keys" if length(ch_keys) == 1 if string(only(ch_keys)) == string(rsel[1]) From af22ebd695e4a8f64111e4395804800758624717 Mon Sep 17 00:00:00 2001 From: Florian Henkes Date: Tue, 3 Dec 2024 12:02:05 +0100 Subject: [PATCH 11/19] Include phy event plot recipe --- ext/LegendDataManagementPlotsExt.jl | 98 ++++++++++++++++++++++------- 1 file changed, 76 insertions(+), 22 deletions(-) diff --git a/ext/LegendDataManagementPlotsExt.jl b/ext/LegendDataManagementPlotsExt.jl index 8de85242..55d73661 100644 --- a/ext/LegendDataManagementPlotsExt.jl +++ b/ext/LegendDataManagementPlotsExt.jl @@ -136,33 +136,48 @@ using LegendDataManagement end end -@recipe function f(data::LegendData, fk::FileKey, ts::Unitful.Time{<:Real}, ch::ChannelIdLike; plot_tier=DataTier(:raw), plot_waveform=:waveform_presummed) +@recipe function f(data::LegendData, fk::FileKey, ts::Unitful.Time{<:Real}, ch::ChannelIdLike; plot_tier=DataTier(:raw), plot_waveform=[:waveform_presummed], show_unixtime=false) framestyle := :box - margins := (1, :mm) + margins := (0.5, :mm) yformatter := :plain + legend := :bottomright + plot_titlefontsize := 12 raw = read_ldata(data, plot_tier, fk, ch) idx = findfirst(isequal(ts), raw.timestamp) if isnothing(idx) throw(ArgumentError("Timestamp $ts not found in the data")) end - plot_title := "$(channelinfo(data, fk, ch).detector) ($(ch)) - Evt $(ts)" - @series begin - label := "$(Dates.unix2datetime(ustrip.(u"s", raw.timestamp[idx])))" - xunit := u"µs" - getproperty(raw, plot_waveform)[idx] + if show_unixtime + title := "Event $(ts)" + else + 
title := "Event $(Dates.unix2datetime(ustrip(u"s", ts)))" + end + plot_title := "$(fk.setup)-$(fk.period)-$(fk.run)-$(fk.category)" + for (p, p_wvf) in enumerate(plot_waveform) + @series begin + if p == 1 + label := "$(channelinfo(data, fk, ch).detector) ($(ch))" + else + label := :none + end + color := 1 + xunit := u"µs" + getproperty(raw, p_wvf)[idx] + end end end -@recipe function f(data::LegendData, ts::Unitful.Time{<:Real}, ch::ChannelIdLike; plot_tier=DataTier(:raw), plot_waveform=:waveform_presummed) +@recipe function f(data::LegendData, ts::Unitful.Time{<:Real}, ch::ChannelIdLike; plot_tier=DataTier(:raw), plot_waveform=[:waveform_presummed], show_unixtime=false) fk = find_filekey(data, ts) @series begin plot_tier := plot_tier plot_waveform := plot_waveform + show_unixtime := show_unixtime data, fk, ts, ch end end -@recipe function f(data::LegendData, ts::Unitful.Time{<:Real}; plot_tier=DataTier(:raw), plot_waveform=:waveform_presummed, system=[:geds], only_processable=true) +@recipe function f(data::LegendData, ts::Unitful.Time{<:Real}; plot_tier=DataTier(:raw), system=Dict{Symbol, Vector{Symbol}}([:geds, :spms] .=> [[:waveform_presummed], [:waveform_bit_drop]]), only_processable=true, show_unixtime=false) fk = find_filekey(data, ts) framestyle := :box margins := (1, :mm) @@ -182,27 +197,66 @@ end throw(ArgumentError("Timestamp $ts not found in the data")) end ch = ChannelId(ch_ts) + chinfo_ch = channelinfo(data, fk, ch) + if chinfo_ch.system != :geds + throw(ArgumentError("Only HPGe cal events are supported")) + end + if only_processable && !chinfo_ch.processable + throw(ArgumentError("Channel $ch is not processable")) + end @series begin plot_tier := plot_tier - plot_waveform := plot_waveform + plot_waveform := system[:geds] + show_unixtime := show_unixtime data, fk, ts, ch end elseif fk.category == DataCategory(:phy) + # load raw file with all channels raw = read_ldata(data, plot_tier, fk) - sys = system[1] - chinfo = channelinfo(data, fk; system=sys, 
only_processable=only_processable) - idx = findfirst(isequal(ts), raw[first(chinfo.channel)].timestamp) - if isnothing(idx) - throw(ArgumentError("Timestamp $ts not found in the data")) - end - for chinfo_ch in chinfo - plot_title := "$(channelinfo(data, fk, ch).detector) ($(ch))" - @series begin - label := "$(chinfo_ch.detector) ($(chinfo_ch.channel))" - xunit := u"µs" - getproperty(raw, plot_waveform)[idx] + # layout + layout := (length(system), 1) + size := (1500, 500 * length(system)) + margins := (1, :mm) + bottom_margin := (2, :mm) + legend := :none + legendcolumns := 4 + @debug "Plot systems $(system) with waveforms $(plot_waveform)" + for (s, sys) in enumerate(sort(collect(keys(system)))) + chinfo = channelinfo(data, fk; system=sys, only_processable=only_processable) + if !all(hasproperty.(Ref(raw[Symbol(first(chinfo.channel))]), system[sys])) + throw(ArgumentError("Property $(plot_waveform[s]) not found in the data")) + end + plot_title := "$(fk.setup)-$(fk.period)-$(fk.run)-$(fk.category)" + if show_unixtime + title := "$sys - Event $(ts)" + else + title := "$sys - Event $(Dates.unix2datetime(ustrip(u"s", ts)))" + end + for (c, chinfo_ch) in enumerate(chinfo) + @debug "Load $(chinfo_ch.detector)" + for (p, p_wvf) in enumerate(system[sys]) + @series begin + if p == 1 + label := "$(chinfo_ch.detector) ($(chinfo_ch.channel))" + else + label := "" + end + color := c + subplot := s + xunit := u"µs" + idx = findfirst(isequal(ts), raw[Symbol(chinfo_ch.channel)].timestamp) + if isnothing(idx) + @warn "Timestamp $ts not found in $(chinfo_ch.detector) ($(chinfo_ch.channel)) data" + u"µs", NoUnits + else + getproperty(raw[Symbol(chinfo_ch.channel)], p_wvf)[idx] + end + end + end end end + else + throw(ArgumentError("Only `DataCategory` cal and phy are supported")) end end From 1bc24cb4044ba8cccea4a6098d390587bf5a8927 Mon Sep 17 00:00:00 2001 From: Florian Henkes Date: Tue, 3 Dec 2024 12:14:27 +0100 Subject: [PATCH 12/19] Allow TimestampLike to be Dates.DateTime --- 
src/filekey.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/filekey.jl b/src/filekey.jl index fae85965..555778f0 100644 --- a/src/filekey.jl +++ b/src/filekey.jl @@ -461,7 +461,7 @@ _timestamp_from_string(s::AbstractString) = DateTime(Timestamp(s)) Anything that can represent a timestamp, like `Timestamp("20221226T200846Z")` or "20221226T200846Z". """ -const TimestampLike = Union{Timestamp, AbstractString, Integer, Unitful.Time{<:Real}} +const TimestampLike = Union{Timestamp, AbstractString, Integer, Unitful.Time{<:Real}, Dates.DateTime} export TimestampLike From 19fa2a8dcd73553cef0be96e9631e5f6b74dc8c3 Mon Sep 17 00:00:00 2001 From: Florian Henkes Date: Tue, 3 Dec 2024 19:05:17 +0100 Subject: [PATCH 13/19] Add documentation --- docs/src/extensions.md | 60 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/docs/src/extensions.md b/docs/src/extensions.md index 6aaa24eb..1fe06433 100644 --- a/docs/src/extensions.md +++ b/docs/src/extensions.md @@ -1,5 +1,65 @@ # Extensions +## `Plots` extension + +LegendDataManagement provides an extension for [Plots](https://github.com/JuliaPlots/Plots.jl). This makes it possible to directly plot LEGEND data via the `plot` function. The extension is automatically loaded when both packages are loaded. 
+You can plot a parameter overview as a 2D plot over a set of detectors (requires a `$LEGEND_DATA_CONFIG` environment variable pointing to a legend data-config file): + +```julia +using LegendDataManagement, Plots + +l200 = LegendData(:l200) + +filekey = FileKey("l200-p03-r000-cal-20230311T235840Z") + +pars = l200.par.ppars.ecal(filekey) +properties = [:e_cusp_ctc, :fwhm, :qbb]; + +chinfo = channelinfo(l200, filekey; system = :geds, only_processable = true) + +plot(chinfo, pars, properties, verbose = true, color = 1, markershape = :o, calculate_mean = true) +``` + +The plot recipe takes three arguments: +- `chinfo`: the channel info with all detectors to be plotted on the x-axis +- `pars`: a `PropDict` that has the detector IDs as keys and parameters as values +- `properties`: an array of `Symbols` to access the data that should be plotted +(if no `properties` are provided, the `PropDict` `pars` is expected to just contain the data to be plotted as values) + +There are also keyword arguments: +- `calculate_mean`: If set to `true`, then the mean values are included in the legend labels. For values with uncertainties, the mean values are calculated as weighted means. +- `verbose`: some output when the plot is generated, e.g. if values for (some) detectors are missing + +A 3D plot is WIP. + +In addition, you can plot an event display of the `raw` waveforms: +``` julia +using Unitful, LegendDataManagement, Plots + +l200 = LegendData(:l200) + +ts = 1.6785791257987175e9u"s" + +ch = ChannelId(1104000) + +plot(l200, ts, ch) +``` + +- `plot_tier`: The data tier to be plotted. Default is `DataTier(:raw)`. +- `plot_waveform`: All waveforms to be plotted from the data. Default is `[:waveform_presummed]` which plots the presummed waveform. +- `show_unixtime`: If set to `true`, use unix time instead of the datetime in the title. Default is `false`. + +If the channel is not given, the recipe automatically searches for the correct event in the data. 
+``` julia +ts = 1.6785791257987175e9u"s" + +plot(l200, ts) +``` +In case of a `cal` event, only the HPGe channel with that event is plotted. In case of a `phy` event, all waveforms of the full HPGe and SiPM systems are plotted. +The following additional keyword arguments can be set (the `plot_waveform` kwarg is replaced by the `system` kwarg here): +- `system`: The system and the waveforms to be plotted for each system. Default is `Dict{Symbol, Vector{Symbol}}([:geds, :spms] .=> [[:waveform_presummed], [:waveform_bit_drop]])` +- `only_processable`: If set to `true`, only processable channels are plotted. Default is `true`. + ## `LegendHDF5IO` extension LegendDataManagment provides an extension for [LegendHDF5IO](https://github.com/legend-exp/LegendHDF5IO.jl). From 9a63fd984222d561666b4689cf71b78a22026e50 Mon Sep 17 00:00:00 2001 From: Florian Henkes Date: Tue, 3 Dec 2024 19:06:12 +0100 Subject: [PATCH 14/19] Allow DateTime objects to be plotted --- ext/LegendDataManagementPlotsExt.jl | 33 +++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/ext/LegendDataManagementPlotsExt.jl b/ext/LegendDataManagementPlotsExt.jl index 55d73661..d0694332 100644 --- a/ext/LegendDataManagementPlotsExt.jl +++ b/ext/LegendDataManagementPlotsExt.jl @@ -167,6 +167,17 @@ end end end +# TODO: check rounding of `Dates.DateTime` +@recipe function f(data::LegendData, fk::FileKey, ts::Dates.DateTime, ch::ChannelIdLike; plot_tier=DataTier(:raw), plot_waveform=[:waveform_presummed], show_unixtime=false) + @series begin + plot_tier := plot_tier + plot_waveform := plot_waveform + show_unixtime := show_unixtime + data, fk, Dates.datetime2unix(ts)*u"s", ch + end +end + + @recipe function f(data::LegendData, ts::Unitful.Time{<:Real}, ch::ChannelIdLike; plot_tier=DataTier(:raw), plot_waveform=[:waveform_presummed], show_unixtime=false) fk = find_filekey(data, ts) @series begin @@ -177,6 +188,18 @@ end end end +# TODO: check rounding of `Dates.DateTime` +@recipe function 
f(data::LegendData, ts::Dates.DateTime, ch::ChannelIdLike; plot_tier=DataTier(:raw), plot_waveform=[:waveform_presummed], show_unixtime=false) + fk = find_filekey(data, ts) + @series begin + plot_tier := plot_tier + plot_waveform := plot_waveform + show_unixtime := show_unixtime + data, fk, Dates.datetime2unix(ts)*u"s", ch + end +end + + @recipe function f(data::LegendData, ts::Unitful.Time{<:Real}; plot_tier=DataTier(:raw), system=Dict{Symbol, Vector{Symbol}}([:geds, :spms] .=> [[:waveform_presummed], [:waveform_bit_drop]]), only_processable=true, show_unixtime=false) fk = find_filekey(data, ts) framestyle := :box @@ -260,5 +283,15 @@ end end end +# TODO: check rounding of `Dates.DateTime` +@recipe function f(data::LegendData, ts::Dates.DateTime; plot_tier=DataTier(:raw), system=Dict{Symbol, Vector{Symbol}}([:geds, :spms] .=> [[:waveform_presummed], [:waveform_bit_drop]]), only_processable=true, show_unixtime=false) + @series begin + plot_tier := plot_tier + system := system + only_processable := only_processable + show_unixtime := show_unixtime + data, Dates.datetime2unix(ts)*u"s" + end +end end # module LegendDataManagementPlotsExt \ No newline at end of file From cf74f7bfaf1f018229f0fa022d05fa31cbd373f5 Mon Sep 17 00:00:00 2001 From: Florian Henkes Date: Tue, 3 Dec 2024 21:07:06 +0100 Subject: [PATCH 15/19] partitioninfo and runinfo always return TypedTables independent of selection --- src/dataprod_config.jl | 40 +++++++++++++++------------------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/src/dataprod_config.jl b/src/dataprod_config.jl index fc122591..2c3db7fe 100644 --- a/src/dataprod_config.jl +++ b/src/dataprod_config.jl @@ -115,18 +115,17 @@ function _get_partitions(data::LegendData, label::Symbol) rinfo_type = typeof(first(runinfo(data))) result::IdDict{ DataPartition, - StructVector{rinfo_type} + Table{rinfo_type} } = IdDict([ let periods_and_runs = [ let period = DataPeriod(string(p)) - map(run -> runinfo(data, (period, run)), 
_resolve_partition_runs(data, period, rs)) + filter(row -> row.run in Vector{DataRun}(rs), runinfo(data, period)) end for (p,rs) in part ] - # @info periods_and_runs - flat_pr = vcat(periods_and_runs...)::Vector{rinfo_type} - DataPartition(pidx)::DataPartition => sort(StructArray(flat_pr)) + flat_pr = vcat(periods_and_runs...)::Table{rinfo_type} + DataPartition(pidx)::DataPartition => sort(Table(flat_pr)) end for (pidx, part) in parts ]) @@ -135,15 +134,6 @@ function _get_partitions(data::LegendData, label::Symbol) end end -_resolve_partition_runs(data::LegendData, period::DataPeriod, runs::AbstractVector) = Vector{DataRun}(runs) -function _resolve_partition_runs(data::LegendData, period::DataPeriod, runs::AbstractString) - if runs == "all" - search_disk(DataRun, data.tier[:raw, :cal, period]) - else - throw(ArgumentError("Invalid specification \"$runs\" for runs in data partition")) - end -end - """ partitioninfo(data::LegendData, ch::ChannelId) partitioninfo(data::LegendData, ch::ChannelId, part::DataPartitionLike) @@ -179,7 +169,7 @@ Base.Broadcast.broadcasted(f::typeof(partitioninfo), data::LegendData, ch::Chann Base.Broadcast.broadcasted(f::typeof(partitioninfo), data::LegendData, ch::ChannelId, p::Vector{<:DataPartition}) = vcat(f.(Ref(data), Ref(ch), p)...) 
Base.Broadcast.broadcasted(f::typeof(partitioninfo), data::LegendData, ch::Vector{ChannelId}, p::DataPeriod) = f.(Ref(data), ch, Ref(p)) -const _cached_combined_partitions2 = LRU{Tuple{UInt, Symbol, Vector{Symbol}}, Vector{DataPeriod}}(maxsize = 300) +const _cached_combined_partitions = LRU{Tuple{UInt, Symbol, Vector{Symbol}}, Vector{DataPeriod}}(maxsize = 300) """ get_partition_combined_periods(data::LegendData, period::DataPeriodLike; chs::Vector{ChannelIdLike}=ChannelIdLike[]) @@ -188,7 +178,7 @@ Get a list periods which are combined in any partition for the given period and """ function get_partition_combined_periods(data::LegendData, period::DataPeriodLike; chs::Vector{ChannelIdLike}=ChannelIdLike[]) period, chs = Symbol(DataPeriod(period)), Symbol.(ChannelId.(chs)) - get!(_cached_combined_partitions2, (objectid(data), period, chs)) do + get!(_cached_combined_partitions, (objectid(data), period, chs)) do # load partition information parts = pydataprod_config(data).partitions # if chs is empty, check for all keys @@ -215,17 +205,17 @@ const _cached_analysis_runs = LRU{UInt, StructVector{@NamedTuple{period::DataPer Return cross-period analysis runs. 
""" function analysis_runs(data::LegendData) - get!(_cached_analysis_runs, objectid(data)) do + Table(sort(get!(_cached_analysis_runs, objectid(data)) do aruns = pydataprod_config(data).analysis_runs periods_and_runs = [ let period = DataPeriod(string(p)) - map(run -> (period = period, run = run), _resolve_partition_runs(data, period, rs)) + map(run -> (period = period, run = run), Vector{DataRun}(rs)) end for (p,rs) in aruns ] flat_pr = vcat(periods_and_runs...)::Vector{@NamedTuple{period::DataPeriod, run::DataRun}} StructArray(flat_pr) - end + end)) end export analysis_runs @@ -284,7 +274,7 @@ function runinfo(data::LegendData, runsel::RunSelLike) if isempty(t) throw(ArgumentError("No run information found for period $period run $run")) else - only(t) + Table(t) end end @@ -303,7 +293,7 @@ Get the starting filekey for `data` in `period`, `run`, `category`. """ function start_filekey end export start_filekey -start_filekey(data::LegendData, runsel::RunCategorySelLike) = runinfo(data, runsel).startkey +start_filekey(data::LegendData, runsel::RunCategorySelLike) = only(runinfo(data, runsel).startkey) start_filekey(data::LegendData, fk::FileKey) = start_filekey(data, (fk.period, fk.run, fk.category)) start_filekey(data::LegendData, selectors...) = start_filekey(data, selectors) @@ -315,7 +305,7 @@ Get the livetime for `data` in physics data taking of `run` in `period`. """ function livetime end export livetime -livetime(data::LegendData, runsel::RunCategorySelLike) = runinfo(data, runsel).livetime +livetime(data::LegendData, runsel::RunCategorySelLike) = only(runinfo(data, runsel).livetime) livetime(data, selectors...) = livetime(data, selectors) """ @@ -342,7 +332,7 @@ export is_lrun Return `true` if `run` is an analysis run for `data` in `period`. # ATTENTION: This is only valid for `phy` runs. 
""" -is_analysis_phy_run(data::LegendData, runsel::RunSelLike) = runinfo(data, runsel).phy.is_analysis_run +is_analysis_phy_run(data::LegendData, runsel::RunSelLike) = only(runinfo(data, runsel).phy.is_analysis_run) """ is_analysis_cal_run(data::LegendData, (period::DataPeriodLike, run::DataRunLike)) @@ -350,7 +340,7 @@ is_analysis_phy_run(data::LegendData, runsel::RunSelLike) = runinfo(data, runsel Return `true` if `run` is an analysis run for `data` in `period`. # ATTENTION: This is only valid for `cal` runs. """ -is_analysis_cal_run(data::LegendData, runsel::RunSelLike) = runinfo(data, runsel).cal.is_analysis_run +is_analysis_cal_run(data::LegendData, runsel::RunSelLike) = only(runinfo(data, runsel).cal.is_analysis_run) """ is_analysis_run(data::LegendData, (period::DataPeriodLike, run::DataRunLike, cat::DataCategoryLike)) @@ -370,7 +360,7 @@ function is_analysis_run(data::LegendData, runsel::RunCategorySelLike) if !(hasproperty(runinfo(data), Symbol(category))) throw(ArgumentError("Invalid category $category for analysis run")) end - runinfo(data, runsel).is_analysis_run + only(runinfo(data, runsel).is_analysis_run) end is_analysis_run(data::LegendData, fk::FileKey) = is_analysis_run(data, (fk.period, fk.run, fk.category)) is_analysis_run(data::LegendData, selectors...) 
= is_analysis_run(data, selectors) From ef1ae458925d52479f90294eca69f20929860ccf Mon Sep 17 00:00:00 2001 From: Florian Henkes Date: Tue, 3 Dec 2024 23:32:19 +0100 Subject: [PATCH 16/19] Update codecov workflow --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bb7e332e..df88743d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -62,7 +62,7 @@ jobs: with: fail_ci_if_error: true token: ${{ secrets.CODECOV_TOKEN }} - file: lcov.info + files: lcov.info docs: name: Documentation runs-on: ubuntu-latest From 5ebcc64c4d681b2f903995e4bb231a491dd73368 Mon Sep 17 00:00:00 2001 From: Felix Hagemann Date: Wed, 4 Dec 2024 01:58:56 +0100 Subject: [PATCH 17/19] Adjust lower compat of StructArrays --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 6849800e..1cd56a69 100644 --- a/Project.toml +++ b/Project.toml @@ -68,7 +68,7 @@ RecipesBase = "1" SolidStateDetectors = "0.10.2" StaticStrings = "0.2" Statistics = "<0.0.1, 1" -StructArrays = "0.5, 0.6" +StructArrays = "0.6.6, 0.7" Tables = "1.1" TypedTables = "1.4" UUIDs = "<0.0.1, 1" From 711acc7425e4652a0489b8dcc5cfdf534f2e44b6 Mon Sep 17 00:00:00 2001 From: Felix Hagemann Date: Wed, 4 Dec 2024 10:24:21 +0100 Subject: [PATCH 18/19] Add simple tests --- test/test_dataprod_config.jl | 40 ++++++++++++++++++++++++++ test/test_filekey.jl | 6 ++++ test/test_legend_data.jl | 54 +++++++++++++++++++++--------------- 3 files changed, 77 insertions(+), 23 deletions(-) diff --git a/test/test_dataprod_config.jl b/test/test_dataprod_config.jl index 36405304..ff3c059a 100644 --- a/test/test_dataprod_config.jl +++ b/test/test_dataprod_config.jl @@ -3,5 +3,45 @@ using LegendDataManagement using Test +using TypedTables +using Unitful + @testset "dataprod_config" begin + l200 = LegendData(:l200) + + @testset "runinfo" begin + rinfo = runinfo(l200, (DataPeriod(2), 
DataRun(6), :cal)) + @test rinfo isa TypedTables.Table + @test length(rinfo) == 1 + @test only(rinfo).startkey.period == DataPeriod(2) + @test only(rinfo).startkey.run == DataRun(6) + @test only(rinfo).startkey.category == DataCategory(:cal) + @test_nowarn empty!(LegendDataManagement._cached_runinfo) + end + + @testset "analysis_runs" begin + analysisruns = analysis_runs(l200) + @test analysisruns isa TypedTables.Table + @test hasproperty(analysisruns, :period) + @test hasproperty(analysisruns, :run) + @test_nowarn empty!(LegendDataManagement._cached_analysis_runs) + end + + @testset "partitioninfo" begin + partinfo = partitioninfo(l200, :V99000A) + @test partinfo isa IdDict + @test partinfo[DataPartition(1)] isa TypedTables.Table + @test_nowarn empty!(LegendDataManagement._cached_partitioninfo) + end + + @testset "utils" begin + sel = (DataPeriod(2), DataRun(6), :phy) + @test start_filekey(l200, sel) isa FileKey + @test livetime(l200, sel) isa Unitful.Time + + rsel = (DataPeriod(2), DataRun(6)) + @test LegendDataManagement.is_analysis_cal_run(l200, rsel) + @test LegendDataManagement.is_analysis_phy_run(l200, rsel) + @test LegendDataManagement.is_analysis_run(l200, sel) + end end diff --git a/test/test_filekey.jl b/test/test_filekey.jl index 7ef414ca..3c1c2908 100644 --- a/test/test_filekey.jl +++ b/test/test_filekey.jl @@ -4,6 +4,7 @@ using LegendDataManagement using Test using Dates +using Unitful @testset "filekey" begin setup = ExpSetup(:l200) @@ -29,6 +30,11 @@ using Dates timestamp = @inferred(Timestamp("20221226T200846Z")) @test timestamp.unixtime == 1672085326 @test @inferred(string(timestamp)) == "20221226T200846Z" + + unix_timestamp = 1672085326u"s" + timestamp2 = @inferred(Timestamp(unix_timestamp)) + @test timestamp2.unixtime == 1672085326 + @test @inferred(string(timestamp2)) == "20221226T200846Z" key = @inferred FileKey("l200-p02-r006-cal-20221226T200846Z") @test string(key) == "l200-p02-r006-cal-20221226T200846Z" diff --git a/test/test_legend_data.jl 
b/test/test_legend_data.jl index 907064f3..bf9bbc19 100644 --- a/test/test_legend_data.jl +++ b/test/test_legend_data.jl @@ -23,27 +23,35 @@ include("testing_utils.jl") props_base_path = data_path(LegendDataConfig().setups.l200, "metadata") @test l200.metadata isa LegendDataManagement.PropsDB - # ToDo: Make type-stable: - @test channelinfo(l200, filekey) isa TypedTables.Table - chinfo = channelinfo(l200, filekey) - @test all(filterby(@pf $processable && $usability == :on)(chinfo).processable) - @test all(filterby(@pf $processable && $usability == :on)(chinfo).usability .== :on) - - # Delete the channelinfo cache - empty!(LegendDataManagement._cached_channelinfo) - - # Test the extended channel info with active volume calculation - extended = channelinfo(l200, filekey, extended = true) - @test extended isa TypedTables.Table - - # Check that some keywords only appear in the extended channelinfo - extended_keywords = (:cc4, :cc4ch, :daqcrate, :daqcard, :hvcard, :hvch, :enrichment, :mass, :total_volume, :active_volume) - @test !any(in(columnnames(chinfo)), extended_keywords) - @test all(in(columnnames(extended)), extended_keywords) - - # ToDo: Make type-stable: - # @test #=@inferred=#(channel_info(l200, filekey)) isa StructArray - # chinfo = channel_info(l200, filekey) - # @test all(filterby(@pf $processable && $usability == :on)(chinfo).processable) - # @test all(filterby(@pf $processable && $usability == :on)(chinfo).usability .== :on) + @testset "channelinfo" begin + # ToDo: Make type-stable: + @test channelinfo(l200, filekey) isa TypedTables.Table + chinfo = channelinfo(l200, filekey) + @test all(filterby(@pf $processable && $usability == :on)(chinfo).processable) + @test all(filterby(@pf $processable && $usability == :on)(chinfo).usability .== :on) + + # Delete the channelinfo cache + empty!(LegendDataManagement._cached_channelinfo) + + # Test the extended channel info with active volume calculation + extended = channelinfo(l200, filekey, only_usability = :on, 
extended = true) + @test extended isa TypedTables.Table + + # Check that some keywords only appear in the extended channelinfo + extended_keywords = (:cc4, :cc4ch, :daqcrate, :daqcard, :hvcard, :hvch, :enrichment, :mass, :total_volume, :active_volume) + @test !any(in(columnnames(chinfo)), extended_keywords) + @test all(in(columnnames(extended)), extended_keywords) + + # ToDo: Make type-stable: + # @test #=@inferred=#(channel_info(l200, filekey)) isa StructArray + # chinfo = channel_info(l200, filekey) + # @test all(filterby(@pf $processable && $usability == :on)(chinfo).processable) + # @test all(filterby(@pf $processable && $usability == :on)(chinfo).usability .== :on) + end + + @testset "search_disk" begin + datasets = search_disk(DataSet, l200) + # LegendTestData is probably not in the correct formats + @test_broken !(isempty(datasets)) + end end From 8b63b60315288a55dc9228f43e1fcf0b9bef1716 Mon Sep 17 00:00:00 2001 From: Felix Hagemann Date: Wed, 4 Dec 2024 10:48:39 +0100 Subject: [PATCH 19/19] Remove plot recipes from codecov --- .codecov.yml | 2 ++ .travis.yml | 52 ---------------------------------------------------- 2 files changed, 2 insertions(+), 52 deletions(-) delete mode 100644 .travis.yml diff --git a/.codecov.yml b/.codecov.yml index ac546553..343026b6 100644 --- a/.codecov.yml +++ b/.codecov.yml @@ -1 +1,3 @@ # comment: false +ignore: + - "ext/LegendDataManagementPlotsExt.jl" diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 84b6fb47..00000000 --- a/.travis.yml +++ /dev/null @@ -1,52 +0,0 @@ -## Documentation: http://docs.travis-ci.com/user/languages/julia/ - -language: julia - -os: - - linux - - osx - - windows - -julia: - - 1.0 - - 1.4 - - nightly - -arch: - - amd64 - - x86 - -branches: - only: - - master - - dev - - /^release-.*$/ - - /^v\d+\.\d+(\.\d+)?(-\S*)?$/ - -notifications: - email: false - -after_success: - # push coverage results to Codecov - - julia -e 'import Pkg; Pkg.add("Coverage"); using Coverage; 
Codecov.submit(Codecov.process_folder())' - # push coverage results to Coveralls - - julia -e 'import Pkg; Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())' - -jobs: - allow_failures: - - julia: nightly - fast_finish: true - exclude: - - os: osx - arch: x86 - - os: windows - arch: x86 - include: - - stage: "Documentation" - julia: 1.4 - os: linux - arch: amd64 - script: - - julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' - - julia --project=docs/ docs/make.jl - after_success: skip