diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml new file mode 100644 index 0000000..5128a12 --- /dev/null +++ b/.github/workflows/documentation.yml @@ -0,0 +1,22 @@ +name: Documentation +on: + push: + branches: [master] + tags: '*' + pull_request: + types: [opened, synchronize, reopened] +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@latest + with: + # Build documentation on the latest stable Julia release + version: '1' + - name: Install dependencies + run: julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' + - name: Build and deploy + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # For authentication with GitHub Actions token. + run: julia --project=docs/ docs/make.jl \ No newline at end of file diff --git a/README.md b/README.md index bed20f0..9303ccd 100644 --- a/README.md +++ b/README.md @@ -1,36 +1,9 @@ # Quiver.jl -Quiver is an alternative data-structure to represent time series data. It is designed for time series that can have extra dimensions such as scenarios, blocks, segments, etc. +| **Build Status** | **Coverage** | **Documentation** | +|:-----------------:|:-----------------:|:-----------------:| +| [![Build Status][build-img]][build-url] | [![Codecov branch][codecov-img]][codecov-url] |[![](https://img.shields.io/badge/docs-latest-blue.svg)](https://psrenergy.github.io/Quiver.jl/dev/) -Quiver is not the fastest data-structure for time series data, but it is designed to be flexible and easy to use. The main idea behind Quiver -is to have a set of dimensions that can be used to index the data and a set of values from the time serires attributes. This allows to have a -table-like data-structure that can be used to store time series data. +Repository to read and write open-source formats for PSR models. 
-Files that follow the Quiver implementation can be stored in any format that maps directly to a table-like structure with metadata. The metadata stores the frequency of the time series, the initial date, the unit of the data, the number of the dimension, the maximum value of each dimension, the time dimension and the version of the file. - -The matadata is always stored in a TOML file in the following format: - -```toml -version = 1 -dimensions = ["stage", "scenario", "block"] -dimension_size = [10, 12, 744] -initial_date = "2006-01-01 00:00:00" -time_dimension = "stage" -frequency = "month" -unit = "" -labels = ["agent_1", "agent_2", "agent_3"] -``` - -And the data is stored in a csv or binary file that contains the values of the time series. The csv format is as follows: -```csv -stage,scenario,block,agent_1,agent_2,agent_3 -1,1,1,1.0,1.0,1.0 -1,1,2,1.0,1.0,1.0 -1,1,3,1.0,1.0,1.0 -``` - -## Installation - -```julia -pkg> add Quiver -``` \ No newline at end of file +For reading and writing Graf files in the csv format please add https://github.com/psrenergy/GrafCSV.jl to your project. 
\ No newline at end of file diff --git a/docs/Project.toml b/docs/Project.toml new file mode 100644 index 0000000..dfa65cd --- /dev/null +++ b/docs/Project.toml @@ -0,0 +1,2 @@ +[deps] +Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" diff --git a/docs/docs.bat b/docs/docs.bat new file mode 100644 index 0000000..a9ef007 --- /dev/null +++ b/docs/docs.bat @@ -0,0 +1,5 @@ +@echo off + +SET DOCSPATH=%~dp0 + +CALL "%JULIA_1100%" --project=%DOCSPATH% %DOCSPATH%\make.jl diff --git a/docs/make.jl b/docs/make.jl new file mode 100644 index 0000000..8e7b0d4 --- /dev/null +++ b/docs/make.jl @@ -0,0 +1,40 @@ +import Pkg +Pkg.activate(dirname(@__DIR__)) +Pkg.instantiate() +using Quiver + +Pkg.activate(@__DIR__) +Pkg.instantiate() +using Documenter + +makedocs(; + modules = [Quiver], + doctest = false, + clean = true, + format = Documenter.HTML(; + mathengine = Documenter.MathJax2(), + prettyurls = false, + # Prevents the edit on github button from showing up + edit_link = nothing, + footer = nothing, + disable_git = true, + repolink = nothing, + ), + sitename = "Quiver.jl", + warnonly = true, + pages = [ + "Home" => [ + "Overview" => "home.md", + ], + "Manual" => [ + "Reading Data" => "reading.md", + "Writing Data" => "writing.md", + "Examples" => "examples.md", + ], + ], +) + +Documenter.deploydocs(; + repo = "https://github.com/psrenergy/Quiver.jl.git", + push_preview = true, +) diff --git a/docs/src/examples.md b/docs/src/examples.md new file mode 100644 index 0000000..41a4a25 --- /dev/null +++ b/docs/src/examples.md @@ -0,0 +1,97 @@ +## Examples + +Here are some practical examples demonstrating how to use Quiver for time series data operations, such as writing, reading, merging, and converting between formats. + +### 1. Writing and Reading Time Series + +This example shows how to write and read time series data with Quiver, using multiple dimensions like stage, scenario, and block. 
+ +```julia +using Quiver +using Dates + +# Define the dimensions and metadata +filename = "path/to/output/file" +initial_date = DateTime(2006, 1, 1) +num_stages = 10 +num_scenarios = 12 +num_blocks_per_stage = Int32.(Dates.daysinmonth.(initial_date:Dates.Month(1):initial_date + Dates.Month(num_stages - 1)) .* 24) +dimensions = ["stage", "scenario", "block"] +labels = ["agent_1", "agent_2", "agent_3"] +time_dimension = "stage" +dimension_size = [num_stages, num_scenarios, maximum(num_blocks_per_stage)] + +# Initialize the Writer +writer = Quiver.Writer{Quiver.binary}( + filename; + dimensions, + labels, + time_dimension, + dimension_size, + initial_date = initial_date +) + +# Write data +for stage in 1:num_stages + for scenario in 1:num_scenarios + for block in 1:num_blocks_per_stage[stage] + data = [stage, scenario, block] + Quiver.write!(writer, data, stage=stage, scenario=scenario, block=block) + end + end +end + +# Close the writer +Quiver.close!(writer) + +# Now, read the data back +reader = Quiver.Reader{Quiver.binary}(filename) + +for stage in 1:num_stages + for scenario in 1:num_scenarios + for block in 1:num_blocks_per_stage[stage] + data = Quiver.goto!(reader, stage=stage, scenario=scenario, block=block) + println(data) + end + end +end + +Quiver.close!(reader) +``` + +### 2. Converting Between Formats + +This example demonstrates how to convert time series data from binary format to CSV. To convert the data in the opposite direction (from CSV to binary), simply switch the positions of `Quiver.binary` and `Quiver.csv` in the function below. + +```julia +using Quiver + +# Convert binary file to CSV +filename = "path/to/file" +Quiver.convert(filename, Quiver.binary, Quiver.csv) +``` + +### 3. Merging Multiple Files + +This example shows how to merge multiple time series files into a single binary file. 
+ +```julia +using Quiver +using Dates + +# Define metadata and filenames +filename = "path/to/output/file" +filenames = ["path/to/input_file_1", "path/to/input_file_2", "path/to/input_file_3"] +initial_date = DateTime(2006, 1, 1) +num_stages = 10 +num_scenarios = 12 +num_blocks = 24 +dimensions = ["stage", "scenario", "block"] +time_dimension = "stage" +dimension_size = [num_stages, num_scenarios, num_blocks] + +# Merge the files +Quiver.merge(filename, filenames, Quiver.binary) +``` + +--- diff --git a/docs/src/home.md b/docs/src/home.md new file mode 100644 index 0000000..e0eae09 --- /dev/null +++ b/docs/src/home.md @@ -0,0 +1,36 @@ +# Quiver.jl + +Quiver is an alternative data-structure to represent time series data. It is designed for time series that can have extra dimensions such as scenarios, blocks, segments, etc. + +Quiver is not the fastest data-structure for time series data, but it is designed to be flexible and easy to use. The main idea behind Quiver +is to have a set of dimensions that can be used to index the data and a set of values from the time series attributes. This makes it possible to have a +table-like data-structure that can be used to store time series data. + +Files that follow the Quiver implementation can be stored in any format that maps directly to a table-like structure with metadata. The metadata stores the frequency of the time series, the initial date, the unit of the data, the number of dimensions, the maximum value of each dimension, the time dimension and the version of the file. + +The metadata is always stored in a TOML file in the following format: + +```toml +version = 1 +dimensions = ["stage", "scenario", "block"] +dimension_size = [10, 12, 744] +initial_date = "2006-01-01 00:00:00" +time_dimension = "stage" +frequency = "month" +unit = "" +labels = ["agent_1", "agent_2", "agent_3"] +``` + +And the data is stored in a csv or binary file that contains the values of the time series. 
The csv format is as follows: +```csv +stage,scenario,block,agent_1,agent_2,agent_3 +1,1,1,1.0,1.0,1.0 +1,1,2,1.0,1.0,1.0 +1,1,3,1.0,1.0,1.0 +``` + +## Installation + +```julia +pkg> add Quiver +``` \ No newline at end of file diff --git a/docs/src/reading.md b/docs/src/reading.md new file mode 100644 index 0000000..1269dff --- /dev/null +++ b/docs/src/reading.md @@ -0,0 +1,45 @@ +## Reading + +To read time series with Quiver, the Reader structure is used to manage the file, data, and dimensions. This structure helps load the relevant data from time series files, which can be either in CSV or binary format. Below is a more detailed example of how to use the `Reader`: + +#### Example of initializing a Reader: + +```julia +using Quiver + +# Path to the time series file +filename = "path/to/your/timeseries_file" + +# Initialize the Reader (assuming binary format for simplicity) +reader = Quiver.Reader{Quiver.binary}(filename) + +# Fetch data from the reader by specifying the stage, scenario, and block +data = Quiver.goto!(reader, stage=1, scenario=2, block=5) + +# Display the retrieved data +println(data) +``` +### Key Functions: +```@docs +Quiver.goto! +``` + +```@docs +Quiver.next_dimension! +``` + +```@docs +Quiver.file_to_array +``` + +```@docs +Quiver.file_to_df +``` + +#### Closing the Reader: + +Always close the reader when done to release resources. + +```@docs +Quiver.close! 
+``` \ No newline at end of file diff --git a/docs/src/writing.md b/docs/src/writing.md new file mode 100644 index 0000000..260389d --- /dev/null +++ b/docs/src/writing.md @@ -0,0 +1,8 @@ +```@docs +Quiver.Writer +``` + +### Key Functions: +```@docs +Quiver.array_to_file +``` \ No newline at end of file diff --git a/src/reader.jl b/src/reader.jl index 1cd35ce..d42b01f 100644 --- a/src/reader.jl +++ b/src/reader.jl @@ -1,3 +1,26 @@ +""" + Reader{I <: Implementation, R}(reader::R, filename::String, metadata::Metadata, + dimension_in_cache::Vector{Int}, dimension_to_read::Vector{Int}; + labels_to_read::Vector{String} = metadata.labels, + carrousel::Bool = false) + +Creates a new instance of `Reader`, which is used to manage time series files. +The `Reader` manages the dimensions and labels of the file and allows data access via functions like `goto!` and `next_dimension!`. + +# Parameters + + - `reader::R`: The reader responsible for reading the file (can be for binary or CSV formats). + - `filename::String`: The name of the file containing the time series data. + - `metadata::Metadata`: Metadata describing the file’s dimensions and labels. + - `dimension_in_cache::Vector{Int}`: Vector of dimensions kept in cache. + - `dimension_to_read::Vector{Int}`: Specific dimensions to be read. + - `labels_to_read::Vector{String}`: Labels to be read from the file (optional). + - `carrousel::Bool`: Defines whether the reader uses a carousel to access dimensions (optional). + +# Returns + + - A new `Reader` object. 
+""" mutable struct Reader{I <: Implementation, R} reader::R filename::String @@ -10,8 +33,8 @@ mutable struct Reader{I <: Implementation, R} indices_of_labels_to_read::Vector{Int} carrousel::Bool function Reader{I}( - reader::R, - filename::String, + reader::R, + filename::String, metadata::Metadata, dimension_in_cache::Vector{Int}, dimension_to_read::Vector{Int}; @@ -39,11 +62,11 @@ mutable struct Reader{I <: Implementation, R} data = fill(NaN32, length(labels_to_read)) reader = new{I, R}( - reader, - filename, - metadata, + reader, + filename, + metadata, dimension_in_cache, - dimension_to_read, + dimension_to_read, all_labels_data_cache, data, labels_to_read, @@ -81,12 +104,28 @@ function _move_data_from_buffer_cache_to_data!(reader::Reader) end """ - goto!( - reader::Reader; - dims... - ) + goto!(reader::Reader; dims...) + +This function moves the reader to the specified dimensions and returns the corresponding data. It updates the internal cache and retrieves the necessary time series values. + +- For **binary files**, `goto!` allows random access to any part of the time series, meaning you can jump between stages, scenarios, and blocks in any order. This provides greater flexibility for accessing specific points in the data. -Move the reader to the specified dimensions and return the data. +- For **CSV files**, `goto!` works differently. It only supports forward sequential access, meaning that while you can still navigate through stages, scenarios, and blocks, you cannot randomly jump to previous positions. The function moves forward through the file, reading data sequentially. + +# Parameters + + - `reader::Reader`: The time series reader. + - `dims...`: Specific dimensions to move the reader to. + +# Returns + + - The data at the specified dimensions. + +# Simple Example: + +```julia +data = goto!(reader, stage = 1, scenario = 2, block = 5) +``` """ function goto!(reader::Reader; dims...) validate_dimensions(reader.metadata, dims...) 
@@ -100,7 +139,21 @@ end """ next_dimension!(reader::Reader) -Move the reader to the next dimension and return the data. +This function advances the reader to the next dimension and returns the updated data. It's useful when iterating over multiple dimensions sequentially. This function is especially useful for **CSV files**, where random access is not available. It allows for easy iteration through multiple dimensions in a forward-only manner. + +# Parameters + + - `reader::Reader`: The time series reader. + +# Returns + + - The data in the next dimension. + +# Simple Example: + +```julia +next_data = next_dimension!(reader) +``` """ function next_dimension!(reader::Reader) _quiver_next_dimension!(reader) @@ -111,7 +164,16 @@ end """ max_index(reader::Reader, dimension::String) -Return the maximum index of the specified dimension. +Returns the maximum index of the specified dimension. + +# Parameters + + - `reader::Reader`: The time series reader. + - `dimension::String`: The name of the dimension to find the index for. + +# Returns + + - The maximum index of the specified dimension. """ function max_index(reader::Reader, dimension::String) symbol_dim = Symbol(dimension) @@ -125,7 +187,21 @@ end """ close!(reader::Reader) -Close the reader. +Closes the reader and releases associated resources. + +# Parameters + + - `reader::Reader`: The time series reader to close. + +# Returns + + - `nothing`. + +# Simple Example: + +```julia +close!(reader) +``` """ function close!(reader::Reader) _quiver_close!(reader) @@ -133,13 +209,19 @@ function close!(reader::Reader) end """ - file_to_array( - filename::String, - implementation::Type{I}; - labels_to_read::Vector{String} = String[], - ) where {I <: Implementation} + file_to_array(filename::String, implementation::Type{I}; labels_to_read::Vector{String} = String[]) where {I <: Implementation} Reads a file and returns the data and metadata as a tuple. + +# Parameters + + - `filename::String`: The name of the file to be read. 
+ - `implementation::Type{I}`: The implementation type for reading the file (binary or CSV). + - `labels_to_read::Vector{String}`: Specific labels to read (optional). + +# Returns + + - A tuple containing the read data and associated metadata. """ function file_to_array( filename::String, @@ -172,13 +254,19 @@ function file_to_array( end """ - file_to_df( - filename::String, - implementation::Type{I}; - labels_to_read::Vector{String} = String[], - ) where {I <: Implementation} + file_to_df(filename::String, implementation::Type{I}; labels_to_read::Vector{String} = String[]) where {I <: Implementation} Reads a file and returns the data and metadata as a DataFrame. + +# Parameters + + - `filename::String`: The name of the file to be read. + - `implementation::Type{I}`: The implementation type for reading the file (binary or CSV). + - `labels_to_read::Vector{String}`: Specific labels to read (optional). + +# Returns + + - A DataFrame with the read data and metadata. """ function file_to_df( filename::String, @@ -223,4 +311,4 @@ function file_to_df( Quiver.close!(reader) return df -end \ No newline at end of file +end diff --git a/src/writer.jl b/src/writer.jl index c15052d..511348a 100644 --- a/src/writer.jl +++ b/src/writer.jl @@ -1,13 +1,131 @@ +""" +## Writing + +To write time series data in Quiver, you can leverage different implementations, such as binary and CSV, depending on your performance or readability requirements: + + - **CSV Format**: This format is human-readable and easy to inspect manually, storing data in a plain-text, tabular form. It is ideal when ease of access and manual editing are priorities. + + - **Binary Format**: Optimized for large-scale data, the binary format provides significantly better performance, making it suitable for scenarios where efficiency and speed are critical. + +#### Writer Arguments: + + - **filename**: The path where the time series data will be written. 
+ - **dimensions**: An array that specifies the dimensions of the time series (e.g., `["stage", "scenario", "block"]`). + - **labels**: Labels for each time series (e.g., `["agent_1", "agent_2", "agent_3"]`). + - **time_dimension**: The primary time-related dimension, such as "stage". + - **dimension_size**: An array specifying the size of each dimension (e.g., `[num_stages, num_scenarios, num_blocks]`). + - **initial_date**: The starting date of the time series, used for associating data with time. + +### Key Functions: + +#### `write!` + +This function writes data to the specified dimensions in the file. It validates the dimensions, updates the cache, and writes the provided data. + +#### `close!` + +This function closes the writer and finalizes the writing process. + +```julia +close!(writer) +``` + +#### Example of writing to binary: + +```julia +using Quiver +using Dates + +# Define the file path and time series characteristics +filename = "path/to/output/file" +initial_date = DateTime(2024, 1, 1) +num_stages = 10 +num_scenarios = 12 +num_blocks = 24 + +# Define dimensions, labels, and time information +dimensions = ["stage", "scenario", "block"] +labels = ["agent_1", "agent_2", "agent_3"] +time_dimension = "stage" +dimension_size = [num_stages, num_scenarios, num_blocks] + +# Initialize the Writer for binary format +writer = Quiver.Writer{Quiver.binary}( + filename; + dimensions, + labels, + time_dimension, + dimension_size, + initial_date = initial_date, +) + +# Write data +for stage in 1:num_stages + for scenario in 1:num_scenarios + for block in 1:num_blocks + data = [stage, scenario, block] # Example data + Quiver.write!(writer, data; stage, scenario, block) + end + end +end + +# Close the writer +Quiver.close!(writer) +``` + +#### Example of writing to CSV: + +```julia +using Quiver +using Dates + +# Define the file path and time series characteristics +filename = "path/to/output/file" +initial_date = DateTime(2024, 1, 1) +num_stages = 10 +num_scenarios 
= 12 +num_blocks = 24 + +# Define dimensions, labels, and time information +dimensions = ["stage", "scenario", "block"] +labels = ["agent_1", "agent_2", "agent_3"] +time_dimension = "stage" +dimension_size = [num_stages, num_scenarios, num_blocks] + +# Initialize the Writer for CSV format +writer = Quiver.Writer{Quiver.csv}( + filename; + dimensions, + labels, + time_dimension, + dimension_size, + initial_date = initial_date, +) + +# Write data +for stage in 1:num_stages + for scenario in 1:num_scenarios + for block in 1:num_blocks + data = [stage, scenario, block] # Example data + Quiver.write!(writer, data; stage, scenario, block) + end + end +end + +# Close the writer +Quiver.close!(writer) +``` +""" mutable struct Writer{I <: Implementation, W} writer::W filename::String metadata::Metadata last_dimension_added::Vector{Int} function Writer{I}( - writer::W, - filename::String, - metadata::Metadata, - last_dimension_added::Vector{Int} + writer::W, + filename::String, + metadata::Metadata, + last_dimension_added::Vector{Int}, ) where {I, W} writer = new{I, W}(writer, filename, metadata, last_dimension_added) finalizer(Quiver.close!, writer) @@ -22,10 +140,10 @@ function _build_last_dimension_added!(writer::Writer; dims...) return nothing end -function write!(writer::Writer, data::Vector{T}; dims...) where T <: Real +function write!(writer::Writer, data::Vector{T}; dims...) where {T <: Real} validate_dimensions(writer.metadata, dims...) _build_last_dimension_added!(writer; dims...) - _quiver_write!(writer, data) + return _quiver_write!(writer, data) end function close!(writer::Writer) @@ -116,4 +234,4 @@ end function round_digits(vec::Vector{T}, digits::Int) where {T} return round.(vec; digits) -end \ No newline at end of file +end