From 3916b5ac17a2dff9942893a4af767afa394cad8e Mon Sep 17 00:00:00 2001 From: "Michael B. Klein" Date: Wed, 9 Oct 2024 23:33:11 -0500 Subject: [PATCH] Switch from Regex-based parsing to NimbleParsec grammar-based parsing Allow qualifiers on decades and centuries --- .github/workflows/build.yml | 62 +---------- README.md | 1 - lib/edtf.ex | 41 +++----- lib/edtf/aggregate.ex | 53 +++------- lib/edtf/date.ex | 177 +++++++------------------------ lib/edtf/humanize.ex | 7 +- lib/edtf/infinity.ex | 5 - lib/edtf/interval.ex | 42 ++------ lib/edtf/parser.ex | 133 +++++++++++++++++++++++ lib/edtf/parser/helpers.ex | 204 ++++++++++++++++++++++++++++++++++++ lib/edtf/range.ex | 26 ----- lib/edtf/season.ex | 29 ----- lib/edtf/year.ex | 42 -------- mix.exs | 3 +- test/edtf/date_test.exs | 40 +++++++ test/edtf/level_test.exs | 15 +++ test/edtf/parser_test.exs | 6 ++ test/edtf_test.exs | 1 + test/error_test.exs | 20 ---- 19 files changed, 482 insertions(+), 425 deletions(-) create mode 100644 lib/edtf/parser.ex create mode 100644 lib/edtf/parser/helpers.ex delete mode 100644 lib/edtf/range.ex delete mode 100644 lib/edtf/season.ex delete mode 100644 lib/edtf/year.ex create mode 100644 test/edtf/level_test.exs create mode 100644 test/edtf/parser_test.exs delete mode 100644 test/error_test.exs diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8e8bb7f..253c1b0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,34 +1,17 @@ --- -name: edtf +name: test on: [push] jobs: test: runs-on: ubuntu-latest - strategy: - matrix: - elixir: - - 1.15-25 - - 1.15-26 - - 1.16-25 - - 1.16-26 - - 1.17-25 - - 1.17-26 - - 1.17-27 env: MIX_ENV: test steps: - - name: Set Elixir and OTP versions - id: elixir-otp - run: | - echo "elixir=$(sed 's/-.*$//' <<< $version)" >> $GITHUB_OUTPUT - echo "otp=$(sed 's/^.*-//' <<< $version)" >> $GITHUB_OUTPUT - env: - version: ${{ matrix.elixir }} - uses: actions/checkout@v2 - uses: erlef/setup-beam@v1 with: - otp-version: ${{ steps.elixir-otp.outputs.otp }} - elixir-version: ${{ steps.elixir-otp.outputs.elixir }} + otp-version: 27 + elixir-version: 1.17 - name: Cache Elixir dependencies uses: actions/cache@v2 with: @@ -46,45 +29,6 @@ jobs: env: MIX_ENV: test - name: Run Tests - run: mix test --trace - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - build: - runs-on: ubuntu-latest - needs: test - steps: - - name: Pass all checks - run: echo "Tests passed" - coverage: - runs-on: ubuntu-latest - env: - MIX_ENV: test - steps: - - name: Set Elixir and OTP versions - id: elixir-otp - run: | - echo "elixir=$(sed 's/-.*$//' <<< $version)" >> $GITHUB_OUTPUT - echo "otp=$(sed 's/^.*-//' <<< $version)" >> $GITHUB_OUTPUT - env: - version: "1.17-27" - - uses: actions/checkout@v2 - - uses: erlef/setup-beam@v1 - with: - otp-version: ${{ steps.elixir-otp.outputs.otp }} - elixir-version: ${{ steps.elixir-otp.outputs.elixir }} - - name: Cache Elixir dependencies - uses: actions/cache@v2 - with: - path: | - deps - _build - key: ${{ runner.os }}-deps-${{ steps.elixir-otp.outputs.elixir }}-${{ steps.elixir-otp.outputs.otp }}-${{ hashFiles('mix.lock') }} - restore-keys: | - ${{ runner.os }}-deps-${{ steps.elixir-otp.outputs.elixir }}-${{ steps.elixir-otp.outputs.otp }}-${{ hashFiles('mix.lock') }} - ${{ runner.os }}-deps-${{ steps.elixir-otp.outputs.elixir }}-${{ steps.elixir-otp.outputs.otp }}- - - name: Install Dependencies - run: mix do deps.get, deps.compile - - name: Run Tests & Coverage Analysis run: mix coveralls.github --trace env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/README.md b/README.md index f6df2f6..be15d8d 100644 --- a/README.md +++ b/README.md @@ -33,4 +33,3 @@ See `EDTF.parse/1`, `EDTF.validate/1`, and `EDTF.humanize/1`. - Some human-readable dates containing Level 2 qualifications and years with significant digits, may produce less specific results than desired. -- Level 2 years without the leading `Y` character (e.g., `2024S03`) are not supported at this time. diff --git a/lib/edtf.ex b/lib/edtf.ex index 9c673ff..7d69145 100644 --- a/lib/edtf.ex +++ b/lib/edtf.ex @@ -17,14 +17,21 @@ defmodule EDTF do {:error, :invalid_format} ``` """ - def parse(edtf, include \\ [Interval, Aggregate, Date]) do - case Enum.find(include, & &1.match?(edtf)) do - nil -> error() - mod -> mod.parse(edtf) + def parse(edtf) do + case EDTF.Parser.parse(edtf) do + {:ok, [result], _, _, _, _} -> {:ok, assemble(result) |> Level.add_level()} + {:error, _, _, _, _, _} -> {:error, :invalid_format} end - |> Level.add_level() end + defp assemble({:date, _} = result), do: Date.assemble(result) + defp assemble({:year, _} = result), do: Date.assemble(result) + defp assemble({:decade, _} = result), do: Date.assemble(result) + defp assemble({:century, _} = result), do: Date.assemble(result) + defp assemble({:interval, _} = result), do: Interval.assemble(result) + defp assemble({:set, _} = result), do: Aggregate.assemble(result) + defp assemble({:list, _} = result), do: Aggregate.assemble(result) + @doc """ Validate an EDTF date string @@ -62,28 +69,4 @@ defmodule EDTF do other -> other end end - - @doc """ - Generate an error response - """ - def error(error \\ :invalid_format), do: {:error, error} - - @doc """ - Identify the open-ended continuation markers on an EDTF date string - """ - def open_ended(edtf) do - case Regex.named_captures(~r/^(?\.\.)?(?.+?)(?\.\.)?$/, edtf) do - %{"earlier" => "..", "edtf" => result, "later" => ".."} -> - {result, [{:earlier, true}, {:later, true}]} - - %{"earlier" => "..", "edtf" => result} -> - {result, [{:earlier, true}, {:later, false}]} - - %{"edtf" => result, "later" => ".."} -> - {result, [{:earlier, false}, {:later, true}]} - - %{"edtf" => result} -> - {result, [{:earlier, false}, {:later, false}]} - end - end end diff --git a/lib/edtf/aggregate.ex b/lib/edtf/aggregate.ex index 2dc23a1..988610b 100644 --- a/lib/edtf/aggregate.ex +++ b/lib/edtf/aggregate.ex @@ -3,10 +3,6 @@ defmodule EDTF.Aggregate do Parser for EDTF Lists and Sets """ - @matchers list: ~r/^\{(.+)\}$/, set: ~r/^\[(.+)\]$/ - - @valid [EDTF.Date, EDTF.Range] - defstruct type: nil, values: [], level: 2, earlier: false, later: false @type t :: %__MODULE__{ @@ -17,38 +13,21 @@ defmodule EDTF.Aggregate do later: boolean() } - def match?(edtf), do: Enum.any?(@matchers, fn {_, re} -> Regex.match?(re, edtf) end) - - def parse(edtf) do - case Enum.find(@matchers, fn {_, re} -> Regex.match?(re, edtf) end) do - nil -> - EDTF.error() - - {type, re} -> - [_, dates] = Regex.run(re, edtf) - {dates, attributes} = EDTF.open_ended(dates) - - Regex.split(~r/\s*,\s*/, dates) - |> Enum.reduce_while([], &reducer/2) - |> finalize(type, attributes) - end - end - - defp reducer(date, acc) do - case EDTF.parse(date, @valid) do - {:ok, parsed} -> {:cont, [parsed | acc]} - {:error, _error} -> {:halt, :error} - end - end - - defp finalize(:error, _, _), do: EDTF.error() - - defp finalize(values, type, attributes), - do: %__MODULE__{ - type: type, - values: Enum.reverse(values), - earlier: attributes[:earlier], - later: attributes[:later], - level: 2 + def assemble({:list, value}), do: %__MODULE__{assemble(value) | type: :list} + def assemble({:set, value}), do: %__MODULE__{assemble(value) | type: :set} + + def assemble(value) do + dates = + Keyword.get(value, :dates, []) + |> Enum.map(fn + [{:interval, _}] = v -> EDTF.Interval.assemble(v) + v -> EDTF.Date.assemble({:date, v}) + end) + + %__MODULE__{ + values: dates, + earlier: Keyword.get(value, :earlier, false), + later: Keyword.get(value, :later, false) } + end end diff --git a/lib/edtf/date.ex b/lib/edtf/date.ex index cd5656e..69b4eb5 100644 --- a/lib/edtf/date.ex +++ b/lib/edtf/date.ex @@ -3,11 +3,6 @@ defmodule EDTF.Date do Parser for basic EDTF dates, including year, and decade """ - alias EDTF.{Season, Year} - - @matcher ~r/^Y?[~%?]?-?[\dX]+(?:E\d+)?(?:S\d+)?(?:-[~%?]?[\dX]{2})?(?:-[~%?]?[\dX]{2})?[~%?]?$/ - @subtypes [Year, Season] - defstruct type: :date, values: [], level: 0, @@ -31,146 +26,52 @@ defmodule EDTF.Date do } | nil - def match?(edtf), do: Regex.match?(@matcher, edtf) - - def parse(edtf) do - case Enum.find(@subtypes, & &1.match?(edtf)) do - nil -> parse_date(edtf) - mod -> mod.parse(edtf) - end - end - - defp parse_date(edtf) do - {edtf, attributes} = get_attributes(edtf) - - parse_date(edtf, attributes) - |> case do - :error -> EDTF.error() - result -> result - end - end - - defp parse_date(<<"-", val::binary-size(2)>>, attributes) do - {:ok, - %__MODULE__{type: :century, values: [0 - String.to_integer(val)], attributes: attributes}} - end - - defp parse_date(<>, attributes) do - {:ok, %__MODULE__{type: :century, values: [String.to_integer(val)], attributes: attributes}} - end - - defp parse_date(<<"-", val::binary-size(3)>>, attributes) do - {:ok, - %__MODULE__{type: :decade, values: [0 - String.to_integer(val)], attributes: attributes}} - end - - defp parse_date(<>, attributes) do - {:ok, %__MODULE__{type: :decade, values: [String.to_integer(val)], attributes: attributes}} - end - - defp parse_date(edtf, attributes) do - {edtf, masks} = - bitmask(edtf) - - [_, sign, edtf] = Regex.run(~r/^(-?)(.+)$/, edtf) - - {edtf, specificity} = - case String.length(edtf) do - 4 -> {"#{edtf}-01-01", :year} - 7 -> {"#{edtf}-01", :month} - _ -> {edtf, :day} - end - - case Elixir.Date.from_iso8601(sign <> edtf) do - {:ok, %Date{year: year, month: month, day: day}} -> - [year, month - 1, day] |> process_result(specificity, masks, attributes) - - {:error, _} -> - :error - end + def assemble({_, nil}), do: nil + + def assemble({type, value}) when type == :decade or type == :century, + do: %__MODULE__{ + type: type, + values: [Keyword.get(value, :value)], + attributes: Keyword.get(value, :attributes) + } + + def assemble({:year, value}) do + attributes = Keyword.get(value, :attributes, []) + multiplier = 10 ** Keyword.get(attributes, :exponent, 0) + significant = Keyword.get(attributes, :significant) + level = if significant, do: 2, else: 1 + + value = Keyword.get(value, :value) * multiplier + + %__MODULE__{ + type: :year, + values: [value], + attributes: [significant: significant], + level: level + } end - defp process_result(values, specificity, masks, attributes) do - values = - case specificity do - :day -> values - :month -> Enum.take(values, 2) - :year -> Enum.take(values, 1) - end - - attributes = Keyword.merge(attributes, masks) - - {:ok, - %__MODULE__{ - values: values, - attributes: attributes - }} - end - - defp bitmask(edtf) do - {str, _, attrs} = - edtf - |> String.graphemes() - |> Enum.reduce( - {"", 1, [unspecified: 0, approximate: 0, uncertain: 0]}, - fn char, {str, bits, attrs} -> - case char do - "X" -> - {str <> "0", bits * 2, add_bits(attrs, :unspecified, bits)} - - "~" -> - {str, bits, add_bits(attrs, :approximate, bits)} + def assemble({:date, [:infinity]}), do: %EDTF.Infinity{} - "?" -> - {str, bits, add_bits(attrs, :uncertain, bits)} + def assemble({:date, value}) do + values = Keyword.get(value, :values) - "%" -> - {str, bits, add_bits(attrs, :approximate, bits) |> add_bits(:uncertain, bits)} + {type, values} = + case values do + [year, month, day] -> + {:date, [year, month - 1, day]} - "-" -> - {str <> "-", bits, attrs} + [year, month] -> + if month > 12, do: {:season, [year, month]}, else: {:date, [year, month - 1]} - d -> - {str <> d, bits * 2, attrs} - end - end - ) - - {str - |> nonzero_month_and_day(), Keyword.reject(attrs, fn {_, v} -> v == 0 end)} - end - - defp add_bits(attrs, attr, bits) do - bits = - cond do - # unspecified can exist in any place - attr == :unspecified -> bits - # approximate or uncertain year (XXXX-mm-dd) - bits < 15 -> 15 - # approximate or uncertain month (yyyy-XX-dd) - bits < 48 -> 48 - # approximate or uncertain day (yyyy-mm-XX) - bits < 192 -> 192 + [year] -> + {:date, [year]} end - Keyword.update!(attrs, attr, fn v -> v + bits end) - end - - defp nonzero_month_and_day(str), do: String.replace(str, "-00", "-01") - - defp get_attributes(edtf) do - case Regex.named_captures(~r/^(?.+?)(?[~%?])?$/, edtf) do - %{"edtf" => result, "attr" => ""} -> - {result, []} - - %{"edtf" => result, "attr" => "~"} -> - {result, [{:approximate, true}]} - - %{"edtf" => result, "attr" => "%"} -> - {result, [{:approximate, true}, {:uncertain, true}]} - - %{"edtf" => result, "attr" => "?"} -> - {result, [{:uncertain, true}]} - end + %__MODULE__{ + type: type, + values: values, + attributes: Keyword.get(value, :attributes) + } end end diff --git a/lib/edtf/humanize.ex b/lib/edtf/humanize.ex index 38a5366..439a1a0 100644 --- a/lib/edtf/humanize.ex +++ b/lib/edtf/humanize.ex @@ -10,13 +10,10 @@ defmodule EDTF.Humanize do def humanize(nil), do: "Unknown" - def humanize([start_date | [end_date]]), - do: humanize(%EDTF.Interval{start: start_date, end: end_date}) - def humanize(%EDTF.Interval{start: start_date, end: end_date}) do case [start_date, end_date] do - [value | [%EDTF.Infinity{}]] -> "from #{humanize(value)}" - [%EDTF.Infinity{} | [value]] -> "before #{humanize(value)}" + [value, %EDTF.Infinity{}] -> "from #{humanize(value)}" + [%EDTF.Infinity{}, value] -> "before #{humanize(value)}" values -> values |> Enum.map_join(" to ", &humanize/1) end end diff --git a/lib/edtf/infinity.ex b/lib/edtf/infinity.ex index db70c1c..25e3f54 100644 --- a/lib/edtf/infinity.ex +++ b/lib/edtf/infinity.ex @@ -5,9 +5,4 @@ defmodule EDTF.Infinity do defstruct level: 1 @type t :: %__MODULE__{level: integer()} - - def match?(".."), do: true - def match?(_), do: false - def parse(".."), do: {:ok, %__MODULE__{level: 1}} - def parse(_), do: EDTF.error() end diff --git a/lib/edtf/interval.ex b/lib/edtf/interval.ex index 585bc18..b4662e1 100644 --- a/lib/edtf/interval.ex +++ b/lib/edtf/interval.ex @@ -3,45 +3,21 @@ defmodule EDTF.Interval do Parser for EDTF Intervals """ - @matcher ~r"^([^/]+)?/([^/]+)?$" - @valid [EDTF.Date, EDTF.Infinity] - - defstruct start: nil, - end: nil, + defstruct start: :unknown, + end: :unknown, level: 2 @type t :: %__MODULE__{ - start: EDTF.Date.t() | nil, - end: EDTF.Date.t() | nil, + start: EDTF.Date.t() | :unknown, + end: EDTF.Date.t() | :unknown, level: integer() } - def match?(edtf), do: Regex.match?(@matcher, edtf) - - def parse(edtf) do - case Regex.run(@matcher, edtf) do - [_ | values] -> - values - |> Enum.reduce_while([], &reducer/2) - |> case do - :error -> EDTF.error() - values -> {:ok, Enum.reverse(values) |> module()} - end - - _ -> - EDTF.error() - end - end - - defp reducer("", acc), do: {:cont, [nil | acc]} + def assemble([{:interval, value}]), do: assemble({:interval, value}) - defp reducer(date, acc) do - case EDTF.parse(date, @valid) do - {:ok, parsed} -> {:cont, [parsed | acc]} - {:error, _error} -> {:halt, :error} - end + def assemble({:interval, value}) do + start_date = {:date, Keyword.get(value, :start)} |> EDTF.Date.assemble() + end_date = {:date, Keyword.get(value, :end)} |> EDTF.Date.assemble() + %__MODULE__{start: start_date, end: end_date} end - - defp module([start | [stop]]), do: %__MODULE__{start: start, end: stop, level: 2} - defp module([v]), do: module([v, nil]) end diff --git a/lib/edtf/parser.ex b/lib/edtf/parser.ex new file mode 100644 index 0000000..156d3fb --- /dev/null +++ b/lib/edtf/parser.ex @@ -0,0 +1,133 @@ +defmodule EDTF.Parser do + @moduledoc """ + NimbleParsec parser for EDTF dates + """ + + import NimbleParsec + alias EDTF.Parser.Helpers + + # Basic combinators + qualifier = ascii_char([??, ?~, ?%]) + component_qualifier = lookahead_not(qualifier |> concat(eos())) |> concat(qualifier) + digit = ascii_char([?0..?9]) + digit_or_x = ascii_char([?0..?9, ?X]) + sign = ascii_char([?+, ?-]) + year = times(digit_or_x, 4) + month = times(digit_or_x, 2) + day = times(digit_or_x, 2) + + # Signed year with optional qualifier + qualified_year = + optional(component_qualifier |> tag(:qualifier)) + |> concat(optional(sign) |> tag(:sign)) + |> concat(year |> tag(:value)) + |> post_traverse({Helpers, :bitmask, [0]}) + + # Month with optional qualifier + qualified_month = + optional(component_qualifier |> tag(:qualifier)) + |> concat(month |> tag(:value)) + |> post_traverse({Helpers, :bitmask, [4]}) + + # Day with optional qualifier + qualified_day = + optional(component_qualifier |> tag(:qualifier)) + |> concat(day |> tag(:value)) + |> post_traverse({Helpers, :bitmask, [6]}) + + # Basic [-]YYYY[-MM[-DD]] with optional qualifiers + edtf_date = + qualified_year + |> optional(ignore(string("-")) |> concat(qualified_month)) + |> optional(ignore(string("-")) |> concat(qualified_day)) + |> optional(tag(qualifier, :qualifier)) + |> post_traverse({Helpers, :reduce, []}) + + # Continuation / Range Operator (..) + continuation = times(ascii_char([?.]), 2) |> replace(true) + + # Range ([date]..[date]) + range = + tag(edtf_date, :start) + |> concat(ignore(continuation)) + |> concat(tag(edtf_date, :end)) + + # Aggregates (Sets, Lists, and Intervals) + aggregate_item = choice([tag(range, :interval), edtf_date]) |> wrap() + aggregate_separator = ignore(string(",")) |> ignore(optional(repeat(ascii_char(~c" ")))) + + aggregate_values = + optional(continuation |> unwrap_and_tag(:earlier)) + |> concat( + aggregate_item + |> repeat(aggregate_separator |> concat(aggregate_item)) + |> tag(:dates) + ) + |> concat(optional(continuation |> unwrap_and_tag(:later))) + + edtf_interval = + optional(choice([continuation |> replace(:infinity), edtf_date]) |> tag(:start)) + |> ignore(ascii_char([?/])) + |> optional(choice([continuation |> replace(:infinity), edtf_date]) |> tag(:end)) + + edtf_list = + ignore(ascii_char([?{])) + |> concat(aggregate_values) + |> concat(ignore(ascii_char([?}]))) + + edtf_set = + ignore(ascii_char([?[])) + |> concat(aggregate_values) + |> concat(ignore(ascii_char([?]]))) + + # Level 0 Century and Decade + signed_integer = fn digits -> + optional(sign |> tag(:sign)) + |> concat( + times(digit, digits) + |> post_traverse({Helpers, :to_integer, []}) + |> unwrap_and_tag(:value) + |> wrap() + ) + |> concat(optional(qualifier) |> tag(:qualifier)) + |> post_traverse({Helpers, :apply_sign, []}) + |> post_traverse({Helpers, :apply_qualifier, []}) + end + + edtf_century = signed_integer.(2) + edtf_decade = signed_integer.(3) + + # Level 2 Years with optional exponents and significant digits + exponent = ignore(ascii_char([?E])) |> concat(integer(min: 1) |> unwrap_and_tag(:exponent)) + + significant = + ignore(ascii_char([?S])) |> concat(integer(min: 1) |> unwrap_and_tag(:significant)) + + qualified_year = + optional(sign |> tag(:sign)) + |> concat(integer(min: 1) |> unwrap_and_tag(:value)) + |> post_traverse({Helpers, :apply_sign, []}) + + edtf_year = + choice([ + optional(ignore(ascii_char([?Y]))) |> concat(qualified_year), + lookahead(choice([exponent, significant])) |> concat(qualified_year) + ]) + |> tag( + optional(exponent) |> concat(optional(significant)), + :attributes + ) + + defparsec( + :parse, + choice([ + tag(edtf_date, :date) |> eos(), + tag(edtf_century, :century) |> eos(), + tag(edtf_decade, :decade) |> eos(), + tag(edtf_year, :year) |> eos(), + tag(edtf_interval, :interval) |> eos(), + tag(edtf_list, :list) |> eos(), + tag(edtf_set, :set) |> eos() + ]) + ) +end diff --git a/lib/edtf/parser/helpers.ex b/lib/edtf/parser/helpers.ex new file mode 100644 index 0000000..80ccf0f --- /dev/null +++ b/lib/edtf/parser/helpers.ex @@ -0,0 +1,204 @@ +defmodule EDTF.Parser.Helpers do + @moduledoc """ + Helper functions for parsing EDTF dates + """ + + import Bitwise + + @qualifier_attributes %{ + ~c"~" => [:approximate], + ~c"?" => [:uncertain], + ~c"%" => [:approximate, :uncertain] + } + + @doc """ + Calculate the appropriate qualifier bitmasks for a given YYYY, MM, or DD. Bits + are calculated from the left and shifted left to account for the specific + component. + + - The digits of YYYY are 1, 2, 4, 8 + - The digits of MM are 16, 32 + - The digits of DD are 64, 128 + + A full component qualifier (leading `~`, `?`, or `%`) results in the component + being fully masked (15 for year, 48 for month, or 192 for day). Unspecified digits + (`X`) flip individual bits. + + Example: + ```elixir + iex> bitmask("-%02", [value: ~c"200X", sign: ~c"-"], %{}, nil, nil, 0) + {"-%02", [[value: -2000, attributes: [unspecified: 8]]], %{}} + + iex> bitmask("", [value: ~c"02", qualifier: ~c"%"], %{}, nil, nil, 4) + {"", [[value: 2, attributes: [approximate: 48, uncertain: 48]]], %{}} + ``` + """ + def bitmask(rest, value, context, _line, _offset, shift) do + {rest, + [ + bitmask( + Keyword.get(value, :value), + Keyword.get(value, :sign, ~c""), + Keyword.get(value, :qualifier, ~c""), + shift + ) + ], context} + end + + def bitmask(bitstring, sign, [], shift) do + {output, mask} = + bitstring + |> Enum.with_index() + |> Enum.reduce({~c"", 0}, fn + {?X, index}, {output, mask} -> + char = if output == ~c"0" and shift > 0 and index > 0, do: "1", else: "0" + {[char | output], mask + 2 ** index} + + {char, _}, {output, mask} -> + {[char | output], mask} + end) + + output = Enum.reverse(output) + + {output, mask} = + {[sign | output] + |> IO.iodata_to_binary() + |> String.to_integer(), mask <<< shift} + + case mask do + 0 -> [value: output] + mask -> [value: output, attributes: [unspecified: mask]] + end + end + + def bitmask(bitstring, sign, qualifier, shift) do + mask = ((1 <<< length(bitstring)) - 1) <<< shift + + attributes = + Map.get(@qualifier_attributes, qualifier) + |> Enum.map(&{&1, mask}) + + [ + value: IO.iodata_to_binary([sign | bitstring]) |> String.to_integer(), + attributes: attributes + ] + end + + @doc """ + Apply a parsed sign to a parsed integer value. + + Example: + ```elixir + iex> apply_sign("", [value: 2000, sign: ~c"-"], %{}, nil, nil) + {"", [value: -2000], %{}} + + iex> apply_sign("", [value: 2000], %{}, nil, nil) + {"", [value: 2000], %{}} + ``` + """ + def apply_sign(rest, value, context, _line, _offset) do + value = List.flatten(value) + + result = + case Keyword.get(value, :sign) do + ~c"-" -> 0 - Keyword.get(value, :value) + _ -> Keyword.get(value, :value) + end + + {rest, value |> Keyword.delete(:sign) |> Keyword.put(:value, result), context} + end + + @doc """ + Apply a parsed qualifier to a single value. + + Example: + ```elixir + iex> apply_qualifier("", [value: 2000, qualifier: ~c"%"], %{}, nil, nil) + {"", [attributes: [approximate: true, uncertain: true], value: 2000], %{}} + + iex> apply_qualifier("", [value: 2000], %{}, nil, nil) + {"", [attributes: [], value: 2000], %{}} + ``` + """ + def apply_qualifier(rest, value, context, _line, _offset) do + qualifier = Keyword.get(value, :qualifier) + + attributes = + Map.get(@qualifier_attributes, qualifier, []) + |> Enum.map(&{&1, true}) + + {rest, Keyword.delete(value, :qualifier) |> Keyword.put(:attributes, attributes), context} + end + + @doc """ + Convert a parsed numeric bitstring to an integer + + Example: + ```elixir + iex> to_integer("", ~c"4321", %{}, nil, nil) + {"", [1234], %{}} + """ + def to_integer(rest, value, context, _line, _offset) do + {rest, [value |> Enum.reverse() |> to_string() |> String.to_integer()], context} + end + + @doc """ + Reduce a list of components and bitmasks to a single list of values with + their mask attributes ORed together. + + Example: + ```elixir + iex> reduce("", [ + ...> [value: 10, attributes: [unspecified: 128]], + ...> [value: 1, attributes: [unspecified: 32]], + ...> [value: 0, attributes: [unspecified: 5]] + ...> ], %{}, nil, nil) + {"", [values: [0, 1, 10], attributes: [unspecified: 165]], %{}} + + iex> reduce("", [ + ...> [value: 10, attributes: [unspecified: 128]], + ...> [value: 1, attributes: [approximate: 48]], + ...> [value: 0, attributes: [approximate: 15, uncertain: 15]] + ...> ], %{}, nil, nil) + {"", [values: [0, 1, 10], attributes: [unspecified: 128, approximate: 63, uncertain: 15]], %{}} + """ + def reduce(rest, values, context, _line, _offset) do + case reduce(Keyword.get(values, :qualifier), Enum.reject(values, &is_tuple/1)) do + {:error, reason} -> {:error, reason} + values -> {rest, values, context} + end + end + + defp reduce(nil, values) do + {values, attributes} = + Enum.reduce(values, {[], []}, fn member, {values_acc, attrs_acc} -> + value = Keyword.get(member, :value) + attrs = Keyword.get(member, :attributes, []) + new_values_acc = [value | values_acc] + + new_attrs_acc = + Enum.reduce(attrs, attrs_acc, fn {key, attr_value}, acc -> + Keyword.update(acc, key, attr_value, &(&1 ||| attr_value)) + end) + |> Enum.reject(fn {_, v} -> v == 0 end) + + {new_values_acc, new_attrs_acc} + end) + + [values: values, attributes: attributes] + end + + defp reduce(qualifier, values) do + if Enum.all?(values, fn v -> Keyword.get(v, :attributes, []) |> length() == 0 end) do + attributes = + Map.get(@qualifier_attributes, qualifier, []) + |> Enum.map(&{&1, true}) + + values = Enum.reduce(values, [], fn value, acc -> [Keyword.get(value, :value) | acc] end) + + [values: values, attributes: attributes] + else + {:error, "Cannot mix level 0 and level 2 qualifiers"} + end + end +end diff --git a/lib/edtf/range.ex b/lib/edtf/range.ex deleted file mode 100644 index 07172a8..0000000 --- a/lib/edtf/range.ex +++ /dev/null @@ -1,26 +0,0 @@ -defmodule EDTF.Range do - @moduledoc """ - Parser for EDTF Ranges - """ - - @matcher ~r"^([^/]+)\.\.([^/]+)$" - @valid [EDTF.Date] - - def match?(edtf), do: Regex.match?(@matcher, edtf) - - def parse(edtf) do - case Regex.run(@matcher, edtf) do - [_, start, stop] -> - case {EDTF.parse(start, @valid), EDTF.parse(stop, @valid)} do - {{:ok, start_date}, {:ok, stop_date}} -> - {:ok, [start_date, stop_date]} - - _ -> - EDTF.error() - end - - _ -> - EDTF.error() - end - end -end diff --git a/lib/edtf/season.ex b/lib/edtf/season.ex deleted file mode 100644 index fc7fdb6..0000000 --- a/lib/edtf/season.ex +++ /dev/null @@ -1,29 +0,0 @@ -defmodule EDTF.Season do - @moduledoc """ - Parser for EDTF Seasons - """ - - @matcher ~r/^(?-?\d{4})-(?\d{2})$/ - @seasons ~w(21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41) - - def match?(edtf) do - case Regex.named_captures(@matcher, edtf) do - nil -> false - %{"season" => season} -> Enum.member?(@seasons, season) - end - end - - def parse(edtf) do - case Regex.named_captures(@matcher, edtf) do - nil -> - EDTF.error() - - %{"year" => year, "season" => season} -> - {:ok, - %EDTF.Date{ - type: :season, - values: [String.to_integer(year), String.to_integer(season)] - }} - end - end -end diff --git a/lib/edtf/year.ex b/lib/edtf/year.ex deleted file mode 100644 index 9bae1f4..0000000 --- a/lib/edtf/year.ex +++ /dev/null @@ -1,42 +0,0 @@ -defmodule EDTF.Year do - @moduledoc """ - Parser for EDTF Level 1 Years - """ - - @matcher ~r/^Y(?-?\d+)(?:E(?\d+))?(?:S(?\d+))?$/ - - def match?(edtf), do: Regex.match?(@matcher, edtf) - - def parse(edtf) do - Regex.named_captures(@matcher, edtf) - |> calculate() - |> case do - :error -> EDTF.error() - result -> result - end - end - - defp calculate(%{"year" => year, "exponent" => "", "significant" => significant}), - do: - {:ok, - %EDTF.Date{type: :year, values: [String.to_integer(year)], level: 1} - |> add_significance(significant)} - - defp calculate(%{"year" => year, "exponent" => exponent, "significant" => significant}) do - {:ok, - %EDTF.Date{ - type: :year, - values: [String.to_integer(year) * 10 ** String.to_integer(exponent)], - level: 2 - } - |> add_significance(significant)} - end - - defp calculate(_), do: :error - - defp add_significance(result, ""), do: result - - defp add_significance(result, v) do - %EDTF.Date{result | level: 2, attributes: [{:significant, String.to_integer(v)}]} - end -end diff --git a/mix.exs b/mix.exs index d4a2d6c..81f3325 100644 --- a/mix.exs +++ b/mix.exs @@ -51,7 +51,8 @@ defmodule EDTF.MixProject do {:credo, "~> 1.7", only: [:dev, :test], runtime: false}, {:ex_doc, "~> 0.34", only: [:dev, :test, :docs], runtime: false}, {:excoveralls, "~> 0.18", only: [:dev, :test], runtime: false}, - {:inflex, "~> 2.1"} + {:inflex, "~> 2.1"}, + {:nimble_parsec, "~> 1.4"} ] end diff --git a/test/edtf/date_test.exs b/test/edtf/date_test.exs index 6f87d38..81482f6 100644 --- a/test/edtf/date_test.exs +++ b/test/edtf/date_test.exs @@ -123,6 +123,46 @@ defmodule EDTF.DateTest do refute subject.attributes[:uncertain] assert subject.attributes[:unspecified] == 165 end + + @tag edtf: "201?" + test "decade", %{subject: subject} do + assert subject.type == :decade + assert subject.values == [201] + assert subject.level == 1 + refute subject.attributes[:approximate] + assert subject.attributes[:uncertain] + refute subject.attributes[:unspecified] + end + + @tag edtf: "-201~" + test "negative decade", %{subject: subject} do + assert subject.type == :decade + assert subject.values == [-201] + assert subject.level == 1 + assert subject.attributes[:approximate] + refute subject.attributes[:uncertain] + refute subject.attributes[:unspecified] + end + + @tag edtf: "20%" + test "century", %{subject: subject} do + assert subject.type == :century + assert subject.values == [20] + assert subject.level == 1 + assert subject.attributes[:approximate] + assert subject.attributes[:uncertain] + refute subject.attributes[:unspecified] + end + + @tag edtf: "-20?" + test "negative century", %{subject: subject} do + assert subject.type == :century + assert subject.values == [-20] + assert subject.level == 1 + refute subject.attributes[:approximate] + assert subject.attributes[:uncertain] + refute subject.attributes[:unspecified] + end end describe "significant digits" do diff --git a/test/edtf/level_test.exs b/test/edtf/level_test.exs new file mode 100644 index 0000000..500cb3b --- /dev/null +++ b/test/edtf/level_test.exs @@ -0,0 +1,15 @@ +defmodule EDTF.LevelTest do + use ExUnit.Case + alias EDTF.Level + + describe "add_level/1" do + test "errors pass through" do + assert {:error, :no_level} |> Level.add_level() == {:error, :no_level} + end + + test "status-wrapped value" do + date = EDTF.parse("2024") + assert Level.add_level(date) == date + end + end +end diff --git a/test/edtf/parser_test.exs b/test/edtf/parser_test.exs new file mode 100644 index 0000000..c1f2c8c --- /dev/null +++ b/test/edtf/parser_test.exs @@ -0,0 +1,6 @@ +defmodule EDTF.ParserTest do + use ExUnit.Case + import ExUnit.DocTest + + doctest EDTF.Parser.Helpers, import: true +end diff --git a/test/edtf_test.exs b/test/edtf_test.exs index 511ff96..8b837bd 100644 --- a/test/edtf_test.exs +++ b/test/edtf_test.exs @@ -13,5 +13,6 @@ defmodule EDTFTest do test "parse/1" do assert EDTF.parse("2020") == {:ok, %EDTF.Date{level: 0, values: [2020]}} assert EDTF.parse("bad date!") == {:error, :invalid_format} + assert EDTF.parse("2020-%06-25?") == {:error, :invalid_format} end end diff --git a/test/error_test.exs b/test/error_test.exs deleted file mode 100644 index 6786bb1..0000000 --- a/test/error_test.exs +++ /dev/null @@ -1,20 +0,0 @@ -defmodule EDTF.ErrorTest do - use ExUnit.Case - - test "edge cases" do - refute EDTF.Infinity.match?("") - assert EDTF.Infinity.parse("") == {:error, :invalid_format} - end - - test "parser errors" do - assert EDTF.Date.parse("bad!") == {:error, :invalid_format} - assert EDTF.Aggregate.parse("bad!") == {:error, :invalid_format} - assert EDTF.Aggregate.parse("[bad!]") == {:error, :invalid_format} - assert EDTF.Range.parse("bad!") == {:error, :invalid_format} - assert EDTF.Range.parse("1000..bad!") == {:error, :invalid_format} - assert EDTF.Interval.parse("bad!") == {:error, :invalid_format} - assert EDTF.Interval.parse("2024/bad!") == {:error, :invalid_format} - assert EDTF.Season.parse("2024-bad!") == {:error, :invalid_format} - assert EDTF.Year.parse("bad!") == {:error, :invalid_format} - end -end