From 3882fa38a328b2a21bd8bce6973fa96a4931141b Mon Sep 17 00:00:00 2001 From: Marco Cusumano-Towner Date: Fri, 5 Mar 2021 16:50:59 -0500 Subject: [PATCH 1/5] Add a couple docstrings --- src/distributions/maybe_swap.jl | 5 +++++ src/distributions/time_prior.jl | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/src/distributions/maybe_swap.jl b/src/distributions/maybe_swap.jl index e4cdbc8..2514ccd 100644 --- a/src/distributions/maybe_swap.jl +++ b/src/distributions/maybe_swap.jl @@ -1,3 +1,8 @@ +""" + MaybeSwap(val, options, prob) + +With probability prob, return a random element from options, otherwise return val. +""" struct MaybeSwap <: PCleanDistribution end supports_explicitly_missing_observations(::MaybeSwap) = true diff --git a/src/distributions/time_prior.jl b/src/distributions/time_prior.jl index c22f6f5..58a4292 100644 --- a/src/distributions/time_prior.jl +++ b/src/distributions/time_prior.jl @@ -1,5 +1,12 @@ using CSV +""" + TimePrior( + +Return a random time stamp of form @sprintf("%d:%02d %s", hours, minutes, ampm). + +The hours, minutes and ampm are drawn uniformly from {1, .., 12}, {0, .., 59}, and {"a.m.", "p.m."} respectively. +""" struct TimePrior <: PCleanDistribution end has_discrete_proposal(::TimePrior) = true From 813a5470e45fe82edefcd9e65da3664dadb76c4a Mon Sep 17 00:00:00 2001 From: Marco Cusumano-Towner Date: Fri, 5 Mar 2021 18:49:44 -0500 Subject: [PATCH 2/5] Add docstring for StringPrior --- src/distributions/string_prior.jl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/distributions/string_prior.jl b/src/distributions/string_prior.jl index 7270b81..2cdd1e8 100644 --- a/src/distributions/string_prior.jl +++ b/src/distributions/string_prior.jl @@ -1,5 +1,13 @@ using CSV +""" + str::String ~ StringPrior(min_length, max_length, proposal_atoms::Vector{String}) + +Sample a string of random length froma simple bigram model fit to English text. 
+ +The string length is uniformly distributed between min_length and max_length (inclusive). +The alphabet is {'a', 'b', .., 'z', ' ', '.'}. +""" struct StringPrior <: PCleanDistribution end letter_probs_file = joinpath(dirname(pathof(PClean)), "distributions", "lmparams", "letter_probabilities.csv") From 84e8b45684b9f3a7e8caef91c3a2246aaef699bc Mon Sep 17 00:00:00 2001 From: Marco Cusumano-Towner Date: Fri, 5 Mar 2021 19:17:00 -0500 Subject: [PATCH 3/5] Add docstring for AddTypos --- src/distributions/add_typos.jl | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/distributions/add_typos.jl b/src/distributions/add_typos.jl index 8ffcc4e..eed0246 100644 --- a/src/distributions/add_typos.jl +++ b/src/distributions/add_typos.jl @@ -1,5 +1,26 @@ import StringDistances: DamerauLevenshtein, evaluate +""" + word_with_typos::String ~ AddTypos(word::String, max_typos=nothing) + +Add a random number of random typos to the given string. + +The distribution on the of typos added to a word depends on the word +length. On average there is approximately 1 typo for every 45 characters in the +input word when max_typos is large or not provided. 
+ +The typos can be one of several types: + +- insertion: insert a random lower-case letter at a random location + +- deletion: delete a random character + +- substitution: replace a random character with a random lower-case letter + +- transpose: swap a random pair of two consecutive letters + +NOTE: The log-density is approximate +""" struct AddTypos <: PCleanDistribution end has_discrete_proposal(::AddTypos) = false From a61034c6eeaf4d66ffc327c8648f85da30d334eb Mon Sep 17 00:00:00 2001 From: Marco Cusumano-Towner Date: Fri, 5 Mar 2021 20:58:01 -0500 Subject: [PATCH 4/5] Improve docstrings --- src/distributions/add_noise.jl | 5 +++++ src/distributions/add_typos.jl | 2 +- src/distributions/maybe_swap.jl | 2 +- src/distributions/string_prior.jl | 4 ++-- src/distributions/time_prior.jl | 6 +++--- 5 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/distributions/add_noise.jl b/src/distributions/add_noise.jl index f4f2a8b..e58b342 100644 --- a/src/distributions/add_noise.jl +++ b/src/distributions/add_noise.jl @@ -1,3 +1,8 @@ +""" + noisy_value::Float64 ~ AddNoise(mean::Float64, std::Float64) + +Adds normally-distributed random noise (with standard deviation `std`) to the value `std`. +""" struct AddNoise <: PCleanDistribution end has_discrete_proposal(::AddNoise) = false diff --git a/src/distributions/add_typos.jl b/src/distributions/add_typos.jl index eed0246..75078ef 100644 --- a/src/distributions/add_typos.jl +++ b/src/distributions/add_typos.jl @@ -7,7 +7,7 @@ Add a random number of random typos to the given string. The distribution on the of typos added to a word depends on the word length. On average there is approximately 1 typo for every 45 characters in the -input word when max_typos is large or not provided. +input word when `max_typos` is large or not provided. 
The typos can be one of several types: diff --git a/src/distributions/maybe_swap.jl b/src/distributions/maybe_swap.jl index 2514ccd..a23f7a3 100644 --- a/src/distributions/maybe_swap.jl +++ b/src/distributions/maybe_swap.jl @@ -1,7 +1,7 @@ """ MaybeSwap(val, options, prob) -With probability prob, return a random element from options, otherwise return val. +With probability `prob`, return a random element from `options`, otherwise return `val`. """ struct MaybeSwap <: PCleanDistribution end diff --git a/src/distributions/string_prior.jl b/src/distributions/string_prior.jl index 2cdd1e8..d4a88e0 100644 --- a/src/distributions/string_prior.jl +++ b/src/distributions/string_prior.jl @@ -5,8 +5,8 @@ using CSV Sample a string of random length froma simple bigram model fit to English text. -The string length is uniformly distributed between min_length and max_length (inclusive). -The alphabet is {'a', 'b', .., 'z', ' ', '.'}. +The string length is uniformly distributed between `min_length` and `max_length` (inclusive). +The alphabet is the set {'a', 'b', .., 'z', ' ', '.'}. """ struct StringPrior <: PCleanDistribution end diff --git a/src/distributions/time_prior.jl b/src/distributions/time_prior.jl index 58a4292..d2e9b07 100644 --- a/src/distributions/time_prior.jl +++ b/src/distributions/time_prior.jl @@ -1,11 +1,11 @@ using CSV """ - TimePrior( + timestamp::String ~ TimePrior(proposal_atoms::Vector{String}) -Return a random time stamp of form @sprintf("%d:%02d %s", hours, minutes, ampm). +Return a random time stamp of form `@sprintf("%d:%02d %s", hours, minutes, ampm)`. -The hours, minutes and ampm are drawn uniformly from {1, .., 12}, {0, .., 59}, and {"a.m.", "p.m."} respectively. +The `hours`, `minutes` and `ampm` are drawn uniformly from {1, .., 12}, {0, .., 59}, and {"a.m.", "p.m."} respectively. 
""" struct TimePrior <: PCleanDistribution end From b7056301a3dc16ba53d393e4043145b87e5ffd2c Mon Sep 17 00:00:00 2001 From: Marco Cusumano-Towner Date: Fri, 5 Mar 2021 20:59:29 -0500 Subject: [PATCH 5/5] Fixes --- src/distributions/add_noise.jl | 2 +- src/distributions/add_typos.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/distributions/add_noise.jl b/src/distributions/add_noise.jl index e58b342..433ec92 100644 --- a/src/distributions/add_noise.jl +++ b/src/distributions/add_noise.jl @@ -1,7 +1,7 @@ """ noisy_value::Float64 ~ AddNoise(mean::Float64, std::Float64) -Adds normally-distributed random noise (with standard deviation `std`) to the value `std`. +Adds normally-distributed random noise (with standard deviation `std`) to the value `mean`. """ struct AddNoise <: PCleanDistribution end diff --git a/src/distributions/add_typos.jl b/src/distributions/add_typos.jl index 75078ef..9e025c0 100644 --- a/src/distributions/add_typos.jl +++ b/src/distributions/add_typos.jl @@ -3,7 +3,7 @@ import StringDistances: DamerauLevenshtein, evaluate """ word_with_typos::String ~ AddTypos(word::String, max_typos=nothing) -Add a random number of random typos to the given string. +Add a random number of random typos to `word`. The distribution on the of typos added to a word depends on the word length. On average there is approximately 1 typo for every 45 characters in the