From b1af3f5f756095dc4ec6b3dc2da7b6affad60217 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 01/52] reformat using black

---
 croissance/__init__.py                      |  10 +-
 croissance/__main__.py                      | 105 +++++++++++-------
 croissance/estimation/__init__.py           | 111 ++++++++++++-------
 croissance/estimation/defaults.py           |   9 +-
 croissance/estimation/outliers.py           |  15 ++-
 croissance/estimation/ranking.py            |  19 +++-
 croissance/estimation/regression.py         |  35 +++---
 croissance/estimation/smoothing/__init__.py |  10 +-
 croissance/estimation/smoothing/segments.py |  18 +--
 croissance/estimation/util.py               |  25 +++--
 croissance/figures/__init__.py              | 115 +++++++++++++-------
 croissance/formats/input.py                 |   6 +-
 croissance/formats/output.py                |  26 +++--
 setup.py                                    |  48 ++++----
 test.py                                     |  82 ++++++++------
 15 files changed, 393 insertions(+), 241 deletions(-)

diff --git a/croissance/__init__.py b/croissance/__init__.py
index f535c77..a365d3c 100644
--- a/croissance/__init__.py
+++ b/croissance/__init__.py
@@ -1,15 +1,13 @@
 from croissance.estimation import Estimator
 from collections import namedtuple
 
-AnnotatedGrowthCurve = namedtuple('AnnotatedGrowthCurve', ('series', 'outliers', 'growth_phases'))
+AnnotatedGrowthCurve = namedtuple(
+    "AnnotatedGrowthCurve", ("series", "outliers", "growth_phases")
+)
 
 
-def process_curve(curve: 'pandas.Series', **kwargs):
+def process_curve(curve: "pandas.Series", **kwargs):
     estimator = Estimator(**kwargs)
     if curve.isnull().all():
         return AnnotatedGrowthCurve(curve, [], [])
     return estimator.growth(curve)
-
-
-
-
diff --git a/croissance/__main__.py b/croissance/__main__.py
index 1bc9b20..fb66ca2 100644
--- a/croissance/__main__.py
+++ b/croissance/__main__.py
@@ -12,68 +12,86 @@
 from croissance.formats.output import TSVWriter
 
 parser = argparse.ArgumentParser(description="Estimate growth rates in growth curves")
-parser.add_argument('infiles', type=argparse.FileType('r'), nargs='+')
-parser.add_argument('--input-format', type=str, default='tsv')
-parser.add_argument('--input-time-unit',
-                    choices=['hours', 'minutes'],
-                    default='hours',
-                    help="Time unit in time column")
-parser.add_argument('--N0',
-                    type=float,
-                    default=0.0,
-                    help="Baseline for constrained regressions & identifying curve segments in log space.")
-parser.add_argument('--constrain-N0',
-                    action='store_true',
-                    help="All growth phases will begin at N0 when this flag is set.")
-parser.add_argument('--segment-log-N0',
-                    action='store_true',
-                    help="Identify curve segments using log(N-N0) rather than N; "
-                         "increases sensitivity to changes in exponential growth but has to assume a certain N0.")
-parser.add_argument('--output-format', type=str, default='tsv')
-parser.add_argument('--output-suffix', type=str, default='.output')
-parser.add_argument('--output-exclude-default-phase',
-                    type=bool,
-                    default=False,
-                    help="The output will not include a phase '0' for each curve when this flag is set.")
-parser.add_argument('--figures',
-                    action='store_true',
-                    help='Renders a PDF file with figures for each curve.')
+parser.add_argument("infiles", type=argparse.FileType("r"), nargs="+")
+parser.add_argument("--input-format", type=str, default="tsv")
+parser.add_argument(
+    "--input-time-unit",
+    choices=["hours", "minutes"],
+    default="hours",
+    help="Time unit in time column",
+)
+parser.add_argument(
+    "--N0",
+    type=float,
+    default=0.0,
+    help="Baseline for constrained regressions & identifying curve segments in log space.",
+)
+parser.add_argument(
+    "--constrain-N0",
+    action="store_true",
+    help="All growth phases will begin at N0 when this flag is set.",
+)
+parser.add_argument(
+    "--segment-log-N0",
+    action="store_true",
+    help="Identify curve segments using log(N-N0) rather than N; "
+    "increases sensitivity to changes in exponential growth but has to assume a certain N0.",
+)
+parser.add_argument("--output-format", type=str, default="tsv")
+parser.add_argument("--output-suffix", type=str, default=".output")
+parser.add_argument(
+    "--output-exclude-default-phase",
+    type=bool,
+    default=False,
+    help="The output will not include a phase '0' for each curve when this flag is set.",
+)
+parser.add_argument(
+    "--figures",
+    action="store_true",
+    help="Renders a PDF file with figures for each curve.",
+)
 
 
 def main():
     args = parser.parse_args()
 
-    if args.input_format.lower() != 'tsv':
+    if args.input_format.lower() != "tsv":
         raise NotImplementedError()
 
-    if args.output_format.lower() != 'tsv':
+    if args.output_format.lower() != "tsv":
         raise NotImplementedError()
 
     reader = TSVReader()
 
-    estimator = Estimator(constrain_n0=args.constrain_N0,
-                          segment_log_n0=args.segment_log_N0,
-                          n0=args.N0)
+    estimator = Estimator(
+        constrain_n0=args.constrain_N0, segment_log_n0=args.segment_log_N0, n0=args.N0
+    )
 
     try:
         empties = {}
-        for infile in tqdm(args.infiles, unit='infile'):
-            outfile = open('{}{}.tsv'.format(infile.name[:-4], args.output_suffix), 'w')
+        for infile in tqdm(args.infiles, unit="infile"):
+            outfile = open("{}{}.tsv".format(infile.name[:-4], args.output_suffix), "w")
 
             if args.figures:
-                figfile = open('{}{}.pdf'.format(infile.name[:-4], args.output_suffix), 'wb')
+                figfile = open(
+                    "{}{}.pdf".format(infile.name[:-4], args.output_suffix), "wb"
+                )
                 figwriter = PDFWriter(figfile)
 
-            outwriter = TSVWriter(outfile, include_default_phase=not args.output_exclude_default_phase)
+            outwriter = TSVWriter(
+                outfile, include_default_phase=not args.output_exclude_default_phase
+            )
 
-            for name, curve in tqdm(list(reader.read(infile)), unit='curve'):
+            for name, curve in tqdm(list(reader.read(infile)), unit="curve"):
                 if curve.empty:
                     try:
                         empties[infile.name].append(name)
                     except KeyError:
                         empties[infile.name] = [name]
                     continue
-                annotated_curve = estimator.growth(normalize_time_unit(curve, args.input_time_unit))
+                annotated_curve = estimator.growth(
+                    normalize_time_unit(curve, args.input_time_unit)
+                )
 
                 outwriter.write(name, annotated_curve)
 
@@ -88,10 +106,19 @@ def main():
         pass
 
     if empties != {}:
-        print('\nEmpty cells were found and discarded:\n', '\n'.join([(infile.name+'\t'+name) for key, names in empties.items() for name in names]))
+        print(
+            "\nEmpty cells were found and discarded:\n",
+            "\n".join(
+                [
+                    (infile.name + "\t" + name)
+                    for key, names in empties.items()
+                    for name in names
+                ]
+            ),
+        )
     else:
         print()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     sys.exit(main())
diff --git a/croissance/estimation/__init__.py b/croissance/estimation/__init__.py
index d54dd53..a132d29 100644
--- a/croissance/estimation/__init__.py
+++ b/croissance/estimation/__init__.py
@@ -5,17 +5,28 @@
 import numpy
 import pandas
 
-from croissance.estimation.defaults import MINIMUM_VALID_OD, RESAMPLE_POINTS_PER_HOUR, MINIMUM_VALID_SLOPE, \
-    MINIMUM_PHASE_LENGTH, PHASE_MIN_DELTA_LOG_OD, CURVE_MINIMUM_DURATION_HOURS
+from croissance.estimation.defaults import (
+    MINIMUM_VALID_OD,
+    RESAMPLE_POINTS_PER_HOUR,
+    MINIMUM_VALID_SLOPE,
+    MINIMUM_PHASE_LENGTH,
+    PHASE_MIN_DELTA_LOG_OD,
+    CURVE_MINIMUM_DURATION_HOURS,
+)
 from croissance.estimation.outliers import remove_outliers
 from croissance.estimation.ranking import rank_phases
 from croissance.estimation.regression import fit_exponential
 from croissance.estimation.smoothing import median_smoothing
 from croissance.estimation.smoothing.segments import segment_spline_smoothing
-from croissance.estimation.util import savitzky_golay, resample, with_overhangs, points_per_hour
+from croissance.estimation.util import (
+    savitzky_golay,
+    resample,
+    with_overhangs,
+    points_per_hour,
+)
 
 
-class RawGrowthPhase(namedtuple('RawGrowthPhase', ('start', 'end'))):
+class RawGrowthPhase(namedtuple("RawGrowthPhase", ("start", "end"))):
     __slots__ = ()
 
     @property
@@ -23,14 +34,11 @@ def duration(self):
         return self.end - self.start
 
 
-class GrowthPhase(namedtuple('GrowthPhase', (
-        'start',
-        'end',
-        'slope',
-        'intercept',
-        'n0',
-        'attributes'
-))):
+class GrowthPhase(
+    namedtuple(
+        "GrowthPhase", ("start", "end", "slope", "intercept", "n0", "attributes")
+    )
+):
     __slots__ = ()
 
     @property
@@ -38,7 +46,7 @@ def duration(self):
         return self.end - self.start
 
     @classmethod
-    def pick_best(cls, growth_phases, metric='duration'):
+    def pick_best(cls, growth_phases, metric="duration"):
         try:
             return sorted(growth_phases, key=attrgetter(metric), reverse=True)[0]
         except IndexError:
@@ -48,21 +56,27 @@ def __getattr__(self, item):
         return self.attributes.get(item, None)
 
 
-AnnotatedGrowthCurve = namedtuple('AnnotatedGrowthCurve', ('series', 'outliers', 'growth_phases'))
+AnnotatedGrowthCurve = namedtuple(
+    "AnnotatedGrowthCurve", ("series", "outliers", "growth_phases")
+)
 
 
 class Estimator(object):
-    def __init__(self,
-                 *,
-                 segment_log_n0: bool = False,
-                 constrain_n0: bool = False,
-                 n0: float = 0.0):
+    def __init__(
+        self,
+        *,
+        segment_log_n0: bool = False,
+        constrain_n0: bool = False,
+        n0: float = 0.0
+    ):
         self._logger = logging.getLogger(__name__)
         self._segment_log_n0 = segment_log_n0
         self._constrain_n0 = constrain_n0
         self._n0 = n0
 
-    def _find_growth_phases(self, curve: 'pandas.Series', window) -> 'typing.List[RawGrowthPhase]':
+    def _find_growth_phases(
+        self, curve: "pandas.Series", window
+    ) -> "typing.List[RawGrowthPhase]":
         """
         Finds growth phases by locating regions in a series where both the first and second derivatives are positive.
 
@@ -73,8 +87,10 @@ def _find_growth_phases(self, curve: 'pandas.Series', window) -> 'typing.List[Ra
         first_derivative = savitzky_golay(curve, window, 3, deriv=1)
         second_derivative = savitzky_golay(curve, window, 3, deriv=2)
 
-        growth = pandas.Series(index=curve.index,
-                               data=(first_derivative.values > 0) & (second_derivative.values > 0))
+        growth = pandas.Series(
+            index=curve.index,
+            data=(first_derivative.values > 0) & (second_derivative.values > 0),
+        )
 
         istart, iend, phases = None, None, []
         for i, v in zip(growth.index, growth.values):
@@ -94,7 +110,9 @@ def _find_growth_phases(self, curve: 'pandas.Series', window) -> 'typing.List[Ra
     def growth(self, curve: pandas.Series) -> AnnotatedGrowthCurve:
         series = curve.dropna()
 
-        n_hours = int(numpy.round(points_per_hour(series) * CURVE_MINIMUM_DURATION_HOURS))
+        n_hours = int(
+            numpy.round(points_per_hour(series) * CURVE_MINIMUM_DURATION_HOURS)
+        )
 
         if n_hours == 0:
             return AnnotatedGrowthCurve(series, [], [])
@@ -112,7 +130,9 @@ def growth(self, curve: pandas.Series) -> AnnotatedGrowthCurve:
             series_log_n0 = numpy.log(series - self._n0).dropna()
             smooth_series = segment_spline_smoothing(series, series_log_n0)
         else:
-            smooth_series = segment_spline_smoothing(series, )
+            smooth_series = segment_spline_smoothing(
+                series,
+            )
 
         phases = []
         # give up if there isn't enough data
@@ -121,7 +141,7 @@ def growth(self, curve: pandas.Series) -> AnnotatedGrowthCurve:
         raw_phases = self._find_growth_phases(smooth_series, window=n_hours)
 
         for phase in raw_phases:
-            phase_series = series[phase.start:phase.end]
+            phase_series = series[phase.start : phase.end]
 
             # skip any growth phases that have not enough points for fitting
             if len(phase_series[phase_series > 0]) < 3:
@@ -137,9 +157,13 @@ def growth(self, curve: pandas.Series) -> AnnotatedGrowthCurve:
             #     continue
 
             if self._constrain_n0:
-                slope, intercept, n0, snr, fallback_linear_method = fit_exponential(phase_series, n0=self._n0)
+                slope, intercept, n0, snr, fallback_linear_method = fit_exponential(
+                    phase_series, n0=self._n0
+                )
             else:
-                slope, intercept, n0, snr, fallback_linear_method = fit_exponential(phase_series)
+                slope, intercept, n0, snr, fallback_linear_method = fit_exponential(
+                    phase_series
+                )
 
             # skip phases whose actual slope is below the limit
             if slope < max(0.0, defaults.PHASE_MINIMUM_SLOPE):
@@ -149,13 +173,26 @@ def growth(self, curve: pandas.Series) -> AnnotatedGrowthCurve:
             if snr < max(1.0, defaults.PHASE_MINIMUM_SIGNAL_NOISE_RATIO):
                 continue
 
-            phases.append(GrowthPhase(phase.start, phase.end, slope, intercept, n0, {"SNR": snr}))
-
-        ranked_phases = rank_phases(phases, defaults.PHASE_RANK_WEIGHTS, thresholds={
-            'SNR': defaults.PHASE_MINIMUM_SIGNAL_NOISE_RATIO,
-            'duration': defaults.PHASE_MINIMUM_DURATION_HOURS,
-            'slope': defaults.PHASE_MINIMUM_SLOPE
-        })
-
-        return AnnotatedGrowthCurve(series, outliers, [phase for phase in ranked_phases
-                                                       if phase.rank >= defaults.PHASE_RANK_EXCLUDE_BELOW])
+            phases.append(
+                GrowthPhase(phase.start, phase.end, slope, intercept, n0, {"SNR": snr})
+            )
+
+        ranked_phases = rank_phases(
+            phases,
+            defaults.PHASE_RANK_WEIGHTS,
+            thresholds={
+                "SNR": defaults.PHASE_MINIMUM_SIGNAL_NOISE_RATIO,
+                "duration": defaults.PHASE_MINIMUM_DURATION_HOURS,
+                "slope": defaults.PHASE_MINIMUM_SLOPE,
+            },
+        )
+
+        return AnnotatedGrowthCurve(
+            series,
+            outliers,
+            [
+                phase
+                for phase in ranked_phases
+                if phase.rank >= defaults.PHASE_RANK_EXCLUDE_BELOW
+            ],
+        )
diff --git a/croissance/estimation/defaults.py b/croissance/estimation/defaults.py
index d3a8172..f074476 100644
--- a/croissance/estimation/defaults.py
+++ b/croissance/estimation/defaults.py
@@ -1,4 +1,3 @@
-
 CURVE_MINIMUM_DURATION_HOURS = 5
 
 PHASE_MINIMUM_SIGNAL_NOISE_RATIO = 1.0
@@ -7,9 +6,9 @@
 
 PHASE_RANK_EXCLUDE_BELOW = 33
 PHASE_RANK_WEIGHTS = {
-    'SNR': 50,
-    'duration': 30,
-    'slope': 10,
+    "SNR": 50,
+    "duration": 30,
+    "slope": 10,
     # TODO add 1 - start?
 }
 
@@ -24,4 +23,4 @@
 PHASE_MIN_DELTA_OD = 0.25
 MINIMUM_PHASE_LENGTH = 2
 
-MINOR_PHASE_MERGING_THRESHOLD = 0.10 # in percent
+MINOR_PHASE_MERGING_THRESHOLD = 0.10  # in percent
diff --git a/croissance/estimation/outliers.py b/croissance/estimation/outliers.py
index 74d5749..0b99c82 100644
--- a/croissance/estimation/outliers.py
+++ b/croissance/estimation/outliers.py
@@ -16,10 +16,17 @@ def remove_outliers(series, window=30, std=2):
         return series, pandas.Series(data=[], index=[])
 
     values = with_overhangs(series.values, window)
-    outliers = abs(values - values.rolling(window=window, center=True).median()) < values.rolling(window=window, center=True).std() * std
+    outliers = (
+        abs(values - values.rolling(window=window, center=True).median())
+        < values.rolling(window=window, center=True).std() * std
+    )
     outlier_mask = outliers[window:-window].values
 
-    outliers = pandas.Series(data=series.values[~outlier_mask], index=series.index[~outlier_mask])
-    series = pandas.Series(data=series.values[outlier_mask], index=series.index[outlier_mask])
+    outliers = pandas.Series(
+        data=series.values[~outlier_mask], index=series.index[~outlier_mask]
+    )
+    series = pandas.Series(
+        data=series.values[outlier_mask], index=series.index[outlier_mask]
+    )
 
-    return series, outliers
\ No newline at end of file
+    return series, outliers
diff --git a/croissance/estimation/ranking.py b/croissance/estimation/ranking.py
index 68dc2ea..4b86dca 100644
--- a/croissance/estimation/ranking.py
+++ b/croissance/estimation/ranking.py
@@ -15,13 +15,24 @@ def rank_phases(phases, weights, thresholds):
         values[attribute] = [getattr(phase, attribute) for phase in phases]
 
     for phase in phases:
-        scores.append((sum(weight * score([thresholds[attribute]] + values[attribute], getattr(phase, attribute))
-                           for attribute, weight in weights.items()) / sum(weights.values()),
-                       phase))
+        scores.append(
+            (
+                sum(
+                    weight
+                    * score(
+                        [thresholds[attribute]] + values[attribute],
+                        getattr(phase, attribute),
+                    )
+                    for attribute, weight in weights.items()
+                )
+                / sum(weights.values()),
+                phase,
+            )
+        )
 
     ranked_phases = []
     for rank, phase in sorted(scores, key=itemgetter(0), reverse=True):
-        phase.attributes['rank'] = rank
+        phase.attributes["rank"] = rank
         ranked_phases.append(phase)
 
     return ranked_phases
diff --git a/croissance/estimation/regression.py b/croissance/estimation/regression.py
index bea1059..a9c12b3 100644
--- a/croissance/estimation/regression.py
+++ b/croissance/estimation/regression.py
@@ -17,6 +17,7 @@ def fit_exponential(series, *, p0=(1.0, 0.01, 0.0), n0: float = None):
     if n0 is None:
         fit_fn = exponential
     else:
+
         def exponential_constrain_n0(x, a, b):
             return a * numpy.exp(b * x) + n0
 
@@ -24,12 +25,16 @@ def exponential_constrain_n0(x, a, b):
         p0 = p0[:2]
 
     try:
-        popt, pcov = curve_fit(fit_fn,
-                               series.index,
-                               series.values,
-                               p0=p0,
-                               maxfev=10000,
-                               bounds=([0., 0., 0.], numpy.inf) if n0 is None else ([0., 0.], numpy.inf))
+        popt, pcov = curve_fit(
+            fit_fn,
+            series.index,
+            series.values,
+            p0=p0,
+            maxfev=10000,
+            bounds=([0.0, 0.0, 0.0], numpy.inf)
+            if n0 is None
+            else ([0.0, 0.0], numpy.inf),
+        )
 
         if n0 is not None:
             popt = tuple(popt) + (n0,)
@@ -47,7 +52,7 @@ def exponential_constrain_n0(x, a, b):
     log_series = numpy.log(series[series > 0] - (n0 or 0.0))
 
     slope, c, *__ = linregress(log_series.index, log_series.values)
-    intercept = - c / slope
+    intercept = -c / slope
 
     if n0 is None:
         p0 = (numpy.exp(c), slope, 0.0)
@@ -55,12 +60,16 @@ def exponential_constrain_n0(x, a, b):
         p0 = (numpy.exp(c), slope)
 
     try:
-        popt, pcov = curve_fit(fit_fn,
-                               series.index,
-                               series.values,
-                               p0=p0,
-                               maxfev=10000,
-                               bounds=([0., 0., 0.], numpy.inf) if n0 is None else ([0., 0.], numpy.inf))
+        popt, pcov = curve_fit(
+            fit_fn,
+            series.index,
+            series.values,
+            p0=p0,
+            maxfev=10000,
+            bounds=([0.0, 0.0, 0.0], numpy.inf)
+            if n0 is None
+            else ([0.0, 0.0], numpy.inf),
+        )
 
         if n0 is not None:
             popt = tuple(popt) + (n0,)
diff --git a/croissance/estimation/smoothing/__init__.py b/croissance/estimation/smoothing/__init__.py
index 56a05f1..6a4c1e6 100644
--- a/croissance/estimation/smoothing/__init__.py
+++ b/croissance/estimation/smoothing/__init__.py
@@ -4,8 +4,10 @@
 
 
 def median_smoothing(series, window=10):
-    data = pandas.rolling_apply(with_overhangs(series.values, window // 2),
-                                window,
-                                median_window_clean,
-                                center=True)[window // 2:-window // 2]
+    data = pandas.rolling_apply(
+        with_overhangs(series.values, window // 2),
+        window,
+        median_window_clean,
+        center=True,
+    )[window // 2 : -window // 2]
     return pandas.Series(index=series.index, data=data)
diff --git a/croissance/estimation/smoothing/segments.py b/croissance/estimation/smoothing/segments.py
index 77a3168..389bb2b 100644
--- a/croissance/estimation/smoothing/segments.py
+++ b/croissance/estimation/smoothing/segments.py
@@ -22,8 +22,10 @@ def segment_by_std_dev(series, increment=2, maximum=20):
 
     for i in range(start, duration, increment):
         for size in range(1, maximum + 1):
-            window = detrend(series[i:i + size*increment])
-            heappush(windows, (window.std() / (size*increment), i, i + size*increment))
+            window = detrend(series[i : i + size * increment])
+            heappush(
+                windows, (window.std() / (size * increment), i, i + size * increment)
+            )
 
     segments = []
     spots = set()
@@ -66,7 +68,7 @@ def segment_points(series, segments):
     :param segments:
     :return:
     """
-    out = [(series.index[0], numpy.median(series[:series.index[0] + 1]))]
+    out = [(series.index[0], numpy.median(series[: series.index[0] + 1]))]
 
     for start, end in segments:
         window = series[start:end]
@@ -75,16 +77,18 @@ def segment_points(series, segments):
             continue
 
         if end - start > 5:
-            out.append(window_median(series[start:start + 2], start, start + 2))
+            out.append(window_median(series[start : start + 2], start, start + 2))
 
             if end - start > 11:
-                out.append(window_median(series[start + 2:end - 2], start + 2, end - 2))
+                out.append(
+                    window_median(series[start + 2 : end - 2], start + 2, end - 2)
+                )
 
-            out.append(window_median(series[end - 2:end], end - 2, end))
+            out.append(window_median(series[end - 2 : end], end - 2, end))
         else:
             out.append(window_median(window, start, end))
 
-    out += [(series.index[-1], numpy.max(series[series.index[0] - 1:]))]
+    out += [(series.index[-1], numpy.max(series[series.index[0] - 1 :]))]
 
     index, data = zip(*out)
     return pandas.Series(index=index, data=data)
diff --git a/croissance/estimation/util.py b/croissance/estimation/util.py
index 43ead1d..63adae7 100644
--- a/croissance/estimation/util.py
+++ b/croissance/estimation/util.py
@@ -5,7 +5,9 @@
 
 
 def points_per_hour(series):
-    return 1 / numpy.median([series.index[i] - series.index[i - 1] for i in range(1, len(series))])
+    return 1 / numpy.median(
+        [series.index[i] - series.index[i - 1] for i in range(1, len(series))]
+    )
 
 
 def resample(series, *, factor=10, size=None):
@@ -29,20 +31,25 @@ def resample(series, *, factor=10, size=None):
 
 
 def savitzky_golay(series, *args, **kwargs):
-    return pandas.Series(index=series.index, data=savgol_filter(series.values, *args, **kwargs))
+    return pandas.Series(
+        index=series.index, data=savgol_filter(series.values, *args, **kwargs)
+    )
 
 
 def with_overhangs(values, overhang_size):
-    start_overhang = numpy.repeat([numpy.median(values[0:overhang_size // 2 + 1])], overhang_size)
-    end_overhang = numpy.repeat([numpy.max(values[-1 - overhang_size // 2:-1])], overhang_size)
+    start_overhang = numpy.repeat(
+        [numpy.median(values[0 : overhang_size // 2 + 1])], overhang_size
+    )
+    end_overhang = numpy.repeat(
+        [numpy.max(values[-1 - overhang_size // 2 : -1])], overhang_size
+    )
     return pandas.Series(numpy.concatenate([start_overhang, values, end_overhang]))
 
 
-def normalize_time_unit(curve: pandas.Series, unit: str = 'hours'):
-    if unit == 'hours':
+def normalize_time_unit(curve: pandas.Series, unit: str = "hours"):
+    if unit == "hours":
         return curve
-    elif unit == 'minutes':
-        return pandas.Series(index=curve.index / 60., data=curve.values)
+    elif unit == "minutes":
+        return pandas.Series(index=curve.index / 60.0, data=curve.values)
     else:
         raise NotImplementedError("Unsupported time unit: '{}'".format(unit))
-
diff --git a/croissance/figures/__init__.py b/croissance/figures/__init__.py
index cb42fd1..d3dedc0 100644
--- a/croissance/figures/__init__.py
+++ b/croissance/figures/__init__.py
@@ -10,12 +10,10 @@ def __init__(self, file, include_shifted_exponentials: bool = False):
         self.doc = PdfPages(file)
         self._include_shifted_exponentials = include_shifted_exponentials
 
-    def write(self,
-              name: str,
-              curve: AnnotatedGrowthCurve):
+    def write(self, name: str, curve: AnnotatedGrowthCurve):
 
         fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(16, 16))
-        axes[1].set_yscale('log')
+        axes[1].set_yscale("log")
 
         try:
             plt.title(name)
@@ -24,21 +22,25 @@ def write(self,
                 return
 
             for axis in axes:
-                axis.plot(curve.series.index,
-                          curve.series.values,
-                          color='black',
-                          marker='.',
-                          markersize=5,
-                          linestyle='None')
-
-                axis.plot(curve.outliers.index,
-                          curve.outliers.values,
-                          color='red',
-                          marker='.',
-                          markersize=5,
-                          linestyle='None')
-
-            colors = ['b', 'g', 'c', 'm', 'y', 'k']
+                axis.plot(
+                    curve.series.index,
+                    curve.series.values,
+                    color="black",
+                    marker=".",
+                    markersize=5,
+                    linestyle="None",
+                )
+
+                axis.plot(
+                    curve.outliers.index,
+                    curve.outliers.values,
+                    color="red",
+                    marker=".",
+                    markersize=5,
+                    linestyle="None",
+                )
+
+            colors = ["b", "g", "c", "m", "y", "k"]
 
             for i, phase in enumerate(curve.growth_phases):
                 color = colors[i % len(colors)]
@@ -51,30 +53,63 @@ def write(self,
                 def gf(x):
                     return a * numpy.exp(phase.slope * x) + phase.n0
 
-                phase_series = curve.series[phase.start:phase.end]
+                phase_series = curve.series[phase.start : phase.end]
 
                 for axis in axes:
                     if self._include_shifted_exponentials:
-                        axis.plot(phase_series.index,
-                                  phase_series.values - phase.n0,
-                                  marker='.', linewidth=0, color='red')
-
-                        axis.plot(phase_series.index, gf(phase_series.index) - phase.n0,
-                                  marker=None, linewidth=2, color='red')
-
-                    axis.axhline(y=phase.n0, marker=None, linewidth=1, linestyle='dashed', color=color)
-                    axis.axvline(x=phase.intercept, marker=None, linewidth=1, linestyle='dashed', color=color)
-
-                    axis.plot(phase_series.index,
-                              phase_series.values,
-                              marker=None,
-                              linewidth=15,
-                              color=color,
-                              solid_capstyle='round',
-                              alpha=0.2)
-
-                    axis.plot(curve.series.index, gf(curve.series.index), color=color, linewidth=1)
-                    axis.plot(phase_series.index, gf(phase_series.index), color=color, linewidth=2)
+                        axis.plot(
+                            phase_series.index,
+                            phase_series.values - phase.n0,
+                            marker=".",
+                            linewidth=0,
+                            color="red",
+                        )
+
+                        axis.plot(
+                            phase_series.index,
+                            gf(phase_series.index) - phase.n0,
+                            marker=None,
+                            linewidth=2,
+                            color="red",
+                        )
+
+                    axis.axhline(
+                        y=phase.n0,
+                        marker=None,
+                        linewidth=1,
+                        linestyle="dashed",
+                        color=color,
+                    )
+                    axis.axvline(
+                        x=phase.intercept,
+                        marker=None,
+                        linewidth=1,
+                        linestyle="dashed",
+                        color=color,
+                    )
+
+                    axis.plot(
+                        phase_series.index,
+                        phase_series.values,
+                        marker=None,
+                        linewidth=15,
+                        color=color,
+                        solid_capstyle="round",
+                        alpha=0.2,
+                    )
+
+                    axis.plot(
+                        curve.series.index,
+                        gf(curve.series.index),
+                        color=color,
+                        linewidth=1,
+                    )
+                    axis.plot(
+                        phase_series.index,
+                        gf(phase_series.index),
+                        color=color,
+                        linewidth=2,
+                    )
 
             for axis in axes:
                 axis.set_xlim(curve.series.index[0], curve.series.index[-1])
diff --git a/croissance/formats/input.py b/croissance/formats/input.py
index 5d964eb..81c2073 100644
--- a/croissance/formats/input.py
+++ b/croissance/formats/input.py
@@ -2,12 +2,8 @@
 
 
 class TSVReader(object):
-
     def read(self, file):
-        data = pandas.read_csv(file,
-                               sep="\t",
-                               header=0,
-                               index_col=0)
+        data = pandas.read_csv(file, sep="\t", header=0, index_col=0)
 
         names = data.columns
 
diff --git a/croissance/formats/output.py b/croissance/formats/output.py
index 903ac05..11d67d8 100644
--- a/croissance/formats/output.py
+++ b/croissance/formats/output.py
@@ -5,27 +5,37 @@
 
 class TSVWriter(object):
     def __init__(self, file, include_default_phase: bool = True):
-        writer = csv.writer(file, delimiter='\t', quoting=csv.QUOTE_MINIMAL)
-        writer.writerow(['name', 'phase', 'slope', 'intercept', 'N0', 'SNR', 'rank'])
+        writer = csv.writer(file, delimiter="\t", quoting=csv.QUOTE_MINIMAL)
+        writer.writerow(["name", "phase", "slope", "intercept", "N0", "SNR", "rank"])
 
         self._writer = writer
         self._file = file
         self._include_default_phase = include_default_phase
 
-    def write(self,
-              name: str,
-              curve: AnnotatedGrowthCurve):
+    def write(self, name: str, curve: AnnotatedGrowthCurve):
 
         if self._include_default_phase:
-            phase = GrowthPhase.pick_best(curve.growth_phases, 'rank')
+            phase = GrowthPhase.pick_best(curve.growth_phases, "rank")
 
             if not phase:
                 self._writer.writerow([name, 0, None, None, None])
             else:
-                self._writer.writerow([name, 0, phase.slope, phase.intercept, phase.n0, phase.SNR, phase.rank])
+                self._writer.writerow(
+                    [
+                        name,
+                        0,
+                        phase.slope,
+                        phase.intercept,
+                        phase.n0,
+                        phase.SNR,
+                        phase.rank,
+                    ]
+                )
 
         for i, phase in enumerate(curve.growth_phases, start=1):
-            self._writer.writerow([name, i, phase.slope, phase.intercept, phase.n0, phase.SNR, phase.rank])
+            self._writer.writerow(
+                [name, i, phase.slope, phase.intercept, phase.n0, phase.SNR, phase.rank]
+            )
 
     def __enter__(self):
         return self
diff --git a/setup.py b/setup.py
index 7a7986f..b559cfb 100644
--- a/setup.py
+++ b/setup.py
@@ -19,37 +19,37 @@
 from setuptools import setup, find_packages
 
 setup(
-    name='croissance',
-    version='1.1.1',
-    packages=find_packages(exclude=['*tests*']),
-    url='https://github.com/biosustain/croissance',
-    author='Lars Schöning',
-    author_email='lays@biosustain.dtu.dk',
-    description='A tool for estimating growth rates in growth curves.',
-    long_description=codecs.open('README.rst', encoding='utf-8').read(),
-    license='Apache License Version 2.0',
+    name="croissance",
+    version="1.1.1",
+    packages=find_packages(exclude=["*tests*"]),
+    url="https://github.com/biosustain/croissance",
+    author="Lars Schöning",
+    author_email="lays@biosustain.dtu.dk",
+    description="A tool for estimating growth rates in growth curves.",
+    long_description=codecs.open("README.rst", encoding="utf-8").read(),
+    license="Apache License Version 2.0",
     entry_points={
-        'console_scripts': [
-            'croissance = croissance.__main__:main',
+        "console_scripts": [
+            "croissance = croissance.__main__:main",
         ],
     },
-    test_suite='nose.collector',
+    test_suite="nose.collector",
     tests_require=[
-        'nose>=1.1.2',
+        "nose>=1.1.2",
     ],
     install_requires=[
-        'numpy>=1.9.1',
-        'pandas>=0.18.0',
-        'scipy>=0.14.0',
-        'matplotlib>=1.4.3',
-        'tqdm>=4.11.2'
+        "numpy>=1.9.1",
+        "pandas>=0.18.0",
+        "scipy>=0.14.0",
+        "matplotlib>=1.4.3",
+        "tqdm>=4.11.2",
     ],
     classifiers=[
-        'Development Status :: 5 - Production/Stable',
-        'Topic :: Utilities',
-        'Intended Audience :: Science/Research',
-        'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.5',
-        'License :: OSI Approved :: Apache Software License',
+        "Development Status :: 5 - Production/Stable",
+        "Topic :: Utilities",
+        "Intended Audience :: Science/Research",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.5",
+        "License :: OSI Approved :: Apache Software License",
     ],
 )
diff --git a/test.py b/test.py
index a09a3e8..5982ebb 100644
--- a/test.py
+++ b/test.py
@@ -14,39 +14,41 @@ def test_regression_basic(self):
         for mu in (0.001, 0.10, 0.15, 0.50, 1.0):
             phase = pandas.Series(
                 data=[numpy.exp(i * mu) for i in range(20)],
-                index=[i for i in range(20)])
+                index=[i for i in range(20)],
+            )
 
             slope, intercept, n0, snr, lin = fit_exponential(phase)
             print(slope, intercept, n0, snr, lin)
 
             self.assertFalse(lin, "This fit should not require a linear fallback.")
-            self.assertAlmostEqual(n0, 0, 2, msg='N0=0')
+            self.assertAlmostEqual(n0, 0, 2, msg="N0=0")
             self.assertAlmostEqual(intercept, 0, 2, msg='"intercept"=0')
-            self.assertAlmostEqual(mu, slope, 6, msg='growth rate (mu)={}'.format(mu))
+            self.assertAlmostEqual(mu, slope, 6, msg="growth rate (mu)={}".format(mu))
             self.assertTrue(snr > 100000, "signal-noise ratio should be very good")
 
     def test_process_curve_basic(self):
-        with open('./test.basic.pdf', 'wb') as f:
+        with open("./test.basic.pdf", "wb") as f:
             with PDFWriter(f) as doc:
-                mu = .5
-                pph = 4.
+                mu = 0.5
+                pph = 4.0
                 curve = pandas.Series(
                     data=[numpy.exp(mu * i / pph) for i in range(100)],
-                    index=[i / pph for i in range(100)])
+                    index=[i / pph for i in range(100)],
+                )
 
-                result = process_curve(curve, constrain_n0=True, n0=0.)
+                result = process_curve(curve, constrain_n0=True, n0=0.0)
 
                 # self.assertAlmostEqual(mu, slope, 6, msg='growth rate (mu)={}'.format(mu))
 
-                doc.write('#0 n0=1', result)
+                doc.write("#0 n0=1", result)
                 print(result.growth_phases)
                 self.assertEqual(len(result.growth_phases), 1)
                 self.assertAlmostEqual(mu, result.growth_phases[0].slope, 2)
-                self.assertAlmostEqual(0., result.growth_phases[0].n0, 3)
+                self.assertAlmostEqual(0.0, result.growth_phases[0].n0, 3)
                 self.assertTrue(result.growth_phases[0].SNR > 1000)
 
                 result = process_curve(curve, constrain_n0=False)
-                doc.write('#0 n0=auto', result)
+                doc.write("#0 n0=auto", result)
                 print(result.growth_phases)
                 self.assertEqual(len(result.growth_phases), 1)
                 self.assertAlmostEqual(mu, result.growth_phases[0].slope, 2)
@@ -55,62 +57,70 @@ def test_process_curve_basic(self):
 
                 curve = pandas.Series(
                     data=(
-                        [1.] * 5 +
-                        [numpy.exp(mu * i / pph) for i in range(25)] +
-                        [numpy.exp(mu * 24 / pph)] * 20),
-                    index=([i / pph for i in range(50)]))
-
-                result = process_curve(curve, constrain_n0=True, n0=0.)
-                doc.write('#1 n0=0', result)
+                        [1.0] * 5
+                        + [numpy.exp(mu * i / pph) for i in range(25)]
+                        + [numpy.exp(mu * 24 / pph)] * 20
+                    ),
+                    index=([i / pph for i in range(50)]),
+                )
+
+                result = process_curve(curve, constrain_n0=True, n0=0.0)
+                doc.write("#1 n0=0", result)
                 print(result.growth_phases)
                 self.assertEqual(len(result.growth_phases), 1)
                 self.assertAlmostEqual(mu, result.growth_phases[0].slope, 2)
-                self.assertAlmostEqual(0., result.growth_phases[0].n0, 3)
+                self.assertAlmostEqual(0.0, result.growth_phases[0].n0, 3)
                 self.assertTrue(result.growth_phases[0].SNR > 1000)
 
                 result = process_curve(curve, constrain_n0=False)
-                doc.write('#1 n0=auto', result)
+                doc.write("#1 n0=auto", result)
                 print(result.growth_phases)
                 self.assertEqual(len(result.growth_phases), 1)
                 self.assertAlmostEqual(mu, result.growth_phases[0].slope, 1)
                 self.assertTrue(-0.25 < result.growth_phases[0].n0 < 0.25)
                 self.assertTrue(result.growth_phases[0].SNR > 1000)
 
-
-                mu = .20
+                mu = 0.20
                 curve = pandas.Series(
-                    data=([i / 10. for i in range(10)] +
-                          [numpy.exp(mu * i / pph) for i in range(25)] +
-                          [numpy.exp(mu * 24 / pph)] * 15),
-                    index=([i / pph for i in range(50)]))
-
-                result = process_curve(curve, constrain_n0=True, n0=0.)
-                doc.write('#2 n0=0', result)
+                    data=(
+                        [i / 10.0 for i in range(10)]
+                        + [numpy.exp(mu * i / pph) for i in range(25)]
+                        + [numpy.exp(mu * 24 / pph)] * 15
+                    ),
+                    index=([i / pph for i in range(50)]),
+                )
+
+                result = process_curve(curve, constrain_n0=True, n0=0.0)
+                doc.write("#2 n0=0", result)
                 print(result.growth_phases)
                 self.assertEqual(len(result.growth_phases), 1)
                 self.assertAlmostEqual(mu, result.growth_phases[0].slope, 2)
-                self.assertAlmostEqual(0., result.growth_phases[0].n0, 6)
+                self.assertAlmostEqual(0.0, result.growth_phases[0].n0, 6)
                 self.assertTrue(result.growth_phases[0].SNR > 1000)
 
                 result = process_curve(curve, constrain_n0=False)
-                doc.write('#2 n0=auto', result)
+                doc.write("#2 n0=auto", result)
                 self.assertEqual(len(result.growth_phases), 1)
                 self.assertAlmostEqual(mu, result.growth_phases[0].slope, 2)
                 self.assertTrue(-0.25 < result.growth_phases[0].n0 < 0.25)
                 self.assertTrue(result.growth_phases[0].SNR > 1000)
 
     def test_process_curve_wrong_time_unit(self):
-        mu = .5
+        mu = 0.5
         pph = 0.1
         curve = pandas.Series(
             data=[numpy.exp(mu * i / pph) for i in range(100)],
-            index=[i / pph for i in range(100)])
+            index=[i / pph for i in range(100)],
+        )
 
-        result = process_curve(curve, constrain_n0=False, n0=0.)
+        result = process_curve(curve, constrain_n0=False, n0=0.0)
         self.assertEqual(len(result.growth_phases), 0)
         self.assertEqual(list(result.series), list(curve))
 
     def test_normalize_time_unit(self):
         curve = pandas.Series(index=[0, 15, 30, 60], data=[1, 2, 3, 4])
-        self.assertTrue(pandas.Series(index=[0., 0.25, 0.5, 1.], data=[1, 2, 3, 4])
-                        .equals(normalize_time_unit(curve, 'minutes')))
+        self.assertTrue(
+            pandas.Series(index=[0.0, 0.25, 0.5, 1.0], data=[1, 2, 3, 4]).equals(
+                normalize_time_unit(curve, "minutes")
+            )
+        )

From 124d090ab4437ee89ed185282f25f653a6f91f51 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 02/52] lints and minor cleanups

---
 croissance/__main__.py                      |  2 +-
 croissance/estimation/__init__.py           | 18 ++++++------------
 croissance/estimation/ranking.py            |  2 +-
 croissance/estimation/regression.py         |  4 ++--
 croissance/estimation/smoothing/segments.py |  4 ++--
 croissance/figures/__init__.py              |  2 +-
 croissance/formats/input.py                 |  2 +-
 croissance/formats/output.py                |  2 +-
 8 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/croissance/__main__.py b/croissance/__main__.py
index fb66ca2..83d86cb 100644
--- a/croissance/__main__.py
+++ b/croissance/__main__.py
@@ -110,7 +110,7 @@ def main():
             "\nEmpty cells were found and discarded:\n",
             "\n".join(
                 [
-                    (infile.name + "\t" + name)
+                    (key + "\t" + name)
                     for key, names in empties.items()
                     for name in names
                 ]
diff --git a/croissance/estimation/__init__.py b/croissance/estimation/__init__.py
index a132d29..c0fce5c 100644
--- a/croissance/estimation/__init__.py
+++ b/croissance/estimation/__init__.py
@@ -5,14 +5,8 @@
 import numpy
 import pandas
 
-from croissance.estimation.defaults import (
-    MINIMUM_VALID_OD,
-    RESAMPLE_POINTS_PER_HOUR,
-    MINIMUM_VALID_SLOPE,
-    MINIMUM_PHASE_LENGTH,
-    PHASE_MIN_DELTA_LOG_OD,
-    CURVE_MINIMUM_DURATION_HOURS,
-)
+import croissance.estimation.defaults as defaults
+
 from croissance.estimation.outliers import remove_outliers
 from croissance.estimation.ranking import rank_phases
 from croissance.estimation.regression import fit_exponential
@@ -61,7 +55,7 @@ def __getattr__(self, item):
 )
 
 
-class Estimator(object):
+class Estimator:
     def __init__(
         self,
         *,
@@ -111,7 +105,7 @@ def growth(self, curve: pandas.Series) -> AnnotatedGrowthCurve:
         series = curve.dropna()
 
         n_hours = int(
-            numpy.round(points_per_hour(series) * CURVE_MINIMUM_DURATION_HOURS)
+            numpy.round(points_per_hour(series) * defaults.CURVE_MINIMUM_DURATION_HOURS)
         )
 
         if n_hours == 0:
@@ -157,11 +151,11 @@ def growth(self, curve: pandas.Series) -> AnnotatedGrowthCurve:
             #     continue
 
             if self._constrain_n0:
-                slope, intercept, n0, snr, fallback_linear_method = fit_exponential(
+                slope, intercept, n0, snr, _fallback_linear_method = fit_exponential(
                     phase_series, n0=self._n0
                 )
             else:
-                slope, intercept, n0, snr, fallback_linear_method = fit_exponential(
+                slope, intercept, n0, snr, _fallback_linear_method = fit_exponential(
                     phase_series
                 )
 
diff --git a/croissance/estimation/ranking.py b/croissance/estimation/ranking.py
index 4b86dca..5b9def5 100644
--- a/croissance/estimation/ranking.py
+++ b/croissance/estimation/ranking.py
@@ -11,7 +11,7 @@ def rank_phases(phases, weights, thresholds):
     values = {}
     scores = []
 
-    for attribute, weight in weights.items():
+    for attribute in weights:
         values[attribute] = [getattr(phase, attribute) for phase in phases]
 
     for phase in phases:
diff --git a/croissance/estimation/regression.py b/croissance/estimation/regression.py
index a9c12b3..833b15f 100644
--- a/croissance/estimation/regression.py
+++ b/croissance/estimation/regression.py
@@ -25,7 +25,7 @@ def exponential_constrain_n0(x, a, b):
         p0 = p0[:2]
 
     try:
-        popt, pcov = curve_fit(
+        popt, _pcov = curve_fit(
             fit_fn,
             series.index,
             series.values,
@@ -60,7 +60,7 @@ def exponential_constrain_n0(x, a, b):
         p0 = (numpy.exp(c), slope)
 
     try:
-        popt, pcov = curve_fit(
+        popt, _pcov = curve_fit(
             fit_fn,
             series.index,
             series.values,
diff --git a/croissance/estimation/smoothing/segments.py b/croissance/estimation/smoothing/segments.py
index 389bb2b..da31560 100644
--- a/croissance/estimation/smoothing/segments.py
+++ b/croissance/estimation/smoothing/segments.py
@@ -32,7 +32,7 @@ def segment_by_std_dev(series, increment=2, maximum=20):
 
     try:
         while True:
-            window_agv_std, start, end = heappop(windows)
+            _window_agv_std, start, end = heappop(windows)
 
             if any(i in spots for i in range(start, int(end))):
                 continue
@@ -51,7 +51,7 @@ def segment_by_std_dev(series, increment=2, maximum=20):
 def window_median(window, start, end):
     x = numpy.linspace(0, 1, num=len(window))
     A = numpy.vstack([x, numpy.ones(len(x))]).T
-    m, c = numpy.linalg.lstsq(A, window, rcond=None)[0]
+    m, _c = numpy.linalg.lstsq(A, window, rcond=None)[0]
 
     return (start + end) / 2, m * 0.5 + numpy.median(window - m * x)
 
diff --git a/croissance/figures/__init__.py b/croissance/figures/__init__.py
index d3dedc0..6f204c6 100644
--- a/croissance/figures/__init__.py
+++ b/croissance/figures/__init__.py
@@ -5,7 +5,7 @@
 from croissance.estimation import AnnotatedGrowthCurve
 
 
-class PDFWriter(object):
+class PDFWriter:
     def __init__(self, file, include_shifted_exponentials: bool = False):
         self.doc = PdfPages(file)
         self._include_shifted_exponentials = include_shifted_exponentials
diff --git a/croissance/formats/input.py b/croissance/formats/input.py
index 81c2073..b631bdc 100644
--- a/croissance/formats/input.py
+++ b/croissance/formats/input.py
@@ -1,7 +1,7 @@
 import pandas
 
 
-class TSVReader(object):
+class TSVReader:
     def read(self, file):
         data = pandas.read_csv(file, sep="\t", header=0, index_col=0)
 
diff --git a/croissance/formats/output.py b/croissance/formats/output.py
index 11d67d8..415beee 100644
--- a/croissance/formats/output.py
+++ b/croissance/formats/output.py
@@ -3,7 +3,7 @@
 from croissance.estimation import GrowthPhase, AnnotatedGrowthCurve
 
 
-class TSVWriter(object):
+class TSVWriter:
     def __init__(self, file, include_default_phase: bool = True):
         writer = csv.writer(file, delimiter="\t", quoting=csv.QUOTE_MINIMAL)
         writer.writerow(["name", "phase", "slope", "intercept", "N0", "SNR", "rank"])

From c897dbcfba6146a9aec3052a06276d5af92a24c2 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 03/52] cleanup main/argument parsing

---
 croissance/__main__.py | 124 -----------------------------------------
 croissance/main.py     | 119 +++++++++++++++++++++++++++++++++++++++
 setup.py               |   2 +-
 3 files changed, 120 insertions(+), 125 deletions(-)
 delete mode 100644 croissance/__main__.py
 create mode 100644 croissance/main.py

diff --git a/croissance/__main__.py b/croissance/__main__.py
deleted file mode 100644
index 83d86cb..0000000
--- a/croissance/__main__.py
+++ /dev/null
@@ -1,124 +0,0 @@
-#!/usr/bin/env python
-import argparse
-
-import sys
-
-from tqdm import tqdm
-
-from croissance import Estimator
-from croissance.estimation.util import normalize_time_unit
-from croissance.figures import PDFWriter
-from croissance.formats.input import TSVReader
-from croissance.formats.output import TSVWriter
-
-parser = argparse.ArgumentParser(description="Estimate growth rates in growth curves")
-parser.add_argument("infiles", type=argparse.FileType("r"), nargs="+")
-parser.add_argument("--input-format", type=str, default="tsv")
-parser.add_argument(
-    "--input-time-unit",
-    choices=["hours", "minutes"],
-    default="hours",
-    help="Time unit in time column",
-)
-parser.add_argument(
-    "--N0",
-    type=float,
-    default=0.0,
-    help="Baseline for constrained regressions & identifying curve segments in log space.",
-)
-parser.add_argument(
-    "--constrain-N0",
-    action="store_true",
-    help="All growth phases will begin at N0 when this flag is set.",
-)
-parser.add_argument(
-    "--segment-log-N0",
-    action="store_true",
-    help="Identify curve segments using log(N-N0) rather than N; "
-    "increases sensitivity to changes in exponential growth but has to assume a certain N0.",
-)
-parser.add_argument("--output-format", type=str, default="tsv")
-parser.add_argument("--output-suffix", type=str, default=".output")
-parser.add_argument(
-    "--output-exclude-default-phase",
-    type=bool,
-    default=False,
-    help="The output will not include a phase '0' for each curve when this flag is set.",
-)
-parser.add_argument(
-    "--figures",
-    action="store_true",
-    help="Renders a PDF file with figures for each curve.",
-)
-
-
-def main():
-    args = parser.parse_args()
-
-    if args.input_format.lower() != "tsv":
-        raise NotImplementedError()
-
-    if args.output_format.lower() != "tsv":
-        raise NotImplementedError()
-
-    reader = TSVReader()
-
-    estimator = Estimator(
-        constrain_n0=args.constrain_N0, segment_log_n0=args.segment_log_N0, n0=args.N0
-    )
-
-    try:
-        empties = {}
-        for infile in tqdm(args.infiles, unit="infile"):
-            outfile = open("{}{}.tsv".format(infile.name[:-4], args.output_suffix), "w")
-
-            if args.figures:
-                figfile = open(
-                    "{}{}.pdf".format(infile.name[:-4], args.output_suffix), "wb"
-                )
-                figwriter = PDFWriter(figfile)
-
-            outwriter = TSVWriter(
-                outfile, include_default_phase=not args.output_exclude_default_phase
-            )
-
-            for name, curve in tqdm(list(reader.read(infile)), unit="curve"):
-                if curve.empty:
-                    try:
-                        empties[infile.name].append(name)
-                    except KeyError:
-                        empties[infile.name] = [name]
-                    continue
-                annotated_curve = estimator.growth(
-                    normalize_time_unit(curve, args.input_time_unit)
-                )
-
-                outwriter.write(name, annotated_curve)
-
-                if args.figures:
-                    figwriter.write(name, annotated_curve)
-
-            outwriter.close()
-
-            if args.figures:
-                figwriter.close()
-    except KeyboardInterrupt:
-        pass
-
-    if empties != {}:
-        print(
-            "\nEmpty cells were found and discarded:\n",
-            "\n".join(
-                [
-                    (key + "\t" + name)
-                    for key, names in empties.items()
-                    for name in names
-                ]
-            ),
-        )
-    else:
-        print()
-
-
-if __name__ == "__main__":
-    sys.exit(main())
diff --git a/croissance/main.py b/croissance/main.py
new file mode 100644
index 0000000..2df4389
--- /dev/null
+++ b/croissance/main.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python
+import argparse
+import collections
+import sys
+
+from pathlib import Path
+
+from tqdm import tqdm
+
+from croissance import Estimator
+from croissance.estimation.util import normalize_time_unit
+from croissance.figures import PDFWriter
+from croissance.formats.input import TSVReader
+from croissance.formats.output import TSVWriter
+
+
+def parse_args(argv):
+    parser = argparse.ArgumentParser(
+        description="Estimate growth rates in growth curves",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+
+    parser.add_argument("infiles", type=Path, nargs="+")
+    parser.add_argument("--input-format", type=str.lower, choices=("tsv",))
+    parser.add_argument(
+        "--input-time-unit",
+        default="hours",
+        choices=("hours", "minutes"),
+        help="Time unit in time column",
+    )
+    parser.add_argument(
+        "--N0",
+        type=float,
+        default=0.0,
+        help="Baseline for constrained regressions and identifying curve segments in "
+        "log space",
+    )
+    parser.add_argument(
+        "--constrain-N0",
+        action="store_true",
+        help="All growth phases will begin at N0 when this flag is set",
+    )
+    parser.add_argument(
+        "--segment-log-N0",
+        action="store_true",
+        help="Identify curve segments using log(N-N0) rather than N; increases "
+        "sensitivity to changes in exponential growth but has to assume a certain N0",
+    )
+    parser.add_argument("--output-format", type=str.lower, choices=("tsv",))
+    parser.add_argument("--output-suffix", type=str, default=".output")
+    parser.add_argument(
+        "--output-exclude-default-phase",
+        type=bool,
+        default=False,
+        help="Do not output phase '0' for each curve",
+    )
+    parser.add_argument(
+        "--figures",
+        action="store_true",
+        help="Renders a PDF file with figures for each curve",
+    )
+
+    return parser.parse_args(argv)
+
+
+def main(argv):
+    args = parse_args(argv)
+    reader = TSVReader()
+
+    estimator = Estimator(
+        constrain_n0=args.constrain_N0, segment_log_n0=args.segment_log_N0, n0=args.N0
+    )
+
+    empty_cells = collections.defaultdict(list)
+    for filepath in tqdm(args.infiles, unit="infile"):
+        with filepath.open("rt") as infile:
+            curves = list(reader.read(infile))
+
+        annotated_curves = {}
+        for name, curve in tqdm(curves, unit="curve"):
+            if curve.empty:
+                empty_cells[infile.name].append(name)
+                continue
+
+            annotated_curves[name] = estimator.growth(
+                normalize_time_unit(curve, args.input_time_unit)
+            )
+
+        output_filepath = filepath.with_suffix(args.output_suffix + ".tsv")
+        with output_filepath.open("wt") as outfile:
+            outwriter = TSVWriter(
+                outfile, include_default_phase=not args.output_exclude_default_phase
+            )
+
+            for name, annotated_curve in annotated_curves.items():
+                outwriter.write(name, annotated_curve)
+
+        if args.figures:
+            figure_filepath = filepath.with_suffix(args.output_suffix + ".pdf")
+            with figure_filepath.open("wb") as figfile:
+                with PDFWriter(figfile) as figwriter:
+                    for name, annotated_curve in annotated_curves.items():
+                        figwriter.write(name, annotated_curve)
+
+    print()
+    if empty_cells:
+        print("Empty cells were found and discarded:\n")
+
+        for key, names in empty_cells.items():
+            for name in names:
+                print(key + "\t" + name)
+
+
+def entry_point():
+    sys.exit(main(sys.argv[1:]))
+
+
+if __name__ == "__main__":
+    entry_point()
diff --git a/setup.py b/setup.py
index b559cfb..6f1ee40 100644
--- a/setup.py
+++ b/setup.py
@@ -30,7 +30,7 @@
     license="Apache License Version 2.0",
     entry_points={
         "console_scripts": [
-            "croissance = croissance.__main__:main",
+            "croissance = croissance.main:entry_point",
         ],
     },
     test_suite="nose.collector",

From 243024ee1f8d72ac4d528531fd7bad842bb2a7b3 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 04/52] breaking: drop argument from

---
 croissance/main.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/croissance/main.py b/croissance/main.py
index 2df4389..a884025 100644
--- a/croissance/main.py
+++ b/croissance/main.py
@@ -50,8 +50,7 @@ def parse_args(argv):
     parser.add_argument("--output-suffix", type=str, default=".output")
     parser.add_argument(
         "--output-exclude-default-phase",
-        type=bool,
-        default=False,
+        action="store_true",
         help="Do not output phase '0' for each curve",
     )
     parser.add_argument(

From 82ccb726af7f6917862de8f35d987f38d59ed9d8 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 05/52] add `unit` parameter to `process_curve`

This makes it explicit that the function expects hours as the time unit
by default and allows easy conversion. Also replaced pass-through kwargs
with named arguments for ease of use.

This fixes #6
---
 croissance/__init__.py | 22 ++++++++++++++++++----
 test.py                | 35 ++++++++++++++++++++++++++++++++++-
 2 files changed, 52 insertions(+), 5 deletions(-)

diff --git a/croissance/__init__.py b/croissance/__init__.py
index a365d3c..db9ad1a 100644
--- a/croissance/__init__.py
+++ b/croissance/__init__.py
@@ -1,13 +1,27 @@
-from croissance.estimation import Estimator
 from collections import namedtuple
 
+from croissance.estimation import Estimator
+from croissance.estimation.util import normalize_time_unit
+
+
 AnnotatedGrowthCurve = namedtuple(
     "AnnotatedGrowthCurve", ("series", "outliers", "growth_phases")
 )
 
 
-def process_curve(curve: "pandas.Series", **kwargs):
-    estimator = Estimator(**kwargs)
+def process_curve(
+    curve: "pandas.Series",
+    segment_log_n0: bool = False,
+    constrain_n0: bool = False,
+    n0: float = 0.0,
+    unit: str = "hours",
+):
+    curve = normalize_time_unit(curve, unit)
     if curve.isnull().all():
         return AnnotatedGrowthCurve(curve, [], [])
-    return estimator.growth(curve)
+
+    return Estimator(
+        segment_log_n0=segment_log_n0,
+        constrain_n0=constrain_n0,
+        n0=n0,
+    ).growth(curve)
diff --git a/test.py b/test.py
index 5982ebb..1f5edc2 100644
--- a/test.py
+++ b/test.py
@@ -3,7 +3,7 @@
 import numpy
 import pandas
 
-from croissance import process_curve
+from croissance import process_curve, AnnotatedGrowthCurve
 from croissance.estimation import fit_exponential
 from croissance.estimation.util import normalize_time_unit
 from croissance.figures import PDFWriter
@@ -26,6 +26,39 @@ def test_regression_basic(self):
             self.assertAlmostEqual(mu, slope, 6, msg="growth rate (mu)={}".format(mu))
             self.assertTrue(snr > 100000, "signal-noise ratio should be very good")
 
+    def test_process_curve_empty_series(self):
+        result = process_curve(pandas.Series([], dtype="float64"))
+
+        self.assertTrue(result.series.empty)
+        self.assertEqual(result.outliers, [])
+        self.assertEqual(result.growth_phases, [])
+
+    def test_process_curve_null_series(self):
+        curve = pandas.Series(index=[1, 2], data=[None, float("NaN")])
+        result = process_curve(curve)
+
+        self.assertIs(result.series, curve)
+        self.assertEqual(result.outliers, [])
+        self.assertEqual(result.growth_phases, [])
+
+    def test_process_curve_time_unit(self):
+        mu = 0.5
+        pph = 4.0
+        curve = pandas.Series(
+            data=[numpy.exp(mu * i / pph) for i in range(100)],
+            index=[i / pph for i in range(100)],
+        )
+
+        hours = process_curve(curve, unit="hours")
+        minutes = process_curve(
+            pandas.Series(index=curve.index * 60.0, data=curve.values),
+            unit="minutes",
+        )
+
+        self.assertTrue(hours.series.equals(minutes.series))
+        self.assertTrue(hours.outliers.equals(minutes.outliers))
+        self.assertEqual(hours.growth_phases, minutes.growth_phases)
+
     def test_process_curve_basic(self):
         with open("./test.basic.pdf", "wb") as f:
             with PDFWriter(f) as doc:

From 74754e3f108e1d6e624ea0972ce9781f4f1ca96b Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 06/52] initial work on adding proper logging

---
 croissance/estimation/__init__.py | 13 ++++-----
 croissance/main.py                | 46 +++++++++++++++++++++----------
 setup.py                          |  4 +--
 3 files changed, 39 insertions(+), 24 deletions(-)

diff --git a/croissance/estimation/__init__.py b/croissance/estimation/__init__.py
index c0fce5c..f6f174d 100644
--- a/croissance/estimation/__init__.py
+++ b/croissance/estimation/__init__.py
@@ -63,7 +63,7 @@ def __init__(
         constrain_n0: bool = False,
         n0: float = 0.0
     ):
-        self._logger = logging.getLogger(__name__)
+        self._log = logging.getLogger(__name__)
         self._segment_log_n0 = segment_log_n0
         self._constrain_n0 = constrain_n0
         self._n0 = n0
@@ -72,11 +72,8 @@ def _find_growth_phases(
         self, curve: "pandas.Series", window
     ) -> "typing.List[RawGrowthPhase]":
         """
-        Finds growth phases by locating regions in a series where both the first and second derivatives are positive.
-
-        :param curve:
-        :param window:
-        :return:
+        Finds growth phases by locating regions in a series where both the first and
+        second derivatives are positive.
         """
         first_derivative = savitzky_golay(curve, window, 3, deriv=1)
         second_derivative = savitzky_golay(curve, window, 3, deriv=2)
@@ -109,6 +106,7 @@ def growth(self, curve: pandas.Series) -> AnnotatedGrowthCurve:
         )
 
         if n_hours == 0:
+            self._log.warning("Less than 1 data-point/hour; cannot proceed")
             return AnnotatedGrowthCurve(series, [], [])
 
         if n_hours % 2 == 0:
@@ -118,6 +116,7 @@ def growth(self, curve: pandas.Series) -> AnnotatedGrowthCurve:
 
         # NOTE workaround for issue with negative curves
         if len(series[series > 0]) < 3:
+            self._log.warning("Less than 3 data-points post filtering; cannot proceeed")
             return AnnotatedGrowthCurve(series, outliers, [])
 
         if self._segment_log_n0:
@@ -129,8 +128,8 @@ def growth(self, curve: pandas.Series) -> AnnotatedGrowthCurve:
             )
 
         phases = []
-        # give up if there isn't enough data
         if len(smooth_series) < n_hours:
+            self._log.warning("Insufficient data, cannot determine growth")
             return AnnotatedGrowthCurve(series, [], [])
         raw_phases = self._find_growth_phases(smooth_series, window=n_hours)
 
diff --git a/croissance/main.py b/croissance/main.py
index a884025..8e30583 100644
--- a/croissance/main.py
+++ b/croissance/main.py
@@ -1,11 +1,11 @@
 #!/usr/bin/env python
 import argparse
-import collections
+import logging
 import sys
 
 from pathlib import Path
 
-from tqdm import tqdm
+import coloredlogs
 
 from croissance import Estimator
 from croissance.estimation.util import normalize_time_unit
@@ -59,28 +59,50 @@ def parse_args(argv):
         help="Renders a PDF file with figures for each curve",
     )
 
+    group = parser.add_argument_group("Logging")
+    group.add_argument(
+        "--log-level",
+        type=str.upper,
+        default="INFO",
+        choices=("DEBUG", "INFO", "WARNING", "ERROR"),
+        help="Set verbosity of log messages",
+    )
+
     return parser.parse_args(argv)
 
 
+def setup_logging(level):
+    coloredlogs.install(
+        fmt="%(asctime)s %(name)s %(levelname)s %(message)s",
+        level=level,
+    )
+
+    return logging.getLogger("croissance")
+
+
 def main(argv):
     args = parse_args(argv)
-    reader = TSVReader()
+    log = setup_logging(level=args.log_level)
 
+    reader = TSVReader()
     estimator = Estimator(
-        constrain_n0=args.constrain_N0, segment_log_n0=args.segment_log_N0, n0=args.N0
+        constrain_n0=args.constrain_N0,
+        segment_log_n0=args.segment_log_N0,
+        n0=args.N0,
     )
 
-    empty_cells = collections.defaultdict(list)
-    for filepath in tqdm(args.infiles, unit="infile"):
+    for filepath in args.infiles:
+        log.info("Reading curves from '%s", filepath)
         with filepath.open("rt") as infile:
             curves = list(reader.read(infile))
 
         annotated_curves = {}
-        for name, curve in tqdm(curves, unit="curve"):
+        for name, curve in curves:
             if curve.empty:
-                empty_cells[infile.name].append(name)
+                log.warning("Skipping empty curve %r", name)
                 continue
 
+            log.info("Estimating growth for curve %r", name)
             annotated_curves[name] = estimator.growth(
                 normalize_time_unit(curve, args.input_time_unit)
             )
@@ -101,13 +123,7 @@ def main(argv):
                     for name, annotated_curve in annotated_curves.items():
                         figwriter.write(name, annotated_curve)
 
-    print()
-    if empty_cells:
-        print("Empty cells were found and discarded:\n")
-
-        for key, names in empty_cells.items():
-            for name in names:
-                print(key + "\t" + name)
+    log.info("Done ..")
 
 
 def entry_point():
diff --git a/setup.py b/setup.py
index 6f1ee40..f87ed62 100644
--- a/setup.py
+++ b/setup.py
@@ -38,11 +38,11 @@
         "nose>=1.1.2",
     ],
     install_requires=[
+        "coloredlogs>=14.0.0",
+        "matplotlib>=1.4.3",
         "numpy>=1.9.1",
         "pandas>=0.18.0",
         "scipy>=0.14.0",
-        "matplotlib>=1.4.3",
-        "tqdm>=4.11.2",
     ],
     classifiers=[
         "Development Status :: 5 - Production/Stable",

From a5996519865f99359ebcd4c44f156576792f63a4 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 07/52] rework I/O handling

File handling logic is moved to the individual classes, to make it
easier to add support for other file types with different requirements.
---
 croissance/figures/__init__.py |  10 ++-
 croissance/formats/input.py    |  18 ++--
 croissance/formats/output.py   |  20 +++--
 croissance/main.py             |  21 ++---
 test.py                        | 154 ++++++++++++++++-----------------
 5 files changed, 115 insertions(+), 108 deletions(-)

diff --git a/croissance/figures/__init__.py b/croissance/figures/__init__.py
index 6f204c6..a64f0b8 100644
--- a/croissance/figures/__init__.py
+++ b/croissance/figures/__init__.py
@@ -6,8 +6,9 @@
 
 
 class PDFWriter:
-    def __init__(self, file, include_shifted_exponentials: bool = False):
-        self.doc = PdfPages(file)
+    def __init__(self, filepath, include_shifted_exponentials: bool = False):
+        self._handle = filepath.open("wb")
+        self._doc = PdfPages(self._handle)
         self._include_shifted_exponentials = include_shifted_exponentials
 
     def write(self, name: str, curve: AnnotatedGrowthCurve):
@@ -117,7 +118,7 @@ def gf(x):
             axes[0].set_ylim([max(curve.series.min(), -2.5), curve.series.max()])
             axes[1].set_ylim([0.1, curve.series.max()])
         finally:
-            self.doc.savefig(fig)
+            self._doc.savefig(fig)
             plt.close()
 
     def __enter__(self):
@@ -127,4 +128,5 @@ def __exit__(self, *args, **kwargs):
         self.close()
 
     def close(self):
-        self.doc.close()
+        self._doc.close()
+        self._handle.close()
diff --git a/croissance/formats/input.py b/croissance/formats/input.py
index b631bdc..25b5df8 100644
--- a/croissance/formats/input.py
+++ b/croissance/formats/input.py
@@ -2,11 +2,17 @@
 
 
 class TSVReader:
-    def read(self, file):
-        data = pandas.read_csv(file, sep="\t", header=0, index_col=0)
+    def __init__(self, filepath):
+        self._filepath = filepath
 
-        names = data.columns
+    def read(self):
+        with self._filepath.open("rt") as handle:
+            data = pandas.read_csv(handle, sep="\t", header=0, index_col=0)
 
-        for name in names:
-            curve = data[name].dropna()
-            yield name, curve
+        return [(name, data[name].dropna()) for name in data.columns]
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *args, **kwargs):
+        pass
diff --git a/croissance/formats/output.py b/croissance/formats/output.py
index 415beee..1aa7ae5 100644
--- a/croissance/formats/output.py
+++ b/croissance/formats/output.py
@@ -4,17 +4,19 @@
 
 
 class TSVWriter:
-    def __init__(self, file, include_default_phase: bool = True):
-        writer = csv.writer(file, delimiter="\t", quoting=csv.QUOTE_MINIMAL)
-        writer.writerow(["name", "phase", "slope", "intercept", "N0", "SNR", "rank"])
+    def __init__(self, filepath, exclude_default_phase: bool = True):
+        self._exclude_default_phase = exclude_default_phase
+        self._handle = filepath.open("wt")
+        self._writer = csv.writer(
+            self._handle, delimiter="\t", quoting=csv.QUOTE_MINIMAL
+        )
 
-        self._writer = writer
-        self._file = file
-        self._include_default_phase = include_default_phase
+        self._writer.writerow(
+            ["name", "phase", "slope", "intercept", "N0", "SNR", "rank"]
+        )
 
     def write(self, name: str, curve: AnnotatedGrowthCurve):
-
-        if self._include_default_phase:
+        if not self._exclude_default_phase:
             phase = GrowthPhase.pick_best(curve.growth_phases, "rank")
 
             if not phase:
@@ -44,4 +46,4 @@ def __exit__(self, *args, **kwargs):
         self.close()
 
     def close(self):
-        self._file.close()
+        self._handle.close()
diff --git a/croissance/main.py b/croissance/main.py
index 8e30583..9ae9609 100644
--- a/croissance/main.py
+++ b/croissance/main.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 import argparse
+import collections
 import logging
 import sys
 
@@ -84,17 +85,17 @@ def main(argv):
     args = parse_args(argv)
     log = setup_logging(level=args.log_level)
 
-    reader = TSVReader()
     estimator = Estimator(
         constrain_n0=args.constrain_N0,
         segment_log_n0=args.segment_log_N0,
         n0=args.N0,
     )
 
+    log.info("Estimating growth curves ..")
     for filepath in args.infiles:
         log.info("Reading curves from '%s", filepath)
-        with filepath.open("rt") as infile:
-            curves = list(reader.read(infile))
+        with TSVReader(filepath) as reader:
+            curves = reader.read()
 
         annotated_curves = {}
         for name, curve in curves:
@@ -108,20 +109,16 @@ def main(argv):
             )
 
         output_filepath = filepath.with_suffix(args.output_suffix + ".tsv")
-        with output_filepath.open("wt") as outfile:
-            outwriter = TSVWriter(
-                outfile, include_default_phase=not args.output_exclude_default_phase
-            )
-
+        with TSVWriter(output_filepath, args.output_exclude_default_phase) as outwriter:
             for name, annotated_curve in annotated_curves.items():
                 outwriter.write(name, annotated_curve)
 
         if args.figures:
             figure_filepath = filepath.with_suffix(args.output_suffix + ".pdf")
-            with figure_filepath.open("wb") as figfile:
-                with PDFWriter(figfile) as figwriter:
-                    for name, annotated_curve in annotated_curves.items():
-                        figwriter.write(name, annotated_curve)
+
+            with PDFWriter(figure_filepath) as figwriter:
+                for name, annotated_curve in annotated_curves.items():
+                    figwriter.write(name, annotated_curve)
 
     log.info("Done ..")
 
diff --git a/test.py b/test.py
index 1f5edc2..ee81438 100644
--- a/test.py
+++ b/test.py
@@ -1,3 +1,4 @@
+from pathlib import Path
 from unittest import TestCase
 
 import numpy
@@ -60,83 +61,82 @@ def test_process_curve_time_unit(self):
         self.assertEqual(hours.growth_phases, minutes.growth_phases)
 
     def test_process_curve_basic(self):
-        with open("./test.basic.pdf", "wb") as f:
-            with PDFWriter(f) as doc:
-                mu = 0.5
-                pph = 4.0
-                curve = pandas.Series(
-                    data=[numpy.exp(mu * i / pph) for i in range(100)],
-                    index=[i / pph for i in range(100)],
-                )
-
-                result = process_curve(curve, constrain_n0=True, n0=0.0)
-
-                # self.assertAlmostEqual(mu, slope, 6, msg='growth rate (mu)={}'.format(mu))
-
-                doc.write("#0 n0=1", result)
-                print(result.growth_phases)
-                self.assertEqual(len(result.growth_phases), 1)
-                self.assertAlmostEqual(mu, result.growth_phases[0].slope, 2)
-                self.assertAlmostEqual(0.0, result.growth_phases[0].n0, 3)
-                self.assertTrue(result.growth_phases[0].SNR > 1000)
-
-                result = process_curve(curve, constrain_n0=False)
-                doc.write("#0 n0=auto", result)
-                print(result.growth_phases)
-                self.assertEqual(len(result.growth_phases), 1)
-                self.assertAlmostEqual(mu, result.growth_phases[0].slope, 2)
-                self.assertTrue(-0.25 < result.growth_phases[0].n0 < 0.25)
-                self.assertTrue(result.growth_phases[0].SNR > 1000)
-
-                curve = pandas.Series(
-                    data=(
-                        [1.0] * 5
-                        + [numpy.exp(mu * i / pph) for i in range(25)]
-                        + [numpy.exp(mu * 24 / pph)] * 20
-                    ),
-                    index=([i / pph for i in range(50)]),
-                )
-
-                result = process_curve(curve, constrain_n0=True, n0=0.0)
-                doc.write("#1 n0=0", result)
-                print(result.growth_phases)
-                self.assertEqual(len(result.growth_phases), 1)
-                self.assertAlmostEqual(mu, result.growth_phases[0].slope, 2)
-                self.assertAlmostEqual(0.0, result.growth_phases[0].n0, 3)
-                self.assertTrue(result.growth_phases[0].SNR > 1000)
-
-                result = process_curve(curve, constrain_n0=False)
-                doc.write("#1 n0=auto", result)
-                print(result.growth_phases)
-                self.assertEqual(len(result.growth_phases), 1)
-                self.assertAlmostEqual(mu, result.growth_phases[0].slope, 1)
-                self.assertTrue(-0.25 < result.growth_phases[0].n0 < 0.25)
-                self.assertTrue(result.growth_phases[0].SNR > 1000)
-
-                mu = 0.20
-                curve = pandas.Series(
-                    data=(
-                        [i / 10.0 for i in range(10)]
-                        + [numpy.exp(mu * i / pph) for i in range(25)]
-                        + [numpy.exp(mu * 24 / pph)] * 15
-                    ),
-                    index=([i / pph for i in range(50)]),
-                )
-
-                result = process_curve(curve, constrain_n0=True, n0=0.0)
-                doc.write("#2 n0=0", result)
-                print(result.growth_phases)
-                self.assertEqual(len(result.growth_phases), 1)
-                self.assertAlmostEqual(mu, result.growth_phases[0].slope, 2)
-                self.assertAlmostEqual(0.0, result.growth_phases[0].n0, 6)
-                self.assertTrue(result.growth_phases[0].SNR > 1000)
-
-                result = process_curve(curve, constrain_n0=False)
-                doc.write("#2 n0=auto", result)
-                self.assertEqual(len(result.growth_phases), 1)
-                self.assertAlmostEqual(mu, result.growth_phases[0].slope, 2)
-                self.assertTrue(-0.25 < result.growth_phases[0].n0 < 0.25)
-                self.assertTrue(result.growth_phases[0].SNR > 1000)
+        with PDFWriter(Path("test.basic.pdf")) as doc:
+            mu = 0.5
+            pph = 4.0
+            curve = pandas.Series(
+                data=[numpy.exp(mu * i / pph) for i in range(100)],
+                index=[i / pph for i in range(100)],
+            )
+
+            result = process_curve(curve, constrain_n0=True, n0=0.0)
+
+            # self.assertAlmostEqual(mu, slope, 6, msg='growth rate (mu)={}'.format(mu))
+
+            doc.write("#0 n0=1", result)
+            print(result.growth_phases)
+            self.assertEqual(len(result.growth_phases), 1)
+            self.assertAlmostEqual(mu, result.growth_phases[0].slope, 2)
+            self.assertAlmostEqual(0.0, result.growth_phases[0].n0, 3)
+            self.assertTrue(result.growth_phases[0].SNR > 1000)
+
+            result = process_curve(curve, constrain_n0=False)
+            doc.write("#0 n0=auto", result)
+            print(result.growth_phases)
+            self.assertEqual(len(result.growth_phases), 1)
+            self.assertAlmostEqual(mu, result.growth_phases[0].slope, 2)
+            self.assertTrue(-0.25 < result.growth_phases[0].n0 < 0.25)
+            self.assertTrue(result.growth_phases[0].SNR > 1000)
+
+            curve = pandas.Series(
+                data=(
+                    [1.0] * 5
+                    + [numpy.exp(mu * i / pph) for i in range(25)]
+                    + [numpy.exp(mu * 24 / pph)] * 20
+                ),
+                index=([i / pph for i in range(50)]),
+            )
+
+            result = process_curve(curve, constrain_n0=True, n0=0.0)
+            doc.write("#1 n0=0", result)
+            print(result.growth_phases)
+            self.assertEqual(len(result.growth_phases), 1)
+            self.assertAlmostEqual(mu, result.growth_phases[0].slope, 2)
+            self.assertAlmostEqual(0.0, result.growth_phases[0].n0, 3)
+            self.assertTrue(result.growth_phases[0].SNR > 1000)
+
+            result = process_curve(curve, constrain_n0=False)
+            doc.write("#1 n0=auto", result)
+            print(result.growth_phases)
+            self.assertEqual(len(result.growth_phases), 1)
+            self.assertAlmostEqual(mu, result.growth_phases[0].slope, 1)
+            self.assertTrue(-0.25 < result.growth_phases[0].n0 < 0.25)
+            self.assertTrue(result.growth_phases[0].SNR > 1000)
+
+            mu = 0.20
+            curve = pandas.Series(
+                data=(
+                    [i / 10.0 for i in range(10)]
+                    + [numpy.exp(mu * i / pph) for i in range(25)]
+                    + [numpy.exp(mu * 24 / pph)] * 15
+                ),
+                index=([i / pph for i in range(50)]),
+            )
+
+            result = process_curve(curve, constrain_n0=True, n0=0.0)
+            doc.write("#2 n0=0", result)
+            print(result.growth_phases)
+            self.assertEqual(len(result.growth_phases), 1)
+            self.assertAlmostEqual(mu, result.growth_phases[0].slope, 2)
+            self.assertAlmostEqual(0.0, result.growth_phases[0].n0, 6)
+            self.assertTrue(result.growth_phases[0].SNR > 1000)
+
+            result = process_curve(curve, constrain_n0=False)
+            doc.write("#2 n0=auto", result)
+            self.assertEqual(len(result.growth_phases), 1)
+            self.assertAlmostEqual(mu, result.growth_phases[0].slope, 2)
+            self.assertTrue(-0.25 < result.growth_phases[0].n0 < 0.25)
+            self.assertTrue(result.growth_phases[0].SNR > 1000)
 
     def test_process_curve_wrong_time_unit(self):
         mu = 0.5

From 5ef906aacdef14f2ba4174c6863036445b2fa05e Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 08/52] run tests using tox/pytest

---
 setup.py |  4 ----
 tox.ini  | 15 +++++++++++++++
 2 files changed, 15 insertions(+), 4 deletions(-)
 create mode 100644 tox.ini

diff --git a/setup.py b/setup.py
index f87ed62..762e29c 100644
--- a/setup.py
+++ b/setup.py
@@ -33,10 +33,6 @@
             "croissance = croissance.main:entry_point",
         ],
     },
-    test_suite="nose.collector",
-    tests_require=[
-        "nose>=1.1.2",
-    ],
     install_requires=[
         "coloredlogs>=14.0.0",
         "matplotlib>=1.4.3",
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..a86f9b8
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,15 @@
+# Tox (http://tox.testrun.org/) is a tool for running tests
+# in multiple virtualenvs. This configuration file will run the
+# test suite on all supported python versions. To use it, "pip install tox"
+# and then run "tox" from this directory.
+
+[tox]
+envlist = py3
+
+[testenv]
+commands =
+    pytest test.py
+
+deps =
+    pytest
+    nose >= 1.1.2

From 6e907df0796c5bf36cc7b4536093960fcb2f87d0 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 09/52] update gitignore

---
 .gitignore | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index 514a598..56ec7f4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,8 +4,7 @@
 *.so
 
 # Packages
-*.egg
-*.egg-info
+*.egg*
 dist
 build
 eggs
@@ -18,6 +17,7 @@ develop-eggs
 lib
 lib64
 __pycache__
+venv
 
 # Installer logs
 pip-log.txt
@@ -35,8 +35,9 @@ nosetests.xml
 .project
 .pydevproject
 
-# PyCharm
+# IDEs
 .idea
+.vscode
 
 # Test output
 test.*.pdf
\ No newline at end of file

From d44402c754ddd8a655423d319cfeff9de7b89b96 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 10/52] split plotting into plotting and pdf writing

---
 croissance/figures/__init__.py | 132 ---------------------------------
 croissance/figures/plot.py     | 115 ++++++++++++++++++++++++++++
 croissance/figures/writer.py   |  35 +++++++++
 3 files changed, 150 insertions(+), 132 deletions(-)
 create mode 100644 croissance/figures/plot.py
 create mode 100644 croissance/figures/writer.py

diff --git a/croissance/figures/__init__.py b/croissance/figures/__init__.py
index a64f0b8..e69de29 100644
--- a/croissance/figures/__init__.py
+++ b/croissance/figures/__init__.py
@@ -1,132 +0,0 @@
-import numpy
-from matplotlib.backends.backend_pdf import PdfPages
-import matplotlib.pyplot as plt
-
-from croissance.estimation import AnnotatedGrowthCurve
-
-
-class PDFWriter:
-    def __init__(self, filepath, include_shifted_exponentials: bool = False):
-        self._handle = filepath.open("wb")
-        self._doc = PdfPages(self._handle)
-        self._include_shifted_exponentials = include_shifted_exponentials
-
-    def write(self, name: str, curve: AnnotatedGrowthCurve):
-
-        fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(16, 16))
-        axes[1].set_yscale("log")
-
-        try:
-            plt.title(name)
-
-            if curve.series.max() <= 0:
-                return
-
-            for axis in axes:
-                axis.plot(
-                    curve.series.index,
-                    curve.series.values,
-                    color="black",
-                    marker=".",
-                    markersize=5,
-                    linestyle="None",
-                )
-
-                axis.plot(
-                    curve.outliers.index,
-                    curve.outliers.values,
-                    color="red",
-                    marker=".",
-                    markersize=5,
-                    linestyle="None",
-                )
-
-            colors = ["b", "g", "c", "m", "y", "k"]
-
-            for i, phase in enumerate(curve.growth_phases):
-                color = colors[i % len(colors)]
-
-                # if phase['exclude'] is True:
-                #     color = 'gray'
-
-                a = 1 / numpy.exp(phase.intercept * phase.slope)
-
-                def gf(x):
-                    return a * numpy.exp(phase.slope * x) + phase.n0
-
-                phase_series = curve.series[phase.start : phase.end]
-
-                for axis in axes:
-                    if self._include_shifted_exponentials:
-                        axis.plot(
-                            phase_series.index,
-                            phase_series.values - phase.n0,
-                            marker=".",
-                            linewidth=0,
-                            color="red",
-                        )
-
-                        axis.plot(
-                            phase_series.index,
-                            gf(phase_series.index) - phase.n0,
-                            marker=None,
-                            linewidth=2,
-                            color="red",
-                        )
-
-                    axis.axhline(
-                        y=phase.n0,
-                        marker=None,
-                        linewidth=1,
-                        linestyle="dashed",
-                        color=color,
-                    )
-                    axis.axvline(
-                        x=phase.intercept,
-                        marker=None,
-                        linewidth=1,
-                        linestyle="dashed",
-                        color=color,
-                    )
-
-                    axis.plot(
-                        phase_series.index,
-                        phase_series.values,
-                        marker=None,
-                        linewidth=15,
-                        color=color,
-                        solid_capstyle="round",
-                        alpha=0.2,
-                    )
-
-                    axis.plot(
-                        curve.series.index,
-                        gf(curve.series.index),
-                        color=color,
-                        linewidth=1,
-                    )
-                    axis.plot(
-                        phase_series.index,
-                        gf(phase_series.index),
-                        color=color,
-                        linewidth=2,
-                    )
-
-            for axis in axes:
-                axis.set_xlim(curve.series.index[0], curve.series.index[-1])
-
-            axes[0].set_ylim([max(curve.series.min(), -2.5), curve.series.max()])
-            axes[1].set_ylim([0.1, curve.series.max()])
-        finally:
-            self._doc.savefig(fig)
-            plt.close()
-
-    def __enter__(self):
-        return self
-
-    def __exit__(self, *args, **kwargs):
-        self.close()
-
-    def close(self):
-        self._doc.close()
-        self._handle.close()
diff --git a/croissance/figures/plot.py b/croissance/figures/plot.py
new file mode 100644
index 0000000..fafa370
--- /dev/null
+++ b/croissance/figures/plot.py
@@ -0,0 +1,115 @@
+import matplotlib.pyplot as plt
+import numpy
+
+from croissance.estimation import AnnotatedGrowthCurve
+
+
+def plot_processed_curve(
+    curve: AnnotatedGrowthCurve,
+    name: str,
+    figsize=(16, 16),
+    include_shifted_exponentials: bool = False,
+):
+    fig, axes = plt.subplots(nrows=2, ncols=1, figsize=figsize)
+
+    axes[1].set_yscale("log")
+
+    fig.suptitle(name)
+
+    if curve.series.max() <= 0:
+        return
+
+    for axis in axes:
+        axis.plot(
+            curve.series.index,
+            curve.series.values,
+            color="black",
+            marker=".",
+            markersize=5,
+            linestyle="None",
+        )
+
+        axis.plot(
+            curve.outliers.index,
+            curve.outliers.values,
+            color="red",
+            marker=".",
+            markersize=5,
+            linestyle="None",
+        )
+
+    colors = ["b", "g", "c", "m", "y", "k"]
+
+    for i, phase in enumerate(curve.growth_phases):
+        color = colors[i % len(colors)]
+
+        a = 1 / numpy.exp(phase.intercept * phase.slope)
+
+        def gf(x):
+            return a * numpy.exp(phase.slope * x) + phase.n0
+
+        phase_series = curve.series[phase.start : phase.end]
+
+        for axis in axes:
+            if include_shifted_exponentials:
+                axis.plot(
+                    phase_series.index,
+                    phase_series.values - phase.n0,
+                    marker=".",
+                    linewidth=0,
+                    color="red",
+                )
+
+                axis.plot(
+                    phase_series.index,
+                    gf(phase_series.index) - phase.n0,
+                    marker=None,
+                    linewidth=2,
+                    color="red",
+                )
+
+            axis.axhline(
+                y=phase.n0,
+                marker=None,
+                linewidth=1,
+                linestyle="dashed",
+                color=color,
+            )
+            axis.axvline(
+                x=phase.intercept,
+                marker=None,
+                linewidth=1,
+                linestyle="dashed",
+                color=color,
+            )
+
+            axis.plot(
+                phase_series.index,
+                phase_series.values,
+                marker=None,
+                linewidth=15,
+                color=color,
+                solid_capstyle="round",
+                alpha=0.2,
+            )
+
+            axis.plot(
+                curve.series.index,
+                gf(curve.series.index),
+                color=color,
+                linewidth=1,
+            )
+            axis.plot(
+                phase_series.index,
+                gf(phase_series.index),
+                color=color,
+                linewidth=2,
+            )
+
+    for axis in axes:
+        axis.set_xlim(curve.series.index[0], curve.series.index[-1])
+
+    axes[0].set_ylim([max(curve.series.min(), -2.5), curve.series.max()])
+    axes[1].set_ylim([0.1, curve.series.max()])
+
+    return fig, axes
diff --git a/croissance/figures/writer.py b/croissance/figures/writer.py
new file mode 100644
index 0000000..f3ad4f3
--- /dev/null
+++ b/croissance/figures/writer.py
@@ -0,0 +1,35 @@
+import matplotlib.pyplot as plt
+
+from matplotlib.backends.backend_pdf import PdfPages
+
+from croissance.estimation import AnnotatedGrowthCurve
+from croissance.figures.plot import plot_processed_curve
+
+
+class PDFWriter:
+    def __init__(self, filepath, include_shifted_exponentials: bool = False):
+        self._handle = filepath.open("wb")
+        self._doc = PdfPages(self._handle)
+        self._include_shifted_exponentials = include_shifted_exponentials
+
+    def write(self, name: str, curve: AnnotatedGrowthCurve):
+        fig, _axes = plot_processed_curve(
+            curve=curve,
+            name=name,
+            include_shifted_exponentials=self._include_shifted_exponentials,
+        )
+
+        try:
+            self._doc.savefig(fig)
+        finally:
+            plt.close(fig)
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *args, **kwargs):
+        self.close()
+
+    def close(self):
+        self._doc.close()
+        self._handle.close()

From 3d91a98d2b3b5a8ff7ca1ab33bceea4e638404f1 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 11/52] expose plotting function to library users

This fixes #7
---
 croissance/__init__.py | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/croissance/__init__.py b/croissance/__init__.py
index db9ad1a..92096db 100644
--- a/croissance/__init__.py
+++ b/croissance/__init__.py
@@ -1,12 +1,13 @@
-from collections import namedtuple
+import croissance.figures.plot
 
-from croissance.estimation import Estimator
+from croissance.estimation import Estimator, AnnotatedGrowthCurve
 from croissance.estimation.util import normalize_time_unit
 
 
-AnnotatedGrowthCurve = namedtuple(
-    "AnnotatedGrowthCurve", ("series", "outliers", "growth_phases")
-)
+__all__ = [
+    "plot_processed_curve",
+    "process_curve",
+]
 
 
 def process_curve(
@@ -25,3 +26,17 @@ def process_curve(
         constrain_n0=constrain_n0,
         n0=n0,
     ).growth(curve)
+
+
+def plot_processed_curve(
+    curve: AnnotatedGrowthCurve,
+    name: str = None,
+    figsize=(10, 10),
+    include_shifted_exponentials: bool = False,
+):
+    return croissance.figures.plot.plot_processed_curve(
+        curve=curve,
+        name=name,
+        figsize=figsize,
+        include_shifted_exponentials=include_shifted_exponentials,
+    )

From 020af1c0392297f1c96e203b3794f9dbacee5a5a Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 12/52] allow non-Path filepaths

---
 croissance/figures/writer.py | 2 +-
 croissance/formats/input.py  | 2 +-
 croissance/formats/output.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/croissance/figures/writer.py b/croissance/figures/writer.py
index f3ad4f3..ef8b774 100644
--- a/croissance/figures/writer.py
+++ b/croissance/figures/writer.py
@@ -8,7 +8,7 @@
 
 class PDFWriter:
     def __init__(self, filepath, include_shifted_exponentials: bool = False):
-        self._handle = filepath.open("wb")
+        self._handle = open(filepath, "wb")
         self._doc = PdfPages(self._handle)
         self._include_shifted_exponentials = include_shifted_exponentials
 
diff --git a/croissance/formats/input.py b/croissance/formats/input.py
index 25b5df8..1ce52a0 100644
--- a/croissance/formats/input.py
+++ b/croissance/formats/input.py
@@ -6,7 +6,7 @@ def __init__(self, filepath):
         self._filepath = filepath
 
     def read(self):
-        with self._filepath.open("rt") as handle:
+        with open(self._filepath, "rt") as handle:
             data = pandas.read_csv(handle, sep="\t", header=0, index_col=0)
 
         return [(name, data[name].dropna()) for name in data.columns]
diff --git a/croissance/formats/output.py b/croissance/formats/output.py
index 1aa7ae5..653ee1d 100644
--- a/croissance/formats/output.py
+++ b/croissance/formats/output.py
@@ -6,7 +6,7 @@
 class TSVWriter:
     def __init__(self, filepath, exclude_default_phase: bool = True):
         self._exclude_default_phase = exclude_default_phase
-        self._handle = filepath.open("wt")
+        self._handle = open(filepath, "wt")
         self._writer = csv.writer(
             self._handle, delimiter="\t", quoting=csv.QUOTE_MINIMAL
         )

From 2f1305e7b5d9396f6602b2cf5e5a54585652232e Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 13/52] fix import paths and remove unused imports

---
 croissance/main.py | 3 +--
 test.py            | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/croissance/main.py b/croissance/main.py
index 9ae9609..2dc8bb7 100644
--- a/croissance/main.py
+++ b/croissance/main.py
@@ -1,6 +1,5 @@
 #!/usr/bin/env python
 import argparse
-import collections
 import logging
 import sys
 
@@ -10,7 +9,7 @@
 
 from croissance import Estimator
 from croissance.estimation.util import normalize_time_unit
-from croissance.figures import PDFWriter
+from croissance.figures.writer import PDFWriter
 from croissance.formats.input import TSVReader
 from croissance.formats.output import TSVWriter
 
diff --git a/test.py b/test.py
index ee81438..9f71d47 100644
--- a/test.py
+++ b/test.py
@@ -4,10 +4,10 @@
 import numpy
 import pandas
 
-from croissance import process_curve, AnnotatedGrowthCurve
+from croissance import process_curve
 from croissance.estimation import fit_exponential
 from croissance.estimation.util import normalize_time_unit
-from croissance.figures import PDFWriter
+from croissance.figures.writer import PDFWriter
 
 
 class CroissanceTestCase(TestCase):

From 01038792d0e4f13381cc99deef6028632cd1c6c2 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 14/52] remove dead code

---
 croissance/estimation/__init__.py           |  8 +-------
 croissance/estimation/smoothing/__init__.py | 13 -------------
 2 files changed, 1 insertion(+), 20 deletions(-)

diff --git a/croissance/estimation/__init__.py b/croissance/estimation/__init__.py
index f6f174d..8642033 100644
--- a/croissance/estimation/__init__.py
+++ b/croissance/estimation/__init__.py
@@ -10,14 +10,8 @@
 from croissance.estimation.outliers import remove_outliers
 from croissance.estimation.ranking import rank_phases
 from croissance.estimation.regression import fit_exponential
-from croissance.estimation.smoothing import median_smoothing
 from croissance.estimation.smoothing.segments import segment_spline_smoothing
-from croissance.estimation.util import (
-    savitzky_golay,
-    resample,
-    with_overhangs,
-    points_per_hour,
-)
+from croissance.estimation.util import savitzky_golay, points_per_hour
 
 
 class RawGrowthPhase(namedtuple("RawGrowthPhase", ("start", "end"))):
diff --git a/croissance/estimation/smoothing/__init__.py b/croissance/estimation/smoothing/__init__.py
index 6a4c1e6..e69de29 100644
--- a/croissance/estimation/smoothing/__init__.py
+++ b/croissance/estimation/smoothing/__init__.py
@@ -1,13 +0,0 @@
-import pandas
-
-from croissance.estimation.util import with_overhangs
-
-
-def median_smoothing(series, window=10):
-    data = pandas.rolling_apply(
-        with_overhangs(series.values, window // 2),
-        window,
-        median_window_clean,
-        center=True,
-    )[window // 2 : -window // 2]
-    return pandas.Series(index=series.index, data=data)

From 341835b6765c38b15307808282768ee4bd96fbe9 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 15/52] simplify ranking calculation

---
 croissance/estimation/ranking.py | 34 ++++++++------------------------
 1 file changed, 8 insertions(+), 26 deletions(-)

diff --git a/croissance/estimation/ranking.py b/croissance/estimation/ranking.py
index 5b9def5..262b5d9 100644
--- a/croissance/estimation/ranking.py
+++ b/croissance/estimation/ranking.py
@@ -1,38 +1,20 @@
-from operator import itemgetter
-
-import numpy
+from operator import attrgetter
 
 
 def score(values, q):
-    return (q - numpy.min(values)) / (numpy.max(values) - numpy.min(values)) * 100
+    return (q - min(values)) / (max(values) - min(values)) * 100
 
 
 def rank_phases(phases, weights, thresholds):
     values = {}
-    scores = []
-
     for attribute in weights:
         values[attribute] = [getattr(phase, attribute) for phase in phases]
+        values[attribute].append(thresholds[attribute])
 
     for phase in phases:
-        scores.append(
-            (
-                sum(
-                    weight
-                    * score(
-                        [thresholds[attribute]] + values[attribute],
-                        getattr(phase, attribute),
-                    )
-                    for attribute, weight in weights.items()
-                )
-                / sum(weights.values()),
-                phase,
-            )
-        )
-
-    ranked_phases = []
-    for rank, phase in sorted(scores, key=itemgetter(0), reverse=True):
-        phase.attributes["rank"] = rank
-        ranked_phases.append(phase)
+        phase.attributes["rank"] = sum(
+            weight * score(values[attribute], getattr(phase, attribute))
+            for attribute, weight in weights.items()
+        ) / sum(weights.values())
 
-    return ranked_phases
+    return sorted(phases, key=attrgetter("rank"), reverse=True)

From fb872b227e31faa8d43bb6459dc2143d3e1784ea Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 16/52] add start/end positions to phase table

---
 croissance/formats/output.py | 42 +++++++++++++++++++-----------------
 1 file changed, 22 insertions(+), 20 deletions(-)

diff --git a/croissance/formats/output.py b/croissance/formats/output.py
index 653ee1d..0900be8 100644
--- a/croissance/formats/output.py
+++ b/croissance/formats/output.py
@@ -12,32 +12,34 @@ def __init__(self, filepath, exclude_default_phase: bool = True):
         )
 
         self._writer.writerow(
-            ["name", "phase", "slope", "intercept", "N0", "SNR", "rank"]
+            ["name", "phase", "start", "end", "slope", "intercept", "N0", "SNR", "rank"]
         )
 
     def write(self, name: str, curve: AnnotatedGrowthCurve):
         if not self._exclude_default_phase:
             phase = GrowthPhase.pick_best(curve.growth_phases, "rank")
+            if phase is None:
+                phase = GrowthPhase(None, None, None, None, None, {})
 
-            if not phase:
-                self._writer.writerow([name, 0, None, None, None])
-            else:
-                self._writer.writerow(
-                    [
-                        name,
-                        0,
-                        phase.slope,
-                        phase.intercept,
-                        phase.n0,
-                        phase.SNR,
-                        phase.rank,
-                    ]
-                )
-
-        for i, phase in enumerate(curve.growth_phases, start=1):
-            self._writer.writerow(
-                [name, i, phase.slope, phase.intercept, phase.n0, phase.SNR, phase.rank]
-            )
+            self._write_phase(name, 0, phase)
+
+        for idx, phase in enumerate(curve.growth_phases, start=1):
+            self._write_phase(name, idx, phase)
+
+    def _write_phase(self, name, idx, phase):
+        self._writer.writerow(
+            [
+                name,
+                idx,
+                phase.start,
+                phase.end,
+                phase.slope,
+                phase.intercept,
+                phase.n0,
+                phase.SNR,
+                phase.rank,
+            ]
+        )
 
     def __enter__(self):
         return self

From 7a9d832dd9e4c1c53ed6945990c7e5380814ef37 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 17/52] simplify GrowthPhase class

Values stored in the 'attributes' dict are made proper attributes in
order to get rid of __getattr__ implemention that results in inconstent
behavior when accessing missing attributes.
---
 croissance/estimation/__init__.py | 27 +++++++++++++++++----------
 croissance/estimation/ranking.py  |  7 +++++--
 croissance/formats/output.py      |  2 +-
 3 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/croissance/estimation/__init__.py b/croissance/estimation/__init__.py
index 8642033..9e838fc 100644
--- a/croissance/estimation/__init__.py
+++ b/croissance/estimation/__init__.py
@@ -1,4 +1,5 @@
 import logging
+
 from collections import namedtuple
 from operator import attrgetter
 
@@ -24,7 +25,7 @@ def duration(self):
 
 class GrowthPhase(
     namedtuple(
-        "GrowthPhase", ("start", "end", "slope", "intercept", "n0", "attributes")
+        "GrowthPhase", ("start", "end", "slope", "intercept", "n0", "SNR", "rank")
     )
 ):
     __slots__ = ()
@@ -33,15 +34,13 @@ class GrowthPhase(
     def duration(self):
         return self.end - self.start
 
-    @classmethod
-    def pick_best(cls, growth_phases, metric="duration"):
-        try:
-            return sorted(growth_phases, key=attrgetter(metric), reverse=True)[0]
-        except IndexError:
-            return None
+    @staticmethod
+    def pick_best(growth_phases, metric="duration"):
+        growth_phases = sorted(growth_phases, key=attrgetter(metric))
+        if growth_phases:
+            return growth_phases[-1]
 
-    def __getattr__(self, item):
-        return self.attributes.get(item, None)
+        return None
 
 
 AnnotatedGrowthCurve = namedtuple(
@@ -161,7 +160,15 @@ def growth(self, curve: pandas.Series) -> AnnotatedGrowthCurve:
                 continue
 
             phases.append(
-                GrowthPhase(phase.start, phase.end, slope, intercept, n0, {"SNR": snr})
+                GrowthPhase(
+                    start=phase.start,
+                    end=phase.end,
+                    slope=slope,
+                    intercept=intercept,
+                    n0=n0,
+                    SNR=snr,
+                    rank=None,
+                )
             )
 
         ranked_phases = rank_phases(
diff --git a/croissance/estimation/ranking.py b/croissance/estimation/ranking.py
index 262b5d9..5e1c498 100644
--- a/croissance/estimation/ranking.py
+++ b/croissance/estimation/ranking.py
@@ -11,10 +11,13 @@ def rank_phases(phases, weights, thresholds):
         values[attribute] = [getattr(phase, attribute) for phase in phases]
         values[attribute].append(thresholds[attribute])
 
+    ranked_phases = []
     for phase in phases:
-        phase.attributes["rank"] = sum(
+        rank = sum(
             weight * score(values[attribute], getattr(phase, attribute))
             for attribute, weight in weights.items()
         ) / sum(weights.values())
 
-    return sorted(phases, key=attrgetter("rank"), reverse=True)
+        ranked_phases.append(phase._replace(rank=rank))
+
+    return sorted(ranked_phases, key=attrgetter("rank"), reverse=True)
diff --git a/croissance/formats/output.py b/croissance/formats/output.py
index 0900be8..5d3856b 100644
--- a/croissance/formats/output.py
+++ b/croissance/formats/output.py
@@ -19,7 +19,7 @@ def write(self, name: str, curve: AnnotatedGrowthCurve):
         if not self._exclude_default_phase:
             phase = GrowthPhase.pick_best(curve.growth_phases, "rank")
             if phase is None:
-                phase = GrowthPhase(None, None, None, None, None, {})
+                phase = GrowthPhase(None, None, None, None, None, None, None)
 
             self._write_phase(name, 0, phase)
 

From 58aceabf3a677dc7a8c3baa2b4b7edc6931fae84 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 18/52] simple parallelization of growth rate estimations

---
 croissance/estimation/__init__.py | 14 +++---
 croissance/main.py                | 77 +++++++++++++++++++++++--------
 2 files changed, 65 insertions(+), 26 deletions(-)

diff --git a/croissance/estimation/__init__.py b/croissance/estimation/__init__.py
index 9e838fc..14cda3c 100644
--- a/croissance/estimation/__init__.py
+++ b/croissance/estimation/__init__.py
@@ -91,7 +91,9 @@ def _find_growth_phases(
 
         return phases
 
-    def growth(self, curve: pandas.Series) -> AnnotatedGrowthCurve:
+    def growth(
+        self, curve: pandas.Series, name: str = "untitled curve"
+    ) -> AnnotatedGrowthCurve:
         series = curve.dropna()
 
         n_hours = int(
@@ -99,7 +101,7 @@ def growth(self, curve: pandas.Series) -> AnnotatedGrowthCurve:
         )
 
         if n_hours == 0:
-            self._log.warning("Less than 1 data-point/hour; cannot proceed")
+            self._log.warning("Fewer than one Data-points/hour for %s", name)
             return AnnotatedGrowthCurve(series, [], [])
 
         if n_hours % 2 == 0:
@@ -109,20 +111,18 @@ def growth(self, curve: pandas.Series) -> AnnotatedGrowthCurve:
 
         # NOTE workaround for issue with negative curves
         if len(series[series > 0]) < 3:
-            self._log.warning("Less than 3 data-points post filtering; cannot proceeed")
+            self._log.warning("Fewer than three positive data-points for %s", name)
             return AnnotatedGrowthCurve(series, outliers, [])
 
         if self._segment_log_n0:
             series_log_n0 = numpy.log(series - self._n0).dropna()
             smooth_series = segment_spline_smoothing(series, series_log_n0)
         else:
-            smooth_series = segment_spline_smoothing(
-                series,
-            )
+            smooth_series = segment_spline_smoothing(series)
 
         phases = []
         if len(smooth_series) < n_hours:
-            self._log.warning("Insufficient data, cannot determine growth")
+            self._log.warning("Insufficient smoothed data for %s", name)
             return AnnotatedGrowthCurve(series, [], [])
         raw_phases = self._find_growth_phases(smooth_series, window=n_hours)
 
diff --git a/croissance/main.py b/croissance/main.py
index 2dc8bb7..2707029 100644
--- a/croissance/main.py
+++ b/croissance/main.py
@@ -1,6 +1,8 @@
 #!/usr/bin/env python
 import argparse
 import logging
+import multiprocessing
+import signal
 import sys
 
 from pathlib import Path
@@ -14,6 +16,30 @@
 from croissance.formats.output import TSVWriter
 
 
+class EstimatorWrapper:
+    def __init__(self, args):
+        self.estimator = Estimator(
+            constrain_n0=args.constrain_N0,
+            segment_log_n0=args.segment_log_N0,
+            n0=args.N0,
+        )
+
+        self.input_time_unit = args.input_time_unit
+
+    def __call__(self, values):
+        filepath, idx, name, curve = values
+
+        normalized_curve = normalize_time_unit(curve, self.input_time_unit)
+        annotated_curve = self.estimator.growth(normalized_curve, name)
+
+        return (filepath, idx, name, annotated_curve)
+
+
+def init_worker():
+    # Ensure that KeyboardInterrupt exceptions only occur in the main thread
+    signal.signal(signal.SIGINT, signal.SIG_IGN)
+
+
 def parse_args(argv):
     parser = argparse.ArgumentParser(
         description="Estimate growth rates in growth curves",
@@ -59,6 +85,13 @@ def parse_args(argv):
         help="Renders a PDF file with figures for each curve",
     )
 
+    parser.add_argument(
+        "--threads",
+        type=int,
+        default=1,
+        help="Max number of threads to use during growth estimation",
+    )
+
     group = parser.add_argument_group("Logging")
     group.add_argument(
         "--log-level",
@@ -84,39 +117,45 @@ def main(argv):
     args = parse_args(argv)
     log = setup_logging(level=args.log_level)
 
-    estimator = Estimator(
-        constrain_n0=args.constrain_N0,
-        segment_log_n0=args.segment_log_N0,
-        n0=args.N0,
-    )
-
-    log.info("Estimating growth curves ..")
+    curves = []
     for filepath in args.infiles:
         log.info("Reading curves from '%s", filepath)
         with TSVReader(filepath) as reader:
-            curves = reader.read()
+            for idx, (name, curve) in enumerate(reader.read()):
+                if curve.empty:
+                    log.warning("Skipping empty curve %r", name)
+                    continue
+
+                curves.append((filepath, idx, name, curve))
+    log.info("Collected a total of %i growth curves", len(curves))
 
-        annotated_curves = {}
-        for name, curve in curves:
-            if curve.empty:
-                log.warning("Skipping empty curve %r", name)
-                continue
+    # Dont spawn more processes than tasks
+    args.threads = max(1, min(args.threads, len(curves)))
 
-            log.info("Estimating growth for curve %r", name)
-            annotated_curves[name] = estimator.growth(
-                normalize_time_unit(curve, args.input_time_unit)
-            )
+    log.info("Annotating growth curves ..")
+    annotated_curves = {filepath: [] for filepath in args.infiles}
+    with multiprocessing.Pool(processes=args.threads, initializer=init_worker) as pool:
+        async_calculation = pool.imap_unordered(EstimatorWrapper(args), curves)
 
+        for nth, (filepath, idx, name, curve) in enumerate(async_calculation, start=1):
+            log.info("Annotated curve %i of %i: %s", nth, len(curves), name)
+
+            annotated_curves[filepath].append((idx, name, curve))
+
+    for filepath in args.infiles:
         output_filepath = filepath.with_suffix(args.output_suffix + ".tsv")
+        log.info("Writing annotated curves to '%s'", output_filepath)
+
         with TSVWriter(output_filepath, args.output_exclude_default_phase) as outwriter:
-            for name, annotated_curve in annotated_curves.items():
+            for _, name, annotated_curve in sorted(annotated_curves[filepath]):
                 outwriter.write(name, annotated_curve)
 
         if args.figures:
             figure_filepath = filepath.with_suffix(args.output_suffix + ".pdf")
+            log.info("Writing PDFs to '%s'", figure_filepath)
 
             with PDFWriter(figure_filepath) as figwriter:
-                for name, annotated_curve in annotated_curves.items():
+                for _, name, annotated_curve in sorted(annotated_curves[filepath]):
                     figwriter.write(name, annotated_curve)
 
     log.info("Done ..")

From 7b42feadb77c64821893038637eb9b2637d5ef2b Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 19/52] fix outliers not being returned in one case

---
 croissance/estimation/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/croissance/estimation/__init__.py b/croissance/estimation/__init__.py
index 14cda3c..f6daee9 100644
--- a/croissance/estimation/__init__.py
+++ b/croissance/estimation/__init__.py
@@ -123,7 +123,7 @@ def growth(
         phases = []
         if len(smooth_series) < n_hours:
             self._log.warning("Insufficient smoothed data for %s", name)
-            return AnnotatedGrowthCurve(series, [], [])
+            return AnnotatedGrowthCurve(series, outliers, [])
         raw_phases = self._find_growth_phases(smooth_series, window=n_hours)
 
         for phase in raw_phases:

From a4830bdd9c9db39808861dfc111fc0fd3d65655e Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 20/52] simplify outlier masking

---
 croissance/estimation/outliers.py | 26 +++++++-------------------
 1 file changed, 7 insertions(+), 19 deletions(-)

diff --git a/croissance/estimation/outliers.py b/croissance/estimation/outliers.py
index 0b99c82..0d4ed8e 100644
--- a/croissance/estimation/outliers.py
+++ b/croissance/estimation/outliers.py
@@ -5,28 +5,16 @@
 
 def remove_outliers(series, window=30, std=2):
     """
-    Removes any points where the distance of the median exceeds ``std`` standard deviations within a rolling window.
-
-    :param series:
-    :param window:
-    :param std:
-    :return:
+    Returns a tuple containing a series with points removed where the distance of the
+    median exceeds ``std`` standard deviations within a rolling window, and a series
+    containing the removed outliers.
     """
     if len(series.values) < 10:
-        return series, pandas.Series(data=[], index=[])
+        return series, pandas.Series([], dtype="float64")
 
     values = with_overhangs(series.values, window)
-    outliers = (
-        abs(values - values.rolling(window=window, center=True).median())
-        < values.rolling(window=window, center=True).std() * std
-    )
+    windows = values.rolling(window=window, center=True)
+    outliers = abs(values - windows.median()) >= windows.std() * std
     outlier_mask = outliers[window:-window].values
 
-    outliers = pandas.Series(
-        data=series.values[~outlier_mask], index=series.index[~outlier_mask]
-    )
-    series = pandas.Series(
-        data=series.values[outlier_mask], index=series.index[outlier_mask]
-    )
-
-    return series, outliers
+    return series[~outlier_mask], series[outlier_mask]

From 2a51af0333cf1228128834c56e5e84e173e0f05e Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 21/52] added hint for times that might be in minutes

---
 croissance/estimation/__init__.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/croissance/estimation/__init__.py b/croissance/estimation/__init__.py
index f6daee9..f839580 100644
--- a/croissance/estimation/__init__.py
+++ b/croissance/estimation/__init__.py
@@ -101,7 +101,11 @@ def growth(
         )
 
         if n_hours == 0:
-            self._log.warning("Fewer than one Data-points/hour for %s", name)
+            self._log.warning(
+                "Fewer than one data-point per hour for %s. Use the command-line"
+                "`--input-time-unit minutes` if times are represented in minutes.",
+                name,
+            )
             return AnnotatedGrowthCurve(series, [], [])
 
         if n_hours % 2 == 0:

From bcae68feebc72131f8d7225128bab12b094509c6 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 22/52] outliers should always be a Series

This fixes errors caused by code assuming this to be the case.
---
 croissance/estimation/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/croissance/estimation/__init__.py b/croissance/estimation/__init__.py
index f839580..6a8c650 100644
--- a/croissance/estimation/__init__.py
+++ b/croissance/estimation/__init__.py
@@ -106,7 +106,7 @@ def growth(
                 "`--input-time-unit minutes` if times are represented in minutes.",
                 name,
             )
-            return AnnotatedGrowthCurve(series, [], [])
+            return AnnotatedGrowthCurve(series, pandas.Series(dtype="float64"), [])
 
         if n_hours % 2 == 0:
             n_hours += 1

From 4e9bb82020b3432f60a7a58a3416ef7b5f1d3fda Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 23/52] remove unused functionality

---
 croissance/__init__.py       | 12 ++----------
 croissance/figures/plot.py   | 24 +-----------------------
 croissance/figures/writer.py |  9 ++-------
 3 files changed, 5 insertions(+), 40 deletions(-)

diff --git a/croissance/__init__.py b/croissance/__init__.py
index 92096db..10b7fef 100644
--- a/croissance/__init__.py
+++ b/croissance/__init__.py
@@ -28,15 +28,7 @@ def process_curve(
     ).growth(curve)
 
 
-def plot_processed_curve(
-    curve: AnnotatedGrowthCurve,
-    name: str = None,
-    figsize=(10, 10),
-    include_shifted_exponentials: bool = False,
-):
+def plot_processed_curve(curve: AnnotatedGrowthCurve, name=None, figsize=(10, 10)):
     return croissance.figures.plot.plot_processed_curve(
-        curve=curve,
-        name=name,
-        figsize=figsize,
-        include_shifted_exponentials=include_shifted_exponentials,
+        curve=curve, name=name, figsize=figsize
     )
diff --git a/croissance/figures/plot.py b/croissance/figures/plot.py
index fafa370..5630940 100644
--- a/croissance/figures/plot.py
+++ b/croissance/figures/plot.py
@@ -4,12 +4,7 @@
 from croissance.estimation import AnnotatedGrowthCurve
 
 
-def plot_processed_curve(
-    curve: AnnotatedGrowthCurve,
-    name: str,
-    figsize=(16, 16),
-    include_shifted_exponentials: bool = False,
-):
+def plot_processed_curve(curve: AnnotatedGrowthCurve, name: str, figsize=(16, 16)):
     fig, axes = plt.subplots(nrows=2, ncols=1, figsize=figsize)
 
     axes[1].set_yscale("log")
@@ -51,23 +46,6 @@ def gf(x):
         phase_series = curve.series[phase.start : phase.end]
 
         for axis in axes:
-            if include_shifted_exponentials:
-                axis.plot(
-                    phase_series.index,
-                    phase_series.values - phase.n0,
-                    marker=".",
-                    linewidth=0,
-                    color="red",
-                )
-
-                axis.plot(
-                    phase_series.index,
-                    gf(phase_series.index) - phase.n0,
-                    marker=None,
-                    linewidth=2,
-                    color="red",
-                )
-
             axis.axhline(
                 y=phase.n0,
                 marker=None,
diff --git a/croissance/figures/writer.py b/croissance/figures/writer.py
index ef8b774..afb019c 100644
--- a/croissance/figures/writer.py
+++ b/croissance/figures/writer.py
@@ -7,17 +7,12 @@
 
 
 class PDFWriter:
-    def __init__(self, filepath, include_shifted_exponentials: bool = False):
+    def __init__(self, filepath):
         self._handle = open(filepath, "wb")
         self._doc = PdfPages(self._handle)
-        self._include_shifted_exponentials = include_shifted_exponentials
 
     def write(self, name: str, curve: AnnotatedGrowthCurve):
-        fig, _axes = plot_processed_curve(
-            curve=curve,
-            name=name,
-            include_shifted_exponentials=self._include_shifted_exponentials,
-        )
+        fig, _axes = plot_processed_curve(curve=curve, name=name)
 
         try:
             self._doc.savefig(fig)

From 83aa990d3b9832ac9e4beccfae34b7e54355d79c Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 24/52] add missing whitespace to log message

---
 croissance/estimation/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/croissance/estimation/__init__.py b/croissance/estimation/__init__.py
index 6a8c650..464f74a 100644
--- a/croissance/estimation/__init__.py
+++ b/croissance/estimation/__init__.py
@@ -102,7 +102,7 @@ def growth(
 
         if n_hours == 0:
             self._log.warning(
-                "Fewer than one data-point per hour for %s. Use the command-line"
+                "Fewer than one data-point per hour for %s. Use the command-line "
                 "`--input-time-unit minutes` if times are represented in minutes.",
                 name,
             )

From 2de4c23dbd0e8f48ebafaadd3cd0de95baf2453e Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 25/52] cleanup and handle edge cases in `points_per_hour`

---
 croissance/estimation/util.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/croissance/estimation/util.py b/croissance/estimation/util.py
index 63adae7..7b6fcd4 100644
--- a/croissance/estimation/util.py
+++ b/croissance/estimation/util.py
@@ -5,9 +5,10 @@
 
 
 def points_per_hour(series):
-    return 1 / numpy.median(
-        [series.index[i] - series.index[i - 1] for i in range(1, len(series))]
-    )
+    if len(series) < 2:
+        return len(series)
+
+    return 1 / numpy.median(series.index[1:] - series.index[:-1])
 
 
 def resample(series, *, factor=10, size=None):

From 41b376698c7833d6186aa1f586742703bec6e440 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 26/52] fix division-by-0 in `score` for identical values

This was observed for curve with a single phase with duration = 1.5,
but it could theoretically occur for any attribute and any number of
phases.
---
 croissance/estimation/ranking.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/croissance/estimation/ranking.py b/croissance/estimation/ranking.py
index 5e1c498..e1d7fd2 100644
--- a/croissance/estimation/ranking.py
+++ b/croissance/estimation/ranking.py
@@ -2,7 +2,11 @@
 
 
 def score(values, q):
-    return (q - min(values)) / (max(values) - min(values)) * 100
+    span = max(values) - min(values)
+    if not span:
+        return 100
+
+    return (q - min(values)) / span * 100
 
 
 def rank_phases(phases, weights, thresholds):

From 6a9a741d28a89b1a62219b004e63d8c6a4aee607 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 27/52] catch and log errors in worker threads

---
 croissance/main.py | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/croissance/main.py b/croissance/main.py
index 2707029..2148494 100644
--- a/croissance/main.py
+++ b/croissance/main.py
@@ -4,6 +4,7 @@
 import multiprocessing
 import signal
 import sys
+import traceback
 
 from pathlib import Path
 
@@ -29,10 +30,16 @@ def __init__(self, args):
     def __call__(self, values):
         filepath, idx, name, curve = values
 
-        normalized_curve = normalize_time_unit(curve, self.input_time_unit)
-        annotated_curve = self.estimator.growth(normalized_curve, name)
+        try:
+            normalized_curve = normalize_time_unit(curve, self.input_time_unit)
+            annotated_curve = self.estimator.growth(normalized_curve, name)
 
-        return (filepath, idx, name, annotated_curve)
+            return (filepath, idx, name, annotated_curve)
+        except Exception:
+            log = logging.getLogger("croissance")
+            log.error("Unhandled exception while annotating %r:", name)
+            for line in traceback.format_exc().splitlines():
+                log.error("%s", line)
 
 
 def init_worker():
@@ -131,13 +138,19 @@ def main(argv):
 
     # Dont spawn more processes than tasks
     args.threads = max(1, min(args.threads, len(curves)))
+    log.info("Annotating growth curves using %i threads", args.threads)
 
-    log.info("Annotating growth curves ..")
+    return_code = 0
     annotated_curves = {filepath: [] for filepath in args.infiles}
     with multiprocessing.Pool(processes=args.threads, initializer=init_worker) as pool:
         async_calculation = pool.imap_unordered(EstimatorWrapper(args), curves)
 
-        for nth, (filepath, idx, name, curve) in enumerate(async_calculation, start=1):
+        for nth, result in enumerate(async_calculation, start=1):
+            if result is None:
+                return_code = 1
+                continue
+
+            (filepath, idx, name, curve) = result
             log.info("Annotated curve %i of %i: %s", nth, len(curves), name)
 
             annotated_curves[filepath].append((idx, name, curve))
@@ -160,6 +173,8 @@ def main(argv):
 
     log.info("Done ..")
 
+    return return_code
+
 
 def entry_point():
     sys.exit(main(sys.argv[1:]))

From 663886a351ddc36760488f2cf948d3bba61c0f81 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 28/52] add option for controlling y-axis scale

The option allows for output plots with linear, logorithmic, or both types
of scales for y-axies.
---
 croissance/figures/plot.py   | 145 ++++++++++++++++++-----------------
 croissance/figures/writer.py |   8 +-
 croissance/main.py           |   9 ++-
 3 files changed, 90 insertions(+), 72 deletions(-)

diff --git a/croissance/figures/plot.py b/croissance/figures/plot.py
index 5630940..f2e45c2 100644
--- a/croissance/figures/plot.py
+++ b/croissance/figures/plot.py
@@ -4,34 +4,44 @@
 from croissance.estimation import AnnotatedGrowthCurve
 
 
-def plot_processed_curve(curve: AnnotatedGrowthCurve, name: str, figsize=(16, 16)):
-    fig, axes = plt.subplots(nrows=2, ncols=1, figsize=figsize)
-
-    axes[1].set_yscale("log")
-
+def plot_processed_curve(curve: AnnotatedGrowthCurve, name: str, yscale="log"):
+    fig, axes = plt.subplots(nrows=2 if yscale == "both" else 1, ncols=1)
     fig.suptitle(name)
 
     if curve.series.max() <= 0:
         return
 
-    for axis in axes:
-        axis.plot(
-            curve.series.index,
-            curve.series.values,
-            color="black",
-            marker=".",
-            markersize=5,
-            linestyle="None",
-        )
-
-        axis.plot(
-            curve.outliers.index,
-            curve.outliers.values,
-            color="red",
-            marker=".",
-            markersize=5,
-            linestyle="None",
-        )
+    if yscale == "both":
+        _do_plot_processed_curve(curve, axes[0], "linear")
+        _do_plot_processed_curve(curve, axes[1], "log")
+
+        return fig, axes
+    elif yscale in ("log", "linear"):
+        _do_plot_processed_curve(curve, axes, yscale)
+
+        return fig, [axes]
+    else:
+        raise ValueError(yscale)
+
+
+def _do_plot_processed_curve(curve, axis, yscale):
+    axis.plot(
+        curve.series.index,
+        curve.series.values,
+        color="black",
+        marker=".",
+        markersize=5,
+        linestyle="None",
+    )
+
+    axis.plot(
+        curve.outliers.index,
+        curve.outliers.values,
+        color="red",
+        marker=".",
+        markersize=5,
+        linestyle="None",
+    )
 
     colors = ["b", "g", "c", "m", "y", "k"]
 
@@ -45,49 +55,46 @@ def gf(x):
 
         phase_series = curve.series[phase.start : phase.end]
 
-        for axis in axes:
-            axis.axhline(
-                y=phase.n0,
-                marker=None,
-                linewidth=1,
-                linestyle="dashed",
-                color=color,
-            )
-            axis.axvline(
-                x=phase.intercept,
-                marker=None,
-                linewidth=1,
-                linestyle="dashed",
-                color=color,
-            )
-
-            axis.plot(
-                phase_series.index,
-                phase_series.values,
-                marker=None,
-                linewidth=15,
-                color=color,
-                solid_capstyle="round",
-                alpha=0.2,
-            )
-
-            axis.plot(
-                curve.series.index,
-                gf(curve.series.index),
-                color=color,
-                linewidth=1,
-            )
-            axis.plot(
-                phase_series.index,
-                gf(phase_series.index),
-                color=color,
-                linewidth=2,
-            )
-
-    for axis in axes:
-        axis.set_xlim(curve.series.index[0], curve.series.index[-1])
-
-    axes[0].set_ylim([max(curve.series.min(), -2.5), curve.series.max()])
-    axes[1].set_ylim([0.1, curve.series.max()])
-
-    return fig, axes
+        axis.axhline(
+            y=phase.n0,
+            marker=None,
+            linewidth=1,
+            linestyle="dashed",
+            color=color,
+        )
+        axis.axvline(
+            x=phase.intercept,
+            marker=None,
+            linewidth=1,
+            linestyle="dashed",
+            color=color,
+        )
+
+        axis.axhline(y=phase.n0, linewidth=1, linestyle="dashed", color=color)
+        axis.axvline(x=phase.intercept, linewidth=1, linestyle="dashed", color=color)
+
+        axis.plot(
+            phase_series.index,
+            phase_series.values,
+            marker=None,
+            linewidth=15,
+            color=color,
+            solid_capstyle="round",
+            alpha=0.2,
+        )
+
+        axis.plot(curve.series.index, gf(curve.series.index), color=color, linewidth=1)
+        axis.plot(phase_series.index, gf(phase_series.index), color=color, linewidth=2)
+
+    axis.set_xlim(curve.series.index[0], curve.series.index[-1])
+
+    yvalues = curve.series
+    ymargin = (yvalues.max() - yvalues.min()) * 0.025
+    yvalues_min = 0.01 if yscale == "log" else float("-inf")
+    axis.set_ylim([max(yvalues.min() - ymargin, yvalues_min), yvalues.max() + ymargin])
+
+    xvalues = curve.series.index
+    xmargin = (xvalues.max() - xvalues.min()) * 0.025
+    axis.set_xlim([xvalues.min() - xmargin, xvalues.max() + xmargin])
+
+    axis.set_yscale(yscale)
diff --git a/croissance/figures/writer.py b/croissance/figures/writer.py
index afb019c..09b7863 100644
--- a/croissance/figures/writer.py
+++ b/croissance/figures/writer.py
@@ -7,12 +7,16 @@
 
 
 class PDFWriter:
-    def __init__(self, filepath):
+    def __init__(self, filepath, yscale="both"):
         self._handle = open(filepath, "wb")
         self._doc = PdfPages(self._handle)
+        self._yscale = yscale
 
     def write(self, name: str, curve: AnnotatedGrowthCurve):
-        fig, _axes = plot_processed_curve(curve=curve, name=name)
+        fig, _axes = plot_processed_curve(curve=curve, name=name, yscale=self._yscale)
+
+        fig.set_figwidth(16)
+        fig.set_figheight(16 if self._yscale == "both" else 8)
 
         try:
             self._doc.savefig(fig)
diff --git a/croissance/main.py b/croissance/main.py
index 2148494..fa6c1c8 100644
--- a/croissance/main.py
+++ b/croissance/main.py
@@ -91,6 +91,13 @@ def parse_args(argv):
         action="store_true",
         help="Renders a PDF file with figures for each curve",
     )
+    parser.add_argument(
+        "--figures-yscale",
+        type=str.lower,
+        default="both",
+        choices=("log", "linear", "both"),
+        help="Yscale(s) for figures. If both, then two plots are generated per curve",
+    )
 
     parser.add_argument(
         "--threads",
@@ -167,7 +174,7 @@ def main(argv):
             figure_filepath = filepath.with_suffix(args.output_suffix + ".pdf")
             log.info("Writing PDFs to '%s'", figure_filepath)
 
-            with PDFWriter(figure_filepath) as figwriter:
+            with PDFWriter(figure_filepath, yscale=args.figures_yscale) as figwriter:
                 for _, name, annotated_curve in sorted(annotated_curves[filepath]):
                     figwriter.write(name, annotated_curve)
 

From 794c2472e82a30dd81751f7af4bb1ffea2a9fd7b Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 29/52] remove extranous set_xlim

---
 croissance/figures/plot.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/croissance/figures/plot.py b/croissance/figures/plot.py
index f2e45c2..dfb2e68 100644
--- a/croissance/figures/plot.py
+++ b/croissance/figures/plot.py
@@ -86,8 +86,6 @@ def gf(x):
         axis.plot(curve.series.index, gf(curve.series.index), color=color, linewidth=1)
         axis.plot(phase_series.index, gf(phase_series.index), color=color, linewidth=2)
 
-    axis.set_xlim(curve.series.index[0], curve.series.index[-1])
-
     yvalues = curve.series
     ymargin = (yvalues.max() - yvalues.min()) * 0.025
     yvalues_min = 0.01 if yscale == "log" else float("-inf")

From 08e6e7a9947f003ee3ec64a59a12bbaa1118980b Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 30/52] lints

---
 croissance/estimation/__init__.py           |  5 -----
 croissance/estimation/regression.py         |  6 +++---
 croissance/estimation/smoothing/segments.py | 22 ++++++++-------------
 3 files changed, 11 insertions(+), 22 deletions(-)

diff --git a/croissance/estimation/__init__.py b/croissance/estimation/__init__.py
index 464f74a..bb0b2bb 100644
--- a/croissance/estimation/__init__.py
+++ b/croissance/estimation/__init__.py
@@ -141,11 +141,6 @@ def growth(
             if phase.duration < defaults.PHASE_MINIMUM_DURATION_HOURS:
                 continue
 
-            # snr_estimate, slope_estimate = signal_noise_ratio_estimate(phase_series.values)
-            # # skip any growth phase with <strike>an estimated S/N ratio < 1 or</strike> negative slope.
-            # if slope_estimate <= 0:
-            #     continue
-
             if self._constrain_n0:
                 slope, intercept, n0, snr, _fallback_linear_method = fit_exponential(
                     phase_series, n0=self._n0
diff --git a/croissance/estimation/regression.py b/croissance/estimation/regression.py
index 833b15f..3c02793 100644
--- a/croissance/estimation/regression.py
+++ b/croissance/estimation/regression.py
@@ -9,9 +9,9 @@ def exponential(x, a, b, c):
 
 def fit_exponential(series, *, p0=(1.0, 0.01, 0.0), n0: float = None):
     """
-    Fits an exponential to a series. First attempts an exponential fit in linear space using p0, then falls back to a
-    fit in log space to attempt to find parameters p0 for a linear fit; if all else fails returns the linear fit.
-
+    Fits an exponential to a series. First attempts an exponential fit in linear space
+    using p0, then falls back to a fit in log space to attempt to find parameters p0
+    for a linear fit; if all else fails returns the linear fit.
     """
 
     if n0 is None:
diff --git a/croissance/estimation/smoothing/segments.py b/croissance/estimation/smoothing/segments.py
index da31560..6b27487 100644
--- a/croissance/estimation/smoothing/segments.py
+++ b/croissance/estimation/smoothing/segments.py
@@ -8,13 +8,9 @@
 
 def segment_by_std_dev(series, increment=2, maximum=20):
     """
-    Divides a series into segments, minimizing standard deviation over window size. Windows are of varying size from
-    `increment` to `maximum * increment` at each offset `increment` within the series.
-
-    :param series:
-    :param increment:
-    :param maximum:
-    :return:
+    Divides a series into segments, minimizing standard deviation over window size.
+    Windows are of varying size from `increment` to `maximum * increment` at each
+    offset `increment` within the series.
     """
     start = int(series.index.min())
     duration = int(series.index[-2])
@@ -58,15 +54,13 @@ def window_median(window, start, end):
 
 def segment_points(series, segments):
     """
-    Picks knot points for an interpolating spline along a series of segments according to these rules:
+    Picks knot points for an interpolating spline along a series of segments according
+    to these rules:
 
     - For small segments, add a knot in the center of the segment
-    - For medium-sized segments, add a knot each near the beginning and end of the segment
-    - For large segments, add a knot a knot each near the beginning and end of the segment, and one in the center.
-
-    :param series:
-    :param segments:
-    :return:
+    - For medium-sized segments, add a knot near the beginning and end of the segment
+    - For large segments, add a knot a knot near the beginning and end of the segment,
+      and one in the center.
     """
     out = [(series.index[0], numpy.median(series[: series.index[0] + 1]))]
 

From 875b0e48cfcf73d2adebe07bb886ecd7ad247d30 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 31/52] small simplification

---
 croissance/estimation/__init__.py | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/croissance/estimation/__init__.py b/croissance/estimation/__init__.py
index bb0b2bb..d6bd8da 100644
--- a/croissance/estimation/__init__.py
+++ b/croissance/estimation/__init__.py
@@ -58,8 +58,7 @@ def __init__(
     ):
         self._log = logging.getLogger(__name__)
         self._segment_log_n0 = segment_log_n0
-        self._constrain_n0 = constrain_n0
-        self._n0 = n0
+        self._n0 = n0 if constrain_n0 else None
 
     def _find_growth_phases(
         self, curve: "pandas.Series", window
@@ -141,14 +140,9 @@ def growth(
             if phase.duration < defaults.PHASE_MINIMUM_DURATION_HOURS:
                 continue
 
-            if self._constrain_n0:
-                slope, intercept, n0, snr, _fallback_linear_method = fit_exponential(
-                    phase_series, n0=self._n0
-                )
-            else:
-                slope, intercept, n0, snr, _fallback_linear_method = fit_exponential(
-                    phase_series
-                )
+            slope, intercept, n0, snr, _fallback_linear_method = fit_exponential(
+                phase_series, n0=self._n0
+            )
 
             # skip phases whose actual slope is below the limit
             if slope < max(0.0, defaults.PHASE_MINIMUM_SLOPE):

From 1a063e080d66bd6a55dabd830a59a6c89b98de7a Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 32/52] remove dead code

---
 croissance/estimation/util.py | 21 ---------------------
 1 file changed, 21 deletions(-)

diff --git a/croissance/estimation/util.py b/croissance/estimation/util.py
index 7b6fcd4..9767cf3 100644
--- a/croissance/estimation/util.py
+++ b/croissance/estimation/util.py
@@ -1,6 +1,5 @@
 import numpy
 import pandas
-from scipy.interpolate import InterpolatedUnivariateSpline
 from scipy.signal import savgol_filter
 
 
@@ -11,26 +10,6 @@ def points_per_hour(series):
     return 1 / numpy.median(series.index[1:] - series.index[:-1])
 
 
-def resample(series, *, factor=10, size=None):
-    """
-    Returns a new series re-sampled to a given number of points.
-
-    :param series:
-    :param factor: a number of points per unit time to scale the series to.
-    :param size: a number of points to scale the series to.
-    :return:
-    """
-    series = series.dropna()
-    start, end = series.index[0], series.index[-1]
-
-    if size is None:
-        size = (end - start) * factor
-
-    index = numpy.linspace(start, end, size)
-    spline = InterpolatedUnivariateSpline(series.index, series.values)
-    return pandas.Series(index=index, data=spline(index))
-
-
 def savitzky_golay(series, *args, **kwargs):
     return pandas.Series(
         index=series.index, data=savgol_filter(series.values, *args, **kwargs)

From 7b28d91056759123660890d7ab987376835b0d77 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 14:22:48 +0100
Subject: [PATCH 33/52] improved plot limits using auto-calculated ranges

---
 croissance/figures/plot.py | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/croissance/figures/plot.py b/croissance/figures/plot.py
index dfb2e68..cc35d05 100644
--- a/croissance/figures/plot.py
+++ b/croissance/figures/plot.py
@@ -43,6 +43,11 @@ def _do_plot_processed_curve(curve, axis, yscale):
         linestyle="None",
     )
 
+    # Lock x/y limits to those auto-calculated prior to the addition of growth curves
+    axis.set_yscale(yscale)
+    axis.set_xlim(axis.get_xlim())
+    axis.set_ylim(axis.get_ylim())
+
     colors = ["b", "g", "c", "m", "y", "k"]
 
     for i, phase in enumerate(curve.growth_phases):
@@ -85,14 +90,3 @@ def gf(x):
 
         axis.plot(curve.series.index, gf(curve.series.index), color=color, linewidth=1)
         axis.plot(phase_series.index, gf(phase_series.index), color=color, linewidth=2)
-
-    yvalues = curve.series
-    ymargin = (yvalues.max() - yvalues.min()) * 0.025
-    yvalues_min = 0.01 if yscale == "log" else float("-inf")
-    axis.set_ylim([max(yvalues.min() - ymargin, yvalues_min), yvalues.max() + ymargin])
-
-    xvalues = curve.series.index
-    xmargin = (xvalues.max() - xvalues.min()) * 0.025
-    axis.set_xlim([xvalues.min() - xmargin, xvalues.max() + xmargin])
-
-    axis.set_yscale(yscale)

From 27f9567d0ff899cd45557884109fd7bfb5f75b81 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 15:49:25 +0100
Subject: [PATCH 34/52] handle gappy data in `segment_by_std_dev`

This prevents divisions by zero when calling `detrend`.
---
 croissance/estimation/smoothing/segments.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/croissance/estimation/smoothing/segments.py b/croissance/estimation/smoothing/segments.py
index 6b27487..4dd9751 100644
--- a/croissance/estimation/smoothing/segments.py
+++ b/croissance/estimation/smoothing/segments.py
@@ -18,10 +18,13 @@ def segment_by_std_dev(series, increment=2, maximum=20):
 
     for i in range(start, duration, increment):
         for size in range(1, maximum + 1):
-            window = detrend(series[i : i + size * increment])
-            heappush(
-                windows, (window.std() / (size * increment), i, i + size * increment)
-            )
+            window = series[i : i + size * increment]
+            if not window.empty:
+                window = detrend(window)
+                heappush(
+                    windows,
+                    (window.std() / (size * increment), i, i + size * increment),
+                )
 
     segments = []
     spots = set()

From 61f006269a4b3fbb7663035cf72e8f4af610e312 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 15:50:59 +0100
Subject: [PATCH 35/52] simplify `segment_by_std_dev`

---
 croissance/estimation/smoothing/segments.py | 32 +++++++--------------
 1 file changed, 11 insertions(+), 21 deletions(-)

diff --git a/croissance/estimation/smoothing/segments.py b/croissance/estimation/smoothing/segments.py
index 4dd9751..5d12e07 100644
--- a/croissance/estimation/smoothing/segments.py
+++ b/croissance/estimation/smoothing/segments.py
@@ -1,7 +1,6 @@
-from heapq import heappush, heappop
-
 import numpy
 import pandas
+
 from scipy.signal import detrend
 from scipy.interpolate import InterpolatedUnivariateSpline
 
@@ -14,37 +13,28 @@ def segment_by_std_dev(series, increment=2, maximum=20):
     """
     start = int(series.index.min())
     duration = int(series.index[-2])
-    windows = []
 
+    windows = []
     for i in range(start, duration, increment):
         for size in range(1, maximum + 1):
             window = series[i : i + size * increment]
+            # Gaps in measurements may result in empty windows
             if not window.empty:
                 window = detrend(window)
-                heappush(
-                    windows,
-                    (window.std() / (size * increment), i, i + size * increment),
+                windows.append(
+                    (window.std() / (size * increment), i, i + size * increment)
                 )
 
     segments = []
     spots = set()
+    for _window_agv_std, start, end in sorted(windows):
+        window_spots = range(start, int(end))
 
-    try:
-        while True:
-            _window_agv_std, start, end = heappop(windows)
-
-            if any(i in spots for i in range(start, int(end))):
-                continue
-
-            for i in range(start, int(end)):
-                spots.add(int(i))
-
-            heappush(segments, (start, min(duration, end)))
-
-    except IndexError:
-        pass
+        if not any(i in spots for i in window_spots):
+            segments.append((start, min(duration, end)))
+            spots.update(window_spots)
 
-    return [heappop(segments) for _ in range(len(segments))]
+    return sorted(segments)
 
 
 def window_median(window, start, end):

From ce4505627bf5d32be2fa5e82b6b64f3edbd20dad Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 15:53:15 +0100
Subject: [PATCH 36/52] improve handling of gappy data in `segment_points`

This prevents subsequent interpolation from returning a series of NaNs.
---
 croissance/estimation/smoothing/segments.py | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/croissance/estimation/smoothing/segments.py b/croissance/estimation/smoothing/segments.py
index 5d12e07..f57fc23 100644
--- a/croissance/estimation/smoothing/segments.py
+++ b/croissance/estimation/smoothing/segments.py
@@ -57,23 +57,21 @@ def segment_points(series, segments):
     """
     out = [(series.index[0], numpy.median(series[: series.index[0] + 1]))]
 
-    for start, end in segments:
+    def _add_knot(start, end):
         window = series[start:end]
+        if not window.empty:
+            out.append(window_median(window, start, end))
 
-        if window.empty:
-            continue
-
+    for start, end in segments:
         if end - start > 5:
-            out.append(window_median(series[start : start + 2], start, start + 2))
+            _add_knot(start, start + 2)
 
             if end - start > 11:
-                out.append(
-                    window_median(series[start + 2 : end - 2], start + 2, end - 2)
-                )
+                _add_knot(start + 2, end - 2)
 
-            out.append(window_median(series[end - 2 : end], end - 2, end))
+            _add_knot(end - 2, end)
         else:
-            out.append(window_median(window, start, end))
+            _add_knot(start, end)
 
     out += [(series.index[-1], numpy.max(series[series.index[0] - 1 :]))]
 

From 2bc9fbc35d721a6dcba098977416d904e029e5fc Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Tue, 24 Nov 2020 17:13:39 +0100
Subject: [PATCH 37/52] update Travis config

---
 .travis.yml | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 0a1cd2f..213d14d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,9 +4,10 @@ cache:
 python:
   - "3.5"
 before_install:
-  - | 
+  - |
     mkdir -p $HOME/.config/matplotlib && \
     echo 'backend: agg' > $HOME/.config/matplotlib/matplotlibrc
 install:
-  - "pip install ."
-script: python setup.py test
+  - python setup.py install
+  - pip install pytest
+script: pytest

From 95b4a4cb0a41cbd0bbe759e73d8f45ed8cc6af1b Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Wed, 25 Nov 2020 16:38:14 +0100
Subject: [PATCH 38/52] collect coverage when running tests

---
 tox.ini | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tox.ini b/tox.ini
index a86f9b8..f427e36 100644
--- a/tox.ini
+++ b/tox.ini
@@ -8,8 +8,13 @@ envlist = py3
 
 [testenv]
 commands =
-    pytest test.py
+    pytest test.py \
+        --cov croissance \
+        --no-cov-on-fail \
+        --cov-branch \
+        --cov-report=term-missing:skip-covered
 
 deps =
     pytest
+    pytest-cov
     nose >= 1.1.2

From 3b3482d957e816a0d8916146632a1fc7ca226988 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Wed, 25 Nov 2020 16:38:43 +0100
Subject: [PATCH 39/52] simplify plotting API

---
 croissance/__init__.py       | 6 ++----
 croissance/figures/plot.py   | 4 +---
 croissance/figures/writer.py | 3 ++-
 3 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/croissance/__init__.py b/croissance/__init__.py
index 10b7fef..2ffbe9d 100644
--- a/croissance/__init__.py
+++ b/croissance/__init__.py
@@ -28,7 +28,5 @@ def process_curve(
     ).growth(curve)
 
 
-def plot_processed_curve(curve: AnnotatedGrowthCurve, name=None, figsize=(10, 10)):
-    return croissance.figures.plot.plot_processed_curve(
-        curve=curve, name=name, figsize=figsize
-    )
+def plot_processed_curve(curve: AnnotatedGrowthCurve, yscale="both"):
+    return croissance.figures.plot.plot_processed_curve(curve=curve, yscale=yscale)
diff --git a/croissance/figures/plot.py b/croissance/figures/plot.py
index cc35d05..f3e5256 100644
--- a/croissance/figures/plot.py
+++ b/croissance/figures/plot.py
@@ -4,10 +4,8 @@
 from croissance.estimation import AnnotatedGrowthCurve
 
 
-def plot_processed_curve(curve: AnnotatedGrowthCurve, name: str, yscale="log"):
+def plot_processed_curve(curve: AnnotatedGrowthCurve, yscale="log"):
     fig, axes = plt.subplots(nrows=2 if yscale == "both" else 1, ncols=1)
-    fig.suptitle(name)
-
     if curve.series.max() <= 0:
         return
 
diff --git a/croissance/figures/writer.py b/croissance/figures/writer.py
index 09b7863..47a0fd5 100644
--- a/croissance/figures/writer.py
+++ b/croissance/figures/writer.py
@@ -13,8 +13,9 @@ def __init__(self, filepath, yscale="both"):
         self._yscale = yscale
 
     def write(self, name: str, curve: AnnotatedGrowthCurve):
-        fig, _axes = plot_processed_curve(curve=curve, name=name, yscale=self._yscale)
+        fig, _axes = plot_processed_curve(curve=curve, yscale=self._yscale)
 
+        fig.suptitle(name)
         fig.set_figwidth(16)
         fig.set_figheight(16 if self._yscale == "both" else 8)
 

From 7b5031674c8eeb9ac22fa263461df68121dd34c9 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Wed, 25 Nov 2020 16:39:45 +0100
Subject: [PATCH 40/52] reorganize unit tests

---
 test.py                       | 159 ----------------------------------
 tests/__init__.py             |   0
 tests/api_test.py             | 157 +++++++++++++++++++++++++++++++++
 tests/estimation/__init__.py  |   0
 tests/estimation/main_test.py |  19 ++++
 tests/estimation/util_test.py |  11 +++
 tox.ini                       |   2 +-
 7 files changed, 188 insertions(+), 160 deletions(-)
 delete mode 100644 test.py
 create mode 100644 tests/__init__.py
 create mode 100644 tests/api_test.py
 create mode 100644 tests/estimation/__init__.py
 create mode 100644 tests/estimation/main_test.py
 create mode 100644 tests/estimation/util_test.py

diff --git a/test.py b/test.py
deleted file mode 100644
index 9f71d47..0000000
--- a/test.py
+++ /dev/null
@@ -1,159 +0,0 @@
-from pathlib import Path
-from unittest import TestCase
-
-import numpy
-import pandas
-
-from croissance import process_curve
-from croissance.estimation import fit_exponential
-from croissance.estimation.util import normalize_time_unit
-from croissance.figures.writer import PDFWriter
-
-
-class CroissanceTestCase(TestCase):
-    def test_regression_basic(self):
-        for mu in (0.001, 0.10, 0.15, 0.50, 1.0):
-            phase = pandas.Series(
-                data=[numpy.exp(i * mu) for i in range(20)],
-                index=[i for i in range(20)],
-            )
-
-            slope, intercept, n0, snr, lin = fit_exponential(phase)
-            print(slope, intercept, n0, snr, lin)
-
-            self.assertFalse(lin, "This fit should not require a linear fallback.")
-            self.assertAlmostEqual(n0, 0, 2, msg="N0=0")
-            self.assertAlmostEqual(intercept, 0, 2, msg='"intercept"=0')
-            self.assertAlmostEqual(mu, slope, 6, msg="growth rate (mu)={}".format(mu))
-            self.assertTrue(snr > 100000, "signal-noise ratio should be very good")
-
-    def test_process_curve_empty_series(self):
-        result = process_curve(pandas.Series([], dtype="float64"))
-
-        self.assertTrue(result.series.empty)
-        self.assertEqual(result.outliers, [])
-        self.assertEqual(result.growth_phases, [])
-
-    def test_process_curve_null_series(self):
-        curve = pandas.Series(index=[1, 2], data=[None, float("NaN")])
-        result = process_curve(curve)
-
-        self.assertIs(result.series, curve)
-        self.assertEqual(result.outliers, [])
-        self.assertEqual(result.growth_phases, [])
-
-    def test_process_curve_time_unit(self):
-        mu = 0.5
-        pph = 4.0
-        curve = pandas.Series(
-            data=[numpy.exp(mu * i / pph) for i in range(100)],
-            index=[i / pph for i in range(100)],
-        )
-
-        hours = process_curve(curve, unit="hours")
-        minutes = process_curve(
-            pandas.Series(index=curve.index * 60.0, data=curve.values),
-            unit="minutes",
-        )
-
-        self.assertTrue(hours.series.equals(minutes.series))
-        self.assertTrue(hours.outliers.equals(minutes.outliers))
-        self.assertEqual(hours.growth_phases, minutes.growth_phases)
-
-    def test_process_curve_basic(self):
-        with PDFWriter(Path("test.basic.pdf")) as doc:
-            mu = 0.5
-            pph = 4.0
-            curve = pandas.Series(
-                data=[numpy.exp(mu * i / pph) for i in range(100)],
-                index=[i / pph for i in range(100)],
-            )
-
-            result = process_curve(curve, constrain_n0=True, n0=0.0)
-
-            # self.assertAlmostEqual(mu, slope, 6, msg='growth rate (mu)={}'.format(mu))
-
-            doc.write("#0 n0=1", result)
-            print(result.growth_phases)
-            self.assertEqual(len(result.growth_phases), 1)
-            self.assertAlmostEqual(mu, result.growth_phases[0].slope, 2)
-            self.assertAlmostEqual(0.0, result.growth_phases[0].n0, 3)
-            self.assertTrue(result.growth_phases[0].SNR > 1000)
-
-            result = process_curve(curve, constrain_n0=False)
-            doc.write("#0 n0=auto", result)
-            print(result.growth_phases)
-            self.assertEqual(len(result.growth_phases), 1)
-            self.assertAlmostEqual(mu, result.growth_phases[0].slope, 2)
-            self.assertTrue(-0.25 < result.growth_phases[0].n0 < 0.25)
-            self.assertTrue(result.growth_phases[0].SNR > 1000)
-
-            curve = pandas.Series(
-                data=(
-                    [1.0] * 5
-                    + [numpy.exp(mu * i / pph) for i in range(25)]
-                    + [numpy.exp(mu * 24 / pph)] * 20
-                ),
-                index=([i / pph for i in range(50)]),
-            )
-
-            result = process_curve(curve, constrain_n0=True, n0=0.0)
-            doc.write("#1 n0=0", result)
-            print(result.growth_phases)
-            self.assertEqual(len(result.growth_phases), 1)
-            self.assertAlmostEqual(mu, result.growth_phases[0].slope, 2)
-            self.assertAlmostEqual(0.0, result.growth_phases[0].n0, 3)
-            self.assertTrue(result.growth_phases[0].SNR > 1000)
-
-            result = process_curve(curve, constrain_n0=False)
-            doc.write("#1 n0=auto", result)
-            print(result.growth_phases)
-            self.assertEqual(len(result.growth_phases), 1)
-            self.assertAlmostEqual(mu, result.growth_phases[0].slope, 1)
-            self.assertTrue(-0.25 < result.growth_phases[0].n0 < 0.25)
-            self.assertTrue(result.growth_phases[0].SNR > 1000)
-
-            mu = 0.20
-            curve = pandas.Series(
-                data=(
-                    [i / 10.0 for i in range(10)]
-                    + [numpy.exp(mu * i / pph) for i in range(25)]
-                    + [numpy.exp(mu * 24 / pph)] * 15
-                ),
-                index=([i / pph for i in range(50)]),
-            )
-
-            result = process_curve(curve, constrain_n0=True, n0=0.0)
-            doc.write("#2 n0=0", result)
-            print(result.growth_phases)
-            self.assertEqual(len(result.growth_phases), 1)
-            self.assertAlmostEqual(mu, result.growth_phases[0].slope, 2)
-            self.assertAlmostEqual(0.0, result.growth_phases[0].n0, 6)
-            self.assertTrue(result.growth_phases[0].SNR > 1000)
-
-            result = process_curve(curve, constrain_n0=False)
-            doc.write("#2 n0=auto", result)
-            self.assertEqual(len(result.growth_phases), 1)
-            self.assertAlmostEqual(mu, result.growth_phases[0].slope, 2)
-            self.assertTrue(-0.25 < result.growth_phases[0].n0 < 0.25)
-            self.assertTrue(result.growth_phases[0].SNR > 1000)
-
-    def test_process_curve_wrong_time_unit(self):
-        mu = 0.5
-        pph = 0.1
-        curve = pandas.Series(
-            data=[numpy.exp(mu * i / pph) for i in range(100)],
-            index=[i / pph for i in range(100)],
-        )
-
-        result = process_curve(curve, constrain_n0=False, n0=0.0)
-        self.assertEqual(len(result.growth_phases), 0)
-        self.assertEqual(list(result.series), list(curve))
-
-    def test_normalize_time_unit(self):
-        curve = pandas.Series(index=[0, 15, 30, 60], data=[1, 2, 3, 4])
-        self.assertTrue(
-            pandas.Series(index=[0.0, 0.25, 0.5, 1.0], data=[1, 2, 3, 4]).equals(
-                normalize_time_unit(curve, "minutes")
-            )
-        )
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/api_test.py b/tests/api_test.py
new file mode 100644
index 0000000..01ccb0c
--- /dev/null
+++ b/tests/api_test.py
@@ -0,0 +1,157 @@
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy
+import pandas
+import pytest
+
+
+from pytest import approx
+
+from croissance import process_curve, plot_processed_curve
+from croissance.figures.writer import PDFWriter
+
+
+def test_process_curve_empty_series():
+    result = process_curve(pandas.Series([], dtype="float64"))
+
+    assert result.series.empty
+    assert result.outliers == []
+    assert result.growth_phases == []
+
+
+def test_process_curve_null_series():
+    curve = pandas.Series(index=[1, 2], data=[None, float("NaN")])
+    result = process_curve(curve)
+
+    assert result.series is curve
+    assert result.outliers == []
+    assert result.growth_phases == []
+
+
+def test_process_curve_time_unit():
+    mu = 0.5
+    pph = 4.0
+    curve = pandas.Series(
+        data=[numpy.exp(mu * i / pph) for i in range(100)],
+        index=[i / pph for i in range(100)],
+    )
+
+    hours = process_curve(curve, unit="hours")
+    minutes = process_curve(
+        pandas.Series(index=curve.index * 60.0, data=curve.values),
+        unit="minutes",
+    )
+
+    assert hours.series.equals(minutes.series)
+    assert hours.outliers.equals(minutes.outliers)
+    assert hours.growth_phases == minutes.growth_phases
+
+
+def test_process_curve_basic():
+    with PDFWriter(Path("test.basic.pdf")) as doc:
+        mu = 0.5
+        pph = 4.0
+        curve = pandas.Series(
+            data=[numpy.exp(mu * i / pph) for i in range(100)],
+            index=[i / pph for i in range(100)],
+        )
+
+        result = process_curve(curve, constrain_n0=True, n0=0.0)
+
+        # self.assertAlmostEqual(mu, slope, 6, msg='growth rate (mu)={}'.format(mu))
+
+        doc.write("#0 n0=1", result)
+        print(result.growth_phases)
+        assert len(result.growth_phases) == 1
+        assert mu == approx(result.growth_phases[0].slope, abs=1e-2)
+        assert 0.0 == approx(result.growth_phases[0].n0, 1e-3)
+        assert result.growth_phases[0].SNR > 1000
+
+        result = process_curve(curve, constrain_n0=False)
+        doc.write("#0 n0=auto", result)
+        print(result.growth_phases)
+        assert len(result.growth_phases) == 1
+        assert mu == approx(result.growth_phases[0].slope, abs=1e-2)
+        assert -0.25 < result.growth_phases[0].n0 < 0.25
+        assert result.growth_phases[0].SNR > 1000
+
+        curve = pandas.Series(
+            data=(
+                [1.0] * 5
+                + [numpy.exp(mu * i / pph) for i in range(25)]
+                + [numpy.exp(mu * 24 / pph)] * 20
+            ),
+            index=([i / pph for i in range(50)]),
+        )
+
+        result = process_curve(curve, constrain_n0=True, n0=0.0)
+        doc.write("#1 n0=0", result)
+        print(result.growth_phases)
+        assert len(result.growth_phases) == 1
+        assert mu == approx(result.growth_phases[0].slope, abs=1e-2)
+        assert 0.0 == approx(result.growth_phases[0].n0, 1e-3)
+        assert result.growth_phases[0].SNR > 1000
+
+        result = process_curve(curve, constrain_n0=False)
+        doc.write("#1 n0=auto", result)
+        print(result.growth_phases)
+        assert len(result.growth_phases) == 1
+        assert mu == approx(result.growth_phases[0].slope, abs=1e-1)
+        assert -0.25 < result.growth_phases[0].n0 < 0.25
+        assert result.growth_phases[0].SNR > 1000
+
+        mu = 0.20
+        curve = pandas.Series(
+            data=(
+                [i / 10.0 for i in range(10)]
+                + [numpy.exp(mu * i / pph) for i in range(25)]
+                + [numpy.exp(mu * 24 / pph)] * 15
+            ),
+            index=([i / pph for i in range(50)]),
+        )
+
+        result = process_curve(curve, constrain_n0=True, n0=0.0)
+        doc.write("#2 n0=0", result)
+        print(result.growth_phases)
+        assert len(result.growth_phases) == 1
+        assert mu == approx(result.growth_phases[0].slope, abs=1e-2)
+        assert 0.0 == approx(result.growth_phases[0].n0, 1e-6)
+        assert result.growth_phases[0].SNR > 1000
+
+        result = process_curve(curve, constrain_n0=False)
+        doc.write("#2 n0=auto", result)
+        assert len(result.growth_phases) == 1
+        assert mu == approx(result.growth_phases[0].slope, abs=1e-2)
+        assert -0.25 < result.growth_phases[0].n0 < 0.25
+        assert result.growth_phases[0].SNR > 1000
+
+
+def test_process_curve_wrong_time_unit():
+    mu = 0.5
+    pph = 0.1
+    curve = pandas.Series(
+        data=[numpy.exp(mu * i / pph) for i in range(100)],
+        index=[i / pph for i in range(100)],
+    )
+
+    result = process_curve(curve, constrain_n0=False, n0=0.0)
+    assert len(result.growth_phases) == 0
+    assert list(result.series) == list(curve)
+
+
+@pytest.mark.parametrize("yscale, naxis", (("linear", 1), ("log", 1), ("both", 2)))
+def test_plot_processed_curve(yscale, naxis):
+    mu = 0.5
+    pph = 4.0
+    curve = pandas.Series(
+        data=[numpy.exp(mu * i / pph) for i in range(100)],
+        index=[i / pph for i in range(100)],
+    )
+
+    curve = process_curve(curve, constrain_n0=True, n0=0.0)
+    fig, axes = plot_processed_curve(curve, yscale=yscale)
+    try:
+        assert len(axes) == naxis
+    finally:
+        plt.close()
diff --git a/tests/estimation/__init__.py b/tests/estimation/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/estimation/main_test.py b/tests/estimation/main_test.py
new file mode 100644
index 0000000..9aae500
--- /dev/null
+++ b/tests/estimation/main_test.py
@@ -0,0 +1,19 @@
+import numpy
+import pandas
+import pytest
+
+from croissance.estimation import fit_exponential
+
+
+@pytest.mark.parametrize("mu", (0.001, 0.10, 0.15, 0.50, 1.0))
+def test_regression_basic(mu):
+    phase = numpy.exp(pandas.Series(range(20)) * mu)
+    slope, intercept, n0, snr, lin = fit_exponential(phase)
+
+    print(slope, intercept, n0, snr, lin)
+
+    assert not lin, "This fit should not require a linear fallback."
+    assert n0 == pytest.approx(0, abs=1e-3), "N0=0"
+    assert intercept == pytest.approx(0, abs=1e-3), '"intercept"=0'
+    assert mu == pytest.approx(slope, abs=1e-7), "growth rate (mu)={}".format(mu)
+    assert snr > 100000, "signal-noise ratio should be very good"
diff --git a/tests/estimation/util_test.py b/tests/estimation/util_test.py
new file mode 100644
index 0000000..c9c9e12
--- /dev/null
+++ b/tests/estimation/util_test.py
@@ -0,0 +1,11 @@
+import pandas
+
+from croissance.estimation.util import normalize_time_unit
+
+
+def test_normalize_time_unit():
+    curve = pandas.Series(index=[0, 15, 30, 60], data=[1, 2, 3, 4])
+
+    assert pandas.Series(index=[0.0, 0.25, 0.5, 1.0], data=[1, 2, 3, 4]).equals(
+        normalize_time_unit(curve, "minutes")
+    )
diff --git a/tox.ini b/tox.ini
index f427e36..06fe06c 100644
--- a/tox.ini
+++ b/tox.ini
@@ -8,7 +8,7 @@ envlist = py3
 
 [testenv]
 commands =
-    pytest test.py \
+    pytest tests \
         --cov croissance \
         --no-cov-on-fail \
         --cov-branch \

From 5867065a46c84f6b2aa84f744ef071e639d45fcb Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Wed, 25 Nov 2020 16:40:39 +0100
Subject: [PATCH 41/52] require Python 3.6 to meet scipy requirements

---
 .travis.yml | 2 +-
 setup.py    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 213d14d..a63517c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,7 +2,7 @@ language: python
 cache:
   - pip: true
 python:
-  - "3.5"
+  - "3.6"
 before_install:
   - |
     mkdir -p $HOME/.config/matplotlib && \
diff --git a/setup.py b/setup.py
index 762e29c..6f4371e 100644
--- a/setup.py
+++ b/setup.py
@@ -45,7 +45,7 @@
         "Topic :: Utilities",
         "Intended Audience :: Science/Research",
         "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.5",
+        "Programming Language :: Python :: 3.6",
         "License :: OSI Approved :: Apache Software License",
     ],
 )

From 685b872ecf4091ee3c311ee943cb2db5560417ba Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Wed, 25 Nov 2020 16:40:59 +0100
Subject: [PATCH 42/52] prevent unhandled exception for very short curves

This includes curves for which only a very small number of measurements
are available and curves with hour time-units for which the
`--input-time minutes` option was was mistakenly used.
---
 croissance/estimation/smoothing/segments.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/croissance/estimation/smoothing/segments.py b/croissance/estimation/smoothing/segments.py
index f57fc23..b02ddd4 100644
--- a/croissance/estimation/smoothing/segments.py
+++ b/croissance/estimation/smoothing/segments.py
@@ -79,10 +79,15 @@ def _add_knot(start, end):
     return pandas.Series(index=index, data=data)
 
 
-def segment_spline_smoothing(series, series_std_dev=None):
+def segment_spline_smoothing(series, series_std_dev=None, k=3):
     if series_std_dev is None:
         series_std_dev = series
+
     segments = segment_by_std_dev(series_std_dev)
     points = segment_points(series, segments).sort_index()
-    spline = InterpolatedUnivariateSpline(points.index, points.values, k=3)
+    if len(points) < k + 1:
+        # InterpolatedUnivariateSpline requires at k + 1 points
+        return pandas.Series(dtype="float64")
+
+    spline = InterpolatedUnivariateSpline(points.index, points.values, k=k)
     return pandas.Series(data=spline(series.index), index=series.index)

From c14ab48ec0eb319613a9db3b170d48a77919f6ec Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Wed, 25 Nov 2020 16:42:20 +0100
Subject: [PATCH 43/52] convert stateless Estimator class to function

---
 croissance/__init__.py            |   9 +-
 croissance/estimation/__init__.py | 226 ++++++++++++++----------------
 croissance/main.py                |  18 ++-
 3 files changed, 126 insertions(+), 127 deletions(-)

diff --git a/croissance/__init__.py b/croissance/__init__.py
index 2ffbe9d..bfe4b92 100644
--- a/croissance/__init__.py
+++ b/croissance/__init__.py
@@ -1,6 +1,8 @@
+import pandas
+
 import croissance.figures.plot
 
-from croissance.estimation import Estimator, AnnotatedGrowthCurve
+from croissance.estimation import AnnotatedGrowthCurve, estimate_growth
 from croissance.estimation.util import normalize_time_unit
 
 
@@ -21,11 +23,12 @@ def process_curve(
     if curve.isnull().all():
         return AnnotatedGrowthCurve(curve, [], [])
 
-    return Estimator(
+    return estimate_growth(
+        curve,
         segment_log_n0=segment_log_n0,
         constrain_n0=constrain_n0,
         n0=n0,
-    ).growth(curve)
+    )
 
 
 def plot_processed_curve(curve: AnnotatedGrowthCurve, yscale="both"):
diff --git a/croissance/estimation/__init__.py b/croissance/estimation/__init__.py
index d6bd8da..69bc515 100644
--- a/croissance/estimation/__init__.py
+++ b/croissance/estimation/__init__.py
@@ -48,138 +48,130 @@ def pick_best(growth_phases, metric="duration"):
 )
 
 
-class Estimator:
-    def __init__(
-        self,
-        *,
-        segment_log_n0: bool = False,
-        constrain_n0: bool = False,
-        n0: float = 0.0
-    ):
-        self._log = logging.getLogger(__name__)
-        self._segment_log_n0 = segment_log_n0
-        self._n0 = n0 if constrain_n0 else None
-
-    def _find_growth_phases(
-        self, curve: "pandas.Series", window
-    ) -> "typing.List[RawGrowthPhase]":
-        """
-        Finds growth phases by locating regions in a series where both the first and
-        second derivatives are positive.
-        """
-        first_derivative = savitzky_golay(curve, window, 3, deriv=1)
-        second_derivative = savitzky_golay(curve, window, 3, deriv=2)
-
-        growth = pandas.Series(
-            index=curve.index,
-            data=(first_derivative.values > 0) & (second_derivative.values > 0),
+def estimate_growth(
+    curve: pandas.Series,
+    *,
+    segment_log_n0: bool = False,
+    constrain_n0: bool = False,
+    n0: float = 0.0,
+    name: str = "untitled curve"
+) -> AnnotatedGrowthCurve:
+    log = logging.getLogger(__name__)
+    series = curve.dropna()
+
+    n_hours = int(
+        numpy.round(points_per_hour(series) * defaults.CURVE_MINIMUM_DURATION_HOURS)
+    )
+
+    if n_hours == 0:
+        log.warning(
+            "Fewer than one data-point per hour for %s. Use the command-line "
+            "`--input-time-unit minutes` if times are represented in minutes.",
+            name,
         )
+        return AnnotatedGrowthCurve(series, pandas.Series(dtype="float64"), [])
 
-        istart, iend, phases = None, None, []
-        for i, v in zip(growth.index, growth.values):
-            if v:
-                if istart is None:
-                    istart = i
-            elif istart is not None:
-                phases.append(RawGrowthPhase(istart, iend))
-                istart = None
-            iend = i
-
-        if istart is not None:
-            phases.append(RawGrowthPhase(istart, iend))
+    if n_hours % 2 == 0:
+        n_hours += 1
 
-        return phases
+    series, outliers = remove_outliers(series, window=n_hours, std=3)
 
-    def growth(
-        self, curve: pandas.Series, name: str = "untitled curve"
-    ) -> AnnotatedGrowthCurve:
-        series = curve.dropna()
+    # NOTE workaround for issue with negative curves
+    if len(series[series > 0]) < 3:
+        log.warning("Fewer than three positive data-points for %s", name)
+        return AnnotatedGrowthCurve(series, outliers, [])
 
-        n_hours = int(
-            numpy.round(points_per_hour(series) * defaults.CURVE_MINIMUM_DURATION_HOURS)
-        )
+    if segment_log_n0:
+        series_log_n0 = numpy.log(series - n0).dropna()
+        smooth_series = segment_spline_smoothing(series, series_log_n0)
+    else:
+        smooth_series = segment_spline_smoothing(series)
 
-        if n_hours == 0:
-            self._log.warning(
-                "Fewer than one data-point per hour for %s. Use the command-line "
-                "`--input-time-unit minutes` if times are represented in minutes.",
-                name,
-            )
-            return AnnotatedGrowthCurve(series, pandas.Series(dtype="float64"), [])
+    if len(smooth_series) < n_hours:
+        log.warning("Insufficient smoothed data for %s", name)
+        return AnnotatedGrowthCurve(series, outliers, [])
 
-        if n_hours % 2 == 0:
-            n_hours += 1
+    phases = []
+    for phase in _find_growth_phases(smooth_series, window=n_hours):
+        phase_series = series[phase.start : phase.end]
 
-        series, outliers = remove_outliers(series, window=n_hours, std=3)
+        # skip any growth phases that have not enough points for fitting
+        if len(phase_series[phase_series > 0]) < 3:
+            continue
 
-        # NOTE workaround for issue with negative curves
-        if len(series[series > 0]) < 3:
-            self._log.warning("Fewer than three positive data-points for %s", name)
-            return AnnotatedGrowthCurve(series, outliers, [])
+        # skip any phases with less than minimum duration
+        if phase.duration < defaults.PHASE_MINIMUM_DURATION_HOURS:
+            continue
 
-        if self._segment_log_n0:
-            series_log_n0 = numpy.log(series - self._n0).dropna()
-            smooth_series = segment_spline_smoothing(series, series_log_n0)
-        else:
-            smooth_series = segment_spline_smoothing(series)
+        slope, intercept, n0, snr, _fallback_linear_method = fit_exponential(
+            phase_series, n0=n0 if constrain_n0 else None
+        )
 
-        phases = []
-        if len(smooth_series) < n_hours:
-            self._log.warning("Insufficient smoothed data for %s", name)
-            return AnnotatedGrowthCurve(series, outliers, [])
-        raw_phases = self._find_growth_phases(smooth_series, window=n_hours)
+        # skip phases whose actual slope is below the limit
+        if slope < max(0.0, defaults.PHASE_MINIMUM_SLOPE):
+            continue
+
+        # skip phases whose actual signal-noise-ratio is below the limit
+        if snr < max(1.0, defaults.PHASE_MINIMUM_SIGNAL_NOISE_RATIO):
+            continue
+
+        phases.append(
+            GrowthPhase(
+                start=phase.start,
+                end=phase.end,
+                slope=slope,
+                intercept=intercept,
+                n0=n0,
+                SNR=snr,
+                rank=None,
+            )
+        )
 
-        for phase in raw_phases:
-            phase_series = series[phase.start : phase.end]
+    ranked_phases = rank_phases(
+        phases,
+        defaults.PHASE_RANK_WEIGHTS,
+        thresholds={
+            "SNR": defaults.PHASE_MINIMUM_SIGNAL_NOISE_RATIO,
+            "duration": defaults.PHASE_MINIMUM_DURATION_HOURS,
+            "slope": defaults.PHASE_MINIMUM_SLOPE,
+        },
+    )
 
-            # skip any growth phases that have not enough points for fitting
-            if len(phase_series[phase_series > 0]) < 3:
-                continue
+    return AnnotatedGrowthCurve(
+        series,
+        outliers,
+        [
+            phase
+            for phase in ranked_phases
+            if phase.rank >= defaults.PHASE_RANK_EXCLUDE_BELOW
+        ],
+    )
 
-            # skip any phases with less than minimum duration
-            if phase.duration < defaults.PHASE_MINIMUM_DURATION_HOURS:
-                continue
 
-            slope, intercept, n0, snr, _fallback_linear_method = fit_exponential(
-                phase_series, n0=self._n0
-            )
+def _find_growth_phases(curve: "pandas.Series", window):
+    """
+    Finds growth phases by locating regions in a series where both the first and
+    second derivatives are positive.
+    """
+    first_derivative = savitzky_golay(curve, window, 3, deriv=1)
+    second_derivative = savitzky_golay(curve, window, 3, deriv=2)
 
-            # skip phases whose actual slope is below the limit
-            if slope < max(0.0, defaults.PHASE_MINIMUM_SLOPE):
-                continue
-
-            # skip phases whose actual signal-noise-ratio is below the limit
-            if snr < max(1.0, defaults.PHASE_MINIMUM_SIGNAL_NOISE_RATIO):
-                continue
-
-            phases.append(
-                GrowthPhase(
-                    start=phase.start,
-                    end=phase.end,
-                    slope=slope,
-                    intercept=intercept,
-                    n0=n0,
-                    SNR=snr,
-                    rank=None,
-                )
-            )
+    growth = pandas.Series(
+        index=curve.index,
+        data=(first_derivative.values > 0) & (second_derivative.values > 0),
+    )
 
-        ranked_phases = rank_phases(
-            phases,
-            defaults.PHASE_RANK_WEIGHTS,
-            thresholds={
-                "SNR": defaults.PHASE_MINIMUM_SIGNAL_NOISE_RATIO,
-                "duration": defaults.PHASE_MINIMUM_DURATION_HOURS,
-                "slope": defaults.PHASE_MINIMUM_SLOPE,
-            },
-        )
+    istart, iend, phases = None, None, []
+    for i, v in zip(growth.index, growth.values):
+        if v:
+            if istart is None:
+                istart = i
+        elif istart is not None:
+            phases.append(RawGrowthPhase(istart, iend))
+            istart = None
+        iend = i
 
-        return AnnotatedGrowthCurve(
-            series,
-            outliers,
-            [
-                phase
-                for phase in ranked_phases
-                if phase.rank >= defaults.PHASE_RANK_EXCLUDE_BELOW
-            ],
-        )
+    if istart is not None:
+        phases.append(RawGrowthPhase(istart, iend))
+
+    return phases
diff --git a/croissance/main.py b/croissance/main.py
index fa6c1c8..4856c21 100644
--- a/croissance/main.py
+++ b/croissance/main.py
@@ -10,7 +10,7 @@
 
 import coloredlogs
 
-from croissance import Estimator
+from croissance import estimate_growth
 from croissance.estimation.util import normalize_time_unit
 from croissance.figures.writer import PDFWriter
 from croissance.formats.input import TSVReader
@@ -19,11 +19,9 @@
 
 class EstimatorWrapper:
     def __init__(self, args):
-        self.estimator = Estimator(
-            constrain_n0=args.constrain_N0,
-            segment_log_n0=args.segment_log_N0,
-            n0=args.N0,
-        )
+        self.constrain_n0 = args.constrain_N0
+        self.segment_log_n0 = args.segment_log_N0
+        self.n0 = args.N0
 
         self.input_time_unit = args.input_time_unit
 
@@ -32,7 +30,13 @@ def __call__(self, values):
 
         try:
             normalized_curve = normalize_time_unit(curve, self.input_time_unit)
-            annotated_curve = self.estimator.growth(normalized_curve, name)
+            annotated_curve = estimate_growth(
+                normalized_curve,
+                segment_log_n0=self.segment_log_n0,
+                constrain_n0=self.constrain_n0,
+                n0=self.n0,
+                name=name,
+            )
 
             return (filepath, idx, name, annotated_curve)
         except Exception:

From a569332e72ef0d445b90e8d203053e312abb2a57 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Wed, 9 Dec 2020 10:44:19 +0100
Subject: [PATCH 44/52] removed unused constants

---
 croissance/estimation/defaults.py | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/croissance/estimation/defaults.py b/croissance/estimation/defaults.py
index f074476..b0cb31c 100644
--- a/croissance/estimation/defaults.py
+++ b/croissance/estimation/defaults.py
@@ -11,16 +11,3 @@
     "slope": 10,
     # TODO add 1 - start?
 }
-
-
-# NOTE these defaults are good for fast growing bacteria such as E.coli:
-
-MINIMUM_VALID_OD = 0.05
-MINIMUM_VALID_SLOPE = 0.05
-MINIMUM_DATA_HOURS = 5
-RESAMPLE_POINTS_PER_HOUR = 6
-PHASE_MIN_DELTA_LOG_OD = 0.20
-PHASE_MIN_DELTA_OD = 0.25
-MINIMUM_PHASE_LENGTH = 2
-
-MINOR_PHASE_MERGING_THRESHOLD = 0.10  # in percent

From cfa281b833c09ae0fd4359edfbec79cac00293a0 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Wed, 9 Dec 2020 11:44:14 +0100
Subject: [PATCH 45/52] cleanup command-line arguments

---
 croissance/main.py | 69 +++++++++++++++++++++++++++-------------------
 1 file changed, 41 insertions(+), 28 deletions(-)

diff --git a/croissance/main.py b/croissance/main.py
index 4856c21..d9c5436 100644
--- a/croissance/main.py
+++ b/croissance/main.py
@@ -58,44 +58,44 @@ def parse_args(argv):
     )
 
     parser.add_argument("infiles", type=Path, nargs="+")
-    parser.add_argument("--input-format", type=str.lower, choices=("tsv",))
+
     parser.add_argument(
+        "--threads",
+        type=int,
+        default=1,
+        help="Max number of threads to use during growth estimation",
+    )
+
+    group = parser.add_argument_group("Input")
+    group.add_argument("--input-format", type=str.lower, choices=("tsv",))
+    group.add_argument(
         "--input-time-unit",
         default="hours",
         choices=("hours", "minutes"),
         help="Time unit in time column",
     )
-    parser.add_argument(
-        "--N0",
-        type=float,
-        default=0.0,
-        help="Baseline for constrained regressions and identifying curve segments in "
-        "log space",
-    )
-    parser.add_argument(
-        "--constrain-N0",
-        action="store_true",
-        help="All growth phases will begin at N0 when this flag is set",
-    )
-    parser.add_argument(
-        "--segment-log-N0",
-        action="store_true",
-        help="Identify curve segments using log(N-N0) rather than N; increases "
-        "sensitivity to changes in exponential growth but has to assume a certain N0",
+
+    group = parser.add_argument_group("Output")
+    group.add_argument("--output-format", type=str.lower, choices=("tsv",))
+    group.add_argument(
+        "--output-suffix",
+        type=str,
+        default=".output",
+        help="Suffix used for output files in addition to the file extension. By "
+        "default, an input file `file.tsv` will result in output files named `file. "
+        "output.tsv` and `file.output.pdf`",
     )
-    parser.add_argument("--output-format", type=str.lower, choices=("tsv",))
-    parser.add_argument("--output-suffix", type=str, default=".output")
-    parser.add_argument(
+    group.add_argument(
         "--output-exclude-default-phase",
         action="store_true",
         help="Do not output phase '0' for each curve",
     )
-    parser.add_argument(
+    group.add_argument(
         "--figures",
         action="store_true",
         help="Renders a PDF file with figures for each curve",
     )
-    parser.add_argument(
+    group.add_argument(
         "--figures-yscale",
         type=str.lower,
         default="both",
@@ -103,11 +103,24 @@ def parse_args(argv):
         help="Yscale(s) for figures. If both, then two plots are generated per curve",
     )
 
-    parser.add_argument(
-        "--threads",
-        type=int,
-        default=1,
-        help="Max number of threads to use during growth estimation",
+    group = parser.add_argument_group("Growth model")
+    group.add_argument(
+        "--N0",
+        type=float,
+        default=0.0,
+        help="Baseline for constrained regressions and identifying curve segments in "
+        "log space",
+    )
+    group.add_argument(
+        "--constrain-N0",
+        action="store_true",
+        help="All growth phases will begin at N0 when this flag is set",
+    )
+    group.add_argument(
+        "--segment-log-N0",
+        action="store_true",
+        help="Identify curve segments using log(N-N0) rather than N; increases "
+        "sensitivity to changes in exponential growth but has to assume a certain N0",
     )
 
     group = parser.add_argument_group("Logging")

From aab72c5eb9bf827cc0fc8a6e72246c96e5467729 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Wed, 9 Dec 2020 12:42:51 +0100
Subject: [PATCH 46/52] expose phase quality filters on command-line

---
 croissance/__init__.py            | 18 +++++----
 croissance/estimation/__init__.py | 64 +++++++++++++++++++++++--------
 croissance/estimation/defaults.py | 13 -------
 croissance/main.py                | 44 +++++++++++++++++----
 4 files changed, 94 insertions(+), 45 deletions(-)
 delete mode 100644 croissance/estimation/defaults.py

diff --git a/croissance/__init__.py b/croissance/__init__.py
index bfe4b92..25a4e97 100644
--- a/croissance/__init__.py
+++ b/croissance/__init__.py
@@ -2,7 +2,11 @@
 
 import croissance.figures.plot
 
-from croissance.estimation import AnnotatedGrowthCurve, estimate_growth
+from croissance.estimation import (
+    AnnotatedGrowthCurve,
+    GrowthEstimationParameters,
+    estimate_growth,
+)
 from croissance.estimation.util import normalize_time_unit
 
 
@@ -23,12 +27,12 @@ def process_curve(
     if curve.isnull().all():
         return AnnotatedGrowthCurve(curve, [], [])
 
-    return estimate_growth(
-        curve,
-        segment_log_n0=segment_log_n0,
-        constrain_n0=constrain_n0,
-        n0=n0,
-    )
+    params = GrowthEstimationParameters()
+    params.segment_log_n0 = segment_log_n0
+    params.constrain_n0 = constrain_n0
+    params.n0 = n0
+
+    return estimate_growth(curve, params=params)
 
 
 def plot_processed_curve(curve: AnnotatedGrowthCurve, yscale="both"):
diff --git a/croissance/estimation/__init__.py b/croissance/estimation/__init__.py
index 69bc515..467e4f9 100644
--- a/croissance/estimation/__init__.py
+++ b/croissance/estimation/__init__.py
@@ -6,8 +6,6 @@
 import numpy
 import pandas
 
-import croissance.estimation.defaults as defaults
-
 from croissance.estimation.outliers import remove_outliers
 from croissance.estimation.ranking import rank_phases
 from croissance.estimation.regression import fit_exponential
@@ -48,19 +46,51 @@ def pick_best(growth_phases, metric="duration"):
 )
 
 
+class GrowthEstimationParameters:
+    __slots__ = [
+        "segment_log_n0",
+        "constrain_n0",
+        "n0",
+        "curve_minimum_duration_hours",
+        "phase_minimum_signal_noise_ratio",
+        "phase_minimum_duration_hours",
+        "phase_minimum_slope",
+        "phase_rank_exclude_below",
+        "phase_rank_weights",
+    ]
+
+    def __init__(self):
+        self.segment_log_n0 = False
+        self.constrain_n0 = False
+        self.n0 = 0.0
+
+        self.curve_minimum_duration_hours = 5
+        self.phase_minimum_signal_noise_ratio = 1.0
+        self.phase_minimum_duration_hours = 1.5
+        self.phase_minimum_slope = 0.005
+
+        self.phase_rank_exclude_below = 33
+        self.phase_rank_weights = {
+            "SNR": 50,
+            "duration": 30,
+            "slope": 10,
+            # TODO add 1 - start?
+        }
+
+
 def estimate_growth(
     curve: pandas.Series,
     *,
-    segment_log_n0: bool = False,
-    constrain_n0: bool = False,
-    n0: float = 0.0,
+    params=GrowthEstimationParameters(),
     name: str = "untitled curve"
 ) -> AnnotatedGrowthCurve:
     log = logging.getLogger(__name__)
     series = curve.dropna()
 
     n_hours = int(
-        numpy.round(points_per_hour(series) * defaults.CURVE_MINIMUM_DURATION_HOURS)
+        numpy.round(
+            points_per_hour(series) * max(1, params.curve_minimum_duration_hours)
+        )
     )
 
     if n_hours == 0:
@@ -81,8 +111,8 @@ def estimate_growth(
         log.warning("Fewer than three positive data-points for %s", name)
         return AnnotatedGrowthCurve(series, outliers, [])
 
-    if segment_log_n0:
-        series_log_n0 = numpy.log(series - n0).dropna()
+    if params.segment_log_n0:
+        series_log_n0 = numpy.log(series - params.n0).dropna()
         smooth_series = segment_spline_smoothing(series, series_log_n0)
     else:
         smooth_series = segment_spline_smoothing(series)
@@ -100,19 +130,19 @@ def estimate_growth(
             continue
 
         # skip any phases with less than minimum duration
-        if phase.duration < defaults.PHASE_MINIMUM_DURATION_HOURS:
+        if phase.duration < max(0.0, params.phase_minimum_duration_hours):
             continue
 
         slope, intercept, n0, snr, _fallback_linear_method = fit_exponential(
-            phase_series, n0=n0 if constrain_n0 else None
+            phase_series, n0=params.n0 if params.constrain_n0 else None
         )
 
         # skip phases whose actual slope is below the limit
-        if slope < max(0.0, defaults.PHASE_MINIMUM_SLOPE):
+        if slope < max(0.0, params.phase_minimum_slope):
             continue
 
         # skip phases whose actual signal-noise-ratio is below the limit
-        if snr < max(1.0, defaults.PHASE_MINIMUM_SIGNAL_NOISE_RATIO):
+        if snr < max(1.0, params.phase_minimum_signal_noise_ratio):
             continue
 
         phases.append(
@@ -129,11 +159,11 @@ def estimate_growth(
 
     ranked_phases = rank_phases(
         phases,
-        defaults.PHASE_RANK_WEIGHTS,
+        params.phase_rank_weights,
         thresholds={
-            "SNR": defaults.PHASE_MINIMUM_SIGNAL_NOISE_RATIO,
-            "duration": defaults.PHASE_MINIMUM_DURATION_HOURS,
-            "slope": defaults.PHASE_MINIMUM_SLOPE,
+            "duration": max(0.0, params.phase_minimum_duration_hours),
+            "slope": max(0.0, params.phase_minimum_slope),
+            "SNR": max(1.0, params.phase_minimum_signal_noise_ratio),
         },
     )
 
@@ -143,7 +173,7 @@ def estimate_growth(
         [
             phase
             for phase in ranked_phases
-            if phase.rank >= defaults.PHASE_RANK_EXCLUDE_BELOW
+            if phase.rank >= params.phase_rank_exclude_below
         ],
     )
 
diff --git a/croissance/estimation/defaults.py b/croissance/estimation/defaults.py
deleted file mode 100644
index b0cb31c..0000000
--- a/croissance/estimation/defaults.py
+++ /dev/null
@@ -1,13 +0,0 @@
-CURVE_MINIMUM_DURATION_HOURS = 5
-
-PHASE_MINIMUM_SIGNAL_NOISE_RATIO = 1.0
-PHASE_MINIMUM_DURATION_HOURS = 1.5
-PHASE_MINIMUM_SLOPE = 0.005
-
-PHASE_RANK_EXCLUDE_BELOW = 33
-PHASE_RANK_WEIGHTS = {
-    "SNR": 50,
-    "duration": 30,
-    "slope": 10,
-    # TODO add 1 - start?
-}
diff --git a/croissance/main.py b/croissance/main.py
index d9c5436..e546c4d 100644
--- a/croissance/main.py
+++ b/croissance/main.py
@@ -10,7 +10,7 @@
 
 import coloredlogs
 
-from croissance import estimate_growth
+from croissance import GrowthEstimationParameters, estimate_growth
 from croissance.estimation.util import normalize_time_unit
 from croissance.figures.writer import PDFWriter
 from croissance.formats.input import TSVReader
@@ -19,9 +19,17 @@
 
 class EstimatorWrapper:
     def __init__(self, args):
-        self.constrain_n0 = args.constrain_N0
-        self.segment_log_n0 = args.segment_log_N0
-        self.n0 = args.N0
+        self.params = GrowthEstimationParameters()
+
+        self.params.constrain_n0 = args.constrain_N0
+        self.params.segment_log_n0 = args.segment_log_N0
+        self.params.n0 = args.N0
+
+        self.params.phase_minimum_signal_noise_ratio = (
+            args.phase_minimum_signal_to_noise
+        )
+        self.params.phase_minimum_duration_hours = args.phase_minimum_duration
+        self.params.phase_minimum_slope = args.phase_minimum_slope
 
         self.input_time_unit = args.input_time_unit
 
@@ -32,9 +40,7 @@ def __call__(self, values):
             normalized_curve = normalize_time_unit(curve, self.input_time_unit)
             annotated_curve = estimate_growth(
                 normalized_curve,
-                segment_log_n0=self.segment_log_n0,
-                constrain_n0=self.constrain_n0,
-                n0=self.n0,
+                params=self.params,
                 name=name,
             )
 
@@ -103,11 +109,12 @@ def parse_args(argv):
         help="Yscale(s) for figures. If both, then two plots are generated per curve",
     )
 
+    defaults = GrowthEstimationParameters()
     group = parser.add_argument_group("Growth model")
     group.add_argument(
         "--N0",
         type=float,
-        default=0.0,
+        default=defaults.n0,
         help="Baseline for constrained regressions and identifying curve segments in "
         "log space",
     )
@@ -122,6 +129,27 @@ def parse_args(argv):
         help="Identify curve segments using log(N-N0) rather than N; increases "
         "sensitivity to changes in exponential growth but has to assume a certain N0",
     )
+    group.add_argument(
+        "--phase-minimum-signal-to-noise",
+        type=float,
+        metavar="SNR",
+        default=defaults.phase_minimum_signal_noise_ratio,
+        help="Minimum phase signal-to-noise ratio",
+    )
+    group.add_argument(
+        "--phase-minimum-duration",
+        type=float,
+        metavar="HOURS",
+        default=defaults.phase_minimum_duration_hours,
+        help="Minimum duration of phases in hours",
+    )
+    group.add_argument(
+        "--phase-minimum-slope",
+        type=float,
+        metavar="SLOPE",
+        default=defaults.phase_minimum_slope,
+        help="Minimum phase slope",
+    )
 
     group = parser.add_argument_group("Logging")
     group.add_argument(

From 1f5bad0bfeec7b0175cea959ea61d99d391f0ec0 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Wed, 9 Dec 2020 12:42:59 +0100
Subject: [PATCH 47/52] tweak travis config

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index a63517c..dd3e3e0 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -10,4 +10,4 @@ before_install:
 install:
   - python setup.py install
   - pip install pytest
-script: pytest
+script: pytest tests

From 8faeb7822cdf5689604392cc5b0274c3684f03bf Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Wed, 9 Dec 2020 12:45:12 +0100
Subject: [PATCH 48/52] simplify setup.py

---
 setup.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/setup.py b/setup.py
index 6f4371e..165f32a 100644
--- a/setup.py
+++ b/setup.py
@@ -13,20 +13,17 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import unicode_literals
-import codecs
-
-from setuptools import setup, find_packages
+from setuptools import setup
 
 setup(
     name="croissance",
     version="1.1.1",
-    packages=find_packages(exclude=["*tests*"]),
+    packages=["croissance"],
     url="https://github.com/biosustain/croissance",
     author="Lars Schöning",
     author_email="lays@biosustain.dtu.dk",
     description="A tool for estimating growth rates in growth curves.",
-    long_description=codecs.open("README.rst", encoding="utf-8").read(),
+    long_description=open("README.rst", encoding="utf-8").read(),
     license="Apache License Version 2.0",
     entry_points={
         "console_scripts": [

From afecf701053ccffb59fbe8c48c9dcc6ab9af4d99 Mon Sep 17 00:00:00 2001
From: Mikkel Schubert <MikkelSch@gmail.com>
Date: Thu, 10 Dec 2020 14:35:11 +0100
Subject: [PATCH 49/52] alternative install method

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index dd3e3e0..626c061 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -8,6 +8,6 @@ before_install:
     mkdir -p $HOME/.config/matplotlib && \
     echo 'backend: agg' > $HOME/.config/matplotlib/matplotlibrc
 install:
-  - python setup.py install
+  - pip install .
   - pip install pytest
 script: pytest tests

From 1142c904cf80515117f97c518dae0472eebe424f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Emre=20=C3=96zdemir?= <emoz@biosustain.dtu.dk>
Date: Tue, 19 Jan 2021 10:20:13 +0100
Subject: [PATCH 50/52] update version

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 165f32a..89e874b 100644
--- a/setup.py
+++ b/setup.py
@@ -17,7 +17,7 @@
 
 setup(
     name="croissance",
-    version="1.1.1",
+    version="1.2.0",
     packages=["croissance"],
     url="https://github.com/biosustain/croissance",
     author="Lars Schöning",

From 6554cb6d4908a7a24a320e120d44850d6226c3b9 Mon Sep 17 00:00:00 2001
From: meono <emoz@biosustain.dtu.dk>
Date: Tue, 19 Jan 2021 11:55:45 +0100
Subject: [PATCH 51/52] previous form seems to exclude submodules from install.

---
 setup.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 89e874b..fc6a604 100644
--- a/setup.py
+++ b/setup.py
@@ -13,12 +13,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from setuptools import setup
+from setuptools import setup, find_packages
 
 setup(
     name="croissance",
     version="1.2.0",
-    packages=["croissance"],
+    packages=find_packages(where="croissance", exclude=["tests"]),
+    package_dir={"": "croissance"},
     url="https://github.com/biosustain/croissance",
     author="Lars Schöning",
     author_email="lays@biosustain.dtu.dk",

From 35c50f3e93e0d027b2113756473210e54db5948c Mon Sep 17 00:00:00 2001
From: meono <emoz@biosustain.dtu.dk>
Date: Thu, 21 Jan 2021 09:17:24 +0100
Subject: [PATCH 52/52] path got messed up for submodules, so reverting to
 simple version wihtout exluding test.

---
 setup.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index fc6a604..8faec7e 100644
--- a/setup.py
+++ b/setup.py
@@ -18,8 +18,7 @@
 setup(
     name="croissance",
     version="1.2.0",
-    packages=find_packages(where="croissance", exclude=["tests"]),
-    package_dir={"": "croissance"},
+    packages=find_packages(),
     url="https://github.com/biosustain/croissance",
     author="Lars Schöning",
     author_email="lays@biosustain.dtu.dk",