Skip to content

Commit

Permalink
Merge pull request #4 from meono/master
Browse files Browse the repository at this point in the history
Some fixes related to edge cases and pandas features required
  • Loading branch information
meono authored Jul 14, 2020
2 parents da57133 + 102d8ae commit e9a9251
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 8 deletions.
6 changes: 5 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,11 @@ Description

A tool for estimating growth rates in growth curves. The tool fits λ ⋅ e :sup:`μ⋅x` + N :sub:`0` to any candidate growth phases of the growth curve that have increasing growth, i.e. where both the first and second derivative of the growth function are positive. To identify these phases reliably, the tool utilizes a custom smoothing function that addresses problems other smoothing methods have with growth curves that have regions with varying levels of noise (e.g. lots of noise in the beginning, then less noise after growth starts, then more noise in the stationary phase).

The parameter N :sub:`0` of the model can optionally be constrained. This is recommended if the value is known. The growth rate in calculated growth phases can only be properly compared if their N :sub:`0` (baseline OD; when the organism is at its initial population) points to a similar stage of actual growth.
The parameter N :sub:`0` represents the background/blank OD reading (not seeding OD) and can optionally be constrained. This is recommended if the value is known.

The growth rate in calculated growth phases can only be properly compared if their seeding OD (when the organism is at its initial population) points to a similar stage of actual growth.

The intercept (λ) reported by this package can be used as an indicator of lag if the signal-to-noise ratio (SNR) is sufficiently high.

Installation
============
Expand Down
5 changes: 5 additions & 0 deletions croissance/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
from croissance.estimation import Estimator
from collections import namedtuple

AnnotatedGrowthCurve = namedtuple('AnnotatedGrowthCurve', ('series', 'outliers', 'growth_phases'))


def process_curve(curve: 'pandas.Series', **kwargs):
    """Annotate a single growth curve.

    :param curve: the series to analyse.
    :param kwargs: forwarded unchanged to the ``Estimator`` constructor.
    :return: an ``AnnotatedGrowthCurve`` holding the input series with empty
        outlier and growth-phase lists when every value in ``curve`` is null;
        otherwise whatever ``Estimator.growth`` returns for ``curve``.
    """
    # The estimator is constructed first so that any error raised by its
    # constructor surfaces regardless of the curve's content.
    growth_estimator = Estimator(**kwargs)

    has_data = not curve.isnull().all()
    if has_data:
        return growth_estimator.growth(curve)

    # Nothing to fit: hand back the series untouched with no annotations.
    return AnnotatedGrowthCurve(curve, [], [])


Expand Down
12 changes: 11 additions & 1 deletion croissance/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def main():
n0=args.N0)

try:
empties = {}
for infile in tqdm(args.infiles, unit='infile'):
outfile = open('{}{}.tsv'.format(infile.name[:-4], args.output_suffix), 'w')

Expand All @@ -66,6 +67,12 @@ def main():
outwriter = TSVWriter(outfile, include_default_phase=not args.output_exclude_default_phase)

for name, curve in tqdm(list(reader.read(infile)), unit='curve'):
if curve.empty:
try:
empties[infile.name].append(name)
except KeyError:
empties[infile.name] = [name]
continue
annotated_curve = estimator.growth(normalize_time_unit(curve, args.input_time_unit))

outwriter.write(name, annotated_curve)
Expand All @@ -80,7 +87,10 @@ def main():
except KeyboardInterrupt:
pass

print()
if empties != {}:
print('\nEmpty cells were found and discarded:\n', '\n'.join([(infile.name+'\t'+name) for key, names in empties.items() for name in names]))
else:
print()


if __name__ == '__main__':
Expand Down
3 changes: 3 additions & 0 deletions croissance/estimation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@ def growth(self, curve: pandas.Series) -> AnnotatedGrowthCurve:
smooth_series = segment_spline_smoothing(series, )

phases = []
# give up if there isn't enough data
if len(smooth_series) < n_hours:
return AnnotatedGrowthCurve(series, [], [])
raw_phases = self._find_growth_phases(smooth_series, window=n_hours)

for phase in raw_phases:
Expand Down
14 changes: 12 additions & 2 deletions croissance/estimation/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,12 @@ def exponential_constrain_n0(x, a, b):
p0 = p0[:2]

try:
popt, pcov = curve_fit(fit_fn, series.index, series.values, p0=p0, maxfev=10000)
popt, pcov = curve_fit(fit_fn,
series.index,
series.values,
p0=p0,
maxfev=10000,
bounds=([0., 0., 0.], numpy.inf) if n0 is None else ([0., 0.], numpy.inf))

if n0 is not None:
popt = tuple(popt) + (n0,)
Expand All @@ -50,7 +55,12 @@ def exponential_constrain_n0(x, a, b):
p0 = (numpy.exp(c), slope)

try:
popt, pcov = curve_fit(fit_fn, series.index, series.values, p0=p0, maxfev=10000)
popt, pcov = curve_fit(fit_fn,
series.index,
series.values,
p0=p0,
maxfev=10000,
bounds=([0., 0., 0.], numpy.inf) if n0 is None else ([0., 0.], numpy.inf))

if n0 is not None:
popt = tuple(popt) + (n0,)
Expand Down
10 changes: 7 additions & 3 deletions croissance/estimation/smoothing/segments.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,11 @@ def segment_by_std_dev(series, increment=2, maximum=20):
:param maximum:
:return:
"""
start = int(series.index.min())
duration = int(series.index[-2])
windows = []

for i in range(0, duration, increment):
for i in range(start, duration, increment):
for size in range(1, maximum + 1):
window = detrend(series[i:i + size*increment])
heappush(windows, (window.std() / (size*increment), i, i + size*increment))
Expand Down Expand Up @@ -48,7 +49,7 @@ def segment_by_std_dev(series, increment=2, maximum=20):
def window_median(window, start, end):
x = numpy.linspace(0, 1, num=len(window))
A = numpy.vstack([x, numpy.ones(len(x))]).T
m, c = numpy.linalg.lstsq(A, window)[0]
m, c = numpy.linalg.lstsq(A, window, rcond=None)[0]

return (start + end) / 2, m * 0.5 + numpy.median(window - m * x)

Expand All @@ -70,6 +71,9 @@ def segment_points(series, segments):
for start, end in segments:
window = series[start:end]

if window.empty:
continue

if end - start > 5:
out.append(window_median(series[start:start + 2], start, start + 2))

Expand All @@ -90,6 +94,6 @@ def segment_spline_smoothing(series, series_std_dev=None):
if series_std_dev is None:
series_std_dev = series
segments = segment_by_std_dev(series_std_dev)
points = segment_points(series, segments)
points = segment_points(series, segments).sort_index()
spline = InterpolatedUnivariateSpline(points.index, points.values, k=3)
return pandas.Series(data=spline(series.index), index=series.index)
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
],
install_requires=[
'numpy>=1.9.1',
'pandas>=0.15.2',
'pandas>=0.18.0',
'scipy>=0.14.0',
'matplotlib>=1.4.3',
'tqdm>=4.11.2'
Expand Down

0 comments on commit e9a9251

Please sign in to comment.