tdms heatmap, summarize & validate; fixing beta bug (#66)
* `tdms heatmap` makes a heatmap.
* `tdms summarize` summarizes data.
* `tdms validate` checks data.
* Fixing `tdms beta` that scrambled results.
* Plot titles now show the target being displayed.
* Replacing the contour plot with a 2D geplot.
* Do not drop targets from original_df when prepping data sets.
* Adding density of effects per AA.
* Adding alphabet as a feature of a model.
* Adding a simple Linear model.
* Adding `--drop-nans` for `tdms prep`.

Closes #30
Closes #46
matsen authored Jun 29, 2020
1 parent bcf5cea commit e0bdb4c
Showing 10 changed files with 597 additions and 121 deletions.
1 change: 1 addition & 0 deletions .github/workflows/build-and-test.yml
@@ -30,6 +30,7 @@ jobs:
- name: Test
run: |
make test
make datatest
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
10 changes: 7 additions & 3 deletions Makefile
@@ -5,18 +5,22 @@ install:

test: torchdms/data/_ignore/test_df.prepped.pkl
cd torchdms/data; tdms go --config test_config.json
rm torchdms/data/_ignore/test_df.prepped.pkl
pytest
rm torchdms/data/_ignore/test_df.prepped.pkl

datatest: torchdms/data/_ignore/test_df.prepped.pkl
tdms validate torchdms/data/_ignore/test_df.prepped.pkl
tdms summarize --out-prefix torchdms/data/_ignore/test_df.summary torchdms/data/_ignore/test_df.prepped.pkl

format:
black torchdms
docformatter --in-place torchdms/*py

lint:
pylint torchdms && echo "LINTING PASS"
pylint **/[^_]*.py && echo "LINTING PASS"

torchdms/data/_ignore/test_df.prepped.pkl: torchdms/data/test_df.pkl
mkdir -p torchdms/data/_ignore
tdms prep --per-stratum-variants-for-test 10 --skip-stratum-if-count-is-smaller-than 30 torchdms/data/test_df.pkl torchdms/data/_ignore/test_df.prepped affinity_score

.PHONY: install test format lint
.PHONY: install test datatest format lint
6 changes: 6 additions & 0 deletions pylintrc
@@ -27,6 +27,12 @@ max-attributes=700
max-locals=700


[SIMILARITIES]

# Ignore imports when computing similarities.
ignore-imports=yes


[MESSAGES CONTROL]
disable=bad-continuation,missing-function-docstring,too-few-public-methods
# List of members which are set dynamically and missed by pylint inference
62 changes: 59 additions & 3 deletions torchdms/analysis.py
@@ -6,6 +6,7 @@
import torch
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torchdms.data import BinaryMapDataset
from torchdms.model import monotonic_params_from_latent_space


@@ -81,7 +82,7 @@ def complete_loss(self, loss_fn, targets, predictions, loss_decays):
def train(
self, epoch_count, loss_fn, patience=10, min_lr=1e-5, loss_weight_span=None
):
"""Train self.model."""
"""Train self.model using all the bells and whistles."""
assert len(self.train_datasets) > 0
target_count = self.train_datasets[0].target_count()
assert self.model.output_size == target_count
@@ -124,7 +125,6 @@ def loss_decays_of_target_extrema(extremum_pairs_across_targets):
self.model.to(self.device)

def step_model():
per_epoch_loss = 0.0
for _ in range(batch_count):
optimizer.zero_grad()
per_batch_loss = 0.0
@@ -151,7 +151,6 @@ def step_model():
if self.model.monotonic_sign:
for param in monotonic_params_from_latent_space(self.model):
param.data.clamp_(0)
per_epoch_loss += per_batch_loss
optimizer.step()

val_samples = self.val_data.samples.to(self.device)
@@ -206,3 +205,60 @@ def multi_train(
click.echo("LOG: Beginning full training.")
self.model = torch.load(self.model_path)
self.train(epoch_count, loss_fn, patience, min_lr, loss_weight_span)

    def simple_train(self, epoch_count, loss_fn):
        """Bare-bones training of self.model.

        This training doesn't even handle NaNs. If you want that behavior, just use
        self.loss_of_targets_and_prediction rather than loss_fn directly.
        We also cat together all of the data rather than getting gradients on a
        per-stratum basis. If you don't want this behavior, use
        self.train_infinite_loaders rather than the train_infinite_loaders defined
        below.
        """
        assert len(self.train_datasets) > 0
        target_count = self.train_datasets[0].target_count()
        assert self.model.output_size == target_count

        batch_count = 1 + max(map(len, self.train_datasets)) // self.batch_size
        self.model.train()  # Sets model to training mode.
        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate)
        self.model.to(self.device)

        train_infinite_loaders = [
            make_data_loader_infinite(
                DataLoader(
                    BinaryMapDataset.cat(self.train_datasets),
                    batch_size=self.batch_size,
                    shuffle=True,
                )
            )
        ]

        def step_model():
            for _ in range(batch_count):
                optimizer.zero_grad()
                for train_infinite_loader in train_infinite_loaders:
                    batch = next(train_infinite_loader)
                    samples = batch["samples"].to(self.device)
                    predictions = self.model(samples)
                    loss = loss_fn(batch["targets"], predictions)

                    # Note that here we are using gradient accumulation: we call
                    # backward for each loader and only clear the gradients via
                    # zero_grad at the start of the next batch.
                    # See, e.g., https://link.medium.com/wem03OhPH5
                    loss.backward()

                    # If the model is monotonic, we clamp all negative parameters
                    # after the latent space, excluding all bias parameters.
                    if self.model.monotonic_sign:
                        for param in monotonic_params_from_latent_space(self.model):
                            param.data.clamp_(0)
                optimizer.step()

        with click.progressbar(range(epoch_count)) as progress_bar:
            for _ in progress_bar:
                step_model()

        torch.save(self.model, self.model_path)
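
The gradient accumulation noted in step_model above is the general PyTorch pattern of calling backward() once per loader and only then taking a single optimizer step on the summed gradients. Below is a minimal, self-contained sketch of that pattern; the toy linear model, random data, and loader lists are placeholders for illustration and are not part of torchdms.

import torch

# Stand-ins for the real model and per-stratum data loaders (illustrative only).
model = torch.nn.Linear(10, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = torch.nn.MSELoss()
loaders = [
    [(torch.randn(8, 10), torch.randn(8, 1)) for _ in range(4)] for _ in range(3)
]

for batches in zip(*loaders):
    optimizer.zero_grad()
    for samples, targets in batches:
        # Each backward() call adds into .grad; gradients are not cleared
        # between loaders, so they accumulate across all loaders in this step.
        loss = loss_fn(model(samples), targets)
        loss.backward()
    # A single parameter update using the accumulated gradients.
    optimizer.step()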