Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master'
Browse files Browse the repository at this point in the history
Updating to upstream codebase
  • Loading branch information
szmazurek committed Mar 29, 2024
2 parents 5bc69c8 + 2b2445c commit 46103d6
Show file tree
Hide file tree
Showing 32 changed files with 567 additions and 305 deletions.
2 changes: 1 addition & 1 deletion GANDLF/anonymize/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def run_anonymizer(
input_path,
output_path,
anonymization_actions={},
deletePrivateTags=parameters["delete_private_tags"],
delete_private_tags=parameters["delete_private_tags"],
)
elif parameters["modality"] in ["histo", "path"]:
# anonymize_slide(
Expand Down
1 change: 1 addition & 0 deletions GANDLF/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from .recover_config import recover_config
from .post_training_model_optimization import post_training_model_optimization
from .generate_metrics import generate_metrics_dict
from .data_split_saver import split_data_and_save_csvs

from datetime import date

Expand Down
49 changes: 49 additions & 0 deletions GANDLF/cli/data_split_saver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from typing import Union
import os

import pandas as pd
from GANDLF.utils import get_dataframe, split_data


def split_data_and_save_csvs(
    input_data: Union[pd.DataFrame, str], output_dir: str, parameters: dict
) -> None:
    """
    Split the data into training, validation, and testing sets and save them as csvs in the output directory.

    Each item produced by ``split_data`` is unpacked as
    ``(indices, training, validation, testing)``. Training and validation files
    are named after ``indices[1]`` and testing files after ``indices[0]``.
    A file that already exists is never overwritten, and a split that is
    ``None`` is skipped.

    Args:
        input_data (Union[pd.DataFrame, str]): The input data to be split and saved.
        output_dir (str): The output directory to save the split data. It is created if it does not exist.
        parameters (dict): The parameters dictionary.
    """
    full_data = get_dataframe(input_data)

    dataframe_split = split_data(full_data, parameters)

    # Create the destination up front so that to_csv does not fail on a fresh
    # output path. An empty string means "current directory" for os.path.join,
    # and os.makedirs("") would raise, so only create non-empty paths.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    for (
        testing_and_valid_indices,
        training_data,
        validation_data,
        testing_data,
    ) in dataframe_split:
        testing_index = testing_and_valid_indices[0]
        validation_index = testing_and_valid_indices[1]

        # training and validation dataframes use the same index, since they are based on the validation split;
        # testing dataframes use the first index
        outputs = (
            (training_data, f"training_{validation_index}.csv"),
            (validation_data, f"validation_{validation_index}.csv"),
            (testing_data, f"testing_{testing_index}.csv"),
        )

        for data, file_name in outputs:
            path = os.path.join(output_dir, file_name)
            # write only when there is data to save and no file is already present at the path
            if data is not None and not os.path.exists(path):
                data.to_csv(path, index=False)
2 changes: 0 additions & 2 deletions GANDLF/cli/main_run.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os, pickle
from typing import Optional
from pathlib import Path

Expand All @@ -9,7 +8,6 @@
populate_header_in_parameters,
parseTrainingCSV,
parseTestingCSV,
set_determinism,
)


Expand Down
6 changes: 3 additions & 3 deletions GANDLF/compute/training_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,9 +237,9 @@ def training_loop(
params["validation_data"] = validation_data
params["testing_data"] = testing_data
testingDataDefined = True
if params["testing_data"] is None:
# testing_data = validation_data
testingDataDefined = False
if not isinstance(testing_data, pd.DataFrame):
if params["testing_data"] is None:
testingDataDefined = False

# Setup a few variables for tracking
best_loss = 1e7
Expand Down
6 changes: 6 additions & 0 deletions GANDLF/config_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,12 @@ def _parseConfig(
"nested_training" in params
), "The parameter 'nested_training' needs to be defined"
# initialize defaults for nested training
params["nested_training"]["stratified"] = params["nested_training"].get(
"stratified", False
)
params["nested_training"]["stratified"] = params["nested_training"].get(
"proportional", params["nested_training"]["stratified"]
)
params["nested_training"]["testing"] = params["nested_training"].get("testing", -5)
params["nested_training"]["validation"] = params["nested_training"].get(
"validation", -5
Expand Down
2 changes: 1 addition & 1 deletion GANDLF/losses/regression.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import Optional
import torch
import torch.nn.functional as F
from torch.nn import MSELoss, CrossEntropyLoss, L1Loss
from torch.nn import CrossEntropyLoss
from GANDLF.utils import one_hot


Expand Down
Loading

0 comments on commit 46103d6

Please sign in to comment.