Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Adding Domain adaptation #51

Open
wants to merge 21 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
5b60f95
changes to enable domain adaptation
bruce-edelman Jul 6, 2023
6339e21
move imports back to save time when not in train/predict mode
bruce-edelman Jul 6, 2023
1970c9e
fix imports and make sure old version is still same output
bruce-edelman Jul 6, 2023
a081eb8
replace imports to old locations
bruce-edelman Jul 6, 2023
a15d1a5
tie loss fcts to model output names for clarity
bruce-edelman Jul 6, 2023
a9cbf17
try to make pypi publish only happen for changes to main
bruce-edelman Jul 6, 2023
943a14a
Revert "try to make pypi publish only happen for changes to main"
bruce-edelman Jul 6, 2023
1802e82
fix small errors
bruce-edelman Jul 7, 2023
ecb54a6
fix loss bug in masked cce with reduce_all
bruce-edelman Jul 7, 2023
4aed9a2
add ignore
bruce-edelman Jul 18, 2023
bb1d967
get Domain adaptation fully working -- some refactoring to clean thin…
bruce-edelman Jul 20, 2023
5a351b1
cleanup
bruce-edelman Jul 20, 2023
f9a6668
remove unused import
bruce-edelman Jul 20, 2023
8a989ee
rework gh actions so it will test installations correctly on PRs and …
bruce-edelman Jul 20, 2023
f07537f
bump version number
bruce-edelman Jul 20, 2023
3d0efac
finish bumping version, add badges on README
bruce-edelman Jul 20, 2023
707f1f3
fix action badge:
bruce-edelman Jul 20, 2023
e58cd48
no dropout in discriminator branch
bruce-edelman Jul 20, 2023
7504223
small changes
bruce-edelman Jul 27, 2023
cc8a1ee
add more plots and training metrics to output
bruce-edelman Jul 28, 2023
5dcf59d
small changes -- save acc file
bruce-edelman Jul 29, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
File renamed without changes.
36 changes: 0 additions & 36 deletions .github/workflows/python-app.yml

This file was deleted.

37 changes: 37 additions & 0 deletions .github/workflows/python-build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# This workflow installs the package and its Python dependencies inside a conda environment, once for each Python version in the matrix below (it does not run tests or lint)
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Build

on: push

jobs:
Build_Package:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Setup conda
uses: s-weigand/setup-conda@v1
with:
activate-conda: true
update-conda: true
python-version: ${{ matrix.python-version }}
conda-channels: conda-forge
- name: Install dependencies
run: |
conda install pip setuptools
pip install --upgrade pip
- name: Install diploSHIC
run: |
pip install .
- name: List installed
run: |
conda list
12 changes: 9 additions & 3 deletions .github/workflows/python-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,18 @@
# separate terms of service, privacy policy, and support
# documentation.

name: Upload Python Package
name: Publish

on: [push, pull_request]
on:
push:
branches:
- main
- master
tags:
- v*

jobs:
manylinux:
Build_Wheel:
runs-on: ubuntu-latest
steps:
- name: Checkout
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
build
diploSHIC.egg-info
work
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
[![Build](https://github.com/kr-colab/diploSHIC/actions/workflows/python-build.yml/badge.svg)](https://github.com/kr-colab/diploSHIC/actions/workflows/python-build.yml)
[![PyPI version](https://badge.fury.io/py/diploSHIC.svg)](https://badge.fury.io/py/diploSHIC)

# diploS/HIC
This repo contains the implementation for `diploS/HIC` as described in Kern and Schrider (2018; https://doi.org/10.1534/g3.118.200262), along
with its associated support scripts. `diploS/HIC` uses a deep convolutional neural network to identify
Expand Down
4 changes: 4 additions & 0 deletions diploshic/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
from diploshic.fvTools import *
from diploshic.msTools import *
from diploshic.shicstats import *
from . import network
from . import dataloader
from . import misc
from . import parser
101 changes: 101 additions & 0 deletions diploshic/dataloader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
from keras.utils import Sequence
import numpy as np
import gc


def load_fvecs_from_directory(directory, n_subwin=11):
    """Load the five per-class training feature-vector files from a directory.

    Reads ``hard.fvec``, ``neut.fvec``, ``soft.fvec``, ``linkedSoft.fvec`` and
    ``linkedHard.fvec`` (each with a one-line header that is skipped) and
    stacks them, in that order, into a single array.

    Args:
        directory: Directory containing the ``.fvec`` files. A trailing path
            separator is accepted but no longer required.
        n_subwin: Number of sub-windows each flat feature row is split into.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape
        ``(n_examples, n_dims, n_subwin, 1)`` and ``y`` is a 1-D integer
        array of class labels: 0 = hard, 1 = neut, 2 = soft,
        3 = linkedSoft, 4 = linkedHard.

    Raises:
        ValueError: If a file's rows cannot be reshaped to
            ``(n_dims, n_subwin)`` (raised by NumPy).
    """
    # Local import so this block does not depend on the file's import header.
    import os

    # Position in this tuple is the integer class label.
    class_names = ("hard", "neut", "soft", "linkedSoft", "linkedHard")

    n_dims = None
    features = []
    labels = []
    for label, class_name in enumerate(class_names):
        path = os.path.join(directory, class_name + ".fvec")
        # ndmin=2 keeps a single-row file 2-D; without it loadtxt returns a
        # 1-D array and the shape[1] lookup below fails.
        fvec = np.loadtxt(path, skiprows=1, ndmin=2)
        if n_dims is None:
            # The per-sub-window feature count is taken from the first file
            # (hard) and applied to every class, as in the original code.
            n_dims = fvec.shape[1] // n_subwin
        features.append(fvec.reshape(fvec.shape[0], n_dims, n_subwin))
        labels.append(np.repeat(label, fvec.shape[0]))

    both = np.concatenate(features)
    y = np.concatenate(labels)
    # Trailing singleton axis added so each example is a 3-D (n_dims, n_subwin, 1) array.
    return both.reshape(both.shape[0], n_dims, n_subwin, 1), y


def load_empirical_fvecs_from_directory(directory, n_subwin=11):
    """Load ``empirical.fvec`` from a directory as a 4-D feature array.

    The original implementation read ``emp.shape`` before ``emp`` was
    assigned and passed the array itself as a reshape dimension, so it could
    never run; both are corrected here.

    Args:
        directory: Directory containing ``empirical.fvec``. A trailing path
            separator is accepted but not required.
        n_subwin: Number of sub-windows each flat feature row is split into.

    Returns:
        Array of shape ``(n_examples, n_dims, n_subwin, 1)`` where
        ``n_dims = n_columns // n_subwin``.

    Raises:
        ValueError: If the rows cannot be reshaped to ``(n_dims, n_subwin)``
            (raised by NumPy).
    """
    # Local import so this block does not depend on the file's import header.
    import os

    # NOTE(review): this assumes every column in the file is a numeric
    # feature; confirm that empirical files carry no coordinate columns.
    # ndmin=2 keeps a single-row file 2-D so shape[1] is always available.
    emp = np.loadtxt(
        os.path.join(directory, "empirical.fvec"), skiprows=1, ndmin=2
    )
    n_dims = emp.shape[1] // n_subwin
    # Trailing singleton axis added so each example is a 3-D (n_dims, n_subwin, 1) array.
    return emp.reshape(emp.shape[0], n_dims, n_subwin, 1)


class DADiploSHICDataLoader(Sequence):
    """Batch generator that mixes source and target data for domain adaptation.

    Every batch stacks three groups of rows, in this order:

    1. ``batch_size`` source examples carrying their real class labels,
    2. ``batch_size // 2`` source examples labelled 0 for the discriminator,
    3. ``batch_size // 2`` target examples labelled 1 for the discriminator.

    Rows that do not belong to an output are filled with -1 for that output.
    ``batch_size`` must be even, otherwise the size assertion in
    ``__getitem__`` fails.
    """

    def __init__(self, X_src, X_tgt, Y_pred, batch_size):
        """Store the data and draw the initial shuffled index orders.

        Args:
            X_src: Source-domain feature array, indexed along axis 0.
            X_tgt: Target-domain feature array, indexed along axis 0.
            Y_pred: 2-D label array for ``X_src`` (``shape[1]`` is read when
                building the -1 filler rows).
            batch_size: Number of labelled source examples per batch; the
                emitted batch has ``2 * batch_size`` rows.
        """
        # The Keras 3 PyDataset base expects its initializer to run; the call
        # is a harmless no-op on the older Sequence base.
        super().__init__()

        self.tgt_data = X_tgt
        self.src_data = X_src
        self.y_pred = Y_pred

        self.batch_size = batch_size
        src_size = self.src_data.shape[0]
        tgt_size = self.tgt_data.shape[0]

        # Bounded by the smaller domain so the model sees each training
        # sample at most once per epoch.
        self.no_batch = int(min(src_size, tgt_size) // self.batch_size)
        self.src_pred_idx = np.arange(src_size)
        self.src_discr_idx = np.arange(src_size)
        self.tgt_discr_idx = np.arange(tgt_size)

        self._shuffle_indices()

    def _shuffle_indices(self):
        """Shuffle the three index arrays in place (predictor, src, tgt order)."""
        np.random.shuffle(self.src_pred_idx)
        np.random.shuffle(self.src_discr_idx)
        np.random.shuffle(self.tgt_discr_idx)

    def __len__(self):
        """Return the number of batches per epoch."""
        return self.no_batch

    def on_epoch_end(self):
        """Reshuffle all index arrays and run a garbage-collection pass."""
        self._shuffle_indices()
        gc.collect()

    def __getitem__(self, idx):
        """Build batch number ``idx``.

        Returns:
            A tuple ``(batch_X, targets)`` where ``targets`` is a dict with
            keys ``"predictor"`` and ``"discriminator"``.
        """
        half = self.batch_size // 2
        pred_batch_idx = self.src_pred_idx[
            idx * self.batch_size:(idx + 1) * self.batch_size
        ]
        discrSrc_batch_idx = self.src_discr_idx[idx * half:(idx + 1) * half]
        discrTgt_batch_idx = self.tgt_discr_idx[idx * half:(idx + 1) * half]

        batch_X = np.concatenate((
            self.src_data[pred_batch_idx],
            self.src_data[discrSrc_batch_idx],
            self.tgt_data[discrTgt_batch_idx],
        ))
        # -1 rows are fillers for examples that carry no class label.
        # NOTE(review): presumably skipped by a masked loss defined elsewhere; confirm.
        n_classes = self.y_pred.shape[1]
        batch_Y_pred = np.concatenate((
            self.y_pred[pred_batch_idx],
            -1 * np.ones((len(discrSrc_batch_idx), n_classes)),
            -1 * np.ones((len(discrTgt_batch_idx), n_classes)),
        ))
        # Domain labels: -1 = not a discriminator row, 0 = source, 1 = target.
        batch_Y_discr = np.concatenate((
            -1 * np.ones(len(pred_batch_idx)),
            np.zeros(len(discrSrc_batch_idx)),
            np.ones(len(discrTgt_batch_idx)),
        ))
        assert batch_X.shape[0] == self.batch_size*2, (batch_X.shape, self.batch_size*2)
        assert batch_Y_pred.shape[0] == batch_Y_discr.shape[0], (batch_Y_pred.shape, batch_Y_discr.shape)
        return batch_X, {"predictor": batch_Y_pred, "discriminator": batch_Y_discr}


class DiploSHICDataLoader(Sequence):
    """Batch generator over a single labelled dataset.

    Examples are visited in a shuffled order that is redrawn after every
    epoch. Only full batches are produced, so up to ``batch_size - 1``
    examples are left out of each epoch.
    """

    def __init__(self, X_src, Y_pred, batch_size):
        """Store the data and draw the initial shuffled index order.

        Args:
            X_src: Feature array, indexed along axis 0.
            Y_pred: Label array aligned with ``X_src`` along axis 0.
            batch_size: Number of examples per batch.
        """
        # The Keras 3 PyDataset base expects its initializer to run; the call
        # is a harmless no-op on the older Sequence base.
        super().__init__()

        self.data = X_src
        self.y_pred = Y_pred
        self.batch_size = batch_size
        size = self.data.shape[0]
        # Floor division: a trailing partial batch is dropped.
        self.no_batch = int(size // self.batch_size)
        self.pred_idx = np.arange(size)
        np.random.shuffle(self.pred_idx)

    def __len__(self):
        """Return the number of full batches per epoch."""
        return self.no_batch

    def on_epoch_end(self):
        """Reshuffle the example order and run a garbage-collection pass."""
        np.random.shuffle(self.pred_idx)
        gc.collect()

    def __getitem__(self, idx):
        """Return ``(batch_X, batch_Y_pred)`` for batch number ``idx``."""
        start = idx * self.batch_size
        pred_batch_idx = self.pred_idx[start:start + self.batch_size]
        batch_X = self.data[pred_batch_idx]
        batch_Y_pred = self.y_pred[pred_batch_idx]
        return batch_X, batch_Y_pred
Loading