forked from PNNL-Comp-Mass-Spec/DL-Batch-Correction
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathqcmetrics.py
54 lines (46 loc) · 1.7 KB
/
qcmetrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
def compute_pca(x):
    """Project the rows of ``x`` onto their first two principal components.

    Each column is z-scored along axis 0, the covariance matrix of the
    standardized columns is computed, and every row is projected onto the
    eigenvectors that belong to the two largest eigenvalues of that matrix.

    Args:
        x: 2-D array-like with one row per observation and one column per
            variable. At least two columns are required.

    Returns:
        ``numpy.ndarray`` of shape ``(n_rows, 2)``. Column 0 is the
        projection onto the eigenvector with the largest eigenvalue, column 1
        the projection onto the second largest. The sign of each column is
        arbitrary, as it is for any eigenvector.

    Raises:
        ValueError: If ``x`` is not 2-D or has fewer than two columns.
    """
    from scipy.linalg import eigh
    from scipy.stats import zscore

    x = np.asarray(x, dtype=float)
    if x.ndim != 2 or x.shape[1] < 2:
        raise ValueError(
            "compute_pca expects a 2-D array with at least two columns, "
            "got shape {}".format(x.shape)
        )

    x = zscore(x)
    k = x.shape[1]
    cov_mat = np.cov(x, rowvar=False)

    # Only the two largest eigenpairs are needed. `subset_by_index` replaces
    # the `eigvals=` keyword, which SciPy deprecated and later removed.
    _, eigenvectors = eigh(cov_mat, subset_by_index=[k - 2, k - 1])

    # eigh returns eigenvalues in ascending order, so reversing the two
    # columns puts the dominant component first.
    top_two = eigenvectors[:, ::-1]

    return np.dot(x, top_two)
def plot_several_pca(datasets):
    """Draw one two-component PCA scatter plot per dataset, side by side.

    Args:
        datasets: Mapping from panel title to an ``(x, y)`` pair. ``x`` is
            passed to ``compute_pca``; ``y`` holds one batch label per row
            of ``x`` and is used to colour the points.

    Returns:
        1-D array of matplotlib Axes, one per dataset, in the iteration
        order of ``datasets``.

    Raises:
        ValueError: If ``datasets`` is empty.
    """
    n_panels = len(datasets)
    if n_panels == 0:
        raise ValueError("datasets must contain at least one entry")

    # squeeze=False always yields a 2-D Axes array, so a single dataset is
    # handled the same way as several (the default returns a bare Axes that
    # cannot be indexed).
    _, grid = plt.subplots(
        1, n_panels, figsize=(6 * n_panels, 6), squeeze=False
    )
    axs = grid[0]

    for ax, key in zip(axs, datasets):
        x, y = datasets[key]
        z = compute_pca(x)
        df = pd.DataFrame(z, columns=['PC1', 'PC2'])
        # Add the labels as their own column instead of concatenating them
        # with the coordinates: a shared ndarray would coerce PC1/PC2 to
        # strings whenever the batch labels are not numeric.
        df['batch'] = pd.Categorical(np.asarray(y).ravel())
        sns.scatterplot(
            x='PC1', y='PC2', data=df, hue='batch', legend=False, ax=ax
        )
        ax.set_title(key)
    return axs
def plot_several_anova(datasets):
    """Plot a histogram of per-feature ANOVA p-values for each dataset.

    For every dataset, ``sklearn.feature_selection.f_classif`` is run on
    ``(x, y)`` and the resulting p-values (one per column of ``x``) are
    shown as a grey histogram whose x-axis is limited to ``[0, 1]``.

    Args:
        datasets: Mapping from panel title to an ``(x, y)`` pair, where
            ``x`` is the feature matrix and ``y`` the group label of each
            row, as accepted by ``f_classif``.

    Returns:
        1-D array of matplotlib Axes, one per dataset, in the iteration
        order of ``datasets``.

    Raises:
        ValueError: If ``datasets`` is empty.
    """
    from sklearn import feature_selection

    n_panels = len(datasets)
    if n_panels == 0:
        raise ValueError("datasets must contain at least one entry")

    # squeeze=False always yields a 2-D Axes array, so a single dataset is
    # handled the same way as several (the default returns a bare Axes that
    # cannot be indexed).
    _, grid = plt.subplots(
        1, n_panels, figsize=(6 * n_panels, 6), squeeze=False
    )
    axs = grid[0]

    for ax, key in zip(axs, datasets):
        x, y = datasets[key]
        # Only the p-values are plotted; the F statistics are discarded.
        _, p = feature_selection.f_classif(x, y)
        # Roughly one bin per 20 features, capped at 20 bins.
        ax.hist(p,
                bins=min(20, 1 + len(p) // 20),
                color="grey")
        ax.set_xlim([0, 1])
        ax.set_title('{} (N = {})'.format(key, len(p)))
    return axs