-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplot_histogram.py
74 lines (63 loc) · 2.41 KB
/
plot_histogram.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/env python
import matplotlib.pyplot as plt
import scipy.stats as st
import numpy as np
import math
import numpy as np
import pandas as pd
def get_bins_num(x, if_print=False):
    """Return a histogram bin count for *x* using the Freedman–Diaconis rule.

    The bin width is ``2 * IQR * n ** (-1 / 3)`` and the number of bins is
    the data range divided by that width, rounded to the nearest integer.

    Args:
        x: Non-empty sequence (or 1-D array) of numbers.
        if_print: When true, print the chosen number of bins.

    Returns:
        The number of bins as an ``int``. Falls back to 20 whenever the
        interquartile range is zero, because the rule would otherwise
        divide by a zero bin width.

    Raises:
        ValueError: If *x* is empty.
    """
    n = len(x)
    if n == 0:
        raise ValueError("cannot choose a number of bins for empty data")

    q25, q75 = np.percentile(x, [25, 75])
    iqr = q75 - q25
    if iqr == 0:
        # Covers the all-zero quartile case and any other data whose middle
        # half is constant (e.g. [5, 5, 5] or heavily tied samples).
        bins = 20
    else:
        bin_width = 2 * iqr * n ** (-1 / 3)
        bins = int(round((max(x) - min(x)) / bin_width))
    if if_print:
        print("Freedman–Diaconis number of bins:", bins)
    return bins
def get_statistics_list(x):
    """Print summary statistics of *x* and return its median and mean.

    Mean, median, variance, minimum and maximum are each rounded to five
    decimal places and printed on a single line.

    Args:
        x: Sequence (or array) of numbers.

    Returns:
        Tuple ``(median, mean)``, both rounded to five decimal places.
    """
    digits = 5
    mean_val = round(np.mean(x), digits)
    median_val = round(np.median(x), digits)
    variance_val = round(np.var(x), digits)
    lowest = round(np.min(x), digits)
    highest = round(np.max(x), digits)
    print(
        f"mean: {mean_val} median: {median_val} variance: {variance_val} min: {lowest} max: {highest}"
    )
    return median_val, mean_val
def plot_hist(x, xlabel, median, mean, title=None, fontsize=15):
    """Show a counts histogram and a density histogram of *x* side by side.

    The left panel plots raw counts with vertical dashed lines at *median*
    and *mean*. The right panel plots the normalised histogram
    (``density=True``) overlaid with a Gaussian kernel density estimate.
    The figure is displayed with ``plt.show()``; nothing is returned.

    Args:
        x: Sequence of numbers to plot.
        xlabel: Label for the x-axis of both panels.
        median: x position of the red dashed "median" line.
        mean: x position of the yellow dashed "mean" line.
        title: Title used for both panels; when ``None`` each panel gets its
            own default title.
        fontsize: Font size for axis labels, tick labels and titles.
    """
    plt.figure(figsize=(12, 4))
    bins = get_bins_num(x)

    # Left panel: raw counts with median/mean markers.
    plt.subplot(1, 2, 1)
    plt.hist(x, alpha=0.8, density=False, bins=bins, label="counts")
    plt.axvline(median, color="r", linestyle="--", label="median")
    plt.axvline(mean, color="y", linestyle="--", label="mean")
    plt.ylabel("Counts", fontsize=fontsize)
    plt.legend(loc="best", fontsize=12)
    plt.xlabel(xlabel, fontsize=fontsize)
    plt.xticks(fontsize=fontsize)
    plt.yticks(fontsize=fontsize)
    plt.title(
        str(title) if title is not None else "counts histogram",
        fontsize=fontsize,
    )

    # Right panel: normalised histogram plus KDE curve.
    plt.subplot(1, 2, 2)
    plt.hist(x, alpha=0.8, density=True, bins=bins, label="frequencies")
    # Pin the x-limits chosen for the histogram so that drawing the KDE
    # curve afterwards does not rescale the axis.
    mn, mx = plt.xlim()
    plt.xlim(mn, mx)
    # gaussian_kde cannot be fitted to fewer than two points or to data with
    # zero variance; skip the overlay instead of aborting the whole figure.
    values = np.asarray(x, dtype=float)
    if values.size > 1 and values.min() < values.max():
        kde_xs = np.linspace(mn, mx, 300)
        kde = st.gaussian_kde(x)
        plt.plot(kde_xs, kde.pdf(kde_xs), label="PDF")
    plt.ylabel("Probability", fontsize=fontsize)
    plt.legend(loc="upper right", fontsize=12)
    plt.xlabel(xlabel, fontsize=fontsize)
    plt.title(
        str(title) if title is not None else "frequency histogram",
        fontsize=fontsize,
    )
    plt.xticks(fontsize=fontsize)
    plt.yticks(fontsize=fontsize)
    plt.show()
def plot_hist_from_list_pval(path, filename, xlabel, title=None, fontsize=15):
    """Read one number per line from a text file and plot its histograms.

    Blank (whitespace-only) lines are ignored. The parsed values are
    summarised with ``get_statistics_list`` and then drawn with
    ``plot_hist``.

    Args:
        path: Directory containing the file.
        filename: Name of the file inside *path*.
        xlabel: Label for the x-axis of the plots.
        title: Optional title forwarded to ``plot_hist``.
        fontsize: Font size forwarded to ``plot_hist``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank line is not a valid float, or the file
            contains no numeric values at all.
    """
    import os  # local import: the module does not import os at file level

    file_path = os.path.join(path, filename)
    # The context manager closes the file even when float() raises.
    with open(file_path, "r") as handle:
        data_float = [float(line) for line in handle if line.strip()]
    if not data_float:
        raise ValueError(f"no numeric values found in {file_path!r}")

    median, mean = get_statistics_list(data_float)
    plot_hist(data_float, xlabel, median, mean, title, fontsize=fontsize)