Skip to content

Commit

Permalink
Update SummarizeQuantTab.py (#238)
Browse files Browse the repository at this point in the history
  • Loading branch information
acesnik authored Jun 19, 2024
1 parent 70db8c4 commit a4c9f23
Showing 1 changed file with 41 additions and 20 deletions.
61 changes: 41 additions & 20 deletions Spritz/workflow/scripts/SummarizeQuantTab.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,44 @@
import sys, os
import sys
import os
import numpy as np
import pandas as pd

# Flat, top-level version of the script: merges the "TPM" column of several
# tab-separated tables into one CSV.
# NOTE(review): this looks like the pre-change side of the diff, and leading
# indentation was lost in this rendering -- the loop/branch nesting is inferred
# from the statements themselves; confirm against the repository history.
# argv[1] is the output path; every remaining argument is an input table.
outtpms = sys.argv[1]
files = sys.argv[2:]
# ids holds the "Gene ID" column of the first table read; tpms collects one
# "TPM" column per input file.
ids = []
tpms = []
for file in files:
print(f"reading {os.path.basename(file)}")
table=pd.read_csv(file, sep="\t")
currIds = table["Gene ID"]
currTpms = table["TPM"]
# The first file read defines the reference ID order.
if len(ids) == 0: ids = np.asarray(currIds)
# NOTE(review): all(a != b) is true only when EVERY position differs, so a
# partial mismatch is not reported; the message is printed but nothing stops
# the run.
elif all(np.array(currIds) != ids): print("error with ids")
tpms.append(currTpms)
# Put the ID row in front so the stacked array has IDs in row 0 and one row of
# TPM values per file below it.
tpms.insert(0, np.asarray(ids))

print(f"Saving to {outtpms} ...")
dataframe = np.row_stack(tpms)
# Column 0 of each data row is overwritten with the file's base name up to its
# first "." so it can be used as the row label.
# NOTE(review): that column held the TPM values of the first ID, and the data
# slice below starts at column 1, so the first ID is absent from the output.
dataframe[1:,0] = [os.path.basename(file).split(".")[0] for file in files]
# Rows (one per file) are sorted by label, then transposed so the CSV has one
# row per ID and one column per file.
pddf = pd.DataFrame(dataframe[1:,1:], index=dataframe[1:,0], columns=dataframe[0,1:]).sort_index()
pddf.T.to_csv(outtpms)
def read_tpm_file(file_path):
    """Read a tab-separated quantification table and return its IDs and TPMs.

    Args:
        file_path: Path of a tab-delimited file whose header row contains
            "Gene ID" and "TPM" columns (other columns are ignored).

    Returns:
        A ``(gene_ids, tpms)`` tuple of NumPy arrays holding the two columns
        in file row order.

    Raises:
        KeyError: If either the "Gene ID" or the "TPM" column is missing.
    """
    table = pd.read_csv(file_path, sep="\t")
    # Series.to_numpy() is the pandas-recommended replacement for .values.
    return table["Gene ID"].to_numpy(), table["TPM"].to_numpy()

def main():
    """Merge the TPM columns of several quantification tables into one CSV.

    Command line::

        SummarizeQuantTab.py <output_file> <input_file_1> <input_file_2> ...

    Each input is read with ``read_tpm_file``.  Every input must list the same
    gene IDs in the same order as the first one.  The output CSV has one row
    per gene ID and one column per input file; a column is labelled with the
    file's base name up to its first "." and columns are sorted by label.

    Exits with status 1, after printing a message, when fewer than two
    arguments are supplied or when a file's gene IDs differ from those of the
    first file.
    """
    if len(sys.argv) < 3:
        print("Usage: SummarizeQuantTab.py <output_file> <input_file_1> <input_file_2> ...")
        sys.exit(1)

    output_file = sys.argv[1]
    input_files = sys.argv[2:]

    # None marks "no file read yet".  Testing the ID array itself for
    # truthiness raises ValueError as soon as it holds more than one element,
    # which used to abort the run on the second input file.
    gene_ids = None
    tpm_rows = []

    for file in input_files:
        curr_ids, curr_tpms = read_tpm_file(file)

        if gene_ids is None:
            gene_ids = curr_ids
        elif not np.array_equal(curr_ids, gene_ids):
            print(f"Error with IDs in file: {os.path.basename(file)}")
            sys.exit(1)

        tpm_rows.append(curr_tpms)

    sample_names = [os.path.basename(file).split(".")[0] for file in input_files]

    # Build the samples-by-genes table directly.  Stacking the ID row on top
    # and overwriting column 0 with the sample names discarded the TPM values
    # of the first gene; here every gene is kept.
    pddf = pd.DataFrame(
        np.vstack(tpm_rows), index=sample_names, columns=gene_ids
    ).sort_index()

    # Transpose so genes are rows and samples are columns in the CSV.
    pddf.T.to_csv(output_file)

    print(f"Saved summarized data to {output_file}")


if __name__ == "__main__":
    main()

0 comments on commit a4c9f23

Please sign in to comment.