Skip to content

Commit

Permalink
Instead of global filtering, pick the top 5 highest-quality neighbors for…
Browse files Browse the repository at this point in the history
… all nodes; this improves graph coverage and quality
  • Loading branch information
borauyar committed Jul 6, 2024
1 parent 68f4d90 commit fa91072
Showing 1 changed file with 1 addition and 12 deletions.
13 changes: 1 addition & 12 deletions flexynesis/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -735,9 +735,6 @@ def __init__(self, multiomic_dataset, interaction_df):
# Store labels for all samples
self.labels = {target_name: labels for target_name, labels in self.multiomic_dataset.ann.items()}

# Store sample identifiers
self.samples = self.multiomic_dataset.samples

def find_common_features(self):
common_features = set.intersection(*(set(features) for features in self.multiomic_dataset.features.values()))
interaction_genes = set(self.interaction_df['protein1']).union(set(self.interaction_df['protein2']))
Expand Down Expand Up @@ -949,18 +946,10 @@ def read_user_graph(fpath, sep=" ", header=None, **pd_read_csv_kw):
"""
return pd.read_csv(fpath, sep=sep, header=header, **pd_read_csv_kw)


def read_stringdb_links(fname, min_score=400, quantile=0.9):
    """Read a space-separated protein links table and keep the strongest edges.

    The file must have a header row with at least the columns ``protein1``,
    ``protein2`` and ``combined_score``.

    Filtering happens in two stages:

    1. rows with ``combined_score <= min_score`` are dropped;
    2. of the remaining rows, only those strictly above the ``quantile``
       quantile of ``combined_score`` are kept (the quantile is computed
       *after* stage 1, not on the full table).

    Finally, everything up to and including the last ``"."`` is removed from
    the identifiers in ``protein1`` and ``protein2`` (presumably a taxon
    prefix such as ``9606.`` -- confirm against the input files).

    Args:
        fname: Path (or file-like object) accepted by ``pandas.read_csv``.
        min_score: Absolute lower bound (exclusive) on ``combined_score``.
        quantile: Quantile in ``[0, 1]`` used as the second, relative lower
            bound (exclusive) on ``combined_score``.

    Returns:
        A new ``pandas.DataFrame`` with the surviving rows; the original row
        index labels are preserved.
    """
    df = pd.read_csv(fname, header=0, sep=" ")
    df = df[df.combined_score > min_score]
    # Take an explicit copy so the column assignments below operate on an
    # independent frame rather than on a filtered view of the parsed table.
    df = df[df.combined_score > df.combined_score.quantile(quantile)].copy()
    # Series.str works on every pandas version (DataFrame.map needs >= 2.1).
    for column in ("protein1", "protein2"):
        df[column] = df[column].str.split(".").str[-1]
    return df

def read_stringdb_links_test(fname):
df = pd.read_csv(fname, header=0, sep=" ")
df = df[df.combined_score > 800]
df = df[df.combined_score > df.combined_score.quantile(0.9)]
#df = df[df.combined_score > df.combined_score.quantile(0.9)]
df_expanded = pd.concat([
df.rename(columns={'protein1': 'protein', 'protein2': 'partner'}),
df.rename(columns={'protein2': 'protein', 'protein1': 'partner'})
Expand Down

0 comments on commit fa91072

Please sign in to comment.