Skip to content

Commit

Permalink
Update database.py
Browse files: browse the repository at this point in the history
  • Loading branch information
KalinNonchev authored Jun 30, 2021
1 parent 79860bb commit 6dbb88f
Showing 1 changed file with 7 additions and 4 deletions.
11 changes: 7 additions & 4 deletions gnomad_db/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,13 @@

class gnomAD_DB:

def __init__(self, genodb_path, parallel=True, cpu_count=512):
def __init__(self, genodb_path, parallel=False, cpu_count=None):


self.cpu_count = min(cpu_count, int(multiprocessing.cpu_count()))
self.parallel = parallel

if self.parallel:
self.cpu_count = cpu_count if isinstance(cpu_count, int) else int(multiprocessing.cpu_count())

self.db_file = os.path.join(genodb_path, 'gnomad_db.sqlite3')

Expand Down Expand Up @@ -137,7 +140,7 @@ def get_maf_from_df(self, var_df: pd.DataFrame, query: str="AF") -> pd.Series:
if self.parallel and len(var_df) > 100 * self.cpu_count:
out = np.array_split(var_df, self.cpu_count)
assert len(out) == self.cpu_count
out = Parallel(self.cpu_count)(delayed(self._get_maf_from_df)(df, query) for df in out)
out = Parallel(self.cpu_count, prefer="threads")(delayed(self._get_maf_from_df)(df, query) for df in out)
out = pd.concat(out)
out.set_index(var_df.index, inplace=True)
assert len(var_df) == len(out)
Expand Down Expand Up @@ -204,4 +207,4 @@ def download_and_unzip(url, output_path):
"""
download database and unzip file
"""
utils.download_and_unzip_file(url, output_path)
utils.download_and_unzip_file(url, output_path)

0 comments on commit 6dbb88f

Please sign in to comment.