Skip to content

Commit

Permalink
remove algorithm specific, cell type specific lines
Browse files Browse the repository at this point in the history
  • Loading branch information
jaspreetishar committed Jan 17, 2025
1 parent c4f6b91 commit 82d95cc
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 55 deletions.
45 changes: 27 additions & 18 deletions notebooks/Segmentation_QC.ipynb

Large diffs are not rendered by default.

16 changes: 7 additions & 9 deletions src/celldega/pre/boundary_tile.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def filter_and_save_fine_boundary(coarse_tile, fine_i, fine_j, fine_tile_x_min,
filename = f"{path_output}/cell_tile_{fine_i}_{fine_j}.parquet"
fine_tile_cells.to_parquet(filename)

def process_fine_boundaries(coarse_tile, i, j, coarse_tile_x_min, coarse_tile_x_max, coarse_tile_y_min, coarse_tile_y_max, tile_size, path_output, x_min, y_min, n_fine_tiles_x, n_fine_tiles_y):
def process_fine_boundaries(coarse_tile, i, j, coarse_tile_x_min, coarse_tile_x_max, coarse_tile_y_min, coarse_tile_y_max, tile_size, path_output, x_min, y_min, n_fine_tiles_x, n_fine_tiles_y, max_workers):
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
futures = []
for fine_i in range(n_fine_tiles_x):
Expand Down Expand Up @@ -111,8 +111,6 @@ def get_cell_polygons(technology, path_cell_boundaries, transformation_matrix, p
grouped["geometry"] = grouped.apply(lambda row: Polygon(zip(row["vertex_x"], row["vertex_y"])), axis=1)
cells_orig = gpd.GeoDataFrame(grouped, geometry="geometry")[["geometry"]]

transformation_matrix = pd.read_csv(transformation_matrix).values[:3,:3]

elif technology == "custom":
cells_orig = gpd.read_parquet(path_cell_boundaries)

Expand Down Expand Up @@ -175,15 +173,15 @@ def make_cell_boundary_tiles(

transformation_matrix = pd.read_csv(path_transformation_matrix, header=None, sep=" ").values

gdf_cells = get_cell_polygons(technology, path_output, path_cell_boundaries, transformation_matrix, image_scale, path_meta_cell_micron)

gdf_cells["center_x"] = gdf_cells.geometry.centroid.x
gdf_cells["center_y"] = gdf_cells.geometry.centroid.y

# Ensure the output directory exists
if not os.path.exists(path_output):
os.makedirs(path_output)

gdf_cells = get_cell_polygons(technology, path_cell_boundaries, transformation_matrix, path_output, image_scale, path_meta_cell_micron)

gdf_cells["center_x"] = gdf_cells.geometry.centroid.x
gdf_cells["center_y"] = gdf_cells.geometry.centroid.y

# Calculate tile bounds and fine/coarse tiles
x_min, x_max = tile_bounds["x_min"], tile_bounds["x_max"]
y_min, y_max = tile_bounds["y_min"], tile_bounds["y_max"]
Expand All @@ -206,4 +204,4 @@ def make_cell_boundary_tiles(
(gdf_cells["center_y"] >= coarse_tile_y_min) & (gdf_cells["center_y"] < coarse_tile_y_max)
]
if not coarse_tile.empty:
process_fine_boundaries(coarse_tile, i, j, coarse_tile_x_min, coarse_tile_x_max, coarse_tile_y_min, coarse_tile_y_max, tile_size, path_output, x_min, y_min, n_fine_tiles_x, n_fine_tiles_y)
process_fine_boundaries(coarse_tile, i, j, coarse_tile_x_min, coarse_tile_x_max, coarse_tile_y_min, coarse_tile_y_max, tile_size, path_output, x_min, y_min, n_fine_tiles_x, n_fine_tiles_y, max_workers)
3 changes: 1 addition & 2 deletions src/celldega/pre/trx_tile.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,7 @@ def filter_and_save_fine_tile(coarse_tile, coarse_i, coarse_j, fine_i, fine_j, f

def transform_transcript_coordinates(technology, path_trx, chunk_size, path_transformation_matrix, image_scale=1):

#transformation_matrix = np.loadtxt(path_transformation_matrix)
transformation_matrix = pd.read_csv(path_transformation_matrix).values[:3,:3]
transformation_matrix = np.loadtxt(path_transformation_matrix)

# Load the transcript data based on the technology using Polars
if technology == "MERSCOPE":
Expand Down
48 changes: 22 additions & 26 deletions src/celldega/qc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,46 +85,42 @@ def qc_segmentation(transcript_metadata_file, transcript_data_file, cell_polygon

print("segmentation metrics calculation completed")

def mixed_expression_calc(cell_feature_matrix_xenium_directory, cbg_stp_cellpose_default_file, cbg_stp_instanseg_file, cbg_stp_cellpose2_file, t_cell_specific_genes, b_cell_specific_genes):
def mixed_expression_calc(default_segmentation_segmentation_name, default_segmentation_cell_feature_matrix_path, algorithm_names, algorithm_specific_cbg_files, cell_type_A_specific_genes, cell_type_B_specific_genes, cell_A_name, cell_B_name):

cbg_stp_cellpose2 = pd.read_parquet(cbg_stp_cellpose2_file)
cbg_stp_cellpose_default = pd.read_parquet(cbg_stp_cellpose_default_file)
cbg_stp_instanseg = pd.read_parquet(cbg_stp_instanseg_file)
cbg_dict = {}

cbg_xenium = read_cbg_mtx(cell_feature_matrix_xenium_directory)
for cbg_file, algorithm_name in zip(algorithm_specific_cbg_files, algorithm_names):
cbg_dict[algorithm_name] = pd.read_parquet(cbg_file)

cbg_dict = {}
cbg_dict['xenium_default'] = cbg_xenium
cbg_dict['cellpose_default'] = cbg_stp_cellpose_default
cbg_dict['instanseg'] = cbg_stp_instanseg
cbg_dict['cellpose2'] = cbg_stp_cellpose2
cbg_dict[default_segmentation_segmentation_name] = read_cbg_mtx(default_segmentation_cell_feature_matrix_path)

for algorithm_name, cbg in cbg_dict.items():
t_cell_overlap = [gene for gene in t_cell_specific_genes if gene in cbg.columns]
b_cell_overlap = [gene for gene in b_cell_specific_genes if gene in cbg.columns]

A_cell_overlap = [gene for gene in cell_type_A_specific_genes if gene in cbg.columns]
B_cell_overlap = [gene for gene in cell_type_B_specific_genes if gene in cbg.columns]

cells_with_t_genes = cbg[t_cell_overlap].sum(axis=1) > 0
cells_with_b_genes = cbg[b_cell_overlap].sum(axis=1) > 0
cells_with_A_genes = cbg[A_cell_overlap].sum(axis=1) > 0
cells_with_B_genes = cbg[B_cell_overlap].sum(axis=1) > 0

cells_with_both = cbg[cells_with_t_genes & cells_with_b_genes]
cells_with_both = cbg[cells_with_A_genes & cells_with_B_genes]

t_cell_genes_expressed = cells_with_both[t_cell_overlap].apply(
A_cell_genes_expressed = cells_with_both[A_cell_overlap].apply(
lambda row: {gene: int(row[gene]) for gene in row[row > 0].index}, axis=1
)

b_cell_genes_expressed = cells_with_both[b_cell_overlap].apply(
B_cell_genes_expressed = cells_with_both[B_cell_overlap].apply(
lambda row: {gene: int(row[gene]) for gene in row[row > 0].index}, axis=1
)

results = pd.DataFrame({
"T-cell genes and transcripts": t_cell_genes_expressed,
"B-cell genes and transcripts": b_cell_genes_expressed
f"{cell_A_name} genes and transcripts": A_cell_genes_expressed,
f"{cell_B_name} genes and transcripts": B_cell_genes_expressed
}, index=cells_with_both.index)

results["Total T-cell transcripts"] = t_cell_genes_expressed.apply(lambda x: sum(x.values()))
results["Total B-cell transcripts"] = b_cell_genes_expressed.apply(lambda x: sum(x.values()))
results[f"Total {cell_A_name} transcripts"] = A_cell_genes_expressed.apply(lambda x: sum(x.values()))
results[f"Total {cell_B_name} transcripts"] = B_cell_genes_expressed.apply(lambda x: sum(x.values()))

results["Total"] = t_cell_genes_expressed.apply(lambda x: sum(x.values())) + b_cell_genes_expressed.apply(lambda x: sum(x.values()))
results["Total"] = A_cell_genes_expressed.apply(lambda x: sum(x.values())) + B_cell_genes_expressed.apply(lambda x: sum(x.values()))
results['Technology'] = algorithm_name

sns.set(style='white', rc={'figure.dpi': 250, 'axes.facecolor': (0, 0, 0, 0), 'figure.facecolor': (0, 0, 0, 0)})
Expand All @@ -140,18 +136,18 @@ def mixed_expression_calc(cell_feature_matrix_xenium_directory, cbg_stp_cellpose
g.map_dataframe(
lambda data, **kwargs: sns.histplot(
data=data,
x="Total T-cell transcripts",
y="Total B-cell transcripts",
x=f"Total {cell_A_name} transcripts",
y=f"Total {cell_B_name} transcripts",
bins=15,
cbar=True,
cmap='coolwarm',
vmin=1,
vmax=data["Total T-cell transcripts"].max(),
vmax=data[f"Total {cell_A_name} transcripts"].max(),
**kwargs
)
)

g.set_axis_labels("Total T-cell transcripts", "Total B-cell transcripts")
g.set_axis_labels(f"Total {cell_A_name} transcripts", f"Total {cell_B_name} transcripts")
for ax in g.axes.flat:
ax.xaxis.set_major_locator(plt.MaxNLocator(integer=True))
ax.yaxis.set_major_locator(plt.MaxNLocator(integer=True))
Expand Down

0 comments on commit 82d95cc

Please sign in to comment.