Getting OrientedPoint annotation together with tomo data #813
-
Hi, I want to download a set of tomograms together with their associated OrientedPoint annotations. What would be the right way of getting this information without having to manually check the annotation IDs? Here's how I am grabbing the tomos for SARS-CoV-2, for instance.
|
Beta Was this translation helpful? Give feedback.
Replies: 1 comment 2 replies
-
Hi @shahpnmlab, the easiest way of getting those files is using the AnnotationFile class and a query with multiple conditions. See below for an example:
import json
import numpy as np
import s3fs
from cryoet_data_portal import Client, AnnotationFile
from typing import Tuple
# Names of the runs (tomograms) to fetch annotations for.
tomo_names = [
    "TS_032", "TS_033", "TS_034", "TS_035", "TS_036",
    "TS_037", "TS_038", "TS_039", "TS_040", "TS_041",
    "TS_042", "TS_043", "TS_044", "TS_045", "TS_046",
    "TS_049", "TS_050", "TS_052", "TS_054", "TS_055",
    "TS_056", "TS_057", "TS_058", "TS_059", "TS_061",
    "TS_079", "TS_080", "TS_081", "TS_082", "TS_083",
]
def get_points(
    run_name: str,
    dataset_id: int = 10006,
    object_id: str = "GO:0019062",
) -> Tuple[np.ndarray, np.ndarray]:
    """Return the points (3xN) and orientations (3x3xN) for a given run name.

    Looks up the first "OrientedPoint" annotation file that matches the
    object, dataset and run, reads it from S3 anonymously, and parses one
    JSON record per line.

    Args:
        run_name: Name of the run to fetch annotations for.
        dataset_id: Dataset the run belongs to.
        object_id: Object identifier of the annotated object.

    Returns:
        A ``(points, ori)`` tuple: ``points`` has shape (3, N) and holds the
        x/y/z location of each record; ``ori`` has shape (3, 3, N) and holds
        the rotation matrix of each record.

    Raises:
        IndexError: If the query matches no annotation file.
    """
    client = Client()
    # Multi-condition query (shape type, object, dataset and run name).
    # Keep the conditions ordered from the fewest to the most relationship
    # traversals: the API client currently needs this ordering for the
    # query to work.
    matches = AnnotationFile.find(
        client,
        [
            AnnotationFile.shape_type == "OrientedPoint",
            AnnotationFile.annotation.object_id == object_id,
            AnnotationFile.annotation.tomogram_voxel_spacing.run.dataset_id == dataset_id,
            AnnotationFile.annotation.tomogram_voxel_spacing.run.name == run_name,
        ],
    )
    if not matches:
        # Same exception type as indexing an empty result, but with context.
        raise IndexError(
            f"No OrientedPoint annotation file found for run {run_name!r} "
            f"(dataset {dataset_id}, object {object_id})"
        )
    point_annotation = matches[0]

    # Read the annotation file anonymously from S3.
    fs = s3fs.S3FileSystem(anon=True)
    with fs.open(point_annotation.s3_path, "r") as f:
        lines = f.readlines()

    return _parse_oriented_points(lines)


def _parse_oriented_points(lines: list) -> Tuple[np.ndarray, np.ndarray]:
    """Parse one-JSON-object-per-line point records into numpy arrays.

    Each non-blank line must decode to an object with a ``location`` mapping
    (keys ``x``, ``y``, ``z``) and an ``xyz_rotation_matrix`` entry. Returns
    ``(points, ori)`` with shapes (3, N) and (3, 3, N).
    """
    # Skip blank lines (e.g. a trailing newline) so json.loads never fails
    # on empty input.
    records = [json.loads(line) for line in lines if line.strip()]
    n_parts = len(records)

    # Every slot is written in the loop below, so uninitialized storage is fine.
    points = np.empty((3, n_parts))
    ori = np.empty((3, 3, n_parts))
    for i, record in enumerate(records):
        location = record["location"]
        points[:, i] = (location["x"], location["y"], location["z"])
        ori[:, :, i] = np.array(record["xyz_rotation_matrix"])
    return points, ori
# Get the points (3xN) and orientations (3x3xN) for a single tomogram
res = get_points("TS_032")
# Alternatively, map every run name in tomo_names to its (points, orientations) pair:
# res = {tn: get_points(tn) for tn in tomo_names}
In preparing this example I noticed that there is currently a small bug in the API client: be sure to order the query conditions from the fewest relationship traversals to the most (as in the example above) for this to work. |
Beta Was this translation helpful? Give feedback.
Hi @shahpnmlab,
the easiest way of getting those files is using the AnnotationFile class and a query with multiple conditions. See below for an example: