Skip to content

Commit

Permalink
Merge pull request #114 from nextstrain/victorlin/support-json-gz
Browse files Browse the repository at this point in the history
Support .json.gz files
  • Loading branch information
jameshadfield authored Jan 6, 2025
2 parents 9c25550 + f89f81e commit c802708
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 21 deletions.
74 changes: 54 additions & 20 deletions auspice_client_customisation/handleDroppedFiles.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,22 @@ export const handleDroppedFiles = async (dispatch, files) => {
};

/** promisify FileReader's readAsText() so we can use it within
* async functions via `await readJson(file)`.
* async functions via `await readFile(file)`.
* Adapted from https://stackoverflow.com/a/51026615
*/
function readFile(file, isJSON=true) {
function readFile(file) {
return new Promise((resolve, reject) => {
const fileReader = new window.FileReader();
fileReader.onloadend = function(e) {
if (isJSON) {
const json = JSON.parse(e.target.result);
resolve(json);
fileReader.onloadend = async function(e) {
let data;
if (file.name.toLowerCase().endsWith(".json.gz")) {
data = JSON.parse(await decompressGzipStream(file.stream()));
} else if (file.name.toLowerCase().endsWith(".json")) {
data = JSON.parse(e.target.result);
} else {
resolve(e.target.result);
data = e.target.result;
}
resolve(data);
};
fileReader.onerror = function(e) {
reject(e);
Expand All @@ -43,6 +46,37 @@ function readFile(file, isJSON=true) {
});
}

/**
 * Inflate gzip-compressed data via the Compression Streams API and
 * return the decompressed content as a string.
 * Adapted from https://stackoverflow.com/a/68829631
 *
 * @param {ReadableStream} stream readable stream of gzip-compressed bytes
 * @returns {Promise<string>} the decompressed content, read as text
 */
async function decompressGzipStream(stream) {
  // Wrapping the piped stream in a Response gives us a convenient way to
  // drain it to completion and obtain the result as text.
  const gunzipped = stream.pipeThrough(new DecompressionStream("gzip"));
  const text = await new Response(gunzipped).text();
  return text;
}

/**
 * Determine the dataset name from an Auspice JSON filename.
 *
 * The name is the lowercased filename with its trailing ".json" / ".json.gz"
 * extension removed, the trailing `_${sidecarSuffix}` removed (sidecar files
 * only), and every remaining "_" replaced by "/" for a nextstrain-like path
 * display. A main file and its sidecars therefore map to the same name.
 *
 * @param {string} filename name of the main or sidecar file
 * @param {string} sidecarSuffix For sidecar files, the suffix (e.g. "measurements")
 *   which follows the final "_" and precedes the file extension
 * @returns {string} the dataset name, e.g. "flu/seasonal/h3n2"
 */
function getDatasetName(filename, sidecarSuffix="") {
  let datasetName = filename.toLowerCase();

  // Only strip an extension anchored at the END of the name. Searching for the
  // first ".json" would truncate names which merely contain it, and would chop
  // off the final character of a name which doesn't contain it at all.
  const extension = [".json.gz", ".json"].find((ext) => datasetName.endsWith(ext));
  if (extension) {
    datasetName = datasetName.slice(0, -extension.length);
  }

  // Likewise only remove the sidecar suffix from the END of the name, so that
  // an earlier occurrence of the same text inside the dataset's own name
  // (e.g. "my_measurements_study_measurements.json") is left untouched.
  if (sidecarSuffix) {
    const sidecarMarker = `_${sidecarSuffix.toLowerCase()}`;
    if (datasetName.endsWith(sidecarMarker)) {
      datasetName = datasetName.slice(0, -sidecarMarker.length);
    }
  }

  return datasetName.replaceAll("_", "/"); // nextstrain-like file path display
}

/**
* Parse the dropped files into a collection of `Dataset` objects, which is the structure
* Auspice uses to represent a "main" dataset JSON + any associated sidecar files.
Expand All @@ -59,10 +93,11 @@ async function collectDatasets(dispatch, files) {
measurements: "measurements",
"root-sequence": "rootSequence"
};
const jsonFileTypes = [".json", ".json.gz"];
const newickFileTypes = ["new", "nwk", "newick"];
const isMain = (f) => (
f.name.toLowerCase().endsWith("json") &&
Object.keys(sidecarMappings).every((suffix) => !f.name.toLowerCase().endsWith(`_${suffix}.json`))
jsonFileTypes.some(ext => f.name.toLowerCase().endsWith(ext)) &&
Object.keys(sidecarMappings).every((suffix) => !jsonFileTypes.some(ext => f.name.toLowerCase().endsWith(`_${suffix}${ext}`)))
);
const filesSeen = new Set(); // lowercase names of files we have read (successfully or otherwise)
const logs = [];
Expand All @@ -73,12 +108,11 @@ async function collectDatasets(dispatch, files) {
if (isMain(file)) {
filesSeen.add(nameLower);
try {
const name = file.name.slice(0, -5) // removes ".json" suffix
.replaceAll("_", "/"); // nextstrain-like file path display
const name = getDatasetName(file.name);
const d = new Dataset(name);
d.apiCalls = {}; // ensures no prototypes mistakenly make api calls
d.main = await readFile(file);
datasets[nameLower] = d;
datasets[name] = d;
logs.push(`Read ${file.name} as a main dataset JSON file`);
} catch (e) {
console.error(`${file.name} failed to be read as a main dataset JSON file. Error: ${e}`);
Expand All @@ -88,7 +122,7 @@ async function collectDatasets(dispatch, files) {
try {
const d = new Dataset(file.name);
d.apiCalls = {}; // ensures no prototypes mistakenly make api calls
d.main = newickToAuspiceJson(file.name, await readFile(file, false));
d.main = newickToAuspiceJson(file.name, await readFile(file));
datasets[nameLower] = d;
logs.push(`Read ${file.name} as a newick file`);
} catch (e) {
Expand All @@ -109,7 +143,7 @@ async function collectDatasets(dispatch, files) {
const nameLower = file.name.toLowerCase();
if (filesSeen.has(nameLower)) continue;

if (!nameLower.endsWith("json") && !nameLower.endsWith(".md")) {
if (!jsonFileTypes.some(ext => nameLower.endsWith(ext)) && !nameLower.endsWith(".md")) {
dispatch(errorNotification({
message: `Failed to load ${file.name}.`,
details: "Please refer to the homepage for supported files, and check that your file is named properly."
Expand All @@ -118,12 +152,12 @@ async function collectDatasets(dispatch, files) {
}

for (const [sidecarSuffix, sidecarPropName] of Object.entries(sidecarMappings)) {
if (nameLower.endsWith(`_${sidecarSuffix}.json`)) { // filename looks like a sidecar file?
if (jsonFileTypes.some(ext => nameLower.endsWith(`_${sidecarSuffix}${ext}`))) { // filename looks like a sidecar file?
filesSeen.add(nameLower);
const mainNameLower = nameLower.replace(`_${sidecarSuffix}.json`, '.json');
if (datasets[mainNameLower]) {
datasets[mainNameLower][sidecarPropName] = readFile(file);
logs.push(`Read ${file.name} as a sidecar file of ${datasets[mainNameLower].name}`);
const datasetName = getDatasetName(nameLower, sidecarSuffix);
if (datasets[datasetName]) {
datasets[datasetName][sidecarPropName] = readFile(file);
logs.push(`Read ${file.name} as a sidecar file of ${datasets[datasetName].name}`);
} else {
dispatch(errorNotification({
message: `Failed to load ${file.name}.`,
Expand All @@ -141,7 +175,7 @@ async function collectDatasets(dispatch, files) {
if (nameLower.endsWith(".md")) {
filesSeen.add(nameLower);
logs.push(`Read ${file.name} as a narrative.`);
({datasets, narrative} = await parseNarrative(await readFile(file, false), datasets, logs));
({datasets, narrative} = await parseNarrative(await readFile(file), datasets, logs));
break; // don't consider multiple markdown files
}
}
Expand Down
2 changes: 1 addition & 1 deletion auspice_client_customisation/splash.js
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ const SplashContent = (props) => {
<li>Auspice datasets (a main JSON plus any sidecars). See the
<a href="https://nextstrain.org/docs/bioinformatics/introduction-to-augur"> Nextstrain docs </a>
for how to run the bioinformatics tools to generate these datasets.
Note that it's possible to drag on multiple datasets, however at most two will be loaded, and it's not possible to control the ordering of these datasets!
Note that it's possible to drag on multiple datasets, however at most two will be loaded, and it's not possible to control the ordering of these datasets! Each JSON file can be uncompressed (<Bold>.json</Bold>) or gzip-compressed (<Bold>.json.gz</Bold>)
</li>
<li>A nextstrain narrative ending in <Bold>.md</Bold> and associated datasets (JSONs) - see the
<a href="https://docs.nextstrain.org/en/latest/tutorials/narratives-how-to-write.html"> Nextstrain docs </a>
Expand Down

0 comments on commit c802708

Please sign in to comment.