Skip to content

Commit

Permalink
core: load clinical analysis into Neo4J database, #53
Browse files Browse the repository at this point in the history
  • Loading branch information
jtarraga committed May 22, 2019
1 parent c6903e3 commit 51085a3
Show file tree
Hide file tree
Showing 26 changed files with 2,421 additions and 402 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ public static void main(String[] args) {
case "load":
commandExecutor = new LoadCommandExecutor(cliOptionsParser.getLoadCommandOptions());
break;
case "create-csv":
commandExecutor = new ImportCommandExecutor(cliOptionsParser.getCreateCsvCommandOptions());
break;
case "import":
commandExecutor = new ImportCommandExecutor(cliOptionsParser.getImportCommandOptions());
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ public class CliOptionsParser {

private BuildCommandOptions buildCommandOptions;
private LoadCommandOptions loadCommandOptions;
private CreateCsvCommandOptions createCsvCommandOptions;
private ImportCommandOptions importCommandOptions;
private QueryCommandOptions queryCommandOptions;
private VariantAnnotationCommandOptions variantAnnotationCommandOptions;
Expand All @@ -34,13 +35,15 @@ public CliOptionsParser() {

buildCommandOptions = new BuildCommandOptions();
loadCommandOptions = new LoadCommandOptions();
createCsvCommandOptions = new CreateCsvCommandOptions();
importCommandOptions = new ImportCommandOptions();
queryCommandOptions = new QueryCommandOptions();
variantAnnotationCommandOptions = new VariantAnnotationCommandOptions();
expressionCommandOptions = new ExpressionCommandOptions();

jcommander.addCommand("build", buildCommandOptions);
jcommander.addCommand("load", loadCommandOptions);
jcommander.addCommand("create-csv", createCsvCommandOptions);
jcommander.addCommand("import", importCommandOptions);
jcommander.addCommand("query", queryCommandOptions);
jcommander.addCommand("annotation", variantAnnotationCommandOptions);
Expand Down Expand Up @@ -120,8 +123,34 @@ public class LoadCommandOptions {
@Parameter(names = {"-i", "--input"}, description = "Input directory", required = true, arity = 1)
public String input;

@Parameter(names = {"-d", "--data-type"}, description = "Data type. Valid values: clinical-analysis", required = true, arity = 1)
public String dataType;

@Parameter(names = {"--database"}, description = "Data model type to be loaded, i.e. genome, gene, ...", arity = 1)
public String database;
//
// @Parameter(names = {"--exclude"}, description = "Exclude information separated by comma, e.g.:'XREF_DBNAME:Reactome Database ID Release 63'", arity = 1)
// public List<String> exclude;

@DynamicParameter(names = "-D", description = "Dynamic parameters go here", hidden = true)
public Map<String, String> loaderParams = new HashMap<>();

}

@Parameters(commandNames = {"create-csv"}, commandDescription = "Create CSV filesImport the built data models in format CSV files into the database")
public class CreateCsvCommandOptions {

@ParametersDelegate
public CommonCommandOptions commonOptions = commonCommandOptions;

// Mandatory input directory; per the help text it holds the biological files to convert to CSV.
// The description previously ended with a stray, unmatched ')' that showed up in the usage output.
@Parameter(names = {"-i", "--input"}, description = "Input directory that contains the biological files to convert to CSV files", required = true, arity = 1)
public String input;

@Parameter(names = {"-o", "--output"}, description = "Output directory where to save the CSV files to import", arity = 1)
public String output;

@Parameter(names = {"--clinical-analysis"}, description = "Input JSON files contains clinical analysis (otherwise, variants)", arity = 0)
public boolean clinicalAnalysis = false;

@Parameter(names = {"--exclude"}, description = "Exclude information separated by comma, e.g.:'XREF_DBNAME:Reactome Database ID Release 63'", arity = 1)
public List<String> exclude;
Expand All @@ -137,14 +166,9 @@ public class ImportCommandOptions {
@ParametersDelegate
public CommonCommandOptions commonOptions = commonCommandOptions;

@Parameter(names = {"-i", "--input"}, description = "Input directory where the CSV files are located (when used with --create-csv-files parameter, it contains the biological files to convert to CSV files)", required = true, arity = 1)
@Parameter(names = {"-i", "--input"}, description = "Input directory where the CSV files are located", required = true, arity = 1)
public String input;

@Parameter(names = {"-o", "--output"}, description = "Output directory where to save the CSV files to import (used with the --create-csv-files parameter)", arity = 1)
public String output;

@Parameter(names = {"--create-csv-files"}, description = "Create the CSV files from the input biological files", arity = 0)
public boolean createCsvFiles = false;

@Parameter(names = {"--database"}, description = "Data model type to be loaded, i.e. genome, gene, ...", arity = 1)
public String database;
Expand Down Expand Up @@ -341,9 +365,9 @@ public LoadCommandOptions getLoadCommandOptions() {
return loadCommandOptions;
}

public ImportCommandOptions getImportCommandOptions() {
return importCommandOptions;
}
/**
 * @return the options object registered for the {@code create-csv} command
 */
public CreateCsvCommandOptions getCreateCsvCommandOptions() {
    return this.createCsvCommandOptions;
}

/**
 * @return the options object registered for the {@code import} command
 */
public ImportCommandOptions getImportCommandOptions() {
    return this.importCommandOptions;
}

public QueryCommandOptions getQueryCommandOptions() {
return queryCommandOptions;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
import org.opencb.bionetdb.core.exceptions.BioNetDBException;
import org.opencb.bionetdb.core.models.network.Node;
import org.opencb.bionetdb.core.models.network.Relation;
import org.opencb.bionetdb.core.utils.CsvInfo;
Expand All @@ -27,20 +28,31 @@
*/
public class ImportCommandExecutor extends CommandExecutor {

private CliOptionsParser.CreateCsvCommandOptions createCsvCommandOptions;
private CliOptionsParser.ImportCommandOptions importCommandOptions;

/**
 * Creates an executor for the {@code create-csv} command.
 *
 * @param createCsvCommandOptions parsed {@code create-csv} options; its common options supply the
 *                                log level and configuration passed to the parent constructor
 */
public ImportCommandExecutor(CliOptionsParser.CreateCsvCommandOptions createCsvCommandOptions) {
    super(createCsvCommandOptions.commonOptions.logLevel, createCsvCommandOptions.commonOptions.conf);

    // Only the create-csv options are populated; the import options stay unset.
    this.importCommandOptions = null;
    this.createCsvCommandOptions = createCsvCommandOptions;
}

/**
 * Creates an executor for the {@code import} command.
 *
 * @param importCommandOptions parsed {@code import} options; its common options supply the
 *                             log level and configuration passed to the parent constructor
 */
public ImportCommandExecutor(CliOptionsParser.ImportCommandOptions importCommandOptions) {
    super(importCommandOptions.commonOptions.logLevel, importCommandOptions.commonOptions.conf);

    // Only the import options are populated; the create-csv options stay unset.
    this.importCommandOptions = importCommandOptions;
    this.createCsvCommandOptions = null;
}

/**
 * Runs whichever command this executor was constructed for.
 * <p>
 * NOTE(review): this span is a rendered diff, so removed and added lines appear together. The
 * signature, the first condition and the {@code else} each show up twice; presumably the first
 * line of each pair is the pre-commit version. Confirm against the repository.
 * <p>
 * In the newer lines, {@code createCsvFiles()} is called when {@code createCsvCommandOptions} is
 * non-null; otherwise {@code importCsvFiles()} is called when {@code importCommandOptions} is
 * non-null.
 *
 * @throws BioNetDBException if neither options object is set
 */
@Override
public void execute() {
if (importCommandOptions.createCsvFiles) {
public void execute() throws BioNetDBException {
if (createCsvCommandOptions != null) {
createCsvFiles();
} else {
} else if (importCommandOptions != null) {
importCsvFiles();
} else {
throw new BioNetDBException("Import commandline error");
}
}

Expand All @@ -49,10 +61,10 @@ private void createCsvFiles() {
long start;

// Check input and output directories
Path inputPath = Paths.get(importCommandOptions.input);
Path inputPath = Paths.get(createCsvCommandOptions.input);
FileUtils.checkDirectory(inputPath);

Path outputPath = Paths.get(importCommandOptions.output);
Path outputPath = Paths.get(createCsvCommandOptions.output);
FileUtils.checkDirectory(outputPath);

// Prepare CSV object
Expand Down Expand Up @@ -107,7 +119,7 @@ private void createCsvFiles() {
FileUtils.checkFile(geneFile.toPath());
}
start = System.currentTimeMillis();
importer.indexingGenes(geneFile.toPath(), outputPath);
importer.indexingGenes(geneFile.toPath());
geneIndexingTime = (System.currentTimeMillis() - start) / 1000;
logger.info("Gene indexing done in {} s", geneIndexingTime);

Expand All @@ -119,7 +131,7 @@ private void createCsvFiles() {
FileUtils.checkFile(proteinFile.toPath());
}
start = System.currentTimeMillis();
importer.indexingProteins(proteinFile.toPath(), outputPath);
importer.indexingProteins(proteinFile.toPath());
proteinIndexingTime = (System.currentTimeMillis() - start) / 1000;
logger.info("Protein indexing done in {} s", proteinIndexingTime);

Expand All @@ -144,7 +156,7 @@ private void createCsvFiles() {
}

// Parse BioPAX files
Map<String, Set<String>> filters = parseFilters(importCommandOptions.exclude);
Map<String, Set<String>> filters = parseFilters(createCsvCommandOptions.exclude);
BPAXProcessing bpaxProcessing = new BPAXProcessing(importer);
Neo4jBioPaxImporter bioPAXImporter = new Neo4jBioPaxImporter(csv, filters, bpaxProcessing);
start = System.currentTimeMillis();
Expand All @@ -153,10 +165,15 @@ private void createCsvFiles() {
bioPaxTime = (System.currentTimeMillis() - start) / 1000;


// Parse JSON variant files
start = System.currentTimeMillis();
importer.addVariantFiles(jsonFiles);
long variantTime = (System.currentTimeMillis() - start) / 1000;
start = System.currentTimeMillis();
if (createCsvCommandOptions.clinicalAnalysis) {
// Parse JSON variant files
importer.addClinicalAnalysisFiles(jsonFiles);
} else {
// Parse JSON variant files
importer.addVariantFiles(jsonFiles);
}
long jsonTime = (System.currentTimeMillis() - start) / 1000;

// Close CSV files
csv.close();
Expand All @@ -166,7 +183,7 @@ private void createCsvFiles() {
logger.info("Gene panels processing in {} s", genePanelsTime);
logger.info("miRNA indexing in {} s", miRnaIndexingTime);
logger.info("BioPAX processing in {} s", bioPaxTime);
logger.info("Variant processing in {} s", variantTime);
logger.info((createCsvCommandOptions.clinicalAnalysis ? "Clinical analysis" : "Variant") + " processing in {} s", jsonTime);
} catch (IOException e) {
logger.error("Error generation CSV files: {}", e.getMessage());
e.printStackTrace();
Expand Down Expand Up @@ -200,7 +217,7 @@ private void importCsvFiles() {
sb.setLength(0);
sb.append(neo4jHome);
sb.append("/bin/neo4j-admin import --id-type INTEGER --delimiter=\"" + StringEscapeUtils.escapeJava(CsvInfo.SEPARATOR) + "\" "
+ "--ignore-duplicate-nodes --ignore-missing-nodes");
+ "--ignore-duplicate-nodes --ignore-missing-nodes");

// Retrieving files from the input directory
List<File> relationFiles = new ArrayList<>();
Expand Down Expand Up @@ -307,9 +324,9 @@ private String removeCsvExt(String filename) {
return name;
}

//-------------------------------------------------------------------------
// BioPAX importer callback object
//-------------------------------------------------------------------------
//-------------------------------------------------------------------------
// BioPAX importer callback object
//-------------------------------------------------------------------------

public class BPAXProcessing implements Neo4jBioPaxImporter.BioPAXProcessing {
private Neo4jCsvImporter importer;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,21 +1,28 @@
package org.opencb.bionetdb.app.cli;

import org.apache.commons.lang.StringUtils;
import org.opencb.bionetdb.core.BioNetDbManager;
import org.opencb.bionetdb.core.exceptions.BioNetDBException;
import org.opencb.bionetdb.core.neo4j.Neo4JLoader;
import org.opencb.commons.utils.FileUtils;
import org.opencb.commons.utils.ListUtils;

import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.*;

import static org.neo4j.driver.v1.Values.parameters;

/**
* Created by imedina on 12/08/15.
*/
public class LoadCommandExecutor extends CommandExecutor {

private final String CLINICAL_ANALYSIS = "clinical-analysis";
private final Set<String> dataTypes = new HashSet<>(Arrays.asList(CLINICAL_ANALYSIS));

private CliOptionsParser.LoadCommandOptions loadCommandOptions;

public LoadCommandExecutor(CliOptionsParser.LoadCommandOptions loadCommandOptions) {
Expand All @@ -38,24 +45,31 @@ public void execute() {
// BioNetDbManager checks if database parameter is empty
BioNetDbManager bioNetDbManager = new BioNetDbManager(loadCommandOptions.database, configuration);

Map<String, Set<String>> filter = null;
if (ListUtils.isNotEmpty(loadCommandOptions.exclude)) {
filter = new HashMap<>();
for (String exclude: loadCommandOptions.exclude) {
String split[] = exclude.split(":");
if (split.length == 2) {
if (!filter.containsKey(split[0])) {
filter.put(split[0], new HashSet<>());
}
filter.get(split[0]).add(split[1]);
}
if (dataTypes.contains(loadCommandOptions.dataType)) {
if (CLINICAL_ANALYSIS.equals(loadCommandOptions.dataType)) {
bioNetDbManager.loadClinicalAnalysis(inputPath);
}
} else {
throw new BioNetDBException("Unknown data type to load: " + loadCommandOptions.dataType
+ ". Valid data types values are: " + StringUtils.join(dataTypes, ","));
}
bioNetDbManager.loadBioPax(inputPath, filter);

// Map<String, Set<String>> filter = null;
// if (ListUtils.isNotEmpty(loadCommandOptions.exclude)) {
// filter = new HashMap<>();
// for (String exclude: loadCommandOptions.exclude) {
// String split[] = exclude.split(":");
// if (split.length == 2) {
// if (!filter.containsKey(split[0])) {
// filter.put(split[0], new HashSet<>());
// }
// filter.get(split[0]).add(split[1]);
// }
// }
// }
// bioNetDbManager.loadBioPax(inputPath, filter);
} catch (Exception e) {
e.printStackTrace();
}


}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package org.opencb.bionetdb.app;

import org.junit.Test;

import static org.junit.Assert.*;

/**
 * Tests for the BioNetDB command line entry point.
 */
public class BioNetDBMainTest {

    /**
     * Assembles the {@code create-csv --clinical-analysis} command line for a local data directory.
     * The command is only built as a string; nothing is executed or asserted here.
     */
    @Test
    public void createCsvClinicalAnalysis() {
        final String clinicalAnalysisDir = "/home/jtarraga/data150/clinicalAnalysis";
        final String command = new StringBuilder("~/appl/bionetdb/build/bin/bionetdb.sh create-csv -i ")
                .append(clinicalAnalysisDir)
                .append("/input/ -o csv/ --clinical-analysis")
                .toString();
    }

}
17 changes: 16 additions & 1 deletion bionetdb-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,22 @@
<dependency>
<groupId>org.opencb.opencga</groupId>
<artifactId>opencga-storage-core</artifactId>
<version>1.4.1-dev</version>
<version>${opencga.version}</version>
</dependency>
<dependency>
<groupId>org.neo4j</groupId>
<artifactId>neo4j</artifactId>
<version>3.2.8-SNAPSHOT</version>
</dependency>
<!-- neo4j-kernel is declared once; the identical second declaration was redundant and triggers
     Maven's "dependency must be unique" model warning. -->
<dependency>
<groupId>org.neo4j</groupId>
<artifactId>neo4j-kernel</artifactId>
<version>3.2.8-SNAPSHOT</version>
</dependency>
</dependencies>

Expand Down
Loading

0 comments on commit 51085a3

Please sign in to comment.