diff --git a/bionetdb-core/src/main/java/org/opencb/bionetdb/core/analysis/VariantAnalysis.java b/bionetdb-core/src/main/java/org/opencb/bionetdb/core/analysis/VariantAnalysis.java index b9eee2c..c459be8 100644 --- a/bionetdb-core/src/main/java/org/opencb/bionetdb/core/analysis/VariantAnalysis.java +++ b/bionetdb-core/src/main/java/org/opencb/bionetdb/core/analysis/VariantAnalysis.java @@ -94,7 +94,7 @@ public QueryResult getDeNovoVariants(Pedigree pedigree, Query query) th } public QueryResult>> getCompoundHeterozygousVariants(Pedigree pedigree, Query query) - throws BioNetDBException, IOException { + throws BioNetDBException { Map> genotypes = ModeOfInheritance.compoundHeterozygous(pedigree); putGenotypes(query, genotypes); diff --git a/bionetdb-core/src/main/java/org/opencb/bionetdb/core/neo4j/converters/Neo4JRecordToVariantConverter.java b/bionetdb-core/src/main/java/org/opencb/bionetdb/core/neo4j/converters/Neo4JRecordToVariantConverter.java index b051aa5..f432b30 100644 --- a/bionetdb-core/src/main/java/org/opencb/bionetdb/core/neo4j/converters/Neo4JRecordToVariantConverter.java +++ b/bionetdb-core/src/main/java/org/opencb/bionetdb/core/neo4j/converters/Neo4JRecordToVariantConverter.java @@ -3,8 +3,6 @@ import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.databind.MapperFeature; import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.commons.collections.CollectionUtils; -import org.apache.commons.collections.MapUtils; import org.apache.commons.lang.StringUtils; import org.neo4j.driver.v1.Record; import org.opencb.biodata.models.variant.StudyEntry; @@ -32,90 +30,78 @@ public Neo4JRecordToVariantConverter() { @Override public Variant convert(Record record) { try { - List> sampleGT = new ArrayList<>(); - Map sampleMap = new HashMap<>(); + // Create variant and prepare additional attributes + Variant variant = Utils.uncompress(fixString(record.get("attr_core").asString()), Variant.class, objMapper); Map additionalAttributes = new HashMap<>(); - // Create variant - Variant variant = Utils.uncompress(fixString(record.get("attr_core").asString()), Variant.class, objMapper); + // For sample attributes + String sampleNames = null; + String sampleGenotypes = null; for (String attr : record.keys()) { - if (record.get(attr) != null) { - attr = attr.replace("attr_", ""); + if (!record.get(attr).isNull() && !"null".equals(record.get(attr).toString())) { switch (attr) { - case "studies": + case "attr_studies": List studies = Utils.uncompressList(fixString(record.get(attr).asString()), StudyEntry.class, objMapper); variant.setStudies(studies); break; - case "consequenceTypes": + case "attr_consequenceTypes": List ct = Utils.uncompressList(fixString(record.get(attr).asString()), ConsequenceType.class, objMapper); variant.getAnnotation().setConsequenceTypes(ct); break; - case "xrefs": + case "attr_xrefs": List xrefs = Utils.uncompressList(fixString(record.get(attr).asString()), Xref.class, objMapper); variant.getAnnotation().setXrefs(xrefs); break; - case "populationFrequencies": + case "attr_populationFrequencies": List popFreqs = Utils.uncompressList(fixString(record.get(attr).asString()), PopulationFrequency.class, objMapper); variant.getAnnotation().setPopulationFrequencies(popFreqs); break; - case "conservation": + case "attr_conservation": List conservation = Utils.uncompressList(fixString(record.get(attr).asString()), Score.class, objMapper); variant.getAnnotation().setConservation(conservation); break; - case "geneExpression": + case "attr_geneExpression": List expression = Utils.uncompressList(fixString(record.get(attr).asString()), Expression.class, objMapper); variant.getAnnotation().setGeneExpression(expression); break; - case "geneTraitAssociation": + case "attr_geneTraitAssociation": List gta = Utils.uncompressList(fixString(record.get(attr).asString()), GeneTraitAssociation.class, objMapper); variant.getAnnotation().setGeneTraitAssociation(gta); break; - case "geneDrugInteraction": + case "attr_geneDrugInteraction": List gdi = Utils.uncompressList(fixString(record.get(attr).asString()), GeneDrugInteraction.class, objMapper); variant.getAnnotation().setGeneDrugInteraction(gdi); break; - case "variantTraitAssociation": + case "attr_variantTraitAssociation": VariantTraitAssociation vta = Utils.uncompress(fixString(record.get(attr).asString()), VariantTraitAssociation.class, objMapper); variant.getAnnotation().setVariantTraitAssociation(vta); break; - case "traitAssociation": + case "attr_traitAssociation": List ta = Utils.uncompressList(fixString(record.get(attr).asString()), EvidenceEntry.class, objMapper); variant.getAnnotation().setTraitAssociation(ta); break; - case "functionalScore": + case "attr_functionalScore": List fs = Utils.uncompressList(fixString(record.get(attr).asString()), Score.class, objMapper); variant.getAnnotation().setFunctionalScore(fs); break; case NodeBuilder.SAMPLE: - if (!record.get(NodeBuilder.SAMPLE).isNull()) { - List sampleNames = new ArrayList<>(); - String sampleString; - for (Object sample : record.get(NodeBuilder.SAMPLE).asList()) { - sampleNames.add(sample.toString()); - } - sampleString = StringUtils.join(sampleNames, ","); - sampleMap.put(NodeBuilder.SAMPLE, sampleString); - } + sampleNames = StringUtils.join(record.get(NodeBuilder.SAMPLE).asList(), ","); break; case NodeBuilder.GENOTYPE: - if (!record.get(NodeBuilder.GENOTYPE).isNull()) { - for (Object gt : record.get(NodeBuilder.GENOTYPE).asList()) { - sampleGT.add(Collections.singletonList(gt.toString())); - } - } + sampleGenotypes = StringUtils.join(record.get(NodeBuilder.GENOTYPE).asList(), ","); break; default: - if (!attr.equals("core")) { + if (!attr.equals("attr_core")) { String[] split = attr.split("_"); String mainKey = attr; String subKey = attr; @@ -136,18 +122,23 @@ public Variant convert(Record record) { } // Set additional attributes and return variant - if (MapUtils.isNotEmpty(sampleMap)) { - AdditionalAttribute samplesAttribute = new AdditionalAttribute(); - samplesAttribute.setAttribute(sampleMap); - additionalAttributes.put("samples", samplesAttribute); + if (StringUtils.isNotEmpty(sampleNames) && StringUtils.isNotEmpty(sampleGenotypes)) { + AdditionalAttribute sampleAttrs = new AdditionalAttribute(); + Map map = new HashMap<>(); + map.put(NodeBuilder.SAMPLE, sampleNames); + map.put(NodeBuilder.GENOTYPE, sampleGenotypes); + sampleAttrs.setAttribute(map); + additionalAttributes.put("samples", sampleAttrs); - } - if (CollectionUtils.isNotEmpty(sampleGT)) { - List studies = new ArrayList<>(); + // And set sample data StudyEntry studyEntry = new StudyEntry(); - studyEntry.setFormat(Collections.singletonList("GT")).setSamplesData(sampleGT); - studies.add(studyEntry); - variant.setStudies(studies); + studyEntry.setFormat(Collections.singletonList("GT")); + List> sampleData = new ArrayList<>(); + for (String gt : sampleGenotypes.split(",")) { + sampleData.add(Collections.singletonList(gt)); + } + studyEntry.setSamplesData(sampleData); + variant.setStudies(Collections.singletonList(studyEntry)); } variant.getAnnotation().setAdditionalAttributes(additionalAttributes); return variant; diff --git a/bionetdb-core/src/main/java/org/opencb/bionetdb/core/neo4j/query/Neo4JVariantQueryParser.java b/bionetdb-core/src/main/java/org/opencb/bionetdb/core/neo4j/query/Neo4JVariantQueryParser.java index eee4749..61d31c2 100644 --- a/bionetdb-core/src/main/java/org/opencb/bionetdb/core/neo4j/query/Neo4JVariantQueryParser.java +++ b/bionetdb-core/src/main/java/org/opencb/bionetdb/core/neo4j/query/Neo4JVariantQueryParser.java @@ -55,6 +55,16 @@ public static String parse(Query query, QueryOptions options) { } public static String parseProteinNetworkInterpretation(Query query, QueryOptions options, boolean complexOrReaction) { + // Check query + Set includeMap = getIncludeMap(query); + Set excludeMap = getExcludeMap(query); + if (CollectionUtils.isNotEmpty(includeMap) && CollectionUtils.isNotEmpty(excludeMap)) { + throw new IllegalArgumentException("Invalid query: mixing INCLUDE and EXCLUDE parameters is not permitted."); + } + + // Include attributes + List includeAttrs = getIncludeAttributes(includeMap, excludeMap); + String cypher; if (query.containsKey(VariantQueryParam.PANEL.key()) && query.containsKey(VariantQueryParam.GENE.key())) { @@ -64,18 +74,18 @@ public static String parseProteinNetworkInterpretation(Query query, QueryOptions query.remove(VariantQueryParam.GENE.key()); - String panelCypherQuery = getProteinNetworkCypher(query, options, complexOrReaction); + String panelCypherQuery = getProteinNetworkCypher(query, options, includeAttrs, complexOrReaction); query.remove(VariantQueryParam.PANEL.key()); query.put(VariantQueryParam.GENE.key(), geneValues); query.put(VariantQueryParam.ANNOT_BIOTYPE.key(), biotypeValues); query.put(VariantQueryParam.CHROMOSOME.key(), chromValues); - String geneCypherQuery = getProteinNetworkCypher(query, options, complexOrReaction); + String geneCypherQuery = getProteinNetworkCypher(query, options, includeAttrs, complexOrReaction); cypher = panelCypherQuery + "\nUNION\n" + geneCypherQuery; } else { - cypher = getProteinNetworkCypher(query, options, complexOrReaction); + cypher = getProteinNetworkCypher(query, options, includeAttrs, complexOrReaction); } System.out.println(cypher); @@ -315,8 +325,8 @@ private static String buildCypherStatement(Query query, List includeAttr .append("\n"); } - sb.append("MATCH (vo:VARIANT_OBJECT) WHERE vo.id = v.id "); - sb.append("RETURN vo.attr_core AS attr_core"); + sb.append("MATCH (v)-[:VARIANT__VARIANT_OBJECT]-(vo:VARIANT_OBJECT) "); + sb.append("RETURN DISTINCT vo.attr_core AS attr_core"); for (String includeAttr : includeAttrs) { sb.append(", ").append("vo.").append(includeAttr).append(" AS ").append(includeAttr); } @@ -541,7 +551,7 @@ private static String getConditionString(List stringList, String calling } } - private static String getProteinNetworkCypher(Query query, QueryOptions options, boolean complexOrReaction) { + private static String getProteinNetworkCypher(Query query, QueryOptions options, List includeAttrs, boolean complexOrReaction) { StringBuilder cypher = new StringBuilder(); if (!query.containsKey(VariantQueryParam.PANEL.key()) && !query.containsKey(VariantQueryParam.GENE.key())) { @@ -613,10 +623,16 @@ private static String getProteinNetworkCypher(Query query, QueryOptions options, st = cypherStatements.get(i); cypher.append(st.getMatch()).append("\n").append(st.getWhere()).append("\n").append("WITH DISTINCT v").append(systemParams) .append("\n").append("MATCH (s:SAMPLE)-[:SAMPLE__VARIANT_CALL]-(vc:VARIANT_CALL)-[:VARIANT__VARIANT_CALL]-(v:VARIANT)") - .append("\n").append("RETURN DISTINCT v.attr_chromosome AS ").append(NodeBuilder.CHROMOSOME).append(", v.attr_start AS ") - .append(NodeBuilder.START).append(", v.attr_reference AS ").append(NodeBuilder.REFERENCE).append(", v.attr_alternate AS ") - .append(NodeBuilder.ALTERNATE).append(", v.attr_type AS ").append(NodeBuilder.TYPE) - .append(", collect(s.id), collect(vc.attr_GT)").append(systemParams); + .append("\n").append("WITH DISTINCT v, collect(s.id) AS ").append(NodeBuilder.SAMPLE) + .append(", collect(vc.attr_GT) AS ").append(NodeBuilder.GENOTYPE) + .append("\n"); + + cypher.append("MATCH (v)-[:VARIANT__VARIANT_OBJECT]-(vo:VARIANT_OBJECT) "); + cypher.append("RETURN DISTINCT vo.attr_core AS attr_core"); + for (String includeAttr : includeAttrs) { + cypher.append(", ").append("vo.").append(includeAttr).append(" AS ").append(includeAttr); + } + cypher.append(", ").append(NodeBuilder.SAMPLE).append(", ").append(NodeBuilder.GENOTYPE); return cypher.toString(); } diff --git a/bionetdb-core/src/test/java/org/opencb/bionetdb/core/BioNetDbManagerTest.java b/bionetdb-core/src/test/java/org/opencb/bionetdb/core/BioNetDbManagerTest.java index 4985b04..7554453 100644 --- a/bionetdb-core/src/test/java/org/opencb/bionetdb/core/BioNetDbManagerTest.java +++ b/bionetdb-core/src/test/java/org/opencb/bionetdb/core/BioNetDbManagerTest.java @@ -65,7 +65,8 @@ public void setUp() throws Exception { System.out.println(dbConfig); } - bioNetDBConfiguration.getDatabases().get(0).setPort(6660); + //bioNetDBConfiguration.getDatabases().get(0).setPort(6660); + bioNetDBConfiguration.getDatabases().get(0).setPort(27687); bioNetDbManager = new BioNetDbManager(bioNetDBConfiguration); } catch (IOException e) { e.printStackTrace(); @@ -191,40 +192,42 @@ public void dominant() throws BioNetDBException, IOException { Pedigree pedigree = getPedigreeFamily1(disorder); Query query = new Query(); - query.put("panel", "Familial or syndromic hypoparathyroidism"); -// query.put("panel", "Familial or syndromic hypoparathyroidism,Hereditary haemorrhagic telangiectasia," + -// "Neurotransmitter disorders," + -// "Familial Tumours Syndromes of the central & peripheral Nervous system" + -// "Inherited non-medullary thyroid cancer" + -// "Cytopaenias and congenital anaemias" + -// "Ectodermal dysplasia without a known gene mutation" + -// "Hyperammonaemia" + -// "Neuro-endocrine Tumours- PCC and PGL" + -// "Classical tuberous sclerosis" + -// "Familial hypercholesterolaemia" + -// "Pain syndromes" + -// "Congenital myopathy" + -// "Corneal abnormalities" + -// "Hydrocephalus" + -// "Infantile enterocolitis & monogenic inflammatory bowel disease" + -// "Severe familial anorexia" + -// "Haematological malignancies for rare disease" + -// "Long QT syndrome" + -// "Infantile nystagmus"); - //query.put("gene", "BRCA1,BRCA2"); +// query.put("panel", "Familial or syndromic hypoparathyroidism"); + query.put("panel", "Familial or syndromic hypoparathyroidism,Hereditary haemorrhagic telangiectasia," + + "Neurotransmitter disorders," + + "Familial Tumours Syndromes of the central & peripheral Nervous system" + + "Inherited non-medullary thyroid cancer" + + "Cytopaenias and congenital anaemias" + + "Ectodermal dysplasia without a known gene mutation" + + "Hyperammonaemia" + + "Neuro-endocrine Tumours- PCC and PGL" + + "Classical tuberous sclerosis" + + "Familial hypercholesterolaemia" + + "Pain syndromes" + + "Congenital myopathy" + + "Corneal abnormalities" + + "Hydrocephalus" + + "Infantile enterocolitis & monogenic inflammatory bowel disease" + + "Severe familial anorexia" + + "Haematological malignancies for rare disease" + + "Long QT syndrome" + + "Infantile nystagmus"); + query.put("gene", "BRCA1,BRCA2"); query.put("ct", "missense_variant,stop_lost,intron_variant"); query.put("biotype", "protein_coding"); query.put("populationFrequencyAlt", "ALL<0.05"); + excludeAll(query); + QueryResult dominantVariants = bioNetDbManager.getVariantAnalysis().getDominantVariants(pedigree, disorder, query); if (dominantVariants.getResult().size() > 0) { System.out.println("\n"); System.out.println("Variants:"); for (Variant variant : dominantVariants.getResult()) { - System.out.println(variant.toJson()); + System.out.println(variant.toStringSimple()); } } - System.out.println(dominantVariants.first()); +// System.out.println(dominantVariants.first()); } @Test @@ -353,8 +356,6 @@ public void compoundHeterozygous() throws BioNetDBException, IOException { query.put("biotype", "protein_coding"); query.put("populationFrequencyAlt", "ALL<0.05"); - excludeAll(query); - QueryResult>> variantMap = bioNetDbManager.getVariantAnalysis().getCompoundHeterozygousVariants(pedigree, query); if (variantMap.getResult().size() > 0) { System.out.println("\n"); @@ -383,6 +384,8 @@ public void systemDominant() throws BioNetDBException { query.put("biotype", "protein_coding"); query.put("populationFrequencyAlt", "ALL<0.05"); + excludeAll(query); + QueryResult variants = bioNetDbManager.getInterpretationAnalysis().proteinNetworkAnalysis(pedigree, disorder, moi, true, query); if (variants.getResult().size() > 0) { System.out.println("\n");