Skip to content

Commit

Permalink
core: improve converter: Neo4j node to Variant object
Browse files Browse the repository at this point in the history
  • Loading branch information
jtarraga committed Apr 5, 2019
1 parent 9f8969e commit c6903e3
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 81 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ public QueryResult<Variant> getDeNovoVariants(Pedigree pedigree, Query query) th
}

public QueryResult<Map<String, List<Variant>>> getCompoundHeterozygousVariants(Pedigree pedigree, Query query)
throws BioNetDBException, IOException {
throws BioNetDBException {
Map<String, List<String>> genotypes = ModeOfInheritance.compoundHeterozygous(pedigree);
putGenotypes(query, genotypes);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.databind.MapperFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.collections.MapUtils;
import org.apache.commons.lang.StringUtils;
import org.neo4j.driver.v1.Record;
import org.opencb.biodata.models.variant.StudyEntry;
Expand Down Expand Up @@ -32,90 +30,78 @@ public Neo4JRecordToVariantConverter() {
@Override
public Variant convert(Record record) {
try {
List<List<String>> sampleGT = new ArrayList<>();
Map<String, String> sampleMap = new HashMap<>();
// Create variant and prepare additional attributes
Variant variant = Utils.uncompress(fixString(record.get("attr_core").asString()), Variant.class, objMapper);
Map<String, AdditionalAttribute> additionalAttributes = new HashMap<>();

// Create variant
Variant variant = Utils.uncompress(fixString(record.get("attr_core").asString()), Variant.class, objMapper);
// For sample attributes
String sampleNames = null;
String sampleGenotypes = null;

for (String attr : record.keys()) {
if (record.get(attr) != null) {
attr = attr.replace("attr_", "");
if (!record.get(attr).isNull() && !"null".equals(record.get(attr).toString())) {
switch (attr) {
case "studies":
case "attr_studies":
List<org.opencb.biodata.models.variant.StudyEntry> studies =
Utils.uncompressList(fixString(record.get(attr).asString()), StudyEntry.class, objMapper);
variant.setStudies(studies);
break;
case "consequenceTypes":
case "attr_consequenceTypes":
List<ConsequenceType> ct = Utils.uncompressList(fixString(record.get(attr).asString()), ConsequenceType.class,
objMapper);
variant.getAnnotation().setConsequenceTypes(ct);
break;
case "xrefs":
case "attr_xrefs":
List<Xref> xrefs = Utils.uncompressList(fixString(record.get(attr).asString()), Xref.class, objMapper);
variant.getAnnotation().setXrefs(xrefs);
break;
case "populationFrequencies":
case "attr_populationFrequencies":
List<PopulationFrequency> popFreqs = Utils.uncompressList(fixString(record.get(attr).asString()),
PopulationFrequency.class, objMapper);
variant.getAnnotation().setPopulationFrequencies(popFreqs);
break;
case "conservation":
case "attr_conservation":
List<Score> conservation = Utils.uncompressList(fixString(record.get(attr).asString()), Score.class, objMapper);
variant.getAnnotation().setConservation(conservation);
break;
case "geneExpression":
case "attr_geneExpression":
List<Expression> expression = Utils.uncompressList(fixString(record.get(attr).asString()), Expression.class,
objMapper);
variant.getAnnotation().setGeneExpression(expression);
break;
case "geneTraitAssociation":
case "attr_geneTraitAssociation":
List<GeneTraitAssociation> gta = Utils.uncompressList(fixString(record.get(attr).asString()),
GeneTraitAssociation.class, objMapper);
variant.getAnnotation().setGeneTraitAssociation(gta);
break;
case "geneDrugInteraction":
case "attr_geneDrugInteraction":
List<GeneDrugInteraction> gdi = Utils.uncompressList(fixString(record.get(attr).asString()),
GeneDrugInteraction.class, objMapper);
variant.getAnnotation().setGeneDrugInteraction(gdi);
break;
case "variantTraitAssociation":
case "attr_variantTraitAssociation":
VariantTraitAssociation vta = Utils.uncompress(fixString(record.get(attr).asString()),
VariantTraitAssociation.class, objMapper);
variant.getAnnotation().setVariantTraitAssociation(vta);
break;
case "traitAssociation":
case "attr_traitAssociation":
List<EvidenceEntry> ta = Utils.uncompressList(fixString(record.get(attr).asString()), EvidenceEntry.class,
objMapper);
variant.getAnnotation().setTraitAssociation(ta);
break;
case "functionalScore":
case "attr_functionalScore":
List<Score> fs = Utils.uncompressList(fixString(record.get(attr).asString()), Score.class, objMapper);
variant.getAnnotation().setFunctionalScore(fs);
break;
case NodeBuilder.SAMPLE:
if (!record.get(NodeBuilder.SAMPLE).isNull()) {
List<String> sampleNames = new ArrayList<>();
String sampleString;
for (Object sample : record.get(NodeBuilder.SAMPLE).asList()) {
sampleNames.add(sample.toString());
}
sampleString = StringUtils.join(sampleNames, ",");
sampleMap.put(NodeBuilder.SAMPLE, sampleString);
}
sampleNames = StringUtils.join(record.get(NodeBuilder.SAMPLE).asList(), ",");
break;
case NodeBuilder.GENOTYPE:
if (!record.get(NodeBuilder.GENOTYPE).isNull()) {
for (Object gt : record.get(NodeBuilder.GENOTYPE).asList()) {
sampleGT.add(Collections.singletonList(gt.toString()));
}
}
sampleGenotypes = StringUtils.join(record.get(NodeBuilder.GENOTYPE).asList(), ",");
break;

default:
if (!attr.equals("core")) {
if (!attr.equals("attr_core")) {
String[] split = attr.split("_");
String mainKey = attr;
String subKey = attr;
Expand All @@ -136,18 +122,23 @@ public Variant convert(Record record) {
}

// Set additional attributes and return variant
if (MapUtils.isNotEmpty(sampleMap)) {
AdditionalAttribute samplesAttribute = new AdditionalAttribute();
samplesAttribute.setAttribute(sampleMap);
additionalAttributes.put("samples", samplesAttribute);
if (StringUtils.isNotEmpty(sampleNames) && StringUtils.isNotEmpty(sampleGenotypes)) {
AdditionalAttribute sampleAttrs = new AdditionalAttribute();
Map<String, String> map = new HashMap<>();
map.put(NodeBuilder.SAMPLE, sampleNames);
map.put(NodeBuilder.GENOTYPE, sampleGenotypes);
sampleAttrs.setAttribute(map);
additionalAttributes.put("samples", sampleAttrs);

}
if (CollectionUtils.isNotEmpty(sampleGT)) {
List<StudyEntry> studies = new ArrayList<>();
// And set sample data
StudyEntry studyEntry = new StudyEntry();
studyEntry.setFormat(Collections.singletonList("GT")).setSamplesData(sampleGT);
studies.add(studyEntry);
variant.setStudies(studies);
studyEntry.setFormat(Collections.singletonList("GT"));
List<List<String>> sampleData = new ArrayList<>();
for (String gt : sampleGenotypes.split(",")) {
sampleData.add(Collections.singletonList(gt));
}
studyEntry.setSamplesData(sampleData);
variant.setStudies(Collections.singletonList(studyEntry));
}
variant.getAnnotation().setAdditionalAttributes(additionalAttributes);
return variant;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,16 @@ public static String parse(Query query, QueryOptions options) {
}

public static String parseProteinNetworkInterpretation(Query query, QueryOptions options, boolean complexOrReaction) {
// Check query
Set<VariantQueryParam> includeMap = getIncludeMap(query);
Set<VariantQueryParam> excludeMap = getExcludeMap(query);
if (CollectionUtils.isNotEmpty(includeMap) && CollectionUtils.isNotEmpty(excludeMap)) {
throw new IllegalArgumentException("Invalid query: mixing INCLUDE and EXCLUDE parameters is not permitted.");
}

// Include attributes
List<String> includeAttrs = getIncludeAttributes(includeMap, excludeMap);

String cypher;

if (query.containsKey(VariantQueryParam.PANEL.key()) && query.containsKey(VariantQueryParam.GENE.key())) {
Expand All @@ -64,18 +74,18 @@ public static String parseProteinNetworkInterpretation(Query query, QueryOptions

query.remove(VariantQueryParam.GENE.key());

String panelCypherQuery = getProteinNetworkCypher(query, options, complexOrReaction);
String panelCypherQuery = getProteinNetworkCypher(query, options, includeAttrs, complexOrReaction);

query.remove(VariantQueryParam.PANEL.key());
query.put(VariantQueryParam.GENE.key(), geneValues);
query.put(VariantQueryParam.ANNOT_BIOTYPE.key(), biotypeValues);
query.put(VariantQueryParam.CHROMOSOME.key(), chromValues);

String geneCypherQuery = getProteinNetworkCypher(query, options, complexOrReaction);
String geneCypherQuery = getProteinNetworkCypher(query, options, includeAttrs, complexOrReaction);

cypher = panelCypherQuery + "\nUNION\n" + geneCypherQuery;
} else {
cypher = getProteinNetworkCypher(query, options, complexOrReaction);
cypher = getProteinNetworkCypher(query, options, includeAttrs, complexOrReaction);
}

System.out.println(cypher);
Expand Down Expand Up @@ -315,8 +325,8 @@ private static String buildCypherStatement(Query query, List<String> includeAttr
.append("\n");
}

sb.append("MATCH (vo:VARIANT_OBJECT) WHERE vo.id = v.id ");
sb.append("RETURN vo.attr_core AS attr_core");
sb.append("MATCH (v)-[:VARIANT__VARIANT_OBJECT]-(vo:VARIANT_OBJECT) ");
sb.append("RETURN DISTINCT vo.attr_core AS attr_core");
for (String includeAttr : includeAttrs) {
sb.append(", ").append("vo.").append(includeAttr).append(" AS ").append(includeAttr);
}
Expand Down Expand Up @@ -541,7 +551,7 @@ private static String getConditionString(List<String> stringList, String calling
}
}

private static String getProteinNetworkCypher(Query query, QueryOptions options, boolean complexOrReaction) {
private static String getProteinNetworkCypher(Query query, QueryOptions options, List<String> includeAttrs, boolean complexOrReaction) {
StringBuilder cypher = new StringBuilder();

if (!query.containsKey(VariantQueryParam.PANEL.key()) && !query.containsKey(VariantQueryParam.GENE.key())) {
Expand Down Expand Up @@ -613,10 +623,16 @@ private static String getProteinNetworkCypher(Query query, QueryOptions options,
st = cypherStatements.get(i);
cypher.append(st.getMatch()).append("\n").append(st.getWhere()).append("\n").append("WITH DISTINCT v").append(systemParams)
.append("\n").append("MATCH (s:SAMPLE)-[:SAMPLE__VARIANT_CALL]-(vc:VARIANT_CALL)-[:VARIANT__VARIANT_CALL]-(v:VARIANT)")
.append("\n").append("RETURN DISTINCT v.attr_chromosome AS ").append(NodeBuilder.CHROMOSOME).append(", v.attr_start AS ")
.append(NodeBuilder.START).append(", v.attr_reference AS ").append(NodeBuilder.REFERENCE).append(", v.attr_alternate AS ")
.append(NodeBuilder.ALTERNATE).append(", v.attr_type AS ").append(NodeBuilder.TYPE)
.append(", collect(s.id), collect(vc.attr_GT)").append(systemParams);
.append("\n").append("WITH DISTINCT v, collect(s.id) AS ").append(NodeBuilder.SAMPLE)
.append(", collect(vc.attr_GT) AS ").append(NodeBuilder.GENOTYPE)
.append("\n");

cypher.append("MATCH (v)-[:VARIANT__VARIANT_OBJECT]-(vo:VARIANT_OBJECT) ");
cypher.append("RETURN DISTINCT vo.attr_core AS attr_core");
for (String includeAttr : includeAttrs) {
cypher.append(", ").append("vo.").append(includeAttr).append(" AS ").append(includeAttr);
}
cypher.append(", ").append(NodeBuilder.SAMPLE).append(", ").append(NodeBuilder.GENOTYPE);

return cypher.toString();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ public void setUp() throws Exception {
System.out.println(dbConfig);
}

bioNetDBConfiguration.getDatabases().get(0).setPort(6660);
//bioNetDBConfiguration.getDatabases().get(0).setPort(6660);
bioNetDBConfiguration.getDatabases().get(0).setPort(27687);
bioNetDbManager = new BioNetDbManager(bioNetDBConfiguration);
} catch (IOException e) {
e.printStackTrace();
Expand Down Expand Up @@ -191,40 +192,42 @@ public void dominant() throws BioNetDBException, IOException {
Pedigree pedigree = getPedigreeFamily1(disorder);

Query query = new Query();
query.put("panel", "Familial or syndromic hypoparathyroidism");
// query.put("panel", "Familial or syndromic hypoparathyroidism,Hereditary haemorrhagic telangiectasia," +
// "Neurotransmitter disorders," +
// "Familial Tumours Syndromes of the central & peripheral Nervous system" +
// "Inherited non-medullary thyroid cancer" +
// "Cytopaenias and congenital anaemias" +
// "Ectodermal dysplasia without a known gene mutation" +
// "Hyperammonaemia" +
// "Neuro-endocrine Tumours- PCC and PGL" +
// "Classical tuberous sclerosis" +
// "Familial hypercholesterolaemia" +
// "Pain syndromes" +
// "Congenital myopathy" +
// "Corneal abnormalities" +
// "Hydrocephalus" +
// "Infantile enterocolitis & monogenic inflammatory bowel disease" +
// "Severe familial anorexia" +
// "Haematological malignancies for rare disease" +
// "Long QT syndrome" +
// "Infantile nystagmus");
//query.put("gene", "BRCA1,BRCA2");
// query.put("panel", "Familial or syndromic hypoparathyroidism");
query.put("panel", "Familial or syndromic hypoparathyroidism,Hereditary haemorrhagic telangiectasia," +
"Neurotransmitter disorders," +
"Familial Tumours Syndromes of the central & peripheral Nervous system" +
"Inherited non-medullary thyroid cancer" +
"Cytopaenias and congenital anaemias" +
"Ectodermal dysplasia without a known gene mutation" +
"Hyperammonaemia" +
"Neuro-endocrine Tumours- PCC and PGL" +
"Classical tuberous sclerosis" +
"Familial hypercholesterolaemia" +
"Pain syndromes" +
"Congenital myopathy" +
"Corneal abnormalities" +
"Hydrocephalus" +
"Infantile enterocolitis & monogenic inflammatory bowel disease" +
"Severe familial anorexia" +
"Haematological malignancies for rare disease" +
"Long QT syndrome" +
"Infantile nystagmus");
query.put("gene", "BRCA1,BRCA2");
query.put("ct", "missense_variant,stop_lost,intron_variant");
query.put("biotype", "protein_coding");
query.put("populationFrequencyAlt", "ALL<0.05");

excludeAll(query);

QueryResult<Variant> dominantVariants = bioNetDbManager.getVariantAnalysis().getDominantVariants(pedigree, disorder, query);
if (dominantVariants.getResult().size() > 0) {
System.out.println("\n");
System.out.println("Variants:");
for (Variant variant : dominantVariants.getResult()) {
System.out.println(variant.toJson());
System.out.println(variant.toStringSimple());
}
}
System.out.println(dominantVariants.first());
// System.out.println(dominantVariants.first());
}

@Test
Expand Down Expand Up @@ -353,8 +356,6 @@ public void compoundHeterozygous() throws BioNetDBException, IOException {
query.put("biotype", "protein_coding");
query.put("populationFrequencyAlt", "ALL<0.05");

excludeAll(query);

QueryResult<Map<String, List<Variant>>> variantMap = bioNetDbManager.getVariantAnalysis().getCompoundHeterozygousVariants(pedigree, query);
if (variantMap.getResult().size() > 0) {
System.out.println("\n");
Expand Down Expand Up @@ -383,6 +384,8 @@ public void systemDominant() throws BioNetDBException {
query.put("biotype", "protein_coding");
query.put("populationFrequencyAlt", "ALL<0.05");

excludeAll(query);

QueryResult<Variant> variants = bioNetDbManager.getInterpretationAnalysis().proteinNetworkAnalysis(pedigree, disorder, moi, true, query);
if (variants.getResult().size() > 0) {
System.out.println("\n");
Expand Down

0 comments on commit c6903e3

Please sign in to comment.