Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Blazegraph for go lego experiment #297

Merged
merged 30 commits into from
Mar 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
70336f0
allowing command line blazegraph load to handle an OWL file
goodb Mar 6, 2020
35912e5
working command line validator with local blazegraph ontology journal
goodb Mar 9, 2020
765524f
not working but no sure cause - ought to be - backstop commit for rol…
goodb Mar 12, 2020
58e76af
backed off query to match up with previously working golr requests
goodb Mar 13, 2020
9544379
cleaning up ontology jounral when shutting down an m3
goodb Mar 13, 2020
2ba141a
added a test case for the tbox ontology manager
goodb Mar 14, 2020
d1cc87b
genes are chemical entities test
goodb Mar 14, 2020
67c04ca
setting up minerva server ontology journal init
goodb Mar 14, 2020
59b68b9
investigating slow server performance
goodb Mar 17, 2020
0d75854
basic tbox search functionality and tests
goodb Mar 21, 2020
721eca0
query expansion working for GO terms
goodb Mar 21, 2020
a3da092
added search by taxon
goodb Mar 21, 2020
5240251
Added update method for the taxon_models cache. activates on save
goodb Mar 22, 2020
c8a2442
functional product for #291
goodb Mar 22, 2020
6c75119
added exact date and date range queries #292
goodb Mar 22, 2020
77a4a8c
added route to list species in gocam repo for #295
goodb Mar 22, 2020
d1624f8
bug fix on contributor search and added tests
goodb Mar 22, 2020
e2030d7
updated test files for validation - fixed a test
goodb Mar 22, 2020
b831d43
removed uses of external lookup service from minerva
goodb Mar 22, 2020
f174ad3
match up params for server and command line
goodb Mar 22, 2020
eb9f824
removing unused import - offending Travis
goodb Mar 22, 2020
6c1f69e
currently set to require a tbox ontology location
goodb Mar 22, 2020
46b0d68
allowing null tbox for command line operations that don't require it.
goodb Mar 22, 2020
e958ec2
can't dispose of nothing
goodb Mar 23, 2020
ff56bbb
not using golr service anyway.. though these test failures may be of …
goodb Mar 23, 2020
ffa6103
thats what tests are for...
goodb Mar 23, 2020
50d29f5
fixed just because..
goodb Mar 23, 2020
5a3ea67
fixed a bunch of tests - mainly closing blazegraph instances not gett…
goodb Mar 23, 2020
31e123a
more dispose
goodb Mar 23, 2020
0990b4b
more disposing for repeated uses of same blazegraph store in differen…
goodb Mar 23, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion minerva-cli/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,11 @@
<artifactId>minerva-server</artifactId>
<version>${project.parent.version}</version>
</dependency>

<dependency>
<groupId>org.geneontology</groupId>
<artifactId>whelk_2.12</artifactId>
<version>0.1.3</version>
</dependency>
</dependencies>

</project>

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
import org.semanticweb.owlapi.model.IRI;
import org.semanticweb.owlapi.model.OWLAnnotation;
import org.semanticweb.owlapi.model.OWLAnnotationProperty;
import org.semanticweb.owlapi.model.OWLClass;
import org.semanticweb.owlapi.model.OWLDataFactory;
import org.semanticweb.owlapi.model.OWLDocumentFormat;
import org.semanticweb.owlapi.model.OWLImportsDeclaration;
Expand All @@ -62,6 +63,7 @@
import org.semanticweb.owlapi.model.OWLOntologyIRIMapper;
import org.semanticweb.owlapi.model.OWLOntologyManager;
import org.semanticweb.owlapi.model.OWLOntologyStorageException;
import org.semanticweb.owlapi.model.OWLSubClassOfAxiom;
import org.semanticweb.owlapi.rio.RioMemoryTripleSource;
import org.semanticweb.owlapi.rio.RioRenderer;

Expand All @@ -81,7 +83,7 @@ public class BlazegraphMolecularModelManager<METADATA> extends CoreMolecularMode
boolean isPrecomputePropertyClassCombinations = false;

final String pathToOWLStore;
final String pathToExportFolder;
final String pathToExportFolder;
private final BigdataSailRepository repo;
private final CurieHandler curieHandler;

Expand All @@ -92,21 +94,42 @@ public class BlazegraphMolecularModelManager<METADATA> extends CoreMolecularMode
private final List<PreFileSaveHandler> preFileSaveHandlers = new ArrayList<PreFileSaveHandler>();
private final List<PostLoadOntologyFilter> postLoadOntologyFilters = new ArrayList<PostLoadOntologyFilter>();

private Map<String, Set<String>> taxon_models;

/**
* @param tbox
* @param modelIdPrefix
* @param pathToJournal Path to Blazegraph journal file to use.
* Only one instance of Blazegraph can use this file at a time.
* Only one instance of Blazegraph can use this file at a time.
* @throws OWLOntologyCreationException
* @throws IOException
*/
public BlazegraphMolecularModelManager(OWLOntology tbox, CurieHandler curieHandler, String modelIdPrefix, String pathToJournal, String pathToExportFolder)
throws OWLOntologyCreationException {
super(tbox);
public BlazegraphMolecularModelManager(OWLOntology tbox, CurieHandler curieHandler, String modelIdPrefix, String pathToJournal, String pathToExportFolder, String pathToOntologyJournal)
throws OWLOntologyCreationException, IOException {
super(tbox, pathToOntologyJournal);
if(curieHandler==null) {
LOG.error("curie handler required for blazegraph model manager startup ");
System.exit(-1);
}else if(curieHandler.getMappings()==null) {
LOG.error("curie handler WITH MAPPINGS required for blazegraph model manager startup ");
System.exit(-1);
}
this.modelIdPrefix = modelIdPrefix;
this.curieHandler = curieHandler;
this.pathToOWLStore = pathToJournal;
this.pathToExportFolder = pathToExportFolder;
this.repo = initializeRepository(this.pathToOWLStore);
if(pathToOntologyJournal!=null) {
taxon_models = buildTaxonModelMap();
}
}

/**
 * Returns the taxon-to-models cache held by this manager.
 *
 * @return the map currently stored in the {@code taxon_models} field (the live reference,
 *         not a copy); {@code null} if the field has not been assigned
 */
public Map<String, Set<String>> getTaxon_models() {
    return this.taxon_models;
}

/**
 * Replaces the taxon-to-models cache held by this manager.
 *
 * @param taxon_models the map to store; the reference is kept as-is, no copy is made
 */
public void setTaxon_models(Map<String, Set<String>> taxon_models) {
this.taxon_models = taxon_models;
}

/**
Expand Down Expand Up @@ -206,7 +229,7 @@ public ModelContainer generateBlankModel(METADATA metadata)
createImports(abox, tbox.getOntologyID(), metadata);

// generate model
model = new ModelContainer(modelId, tbox, abox, tbox_reasoner);
model = new ModelContainer(modelId, tbox, abox);
} catch (OWLOntologyCreationException exception) {
if (abox != null) {
m.removeOntology(abox);
Expand Down Expand Up @@ -275,6 +298,7 @@ public void saveModel(ModelContainer m,
}
}
}
this.updateModelTaxonMap(modelId.toString());
}

private void writeModelToDatabase(OWLOntology model, IRI modelId) throws RepositoryException, IOException {
Expand Down Expand Up @@ -510,18 +534,38 @@ public QueryResult executeSPARQLQuery(String queryText, int timeout) throws Malf
connection.close();
}
}

/**
 * Evaluates a SPARQL query on a read-only connection to the model repository, handing the
 * query text to the repository unmodified.
 *
 * @param queryText the SPARQL query to run
 * @param timeout the limit passed to {@code Query.setMaxQueryTime}
 * @return the result of evaluating a tuple query or a graph query
 * @throws MalformedQueryException if the repository rejects the query text
 * @throws QueryEvaluationException if evaluation fails
 * @throws RepositoryException if the connection cannot be obtained, used or closed
 * @throws UnsupportedOperationException if the prepared query is neither a tuple query nor a
 *         graph query (boolean queries included)
 */
public QueryResult executeSPARQLQueryWithoutPrefixManipulation(String queryText, int timeout) throws MalformedQueryException, QueryEvaluationException, RepositoryException {
    final BigdataSailRepositoryConnection readOnly = repo.getReadOnlyConnection();
    try {
        final Query prepared = readOnly.prepareQuery(QueryLanguage.SPARQL, queryText.toString());
        prepared.setMaxQueryTime(timeout);
        if (prepared instanceof TupleQuery) {
            return ((TupleQuery) prepared).evaluate();
        }
        if (prepared instanceof GraphQuery) {
            return ((GraphQuery) prepared).evaluate();
        }
        // FIXME: boolean queries are not handled yet, so they are rejected like any other type.
        throw new UnsupportedOperationException("Unsupported query type.");
    } finally {
        // The connection is released on every path, including the exceptional ones.
        readOnly.close();
    }
}

@Override
protected void loadModel(IRI modelId, boolean isOverride) throws OWLOntologyCreationException {
LOG.info("Load model: " + modelId + " from database");
if (modelMap.containsKey(modelId)) {
if (!isOverride) {
throw new OWLOntologyCreationException("Model already exists: " + modelId);
}
unlinkModel(modelId);
}
try {
BigdataSailRepositoryConnection connection = repo.getReadOnlyConnection();
BigdataSailRepositoryConnection connection = repo.getReadOnlyConnection();
try {
RepositoryResult<Resource> graphs = connection.getContextIDs();
if (!Iterations.asSet(graphs).contains(new URIImpl(modelId.toString()))) {
Expand All @@ -530,7 +574,8 @@ protected void loadModel(IRI modelId, boolean isOverride) throws OWLOntologyCrea
graphs.close();
RepositoryResult<Statement> statements =
connection.getStatements(null, null, null, false, new URIImpl(modelId.toString()));
OWLOntology abox = loadOntologyDocumentSource(new RioMemoryTripleSource(statements), false);
boolean minimal = true;
OWLOntology abox = loadOntologyDocumentSource(new RioMemoryTripleSource(statements), minimal);
statements.close();
abox = postLoadFileFilter(abox);
ModelContainer model = addModel(modelId, abox);
Expand Down Expand Up @@ -601,7 +646,7 @@ public String importModelToDatabase(File file, boolean skipMarkedDelete) throws
try {
connection.begin();
try {
final boolean delete;
final boolean delete;
if (skipMarkedDelete) {
delete = scanForIsDelete(file);
} else {
Expand All @@ -613,7 +658,11 @@ public String importModelToDatabase(File file, boolean skipMarkedDelete) throws
URI graph = ontIRIOpt.get();
connection.clear(graph);
//FIXME Turtle format is hard-coded here
connection.add(file, "", RDFFormat.TURTLE, graph);
if(file.getName().endsWith(".ttl")) {
connection.add(file, "", RDFFormat.TURTLE, graph);
}else if(file.getName().endsWith(".owl")) {
connection.add(file, "", RDFFormat.RDFXML, graph);
}
connection.commit();
modeliri = graph.toString();
} else {
Expand Down Expand Up @@ -650,7 +699,10 @@ public void handleStatement(Statement statement) {
InputStream inputStream = new FileInputStream(file);
try {
//FIXME Turtle format is hard-coded here
RDFParser parser = Rio.createParser(RDFFormat.TURTLE);
RDFParser parser = Rio.createParser(RDFFormat.RDFXML);
if(file.getName().endsWith(".ttl")) {
parser = Rio.createParser(RDFFormat.TURTLE);
}
parser.setRDFHandler(handler);
parser.parse(inputStream, "");
// If an ontology IRI triple is found, it will be thrown out
Expand Down Expand Up @@ -794,10 +846,126 @@ public void dumpStoredModel(IRI modelId, File folder) throws IOException {
/**
 * Releases the resources held by this manager.
 *
 * Delegates to {@code super.dispose()} first, then shuts down the model repository and
 * disposes the go-lego repository, each only while its sail still reports itself as open.
 * A {@code RepositoryException} raised along the way is logged and not rethrown.
 */
public void dispose() {
    super.dispose();
    try {
        // Shut down only a sail that is still open; an unconditional shutDown() ahead of
        // this check would make the guard pointless.
        if (repo.getSail().isOpen()) {
            repo.shutDown();
        }
        // The go-lego repository is absent when no ontology journal was configured.
        if (this.getGolego_repo() != null
                && this.getGolego_repo().getGo_lego_repo().getSail().isOpen()) {
            this.getGolego_repo().dispose();
        }
    } catch (RepositoryException e) {
        LOG.error("Failed to shutdown Blazegraph sail.", e);
    }
}

/**
 * Builds a fresh taxon-to-models index for every model returned by
 * {@link #buildModelGeneMap()}.
 *
 * For each model, the type IRIs collected for it are passed to the go-lego repository's
 * {@code getTaxaByGenes}, and the model id is filed under every taxon that call returns.
 *
 * @return a new map from taxon to the set of model ids associated with it
 * @throws IOException declared for the taxon lookup against the go-lego repository
 */
public Map<String, Set<String>> buildTaxonModelMap() throws IOException {
    final Map<String, Set<String>> genesByModel = buildModelGeneMap();
    final Map<String, Set<String>> modelsByTaxon = new HashMap<String, Set<String>>();
    for (Map.Entry<String, Set<String>> entry : genesByModel.entrySet()) {
        final String modelId = entry.getKey();
        final Set<String> taxaForModel = this.getGolego_repo().getTaxaByGenes(entry.getValue());
        for (String taxon : taxaForModel) {
            Set<String> modelsForTaxon = modelsByTaxon.get(taxon);
            if (modelsForTaxon == null) {
                // First model seen for this taxon: register its set once, then fill it.
                modelsForTaxon = new HashSet<String>();
                modelsByTaxon.put(taxon, modelsForTaxon);
            }
            modelsForTaxon.add(modelId);
        }
    }
    return modelsByTaxon;
}

/**
 * Queries the model repository for the {@code rdf:type} values used in each named graph.
 *
 * The query groups by graph id and concatenates the distinct type IRIs with {@code ;}. It
 * filters out the listed OWL/RDFS vocabulary types and every type whose IRI matches
 * {@code http://purl.obolibrary.org/obo/}. The remaining type IRIs are what this class
 * refers to as the model's genes.
 *
 * If the query fails, the error is logged and the entries gathered so far (possibly none)
 * are returned.
 *
 * @return a new map from graph (model) IRI to the set of type IRIs found in that graph
 */
public Map<String, Set<String>> buildModelGeneMap(){
    Map<String, Set<String>> model_genes = new HashMap<String, Set<String>>();
    String sparql = "SELECT ?id (GROUP_CONCAT(DISTINCT ?type;separator=\";\") AS ?types) WHERE {\n" +
            " GRAPH ?id { \n" +
            "?i rdf:type ?type .\n" +
            "FILTER (?type != <http://www.w3.org/2002/07/owl#Axiom> \n" +
            " && ?type != <http://www.w3.org/2002/07/owl#NamedIndividual> \n" +
            " && ?type != <http://www.w3.org/2002/07/owl#Ontology> \n" +
            " && ?type != <http://www.w3.org/2002/07/owl#Class> \n" +
            " && ?type != <http://www.w3.org/2002/07/owl#ObjectProperty> \n" +
            " && ?type != <http://www.w3.org/2000/01/rdf-schema#Datatype> \n" +
            " && ?type != <http://www.w3.org/2002/07/owl#AnnotationProperty>) . \n" +
            "FILTER (!regex(str(?type), \"http://purl.obolibrary.org/obo/\" ) ) \n" +
            " }\n" +
            " } \n" +
            " \n" +
            "GROUP BY ?id";
    try {
        TupleQueryResult result = (TupleQueryResult) executeSPARQLQueryWithoutPrefixManipulation(sparql, 100);
        try {
            while(result.hasNext()) {
                BindingSet bs = result.next();
                String model = bs.getBinding("id").getValue().stringValue();
                String genes = bs.getBinding("types").getValue().stringValue();
                Set<String> g = new HashSet<String>();
                if(genes!=null) {
                    // The query joins the type IRIs with ';', so split on the same separator.
                    for(String gene : genes.split(";")) {
                        g.add(gene);
                    }
                }
                model_genes.put(model, g);
            }
        } finally {
            // Release the result iteration even when reading a row fails.
            result.close();
        }
    } catch (MalformedQueryException | QueryEvaluationException | RepositoryException e) {
        LOG.error("Failed to build the model to gene map from the model repository.", e);
    }
    return model_genes;
}

/**
 * Adds one model to the taxon-to-models cache.
 *
 * The model's type IRIs are fetched with {@link #getModelGenes(String)}, resolved to taxa
 * through the go-lego repository's {@code getTaxaByGenes}, and the model id is then filed
 * under each returned taxon. Existing entries are only added to, never removed.
 *
 * Nothing happens when no go-lego repository is available or when the model yields no type
 * IRIs. The cache map is created on demand if it has not been assigned yet.
 *
 * @param model_id IRI of the model graph, as a string
 * @throws IOException declared for the taxon lookup against the go-lego repository
 */
public void updateModelTaxonMap(String model_id) throws IOException {
    if(this.getGolego_repo()==null) {
        // Without a go-lego repository there is nothing to resolve taxa against; saving
        // a model must still succeed in that configuration.
        return;
    }
    Set<String> genes = getModelGenes(model_id);
    if(genes.isEmpty()) {
        return;
    }
    if(taxon_models==null) {
        // The constructor fills the cache only when an ontology journal path was supplied.
        taxon_models = new HashMap<String, Set<String>>();
    }
    Set<String> taxa = this.getGolego_repo().getTaxaByGenes(genes);
    for(String taxon : taxa) {
        Set<String> models = taxon_models.get(taxon);
        if(models==null) {
            models = new HashSet<String>();
            taxon_models.put(taxon, models);
        }
        models.add(model_id);
    }
}

/**
 * Collects the {@code rdf:type} values used inside one model graph.
 *
 * The query filters out the listed OWL/RDFS vocabulary types and every type whose IRI
 * matches the OBO {@code ECO_} or {@code GO_} prefixes. The remaining type IRIs are what
 * this class refers to as the model's genes.
 *
 * If the query fails, the error is logged and the values gathered so far (possibly none)
 * are returned.
 *
 * @param model_id IRI of the model graph; it is inserted verbatim between {@code <} and
 *        {@code >} in the query text, so it must be a well-formed IRI
 * @return a new set of type IRIs found in the graph, never {@code null}
 */
public Set<String> getModelGenes(String model_id){
    Set<String> g = new HashSet<String>();
    String sparql = "SELECT ?type WHERE {\n" +
            " GRAPH <"+model_id+"> { \n" +
            " ?i rdf:type ?type .\n" +
            "FILTER (?type != <http://www.w3.org/2002/07/owl#Axiom> \n" +
            " && ?type != <http://www.w3.org/2002/07/owl#NamedIndividual> \n" +
            " && ?type != <http://www.w3.org/2002/07/owl#Ontology> \n" +
            " && ?type != <http://www.w3.org/2002/07/owl#Class> \n" +
            " && ?type != <http://www.w3.org/2002/07/owl#ObjectProperty> \n" +
            " && ?type != <http://www.w3.org/2000/01/rdf-schema#Datatype> \n" +
            " && ?type != <http://www.w3.org/2002/07/owl#AnnotationProperty>) . \n" +
            // A blanket filter on "http://purl.obolibrary.org/obo/" is deliberately not used
            // here because it cuts out all the reacto genes. Filtering only ECO_ and GO_ will
            // probably let a few non-gene types past, but the effect would only be a slight
            // slow down when looking up taxa.
            "FILTER (!regex(str(?type), \"http://purl.obolibrary.org/obo/ECO_\" ) ) . \n" +
            "FILTER (!regex(str(?type), \"http://purl.obolibrary.org/obo/GO_\" ) ) " +
            " }\n" +
            " } \n" +
            " \n";
    try {
        TupleQueryResult result = (TupleQueryResult) executeSPARQLQueryWithoutPrefixManipulation(sparql, 10);
        try {
            while(result.hasNext()) {
                BindingSet bs = result.next();
                g.add(bs.getBinding("type").getValue().stringValue());
            }
        } finally {
            // Release the result iteration even when reading a row fails.
            result.close();
        }
    } catch (MalformedQueryException | QueryEvaluationException | RepositoryException e) {
        LOG.error("Failed to read the gene types of model " + model_id, e);
    }
    return g;
}

}
Loading