Skip to content

Commit

Permalink
Lexical matcher; parsing improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
fvilla committed Dec 5, 2024
1 parent 852caa3 commit c06a871
Show file tree
Hide file tree
Showing 8 changed files with 369 additions and 111 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,10 @@ public void run() {

var current = new ArrayList<String>();
for (var token : arguments) {
if (token.equals(",")) {
if (token.endsWith(",")) {
if (token.trim().length() > 1) {
current.add(token.trim().substring(0, token.length()-1));
}
tokens.add(current);
current = new ArrayList<>();
} else {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package org.integratedmodelling.klab.api.lang;

import org.integratedmodelling.klab.api.knowledge.SemanticRole;
import org.integratedmodelling.klab.api.knowledge.SemanticType;

import java.util.EnumSet;
import java.util.Set;

/**
 * Enumerates the semantic clauses that can appear in a concept declaration. Each constant pairs the
 * {@link SemanticRole} it expresses with the keyword(s) that declare it and two sets of
 * {@link SemanticType}s.
 * <p>
 * All fields are assigned once in the constructor and are final, so the per-constant state cannot be
 * reassigned after class initialization.
 */
public enum SemanticClause {

    OF(SemanticRole.INHERENT, "of", EnumSet.of(SemanticType.OBSERVABLE), EnumSet.of(SemanticType.COUNTABLE)),
    FOR(SemanticRole.GOAL, "for", EnumSet.of(SemanticType.OBSERVABLE), EnumSet.of(SemanticType.OBSERVABLE)),
    WITH(SemanticRole.COMPRESENT, "with", EnumSet.of(SemanticType.OBSERVABLE), EnumSet.of(SemanticType.COUNTABLE)),
    CAUSED_BY(SemanticRole.CAUSANT, "caused by", EnumSet.of(SemanticType.OBSERVABLE), EnumSet.of(SemanticType.OBSERVABLE)),
    ADJACENT_TO(SemanticRole.ADJACENT, "adjacent to", EnumSet.of(SemanticType.OBSERVABLE), EnumSet.of(SemanticType.COUNTABLE)),
    CAUSING(SemanticRole.CAUSED, "causing", EnumSet.of(SemanticType.PROCESS, SemanticType.EVENT), EnumSet.of(SemanticType.OBSERVABLE)),
    DURING(SemanticRole.COOCCURRENT, "during", EnumSet.of(SemanticType.OBSERVABLE), EnumSet.of(SemanticType.PROCESS, SemanticType.EVENT)),
    LINKING(SemanticRole.RELATIONSHIP_SOURCE, "linking", EnumSet.of(SemanticType.RELATIONSHIP), EnumSet.of(SemanticType.COUNTABLE)),
    TO(SemanticRole.RELATIONSHIP_TARGET, "to", EnumSet.of(SemanticType.RELATIONSHIP), EnumSet.of(SemanticType.COUNTABLE));

    /** Keyword(s) that declare the clause; the constructor always stores a one-element array. */
    public final String[] declaration;

    /** The semantic role expressed by the clause. */
    public final SemanticRole role;

    /** NOTE(review): presumably the types of concept the clause can be attached to - confirm. */
    public final Set<SemanticType> applicable;

    /** NOTE(review): presumably the types admitted for the clause's argument - confirm. */
    public final Set<SemanticType> argument;

    /**
     * @param role        the semantic role expressed by the clause
     * @param declaration the declaring keyword, wrapped into a one-element array
     * @param applicable  stored as-is in {@link #applicable}
     * @param argument    stored as-is in {@link #argument}
     */
    SemanticClause(SemanticRole role, String declaration, Set<SemanticType> applicable, Set<SemanticType> argument) {
        this.declaration = new String[] {declaration};
        this.role = role;
        this.applicable = applicable;
        this.argument = argument;
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@
*/
public enum SemanticLexicalElement {

// WITHIN(SemanticRole.CONTEXT, "within", EnumSet.of(SemanticType.OBSERVABLE),
// EnumSet.of(SemanticType.AGENT, SemanticType.SUBJECT)),

OF(SemanticRole.INHERENT, "of", EnumSet.of(SemanticType.OBSERVABLE), EnumSet.of(SemanticType.COUNTABLE)),
FOR(SemanticRole.GOAL, "for", EnumSet.of(SemanticType.OBSERVABLE), EnumSet.of(SemanticType.OBSERVABLE)),
WITH(SemanticRole.COMPRESENT, "with", EnumSet.of(SemanticType.OBSERVABLE), EnumSet.of(SemanticType.COUNTABLE)),
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package org.integratedmodelling.klab.api.lang.kim;

import org.integratedmodelling.klab.api.collections.Triple;
import org.integratedmodelling.klab.api.knowledge.SemanticRole;
import org.integratedmodelling.klab.api.knowledge.SemanticType;
import org.integratedmodelling.klab.api.lang.SemanticClause;
import org.integratedmodelling.klab.api.lang.UnarySemanticOperator;

import java.util.Collection;
Expand Down Expand Up @@ -166,6 +168,22 @@ public String toString() {

Collection<String> getPatternVariables();

/**
 * If the concept is the result of a unary operation applied to one or two arguments, return the operator
 * along with its argument(s). Otherwise return null.
 *
 * @return a triple holding the operator followed by its argument(s), or null if the concept is not the
 * result of a unary operation. NOTE(review): the third element is presumably null when the operator takes
 * a single argument - confirm against the implementation.
 */
Triple<UnarySemanticOperator, KimConcept, KimConcept> semanticOperation();

/**
 * If the concept contains the passed modifier, return its argument, otherwise return null.
 *
 * @param semanticClause the clause (modifier) to look up in this concept
 * @return the argument of the passed clause, or null if the concept does not contain it
 */
KimConcept semanticClause(SemanticClause semanticClause);

/**
* Return any temporal inherency for this occurrent ('during each').
*
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package org.integratedmodelling.common.lang.kim;

import org.integratedmodelling.klab.api.collections.Triple;
import org.integratedmodelling.klab.api.knowledge.SemanticRole;
import org.integratedmodelling.klab.api.knowledge.SemanticType;
import org.integratedmodelling.klab.api.lang.SemanticClause;
import org.integratedmodelling.klab.api.lang.UnarySemanticOperator;
import org.integratedmodelling.klab.api.lang.kim.KimConcept;

Expand Down Expand Up @@ -773,4 +775,27 @@ public void visit(Visitor visitor) {
}

}

/**
 * Packs the semantic modifier with the observable and the comparison concept.
 *
 * @return the triple (modifier, observable, comparison concept), or null when no semantic modifier is set
 */
@Override
public Triple<UnarySemanticOperator, KimConcept, KimConcept> semanticOperation() {
    return semanticModifier == null
            ? null
            : Triple.of(semanticModifier, observable, comparisonConcept);
}

/**
 * Looks up the field that stores the argument of the passed clause.
 *
 * @param semanticClause the clause whose argument is requested
 * @return the content of the field mapped to the clause, as stored (may be null)
 */
@Override
public KimConcept semanticClause(SemanticClause semanticClause) {
    // exhaustive over the enum: adding a constant without a case fails compilation here
    final KimConcept clauseArgument = switch (semanticClause) {
        case OF -> inherent;
        case FOR -> goal;
        case WITH -> compresent;
        case CAUSED_BY -> causant;
        case ADJACENT_TO -> adjacent;
        case CAUSING -> caused;
        case DURING -> temporalInherent;
        case LINKING -> relationshipSource;
        case TO -> relationshipTarget;
    };
    return clauseArgument;
}
}
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
package org.integratedmodelling.klab.services.reasoner;

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import org.integratedmodelling.common.logging.Logging;
import org.integratedmodelling.klab.api.collections.Pair;
import org.integratedmodelling.klab.api.exceptions.KlabIllegalStateException;
import org.integratedmodelling.klab.api.knowledge.Concept;
import org.integratedmodelling.klab.api.knowledge.SemanticType;
import org.integratedmodelling.klab.api.knowledge.Semantics;
import org.integratedmodelling.klab.api.lang.SemanticClause;
import org.integratedmodelling.klab.api.lang.kim.KimConcept;
import org.integratedmodelling.klab.api.lang.kim.KimObservable;
import org.integratedmodelling.klab.api.services.ResourcesService;

import java.util.concurrent.TimeUnit;
import java.util.concurrent.ExecutionException;

/**
* Match two concepts using one as a syntactic pattern for the other. Used in the rule system to filter
Expand All @@ -22,6 +26,7 @@ public class SyntacticMatcher {

private ReasonerService reasonerService;
private ResourcesService resourcesService;

private LoadingCache<String, KimObservable> conceptCache =
CacheBuilder.newBuilder()
.concurrencyLevel(20)
Expand All @@ -33,51 +38,164 @@ public KimObservable load(String key) throws Exception {
}
});

/**
 * Memoizes the outcome of {@link #doMatch(Semantics, Semantics)} keyed by the (candidate, pattern) pair.
 * Holds at most 400 entries.
 */
private LoadingCache<Pair<Semantics, Semantics>, Boolean> matchCache =
        CacheBuilder.newBuilder()
                .concurrencyLevel(20)
                .maximumSize(400) // TODO configure
                .build(new CacheLoader<Pair<Semantics, Semantics>, Boolean>() {
                    @Override
                    public Boolean load(Pair<Semantics, Semantics> pair) throws Exception {
                        // first element is the candidate, second is the pattern (see match())
                        final Semantics candidate = pair.getFirst();
                        final Semantics pattern = pair.getSecond();
                        return doMatch(candidate, pattern);
                    }
                });

public SyntacticMatcher(ReasonerService reasonerService, ResourcesService resourcesService) {
this.reasonerService = reasonerService;
this.resourcesService = resourcesService;
}

/**
 * Cached entry point for matching: looks the (candidate, pattern) pair up in the match cache, which
 * computes the result through {@link #doMatch(Semantics, Semantics)} on a miss.
 *
 * @param candidate the semantics being tested
 * @param pattern   the semantics used as the pattern
 * @return the cached or freshly computed match result; false if the cache lookup fails with an
 * {@link ExecutionException}, which is logged
 */
public boolean match(Semantics candidate, Semantics pattern) {
    final var cacheKey = Pair.of(candidate, pattern);
    try {
        return matchCache.get(cacheKey);
    } catch (ExecutionException e) {
        Logging.INSTANCE.error(e);
    }
    return false;
}

/**
 * Uncached matching of a candidate against a pattern. Atomic patterns (as decided by
 * {@code isAtomic} on the pattern URN) are delegated to the reasoner's subsumption check; any other
 * pattern is resolved to its syntactic form through the concept cache and compared structurally.
 *
 * @param candidate the semantics being tested
 * @param pattern   the semantics used as the pattern
 * @return true if the candidate matches the pattern; false if either argument is null or NOTHING, if the
 * syntactic form of either cannot be obtained, or if the structural match fails
 */
public boolean doMatch(Semantics candidate, Semantics pattern) {

    if (candidate == null || pattern == null || candidate.is(SemanticType.NOTHING) || pattern.is(SemanticType.NOTHING)) {
        // null doesn't match null
        return false;
    }

    if (isAtomic(pattern.getUrn())) {
        return reasonerService.subsumes(candidate, pattern);
    }

    KimObservable oCandidateObservable = null;
    KimObservable pCandidateObservable = null;
    KimConcept oCandidate = null;
    KimConcept pCandidate = null;

    try {
        // one cache lookup per URN; the observable is kept because matchConcepts needs it
        oCandidateObservable = conceptCache.getUnchecked(candidate.getUrn());
        oCandidate = oCandidateObservable.getSemantics();
        pCandidateObservable = conceptCache.getUnchecked(pattern.getUrn());
        pCandidate = pCandidateObservable.getSemantics();
    } catch (Throwable t) {
        // best effort: a failed lookup is logged and treated as a non-match
        Logging.INSTANCE.error(t);
        return false;
    }

    if (pCandidate == null || oCandidate == null) {
        return false;
    }

    return matchConcepts(oCandidate, pCandidate, oCandidateObservable, pCandidateObservable,
            candidate.asConcept(), pattern.asConcept());
}

private boolean matchConcepts(KimConcept candidate, KimConcept pattern) {
private boolean matchConcepts(KimConcept candidate, KimConcept pattern,
KimObservable candidateObservable, KimObservable patternObservable,
Concept candidateConcept,
Concept patternConcept) {

if (candidate == null || pattern == null) {
// null doesn't match null
return false;
}

if (pattern.is(SemanticType.UNION) || pattern.is(SemanticType.INTERSECTION)) {
// must have same type and same number of arguments
// TODO this applies also to all ops
// pattern should have at most two arguments; we operate on a <tail, rest> basis.
if (pattern.getOperands().size() != 2) {
throw new KlabIllegalStateException("Patterns in AND or OR should have at most two operands");
}

var type = pattern.is(SemanticType.UNION) ? SemanticType.UNION : SemanticType.INTERSECTION;

/*
candidate must have at least two operands; extract the head and the tail as concept
*/
if (!candidate.is(type)) {
return false;
}

StringBuffer buffer = new StringBuffer();
var headSyntax = candidate.getOperands().getFirst();
candidate.getOperands().stream().skip(1).map(c -> buffer.append(buffer.isEmpty() ? "" : " ").append(c.getUrn()));

if (buffer.isEmpty()) {
return false;
}

/* Match the FIRST operand and connect the remaining, then match the two pieces */

var head = reasonerService.declareConcept(headSyntax);
var tail = reasonerService.resolveConcept(buffer.toString());

return match(head, reasonerService.declareConcept(pattern.getOperands().getFirst())) &&
match(tail, reasonerService.declareConcept(pattern.getOperands().get(1)));
}

int narg = pattern.getOperands().size();
// NO - if pattern is X or Y it should match X or Y or Z with matching Y = Y or Z
if (candidate.getOperands().size() != narg) {
if (pattern.isCollective() != candidate.isCollective()) {
return false;
}

if (pattern.isNegated() != candidate.isNegated()) {
return false;
}

if (pattern.getSemanticModifier() != null) {

if (pattern.getSemanticModifier() != candidate.getSemanticModifier()) {
return false;
}

var pMod = pattern.semanticOperation();
var oMod = candidate.semanticOperation();

if (!match(reasonerService.declareConcept(candidate.semanticOperation().getSecond()),
reasonerService.declareConcept(pMod.getSecond()))) {
return false;
}

if (pMod.getThird() != null) {

if (oMod.getThird() == null) {
return false;
}

// match the comparison
if (!match(reasonerService.declareConcept(oMod.getThird()),
reasonerService.declareConcept(pMod.getThird()))) {
return false;
}
}
}

for (var clause : SemanticClause.values()) {
var target = pattern.semanticClause(clause);
// for all the modifiers, use the reasoner on the candidate
if (target != null) {
var operand = candidate.semanticClause(clause);
if (operand == null || !match(reasonerService.declareConcept(operand),
reasonerService.declareConcept(target))) {
return false;
}
}
}

return false;

for (var valueOperator : patternObservable.getValueOperators()) {
// TODO match the corresponding value operator. Must enable both value equality and generic
// value classifier
}

// all checks passed

return true;
}

private boolean isAtomic(String urn) {
Expand All @@ -86,4 +204,12 @@ private boolean isAtomic(String urn) {
return !urn.contains(" ");
}

/**
 * Empties both the concept cache and the match cache. Call this after any changes to the worldview!
 */
public void resetCaches() {
    conceptCache.invalidateAll();
    matchCache.invalidateAll();
}

}
Loading

0 comments on commit c06a871

Please sign in to comment.