Skip to content

Commit

Permalink
Add overlap parameter
Browse files Browse the repository at this point in the history
  • Loading branch information
holderlb committed Jan 24, 2021
1 parent 07b7419 commit a2b2092
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 204 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ Minimum size (#edges) of a pattern. Default is 1.

Number of best patterns to report at end. Default is 3.

`--overlap <overlap_type>`

Controls how instances of a pattern may overlap. Possible overlap_type values are: none, vertex, edge. Overlap of "none" means no part of two instances of a pattern can overlap. Overlap of "vertex" means that any number of vertices can overlap, but no edges. Overlap of "edge" means that edges and vertices can overlap, but the two instances cannot be identical. Default is "none".

`--prune`

If enabled, Subdue removes any pattern whose value is worse than its parent pattern. Disabled by default.
Expand Down
7 changes: 7 additions & 0 deletions src/Parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def __init__(self):
self.maxSize = 0 # Maximum size (#edges) of a pattern; default (0) is |E|/2.
self.minSize = 1 # Minimum size (#edges) of a pattern; default is 1.
self.numBest = 3 # Number of best patterns to report at end; default is 3.
self.overlap = "none" # Extent that pattern instances can overlap (none, vertex, edge)
self.prune = False # Remove any patterns that are worse than their parent.
self.valueBased = False # Retain all patterns with the top beam best values.
self.writeCompressed = False # Write compressed graph after iteration i to file outputFileName-compressed-i.json
Expand Down Expand Up @@ -55,6 +56,11 @@ def set_parameters (self, args):
if optionName == "--numbest":
index += 1
self.numBest = int(args[index])
if optionName == "--overlap":
index += 1
overlap_type = args[index]
if overlap_type in ["none", "vertex", "edge"]:
self.overlap = overlap_type
if optionName == "--prune":
self.prune = True
if optionName == "--valuebased":
Expand All @@ -79,6 +85,7 @@ def print(self):
print(" Max Size: " + str(self.maxSize))
print(" Min Size: " + str(self.minSize))
print(" Num Best: " + str(self.numBest))
print(" Overlap: " + self.overlap)
print(" Prune: " + str(self.prune))
print(" Value Based: " + str(self.valueBased))
print(" Write Compressed: " + str(self.writeCompressed))
Expand Down
30 changes: 20 additions & 10 deletions src/Pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def CreatePatternFromInstances(definition, instances):

# ----- Pattern Extension

def ExtendPattern (pattern, temporal = False):
def ExtendPattern (parameters, pattern):
"""Return list of patterns created by extending each instance of the given pattern by one edge in all possible ways,
and then collecting matching extended instances together into new patterns."""
extendedInstances = []
Expand All @@ -110,15 +110,15 @@ def ExtendPattern (pattern, temporal = False):
while extendedInstances:
newInstance = extendedInstances.pop(0)
newInstanceGraph = Graph.CreateGraphFromInstance(newInstance)
if temporal:
if parameters.temporal:
newInstanceGraph.TemporalOrder()
matchingInstances = [newInstance]
nonmatchingInstances = []
for extendedInstance in extendedInstances:
extendedInstanceGraph = Graph.CreateGraphFromInstance(extendedInstance)
if temporal:
if parameters.temporal:
extendedInstanceGraph.TemporalOrder()
if Graph.GraphMatch(newInstanceGraph,extendedInstanceGraph) and (not InstancesOverlap(matchingInstances,extendedInstance)):
if Graph.GraphMatch(newInstanceGraph,extendedInstanceGraph) and (not InstancesOverlap(parameters.overlap, matchingInstances, extendedInstance)):
matchingInstances.append(extendedInstance)
else:
nonmatchingInstances.append(extendedInstance)
Expand Down Expand Up @@ -163,16 +163,26 @@ def InstanceMatch(instance1,instance2):
else:
return False

def InstancesOverlap(instanceList,instance):
"""Returns True if instance contains a vertex that is contained in an instance of the given instanceList."""
def InstancesOverlap(overlap, instanceList, instance):
"""Returns True if instance overlaps with an instance in the given instanceList
according to the overlap parameter, which indicates what type of overlap is ignored.
Overlap="none" means no overlap is ignored. Overlap="vertex" means vertex overlap
is ignored. Overlap="edge" means vertex and edge overlap are ignored, but the instances
cannot be identical."""
for instance2 in instanceList:
if InstanceOverlap(instance,instance2):
if InstanceOverlap(overlap, instance, instance2):
return True
return False

def InstanceOverlap(instance1,instance2):
"""Returns True if given instances share a vertex."""
return instance1.vertices.intersect(instance2.vertices)
def InstanceOverlap(overlap, instance1, instance2):
    """Decide whether two instances overlap under the given overlap policy.

    overlap   -- one of "edge", "vertex" or "none"; any other value is
                 handled like "none".
    instance1 -- first instance to compare.
    instance2 -- second instance to compare.

    The result is whatever the selected check returns:
      "edge"   -> InstanceMatch(instance1, instance2)
      "vertex" -> instance1.edges.intersect(instance2.edges)
      "none"   -> instance1.vertices.intersect(instance2.vertices)
    """
    if overlap == "edge":
        # Sharing vertices or edges is tolerated here; only the
        # InstanceMatch check decides (presumably flags identical
        # instances -- confirm against InstanceMatch).
        return InstanceMatch(instance1, instance2)
    if overlap == "vertex":
        # Shared vertices are tolerated; the edge sets are compared.
        return instance1.edges.intersect(instance2.edges)
    # overlap == "none" (also the fallback): the vertex sets are compared.
    return instance1.vertices.intersect(instance2.vertices)


# ----- Pattern List Operations
Expand Down
16 changes: 8 additions & 8 deletions src/Subdue.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def DiscoverPatterns(parameters, graph):
"""The main discovery loop. Finds and returns best patterns in given graph."""
patternCount = 0
# get initial one-edge patterns
parentPatternList = GetInitialPatterns(graph, parameters.temporal)
parentPatternList = GetInitialPatterns(parameters, graph)
if DEBUGFLAG:
print("Initial patterns (" + str(len(parentPatternList)) + "):")
for pattern in parentPatternList:
Expand All @@ -42,7 +42,7 @@ def DiscoverPatterns(parameters, graph):
parentPattern = parentPatternList.pop(0)
if ((len(parentPattern.instances) > 1) and (patternCount < parameters.limit)):
patternCount += 1
extendedPatternList = Pattern.ExtendPattern(parentPattern, parameters.temporal)
extendedPatternList = Pattern.ExtendPattern(parameters, parentPattern)
while (extendedPatternList):
extendedPattern = extendedPatternList.pop(0)
if DEBUGFLAG:
Expand All @@ -66,15 +66,14 @@ def DiscoverPatterns(parameters, graph):
Pattern.PatternListInsert(parentPattern, discoveredPatternList, parameters.numBest, False) # valueBased = False
return discoveredPatternList

def GetInitialPatterns(graph, temporal = False, overlap = True):
"""Returns list of single-edge, evaluated patterns in given graph with more than one instance.
If overlap=False, then instances of a single-edge pattern cannot share vertices."""
def GetInitialPatterns(parameters, graph):
"""Returns list of single-edge, evaluated patterns in given graph with more than one instance."""
initialPatternList = []
# Create a graph and an instance for each edge
edgeGraphInstancePairs = []
for edge in graph.edges.values():
graph1 = Graph.CreateGraphFromEdge(edge)
if temporal:
if parameters.temporal:
graph1.TemporalOrder()
instance1 = Pattern.CreateInstanceFromEdge(edge)
edgeGraphInstancePairs.append((graph1,instance1))
Expand All @@ -89,7 +88,7 @@ def GetInitialPatterns(graph, temporal = False, overlap = True):
for edgePair2 in edgeGraphInstancePairs:
graph2 = edgePair2[0]
instance2 = edgePair2[1]
if Graph.GraphMatch(graph1,graph2) and (overlap or (not Pattern.InstancesOverlap(pattern.instances,instance2))):
if Graph.GraphMatch(graph1,graph2) and (not Pattern.InstancesOverlap(parameters.overlap, pattern.instances, instance2)):
pattern.instances.append(instance2)
else:
nonmatchingEdgePairs.append(edgePair2)
Expand Down Expand Up @@ -171,6 +170,7 @@ def nx_subdue(
:param maxSize: (Default: 0) -- Maximum size (#edges) of a pattern; default (0) is |E|/2.
:param minSize: (Default: 1) -- Minimum size (#edges) of a pattern; default is 1.
:param numBest: (Default: 3) -- Number of best patterns to report at end; default is 3.
:param overlap: (Default: none) -- Extent that pattern instances can overlap (none, vertex, edge)
:param prune: (Default: False) -- Remove any patterns that are worse than their parent.
:param valueBased: (Default: False) -- Retain all patterns with the top beam best values.
:param temporal: (Default: False) -- Discover static (False) or temporal (True) patterns
Expand Down Expand Up @@ -223,7 +223,7 @@ def unwrap_output(iterations):
return out

def main():
print("SUBDUE v1.3 (python)\n")
print("SUBDUE v1.4 (python)\n")
parameters = Parameters.Parameters()
parameters.set_parameters(sys.argv)
graph = ReadGraph(parameters.inputFileName)
Expand Down
148 changes: 0 additions & 148 deletions testing/output.txt
Original file line number Diff line number Diff line change
@@ -1,148 +0,0 @@
SUBDUE v1.3 (python)

Parameters:
Input File Name: inputgraph.json
Output File Name: inputgraph
Beam Width: 4
Iterations: 1
Limit: 7
Max Size: 7
Min Size: 1
Num Best: 3
Prune: False
Value Based: False
Write Compressed: False
Write Pattern: False
Write Instances: False
Temporal: False

Graph: 15 vertices, 15 edges
7 patterns left
4 patterns left
1 patterns left
No more patterns to consider

Best 3 patterns:

Pattern (value=0.8, instances=5):
Graph:
vertex "1": timestamp=1, label=v1
vertex "2": timestamp=1, label=v2
vertex "3": timestamp=1, label=v3
edge "1" (1--2): timestamp=1, label=e12
edge "2" (1--3): timestamp=1, label=e13
edge "3" (2--3): timestamp=7, label=e23
Instance 1:
vertex "1": timestamp=1, label=v1
vertex "2": timestamp=1, label=v2
vertex "3": timestamp=1, label=v3
edge "1" (1--2): timestamp=1, label=e12
edge "2" (1--3): timestamp=1, label=e13
edge "3" (2--3): timestamp=7, label=e23
Instance 2:
vertex "4": timestamp=2, label=v1
vertex "5": timestamp=2, label=v2
vertex "6": timestamp=2, label=v3
edge "4" (4--5): timestamp=2, label=e12
edge "5" (4--6): timestamp=2, label=e13
edge "6" (5--6): timestamp=8, label=e23
Instance 3:
vertex "7": timestamp=4, label=v1
vertex "8": timestamp=4, label=v2
vertex "9": timestamp=4, label=v3
edge "9" (7--8): timestamp=4, label=e12
edge "10" (7--9): timestamp=4, label=e13
edge "11" (8--9): timestamp=11, label=e23
Instance 4:
vertex "10": timestamp=5, label=v1
vertex "11": timestamp=5, label=v2
vertex "12": timestamp=5, label=v3
edge "13" (10--11): timestamp=5, label=e12
edge "14" (10--12): timestamp=5, label=e13
edge "15" (11--12): timestamp=12, label=e23
Instance 5:
vertex "13": timestamp=9, label=v1
vertex "14": timestamp=9, label=v2
vertex "15": timestamp=9, label=v3
edge "20" (13--14): timestamp=9, label=e12
edge "21" (13--15): timestamp=9, label=e13
edge "22" (14--15): timestamp=16, label=e23

Pattern (value=0.5333333333333333, instances=5):
Graph:
vertex "1": timestamp=1, label=v1
vertex "2": timestamp=1, label=v2
vertex "3": timestamp=1, label=v3
edge "1" (1--2): timestamp=1, label=e12
edge "2" (1--3): timestamp=1, label=e13
Instance 1:
vertex "1": timestamp=1, label=v1
vertex "2": timestamp=1, label=v2
vertex "3": timestamp=1, label=v3
edge "1" (1--2): timestamp=1, label=e12
edge "2" (1--3): timestamp=1, label=e13
Instance 2:
vertex "4": timestamp=2, label=v1
vertex "5": timestamp=2, label=v2
vertex "6": timestamp=2, label=v3
edge "4" (4--5): timestamp=2, label=e12
edge "5" (4--6): timestamp=2, label=e13
Instance 3:
vertex "7": timestamp=4, label=v1
vertex "8": timestamp=4, label=v2
vertex "9": timestamp=4, label=v3
edge "9" (7--8): timestamp=4, label=e12
edge "10" (7--9): timestamp=4, label=e13
Instance 4:
vertex "10": timestamp=5, label=v1
vertex "11": timestamp=5, label=v2
vertex "12": timestamp=5, label=v3
edge "13" (10--11): timestamp=5, label=e12
edge "14" (10--12): timestamp=5, label=e13
Instance 5:
vertex "13": timestamp=9, label=v1
vertex "14": timestamp=9, label=v2
vertex "15": timestamp=9, label=v3
edge "20" (13--14): timestamp=9, label=e12
edge "21" (13--15): timestamp=9, label=e13

Pattern (value=0.5333333333333333, instances=5):
Graph:
vertex "1": timestamp=1, label=v1
vertex "2": timestamp=1, label=v2
vertex "3": timestamp=1, label=v3
edge "1" (1--2): timestamp=1, label=e12
edge "2" (2--3): timestamp=7, label=e23
Instance 1:
vertex "1": timestamp=1, label=v1
vertex "2": timestamp=1, label=v2
vertex "3": timestamp=1, label=v3
edge "1" (1--2): timestamp=1, label=e12
edge "3" (2--3): timestamp=7, label=e23
Instance 2:
vertex "4": timestamp=2, label=v1
vertex "5": timestamp=2, label=v2
vertex "6": timestamp=2, label=v3
edge "4" (4--5): timestamp=2, label=e12
edge "6" (5--6): timestamp=8, label=e23
Instance 3:
vertex "7": timestamp=4, label=v1
vertex "8": timestamp=4, label=v2
vertex "9": timestamp=4, label=v3
edge "9" (7--8): timestamp=4, label=e12
edge "11" (8--9): timestamp=11, label=e23
Instance 4:
vertex "10": timestamp=5, label=v1
vertex "11": timestamp=5, label=v2
vertex "12": timestamp=5, label=v3
edge "13" (10--11): timestamp=5, label=e12
edge "15" (11--12): timestamp=12, label=e23
Instance 5:
vertex "13": timestamp=9, label=v1
vertex "14": timestamp=9, label=v2
vertex "15": timestamp=9, label=v3
edge "20" (13--14): timestamp=9, label=e12
edge "22" (14--15): timestamp=16, label=e23

SUBDUE done. Elapsed time = 0.0024871826171875 seconds

Loading

0 comments on commit a2b2092

Please sign in to comment.