-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathparse-experiment.py
executable file
·247 lines (209 loc) · 10.9 KB
/
parse-experiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
#!/usr/bin/python
"""
Copyright (c) 2014 High-Performance Computing and GIS (HPCGIS) Laboratory. All rights reserved.
Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.
Authors and contributors: Eric Shook ([email protected])
"""
import sys,getopt
from xml.dom import minidom
import itertools
from collections import defaultdict
import math
# Example
# ./parse-experiment.py -m Ache-v1.1b-test2.nlogo -e experiment60a
# Parse out the 'experiments' section of a NetLogo model, ignoring the rest of the file
# It isn't pretty, but it is functional in parsing a NetLogo model
def parseexperiment(f):
print " Parsing experiment"
exptext=""
inexperiments=0
for line in f:
if line=="<experiments>\n":
inexperiments=1
# Record the conents if within the experiment tags
if inexperiments==1:
exptext+=line
if line=="</experiments>\n":
inexperiments=0
break
return exptext
# From an xml.dom experiment write it to a file with the appropriate header/footer
# to be used by NetLogo to execute a sub-experiment
def writeexperimentfile(experiment,subexperimentdirectory):
# Print the experiment file to XML
experimentfilename=subexperimentdirectory+"/subexperiment."+experiment.attributes["name"].value+".xml"
f=open(experimentfilename,'w')
f.write("<?xml version=\"1.0\" encoding=\"us-ascii\"?>\n<!DOCTYPE experiments SYSTEM \"behaviorspace.dtd\">\n<experiments>\n")
f.write(experiment.toxml())
f.write("\n</experiments>\n")
f.close()
# Transform a dictionary of enumerated value sets to XML and add to the experimentmaster xml.dom to create a subexperiment
def createexperiment(dict,experimentmaster,experimentgroupcount):
# Define a new experiment based on master and append to the name the experimentgroup count for a unique name
experiment=experimentmaster.cloneNode(True) # Duplicate master
experiment.attributes["name"].value+=str(experimentgroupcount)
for attr in dict: # Iterate over the dictionary and add values based on attribute variable names
string=" \n <enumeratedValueSet variable=\""+attr+"\">\n"
for value in set(dict[attr]): # Remember each attribute in the dict holds a list of values, set returns unique values in the list
string+=" <value value=\""+value+"\"/>\n"
string+="</enumeratedValueSet> \n "
#print "string=",string
evsnode=minidom.parseString(string).firstChild # Extract out the XML from the dom
#print evsnode.toxml()
experiment.appendChild(evsnode) # Add it to the larger subexperiment
return experiment
# Parse the xml from the experiment text and create a list of lists of unique simulations in the experiment
def generateexperiments(exptext,experimentname,numberofsubexperiments,subexperimentdirectory):
print " Generating experiments"
# Parse experiments XML from netlogo model
dom=minidom.parseString(exptext)
# Store subexperiments in a list of lists
experimentlistoflists=[]
experimentfound=0
# This loop iterates over each experiment looking for the user selected experiment (experimentname)
numberofexperiments=1
for node in dom.getElementsByTagName('experiment'):
if node.attributes["name"].value==experimentname: # Experiment found
experiment=node.cloneNode(True) # Save this experiment
numberofrepetitions=int(node.attributes["repetitions"].value) # Save the number of repititions for each parameter combination
print " Experiment %s found (number of repetitions=%i)"%(experimentname,numberofrepetitions)
experimentfound=1
# Iterate over each enumeraed value set to find all parameter combinations
for evs in node.getElementsByTagName('enumeratedValueSet'):
evslist=[]
#print "enumeratedValueSet=",evs.attributes["variable"].value
count=0
for val in evs.getElementsByTagName('value'): # for each parameter value
#print "val", val.attributes["value"].value
count+=1 # count it
# # and add it to the list of parameter combinations
evslist.append((evs.attributes["variable"].value,val.attributes["value"].value))
#print "count=",count
numberofexperiments*=count
# add this value set list to the experiment list (creating a list of lists)
experimentlistoflists.append(evslist)
#print node.toxml()
if experimentfound==0:
print " [ ERROR ] Experiment",experimentname,"not found"
exit(1)
print "The total number of simulations is",numberofexperiments*numberofrepetitions
# Sort the list so try to align similar experiments
experimentlistoflists.sort()
#print experimentlistoflists
# this function will take the list of lists generated above (experimentlistoflists)
# which consists of all of the enumeratedvaluesets with their individual values
# and creates unique combinations of the parameters
# this new list of lists is saved in allexperiments
# allexperiments represents every single simulation parameter configuration in a defined experiment
allexperiments=itertools.product(*experimentlistoflists)
# Count using loop (len would be simpler)
count=0
for simulation in allexperiments:
count+=1
print "The total number of experiments generated is",count
# Remove all enumerated value sets from experiment (which are grouped currently)
# This is necessary, because we will add them back one at a time to represent a single sub-experiment
for evs in experiment.getElementsByTagName('enumeratedValueSet'):
experiment.removeChild(evs)
evs.unlink()
#print experiment.toxml()
experimentmaster=experiment.cloneNode(True)
# Build a set of sub-experiment (xml) files that are grouped by attributevaluesets
# It is controllable how
simulationcount=0 # This parameter counts the number of simulations
# This parameter enables multiple simulations (parameter combinations) to be combined into a single xml file
# rather than having each one run separately
# Determine how large sub-experiments should be
experimentgroupsize=int(math.ceil(float(count)/float(numberofsubexperiments)))
experimentgroupcount=0 # This parameter counts the number of experiment groups
dict=defaultdict(list)
# Re-create allexperiments
allexperiments=itertools.product(*experimentlistoflists)
for simulation in allexperiments:
simulationcount+=1
for evs in simulation:
#print "evs=",evs
val=evs[1] # Pull the value from the attribute/value pair
val=val.replace("\"",""") # NetLogo handles strings using HTML formatting, but Python auto-changes them upon read
attribute=evs[0]
dict[attribute].append(val)
#print "val=",val
#print "sim=",simulation
if(simulationcount%experimentgroupsize==0): # We reached out experiment group size limit, so write out the results
#experiment=createexperiment(dict,experimentmaster,experimentgroupcount)
experiment=createexperiment(dict,experimentmaster,simulationcount) # Create a DOM based on the dictionary that can be written
writeexperimentfile(experiment,subexperimentdirectory)
# Now that an experiment group has been written setup a new experiment for writing
# Increase count, define a new experiment based on master and append to the name the experimentgroup count for a unique name
experimentgroupcount+=1
#experiment=experimentmaster.cloneNode(True)
#experiment.attributes["name"].value+=str(experimentgroupcount)
dict=defaultdict(list) # Reset the dictionary too
# Check to see if any remaining simulations have not been written (it will be a smaller experiment as it has not reached groupsize level
# This happens if allexperiments is not divisible by experimentgroupsize
if(simulationcount%experimentgroupsize!=0):
# Write the remaining experiments
experiment=createexperiment(dict,experimentmaster,experimentgroupcount)
writeexperimentfile(experiment,subexperimentdirectory)
experimentgroupcount+=1
dict=defaultdict(list) # Reset the dictionary too
#print "#####################################################"
#print experiment.toxml()
#print experiment.toprettyxml(indent=' ',newl='')
# FIXME: Catch the end experiment if it is not modulus of groupsize
print "Simulation group files written :",experimentgroupcount
def main():
# Try to extract command-line options
try:
opts,args=getopt.getopt(sys.argv[1:],"m:e:n:d:",["model=","experiment=","num=","dir="])
except getopt.GetoptError:
print "parse-experiment.py -m <netlogomodel.nlogo> -e <experiment-name> -n <number of sub-experiments> -d <directory for sub-experiments>"
sys.exit(1)
# Set model filename and experiment name based on command-line parameter
modelfilename=""
experimentname=""
subexperimentdirectory=""
numberofsubexperiments=0
for opt, arg in opts:
if opt in ("-m", "--model"):
modelfilename=arg
if opt in ("-e", "--experiment"):
experimentname=arg
if opt in ("-n", "--num"):
numberofsubexperiments=int(arg)
if opt in ("-d", "--dir"):
subexperimentdirectory=arg
err=0
if modelfilename=="":
print " [ ERROR ] No model file found"
err=1
if experimentname=="":
print " [ ERROR ] No experiment found"
err=1
if subexperimentdirectory=="":
print " [ ERROR ] No subexperiment directory"
err=1
if numberofsubexperiments<=0:
print " [ ERROR ] Number of subexperiments must be greater than 0"
err=1
if err==1:
print "parse-experiment.py -m <netlogomodel.nlogo> -e <experiment-name> -n <number of sub-experiments> -d <directory for sub-experiments>"
sys.exit(1)
print "Start parsing netlogo model file %s"%modelfilename
print "Experiment %s"%experimentname
print "Number of sub experiments to generate",numberofsubexperiments
print "Directory for subexperiments",subexperimentdirectory
# Now I have a netlogo model filename and experiment to parse
# Try to open the file and parse
try:
f=open(modelfilename)
exptext=parseexperiment(f)
f.close()
except IOError:
print ' [ ERROR ] Cannot open file %s'%modelfilename
sys.exit(1)
# Generate the job files after decomposing the experiment
generateexperiments(exptext,experimentname,numberofsubexperiments,subexperimentdirectory)
# Run main
if __name__=="__main__":
main()