-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsubmission.groovy
362 lines (254 loc) · 12.3 KB
/
submission.groovy
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
import groovy.sql.Sql
import java.util.regex.Matcher;
import java.util.regex.Pattern;
class SubmissionMigrator {
// Regex taken from old vireo: org.dspace.app.xmlui.aspect.vireo.model
static String REGEX_FOR_NAME_TOKEN = "(?:\\p{L}|[-'])+";
static String REGEX_FOR_INTEGRATED_NAME_PART = "((?:" + REGEX_FOR_NAME_TOKEN + "\\s*)+)";
static String REGEX_FOR_STANDALONE_NAME_PART = "((?:\\s*" + REGEX_FOR_NAME_TOKEN + "\\s*)+)";
static String REGEX_FOR_INTEGRATED_MIDDLE_INITIAL = "(?:\\s+(\\p{L})[.])?";
static String REGEX_FOR_STANDALONE_MIDDLE_INITIAL = "(?:\\s*(\\p{L})[.]?\\s*)?";
static Pattern PATTERN_FOR_AUTHORITATIVE_NAME = Pattern.compile( //
REGEX_FOR_INTEGRATED_NAME_PART // last name
+ "(?:\\s*,\\s*" // delimiter between first and last names
+ REGEX_FOR_INTEGRATED_NAME_PART // first name
+ REGEX_FOR_INTEGRATED_MIDDLE_INITIAL + ")?" // middle
// initial
+ "(?:\\s*,\\s*(?:\\d+)[-]?)?"); // birth year
static void main(String[] args) {
def config = new ConfigSlurper().parse(new File('config.groovy').toURL())
def sql = Sql.newInstance(config.old_db_url,config.old_db_user, config.old_db_pwd)
def newsql = Sql.newInstance(config.new_db_url,config.new_db_user, config.new_db_pwd)
// These can take a really long time if there is a lot of data - so log when we start and stop
newsql.execute("truncate attachment cascade")
newsql.execute("truncate actionlog cascade")
newsql.execute("truncate submission cascade")
newsql.execute("truncate custom_action_value cascade")
// Main driver query - for each submission in old vireo
int nameExceptions = 0;
int customActions = 0;
sql.eachRow("select * from vireosubmission where collection_id = "+config.old_collection_id +" order by submission_id asc"){
row ->
// This is a TAMU specific flag to route submisisons for either the graduate or undergraduate theses offices
if (config.tamu_graduate_office || config.tamu_undergraduate_office) {
boolean graduate = true;
if ("Undergraduate Honors Fellows".equals(row.college) || "Undergraduate Scholars".equals(row.college))
graduate = false;
if (config.tamu_undergraduate_office && graduate)
// Skip this submission
return;
if (config.tamu_graduate_office && !graduate)
// Skip this submission
return;
}
def name = getName(sql, row.item_id);
def lname = null;
def fname = null;
def mname = null;
if (name != null && !",".equals(name)) {
name = name.trim();
// This should be made more robust
Matcher m = PATTERN_FOR_AUTHORITATIVE_NAME.matcher(name);
if (m.matches()) {
// Yay, it's a well formed name.
lname = m.group(1);
fname = m.group(2);
mname = m.group(3);
} else {
// We have a badly formatted name so fallback to simple parsing
try {
def name_parts = name.tokenize(",");
fname = name_parts[1].trim();
lname = name_parts[0].trim();
if (fname.contains(" ")) {
mname = fname.substring(fname.indexOf(" "), fname.length()).trim();
fname = fname.substring(0,fname.indexOf(" ")).trim();
}
} catch (RuntimeException re) {
lname = name;
}
nameExceptions++;
println("["+row.submission_id+"] Unable to parse student '"+name+"', using alternative parsing: l='"+lname+"', f='"+fname+"', m='"+mname+"'");
}
} else {
println("["+row.submission_id+"] Has a blank student name.");
}
def committeeApprovalDate = null;
if (row.committee_approval != null)
committeeApprovalDate = new java.sql.Date(System.currentTimeMillis());
def params = [row.submission_id, getUmiRelease(row.umi), row.approval_date, row.college, committeeApprovalDate, row.committee_email_address,
row.email_hash, null, getDegree(sql, row.item_id),
getDegreeLevel(sql, row.item_id), getDepartment(sql, row.item_id), getDepositId(sql, row.item_id), getAbstract(sql, row.item_id),
getKeywords(sql, row.item_id), getDocumentTitle(sql, row.item_id), row.degree_type, getGraduationMonth(sql, row.item_id),
getGraduationYear(sql, row.item_id), new java.sql.Date(System.currentTimeMillis()), "Submission imported into Vireo 1.8.",
row.license_agreement_date, getMajor(sql, row.item_id), getSubStatus(row.status),
row.year_of_birth, fname, lname, mname, row.submission_date, (row.assigned_to == -1 ?null:row.assigned_to), getEmbargoType(newsql, row.embargo_name), row.applicant_id];
// Fix lastactionlog and lastactionlogentry
newsql.execute '''insert into submission (
id,
umirelease,
approvaldate,
college,
committeeapprovaldate,
committeecontactemail,
committeeemailhash,
committeeembargoapprovaldate,
degree,
degreelevel,
department,
depositid,
documentabstract,
documentkeywords,
documenttitle,
documenttype,
graduationmonth,
graduationyear,
lastactionlogdate,
lastactionlogentry,
licenseagreementdate,
major,
statename,
studentbirthyear,
studentfirstname,
studentlastname,
studentmiddlename,
submissiondate,
assignee_id,
embargotype_id,
submitter_id)
values (
?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?
)''', params;
// Migrate custom action definitions.
if (row.custom_actions_completed != null) {
def actions = row.custom_actions_completed.tokenize(",");
for (String action : actions) {
action = action.trim();
// check if the definition exists
def definition = newsql.firstRow "select id from custom_action_definition where id = ?", [Integer.valueOf(action)];
if (definition != null) {
def actionParams = [customActions++, true, Integer.valueOf(action),row.submission_id]
newsql.execute '''insert into custom_action_value (id, value, definition_id, submission_id) values (?,?,?,?)''', actionParams;
}
}
}
}
println("Student name exceptions: "+nameExceptions);
// Update sequence counter
def row1 = newsql.firstRow("select (max(id) + 1) max from submission");
newsql.execute("alter sequence seq_submission restart with " + row1.max );
def row2 = newsql.firstRow("select (max(id) + 1) max from custom_action_value");
newsql.execute("alter sequence seq_custom_action_value restart with " + row2.max );
}
// Set up value for UMI Release
static boolean getUmiRelease(Integer val) {
if (val == null || val == 0)
return false
else
return true
}
// Get metadata value for degree
static String getDegree(Sql sql, Integer id) {
return getMetadataValue(sql, id, getMetadataFieldId(sql,"thesis","degree","name"));
}
// Get and translate the metadata value for degree level
static Integer getDegreeLevel(Sql sql, Integer id) {
def degree = getMetadataValue(sql, id, getMetadataFieldId(sql,"thesis","degree","level"))
if (degree == null) return null
if (degree.equals("Doctoral")) return 3
if (degree.equals("Masters")) return 2
}
// Get value for department from metadata table
static String getDepartment(Sql sql, Integer id) {
return getMetadataValue(sql, id, getMetadataFieldId(sql,"thesis","degree","department"));
}
// Get value for submission abstract from metadata table
static String getAbstract(Sql sql, Integer id) {
return getMetadataValue(sql, id, getMetadataFieldId(sql,"dc","description","abstract"));
}
// Get submission title
static String getDocumentTitle(Sql sql, Integer id) {
return getMetadataValue(sql, id, getMetadataFieldId(sql,"dc","title",null));
}
// Get creator name
static String getName(Sql sql, Integer id) {
return getMetadataValue(sql, id, getMetadataFieldId(sql,"dc","creator",null));
}
// Get identifier URI
static String getDepositId(Sql sql, Integer id) {
String depositId = getMetadataValue(sql, id, getMetadataFieldId(sql,"dc","identifier","uri"));
// Correction for TAMU data, this shouldn't effect anyone else
if (depositId != null)
depositId = depositId.replaceAll("http://handle.tamu.edu/","http://hdl.handle.net/");
return depositId;
}
// Get major - degree/discipline
static String getMajor(Sql sql, Integer id) {
return getMetadataValue(sql, id, getMetadataFieldId(sql,"thesis","degree","discipline"));
}
// Get Keywords
static String getKeywords(Sql sql, Integer id) {
def ret = ""
def fieldId = getMetadataFieldId(sql, "dc","subject",null);
sql.eachRow("select text_value from metadatavalue where metadatavalue.item_id = " + id + " and metadata_field_id = "+fieldId){
row ->
ret = ret + row.text_value + "; "
}
if (ret.length() > 0)
return ret[0..-2] // remove trailing semicolon
}
static Integer getGraduationMonth(Sql sql, Integer id) {
def field_id = getMetadataFieldId(sql,"dc","date","submitted");
def row = sql.firstRow("select text_value from metadatavalue where metadatavalue.item_id = " + id + " and metadata_field_id = " + field_id);
if (row == null) return null;
def parts = row.text_value.split(" ");
if ("January".equals(parts[0])) return 0;
if ("Feburary".equals(parts[0])) return 1;
if ("March".equals(parts[0])) return 2;
if ("April".equals(parts[0])) return 3;
if ("May".equals(parts[0])) return 4;
if ("June".equals(parts[0])) return 5;
if ("July".equals(parts[0])) return 6;
if ("August".equals(parts[0])) return 7;
if ("September".equals(parts[0])) return 8;
if ("October".equals(parts[0])) return 9;
if ("November".equals(parts[0])) return 10;
if ("December".equals(parts[0])) return 11;
println("Error: Unknown month, "+parts[0]);
return null;
}
static Integer getGraduationYear(Sql sql, Integer id) {
def field_id = getMetadataFieldId(sql,"dc","date","submitted");
def row = sql.firstRow("select text_value from metadatavalue where metadatavalue.item_id = " + id + " and metadata_field_id = " + field_id);
if (row == null) return null;
def parts = row.text_value.split(" ");
return Integer.valueOf(parts[1]);
}
// Utility function to return a value from the metadata table
static String getMetadataValue(Sql sql, Integer id, String mv){
def rows = sql.rows("select text_value from metadatavalue where metadatavalue.item_id = " + id + " and metadata_field_id = " + mv)
if (rows[0] != null) {
return rows[0].text_value
} else
return null
}
static String getMetadataFieldId(Sql sql, String schema, String element, String qualifier) {
def params = [schema, element, qualifier];
if (qualifier == null) params = [schema,element];
def row = sql.firstRow("select f.metadata_field_id from metadatafieldregistry f, metadataschemaregistry s where f.metadata_schema_id = s.metadata_schema_id AND s.short_id = ? AND f.element = ? AND f.qualifier " + ( qualifier == null ? " IS NULL " : " = ? "), params);
return row.metadata_field_id;
}
static Integer getEmbargoType(Sql sql, String name) {
def row = sql.firstRow("select id from embargo_type where name = ?",[name]);
if (row == null)
return null;
else
return row.id;
}
// Translate submission status
static String getSubStatus(Integer stat) {
def state = [10:'InProgress', 20:'Submitted', 30:'InReview', 40:'NeedsCorrection', 50:'WaitingOnRequirements', 60:'Approved',
70:'PendingPublication', 80:'Published', 90:'OnHold', 100:'Withdrawn', 110:'Cancelled'];
return state[stat]
}
}