From 606e2462dec222a6624def489005948ce218fa69 Mon Sep 17 00:00:00 2001
From: Sebastiano Giacomini <92300303+Sebastiano-G@users.noreply.github.com>
Date: Wed, 28 Feb 2024 17:53:59 +0100
Subject: [PATCH] #10 Functions optimization, reuse of new entities

The subrecord-handling functions in mapping.py have been rewritten to handle newly created subrecords more efficiently. Recursion over nested subrecords now relies on a new way of expressing subrecord links in the front end: each field lists its subrecords in a hidden "<field id>-subrecords" input. main.js was modified for this purpose and to fix the function that edits subrecords.
It is now possible to reuse a subrecord entity within the same record form.
---
 mapping.py        | 199 +++++++++++++++++++++++++++-------------------
 static/js/main.js | 110 +++++++++++++++++--------
 2 files changed, 193 insertions(+), 116 deletions(-)

diff --git a/mapping.py b/mapping.py
index 6be163b..76a6052 100644
--- a/mapping.py
+++ b/mapping.py
@@ -81,6 +81,7 @@ def inputToRDF(recordData, userID, stage, knowledge_extraction, graphToClear=Non
 	# CREATE/MODIFY A NAMED GRAPH for each new record
 
 	recordID = recordData.recordID
+	print(recordID)
 	graph_name = recordID
 	wd = rdflib.Graph(identifier=URIRef(base+graph_name+'/'))
 
@@ -199,31 +200,42 @@ def inputToRDF(recordData, userID, stage, knowledge_extraction, graphToClear=Non
 					server.update('load <file:///'+dir_path+'/records/'+recordID+"-extraction-"+str(graph['internalID'])+'.ttl> into graph <'+base+extraction_graph_name+'/>')
 		# SUBTEMPLATE
 		elif field['type']=="Subtemplate":
-			# check potential duplications:
-			#doubled_values = check_double_subrecords(recordData) if not doubled_values else doubled_values
-
-			# handle imported entities from catalogue (not newly created ones)
-			imported_entities = [field_id for field_id in recordData if field_id.startswith(field['id']+"-") and "," in recordData[field_id]]
-			for imported_entity in imported_entities:
-				imported_entity_id, imported_entity_label = recordData[imported_entity].split(',')
-				imported_entity_label = urllib.parse.unquote(imported_entity_label)
-				entityURI = getRightURIbase(imported_entity_id) 
-				print(entityURI)
-				wd.add(( URIRef(base+graph_name), URIRef(field['property']), URIRef(entityURI) ))
-				wd.add(( URIRef( entityURI ), RDFS.label, Literal(imported_entity_label.lstrip().rstrip(), datatype="http://www.w3.org/2001/XMLSchema#string") ))
-			subrecords = process_subrecords(recordData, field['id']) if not subrecords_dict else subrecords_dict
-			print("#### surbrecords:", subrecords)
-			if field['id'] in subrecords:
-				for subrecord_idx, subrecord in subrecords[field['id']].items():
+			print(field['id'], recordData[field['id']], type(recordData[field['id']]))
+			if type(recordData[field['id']]) != type([]):
+				new_subrecords, imported_subrecords, duplicate_subrecords = check_subrecords(recordData, field['id']) if field['id']+"-subrecords" in recordData else ([],[],[])
+
+				# newly created subrecords
+				for new_entity in new_subrecords:
 					ct = datetime.datetime.now()
 					ts = ct.timestamp()
 					ID = str(ts).replace('.', '-')
-					subrecord['recordID'] = ID
-					label = find_label(field['import_subtemplate'], subrecord, field['label'])
-					inputToRDF(storify(subrecord),userID,stage,knowledge_extraction,tpl_form=field['import_subtemplate'],subrecords_dict=subrecord)
+					label_id = find_label(field['import_subtemplate'])  # id of the subtemplate's primary-key field, or False when it has none
+					retrieved_label = next((recordData[label] for label in recordData[new_entity].split(",") if label_id and label.startswith(label_id+"__")), field['label']+"-"+ID)  # fall back to "<field label>-<ID>" when no primary-key value was submitted
+					process_new_subrecord(recordData, new_entity, userID, stage, knowledge_extraction, field['import_subtemplate'], ID)
+					recordData[new_entity] = ID+","+retrieved_label  # from here on the entry reads "<ID>,<label>"
 					wd.add(( URIRef(base+graph_name), URIRef(field['property']), URIRef(base+ID) ))
-					wd.add(( URIRef(base+ID), RDFS.label, Literal(label, datatype="http://www.w3.org/2001/XMLSchema#string")))
+					wd.add(( URIRef(base+ID), RDFS.label, Literal(retrieved_label, datatype="http://www.w3.org/2001/XMLSchema#string")))
+				
+				# imported subrecords
+				for imported_entity in imported_subrecords:
+					imported_entity_id, imported_entity_label = recordData[imported_entity].split(',')
+					imported_entity_label = urllib.parse.unquote(imported_entity_label)
+					entityURI = getRightURIbase(imported_entity_id) 
+					wd.add(( URIRef(base+graph_name), URIRef(field['property']), URIRef(entityURI) ))
+					wd.add(( URIRef( entityURI ), RDFS.label, Literal(imported_entity_label.lstrip().rstrip(), datatype="http://www.w3.org/2001/XMLSchema#string") ))
 
+				# duplicate subrecords
+				for duplicate_entity in duplicate_subrecords:
+					duplicate_entity_id, duplicate_entity_label = process_duplicate_subrecord(recordData, duplicate_entity, userID, stage, knowledge_extraction, field['import_subtemplate'], return_entity=True)
+					entityURI = getRightURIbase(duplicate_entity_id) 
+					wd.add(( URIRef(base+graph_name), URIRef(field['property']), URIRef(entityURI) ))
+					wd.add(( URIRef( entityURI ), RDFS.label, Literal(duplicate_entity_label.lstrip().rstrip(), datatype="http://www.w3.org/2001/XMLSchema#string") ))
+
+			else:
+				for entity in recordData[field['id']]:
+					entity_URI, entity_label = entity.split(",",1)
+					wd.add(( URIRef(base+graph_name), URIRef(field['property']), URIRef(base+entity_URI) ))
+					wd.add(( URIRef(base+entity_URI), RDFS.label, Literal(entity_label, datatype="http://www.w3.org/2001/XMLSchema#string")))
 
 	# get keywords (record modify)
 	if stage == 'modified' and any([k for k,v in recordData.items() if k.startswith('keywords')]):
@@ -243,71 +255,90 @@ def inputToRDF(recordData, userID, stage, knowledge_extraction, graphToClear=Non
 
 	return 'records/'+recordID+'.ttl'
 
-def check_double_subrecords(data):
-	results_dict = {
-		'targets': {},
-		'pointers' : {},
-	}
-	for key, value in data.items():
-		if value.startswith("target-"):
-			split_key = key.split("__")
-			new_key = split_key[0] + "__" + split_key[-1]
-			split_value = value.replace("target-", "").split("__")
-			new_value = split_value[0] + "__" + split_value[-1]
-			results_dict['targets'][new_value] = new_key
-			results_dict['pointers'][new_key] = new_value
-	return results_dict
-
-
-
-
-# convert the dict of inputs into a series of nested dictionaries to be parsed as single records
-def process_subrecords(data, id, created_subrecords=None):
-    results = {}
-    subrecords = [key for key in data if key.startswith(id+"__") and not data[key].startswith("target-")] if created_subrecords == None else created_subrecords
-
-    for subrecord in subrecords:
-        subrecord_split = subrecord.split('__')
-        prefix, num = subrecord_split[0], subrecord_split[-1]
-        if prefix not in results:
-            results[prefix] = { num: {} }  
-        else:
-            results[prefix][num] = {}
-        add_results = {}
-        subrecord_fields = data[subrecord].split(',')
-        for key in subrecord_fields:
-            if data[key].startswith("target-"):
-                add_results[key.replace("target-", "").split('__')[0]] = {key.split('__')[-1] : process_subrecords(data, data[key].replace("target-", "")) }
-            elif data[key] != "":
-                add_results[key.split('__')[0]] = data[key]
-            else:
-                multiple_values_fields = [import_key for import_key in data.keys() if import_key.startswith(key + "-")]
-                for imported_value in multiple_values_fields:
-                    new_key = imported_value.split('__')[0] + "-" + imported_value.split('-')[-1]
-                    add_results[new_key] = data[imported_value]
-                inner_subrecords = [item for item in data.keys() if item.startswith(key + "__") and not data[item].startswith("target-") ]
-                if inner_subrecords:
-                    add_results[key.split('__')[0]] = process_subrecords(data, key, inner_subrecords)[key.split('__')[0]]
-                
-            results[prefix][num] = add_results
-
-    if not subrecords and data[id] != "":
-        for el in data[id].split(','):
-            imported_resources = [field_id for field_id in data if field_id.startswith(el+"-")]
-            for imported_res in imported_resources:
-                results[imported_res.split('__')[0]+"-"+imported_res.split("-")[-1]] = data[imported_res]
-            results[el.split('__')[0]] = data[el]
-
-    return results
-
-
-def find_label(tpl, subrecord, alternative_label):
-	print(tpl)
+def check_subrecords(data, identifier):  # split the ids listed in data[identifier+"-subrecords"] into (new, imported, duplicate) lists
+	subrecords = [subrecord for subrecord in data[identifier+"-subrecords"].split(",") if subrecord.strip()]  # skip blank entries: "".split(",") yields [""]
+	new_subrecords, imported_subrecords, double_subrecords = [], [], []
+	for subrecord in subrecords:
+		if subrecord.startswith(identifier+"__"):  # "<identifier>__...": newly created subrecord
+			new_subrecords.append(subrecord)
+		elif subrecord.startswith(identifier+"-") and "target-" not in data[subrecord]:  # "<identifier>-..." whose value is not a "target-" pointer: imported entity
+			imported_subrecords.append(subrecord)
+		else:  # anything else is handled as a duplicate, i.e. a reference to another entry
+			double_subrecords.append(subrecord)
+	return new_subrecords, imported_subrecords, double_subrecords
+	
+def process_new_subrecord(data, subrecord_id, userID, stage, knowledge_extraction, sub_tpl, ID):  # build the record for subrecord_id, send it to inputToRDF with recordID=ID, store "<ID>,<label>" in data[subrecord_id], return data
+	subrecord_fields = data[subrecord_id].split(",")  # the entry holds the comma-separated ids of the subrecord's own fields
+	new_record_data = {}
+	with open(sub_tpl) as fields:
+		subtemplate = json.load(fields)
+	for subrecord_field in subrecord_fields:
+		# check inner subrecords
+		if subrecord_field+"-subrecords" in data:
+			new_record_data[subrecord_field.split("__")[0]] = [[]]
+			inner_subtemplate = [key['import_subtemplate'] for key in subtemplate if key['id'] == subrecord_field.split("__")[0]][0]
+			for inner_subrecord in data[subrecord_field + "-subrecords"].split(","):
+				if "-" in inner_subrecord:
+					inner_subrecord = subrecord_field + "-" + inner_subrecord
+					data = process_imported_subrecord(data, inner_subrecord)
+				elif "target-" in data[inner_subrecord]:
+					data = process_duplicate_subrecord(data, inner_subrecord, userID, stage, knowledge_extraction, inner_subtemplate)  # the referenced entity belongs to the inner field, so use that field's subtemplate (as the branch below does)
+				else:
+					ct = datetime.datetime.now()
+					ts = ct.timestamp()
+					new_ID = str(ts).replace('.', '-')
+					data = process_new_subrecord(data, inner_subrecord, userID, stage, knowledge_extraction, inner_subtemplate, new_ID)
+				new_record_data[subrecord_field.split("__")[0]][0].append(data[inner_subrecord])  # by now the entry has been rewritten to "<ID>,<label>"
+		# check single value fields (e.g. Date/Literal)
+		elif data[subrecord_field] != "":
+			key = subrecord_field.split("__")[0]
+			new_record_data[key] = data[subrecord_field]
+		# check multiple values fields (e.g. Entities, SKOS Vocabs)
+		else:
+			multiple_values = [key for key in data if key.startswith(subrecord_field+"-")]
+			if multiple_values != []:
+				for value in multiple_values:
+					new_key = value.split("__")[0] + "-" + value.split("-")[-1]
+					new_record_data[new_key] = data[value]
+			else:
+				new_record_data[subrecord_field.split("__")[0]] = ""
+	new_record_data['recordID'] = ID
+	label = new_record_data.get(find_label(sub_tpl)) or ID  # find_label returns False without a primary key; then, or when its value is missing/empty, use the record ID
+	store_data = storify(new_record_data)
+	inputToRDF(store_data,userID,stage,knowledge_extraction,tpl_form=sub_tpl)
+	result = ID+","+label
+	data[subrecord_id] = result.strip()
+	return data
+
+def process_imported_subrecord(data, subrecord_id):  # rewrite data[subrecord_id] from "<id>,<percent-encoded label>" to "<id>,<label>" and return data
+	ID, label = data[subrecord_id].split(",",1)  # split on the first comma only
+	label = urllib.parse.unquote(label)  # decode percent-escapes in the label
+	result = ID+","+label
+	data[subrecord_id] = result.strip()  # drop leading/trailing whitespace from the rebuilt entry
+	return data
+
+def process_duplicate_subrecord(data, subrecord_id, userID, stage, knowledge_extraction, sub_tpl, return_entity=False):  # resolve a "target-<key>" reference to the "<ID>,<label>" pair stored under <key>
+	inner_key = data[subrecord_id].replace("target-","")  # key of the entry this reference points to
+	if "," not in data[inner_key] or "__" in data[inner_key].split(",",1)[0]:  # target is still a list of field ids (those carry "__", generated IDs do not): create it first
+		ct = datetime.datetime.now()
+		ts = ct.timestamp()
+		new_ID = str(ts).replace('.', '-')
+		data = process_new_subrecord(data, inner_key, userID, stage, knowledge_extraction, sub_tpl, new_ID)
+	ID, label = data[inner_key].split(",",1)
+	label = urllib.parse.unquote(label)
+	if return_entity:  # caller wants the (ID, label) pair itself
+		return ID, label
+	else:  # otherwise store "<ID>,<label>" under the reference's own key and hand data back
+		result = ID+","+label
+		data[subrecord_id] = result.strip()
+		return data
+
+def find_label(tpl):
 	# Retrieve the field associated with the Primary Key (i.e., the label) of the Record
 	with open(tpl) as tpl_file:
 		tpl_fields = json.load(tpl_file)
-	label_field_id = [field['id'] for field in tpl_fields if field['disambiguate'] == "True"][0]
-
+	fields_id = [field['id'] for field in tpl_fields if field['disambiguate'] == "True"]  # ids of the template fields flagged as primary key
+	label_field_id = fields_id[0] if fields_id != [] else False  # first flagged id, or False when the template has none
+	
 	# Add a mechanism to handle potential Templates without a Primary Key (e.g. the primary key has been set to "hidden")
-	label = subrecord[label_field_id] if label_field_id in subrecord else alternative_label+"-"+subrecord['recordID']
-	return label
\ No newline at end of file
+	return label_field_id
\ No newline at end of file
diff --git a/static/js/main.js b/static/js/main.js
index 13d16fe..7e9ae5b 100644
--- a/static/js/main.js
+++ b/static/js/main.js
@@ -662,6 +662,12 @@ function searchCatalogueByClass(searchterm) {
             $('#' + searchterm).next('i').after("<span class='tag " + oldID + "' data-input='" + searchterm + "' data-id='" + oldID + "'>" + oldLabel + "</span><input type='hidden' class='hiddenInput " + oldID + "' name='" + searchterm + "-" + oldID + "' value=\" " + oldID + "," + encodeURIComponent(oldLabel) + "\"/>");
             $("#searchresult").hide();
             $('#' + searchterm).val('');
+            if ($('[name="'+searchterm+'-subrecords"]').length) {
+              $('[name="'+searchterm+'-subrecords"]').val($('[name="'+searchterm+'-subrecords"]').val() + "," + oldID);
+            } else {
+              const new_sub = $("<input type='hidden' name='"+searchterm+"-subrecords' value='"+oldID+"'>")
+              $('#recordForm').append(new_sub)
+            }
           });
 
         });;
@@ -685,6 +691,12 @@ function searchCatalogueByClass(searchterm) {
             $('#' + searchterm).next('i').after("<span class='tag-subrecord "+resource_class+"' id='"+target+"-tag'>" + label + "</span><i class='far fa-edit' onclick='modify_subrecord("+target+", keep=true)'></i><i class='far fa-trash-alt' onclick='modify_subrecord("+target+", keep=false)'></i><input type='hidden' class='hiddenInput' id='"+id_root+subrecord_idx+"' name='"+id_root+subrecord_idx+"' value='target-"+target+"'>");
             $("#searchresult").hide();
             $('#' + searchterm).val('');
+            if ($('[name="'+searchterm+'-subrecords"]').length) {
+              $('[name="'+searchterm+'-subrecords"]').val($('[name="'+searchterm+'-subrecords"]').val()+","+id_root+subrecord_idx);
+            } else {
+              const new_sub = $("<input type='hidden' name='"+searchterm+"-subrecords' value='"+id_root+subrecord_idx+"'>")
+              $('#recordForm').append(new_sub)
+            }
           });
 
         });
@@ -1439,13 +1451,25 @@ function create_subrecord(resource_class, field_name, el) {
       subrecord_form.find('input:not(.btn)').each(function() {
         $("#recordForm").append($(this));
         $(this).hide();
-        if ($(this).attr('id') !== undefined) {subinputs.push($(this).attr('id'))};
+        if ($(this).attr('id') !== undefined && !$(this).val().startsWith("target-") ) {subinputs.push($(this).attr('id'))};
       });
       var subrecord_index = $("[subtemplate='"+resource_class+"']").parent().parent().find('.tag-subrecord').length + 1;
-      var subrecord_id = $("[subtemplate='"+resource_class+"']").attr('id') + "__" + subrecord_index;
+      var subrecord_base = $("[subtemplate='"+resource_class+"']").attr('id')
+      var subrecord_id = subrecord_base + "__" + subrecord_index;
       el.after("<br/><span id='"+subrecord_id+"-tag' class='tag-subrecord "+resource_class+"'>" + tag_label + "</span><i class='far fa-edit' onclick='modify_subrecord(\""+subrecord_id+"\", keep=true)'></i><i class='far fa-trash-alt' onclick='modify_subrecord(\""+subrecord_id+"\", keep=false)'></i>");
       $('#recordForm').append("<input type='hidden' name='"+subrecord_id+"' id='"+subrecord_id+"' value='"+subinputs.toString()+"'></input>");
 
+      var $subrecords = $('[name="'+subrecord_base+'-subrecords"]');  // hidden input listing this field's subrecord ids
+      if ($subrecords.length) {
+          var to_add_value = $subrecords.val();
+          if (!to_add_value.split(',').includes(subrecord_id)) {  // look for the new id itself, not for the whole current value
+              $subrecords.val(to_add_value ? to_add_value + "," + subrecord_id : subrecord_id);  // no leading comma when the list is empty
+          }
+      } else {
+          const new_sub = $("<input type='hidden' name='"+$("[subtemplate='"+resource_class+"']").attr('id')+"-subrecords' value='"+subrecord_id+"'>");
+          $('#recordForm').append(new_sub);
+      }
+
       // hide_subform
       cancel_subrecord(this);
     }
@@ -1498,42 +1522,64 @@ function modify_subrecord(sub_id, keep) {
     create_subrecord(original_subtemplate_class, field_name, el);
 
     for (let i=0; i<inner_inputs.length; i++) {
-      console.log(inner_inputs[i])
       var input = $('#'+inner_inputs[i]);
       var shortened_id = inner_inputs[i].split("__").slice(0, -1).join("__");
       var new_input = $('.subform_section [id*="'+shortened_id+'__"]');
       new_input.replaceWith(input.show());
-      if ($('input[type="hidden"][name*="'+inner_inputs[i]+'-"]').length) {
-        var imported_values = $('input[type="hidden"][name*="'+inner_inputs[i]+'-"]');
-        imported_values.each(function() {
-          var id = $(this).attr('name').split("-")[0];
-          var values = $(this).attr('value').split(",");
-          var value_code = values[0];
-          var value_string = decodeURIComponent(values[1]);
-          var tag = "<span class='tag "+value_code+"' data-input='"+id+"' data-id='"+value_code+"'>"+value_string+"</span>";
-          var hidden_input = $(this).detach();
-          input.after(tag, hidden_input);
-        })
-      } else if ($('input[id*="'+inner_inputs[i]+'__"]')) {
-        var inner_subrecords = $('input[id*="'+inner_inputs[i]+'__"]');
-        inner_subrecords.each(function() {
-          var inner_subrecord_fields = $(this).val().split(',');
-          var primary_key = "";
-          for (let i=0; i<inner_subrecord_fields.length; i++){
-            if ($('#'+inner_subrecord_fields[i]).hasClass('disambiguate')) {
-              primary_key = $('#'+inner_subrecord_fields[i]).val();
+      if ($('input[name*="'+inner_inputs[i]+'-subrecords"]').length) {
+        console.log(inner_inputs[i])
+        var inner_subrecords = $('input[name*="'+inner_inputs[i]+'-subrecords"]').val().split(",");
+        console.log(inner_subrecords);
+        for (let j=0;j<inner_subrecords.length;j++) {
+          if (inner_subrecords[j].includes("-")) {
+            console.log(inner_subrecords[j]);
+            var id = inner_inputs[i];  // data-input must name the field, not a fragment of the entity code
+            console.log($('[name="'+inner_inputs[i]+"-"+inner_subrecords[j]+'"]'))
+            var values = $('[name="'+inner_inputs[i]+"-"+inner_subrecords[j]+'"]').attr('value').split(",");
+            var value_code = values[0];
+            var value_string = decodeURIComponent(values[1]);
+            var tag = "<span class='tag "+value_code+"' data-input='"+id+"' data-id='"+value_code+"'>"+value_string+"</span>";
+            var hidden_input = $('[name="'+inner_inputs[i]+"-"+inner_subrecords[j]+'"]').detach();  // same hidden input as read above; the old selector lacked its closing bracket
+            input.after(tag, hidden_input);
+          } else if ($('[name="'+inner_subrecords[j]+'"]').val().startsWith("target-")) {
+            console.log(inner_subrecords[j]);
+            var target_id = $('input[name="'+inner_subrecords[j]+'"]').val().split("target-")[1];  // id of the subrecord this entry points to
+            var inner_subrecord_fields = $('input[name="'+target_id+'"]').val().split(',');
+            var primary_key = "";
+            for (let y=0; y<inner_subrecord_fields.length; y++){
+              if ($('#'+inner_subrecord_fields[y]).hasClass('disambiguate')) {
+                primary_key = $('#'+inner_subrecord_fields[y]).val();  // was indexed with the undefined name "iy"
+              }
+            }
+            if (primary_key === "") {
+              var inner_field_name = $('#'+inner_inputs[i]).prev('span').attr('data-original-title');
+              var num = target_id.split('__').pop();  // last "__" segment; [-1] is always undefined on a JS array
+              var primary_key = inner_field_name+ "-" + num;
+            }
+            var resource_class = $('#'+inner_inputs[i]).attr('subtemplate');
+            var tag = "<span id='"+target_id+"-tag' class='tag-subrecord "+resource_class+"'>" + primary_key + "</span><i class='far fa-edit' onclick='modify_subrecord(\""+target_id+"\", keep=true)'></i><i class='far fa-trash-alt' onclick='modify_subrecord(\""+target_id+"\", keep=false)'></i>"  // keyed by the target id, like the tag built when the reference is first added; $(this) is not an element inside this for loop
+            $('#'+inner_inputs[i]).after(tag);
+          } else {
+            console.log(inner_subrecords[j]);
+            var inner_subrecord_fields = $('[name="'+inner_subrecords[j]+'"]').val().split(',');  // the subrecord's hidden input lists the ids of its fields
+            var primary_key = "";
+            for (let y=0; y<inner_subrecord_fields.length; y++){
+              if ($('#'+inner_subrecord_fields[y]).hasClass('disambiguate')) {
+                primary_key = $('#'+inner_subrecord_fields[y]).val();
+              }
             }
+            if (primary_key === "") {
+              var inner_field_name = $('#'+inner_inputs[i]).prev('span').attr('data-original-title');
+              var num = inner_subrecords[j].split('__').pop();  // last "__" segment; [-1] is always undefined on a JS array
+              var primary_key = inner_field_name+ "-" + num;
+            }
+            var resource_class = $('#'+inner_inputs[i]).attr('subtemplate');
+            var tag = "<span id='"+inner_subrecords[j]+"-tag' class='tag-subrecord "+resource_class+"'>" + primary_key + "</span><i class='far fa-edit' onclick='modify_subrecord(\""+inner_subrecords[j]+"\", keep=true)'></i><i class='far fa-trash-alt' onclick='modify_subrecord(\""+inner_subrecords[j]+"\", keep=false)'></i>"  // the subrecord id replaces $(this).attr('id'): $(this) is not an element inside this for loop
+            console.log(inner_inputs[i]);
+            $('#'+inner_inputs[i]).after(tag);
           }
-          if (primary_key === "") {
-            var inner_field_name = $('#'+inner_inputs[i]).prev('span').attr('data-original-title') 
-            var num = $(this).attr('id').split('__')[-1];
-            var primary_key = inner_field_name+ "-" + num;
-          } 
-          var resource_class = $('#'+inner_inputs[i]).attr('subtemplate');
-          var tag = "<span id='"+$(this).attr('id')+"-tag' class='tag-subrecord "+resource_class+"'>" + primary_key + "</span><i class='far fa-edit' onclick='modify_subrecord(\""+$(this).attr('id')+"\", keep=true)'></i><i class='far fa-trash-alt' onclick='modify_subrecord(\""+$(this).attr('id')+"\", keep=false)'></i>"
-          $('#'+inner_inputs[i]).after(tag);
-        })
-      }
+        }
+      }  
     }
   }
   $('#'+sub_id+'-tag').next('i').remove();