From 6216074caa2043bf1bc6a09ab7c72609e8dec0b3 Mon Sep 17 00:00:00 2001 From: kwahlin Date: Tue, 25 Apr 2023 14:08:22 +0200 Subject: [PATCH] Always produce report with split clusters --- .../datatool/scripts/mergeworks/Html.groovy | 30 +++++++++++++------ .../scripts/mergeworks/WorkToolJob.groovy | 20 ++++++++++--- .../scripts/mergeworks/compare/Id.groovy | 2 +- 3 files changed, 38 insertions(+), 14 deletions(-) diff --git a/whelktool/src/main/groovy/datatool/scripts/mergeworks/Html.groovy b/whelktool/src/main/groovy/datatool/scripts/mergeworks/Html.groovy index 8e7d045b54..4b57e963bf 100644 --- a/whelktool/src/main/groovy/datatool/scripts/mergeworks/Html.groovy +++ b/whelktool/src/main/groovy/datatool/scripts/mergeworks/Html.groovy @@ -52,27 +52,39 @@ class Html { } static String hubTable(List> docs) { - def mergedWorks = docs*.first() - def ids = docs.collect { group -> - group.drop(1).collectEntries { doc -> + def mergedWorks = [] + def derivedFromIds = [] + + docs.each { + def work = it.head() + def derivedFrom = it.tail() + mergedWorks.add(work) + derivedFromIds.add(derivedFrom.collectEntries { doc -> [doc.doc.shortId, doc.view.link()] - } + }) } - def clusterId = clusterId(ids*.keySet().flatten()) + + def clusterId = clusterId(derivedFromIds*.keySet().flatten()) String header = """ ${clusterId} - ${mergedWorks.collect { "" }.join('\n')} + ${docs.collect { + def work = it.head() + def derivedFrom = it.tail() + derivedFrom.size() > 1 || it instanceof UpdatedWork + ? "${work.doc.shortId}" + : "" } + .join('\n')} """.stripIndent() String derivedFrom = """ - _derivedFrom - ${ids.collect { "${it.collect { id, link -> "$id" }.join('
')}" }.join('\n')} - + _instances + ${derivedFromIds.collect { "${it.collect { id, link -> "$id" }.join('
')}" }.join('\n')} + """.stripIndent() def statuses = WorkComparator.compare(mergedWorks) diff --git a/whelktool/src/main/groovy/datatool/scripts/mergeworks/WorkToolJob.groovy b/whelktool/src/main/groovy/datatool/scripts/mergeworks/WorkToolJob.groovy index b75346a92e..c81f578129 100644 --- a/whelktool/src/main/groovy/datatool/scripts/mergeworks/WorkToolJob.groovy +++ b/whelktool/src/main/groovy/datatool/scripts/mergeworks/WorkToolJob.groovy @@ -132,14 +132,19 @@ class WorkToolJob { void merge() { def s = statistics.printOnShutdown() + def multiWorkClusters = Collections.synchronizedList([]) run({ cluster -> return { def titles = titleClusters(cluster) def works = mergedWorks(titles) - def needsStore = { it instanceof UpdatedWork || it.derivedFrom.size() > 1 } - def storedWorks = works.findAll(needsStore).each { store(it) } + if (works.size() > 1) { + multiWorkClusters.add(works.collect { [new Doc(whelk, it.doc)] + it.derivedFrom }) + } + + def multiInstanceWorks = works.findAll { it.derivedFrom.size() > 1 || it instanceof UpdatedWork } + def storedWorks = multiInstanceWorks.each { store(it) } String report = htmlReport(titles, storedWorks) @@ -155,6 +160,14 @@ class WorkToolJob { } } }) + + new File(reportDir, "multi-work-clusters.html").with {f -> + f.append(Html.START) + multiWorkClusters.each { + f.append(Html.hubTable(it) + Html.HORIZONTAL_RULE) + } + f.append(Html.END) + } } void revert() { @@ -226,8 +239,7 @@ class WorkToolJob { LegacyIntegrationTools.determineLegacyCollection(work.doc, whelk.getJsonld()), false)) { throw new WhelkRuntimeException("Could not store new work: ${work.doc.shortId}") } - } - else if (work instanceof UpdatedWork) { + } else if (work instanceof UpdatedWork) { whelk.storeAtomicUpdate(work.doc, !loud, false, changedIn, generationProcess, work.checksum) } diff --git a/whelktool/src/main/groovy/datatool/scripts/mergeworks/compare/Id.groovy b/whelktool/src/main/groovy/datatool/scripts/mergeworks/compare/Id.groovy index 555e661ac4..fc3305148b 100644 --- a/whelktool/src/main/groovy/datatool/scripts/mergeworks/compare/Id.groovy +++ b/whelktool/src/main/groovy/datatool/scripts/mergeworks/compare/Id.groovy @@ -7,7 +7,7 @@ class Id implements ValuePicker { @Override boolean isCompatible(Object a, Object b) { - return !a || !b + return true } @Override