Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SHB data import #1440

Draft
wants to merge 1 commit into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions whelktool/scripts/dataimports/shb/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/build
/cache
/logs
33 changes: 33 additions & 0 deletions whelktool/scripts/dataimports/shb/ConvertSHB.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import java.io.*;
import java.util.*;
import static java.lang.System.out;

import se.kb.libris.util.marc.MarcRecord;
import se.kb.libris.util.marc.io.Iso2709MarcRecordReader;

import static whelk.util.Jackson.mapper;
import whelk.converter.MarcJSONConverter;
import whelk.converter.marc.MarcFrameConverter;
import whelk.converter.marc.MarcFrameCli;

// Reads MARC records (ISO 2709) from the file given as the first argument,
// runs each one through MarcFrameConverter and prints the result to stdout
// as one JSON document per line.
//
// Example usage (from the repository root):
// $ java -cp importers/build/libs/xlimporter.jar -Dxl.secret.properties=DEV2-secret.properties whelktool/scripts/dataimports/shb/ConvertSHB.java ~/Downloads/shb_alfanum_marc > /var/tmp/shb_alnum.jsonld.lines
public class ConvertSHB {
    public static void main(String[] args) throws Exception {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 1) {
            System.err.println("Usage: ConvertSHB <marc-file>");
            System.exit(1);
        }

        var converter = new MarcFrameConverter();
        new MarcFrameCli().addSystemComponents(converter);

        // The stream is closed when the loop finishes or an exception propagates.
        try (InputStream in = MarcJSONConverter.getNormalizedInputStreamFromFile(new File(args[0]))) {
            var reader = new Iso2709MarcRecordReader(in);

            // Records get sequential ids starting at dataset/shb/1, in file order.
            int i = 0;
            // readRecord() returning null marks the end of the input.
            for (MarcRecord record = reader.readRecord(); record != null; record = reader.readRecord()) {
                String id = "dataset/shb/" + (++i);
                var result = (Map<?, ?>) converter.convert(MarcJSONConverter.toJSONMap(record), id);
                out.println(mapper.writeValueAsString(result));
            }
        }
    }
}
19 changes: 19 additions & 0 deletions whelktool/scripts/dataimports/shb/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# ROOT is the path from this directory to the repository root; it ends with a
# slash, so paths below are joined as $(ROOT)name without an extra separator.
ROOT=../../../../
# STORE is the directory holding the source files (MARC dump and register
# spreadsheet) that the rules below take as prerequisites.
STORE=$(HOME)/Downloads

# Delete a target whose recipe failed, so that a truncated file left behind by
# the `> $@` redirects is not mistaken for an up-to-date target on the next run.
.DELETE_ON_ERROR:

# Default goal: reshape the raw converted lines using the register CSV file(s).
build/shb_alnum-reshaped.jsonld.lines: reshape_shb.py cache/shb_alnum-raw.jsonld.lines
	mkdir -p build
	python3 $^ cache/register_shb_240502-*.csv > $@

# Convert the MARC dump with ConvertSHB.java (run in Java source-file mode).
cache/shb_alnum-raw.jsonld.lines: ConvertSHB.java $(STORE)/shb_alfanum_marc
	mkdir -p cache
	java -cp $(ROOT)importers/build/libs/xlimporter.jar -Dxl.secret.properties=$(ROOT)DEV2-secret.properties $^ > $@

# Same reshaping step, run with --sample-pretty-with and piped through trld to TriG.
build/shb-sample.trig: reshape_shb.py cache/shb_alnum-raw.jsonld.lines
	mkdir -p build
	python3 $^ cache/register_shb_240502-*.csv --sample-pretty-with $(ROOT)../definitions/build/sys/context/kbv.jsonld | trld -ijsonld -o trig > $@

# Convert the register spreadsheet to CSV in cache/.
# See <https://wiki.documentfoundation.org/ReleaseNotes/7.2#Document_Conversion>
cache/register_shb_240502-*.csv: $(STORE)/register_shb_240502.xlsx
	mkdir -p cache
	libreoffice --headless --convert-to csv:"Text - txt - csv (StarCalc)":"44,34,UTF8,1,,0,false,true,false,false,false,-1" $^ --outdir cache
Loading
Loading