forked from kba/infolis-dbminer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMakefile
57 lines (43 loc) · 1.47 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# Tool and path configuration. All values are plain strings, so they are
# assigned once with `:=`; each can still be overridden on the command line,
# e.g. `make MINER="python3 dbminer.py"`.

# Command prefix used to invoke the miner's subcommands.
MINER := python dbminer.py
# Tag given to the Docker image by docker-build / used by docker-run.
IMAGE := infolis/infolis-dbminer
# JSON files built by the `import` target and removed by `clean`.
JSON_TARGETS := \
    import/dara-solr.json \
    import/icpsr-studies.json \
    import/databases.json
# Helper commands used in recipes.
RM    := rm -f
WGET  := wget
CURL  := curl -s
MKDIR := mkdir -p
# Download the PDFBox 2.0.2 application jar into the current directory.
#
# The download is written to "$@.tmp" and renamed only after wget succeeds,
# so a failed or interrupted transfer never leaves a truncated pdfbox.jar
# that make would consider up to date. Uses $(WGET) like the other download
# rules so the tool can be overridden in one place.
#
# NOTE(review): this appears to be the first rule in the file, which would
# make it the default goal of a bare `make` - confirm that is intended.
# NOTE(review): the URL points at a mirror of an old release; verify it
# still resolves.
pdfbox.jar:
	$(WGET) -O "$@.tmp" "http://mirror.synyx.de/apache/pdfbox/2.0.2/pdfbox-app-2.0.2.jar"
	mv -f "$@.tmp" "$@"
# Remove the generated JSON files listed in JSON_TARGETS.
# The downloaded .xml/.csv inputs are not removed by this rule.
# Declared phony so a stray file named `clean` cannot disable the target.
.PHONY: clean
clean:
	$(RM) $(JSON_TARGETS)
#
# Imports
#
# Build every JSON import file listed in JSON_TARGETS.
# Must be phony: the recipes below create a directory that is also named
# `import`, and this target is a command, not that directory.
.PHONY: import
import: $(JSON_TARGETS)
# Fetch the da|ra Solr result set (query resourceType:2, up to 100000 rows)
# as XML.
#
# The response is saved to "$@.tmp" and renamed only after wget succeeds, so
# a failed download cannot leave a partial XML file that make (and the
# jsonify step depending on it) would treat as complete.
import/dara-solr.xml:
	$(MKDIR) $(dir $@)
	$(WGET) -O "$@.tmp" "http://www.da-ra.de/solr/dara/select?rows=100000&q=resourceType:2"
	mv -f "$@.tmp" "$@"
# Produce the da|ra JSON file by running the miner's `jsonify-dara`
# subcommand with the downloaded XML as input and the target as output.
# (Presumably an XML-to-JSON conversion; see dbminer.py for details.)
import/dara-solr.json: import/dara-solr.xml
	$(MKDIR) $(@D)
	$(MINER) jsonify-dara "$<" "$@"
# The databases spreadsheet cannot be fetched automatically: this rule only
# creates the target directory, prints download instructions, and then fails
# on purpose (exit 1) so the build stops until the CSV is put in place by hand.
import/databases.csv:
	$(MKDIR) $(@D)
	echo "Please download as 'CSV (comma-separated)' from https://docs.google.com/spreadsheets/d/1UEp9BsnR5QrHcaBAcJ2znKmWqigWfq_4NFoU9WBtH_0/edit#gid=0"
	exit 1
# Produce the databases JSON file by running the miner's `jsonify-databases`
# subcommand on the manually downloaded CSV ($< is the CSV, $@ the JSON).
import/databases.json: import/databases.csv
	$(MINER) jsonify-databases "$<" "$@"
# Download the ICPSR study list as one CSV, 1000 rows per request.
#
#  1. Create the target directory first (as the other import rules do), so
#     the first redirect does not fail on a fresh checkout.
#  2. Fetch the first page (startRow=0) in full.
#  3. Fetch the pages at startRow 1000, 2000, ... 10000; `sed -n '2,$p'`
#     drops the first line of each of these pages before appending
#     (presumably the repeated CSV header). Sleep 1s between requests.
#  4. Strip empty lines from the combined file.
#
# Everything is assembled in "$@.tmp" and renamed at the very end, so an
# interrupted or failed run never leaves a partial CSV that make would
# consider up to date.
import/icpsr-studies.csv:
	$(MKDIR) $(dir $@)
	$(CURL) "http://www.icpsr.umich.edu/icpsrweb/ICPSR/csv/studies?collection=DATA&paging.startRow=0&paging.rows=1000&archive=ICPSR" > "$@.tmp"
	for i in $$(seq 1000 1000 10000);do \
		$(CURL) "http://www.icpsr.umich.edu/icpsrweb/ICPSR/csv/studies?collection=DATA&paging.startRow=$${i}&paging.rows=1000&archive=ICPSR" |sed -n '2,$$p' >> "$@.tmp" ; \
		sleep 1; \
	done
	sed -i -n '/^./p' "$@.tmp"
	mv -f "$@.tmp" "$@"
# Produce the ICPSR studies JSON file by running the miner's
# `jsonify-icpsr-studies` subcommand on the downloaded CSV
# ($< is the CSV, $@ the JSON).
import/icpsr-studies.json: import/icpsr-studies.csv
	$(MINER) jsonify-icpsr-studies "$<" "$@"
#
# Docker
#
# Build the Docker image from the current directory and tag it $(IMAGE).
# Declared phony: this is a command, not a file, and must run every time.
.PHONY: docker-build
docker-build:
	docker build -t $(IMAGE) .
# Run the image interactively (-it) in a throwaway container (--rm), with the
# local ./data directory mounted at /app/data.
#
# Depends on docker-build so the image exists before it is run; the previous
# prerequisite `build` named a target that this Makefile does not define.
# $(CURDIR) is set by make itself, so the mount path stays correct under
# `make -C <dir>`, unlike the inherited $(PWD) environment variable.
.PHONY: docker-run
docker-run: docker-build
	docker run --rm -it -v "$(CURDIR)/data:/app/data" $(IMAGE)