-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRakefile
59 lines (44 loc) · 1.2 KB
/
Rakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# Normalizes every file under dataset/ in place: first converts line endings
# with dos2unix, then re-encodes the content from ISO-8859-1 to UTF-8.
#
# Relies on the external `dos2unix`, `cp`, `iconv` and `rm` commands; `sh`
# aborts the task as soon as one of them fails.
task :convert do
  require "shellwords"

  sh "dos2unix -f dataset/*"
  # The glob is evaluated once, before the loop, so the .tmp copies created
  # below are never picked up as inputs.
  Dir["dataset/*"].each do |path|
    # The paths are interpolated into shell command lines; escape them so a
    # file name containing spaces or shell metacharacters is passed through
    # literally instead of being split or interpreted by the shell.
    target = Shellwords.escape(path)
    backup = Shellwords.escape("#{path}.tmp")

    # `> target` truncates the file before iconv gets to read it, so iconv
    # has to read from a copy.
    sh "cp #{target} #{backup}"
    sh "iconv -f iso-8859-1 -t utf-8 #{backup} > #{target}"
    sh "rm #{backup}"
  end
end
# Loads name frequencies from names.json into two Redis sorted sets.
#
# names.json is read line by line and every non-blank line is parsed as one
# JSON document. For each entry, its "str" value is added to the "names"
# sorted set scored by "cnt_name" and to the "last_names" sorted set scored by
# "cnt_last_name"; a missing count is stored as 0. Entries whose "str" is a
# single alphabetic character are skipped.
task :names do
  require "redis"
  require "json"

  # Redis.connect was deprecated in redis-rb 3.x and removed in 4.0. Keep
  # calling it where it still exists (so older installs behave exactly as
  # before) and fall back to Redis.new on current versions of the gem.
  redis = Redis.respond_to?(:connect) ? Redis.connect : Redis.new

  File.open("names.json") do |io|
    io.each_line do |line|
      # A blank (e.g. trailing) line is not a JSON document; JSON.parse would
      # raise on it.
      next if line.strip.empty?

      entry = JSON.parse(line)
      # \A and \z anchor the whole string. The previous ^ and $ anchors match
      # per line, so a multi-line string containing a one-letter line was
      # skipped as well.
      next if entry["str"] =~ /\A[[:alpha:]]\z/

      redis.zadd("names", entry["cnt_name"] || 0, entry["str"])
      redis.zadd("last_names", entry["cnt_last_name"] || 0, entry["str"])
    end
  end
end
# Parses the dataset files matching dataset/BO2011*-2.txt and stores the
# records they yield in the BoletinDB located in the "db" directory next to
# this Rakefile.
#
# The publication date handed to the parser is taken from the file name:
# the eight characters following the first two are read as YYYYMMDD.
# Progress is reported on STDERR and the database is flushed after each file.
task :load_boletines do
  require_relative 'lib/parser'
  require_relative 'lib/model'
  require "time"

  database = BoletinDB.new("#{File.dirname(__FILE__)}/db")

  Dir["dataset/BO2011*-2.txt"].each do |boletin_path|
    STDERR.puts "Parsing #{boletin_path}"

    # e.g. "BO20110103-2.txt"[2, 8] => "20110103"
    date_stamp = File.basename(boletin_path)[2, 8]
    fecha = Time.strptime(date_stamp, "%Y%m%d").strftime("%Y-%m-%d")

    File.open(boletin_path) do |io|
      stored = 0
      # NOTE(review): Parser.parse presumably yields one record per company
      # ("sociedad") found in the file -- confirm against lib/parser.
      Parser.parse(io, fecha_aparicion: fecha) do |sociedad|
        database.store_sociedad(sociedad)
        stored += 1
      end
      STDERR.puts " parsed #{stored} records"
    end

    database.flush
  end
end
# Loads every Ruby file found (recursively) under test/.
# NOTE(review): presumably the test files run themselves once loaded (e.g.
# through the test framework's at-exit hook) -- confirm.
task :test do
  # Before Ruby 3.0, Dir[] returned entries in file-system order. Sort them so
  # the files are loaded in the same order on every platform and Ruby version.
  Dir["test/**/*.rb"].sort.each { |test_file| require_relative test_file }
end
# Running `rake` without arguments runs the :test task.
task default: [:test]