-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathARKI.pl
75 lines (75 loc) · 1.98 KB
/
ARKI.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/local/bin/perl
use strict; use warnings; $|=1; $0='ARK';
use Proc::Daemon;
use LWP::UserAgent;
###################### SUMMONS #
# ARKI - scrape archive.org pdfs
# <:3 )~ ---skrp of MKRX
# SETUP ###############################
# Directory and file names used by the daemon. $work, $log, $debug and $pid
# are handed to Proc::Daemon below; $dump is the directory prefix for
# downloaded files.
my $work     = 'MINION/';
my $limbo    = 'limbo';
my $dump     = 'dump';
my $state    = 'STATE';
my $debug    = 'DEBUG';
my $log      = 'LOG';
my $pid      = 'PID';
# Remaining names; their consumers are not all visible in this script --
# TODO confirm which of them are still needed.
my $que      = 'QUE';
my $clean    = 'CLEAN';
my $pause    = 'PAUSE';
my $shutdown = 'SHUT';
# DAEMONIZE ##########################
# Detach from the terminal: Proc::Daemon is given $work as its working
# directory, $log / $debug as the daemon's STDOUT / STDERR and $pid as the
# pid file.
my $daemon = Proc::Daemon->new(
    work_dir     => $work,
    child_STDOUT => $log,
    # The open mode is part of the file-name string; '+>>' appends instead
    # of truncating. (The original bare +>>$debug was a syntax error.)
    child_STDERR => "+>>$debug",
    pid_file     => $pid,
);
# Init() returns the daemon's PID to the parent and 0 to the daemon itself.
# When called on an object the parent is not terminated automatically, so
# leave here; otherwise both processes would run the scrape below.
my $daemon_pid = $daemon->Init();
exit 0 if $daemon_pid;
my $base = "http://archive.org/download";
# USER AGENT ####################
my $ua = uagent();
# PROC ###################
# NOTE(review): the original used $target and $init without ever declaring
# them, which does not compile under strict. They are mapped here onto the
# $que and $state names from SETUP -- confirm this is the intended pairing.
my $target = $que;      # file listing item identifiers, one per line
my $init   = $state;    # checkpoint file of identifiers still to be fetched

# Give the queue file one hour to appear; the open below dies if it is
# still missing afterwards.
unless (-e $target) { sleep 3600; }
open(my $tfh, '<', $target) or die "Couldn't read $target\n";
my @list = readline $tfh;
chomp @list;
close $tfh;
unlink $target;

my $count = 0;
# Consume the queue from the front. The original shifted @list inside a
# foreach over @list, which makes the iterator skip every other entry.
while (@list) {
    sleep 1;
    # Control files are checked before the entry is removed from @list, so
    # shut() still saves the current identifier.
    if (-e "ARKI_SHUTDOWN")
    { shut(); }
    if (-e "ARKI_PAUSE")
    { pause(); }

    my $i = $list[0];
    if ($i eq '') {
        # Blank line in the queue file: nothing to download.
        shift @list;
        next;
    }
    print "$i started\n";

    my $url = "$base/$i/$i.pdf";
    my $response = $ua->get($url, ':content_file' => "$dump/$i");
    warn "$i: pdf request failed: " . $response->status_line . "\n"
        unless $response->is_success;

    # The original requested $url a second time here, so the PDF was stored
    # under the metadata name as well.
    # NOTE(review): the original $murl was "$base/${i}_meta.xml" (no item
    # directory); the item/file layout of the PDF URL is assumed -- confirm.
    my $murl = "$base/$i/${i}_meta.xml";
    my $mresponse = $ua->get($murl, ':content_file' => "$dump/${i}_meta.xml");
    warn "$i: metadata request failed: " . $mresponse->status_line . "\n"
        unless $mresponse->is_success;

    print "$i ended\n";
    shift @list;
    $count++;

    # Every 20 finished entries, save what is left of the queue to $init.
    if ($count % 20 == 0) {
        if (open(my $finitfh, '>', $init)) {
            print {$finitfh} "$_\n" for @list;
            close $finitfh or warn "Couldn't finish writing $init: $!\n";
        }
        else {
            # Best effort: a failed checkpoint should not stop the downloads.
            warn "Couldn't write $init: $!\n";
        }
    }
}
# Queue exhausted: the checkpoint is no longer needed.
unlink $init;
# SUB ########################
# Sleep for the number of seconds written on the first line of ARKI_PAUSE.
# The file is left in place, so the pause is taken again on every call for
# as long as the file exists.
# Takes no arguments and returns nothing.
# Dies if ARKI_PAUSE cannot be opened. A first line that is not a whole
# number of seconds is reported with a warning and no sleep takes place.
sub pause {
    my $pausefile = "ARKI_PAUSE";
    open(my $pfh, '<', $pausefile) or die "no $pausefile: $!";
    my $timeout = readline $pfh;
    close $pfh;

    # An empty file yields undef; treat it like any other unusable value.
    $timeout = '' unless defined $timeout;
    # Trim the newline and any stray whitespace (e.g. a trailing \r).
    $timeout =~ s/\A\s+|\s+\z//g;

    unless ($timeout =~ /\A\d+\z/) {
        warn "ignoring $pausefile: '$timeout' is not a whole number of seconds\n";
        return;
    }

    print "sleeping for $timeout\n";
    sleep $timeout;
    return;
}
# Handle a shutdown request: remove the ARKI_SHUTDOWN trigger file, write
# the entries still in @list to $init (one per line) and end the process
# with die "Shutdown CLEAN".
# Takes no arguments and never returns.
# Dies with a different message when $init cannot be written, so a lost
# queue is not reported as a clean shutdown.
sub shut {
    my $shut = "ARKI_SHUTDOWN";
    unlink $shut;
    open(my $sinitfh, '>', $init)
        or die "Shutdown could not write $init: $!";
    print {$sinitfh} "$_\n" for @list;
    # Buffered write errors only surface at close, so check it.
    close $sinitfh
        or die "Shutdown could not finish writing $init: $!";
    die "Shutdown CLEAN";
}
# Build the LWP::UserAgent used for the downloads.
# Takes no arguments; returns a new agent with a fixed agent string, a
# From address and a 45 second timeout.
sub uagent {
    my %settings = (
        agent   => "Mozilla/50.0.2",
        from    => '[email protected]',
        timeout => 45,
    );
    return LWP::UserAgent->new(%settings);
}