-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathget_web_content.pl
executable file
·134 lines (118 loc) · 4.4 KB
/
get_web_content.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#! /usr/bin/env perl
use warnings;
use strict;
use Try::Tiny qw(try catch);
use IO::All;
use Config::General qw(ParseConfig);
use Net::Netrc;
use Data::Dumper;
use Ska::Convert qw(time2date date2time);
use Ska::Web;
use Clone qw(clone);
use Carp;
our $Task = 'arc3';
our $TaskData = "$ENV{SKA_DATA}/$Task";
our $TaskShare = "$ENV{SKA_SHARE}/$Task";
# Set global current time at beginning of execution
our $CurrentTime = @ARGV ? date2time(shift @ARGV, 'unix') : time;
our $Debug = 0;
our @warn; # Global set of processing warnings (warn but don't die)
our %opt = ParseConfig(-ConfigFile => "$TaskShare/$Task.cfg");
our %web_content_cfg = ParseConfig(-ConfigFile => "$TaskShare/$opt{file}{web_content_cfg}");
our %web_data = %{ clone(\%web_content_cfg) };
while (my ($web_name, $web) = each %web_data) {
my $url = $web->{url};
if (exists $web->{add_tstart_tstop}) { # Ugh, custom code for chandra image
my $tstart = time2date($CurrentTime, 'unix');
my $tstop = time2date($CurrentTime+228600, 'unix');
$url .= "&tstart=$tstart&tstop=$tstop";
}
my %web_opt = map { $_ => $web->{$_} } grep {not ref($web->{$_})} keys %{$web};
# Get username and password from netrc if required
if (defined $web_opt{netrc}) {
my $netrc = Net::Netrc->lookup($web_opt{netrc});
$web_opt{user} = $netrc->login;
$web_opt{passwd} = $netrc->password;
}
my ($html, $error, $header) = Ska::Web::get_url($url, %web_opt);
if ($error) {
warning($web, "$error for web data $web_name ($url)");
next;
}
# Parse each bit of 'content' (i.e. text)
while (my ($content_name, $content) = each %{$web->{content}}) {
my ($html_content, $error) = Ska::Web::get_html_content($html,
url => $url,
%{$content});
if ($error) {
warning($content, "$error for web content $content_name ($url)");
}
if ($content->{file}) {
$content->{outfile} = "$TaskData/".$content->{file};
$html_content > io($content->{outfile});
if (defined $header->last_modified){
utime($header->last_modified, $header->last_modified, $content->{outfile});
}
} else {
$content->{content} = $html_content;
}
}
# Grab each image
while (my ($image_name, $image) = each %{$web->{image}}) {
my $tries = $image->{tries} || 1;
my $img_file = $image->{file};
$image->{outfile} = "$TaskData/$img_file";
my $got_image = 0;
TRY: for my $try (1 .. $tries) {
try {
my ($html_content, $error, @image) = Ska::Web::get_html_content(
$html,
url => $url,
filter => $image->{filter});
if ($error) {
die $error;
}
if (@image != 1) {
die "Did not get exactly one $img_file image";
}
if ((length $image[0]->{data}) < 100) {
die "Retrieved malformed $img_file image after $tries try(s)";
}
$image->{content} = $html_content;
$image[0]->{data} > io($image->{outfile});
utime($image[0]->{header}->last_modified,
$image[0]->{header}->last_modified,
$image->{outfile});
$got_image = 1;
}
catch {
if ($try < $tries){
sleep($image->{'sleep'} || 10)
}
if (not defined $image->{warn_age_hours} and
$image->{warn_bad_image} and $try == $tries) {
warning($image, $_);
}
};
last TRY if $got_image == 1;
}
if (($got_image == 0) and (defined $image->{warn_age_hours})){
if (((-M $image->{outfile}) * 24) > $image->{warn_age_hours}){
warning(
$image,
"Did not get $img_file and more than $image->{warn_age_hours} hours old");
}
}
}
}
# Save the data.
Config::General->new(\%web_data)->save_file("$TaskData/$opt{file}{web_content}");
print STDERR join("\n", @warn), "\n" if @warn;
##***************************************************************************
sub warning {
##***************************************************************************
my $h = shift;
my $msg = shift;
push @{$h->{warn}}, $msg;
push @warn, "Warning: $msg";
}