-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathscrape.php
114 lines (82 loc) · 4.23 KB
/
scrape.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
<?php
// Entry point: accepts POSTed credentials and responds with trip data, either
// fetched live through martaLogin() or read from a local fixture for the
// built-in "test" account.
ini_set('display_errors', 0);
header("Access-Control-Allow-Origin: *");
// logging in with cURL based on http://thisinterestsme.com/php-login-to-website-with-curl/
date_default_timezone_set('America/New_York');

// Read the credentials defensively: a missing or non-string field (e.g. an
// array submitted as providedUsername[]) is treated the same as an empty one.
$providedUsername = (isset($_POST['providedUsername']) && is_string($_POST['providedUsername']))
    ? $_POST['providedUsername']
    : '';
$providedPassword = (isset($_POST['providedPassword']) && is_string($_POST['providedPassword']))
    ? $_POST['providedPassword']
    : '';

if ($providedUsername === '' || $providedPassword === '') {
    // nothing (usable) posted
    exit("username or password is missing!");
}

// Only the exact pair test/test selects the dummy data. Any other combination
// (including a real account whose password happens to be "test") is a real login.
if (strtolower($providedUsername) === 'test' && $providedPassword === 'test') {
    // fetch dummy data for test user Joanna M Customer
    $response = file_get_contents('example-trips.json');
    if ($response === false) {
        http_response_code(500);
        exit("example data is unavailable");
    }
    // The file contents are encoded as a JSON string, the same way
    // martaLogin() encodes the live response body.
    exit(json_encode($response));
}

// martaLogin() echoes its result itself and returns nothing.
martaLogin();
/**
 * Logs in to martapp.mvtransit.com with the POSTed credentials and echoes the
 * calendar data for the requested month as a JSON-encoded string.
 *
 * Reads from $_POST:
 *   - providedUsername, providedPassword: credentials sent to the login form.
 *   - month (optional): 1-12; anything else falls back to the current month.
 *   - year  (optional): four digits; anything else falls back to the current year.
 *
 * The raw response body of the calendar request is passed through
 * json_encode() and written to the output; nothing is returned.
 *
 * @throws Exception if cURL cannot be initialised or either request fails
 *                   at the transport level.
 */
function martaLogin()
{
    // Month and year end up in the query string, so accept plain digits only.
    $month = (int) date("n");
    if (isset($_POST['month']) && is_string($_POST['month']) && preg_match('/^\d{1,2}$/', $_POST['month'])) {
        $requestedMonth = (int) $_POST['month'];
        if ($requestedMonth >= 1 && $requestedMonth <= 12) {
            $month = $requestedMonth;
        }
    }
    $year = (int) date("Y");
    if (isset($_POST['year']) && is_string($_POST['year']) && preg_match('/^\d{4}$/', $_POST['year'])) {
        $year = (int) $_POST['year'];
    }

    //Set a user agent. This basically tells the server that we are using Chrome ;)
    $userAgent = 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2309.372 Safari/537.36';
    //URL of the login form.
    $loginFormUrl = 'https://martapp.mvtransit.com/Account/Login';
    //Login action URL. Here it is the same URL as the login form.
    $loginActionUrl = 'https://martapp.mvtransit.com/Account/Login';
    $tripsUrl = 'https://martapp.mvtransit.com/Trips/GetCalendarData?'
        . http_build_query(array('year' => $year, 'month' => $month), '', '&');

    //An associative array that represents the required form fields.
    //The keys must match the names of the login form's fields.
    $postValues = array(
        'Username' => $_POST['providedUsername'],
        'Password' => $_POST['providedPassword']
    );

    $curl = curl_init();
    if ($curl === false) {
        throw new Exception('Unable to initialise cURL.');
    }

    curl_setopt_array($curl, array(
        //Send the login as a POST to the form's action URL.
        CURLOPT_URL => $loginActionUrl,
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => http_build_query($postValues),
        //User credentials travel over this connection, so the server
        //certificate and host name must be verified.
        CURLOPT_SSL_VERIFYPEER => true,
        CURLOPT_SSL_VERIFYHOST => 2,
        //An empty cookie file name turns on cURL's cookie handling without
        //reading or writing any file: the session cookie lives only in this
        //handle and is never shared between requests from different users.
        CURLOPT_COOKIEFILE => '',
        //Some websites attempt to block bot user agents.
        CURLOPT_USERAGENT => $userAgent,
        //Return the response instead of printing it.
        CURLOPT_RETURNTRANSFER => true,
        //Present the login form as the referring page.
        CURLOPT_REFERER => $loginFormUrl,
        //Do not follow redirects.
        CURLOPT_FOLLOWLOCATION => false,
    ));

    //Execute the login request; only the cookies it sets are of interest.
    curl_exec($curl);
    if (curl_errno($curl)) {
        throw new Exception(curl_error($curl));
    }

    //We should be logged in by now. Fetch the password protected page on the
    //same handle (same cookies, user agent and TLS settings). Switch the
    //method back to GET so the credentials are not sent a second time.
    curl_setopt($curl, CURLOPT_HTTPGET, true);
    curl_setopt($curl, CURLOPT_URL, $tripsUrl);

    $html = curl_exec($curl);
    if ($html === false) {
        throw new Exception(curl_error($curl));
    }

    //The body is emitted as a JSON string (i.e. encoded once more).
    echo json_encode($html);
}