Skip to content

Commit

Permalink
feat: add etag support to getContents
Browse files Browse the repository at this point in the history
  • Loading branch information
dvikan committed Jan 12, 2024
1 parent 080e293 commit ca13e8b
Show file tree
Hide file tree
Showing 8 changed files with 42 additions and 31 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ PHP ini config:
```ini
; /etc/php/8.2/fpm/conf.d/30-rss-bridge.ini

max_execution_time = 20
max_execution_time = 15
memory_limit = 64M
```

Expand Down
2 changes: 1 addition & 1 deletion config.default.ini.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@

[http]
; Operation timeout in seconds
timeout = 30
timeout = 15

; Operation retry count in case of curl error
retries = 2
Expand Down
5 changes: 2 additions & 3 deletions lib/BridgeCard.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public static function displayBridgeCard($bridgeClassName, $formats, $isActive =

$bridge = $bridgeFactory->create($bridgeClassName);

$isHttps = strpos($bridge->getURI(), 'https') === 0;
$isHttps = str_starts_with($bridge->getURI(), 'https');

$uri = $bridge->getURI();
$name = $bridge->getName();
Expand Down Expand Up @@ -113,8 +113,7 @@ private static function getFormHeader($bridgeClassName, $isHttps = false, $param
}

if (!$isHttps) {
$form .= '<div class="secure-warning">Warning :
This bridge is not fetching its content through a secure connection</div>';
$form .= '<div class="secure-warning">Warning: This bridge is not fetching its content through a secure connection</div>';
}

return $form;
Expand Down
2 changes: 1 addition & 1 deletion lib/FeedExpander.php
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public function collectExpandableDatas(string $url, $maxItems = -1)
}

/**
* This method is overidden by bridges
* This method is overridden by bridges
*
* @return array
*/
Expand Down
4 changes: 2 additions & 2 deletions lib/FeedParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
*
* Scrapes out rss 0.91, 1.0, 2.0 and atom 1.0.
*
* Produce arrays meant to be used inside rss-bridge.
* Produces arrays meant to be used inside rss-bridge.
*
* The item structure is tweaked so that works with FeedItem
* The item structure is tweaked so that it works with FeedItem
*/
final class FeedParser
{
Expand Down
5 changes: 4 additions & 1 deletion lib/XPathAbstract.php
Original file line number Diff line number Diff line change
Expand Up @@ -518,7 +518,10 @@ protected function formatItemUri($value)
if (strlen($value) === 0) {
return '';
}
if (strpos($value, 'http://') === 0 || strpos($value, 'https://') === 0) {
if (
strpos($value, 'http://') === 0
|| strpos($value, 'https://') === 0
) {
return $value;
}

Expand Down
49 changes: 27 additions & 22 deletions lib/contents.php
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,32 @@ function getContents(
$headerValue = trim(implode(':', array_slice($parts, 1)));
$httpHeadersNormalized[$headerName] = $headerValue;
}

$requestBodyHash = null;
if (isset($curlOptions[CURLOPT_POSTFIELDS])) {
$requestBodyHash = md5(Json::encode($curlOptions[CURLOPT_POSTFIELDS], false));
}
$cacheKey = implode('_', ['server', $url, $requestBodyHash]);

/** @var Response $cachedResponse */
$cachedResponse = $cache->get($cacheKey);
if ($cachedResponse) {
$lastModified = $cachedResponse->getHeader('last-modified');
if ($lastModified) {
try {
// Some servers send Unix timestamp instead of RFC7231 date. Prepend it with @ to allow parsing as DateTime
$lastModified = new \DateTimeImmutable((is_numeric($lastModified) ? '@' : '') . $lastModified);
$config['if_not_modified_since'] = $lastModified->getTimestamp();
} catch (Exception $e) {
// Failed to parse last-modified
}
}
$etag = $cachedResponse->getHeader('etag');
if ($etag) {
$httpHeadersNormalized['if-none-match'] = $etag;
}
}

// Snagged from https://github.com/lwthiker/curl-impersonate/blob/main/firefox/curl_ff102
$defaultHttpHeaders = [
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
Expand All @@ -35,6 +61,7 @@ function getContents(
'Sec-Fetch-User' => '?1',
'TE' => 'trailers',
];

$config = [
'useragent' => Configuration::getConfig('http', 'useragent'),
'timeout' => Configuration::getConfig('http', 'timeout'),
Expand All @@ -53,28 +80,6 @@ function getContents(
$config['proxy'] = Configuration::getConfig('proxy', 'url');
}

$requestBodyHash = null;
if (isset($curlOptions[CURLOPT_POSTFIELDS])) {
$requestBodyHash = md5(Json::encode($curlOptions[CURLOPT_POSTFIELDS], false));
}
$cacheKey = implode('_', ['server', $url, $requestBodyHash]);

/** @var Response $cachedResponse */
$cachedResponse = $cache->get($cacheKey);
if ($cachedResponse) {
$cachedLastModified = $cachedResponse->getHeader('last-modified');
if ($cachedLastModified) {
try {
// Some servers send Unix timestamp instead of RFC7231 date. Prepend it with @ to allow parsing as DateTime
$cachedLastModified = new \DateTimeImmutable((is_numeric($cachedLastModified) ? '@' : '') . $cachedLastModified);
$config['if_not_modified_since'] = $cachedLastModified->getTimestamp();
} catch (Exception $dateTimeParseFailue) {
// Ignore invalid 'Last-Modified' HTTP header value
}
}
// todo: We should also check for Etag
}

$response = $httpClient->request($url, $config);

switch ($response->getCode()) {
Expand Down
4 changes: 4 additions & 0 deletions lib/http.php
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,10 @@ public function getHeaders(): array
}

/**
* HTTP response may have multiple headers with the same name.
*
* By default, this method returns only the last header.
*
* @return string[]|string|null
*/
public function getHeader(string $name, bool $all = false)
Expand Down

0 comments on commit ca13e8b

Please sign in to comment.