diff --git a/phpstan.dist.neon b/phpstan.dist.neon index 848a033a1..6562144b2 100644 --- a/phpstan.dist.neon +++ b/phpstan.dist.neon @@ -14,12 +14,6 @@ parameters: count: 1 path: src/HTTP/Psr18Client.php - # SimplePie\Content\Type\Sniffer::__construct(): Parameter $file could be mixed due to BC. - - - message: '(Result of \|\| is always false\.)' - count: 1 - path: src/Content/Type/Sniffer.php - # Not used since https://github.com/simplepie/simplepie/commit/b2eb0134d53921e75f0fa70b1cf901ed82b988b1 but cannot be removed due to BC. - '(Constructor of class SimplePie\\Enclosure has an unused parameter \$javascript\.)' diff --git a/src/Content/Type/Sniffer.php b/src/Content/Type/Sniffer.php index 0538b9b5f..16d82c2b5 100644 --- a/src/Content/Type/Sniffer.php +++ b/src/Content/Type/Sniffer.php @@ -27,26 +27,17 @@ class Sniffer /** * File object * - * @var File|Response + * @var File */ public $file; /** * Create an instance of the class with the input file * - * @param File|Response $file Input file + * @param File $file Input file */ - public function __construct(/* File */ $file) + public function __construct(File $file) { - if (!is_object($file) || !$file instanceof Response) { - // For BC we're asking for `File`, but internally we accept every `Response` implementation - throw new InvalidArgumentException(sprintf( - '%s(): Argument #1 ($file) must be of type %s', - __METHOD__, - File::class - ), 1); - } - $this->file = $file; } diff --git a/src/File.php b/src/File.php index 6d03d4827..8abbfcf59 100644 --- a/src/File.php +++ b/src/File.php @@ -110,52 +110,27 @@ public function __construct(string $url, int $timeout = 10, int $redirects = 5, } if (!$force_fsockopen && function_exists('curl_exec')) { $this->method = \SimplePie\SimplePie::FILE_SOURCE_REMOTE | \SimplePie\SimplePie::FILE_SOURCE_CURL; - $fp = curl_init(); - $headers2 = []; - foreach ($headers as $key => $value) { - $headers2[] = "$key: $value"; - } - if (isset($curl_options[CURLOPT_HTTPHEADER])) { - if (is_array($curl_options[CURLOPT_HTTPHEADER])) { - $headers2 = array_merge($headers2, $curl_options[CURLOPT_HTTPHEADER]); - } - unset($curl_options[CURLOPT_HTTPHEADER]); - } - if (version_compare(\SimplePie\Misc::get_curl_version(), '7.21.6', '>=')) { - curl_setopt($fp, CURLOPT_ACCEPT_ENCODING, ''); - } else { - curl_setopt($fp, CURLOPT_ENCODING, ''); - } - /** @var non-empty-string $url */ - curl_setopt($fp, CURLOPT_URL, $url); - curl_setopt($fp, CURLOPT_RETURNTRANSFER, true); - curl_setopt($fp, CURLOPT_FAILONERROR, true); - curl_setopt($fp, CURLOPT_TIMEOUT, $timeout); - curl_setopt($fp, CURLOPT_CONNECTTIMEOUT, $timeout); - // curl_setopt($fp, CURLOPT_REFERER, \SimplePie\Misc::url_remove_credentials($url)); // FreshRSS removed - curl_setopt($fp, CURLOPT_USERAGENT, $useragent); - curl_setopt($fp, CURLOPT_HTTPHEADER, $headers2); + $fp = self::curlInit($url, $timeout, $headers, $useragent, $curl_options); $responseHeaders = ''; curl_setopt($fp, CURLOPT_HEADERFUNCTION, function ($ch, string $header) use (&$responseHeaders) { $responseHeaders .= $header; return strlen($header); }); - foreach ($curl_options as $curl_param => $curl_value) { - curl_setopt($fp, $curl_param, $curl_value); - } - $responseBody = curl_exec($fp); $responseHeaders .= "\r\n"; if (curl_errno($fp) === CURLE_WRITE_ERROR || curl_errno($fp) === CURLE_BAD_CONTENT_ENCODING) { $this->error = 'cURL error ' . curl_errno($fp) . ': ' . curl_error($fp); // FreshRSS $this->on_http_response($responseBody === false ? false : $responseHeaders . $responseBody, $curl_options); $this->error = null; // FreshRSS - if (version_compare(\SimplePie\Misc::get_curl_version(), '7.21.6', '>=')) { - curl_setopt($fp, CURLOPT_ACCEPT_ENCODING, null); - } else { - curl_setopt($fp, CURLOPT_ENCODING, null); + if (\PHP_VERSION_ID < 80000) { + curl_close($fp); } + $fp = self::curlInit($url, $timeout, $headers, $useragent, $curl_options, false); $responseHeaders = ''; + curl_setopt($fp, CURLOPT_HEADERFUNCTION, function ($ch, string $header) use (&$responseHeaders) { + $responseHeaders .= $header; + return strlen($header); + }); $responseBody = curl_exec($fp); $responseHeaders .= "\r\n"; } @@ -315,7 +290,7 @@ public function __construct(string $url, int $timeout = 10, int $redirects = 5, } else { $this->method = \SimplePie\SimplePie::FILE_SOURCE_LOCAL | \SimplePie\SimplePie::FILE_SOURCE_FILE_GET_CONTENTS; $filebody = false; - if (empty($url) || !is_readable($url) || false === $filebody = file_get_contents($url)) { + if (empty($url) || !is_readable($url) || false === ($filebody = file_get_contents($url))) { $this->body = ''; $this->error = sprintf('file "%s" is not readable', $url); $this->success = false; @@ -459,10 +434,8 @@ private function flatten_headers(array $headers): array */ final public static function fromResponse(Response $response): self { - $headers = []; - - foreach ($response->get_headers() as $name => $header) { - $headers[$name] = implode(', ', $header); + if ($response instanceof self) { + return $response; } /** @var File */ @@ -470,13 +443,61 @@ final public static function fromResponse(Response $response): self $file->url = $response->get_final_requested_uri(); $file->useragent = null; - $file->headers = $headers; + $file->set_headers($response->get_headers()); $file->body = $response->get_body_content(); $file->status_code = $response->get_status_code(); $file->permanent_url = $response->get_permanent_uri(); return $file; } + + /** + * @param array $headers + * @param array $curl_options + * @return \CurlHandle + */ + private static function curlInit( + string $url, + int $timeout, + array $headers, + string $useragent, + array $curl_options, + bool $setAcceptEncoding = true + ) { + $fp = curl_init(); + + $headers2 = []; + foreach ($headers as $key => $value) { + $headers2[] = "$key: $value"; + } + if (isset($curl_options[CURLOPT_HTTPHEADER])) { + if (is_array($curl_options[CURLOPT_HTTPHEADER])) { + $headers2 = array_merge($headers2, $curl_options[CURLOPT_HTTPHEADER]); + } + unset($curl_options[CURLOPT_HTTPHEADER]); + } + if ($setAcceptEncoding) { + if (version_compare(\SimplePie\Misc::get_curl_version(), '7.21.6', '>=')) { + curl_setopt($fp, CURLOPT_ACCEPT_ENCODING, ''); + } else { + curl_setopt($fp, CURLOPT_ENCODING, ''); + } + } + /** @var non-empty-string $url */ + curl_setopt($fp, CURLOPT_URL, $url); + curl_setopt($fp, CURLOPT_RETURNTRANSFER, true); + curl_setopt($fp, CURLOPT_FAILONERROR, true); + curl_setopt($fp, CURLOPT_TIMEOUT, $timeout); + curl_setopt($fp, CURLOPT_CONNECTTIMEOUT, $timeout); + // curl_setopt($fp, CURLOPT_REFERER, \SimplePie\Misc::url_remove_credentials($url)); // FreshRSS removed + curl_setopt($fp, CURLOPT_USERAGENT, $useragent); + curl_setopt($fp, CURLOPT_HTTPHEADER, $headers2); + foreach ($curl_options as $curl_param => $curl_value) { + curl_setopt($fp, $curl_param, $curl_value); + } + + return $fp; + } } class_alias('SimplePie\File', 'SimplePie_File'); diff --git a/src/HTTP/Psr18Client.php b/src/HTTP/Psr18Client.php index b920333e4..f629c1b98 100644 --- a/src/HTTP/Psr18Client.php +++ b/src/HTTP/Psr18Client.php @@ -12,6 +12,7 @@ use Psr\Http\Client\ClientInterface; use Psr\Http\Message\RequestFactoryInterface; use Psr\Http\Message\UriFactoryInterface; +use SimplePie\Misc; use Throwable; /** @@ -82,7 +83,7 @@ public function request(string $method, string $url, array $headers = []): Respo ), 1); } - if (preg_match('/^http(s)?:\/\//i', $url)) { + if (Misc::is_remote_uri($url)) { return $this->requestUrl($method, $url, $headers); } @@ -119,7 +120,7 @@ private function requestUrl(string $method, string $url, array $headers): Respon $statusCode = $response->getStatusCode(); // If we have a redirect - if (in_array($statusCode, [300, 301, 302, 303, 307]) && $response->hasHeader('Location')) { + if (in_array($statusCode, [300, 301, 302, 303, 307, 308]) && $response->hasHeader('Location')) { // Prevent infinity redirect loops if ($remainingRedirects <= 0) { break; @@ -130,7 +131,7 @@ private function requestUrl(string $method, string $url, array $headers): Respon $requestedUrl = $response->getHeaderLine('Location'); - if ($statusCode === 301) { + if ($statusCode === 301 || $statusCode === 308) { $permanentUrl = $requestedUrl; } diff --git a/src/HTTP/Psr7Response.php b/src/HTTP/Psr7Response.php index 418fddf52..f5ee2c104 100644 --- a/src/HTTP/Psr7Response.php +++ b/src/HTTP/Psr7Response.php @@ -32,13 +32,13 @@ final class Psr7Response implements Response /** * @var string */ - private $requested_url; + private $final_requested_url; - public function __construct(ResponseInterface $response, string $permanent_url, string $requested_url) + public function __construct(ResponseInterface $response, string $permanent_url, string $final_requested_url) { $this->response = $response; $this->permanent_url = $permanent_url; - $this->requested_url = $requested_url; + $this->final_requested_url = $final_requested_url; } public function get_permanent_uri(): string @@ -48,7 +48,7 @@ public function get_permanent_uri(): string public function get_final_requested_uri(): string { - return $this->requested_url; + return $this->final_requested_url; } public function get_status_code(): int @@ -71,7 +71,7 @@ public function has_header(string $name): bool public function with_header(string $name, $value) { - return new self($this->response->withHeader($name, $value), $this->permanent_url, $this->requested_url); + return new self($this->response->withHeader($name, $value), $this->permanent_url, $this->final_requested_url); } public function get_header(string $name): array @@ -86,6 +86,6 @@ public function get_header_line(string $name): string public function get_body_content(): string { - return $this->response->getBody()->__toString(); + return (string) $this->response->getBody(); } } diff --git a/src/HTTP/RawTextResponse.php b/src/HTTP/RawTextResponse.php index fee5e5372..732e1fcd4 100644 --- a/src/HTTP/RawTextResponse.php +++ b/src/HTTP/RawTextResponse.php @@ -28,20 +28,20 @@ final class RawTextResponse implements Response private $permanent_url; /** - * @var array> + * @var array> */ private $headers = []; /** * @var string */ - private $requested_url; + private $final_requested_url; public function __construct(string $raw_text, string $filepath) { $this->raw_text = $raw_text; $this->permanent_url = $filepath; - $this->requested_url = $filepath; + $this->final_requested_url = $filepath; } public function get_permanent_uri(): string @@ -51,7 +51,7 @@ public function get_permanent_uri(): string public function get_final_requested_uri(): string { - return $this->requested_url; + return $this->final_requested_url; } public function get_status_code(): int diff --git a/src/Locator.php b/src/Locator.php index 48d2c4624..fbd7f05e9 100644 --- a/src/Locator.php +++ b/src/Locator.php @@ -26,9 +26,9 @@ class Locator implements RegistryAware { /** @var ?string */ - public $useragent = null; + public $useragent; /** @var int */ - public $timeout = 10; + public $timeout; /** @var File */ public $file; /** @var string[] */ @@ -46,11 +46,11 @@ class Locator implements RegistryAware /** @var int */ public $checked_feeds = 0; /** @var int */ - public $max_checked_feeds = 10; + public $max_checked_feeds; /** @var bool */ - public $force_fsockopen = false; + public $force_fsockopen; /** @var array */ - public $curl_options = []; + public $curl_options; /** @var ?\DomDocument */ public $dom; /** @var ?Registry */ @@ -167,7 +167,8 @@ public function is_feed(Response $file, bool $check_html = false) assert($this->registry !== null); if (Misc::is_remote_uri($file->get_final_requested_uri())) { - $sniffer = $this->registry->create(Content\Type\Sniffer::class, [$file]); + $fileResponse = File::fromResponse($file); + $sniffer = $this->registry->create(Content\Type\Sniffer::class, [$fileResponse]); $sniffed = $sniffer->get_type(); $mime_types = ['application/rss+xml', 'application/rdf+xml', 'text/rdf', 'application/atom+xml', 'text/xml', diff --git a/src/SimplePie.php b/src/SimplePie.php index d80174a38..9bd65f892 100644 --- a/src/SimplePie.php +++ b/src/SimplePie.php @@ -2282,7 +2282,8 @@ protected function fetch_data(&$cache) $headers[$key] = implode(', ', $values); } - $sniffer = $this->registry->create(Sniffer::class, [&$file]); + $fileResponse = File::fromResponse($file); + $sniffer = $this->registry->create(Sniffer::class, [$fileResponse]); $sniffed = $sniffer->get_type(); return [$headers, $sniffed]; diff --git a/tests/Integration/SimplePieTest.php b/tests/Integration/SimplePieTest.php index 4cedf77b3..ce21f2c7d 100644 --- a/tests/Integration/SimplePieTest.php +++ b/tests/Integration/SimplePieTest.php @@ -555,4 +555,39 @@ public function testMicroformatLinkHub(string $data, ?string $hubUrl, ?string $s self::assertLessThanOrEqual(1, count($feed->get_links('self') ?? []), 'Link rel=self should not be promoted from HTML when it is already present in headers'); self::assertSame($bogoUrl, $feed->get_link(0, 'bogo'), 'Link rel=bogo does not match'); } + + public function testSimplePieIgnoresBadContentEncodingHeader(): void + { + // Ensuring that cURL is available + \assert(function_exists('curl_exec')); + $server = new MockWebServer(); + $server->start(); + + $filepath = dirname(__FILE__, 2) . '/data/feed_rss-2.0.xml'; + $body = file_get_contents($filepath); + \assert($body !== false); // For PHPStan + + $url = $server->setResponseOfPath( + '/bad-content-encoding', + new MockWebServerResponse($body, [ + 'content-type: application/rss+xml', + 'content-encoding: aws-chunked', + ], 200) + ); + + $feed = new SimplePie(); + $feed->enable_cache(false); + + $feed->set_feed_url($url); + + // For some reason, without this, Sniffer thinks we have text/plain on error. + $feed->force_feed(true); + + $return = $feed->init(); + $server->stop(); + + $error = implode("\n", (array) ($feed->error() ?? '')); // For PHPStan + self::assertTrue($return, 'Failed fetching feed: ' . $error); + self::assertSame(200, $feed->status_code()); + } } diff --git a/utils/PHPStan/extension.neon b/utils/PHPStan/extension.neon index 3b6811a20..3352454e6 100644 --- a/utils/PHPStan/extension.neon +++ b/utils/PHPStan/extension.neon @@ -1,3 +1,5 @@ +includes: + - version_dependent.php services: - diff --git a/utils/PHPStan/version_dependent.php b/utils/PHPStan/version_dependent.php new file mode 100644 index 000000000..ecb49cac8 --- /dev/null +++ b/utils/PHPStan/version_dependent.php @@ -0,0 +1,15 @@ + [ + 'typeAliases' => $typeAliases, + ], +];