From 74ca8f1455f18d9a804b162c9b8bef74d08cc5bb Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Fri, 12 Sep 2025 08:50:37 +0200 Subject: [PATCH 01/13] File: Fix headers when creating `File` from `Response` `get_headers()` actually returns the `parsed_headers` property, which we did not previously set. --- src/File.php | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/File.php b/src/File.php index 0e00e81fd..eba59f12e 100644 --- a/src/File.php +++ b/src/File.php @@ -426,10 +426,8 @@ private function flatten_headers(array $headers): array */ final public static function fromResponse(Response $response): self { - $headers = []; - - foreach ($response->get_headers() as $name => $header) { - $headers[$name] = implode(', ', $header); + if ($response instanceof self) { + return $response; } /** @var File */ @@ -437,7 +435,7 @@ final public static function fromResponse(Response $response): self $file->url = $response->get_final_requested_uri(); $file->useragent = null; - $file->headers = $headers; + $file->set_headers($response->get_headers()); $file->body = $response->get_body_content(); $file->status_code = $response->get_status_code(); $file->permanent_url = $response->get_permanent_uri(); From a4cf15377c328151e6bb60754f774d54dd0fc423 Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Fri, 12 Sep 2025 08:51:27 +0200 Subject: [PATCH 02/13] Psr18Client: Use isRemote helper --- src/HTTP/Psr18Client.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/HTTP/Psr18Client.php b/src/HTTP/Psr18Client.php index b920333e4..9fb9df03f 100644 --- a/src/HTTP/Psr18Client.php +++ b/src/HTTP/Psr18Client.php @@ -82,7 +82,7 @@ public function request(string $method, string $url, array $headers = []): Respo ), 1); } - if (preg_match('/^http(s)?:\/\//i', $url)) { + if (Misc::is_remote_uri($url)) { return $this->requestUrl($method, $url, $headers); } From 4dc7c144adee627290cc446ccd87a70d15af1c62 Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Fri, 12 Sep 
2025 08:51:50 +0200 Subject: [PATCH 03/13] Psr18Client: Support 308 redirect https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Status/308 --- src/HTTP/Psr18Client.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/HTTP/Psr18Client.php b/src/HTTP/Psr18Client.php index 9fb9df03f..f629c1b98 100644 --- a/src/HTTP/Psr18Client.php +++ b/src/HTTP/Psr18Client.php @@ -12,6 +12,7 @@ use Psr\Http\Client\ClientInterface; use Psr\Http\Message\RequestFactoryInterface; use Psr\Http\Message\UriFactoryInterface; +use SimplePie\Misc; use Throwable; /** @@ -119,7 +120,7 @@ private function requestUrl(string $method, string $url, array $headers): Respon $statusCode = $response->getStatusCode(); // If we have a redirect - if (in_array($statusCode, [300, 301, 302, 303, 307]) && $response->hasHeader('Location')) { + if (in_array($statusCode, [300, 301, 302, 303, 307, 308]) && $response->hasHeader('Location')) { // Prevent infinity redirect loops if ($remainingRedirects <= 0) { break; @@ -130,7 +131,7 @@ private function requestUrl(string $method, string $url, array $headers): Respon $requestedUrl = $response->getHeaderLine('Location'); - if ($statusCode === 301) { + if ($statusCode === 301 || $statusCode === 308) { $permanentUrl = $requestedUrl; } From 7032ca6567209c1ca1ccc63df8456807f5ba29cb Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Fri, 12 Sep 2025 08:52:25 +0200 Subject: [PATCH 04/13] =?UTF-8?q?Psr7Response:=20Rename=20private=20proper?= =?UTF-8?q?ty=20requested=5Furl=20=E2=86=92=20final=5Frequested=5Furl?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make it clearer what request it is from and match the getter method name. 
--- src/HTTP/Psr7Response.php | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/HTTP/Psr7Response.php b/src/HTTP/Psr7Response.php index 418fddf52..057fa2612 100644 --- a/src/HTTP/Psr7Response.php +++ b/src/HTTP/Psr7Response.php @@ -32,13 +32,13 @@ final class Psr7Response implements Response /** * @var string */ - private $requested_url; + private $final_requested_url; - public function __construct(ResponseInterface $response, string $permanent_url, string $requested_url) + public function __construct(ResponseInterface $response, string $permanent_url, string $final_requested_url) { $this->response = $response; $this->permanent_url = $permanent_url; - $this->requested_url = $requested_url; + $this->final_requested_url = $final_requested_url; } public function get_permanent_uri(): string @@ -48,7 +48,7 @@ public function get_permanent_uri(): string public function get_final_requested_uri(): string { - return $this->requested_url; + return $this->final_requested_url; } public function get_status_code(): int @@ -71,7 +71,7 @@ public function has_header(string $name): bool public function with_header(string $name, $value) { - return new self($this->response->withHeader($name, $value), $this->permanent_url, $this->requested_url); + return new self($this->response->withHeader($name, $value), $this->permanent_url, $this->final_requested_url); } public function get_header(string $name): array From 9743d5712b42f5ec20ebf6c376054f08a41cb71e Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Fri, 12 Sep 2025 08:54:26 +0200 Subject: [PATCH 05/13] Psr7Response: Use string cast It is slightly cleaner. 
--- src/HTTP/Psr7Response.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/HTTP/Psr7Response.php b/src/HTTP/Psr7Response.php index 057fa2612..f5ee2c104 100644 --- a/src/HTTP/Psr7Response.php +++ b/src/HTTP/Psr7Response.php @@ -86,6 +86,6 @@ public function get_header_line(string $name): string public function get_body_content(): string { - return $this->response->getBody()->__toString(); + return (string) $this->response->getBody(); } } From 8669905178ab2ce00e4f0dfae13c855faebbb9db Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Fri, 12 Sep 2025 08:59:39 +0200 Subject: [PATCH 06/13] =?UTF-8?q?RawTextResponse:=20Rename=20private=20pro?= =?UTF-8?q?perty=20requested=5Furl=20=E2=86=92=20final=5Frequested=5Furl?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make it clearer what request it is from and match the getter method name. --- src/HTTP/RawTextResponse.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/HTTP/RawTextResponse.php b/src/HTTP/RawTextResponse.php index fee5e5372..452a6f1e0 100644 --- a/src/HTTP/RawTextResponse.php +++ b/src/HTTP/RawTextResponse.php @@ -35,13 +35,13 @@ final class RawTextResponse implements Response /** * @var string */ - private $requested_url; + private $final_requested_url; public function __construct(string $raw_text, string $filepath) { $this->raw_text = $raw_text; $this->permanent_url = $filepath; - $this->requested_url = $filepath; + $this->final_requested_url = $filepath; } public function get_permanent_uri(): string @@ -51,7 +51,7 @@ public function get_permanent_uri(): string public function get_final_requested_uri(): string { - return $this->requested_url; + return $this->final_requested_url; } public function get_status_code(): int From 965b8c203b7866597746c9a5b4f45a84f900c5d3 Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Fri, 12 Sep 2025 09:01:10 +0200 Subject: [PATCH 07/13] RawTextResponse: Fix headers property type 
annotation --- src/HTTP/RawTextResponse.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/HTTP/RawTextResponse.php b/src/HTTP/RawTextResponse.php index 452a6f1e0..732e1fcd4 100644 --- a/src/HTTP/RawTextResponse.php +++ b/src/HTTP/RawTextResponse.php @@ -28,7 +28,7 @@ final class RawTextResponse implements Response private $permanent_url; /** - * @var array> + * @var array> */ private $headers = []; From 0c823596b1ef2c447dbcb7c764a96c58403c0e3e Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Fri, 12 Sep 2025 09:23:24 +0200 Subject: [PATCH 08/13] Sniffer: Revert constructor accepting `Response` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In 1.9.0, we extended `Sniffer` to take any `Response`, not just `File`, and had `Locator` pass it `Response`s. Unfortunately, `Sniffer` is not final so that change might have broken existing third-party subclasses only accepting `File`. (Though it will at least not fail with fatal contravariance error since PHP does not enforce Liskov Substitution Principle for constructors.) Let’s go back to `Sniffer` only accepting `File`. --- phpstan.dist.neon | 6 ------ src/Content/Type/Sniffer.php | 15 +++------------ src/Locator.php | 3 ++- src/SimplePie.php | 3 ++- 4 files changed, 7 insertions(+), 20 deletions(-) diff --git a/phpstan.dist.neon b/phpstan.dist.neon index 848a033a1..6562144b2 100644 --- a/phpstan.dist.neon +++ b/phpstan.dist.neon @@ -14,12 +14,6 @@ parameters: count: 1 path: src/HTTP/Psr18Client.php - # SimplePie\Content\Type\Sniffer::__construct(): Parameter $file could be mixed due to BC. - - - message: '(Result of \|\| is always false\.)' - count: 1 - path: src/Content/Type/Sniffer.php - # Not used since https://github.com/simplepie/simplepie/commit/b2eb0134d53921e75f0fa70b1cf901ed82b988b1 but cannot be removed due to BC. 
- '(Constructor of class SimplePie\\Enclosure has an unused parameter \$javascript\.)' diff --git a/src/Content/Type/Sniffer.php b/src/Content/Type/Sniffer.php index d1da5fd71..b007669d0 100644 --- a/src/Content/Type/Sniffer.php +++ b/src/Content/Type/Sniffer.php @@ -27,26 +27,17 @@ class Sniffer /** * File object * - * @var File|Response + * @var File */ public $file; /** * Create an instance of the class with the input file * - * @param File|Response $file Input file + * @param File $file Input file */ - public function __construct(/* File */ $file) + public function __construct(File $file) { - if (!is_object($file) || !$file instanceof Response) { - // For BC we're asking for `File`, but internally we accept every `Response` implementation - throw new InvalidArgumentException(sprintf( - '%s(): Argument #1 ($file) must be of type %s', - __METHOD__, - File::class - ), 1); - } - $this->file = $file; } diff --git a/src/Locator.php b/src/Locator.php index 48d2c4624..afbdebd1e 100644 --- a/src/Locator.php +++ b/src/Locator.php @@ -167,7 +167,8 @@ public function is_feed(Response $file, bool $check_html = false) assert($this->registry !== null); if (Misc::is_remote_uri($file->get_final_requested_uri())) { - $sniffer = $this->registry->create(Content\Type\Sniffer::class, [$file]); + $fileResponse = File::fromResponse($file); + $sniffer = $this->registry->create(Content\Type\Sniffer::class, [$fileResponse]); $sniffed = $sniffer->get_type(); $mime_types = ['application/rss+xml', 'application/rdf+xml', 'text/rdf', 'application/atom+xml', 'text/xml', diff --git a/src/SimplePie.php b/src/SimplePie.php index c243b274e..695f33e74 100644 --- a/src/SimplePie.php +++ b/src/SimplePie.php @@ -2097,7 +2097,8 @@ protected function fetch_data(&$cache) $headers[$key] = implode(', ', $values); } - $sniffer = $this->registry->create(Sniffer::class, [&$file]); + $fileResponse = File::fromResponse($file); + $sniffer = $this->registry->create(Sniffer::class, [$fileResponse]); $sniffed = 
$sniffer->get_type(); return [$headers, $sniffed]; From 07a395e0a6d777d218115526af4bf9d42af2c551 Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Fri, 12 Sep 2025 09:26:24 +0200 Subject: [PATCH 09/13] File: Parenthesize slightly unclear condition This should make it easier to parse for humans. --- src/File.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/File.php b/src/File.php index eba59f12e..7d532d880 100644 --- a/src/File.php +++ b/src/File.php @@ -294,7 +294,7 @@ public function __construct(string $url, int $timeout = 10, int $redirects = 5, } } else { $this->method = \SimplePie\SimplePie::FILE_SOURCE_LOCAL | \SimplePie\SimplePie::FILE_SOURCE_FILE_GET_CONTENTS; - if (empty($url) || !is_readable($url) || false === $filebody = file_get_contents($url)) { + if (empty($url) || !is_readable($url) || false === ($filebody = file_get_contents($url))) { $this->body = ''; $this->error = sprintf('file "%s" is not readable', $url); $this->success = false; From dfb17a9c753a5c3b4c68018487e56ab3f15b733b Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Fri, 12 Sep 2025 09:29:49 +0200 Subject: [PATCH 10/13] Locator: Remove default values for properties set in constructor These will be immediately overwritten. 
--- src/Locator.php | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Locator.php b/src/Locator.php index afbdebd1e..fbd7f05e9 100644 --- a/src/Locator.php +++ b/src/Locator.php @@ -26,9 +26,9 @@ class Locator implements RegistryAware { /** @var ?string */ - public $useragent = null; + public $useragent; /** @var int */ - public $timeout = 10; + public $timeout; /** @var File */ public $file; /** @var string[] */ @@ -46,11 +46,11 @@ class Locator implements RegistryAware /** @var int */ public $checked_feeds = 0; /** @var int */ - public $max_checked_feeds = 10; + public $max_checked_feeds; /** @var bool */ - public $force_fsockopen = false; + public $force_fsockopen; /** @var array */ - public $curl_options = []; + public $curl_options; /** @var ?\DomDocument */ public $dom; /** @var ?Registry */ From a3d81d169a39f880b2b6d0eddeb6fb91047fd2b3 Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Sun, 18 Jan 2026 14:28:50 +0100 Subject: [PATCH 11/13] File: extract curl initialization to separate function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On PHP < 8.1.21, we will not be able to disable `CURLOPT_ACCEPT_ENCODING` by setting it to `NULL`: https://github.com/php/php-src/issues/11433 `curl_setopt` with such values will be a no-op so the second `curl_exec` will still fail with `CURLE_BAD_CONTENT_ENCODING` and return an empty body. Even worse, it will still report 200 in `CURLINFO_HTTP_CODE` so `FileClient` will not throw an exception because the `status_code` property is not 0. Let’s extract the code so that we can create a fresh `CurlHandle` and later make the `CURLOPT_ACCEPT_ENCODING` conditional. Unfortunately, since PHP 8.0 changed `curl_init` to return `CurlHandle` instead of `resource`, and PHPStan has no simple way to make the return type depend on PHP version, we have to resort to an ugly hack using `typeAliases`. 
--- src/File.php | 77 ++++++++++++++++++----------- utils/PHPStan/extension.neon | 2 + utils/PHPStan/version_dependent.php | 15 ++++++ 3 files changed, 64 insertions(+), 30 deletions(-) create mode 100644 utils/PHPStan/version_dependent.php diff --git a/src/File.php b/src/File.php index 7d532d880..bb1927075 100644 --- a/src/File.php +++ b/src/File.php @@ -110,36 +110,7 @@ public function __construct(string $url, int $timeout = 10, int $redirects = 5, } if (!$force_fsockopen && function_exists('curl_exec')) { $this->method = \SimplePie\SimplePie::FILE_SOURCE_REMOTE | \SimplePie\SimplePie::FILE_SOURCE_CURL; - $fp = curl_init(); - $headers2 = []; - foreach ($headers as $key => $value) { - $headers2[] = "$key: $value"; - } - if (isset($curl_options[CURLOPT_HTTPHEADER])) { - if (is_array($curl_options[CURLOPT_HTTPHEADER])) { - $headers2 = array_merge($headers2, $curl_options[CURLOPT_HTTPHEADER]); - } - unset($curl_options[CURLOPT_HTTPHEADER]); - } - if (version_compare(\SimplePie\Misc::get_curl_version(), '7.21.6', '>=')) { - curl_setopt($fp, CURLOPT_ACCEPT_ENCODING, ''); - } else { - curl_setopt($fp, CURLOPT_ENCODING, ''); - } - /** @var non-empty-string $url */ - curl_setopt($fp, CURLOPT_URL, $url); - curl_setopt($fp, CURLOPT_HEADER, true); - curl_setopt($fp, CURLOPT_RETURNTRANSFER, true); - curl_setopt($fp, CURLOPT_FAILONERROR, true); - curl_setopt($fp, CURLOPT_TIMEOUT, $timeout); - curl_setopt($fp, CURLOPT_CONNECTTIMEOUT, $timeout); - curl_setopt($fp, CURLOPT_REFERER, \SimplePie\Misc::url_remove_credentials($url)); - curl_setopt($fp, CURLOPT_USERAGENT, $useragent); - curl_setopt($fp, CURLOPT_HTTPHEADER, $headers2); - foreach ($curl_options as $curl_param => $curl_value) { - curl_setopt($fp, $curl_param, $curl_value); - } - + $fp = self::curlInit($url, $timeout, $headers, $useragent, $curl_options); $responseHeaders = curl_exec($fp); if (curl_errno($fp) === CURLE_WRITE_ERROR || curl_errno($fp) === CURLE_BAD_CONTENT_ENCODING) { if 
(version_compare(\SimplePie\Misc::get_curl_version(), '7.21.6', '>=')) { @@ -442,6 +413,52 @@ final public static function fromResponse(Response $response): self return $file; } + + /** + * @param array $headers + * @param array $curl_options + * @return \CurlHandle + */ + private static function curlInit( + string $url, + int $timeout, + array $headers, + string $useragent, + array $curl_options + ) { + $fp = curl_init(); + + $headers2 = []; + foreach ($headers as $key => $value) { + $headers2[] = "$key: $value"; + } + if (isset($curl_options[CURLOPT_HTTPHEADER])) { + if (is_array($curl_options[CURLOPT_HTTPHEADER])) { + $headers2 = array_merge($headers2, $curl_options[CURLOPT_HTTPHEADER]); + } + unset($curl_options[CURLOPT_HTTPHEADER]); + } + if (version_compare(\SimplePie\Misc::get_curl_version(), '7.21.6', '>=')) { + curl_setopt($fp, CURLOPT_ACCEPT_ENCODING, ''); + } else { + curl_setopt($fp, CURLOPT_ENCODING, ''); + } + /** @var non-empty-string $url */ + curl_setopt($fp, CURLOPT_URL, $url); + curl_setopt($fp, CURLOPT_HEADER, true); + curl_setopt($fp, CURLOPT_RETURNTRANSFER, true); + curl_setopt($fp, CURLOPT_FAILONERROR, true); + curl_setopt($fp, CURLOPT_TIMEOUT, $timeout); + curl_setopt($fp, CURLOPT_CONNECTTIMEOUT, $timeout); + curl_setopt($fp, CURLOPT_REFERER, \SimplePie\Misc::url_remove_credentials($url)); + curl_setopt($fp, CURLOPT_USERAGENT, $useragent); + curl_setopt($fp, CURLOPT_HTTPHEADER, $headers2); + foreach ($curl_options as $curl_param => $curl_value) { + curl_setopt($fp, $curl_param, $curl_value); + } + + return $fp; + } } class_alias('SimplePie\File', 'SimplePie_File'); diff --git a/utils/PHPStan/extension.neon b/utils/PHPStan/extension.neon index 3b6811a20..3352454e6 100644 --- a/utils/PHPStan/extension.neon +++ b/utils/PHPStan/extension.neon @@ -1,3 +1,5 @@ +includes: + - version_dependent.php services: - diff --git a/utils/PHPStan/version_dependent.php b/utils/PHPStan/version_dependent.php new file mode 100644 index 000000000..ecb49cac8 --- 
/dev/null +++ b/utils/PHPStan/version_dependent.php @@ -0,0 +1,15 @@ + [ + 'typeAliases' => $typeAliases, + ], +]; From 4f5952008b0f6a2eec413f161439f6d081cc6ac3 Mon Sep 17 00:00:00 2001 From: Michael Meier Date: Thu, 1 Jan 2026 12:17:38 +0100 Subject: [PATCH 12/13] File: Disable cURL encoding for bad encoding Setting the CURLOPT_ENCODING to 'none' is not officially supported by cURL. Setting it in case of cURL receiving an invalid Content-Encoding header from the server and re-trying would not fix the issue. The current implementation already sets cURL up to send an Accept-Encoding header with all supported encodings a couple of lines above this change. If the fetch still returns a BAD_CONTENT_ENCODING error, the server already ignored the Accept-Encoding headers once. This change, instead of sending 'none' in a re-try, disables cURL's content encoding handling (in practice, handling compression). Co-authored-by: Jan Tojnar --- src/File.php | 20 +++++++++-------- tests/Integration/SimplePieTest.php | 35 +++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 9 deletions(-) diff --git a/src/File.php b/src/File.php index bb1927075..7609591dc 100644 --- a/src/File.php +++ b/src/File.php @@ -113,11 +113,10 @@ public function __construct(string $url, int $timeout = 10, int $redirects = 5, $fp = self::curlInit($url, $timeout, $headers, $useragent, $curl_options); $responseHeaders = curl_exec($fp); if (curl_errno($fp) === CURLE_WRITE_ERROR || curl_errno($fp) === CURLE_BAD_CONTENT_ENCODING) { - if (version_compare(\SimplePie\Misc::get_curl_version(), '7.21.6', '>=')) { - curl_setopt($fp, CURLOPT_ACCEPT_ENCODING, 'none'); - } else { - curl_setopt($fp, CURLOPT_ENCODING, 'none'); + if (\PHP_VERSION_ID < 80000) { + curl_close($fp); } + $fp = self::curlInit($url, $timeout, $headers, $useragent, $curl_options, false); $responseHeaders = curl_exec($fp); } $this->status_code = curl_getinfo($fp, CURLINFO_HTTP_CODE); @@ -424,7 +423,8 @@ private static function curlInit( int 
$timeout, array $headers, string $useragent, - array $curl_options + array $curl_options, + bool $setAcceptEncoding = true ) { $fp = curl_init(); @@ -438,10 +438,12 @@ private static function curlInit( } unset($curl_options[CURLOPT_HTTPHEADER]); } - if (version_compare(\SimplePie\Misc::get_curl_version(), '7.21.6', '>=')) { - curl_setopt($fp, CURLOPT_ACCEPT_ENCODING, ''); - } else { - curl_setopt($fp, CURLOPT_ENCODING, ''); + if ($setAcceptEncoding) { + if (version_compare(\SimplePie\Misc::get_curl_version(), '7.21.6', '>=')) { + curl_setopt($fp, CURLOPT_ACCEPT_ENCODING, ''); + } else { + curl_setopt($fp, CURLOPT_ENCODING, ''); + } } /** @var non-empty-string $url */ curl_setopt($fp, CURLOPT_URL, $url); diff --git a/tests/Integration/SimplePieTest.php b/tests/Integration/SimplePieTest.php index 43723ec4a..01d4ba1c6 100644 --- a/tests/Integration/SimplePieTest.php +++ b/tests/Integration/SimplePieTest.php @@ -553,4 +553,39 @@ public function testMicroformatLinkHub(string $data, ?string $hubUrl, ?string $s self::assertLessThanOrEqual(1, count($feed->get_links('self') ?? []), 'Link rel=self should not be promoted from HTML when it is already present in headers'); self::assertSame($bogoUrl, $feed->get_link(0, 'bogo'), 'Link rel=bogo does not match'); } + + public function testSimplePieIgnoresBadContentEncodingHeader(): void + { + // Ensuring that cURL is available + \assert(function_exists('curl_exec')); + $server = new MockWebServer(); + $server->start(); + + $filepath = dirname(__FILE__, 2) . '/data/feed_rss-2.0.xml'; + $body = file_get_contents($filepath); + \assert($body !== false); // For PHPStan + + $url = $server->setResponseOfPath( + '/bad-content-encoding', + new MockWebServerResponse($body, [ + 'content-type: application/rss+xml', + 'content-encoding: aws-chunked', + ], 200) + ); + + $feed = new SimplePie(); + $feed->enable_cache(false); + + $feed->set_feed_url($url); + + // For some reason, without this, Sniffer thinks we have text/plain on error. 
+ $feed->force_feed(true); + + $return = $feed->init(); + $server->stop(); + + $error = implode("\n", (array) ($feed->error() ?? '')); // For PHPStan + self::assertTrue($return, 'Failed fetching feed: ' . $error); + self::assertSame(200, $feed->status_code()); + } } From 9e5d32187f4a22fa560c61d179ea745d6d854481 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sat, 21 Mar 2026 23:35:46 +0100 Subject: [PATCH 13/13] Merge upstream --- phpstan.dist.neon | 6 -- src/Content/Type/Sniffer.php | 15 +---- src/File.php | 99 +++++++++++++++++------------ src/HTTP/Psr18Client.php | 7 +- src/HTTP/Psr7Response.php | 12 ++-- src/HTTP/RawTextResponse.php | 8 +-- src/Locator.php | 13 ++-- src/SimplePie.php | 3 +- tests/Integration/SimplePieTest.php | 35 ++++++++++ utils/PHPStan/extension.neon | 2 + utils/PHPStan/version_dependent.php | 15 +++++ 11 files changed, 138 insertions(+), 77 deletions(-) create mode 100644 utils/PHPStan/version_dependent.php diff --git a/phpstan.dist.neon b/phpstan.dist.neon index 848a033a1..6562144b2 100644 --- a/phpstan.dist.neon +++ b/phpstan.dist.neon @@ -14,12 +14,6 @@ parameters: count: 1 path: src/HTTP/Psr18Client.php - # SimplePie\Content\Type\Sniffer::__construct(): Parameter $file could be mixed due to BC. - - - message: '(Result of \|\| is always false\.)' - count: 1 - path: src/Content/Type/Sniffer.php - # Not used since https://github.com/simplepie/simplepie/commit/b2eb0134d53921e75f0fa70b1cf901ed82b988b1 but cannot be removed due to BC. 
- '(Constructor of class SimplePie\\Enclosure has an unused parameter \$javascript\.)' diff --git a/src/Content/Type/Sniffer.php b/src/Content/Type/Sniffer.php index 0538b9b5f..16d82c2b5 100644 --- a/src/Content/Type/Sniffer.php +++ b/src/Content/Type/Sniffer.php @@ -27,26 +27,17 @@ class Sniffer /** * File object * - * @var File|Response + * @var File */ public $file; /** * Create an instance of the class with the input file * - * @param File|Response $file Input file + * @param File $file Input file */ - public function __construct(/* File */ $file) + public function __construct(File $file) { - if (!is_object($file) || !$file instanceof Response) { - // For BC we're asking for `File`, but internally we accept every `Response` implementation - throw new InvalidArgumentException(sprintf( - '%s(): Argument #1 ($file) must be of type %s', - __METHOD__, - File::class - ), 1); - } - $this->file = $file; } diff --git a/src/File.php b/src/File.php index 6d03d4827..8abbfcf59 100644 --- a/src/File.php +++ b/src/File.php @@ -110,52 +110,27 @@ public function __construct(string $url, int $timeout = 10, int $redirects = 5, } if (!$force_fsockopen && function_exists('curl_exec')) { $this->method = \SimplePie\SimplePie::FILE_SOURCE_REMOTE | \SimplePie\SimplePie::FILE_SOURCE_CURL; - $fp = curl_init(); - $headers2 = []; - foreach ($headers as $key => $value) { - $headers2[] = "$key: $value"; - } - if (isset($curl_options[CURLOPT_HTTPHEADER])) { - if (is_array($curl_options[CURLOPT_HTTPHEADER])) { - $headers2 = array_merge($headers2, $curl_options[CURLOPT_HTTPHEADER]); - } - unset($curl_options[CURLOPT_HTTPHEADER]); - } - if (version_compare(\SimplePie\Misc::get_curl_version(), '7.21.6', '>=')) { - curl_setopt($fp, CURLOPT_ACCEPT_ENCODING, ''); - } else { - curl_setopt($fp, CURLOPT_ENCODING, ''); - } - /** @var non-empty-string $url */ - curl_setopt($fp, CURLOPT_URL, $url); - curl_setopt($fp, CURLOPT_RETURNTRANSFER, true); - curl_setopt($fp, CURLOPT_FAILONERROR, true); - 
curl_setopt($fp, CURLOPT_TIMEOUT, $timeout); - curl_setopt($fp, CURLOPT_CONNECTTIMEOUT, $timeout); - // curl_setopt($fp, CURLOPT_REFERER, \SimplePie\Misc::url_remove_credentials($url)); // FreshRSS removed - curl_setopt($fp, CURLOPT_USERAGENT, $useragent); - curl_setopt($fp, CURLOPT_HTTPHEADER, $headers2); + $fp = self::curlInit($url, $timeout, $headers, $useragent, $curl_options); $responseHeaders = ''; curl_setopt($fp, CURLOPT_HEADERFUNCTION, function ($ch, string $header) use (&$responseHeaders) { $responseHeaders .= $header; return strlen($header); }); - foreach ($curl_options as $curl_param => $curl_value) { - curl_setopt($fp, $curl_param, $curl_value); - } - $responseBody = curl_exec($fp); $responseHeaders .= "\r\n"; if (curl_errno($fp) === CURLE_WRITE_ERROR || curl_errno($fp) === CURLE_BAD_CONTENT_ENCODING) { $this->error = 'cURL error ' . curl_errno($fp) . ': ' . curl_error($fp); // FreshRSS $this->on_http_response($responseBody === false ? false : $responseHeaders . $responseBody, $curl_options); $this->error = null; // FreshRSS - if (version_compare(\SimplePie\Misc::get_curl_version(), '7.21.6', '>=')) { - curl_setopt($fp, CURLOPT_ACCEPT_ENCODING, null); - } else { - curl_setopt($fp, CURLOPT_ENCODING, null); + if (\PHP_VERSION_ID < 80000) { + curl_close($fp); } + $fp = self::curlInit($url, $timeout, $headers, $useragent, $curl_options, false); $responseHeaders = ''; + curl_setopt($fp, CURLOPT_HEADERFUNCTION, function ($ch, string $header) use (&$responseHeaders) { + $responseHeaders .= $header; + return strlen($header); + }); $responseBody = curl_exec($fp); $responseHeaders .= "\r\n"; } @@ -315,7 +290,7 @@ public function __construct(string $url, int $timeout = 10, int $redirects = 5, } else { $this->method = \SimplePie\SimplePie::FILE_SOURCE_LOCAL | \SimplePie\SimplePie::FILE_SOURCE_FILE_GET_CONTENTS; $filebody = false; - if (empty($url) || !is_readable($url) || false === $filebody = file_get_contents($url)) { + if (empty($url) || !is_readable($url) || 
false === ($filebody = file_get_contents($url))) { $this->body = ''; $this->error = sprintf('file "%s" is not readable', $url); $this->success = false; @@ -459,10 +434,8 @@ private function flatten_headers(array $headers): array */ final public static function fromResponse(Response $response): self { - $headers = []; - - foreach ($response->get_headers() as $name => $header) { - $headers[$name] = implode(', ', $header); + if ($response instanceof self) { + return $response; } /** @var File */ @@ -470,13 +443,61 @@ final public static function fromResponse(Response $response): self $file->url = $response->get_final_requested_uri(); $file->useragent = null; - $file->headers = $headers; + $file->set_headers($response->get_headers()); $file->body = $response->get_body_content(); $file->status_code = $response->get_status_code(); $file->permanent_url = $response->get_permanent_uri(); return $file; } + + /** + * @param array $headers + * @param array $curl_options + * @return \CurlHandle + */ + private static function curlInit( + string $url, + int $timeout, + array $headers, + string $useragent, + array $curl_options, + bool $setAcceptEncoding = true + ) { + $fp = curl_init(); + + $headers2 = []; + foreach ($headers as $key => $value) { + $headers2[] = "$key: $value"; + } + if (isset($curl_options[CURLOPT_HTTPHEADER])) { + if (is_array($curl_options[CURLOPT_HTTPHEADER])) { + $headers2 = array_merge($headers2, $curl_options[CURLOPT_HTTPHEADER]); + } + unset($curl_options[CURLOPT_HTTPHEADER]); + } + if ($setAcceptEncoding) { + if (version_compare(\SimplePie\Misc::get_curl_version(), '7.21.6', '>=')) { + curl_setopt($fp, CURLOPT_ACCEPT_ENCODING, ''); + } else { + curl_setopt($fp, CURLOPT_ENCODING, ''); + } + } + /** @var non-empty-string $url */ + curl_setopt($fp, CURLOPT_URL, $url); + curl_setopt($fp, CURLOPT_RETURNTRANSFER, true); + curl_setopt($fp, CURLOPT_FAILONERROR, true); + curl_setopt($fp, CURLOPT_TIMEOUT, $timeout); + curl_setopt($fp, CURLOPT_CONNECTTIMEOUT, 
$timeout); + // curl_setopt($fp, CURLOPT_REFERER, \SimplePie\Misc::url_remove_credentials($url)); // FreshRSS removed + curl_setopt($fp, CURLOPT_USERAGENT, $useragent); + curl_setopt($fp, CURLOPT_HTTPHEADER, $headers2); + foreach ($curl_options as $curl_param => $curl_value) { + curl_setopt($fp, $curl_param, $curl_value); + } + + return $fp; + } } class_alias('SimplePie\File', 'SimplePie_File'); diff --git a/src/HTTP/Psr18Client.php b/src/HTTP/Psr18Client.php index b920333e4..f629c1b98 100644 --- a/src/HTTP/Psr18Client.php +++ b/src/HTTP/Psr18Client.php @@ -12,6 +12,7 @@ use Psr\Http\Client\ClientInterface; use Psr\Http\Message\RequestFactoryInterface; use Psr\Http\Message\UriFactoryInterface; +use SimplePie\Misc; use Throwable; /** @@ -82,7 +83,7 @@ public function request(string $method, string $url, array $headers = []): Respo ), 1); } - if (preg_match('/^http(s)?:\/\//i', $url)) { + if (Misc::is_remote_uri($url)) { return $this->requestUrl($method, $url, $headers); } @@ -119,7 +120,7 @@ private function requestUrl(string $method, string $url, array $headers): Respon $statusCode = $response->getStatusCode(); // If we have a redirect - if (in_array($statusCode, [300, 301, 302, 303, 307]) && $response->hasHeader('Location')) { + if (in_array($statusCode, [300, 301, 302, 303, 307, 308]) && $response->hasHeader('Location')) { // Prevent infinity redirect loops if ($remainingRedirects <= 0) { break; @@ -130,7 +131,7 @@ private function requestUrl(string $method, string $url, array $headers): Respon $requestedUrl = $response->getHeaderLine('Location'); - if ($statusCode === 301) { + if ($statusCode === 301 || $statusCode === 308) { $permanentUrl = $requestedUrl; } diff --git a/src/HTTP/Psr7Response.php b/src/HTTP/Psr7Response.php index 418fddf52..f5ee2c104 100644 --- a/src/HTTP/Psr7Response.php +++ b/src/HTTP/Psr7Response.php @@ -32,13 +32,13 @@ final class Psr7Response implements Response /** * @var string */ - private $requested_url; + private $final_requested_url; 
- public function __construct(ResponseInterface $response, string $permanent_url, string $requested_url) + public function __construct(ResponseInterface $response, string $permanent_url, string $final_requested_url) { $this->response = $response; $this->permanent_url = $permanent_url; - $this->requested_url = $requested_url; + $this->final_requested_url = $final_requested_url; } public function get_permanent_uri(): string @@ -48,7 +48,7 @@ public function get_permanent_uri(): string public function get_final_requested_uri(): string { - return $this->requested_url; + return $this->final_requested_url; } public function get_status_code(): int @@ -71,7 +71,7 @@ public function has_header(string $name): bool public function with_header(string $name, $value) { - return new self($this->response->withHeader($name, $value), $this->permanent_url, $this->requested_url); + return new self($this->response->withHeader($name, $value), $this->permanent_url, $this->final_requested_url); } public function get_header(string $name): array @@ -86,6 +86,6 @@ public function get_header_line(string $name): string public function get_body_content(): string { - return $this->response->getBody()->__toString(); + return (string) $this->response->getBody(); } } diff --git a/src/HTTP/RawTextResponse.php b/src/HTTP/RawTextResponse.php index fee5e5372..732e1fcd4 100644 --- a/src/HTTP/RawTextResponse.php +++ b/src/HTTP/RawTextResponse.php @@ -28,20 +28,20 @@ final class RawTextResponse implements Response private $permanent_url; /** - * @var array> + * @var array> */ private $headers = []; /** * @var string */ - private $requested_url; + private $final_requested_url; public function __construct(string $raw_text, string $filepath) { $this->raw_text = $raw_text; $this->permanent_url = $filepath; - $this->requested_url = $filepath; + $this->final_requested_url = $filepath; } public function get_permanent_uri(): string @@ -51,7 +51,7 @@ public function get_permanent_uri(): string public function 
get_final_requested_uri(): string { - return $this->requested_url; + return $this->final_requested_url; } public function get_status_code(): int diff --git a/src/Locator.php b/src/Locator.php index 48d2c4624..fbd7f05e9 100644 --- a/src/Locator.php +++ b/src/Locator.php @@ -26,9 +26,9 @@ class Locator implements RegistryAware { /** @var ?string */ - public $useragent = null; + public $useragent; /** @var int */ - public $timeout = 10; + public $timeout; /** @var File */ public $file; /** @var string[] */ @@ -46,11 +46,11 @@ class Locator implements RegistryAware /** @var int */ public $checked_feeds = 0; /** @var int */ - public $max_checked_feeds = 10; + public $max_checked_feeds; /** @var bool */ - public $force_fsockopen = false; + public $force_fsockopen; /** @var array */ - public $curl_options = []; + public $curl_options; /** @var ?\DomDocument */ public $dom; /** @var ?Registry */ @@ -167,7 +167,8 @@ public function is_feed(Response $file, bool $check_html = false) assert($this->registry !== null); if (Misc::is_remote_uri($file->get_final_requested_uri())) { - $sniffer = $this->registry->create(Content\Type\Sniffer::class, [$file]); + $fileResponse = File::fromResponse($file); + $sniffer = $this->registry->create(Content\Type\Sniffer::class, [$fileResponse]); $sniffed = $sniffer->get_type(); $mime_types = ['application/rss+xml', 'application/rdf+xml', 'text/rdf', 'application/atom+xml', 'text/xml', diff --git a/src/SimplePie.php b/src/SimplePie.php index d80174a38..9bd65f892 100644 --- a/src/SimplePie.php +++ b/src/SimplePie.php @@ -2282,7 +2282,8 @@ protected function fetch_data(&$cache) $headers[$key] = implode(', ', $values); } - $sniffer = $this->registry->create(Sniffer::class, [&$file]); + $fileResponse = File::fromResponse($file); + $sniffer = $this->registry->create(Sniffer::class, [$fileResponse]); $sniffed = $sniffer->get_type(); return [$headers, $sniffed]; diff --git a/tests/Integration/SimplePieTest.php b/tests/Integration/SimplePieTest.php 
index 4cedf77b3..ce21f2c7d 100644 --- a/tests/Integration/SimplePieTest.php +++ b/tests/Integration/SimplePieTest.php @@ -555,4 +555,39 @@ public function testMicroformatLinkHub(string $data, ?string $hubUrl, ?string $s self::assertLessThanOrEqual(1, count($feed->get_links('self') ?? []), 'Link rel=self should not be promoted from HTML when it is already present in headers'); self::assertSame($bogoUrl, $feed->get_link(0, 'bogo'), 'Link rel=bogo does not match'); } + + public function testSimplePieIgnoresBadContentEncodingHeader(): void + { + // Ensuring that cURL is available + \assert(function_exists('curl_exec')); + $server = new MockWebServer(); + $server->start(); + + $filepath = dirname(__FILE__, 2) . '/data/feed_rss-2.0.xml'; + $body = file_get_contents($filepath); + \assert($body !== false); // For PHPStan + + $url = $server->setResponseOfPath( + '/bad-content-encoding', + new MockWebServerResponse($body, [ + 'content-type: application/rss+xml', + 'content-encoding: aws-chunked', + ], 200) + ); + + $feed = new SimplePie(); + $feed->enable_cache(false); + + $feed->set_feed_url($url); + + // For some reason, without this, Sniffer thinks we have text/plain on error. + $feed->force_feed(true); + + $return = $feed->init(); + $server->stop(); + + $error = implode("\n", (array) ($feed->error() ?? '')); // For PHPStan + self::assertTrue($return, 'Failed fetching feed: ' . $error); + self::assertSame(200, $feed->status_code()); + } } diff --git a/utils/PHPStan/extension.neon b/utils/PHPStan/extension.neon index 3b6811a20..3352454e6 100644 --- a/utils/PHPStan/extension.neon +++ b/utils/PHPStan/extension.neon @@ -1,3 +1,5 @@ +includes: + - version_dependent.php services: - diff --git a/utils/PHPStan/version_dependent.php b/utils/PHPStan/version_dependent.php new file mode 100644 index 000000000..ecb49cac8 --- /dev/null +++ b/utils/PHPStan/version_dependent.php @@ -0,0 +1,15 @@ + [ + 'typeAliases' => $typeAliases, + ], +];