Skip to content

Commit 4bde579

Browse files
Hackerbone and claude committed
Exclude captcha polling requests from networkidle calculations
Patches the driver's frames.js during wheel build to skip requests to known captcha domains (Cloudflare Turnstile, reCAPTCHA, hCaptcha, Arkose Labs) in _inflightRequestStarted/_inflightRequestFinished, following the existing _isFavicon exclusion pattern. Also fixes URL matching for older playwright-python versions that use playwright.azureedge.net instead of cdn.playwright.dev. Adds GitHub Actions workflow for building and releasing wheels. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 2a88d7a commit 4bde579

3 files changed

Lines changed: 224 additions & 1 deletion

File tree

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
# Manually triggered workflow: clones playwright-python at the requested tag,
# applies the patchright patches (including the networkidle captcha
# blacklist), builds every platform wheel and publishes them as a GitHub
# release.
name: Build and Release Patched Patchright

on:
  workflow_dispatch:
    inputs:
      playwright_version:
        description: 'Playwright-Python version tag (e.g. v1.56.0)'
        required: true
        default: 'v1.56.0'
      patchright_release:
        description: 'Patchright release version (e.g. 1.56.0)'
        required: true
        default: '1.56.0'

# Least privilege by default: only the release job below is granted write
# access, so the build job (which runs cloned third-party code) cannot push.
permissions:
  contents: read

jobs:
  build-wheels:
    name: Build wheels
    runs-on: ubuntu-latest
    # Inputs are passed through the environment rather than interpolated
    # into the shell scripts below.
    env:
      patchright_release: ${{ github.event.inputs.patchright_release }}
      playwright_version: ${{ github.event.inputs.playwright_version }}
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4

      - name: Set up Python
        run: uv python install 3.11

      - name: Install dependencies
        run: |
          uv sync --all-groups
          git clone https://github.com/microsoft/playwright-python --branch "$playwright_version"
          uv add -r playwright-python/local-requirements.txt --dev

      - name: Patch Playwright-Python Package
        run: uv run patch_python_package.py

      # Formatting is best-effort: a formatter failure must not block the build.
      - name: Format patched code
        run: uvx ruff format playwright-python || true

      - name: Build wheels
        run: |
          cd playwright-python
          uv pip install -e .
          for wheel in $(uv run setup.py --list-wheels); do
            PLAYWRIGHT_TARGET_WHEEL="$wheel" uv build --wheel
          done

      - name: Upload wheel artifacts
        uses: actions/upload-artifact@v4
        with:
          name: wheels
          path: playwright-python/dist/*.whl
          # Fail here instead of publishing an empty release later.
          if-no-files-found: error

  release:
    name: Create GitHub Release
    needs: build-wheels
    runs-on: ubuntu-latest
    # Creating the tag and release requires write access to repository contents.
    permissions:
      contents: write
    steps:
      - name: Download wheel artifacts
        uses: actions/download-artifact@v4
        with:
          name: wheels
          path: dist/

      - name: Create Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: v${{ github.event.inputs.patchright_release }}
          name: patchright v${{ github.event.inputs.patchright_release }}
          body: |
            Custom patchright build with networkidle captcha blacklisting.

            Based on playwright-python ${{ github.event.inputs.playwright_version }}.

            **Captcha domains excluded from networkidle:**
            - challenges.cloudflare.com (Cloudflare Turnstile)
            - google.com/recaptcha (reCAPTCHA)
            - www.gstatic.com/recaptcha (reCAPTCHA assets)
            - hcaptcha.com (hCaptcha)
            - api.funcaptcha.com (Arkose Labs)
            - client-api.arkoselabs.com (Arkose Labs)

            Install: `pip install patchright@https://github.com/Hackerbone/patchright-python/releases/download/v${{ github.event.inputs.patchright_release }}/patchright-${{ github.event.inputs.patchright_release }}-py3-none-manylinux1_x86_64.whl`
          files: dist/*.whl
          # Do not create a release that is missing its wheels.
          fail_on_unmatched_files: true
          draft: false
          prerelease: false

patch_driver_networkidle.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
"""
2+
Patches the patchright driver's frames.js to exclude captcha polling requests
3+
from networkidle calculations.
4+
5+
Run after the driver has been downloaded (after `pip install -e .` or `setup.py`):
6+
python patch_driver_networkidle.py
7+
8+
This modifies _inflightRequestStarted and _inflightRequestFinished to skip
9+
requests matching known captcha provider domains, following the same pattern
10+
as the existing _isFavicon exclusion in Playwright's source.
11+
12+
Source: https://github.com/bugbasesecurity/patchright
13+
"""
14+
15+
import re
16+
import sys
17+
from pathlib import Path
18+
19+
# URL fragments identifying captcha-provider traffic. The injected JS tests
# each one with String.prototype.includes against the full request URL, so
# these are plain substrings, not regular expressions.
_CAPTCHA_URL_FRAGMENTS = (
    "challenges.cloudflare.com",
    "google.com/recaptcha",
    "www.gstatic.com/recaptcha",
    "hcaptcha.com",
    "api.funcaptcha.com",
    "client-api.arkoselabs.com",
)

# The same fragments rendered as a compact JavaScript array literal.
CAPTCHA_PATTERNS_JS = (
    "[" + ",".join(f'"{fragment}"' for fragment in _CAPTCHA_URL_FRAGMENTS) + "]"
)

# JavaScript guard spliced into the driver: return early from the in-flight
# bookkeeping method when the request URL contains any captcha fragment.
CAPTCHA_CHECK = (
    "const _reqUrl = request.url();\n"
    f"    if ({CAPTCHA_PATTERNS_JS}.some(p => _reqUrl.includes(p)))\n"
    "      return;"
)

# Comment written next to the injected guard; its presence marks a file as
# already patched.
PATCH_MARKER = "// [patchright-networkidle-blacklist]"
26+
27+
28+
def find_frames_js() -> Path:
    """Locate the driver's ``server/frames.js`` relative to the working directory.

    The known driver directories inside ``playwright-python`` are searched
    first (the renamed ``patchright`` package, then the original
    ``playwright`` package). If neither contains a match, the whole current
    directory is searched for ``driver/package/lib/server/frames.js``.

    When several files match, the lexicographically smallest path is
    returned so the choice does not depend on filesystem traversal order.

    Returns:
        Path to the ``frames.js`` file to patch.

    Raises:
        SystemExit: with exit code 1 (after printing an error to stderr)
            when no ``frames.js`` can be found.
    """
    search_roots = (
        Path("playwright-python/patchright/driver"),
        Path("playwright-python/playwright/driver"),
    )

    for root in search_roots:
        if not root.is_dir():
            continue
        # rglob already searches recursively; no explicit "**/" prefix needed.
        candidates = sorted(root.rglob("server/frames.js"))
        if candidates:
            return candidates[0]

    # Fallback: search from the current directory.
    candidates = sorted(Path(".").rglob("driver/package/lib/server/frames.js"))
    if candidates:
        return candidates[0]

    print("ERROR: Could not find frames.js in driver", file=sys.stderr)
    sys.exit(1)
48+
49+
50+
def patch_method(code: str, method_name: str) -> str:
    """Insert the captcha early-return guard into one method of ``frames.js``.

    The guard is placed directly after the method's existing
    ``if (request._isFavicon) return;`` statement, preceded by
    ``PATCH_MARKER`` so a patched file can be recognised later.

    Args:
        code: Full source text of ``frames.js``.
        method_name: Name of the method to patch, e.g.
            ``"_inflightRequestStarted"``. It is matched literally.

    Returns:
        The source text with the guard inserted.

    Raises:
        SystemExit: with exit code 1 (after printing an error to stderr)
            when the method or its ``_isFavicon`` check is not found.
    """
    # Escape the name so it is matched literally rather than as a regex.
    # "[^}]*?" keeps the search inside the method body up to the first "}".
    pattern = (
        rf"{re.escape(method_name)}\(request\) \{{[^}}]*?"
        r"if \(request\._isFavicon\)\s*return;"
    )
    match = re.search(pattern, code, re.DOTALL)

    if match is None:
        print(f"ERROR: Could not find {method_name} with _isFavicon check", file=sys.stderr)
        sys.exit(1)

    # Splice at the exact match offset instead of re-searching for the text.
    insert_at = match.end()
    injected = f"\n    {PATCH_MARKER}\n    {CAPTCHA_CHECK}"
    return code[:insert_at] + injected + code[insert_at:]
61+
62+
63+
def main():
    """Patch the driver's ``frames.js`` in place, skipping already-patched files.

    Locates ``frames.js``, returns early if it already contains
    ``PATCH_MARKER``, otherwise injects the captcha guard into
    ``_inflightRequestStarted`` and ``_inflightRequestFinished`` and writes
    the result back to the same file.
    """
    frames_js = find_frames_js()
    # Read and write explicitly as UTF-8 so the result does not depend on
    # the platform's locale encoding.
    code = frames_js.read_text(encoding="utf-8")

    if PATCH_MARKER in code:
        print(f"Already patched: {frames_js}")
        return

    for method_name in ("_inflightRequestStarted", "_inflightRequestFinished"):
        code = patch_method(code, method_name)

    frames_js.write_text(code, encoding="utf-8")
    print(f"Patched: {frames_js}")
    print(f"Captcha domains excluded from networkidle: {CAPTCHA_PATTERNS_JS}")


if __name__ == "__main__":
    main()

patch_python_package.py

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ def patch_file(file_path: str, patched_tree: ast.AST) -> None:
5151

5252
# Modify url
5353
if isinstance(node, ast.Assign) and isinstance(node.value, ast.Constant) and isinstance(node.targets[0], ast.Name):
54-
if node.targets[0].id == "url" and node.value.value == "https://cdn.playwright.dev/builds/driver/":
54+
if node.targets[0].id == "url" and node.value.value in ("https://cdn.playwright.dev/builds/driver/", "https://playwright.azureedge.net/builds/driver/"):
5555
node.value = ast.JoinedStr(
5656
values=[
5757
ast.Constant(value='https://github.com/Kaliiiiiiiiii-Vinyzu/patchright/releases/download/v'),
@@ -579,6 +579,56 @@ async def route_handler(route: Route) -> None:
579579

580580
patch_file(python_file, file_tree)
581581

582+
# Patching setup.py to add networkidle captcha blacklisting.
# This injects a helper that patches the driver's frames.js right after the
# driver archive is extracted, so captcha polling requests are excluded from
# networkidle calculations.
import re

with open("playwright-python/setup.py", encoding="utf-8") as f:
    setup_code = f.read()

# Source text prepended to setup.py. It is a raw string, so the backslashes
# below reach setup.py unchanged: "\n" becomes a newline only when setup.py
# evaluates its own (non-raw) f-strings, and the regex escapes stay intact.
NETWORKIDLE_PATCH_CODE = r'''
import os as _os
import re as _re

_CAPTCHA_PATTERNS_JS = '["challenges.cloudflare.com","google.com/recaptcha","www.gstatic.com/recaptcha","hcaptcha.com","api.funcaptcha.com","client-api.arkoselabs.com"]'
_PATCH_MARKER = '// [patchright-networkidle-blacklist]'
_CAPTCHA_CHECK = f'const _reqUrl = request.url();\n    if ({_CAPTCHA_PATTERNS_JS}.some(p => _reqUrl.includes(p)))\n      return;'


def _patch_networkidle_blacklist(driver_root):
    """Inject the captcha guard into the first server/frames.js under driver_root."""
    for dirpath, _, filenames in _os.walk(driver_root):
        if 'frames.js' not in filenames or 'server' not in dirpath:
            continue
        fpath = _os.path.join(dirpath, 'frames.js')
        with open(fpath, encoding='utf-8') as fh:
            code = fh.read()
        if _PATCH_MARKER in code:
            return
        for method in ('_inflightRequestStarted', '_inflightRequestFinished'):
            pat = _re.compile(rf'({method}\(request\) \{{[^}}]*?if \(request\._isFavicon\)\s*return;)', _re.DOTALL)
            m = pat.search(code)
            if m is None:
                # Fail the build rather than ship a wheel that is not patched.
                raise RuntimeError(f'networkidle patch: {method} with _isFavicon check not found in {fpath}')
            code = code[:m.end()] + f'\n    {_PATCH_MARKER}\n    {_CAPTCHA_CHECK}' + code[m.end():]
        with open(fpath, 'w', encoding='utf-8') as fh:
            fh.write(code)
        print(f'Patched networkidle blacklist: {fpath}')
        return
'''


def _insert_call_after(source: str, anchor: str, call: str) -> str:
    """Insert ``call`` on its own line after each line that starts with ``anchor``.

    The new line reuses the anchor line's leading whitespace, so the call
    lands in the same block as the statement it follows.

    Raises:
        RuntimeError: if ``anchor`` does not occur, so an upstream change to
            setup.py cannot silently disable the patch.
    """
    anchor_re = re.compile(rf"^([ \t]*){re.escape(anchor)}", re.MULTILINE)
    patched, count = anchor_re.subn(
        lambda m: f"{m.group(0)}\n{m.group(1)}{call}", source
    )
    if count == 0:
        raise RuntimeError(f"setup.py anchor not found: {anchor}")
    return patched


# Skip everything if setup.py already carries the helper (script re-run).
if "_patch_networkidle_blacklist" not in setup_code:
    # The helper is prepended to the file; it is only called after setup.py
    # has been fully loaded.
    setup_code = NETWORKIDLE_PATCH_CODE + setup_code

    # Add _patch_networkidle_blacklist call after extraction in _build_wheel
    setup_code = _insert_call_after(
        setup_code,
        "extractall(zip, f\"driver/{wheel_bundle['zip_name']}\")",
        "_patch_networkidle_blacklist(f\"driver/{wheel_bundle['zip_name']}\")",
    )

    # Add _patch_networkidle_blacklist call after extraction in _download_and_extract_local_driver
    setup_code = _insert_call_after(
        setup_code,
        "extractall(zip, 'patchright/driver')",
        "_patch_networkidle_blacklist('patchright/driver')",
    )

    with open("playwright-python/setup.py", "w", encoding="utf-8") as f:
        f.write(setup_code)
631+
582632
# Rename the Package Folder to Patchright
583633
os.rename("playwright-python/playwright", "playwright-python/patchright")
584634

0 commit comments

Comments
 (0)