-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsetup_session.py
More file actions
81 lines (59 loc) · 2.47 KB
/
setup_session.py
File metadata and controls
81 lines (59 loc) · 2.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
"""
Run this ONCE to log in to LinkedIn and save the session.
After that, scrape.py will reuse session.json without re-logging in.
Usage:
python setup_session.py
"""
import patchright_shim
# NOTE(review): presumably installs patches that must be in place before
# playwright is imported below — confirm; do not reorder these imports.
patchright_shim.install()
import asyncio
import os
from dotenv import load_dotenv
from playwright.async_api import Page
from config import SESSION_FILE
# Load LINKEDIN_EMAIL / LINKEDIN_PASSWORD from a local .env file, if present.
load_dotenv()
LINKEDIN_EMAIL: str | None = os.getenv("LINKEDIN_EMAIL")  # None -> manual login path
LINKEDIN_PASSWORD: str | None = os.getenv("LINKEDIN_PASSWORD")
async def main() -> None:
    """Log in to LinkedIn and persist the browser session to SESSION_FILE.

    Prefers credential login using LINKEDIN_EMAIL / LINKEDIN_PASSWORD from
    .env; on failure (or when credentials are absent) falls back to manual
    login in the visible browser window.
    """
    # Import here so missing browser deps don't break the import at module level
    from linkedin_scraper import BrowserManager

    print("Starting browser (non-headless so you can log in)...")
    async with BrowserManager(headless=False) as browser:
        page = browser.page
        if LINKEDIN_EMAIL and LINKEDIN_PASSWORD:
            print(f"Logging in as {LINKEDIN_EMAIL} via credentials...")
            try:
                from linkedin_scraper import login_with_credentials

                # BUGFIX: Playwright's TimeoutError does NOT subclass the
                # builtin TimeoutError, so a login timeout would previously
                # escape this handler and never reach the manual fallback.
                # Catch both (builtin kept for backward compatibility).
                from playwright.async_api import (
                    TimeoutError as PlaywrightTimeoutError,
                )

                await login_with_credentials(
                    page,
                    email=LINKEDIN_EMAIL,
                    password=LINKEDIN_PASSWORD,
                )
                print("Logged in successfully.")
            except (
                PlaywrightTimeoutError,
                TimeoutError,
                RuntimeError,
                ValueError,
            ) as error:
                print(
                    "Credential login failed "
                    f"({type(error).__name__}: {error}), falling back to manual login."
                )
                await _manual_login(page)
        else:
            print("No credentials in .env — opening login page for manual login.")
            await _manual_login(page)
        # Session must be saved while the browser context is still open.
        print(f"Saving session to {SESSION_FILE} ...")
        await browser.save_session(SESSION_FILE)
    print("Done. You can now run: python scrape.py")
async def _manual_login(page: Page) -> None:
    """Open the LinkedIn login page and block until the user finishes logging in."""
    await page.goto("https://www.linkedin.com/login")
    print("\nPlease log in manually in the browser window.")
    print("Waiting up to 5 minutes for you to complete login...")
    timeout_ms = 300_000  # 5 minutes
    try:
        from linkedin_scraper import wait_for_manual_login
    except ImportError:
        # Helper unavailable — fall back to watching for the feed URL.
        await page.wait_for_url("**/feed/**", timeout=timeout_ms)
    else:
        await wait_for_manual_login(page, timeout=timeout_ms)
    print("Login detected.")
if __name__ == "__main__":
    # Script entry point: run the async login flow to completion.
    asyncio.run(main())