-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsetup_session.py
More file actions
81 lines (59 loc) · 2.47 KB
/
setup_session.py
File metadata and controls
81 lines (59 loc) · 2.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
"""
Run this ONCE to log in to LinkedIn and save the session.
After that, scrape.py will reuse session.json without re-logging in.
Usage:
python setup_session.py
"""
import patchright_shim
# NOTE(review): presumably installs patches that must be in place before
# playwright is imported below — confirm; do not reorder these imports.
patchright_shim.install()
import asyncio
import os
from dotenv import load_dotenv
from playwright.async_api import Page
from config import SESSION_FILE
# Load LINKEDIN_EMAIL / LINKEDIN_PASSWORD from a local .env file, if present.
load_dotenv()
LINKEDIN_EMAIL: str | None = os.getenv("LINKEDIN_EMAIL")  # None -> manual login path
LINKEDIN_PASSWORD: str | None = os.getenv("LINKEDIN_PASSWORD")
async def main() -> None:
    """Log in to LinkedIn and persist the browser session to SESSION_FILE.

    Prefers credential login using LINKEDIN_EMAIL / LINKEDIN_PASSWORD from
    .env; on failure (or when credentials are absent) falls back to manual
    login in the visible browser window.
    """
    # Import here so missing browser deps don't break the import at module level
    from linkedin_scraper import BrowserManager

    print("Starting browser (non-headless so you can log in)...")
    async with BrowserManager(headless=False) as browser:
        page = browser.page
        if LINKEDIN_EMAIL and LINKEDIN_PASSWORD:
            print(f"Logging in as {LINKEDIN_EMAIL} via credentials...")
            try:
                from linkedin_scraper import login_with_credentials

                # BUGFIX: Playwright's TimeoutError does NOT subclass the
                # builtin TimeoutError, so a login timeout would previously
                # escape this handler and never reach the manual fallback.
                # Catch both (builtin kept for backward compatibility).
                from playwright.async_api import (
                    TimeoutError as PlaywrightTimeoutError,
                )

                await login_with_credentials(
                    page,
                    email=LINKEDIN_EMAIL,
                    password=LINKEDIN_PASSWORD,
                )
                print("Logged in successfully.")
            except (
                PlaywrightTimeoutError,
                TimeoutError,
                RuntimeError,
                ValueError,
            ) as error:
                print(
                    "Credential login failed "
                    f"({type(error).__name__}: {error}), falling back to manual login."
                )
                await _manual_login(page)
        else:
            print("No credentials in .env — opening login page for manual login.")
            await _manual_login(page)
        # Session must be saved while the browser context is still open.
        print(f"Saving session to {SESSION_FILE} ...")
        await browser.save_session(SESSION_FILE)
    print("Done. You can now run: python scrape.py")
async def _manual_login(page: Page) -> None:
    """Open the LinkedIn login page and block until the user finishes logging in."""
    await page.goto("https://www.linkedin.com/login")
    print("\nPlease log in manually in the browser window.")
    print("Waiting up to 5 minutes for you to complete login...")
    timeout_ms = 300_000  # 5 minutes
    try:
        from linkedin_scraper import wait_for_manual_login
    except ImportError:
        # Helper unavailable — fall back to watching for the feed URL.
        await page.wait_for_url("**/feed/**", timeout=timeout_ms)
    else:
        await wait_for_manual_login(page, timeout=timeout_ms)
    print("Login detected.")
if __name__ == "__main__":
    # Script entry point: run the async login flow to completion.
    asyncio.run(main())