From 764adbd43f8abadadd520a05a62548a06b79495a Mon Sep 17 00:00:00 2001 From: avireddy0 Date: Tue, 16 Dec 2025 01:51:13 +0000 Subject: [PATCH 01/16] Add BigQuery sync support --- .vscode/settings.json | 4 ++++ requirements.txt | 1 + 2 files changed, 5 insertions(+) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..ba2a6c0 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,4 @@ +{ + "python-envs.defaultEnvManager": "ms-python.python:system", + "python-envs.pythonProjects": [] +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 066002d..8764e00 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ google-auth>=2.0.0 google-cloud-bigquery>=3.0.0 functions-framework>=3.0.0 gunicorn>=20.1.0 +google-cloud-bigquery>=3.0.0 From 7e39657dc7f2d971399f99eeedd592616c74bb5c Mon Sep 17 00:00:00 2001 From: avireddy0 Date: Tue, 16 Dec 2025 02:08:41 +0000 Subject: [PATCH 02/16] Update .gitignore to exclude venv and service_account.json --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index b686665..2f5f69a 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,5 @@ ENV/ # Output files all_company_emails.json +venv/ +service_account.json From 69a8e0f15c0f4d66d4b6f055ecc621fca7df494d Mon Sep 17 00:00:00 2001 From: avireddy0 Date: Tue, 16 Dec 2025 02:47:35 +0000 Subject: [PATCH 03/16] Update scheduler to run every 5 minutes --- .vscode/settings.json | 5 ++++- cloudbuild.yaml | 10 +++++----- deploy.sh | 12 ++++++------ setup_scheduler.sh | 6 +++--- 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index ba2a6c0..25f8d75 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,4 +1,7 @@ { "python-envs.defaultEnvManager": "ms-python.python:system", - "python-envs.pythonProjects": [] + "python-envs.pythonProjects": [], + 
"githubPullRequests.ignoredPullRequestBranches": [ + "main" + ] } \ No newline at end of file diff --git a/cloudbuild.yaml b/cloudbuild.yaml index f687ff4..511978f 100644 --- a/cloudbuild.yaml +++ b/cloudbuild.yaml @@ -34,11 +34,11 @@ steps: args: - '-c' - | - gcloud scheduler jobs delete gmail-scraper-hourly \ + gcloud scheduler jobs delete gmail-scraper-5min \ --location=us-central1 \ --quiet 2>/dev/null || echo "No existing job to delete" - # Step 3: Create hourly scheduler job + # Step 3: Create 5-minute scheduler job - name: 'gcr.io/google.com/cloudsdktool/cloud-sdk' id: 'create-scheduler' entrypoint: bash @@ -49,16 +49,16 @@ steps: --region=us-central1 \ --format='value(status.url)') - gcloud scheduler jobs create http gmail-scraper-hourly \ + gcloud scheduler jobs create http gmail-scraper-5min \ --location=us-central1 \ - --schedule="0 * * * *" \ + --schedule="*/5 * * * *" \ --time-zone="America/New_York" \ --uri="$${SERVICE_URL}/" \ --http-method=POST \ --headers="Content-Type=application/json" \ --message-body='{"incremental": true, "max_per_user": 100}' \ --attempt-deadline=3600s \ - --description="Hourly incremental Gmail scrape to BigQuery" + --description="5-minute incremental Gmail scrape to BigQuery" options: logging: CLOUD_LOGGING_ONLY diff --git a/deploy.sh b/deploy.sh index 6c83843..5beb83f 100755 --- a/deploy.sh +++ b/deploy.sh @@ -9,7 +9,7 @@ REGION="us-central1" SERVICE_ACCOUNT_EMAIL="claude-service-account@claude-mcp-457317.iam.gserviceaccount.com" SERVICE_ACCOUNT_KEY_FILE="$HOME/claude-mcp-457317-069a2a199017.json" ADMIN_EMAIL="avi@envsn.com" -SCHEDULER_JOB_NAME="gmail-scraper-hourly" +SCHEDULER_JOB_NAME="gmail-scraper-5min" echo "=== Gmail Scraper Cloud Run Deployment ===" echo "Project: $PROJECT_ID" @@ -48,7 +48,7 @@ echo "Service URL: $SERVICE_URL" # Step 4: Set up Cloud Scheduler for hourly incremental scraping echo "" -echo "Step 4: Setting up hourly Cloud Scheduler job..." +echo "Step 4: Setting up 5-minute Cloud Scheduler job..." 
# Delete existing job if it exists gcloud scheduler jobs delete $SCHEDULER_JOB_NAME \ @@ -56,18 +56,18 @@ gcloud scheduler jobs delete $SCHEDULER_JOB_NAME \ --location=$REGION \ --quiet 2>/dev/null || true -# Create new scheduler job (runs every hour at minute 0) +# Create new scheduler job (runs every 5 minutes) gcloud scheduler jobs create http $SCHEDULER_JOB_NAME \ --project=$PROJECT_ID \ --location=$REGION \ - --schedule="0 * * * *" \ + --schedule="*/5 * * * *" \ --time-zone="America/New_York" \ --uri="${SERVICE_URL}/" \ --http-method=POST \ --headers="Content-Type=application/json" \ --message-body='{"incremental": true, "max_per_user": 100}' \ --attempt-deadline=3600s \ - --description="Hourly incremental Gmail scrape to BigQuery" + --description="5-minute incremental Gmail scrape to BigQuery" echo "" echo "=== Deployment Complete ===" @@ -77,7 +77,7 @@ echo " URL: $SERVICE_URL" echo "" echo "Cloud Scheduler Job:" echo " Name: $SCHEDULER_JOB_NAME" -echo " Schedule: Every hour at minute 0 (0 * * * *)" +echo " Schedule: Every 5 minutes (*/5 * * * *)" echo " Timezone: America/New_York" echo " Mode: Incremental (only new messages)" echo "" diff --git a/setup_scheduler.sh b/setup_scheduler.sh index 3e7436a..7ffbb4d 100755 --- a/setup_scheduler.sh +++ b/setup_scheduler.sh @@ -6,7 +6,7 @@ set -e # Configuration PROJECT_ID="claude-mcp-457317" REGION="us-central1" -JOB_NAME="gmail-scraper-hourly" +JOB_NAME="gmail-scraper-5min" SERVICE_NAME="gmail-scraper" echo "=== Cloud Scheduler Setup for Gmail Scraper ===" @@ -40,11 +40,11 @@ gcloud scheduler jobs delete $JOB_NAME \ --quiet 2>/dev/null || true # Create the scheduler job -# Runs every hour at minute 0 +# Runs every 5 minutes gcloud scheduler jobs create http $JOB_NAME \ --project=$PROJECT_ID \ --location=$REGION \ - --schedule="0 * * * *" \ + --schedule="*/5 * * * *" \ --time-zone="America/New_York" \ --uri="${SERVICE_URL}/" \ --http-method=POST \ From 7f67b12e699f4620f88e9ef315f94f6cf8d7073c Mon Sep 17 00:00:00 
2001 From: avireddy0 Date: Tue, 16 Dec 2025 02:52:33 +0000 Subject: [PATCH 04/16] Add GitHub Action for automatic deployment --- .github/workflows/deploy.yml | 47 ++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 .github/workflows/deploy.yml diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 0000000..540e026 --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,47 @@ +name: Deploy to Cloud Run + +on: + push: + branches: + - main + +env: + PROJECT_ID: claude-mcp-457317 + REGION: us-central1 + SERVICE_NAME: gmail-scraper + SERVICE_ACCOUNT: claude-service-account@claude-mcp-457317.iam.gserviceaccount.com + +jobs: + deploy: + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Google Auth + id: auth + uses: google-github-actions/auth@v2 + with: + credentials_json: '${{ secrets.GCP_CREDENTIALS }}' + + - name: Set up Cloud SDK + uses: google-github-actions/setup-gcloud@v2 + + - name: Deploy to Cloud Run + uses: google-github-actions/deploy-cloudrun@v2 + with: + service: ${{ env.SERVICE_NAME }} + region: ${{ env.REGION }} + source: . 
+ env_vars: | + PROJECT_ID=${{ env.PROJECT_ID }} + DATASET_ID=gmail_analytics + TABLE_ID=messages + ADMIN_EMAIL=avi@envsn.com + flags: '--allow-unauthenticated --service-account=${{ env.SERVICE_ACCOUNT }} --timeout=3600 --memory=2Gi --cpu=2 --max-instances=1' + + - name: Update Cloud Scheduler + run: | + chmod +x setup_scheduler.sh + ./setup_scheduler.sh From bdc86fef9b55d3642a1b3523e1985fa7ff44a05b Mon Sep 17 00:00:00 2001 From: avireddy0 Date: Tue, 16 Dec 2025 02:59:01 +0000 Subject: [PATCH 05/16] Request per-API scopes for service account credentials --- .vscode/settings.json | 2 +- gmail_scraper.py | 26 ++++++++++++++------------ 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 25f8d75..044b5bc 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,5 +1,5 @@ { - "python-envs.defaultEnvManager": "ms-python.python:system", + "python-envs.defaultEnvManager": "ms-python.python:venv", "python-envs.pythonProjects": [], "githubPullRequests.ignoredPullRequestBranches": [ "main" diff --git a/gmail_scraper.py b/gmail_scraper.py index e497568..512d87d 100644 --- a/gmail_scraper.py +++ b/gmail_scraper.py @@ -9,25 +9,25 @@ # Service account configuration SERVICE_ACCOUNT_FILE = os.getenv('SERVICE_ACCOUNT_FILE', 'service-account-key.json') -SCOPES = [ - 'https://www.googleapis.com/auth/gmail.readonly', - 'https://www.googleapis.com/auth/admin.directory.user.readonly', - 'https://www.googleapis.com/auth/bigquery' -] + +# Scopes definitions +BQ_SCOPES = ['https://www.googleapis.com/auth/bigquery'] +GMAIL_SCOPES = ['https://www.googleapis.com/auth/gmail.readonly'] +ADMIN_SCOPES = ['https://www.googleapis.com/auth/admin.directory.user.readonly'] # BigQuery configuration PROJECT_ID = os.getenv('PROJECT_ID', 'claude-mcp-457317') DATASET_ID = os.getenv('DATASET_ID', 'gmail_analytics') TABLE_ID = os.getenv('TABLE_ID', 'messages') -def get_credentials(): - """Get service account credentials.""" +def get_service_account_credentials(scopes): + """Get 
base service account credentials with specific scopes.""" return service_account.Credentials.from_service_account_file( - SERVICE_ACCOUNT_FILE, scopes=SCOPES) + SERVICE_ACCOUNT_FILE, scopes=scopes) def get_bigquery_client(): - """Get BigQuery client.""" - credentials = get_credentials() + """Get BigQuery client using service account identity.""" + credentials = get_service_account_credentials(BQ_SCOPES) return bigquery.Client(project=PROJECT_ID, credentials=credentials) def ensure_table_exists(client): @@ -222,7 +222,8 @@ def insert_to_bigquery(client, table_ref, rows): def get_all_users(admin_email): """Get all users in the Google Workspace domain.""" - credentials = get_credentials() + # Use Admin SDK scopes for this operation + credentials = get_service_account_credentials(ADMIN_SCOPES) delegated_creds = credentials.with_subject(admin_email) admin_service = build('admin', 'directory_v1', credentials=delegated_creds) @@ -246,7 +247,8 @@ def get_all_users(admin_email): def scrape_user_emails(user_email, query='', max_results=100, existing_ids=None): """Scrape emails for a specific user, skipping already-scraped messages.""" - credentials = get_credentials() + # Use Gmail scopes for this operation + credentials = get_service_account_credentials(GMAIL_SCOPES) delegated_creds = credentials.with_subject(user_email) gmail_service = build('gmail', 'v1', credentials=delegated_creds) From 228e54356930432a0eac06f17fe7d70a2579f49b Mon Sep 17 00:00:00 2001 From: avireddy0 Date: Tue, 16 Dec 2025 03:05:12 +0000 Subject: [PATCH 06/16] Fix bugs: remove duplicate dependency, fix deprecated datetime.utcnow() --- gmail_scraper.py | 6 +++--- requirements.txt | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/gmail_scraper.py b/gmail_scraper.py index 512d87d..c07592f 100644 --- a/gmail_scraper.py +++ b/gmail_scraper.py @@ -4,7 +4,7 @@ import json import os import base64 -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from 
email.utils import parsedate_to_datetime # Service account configuration @@ -206,7 +206,7 @@ def process_message(message, user_email): 'has_attachments': has_attachments, 'attachment_count': attachment_count, 'size_estimate': message.get('sizeEstimate'), - 'scraped_at': datetime.utcnow().isoformat(), + 'scraped_at': datetime.now(timezone.utc).isoformat(), } def insert_to_bigquery(client, table_ref, rows): @@ -372,7 +372,7 @@ def main(query='', max_per_user=100, incremental=True): results['errors'].append(error_msg) results['status'] = 'completed' - results['completed_at'] = datetime.utcnow().isoformat() + results['completed_at'] = datetime.now(timezone.utc).isoformat() except Exception as e: results['status'] = 'failed' diff --git a/requirements.txt b/requirements.txt index 8764e00..066002d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,3 @@ google-auth>=2.0.0 google-cloud-bigquery>=3.0.0 functions-framework>=3.0.0 gunicorn>=20.1.0 -google-cloud-bigquery>=3.0.0 From 46c7998f8ef386d8d8e18f383c24d3ace50aa902 Mon Sep 17 00:00:00 2001 From: avireddy0 Date: Sat, 31 Jan 2026 13:19:05 -0500 Subject: [PATCH 07/16] docs: add context-layer usage note --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b51f8f6..0f368b5 100644 --- a/README.md +++ b/README.md @@ -1 +1,4 @@ -# gmail \ No newline at end of file +# Gmail Scraper (Context Layer Ingestion) + +This service ingests domain-wide Gmail into BigQuery for the Envision context layer. +Operational details and deployment instructions live in `README_CLOUDRUN.md`. From f35498a7d52612cc137137883b03bd0594810a18 Mon Sep 17 00:00:00 2001 From: avireddy0 Date: Mon, 23 Mar 2026 00:10:11 -0400 Subject: [PATCH 08/16] fix(security): multi-stage Dockerfile, non-root user, .dockerignore Gmail scraper was running as root with no .dockerignore (COPY . ./ sent entire repo to Docker daemon). Converted to multi-stage build with non-root user (uid 65532). 
Added .dockerignore excluding .git, tests, docs, and other non-runtime files. Constraint: functions-framework CMD preserved exactly Confidence: high Scope-risk: narrow Co-Authored-By: Claude Opus 4.6 (1M context) --- .dockerignore | 12 ++++++++++++ Dockerfile | 37 +++++++++++++++++++++++++++---------- 2 files changed, 39 insertions(+), 10 deletions(-) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..8440e4d --- /dev/null +++ b/.dockerignore @@ -0,0 +1,12 @@ +.git +__pycache__ +*.pyc +*.md +.env* +.claude +.omc +.planning +tests +docs +deploy +*.log diff --git a/Dockerfile b/Dockerfile index 181ffe0..127474f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,17 +1,34 @@ -# Use the official lightweight Python image. -# https://hub.docker.com/_/python -FROM python:3.10-slim +# syntax=docker/dockerfile:1 -# Allow statements and log messages to immediately appear in the Knative logs -ENV PYTHONUNBUFFERED True +# Stage 1: builder — install dependencies into /app/deps +FROM python:3.10-slim AS builder + +ENV PYTHONUNBUFFERED=True +WORKDIR /app + +COPY requirements.txt . +RUN pip install --no-cache-dir --target=/app/deps -r requirements.txt + + +# Stage 2: runtime — non-root user, copy deps and app code +FROM python:3.10-slim AS runtime + +ENV PYTHONUNBUFFERED=True +ENV APP_HOME=/app +ENV PYTHONPATH=/app/deps -# Copy local code to the container image. -ENV APP_HOME /app WORKDIR $APP_HOME -COPY . ./ -# Install production dependencies. -RUN pip install --no-cache-dir -r requirements.txt +# Copy installed dependencies from builder +COPY --from=builder /app/deps /app/deps + +# Copy application source files +COPY gmail_scraper.py . +COPY main.py . + +# Run as nonroot (uid 65532) +RUN useradd -u 65532 -r -s /sbin/nologin nonroot +USER nonroot # Run the web service on container startup. 
# Use functions-framework to run the function From cce897717c4e9a544a0b741646e456f224f3115a Mon Sep 17 00:00:00 2001 From: avireddy0 Date: Tue, 24 Mar 2026 00:28:01 -0400 Subject: [PATCH 09/16] feat(ci): pin base image to SHA256 digest for reproducibility python:3.10-slim pinned to @sha256:4ba18b066cee1... in both stages. Confidence: high Scope-risk: narrow Co-Authored-By: Claude Opus 4.6 (1M context) --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 127474f..8539b42 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ # syntax=docker/dockerfile:1 # Stage 1: builder — install dependencies into /app/deps -FROM python:3.10-slim AS builder +FROM python:3.10-slim@sha256:4ba18b066cee17f2696cf9a2ba564d7d5eb05a91d6a949326780aa7c6912160d AS builder ENV PYTHONUNBUFFERED=True WORKDIR /app @@ -11,7 +11,7 @@ RUN pip install --no-cache-dir --target=/app/deps -r requirements.txt # Stage 2: runtime — non-root user, copy deps and app code -FROM python:3.10-slim AS runtime +FROM python:3.10-slim@sha256:4ba18b066cee17f2696cf9a2ba564d7d5eb05a91d6a949326780aa7c6912160d AS runtime ENV PYTHONUNBUFFERED=True ENV APP_HOME=/app From a7ca72b1a91ddc861de5749769a6c00ac3d826ae Mon Sep 17 00:00:00 2001 From: avireddy0 Date: Tue, 24 Mar 2026 01:02:01 -0400 Subject: [PATCH 10/16] security: add --no-log-init to useradd (prevents sparse log files) Co-Authored-By: Claude Opus 4.6 (1M context) --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 8539b42..71a54f7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -27,7 +27,7 @@ COPY gmail_scraper.py . COPY main.py . # Run as nonroot (uid 65532) -RUN useradd -u 65532 -r -s /sbin/nologin nonroot +RUN useradd --no-log-init -u 65532 -r -s /sbin/nologin nonroot USER nonroot # Run the web service on container startup. 
From 821bd086624209cca65784ef934c61ff8dadbc39 Mon Sep 17 00:00:00 2001 From: avireddy0 Date: Wed, 25 Mar 2026 02:31:11 -0400 Subject: [PATCH 11/16] fix(ci): switch from SA key to WIF auth for Cloud Run deploy Org policy blocks SA key creation. Switch to Workload Identity Federation using existing github pool in claude-mcp-457317. Added id-token: write permission required for OIDC token exchange. Confidence: high Scope-risk: narrow Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/deploy.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 540e026..1affb4f 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -14,6 +14,9 @@ env: jobs: deploy: runs-on: ubuntu-latest + permissions: + contents: read + id-token: write steps: - name: Checkout @@ -23,7 +26,8 @@ jobs: id: auth uses: google-github-actions/auth@v2 with: - credentials_json: '${{ secrets.GCP_CREDENTIALS }}' + workload_identity_provider: ${{ secrets.WIF_PROVIDER }} + service_account: claude-service-account@claude-mcp-457317.iam.gserviceaccount.com - name: Set up Cloud SDK uses: google-github-actions/setup-gcloud@v2 From d8d4cb058e3d6f627510b72a9756bdb9bacb8ff5 Mon Sep 17 00:00:00 2001 From: avireddy0 Date: Wed, 25 Mar 2026 02:41:19 -0400 Subject: [PATCH 12/16] ci: re-trigger deploy with updated WIF IAM binding Co-Authored-By: Claude Opus 4.6 (1M context) From 18ef92783c141a67c3fea4e40f87e61b0dccc2df Mon Sep 17 00:00:00 2001 From: avireddy0 Date: Thu, 26 Mar 2026 01:04:16 -0400 Subject: [PATCH 13/16] fix(ci): add startup-cpu-boost for faster container startup Container was failing health check with failureThreshold:1. Adding startup-cpu-boost gives the container more CPU during initialization to start functions-framework before the probe timeout. 
Confidence: high Scope-risk: narrow Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 1affb4f..c304a47 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -43,7 +43,7 @@ jobs: DATASET_ID=gmail_analytics TABLE_ID=messages ADMIN_EMAIL=avi@envsn.com - flags: '--allow-unauthenticated --service-account=${{ env.SERVICE_ACCOUNT }} --timeout=3600 --memory=2Gi --cpu=2 --max-instances=1' + flags: '--allow-unauthenticated --service-account=${{ env.SERVICE_ACCOUNT }} --timeout=3600 --memory=2Gi --cpu=2 --max-instances=1 --startup-cpu-boost' - name: Update Cloud Scheduler run: | From c0502117b409bccd1b2d70bd397fb611b115c4e6 Mon Sep 17 00:00:00 2001 From: avireddy0 Date: Thu, 26 Mar 2026 01:06:23 -0400 Subject: [PATCH 14/16] fix(ci): add /app/deps/bin to PATH for functions-framework binary pip install --target=/app/deps puts binaries in /app/deps/bin/ which isn't on PATH. Container failed with 'functions-framework: not found'. 
Confidence: high Scope-risk: narrow Co-Authored-By: Claude Opus 4.6 (1M context) --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index 71a54f7..c3ab170 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,6 +16,7 @@ FROM python:3.10-slim@sha256:4ba18b066cee17f2696cf9a2ba564d7d5eb05a91d6a94932678 ENV PYTHONUNBUFFERED=True ENV APP_HOME=/app ENV PYTHONPATH=/app/deps +ENV PATH="/app/deps/bin:${PATH}" WORKDIR $APP_HOME From 6c13fb4b402dd7cee1f19e7563f2f8ec6d22d667 Mon Sep 17 00:00:00 2001 From: avireddy0 Date: Thu, 26 Mar 2026 01:13:26 -0400 Subject: [PATCH 15/16] =?UTF-8?q?fix(ci):=20correct=20flag=20--startup-cpu?= =?UTF-8?q?-boost=20=E2=86=92=20--cpu-boost?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index c304a47..ca7a797 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -43,7 +43,7 @@ jobs: DATASET_ID=gmail_analytics TABLE_ID=messages ADMIN_EMAIL=avi@envsn.com - flags: '--allow-unauthenticated --service-account=${{ env.SERVICE_ACCOUNT }} --timeout=3600 --memory=2Gi --cpu=2 --max-instances=1 --startup-cpu-boost' + flags: '--allow-unauthenticated --service-account=${{ env.SERVICE_ACCOUNT }} --timeout=3600 --memory=2Gi --cpu=2 --max-instances=1 --cpu-boost' - name: Update Cloud Scheduler run: | From dee6b4e699758a07e1468616a8adc16b2c224c92 Mon Sep 17 00:00:00 2001 From: avireddy0 Date: Thu, 26 Mar 2026 01:18:37 -0400 Subject: [PATCH 16/16] fix(ci): reduce scheduler attempt-deadline from 3600s to 1800s (max 30m) Cloud Scheduler enforces max attempt_deadline of 30 minutes. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- setup_scheduler.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup_scheduler.sh b/setup_scheduler.sh index 7ffbb4d..d98c999 100755 --- a/setup_scheduler.sh +++ b/setup_scheduler.sh @@ -50,7 +50,7 @@ gcloud scheduler jobs create http $JOB_NAME \ --http-method=POST \ --headers="Content-Type=application/json" \ --message-body='{"incremental": true, "max_per_user": 100}' \ - --attempt-deadline=3600s \ + --attempt-deadline=1800s \ --description="Hourly incremental Gmail scrape to BigQuery" echo ""