Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
229 changes: 206 additions & 23 deletions .github/workflows/sync-cloud-run-env.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ env:
GCP_WORKLOAD_IDENTITY_PROVIDER: projects/252919773759/locations/global/workloadIdentityPools/github-actions/providers/github-main
GCP_WORKLOAD_IDENTITY_SERVICE_ACCOUNT: longbridge-platform-deploy@longbridgequant.iam.gserviceaccount.com
GCP_RUNTIME_SERVICE_ACCOUNT: longbridge-platform-runtime@longbridgequant.iam.gserviceaccount.com
GCP_SCHEDULER_SERVICE_ACCOUNT: longbridge-platform-scheduler@longbridgequant.iam.gserviceaccount.com
GCP_ARTIFACT_REGISTRY_REPOSITORY: cloud-run-source-deploy

concurrency:
Expand Down Expand Up @@ -99,11 +100,13 @@ jobs:
# Set CLOUD_RUN_REGION per Environment so paper/HK/SG can target different regions.
CLOUD_RUN_REGION: ${{ vars.CLOUD_RUN_REGION }}
CLOUD_RUN_SERVICE: ${{ vars.CLOUD_RUN_SERVICE }}
CLOUD_RUN_SERVICE_TARGETS_JSON: ${{ vars.CLOUD_RUN_SERVICE_TARGETS_JSON }}
CLOUD_RUN_ENV_SYNC_WAIT_FOR_COMMIT: ${{ vars.CLOUD_RUN_ENV_SYNC_WAIT_FOR_COMMIT }}
CLOUD_SCHEDULER_LOCATION: ${{ vars.CLOUD_SCHEDULER_LOCATION }}
CLOUD_SCHEDULER_MAIN_TIME: ${{ vars.CLOUD_SCHEDULER_MAIN_TIME }}
CLOUD_SCHEDULER_PROBE_TIME: ${{ vars.CLOUD_SCHEDULER_PROBE_TIME }}
CLOUD_SCHEDULER_PRECHECK_TIME: ${{ vars.CLOUD_SCHEDULER_PRECHECK_TIME }}
MONITOR_DISPATCHER_OWNER_LABEL: ${{ vars.MONITOR_DISPATCHER_OWNER_LABEL || 'SG' }}
ACCOUNT_PREFIX: ${{ vars.ACCOUNT_PREFIX }}
TELEGRAM_TOKEN_SECRET_NAME: ${{ vars.TELEGRAM_TOKEN_SECRET_NAME }}
LONGPORT_APP_KEY_SECRET_NAME: ${{ vars.LONGPORT_APP_KEY_SECRET_NAME }}
Expand Down Expand Up @@ -961,6 +964,89 @@ jobs:
remove_env_vars+=("RUNTIME_TARGET_ENABLED")
fi

monitor_targets_json="$(
python - <<'PY'
import json
import os
import subprocess

def decode_json(raw, fallback):
    """Parse ``raw`` as JSON, returning ``fallback`` when it is unusable.

    ``raw`` is coerced to a string and stripped first; falsy input, a blank
    string, or text that ``json.loads`` rejects all yield ``fallback``.
    """
    text = str(raw or "").strip()
    if text:
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            # Malformed JSON is treated the same as missing input.
            pass
    return fallback

def runtime_target_from(source):
    """Return the runtime-target mapping carried by ``source``, or ``{}``.

    Looks at ``source["runtime_target"]`` first and ``source["runtime_target_json"]``
    second. A string value is decoded with ``decode_json``; anything that is
    not a dict after that is replaced by an empty dict.
    """
    candidate = source.get("runtime_target") or source.get("runtime_target_json")
    if isinstance(candidate, str):
        candidate = decode_json(candidate, {})
    if isinstance(candidate, dict):
        return candidate
    return {}

# CLOUD_RUN_SERVICE_TARGETS_JSON may decode to an object carrying a "targets"
# key or to a bare value; anything other than a non-empty list falls back to a
# single entry assembled from the per-service environment variables.
raw_targets = decode_json(os.environ.get("CLOUD_RUN_SERVICE_TARGETS_JSON"), None)
source_targets = raw_targets.get("targets") if isinstance(raw_targets, dict) else raw_targets
if not (isinstance(source_targets, list) and source_targets):
    default_target = {
        "service": os.environ.get("CLOUD_RUN_SERVICE"),
        "region": os.environ.get("CLOUD_RUN_REGION"),
        "runtime_target": decode_json(os.environ.get("RUNTIME_TARGET_JSON"), {}),
        "runtime_target_enabled": os.environ.get("RUNTIME_TARGET_ENABLED", "true"),
    }
    source_targets = [default_target]

project = os.environ.get("GCP_PROJECT_ID", "").strip()
output = []
for source in source_targets:
if not isinstance(source, dict):
continue
runtime_target = runtime_target_from(source)
service_name = (
source.get("service")
or source.get("service_name")
or source.get("cloud_run_service")
or runtime_target.get("service_name")
)
region = source.get("region") or source.get("cloud_run_region") or os.environ.get("CLOUD_RUN_REGION")
service_name = str(service_name or "").strip()
region = str(region or "").strip()
if not service_name or not region:
continue
service_url = subprocess.check_output(
[
"gcloud",
"run",
"services",
"describe",
service_name,
f"--project={project}",
f"--region={region}",
"--format=value(status.url)",
],
text=True,
).strip()
if not service_url:
continue
output.append(
{
"service_name": service_name,
"service_url": service_url,
"strategy_profile": runtime_target.get("strategy_profile") or source.get("STRATEGY_PROFILE"),
"account_scope": runtime_target.get("account_scope") or source.get("ACCOUNT_REGION") or source.get("account_scope"),
"runtime_target_enabled": source.get("runtime_target_enabled", source.get("RUNTIME_TARGET_ENABLED", True)),
"scheduler": runtime_target.get("scheduler") if isinstance(runtime_target.get("scheduler"), dict) else {},

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Populate fallback monitor schedules

When CLOUD_RUN_SERVICE_TARGETS_JSON contains the minimal target entries supported elsewhere (for example {"service":"svc-a"}), or when the generated RUNTIME_TARGET_JSON has no scheduler, this writes an empty scheduler object ({}) for that target into MONITOR_DISPATCH_TARGETS_JSON. The new dispatcher only calls /probe and /dry-run when probe_time/precheck_time are present, and this workflow later deletes the legacy probe/precheck jobs, so default-configured services silently stop receiving monitor checks despite the CLOUD_SCHEDULER_PROBE_TIME/CLOUD_SCHEDULER_PRECHECK_TIME defaults. Please include the resolved fallback schedule in each monitor target before removing the old jobs.

Useful? React with 👍 / 👎.

}
)
print(json.dumps({"targets": output}, separators=(",", ":")))
PY
)"
env_pairs+=("MONITOR_DISPATCH_TARGETS_JSON=${monitor_targets_json}")

gcloud_args=(
run services update "${CLOUD_RUN_SERVICE}"
--region "${CLOUD_RUN_REGION}"
Expand Down Expand Up @@ -1042,33 +1128,42 @@ jobs:
exit 1
fi

for suffix in scheduler probe-scheduler precheck-scheduler; do
job_name="${CLOUD_RUN_SERVICE}-${suffix}"
case "${suffix}" in
scheduler)
schedule_time="${main_time}"
scheduler_path="/run"
;;
probe-scheduler)
schedule_time="${probe_time}"
scheduler_path="/probe"
;;
precheck-scheduler)
schedule_time="${precheck_time}"
scheduler_path="/dry-run"
;;
esac

current_schedule="$(gcloud scheduler jobs describe "${job_name}" \
# Grant roles/run.invoker on the Cloud Run service to the scheduler service
# account first and the runtime service account second.
for invoker_account in "${GCP_SCHEDULER_SERVICE_ACCOUNT}" "${GCP_RUNTIME_SERVICE_ACCOUNT}"; do
  gcloud run services add-iam-policy-binding "${CLOUD_RUN_SERVICE}" \
    --project="${GCP_PROJECT_ID}" \
    --region="${CLOUD_RUN_REGION}" \
    --member="serviceAccount:${invoker_account}" \
    --role="roles/run.invoker" \
    --quiet
done

scheduler_job_candidates=("${CLOUD_RUN_SERVICE}-scheduler")
if [[ "${CLOUD_RUN_SERVICE}" == *-service ]]; then
scheduler_job_candidates+=("${CLOUD_RUN_SERVICE%-service}-scheduler")
fi

job_name=""
current_schedule=""
for candidate_job in "${scheduler_job_candidates[@]}"; do
current_schedule="$(gcloud scheduler jobs describe "${candidate_job}" \
--project="${GCP_PROJECT_ID}" \
--location="${scheduler_location}" \
--format='value(schedule)' 2>/dev/null || true)"
if [ -z "${current_schedule}" ]; then
echo "Cloud Scheduler job ${job_name} was not found in ${scheduler_location}; skipping schedule sync."
continue
if [ -n "${current_schedule}" ]; then
job_name="${candidate_job}"
break
fi
done
if [ -z "${job_name}" ]; then
job_name="${scheduler_job_candidates[0]}"
fi

desired_schedule="$(CURRENT_SCHEDULE="${current_schedule}" SCHEDULE_TIME="${schedule_time}" python - <<'PY'
if [ -n "${current_schedule}" ]; then
desired_schedule="$(CURRENT_SCHEDULE="${current_schedule}" SCHEDULE_TIME="${main_time}" python - <<'PY'
import os

current_fields = os.environ["CURRENT_SCHEDULE"].split()
Expand All @@ -1085,8 +1180,25 @@ jobs:
)
PY
)"
else
desired_schedule="$(SCHEDULE_TIME="${main_time}" python - <<'PY'
import os

fields = os.environ["SCHEDULE_TIME"].split()
if len(fields) == 5:
print(" ".join(fields))
elif len(fields) == 2:
print(" ".join([*fields, "*", "*", "*"]))
else:
raise SystemExit(
f"Cloud Scheduler override must have 2 time fields or 5 cron fields: {os.environ['SCHEDULE_TIME']!r}"
)
PY
)"
fi

scheduler_uri="${service_url}${scheduler_path}"
scheduler_uri="${service_url}/run"
if [ -n "${current_schedule}" ]; then
echo "Updating Cloud Scheduler job ${job_name} schedule to ${desired_schedule}, timezone to ${market_timezone}, and URI to ${scheduler_uri}."
gcloud scheduler jobs update http "${job_name}" \
--project="${GCP_PROJECT_ID}" \
Expand All @@ -1095,6 +1207,77 @@ jobs:
--schedule="${desired_schedule}" \
--time-zone="${market_timezone}" \
--quiet
else
echo "Creating Cloud Scheduler job ${job_name} schedule ${desired_schedule}, timezone ${market_timezone}, and URI ${scheduler_uri}."
gcloud scheduler jobs create http "${job_name}" \
--project="${GCP_PROJECT_ID}" \
--location="${scheduler_location}" \
--uri="${scheduler_uri}" \
--http-method=POST \
--oidc-service-account-email="${GCP_SCHEDULER_SERVICE_ACCOUNT}" \
--oidc-token-audience="${service_url}" \
--schedule="${desired_schedule}" \
--time-zone="${market_timezone}" \
--attempt-deadline=600s \
--quiet
fi

# Only the deployment whose label equals MONITOR_DISPATCHER_OWNER_LABEL
# (default "SG") manages the shared monitor dispatcher Cloud Scheduler job;
# every other deployment just logs that it is skipping it.
if [ "${DEPLOYMENT_LABEL:-}" != "${MONITOR_DISPATCHER_OWNER_LABEL:-SG}" ]; then
  echo "Skipping shared LongBridge monitor dispatcher scheduler from ${DEPLOYMENT_LABEL}; owner is ${MONITOR_DISPATCHER_OWNER_LABEL:-SG}."
else
  monitor_job_name="longbridge-monitor-dispatcher-scheduler"
  monitor_uri="${service_url}/monitor-dispatch"
  # Create and update take the same flag set, so it is defined once.
  monitor_job_flags=(
    --project="${GCP_PROJECT_ID}"
    --location="${scheduler_location}"
    --uri="${monitor_uri}"
    --http-method=POST
    --oidc-service-account-email="${GCP_SCHEDULER_SERVICE_ACCOUNT}"
    --oidc-token-audience="${service_url}"
    --schedule="*/5 * * * *"
    --time-zone="UTC"
    --attempt-deadline=180s
    --quiet
  )
  # A successful describe means the job already exists and must be updated
  # rather than created.
  if gcloud scheduler jobs describe "${monitor_job_name}" \
    --project="${GCP_PROJECT_ID}" \
    --location="${scheduler_location}" >/dev/null 2>&1; then
    echo "Updating Cloud Scheduler job ${monitor_job_name} to ${monitor_uri}."
    gcloud scheduler jobs update http "${monitor_job_name}" "${monitor_job_flags[@]}"
  else
    echo "Creating Cloud Scheduler job ${monitor_job_name} at ${monitor_uri}."
    gcloud scheduler jobs create http "${monitor_job_name}" "${monitor_job_flags[@]}"
  fi
fi

# Remove the per-service probe/precheck Cloud Scheduler jobs. Names are derived
# from the service name and, when it ends in "-service", also from the name
# with that suffix stripped.
legacy_job_prefixes=("${CLOUD_RUN_SERVICE}")
if [[ "${CLOUD_RUN_SERVICE}" == *-service ]]; then
  legacy_job_prefixes+=("${CLOUD_RUN_SERVICE%-service}")
fi

legacy_jobs=()
for legacy_job_prefix in "${legacy_job_prefixes[@]}"; do
  legacy_jobs+=(
    "${legacy_job_prefix}-probe-scheduler"
    "${legacy_job_prefix}-precheck-scheduler"
  )
done

for legacy_job in "${legacy_jobs[@]}"; do
  # Jobs that cannot be described are left alone.
  gcloud scheduler jobs describe "${legacy_job}" \
    --project="${GCP_PROJECT_ID}" \
    --location="${scheduler_location}" >/dev/null 2>&1 || continue
  echo "Deleting legacy Cloud Scheduler job ${legacy_job}; monitor dispatcher now owns probe/precheck."
  gcloud scheduler jobs delete "${legacy_job}" \
    --project="${GCP_PROJECT_ID}" \
    --location="${scheduler_location}" \
    --quiet
done

- name: Prune old Cloud Run revisions
Expand Down
Loading