Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Builds the three QuickTicket service images and pushes them to GitHub
# Container Registry, tagged with the commit SHA, on every push to main.
name: CI - Build and Push QuickTicket Images

on:
  push:
    branches: [main]

jobs:
  build-and-push:
    runs-on: ubuntu-latest
    permissions:
      # Needed to push images to ghcr.io with GITHUB_TOKEN.
      packages: write
      # The job only checks the repository out; it never writes to it.
      contents: read

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Images are published under the repository owner (not the user who
      # happened to push), so the image path is stable across contributors.
      # Docker/ghcr.io require a lowercase repository path, hence the `,,`.
      - name: Compute image namespace
        run: echo "IMAGE_NAMESPACE=ghcr.io/${GITHUB_REPOSITORY_OWNER,,}" >> "$GITHUB_ENV"

      # GITHUB_SHA and IMAGE_NAMESPACE are read as environment variables so
      # no workflow expression is interpolated into the shell script text.
      - name: Build and push gateway
        run: |
          docker build -t "$IMAGE_NAMESPACE/quickticket-gateway:$GITHUB_SHA" ./app/gateway
          docker push "$IMAGE_NAMESPACE/quickticket-gateway:$GITHUB_SHA"

      - name: Build and push events
        run: |
          docker build -t "$IMAGE_NAMESPACE/quickticket-events:$GITHUB_SHA" ./app/events
          docker push "$IMAGE_NAMESPACE/quickticket-events:$GITHUB_SHA"

      - name: Build and push payments
        run: |
          docker build -t "$IMAGE_NAMESPACE/quickticket-payments:$GITHUB_SHA" ./app/payments
          docker push "$IMAGE_NAMESPACE/quickticket-payments:$GITHUB_SHA"
10 changes: 10 additions & 0 deletions app/events/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Python bytecode and caches
__pycache__
*.pyc
*.pyo

# Version control metadata
.git
.gitignore

# Local environment / secrets
.env

# Documentation (covers README.md as well)
*.md

# Editor and OS artefacts
.vscode
__MACOSX
10 changes: 10 additions & 0 deletions app/gateway/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Python bytecode and caches
__pycache__
*.pyc
*.pyo

# Version control metadata
.git
.gitignore

# Local environment / secrets
.env

# Documentation (covers README.md as well)
*.md

# Editor and OS artefacts
.vscode
__MACOSX
4 changes: 3 additions & 1 deletion app/gateway/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ FROM python:3.13-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
RUN addgroup --system app && adduser --system --ingroup app app
COPY main.py .

RUN chown -R app:app /app
USER app
EXPOSE 8080
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]
29 changes: 16 additions & 13 deletions app/gateway/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,14 +310,10 @@ async def _notify_order_confirmed(reservation_id: str):
log.warning(f"notify failed (non-critical) order={reservation_id} err={e}")


@app.post("/reserve/{reservation_id}/pay")
@app.post("/reserve/{reservation_id}/pay")
async def pay_reservation(reservation_id: str):
# 1. Call payments — wrapped in circuit breaker + retry.
#
# Composition order matters: cb.call(retry(_charge)) means each CB-tracked
# invocation includes its retries internally; the CB only sees the FINAL
# outcome. The reverse — retry(cb.call(_charge)) — would retry past the
# CircuitOpenError, defeating the fast-fail. See lab 11 §11.4.
"""Pay for reservation with graceful degradation when payments service is down."""
async def _charge():
resp = await client.post(
f"{PAYMENTS_URL}/charge",
Expand All @@ -327,20 +323,27 @@ async def _charge():
return resp

try:
# Try to call payments with circuit breaker + retry
pay_resp = await payments_cb.call(lambda: call_with_retry(_charge, target="payments"))
payment_ref = pay_resp.json().get("payment_ref", "unknown")
except CircuitOpenError:
log.error("circuit open, skipping payments call")
raise HTTPException(503, "Payment service temporarily unavailable (circuit open)")
except httpx.TimeoutException:
raise HTTPException(504, "Payment service timeout")
except (CircuitOpenError, httpx.ConnectError, httpx.TimeoutException, httpx.RequestError) as e:
# === GRACEFUL DEGRADATION ===
log.warning(f"Payments service unavailable for reservation {reservation_id}: {e}")
return JSONResponse(
status_code=503,
content={
"error": "payments_unavailable",
"message": "Payment service is temporarily down. Your reservation is held — try again in a few minutes.",
"reservation_id": reservation_id
}
)
except httpx.HTTPStatusError as e:
raise HTTPException(e.response.status_code, "Payment failed")
except Exception as e:
log.error(f"payment error: {e}")
raise HTTPException(502, "Payment service unavailable")

# 2. Confirm reservation in events.
# 2. Confirm reservation in events (only if payment succeeded)
try:
confirm_resp = await client.post(
f"{EVENTS_URL}/reservations/{reservation_id}/confirm",
Expand All @@ -352,7 +355,7 @@ async def _charge():
log.error(f"confirm error after payment: {e}")
raise HTTPException(500, "Payment succeeded but confirmation failed — contact support")

# 3. Fire-and-forget notify (don't await → don't add latency, don't fail user).
# 3. Fire-and-forget notify
asyncio.create_task(_notify_order_confirmed(reservation_id))

return result
10 changes: 10 additions & 0 deletions app/payments/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Python bytecode and caches
__pycache__
*.pyc
*.pyo

# Version control metadata
.git
.gitignore

# Local environment / secrets
.env

# Documentation (covers README.md as well)
*.md

# Editor and OS artefacts
.vscode
__MACOSX
2 changes: 1 addition & 1 deletion docker-compose.monitoring.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ services:
- "9090:9090"
volumes:
- ../monitoring/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
- ../monitoring/prometheus/rules.yml:/etc/prometheus/rules.yml:ro  # rules file mounted next to prometheus.yml inside the container
command:
- "--config.file=/etc/prometheus/prometheus.yml"
- "--storage.tsdb.retention.time=7d"

grafana:
image: grafana/grafana:13.0.1
ports:
Expand Down
18 changes: 18 additions & 0 deletions k8s/analysis-template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# AnalysisTemplate that measures the share of 5xx responses among requests
# attributed to one ReplicaSet (selected by pod-template hash).
apiVersion: argoproj.io/v1alpha1
kind: AnalysisTemplate
metadata:
  name: gateway-error-rate
spec:
  # Every {{args.*}} placeholder used below must be declared here; the
  # caller supplies the value when it starts the analysis.
  args:
    - name: canary-hash
  metrics:
    - name: error-rate
      # Take 5 measurements, 30s apart.
      interval: 30s
      count: 5
      # A measurement passes when less than 10% of requests returned 5xx.
      successCondition: result[0] < 0.1
      failureLimit: 2
      provider:
        prometheus:
          address: http://prometheus:9090
          # NOTE(review): assumes gateway_requests_total carries an rs_hash
          # label holding the pod-template hash - confirm against the
          # gateway's metrics / scrape relabelling.
          query: |
            sum(rate(gateway_requests_total{status=~"5..", rs_hash="{{args.canary-hash}}"}[1m]))
            /
            sum(rate(gateway_requests_total{rs_hash="{{args.canary-hash}}"}[1m]))
42 changes: 42 additions & 0 deletions k8s/gateway.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Argo Rollouts canary rollout for the gateway service.
apiVersion: argoproj.io/v1alpha1
kind: Rollout
metadata:
  name: gateway
spec:
  replicas: 5
  strategy:
    canary:
      steps:
        # Stage 1: 20% canary, wait, then run the error-rate analysis.
        - setWeight: 20
        - pause:
            duration: 30s
        - analysis:
            templates:
              - templateName: gateway-error-rate
            args:
              # Pass the newest ReplicaSet's pod-template hash to the template.
              - name: canary-hash
                valueFrom:
                  podTemplateHashValue: Latest
        # Stage 2: 60% canary, wait, then full promotion.
        - setWeight: 60
        - pause:
            duration: 30s
        - setWeight: 100
  selector:
    matchLabels:
      app: gateway
  template:
    metadata:
      labels:
        app: gateway
    spec:
      containers:
        - name: gateway
          image: quickticket-gateway:v1
          # Image is never pulled from a registry; it must already exist on the node.
          imagePullPolicy: Never
          ports:
            - containerPort: 8080
          env:
            - name: EVENTS_URL
              value: "http://events:8081"
            - name: PAYMENTS_URL
              value: "http://payments:8082"
            - name: APP_VERSION
              value: "v4-auto-analysis"
37 changes: 37 additions & 0 deletions k8s/postgres.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Single-replica PostgreSQL Deployment plus the Service that exposes it
# inside the cluster as postgres:5432.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: postgres
spec:
  replicas: 1
  selector:
    matchLabels:
      app: postgres
  template:
    metadata:
      labels:
        app: postgres
    spec:
      containers:
        - name: postgres
          image: postgres:17-alpine
          ports:
            - containerPort: 5432
          env:
            - name: POSTGRES_DB
              value: quickticket
            - name: POSTGRES_USER
              value: quickticket
            # NOTE(review): plaintext credential committed with the manifest;
            # consider sourcing it from a Secret via secretKeyRef.
            - name: POSTGRES_PASSWORD
              value: quickticket
          # Keep the pod out of the Service endpoints until the server
          # actually accepts connections for the application database.
          readinessProbe:
            exec:
              command:
                - pg_isready
                - -U
                - quickticket
                - -d
                - quickticket
            initialDelaySeconds: 5
            periodSeconds: 5
            timeoutSeconds: 3
---
apiVersion: v1
kind: Service
metadata:
  name: postgres
spec:
  selector:
    app: postgres
  ports:
    - port: 5432
      targetPort: 5432
23 changes: 23 additions & 0 deletions monitoring/prometheus/prometheus.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Prometheus server configuration: scrape and rule-evaluation cadence,
# rule files to load, and the static targets to scrape.
global:
  scrape_interval: 15s
  evaluation_interval: 15s

# Loaded in addition to this file.
rule_files:
  - "rules.yml"

scrape_configs:
  # QuickTicket services
  - job_name: "gateway"
    static_configs:
      - targets:
          - "gateway:8080"

  - job_name: "events"
    static_configs:
      - targets:
          - "events:8081"

  - job_name: "payments"
    static_configs:
      - targets:
          - "payments:8082"

  # Prometheus scraping itself
  - job_name: "prometheus"
    static_configs:
      - targets:
          - "localhost:9090"
12 changes: 12 additions & 0 deletions monitoring/prometheus/rules.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Recording rules for the gateway SLIs and error-budget burn rate.
groups:
  - name: quickticket_slo_rules
    interval: 30s
    rules:
      # Fraction of gateway requests over 5m whose status is not 5xx.
      - record: gateway:sli_availability:ratio_rate5m
        expr: >-
          sum(rate(gateway_requests_total{status!~"5.."}[5m]))
          /
          sum(rate(gateway_requests_total[5m]))

      # Fraction of gateway requests over 5m that fall in the le="0.5" bucket.
      - record: gateway:sli_latency_500ms:ratio_rate5m
        expr: >-
          sum(rate(gateway_request_duration_seconds_bucket{le="0.5"}[5m]))
          /
          sum(rate(gateway_request_duration_seconds_count[5m]))

      # Observed error ratio divided by the error ratio allowed by a 0.995 target.
      - record: gateway:error_budget_burn_rate:ratio_rate5m
        expr: >-
          (1 - gateway:sli_availability:ratio_rate5m)
          /
          (1 - 0.995)
Loading