Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Continuous integration: runs on every pull request.
name: CI

on:
  pull_request:

jobs:
  # Byte-compile each Python module so syntax errors fail the build.
  python-check:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # One job per top-level Python package directory.
        module:
          - backend
          - autoscaler
          - load_balancer
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Compile Python files
        run: python -m compileall ${{ matrix.module }}

  # Install, lint and build the frontend that lives in ./fe.
  frontend-build:
    runs-on: ubuntu-latest
    defaults:
      run:
        # Applies to `run` steps only; `uses` steps still need full paths.
        working-directory: fe
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'
          cache: 'npm'
          cache-dependency-path: fe/package-lock.json

      - name: Install dependencies
        run: npm ci

      - name: Lint frontend
        run: npm run lint

      - name: Build frontend
        run: npm run build

  # Parse and validate the compose file without starting any service.
  docker-compose-check:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Validate docker compose configuration
        run: docker compose config
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.idea/
__pycache__/
*.py[cod]
185 changes: 22 additions & 163 deletions autoscaler/autoscaler.py
Original file line number Diff line number Diff line change
@@ -1,175 +1,34 @@
import time
import logging
import os
import multiprocessing
import signal, sys, docker, json
import requests
from metrics import PrometheusClient, DockerManager, clear_prometheus_targets

from cleanup import register_signal_handlers
from config import load_settings
from scaler import AutoScaler
from targets import clear_prometheus_targets

class AutoScaler:
    """Keep a labelled group of Docker containers sized to its CPU load.

    Each call to :meth:`scale` lists the containers carrying ``label``,
    averages their CPU usage and starts or removes at most one container when
    the average has stayed on one side of the threshold for long enough.
    After every change the load balancer is asked to refresh its server list.
    """

    # Seconds the average must stay above the threshold before scaling out.
    SCALE_OUT_AFTER_SECONDS = 30
    # Seconds the average must stay below half the threshold before scaling in.
    SCALE_IN_AFTER_SECONDS = 15
    # Seconds below half the threshold before reporting that every container
    # is fixed and nothing can be removed.
    NO_REMOVABLE_LOG_AFTER_SECONDS = 30

    def __init__(
        self,
        prom_url: str,
        docker_image: str,
        label: str = 'autoscale_service',
        cpu_threshold: float = 0.7,
        min_instances: int = 1,
        max_instances: int = 10,
        check_interval: int = 10,
        load_balancer_url: str = "http://host.docker.internal:8000"
    ):
        """Store the scaling policy and create the Prometheus/Docker helpers.

        Args:
            prom_url: Base URL handed to ``PrometheusClient``.
            docker_image: Image passed to ``DockerManager.run_container``.
            label: Label used to list and start the managed containers.
            cpu_threshold: Scale-out threshold as a fraction; it is compared
                as ``cpu_threshold * 100`` against the averaged CPU figure.
            min_instances: Container count below which one is always started
                and at which scale-in stops.
            max_instances: Container count at which scale-out stops.
            check_interval: Seconds slept between two ``scale()`` calls.
            load_balancer_url: Base URL of the load balancer to notify.
        """
        self.prom = PrometheusClient(prom_url)
        self.dock = DockerManager()
        self.image = docker_image
        self.label = label
        self.threshold = cpu_threshold
        self.min = min_instances
        self.max = max_instances
        self.interval = check_interval

        # Timestamps (time.time()) of when the average first crossed the
        # scale-out / scale-in limit; None while the limit is not crossed.
        self.above_since = None
        self.below_since = None

        self.load_balancer_url = load_balancer_url

    def notify_load_balancer(self) -> None:
        """Ask the load balancer to refresh its server list.

        Best effort: every failure is logged and swallowed so a missing load
        balancer never interrupts the scaling loop.
        """
        try:
            refresh_url = f"{self.load_balancer_url}/refresh-servers"

            response = requests.post(refresh_url, timeout=3)

            if response.status_code == 200:
                logging.info("✅ Load balancer server refresh triggered")
            else:
                logging.warning(f"⚠️ Load balancer refresh failed: {response.status_code}")

        except requests.exceptions.ConnectionError:
            logging.warning("🔌 Could not connect to load balancer")
        except requests.exceptions.Timeout:
            logging.warning("⏰ Load balancer request timed out")
        except Exception as e:
            logging.error(f"❗ Error notifying load balancer: {e}")

    def scale(self) -> None:
        """Run one scaling decision: start, remove, or leave the containers."""
        containers = self.dock.list_containers(self.label)
        # Only containers that are not "fixed" may be removed on scale-in.
        autoscaled_containers = [
            c for c in containers if not self.dock._is_fixed(c)
        ]
        count = len(containers)

        # Below the minimum: start one container immediately, no timers.
        if count < self.min:
            logging.info(f"Instances below minimum ({count} < {self.min}). Scaling up.")
            self.dock.run_container(self.image, self.label)
            # Ask the load balancer to pick up the new server.
            self.notify_load_balancer()

            self.above_since = None
            self.below_since = None
            return

        num_cpus = multiprocessing.cpu_count()
        usages = [self.dock.get_container_cpu(c) for c in containers]
        raw_avg = sum(usages) / count if usages else 0.0
        # Normalise by the host core count.
        # NOTE(review): assumes get_container_cpu returns a percentage that
        # can exceed 100 on multi-core hosts - confirm against DockerManager.
        avg_cpu = raw_avg / num_cpus

        logging.info(
            f"Avg CPU: {avg_cpu:.2f}% (per core) "
            f"across {count} containers"
        )

        now = time.time()

        # --- scale out ---------------------------------------------------
        if avg_cpu > (self.threshold * 100):
            if self.above_since is None:
                self.above_since = now
                logging.debug("CPU above threshold, starting timer for scale-out.")
            elif (
                now - self.above_since >= self.SCALE_OUT_AFTER_SECONDS
                and count < self.max
            ):
                logging.info(
                    f"CPU above threshold for ≥ {self.SCALE_OUT_AFTER_SECONDS} "
                    "seconds. Scaling up by 1."
                )
                self.dock.run_container(self.image, self.label)

                # Ask the load balancer to pick up the new server.
                self.notify_load_balancer()

                self.above_since = None
                self.below_since = None
        else:
            self.above_since = None

        # --- scale in ----------------------------------------------------
        if avg_cpu < (self.threshold * 50):
            if self.below_since is None:
                self.below_since = now
                logging.debug("CPU below half-threshold, starting timer for scale-in.")
            else:
                held = now - self.below_since
                if (
                    held >= self.SCALE_IN_AFTER_SECONDS
                    and count > self.min
                    and autoscaled_containers
                ):
                    # Never shrink to below the minimum: doing so would only
                    # make the next tick start a replacement container.
                    target = autoscaled_containers[-1]
                    logging.info(
                        "CPU below half-threshold for ≥ "
                        f"{self.SCALE_IN_AFTER_SECONDS} seconds. "
                        f"Scaling down container: {target.name}"
                    )
                    self.dock.remove_container(target)

                    # Ask the load balancer to drop the removed server.
                    self.notify_load_balancer()

                    self.above_since = None
                    self.below_since = None
                elif (
                    held >= self.NO_REMOVABLE_LOG_AFTER_SECONDS
                    and not autoscaled_containers
                ):
                    logging.info("CPU below half-threshold, but no removable container found (all fixed).")
                elif held >= self.SCALE_IN_AFTER_SECONDS and count <= self.min:
                    logging.debug(
                        "CPU below half-threshold, but already at minimum "
                        f"instances ({count} <= {self.min})."
                    )
        else:
            self.below_since = None

    def run(self) -> None:
        """Call :meth:`scale` forever, sleeping ``interval`` seconds between calls."""
        logging.info("Starting AutoScaler loop.")
        while True:
            try:
                self.scale()
            except Exception as e:
                # Keep the loop alive, but record the traceback for diagnosis.
                logging.exception("Error during scaling: %s", e)
            time.sleep(self.interval)


def main():
    """Configure logging, install shutdown handlers and run the autoscaler.

    Settings come from ``load_settings()``; the call to ``scaler.run()`` does
    not return under normal operation.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    # Install the SIGINT/SIGTERM handlers first so that a signal arriving
    # during start-up still goes through the cleanup path.
    register_signal_handlers()
    clear_prometheus_targets()

    settings = load_settings()
    scaler = AutoScaler(
        prom_url=settings.prom_url,
        docker_image=settings.docker_image,
        label=settings.label,
        min_instances=settings.min_instances,
        max_instances=settings.max_instances,
        cpu_threshold=settings.cpu_threshold,
        check_interval=settings.check_interval,
        load_balancer_url=settings.load_balancer_url,
    )
    scaler.run()


if __name__ == "__main__":
    main()
35 changes: 35 additions & 0 deletions autoscaler/cleanup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import signal
import sys

import docker
from targets import FLASK_TARGET_PATH, clear_prometheus_targets


def cleanup_autoscaled_containers(label: "str | None" = None):
    """Force-remove every container whose name starts with ``<label>-``.

    Args:
        label: Name prefix of the containers to remove. When omitted it is
            read from the ``AUTOSCALE_LABEL`` environment variable and falls
            back to ``"autoscale_service"``, so the cleanup follows the same
            label the autoscaler is configured with instead of a hard-coded
            prefix.

    Removal is best effort: a failure on one container is printed and the
    loop continues with the next one.
    """
    import os  # local import: this module has no file-level ``import os``

    if label is None:
        label = os.getenv("AUTOSCALE_LABEL", "autoscale_service")
    prefix = f"{label}-"

    client = docker.from_env()
    # all=True also returns stopped containers, so leftovers are removed too.
    for container in client.containers.list(all=True):
        if not container.name.startswith(prefix):
            continue
        print(f"Removing container {container.name}")
        try:
            container.remove(force=True)
        except Exception as error:
            print(f"Failed to remove {container.name}: {error}")


def clear_local_target_file():
    """Reset the Prometheus targets and print the path that was cleared.

    The actual reset is delegated to ``clear_prometheus_targets``; any
    exception it raises propagates to the caller.
    """
    clear_prometheus_targets()
    message = f"Cleared target file: {FLASK_TARGET_PATH}"
    print(message)


def register_signal_handlers():
    """Install SIGINT and SIGTERM handlers that clean up and exit.

    The handler removes the autoscaled containers, resets the Prometheus
    target file and then exits with status 0. Each cleanup step is guarded
    on its own so that a failure in one (for example an unreachable Docker
    daemon) neither skips the other step nor prevents the process from
    exiting.
    """

    def graceful_shutdown(signum, frame):
        print("Shutting down autoscaler...")
        try:
            cleanup_autoscaled_containers()
        except Exception as error:
            print(f"Failed to remove autoscaled containers: {error}")
        try:
            clear_local_target_file()
        except Exception as error:
            print(f"Failed to clear flask.json: {error}")
        sys.exit(0)

    signal.signal(signal.SIGINT, graceful_shutdown)
    signal.signal(signal.SIGTERM, graceful_shutdown)
30 changes: 30 additions & 0 deletions autoscaler/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import os
from dataclasses import dataclass


@dataclass
class AutoScalerSettings:
    """Configuration values for the autoscaler, as built by ``load_settings``.

    Field order is part of the positional constructor signature.
    """

    # Value of PROM_URL.
    prom_url: str
    # Value of DOCKER_IMAGE; empty string when the variable is unset.
    docker_image: str
    # Value of AUTOSCALE_LABEL.
    label: str
    # Value of CPU_THRESHOLD, parsed as a float.
    cpu_threshold: float
    # Value of MIN_INSTANCES, parsed as an int.
    min_instances: int
    # Value of MAX_INSTANCES, parsed as an int.
    max_instances: int
    # Value of CHECK_INTERVAL, parsed as an int.
    # NOTE(review): presumably seconds between checks - confirm in the scaler.
    check_interval: int
    # Value of LOAD_BALANCER_URL.
    load_balancer_url: str


def load_settings() -> AutoScalerSettings:
    """Build the autoscaler settings from environment variables.

    Variables read (default in parentheses): ``PROM_URL``
    (``http://localhost:9090``), ``DOCKER_IMAGE`` (empty string),
    ``AUTOSCALE_LABEL`` (``autoscale_service``), ``CPU_THRESHOLD`` (0.7),
    ``MIN_INSTANCES`` (1), ``MAX_INSTANCES`` (10), ``CHECK_INTERVAL`` (30)
    and ``LOAD_BALANCER_URL`` (``http://host.docker.internal:8000``).

    Returns:
        A populated ``AutoScalerSettings``.

    Raises:
        ValueError: A numeric variable is set to something that cannot be
            parsed; the message names the offending variable.
    """

    def _number(name, default, convert):
        # An unset or blank variable (e.g. ``MIN_INSTANCES=`` in an env file)
        # means "use the default" rather than a parse error.
        raw = os.getenv(name)
        if raw is None or not raw.strip():
            return default
        try:
            return convert(raw)
        except ValueError as error:
            raise ValueError(
                f"Environment variable {name} must be a valid "
                f"{convert.__name__}, got {raw!r}"
            ) from error

    return AutoScalerSettings(
        prom_url=os.getenv("PROM_URL", "http://localhost:9090"),
        docker_image=os.getenv("DOCKER_IMAGE", ""),
        label=os.getenv("AUTOSCALE_LABEL", "autoscale_service"),
        cpu_threshold=_number("CPU_THRESHOLD", 0.7, float),
        min_instances=_number("MIN_INSTANCES", 1, int),
        max_instances=_number("MAX_INSTANCES", 10, int),
        check_interval=_number("CHECK_INTERVAL", 30, int),
        load_balancer_url=os.getenv(
            "LOAD_BALANCER_URL",
            "http://host.docker.internal:8000",
        ),
    )
Loading
Loading