From 0066c088597f69e1ea09e005ab6194ef8fa8f30d Mon Sep 17 00:00:00 2001
From: Bihan  Rana <bihan@Bihans-MacBook-Pro.local>
Date: Wed, 28 Jan 2026 21:06:29 +0545
Subject: [PATCH 1/2] Add Replica Groups Docs

---
 docs/docs/concepts/services.md | 53 ++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)
diff --git a/docs/docs/concepts/services.md b/docs/docs/concepts/services.md
index 745f78e3f0..a400f79a47 100644
--- a/docs/docs/concepts/services.md
+++ b/docs/docs/concepts/services.md
@@ -164,6 +164,59 @@ Setting the minimum number of replicas to `0` allows the service to scale down t
 
 > The `scaling` property requires creating a [gateway](gateways.md).
 
+### Replica Groups
+
+Replica groups let you define multiple groups of replicas within a single service. Each group can define its own replica count, autoscaling rules, resource requirements, and commands.
+
+<div editor-title="service.dstack.yml"> 
+
+```yaml
+type: service
+name: replica-groups
+image: lmsysorg/sglang:latest
+
+env:
+  - MODEL_ID=deepseek-ai/DeepSeek-R1-Distill-Llama-8B
+
+replicas:
+  - count: 1..2
+    scaling:
+      metric: rps
+      target: 10
+    commands:
+      - |
+          python -m sglang.launch_server \
+            --model-path $MODEL_ID \
+            --port 8000 \
+            --trust-remote-code
+
+    resources:
+      gpu: 48GB
+
+  - count: 1..4
+    scaling:
+      metric: rps
+      target: 5
+    commands:
+      - |
+          python -m sglang.launch_server \
+            --model-path $MODEL_ID \
+            --port 8000 \
+            --trust-remote-code
+    resources:
+      gpu: 24GB
+
+port: 8000
+model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
+```
+
+</div>
+
+> Support for configuring `port`, `image`, `env`, `docker`, and other properties is coming soon.
+
+!!! info
+    Replica groups enable `prefill–decode` disaggregation by running prefill and decode workers as separate replica groups within a single service. This capability is planned for an upcoming release. See the [issue](https://github.com/dstackai/dstack/issues/3363) for more details.
+
 ### Model
 
 If the service is running a chat model with an OpenAI-compatible interface,

From a1a171c41a4b4ad43685a481ab367a84df75ad45 Mon Sep 17 00:00:00 2001
From: peterschmidt85 <andrey.cheptsov@gmail.com>
Date: Wed, 28 Jan 2026 17:22:20 +0100
Subject: [PATCH 2/2] Minor edits

---
 docs/docs/concepts/services.md | 88 +++++++++++++++++-----------------
 1 file changed, 43 insertions(+), 45 deletions(-)

diff --git a/docs/docs/concepts/services.md b/docs/docs/concepts/services.md
index a400f79a47..0f6bf07bb8 100644
--- a/docs/docs/concepts/services.md
+++ b/docs/docs/concepts/services.md
@@ -164,58 +164,56 @@ Setting the minimum number of replicas to `0` allows the service to scale down t
 
 > The `scaling` property requires creating a [gateway](gateways.md).
 
-### Replica Groups
+??? info "Replica groups"
+    A service can include multiple replica groups. Each group can define its own `commands`, `resources`, and `scaling` rules.
 
-Replica groups let you define multiple groups of replicas within a single service. Each group can define its own replica count, autoscaling rules, resource requirements, and commands.
-
-<div editor-title="service.dstack.yml"> 
-
-```yaml
-type: service
-name: replica-groups
-image: lmsysorg/sglang:latest
-
-env:
-  - MODEL_ID=deepseek-ai/DeepSeek-R1-Distill-Llama-8B
-
-replicas:
-  - count: 1..2
-    scaling:
-      metric: rps
-      target: 10
-    commands:
-      - |
-          python -m sglang.launch_server \
-            --model-path $MODEL_ID \
-            --port 8000 \
-            --trust-remote-code
+    <div editor-title="service.dstack.yml"> 
 
-    resources:
-      gpu: 48GB
+    ```yaml
+    type: service
+    name: llama-8b-service
 
-  - count: 1..4
-    scaling:
-      metric: rps
-      target: 5
-    commands:
-      - |
-          python -m sglang.launch_server \
-            --model-path $MODEL_ID \
-            --port 8000 \
-            --trust-remote-code
-    resources:
-      gpu: 24GB
+    image: lmsysorg/sglang:latest
+    env:
+      - MODEL_ID=deepseek-ai/DeepSeek-R1-Distill-Llama-8B
+
+    replicas:
+      - count: 1..2
+        scaling:
+          metric: rps
+          target: 10
+        commands:
+          - |
+            python -m sglang.launch_server \
+              --model-path $MODEL_ID \
+              --port 8000 \
+              --trust-remote-code
+        resources:
+          gpu: 48GB
+
+      - count: 1..4
+        scaling:
+          metric: rps
+          target: 5
+        commands:
+          - |
+            python -m sglang.launch_server \
+              --model-path $MODEL_ID \
+              --port 8000 \
+              --trust-remote-code
+        resources:
+          gpu: 24GB
 
-port: 8000
-model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
-```
+    port: 8000
+    model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
+    ```
 
-</div>
+    </div>
 
-> Support for configuring `port`, `image`, `env`, `docker`, and other properties is coming soon.
+    > Properties such as `regions`, `port`, `image`, `env`, and some others cannot be configured per replica group. Support for this is coming soon.
 
-!!! info
-    Replica groups enable `prefill–decode` disaggregation by running prefill and decode workers as separate replica groups within a single service. This capability is planned for an upcoming release. See the [issue](https://github.com/dstackai/dstack/issues/3363) for more details.
+??? info "Disaggregated serving"
+    Native support for disaggregated prefill and decode, allowing both worker types to run within a single service, is coming soon.
 
 ### Model