Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
3ddabdc
WIP
atobiszei Mar 2, 2026
b5ed7d8
Inpainting/outpainting CPU
atobiszei Mar 9, 2026
c2ba9d8
Update dockerignore
atobiszei Mar 11, 2026
af51476
Demo
atobiszei Mar 13, 2026
5f89fc2
Change mask
atobiszei Mar 13, 2026
102f8af
Fix
atobiszei Mar 13, 2026
330374f
Fix: remove mask from accepted fields in text2image request options
atobiszei Mar 16, 2026
0a65e68
Merge remote-tracking branch 'origin/main' into atobisze_image_inpain…
atobiszei Mar 16, 2026
eac3932
Fix concurrent request inpainting issue, propagate quantization param…
atobiszei Mar 18, 2026
f4be12a
Merge remote-tracking branch 'origin/main' into atobisze_image_inpain…
atobiszei Mar 18, 2026
a2476ee
Add tests & review fix
atobiszei Mar 18, 2026
15dd08a
Address PR review: blocking inpainting guard, string_view optimizatio…
atobiszei Mar 18, 2026
7e392c6
Minor comment fix
atobiszei Mar 19, 2026
eb34c6e
Add LoRA adapter support for image generation
atobiszei Mar 16, 2026
9da0860
Add download commands for inpainting/outpainting demo images
atobiszei Mar 24, 2026
4d24c22
Add LoRA adapter support for image generation
atobiszei Mar 25, 2026
dec1959
Merge origin/main into atobisze_image_inpainting_lora
atobiszei Mar 25, 2026
f10b657
Fix LoRA download auth, style issues, and restore README curl commands
atobiszei Mar 25, 2026
4fe7c75
Fix Windows build: rename shadowed variable 'it' to 'member'
atobiszei Mar 25, 2026
0ef2ebe
Fix Windows path detection in LoRA CLI parser
atobiszei Mar 25, 2026
519ea61
Extract isLocalFilePath() into stringutils for cross-platform path de…
atobiszei Mar 25, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/copilot-instructions.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,10 @@ When analyzing a Pull Request, follow this protocol:
- **Keep headers self-contained but minimal**: each header must compile on its own, but should not pull in transitive dependencies that callers don't need.
- **Prefer opaque types / Pimpl**: for complex implementation details, consider the Pimpl idiom to keep implementation-only types out of the public header entirely.
- **Never include a header solely for a typedef or enum**: forward-declare the enum (`enum class Foo;` in C++17) or relocate the typedef to a lightweight `fwd.hpp`-style header.
13. Be mindful when accepting `const T&` in constructors or functions that store the reference: verify that the referenced object's lifetime outlives the usage to avoid dangling references.
13. **No dangling references or temporaries bound to `const T&`**:
- Never use `const T&` parameters with default arguments that construct temporaries (e.g. `const std::string& param = ""`). This binds a reference to a temporary — use a function overload instead, or pass by value.
- When accepting `const T&` in constructors or functions that store the reference, verify that the referenced object's lifetime outlives the usage to avoid dangling references.
- Prefer overloads over default arguments for non-trivial types passed by reference.

## Build System

Expand Down
47 changes: 45 additions & 2 deletions demos/common/export_models/export_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,11 @@ def add_common_arguments(parser):
parser_image_generation.add_argument('--max_num_images_per_prompt', type=int, default=0, help='Max allowed number of images client is allowed to request for a given prompt', dest='max_num_images_per_prompt')
parser_image_generation.add_argument('--default_num_inference_steps', type=int, default=0, help='Default number of inference steps when not specified by client', dest='default_num_inference_steps')
parser_image_generation.add_argument('--max_num_inference_steps', type=int, default=0, help='Max allowed number of inference steps client is allowed to request for a given prompt', dest='max_num_inference_steps')
parser_image_generation.add_argument('--source_loras', default=None,
help='LoRA adapters to apply. Format: alias1=org1/repo1,alias2=org2/repo2@lora_file.safetensors '
'where @filename is optional and specifies which .safetensors file to use from the downloaded repo '
'(auto-detected when repo contains exactly one). Only for image_generation task.',
dest='source_loras')

parser_text2speech = subparsers.add_parser('text2speech', help='export model for text2speech endpoint')
add_common_arguments(parser_text2speech)
Expand Down Expand Up @@ -323,6 +328,9 @@ def add_common_arguments(parser):
default_num_inference_steps: {{default_num_inference_steps}},{% endif %}
{%- if max_num_inference_steps > 0 %}
max_num_inference_steps: {{max_num_inference_steps}},{% endif %}
{%- for lora in lora_adapters %}
lora_adapters { alias: "{{lora.alias}}" path: "{{lora.path}}" }
{%- endfor %}
}
}
}"""
Expand Down Expand Up @@ -600,7 +608,7 @@ def export_rerank_model(model_repository_path, source_model, model_name, precisi
add_servable_to_config(config_file_path, model_name, os.path.relpath(os.path.join(model_repository_path, model_name), os.path.dirname(config_file_path)))


def export_image_generation_model(model_repository_path, source_model, model_name, precision, task_parameters, config_file_path, num_streams):
def export_image_generation_model(model_repository_path, source_model, model_name, precision, task_parameters, config_file_path, num_streams, source_loras):
model_path = "./"
target_path = os.path.join(model_repository_path, model_name)
model_index_path = os.path.join(target_path, 'model_index.json')
Expand All @@ -613,6 +621,41 @@ def export_image_generation_model(model_repository_path, source_model, model_nam
if os.system(optimum_command):
raise ValueError("Failed to export image generation model", source_model)

# Download and resolve LoRA adapters requested via --source_loras.
# Accepted entry formats (comma separated):
#   alias=org/repo                      -> alias given explicitly
#   alias=org/repo@file.safetensors     -> explicit alias and .safetensors file
#   org/repo[@file.safetensors]         -> alias defaults to the repo name ("repo")
lora_adapters = []
if source_loras:
    from huggingface_hub import snapshot_download
    for entry in source_loras.split(','):
        entry = entry.strip()
        if not entry:
            continue  # tolerate stray/trailing commas
        if '=' in entry:
            alias, repo_and_file = entry.split('=', 1)
        else:
            alias = None  # derived from the repo id below, after stripping any '@file' suffix
            repo_and_file = entry
        safetensors_file = ''
        if '@' in repo_and_file:
            repo, safetensors_file = repo_and_file.rsplit('@', 1)
        else:
            repo = repo_and_file
        if alias is None:
            # Default alias is the last path component of the repo id, so
            # "org/repo@file.safetensors" yields "repo" (not "repo@file.safetensors").
            alias = repo.split('/')[-1] if '/' in repo else repo
        lora_dir = os.path.join(target_path, 'loras', repo)
        if not os.path.isdir(lora_dir):
            print(f"Downloading LoRA adapter: {repo} to {lora_dir}")
            snapshot_download(repo_id=repo, local_dir=lora_dir)
        else:
            print(f"LoRA adapter directory already exists: {lora_dir}")
        if not safetensors_file:
            # Auto-detect the .safetensors file; ambiguity must be resolved by the user.
            st_files = [f for f in os.listdir(lora_dir) if f.endswith('.safetensors')]
            if len(st_files) == 0:
                raise ValueError(f"No .safetensors files found in LoRA adapter: {repo}")
            if len(st_files) > 1:
                raise ValueError(f"Multiple .safetensors files in LoRA adapter: {repo}. Use @filename to specify.")
            safetensors_file = st_files[0]
        # Path is relative to the model directory; forward slashes are used in the graph config.
        lora_path = 'loras/' + repo + '/' + safetensors_file
        lora_adapters.append({'alias': alias, 'path': lora_path})
        print(f"LoRA adapter: {alias} -> {lora_path}")
task_parameters['lora_adapters'] = lora_adapters

plugin_config = {}
assert num_streams >= 0, "num_streams should be a non-negative integer"
if num_streams > 0:
Expand Down Expand Up @@ -695,4 +738,4 @@ def export_image_generation_model(model_repository_path, source_model, model_nam
'max_num_inference_steps',
'extra_quantization_params'
]}
export_image_generation_model(args['model_repository_path'], args['source_model'], args['model_name'], args['precision'], template_parameters, args['config_file_path'], args['num_streams'])
export_image_generation_model(args['model_repository_path'], args['source_model'], args['model_name'], args['precision'], template_parameters, args['config_file_path'], args['num_streams'], args['source_loras'])
12 changes: 12 additions & 0 deletions demos/image_generation/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,12 @@ Output file (`edit_output.png`):

Inpainting replaces a masked region in an image based on the prompt. The `mask` is a black-and-white image where white pixels mark the area to repaint.

Download sample images:
```console
curl -O https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/image_generation/cat.png
curl -O https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/image_generation/cat_mask.png
```

![cat](./cat.png) ![cat_mask](./cat_mask.png)

::::{tab-set}
Expand Down Expand Up @@ -599,6 +605,12 @@ Outpainting extends an image beyond its original borders. Prepare two images:
- **outpaint_input.png** — the original image centered on a larger canvas (e.g. 768×768) with black borders
- **outpaint_mask.png** — white where the new content should be generated (the borders), black where the original image is

Download sample images:
```console
curl -O https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/image_generation/outpaint_input.png
curl -O https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/image_generation/outpaint_mask.png
```

![outpaint_input](./outpaint_input.png) ![outpaint_mask](./outpaint_mask.png)

::::{tab-set}
Expand Down
1 change: 1 addition & 0 deletions src/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -3067,6 +3067,7 @@ cc_library(
":test_light_test_utils",
":test_test_with_temp_dir",
"//src/graph_export:graph_export",
"//src/graph_export:image_generation_graph_cli_parser",
"//src:libovms_server_settings",
"@com_google_googletest//:gtest",
],
Expand Down
26 changes: 26 additions & 0 deletions src/capi_frontend/server_settings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,29 @@ struct RerankGraphSettingsImpl {
uint64_t maxAllowedChunks = 10000;
};

// Origin of a LoRA adapter's weights, used to decide how the adapter is
// downloaded and how its on-disk path is resolved during graph export.
enum class LoraSourceType {
    HF_REPO,     // HuggingFace repo id (e.g. "org/repo")
    DIRECT_URL,  // direct HTTP(S) link to a .safetensors file
    LOCAL_FILE   // pre-existing file path on the local filesystem
};

// Configuration for a single LoRA adapter applied to an image-generation
// model; populated from the --source_loras CLI option.
struct LoraAdapterSettings {
    std::string alias;            // name used to reference this adapter in the graph config
    std::string sourceLora;       // HF repo, direct URL, or local file path
    std::string safetensorsFile;  // resolved filename, empty = auto-detect (HF only)
    LoraSourceType sourceType = LoraSourceType::HF_REPO;  // how sourceLora is interpreted
};

// One weighted member of a composite LoRA: pairs a previously declared
// adapter (by alias) with the blend weight to apply it at.
struct CompositeLoraComponent {
    std::string adapterAlias;  // references a LoraAdapterSettings alias
    float weight = 1.0f;       // blend weight; 1.0 is the default (full strength)
};

// A named combination of several LoRA adapters blended together; emitted as a
// composite_lora_adapters entry in the exported graph configuration.
struct CompositeLoraSettings {
    std::string alias;                              // name of the composite adapter
    std::vector<CompositeLoraComponent> components; // adapters and weights to blend
};

struct ImageGenerationGraphSettingsImpl {
std::string resolution = "";
std::string maxResolution = "";
Expand All @@ -152,6 +175,8 @@ struct ImageGenerationGraphSettingsImpl {
std::optional<uint32_t> maxNumberImagesPerPrompt;
std::optional<uint32_t> defaultNumInferenceSteps;
std::optional<uint32_t> maxNumInferenceSteps;
std::vector<LoraAdapterSettings> loraAdapters;
std::vector<CompositeLoraSettings> compositeLoraAdapters;
};

struct ExportSettings {
Expand All @@ -169,6 +194,7 @@ struct HFSettingsImpl {
std::string sourceModel = "";
std::optional<std::string> ggufFilename;
std::string downloadPath = "";
std::string sourceLoras = ""; // raw --source_loras value, parsed by image gen CLI parser
bool overwriteModels = false;
ModelDownlaodType downloadType = GIT_CLONE_DOWNLOAD;
GraphExportType task = TEXT_GENERATION_GRAPH;
Expand Down
11 changes: 9 additions & 2 deletions src/cli_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,11 +115,11 @@ std::variant<bool, std::pair<int, std::string>> CLIParser::parse(int argc, char*
cxxopts::value<std::string>(), "GRPC_CHANNEL_ARGUMENTS")
("file_system_poll_wait_seconds",
"Time interval between config and model versions changes detection. Default is 1. Zero or negative value disables changes monitoring.",
cxxopts::value<uint32_t>()->default_value("1"),
cxxopts::value<uint32_t>()->default_value("1"),
"FILE_SYSTEM_POLL_WAIT_SECONDS")
("sequence_cleaner_poll_wait_minutes",
"Time interval between two consecutive sequence cleanup scans. Default is 5. Zero value disables sequence cleaner. It also sets the schedule for releasing free memory from the heap.",
cxxopts::value<uint32_t>()->default_value("5"),
cxxopts::value<uint32_t>()->default_value("5"),
"SEQUENCE_CLEANER_POLL_WAIT_MINUTES")
("custom_node_resources_cleaner_interval_seconds",
"Time interval between two consecutive resources cleanup scans. Default is 300. Zero value disables resources cleaner.",
Expand Down Expand Up @@ -213,6 +213,10 @@ std::variant<bool, std::pair<int, std::string>> CLIParser::parse(int argc, char*
"HF source model path",
cxxopts::value<std::string>(),
"HF_SOURCE")
("source_loras",
"LoRA adapters for image generation. Format: alias1=org1/repo1,alias2=org2/repo2@file.safetensors,alias3=https://url/file.safetensors,alias4=/local/path/file.safetensors",
cxxopts::value<std::string>(),
"SOURCE_LORAS")
("gguf_filename",
"Name of the GGUF file",
cxxopts::value<std::string>(),
Expand Down Expand Up @@ -715,6 +719,9 @@ void CLIParser::prepareGraph(ServerSettingsImpl& serverSettings, HFSettingsImpl&
} else if (result->count("model_name")) {
hfSettings.sourceModel = result->operator[]("model_name").as<std::string>();
}
if (result->count("source_loras")) {
hfSettings.sourceLoras = result->operator[]("source_loras").as<std::string>();
}
if ((result->count("weight-format") || result->count("extra_quantization_params")) && isOptimumCliDownload(hfSettings.sourceModel, hfSettings.ggufFilename)) {
hfSettings.downloadType = OPTIMUM_CLI_DOWNLOAD;
}
Expand Down
31 changes: 31 additions & 0 deletions src/graph_export/graph_export.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,37 @@ node: {
max_num_inference_steps: )" << graphSettings.maxNumInferenceSteps.value();
}

for (const auto& adapter : graphSettings.loraAdapters) {
std::string loraPath;
if (adapter.sourceType == LoraSourceType::LOCAL_FILE) {
loraPath = adapter.sourceLora;
} else if (adapter.sourceType == LoraSourceType::HF_REPO) {
loraPath = "loras/" + adapter.sourceLora + "/" + adapter.safetensorsFile;
} else { // cURL direct link
loraPath = "loras/" + adapter.alias + "/" + adapter.safetensorsFile;
}
oss << R"(
lora_adapters { alias: ")" << adapter.alias << R"(" path: ")" << loraPath << R"(")";
// Only omit alpha when default (1.0) - let proto handle it
oss << R"( })";
}

for (const auto& composite : graphSettings.compositeLoraAdapters) {
oss << R"(
composite_lora_adapters {
alias: ")" << composite.alias << R"("
)";
for (const auto& component : composite.components) {
oss << R"( components { adapter_alias: ")" << component.adapterAlias << R"(")";
if (component.weight != 1.0f) {
oss << R"( weight: )" << component.weight;
}
oss << R"( }
)";
}
oss << R"( })";
}

oss << R"(
}
}
Expand Down
Loading