diff --git a/.agents/recipes/docs-and-references/recipe.md b/.agents/recipes/docs-and-references/recipe.md index 87fe21dc1..83ec8ad95 100644 --- a/.agents/recipes/docs-and-references/recipe.md +++ b/.agents/recipes/docs-and-references/recipe.md @@ -4,7 +4,7 @@ description: Audit documentation freshness - docstrings vs signatures, broken li trigger: schedule tool: claude-code timeout_minutes: 20 -max_turns: 30 +max_turns: 50 permissions: contents: write --- diff --git a/.github/workflows/agentic-ci-issue-triage.yml b/.github/workflows/agentic-ci-issue-triage.yml index 4594ea8da..8ef311fa6 100644 --- a/.github/workflows/agentic-ci-issue-triage.yml +++ b/.github/workflows/agentic-ci-issue-triage.yml @@ -55,18 +55,26 @@ jobs: echo "Claude CLI version: $(claude --version 2>&1 || true)" if [ -n "$ANTHROPIC_BASE_URL" ] && [ -n "$ANTHROPIC_API_KEY" ]; then - HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \ - --max-time 30 \ - -X POST "${ANTHROPIC_BASE_URL}/v1/messages" \ - -H "Content-Type: application/json" \ - -H "x-api-key: ${ANTHROPIC_API_KEY}" \ - -H "anthropic-version: 2023-06-01" \ - -d "{\"model\":\"${AGENTIC_CI_MODEL}\",\"max_tokens\":5,\"messages\":[{\"role\":\"user\",\"content\":\"hi\"}]}") - if [ "$HTTP_CODE" -lt 200 ] || [ "$HTTP_CODE" -ge 300 ]; then - echo "::error::API pre-flight failed with HTTP ${HTTP_CODE}" - exit 1 - fi - echo "API pre-flight passed (HTTP ${HTTP_CODE})" + for ATTEMPT in 1 2 3; do + # curl exits non-zero on timeouts/connection errors; if this step runs under bash -e (the GitHub Actions default) that would abort before the retry logic, so map any curl failure to HTTP 000 and let the loop retry. + HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \ + --max-time 30 \ + -X POST "${ANTHROPIC_BASE_URL}/v1/messages" \ + -H "Content-Type: application/json" \ + -H "x-api-key: ${ANTHROPIC_API_KEY}" \ + -H "anthropic-version: 2023-06-01" \ + -d "{\"model\":\"${AGENTIC_CI_MODEL}\",\"max_tokens\":5,\"messages\":[{\"role\":\"user\",\"content\":\"hi\"}]}") || HTTP_CODE=000 + if [ "$HTTP_CODE" -ge 200 ] && [ "$HTTP_CODE" -lt 300 ]; then + echo "API pre-flight passed (HTTP ${HTTP_CODE})" + break + fi + if [ "$ATTEMPT" = "3" ]; then + echo "::error::API pre-flight failed with 
HTTP ${HTTP_CODE}" + exit 1 + fi + echo "API pre-flight failed with HTTP ${HTTP_CODE}; retrying (${ATTEMPT}/3)" + sleep $((ATTEMPT * 10)) + done fi - name: Run issue triage recipe diff --git a/.github/workflows/build-notebooks.yml b/.github/workflows/build-notebooks.yml index 610c7aac8..a5b22dda3 100644 --- a/.github/workflows/build-notebooks.yml +++ b/.github/workflows/build-notebooks.yml @@ -87,7 +87,11 @@ jobs: echo "No previous successful run found, proceeding without cache" fi - name: Convert and execute notebooks - run: make convert-execute-notebooks ${{ inputs.use_cache && 'USE_CACHE=1' || '' }} + run: | + if [ "$GITHUB_EVENT_NAME" = "schedule" ]; then + export DATA_DESIGNER_FLUX_2_PRO_CREATE_NUM_RECORDS=2 + fi + make convert-execute-notebooks ${{ inputs.use_cache && 'USE_CACHE=1' || '' }} - name: Upload notebooks as artifacts uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: diff --git a/docs/colab_notebooks/4-providing-images-as-context.ipynb b/docs/colab_notebooks/4-providing-images-as-context.ipynb index 6cd599e0c..b28ec84d3 100644 --- a/docs/colab_notebooks/4-providing-images-as-context.ipynb +++ b/docs/colab_notebooks/4-providing-images-as-context.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "cd505b79", + "id": "351a3662", "metadata": { "nemo_colab_inject": true }, @@ -12,7 +12,7 @@ }, { "cell_type": "markdown", - "id": "ed119996", + "id": "17ecc2b8", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: Providing Images as Context for Vision-Based Data Generation" @@ -20,7 +20,7 @@ }, { "cell_type": "markdown", - "id": "d13a4cb5", + "id": "5ed7e603", "metadata": {}, "source": [ "#### 📚 What you'll learn\n", @@ -37,7 +37,7 @@ }, { "cell_type": "markdown", - "id": "2924c2d1", + "id": "6f8c2761", "metadata": {}, "source": [ "### 📦 Import Data Designer\n", @@ -49,7 +49,7 @@ }, { "cell_type": "markdown", - "id": "4c6e4f22", + "id": "bbfa06b7", "metadata": { "nemo_colab_inject": true }, @@ -62,7 
+62,7 @@ { "cell_type": "code", "execution_count": null, - "id": "98151070", + "id": "35fe9312", "metadata": { "nemo_colab_inject": true }, @@ -75,7 +75,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5490b9a8", + "id": "4dd4c002", "metadata": { "nemo_colab_inject": true }, @@ -95,7 +95,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7a66e1ce", + "id": "b67c1cd2", "metadata": {}, "outputs": [], "source": [ @@ -118,7 +118,7 @@ }, { "cell_type": "markdown", - "id": "3e7a28c6", + "id": "de03245a", "metadata": {}, "source": [ "### ⚙️ Initialize the Data Designer interface\n", @@ -131,7 +131,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f31d6ac0", + "id": "4e46778f", "metadata": {}, "outputs": [], "source": [ @@ -140,7 +140,7 @@ }, { "cell_type": "markdown", - "id": "14b063e4", + "id": "badc7737", "metadata": {}, "source": [ "### 🏗️ Initialize the Data Designer Config Builder\n", @@ -155,16 +155,20 @@ { "cell_type": "code", "execution_count": null, - "id": "d8fd37ae", + "id": "bc544dcb", "metadata": {}, "outputs": [], "source": [ - "config_builder = dd.DataDesignerConfigBuilder()" + "config_builder = dd.DataDesignerConfigBuilder()\n", + "for model_config in config_builder.model_configs:\n", + " if model_config.alias == \"nvidia-vision\":\n", + " model_config.skip_health_check = True\n", + " break" ] }, { "cell_type": "markdown", - "id": "3a7e0787", + "id": "77842781", "metadata": {}, "source": [ "### 🌱 Seed Dataset Creation\n", @@ -181,7 +185,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b01b5496", + "id": "63f56ad9", "metadata": {}, "outputs": [], "source": [ @@ -196,7 +200,7 @@ { "cell_type": "code", "execution_count": null, - "id": "78b3b9ea", + "id": "3d59a3fc", "metadata": {}, "outputs": [], "source": [ @@ -241,7 +245,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7b6b2908", + "id": "f8e794ea", "metadata": {}, "outputs": [], "source": [ @@ -259,7 +263,7 @@ { "cell_type": "code", 
"execution_count": null, - "id": "e0ab09d5", + "id": "7a16b74e", "metadata": {}, "outputs": [], "source": [ @@ -269,7 +273,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c9ce69ed", + "id": "26db3ac3", "metadata": {}, "outputs": [], "source": [ @@ -280,7 +284,7 @@ }, { "cell_type": "markdown", - "id": "94528475", + "id": "fd0af217", "metadata": {}, "source": [ "### 🧩 Media context and model capabilities\n", @@ -317,7 +321,7 @@ { "cell_type": "code", "execution_count": null, - "id": "bd8148f4", + "id": "35153311", "metadata": {}, "outputs": [], "source": [ @@ -339,7 +343,7 @@ }, { "cell_type": "markdown", - "id": "2150d704", + "id": "c0120b74", "metadata": {}, "source": [ "### 🔁 Iteration is key – preview the dataset!\n", @@ -356,7 +360,7 @@ { "cell_type": "code", "execution_count": null, - "id": "85cf2067", + "id": "25cc4054", "metadata": {}, "outputs": [], "source": [ @@ -366,7 +370,7 @@ { "cell_type": "code", "execution_count": null, - "id": "509f00ed", + "id": "22315ca6", "metadata": {}, "outputs": [], "source": [ @@ -377,7 +381,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8b1a7d15", + "id": "ee176135", "metadata": {}, "outputs": [], "source": [ @@ -387,7 +391,7 @@ }, { "cell_type": "markdown", - "id": "9bf4843c", + "id": "cc653ea1", "metadata": {}, "source": [ "### 📊 Analyze the generated data\n", @@ -400,7 +404,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d80d106d", + "id": "5af110d9", "metadata": {}, "outputs": [], "source": [ @@ -410,7 +414,7 @@ }, { "cell_type": "markdown", - "id": "ed22e721", + "id": "a462f684", "metadata": {}, "source": [ "### 🔎 Visual Inspection\n", @@ -421,7 +425,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f41c068a", + "id": "55067aa0", "metadata": { "lines_to_next_cell": 2 }, @@ -445,7 +449,7 @@ }, { "cell_type": "markdown", - "id": "f096be05", + "id": "8446cd74", "metadata": {}, "source": [ "### 🆙 Scale up!\n", @@ -458,7 +462,7 @@ { "cell_type": "code", 
"execution_count": null, - "id": "c2efd0f8", + "id": "051275d5", "metadata": {}, "outputs": [], "source": [ @@ -468,7 +472,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1f7b5f60", + "id": "822d566f", "metadata": {}, "outputs": [], "source": [ @@ -481,7 +485,7 @@ { "cell_type": "code", "execution_count": null, - "id": "dbb9ea18", + "id": "fc2124b6", "metadata": {}, "outputs": [], "source": [ @@ -493,7 +497,7 @@ }, { "cell_type": "markdown", - "id": "f7a1f3ba", + "id": "28120764", "metadata": {}, "source": [ "## ⏭️ Next Steps\n", diff --git a/docs/colab_notebooks/5-generating-images.ipynb b/docs/colab_notebooks/5-generating-images.ipynb index 76a933da0..683ec4dc7 100644 --- a/docs/colab_notebooks/5-generating-images.ipynb +++ b/docs/colab_notebooks/5-generating-images.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "66019c7e", + "id": "4a358875", "metadata": { "nemo_colab_inject": true }, @@ -12,7 +12,7 @@ }, { "cell_type": "markdown", - "id": "267d3938", + "id": "d8a6a126", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: Generating Images\n", @@ -34,7 +34,7 @@ }, { "cell_type": "markdown", - "id": "486d74eb", + "id": "eb1be8d6", "metadata": {}, "source": [ "### 📦 Import Data Designer\n", @@ -45,7 +45,7 @@ }, { "cell_type": "markdown", - "id": "9c888db8", + "id": "dcd53838", "metadata": { "nemo_colab_inject": true }, @@ -58,7 +58,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4fcdfb3f", + "id": "61daeaa8", "metadata": { "nemo_colab_inject": true }, @@ -71,7 +71,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6a87ecb2", + "id": "e4d278ae", "metadata": { "nemo_colab_inject": true }, @@ -91,10 +91,12 @@ { "cell_type": "code", "execution_count": null, - "id": "ec5dd8e7", + "id": "c180471a", "metadata": {}, "outputs": [], "source": [ + "import os\n", + "\n", "from IPython.display import Image as IPImage\n", "from IPython.display import display\n", "\n", @@ -104,7 +106,7 @@ }, { "cell_type": 
"markdown", - "id": "651d9f3b", + "id": "af026515", "metadata": {}, "source": [ "### ⚙️ Initialize the Data Designer interface\n", @@ -115,7 +117,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5fc8972e", + "id": "6371269a", "metadata": {}, "outputs": [], "source": [ @@ -124,7 +126,7 @@ }, { "cell_type": "markdown", - "id": "dd50d576", + "id": "44ea1023", "metadata": {}, "source": [ "### 🎛️ Define an image-generation model\n", @@ -136,7 +138,7 @@ { "cell_type": "code", "execution_count": null, - "id": "03ca2abf", + "id": "2e2da7c1", "metadata": {}, "outputs": [], "source": [ @@ -158,7 +160,7 @@ }, { "cell_type": "markdown", - "id": "73bf1fa1", + "id": "ee9ea7e3", "metadata": {}, "source": [ "### 🏗️ Build the config: samplers + image column\n", @@ -169,7 +171,7 @@ { "cell_type": "code", "execution_count": null, - "id": "efa7ecf8", + "id": "54cee314", "metadata": {}, "outputs": [], "source": [ @@ -342,7 +344,7 @@ }, { "cell_type": "markdown", - "id": "e34da1ef", + "id": "14a5f2da", "metadata": {}, "source": [ "### 🔁 Preview: images as base64\n", @@ -353,7 +355,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e27fc9fd", + "id": "da237615", "metadata": {}, "outputs": [], "source": [ @@ -363,7 +365,7 @@ { "cell_type": "code", "execution_count": null, - "id": "437b1054", + "id": "5910dddc", "metadata": {}, "outputs": [], "source": [ @@ -374,7 +376,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5666999a", + "id": "c5638566", "metadata": {}, "outputs": [], "source": [ @@ -383,7 +385,7 @@ }, { "cell_type": "markdown", - "id": "9e9b5c1b", + "id": "718e0730", "metadata": {}, "source": [ "### 🆙 Create: images saved to disk\n", @@ -394,17 +396,20 @@ { "cell_type": "code", "execution_count": null, - "id": "8adecae8", + "id": "e224bfe4", "metadata": {}, "outputs": [], "source": [ - "results = data_designer.create(config_builder, num_records=2, dataset_name=\"tutorial-5-images\")" + "create_num_records = 2\n", + "if MODEL_ID == 
\"black-forest-labs/flux.2-pro\":\n", + " create_num_records = int(os.environ.get(\"DATA_DESIGNER_FLUX_2_PRO_CREATE_NUM_RECORDS\") or create_num_records)\n", + "results = data_designer.create(config_builder, num_records=create_num_records, dataset_name=\"tutorial-5-images\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "92998c4c", + "id": "b2d6d2dd", "metadata": {}, "outputs": [], "source": [ @@ -415,7 +420,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0ad903b0", + "id": "31dcf0a0", "metadata": {}, "outputs": [], "source": [ @@ -431,7 +436,7 @@ }, { "cell_type": "markdown", - "id": "12134406", + "id": "b96b5f59", "metadata": {}, "source": [ "## ⏭️ Next steps\n", diff --git a/docs/colab_notebooks/6-editing-images-with-image-context.ipynb b/docs/colab_notebooks/6-editing-images-with-image-context.ipynb index 023dd198c..03ed91551 100644 --- a/docs/colab_notebooks/6-editing-images-with-image-context.ipynb +++ b/docs/colab_notebooks/6-editing-images-with-image-context.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "30e20568", + "id": "58c83dec", "metadata": { "nemo_colab_inject": true }, @@ -12,7 +12,7 @@ }, { "cell_type": "markdown", - "id": "d63f4416", + "id": "3ace2f9f", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: Image-to-Image Editing\n", @@ -34,7 +34,7 @@ }, { "cell_type": "markdown", - "id": "d3e60ea6", + "id": "499c1438", "metadata": {}, "source": [ "### 📦 Import Data Designer\n", @@ -45,7 +45,7 @@ }, { "cell_type": "markdown", - "id": "2f1c15e7", + "id": "8fe5ea18", "metadata": { "nemo_colab_inject": true }, @@ -58,7 +58,7 @@ { "cell_type": "code", "execution_count": null, - "id": "143db4c6", + "id": "1944c158", "metadata": { "nemo_colab_inject": true }, @@ -71,7 +71,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d9115072", + "id": "76e54d76", "metadata": { "nemo_colab_inject": true }, @@ -91,11 +91,12 @@ { "cell_type": "code", "execution_count": null, - "id": "dfb43d40", + "id": 
"b353dbf2", "metadata": {}, "outputs": [], "source": [ "import base64\n", + "import os\n", "from pathlib import Path\n", "\n", "from IPython.display import Image as IPImage\n", @@ -107,7 +108,7 @@ }, { "cell_type": "markdown", - "id": "5f892cd5", + "id": "41376f64", "metadata": {}, "source": [ "### ⚙️ Initialize the Data Designer interface\n", @@ -118,7 +119,7 @@ { "cell_type": "code", "execution_count": null, - "id": "70b474a9", + "id": "c42aad9c", "metadata": {}, "outputs": [], "source": [ @@ -127,7 +128,7 @@ }, { "cell_type": "markdown", - "id": "f2aef849", + "id": "0d741a34", "metadata": {}, "source": [ "### 🎛️ Define an image model\n", @@ -143,7 +144,7 @@ { "cell_type": "code", "execution_count": null, - "id": "aa2f73aa", + "id": "2b1b99dc", "metadata": {}, "outputs": [], "source": [ @@ -165,7 +166,7 @@ }, { "cell_type": "markdown", - "id": "f19cf925", + "id": "4fa922de", "metadata": {}, "source": [ "### 🏗️ Build the configuration\n", @@ -180,7 +181,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d76b5043", + "id": "3a18ce2c", "metadata": {}, "outputs": [], "source": [ @@ -278,7 +279,7 @@ }, { "cell_type": "markdown", - "id": "c73e97f0", + "id": "ab4e5259", "metadata": {}, "source": [ "### 🔁 Preview: quick iteration\n", @@ -289,7 +290,7 @@ { "cell_type": "code", "execution_count": null, - "id": "87f2ce90", + "id": "f84d102d", "metadata": {}, "outputs": [], "source": [ @@ -299,7 +300,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5032ba60", + "id": "64d74ae6", "metadata": {}, "outputs": [], "source": [ @@ -310,7 +311,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b7806720", + "id": "b7d04a73", "metadata": {}, "outputs": [], "source": [ @@ -319,7 +320,7 @@ }, { "cell_type": "markdown", - "id": "fb02667d", + "id": "901a916d", "metadata": { "lines_to_next_cell": 2 }, @@ -332,7 +333,7 @@ { "cell_type": "code", "execution_count": null, - "id": "514fc44d", + "id": "b1e7c109", "metadata": {}, "outputs": [], "source": [ @@ 
-363,7 +364,7 @@ { "cell_type": "code", "execution_count": null, - "id": "27719b25", + "id": "566f25a3", "metadata": {}, "outputs": [], "source": [ @@ -373,7 +374,7 @@ }, { "cell_type": "markdown", - "id": "99c431db", + "id": "1e1a9621", "metadata": {}, "source": [ "### 🆙 Create at scale\n", @@ -384,17 +385,20 @@ { "cell_type": "code", "execution_count": null, - "id": "e8862095", + "id": "86c7a831", "metadata": {}, "outputs": [], "source": [ - "results = data_designer.create(config_builder, num_records=5, dataset_name=\"tutorial-6-edited-images\")" + "create_num_records = 5\n", + "if MODEL_ID == \"black-forest-labs/flux.2-pro\":\n", + " create_num_records = int(os.environ.get(\"DATA_DESIGNER_FLUX_2_PRO_CREATE_NUM_RECORDS\") or create_num_records)\n", + "results = data_designer.create(config_builder, num_records=create_num_records, dataset_name=\"tutorial-6-edited-images\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "690c8016", + "id": "9a5ab86a", "metadata": {}, "outputs": [], "source": [ @@ -405,7 +409,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6bd21f76", + "id": "dc5afbb4", "metadata": {}, "outputs": [], "source": [ @@ -415,7 +419,7 @@ }, { "cell_type": "markdown", - "id": "1d00589c", + "id": "ee58dd05", "metadata": {}, "source": [ "## ⏭️ Next steps\n", diff --git a/docs/concepts/models/default-model-settings.md b/docs/concepts/models/default-model-settings.md index 450c7d135..bef56a910 100644 --- a/docs/concepts/models/default-model-settings.md +++ b/docs/concepts/models/default-model-settings.md @@ -46,7 +46,7 @@ The following model configurations are automatically available when `NVIDIA_API_ | `nvidia-text` | `nvidia/nemotron-3-nano-30b-a3b` | General text generation | `temperature=1.0, top_p=1.0` | | `nvidia-reasoning` | `nvidia/nemotron-3-super-120b-a12b` | Reasoning and analysis tasks | `temperature=1.0, top_p=0.95, extra_body={"reasoning_effort": "medium"}` | | `nvidia-vision` | 
`nvidia/nemotron-3-nano-omni-30b-a3b-reasoning` | Vision and image understanding | `temperature=0.60, top_p=0.95` | -| `nvidia-embedding` | `nvidia/llama-3.2-nv-embedqa-1b-v2` | Text embeddings | `encoding_format="float", extra_body={"input_type": "query"}` | +| `nvidia-embedding` | `nvidia/llama-nemotron-embed-1b-v2` | Text embeddings | `encoding_format="float", extra_body={"input_type": "query"}` | ### OpenAI Models diff --git a/docs/concepts/models/model-configs.md b/docs/concepts/models/model-configs.md index 856fbad9c..b9cacc2ec 100644 --- a/docs/concepts/models/model-configs.md +++ b/docs/concepts/models/model-configs.md @@ -95,7 +95,7 @@ model_configs = [ # Embedding tasks dd.ModelConfig( alias="embedding_model", - model="nvidia/llama-3.2-nv-embedqa-1b-v2", + model="nvidia/llama-nemotron-embed-1b-v2", provider="nvidia", inference_parameters=dd.EmbeddingInferenceParams( encoding_format="float", diff --git a/docs/notebook_source/4-providing-images-as-context.py b/docs/notebook_source/4-providing-images-as-context.py index 301e90125..1229d0fa7 100644 --- a/docs/notebook_source/4-providing-images-as-context.py +++ b/docs/notebook_source/4-providing-images-as-context.py @@ -76,6 +76,10 @@ # %% config_builder = dd.DataDesignerConfigBuilder() +for model_config in config_builder.model_configs: + if model_config.alias == "nvidia-vision": + model_config.skip_health_check = True + break # %% [markdown] # ### 🌱 Seed Dataset Creation diff --git a/docs/notebook_source/5-generating-images.py b/docs/notebook_source/5-generating-images.py index dfdc5782a..17f539782 100644 --- a/docs/notebook_source/5-generating-images.py +++ b/docs/notebook_source/5-generating-images.py @@ -38,6 +38,8 @@ # # %% +import os + from IPython.display import Image as IPImage from IPython.display import display @@ -272,7 +274,10 @@ # # %% -results = data_designer.create(config_builder, num_records=2, dataset_name="tutorial-5-images") +create_num_records = 2 +if MODEL_ID == 
"black-forest-labs/flux.2-pro": + create_num_records = int(os.environ.get("DATA_DESIGNER_FLUX_2_PRO_CREATE_NUM_RECORDS") or create_num_records) +results = data_designer.create(config_builder, num_records=create_num_records, dataset_name="tutorial-5-images") # %% dataset = results.load_dataset() diff --git a/docs/notebook_source/6-editing-images-with-image-context.py b/docs/notebook_source/6-editing-images-with-image-context.py index a322d84a0..d744cddfc 100644 --- a/docs/notebook_source/6-editing-images-with-image-context.py +++ b/docs/notebook_source/6-editing-images-with-image-context.py @@ -39,6 +39,7 @@ # %% import base64 +import os from pathlib import Path from IPython.display import Image as IPImage @@ -244,7 +245,10 @@ def display_before_after(row, index: int, base_path: Path | None = None) -> None # # %% -results = data_designer.create(config_builder, num_records=5, dataset_name="tutorial-6-edited-images") +create_num_records = 5 +if MODEL_ID == "black-forest-labs/flux.2-pro": + create_num_records = int(os.environ.get("DATA_DESIGNER_FLUX_2_PRO_CREATE_NUM_RECORDS") or create_num_records) +results = data_designer.create(config_builder, num_records=create_num_records, dataset_name="tutorial-6-edited-images") # %% dataset = results.load_dataset() diff --git a/fern/versions/latest/pages/concepts/models/default-model-settings.mdx b/fern/versions/latest/pages/concepts/models/default-model-settings.mdx index ff8377b71..8b73c5575 100644 --- a/fern/versions/latest/pages/concepts/models/default-model-settings.mdx +++ b/fern/versions/latest/pages/concepts/models/default-model-settings.mdx @@ -49,7 +49,7 @@ The following model configurations are automatically available when `NVIDIA_API_ | `nvidia-text` | `nvidia/nemotron-3-nano-30b-a3b` | General text generation | `temperature=1.0, top_p=1.0` | | `nvidia-reasoning` | `nvidia/nemotron-3-super-120b-a12b` | Reasoning and analysis tasks | `temperature=1.0, top_p=0.95, extra_body={"reasoning_effort": "medium"}` | | 
`nvidia-vision` | `nvidia/nemotron-3-nano-omni-30b-a3b-reasoning` | Omni multimodal understanding for image, audio, and video inputs | `temperature=0.60, top_p=0.95` | -| `nvidia-embedding` | `nvidia/llama-3.2-nv-embedqa-1b-v2` | Text embeddings | `encoding_format="float", extra_body={"input_type": "query"}` | +| `nvidia-embedding` | `nvidia/llama-nemotron-embed-1b-v2` | Text embeddings | `encoding_format="float", extra_body={"input_type": "query"}` | ### OpenAI Models diff --git a/fern/versions/latest/pages/concepts/models/model-configs.mdx b/fern/versions/latest/pages/concepts/models/model-configs.mdx index 647fece16..90834e8ff 100644 --- a/fern/versions/latest/pages/concepts/models/model-configs.mdx +++ b/fern/versions/latest/pages/concepts/models/model-configs.mdx @@ -101,7 +101,7 @@ model_configs = [ # Embedding tasks dd.ModelConfig( alias="embedding_model", - model="nvidia/llama-3.2-nv-embedqa-1b-v2", + model="nvidia/llama-nemotron-embed-1b-v2", provider="nvidia", inference_parameters=dd.EmbeddingInferenceParams( encoding_format="float", diff --git a/packages/data-designer-config/src/data_designer/config/utils/constants.py b/packages/data-designer-config/src/data_designer/config/utils/constants.py index db450e872..d7b3391e2 100644 --- a/packages/data-designer-config/src/data_designer/config/utils/constants.py +++ b/packages/data-designer-config/src/data_designer/config/utils/constants.py @@ -360,7 +360,7 @@ class NordColor(Enum): "inference_parameters": NEMOTRON_3_NANO_OMNI_30B_A3B_REASONING_INFERENCE_PARAMS, }, "embedding": { - "model": "nvidia/llama-3.2-nv-embedqa-1b-v2", + "model": "nvidia/llama-nemotron-embed-1b-v2", "inference_parameters": DEFAULT_EMBEDDING_INFERENCE_PARAMS | {"extra_body": {"input_type": "query"}}, }, }, diff --git a/packages/data-designer-config/tests/config/test_default_model_settings.py b/packages/data-designer-config/tests/config/test_default_model_settings.py index 24c98af51..dc5f03107 100644 --- 
a/packages/data-designer-config/tests/config/test_default_model_settings.py +++ b/packages/data-designer-config/tests/config/test_default_model_settings.py @@ -65,7 +65,7 @@ def test_get_builtin_model_configs(): assert builtin_model_configs[2].model == "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning" assert builtin_model_configs[2].provider == "nvidia" assert builtin_model_configs[3].alias == "nvidia-embedding" - assert builtin_model_configs[3].model == "nvidia/llama-3.2-nv-embedqa-1b-v2" + assert builtin_model_configs[3].model == "nvidia/llama-nemotron-embed-1b-v2" assert builtin_model_configs[3].provider == "nvidia" assert builtin_model_configs[4].alias == "openai-text" assert builtin_model_configs[4].model == "gpt-4.1" diff --git a/scripts/health_checks.py b/scripts/health_checks.py index c4956f14f..bd5976f56 100644 --- a/scripts/health_checks.py +++ b/scripts/health_checks.py @@ -12,8 +12,10 @@ from __future__ import annotations +import importlib import os import sys +import time import traceback from pathlib import Path from tempfile import TemporaryDirectory @@ -31,6 +33,10 @@ ) from data_designer.interface import DataDesigner +MAX_ATTEMPTS = 3 +RETRY_BACKOFF_SECONDS = 5 +RETRYABLE_MODEL_ERRORS = importlib.import_module("data_designer.engine.models.errors").RETRYABLE_MODEL_ERRORS +HEALTH_CHECK_RETRYABLE_ERRORS = RETRYABLE_MODEL_ERRORS + (TimeoutError,) PROVIDER_API_KEY_ENV_VARS = { NVIDIA_PROVIDER_NAME: NVIDIA_API_KEY_ENV_VAR_NAME, OPENAI_PROVIDER_NAME: OPENAI_API_KEY_ENV_VAR_NAME, @@ -88,8 +94,20 @@ def _check_model(provider_name: str, model_type: str) -> None: model_config = _get_model_config(provider_name, model_type) config_builder = _build_check_config(model_config, model_type) - with TemporaryDirectory(prefix="data-designer-health-check-") as temp_dir: - DataDesigner(artifact_path=Path(temp_dir), model_providers=[provider]).check_models(config_builder) + for attempt in range(1, MAX_ATTEMPTS + 1): + try: + with 
TemporaryDirectory(prefix="data-designer-health-check-") as temp_dir: + DataDesigner(artifact_path=Path(temp_dir), model_providers=[provider]).check_models(config_builder) + return + except HEALTH_CHECK_RETRYABLE_ERRORS as exc: + if attempt == MAX_ATTEMPTS: + raise + delay = attempt * RETRY_BACKOFF_SECONDS + print( + f"RETRY {provider_name}/{model_type} after {type(exc).__name__}: {exc} " + f"(attempt {attempt + 1}/{MAX_ATTEMPTS}, sleeping {delay}s)" + ) + time.sleep(delay) def main() -> int: