diff --git a/.buildkite/pipelines/format_and_validation.yml.sh b/.buildkite/pipelines/format_and_validation.yml.sh index ffff9e54d5..b21d1c0fdb 100755 --- a/.buildkite/pipelines/format_and_validation.yml.sh +++ b/.buildkite/pipelines/format_and_validation.yml.sh @@ -18,4 +18,13 @@ steps: notify: - github_commit_status: context: "Validate formatting with clang-format" + - label: "Validate changelog entries" + key: "validate_changelogs" + command: ".buildkite/scripts/steps/validate-changelogs.sh" + agents: + image: "python:3.11-slim" + soft_fail: true + notify: + - github_commit_status: + context: "Validate changelog entries" EOL diff --git a/.buildkite/scripts/steps/validate-changelogs.sh b/.buildkite/scripts/steps/validate-changelogs.sh new file mode 100755 index 0000000000..797f001381 --- /dev/null +++ b/.buildkite/scripts/steps/validate-changelogs.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License +# 2.0 and the following additional limitation. Functionality enabled by the +# files subject to the Elastic License 2.0 may only be used in production when +# invoked by an Elasticsearch process with a license key installed that permits +# use of machine learning features. You may not use this file except in +# compliance with the Elastic License 2.0 and the foregoing additional +# limitation. + +set -euo pipefail + +SKIP_LABELS=">test >refactoring >docs >build >non-issue" + +# On PR builds, check if the PR has a label that skips changelog validation. +# BUILDKITE_PULL_REQUEST_LABELS is a comma-separated list set by Buildkite. 
+if [[ -n "${BUILDKITE_PULL_REQUEST_LABELS:-}" ]]; then
+    IFS=',' read -ra LABELS <<< "${BUILDKITE_PULL_REQUEST_LABELS}"
+    for label in "${LABELS[@]}"; do
+        label="${label#"${label%%[![:space:]]*}"}"; label="${label%"${label##*[![:space:]]}"}" # trim whitespace (pure expansion; xargs chokes on quote chars in labels)
+        for skip in ${SKIP_LABELS}; do
+            if [[ "${label}" == "${skip}" ]]; then
+                echo "Skipping changelog validation: PR has label '${label}'"
+                exit 0
+            fi
+        done
+    done
+fi
+
+# Install system and Python dependencies
+if ! command -v git &>/dev/null; then
+    apt-get update -qq && apt-get install -y -qq git >/dev/null 2>&1
+fi
+python3 -m pip install --quiet --break-system-packages pyyaml jsonschema 2>/dev/null \
+    || python3 -m pip install --quiet pyyaml jsonschema
+
+# Find changelog files changed in this PR (compared to main/target branch)
+TARGET_BRANCH="${BUILDKITE_PULL_REQUEST_BASE_BRANCH:-main}"
+
+# Fetch the target branch with full history: the triple-dot diff below needs the merge base
+git fetch origin "${TARGET_BRANCH}" 2>/dev/null || true
+
+CHANGED_CHANGELOGS=$(git diff --name-only --diff-filter=ACM "origin/${TARGET_BRANCH}"...HEAD -- 'docs/changelog/*.yaml' || true)
+
+if [[ -z "${CHANGED_CHANGELOGS}" ]]; then
+    echo "No changelog files found in this PR."
+    echo "If this PR changes user-visible behaviour, please add a changelog entry."
+    echo "See docs/changelog/README.md for details."
+ echo "To skip this check, add one of these labels: ${SKIP_LABELS}" + + # Soft warning rather than hard failure during rollout + if [[ "${CHANGELOG_REQUIRED:-false}" == "true" ]]; then + exit 1 + fi + exit 0 +fi + +echo "Validating changelog files:" +echo "${CHANGED_CHANGELOGS}" +echo "" + +python3 dev-tools/validate_changelogs.py ${CHANGED_CHANGELOGS} diff --git a/build.gradle b/build.gradle index 843e8718d7..080714884e 100644 --- a/build.gradle +++ b/build.gradle @@ -169,6 +169,20 @@ task format(type: Exec) { workingDir "${projectDir}" } +task validateChangelogs(type: Exec) { + commandLine 'python3', 'dev-tools/validate_changelogs.py' + workingDir "${projectDir}" + description = 'Validate changelog YAML entries against the schema' + group = 'verification' +} + +task bundleChangelogs(type: Exec) { + commandLine 'python3', 'dev-tools/bundle_changelogs.py', '--version', project.version + workingDir "${projectDir}" + description = 'Generate consolidated changelog from per-PR YAML entries' + group = 'documentation' +} + task precommit(type: Exec) { commandLine shell workingDir "${projectDir}" diff --git a/dev-tools/bundle_changelogs.py b/dev-tools/bundle_changelogs.py new file mode 100755 index 0000000000..9614b536a8 --- /dev/null +++ b/dev-tools/bundle_changelogs.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python3 +""" +Bundle per-PR changelog YAML files into a consolidated changelog for release. + +Usage: + python3 bundle_changelogs.py [--dir DIR] [--version VERSION] [--format FORMAT] + +Outputs a formatted changelog grouped by type and area, suitable for inclusion +in release notes. + +Formats: + markdown (default) - Markdown suitable for GitHub releases + asciidoc - AsciiDoc suitable for Elastic docs +""" + +import argparse +import sys +from collections import defaultdict +from pathlib import Path + +try: + import yaml +except ImportError: + print("Missing pyyaml. 
Install with: pip3 install pyyaml", file=sys.stderr) + sys.exit(2) + + +TYPE_ORDER = [ + ("known-issue", "Known issues"), + ("security", "Security fixes"), + ("breaking", "Breaking changes"), + ("breaking-java", "Breaking Java changes"), + ("deprecation", "Deprecations"), + ("feature", "New features"), + ("new-aggregation", "New aggregations"), + ("enhancement", "Enhancements"), + ("bug", "Bug fixes"), + ("regression", "Regression fixes"), + ("upgrade", "Upgrades"), +] + +ML_CPP_PULL_URL = "https://github.com/elastic/ml-cpp/pull" +ML_CPP_ISSUE_URL = "https://github.com/elastic/ml-cpp/issues" + + +def load_entries(changelog_dir): + entries = [] + for path in sorted(changelog_dir.glob("*.yaml")): + with open(path) as f: + data = yaml.safe_load(f) + if data and isinstance(data, dict): + data["_file"] = path.name + entries.append(data) + return entries + + +def format_markdown(entries, version=None): + lines = [] + if version: + lines.append(f"## {version}\n") + + grouped = defaultdict(lambda: defaultdict(list)) + for entry in entries: + area = entry.get("area", "General") + grouped[entry["type"]][area].append(entry) + + for type_key, type_label in TYPE_ORDER: + if type_key not in grouped: + continue + lines.append(f"### {type_label}\n") + for area in sorted(grouped[type_key].keys()): + lines.append(f"**{area}**") + for entry in sorted(grouped[type_key][area], key=lambda e: e.get("pr", 0)): + pr = entry.get("pr") + summary = entry["summary"] + issues = entry.get("issues", []) + issue_refs = ", ".join(f"#{i}" for i in issues) + if pr: + line = f"- {summary} [#{pr}]({ML_CPP_PULL_URL}/{pr})" + else: + line = f"- {summary}" + if issue_refs: + line += f" ({issue_refs})" + lines.append(line) + lines.append("") + + return "\n".join(lines) + + +def format_asciidoc(entries, version=None): + lines = [] + if version: + lines.append(f"== {version}\n") + + grouped = defaultdict(lambda: defaultdict(list)) + for entry in entries: + area = entry.get("area", "General") + 
grouped[entry["type"]][area].append(entry) + + for type_key, type_label in TYPE_ORDER: + if type_key not in grouped: + continue + lines.append(f"=== {type_label}\n") + for area in sorted(grouped[type_key].keys()): + lines.append(f"*{area}*") + for entry in sorted(grouped[type_key][area], key=lambda e: e.get("pr", 0)): + pr = entry.get("pr") + summary = entry["summary"] + issues = entry.get("issues", []) + issue_refs = ", ".join( + f"{ML_CPP_ISSUE_URL}/{i}[#{i}]" for i in issues + ) + if pr: + line = f"* {summary} {{ml-pull}}{pr}[#{pr}]" + else: + line = f"* {summary}" + if issue_refs: + line += f" ({issue_refs})" + lines.append(line) + lines.append("") + + return "\n".join(lines) + + +def main(): + parser = argparse.ArgumentParser(description="Bundle changelog YAML files") + parser.add_argument("--dir", default=None, help="Changelog directory") + parser.add_argument("--version", default=None, help="Version string for heading") + parser.add_argument("--format", default="markdown", choices=["markdown", "asciidoc"]) + args = parser.parse_args() + + repo_root = Path(__file__).resolve().parent.parent + changelog_dir = Path(args.dir) if args.dir else repo_root / "docs" / "changelog" + + entries = load_entries(changelog_dir) + if not entries: + print("No changelog entries found.", file=sys.stderr) + sys.exit(0) + + if args.format == "asciidoc": + print(format_asciidoc(entries, args.version)) + else: + print(format_markdown(entries, args.version)) + + +if __name__ == "__main__": + main() diff --git a/dev-tools/export_changelogs.py b/dev-tools/export_changelogs.py new file mode 100755 index 0000000000..5d264eceeb --- /dev/null +++ b/dev-tools/export_changelogs.py @@ -0,0 +1,320 @@ +#!/usr/bin/env python3 +""" +Export ml-cpp changelog entries for inclusion in Elasticsearch release notes. 
+ +Copies changelog YAML files from docs/changelog/ to a target directory +(typically elastic/elasticsearch's docs/changelog/) with a 'ml-cpp-' filename +prefix to avoid PR number collisions with ES-native entries. + +Usage: + # Preview what would be exported + python3 dev-tools/export_changelogs.py --dry-run + + # Export to a local ES checkout + python3 dev-tools/export_changelogs.py --target ~/src/elasticsearch/docs/changelog + + # Export and create a PR in the ES repo + python3 dev-tools/export_changelogs.py --target ~/src/elasticsearch/docs/changelog --create-pr + + # Export specific files only + python3 dev-tools/export_changelogs.py --target /tmp/out docs/changelog/3008.yaml +""" + +import argparse +import difflib +import json +import subprocess +import sys +from pathlib import Path + +try: + import yaml +except ImportError: + print("Missing pyyaml. Install with: pip3 install pyyaml", file=sys.stderr) + sys.exit(2) + +try: + import jsonschema +except ImportError: + print("Missing jsonschema. Install with: pip3 install jsonschema", file=sys.stderr) + sys.exit(2) + + +PREFIX = "ml-cpp-" +SOURCE_REPO = "elastic/ml-cpp" + + +def validate_entries(entries, schema_path): + """Validate all entries against the JSON schema. 
Returns list of errors.""" + with open(schema_path) as f: + schema = json.load(f) + + validator = jsonschema.Draft7Validator(schema) + errors = [] + for source_path, _, data in entries: + for error in validator.iter_errors(data): + path = ".".join(str(p) for p in error.absolute_path) or "(root)" + errors.append(f"{source_path.name}: {path}: {error.message}") + return errors + + +def collect_entries(changelog_dir, specific_files=None): + """Collect changelog YAML files, returning (source_path, target_name, data) tuples.""" + if specific_files: + paths = [Path(f) for f in specific_files] + else: + paths = sorted(changelog_dir.glob("*.yaml")) + + entries = [] + for path in paths: + if not path.exists(): + print(f"Warning: {path} not found, skipping", file=sys.stderr) + continue + with open(path) as f: + data = yaml.safe_load(f) + if not data or not isinstance(data, dict): + continue + + target_name = PREFIX + path.name + entries.append((path, target_name, data)) + + return entries + + +def resolve_conflict(source_path, dest, target_name): + """Handle a pre-existing file at the destination. Returns the action taken.""" + source_lines = source_path.read_text().splitlines(keepends=True) + dest_lines = dest.read_text().splitlines(keepends=True) + + if source_lines == dest_lines: + print(f" {target_name}: identical to existing file, skipping") + return "skip" + + print(f"\n {target_name}: file already exists with different content.\n") + diff = difflib.unified_diff( + dest_lines, source_lines, + fromfile=f"existing: {dest.name}", + tofile=f"incoming: {source_path.name}", + ) + sys.stdout.writelines(" " + line for line in diff) + print() + + while True: + choice = input(f" [{target_name}] (o)verwrite / (s)kip / (a)bort export? 
").strip().lower() + if choice in ("o", "overwrite"): + write_entry_with_source_repo(source_path, dest) + print(f" {target_name}: overwritten") + return "overwrite" + elif choice in ("s", "skip"): + print(f" {target_name}: skipped") + return "skip" + elif choice in ("a", "abort"): + print("\nExport aborted.") + sys.exit(1) + else: + print(" Please enter 'o' (overwrite), 's' (skip), or 'a' (abort).") + + +def verify_es_repo(target_dir): + """Verify that the target looks like an ES docs/changelog directory.""" + target = Path(target_dir).resolve() + + if not target.is_dir(): + print(f"Error: target directory does not exist: {target}", file=sys.stderr) + sys.exit(1) + + es_repo_root = target.parent.parent + markers = [ + es_repo_root / "build.gradle", + es_repo_root / "settings.gradle", + es_repo_root / "docs" / "changelog", + ] + if not all(m.exists() for m in markers): + print( + f"Warning: {es_repo_root} does not look like an Elasticsearch checkout.\n" + f" Expected to find build.gradle, settings.gradle, and docs/changelog/\n" + f" at the repo root (two levels above --target).\n", + file=sys.stderr, + ) + choice = input(" Continue anyway? 
(y/n) ").strip().lower() + if choice not in ("y", "yes"): + print("Export aborted.") + sys.exit(1) + + return es_repo_root + + +def write_entry_with_source_repo(source_path, dest): + """Write a changelog entry to dest, injecting source_repo if not already present.""" + with open(source_path) as f: + data = yaml.safe_load(f) + if "source_repo" not in data: + data["source_repo"] = SOURCE_REPO + with open(dest, "w") as f: + yaml.dump(data, f, default_flow_style=False, sort_keys=False) + + +def export_entries(entries, target_dir, dry_run=False): + """Export entries to the target directory with prefixed filenames and source_repo.""" + target = Path(target_dir) + + exported = [] + skipped = 0 + for source_path, target_name, data in entries: + dest = target / target_name + pr = data.get("pr", "n/a") + summary = data.get("summary", "")[:60] + if dry_run: + flag = " [EXISTS]" if dest.exists() else "" + print(f" {target_name} (PR #{pr}: {summary}){flag}") + exported.append(dest) + elif dest.exists(): + action = resolve_conflict(source_path, dest, target_name) + if action == "overwrite": + exported.append(dest) + else: + skipped += 1 + else: + write_entry_with_source_repo(source_path, dest) + print(f" Copied {source_path.name} -> {target_name}") + exported.append(dest) + + if skipped > 0 and not dry_run: + print(f"\n ({skipped} file(s) skipped due to conflicts)") + + return exported + + +def create_pr(es_repo_dir, exported_files, version=None): + """Create a git branch and PR in the ES repo with the exported entries.""" + es_repo = Path(es_repo_dir).resolve() + branch_name = "ml-cpp-changelog-export" + if version: + branch_name += f"-{version}" + + try: + subprocess.run(["git", "checkout", "-b", branch_name], cwd=es_repo, check=True) + subprocess.run(["git", "add"] + [str(f) for f in exported_files], cwd=es_repo, check=True) + + msg = "[ML] Add ml-cpp changelog entries" + if version: + msg += f" for {version}" + subprocess.run(["git", "commit", "-m", msg], cwd=es_repo, 
check=True) + subprocess.run(["git", "push", "-u", "origin", branch_name], cwd=es_repo, check=True) + + pr_body = ( + "Adds ml-cpp changelog entries to the ES release notes.\n\n" + "Source: elastic/ml-cpp docs/changelog/" + ) + if version: + pr_body += f"\nVersion: {version}" + result = subprocess.run( + ["gh", "pr", "create", "--title", msg, "--body", pr_body], + cwd=es_repo, capture_output=True, text=True, + ) + if result.returncode == 0: + print(f"\nPR created: {result.stdout.strip()}") + else: + print(f"\nFailed to create PR: {result.stderr}", file=sys.stderr) + sys.exit(1) + except subprocess.CalledProcessError as e: + print(f"Git error: {e}", file=sys.stderr) + sys.exit(1) + + +def main(): + parser = argparse.ArgumentParser( + description="Export ml-cpp changelog entries for ES release notes", + ) + parser.add_argument( + "--target", + help="Target directory (e.g. ~/src/elasticsearch/docs/changelog)", + ) + parser.add_argument( + "--dir", + default=None, + help="Source changelog directory (default: docs/changelog/)", + ) + parser.add_argument( + "--version", + default=None, + help="Version label (used in PR title/branch if --create-pr)", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Show what would be exported without copying files", + ) + parser.add_argument( + "--create-pr", + action="store_true", + help="Create a PR in the ES repo (requires --target to be inside an ES checkout)", + ) + parser.add_argument( + "--prune", + action="store_true", + help="Delete source YAML files after successful export (use after release)", + ) + parser.add_argument( + "files", + nargs="*", + help="Specific changelog files to export (default: all *.yaml in --dir)", + ) + args = parser.parse_args() + + repo_root = Path(__file__).resolve().parent.parent + changelog_dir = Path(args.dir) if args.dir else repo_root / "docs" / "changelog" + schema_path = repo_root / "docs" / "changelog" / "changelog-schema.json" + + entries = collect_entries(changelog_dir, 
args.files if args.files else None) + if not entries: + print("No changelog entries found.") + return + + print(f"Found {len(entries)} changelog entry(ies).") + + # Validate all entries before exporting + if schema_path.exists(): + print("Validating entries against schema... ", end="", flush=True) + errors = validate_entries(entries, schema_path) + if errors: + print(f"FAILED ({len(errors)} error(s)):\n") + for error in errors: + print(f" - {error}") + print("\nFix validation errors before exporting.") + sys.exit(1) + print("OK") + else: + print(f"Warning: schema not found at {schema_path}, skipping validation", + file=sys.stderr) + + print() + + if args.dry_run or not args.target: + export_entries(entries, args.target or "/dev/null", dry_run=True) + if not args.target: + print("\nUse --target to export, or --dry-run to preview.") + return + + # Verify the target is a real ES checkout + es_repo_root = verify_es_repo(args.target) + + exported = export_entries(entries, args.target) + if not exported: + print("\nNo files exported.") + return + + print(f"\nExported {len(exported)} file(s) to {args.target}") + + if args.create_pr: + create_pr(es_repo_root, exported, args.version) + + if args.prune: + for source_path, _, _ in entries: + source_path.unlink() + print(f" Pruned {source_path}") + print(f"\nPruned {len(entries)} source file(s)") + + +if __name__ == "__main__": + main() diff --git a/dev-tools/validate_changelogs.py b/dev-tools/validate_changelogs.py new file mode 100755 index 0000000000..1bc8437c97 --- /dev/null +++ b/dev-tools/validate_changelogs.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python3 +""" +Validate changelog YAML files against the changelog JSON schema. + +Usage: + python3 validate_changelogs.py [--schema SCHEMA] [--dir DIR] [FILES...] + +If FILES are given, only those files are validated. +Otherwise all *.yaml files in DIR (default: docs/changelog/) are validated. 
+
+Exit codes:
+    0  All files valid (or no files to validate)
+    1  One or more validation errors
+    2  Missing dependencies or bad arguments
+"""
+
+import argparse
+import json
+import os
+import re
+import sys
+from pathlib import Path
+
+
+def check_dependencies():
+    """Check that required Python packages are available."""
+    missing = []
+    try:
+        import yaml  # noqa: F401
+    except ImportError:
+        missing.append("pyyaml")
+    try:
+        import jsonschema  # noqa: F401
+    except ImportError:
+        missing.append("jsonschema")
+    if missing:
+        print(
+            f"Missing Python packages: {', '.join(missing)}\n"
+            f"Install with: pip3 install {' '.join(missing)}",
+            file=sys.stderr,
+        )
+        sys.exit(2)
+
+
+def load_schema(schema_path):
+    with open(schema_path) as f:
+        return json.load(f)
+
+
+def validate_file(filepath, schema):
+    """Validate a single YAML file. Returns a list of error strings."""
+    import jsonschema
+    import yaml
+
+    errors = []
+    filename = os.path.basename(filepath)
+    stem = Path(filepath).stem
+
+    try:
+        with open(filepath) as f:
+            data = yaml.safe_load(f)
+    except yaml.YAMLError as e:
+        errors.append(f"{filename}: invalid YAML: {e}")
+        return errors
+
+    if data is None:
+        errors.append(f"{filename}: file is empty")
+        return errors
+
+    if not isinstance(data, dict):
+        errors.append(f"{filename}: expected a YAML mapping, got {type(data).__name__}")
+        return errors
+
+    # Validate against JSON schema
+    validator = jsonschema.Draft7Validator(schema)
+    for error in sorted(validator.iter_errors(data), key=lambda e: list(e.path)):
+        path = ".".join(str(p) for p in error.absolute_path) or "(root)"
+        errors.append(f"{filename}: {path}: {error.message}")
+
+    # Filename convention: numeric filenames must match the pr field.
+    # Types without a pr field (known-issue, security) may use descriptive names.
+    if re.match(r"^\d+$", stem):
+        if "pr" in data and data["pr"] != int(stem):
+            errors.append(
+                f"{filename}: pr field ({data['pr']}) does not match filename ({stem})"
+            )
+    elif "pr" in data:
+        errors.append(
+            f"{filename}: file has a pr field ({data['pr']}), "
+            f"so filename should be {data['pr']}.yaml"
+        )
+
+    return errors
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Validate changelog YAML files")
+    parser.add_argument(
+        "--schema",
+        default=None,
+        help="Path to the JSON schema (default: docs/changelog/changelog-schema.json)",
+    )
+    parser.add_argument(
+        "--dir",
+        default=None,
+        help="Directory containing changelog YAML files (default: docs/changelog/)",
+    )
+    parser.add_argument(
+        "files",
+        nargs="*",
+        help="Specific files to validate (overrides --dir)",
+    )
+    args = parser.parse_args()
+
+    check_dependencies()
+
+    # Resolve paths relative to repo root
+    repo_root = Path(__file__).resolve().parent.parent
+    schema_path = Path(args.schema) if args.schema else repo_root / "docs" / "changelog" / "changelog-schema.json"
+    changelog_dir = Path(args.dir) if args.dir else repo_root / "docs" / "changelog"
+
+    if not schema_path.exists():
+        print(f"Schema not found: {schema_path}", file=sys.stderr)
+        sys.exit(2)
+
+    schema = load_schema(schema_path)
+
+    # Collect files to validate
+    if args.files:
+        yaml_files = [Path(f) for f in args.files]
+    else:
+        yaml_files = sorted(changelog_dir.glob("*.yaml"))
+
+    if not yaml_files:
+        print("No changelog files to validate.")
+        return
+
+    all_errors = []
+    for filepath in yaml_files:
+        if not filepath.exists():
+            all_errors.append(f"{filepath}: file not found")
+            continue
+        errors = validate_file(filepath, schema)
+        all_errors.extend(errors)
+
+    if all_errors:
+        print(f"Changelog validation failed ({len(all_errors)} error(s)):\n")
+        for error in all_errors:
+            print(f"  - {error}")
+        sys.exit(1)
+    else:
+        print(f"Validated {len(yaml_files)} changelog file(s) successfully.")
+
+
+if __name__ == 
"__main__": + main() diff --git a/docs/changelog/.gitkeep b/docs/changelog/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/changelog/0000.yaml.sample b/docs/changelog/0000.yaml.sample new file mode 100644 index 0000000000..bd1d40314a --- /dev/null +++ b/docs/changelog/0000.yaml.sample @@ -0,0 +1,6 @@ +pr: 1234 +summary: Fix anomaly detection model state persistence for large jobs +area: Machine Learning +type: bug +issues: + - 1230 diff --git a/docs/changelog/2841.yaml b/docs/changelog/2841.yaml new file mode 100644 index 0000000000..a7d97b7041 --- /dev/null +++ b/docs/changelog/2841.yaml @@ -0,0 +1,5 @@ +pr: 2841 +summary: "Better messaging regarding OOM process termination" +area: Machine Learning +type: enhancement +issues: [] diff --git a/docs/changelog/2846.yaml b/docs/changelog/2846.yaml new file mode 100644 index 0000000000..d225e9a1cb --- /dev/null +++ b/docs/changelog/2846.yaml @@ -0,0 +1,5 @@ +pr: 2846 +summary: "Report the actual memory usage of the autodetect process" +area: Machine Learning +type: enhancement +issues: [] diff --git a/docs/changelog/2848.yaml b/docs/changelog/2848.yaml new file mode 100644 index 0000000000..ccca2a64c6 --- /dev/null +++ b/docs/changelog/2848.yaml @@ -0,0 +1,5 @@ +pr: 2848 +summary: "Improve adherence to memory limits for the bucket gatherer" +area: Machine Learning +type: enhancement +issues: [] diff --git a/docs/changelog/2863.yaml b/docs/changelog/2863.yaml new file mode 100644 index 0000000000..7042ed337a --- /dev/null +++ b/docs/changelog/2863.yaml @@ -0,0 +1,5 @@ +pr: 2863 +summary: "Update the PyTorch library to version 2.7.1" +area: Machine Learning +type: upgrade +issues: [] diff --git a/docs/changelog/2889.yaml b/docs/changelog/2889.yaml new file mode 100644 index 0000000000..7206fb8e5e --- /dev/null +++ b/docs/changelog/2889.yaml @@ -0,0 +1,5 @@ +pr: 2889 +summary: "Downgrade log severity for a batch of recoverable errors" +area: Machine Learning +type: enhancement +issues: [] diff 
--git a/docs/changelog/2894.yaml b/docs/changelog/2894.yaml
new file mode 100644
index 0000000000..528edff646
--- /dev/null
+++ b/docs/changelog/2894.yaml
@@ -0,0 +1,5 @@
+pr: 2894
+summary: "Better error handling regarding quantiles state documents"
+area: Machine Learning
+type: enhancement
+issues: []
diff --git a/docs/changelog/2895.yaml b/docs/changelog/2895.yaml
new file mode 100644
index 0000000000..6d5e810b39
--- /dev/null
+++ b/docs/changelog/2895.yaml
@@ -0,0 +1,5 @@
+pr: 2895
+summary: "Better handling of invalid JSON state documents"
+area: Machine Learning
+type: enhancement
+issues: []
diff --git a/docs/changelog/3008.yaml b/docs/changelog/3008.yaml
new file mode 100644
index 0000000000..e851b35d57
--- /dev/null
+++ b/docs/changelog/3008.yaml
@@ -0,0 +1,5 @@
+pr: 3008
+summary: "Harden pytorch_inference with TorchScript model graph validation"
+area: Machine Learning
+type: enhancement
+issues: [2890]
diff --git a/docs/changelog/3015.yaml b/docs/changelog/3015.yaml
new file mode 100644
index 0000000000..9b9dee8485
--- /dev/null
+++ b/docs/changelog/3015.yaml
@@ -0,0 +1,5 @@
+pr: 3015
+summary: "Add EuroBERT and Jina v5 ops to graph validation allowlist"
+area: Machine Learning
+type: enhancement
+issues: []
diff --git a/docs/changelog/3017.yaml b/docs/changelog/3017.yaml
new file mode 100644
index 0000000000..ae0820ff1a
--- /dev/null
+++ b/docs/changelog/3017.yaml
@@ -0,0 +1,5 @@
+pr: 3017
+summary: "Fix flaky CIoManagerTest/testFileIoGood test"
+area: Machine Learning
+type: bug
+issues: []
diff --git a/docs/changelog/README.md b/docs/changelog/README.md
new file mode 100644
index 0000000000..6ab3193508
--- /dev/null
+++ b/docs/changelog/README.md
@@ -0,0 +1,102 @@
+# Changelog entries
+
+Each pull request that changes user-visible behaviour should include a changelog
+entry as a YAML file in this directory, named `<PR number>.yaml`.
+ +The schema is aligned with the +[Elasticsearch changelog schema](https://github.com/elastic/elasticsearch/blob/main/build-tools-internal/src/main/resources/changelog-schema.json) +so that ml-cpp entries can be consumed directly by the ES release notes pipeline. + +## Format + +```yaml +pr: 2914 +summary: Split build and test into separate pipeline steps +area: Machine Learning +type: enhancement +issues: [] +``` + +### Required fields + +| Field | Description | +|-----------|-------------| +| `type` | The type of change (see below). Always required. | +| `summary` | A concise, user-facing description of the change. Always required. | +| `pr` | The pull request number (integer). Required unless type is `known-issue` or `security`. | +| `area` | The area of the codebase affected (see below). Required unless type is `known-issue` or `security`. | + +### Optional fields + +| Field | Description | +|---------------|-------------| +| `issues` | List of related GitHub issue numbers (integers). Default: `[]` | +| `highlight` | Release highlight object (see below). | +| `breaking` | Breaking change details. **Required** when type is `breaking` or `breaking-java`. | +| `deprecation` | Deprecation details. **Required** when type is `deprecation`. | + +### Valid areas + +Most ml-cpp entries should use **Machine Learning**. Other valid areas from the +ES schema (e.g. **Inference**) may be used when appropriate. The full list of +valid areas is defined in `changelog-schema.json`. 
+ +### Valid types + +| Type | Description | +|------|-------------| +| `breaking` | A change that breaks backwards compatibility (requires `breaking` object) | +| `breaking-java` | A breaking change to the Java API (requires `breaking` object) | +| `bug` | A fix for an existing defect | +| `deprecation` | Deprecation of existing functionality (requires `deprecation` object) | +| `enhancement` | An improvement to existing functionality | +| `feature` | A wholly new feature | +| `known-issue` | A known issue (`pr` and `area` not required) | +| `new-aggregation` | A new aggregation type | +| `regression` | A fix for a recently introduced defect | +| `security` | A security fix (`pr` and `area` not required) | +| `upgrade` | An upgrade-related change | + +### Highlight object + +For changes worthy of a release highlight: + +```yaml +highlight: + notable: true + title: "Short title for the highlight" + body: "Longer description in AsciiDoc format (no triple-backtick code blocks)." +``` + +### Breaking / Deprecation object + +Required when `type` is `breaking`, `breaking-java`, or `deprecation`: + +```yaml +breaking: + area: Machine Learning + title: "Short title describing the breaking change" + details: "Detailed description of what changed (AsciiDoc, no triple-backticks)." + impact: "What users need to do to adapt." + notable: true +``` + +Valid areas for breaking/deprecation changes are a subset of the main areas, +defined in `changelog-schema.json` under `compatibilityChangeArea`. + +## When is a changelog entry required? 
+ +A changelog entry is **required** for any PR that: +- Fixes a bug +- Adds or changes user-visible functionality +- Changes the API or data formats +- Deprecates or removes functionality + +A changelog entry is **not required** for: +- Pure refactoring with no behaviour change +- Test-only changes +- CI/build infrastructure changes (unless they affect the shipped artefact) +- Documentation-only changes + +PRs that do not require a changelog entry should be labelled with +`>test`, `>refactoring`, `>docs`, `>build`, or `>non-issue` to skip validation. diff --git a/docs/changelog/changelog-schema.json b/docs/changelog/changelog-schema.json new file mode 100644 index 0000000000..2777aedb4a --- /dev/null +++ b/docs/changelog/changelog-schema.json @@ -0,0 +1,311 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://github.com/elastic/ml-cpp/tree/main/docs/changelog", + "$ref": "#/definitions/Changelog", + "definitions": { + "Changelog": { + "type": "object", + "properties": { + "pr": { + "type": "integer" + }, + "issues": { + "type": "array", + "items": { + "type": "integer" + } + }, + "area": { + "type": "string", + "enum": [ + "Aggregations", + "Allocation", + "Analysis", + "Application", + "Audit", + "Authentication", + "Authorization", + "Autoscaling", + "CAT APIs", + "CCR", + "CCS", + "CRUD", + "Client", + "Cluster Coordination", + "Codec", + "Data streams", + "DLM", + "Discovery-Plugins", + "Distributed", + "Downsampling", + "EQL", + "ES|QL", + "Engine", + "Experiences", + "Extract&Transform", + "FIPS", + "Features", + "Geo", + "Graph", + "Health", + "Highlighting", + "ILM", + "IdentityProvider", + "Indices APIs", + "Inference", + "Infra/CLI", + "Infra/Circuit Breakers", + "Infra/Core", + "Infra/Logging", + "Infra/Node Lifecycle", + "Infra/Plugins", + "Infra/REST API", + "Infra/Resiliency", + "Infra/Scripting", + "Infra/Settings", + "Infra/Transport API", + "Infra/Metrics", + "Ingest", + "Ingest Node", + "Java High Level REST Client", + 
"Java Low Level REST Client", + "License", + "Logs", + "Machine Learning", + "Mapping", + "Monitoring", + "Network", + "Packaging", + "Percolator", + "Performance", + "PromQL", + "Query Languages", + "Ranking", + "Recovery", + "Reindex", + "Relevance", + "Rollup", + "SQL", + "Search", + "Searchable Snapshots", + "Security", + "SLM", + "Snapshot/Restore", + "Stats", + "Store", + "Suggesters", + "Task Management", + "TLS", + "Transform", + "TSDB", + "Vector Search", + "Watcher" + ] + }, + "type": { + "type": "string", + "enum": [ + "breaking", + "breaking-java", + "bug", + "deprecation", + "enhancement", + "feature", + "known-issue", + "new-aggregation", + "regression", + "security", + "upgrade" + ] + }, + "summary": { + "type": "string", + "minLength": 1 + }, + "highlight": { + "$ref": "#/definitions/Highlight" + }, + "breaking": { + "$ref": "#/definitions/CompatibilityChange" + }, + "deprecation": { + "$ref": "#/definitions/CompatibilityChange" + }, + "source_repo": { + "type": "string", + "description": "GitHub repository (owner/name) for entries from external repos, e.g. elastic/ml-cpp. Defaults to elastic/elasticsearch when absent." 
+ } + }, + "required": [ + "type", + "summary" + ], + "anyOf": [ + { + "$comment": "PR number and area fields not required for known-issue type", + "if": { + "not": { + "properties": { + "type": { + "const": "known-issue" + } + } + } + }, + "then": { + "required": [ + "pr", + "area" + ] + } + }, + { + "$comment": "PR number and area fields not required for security type", + "if": { + "not": { + "properties": { + "type": { + "const": "security" + } + } + } + }, + "then": { + "required": [ + "pr", + "area" + ] + } + } + ], + "allOf": [ + { + "if": { + "properties": { + "type": { + "const": "breaking" + } + } + }, + "then": { + "required": [ + "breaking" + ] + } + }, + { + "if": { + "properties": { + "type": { + "const": "breaking-java" + } + } + }, + "then": { + "required": [ + "breaking" + ] + } + } + ], + "if": { + "properties": { + "type": { + "const": "deprecation" + } + } + }, + "then": { + "required": [ + "deprecation" + ] + }, + "additionalProperties": false + }, + "Highlight": { + "properties": { + "notable": { + "type": "boolean" + }, + "title": { + "type": "string", + "minLength": 1 + }, + "body": { + "type": "string", + "pattern": "(?s)^((?!```).)*$", + "minLength": 1 + } + }, + "required": [ + "title", + "body" + ], + "additionalProperties": false + }, + "CompatibilityChange": { + "properties": { + "area": { + "$ref": "#/definitions/compatibilityChangeArea" + }, + "title": { + "type": "string", + "minLength": 1 + }, + "details": { + "type": "string", + "pattern": "(?s)^((?!```).)*$", + "minLength": 1 + }, + "impact": { + "type": "string", + "pattern": "(?s)^((?!```).)*$", + "minLength": 1 + }, + "notable": { + "type": "boolean" + }, + "ess_setting_change": { + "type": "boolean" + } + }, + "required": [ + "area", + "title", + "details", + "impact" + ], + "additionalProperties": false + }, + "compatibilityChangeArea": { + "type": "string", + "enum": [ + "Aggregations", + "Analysis", + "Authorization", + "Cluster and node setting", + "Command line tool", + 
"CRUD", + "ES|QL", + "ILM", + "Index setting", + "Ingest", + "JVM option", + "Java API", + "Logging", + "Logs", + "Machine Learning", + "Mapping", + "Metrics", + "Packaging", + "Painless", + "REST API", + "Rollup", + "Search", + "System requirement", + "Transform" + ] + }, + "additionalProperties": false + } +}