From 5245efbbce6952f74fcde49a9c912f74af2c15f3 Mon Sep 17 00:00:00 2001 From: Matt Perpick Date: Fri, 24 Oct 2025 14:56:21 -0400 Subject: [PATCH 1/6] Run tests against different dependencies. --- .github/workflows/ci.yml | 7 --- .gitignore | 3 + Appraisals | 21 +++++++ Gemfile.lock | 6 ++ Rakefile | 61 +++++++++++++++++--- braintrust.gemspec | 3 +- gemfiles/openai_0.33.gemfile | 14 +++++ gemfiles/openai_0.34.gemfile | 14 +++++ gemfiles/openai_latest.gemfile | 14 +++++ gemfiles/openai_uninstalled.gemfile | 13 +++++ lib/braintrust/trace.rb | 9 ++- lib/braintrust/trace/{ => contrib}/openai.rb | 0 test/braintrust/trace/openai_test.rb | 3 + test/braintrust/without_openai_test.rb | 56 ++++++++++++++++++ 14 files changed, 205 insertions(+), 19 deletions(-) create mode 100644 Appraisals create mode 100644 gemfiles/openai_0.33.gemfile create mode 100644 gemfiles/openai_0.34.gemfile create mode 100644 gemfiles/openai_latest.gemfile create mode 100644 gemfiles/openai_uninstalled.gemfile rename lib/braintrust/trace/{ => contrib}/openai.rb (100%) create mode 100644 test/braintrust/without_openai_test.rb diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 103128fb..416ef7bc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,13 +29,6 @@ jobs: BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v4 - if: matrix.ruby-version == '3.4' && matrix.os == 'ubuntu-latest' - with: - files: ./coverage/.resultset.json - fail_ci_if_error: false - # Summary job that requires all matrix tests to pass # This is what branch protection will check ci-success: diff --git a/.gitignore b/.gitignore index 8cdb3e38..54a06cbf 100644 --- a/.gitignore +++ b/.gitignore @@ -48,3 +48,6 @@ # Release artifacts changelog.md + +# Appraisal: commit gemfiles but ignore lock files +gemfiles/*.gemfile.lock diff --git a/Appraisals b/Appraisals new file mode 100644 index 00000000..98f23e52 --- /dev/null +++ b/Appraisals @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +# Test with OpenAI gem 0.33.x (previous stable version) +appraise "openai-0.33" do + gem "openai", "~> 0.33.0" +end + +# Test with current stable OpenAI gem version +appraise "openai-0.34" do + gem "openai", "~> 0.34.0" +end + +# Test with latest OpenAI gem version (allows newer patch/minor versions) +appraise "openai-latest" do + gem "openai", ">= 0.34" +end + +# Test without OpenAI gem (verify SDK works without optional dependency) +appraise "openai-uninstalled" do + remove_gem "openai" +end diff --git a/Gemfile.lock b/Gemfile.lock index 4a64e3d5..40ffabef 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -12,6 +12,10 @@ GEM addressable (2.8.7) public_suffix (>= 2.0.2, < 7.0) ansi (1.5.0) + appraisal (2.5.0) + bundler + rake + thor (>= 0.14.0) ast (2.4.3) base64 (0.3.0) bigdecimal (3.3.1) @@ -112,6 +116,7 @@ GEM standard-performance (1.8.0) lint_roller (~> 1.1) rubocop-performance (~> 1.25.0) + thor (1.4.0) unicode-display_width (3.2.0) unicode-emoji (~> 4.1) unicode-emoji (4.1.0) @@ -130,6 +135,7 @@ PLATFORMS x86_64-linux DEPENDENCIES + appraisal (~> 2.5) braintrust! minitest (~> 5.0) minitest-reporters (~> 1.6) diff --git a/Rakefile b/Rakefile index f6436137..48a048ba 100644 --- a/Rakefile +++ b/Rakefile @@ -9,12 +9,6 @@ Rake::TestTask.new(:test) do |t| t.warning = false end -desc "Run tests with verbose timing output" -task :"test:verbose" do - ENV["MT_VERBOSE"] = "1" - Rake::Task[:test].invoke -end - desc "Run Standard linter" task :lint do sh "bundle exec standardrb" @@ -70,13 +64,62 @@ task coverage: :test do end end -desc "Verify CI (lint + test)" -task ci: [:lint, :test] +desc "Verify CI (lint + test all appraisal scenarios)" +task ci: [:lint, :"test:appraisal"] task default: :ci -# VCR tasks for managing HTTP cassettes +# Test-related tasks namespace :test do + desc "Run tests with verbose timing output" + task :verbose do + ENV["MT_VERBOSE"] = "1" + Rake::Task[:test].invoke + end + + desc "Install optional test dependencies (e.g., openai gem)" + task :install do + puts "Installing optional test dependencies..." + sh "gem install openai -v '~> 0.34'" + puts "✓ Optional dependencies installed" + puts "" + puts "Now run 'rake test' to run tests with OpenAI integration" + end + + # Appraisal tasks for testing with/without optional dependencies + # Run directly: bundle exec appraisal [scenario] rake test + # List scenarios: bundle exec appraisal list + desc "Run tests against different dependencies" + task :appraisal do + sh "bundle exec appraisal rake test" + end + + namespace :appraisal do + desc "Show help for appraisal scenarios and usage" + task :help do + puts "\n=== Appraisal Test Scenarios ===" + puts "\nAvailable scenarios:" + sh "bundle exec appraisal list" + puts "\n=== Usage ===" + puts "Run specific scenario:" + puts " bundle exec appraisal rake test" + puts "" + puts "Example:" + puts " bundle exec appraisal openai-0.34 rake test" + puts "" + puts "Run all scenarios:" + puts " bundle exec appraisal rake test" + puts " or: rake test:appraisal" + puts "" + end + + desc "Install all appraisal gemfiles" + task :install do + sh "bundle exec appraisal install" + end + end + + # VCR tasks for managing HTTP cassettes namespace :vcr do desc "Re-record all VCR cassettes" task :record_all do diff --git a/braintrust.gemspec b/braintrust.gemspec index b751c848..b7ab8bf2 100644 --- a/braintrust.gemspec +++ b/braintrust.gemspec @@ -23,7 +23,6 @@ Gem::Specification.new do |spec| lib/**/*.rb README.md LICENSE - CHANGELOG.md ]) spec.bindir = "exe" spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) } @@ -44,7 +43,7 @@ Gem::Specification.new do |spec| spec.add_development_dependency "rake", "~> 13.0" spec.add_development_dependency "standard", "~> 1.0" spec.add_development_dependency "simplecov", "~> 0.22" - spec.add_development_dependency "openai", "~> 0.34" spec.add_development_dependency "vcr", "~> 6.0" spec.add_development_dependency "webmock", "~> 3.0" + spec.add_development_dependency "appraisal", "~> 2.5" end diff --git a/gemfiles/openai_0.33.gemfile b/gemfiles/openai_0.33.gemfile new file mode 100644 index 00000000..1b8d2d23 --- /dev/null +++ b/gemfiles/openai_0.33.gemfile @@ -0,0 +1,14 @@ +# This file was generated by Appraisal + +source "https://rubygems.org" + +gem "rake", "~> 13.0" +gem "minitest", "~> 5.0" +gem "minitest-reporters", "~> 1.6" +gem "standard", "~> 1.0" +gem "simplecov", "~> 0.22" +gem "vcr", "~> 6.0" +gem "webmock", "~> 3.0" +gem "openai", "~> 0.33.0" + +gemspec path: "../" diff --git a/gemfiles/openai_0.34.gemfile b/gemfiles/openai_0.34.gemfile new file mode 100644 index 00000000..24b39dc5 --- /dev/null +++ b/gemfiles/openai_0.34.gemfile @@ -0,0 +1,14 @@ +# This file was generated by Appraisal + +source "https://rubygems.org" + +gem "rake", "~> 13.0" +gem "minitest", "~> 5.0" +gem "minitest-reporters", "~> 1.6" +gem "standard", "~> 1.0" +gem "simplecov", "~> 0.22" +gem "vcr", "~> 6.0" +gem "webmock", "~> 3.0" +gem "openai", "~> 0.34.0" + +gemspec path: "../" diff --git a/gemfiles/openai_latest.gemfile b/gemfiles/openai_latest.gemfile new file mode 100644 index 00000000..54f1dc79 --- /dev/null +++ b/gemfiles/openai_latest.gemfile @@ -0,0 +1,14 @@ +# This file was generated by Appraisal + +source "https://rubygems.org" + +gem "rake", "~> 13.0" +gem "minitest", "~> 5.0" +gem "minitest-reporters", "~> 1.6" +gem "standard", "~> 1.0" +gem "simplecov", "~> 0.22" +gem "vcr", "~> 6.0" +gem "webmock", "~> 3.0" +gem "openai", ">= 0.34" + +gemspec path: "../" diff --git a/gemfiles/openai_uninstalled.gemfile b/gemfiles/openai_uninstalled.gemfile new file mode 100644 index 00000000..27153f41 --- /dev/null +++ b/gemfiles/openai_uninstalled.gemfile @@ -0,0 +1,13 @@ +# This file was generated by Appraisal + +source "https://rubygems.org" + +gem "rake", "~> 13.0" +gem "minitest", "~> 5.0" +gem "minitest-reporters", "~> 1.6" +gem "standard", "~> 1.0" +gem "simplecov", "~> 0.22" +gem "vcr", "~> 6.0" +gem "webmock", "~> 3.0" + +gemspec path: "../" diff --git a/lib/braintrust/trace.rb b/lib/braintrust/trace.rb index 62225a34..f88eb646 100644 --- a/lib/braintrust/trace.rb +++ b/lib/braintrust/trace.rb @@ -3,9 +3,16 @@ require "opentelemetry/sdk" require "opentelemetry/exporter/otlp" require_relative "trace/span_processor" -require_relative "trace/openai" require_relative "logger" +# OpenAI integration is optional - automatically loaded if openai gem is available +begin + require "openai" + require_relative "trace/contrib/openai" +rescue LoadError + # OpenAI gem not installed - integration will not be available +end + module Braintrust module Trace def self.enable(tracer_provider, state: nil, exporter: nil) diff --git a/lib/braintrust/trace/openai.rb b/lib/braintrust/trace/contrib/openai.rb similarity index 100% rename from lib/braintrust/trace/openai.rb rename to lib/braintrust/trace/contrib/openai.rb diff --git a/test/braintrust/trace/openai_test.rb b/test/braintrust/trace/openai_test.rb index 2afbdafd..67021090 100644 --- a/test/braintrust/trace/openai_test.rb +++ b/test/braintrust/trace/openai_test.rb @@ -4,6 +4,9 @@ class Braintrust::Trace::OpenAITest < Minitest::Test def setup + # Skip all OpenAI tests if the gem is not available + skip "OpenAI gem not available" unless defined?(OpenAI) + @api_key = ENV["OPENAI_API_KEY"] @original_api_key = ENV["OPENAI_API_KEY"] end diff --git a/test/braintrust/without_openai_test.rb b/test/braintrust/without_openai_test.rb new file mode 100644 index 00000000..f34ec4f2 --- /dev/null +++ b/test/braintrust/without_openai_test.rb @@ -0,0 +1,56 @@ +# frozen_string_literal: true + +require "test_helper" + +class WithoutOpenAITest < Minitest::Test + def test_sdk_loads_without_openai_gem + # This test verifies that the core SDK can be loaded without the openai gem + # Skip this test if we're in the with-openai appraisal + skip "Test only runs in without-openai appraisal" if openai_available? + + # If we got here, the SDK loaded successfully (via test_helper.rb) + assert true, "SDK loaded without openai gem" + end + + def test_core_functionality_without_openai + skip "Test only runs in without-openai appraisal" if openai_available? + + # Test that we can initialize Braintrust without tracing (no OpenAI needed) + state = Braintrust.init( + api_key: "test-key", + set_global: false, + blocking_login: false, + tracing: false + ) + + assert_instance_of Braintrust::State, state + assert_equal "test-key", state.api_key + end + + def test_openai_require_fails_without_gem + skip "Test only runs in without-openai appraisal" if openai_available? + + # Attempting to require openai should fail if gem not installed + assert_raises(LoadError) do + require "openai" + end + end + + def test_openai_trace_wrapper_not_available_without_gem + skip "Test only runs in without-openai appraisal" if openai_available? + + # The OpenAI trace wrapper should not be automatically loaded + # It should only load when explicitly required + refute defined?(OpenAI), "OpenAI should not be defined without the gem" + end + + private + + # Check if OpenAI gem is available (used for skipping tests in wrong appraisal) + def openai_available? + require "openai" + true + rescue LoadError + false + end +end From 69da05ef025896c1b0646632fbdcfd1843ae16c2 Mon Sep 17 00:00:00 2001 From: Matt Perpick Date: Fri, 24 Oct 2025 15:08:26 -0400 Subject: [PATCH 2/6] gemfile --- Gemfile.lock | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 40ffabef..a11a48e1 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -20,7 +20,6 @@ GEM base64 (0.3.0) bigdecimal (3.3.1) builder (3.3.0) - connection_pool (2.5.4) crack (1.0.1) bigdecimal rexml @@ -46,8 +45,6 @@ GEM builder minitest (>= 5.0) ruby-progressbar - openai (0.34.1) - connection_pool openssl (3.3.1) opentelemetry-api (1.7.0) opentelemetry-common (0.23.0) @@ -139,7 +136,6 @@ DEPENDENCIES braintrust! minitest (~> 5.0) minitest-reporters (~> 1.6) - openai (~> 0.34) rake (~> 13.0) simplecov (~> 0.22) standard (~> 1.0) From dad498a734a05b6809c09eea3f6ee5216486a97d Mon Sep 17 00:00:00 2001 From: Matt Perpick Date: Fri, 24 Oct 2025 15:42:49 -0400 Subject: [PATCH 3/6] ci --- Rakefile | 2 +- mise.toml | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/Rakefile b/Rakefile index 48a048ba..2226c08e 100644 --- a/Rakefile +++ b/Rakefile @@ -65,7 +65,7 @@ task coverage: :test do end desc "Verify CI (lint + test all appraisal scenarios)" -task ci: [:lint, :"test:appraisal"] +task ci: [:lint, :"test:appraisal:install", :"test:appraisal"] task default: :ci diff --git a/mise.toml b/mise.toml index 09b4a098..cd39c811 100644 --- a/mise.toml +++ b/mise.toml @@ -10,22 +10,23 @@ rust = "1.83" watchexec = "latest" [tasks.lint] -run = "bundle exec standardrb" +run = "bundle exec rake lint" [tasks."lint:fix"] -run = "bundle exec standardrb --fix" +run = "bundle exec rake lint:fix" [tasks.watch-test] description = "Runs tests when files change" run = "watchexec --exts rb --watch lib --watch test --restart --clear -- rake test" -[tasks.verify-fmt] -run = "bundle exec standardrb --format progress" +[tasks.precommit] +description = "Run linter before commit (full CI runs on push)" +run = "bundle exec rake lint" [hooks] postinstall = """ echo "Installing gem dependencies..." bundle install echo "Setting up git pre-commit hook..." -mise generate git-pre-commit --write --task=verify-fmt +mise generate git-pre-commit --write --task=precommit """ From 2f4624fa72c0ccb86b5328bceeb69ab0fa5ac9fb Mon Sep 17 00:00:00 2001 From: Matt Perpick Date: Fri, 24 Oct 2025 16:06:18 -0400 Subject: [PATCH 4/6] clean flaky tests --- test/braintrust/api/datasets_test.rb | 2 +- test/braintrust/api/functions_test.rb | 2 +- test/braintrust/api_test.rb | 10 ++++-- test/braintrust/eval/functions_test.rb | 2 +- test/braintrust/eval_test.rb | 36 +++++++------------- test/braintrust/internal/experiments_test.rb | 9 ++--- test/braintrust/state_login_test.rb | 2 ++ test/test_helper.rb | 9 +++++ 8 files changed, 36 insertions(+), 36 deletions(-) diff --git a/test/braintrust/api/datasets_test.rb b/test/braintrust/api/datasets_test.rb index d526d6c3..34dbae2c 100644 --- a/test/braintrust/api/datasets_test.rb +++ b/test/braintrust/api/datasets_test.rb @@ -9,7 +9,7 @@ def setup end def get_test_api - state = Braintrust.init(set_global: false, blocking_login: true) + state = get_non_global_state Braintrust::API.new(state: state) end diff --git a/test/braintrust/api/functions_test.rb b/test/braintrust/api/functions_test.rb index 453936ac..eccb5c83 100644 --- a/test/braintrust/api/functions_test.rb +++ b/test/braintrust/api/functions_test.rb @@ -9,7 +9,7 @@ def setup end def get_test_api - state = Braintrust.init(set_global: false, blocking_login: true) + state = get_non_global_state Braintrust::API.new(state: state) end diff --git a/test/braintrust/api_test.rb b/test/braintrust/api_test.rb index 3c8b0614..68752817 100644 --- a/test/braintrust/api_test.rb +++ b/test/braintrust/api_test.rb @@ -7,9 +7,13 @@ def setup flunk "BRAINTRUST_API_KEY not set" unless ENV["BRAINTRUST_API_KEY"] end + def teardown + Braintrust::State.instance_variable_set(:@global_state, nil) + end + def test_api_new_with_explicit_state VCR.use_cassette("api/new_explicit_state") do - state = Braintrust.init(set_global: false, blocking_login: true) + state = get_non_global_state api = Braintrust::API.new(state: state) assert_equal state, api.state @@ -41,7 +45,7 @@ def test_api_new_raises_without_state def test_api_datasets_returns_datasets_instance VCR.use_cassette("api/datasets_instance") do - state = Braintrust.init(set_global: false, blocking_login: true) + state = get_non_global_state api = Braintrust::API.new(state: state) datasets = api.datasets @@ -51,7 +55,7 @@ def test_api_datasets_returns_datasets_instance def test_api_datasets_is_memoized VCR.use_cassette("api/datasets_memoized") do - state = Braintrust.init(set_global: false, blocking_login: true) + state = get_non_global_state api = Braintrust::API.new(state: state) datasets1 = api.datasets diff --git a/test/braintrust/eval/functions_test.rb b/test/braintrust/eval/functions_test.rb index 8be1dff8..290a6e06 100644 --- a/test/braintrust/eval/functions_test.rb +++ b/test/braintrust/eval/functions_test.rb @@ -11,7 +11,7 @@ def setup end def get_test_state_and_api - state = Braintrust.init(set_global: false, blocking_login: true) + state = get_non_global_state api = Braintrust::API.new(state: state) [state, api] end diff --git a/test/braintrust/eval_test.rb b/test/braintrust/eval_test.rb index a6823eb5..f49c2234 100644 --- a/test/braintrust/eval_test.rb +++ b/test/braintrust/eval_test.rb @@ -16,8 +16,7 @@ def test_eval_scorer_helper def test_eval_run_basic VCR.use_cassette("eval/run_basic") do - Braintrust.init(blocking_login: true) - state = Braintrust.current_state + state = get_non_global_state task = ->(input) { input.upcase } scorer = Braintrust::Eval.scorer("exact") do |input, expected, output| @@ -46,8 +45,7 @@ def test_eval_run_basic def test_eval_run_with_task_error VCR.use_cassette("eval/run_task_error") do - Braintrust.init(blocking_login: true) - state = Braintrust.current_state + state = get_non_global_state task = ->(input) { raise "Task failed!" if input == "bad" @@ -79,8 +77,7 @@ def test_eval_run_with_task_error def test_eval_run_with_scorer_error VCR.use_cassette("eval/run_scorer_error") do - Braintrust.init(blocking_login: true) - state = Braintrust.current_state + state = get_non_global_state task = ->(input) { input.upcase } @@ -152,8 +149,7 @@ def test_eval_scorer_error_records_exception_event def test_eval_run_with_multiple_scorers VCR.use_cassette("eval/run_multiple_scorers") do - Braintrust.init(blocking_login: true) - state = Braintrust.current_state + state = get_non_global_state task = ->(input) { input.upcase } @@ -183,8 +179,7 @@ def test_eval_run_with_multiple_scorers def test_eval_run_with_callable_task VCR.use_cassette("eval/run_callable_task") do - Braintrust.init(blocking_login: true) - state = Braintrust.current_state + state = get_non_global_state callable_task = Class.new do def call(input) @@ -243,8 +238,7 @@ def test_eval_run_validates_task_callable def test_eval_run_with_method_scorer VCR.use_cassette("eval/run_method_scorer") do - Braintrust.init(blocking_login: true) - state = Braintrust.current_state + state = get_non_global_state task = ->(input) { input.upcase } # Use a lambda instead of nested method @@ -314,8 +308,7 @@ def test_eval_run_with_tracing rig = setup_otel_test_rig # Initialize and login - Braintrust.init(blocking_login: true) - state = Braintrust.current_state + state = get_non_global_state task = ->(input) { input.upcase } scorer = Braintrust::Eval.scorer("exact") { |i, e, o| (o == e) ? 1.0 : 0.0 } @@ -366,8 +359,7 @@ def test_eval_run_with_tracing # Test dataset integration: dataset as string (same project as experiment) def test_eval_run_with_dataset_string VCR.use_cassette("eval/dataset_string") do - Braintrust.init(blocking_login: true) - state = Braintrust.current_state + state = get_non_global_state api = Braintrust::API.new(state: state) # Create a test dataset with records @@ -417,8 +409,7 @@ def test_eval_run_with_dataset_string # Test dataset integration: dataset as hash with name + project def test_eval_run_with_dataset_hash_name_project VCR.use_cassette("eval/dataset_hash_name_project") do - Braintrust.init(blocking_login: true) - state = Braintrust.current_state + state = get_non_global_state api = Braintrust::API.new(state: state) # Create a test dataset @@ -458,8 +449,7 @@ def test_eval_run_with_dataset_hash_name_project # Test dataset integration: dataset as hash with id def test_eval_run_with_dataset_hash_id VCR.use_cassette("eval/dataset_hash_id") do - Braintrust.init(blocking_login: true) - state = Braintrust.current_state + state = get_non_global_state api = Braintrust::API.new(state: state) # Create a test dataset @@ -499,8 +489,7 @@ def test_eval_run_with_dataset_hash_id # Test dataset integration: dataset with limit option def test_eval_run_with_dataset_limit VCR.use_cassette("eval/dataset_limit") do - Braintrust.init(blocking_login: true) - state = Braintrust.current_state + state = get_non_global_state api = Braintrust::API.new(state: state) # Create a test dataset with multiple records @@ -552,8 +541,7 @@ def test_eval_run_with_dataset_limit # Test dataset integration: error when both dataset and cases provided def test_eval_run_with_both_dataset_and_cases_errors VCR.use_cassette("eval/run_both_dataset_and_cases_error") do - Braintrust.init(blocking_login: true) - state = Braintrust.current_state + state = get_non_global_state task = ->(input) { input.upcase } scorer = Braintrust::Eval.scorer("exact") { |i, e, o| (o == e) ? 1.0 : 0.0 } diff --git a/test/braintrust/internal/experiments_test.rb b/test/braintrust/internal/experiments_test.rb index aff745b6..f7190ef7 100644 --- a/test/braintrust/internal/experiments_test.rb +++ b/test/braintrust/internal/experiments_test.rb @@ -6,8 +6,7 @@ class Braintrust::Internal::ExperimentsTest < Minitest::Test def test_get_or_create_basic VCR.use_cassette("experiments/get_or_create_basic") do - Braintrust.init(blocking_login: true) - state = Braintrust.current_state + state = get_non_global_state result = Braintrust::Internal::Experiments.get_or_create( "test-ruby-sdk-experiment-basic", @@ -24,8 +23,7 @@ def test_get_or_create_basic def test_get_or_create_with_tags_and_metadata VCR.use_cassette("experiments/get_or_create_with_tags") do - Braintrust.init(blocking_login: true) - state = Braintrust.current_state + state = get_non_global_state result = Braintrust::Internal::Experiments.get_or_create( "test-ruby-sdk-experiment-tags", @@ -42,8 +40,7 @@ def test_get_or_create_with_tags_and_metadata def test_get_or_create_with_update_flag VCR.use_cassette("experiments/get_or_create_with_update") do - Braintrust.init(blocking_login: true) - state = Braintrust.current_state + state = get_non_global_state # First create with update: false (new experiment) result1 = Braintrust::Internal::Experiments.get_or_create( diff --git a/test/braintrust/state_login_test.rb b/test/braintrust/state_login_test.rb index e7576c84..01f252a4 100644 --- a/test/braintrust/state_login_test.rb +++ b/test/braintrust/state_login_test.rb @@ -4,6 +4,8 @@ class Braintrust::StateLoginTest < Minitest::Test def setup + # Clear any global state from previous tests to ensure isolation + Braintrust::State.instance_variable_set(:@global_state, nil) @api_key = ENV["BRAINTRUST_API_KEY"] assert @api_key, "BRAINTRUST_API_KEY environment variable is required for login tests" end diff --git a/test/test_helper.rb b/test/test_helper.rb index c49732f7..56ad5a64 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -109,6 +109,15 @@ def get_test_state(**options) state end + # Creates a non-global State by calling Braintrust.init with set_global: false and blocking_login: true + # This performs login (via VCR cassettes in tests) without polluting global state + # Use this for tests that need to interact with the API (eval, experiments, datasets, etc.) + # @param options [Hash] Options to pass to Braintrust.init (set_global and blocking_login are fixed) + # @return [Braintrust::State] + def get_non_global_state(**options) + Braintrust.init(set_global: false, blocking_login: true, **options) + end + # Sets up OpenTelemetry with an in-memory exporter for testing # Returns an OtelTestRig with tracer_provider, exporter, state, and drain() method # The exporter can be passed to Braintrust::Trace.enable to replace OTLP exporter From 9d2531bbfbe656ca83c66fa5711045e359342512 Mon Sep 17 00:00:00 2001 From: Matt Perpick Date: Fri, 24 Oct 2025 16:10:17 -0400 Subject: [PATCH 5/6] flaky test --- test/braintrust/api_test.rb | 4 ---- test/braintrust/state_login_test.rb | 6 ------ test/braintrust_test.rb | 6 +++--- test/test_helper.rb | 7 +++++++ 4 files changed, 10 insertions(+), 13 deletions(-) diff --git a/test/braintrust/api_test.rb b/test/braintrust/api_test.rb index 68752817..0d21a9d8 100644 --- a/test/braintrust/api_test.rb +++ b/test/braintrust/api_test.rb @@ -7,10 +7,6 @@ def setup flunk "BRAINTRUST_API_KEY not set" unless ENV["BRAINTRUST_API_KEY"] end - def teardown - Braintrust::State.instance_variable_set(:@global_state, nil) - end - def test_api_new_with_explicit_state VCR.use_cassette("api/new_explicit_state") do state = get_non_global_state diff --git a/test/braintrust/state_login_test.rb b/test/braintrust/state_login_test.rb index 01f252a4..ad7484c7 100644 --- a/test/braintrust/state_login_test.rb +++ b/test/braintrust/state_login_test.rb @@ -4,16 +4,10 @@ class Braintrust::StateLoginTest < Minitest::Test def setup - # Clear any global state from previous tests to ensure isolation - Braintrust::State.instance_variable_set(:@global_state, nil) @api_key = ENV["BRAINTRUST_API_KEY"] assert @api_key, "BRAINTRUST_API_KEY environment variable is required for login tests" end - def teardown - Braintrust::State.instance_variable_set(:@global_state, nil) - end - def test_login_fetches_org_info VCR.use_cassette("auth/login_success") do state = Braintrust::State.new( diff --git a/test/braintrust_test.rb b/test/braintrust_test.rb index c22cbdf5..168e42e9 100644 --- a/test/braintrust_test.rb +++ b/test/braintrust_test.rb @@ -16,9 +16,6 @@ def setup end def teardown - # Reset global state after each test - Braintrust::State.instance_variable_set(:@global_state, nil) - # Reset global tracer provider to default proxy OpenTelemetry.tracer_provider = OpenTelemetry::Internal::ProxyTracerProvider.new @@ -34,6 +31,9 @@ def teardown else ENV.delete("BRAINTRUST_DEFAULT_PROJECT") end + + # Call parent teardown (includes global state cleanup from test_helper) + super end def test_init_sets_global_state_by_default diff --git a/test/test_helper.rb b/test/test_helper.rb index 56ad5a64..7a6889a1 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -162,4 +162,11 @@ def unique_name(prefix = "") # Include helper in all test cases class Minitest::Test include TracingTestHelper + + # Use Minitest hooks to clear global state after every test + # This ensures cleanup happens even if individual tests don't have teardown methods + def after_teardown + Braintrust::State.instance_variable_set(:@global_state, nil) + super + end end From c71fb59d737b6778dea4077bc0522cd89753b303 Mon Sep 17 00:00:00 2001 From: Matt Perpick Date: Fri, 24 Oct 2025 16:17:03 -0400 Subject: [PATCH 6/6] tweak --- test/test_helper.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_helper.rb b/test/test_helper.rb index 7a6889a1..c53789c1 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -46,7 +46,8 @@ # Recording mode: :once (default), :all (re-record), :none (no recording) config.default_cassette_options = { record: ENV["VCR_MODE"]&.to_sym || :once, - match_requests_on: [:method, :uri] # Don't match on body (contains dynamic data) + match_requests_on: [:method, :uri], # Don't match on body (contains dynamic data) + allow_playback_repeats: true # Allow same HTTP interaction to be replayed multiple times } end