From 5245efbbce6952f74fcde49a9c912f74af2c15f3 Mon Sep 17 00:00:00 2001
From: Matt Perpick <matt@braintrustdata.com>
Date: Fri, 24 Oct 2025 14:56:21 -0400
Subject: [PATCH 1/6] Run tests against different dependencies.

---
 .github/workflows/ci.yml                     |  7 ---
 .gitignore                                   |  3 +
 Appraisals                                   | 21 +++++++
 Gemfile.lock                                 |  6 ++
 Rakefile                                     | 61 +++++++++++++++++---
 braintrust.gemspec                           |  3 +-
 gemfiles/openai_0.33.gemfile                 | 14 +++++
 gemfiles/openai_0.34.gemfile                 | 14 +++++
 gemfiles/openai_latest.gemfile               | 14 +++++
 gemfiles/openai_uninstalled.gemfile          | 13 +++++
 lib/braintrust/trace.rb                      |  9 ++-
 lib/braintrust/trace/{ => contrib}/openai.rb |  0
 test/braintrust/trace/openai_test.rb         |  3 +
 test/braintrust/without_openai_test.rb       | 56 ++++++++++++++++++
 14 files changed, 205 insertions(+), 19 deletions(-)
 create mode 100644 Appraisals
 create mode 100644 gemfiles/openai_0.33.gemfile
 create mode 100644 gemfiles/openai_0.34.gemfile
 create mode 100644 gemfiles/openai_latest.gemfile
 create mode 100644 gemfiles/openai_uninstalled.gemfile
 rename lib/braintrust/trace/{ => contrib}/openai.rb (100%)
 create mode 100644 test/braintrust/without_openai_test.rb

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 103128fb..416ef7bc 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -29,13 +29,6 @@ jobs:
         BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
 
-    - name: Upload coverage to Codecov
-      uses: codecov/codecov-action@v4
-      if: matrix.ruby-version == '3.4' && matrix.os == 'ubuntu-latest'
-      with:
-        files: ./coverage/.resultset.json
-        fail_ci_if_error: false
-
   # Summary job that requires all matrix tests to pass
   # This is what branch protection will check
   ci-success:
diff --git a/.gitignore b/.gitignore
index 8cdb3e38..54a06cbf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -48,3 +48,6 @@
 
 # Release artifacts
 changelog.md
+
+# Appraisal: commit gemfiles but ignore lock files
+gemfiles/*.gemfile.lock
diff --git a/Appraisals b/Appraisals
new file mode 100644
index 00000000..98f23e52
--- /dev/null
+++ b/Appraisals
@@ -0,0 +1,21 @@
+# frozen_string_literal: true
+
+# Test with OpenAI gem 0.33.x (previous stable version)
+appraise "openai-0.33" do
+  gem "openai", "~> 0.33.0"
+end
+
+# Test with current stable OpenAI gem version
+appraise "openai-0.34" do
+  gem "openai", "~> 0.34.0"
+end
+
+# Test with latest OpenAI gem version (no upper bound: any release >= 0.34)
+appraise "openai-latest" do
+  gem "openai", ">= 0.34"
+end
+
+# Test without OpenAI gem (verify SDK works without optional dependency)
+appraise "openai-uninstalled" do
+  remove_gem "openai"
+end
diff --git a/Gemfile.lock b/Gemfile.lock
index 4a64e3d5..40ffabef 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -12,6 +12,10 @@ GEM
     addressable (2.8.7)
       public_suffix (>= 2.0.2, < 7.0)
     ansi (1.5.0)
+    appraisal (2.5.0)
+      bundler
+      rake
+      thor (>= 0.14.0)
     ast (2.4.3)
     base64 (0.3.0)
     bigdecimal (3.3.1)
@@ -112,6 +116,7 @@ GEM
     standard-performance (1.8.0)
       lint_roller (~> 1.1)
       rubocop-performance (~> 1.25.0)
+    thor (1.4.0)
     unicode-display_width (3.2.0)
       unicode-emoji (~> 4.1)
     unicode-emoji (4.1.0)
@@ -130,6 +135,7 @@ PLATFORMS
   x86_64-linux
 
 DEPENDENCIES
+  appraisal (~> 2.5)
   braintrust!
   minitest (~> 5.0)
   minitest-reporters (~> 1.6)
diff --git a/Rakefile b/Rakefile
index f6436137..48a048ba 100644
--- a/Rakefile
+++ b/Rakefile
@@ -9,12 +9,6 @@ Rake::TestTask.new(:test) do |t|
   t.warning = false
 end
 
-desc "Run tests with verbose timing output"
-task :"test:verbose" do
-  ENV["MT_VERBOSE"] = "1"
-  Rake::Task[:test].invoke
-end
-
 desc "Run Standard linter"
 task :lint do
   sh "bundle exec standardrb"
@@ -70,13 +64,62 @@ task coverage: :test do
   end
 end
 
-desc "Verify CI (lint + test)"
-task ci: [:lint, :test]
+desc "Verify CI (lint + test all appraisal scenarios)"
+task ci: [:lint, :"test:appraisal"]
 
 task default: :ci
 
-# VCR tasks for managing HTTP cassettes
+# Test-related tasks
 namespace :test do
+  desc "Run tests with verbose timing output"
+  task :verbose do
+    ENV["MT_VERBOSE"] = "1"
+    Rake::Task[:test].invoke
+  end
+
+  desc "Install optional test dependencies (e.g., openai gem)"
+  task :install do
+    puts "Installing optional test dependencies..."
+    sh "gem install openai -v '~> 0.34'"
+    puts "✓ Optional dependencies installed"
+    puts ""
+    puts "Now run 'rake test' to run tests with OpenAI integration"
+  end
+
+  # Appraisal tasks for testing with/without optional dependencies
+  # Run directly: bundle exec appraisal [scenario] rake test
+  # List scenarios: bundle exec appraisal list
+  desc "Run tests against different dependencies"
+  task :appraisal do
+    sh "bundle exec appraisal rake test"
+  end
+
+  namespace :appraisal do
+    desc "Show help for appraisal scenarios and usage"
+    task :help do
+      puts "\n=== Appraisal Test Scenarios ==="
+      puts "\nAvailable scenarios:"
+      sh "bundle exec appraisal list"
+      puts "\n=== Usage ==="
+      puts "Run specific scenario:"
+      puts "  bundle exec appraisal <scenario> rake test"
+      puts ""
+      puts "Example:"
+      puts "  bundle exec appraisal openai-0.34 rake test"
+      puts ""
+      puts "Run all scenarios:"
+      puts "  bundle exec appraisal rake test"
+      puts "  or: rake test:appraisal"
+      puts ""
+    end
+
+    desc "Install all appraisal gemfiles"
+    task :install do
+      sh "bundle exec appraisal install"
+    end
+  end
+
+  # VCR tasks for managing HTTP cassettes
   namespace :vcr do
     desc "Re-record all VCR cassettes"
     task :record_all do
diff --git a/braintrust.gemspec b/braintrust.gemspec
index b751c848..b7ab8bf2 100644
--- a/braintrust.gemspec
+++ b/braintrust.gemspec
@@ -23,7 +23,6 @@ Gem::Specification.new do |spec|
     lib/**/*.rb
     README.md
     LICENSE
-    CHANGELOG.md
   ])
   spec.bindir = "exe"
   spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
@@ -44,7 +43,7 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency "rake", "~> 13.0"
   spec.add_development_dependency "standard", "~> 1.0"
   spec.add_development_dependency "simplecov", "~> 0.22"
-  spec.add_development_dependency "openai", "~> 0.34"
   spec.add_development_dependency "vcr", "~> 6.0"
   spec.add_development_dependency "webmock", "~> 3.0"
+  spec.add_development_dependency "appraisal", "~> 2.5"
 end
diff --git a/gemfiles/openai_0.33.gemfile b/gemfiles/openai_0.33.gemfile
new file mode 100644
index 00000000..1b8d2d23
--- /dev/null
+++ b/gemfiles/openai_0.33.gemfile
@@ -0,0 +1,14 @@
+# This file was generated by Appraisal
+
+source "https://rubygems.org"
+
+gem "rake", "~> 13.0"
+gem "minitest", "~> 5.0"
+gem "minitest-reporters", "~> 1.6"
+gem "standard", "~> 1.0"
+gem "simplecov", "~> 0.22"
+gem "vcr", "~> 6.0"
+gem "webmock", "~> 3.0"
+gem "openai", "~> 0.33.0"
+
+gemspec path: "../"
diff --git a/gemfiles/openai_0.34.gemfile b/gemfiles/openai_0.34.gemfile
new file mode 100644
index 00000000..24b39dc5
--- /dev/null
+++ b/gemfiles/openai_0.34.gemfile
@@ -0,0 +1,14 @@
+# This file was generated by Appraisal
+
+source "https://rubygems.org"
+
+gem "rake", "~> 13.0"
+gem "minitest", "~> 5.0"
+gem "minitest-reporters", "~> 1.6"
+gem "standard", "~> 1.0"
+gem "simplecov", "~> 0.22"
+gem "vcr", "~> 6.0"
+gem "webmock", "~> 3.0"
+gem "openai", "~> 0.34.0"
+
+gemspec path: "../"
diff --git a/gemfiles/openai_latest.gemfile b/gemfiles/openai_latest.gemfile
new file mode 100644
index 00000000..54f1dc79
--- /dev/null
+++ b/gemfiles/openai_latest.gemfile
@@ -0,0 +1,14 @@
+# This file was generated by Appraisal
+
+source "https://rubygems.org"
+
+gem "rake", "~> 13.0"
+gem "minitest", "~> 5.0"
+gem "minitest-reporters", "~> 1.6"
+gem "standard", "~> 1.0"
+gem "simplecov", "~> 0.22"
+gem "vcr", "~> 6.0"
+gem "webmock", "~> 3.0"
+gem "openai", ">= 0.34"
+
+gemspec path: "../"
diff --git a/gemfiles/openai_uninstalled.gemfile b/gemfiles/openai_uninstalled.gemfile
new file mode 100644
index 00000000..27153f41
--- /dev/null
+++ b/gemfiles/openai_uninstalled.gemfile
@@ -0,0 +1,13 @@
+# This file was generated by Appraisal
+
+source "https://rubygems.org"
+
+gem "rake", "~> 13.0"
+gem "minitest", "~> 5.0"
+gem "minitest-reporters", "~> 1.6"
+gem "standard", "~> 1.0"
+gem "simplecov", "~> 0.22"
+gem "vcr", "~> 6.0"
+gem "webmock", "~> 3.0"
+
+gemspec path: "../"
diff --git a/lib/braintrust/trace.rb b/lib/braintrust/trace.rb
index 62225a34..f88eb646 100644
--- a/lib/braintrust/trace.rb
+++ b/lib/braintrust/trace.rb
@@ -3,9 +3,16 @@
 require "opentelemetry/sdk"
 require "opentelemetry/exporter/otlp"
 require_relative "trace/span_processor"
-require_relative "trace/openai"
 require_relative "logger"
 
+# OpenAI integration is optional - automatically loaded if openai gem is available
+begin
+  require "openai"
+  require_relative "trace/contrib/openai"
+rescue LoadError => e
+  raise unless e.path == "openai" # tolerate only a missing openai gem; surface a broken integration
+end
+
 module Braintrust
   module Trace
     def self.enable(tracer_provider, state: nil, exporter: nil)
diff --git a/lib/braintrust/trace/openai.rb b/lib/braintrust/trace/contrib/openai.rb
similarity index 100%
rename from lib/braintrust/trace/openai.rb
rename to lib/braintrust/trace/contrib/openai.rb
diff --git a/test/braintrust/trace/openai_test.rb b/test/braintrust/trace/openai_test.rb
index 2afbdafd..67021090 100644
--- a/test/braintrust/trace/openai_test.rb
+++ b/test/braintrust/trace/openai_test.rb
@@ -4,6 +4,9 @@
 
 class Braintrust::Trace::OpenAITest < Minitest::Test
   def setup
+    # Skip all OpenAI tests if the gem is not available
+    skip "OpenAI gem not available" unless defined?(OpenAI)
+
     @api_key = ENV["OPENAI_API_KEY"]
     @original_api_key = ENV["OPENAI_API_KEY"]
   end
diff --git a/test/braintrust/without_openai_test.rb b/test/braintrust/without_openai_test.rb
new file mode 100644
index 00000000..f34ec4f2
--- /dev/null
+++ b/test/braintrust/without_openai_test.rb
@@ -0,0 +1,56 @@
+# frozen_string_literal: true
+
+require "test_helper"
+
+class WithoutOpenAITest < Minitest::Test
+  def test_sdk_loads_without_openai_gem
+    # This test verifies that the core SDK can be loaded without the openai gem
+    # Skipped whenever openai can be required, i.e. outside the openai-uninstalled appraisal
+    skip "Test only runs in without-openai appraisal" if openai_available?
+
+    # If we got here, the SDK loaded successfully (via test_helper.rb)
+    assert true, "SDK loaded without openai gem"
+  end
+
+  def test_core_functionality_without_openai
+    skip "Test only runs in without-openai appraisal" if openai_available?
+
+    # Test that we can initialize Braintrust without tracing (no OpenAI needed)
+    state = Braintrust.init(
+      api_key: "test-key",
+      set_global: false,
+      blocking_login: false,
+      tracing: false
+    )
+
+    assert_instance_of Braintrust::State, state
+    assert_equal "test-key", state.api_key
+  end
+
+  def test_openai_require_fails_without_gem
+    skip "Test only runs in without-openai appraisal" if openai_available?
+
+    # Attempting to require openai should fail if gem not installed
+    assert_raises(LoadError) do
+      require "openai"
+    end
+  end
+
+  def test_openai_trace_wrapper_not_available_without_gem
+    skip "Test only runs in without-openai appraisal" if openai_available?
+
+    # NOTE(review): this checks only the top-level OpenAI constant, not the Braintrust
+    # trace wrapper itself, which trace.rb loads only when `require "openai"` succeeds
+    refute defined?(OpenAI), "OpenAI should not be defined without the gem"
+  end
+
+  private
+
+  # True when `require "openai"` succeeds, false on LoadError (used to skip tests in the wrong appraisal)
+  def openai_available?
+    require "openai"
+    true
+  rescue LoadError
+    false
+  end
+end

From 69da05ef025896c1b0646632fbdcfd1843ae16c2 Mon Sep 17 00:00:00 2001
From: Matt Perpick <matt@braintrustdata.com>
Date: Fri, 24 Oct 2025 15:08:26 -0400
Subject: [PATCH 2/6] gemfile

---
 Gemfile.lock | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/Gemfile.lock b/Gemfile.lock
index 40ffabef..a11a48e1 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -20,7 +20,6 @@ GEM
     base64 (0.3.0)
     bigdecimal (3.3.1)
     builder (3.3.0)
-    connection_pool (2.5.4)
     crack (1.0.1)
       bigdecimal
       rexml
@@ -46,8 +45,6 @@ GEM
       builder
       minitest (>= 5.0)
       ruby-progressbar
-    openai (0.34.1)
-      connection_pool
     openssl (3.3.1)
     opentelemetry-api (1.7.0)
     opentelemetry-common (0.23.0)
@@ -139,7 +136,6 @@ DEPENDENCIES
   braintrust!
   minitest (~> 5.0)
   minitest-reporters (~> 1.6)
-  openai (~> 0.34)
   rake (~> 13.0)
   simplecov (~> 0.22)
   standard (~> 1.0)

From dad498a734a05b6809c09eea3f6ee5216486a97d Mon Sep 17 00:00:00 2001
From: Matt Perpick <matt@braintrustdata.com>
Date: Fri, 24 Oct 2025 15:42:49 -0400
Subject: [PATCH 3/6] ci

---
 Rakefile  |  2 +-
 mise.toml | 11 ++++++-----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/Rakefile b/Rakefile
index 48a048ba..2226c08e 100644
--- a/Rakefile
+++ b/Rakefile
@@ -65,7 +65,7 @@ task coverage: :test do
 end
 
 desc "Verify CI (lint + test all appraisal scenarios)"
-task ci: [:lint, :"test:appraisal"]
+task ci: [:lint, :"test:appraisal:install", :"test:appraisal"]
 
 task default: :ci
 
diff --git a/mise.toml b/mise.toml
index 09b4a098..cd39c811 100644
--- a/mise.toml
+++ b/mise.toml
@@ -10,22 +10,23 @@ rust = "1.83"
 watchexec = "latest"
 
 [tasks.lint]
-run = "bundle exec standardrb"
+run = "bundle exec rake lint"
 
 [tasks."lint:fix"]
-run = "bundle exec standardrb --fix"
+run = "bundle exec rake lint:fix"
 
 [tasks.watch-test]
 description = "Runs tests when files change"
 run = "watchexec --exts rb --watch lib --watch test --restart --clear -- rake test"
 
-[tasks.verify-fmt]
-run = "bundle exec standardrb --format progress"
+[tasks.precommit]
+description = "Run linter before commit (full CI runs on push)"
+run = "bundle exec rake lint"
 
 [hooks]
 postinstall = """
 echo "Installing gem dependencies..."
 bundle install
 echo "Setting up git pre-commit hook..."
-mise generate git-pre-commit --write --task=verify-fmt
+mise generate git-pre-commit --write --task=precommit
 """

From 2f4624fa72c0ccb86b5328bceeb69ab0fa5ac9fb Mon Sep 17 00:00:00 2001
From: Matt Perpick <matt@braintrustdata.com>
Date: Fri, 24 Oct 2025 16:06:18 -0400
Subject: [PATCH 4/6] clean flaky tests

---
 test/braintrust/api/datasets_test.rb         |  2 +-
 test/braintrust/api/functions_test.rb        |  2 +-
 test/braintrust/api_test.rb                  | 10 ++++--
 test/braintrust/eval/functions_test.rb       |  2 +-
 test/braintrust/eval_test.rb                 | 36 +++++++-------------
 test/braintrust/internal/experiments_test.rb |  9 ++---
 test/braintrust/state_login_test.rb          |  2 ++
 test/test_helper.rb                          |  9 +++++
 8 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/test/braintrust/api/datasets_test.rb b/test/braintrust/api/datasets_test.rb
index d526d6c3..34dbae2c 100644
--- a/test/braintrust/api/datasets_test.rb
+++ b/test/braintrust/api/datasets_test.rb
@@ -9,7 +9,7 @@ def setup
   end
 
   def get_test_api
-    state = Braintrust.init(set_global: false, blocking_login: true)
+    state = get_non_global_state
     Braintrust::API.new(state: state)
   end
 
diff --git a/test/braintrust/api/functions_test.rb b/test/braintrust/api/functions_test.rb
index 453936ac..eccb5c83 100644
--- a/test/braintrust/api/functions_test.rb
+++ b/test/braintrust/api/functions_test.rb
@@ -9,7 +9,7 @@ def setup
   end
 
   def get_test_api
-    state = Braintrust.init(set_global: false, blocking_login: true)
+    state = get_non_global_state
     Braintrust::API.new(state: state)
   end
 
diff --git a/test/braintrust/api_test.rb b/test/braintrust/api_test.rb
index 3c8b0614..68752817 100644
--- a/test/braintrust/api_test.rb
+++ b/test/braintrust/api_test.rb
@@ -7,9 +7,13 @@ def setup
     flunk "BRAINTRUST_API_KEY not set" unless ENV["BRAINTRUST_API_KEY"]
   end
 
+  def teardown
+    Braintrust::State.instance_variable_set(:@global_state, nil)
+  end
+
   def test_api_new_with_explicit_state
     VCR.use_cassette("api/new_explicit_state") do
-      state = Braintrust.init(set_global: false, blocking_login: true)
+      state = get_non_global_state
 
       api = Braintrust::API.new(state: state)
       assert_equal state, api.state
@@ -41,7 +45,7 @@ def test_api_new_raises_without_state
 
   def test_api_datasets_returns_datasets_instance
     VCR.use_cassette("api/datasets_instance") do
-      state = Braintrust.init(set_global: false, blocking_login: true)
+      state = get_non_global_state
       api = Braintrust::API.new(state: state)
 
       datasets = api.datasets
@@ -51,7 +55,7 @@ def test_api_datasets_returns_datasets_instance
 
   def test_api_datasets_is_memoized
     VCR.use_cassette("api/datasets_memoized") do
-      state = Braintrust.init(set_global: false, blocking_login: true)
+      state = get_non_global_state
       api = Braintrust::API.new(state: state)
 
       datasets1 = api.datasets
diff --git a/test/braintrust/eval/functions_test.rb b/test/braintrust/eval/functions_test.rb
index 8be1dff8..290a6e06 100644
--- a/test/braintrust/eval/functions_test.rb
+++ b/test/braintrust/eval/functions_test.rb
@@ -11,7 +11,7 @@ def setup
   end
 
   def get_test_state_and_api
-    state = Braintrust.init(set_global: false, blocking_login: true)
+    state = get_non_global_state
     api = Braintrust::API.new(state: state)
     [state, api]
   end
diff --git a/test/braintrust/eval_test.rb b/test/braintrust/eval_test.rb
index a6823eb5..f49c2234 100644
--- a/test/braintrust/eval_test.rb
+++ b/test/braintrust/eval_test.rb
@@ -16,8 +16,7 @@ def test_eval_scorer_helper
 
   def test_eval_run_basic
     VCR.use_cassette("eval/run_basic") do
-      Braintrust.init(blocking_login: true)
-      state = Braintrust.current_state
+      state = get_non_global_state
 
       task = ->(input) { input.upcase }
       scorer = Braintrust::Eval.scorer("exact") do |input, expected, output|
@@ -46,8 +45,7 @@ def test_eval_run_basic
 
   def test_eval_run_with_task_error
     VCR.use_cassette("eval/run_task_error") do
-      Braintrust.init(blocking_login: true)
-      state = Braintrust.current_state
+      state = get_non_global_state
 
       task = ->(input) {
         raise "Task failed!" if input == "bad"
@@ -79,8 +77,7 @@ def test_eval_run_with_task_error
 
   def test_eval_run_with_scorer_error
     VCR.use_cassette("eval/run_scorer_error") do
-      Braintrust.init(blocking_login: true)
-      state = Braintrust.current_state
+      state = get_non_global_state
 
       task = ->(input) { input.upcase }
 
@@ -152,8 +149,7 @@ def test_eval_scorer_error_records_exception_event
 
   def test_eval_run_with_multiple_scorers
     VCR.use_cassette("eval/run_multiple_scorers") do
-      Braintrust.init(blocking_login: true)
-      state = Braintrust.current_state
+      state = get_non_global_state
 
       task = ->(input) { input.upcase }
 
@@ -183,8 +179,7 @@ def test_eval_run_with_multiple_scorers
 
   def test_eval_run_with_callable_task
     VCR.use_cassette("eval/run_callable_task") do
-      Braintrust.init(blocking_login: true)
-      state = Braintrust.current_state
+      state = get_non_global_state
 
       callable_task = Class.new do
         def call(input)
@@ -243,8 +238,7 @@ def test_eval_run_validates_task_callable
 
   def test_eval_run_with_method_scorer
     VCR.use_cassette("eval/run_method_scorer") do
-      Braintrust.init(blocking_login: true)
-      state = Braintrust.current_state
+      state = get_non_global_state
 
       task = ->(input) { input.upcase }
       # Use a lambda instead of nested method
@@ -314,8 +308,7 @@ def test_eval_run_with_tracing
       rig = setup_otel_test_rig
 
       # Initialize and login
-      Braintrust.init(blocking_login: true)
-      state = Braintrust.current_state
+      state = get_non_global_state
 
       task = ->(input) { input.upcase }
       scorer = Braintrust::Eval.scorer("exact") { |i, e, o| (o == e) ? 1.0 : 0.0 }
@@ -366,8 +359,7 @@ def test_eval_run_with_tracing
   # Test dataset integration: dataset as string (same project as experiment)
   def test_eval_run_with_dataset_string
     VCR.use_cassette("eval/dataset_string") do
-      Braintrust.init(blocking_login: true)
-      state = Braintrust.current_state
+      state = get_non_global_state
       api = Braintrust::API.new(state: state)
 
       # Create a test dataset with records
@@ -417,8 +409,7 @@ def test_eval_run_with_dataset_string
   # Test dataset integration: dataset as hash with name + project
   def test_eval_run_with_dataset_hash_name_project
     VCR.use_cassette("eval/dataset_hash_name_project") do
-      Braintrust.init(blocking_login: true)
-      state = Braintrust.current_state
+      state = get_non_global_state
       api = Braintrust::API.new(state: state)
 
       # Create a test dataset
@@ -458,8 +449,7 @@ def test_eval_run_with_dataset_hash_name_project
   # Test dataset integration: dataset as hash with id
   def test_eval_run_with_dataset_hash_id
     VCR.use_cassette("eval/dataset_hash_id") do
-      Braintrust.init(blocking_login: true)
-      state = Braintrust.current_state
+      state = get_non_global_state
       api = Braintrust::API.new(state: state)
 
       # Create a test dataset
@@ -499,8 +489,7 @@ def test_eval_run_with_dataset_hash_id
   # Test dataset integration: dataset with limit option
   def test_eval_run_with_dataset_limit
     VCR.use_cassette("eval/dataset_limit") do
-      Braintrust.init(blocking_login: true)
-      state = Braintrust.current_state
+      state = get_non_global_state
       api = Braintrust::API.new(state: state)
 
       # Create a test dataset with multiple records
@@ -552,8 +541,7 @@ def test_eval_run_with_dataset_limit
   # Test dataset integration: error when both dataset and cases provided
   def test_eval_run_with_both_dataset_and_cases_errors
     VCR.use_cassette("eval/run_both_dataset_and_cases_error") do
-      Braintrust.init(blocking_login: true)
-      state = Braintrust.current_state
+      state = get_non_global_state
 
       task = ->(input) { input.upcase }
       scorer = Braintrust::Eval.scorer("exact") { |i, e, o| (o == e) ? 1.0 : 0.0 }
diff --git a/test/braintrust/internal/experiments_test.rb b/test/braintrust/internal/experiments_test.rb
index aff745b6..f7190ef7 100644
--- a/test/braintrust/internal/experiments_test.rb
+++ b/test/braintrust/internal/experiments_test.rb
@@ -6,8 +6,7 @@
 class Braintrust::Internal::ExperimentsTest < Minitest::Test
   def test_get_or_create_basic
     VCR.use_cassette("experiments/get_or_create_basic") do
-      Braintrust.init(blocking_login: true)
-      state = Braintrust.current_state
+      state = get_non_global_state
 
       result = Braintrust::Internal::Experiments.get_or_create(
         "test-ruby-sdk-experiment-basic",
@@ -24,8 +23,7 @@ def test_get_or_create_basic
 
   def test_get_or_create_with_tags_and_metadata
     VCR.use_cassette("experiments/get_or_create_with_tags") do
-      Braintrust.init(blocking_login: true)
-      state = Braintrust.current_state
+      state = get_non_global_state
 
       result = Braintrust::Internal::Experiments.get_or_create(
         "test-ruby-sdk-experiment-tags",
@@ -42,8 +40,7 @@ def test_get_or_create_with_tags_and_metadata
 
   def test_get_or_create_with_update_flag
     VCR.use_cassette("experiments/get_or_create_with_update") do
-      Braintrust.init(blocking_login: true)
-      state = Braintrust.current_state
+      state = get_non_global_state
 
       # First create with update: false (new experiment)
       result1 = Braintrust::Internal::Experiments.get_or_create(
diff --git a/test/braintrust/state_login_test.rb b/test/braintrust/state_login_test.rb
index e7576c84..01f252a4 100644
--- a/test/braintrust/state_login_test.rb
+++ b/test/braintrust/state_login_test.rb
@@ -4,6 +4,8 @@
 
 class Braintrust::StateLoginTest < Minitest::Test
   def setup
+    # Clear any global state from previous tests to ensure isolation
+    Braintrust::State.instance_variable_set(:@global_state, nil)
     @api_key = ENV["BRAINTRUST_API_KEY"]
     assert @api_key, "BRAINTRUST_API_KEY environment variable is required for login tests"
   end
diff --git a/test/test_helper.rb b/test/test_helper.rb
index c49732f7..56ad5a64 100644
--- a/test/test_helper.rb
+++ b/test/test_helper.rb
@@ -109,6 +109,15 @@ def get_test_state(**options)
     state
   end
 
+  # Creates a non-global State by calling Braintrust.init with set_global: false and blocking_login: true
+  # This performs login (via VCR cassettes in tests) without polluting global state
+  # Use this for tests that need to interact with the API (eval, experiments, datasets, etc.)
+  # @param options [Hash] Extra options for Braintrust.init (set_global and blocking_login are applied last, so they cannot be overridden)
+  # @return [Braintrust::State]
+  def get_non_global_state(**options)
+    Braintrust.init(**options, set_global: false, blocking_login: true)
+  end
+
   # Sets up OpenTelemetry with an in-memory exporter for testing
   # Returns an OtelTestRig with tracer_provider, exporter, state, and drain() method
   # The exporter can be passed to Braintrust::Trace.enable to replace OTLP exporter

From 9d2531bbfbe656ca83c66fa5711045e359342512 Mon Sep 17 00:00:00 2001
From: Matt Perpick <matt@braintrustdata.com>
Date: Fri, 24 Oct 2025 16:10:17 -0400
Subject: [PATCH 5/6] flaky test

---
 test/braintrust/api_test.rb         | 4 ----
 test/braintrust/state_login_test.rb | 6 ------
 test/braintrust_test.rb             | 6 +++---
 test/test_helper.rb                 | 7 +++++++
 4 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/test/braintrust/api_test.rb b/test/braintrust/api_test.rb
index 68752817..0d21a9d8 100644
--- a/test/braintrust/api_test.rb
+++ b/test/braintrust/api_test.rb
@@ -7,10 +7,6 @@ def setup
     flunk "BRAINTRUST_API_KEY not set" unless ENV["BRAINTRUST_API_KEY"]
   end
 
-  def teardown
-    Braintrust::State.instance_variable_set(:@global_state, nil)
-  end
-
   def test_api_new_with_explicit_state
     VCR.use_cassette("api/new_explicit_state") do
       state = get_non_global_state
diff --git a/test/braintrust/state_login_test.rb b/test/braintrust/state_login_test.rb
index 01f252a4..ad7484c7 100644
--- a/test/braintrust/state_login_test.rb
+++ b/test/braintrust/state_login_test.rb
@@ -4,16 +4,10 @@
 
 class Braintrust::StateLoginTest < Minitest::Test
   def setup
-    # Clear any global state from previous tests to ensure isolation
-    Braintrust::State.instance_variable_set(:@global_state, nil)
     @api_key = ENV["BRAINTRUST_API_KEY"]
     assert @api_key, "BRAINTRUST_API_KEY environment variable is required for login tests"
   end
 
-  def teardown
-    Braintrust::State.instance_variable_set(:@global_state, nil)
-  end
-
   def test_login_fetches_org_info
     VCR.use_cassette("auth/login_success") do
       state = Braintrust::State.new(
diff --git a/test/braintrust_test.rb b/test/braintrust_test.rb
index c22cbdf5..168e42e9 100644
--- a/test/braintrust_test.rb
+++ b/test/braintrust_test.rb
@@ -16,9 +16,6 @@ def setup
   end
 
   def teardown
-    # Reset global state after each test
-    Braintrust::State.instance_variable_set(:@global_state, nil)
-
     # Reset global tracer provider to default proxy
     OpenTelemetry.tracer_provider = OpenTelemetry::Internal::ProxyTracerProvider.new
 
@@ -34,6 +31,9 @@ def teardown
     else
       ENV.delete("BRAINTRUST_DEFAULT_PROJECT")
     end
+
+    # Chain to parent teardown (global state itself is reset by after_teardown in test_helper)
+    super
   end
 
   def test_init_sets_global_state_by_default
diff --git a/test/test_helper.rb b/test/test_helper.rb
index 56ad5a64..7a6889a1 100644
--- a/test/test_helper.rb
+++ b/test/test_helper.rb
@@ -162,4 +162,11 @@ def unique_name(prefix = "")
 # Include helper in all test cases
 class Minitest::Test
   include TracingTestHelper
+
+  # Minitest lifecycle hook: reset Braintrust's global state after every test
+  # Done in after_teardown (not teardown) so test classes that define their own teardown still get the cleanup
+  def after_teardown
+    Braintrust::State.instance_variable_set(:@global_state, nil)
+    super
+  end
 end

From c71fb59d737b6778dea4077bc0522cd89753b303 Mon Sep 17 00:00:00 2001
From: Matt Perpick <matt@braintrustdata.com>
Date: Fri, 24 Oct 2025 16:17:03 -0400
Subject: [PATCH 6/6] tweak

---
 test/test_helper.rb | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/test_helper.rb b/test/test_helper.rb
index 7a6889a1..c53789c1 100644
--- a/test/test_helper.rb
+++ b/test/test_helper.rb
@@ -46,7 +46,8 @@
   # Recording mode: :once (default), :all (re-record), :none (no recording)
   config.default_cassette_options = {
     record: ENV["VCR_MODE"]&.to_sym || :once,
-    match_requests_on: [:method, :uri]  # Don't match on body (contains dynamic data)
+    match_requests_on: [:method, :uri],  # Don't match on body (contains dynamic data)
+    allow_playback_repeats: true  # Allow same HTTP interaction to be replayed multiple times
   }
 end