diff --git a/ruby/lib/ci/queue/redis/worker.rb b/ruby/lib/ci/queue/redis/worker.rb index ff8d63a..284d401 100644 --- a/ruby/lib/ci/queue/redis/worker.rb +++ b/ruby/lib/ci/queue/redis/worker.rb @@ -239,7 +239,8 @@ def reserve end def try_to_reserve_test - eval_script( + current_time = CI::Queue.time_now.to_f + test_id = eval_script( :reserve, keys: [ key('queue'), @@ -249,11 +250,44 @@ def try_to_reserve_test key('owners'), key('test-group-timeout') ], - argv: [CI::Queue.time_now.to_f, 'true', config.timeout] + argv: [current_time, 'true', config.timeout] ) + + if test_id + # Check what timeout was used (dynamic or default) + dynamic_timeout = redis.hget(key('test-group-timeout'), test_id) + timeout_used = dynamic_timeout ? dynamic_timeout.to_f : config.timeout + deadline = current_time + timeout_used + gap_seconds = timeout_used + gap_hours = (gap_seconds / 3600).to_i + gap_mins = ((gap_seconds % 3600) / 60).to_i + gap_secs = (gap_seconds % 60) + + current_time_readable = Time.at(current_time).strftime('%Y-%m-%d %H:%M:%S') + deadline_readable = Time.at(deadline).strftime('%Y-%m-%d %H:%M:%S') + + # Format gap_seconds to 2 decimal places, and gap_secs for the formatted version + gap_seconds_formatted = format('%.2f', gap_seconds) + gap_secs_formatted = gap_secs < 60 ? format('%.2f', gap_secs) : gap_secs.to_i.to_s + + # Add details about how timeout was computed + timeout_details = if dynamic_timeout + # For chunks, calculate back to estimated_duration (without buffer) + estimated_duration_ms = (dynamic_timeout.to_f / 1.1 * 1000).round + estimated_duration_seconds = estimated_duration_ms / 1000.0 + "dynamic_timeout=#{dynamic_timeout.to_f}s (estimated_duration=#{estimated_duration_seconds}s * 1.1 buffer)" + else + "default_timeout=#{config.timeout}s" + end + + warn "[reserve] test=#{test_id} current_time=#{current_time_readable} (#{current_time}) deadline=#{deadline_readable} (#{deadline}) gap=#{gap_seconds_formatted}s (#{gap_hours}h#{gap_mins}m#{gap_secs_formatted}s) [#{timeout_details}]" + end + + test_id end def try_to_reserve_lost_test + current_time = CI::Queue.time_now.to_f lost_test = eval_script( :reserve_lost, keys: [ @@ -263,9 +297,39 @@ def try_to_reserve_lost_test key('owners'), key('test-group-timeout') ], - argv: [CI::Queue.time_now.to_f, timeout, 'true', config.timeout] + argv: [current_time, timeout, 'true', config.timeout] ) + if lost_test + # Check what timeout was used (dynamic or default) + dynamic_timeout = redis.hget(key('test-group-timeout'), lost_test) + timeout_used = dynamic_timeout ? dynamic_timeout.to_f : config.timeout + deadline = current_time + timeout_used + gap_seconds = timeout_used + gap_hours = (gap_seconds / 3600).to_i + gap_mins = ((gap_seconds % 3600) / 60).to_i + gap_secs = (gap_seconds % 60) + + current_time_readable = Time.at(current_time).strftime('%Y-%m-%d %H:%M:%S') + deadline_readable = Time.at(deadline).strftime('%Y-%m-%d %H:%M:%S') + + # Format gap_seconds to 2 decimal places, and gap_secs for the formatted version + gap_seconds_formatted = format('%.2f', gap_seconds) + gap_secs_formatted = gap_secs < 60 ? format('%.2f', gap_secs) : gap_secs.to_i.to_s + + # Add details about how timeout was computed + timeout_details = if dynamic_timeout + # For chunks, calculate back to estimated_duration (without buffer) + estimated_duration_ms = (dynamic_timeout.to_f / 1.1 * 1000).round + estimated_duration_seconds = estimated_duration_ms / 1000.0 + "dynamic_timeout=#{dynamic_timeout.to_f}s (estimated_duration=#{estimated_duration_seconds}s * 1.1 buffer)" + else + "default_timeout=#{config.timeout}s" + end + + warn "[reserve_lost] test=#{lost_test} current_time=#{current_time_readable} (#{current_time}) deadline=#{deadline_readable} (#{deadline}) gap=#{gap_seconds_formatted}s (#{gap_hours}h#{gap_mins}m#{gap_secs_formatted}s) [#{timeout_details}]" + end + if lost_test.nil? && idle? puts "Worker #{worker_id} could not reserve a lost test while idle" puts 'Printing running tests:' @@ -346,9 +410,14 @@ def store_chunk_metadata(chunks) transaction.sadd(key('chunks'), chunk.id) # Store dynamic timeout for this chunk - # Timeout = default_timeout * number_of_tests - chunk_timeout = config.timeout * chunk.test_count - transaction.hset(key('test-group-timeout'), chunk.id, chunk_timeout) + # Timeout = estimated_duration (in ms) converted to seconds + buffer + # estimated_duration is in milliseconds, convert to seconds and add 10% buffer + buffer_percent = 10 + estimated_duration_seconds = chunk.estimated_duration / 1000.0 + chunk_timeout = (estimated_duration_seconds * (1 + buffer_percent / 100.0)).round(2) + # Format to string to avoid floating point precision issues in Redis + # Use %g to remove trailing zeros + transaction.hset(key('test-group-timeout'), chunk.id, format('%g', chunk_timeout)) end transaction.expire(key('chunks'), config.redis_ttl) transaction.expire(key('test-group-timeout'), config.redis_ttl) diff --git a/ruby/test/ci/queue/redis/dynamic_timeout_test.rb b/ruby/test/ci/queue/redis/dynamic_timeout_test.rb index 110495a..bb485f9 100644 --- a/ruby/test/ci/queue/redis/dynamic_timeout_test.rb +++ b/ruby/test/ci/queue/redis/dynamic_timeout_test.rb @@ -37,10 +37,10 @@ def test_chunk_timeout_stored_in_redis_hash end # Verify timeout was stored in test-group-timeout hash - # Timeout should be: default_timeout (30s) * number_of_tests (3) = 90s + # Timeout should be: estimated_duration (5000ms = 5s) * 1.1 buffer = 5.5s chunk_timeout = @redis.hget('build:42:test-group-timeout', 'TestA:chunk_0') refute_nil chunk_timeout - assert_equal '90', chunk_timeout + assert_equal '5.5', chunk_timeout end def test_chunk_timeout_scales_with_test_count @@ -55,7 +55,7 @@ def test_chunk_timeout_scales_with_test_count end small_timeout = @redis.hget('build:42:test-group-timeout', 'SmallSuite:chunk_0') - assert_equal '150', small_timeout # 30s * 5 tests + assert_equal '1.1', small_timeout # 1000ms = 1s * 1.1 buffer = 1.1s @redis.flushdb @@ -70,7 +70,7 @@ def test_chunk_timeout_scales_with_test_count end large_timeout = @redis.hget('build:42:test-group-timeout', 'LargeSuite:chunk_0') - assert_equal '600', large_timeout # 30s * 20 tests + assert_equal '5.5', large_timeout # 5000ms = 5s * 1.1 buffer = 5.5s end def test_multiple_chunks_stored_with_different_timeouts @@ -92,9 +92,9 @@ def test_multiple_chunks_stored_with_different_timeouts end # Verify each chunk has correct timeout - assert_equal '60', @redis.hget('build:42:test-group-timeout', 'TestA:chunk_0') # 30s * 2 - assert_equal '90', @redis.hget('build:42:test-group-timeout', 'TestB:chunk_0') # 30s * 3 - assert_equal '30', @redis.hget('build:42:test-group-timeout', 'TestC:chunk_0') # 30s * 1 + assert_equal '2.2', @redis.hget('build:42:test-group-timeout', 'TestA:chunk_0') # 2000ms = 2s * 1.1 buffer = 2.2s + assert_equal '3.3', @redis.hget('build:42:test-group-timeout', 'TestB:chunk_0') # 3000ms = 3s * 1.1 buffer = 3.3s + assert_equal '1.1', @redis.hget('build:42:test-group-timeout', 'TestC:chunk_0') # 1000ms = 1s * 1.1 buffer = 1.1s end def test_timeout_hash_has_ttl @@ -145,8 +145,8 @@ def test_mixed_chunks_and_tests_only_chunks_have_timeouts end # Chunks should have timeouts - assert_equal '60', @redis.hget('build:42:test-group-timeout', 'TestA:chunk_0') - assert_equal '90', @redis.hget('build:42:test-group-timeout', 'TestC:chunk_0') + assert_equal '2.2', @redis.hget('build:42:test-group-timeout', 'TestA:chunk_0') # 2000ms = 2s * 1.1 buffer = 2.2s + assert_equal '3.3', @redis.hget('build:42:test-group-timeout', 'TestC:chunk_0') # 3000ms = 3s * 1.1 buffer = 3.3s # Individual test should not assert_nil @redis.hget('build:42:test-group-timeout', 'TestB#test_1') @@ -296,7 +296,7 @@ def test_batching_with_many_chunks chunks.each do |chunk| timeout = @redis.hget('build:42:test-group-timeout', chunk.id) refute_nil timeout, "Chunk #{chunk.id} should have timeout stored" - assert_equal '30', timeout # 30s * 1 test + assert_equal '1.1', timeout # 1000ms = 1s * 1.1 buffer = 1.1s end end