Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 75 additions & 6 deletions ruby/lib/ci/queue/redis/worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,8 @@ def reserve
end

def try_to_reserve_test
eval_script(
current_time = CI::Queue.time_now.to_f
test_id = eval_script(
:reserve,
keys: [
key('queue'),
Expand All @@ -249,11 +250,44 @@ def try_to_reserve_test
key('owners'),
key('test-group-timeout')
],
argv: [CI::Queue.time_now.to_f, 'true', config.timeout]
argv: [current_time, 'true', config.timeout]
)

if test_id
# Check what timeout was used (dynamic or default)
dynamic_timeout = redis.hget(key('test-group-timeout'), test_id)
timeout_used = dynamic_timeout ? dynamic_timeout.to_f : config.timeout
deadline = current_time + timeout_used
gap_seconds = timeout_used
gap_hours = (gap_seconds / 3600).to_i
gap_mins = ((gap_seconds % 3600) / 60).to_i
gap_secs = (gap_seconds % 60)

current_time_readable = Time.at(current_time).strftime('%Y-%m-%d %H:%M:%S')
deadline_readable = Time.at(deadline).strftime('%Y-%m-%d %H:%M:%S')

# Format gap_seconds to 2 decimal places, and gap_secs for the formatted version
gap_seconds_formatted = format('%.2f', gap_seconds)
gap_secs_formatted = gap_secs < 60 ? format('%.2f', gap_secs) : gap_secs.to_i.to_s

# Add details about how timeout was computed
timeout_details = if dynamic_timeout
# For chunks, calculate back to estimated_duration (without buffer)
estimated_duration_ms = (dynamic_timeout.to_f / 1.1 * 1000).round
estimated_duration_seconds = estimated_duration_ms / 1000.0
"dynamic_timeout=#{dynamic_timeout.to_f}s (estimated_duration=#{estimated_duration_seconds}s * 1.1 buffer)"
else
"default_timeout=#{config.timeout}s"
end

warn "[reserve] test=#{test_id} current_time=#{current_time_readable} (#{current_time}) deadline=#{deadline_readable} (#{deadline}) gap=#{gap_seconds_formatted}s (#{gap_hours}h#{gap_mins}m#{gap_secs_formatted}s) [#{timeout_details}]"
end

test_id
end

def try_to_reserve_lost_test
current_time = CI::Queue.time_now.to_f
lost_test = eval_script(
:reserve_lost,
keys: [
Expand All @@ -263,9 +297,39 @@ def try_to_reserve_lost_test
key('owners'),
key('test-group-timeout')
],
argv: [CI::Queue.time_now.to_f, timeout, 'true', config.timeout]
argv: [current_time, timeout, 'true', config.timeout]
)

if lost_test
# Check what timeout was used (dynamic or default)
dynamic_timeout = redis.hget(key('test-group-timeout'), lost_test)
timeout_used = dynamic_timeout ? dynamic_timeout.to_f : config.timeout
deadline = current_time + timeout_used
gap_seconds = timeout_used
gap_hours = (gap_seconds / 3600).to_i
gap_mins = ((gap_seconds % 3600) / 60).to_i
gap_secs = (gap_seconds % 60)

current_time_readable = Time.at(current_time).strftime('%Y-%m-%d %H:%M:%S')
deadline_readable = Time.at(deadline).strftime('%Y-%m-%d %H:%M:%S')

# Format gap_seconds to 2 decimal places, and gap_secs for the formatted version
gap_seconds_formatted = format('%.2f', gap_seconds)
gap_secs_formatted = gap_secs < 60 ? format('%.2f', gap_secs) : gap_secs.to_i.to_s

# Add details about how timeout was computed
timeout_details = if dynamic_timeout
# For chunks, calculate back to estimated_duration (without buffer)
estimated_duration_ms = (dynamic_timeout.to_f / 1.1 * 1000).round
estimated_duration_seconds = estimated_duration_ms / 1000.0
"dynamic_timeout=#{dynamic_timeout.to_f}s (estimated_duration=#{estimated_duration_seconds}s * 1.1 buffer)"
else
"default_timeout=#{config.timeout}s"
end

warn "[reserve_lost] test=#{lost_test} current_time=#{current_time_readable} (#{current_time}) deadline=#{deadline_readable} (#{deadline}) gap=#{gap_seconds_formatted}s (#{gap_hours}h#{gap_mins}m#{gap_secs_formatted}s) [#{timeout_details}]"
end

if lost_test.nil? && idle?
puts "Worker #{worker_id} could not reserve a lost test while idle"
puts 'Printing running tests:'
Expand Down Expand Up @@ -346,9 +410,14 @@ def store_chunk_metadata(chunks)
transaction.sadd(key('chunks'), chunk.id)

# Store dynamic timeout for this chunk
# Timeout = default_timeout * number_of_tests
chunk_timeout = config.timeout * chunk.test_count
transaction.hset(key('test-group-timeout'), chunk.id, chunk_timeout)
# Timeout = estimated_duration (in ms) converted to seconds + buffer
# estimated_duration is in milliseconds, convert to seconds and add 10% buffer
buffer_percent = 10
estimated_duration_seconds = chunk.estimated_duration / 1000.0
chunk_timeout = (estimated_duration_seconds * (1 + buffer_percent / 100.0)).round(2)
# Format to string to avoid floating point precision issues in Redis
# Use %g to remove trailing zeros
transaction.hset(key('test-group-timeout'), chunk.id, format('%g', chunk_timeout))
end
transaction.expire(key('chunks'), config.redis_ttl)
transaction.expire(key('test-group-timeout'), config.redis_ttl)
Expand Down
20 changes: 10 additions & 10 deletions ruby/test/ci/queue/redis/dynamic_timeout_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ def test_chunk_timeout_stored_in_redis_hash
end

# Verify timeout was stored in test-group-timeout hash
# Timeout should be: default_timeout (30s) * number_of_tests (3) = 90s
# Timeout should be: estimated_duration (5000ms = 5s) * 1.1 buffer = 5.5s
chunk_timeout = @redis.hget('build:42:test-group-timeout', 'TestA:chunk_0')
refute_nil chunk_timeout
assert_equal '90', chunk_timeout
assert_equal '5.5', chunk_timeout
end

def test_chunk_timeout_scales_with_test_count
Expand All @@ -55,7 +55,7 @@ def test_chunk_timeout_scales_with_test_count
end

small_timeout = @redis.hget('build:42:test-group-timeout', 'SmallSuite:chunk_0')
assert_equal '150', small_timeout # 30s * 5 tests
assert_equal '1.1', small_timeout # 1000ms = 1s * 1.1 buffer = 1.1s

@redis.flushdb

Expand All @@ -70,7 +70,7 @@ def test_chunk_timeout_scales_with_test_count
end

large_timeout = @redis.hget('build:42:test-group-timeout', 'LargeSuite:chunk_0')
assert_equal '600', large_timeout # 30s * 20 tests
assert_equal '5.5', large_timeout # 5000ms = 5s * 1.1 buffer = 5.5s
end

def test_multiple_chunks_stored_with_different_timeouts
Expand All @@ -92,9 +92,9 @@ def test_multiple_chunks_stored_with_different_timeouts
end

# Verify each chunk has correct timeout
assert_equal '60', @redis.hget('build:42:test-group-timeout', 'TestA:chunk_0') # 30s * 2
assert_equal '90', @redis.hget('build:42:test-group-timeout', 'TestB:chunk_0') # 30s * 3
assert_equal '30', @redis.hget('build:42:test-group-timeout', 'TestC:chunk_0') # 30s * 1
assert_equal '2.2', @redis.hget('build:42:test-group-timeout', 'TestA:chunk_0') # 2000ms = 2s * 1.1 buffer = 2.2s
assert_equal '3.3', @redis.hget('build:42:test-group-timeout', 'TestB:chunk_0') # 3000ms = 3s * 1.1 buffer = 3.3s
assert_equal '1.1', @redis.hget('build:42:test-group-timeout', 'TestC:chunk_0') # 1000ms = 1s * 1.1 buffer = 1.1s
end

def test_timeout_hash_has_ttl
Expand Down Expand Up @@ -145,8 +145,8 @@ def test_mixed_chunks_and_tests_only_chunks_have_timeouts
end

# Chunks should have timeouts
assert_equal '60', @redis.hget('build:42:test-group-timeout', 'TestA:chunk_0')
assert_equal '90', @redis.hget('build:42:test-group-timeout', 'TestC:chunk_0')
assert_equal '2.2', @redis.hget('build:42:test-group-timeout', 'TestA:chunk_0') # 2000ms = 2s * 1.1 buffer = 2.2s
assert_equal '3.3', @redis.hget('build:42:test-group-timeout', 'TestC:chunk_0') # 3000ms = 3s * 1.1 buffer = 3.3s

# Individual test should not
assert_nil @redis.hget('build:42:test-group-timeout', 'TestB#test_1')
Expand Down Expand Up @@ -296,7 +296,7 @@ def test_batching_with_many_chunks
chunks.each do |chunk|
timeout = @redis.hget('build:42:test-group-timeout', chunk.id)
refute_nil timeout, "Chunk #{chunk.id} should have timeout stored"
assert_equal '30', timeout # 30s * 1 test
assert_equal '1.1', timeout # 1000ms = 1s * 1.1 buffer = 1.1s
end
end

Expand Down