-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_long_context.py
More file actions
149 lines (118 loc) · 4.49 KB
/
test_long_context.py
File metadata and controls
149 lines (118 loc) · 4.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
"""Tests for long-context optimization features."""
import json
import pytest
from forgelm.config import ForgeConfig, load_config
# Minimal model / LoRA / data sections that `_config` starts from.
BASE = dict(
    model=dict(name_or_path="test/model"),
    lora=dict(r=16, alpha=32),
    data=dict(dataset_name_or_path="test.jsonl"),
)
def _config(**overrides):
    """Build a ForgeConfig from BASE plus keyword overrides.

    A ``training`` override is merged into the default training section
    (which only sets ``output_dir``); any other keyword replaces the
    section of the same name wholesale.
    """
    training = {"output_dir": "./out"}
    sections = {**BASE, "training": training}
    for section, value in overrides.items():
        if section != "training":
            sections[section] = value
            continue
        # Merge rather than replace so output_dir stays set.
        training.update(value)
    return ForgeConfig(**sections)
class TestRopeScaling:
    """Checks that ``training.rope_scaling`` is carried through the config."""

    def test_rope_disabled_by_default(self):
        """With no training override, rope_scaling is None."""
        config = _config()
        assert config.training.rope_scaling is None

    def test_rope_linear(self):
        """A linear rope_scaling dict keeps its type and factor."""
        config = _config(training={"rope_scaling": {"type": "linear", "factor": 4.0}})
        assert config.training.rope_scaling["type"] == "linear"
        assert config.training.rope_scaling["factor"] == pytest.approx(4.0)

    def test_rope_dynamic(self):
        """A dynamic rope_scaling dict keeps its type and factor."""
        config = _config(training={"rope_scaling": {"type": "dynamic", "factor": 2.0}})
        assert config.training.rope_scaling["type"] == "dynamic"
        # Check the factor too, matching the linear and yarn tests.
        assert config.training.rope_scaling["factor"] == pytest.approx(2.0)

    def test_rope_yarn(self):
        """A yarn rope_scaling dict keeps its type and factor."""
        config = _config(training={"rope_scaling": {"type": "yarn", "factor": 8.0}})
        assert config.training.rope_scaling["type"] == "yarn"
        assert config.training.rope_scaling["factor"] == pytest.approx(8.0)
class TestNeftune:
    """Covers the ``training.neftune_noise_alpha`` setting."""

    def test_neftune_disabled_by_default(self):
        """Without an override the alpha is None."""
        training = _config().training
        assert training.neftune_noise_alpha is None

    def test_neftune_enabled(self):
        """An alpha of 5.0 is readable back from the config."""
        overrides = {"neftune_noise_alpha": 5.0}
        alpha = _config(training=overrides).training.neftune_noise_alpha
        assert alpha == pytest.approx(5.0)

    def test_neftune_custom_value(self):
        """An alpha of 15.0 is readable back from the config."""
        overrides = {"neftune_noise_alpha": 15.0}
        alpha = _config(training=overrides).training.neftune_noise_alpha
        assert alpha == pytest.approx(15.0)
class TestSlidingWindow:
    """Covers the ``training.sliding_window_attention`` setting."""

    def test_sliding_window_disabled_by_default(self):
        """Without an override the window is None."""
        training = _config().training
        assert training.sliding_window_attention is None

    def test_sliding_window_custom(self):
        """A window of 4096 is readable back from the config."""
        overrides = {"sliding_window_attention": 4096}
        window = _config(training=overrides).training.sliding_window_attention
        assert window == 4096
class TestSamplePacking:
    """Covers the ``training.sample_packing`` flag."""

    def test_sample_packing_disabled_by_default(self):
        """Without an override the flag is exactly False."""
        packing = _config().training.sample_packing
        assert packing is False

    def test_sample_packing_enabled(self):
        """Passing True yields exactly True on the config."""
        overrides = {"sample_packing": True}
        packing = _config(training=overrides).training.sample_packing
        assert packing is True
class TestLongContextYaml:
    """Loads long-context settings from YAML files via ``load_config``."""

    def test_yaml_round_trip(self, tmp_path):
        """Write a long-context YAML file, load it, and check each field."""
        import textwrap

        # Nesting matters: every key must sit under its section, otherwise
        # the YAML parses as a flat mapping with empty model/training nodes.
        yaml_content = textwrap.dedent(
            """
            model:
              name_or_path: "test/model"
              max_length: 32768
            lora:
              r: 16
              alpha: 32
            data:
              dataset_name_or_path: "test.jsonl"
            training:
              output_dir: "./out"
              rope_scaling:
                type: "yarn"
                factor: 4.0
              neftune_noise_alpha: 5.0
              sliding_window_attention: 4096
              sample_packing: true
            """
        )
        config_file = tmp_path / "longctx.yaml"
        config_file.write_text(yaml_content, encoding="utf-8")

        config = load_config(str(config_file))

        assert config.training.rope_scaling["type"] == "yarn"
        assert config.training.rope_scaling["factor"] == pytest.approx(4.0)
        assert config.training.neftune_noise_alpha == pytest.approx(5.0)
        assert config.training.sliding_window_attention == 4096
        assert config.training.sample_packing is True

    def test_config_template_still_valid(self):
        """The template config loads with the long-context options unset."""
        # NOTE(review): the path is relative to the current working
        # directory, so this presumably must run from the directory that
        # holds config_template.yaml -- confirm.
        config = load_config("config_template.yaml")
        assert config.training.rope_scaling is None
        assert config.training.neftune_noise_alpha is None
class TestLongContextDryRun:
    """Checks that the dry-run JSON output reports long-context settings."""

    def test_dry_run_shows_rope_scaling(self, tmp_path):
        """Run a JSON dry run and check rope_scaling and the NEFTune alpha.

        The config is written to ``tmp_path``, loaded with ``load_config``,
        and passed to ``_run_dry_run`` with ``output_format="json"``; the
        captured stdout is parsed as JSON and inspected.
        """
        import io
        import textwrap
        from contextlib import redirect_stdout

        from forgelm.cli import _run_dry_run

        # Nesting matters: every key must sit under its section, otherwise
        # the YAML parses as a flat mapping with empty model/training nodes.
        yaml_content = textwrap.dedent(
            """
            model:
              name_or_path: "test/model"
            lora:
              r: 16
              alpha: 32
            data:
              dataset_name_or_path: "test.jsonl"
            training:
              output_dir: "./out"
              rope_scaling:
                type: "linear"
                factor: 4.0
              neftune_noise_alpha: 10.0
            """
        )
        config_file = tmp_path / "longctx.yaml"
        config_file.write_text(yaml_content, encoding="utf-8")
        config = load_config(str(config_file))

        # The dry run prints to stdout, so capture it for parsing.
        captured = io.StringIO()
        with redirect_stdout(captured):
            _run_dry_run(config, output_format="json")

        output = json.loads(captured.getvalue())
        assert output["rope_scaling"]["type"] == "linear"
        assert output["rope_scaling"]["factor"] == pytest.approx(4.0)
        assert output["neftune_noise_alpha"] == pytest.approx(10.0)