From e4df19711a9464cde92fb708d08978c6c6b9a553 Mon Sep 17 00:00:00 2001
From: Sunny Shen
Date: Tue, 10 Feb 2026 15:17:08 -0800
Subject: [PATCH] Deprecate `deduplicate` setting for SOBOL `Generator`; Set
 `GenerationNode.should_deduplicate=True` by default

Summary:
Previously, we had dedup settings at both the `Generator` and the
`GenerationNode` level, which is unnecessary and makes it easy to accidentally
use two different dedup settings at the two levels (see D92094386).

Given that only `SobolGenerator` has special dedup logic, we've decided to
deprecate the `deduplicate` setting on `Generator` and rely solely on
`GenerationNode`-level deduplication.

Furthermore, since we generally expect dedup to be True for OSS and AutoML use
cases, we set `GenerationNode.should_deduplicate=True` by default, while
explicitly overriding GNode dedup to False for online use cases.

**One Potential Issue**

Currently `RandomAdapter._gen` relies solely on the SOBOL generator to dedup
(by passing `generated_points` to the Sobol generator and then calling
`rejection_sample`: https://fburl.com/code/zmqfhn8g). If we remove that, anyone
using a standalone `RandomAdapter` (without a GNode) may get duplicated trials
if they call `RandomAdapter.gen` multiple times.

This is partially mitigated by advancing `init_position` on the Sobol
generator -- there shouldn't be duplicated points for continuous parameters,
but with discrete parameters we may produce duplicated points after rounding.

The recommended path is through GenerationStrategy/GenerationNode, where dedup
is properly handled, but nothing enforces that a standalone RandomAdapter is
never used.

(P.S. `TorchAdapter` passes pending_observations through to the BoTorch
acquisition function as X_pending, so it doesn't have the same problem.)

Differential Revision: D92884352
---
 ax/adapter/factory.py                        |   2 -
 ax/adapter/random.py                         |  31 ----
 ax/adapter/registry.py                       |  10 ++
 ax/adapter/tests/test_random_adapter.py      | 142 +-----------------
 ax/adapter/tests/test_registry.py            |   3 +-
 ax/benchmark/tests/test_benchmark.py         |   6 +-
 ax/generation_strategy/dispatch_utils.py     |   8 +-
 ax/generation_strategy/generation_node.py    |   5 +-
 .../tests/test_dispatch_utils.py             |   8 +-
 .../tests/test_generation_strategy.py        |  25 +--
 ax/generators/random/base.py                 |  34 +----
 ax/generators/random/sobol.py                |   7 +-
 ax/generators/random/uniform.py              |   2 -
 ax/generators/tests/test_sobol.py            |  85 +----------
 ax/generators/tests/test_uniform.py          |  12 +-
 ax/generators/tests/test_utils.py            |  22 ---
 ax/generators/utils.py                       |  49 ------
 ax/storage/sqa_store/tests/test_sqa_store.py |   4 +-
 18 files changed, 52 insertions(+), 403 deletions(-)

diff --git a/ax/adapter/factory.py b/ax/adapter/factory.py
index 45aebaa145a..5f082f2ced7 100644
--- a/ax/adapter/factory.py
+++ b/ax/adapter/factory.py
@@ -34,7 +34,6 @@
 def get_sobol(
     search_space: SearchSpace,
     seed: int | None = None,
-    deduplicate: bool = False,
     init_position: int = 0,
     scramble: bool = True,
     fallback_to_sample_polytope: bool = False,
@@ -52,7 +51,6 @@
         Generators.SOBOL(
             experiment=Experiment(search_space=search_space),
             seed=seed,
-            deduplicate=deduplicate,
             init_position=init_position,
             scramble=scramble,
             fallback_to_sample_polytope=fallback_to_sample_polytope,
diff --git a/ax/adapter/random.py b/ax/adapter/random.py
index 66b12cbba71..cbd78273ade 100644
--- a/ax/adapter/random.py
+++ b/ax/adapter/random.py
@@ -9,7 +9,6 @@
 
 from collections.abc import Mapping, Sequence
 
-import numpy as np
 from ax.adapter.adapter_utils import (
     extract_parameter_constraints,
extract_search_space_digest, @@ -93,35 +92,6 @@ def _gen( linear_constraints = extract_parameter_constraints( search_space.parameter_constraints, self.parameters ) - # Extract generated points to deduplicate against. - # Exclude out-of-design arms (which can only be manual arms - # instead of adapter-generated arms). - generated_points = None - if self.generator.deduplicate: - arms_to_deduplicate = self._experiment.arms_by_signature_for_deduplication - generated_obs = [ - ObservationFeatures.from_arm(arm=arm) - for arm in arms_to_deduplicate.values() - if self._search_space.check_membership(parameterization=arm.parameters) - ] - # Transform - for t in self.transforms.values(): - generated_obs = t.transform_observation_features(generated_obs) - # Add pending observations -- already transformed. - generated_obs.extend( - [obs for obs_list in pending_observations.values() for obs in obs_list] - ) - if len(generated_obs) > 0: - # Extract generated points array (n x d). - generated_points = np.array( - [ - [obs.parameters[p] for p in self.parameters] - for obs in generated_obs - ] - ) - # Take unique points only, since there may be duplicates coming - # from pending observations for different metrics. - generated_points = np.unique(generated_points, axis=0) # Generate the candidates X, w = self.generator.gen( @@ -131,7 +101,6 @@ def _gen( fixed_features=fixed_features_dict, model_gen_options=model_gen_options, rounding_func=transform_callback(self.parameters, self.transforms), - generated_points=generated_points, ) observation_features = parse_observation_features(X, self.parameters) return GenResults( diff --git a/ax/adapter/registry.py b/ax/adapter/registry.py index 97d8f599c66..f306c42848a 100644 --- a/ax/adapter/registry.py +++ b/ax/adapter/registry.py @@ -173,6 +173,9 @@ class GeneratorSetup(NamedTuple): default_generator_kwargs: Mapping[str, Any] | None = None standard_adapter_kwargs: Mapping[str, Any] | None = None not_saved_generator_kwargs: Sequence[str] | None = None + # Kwargs that were removed from the generator but may still exist in + # serialized data. These are silently filtered out before validation. + deprecated_generator_kwargs: Sequence[str] | None = None """A mapping of string keys that indicate a generator, to the corresponding @@ -209,6 +212,8 @@ class GeneratorSetup(NamedTuple): adapter_class=RandomAdapter, generator_class=SobolGenerator, transforms=Cont_X_trans, + # These kwargs were removed but may exist in old serialized data. + deprecated_generator_kwargs=["deduplicate"], ), "Uniform": GeneratorSetup( adapter_class=RandomAdapter, @@ -291,6 +296,11 @@ def __call__( adapter_class = model_setup_info.adapter_class search_space = experiment.search_space + # Filter out deprecated kwargs that may exist in old serialized data. 
+ if model_setup_info.deprecated_generator_kwargs: + for deprecated_key in model_setup_info.deprecated_generator_kwargs: + kwargs.pop(deprecated_key, None) + if not silently_filter_kwargs: # Check correct kwargs are present callables = (generator_class, adapter_class) diff --git a/ax/adapter/tests/test_random_adapter.py b/ax/adapter/tests/test_random_adapter.py index 1cb4b4522a6..26a9f41080a 100644 --- a/ax/adapter/tests/test_random_adapter.py +++ b/ax/adapter/tests/test_random_adapter.py @@ -6,7 +6,6 @@ # pyre-strict -import dataclasses from unittest import mock import numpy as np @@ -20,15 +19,10 @@ from ax.core.parameter import ParameterType, RangeParameter from ax.core.parameter_constraint import ParameterConstraint from ax.core.search_space import SearchSpace -from ax.exceptions.core import SearchSpaceExhausted from ax.generators.random.base import RandomGenerator from ax.generators.random.sobol import SobolGenerator from ax.utils.common.testutils import TestCase -from ax.utils.testing.core_stubs import ( - get_data, - get_search_space_for_range_values, - get_small_discrete_search_space, -) +from ax.utils.testing.core_stubs import get_data from ax.utils.testing.modeling_stubs import get_experiment_for_value @@ -147,22 +141,6 @@ def test_gen_simple(self) -> None: self.assertIsNone(gen_args["linear_constraints"]) self.assertIsNone(gen_args["fixed_features"]) - def test_deduplicate(self) -> None: - exp = Experiment(search_space=get_small_discrete_search_space()) - sobol = RandomAdapter( - experiment=exp, - generator=SobolGenerator(deduplicate=True), - transforms=Cont_X_trans, - ) - for _ in range(4): # Search space is {[0, 1], {"red", "panda"}} - # Generate & attach trials to the experiment so that the - # generated points are used for deduplication. - gr = sobol.gen(1) - exp.new_trial(generator_run=gr).mark_running(no_runner_required=True) - self.assertEqual(len(gr.arms), 1) - with self.assertRaises(SearchSpaceExhausted): - sobol.gen(1) - def test_search_space_not_expanded(self) -> None: data = get_data(num_non_sq_arms=0) sq_arm = Arm(name="status_quo", parameters={"x": 10.0, "y": 1.0, "z": 1.0}) @@ -186,124 +164,6 @@ def test_search_space_not_expanded(self) -> None: sobol.gen(1) self.assertEqual(sobol._model_space, sobol._search_space) - def test_generated_points(self) -> None: - # Checks for generated points argument passed to Generator.gen. - # Search space has two range parameters in [0, 5]. - exp = Experiment( - search_space=get_search_space_for_range_values(min=0.0, max=5.0) - ) - ssd = extract_search_space_digest( - search_space=exp.search_space, - param_names=list(exp.search_space.parameters.keys()), - ) - ssd = dataclasses.replace(ssd, bounds=[(0.0, 1.0), (0.0, 1.0)]) - generator = SobolGenerator(deduplicate=True) - gen_res = generator.gen(n=1, search_space_digest=ssd, rounding_func=lambda x: x) - # Using Cont_X_trans, particularly UnitX here to test transform application. - adapter = RandomAdapter( - experiment=exp, generator=generator, transforms=Cont_X_trans - ) - - # No pending points or previous trials on the experiment. - with mock.patch.object(generator, "gen", return_value=gen_res) as mock_gen: - adapter.gen(n=1) - self.assertIsNone(mock_gen.call_args.kwargs["generated_points"]) - - # Attach two trials to the experiment. 
- exp.new_trial().add_arm(Arm(parameters={"x": 0.0, "y": 0.0})).mark_running( - no_runner_required=True - ) - exp.new_trial().add_arm(Arm(parameters={"x": 2.0, "y": 2.0})).mark_running( - no_runner_required=True - ) - with mock.patch.object(generator, "gen", return_value=gen_res) as mock_gen: - adapter.gen(n=1) - self.assertEqual( - mock_gen.call_args.kwargs["generated_points"].tolist(), - [[0.0, 0.0], [0.4, 0.4]], - ) - - # Add pending points -- only unique ones should be passed down. - pending_observations = { - m: [ObservationFeatures(parameters={"x": 3.0, "y": 3.0})] - for m in ("m1", "m2") - } - with mock.patch.object(generator, "gen", return_value=gen_res) as mock_gen: - adapter.gen(n=1, pending_observations=pending_observations) - self.assertEqual( - mock_gen.call_args.kwargs["generated_points"].tolist(), - [[0.0, 0.0], [0.4, 0.4], [0.6, 0.6]], - ) - - # Turn off deduplicate, nothing should be passed down. - generator.deduplicate = False - with mock.patch.object(generator, "gen", return_value=gen_res) as mock_gen: - adapter.gen(n=1, pending_observations=pending_observations) - self.assertIsNone(mock_gen.call_args.kwargs["generated_points"]) - - # Test filtering out-of-design arms during deduplication - # Create experiment with in-design and out-of-design arms - exp_with_ood_arms = Experiment( - search_space=get_search_space_for_range_values(min=0.0, max=5.0) - ) - in_design_arm = Arm( - name="in_design", parameters={"x": 2.0, "y": 3.0} - ) # Within [0, 5] - out_of_design_arm = Arm( - name="out_of_design", parameters={"x": 6.0, "y": 7.0} - ) # Outside [0, 5] - - exp_with_ood_arms.new_trial().add_arm(in_design_arm).mark_running( - no_runner_required=True - ) - exp_with_ood_arms.new_trial().add_arm(out_of_design_arm).mark_running( - no_runner_required=True - ) - - generator = SobolGenerator(deduplicate=True) - adapter_mixed = RandomAdapter( - experiment=exp_with_ood_arms, - generator=generator, - transforms=Cont_X_trans, - ) - - # Only the in-design arm should be included in generated_points - with mock.patch.object(generator, "gen", return_value=gen_res) as mock_gen: - adapter_mixed.gen(n=1) - - generated_points = mock_gen.call_args.kwargs["generated_points"] - self.assertEqual(len(generated_points), 1) - - # Test case where all arms are out-of-design - exp_all_out_of_design = Experiment( - search_space=get_search_space_for_range_values(min=0.0, max=5.0) - ) - out_of_design_arm1 = Arm(name="out1", parameters={"x": 6.0, "y": 7.0}) - out_of_design_arm2 = Arm(name="out2", parameters={"x": -1.0, "y": 8.0}) - - exp_all_out_of_design.new_trial().add_arm(out_of_design_arm1).mark_running( - no_runner_required=True - ) - exp_all_out_of_design.new_trial().add_arm(out_of_design_arm2).mark_running( - no_runner_required=True - ) - - generator_all_out = SobolGenerator(deduplicate=True) - adapter_all_out = RandomAdapter( - experiment=exp_all_out_of_design, - generator=generator_all_out, - transforms=Cont_X_trans, - ) - - # When all arms are out-of-design, generated_points should be empty - with mock.patch.object( - generator_all_out, "gen", return_value=gen_res - ) as mock_gen: - adapter_all_out.gen(n=1) - - generated_points_all_out = mock_gen.call_args.kwargs["generated_points"] - self.assertIsNone(generated_points_all_out) - def test_generation_with_all_fixed(self) -> None: # Make sure candidate generation succeeds and returns correct parameters # when all parameters are fixed. 
diff --git a/ax/adapter/tests/test_registry.py b/ax/adapter/tests/test_registry.py index 60e8e115ce5..92754f56aa2 100644 --- a/ax/adapter/tests/test_registry.py +++ b/ax/adapter/tests/test_registry.py @@ -174,7 +174,6 @@ def test_view_defaults(self) -> None: ( { "seed": None, - "deduplicate": True, "init_position": 0, "scramble": True, "generated_points": None, @@ -194,7 +193,7 @@ def test_view_defaults(self) -> None: self.assertTrue( all( kw in Generators.SOBOL.view_kwargs()[0] - for kw in ["seed", "deduplicate", "init_position", "scramble"] + for kw in ["seed", "init_position", "scramble"] ), all( kw in Generators.SOBOL.view_kwargs()[1] diff --git a/ax/benchmark/tests/test_benchmark.py b/ax/benchmark/tests/test_benchmark.py index c58b1441062..6b5789f2430 100644 --- a/ax/benchmark/tests/test_benchmark.py +++ b/ax/benchmark/tests/test_benchmark.py @@ -934,11 +934,7 @@ def test_replication_with_generation_node(self) -> None: nodes=[ GenerationNode( name="Sobol", - generator_specs=[ - GeneratorSpec( - Generators.SOBOL, generator_kwargs={"deduplicate": True} - ) - ], + generator_specs=[GeneratorSpec(Generators.SOBOL)], ) ] ), diff --git a/ax/generation_strategy/dispatch_utils.py b/ax/generation_strategy/dispatch_utils.py index ba5304a2ed0..e1ac3ed9c60 100644 --- a/ax/generation_strategy/dispatch_utils.py +++ b/ax/generation_strategy/dispatch_utils.py @@ -51,7 +51,7 @@ def _make_sobol_step( enforce_num_trials: bool = True, max_parallelism: int | None = None, seed: int | None = None, - should_deduplicate: bool = False, + should_deduplicate: bool = True, ) -> GenerationStep: """Shortcut for creating a Sobol generation step.""" return GenerationStep( @@ -61,7 +61,7 @@ def _make_sobol_step( min_trials_observed=min_trials_observed or ceil(num_trials / 2), enforce_num_trials=enforce_num_trials, max_parallelism=max_parallelism, - generator_kwargs={"deduplicate": True, "seed": seed}, + generator_kwargs={"seed": seed}, should_deduplicate=should_deduplicate, use_all_trials_in_exp=True, ) @@ -76,7 +76,7 @@ def _make_botorch_step( generator_kwargs: dict[str, Any] | None = None, winsorization_config: None | (WinsorizationConfig | dict[str, WinsorizationConfig]) = None, - should_deduplicate: bool = False, + should_deduplicate: bool = True, disable_progbar: bool | None = None, jit_compile: bool | None = None, derelativize_with_raw_status_quo: bool = False, @@ -299,7 +299,7 @@ def choose_generation_strategy_legacy( max_parallelism_cap: int | None = None, max_parallelism_override: int | None = None, optimization_config: OptimizationConfig | None = None, - should_deduplicate: bool = False, + should_deduplicate: bool = True, use_saasbo: bool = False, disable_progbar: bool | None = None, jit_compile: bool | None = None, diff --git a/ax/generation_strategy/generation_node.py b/ax/generation_strategy/generation_node.py index 89b9a0ec1ae..288dac6f0f2 100644 --- a/ax/generation_strategy/generation_node.py +++ b/ax/generation_strategy/generation_node.py @@ -122,7 +122,6 @@ class GenerationNode(SerializationMixin, SortableBase): # Required options: generator_specs: list[GeneratorSpec] - # TODO: Move `should_deduplicate` to `GeneratorSpec` if possible, and make optional should_deduplicate: bool _name: str @@ -150,7 +149,7 @@ def __init__( generator_specs: list[GeneratorSpec], transition_criteria: Sequence[TransitionCriterion] | None = None, best_model_selector: BestModelSelector | None = None, - should_deduplicate: bool = False, + should_deduplicate: bool = True, input_constructors: TInputConstructorsByPurpose | None = 
None, previous_node_name: str | None = None, trial_type: str | None = None, @@ -1014,7 +1013,7 @@ def __new__( min_trials_observed: int = 0, max_parallelism: int | None = None, enforce_num_trials: bool = True, - should_deduplicate: bool = False, + should_deduplicate: bool = True, generator_name: str | None = None, use_all_trials_in_exp: bool = False, use_update: bool = False, # DEPRECATED. diff --git a/ax/generation_strategy/tests/test_dispatch_utils.py b/ax/generation_strategy/tests/test_dispatch_utils.py index c382dd9fe3a..d14b4ad3b16 100644 --- a/ax/generation_strategy/tests/test_dispatch_utils.py +++ b/ax/generation_strategy/tests/test_dispatch_utils.py @@ -860,22 +860,24 @@ def test_max_parallelism_adjustments(self) -> None: self.assertEqual(self._get_max_parallelism(sobol_gpei._nodes[1]), 10) def test_set_should_deduplicate(self) -> None: + # Default is now should_deduplicate=True sobol_gpei = choose_generation_strategy_legacy( search_space=get_branin_search_space(), use_batch_trials=True, num_initialization_trials=3, ) self.assertListEqual( - [s.should_deduplicate for s in sobol_gpei._nodes], [False] * 2 + [s.should_deduplicate for s in sobol_gpei._nodes], [True] * 2 ) + # Explicitly set should_deduplicate=False sobol_gpei = choose_generation_strategy_legacy( search_space=get_branin_search_space(), use_batch_trials=True, num_initialization_trials=3, - should_deduplicate=True, + should_deduplicate=False, ) self.assertListEqual( - [s.should_deduplicate for s in sobol_gpei._nodes], [True] * 2 + [s.should_deduplicate for s in sobol_gpei._nodes], [False] * 2 ) def test_setting_experiment_attribute(self) -> None: diff --git a/ax/generation_strategy/tests/test_generation_strategy.py b/ax/generation_strategy/tests/test_generation_strategy.py index cb954845614..ea5b66610f0 100644 --- a/ax/generation_strategy/tests/test_generation_strategy.py +++ b/ax/generation_strategy/tests/test_generation_strategy.py @@ -360,7 +360,10 @@ def tearDown(self) -> None: self.registry_setup_dict_patcher.stop() def _get_sobol_mbm_step_gs( - self, num_sobol_trials: int = 5, num_mbm_trials: int = -1 + self, + num_sobol_trials: int = 5, + num_mbm_trials: int = -1, + should_deduplicate: bool = False, ) -> GenerationStrategy: return GenerationStrategy( name="Sobol+MBM", @@ -369,12 +372,14 @@ def _get_sobol_mbm_step_gs( generator=Generators.SOBOL, num_trials=num_sobol_trials, generator_kwargs=self.step_generator_kwargs, + should_deduplicate=should_deduplicate, ), GenerationStep( generator=Generators.BOTORCH_MODULAR, num_trials=num_mbm_trials, generator_kwargs=self.step_generator_kwargs, enforce_num_trials=True, + should_deduplicate=should_deduplicate, ), ], ) @@ -565,7 +570,6 @@ def test_sobol_MBM_strategy(self) -> None: generator_kwargs, { "seed": expected_seed, - "deduplicate": True, "init_position": i, "scramble": True, "fallback_to_sample_polytope": False, @@ -800,7 +804,6 @@ def test_deduplication_and_fallback(self) -> None: GenerationStrategyRepeatedPoints: GeneratorSpec( generator_enum=Generators.SOBOL, generator_key_override="Fallback_Sobol", - generator_kwargs={"deduplicate": False}, ) }, ]: @@ -828,10 +831,9 @@ def test_deduplication_and_fallback(self) -> None: generator_specs=[ GeneratorSpec( generator_enum=Generators.SOBOL, - generator_kwargs={"deduplicate": False}, ) ], - # Disable model-level deduplication. + # GNode-level deduplication is on. 
should_deduplicate=True, fallback_specs=fallback_specs, # pyre-ignore[6] ), @@ -844,12 +846,14 @@ def test_deduplication_and_fallback(self) -> None: self.assertEqual(len(exp.arms_by_signature), 2) if fallback_specs is None: - # With default fallback model. It tries to deduplicate but the - # search space is exhaused and errors out. - with self.assertRaisesRegex( - SearchSpaceExhausted, "Rejection sampling error" - ): + # With default fallback model (SOBOL without deduplication). + # The fallback SOBOL generator doesn't deduplicate, so it just + # generates a point and logs a warning. + with self.assertLogs(GenerationNode.__module__, logging.WARNING) as cm: g = sobol.gen_single_trial(exp) + self.assertTrue( + any("gen failed with error" in msg for msg in cm.output) + ) elif len(fallback_specs) == 0: # With no fallback specs. with ( @@ -1550,7 +1554,6 @@ def test_gs_with_generation_nodes(self) -> None: g._generator_kwargs, { "seed": expected_seed, - "deduplicate": True, "init_position": i, "scramble": True, "fallback_to_sample_polytope": False, diff --git a/ax/generators/random/base.py b/ax/generators/random/base.py index fbbcb6e148a..be44916f892 100644 --- a/ax/generators/random/base.py +++ b/ax/generators/random/base.py @@ -50,24 +50,17 @@ class RandomGenerator(Generator): `_gen_unconstrained`/`gen` can be directly implemented. Attributes: - deduplicate: If True (defaults to True), a single instantiation - of the model will not return the same point twice. This flag - is used in rejection sampling. seed: An optional seed value for scrambling. init_position: The initial state of the generator. This is the number of samples to fast-forward before generating new samples. Used to ensure that the re-loaded generator will continue generating from the same sequence rather than starting from scratch. - generated_points: A set of previously generated points to use - for deduplication. These should be provided in the raw transformed - space the model operates in. fallback_to_sample_polytope: If True, when rejection sampling fails, we fall back to the HitAndRunPolytopeSampler. """ def __init__( self, - deduplicate: bool = True, seed: int | None = None, init_position: int = 0, generated_points: npt.NDArray | None = None, @@ -75,14 +68,12 @@ def __init__( polytope_sampler_kwargs: dict[str, Any] | None = None, ) -> None: super().__init__() - self.deduplicate = deduplicate self.seed: int = ( seed if seed is not None else assert_is_instance(torch.randint(high=100_000, size=(1,)).item(), int) ) self.init_position = init_position - # Used for deduplication. self.fallback_to_sample_polytope = fallback_to_sample_polytope self.polytope_sampler_kwargs: dict[str, Any] = polytope_sampler_kwargs or {} self.attempted_draws: int = 0 @@ -116,7 +107,6 @@ def gen( fixed_features: dict[int, float] | None = None, model_gen_options: TConfig | None = None, rounding_func: Callable[[npt.NDArray], npt.NDArray] | None = None, - generated_points: npt.NDArray | None = None, ) -> tuple[npt.NDArray, npt.NDArray]: """Generate new candidates. @@ -133,8 +123,6 @@ def gen( model. rounding_func: A function that rounds an optimization result appropriately (e.g., according to `round-trip` transformations). - generated_points: A numpy array of shape `n x d` containing the - previously generated points to deduplicate against. 
Returns: 2-element tuple containing @@ -172,19 +160,17 @@ def gen( max_draws = int(assert_is_instance_of_tuple(max_draws, (int, float))) try: # Always rejection sample, but this only rejects if there are - # constraints or actual duplicates and deduplicate is specified. - # If rejection sampling fails, fall back to polytope sampling. + # constraints. If rejection sampling fails, fall back to polytope + # sampling. points, attempted_draws = rejection_sample( gen_unconstrained=self._gen_unconstrained, n=n, d=len(search_space_digest.bounds), tunable_feature_indices=tf_indices, linear_constraints=linear_constraints, - deduplicate=self.deduplicate, max_draws=max_draws, fixed_features=fixed_features, rounding_func=rounding_func, - existing_points=generated_points, ) except SearchSpaceExhausted as e: if has_continuous_parameters or self.fallback_to_sample_polytope: @@ -198,14 +184,6 @@ def gen( f"{self.__class__.__name__}." ) # If rejection sampling fails, try polytope sampler. - num_generated = ( - len(generated_points) if generated_points is not None else 0 - ) - interior_point = ( # A feasible point of shape `d x 1`. - torch.from_numpy(generated_points[-1].reshape((-1, 1))).double() - if generated_points is not None - else None - ) kwargs = {"n_burnin": 100, "n_thinning": 20} kwargs.update(self.polytope_sampler_kwargs) polytope_sampler: HitAndRunPolytopeSampler = HitAndRunPolytopeSampler( @@ -217,8 +195,8 @@ def gen( fixed_features=fixed_features, ), bounds=self._convert_bounds(bounds=search_space_digest.bounds), - interior_point=interior_point, - seed=self.seed + num_generated, + interior_point=None, + seed=self.seed, **kwargs, ) @@ -232,7 +210,7 @@ def gen_polytope_sampler( # in the polytope sampler, so we don't need to apply them here. return polytope_sampler.draw(n=n).numpy() - # we call rejection_sample here to reuse all the deduplication + # we call rejection_sample here for the constraint checking # logic points, _ = rejection_sample( gen_unconstrained=gen_polytope_sampler, @@ -240,11 +218,9 @@ def gen_polytope_sampler( d=len(search_space_digest.bounds), tunable_feature_indices=tf_indices, linear_constraints=linear_constraints, - deduplicate=self.deduplicate, max_draws=max_draws, fixed_features=fixed_features, rounding_func=rounding_func, - existing_points=generated_points, ) else: raise e diff --git a/ax/generators/random/sobol.py b/ax/generators/random/sobol.py index da1aa8f4362..c5ea9b616da 100644 --- a/ax/generators/random/sobol.py +++ b/ax/generators/random/sobol.py @@ -34,7 +34,6 @@ class SobolGenerator(RandomGenerator): def __init__( self, - deduplicate: bool = True, seed: int | None = None, init_position: int = 0, scramble: bool = True, @@ -43,7 +42,6 @@ def __init__( polytope_sampler_kwargs: dict[str, Any] | None = None, ) -> None: super().__init__( - deduplicate=deduplicate, seed=seed, init_position=init_position, generated_points=generated_points, @@ -84,7 +82,6 @@ def gen( fixed_features: dict[int, float] | None = None, model_gen_options: TConfig | None = None, rounding_func: Callable[[npt.NDArray], npt.NDArray] | None = None, - generated_points: npt.NDArray | None = None, ) -> tuple[npt.NDArray, npt.NDArray]: """Generate new candidates. @@ -99,8 +96,7 @@ def gen( should be fixed to a particular value during generation. rounding_func: A function that rounds an optimization result appropriately (e.g., according to `round-trip` transformations). - generated_points: A numpy array of shape `n x d` containing the - previously generated points to deduplicate against. 
+ Returns: 2-element tuple containing @@ -120,7 +116,6 @@ def gen( fixed_features=fixed_features, model_gen_options=model_gen_options, rounding_func=rounding_func, - generated_points=generated_points, ) if self.engine: self.init_position = none_throws(self.engine).num_generated diff --git a/ax/generators/random/uniform.py b/ax/generators/random/uniform.py index 66955001220..1d3987e0f02 100644 --- a/ax/generators/random/uniform.py +++ b/ax/generators/random/uniform.py @@ -23,14 +23,12 @@ class UniformGenerator(RandomGenerator): def __init__( self, - deduplicate: bool = True, seed: int | None = None, init_position: int = 0, generated_points: npt.NDArray | None = None, fallback_to_sample_polytope: bool = False, ) -> None: super().__init__( - deduplicate=deduplicate, seed=seed, init_position=init_position, generated_points=generated_points, diff --git a/ax/generators/tests/test_sobol.py b/ax/generators/tests/test_sobol.py index c537c669ecf..a5d8c235fd1 100644 --- a/ax/generators/tests/test_sobol.py +++ b/ax/generators/tests/test_sobol.py @@ -50,15 +50,9 @@ def test_SobolGeneratorAllTunable(self) -> None: state = generator._get_state() self.assertEqual(state.get("init_position"), 3) self.assertEqual(state.get("seed"), generator.seed) - generator.gen( - n=3, - search_space_digest=ssd, - rounding_func=lambda x: x, - generated_points=generated_points, - ) def test_SobolGeneratorFixedSpace(self) -> None: - generator = SobolGenerator(seed=0, deduplicate=False) + generator = SobolGenerator(seed=0) ssd = self._create_ssd(n_tunable=0, n_fixed=2) generated_points, _ = generator.gen( n=3, @@ -70,32 +64,6 @@ def test_SobolGeneratorFixedSpace(self) -> None: np_bounds = np.array(ssd.bounds) self.assertTrue(np.all(generated_points >= np_bounds[:, 0])) self.assertTrue(np.all(generated_points <= np_bounds[:, 1])) - # Should error out if deduplicating since there's only one feasible point. - generator = SobolGenerator(seed=0, deduplicate=True) - with self.assertRaisesRegex(SearchSpaceExhausted, "Rejection sampling"): - generated_points, _ = generator.gen( - n=3, - search_space_digest=ssd, - fixed_features={0: 1, 1: 2}, - rounding_func=lambda x: x, - ) - # But we can generate 1 point. - generated_points, _ = generator.gen( - n=1, - search_space_digest=ssd, - fixed_features={0: 1, 1: 2}, - rounding_func=lambda x: x, - ) - self.assertEqual(np.shape(generated_points), (1, 2)) - # Errors out if we have already generated the only point. 
- with self.assertRaisesRegex(SearchSpaceExhausted, "Rejection sampling"): - generator.gen( - n=1, - search_space_digest=ssd, - fixed_features={0: 1, 1: 2}, - rounding_func=lambda x: x, - generated_points=generated_points, - ) def test_SobolGeneratorNoScramble(self) -> None: generator = SobolGenerator(scramble=False) @@ -126,16 +94,15 @@ def test_SobolGeneratorOnline(self) -> None: rounding_func=lambda x: x, ) np_bounds = np.array(ssd.bounds) - all_generated = np.zeros((0, len(ssd.bounds))) + all_generated = [] for expected_points in bulk_generated_points: generated_points, weights = generator.gen( n=1, search_space_digest=ssd, fixed_features={fixed_param_index: 1}, rounding_func=lambda x: x, - generated_points=all_generated, ) - all_generated = np.vstack((all_generated, generated_points)) + all_generated.append(generated_points) self.assertEqual(weights, [1]) self.assertTrue(np.all(generated_points >= np_bounds[:, 0])) self.assertTrue(np.all(generated_points <= np_bounds[:, 1])) @@ -226,24 +193,6 @@ def rounding_function(x: npt.NDArray) -> npt.NDArray: self.assertTrue(np.all(generated_points @ A.transpose() <= b)) rounding_func.assert_called() - rounding_func.reset_mock() - with mock.patch( - "botorch.utils.sampling.sample_polytope", - wraps=sample_polytope, - ) as wrapped_sampler: - generator.gen( - n=3, - search_space_digest=ssd, - linear_constraints=(A, b), - rounding_func=rounding_func, - generated_points=generated_points, - ) - # last call for (6th generated point) uses seed=5 - self.assertEqual(wrapped_sampler.call_args.kwargs["seed"], 5) - self.assertEqual(wrapped_sampler.call_args.kwargs["n_thinning"], 3) - - rounding_func.assert_called() - def test_SobolGeneratorFallbackToPolytopeSamplerWithFixedParam(self) -> None: # Ten parameters with sum less than 1. In this example, the rejection # sampler gives a search space exhausted error. 
Testing fallback to @@ -292,7 +241,6 @@ def test_SobolGeneratorOnlineRestart(self) -> None: search_space_digest=ssd, fixed_features={fixed_param_index: 1}, rounding_func=lambda x: x, - generated_points=generated_points_first_batch, ) generated_points_two_trials = np.vstack( @@ -328,30 +276,3 @@ def test_SobolGeneratorMaxDraws(self) -> None: model_gen_options={"max_rs_draws": 0}, rounding_func=lambda x: x, ) - - def test_SobolGeneratorDedupe(self) -> None: - generator = SobolGenerator(seed=0, deduplicate=True) - n_tunable = fixed_param_index = 3 - ssd = self._create_ssd(n_tunable=n_tunable, n_fixed=1) - generated_points, _ = generator.gen( - n=2, - search_space_digest=ssd, - linear_constraints=( - np.array([[1, 1, 0, 0], [0, 1, 1, 0]]), - np.array([1, 1]), - ), - fixed_features={fixed_param_index: 1}, - rounding_func=lambda x: x, - ) - self.assertEqual(len(generated_points), 2) - generated_points, _ = generator.gen( - n=1, - search_space_digest=ssd, - linear_constraints=( - np.array([[1, 1, 0, 0], [0, 1, 1, 0]]), - np.array([1, 1]), - ), - fixed_features={fixed_param_index: 1}, - rounding_func=lambda x: x, - ) - self.assertEqual(len(generated_points), 1) diff --git a/ax/generators/tests/test_uniform.py b/ax/generators/tests/test_uniform.py index 3d1b0ef8060..5b6e718bd81 100644 --- a/ax/generators/tests/test_uniform.py +++ b/ax/generators/tests/test_uniform.py @@ -9,7 +9,6 @@ import numpy as np from ax.core.search_space import SearchSpaceDigest -from ax.exceptions.core import SearchSpaceExhausted from ax.generators.random.uniform import UniformGenerator from ax.utils.common.testutils import TestCase @@ -53,14 +52,9 @@ def test_with_fixed_space(self) -> None: generator = UniformGenerator(seed=self.seed) ssd = self._create_ssd(n_tunable=0, n_fixed=2) n = 3 - with self.assertRaises(SearchSpaceExhausted): - generator.gen( - n=3, - search_space_digest=ssd, - fixed_features={0: 1, 1: 2}, - rounding_func=lambda x: x, - ) - generator = UniformGenerator(seed=self.seed, deduplicate=False) + # With all fixed features, there's only one feasible point, but the generator + # will repeat it as many times as requested since there's no deduplication + # at the generator level anymore. 
generated_points, _ = generator.gen( n=3, search_space_digest=ssd, diff --git a/ax/generators/tests/test_utils.py b/ax/generators/tests/test_utils.py index ced7e3a16bd..72063b3966e 100644 --- a/ax/generators/tests/test_utils.py +++ b/ax/generators/tests/test_utils.py @@ -15,7 +15,6 @@ best_observed_point, enumerate_discrete_combinations, mk_discrete_choices, - remove_duplicates, ) from ax.utils.common.testutils import TestCase @@ -142,27 +141,6 @@ def test_BestObservedPoint(self) -> None: options={"method": "feasible_threshold"}, ) - def test_RemoveDuplicates(self) -> None: - existing_points = np.array([[0, 1], [0, 2]]) - - points_with_duplicates = np.array([[0, 1], [0, 2], [0, 3], [0, 1]]) - unique_points = remove_duplicates(points_with_duplicates) - expected_points = np.array([[0, 1], [0, 2], [0, 3]]) - self.assertTrue(np.array_equal(expected_points, unique_points)) - - unique_points = remove_duplicates(points_with_duplicates, existing_points) - expected_points = np.array([[0, 3]]) - self.assertTrue(np.array_equal(expected_points, unique_points)) - - points_without_duplicates = np.array([[0, 1], [0, 2], [0, 3], [0, 4]]) - expected_points = np.array([[0, 1], [0, 2], [0, 3], [0, 4]]) - unique_points = remove_duplicates(points_without_duplicates) - self.assertTrue(np.array_equal(expected_points, unique_points)) - - unique_points = remove_duplicates(points_without_duplicates, existing_points) - expected_points = np.array([[0, 3], [0, 4]]) - self.assertTrue(np.array_equal(expected_points, unique_points)) - def test_MkDiscreteChoices(self) -> None: ssd1 = SearchSpaceDigest( feature_names=["a", "b"], diff --git a/ax/generators/utils.py b/ax/generators/utils.py index fa36c7c4761..d4a5a89d19a 100644 --- a/ax/generators/utils.py +++ b/ax/generators/utils.py @@ -64,11 +64,9 @@ def rejection_sample( d: int, tunable_feature_indices: npt.NDArray, linear_constraints: tuple[npt.NDArray, npt.NDArray] | None = None, - deduplicate: bool = False, max_draws: int | None = None, fixed_features: dict[int, float] | None = None, rounding_func: Callable[[npt.NDArray], npt.NDArray] | None = None, - existing_points: npt.NDArray | None = None, ) -> tuple[npt.NDArray, int]: """Rejection sample in parameter space. Parameter space is typically [0, 1] for all tunable parameters. @@ -87,28 +85,16 @@ def rejection_sample( linear_constraints: A tuple of (A, b). For k linear constraints on d-dimensional x, A is (k x d) and b is (k x 1) such that A x <= b. - deduplicate: If true, reject points that are duplicates of previously - generated points. The points are deduplicated after applying the - rounding function. max_draws: Maximum number of attemped draws before giving up. fixed_features: A map {feature_index: value} for features that should be fixed to a particular value during generation. rounding_func: A function that rounds an optimization result appropriately (e.g., according to `round-trip` transformations). - existing_points: A set of previously generated points to use - for deduplication. These should be provided in the parameter - space model operates in. Returns: 2-element tuple containing the generated points and the number of attempted draws. """ - # We need to perform the round trip transformation on our generated point - # in order to deduplicate in the original search space. - # The transformation is applied above. - if deduplicate and rounding_func is None: - raise ValueError("Rounding function must be provided for deduplication.") - # Rejection sample with parameter constraints. 
points = np.zeros((0, d)) attempted_draws = 0 @@ -141,9 +127,6 @@ def rejection_sample( point = rounding_func(point) points = np.concatenate([points, point[None, :]], axis=0) - # Deduplicate: don't add the same point twice. - if deduplicate: - points = remove_duplicates(points, existing_points) attempted_draws += 1 if points.shape[0] < n: @@ -157,38 +140,6 @@ def rejection_sample( return (points, attempted_draws) -def remove_duplicates( - points: npt.NDArray, - existing_points: npt.NDArray | None = None, -) -> npt.NDArray: - """Remove any point in points if it is duplicate or contained in existing_points. - - Args: - points: Points to remove duplicates from. - existing_points: Additional points to check for duplicates. - - Returns: - Points with duplicates removed. - """ - if existing_points is None: - existing_points = np.empty((0, points.shape[1])) - - unique_points = np.empty((0, points.shape[1])) - for point in points: - # Check if point has already been added to unique points - if np.any(np.all(point == unique_points, axis=1)): - continue - - # Check if point is duplicate of existing points - if np.any(np.all(point == existing_points, axis=1)): - continue - - # Add current point to our collection of unique points - unique_points = np.concatenate([unique_points, point[None, :]], axis=0) - - return unique_points - - def add_fixed_features( tunable_points: npt.NDArray, d: int, diff --git a/ax/storage/sqa_store/tests/test_sqa_store.py b/ax/storage/sqa_store/tests/test_sqa_store.py index d0a9df77ce4..e01e35d32b5 100644 --- a/ax/storage/sqa_store/tests/test_sqa_store.py +++ b/ax/storage/sqa_store/tests/test_sqa_store.py @@ -792,13 +792,13 @@ def test_experiment_save_and_load_reduced_state( # 3. Try case with model state and search space + opt.config on a # generator run in the experiment. gr = Generators.SOBOL(experiment=exp).gen(1) - # Expecting model kwargs to have 7 fields (seed, deduplicate, init_position, + # Expecting model kwargs to have 6 fields (seed, init_position, # scramble, generated_points, fallback_to_sample_polytope, # polytope_sampler_kwargs) # and the rest of model-state info on generator run to have values too. generator_kwargs = gr._generator_kwargs self.assertIsNotNone(generator_kwargs) - self.assertEqual(len(generator_kwargs), 7) + self.assertEqual(len(generator_kwargs), 6) adapter_kwargs = gr._adapter_kwargs self.assertIsNotNone(adapter_kwargs) self.assertEqual(len(adapter_kwargs), 6)