From e63029a7a8e6003c0e401dce252f819aacfa4c39 Mon Sep 17 00:00:00 2001 From: Christopher Pereira Date: Mon, 10 Feb 2025 03:00:33 -0300 Subject: [PATCH 1/5] Add get_element_html() Fix https://github.com/ServiceNow/BrowserGym/issues/308 --- .../core/src/browsergym/core/action/functions.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/browsergym/core/src/browsergym/core/action/functions.py b/browsergym/core/src/browsergym/core/action/functions.py index bb31db9a..eefeae28 100644 --- a/browsergym/core/src/browsergym/core/action/functions.py +++ b/browsergym/core/src/browsergym/core/action/functions.py @@ -622,3 +622,16 @@ def mouse_upload_file(x: float, y: float, file: str | list[str]): file_chooser = fc_info.value file_chooser.set_files(file) + +def get_element_html(bid: str): + """ + Returns the HTML of an element identified by its bid. + + Examples: + get_element_html('123') + """ + elem = get_elem_by_bid(page, bid, demo_mode != "off") + if elem: + send_msg_to_user("HTML:\n" + elem.inner_html()) + else: + send_msg_to_user("The element with bid " + bid + " does not exist") From 35289a7e06307bdbf1cf6889e9a6d8fe31c60119 Mon Sep 17 00:00:00 2001 From: Christopher Pereira Date: Mon, 10 Feb 2025 03:07:43 -0300 Subject: [PATCH 2/5] Update highlevel.py --- browsergym/core/src/browsergym/core/action/highlevel.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/browsergym/core/src/browsergym/core/action/highlevel.py b/browsergym/core/src/browsergym/core/action/highlevel.py index da2c539c..b1eed5ea 100644 --- a/browsergym/core/src/browsergym/core/action/highlevel.py +++ b/browsergym/core/src/browsergym/core/action/highlevel.py @@ -38,6 +38,7 @@ tab_close, tab_focus, upload_file, + get_element_html, ) from .parsers import action_docstring_parser, highlevel_action_parser @@ -59,6 +60,7 @@ clear, drag_and_drop, upload_file, + get_element_html, ], "coord": [ scroll, From 921f1e136318c0a8ec5e24a554fac67100e43c89 Mon Sep 17 00:00:00 2001 From: Christopher 
Pereira Date: Wed, 12 Feb 2025 00:42:06 -0300 Subject: [PATCH 3/5] Add method add_observation() --- .../core/src/browsergym/core/action/base.py | 2 ++ .../src/browsergym/core/action/functions.py | 17 +++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/browsergym/core/src/browsergym/core/action/base.py b/browsergym/core/src/browsergym/core/action/base.py index 6f06303b..06a866f8 100644 --- a/browsergym/core/src/browsergym/core/action/base.py +++ b/browsergym/core/src/browsergym/core/action/base.py @@ -38,6 +38,7 @@ def execute_python_code( code: str, page: playwright.sync_api.Page, send_message_to_user: callable, + add_observation: callable, report_infeasible_instructions: callable, ): """ @@ -56,6 +57,7 @@ def execute_python_code( globals = { "page": page, "send_message_to_user": send_message_to_user, + "add_observation": add_observation, "report_infeasible_instructions": report_infeasible_instructions, "DEMO_MODE": get_global_demo_mode(), } diff --git a/browsergym/core/src/browsergym/core/action/functions.py b/browsergym/core/src/browsergym/core/action/functions.py index eefeae28..0107ed30 100644 --- a/browsergym/core/src/browsergym/core/action/functions.py +++ b/browsergym/core/src/browsergym/core/action/functions.py @@ -14,6 +14,7 @@ page: playwright.sync_api.Page = None send_message_to_user: callable = None +add_observation: callable = None report_infeasible_instructions: callable = None demo_mode: Literal["off", "default", "all_blue", "only_visible_elements"] = None retry_with_force: bool = False @@ -33,6 +34,8 @@ def send_msg_to_user(text: str): """ send_message_to_user(text) +def add_observation(obs: dict): + pass def report_infeasible(reason: str): """ @@ -632,6 +635,16 @@ def get_element_html(bid: str): """ elem = get_elem_by_bid(page, bid, demo_mode != "off") if elem: - send_msg_to_user("HTML:\n" + elem.inner_html()) + outer_html_content = elem.evaluate('elem => elem.outerHTML') + #send_msg_to_user(f"The HTML of the element 
with bid {bid} is:\n--- START ---\n" + outer_html_content + "\n--- END ---\n") + add_observation({ + "type": "generic", + "html": outer_html_content, + "bid": bid, + }) else: - send_msg_to_user("The element with bid " + bid + " does not exist") + #send_msg_to_user("The element with bid " + bid + " does not exist") + add_observation({ + "type": "generic", + "error": f"The element with bid {bid} doesn't exist" + }) From 6d53e60e50e83ea6416d0b856ab03e4e391fee4d Mon Sep 17 00:00:00 2001 From: Christopher Pereira Date: Wed, 12 Feb 2025 00:55:19 -0300 Subject: [PATCH 4/5] Update env.py --- browsergym/core/src/browsergym/core/env.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/browsergym/core/src/browsergym/core/env.py b/browsergym/core/src/browsergym/core/env.py index 30b565ba..abf97f4a 100644 --- a/browsergym/core/src/browsergym/core/env.py +++ b/browsergym/core/src/browsergym/core/env.py @@ -384,6 +384,11 @@ def send_message_to_user(text: str): raise ValueError(f"Forbidden value: {text} is not a string") self.chat.add_message(role="assistant", msg=text) + def add_observation(obs: dict): + if not isinstance(obs, dict): + raise ValueError(f"Forbidden value: {obs} is not a dict") + self.obs = obs + def report_infeasible_instructions(reason: str): if not isinstance(reason, str): raise ValueError(f"Forbidden value: {reason} is not a string") @@ -401,6 +406,7 @@ def report_infeasible_instructions(reason: str): code, self.page, send_message_to_user=send_message_to_user, + add_observation=add_observation, report_infeasible_instructions=report_infeasible_instructions, ) self.last_action_error = "" From 9ba5f0698c11db3efbbdbf603e61a8e7aa2b202a Mon Sep 17 00:00:00 2001 From: Christopher Pereira Date: Wed, 12 Feb 2025 01:46:36 -0300 Subject: [PATCH 5/5] Use set observation --- browsergym/core/src/browsergym/core/env.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/browsergym/core/src/browsergym/core/env.py
b/browsergym/core/src/browsergym/core/env.py index abf97f4a..dde9ac3b 100644 --- a/browsergym/core/src/browsergym/core/env.py +++ b/browsergym/core/src/browsergym/core/env.py @@ -76,6 +76,7 @@ def __init__( pw_context_kwargs: dict = {}, # agent-related arguments action_mapping: Optional[callable] = HighLevelActionSet().to_python_code, + obs: dict = None, ): """ Instantiate a ready to use BrowserEnv gym environment. @@ -397,6 +398,7 @@ def report_infeasible_instructions(reason: str): # try to execute the action logger.debug(f"Executing action") + self.obs = None try: if self.action_mapping: code = self.action_mapping(action) @@ -444,8 +446,11 @@ def report_infeasible_instructions(reason: str): if user_message: self.chat.add_message(role="user", msg=user_message) - # extract observation (generic) - obs = self._get_obs() + if self.obs: + obs = self.obs + else: + # extract observation (generic) + obs = self._get_obs() logger.debug(f"Observation extracted") # new step API wants a 5-tuple (gymnasium)