114 changes: 46 additions & 68 deletions ae/core/agents/browser_nav_agent.py
@@ -1,26 +1,28 @@
from datetime import datetime
from string import Template
import os

import autogen # type: ignore
import agentops

from ae.core.memory.static_ltm import get_user_ltm
from ae.core.prompts import LLM_PROMPTS
from ae.core.skills.click_using_selector import click as click_element

# from ae.core.skills.enter_text_and_click import enter_text_and_click
from ae.core.skills.enter_text_using_selector import bulk_enter_text
from ae.core.skills.enter_text_using_selector import entertext
from ae.core.skills.get_dom_with_content_type import get_dom_with_content_type
from ae.core.skills.get_url import geturl
from ae.core.skills.open_url import openurl
from ae.core.skills.pdf_text_extractor import extract_text_from_pdf

#from ae.core.skills.pdf_text_extractor import extract_text_from_pdf
from ae.core.skills.press_key_combination import press_key_combination

# Initialize AgentOps
agentops.init(os.getenv("AGENTOPS_API_KEY"))

@agentops.track_agent(name='BrowserNavAgent')
class BrowserNavAgent:
def __init__(self, config_list, browser_nav_executor: autogen.UserProxyAgent): # type: ignore
@agentops.record_function('init_browser_nav_agent')
def __init__(self, config_list, browser_nav_executor: autogen.UserProxyAgent):
"""
Initialize the BrowserNavAgent and store the AssistantAgent instance
as an instance attribute for external access.
@@ -33,7 +35,7 @@ def __init__(self, config_list, browser_nav_executor: autogen.UserProxyAgent): #
user_ltm = self.__get_ltm()
system_message = LLM_PROMPTS["BROWSER_AGENT_PROMPT"]
system_message = system_message + "\n" + f"Today's date is {datetime.now().strftime('%d %B %Y')}"
if user_ltm: #add the user LTM to the system prompt if it exists
if user_ltm:
user_ltm = "\n" + user_ltm
system_message = Template(system_message).substitute(basic_user_information=user_ltm)

@@ -50,75 +52,51 @@ def __init__(self, config_list, browser_nav_executor: autogen.UserProxyAgent): #
)
self.__register_skills()


@agentops.record_function('get_ltm')
def __get_ltm(self):
"""
Get the the long term memory of the user.
Get the long term memory of the user.
returns: str | None - The user LTM or None if not found.
"""
return get_user_ltm()


@agentops.record_function('register_skills')
def __register_skills(self):
"""
Register all the skills that the agent can perform.
"""

# Register openurl skill for LLM by assistant agent
self.agent.register_for_llm(description=LLM_PROMPTS["OPEN_URL_PROMPT"])(openurl)
# Register openurl skill for execution by user_proxy_agent
self.browser_nav_executor.register_for_execution()(openurl)

# Register enter_text_and_click skill for LLM by assistant agent
# self.agent.register_for_llm(description=LLM_PROMPTS["ENTER_TEXT_AND_CLICK_PROMPT"])(enter_text_and_click)
# Register enter_text_and_click skill for execution by user_proxy_agent
# self.browser_nav_executor.register_for_execution()(enter_text_and_click)

# Register get_dom_with_content_type skill for LLM by assistant agent
self.agent.register_for_llm(description=LLM_PROMPTS["GET_DOM_WITH_CONTENT_TYPE_PROMPT"])(get_dom_with_content_type)
# Register get_dom_with_content_type skill for execution by user_proxy_agent
self.browser_nav_executor.register_for_execution()(get_dom_with_content_type)

# Register click_element skill for LLM by assistant agent
self.agent.register_for_llm(description=LLM_PROMPTS["CLICK_PROMPT"])(click_element)
# Register click_element skill for execution by user_proxy_agent
self.browser_nav_executor.register_for_execution()(click_element)

# Register geturl skill for LLM by assistant agent
self.agent.register_for_llm(description=LLM_PROMPTS["GET_URL_PROMPT"])(geturl)
# Register geturl skill for execution by user_proxy_agent
self.browser_nav_executor.register_for_execution()(geturl)

# Register bulk_enter_text skill for LLM by assistant agent
self.agent.register_for_llm(description=LLM_PROMPTS["BULK_ENTER_TEXT_PROMPT"])(bulk_enter_text)
# Register bulk_enter_text skill for execution by user_proxy_agent
self.browser_nav_executor.register_for_execution()(bulk_enter_text)

# Register entertext skill for LLM by assistant agent
self.agent.register_for_llm(description=LLM_PROMPTS["ENTER_TEXT_PROMPT"])(entertext)
# Register entertext skill for execution by user_proxy_agent
self.browser_nav_executor.register_for_execution()(entertext)

# Register entertext skill for LLM by assistant agent
self.agent.register_for_llm(description=LLM_PROMPTS["PRESS_KEY_COMBINATION_PROMPT"])(press_key_combination)
# Register entertext skill for execution by user_proxy_agent
self.browser_nav_executor.register_for_execution()(press_key_combination)

self.agent.register_for_llm(description=LLM_PROMPTS["EXTRACT_TEXT_FROM_PDF_PROMPT"])(extract_text_from_pdf)
self.browser_nav_executor.register_for_execution()(extract_text_from_pdf)

'''
# Register reply function for printing messages
self.browser_nav_executor.register_reply( # type: ignore
[autogen.Agent, None],
reply_func=print_message_from_user_proxy,
config={"callback": None},
)
self.agent.register_reply( # type: ignore
[autogen.Agent, None],
reply_func=print_message_from_browser_agent,
config={"callback": None},
)
'''
# print(f">>> Function map: {self.browser_nav_executor.function_map}") # type: ignore
# print(">>> Registered skills for BrowserNavAgent and BrowserNavExecutorAgent")
skills = [
(openurl, LLM_PROMPTS["OPEN_URL_PROMPT"]),
(get_dom_with_content_type, LLM_PROMPTS["GET_DOM_WITH_CONTENT_TYPE_PROMPT"]),
(click_element, LLM_PROMPTS["CLICK_PROMPT"]),
(geturl, LLM_PROMPTS["GET_URL_PROMPT"]),
(bulk_enter_text, LLM_PROMPTS["BULK_ENTER_TEXT_PROMPT"]),
(entertext, LLM_PROMPTS["ENTER_TEXT_PROMPT"]),
(press_key_combination, LLM_PROMPTS["PRESS_KEY_COMBINATION_PROMPT"]),
(extract_text_from_pdf, LLM_PROMPTS["EXTRACT_TEXT_FROM_PDF_PROMPT"]),
]

for skill, prompt in skills:
self.__register_skill(skill, prompt)

@agentops.record_function('register_skill')
def __register_skill(self, skill, prompt):
"""
Register a single skill for both the agent and the executor.
"""
self.agent.register_for_llm(description=prompt)(skill)
self.browser_nav_executor.register_for_execution()(skill)

# Example usage
if __name__ == "__main__":
# This is just a placeholder. You'd typically create this with actual config and executor.
config_list = [{}]
browser_nav_executor = autogen.UserProxyAgent(name="executor")

browser_nav_agent = BrowserNavAgent(config_list, browser_nav_executor)

# Simulate some actions
browser_nav_agent.agent.generate_reply("Open https://www.example.com")

# End the AgentOps session
agentops.end_session('Success')
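
For reference, a minimal sketch of the AgentOps instrumentation pattern this diff introduces: initialize once, decorate the agent class and its methods, and close the session when work is done. Everything except the agentops calls already shown in the diff (init, track_agent, record_function, end_session) is illustrative.

import os

import agentops

agentops.init(os.getenv("AGENTOPS_API_KEY"))  # one session per process, keyed by the env var as in the diff

@agentops.track_agent(name='ExampleAgent')  # records agent lifecycle events
class ExampleAgent:
    @agentops.record_function('do_work')  # records each call as a named event
    def do_work(self, task: str) -> str:
        return f"done: {task}"

agent = ExampleAgent()
agent.do_work("open https://www.example.com")
agentops.end_session('Success')  # report the outcome and flush events
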
57 changes: 45 additions & 12 deletions ae/core/agents/high_level_planner_agent.py
@@ -1,17 +1,23 @@
from datetime import datetime
from string import Template
import os

import autogen # type: ignore
from autogen import ConversableAgent # type: ignore
import agentops

from ae.core.memory.static_ltm import get_user_ltm
from ae.core.post_process_responses import final_reply_callback_planner_agent as print_message_as_planner # type: ignore
from ae.core.prompts import LLM_PROMPTS
from ae.core.skills.get_user_input import get_user_input

# Initialize AgentOps
agentops.init(os.getenv("AGENTOPS_API_KEY"))

@agentops.track_agent(name='PlannerAgent')
class PlannerAgent:
def __init__(self, config_list, user_proxy_agent:ConversableAgent): # type: ignore
@agentops.record_function('init_planner_agent')
def __init__(self, config_list, user_proxy_agent: ConversableAgent):
"""
Initialize the PlannerAgent and store the AssistantAgent instance
as an instance attribute for external access.
@@ -20,14 +26,14 @@ def __init__(self, config_list, user_proxy_agent:ConversableAgent): # type: igno
- config_list: A list of configuration parameters required for AssistantAgent.
- user_proxy_agent: An instance of the UserProxyAgent class.
"""

user_ltm = self.__get_ltm()
system_message = LLM_PROMPTS["PLANNER_AGENT_PROMPT"]

if user_ltm: #add the user LTM to the system prompt if it exists
if user_ltm: # add the user LTM to the system prompt if it exists
user_ltm = "\n" + user_ltm
system_message = Template(system_message).substitute(basic_user_information=user_ltm)
system_message = system_message + "\n" + f"Today's date is {datetime.now().strftime('%d %B %Y')}"

self.agent = autogen.AssistantAgent(
name="planner_agent",
system_message=system_message,
@@ -36,26 +42,53 @@ def __init__(self, config_list, user_proxy_agent:ConversableAgent): # type: igno
"cache_seed": None,
"temperature": 0.0,
"top_p": 0.001,
"seed":12345
"seed": 12345
},
)

self.__register_skills(user_proxy_agent)
self.__register_reply()

@agentops.record_function('get_ltm')
def __get_ltm(self):
"""
Get the long term memory of the user.
returns: str | None - The user LTM or None if not found.
"""
return get_user_ltm()

@agentops.record_function('register_skills')
def __register_skills(self, user_proxy_agent: ConversableAgent):
"""
Register all the skills that the agent can perform.
"""
# Register get_user_input skill for LLM by assistant agent
self.agent.register_for_llm(description=LLM_PROMPTS["GET_USER_INPUT_PROMPT"])(get_user_input)
# Register get_user_input skill for execution by user_proxy_agent
user_proxy_agent.register_for_execution()(get_user_input)

self.agent.register_reply( # type: ignore
@agentops.record_function('register_reply')
def __register_reply(self):
"""
Register the reply function for the agent.
"""
self.agent.register_reply( # type: ignore
[autogen.AssistantAgent, None],
reply_func=print_message_as_planner,
config={"callback": None},
ignore_async_in_sync_chat=True
)

def __get_ltm(self):
"""
Get the the long term memory of the user.
returns: str | None - The user LTM or None if not found.
"""
return get_user_ltm()

# Example usage
if __name__ == "__main__":
# This is just a placeholder. You'd typically create this with actual config and user_proxy_agent.
config_list = [{}]
user_proxy_agent = ConversableAgent(name="user_proxy")

planner_agent = PlannerAgent(config_list, user_proxy_agent)

# Simulate some actions
planner_agent.agent.generate_reply("Plan a task")

# End the AgentOps session
agentops.end_session('Success')
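
For reference, both agents assemble their system prompt the same way: substitute the user's long-term memory into the prompt template, then append today's date. A stdlib-only sketch of that assembly, using placeholder prompt text and LTM in place of the real LLM_PROMPTS entry and get_user_ltm() value:

from datetime import datetime
from string import Template

system_message = "You are a planner.$basic_user_information"  # placeholder prompt template
user_ltm = "Prefers concise answers."                          # placeholder long-term memory

if user_ltm:  # add the user LTM to the system prompt if it exists
    user_ltm = "\n" + user_ltm
    system_message = Template(system_message).substitute(basic_user_information=user_ltm)
system_message = system_message + "\n" + f"Today's date is {datetime.now().strftime('%d %B %Y')}"
print(system_message)
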
33 changes: 25 additions & 8 deletions ae/core/autogen_wrapper.py
@@ -10,8 +10,8 @@
import autogen # type: ignore
import nest_asyncio # type: ignore
import openai
import agentops # Add this import

#from autogen import Cache
from dotenv import load_dotenv

from ae.config import SOURCE_LOG_FOLDER_PATH
@@ -48,7 +48,11 @@ def __init__(self, max_chat_round: int = 1000):
self.config_list: list[dict[str, str]] | None = None
self.chat_logs_dir: str = SOURCE_LOG_FOLDER_PATH

# Initialize AgentOps
agentops.init(os.getenv("AGENTOPS_API_KEY"))

@classmethod
@agentops.record_function('create_autogen_wrapper')
async def create(cls, agents_needed: list[str] | None = None, max_chat_round: int = 1000):
"""
Create an instance of AutogenWrapper.
@@ -162,7 +166,6 @@ def reflection_message(recipient, messages, sender, config): # type: ignore

return self


def get_chat_logs_dir(self) -> str|None:
"""
Get the directory for saving chat logs.
@@ -183,14 +186,14 @@ def set_chat_logs_dir(self, chat_logs_dir: str):
"""
self.chat_logs_dir = chat_logs_dir


@agentops.record_function('save_chat_log')
def __save_chat_log(self, chat_log: list[dict[str, Any]]):
chat_logs_file = os.path.join(self.get_chat_logs_dir() or "", f"nested_chat_log_{str(time_ns())}.json")
# Save the chat log to a file
with open(chat_logs_file, "w") as file:
json.dump(chat_log, file, indent=4)


@agentops.record_function('initialize_agents')
async def __initialize_agents(self, agents_needed: list[str]):
"""
Instantiate all agents with their appropriate prompts/skills.
@@ -223,7 +226,7 @@ async def __initialize_agents(self, agents_needed: list[str]):
raise ValueError(f"Unknown agent type: {agent_needed}")
return agents_map


@agentops.record_function('create_user_delegate_agent')
async def __create_user_delegate_agent(self) -> autogen.ConversableAgent:
"""
Create a ConversableAgent instance.
@@ -267,6 +270,7 @@ def is_planner_termination_message(x: dict[str, str])->bool: # type: ignore
)
return task_delegate_agent

@agentops.record_function('create_browser_nav_executor_agent')
def __create_browser_nav_executor_agent(self):
"""
Create a UserProxyAgent instance for executing browser control.
@@ -297,6 +301,7 @@ def is_browser_executor_termination_message(x: dict[str, str])->bool: # type: ig
print(">>> Created browser_nav_executor_agent:", browser_nav_executor_agent)
return browser_nav_executor_agent

@agentops.record_function('create_browser_nav_agent')
def __create_browser_nav_agent(self, user_proxy_agent: UserProxyAgent_SequentialFunctionExecution) -> autogen.ConversableAgent:
"""
Create a BrowserNavAgent instance.
@@ -312,6 +317,7 @@ def __create_browser_nav_agent(self, user_proxy_agent: UserProxyAgent_Sequential
#print(">>> browser agent tools:", json.dumps(browser_nav_agent.agent.llm_config.get("tools"), indent=2))
return browser_nav_agent.agent

@agentops.record_function('create_planner_agent')
def __create_planner_agent(self, assistant_agent: autogen.ConversableAgent):
"""
Create a Planner Agent instance. This is mainly used for exploration at this point
@@ -323,6 +329,7 @@ def __create_planner_agent(self, assistant_agent: autogen.ConversableAgent):
planner_agent = PlannerAgent(self.config_list, assistant_agent) # type: ignore
return planner_agent.agent

@agentops.record_function('process_command')
async def process_command(self, command: str, current_url: str | None = None) -> autogen.ChatResult | None:
"""
Process a command by sending it to one or more agents.
@@ -341,15 +348,13 @@ async def process_command(self, command: str, current_url: str | None = None) ->

prompt = Template(LLM_PROMPTS["COMMAND_EXECUTION_PROMPT"]).substitute(command=command, current_url_prompt_segment=current_url_prompt_segment)
logger.info(f"Prompt for command: {prompt}")
#with Cache.disk() as cache:
try:
if self.agents_map is None:
raise ValueError("Agents map is not initialized.")

result=await self.agents_map["user"].a_initiate_chat( # type: ignore
result = await self.agents_map["user"].a_initiate_chat( # type: ignore
self.agents_map["planner_agent"], # self.manager # type: ignore
max_turns=self.number_of_rounds,
#clear_history=True,
message=prompt,
silent=False,
cache=None,
Expand All @@ -362,4 +367,16 @@ async def process_command(self, command: str, current_url: str | None = None) ->
except openai.BadRequestError as bre:
logger.error(f"Unable to process command: \"{command}\". {bre}")
traceback.print_exc()
finally:
agentops.end_session('Success') # Or 'Fail' based on the outcome

# Additional methods or code that might be part of the AutogenWrapper class can be added here

# Example usage of the AutogenWrapper class
async def main():
wrapper = await AutogenWrapper.create()
result = await wrapper.process_command("Your command here")
print(result)

if __name__ == "__main__":
asyncio.run(main())
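
One hedged way the "Or 'Fail' based on the outcome" note in process_command's finally block could be realized: track the outcome that was actually reached instead of always reporting 'Success'. This is a sketch, not part of the PR, and it assumes end_session would then be removed from process_command itself so the session is not closed twice.

import agentops

async def run_command(wrapper, command: str):
    """Run a command on an AutogenWrapper and report the reached outcome to AgentOps."""
    outcome = 'Fail'
    try:
        result = await wrapper.process_command(command)
        outcome = 'Success'
        return result
    finally:
        agentops.end_session(outcome)  # 'Success' only if process_command returned normally
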