def _parse_id_list(raw: str | None) -> list[str] | None:
    """Split a comma-separated CLI value into trimmed, non-empty IDs.

    Returns ``None`` when ``raw`` is empty/unset or contains no usable IDs,
    so that the corresponding filter is simply not applied.
    """
    if not raw:
        return None
    ids = [part.strip() for part in raw.split(",")]
    return [part for part in ids if part] or None


def cmd_hub_search(client: V7Client, args: argparse.Namespace) -> None:
    """Search across hub files using vector search.

    Args:
        client: Client whose ``search.search`` operation performs the query.
        args: Parsed CLI arguments; reads ``query``, ``hub_ids``,
            ``file_ids`` (comma-separated strings or ``None``) and ``limit``.

    Output:
        When ``is_tty()`` is true, prints a table (or "No results found.").
        Otherwise emits the results through ``success_output`` under a
        ``"data"`` key. ``APIError`` is reported via ``error_output``.
    """
    max_limit = 50  # API maximum, as stated in the warning below
    try:
        # Tolerate "a, b" and trailing commas: whitespace-padded or empty
        # entries would otherwise be sent to the API as bogus IDs.
        hub_ids = _parse_id_list(args.hub_ids)
        file_ids = _parse_id_list(args.file_ids)

        # Work on a local copy rather than mutating the caller's namespace.
        limit = args.limit
        if limit > max_limit:
            print("Warning: limit capped at 50 (API maximum)", file=sys.stderr)
            limit = max_limit

        results = client.search.search(
            query=args.query,
            hub_ids=hub_ids,
            file_ids=file_ids,
            limit=limit,
        )

        if not is_tty():
            success_output(
                {
                    "data": [
                        {
                            "file_id": r.file_id,
                            "similarity": r.similarity,
                            "byte_start": r.byte_start,
                            "byte_end": r.byte_end,
                            "chunk_content": r.chunk_content,
                            "token_count": r.token_count,
                        }
                        for r in results
                    ]
                }
            )
            return

        if not results:
            print("No results found.")
            return

        rows = [
            [
                r.file_id,
                f"{r.similarity:.4f}",
                f"{r.byte_start}-{r.byte_end}",
                # Collapse newlines/tabs so a multi-line chunk stays on one
                # table row, then truncate to the preview width.
                " ".join((r.chunk_content or "").split())[:80],
            ]
            for r in results
        ]
        table_output(
            ["File ID", "Similarity", "Bytes", "Content"],
            rows,
            [36, 12, 16, 80],  # presumably per-column widths; see table_output
        )
    except APIError as e:
        error_output(e)
h_search.add_argument("--file-ids", "-F", help="Comma-separated list of file IDs") + h_search.add_argument("--limit", "-l", type=int, default=10, help="Max results (default: 10)") + h_search.set_defaults(func=cmd_hub_search) + # ========== Agent Builder ========== agent = subparsers.add_parser("agent_builder", help="Create agents from natural language") agent.set_defaults(func=lambda _c, _a: agent.print_help()) diff --git a/v7_cli/core/client.py b/v7_cli/core/client.py index 8167f1b..be04e3f 100644 --- a/v7_cli/core/client.py +++ b/v7_cli/core/client.py @@ -188,7 +188,7 @@ def get(self, path: str, params: dict[str, Any] | None = None) -> dict[str, Any] # Filter out None values and URL-encode filtered_params = {k: v for k, v in params.items() if v is not None} if filtered_params: - query_string = urllib.parse.urlencode(filtered_params) + query_string = urllib.parse.urlencode(filtered_params, doseq=True) separator = "&" if "?" in path else "?" path = f"{path}{separator}{query_string}" return self._make_request("GET", path) diff --git a/v7_cli/core/types.py b/v7_cli/core/types.py index 87db1a1..e10cebe 100644 --- a/v7_cli/core/types.py +++ b/v7_cli/core/types.py @@ -444,3 +444,27 @@ def from_dict(cls, data: dict[str, Any]) -> "Hub": created_at=data.get("created_at"), updated_at=data.get("updated_at"), ) + + +@dataclass +class SearchResult: + """A single vector search result from a hub.""" + + file_id: str + similarity: float + byte_start: int + byte_end: int + chunk_content: str | None = None + token_count: int | None = None + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "SearchResult": + """Create from API response dict.""" + return cls( + file_id=data["file_id"], + similarity=data["similarity"], + byte_start=data["byte_start"], + byte_end=data["byte_end"], + chunk_content=data.get("chunk_content"), + token_count=data.get("token_count"), + ) diff --git a/v7_cli/sdk.py b/v7_cli/sdk.py index ad48f63..1e04c7b 100644 --- a/v7_cli/sdk.py +++ 
class SearchOperations:
    """Operations for vector search across hub files."""

    def __init__(self, client: APIClient):
        self._client = client

    def search(
        self,
        query: str,
        hub_ids: builtins.list[str] | None = None,
        file_ids: builtins.list[str] | None = None,
        limit: int = 10,
    ) -> builtins.list[SearchResult]:
        """
        Vector search across hub files.

        Issues a workspace-scoped GET to ``/search``. ID filters are sent
        under the ``hub_ids[]`` / ``file_ids[]`` query keys and are omitted
        entirely when the corresponding argument is ``None``.

        Args:
            query: The search text
            hub_ids: Optional list of hub IDs to search within
            file_ids: Optional list of file IDs to search within
            limit: Maximum number of results

        Returns:
            List of search results; empty when the response is not a dict
            or carries no ``data`` entries.

        """
        params: dict[str, Any] = {"query": query, "limit": limit}
        if hub_ids is not None:
            params["hub_ids[]"] = hub_ids
        if file_ids is not None:
            params["file_ids[]"] = file_ids

        response = self._client.workspace_get("/search", params=params)
        if not isinstance(response, dict):
            return []
        # ``or []`` also covers an explicit ``"data": null`` payload, which
        # ``.get("data", [])`` would pass through as None and fail to iterate.
        rows = response.get("data") or []
        return [SearchResult.from_dict(row) for row in rows]