Merge AI-1172-global-search-tests to AI-1172-refactor

mariankrotil · mariankrotil · commit f997732d2d04 · 2025-07-14T11:46:08.000+02:00
diff --git a/integtests/conftest.py b/integtests/conftest.py
@@ -174,7 +174,7 @@ def _create_tables(storage_client: SyncStorageClient) -> list[TableDef]:
     ConfigDef(
         component_id='ex-generic-v2',
         configuration_id=None,
-        internal_id='config1',
+        internal_id='test_config1',
     ),
 ]
 
diff --git a/integtests/data/proj/configs/ex-generic-v2/test_config1.json b/integtests/data/proj/configs/ex-generic-v2/test_config1.json
diff --git a/integtests/tools/test_search.py b/integtests/tools/test_search.py
@@ -5,7 +5,7 @@
 
 from integtests.conftest import BucketDef, ConfigDef, TableDef
 from keboola_mcp_server.client import KeboolaClient, SuggestedComponent
-from keboola_mcp_server.tools.search import GlobalSearchOutput, find_component_id, global_search
+from keboola_mcp_server.tools.search import GlobalSearchOutput, find_component_id, find_ids_by_name
 
 LOG = logging.getLogger(__name__)
 
@@ -19,7 +19,7 @@ async def test_global_search_end_to_end(
     configs: list[ConfigDef],
 ) -> None:
     """
-    Test the global_search tool end-to-end by searching for entities that exist in the test project.
+    Test the find_ids_by_name tool end-to-end by searching for items that exist in the test project.
     This verifies that the search returns expected results for buckets, tables, and configurations.
     """
 
@@ -28,48 +28,55 @@ async def test_global_search_end_to_end(
         LOG.warning('Global search is not available. Please enable it in the project settings.')
         pytest.skip('Global search is not available. Please enable it in the project settings.')
 
-    # Search for test entities by name prefix 'test' which should match our test data
-    result = await global_search(
-        ctx=mcp_context, name_prefixes=['test'], entity_types=tuple(), limit=50, offset=0  # Search all types
+    # Search for test items by name prefix 'test' which should match our test data
+    result = await find_ids_by_name(
+        ctx=mcp_context, name_prefixes=['test'], item_types=tuple(), limit=50, offset=0  # Search all types
     )
 
     # Verify the result structure
     assert isinstance(result, GlobalSearchOutput)
     assert isinstance(result.counts, dict)
-    assert isinstance(result.type_groups, list)
+    assert isinstance(result.groups, dict)
     assert 'total' in result.counts
 
     # Verify we found some results
-    assert result.counts['total'] > 0, 'Should find at least some test entities'
+    assert result.counts['total'] > 0, 'Should find at least some test items'
 
     # Create sets of expected IDs for verification
     expected_bucket_ids = {bucket.bucket_id for bucket in buckets}
     expected_table_ids = {table.table_id for table in tables}
     expected_config_ids = {config.configuration_id for config in configs if config.configuration_id}
 
     # Check that we can find test buckets
-    bucket_groups = [group for group in result.type_groups if group.group_type == 'bucket']
-    if bucket_groups:
-        bucket_group = bucket_groups[0]
-        found_bucket_ids = {item.id for item in bucket_group.group_items}
-        # At least some test buckets should be found
-        assert found_bucket_ids.intersection(expected_bucket_ids), 'Should find at least one test bucket'
+    bucket_groups = [group for group in result.groups.values() if group.type == 'bucket']
+    assert len(bucket_groups) == 1
+    bucket_group = bucket_groups[0]
+    found_bucket_ids = {item.id for item in bucket_group.items}
+    # At least some test buckets should be found
+    assert found_bucket_ids.intersection(expected_bucket_ids), 'Should find at least one test bucket'
 
     # Check that we can find test tables
-    table_groups = [group for group in result.type_groups if group.group_type == 'table']
-    if table_groups:
-        table_group = table_groups[0]
-        found_table_ids = {item.id for item in table_group.group_items}
-        # At least some test tables should be found
-        assert found_table_ids.intersection(expected_table_ids), 'Should find at least one test table'
+    table_groups = [group for group in result.groups.values() if group.type == 'table']
+    assert len(table_groups) == 1
+    table_group = table_groups[0]
+    found_table_ids = {item.id for item in table_group.items}
+    # At least some test tables should be found
+    assert found_table_ids.intersection(expected_table_ids), 'Should find at least one test table'
 
     # Check that we can find test configurations
-    config_groups = [group for group in result.type_groups if group.group_type == 'configuration']
-    if config_groups:
-        config_group = config_groups[0]
-        found_config_ids = {item.id for item in config_group.group_items}
-        # At least some test configurations should be found
-        assert found_config_ids.intersection(expected_config_ids), 'Should find at least one test configuration'
+    config_groups = [group for group in result.groups.values() if group.type == 'configuration']
+    assert len(config_groups) == 1
+    config_group = config_groups[0]
+    found_config_ids = {item.id for item in config_group.items}
+    # At least some test configurations should be found
+    assert found_config_ids.intersection(expected_config_ids), 'Should find at least one test configuration'
+
+    config_groups = [group for group in result.groups.values() if group.type == 'configuration']
+    assert len(config_groups) == 1
+    config_group = config_groups[0]
+    found_config_ids = {item.id for item in config_group.items}
+    # At least some test configurations should be found
+    assert found_config_ids.intersection(expected_config_ids), 'Should find at least one test configuration'
 
 
 @pytest.mark.asyncio
@@ -82,7 +89,5 @@ async def test_find_component_id(mcp_context: Context):
 
     assert isinstance(result, list)
     assert len(result) > 0
+    assert all(isinstance(component, SuggestedComponent) for component in result)
     assert generic_extractor_id in [component.component_id for component in result]
-
-    for component in result:
-        assert isinstance(component, SuggestedComponent)
diff --git a/src/keboola_mcp_server/client.py b/src/keboola_mcp_server/client.py
@@ -21,8 +21,8 @@
 # Project features that can be checked with the is_enabled method
 ProjectFeature = Literal['global-search']
 # Input types for the global search endpoint parameters
-GlobalSearchBranchType = Literal['production', 'development']
-GlobalSearchType = Literal[
+BranchType = Literal['production', 'development']
+ItemType = Literal[
     'flow',
     'bucket',
     'table',
@@ -362,7 +362,7 @@ class GlobalSearchResponse(BaseModel):
     class Item(BaseModel):
         id: str = Field(description='The id of the item.')
         name: str = Field(description='The name of the item.')
-        type: GlobalSearchType = Field(description='The type of the item.')
+        type: ItemType = Field(description='The type of the item.')
         full_path: dict[str, Any] = Field(
             description=(
                 'The full path of the item containing project, branch and other information depending on the '
@@ -878,7 +878,7 @@ async def global_search(
         query: str,
         limit: int = 100,
         offset: int = 0,
-        types: Sequence[GlobalSearchType] = tuple(),
+        types: Sequence[ItemType] = tuple(),
     ) -> GlobalSearchResponse:
         """
         Searches for items in the storage. It allows you to search for entities by name across all projects within an
diff --git a/src/keboola_mcp_server/tools/doc.py b/src/keboola_mcp_server/tools/doc.py
@@ -11,9 +11,6 @@
 
 LOG = logging.getLogger(__name__)
 
-MAX_GLOBAL_SEARCH_LIMIT = 100
-DEFAULT_GLOBAL_SEARCH_LIMIT = 50
-
 
 def add_doc_tools(mcp: FastMCP) -> None:
     """Add tools to the MCP server."""
diff --git a/src/keboola_mcp_server/tools/search.py b/src/keboola_mcp_server/tools/search.py
@@ -7,7 +7,7 @@
 from fastmcp.tools import FunctionTool
 from pydantic import BaseModel, Field
 
-from keboola_mcp_server.client import GlobalSearchResponse, GlobalSearchType, KeboolaClient, SuggestedComponent
+from keboola_mcp_server.client import GlobalSearchResponse, ItemType, KeboolaClient, SuggestedComponent
 from keboola_mcp_server.errors import tool_errors
 from keboola_mcp_server.mcp import with_session_state
 
@@ -20,8 +20,8 @@
 def add_search_tools(mcp: FastMCP) -> None:
     """Add tools to the MCP server."""
     search_tools = [
-        global_search,
         find_component_id,
+        find_ids_by_name,
     ]
     for tool in search_tools:
         LOG.info(f'Adding tool {tool.__name__} to the MCP server.')
@@ -30,19 +30,19 @@ def add_search_tools(mcp: FastMCP) -> None:
     LOG.info('Search tools initialized.')
 
 
-class GlobalSearchItemsGroup(BaseModel):
+class ItemsGroup(BaseModel):
     """Group of items of the same type found in the global search."""
 
-    class GroupItem(BaseModel):
+    class Item(BaseModel):
         """An item corresponding to its group type found in the global search."""
 
         name: str = Field(description='The name of the item.')
         id: str = Field(description='The id of the item.')
-        created: datetime = Field(description='The date and time the entity was created.')
+        created: datetime = Field(description='The date and time the item was created.')
         additional_info: dict[str, Any] = Field(description='Additional information about the item.')
 
         @classmethod
-        def from_api_response(cls, item: GlobalSearchResponse.Item) -> 'GlobalSearchItemsGroup.GroupItem':
+        def from_api_response(cls, item: GlobalSearchResponse.Item) -> 'ItemsGroup.Item':
             """Creates an Item from the item API response."""
             add_info = {}
             if item.type == 'table':
@@ -60,54 +60,51 @@ def from_api_response(cls, item: GlobalSearchResponse.Item) -> 'GlobalSearchItem
                     add_info['configuration_name'] = configuration_info['name']
             return cls.model_construct(name=item.name, id=item.id, created=item.created, additional_info=add_info)
 
-    group_type: GlobalSearchType = Field(description='The type of the items in the group.')
-    group_count: int = Field(description='Number of items in the group.')
-    group_items: list[GroupItem] = Field(
+    type: ItemType = Field(description='The type of the items in the group.')
+    count: int = Field(description='Number of items in the group.')
+    items: list[Item] = Field(
         description=('List of items for the type found in the global search, sorted by relevance and creation time.')
     )
 
     @classmethod
-    def from_api_response(
-        cls, group_type: GlobalSearchType, group_items: list[GlobalSearchResponse.Item]
-    ) -> 'GlobalSearchItemsGroup':
-        """Creates a GlobalSearchItemsGroupedByType from the API response items and a type."""
+    def from_api_response(cls, type: ItemType, items: list[GlobalSearchResponse.Item]) -> 'ItemsGroup':
+        """Creates an ItemsGroup from the API response items and a type."""
         # filter the items by the given type to be sure
-        group_items = [item for item in group_items if item.type == group_type]
+        items = [item for item in items if item.type == type]
         return cls.model_construct(
-            group_type=group_type,
-            group_count=len(group_items),
-            group_items=[GlobalSearchItemsGroup.GroupItem.from_api_response(item) for item in group_items],
+            type=type,
+            count=len(items),
+            items=[ItemsGroup.Item.from_api_response(item) for item in items],
         )
 
 
 class GlobalSearchOutput(BaseModel):
     """A result of a global search query for multiple name substrings."""
 
-    counts: dict[str, int] = Field(description='Number of items found for each type.')
-    type_groups: list[GlobalSearchItemsGroup] = Field(description='List of results grouped by type.')
+    counts: dict[str, int] = Field(description='Number of items in total and for each type.')
+    groups: dict[ItemType, ItemsGroup] = Field(description='Search results.')
 
     @classmethod
     def from_api_responses(cls, response: GlobalSearchResponse) -> 'GlobalSearchOutput':
-        """Creates a GlobalSearchResult from the API responses."""
-        items_by_type = defaultdict(list)
+        """Creates a GlobalSearchOutput from the API responses."""
+        items_by_type: defaultdict[ItemType, list[GlobalSearchResponse.Item]] = defaultdict(list)
         for item in response.items:
             items_by_type[item.type].append(item)
         return cls.model_construct(
             counts=response.by_type,  # contains counts for "total", and for each found type.
-            type_groups=[
-                GlobalSearchItemsGroup.from_api_response(group_type=type, group_items=items)
-                for type, items in sorted(items_by_type.items(), key=lambda x: x[0])
-            ],
+            groups={
+                type: ItemsGroup.from_api_response(type=type, items=items) for type, items in items_by_type.items()
+            },
         )
 
 
 @tool_errors()
 @with_session_state()
-async def global_search(
+async def find_ids_by_name(
     ctx: Context,
-    name_prefixes: Annotated[list[str], Field(description='Name prefixes to look for inside entity name.')],
-    entity_types: Annotated[
-        Sequence[GlobalSearchType], Field(description='Optional list of keboola object types to search for.')
+    name_prefixes: Annotated[list[str], Field(description='Name prefixes to match against item names.')],
+    item_types: Annotated[
+        Sequence[ItemType], Field(description='Optional list of keboola item types to filter by.')
     ] = tuple(),
     limit: Annotated[
         int,
@@ -116,14 +113,17 @@ async def global_search(
             f'{MAX_GLOBAL_SEARCH_LIMIT}).'
         ),
     ] = DEFAULT_GLOBAL_SEARCH_LIMIT,
-    offset: Annotated[int, Field(description='How many matching items to skip, pagination.')] = 0,
-) -> Annotated[GlobalSearchOutput, Field(description='Search results ordered by relevance, then creation time.')]:
+    offset: Annotated[int, Field(description='Number of matching items to skip, pagination.')] = 0,
+) -> Annotated[
+    GlobalSearchOutput,
+    Field(description='Search results grouped by item type, ordered by relevance and creation time.'),
+]:
     """
-    Searches for Keboola entities by each name prefix in the production branch of the current project, potentially
-    narrowed down by entity type, limited and paginated. Results are ordered by relevance, then creation time.
+    Searches for Keboola items in the production branch of the current project whose names match the given prefixes,
+    potentially narrowed down by item type, limited and paginated. Results are ordered by relevance, then creation time.
 
     Considerations:
-    - The search is purely name-based, and an entity is returned when its name or any word in the name starts with any
+    - The search is purely name-based, and an item is returned when its name or any word in the name starts with any
       of the "name_prefixes" parameter.
     """
 
@@ -144,7 +144,7 @@ async def global_search(
     # separately.
     joined_prefixes = ' '.join(name_prefixes)
     response = await client.storage_client.global_search(
-        query=joined_prefixes, types=entity_types, limit=limit, offset=offset
+        query=joined_prefixes, types=item_types, limit=limit, offset=offset
     )
     return GlobalSearchOutput.from_api_responses(response)
 
diff --git a/tests/test_server.py b/tests/test_server.py
@@ -26,6 +26,7 @@ async def test_list_tools(self):
             'create_sql_transformation',
             'docs_query',
             'find_component_id',
+            'find_ids_by_name',
             'get_bucket',
             'get_component',
             'get_config',
@@ -36,7 +37,6 @@ async def test_list_tools(self):
             'get_project_info',
             'get_sql_dialect',
             'get_table',
-            'global_search',
             'list_buckets',
             'list_configs',
             'list_flows',
diff --git a/tests/tools/test_search.py b/tests/tools/test_search.py

Original file line number	Diff line number	Diff line change
`@@ -174,7 +174,7 @@ def _create_tables(storage_client: SyncStorageClient) -> list[TableDef]:`
`174`	`174`	`ConfigDef(`
`175`	`175`	`component_id='ex-generic-v2',`
`176`	`176`	`configuration_id=None,`
`177`		`- internal_id='config1',`
	`177`	`+ internal_id='test_config1',`
`178`	`178`	`),`
`179`	`179`	`]`
`180`	`180`