python/pyproject.toml (2 changes: 1 addition & 1 deletion)
@@ -148,7 +148,7 @@ usearch = [
     "pyarrow >= 12.0,< 22.0"
 ]
 weaviate = [
-    "weaviate-client>=4.10,<5.0",
+    "weaviate-client>=4.10,<5.0,!=4.16.7",
 ]

 [tool.uv]
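A note on the dependency pin: `!=4.16.7` removes a single broken release while leaving the `>=4.10,<5.0` range unchanged. A quick sketch of how a PEP 440 specifier set with an exclusion evaluates, using the `packaging` library (the version strings are the ones from this diff):

```python
from packaging.specifiers import SpecifierSet

# The combined specifier from pyproject.toml: a range plus a single-version exclusion.
spec = SpecifierSet(">=4.10,<5.0,!=4.16.7")

print(spec.contains("4.16.6"))  # True: inside the range, not excluded
print(spec.contains("4.16.7"))  # False: the explicitly excluded release
print(spec.contains("4.17.0"))  # True: later releases are still allowed
```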
@@ -187,6 +187,8 @@ async def invoke(

 if store_enabled:
     thread.response_id = response.id
+    # Chain subsequent requests to this response so tool outputs are associated correctly
+    previous_response_id = response.id

 if response.status in cls.error_message_states:
     error_message = ""
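The `previous_response_id` assignment is the core of this fix: the OpenAI Responses API lets a request chain onto a stored response, so the server can associate incoming tool outputs with the tool calls it already holds. A minimal standalone sketch of that chaining pattern (model name and prompts are placeholders):

```python
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment

    # First turn: store the response server-side so later turns can chain onto it.
    first = await client.responses.create(
        model="gpt-4o-mini",  # placeholder model
        input="What is the capital of France?",
        store=True,
    )

    # Second turn: chain onto the stored response instead of resending the history.
    second = await client.responses.create(
        model="gpt-4o-mini",
        input="And what is its population?",
        previous_response_id=first.id,
        store=True,
    )
    print(second.output_text)


asyncio.run(main())
```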
@@ -216,7 +218,10 @@ async def invoke(

 response_message = cls._create_response_message_content(response, agent.ai_model_id, agent.name)  # type: ignore
 yield False, response_message
+# Update both histories so subsequent requests include tool call context
 chat_history.add_message(message=response_message)
+if override_history is not chat_history:
+    override_history.add_message(message=response_message)

 logger.info(f"processing {fc_count} tool calls in parallel.")

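The identity check (`is not`) rather than equality matters here: when the caller supplied no separate override history, both names refer to the same `ChatHistory` object, and adding the message twice would duplicate it. A small sketch of the pattern with Semantic Kernel's `ChatHistory` (the message content is made up):

```python
from semantic_kernel.contents import AuthorRole, ChatHistory, ChatMessageContent

chat_history = ChatHistory()
override_history = chat_history  # no override supplied: same object

message = ChatMessageContent(role=AuthorRole.ASSISTANT, content="calling a tool...")

chat_history.add_message(message=message)
if override_history is not chat_history:  # False here, so no duplicate append
    override_history.add_message(message=message)

assert len(chat_history.messages) == 1
```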
@@ -227,7 +232,7 @@
 *[
     kernel.invoke_function_call(
         function_call=function_call,
-        chat_history=chat_history,
+        chat_history=override_history,
         arguments=kwargs.get("arguments"),
         execution_settings=None,
         function_call_count=fc_count,
@@ -239,7 +244,7 @@
 )

 terminate_flag = any(result.terminate for result in results if result is not None)
-for msg in merge_function_results(chat_history.messages[-len(results) :]):
+for msg in merge_function_results(override_history.messages[-len(results) :]):
     # Terminate flag should only be true when the filter's terminate is true
     yield terminate_flag, msg
 else:
@@ -376,6 +381,8 @@ async def invoke_stream(
     logger.debug(f"Agent response created with ID: {event.response.id}")
     if store_enabled:
         thread.response_id = event.response.id
+        # Ensure subsequent requests link to this response context
+        previous_response_id = event.response.id
 case ResponseOutputItemAddedEvent():
     function_calls = cls._get_tool_calls_from_output([event.item])  # type: ignore
     if function_calls:
@@ -435,6 +442,8 @@
 output_messages.append(full_completion)
 function_calls = [item for item in full_completion.items if isinstance(item, FunctionCallContent)]
 chat_history.add_message(message=full_completion)
+if override_history is not chat_history:
+    override_history.add_message(message=full_completion)

 fc_count = len(function_calls)
 logger.info(f"processing {fc_count} tool calls in parallel.")
@@ -446,7 +455,7 @@
 *[
     kernel.invoke_function_call(
         function_call=function_call,
-        chat_history=chat_history,
+        chat_history=override_history,
         arguments=kwargs.get("arguments"),
         is_streaming=True,
         execution_settings=None,
@@ -462,7 +471,7 @@
 # Include the ai_model_id so we can later add two streaming messages together
 # Some settings may not have an ai_model_id, so we need to check for it
 function_result_messages = cls._merge_streaming_function_results(
-    messages=chat_history.messages[-len(results) :],  # type: ignore
+    messages=override_history.messages[-len(results) :],  # type: ignore
     name=agent.name,
     ai_model_id=agent.ai_model_id,  # type: ignore
     function_invoke_attempt=request_index,
@@ -493,7 +502,9 @@ async def _get_response(
 ) -> Response | AsyncStream[ResponseStreamEvent]:
     try:
         response: Response = await agent.client.responses.create(
-            input=cls._prepare_chat_history_for_request(chat_history),
+            input=cls._prepare_chat_history_for_request(
+                chat_history, store_output_enabled if store_output_enabled is not None else agent.store_enabled
+            ),
             instructions=merged_instructions or agent.instructions,
             previous_response_id=previous_response_id,
             store=store_output_enabled,
@@ -650,6 +661,7 @@ def _merge_streaming_function_results(
 def _prepare_chat_history_for_request(
     cls: type[_T],
     chat_history: "ChatHistory",
+    store_enabled: bool,
 ) -> Any:
     """Prepare the chat history for a request.

@@ -706,14 +718,14 @@ def _prepare_chat_history_for_request(

             contents.append({"type": "input_image", "image_url": image_url})
         case FunctionCallContent():
-            fc_dict = {
-                "type": "function_call",
-                "id": content.id,
-                "call_id": content.call_id,
-                "name": content.name,
-                "arguments": content.arguments,
-            }
-            response_inputs.append(fc_dict)
+            if not store_enabled:
+                fc_dict = {
+                    "type": "function_call",
+                    "call_id": content.call_id,
+                    "name": content.name,
+                    "arguments": content.arguments,
+                }
+                response_inputs.append(fc_dict)
         case FunctionResultContent():
             rfrc_dict = {
                 "type": "function_call_output",
@@ -864,7 +876,7 @@ def _create_output_item_done(
     metadata=metadata,
     role=AuthorRole(role_str),
     items=items,
-    status=Status(response.status) if hasattr(response, "status") else None,
+    status=Status(response.status) if getattr(response, "status", None) is not None else None,  # type: ignore
 )

 @classmethod
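The `hasattr` to `getattr(..., None) is not None` change is subtle but meaningful: `hasattr` returns True even when the attribute exists with the value `None`, in which case the old code would call `Status(None)`, which an enum lookup rejects. A two-line illustration (`resp` is a stand-in for the response object):

```python
from types import SimpleNamespace

resp = SimpleNamespace(status=None)  # attribute present, but unset

print(hasattr(resp, "status"))                    # True: would wrongly trigger Status(None)
print(getattr(resp, "status", None) is not None)  # False: correctly skipped
```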
python/semantic_kernel/data/vector.py (2 changes: 1 addition & 1 deletion)
@@ -1457,7 +1457,7 @@ async def get(
     get_args["top"] = kwargs.pop("top", None)
 if "skip" in kwargs:
     get_args["skip"] = kwargs.pop("skip", None)
-order_by: dict[str, bool] | None = None
+order_by: dict[str, bool] | None = None  # type: ignore
 if kw_order_by is not None:
     order_by = {}
     if isinstance(kw_order_by, str):
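The added `# type: ignore` quiets a checker that loses track of the `dict[str, bool] | None` annotation across the branches that build the dict. For orientation, a hedged sketch of the kind of normalization the surrounding code performs (the accepted input shapes and the bool-means-ascending convention are assumptions for this sketch, not the library's documented contract):

```python
def normalize_order_by(
    kw_order_by: str | list[str] | dict[str, bool] | None,
) -> dict[str, bool] | None:
    # Collapse the accepted shapes into a single dict[field, ascending?] form.
    if kw_order_by is None:
        return None
    if isinstance(kw_order_by, str):
        return {kw_order_by: True}
    if isinstance(kw_order_by, dict):
        return dict(kw_order_by)
    return {field: True for field in kw_order_by}


assert normalize_order_by("price") == {"price": True}
assert normalize_order_by(["price", "rating"]) == {"price": True, "rating": True}
```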
@@ -405,7 +405,7 @@ def test_prepare_chat_history_multiple_images_no_duplication():
 chat_history.add_message(message)

 # Call the method that was causing duplication
-result = ResponsesAgentThreadActions._prepare_chat_history_for_request(chat_history)
+result = ResponsesAgentThreadActions._prepare_chat_history_for_request(chat_history, True)

 # Verify we have exactly one message in the result
 assert len(result) == 1, f"Expected 1 message, got {len(result)}"
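Since `_prepare_chat_history_for_request` now takes the store flag, a companion test could pin the new gating behavior down directly. A hypothetical sketch, not part of this PR (the import path for `ResponsesAgentThreadActions` is assumed; the content types are the ones the diff already uses):

```python
from semantic_kernel.agents.open_ai.responses_agent_thread_actions import ResponsesAgentThreadActions
from semantic_kernel.contents import AuthorRole, ChatHistory, ChatMessageContent, FunctionCallContent


def test_function_calls_skipped_when_store_enabled():
    chat_history = ChatHistory()
    chat_history.add_message(
        ChatMessageContent(
            role=AuthorRole.ASSISTANT,
            items=[FunctionCallContent(id="1", call_id="c1", name="f", arguments="{}")],
        )
    )

    # With store enabled, function_call items should not be resent.
    stored = ResponsesAgentThreadActions._prepare_chat_history_for_request(chat_history, True)
    assert all(item.get("type") != "function_call" for item in stored)

    # With store disabled, the function_call item must be included.
    unstored = ResponsesAgentThreadActions._prepare_chat_history_for_request(chat_history, False)
    assert any(item.get("type") == "function_call" for item in unstored)
```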