Skip to content

Commit 701da3a

Browse files
.Net: Fix OpenAiResponseAgent chat message images removed by SK (#13019)
### Motivation and Context Closes #12888 ### Description <!-- Describe your changes, the overall approach, the underlying design. These notes will help understanding how your code works. Thanks! --> ### Contribution Checklist <!-- Before submitting this PR, please make sure: --> - [ ] The code builds clean without any errors or warnings - [ ] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [ ] All unit tests pass, and I have added new tests where possible - [ ] I didn't break anyone 😄
1 parent e65a0ab commit 701da3a

File tree

4 files changed

+154
-5
lines changed

4 files changed

+154
-5
lines changed

dotnet/samples/GettingStartedWithAgents/OpenAIResponse/Step01_OpenAIResponseAgent.cs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,4 +111,33 @@ public async Task UseOpenAIResponseAgentWithThreadedConversationStreamingAsync()
111111
agentThread = await WriteAgentStreamMessageAsync(responseItems);
112112
}
113113
}
114+
115+
[Fact]
public async Task UseOpenAIResponseAgentWithImageContentAsync()
{
    // Create the agent; its instructions ask for a detailed description that includes the weather.
    OpenAIResponseAgent agent = new(this.Client)
    {
        Name = "ResponseAgent",
        Instructions = "Provide a detailed description including the weather conditions.",
    };

    // Compose one user turn made of a text question followed by an image referenced by URL.
    Uri imageUri = new("https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg");
    ChatMessageContent userMessage = new(
        AuthorRole.User,
        items: [
            new TextContent("What is in this image?"),
            new ImageContent(imageUri)
        ]);
    ICollection<ChatMessageContent> messages = [userMessage];

    // Send the multi-part message to the agent and print each message it returns.
    await foreach (ChatMessageContent reply in agent.InvokeAsync(messages))
    {
        WriteAgentChatMessage(reply);
    }
}
114143
}

dotnet/src/Agents/OpenAI/Extensions/ChatContentMessageExtensions.cs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,14 @@ public static IEnumerable<ThreadInitializationMessage> ToThreadInitializationMes
4343
/// <returns>A <see cref="ResponseItem"/> instance.</returns>
4444
public static ResponseItem ToResponseItem(this ChatMessageContent message)
{
    // Project every item of the message (text, image, binary, file reference) to a content part.
    // The projection is deferred, so an unsupported role is reported before any item is converted.
    IEnumerable<ResponseContentPart> parts = message.Items.Select(part => part.ToResponseContentPart());

    // Role labels are matched case-insensitively by normalising to upper case.
    string roleKey = message.Role.Label.ToUpperInvariant();

    // NOTE(review): the same input-typed parts are used for every role, including assistant.
    // Confirm the service accepts input parts on assistant messages.
    switch (roleKey)
    {
        case "SYSTEM":
            return ResponseItem.CreateSystemMessageItem(parts);
        case "USER":
            return ResponseItem.CreateUserMessageItem(parts);
        case "DEVELOPER":
            return ResponseItem.CreateDeveloperMessageItem(parts);
        case "ASSISTANT":
            return ResponseItem.CreateAssistantMessageItem(parts);
        default:
            throw new NotSupportedException($"Unsupported role {message.Role.Label}. Only system, user, developer or assistant roles are allowed.");
    }
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// Copyright (c) Microsoft. All rights reserved.
2+
3+
using System;
4+
using OpenAI.Responses;
5+
6+
namespace Microsoft.SemanticKernel.Agents.OpenAI;
7+
8+
/// <summary>
/// Extension methods for <see cref="KernelContent"/> that convert content items
/// into <see cref="ResponseContentPart"/> instances.
/// </summary>
internal static class KernelContentExtensions
{
    /// <summary>
    /// Converts a <see cref="KernelContent"/> to a <see cref="ResponseContentPart"/> by dispatching on its runtime type.
    /// </summary>
    /// <param name="content">The content item to convert.</param>
    /// <returns>The converted <see cref="ResponseContentPart"/>.</returns>
    /// <exception cref="NotSupportedException">The content type is not text, image, binary or file reference.</exception>
    internal static ResponseContentPart ToResponseContentPart(this KernelContent content)
    {
        // NOTE(review): ImageContent is presumably a BinaryContent subtype, so it must stay listed
        // before the BinaryContent arm to be sent as an image rather than a file - confirm.
        return content switch
        {
            TextContent textContent => textContent.ToResponseContentPart(),
            ImageContent imageContent => imageContent.ToResponseContentPart(),
            BinaryContent binaryContent => binaryContent.ToResponseContentPart(),
            FileReferenceContent fileReferenceContent => fileReferenceContent.ToResponseContentPart(),
            _ => throw new NotSupportedException($"Unsupported content type {content.GetType().Name}. Cannot convert to {nameof(ResponseContentPart)}.")
        };
    }

    /// <summary>
    /// Converts a <see cref="TextContent"/> to an input text part.
    /// </summary>
    /// <param name="content">The text content to convert.</param>
    /// <returns>An input text <see cref="ResponseContentPart"/>.</returns>
    internal static ResponseContentPart ToResponseContentPart(this TextContent content)
    {
        // A missing text value is sent as an empty string instead of being passed through as null.
        return ResponseContentPart.CreateInputTextPart(content.Text ?? string.Empty);
    }

    /// <summary>
    /// Converts an <see cref="ImageContent"/> to an input image part, preferring its URI over inline data.
    /// </summary>
    /// <param name="content">The image content to convert.</param>
    /// <returns>An input image <see cref="ResponseContentPart"/>.</returns>
    /// <exception cref="NotSupportedException">The image has neither a URI nor binary data.</exception>
    internal static ResponseContentPart ToResponseContentPart(this ImageContent content)
    {
        if (content.Uri is not null)
        {
            return ResponseContentPart.CreateInputImagePart(content.Uri);
        }

        // Unwrap the nullable value so the raw bytes are wrapped by the ReadOnlyMemory<byte>
        // constructor of BinaryData rather than being treated as an arbitrary object.
        if (content.Data is { } imageBytes)
        {
            return ResponseContentPart.CreateInputImagePart(new BinaryData(imageBytes), content.MimeType);
        }

        throw new NotSupportedException("ImageContent cannot be converted to ResponseContentPart. Only ImageContent with a uri or binary data is supported.");
    }

    /// <summary>
    /// Converts a <see cref="BinaryContent"/> to an input file part carrying the bytes inline.
    /// </summary>
    /// <param name="content">The binary content to convert.</param>
    /// <returns>An input file <see cref="ResponseContentPart"/>.</returns>
    /// <exception cref="NotSupportedException">The content has no binary data.</exception>
    internal static ResponseContentPart ToResponseContentPart(this BinaryContent content)
    {
        // Unwrap the nullable value so the raw bytes are wrapped by the ReadOnlyMemory<byte>
        // constructor of BinaryData rather than being treated as an arbitrary object.
        if (content.Data is { } fileBytes)
        {
            // The content carries no file name here, so a fresh GUID is used as the file name.
            return ResponseContentPart.CreateInputFilePart(new BinaryData(fileBytes), content.MimeType, Guid.NewGuid().ToString());
        }

        throw new NotSupportedException("BinaryContent cannot be converted to ResponseContentPart. Only BinaryContent with binary data is supported.");
    }

    /// <summary>
    /// Converts a <see cref="FileReferenceContent"/> to an input file part that refers to the file by id.
    /// </summary>
    /// <param name="content">The file reference content to convert.</param>
    /// <returns>An input file <see cref="ResponseContentPart"/>.</returns>
    /// <exception cref="NotSupportedException">The content has no file id.</exception>
    internal static ResponseContentPart ToResponseContentPart(this FileReferenceContent content)
    {
        return content.FileId is not null
            ? ResponseContentPart.CreateInputFilePart(content.FileId)
            : throw new NotSupportedException("FileReferenceContent cannot be converted to ResponseContentPart. Only FileReferenceContent with a file id is supported.");
    }
}
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
// Copyright (c) Microsoft. All rights reserved.
2+
3+
using System;
4+
using System.Linq;
5+
using Microsoft.SemanticKernel;
6+
using Microsoft.SemanticKernel.Agents.OpenAI;
7+
using Microsoft.SemanticKernel.ChatCompletion;
8+
using OpenAI.Responses;
9+
using Xunit;
10+
11+
namespace SemanticKernel.Agents.UnitTests.OpenAI.Extensions;
12+
13+
/// <summary>
/// Unit tests for ChatContentMessageExtensions
/// </summary>
public class ChatContentMessageExtensionsTests
{
    /// <summary>
    /// Verifies that ToResponseItem keeps the message role and converts each of the four
    /// content items (text, image, binary, file reference) into its own content part.
    /// </summary>
    /// <param name="roleLabel">Label of the author role assigned to the message under test.</param>
    [Theory]
    [InlineData("User")]
    [InlineData("Assistant")]
    [InlineData("System")]
    public void VerifyToResponseItemWithUserChatMessageContent(string roleLabel)
    {
        // Arrange
        var role = new AuthorRole(roleLabel);
        var content = new ChatMessageContent(
            role,
            items: [
                new TextContent("What is in this image?"),
                new ImageContent(new Uri("https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg")),
                new BinaryContent(new ReadOnlyMemory<byte>([0x52, 0x49, 0x46, 0x46, 0x24, 0x08, 0x00, 0x00, 0x57, 0x41, 0x56, 0x45]), "audio/wav"),
                new FileReferenceContent("file-abc123")
            ]
        );

        // Act
        var responseItem = content.ToResponseItem();

        // Assert
        Assert.NotNull(responseItem);
        // IsType returns the typed instance, so no separate cast is needed.
        var messageResponseItem = Assert.IsType<MessageResponseItem>(responseItem, exactMatch: false);
        Assert.Equal(role.Label.ToUpperInvariant(), messageResponseItem.Role.ToString().ToUpperInvariant());
        Assert.Equal(4, messageResponseItem.Content.Count);

        // Validate TextContent conversion - should create exactly one InputText part
        var textContent = Assert.Single(messageResponseItem.Content, p => p.Kind == ResponseContentPartKind.InputText);
        Assert.Equal("What is in this image?", textContent.Text);

        // Validate ImageContent conversion - should create exactly one InputImage part
        Assert.Single(messageResponseItem.Content, p => p.Kind == ResponseContentPartKind.InputImage);

        // Validate BinaryContent conversion - should create an InputFile part carrying inline bytes
        var binaryContent = Assert.Single(messageResponseItem.Content, p => p.Kind == ResponseContentPartKind.InputFile && p.InputFileBytes is not null);
        Assert.Equal("audio/wav", binaryContent.InputFileBytesMediaType);

        // Validate FileReferenceContent conversion - should create an InputFile part referencing the file id
        var fileContent = Assert.Single(messageResponseItem.Content, p => p.Kind == ResponseContentPartKind.InputFile && p.InputFileId is not null);
        Assert.Equal("file-abc123", fileContent.InputFileId);
    }
}

0 commit comments

Comments
 (0)