
Commit de44b71

Make list of MediaContent, add example, update docs
1 parent 6375ce0 commit de44b71


17 files changed (+406, −247 lines)


agents/agents-features/agents-features-trace/src/jvmTest/kotlin/ai/koog/agents/features/tracing/writer/TraceTestAPI.kt

Lines changed: 51 additions & 4 deletions
@@ -4,21 +4,68 @@ import ai.koog.agents.core.tools.ToolRegistry
 import ai.koog.agents.core.agent.AIAgent
 import ai.koog.agents.core.agent.config.AIAgentConfig
 import ai.koog.agents.core.agent.entity.AIAgentStrategy
+import ai.koog.prompt.dsl.AttachmentBuilder
 import ai.koog.prompt.dsl.prompt
 import ai.koog.prompt.executor.clients.openai.OpenAIModels
+import ai.koog.prompt.message.MediaContent
 import ai.koog.prompt.message.Message
 import ai.koog.prompt.message.RequestMetaInfo
 import ai.koog.prompt.message.ResponseMetaInfo
 import kotlinx.datetime.Clock

-val testClock: Clock = object : Clock {
+internal val testClock: Clock = object : Clock {
     override fun now(): kotlinx.datetime.Instant = kotlinx.datetime.Instant.parse("2023-01-01T00:00:00Z")
 }

-fun userMessage(content: String): Message.User = Message.User(content, metaInfo = RequestMetaInfo.create(testClock))
-fun assistantMessage(content: String): Message.Assistant = Message.Assistant(content, metaInfo = ResponseMetaInfo.create(testClock))
-fun systemMessage(content: String): Message.System = Message.System(content, metaInfo = RequestMetaInfo.create(testClock))
+/**
+ * Creates a user message with optional media attachments.
+ *
+ * The method constructs a user message using the provided text content and any additional
+ * media content defined via the `attachmentsBlock`. This allows the user to include
+ * various types of media attachments, such as images, audio files, or documents, alongside
+ * the message content.
+ *
+ * @param content The text content of the user message.
+ * @param attachmentsBlock A lambda function used to configure the media attachments
+ * for the message, using the `AttachmentBuilder` DSL.
+ * @return A `Message.User` object containing the message content, metadata,
+ * and any associated media attachments.
+ */
+fun userMessage(content: String, attachmentsBlock: AttachmentBuilder.() -> Unit): Message.User = Message.User(
+    content,
+    metaInfo = RequestMetaInfo.create(testClock),
+    mediaContent = AttachmentBuilder().apply(attachmentsBlock).build()
+)

+/**
+ * Creates an instance of [Message.Assistant] with the provided content and generated metadata.
+ *
+ * @param content The textual content of the assistant's message.
+ * @return A new instance of [Message.Assistant] containing the given content and metadata generated using the test clock.
+ */
+fun assistantMessage(content: String): Message.Assistant =
+    Message.Assistant(content, metaInfo = ResponseMetaInfo.create(testClock))
+
+/**
+ * Creates a system-generated message encapsulated in a [Message.System] instance.
+ *
+ * @param content The textual content of the system message.
+ * @return A [Message.System] object containing the provided content and autogenerated metadata.
+ */
+fun systemMessage(content: String): Message.System =
+    Message.System(content, metaInfo = RequestMetaInfo.create(testClock))
+
+/**
+ * Creates an AI agent with the specified configuration, strategy, and optional prompts.
+ *
+ * @param strategy The strategy used to define the workflow and execution pattern for the AI agent.
+ * @param promptId The identifier for the prompt configuration. If null, a default prompt ID will be used.
+ * @param systemPrompt Optional system-level message to include in the prompt. If null, a default message will be used.
+ * @param userPrompt Optional user-level message to include in the prompt. If null, a default message will be used.
+ * @param assistantPrompt Optional assistant response to include in the prompt. If null, a default response will be used.
+ * @param installFeatures Lambda function allowing additional features to be installed on the agent.
+ * @return A configured instance of the AIAgent class ready for execution.
+ */
 fun createAgent(
     strategy: AIAgentStrategy,
     promptId: String? = null,
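
For orientation, here is a minimal sketch of how the new `userMessage` overload could be used in a test, assuming the `AttachmentBuilder` DSL exposes the same `image(...)` helper used elsewhere in this commit; the file path is a placeholder and this snippet is not part of the diff:

```kotlin
// Hypothetical usage of the new test helper: build a user message whose
// mediaContent list carries one image attachment.
val request: Message.User = userMessage("What is shown in this picture?") {
    image("/path/to/photo.png")
}
```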

examples/build.gradle.kts

Lines changed: 1 addition & 0 deletions
@@ -71,6 +71,7 @@ registerRunExampleTask("runExampleMarkdownStreamingWithTool", "ai.koog.agents.ex
 registerRunExampleTask("runExampleRiderProjectTemplate", "ai.koog.agents.example.rider.project.template.RiderProjectTemplateKt")
 registerRunExampleTask("runExampleExecSandbox", "ai.koog.agents.example.execsandbox.ExecSandboxKt")
 registerRunExampleTask("runExampleLoopComponent", "ai.koog.agents.example.components.loop.ProjectGeneratorKt")
+registerRunExampleTask("runExampleInstagramPostDescriber", "ai.koog.agents.example.media.InstagramPostDescriberKt")

 dokka {
     dokkaSourceSets.named("main") {

examples/src/main/kotlin/ai/koog/agents/example/ApiKeyService.kt

Lines changed: 4 additions & 1 deletion
@@ -2,8 +2,11 @@ package ai.koog.agents.example

 internal object ApiKeyService {
     val openAIApiKey: String
-        get() = System.getenv("OPEN_AI_TOKEN") ?: throw IllegalArgumentException("OPENAI_API_KEY env is not set")
+        get() = System.getenv("OPENAI_API_KEY") ?: throw IllegalArgumentException("OPENAI_API_KEY env is not set")

     val anthropicApiKey: String
         get() = System.getenv("ANTHROPIC_API_KEY") ?: throw IllegalArgumentException("ANTHROPIC_API_KEY env is not set")
+
+    val googleApiKey: String
+        get() = System.getenv("GOOGLE_API_KEY") ?: throw IllegalArgumentException("GOOGLE_API_KEY env is not set")
 }

examples/src/main/kotlin/ai/koog/agents/example/media/InstagramPostDescriber.kt

Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
+package ai.koog.agents.example.media
+
+import ai.koog.agents.example.ApiKeyService
+import ai.koog.prompt.dsl.prompt
+import ai.koog.prompt.executor.clients.anthropic.AnthropicModels
+import ai.koog.prompt.executor.clients.google.GoogleModels
+import ai.koog.prompt.executor.model.PromptExecutorExt.execute
+import ai.koog.prompt.executor.clients.openai.OpenAIModels
+import ai.koog.prompt.executor.llms.all.simpleAnthropicExecutor
+import ai.koog.prompt.executor.llms.all.simpleGoogleAIExecutor
+import ai.koog.prompt.executor.llms.all.simpleOpenAIExecutor
+import ai.koog.prompt.markdown.markdown
+import kotlinx.coroutines.runBlocking
+
+fun main() {
+    val openaiExecutor = simpleOpenAIExecutor(ApiKeyService.openAIApiKey)
+    // val anthropicExecutor = simpleAnthropicExecutor(ApiKeyService.anthropicApiKey)
+    // val googleExecutor = simpleGoogleAIExecutor(ApiKeyService.googleApiKey)
+
+    val resourcePath =
+        object {}.javaClass.classLoader.getResource("images")?.path ?: error("images directory not found")
+
+    val prompt = prompt("example-prompt") {
+        system("You are a professional assistant that can write cool and funny descriptions for Instagram posts.")
+
+        user {
+            markdown {
+                +"I want to create a new post on Instagram."
+                br()
+                +"Can you write something creative under my Instagram post with the following photos?"
+                br()
+                h2("Requirements")
+                bulleted {
+                    item("It must be very funny and creative")
+                    item("It must increase my chance of becoming an ultra-famous blogger!!!!")
+                    item("It must not contain explicit content, harassment or bullying")
+                    item("It must be a short catching phrase")
+                    item("You must include relevant hashtags that would increase the visibility of my post")
+                }
+            }
+
+            attachments {
+                image("$resourcePath/photo1.png")
+                image("$resourcePath/photo2.png")
+            }
+        }
+    }
+
+    runBlocking {
+        openaiExecutor.execute(prompt, OpenAIModels.Chat.GPT4_1).content.also(::println)
+        // anthropicExecutor.execute(prompt, AnthropicModels.Sonnet_4).content.also(::println)
+        // googleExecutor.execute(prompt, GoogleModels.Gemini2_0Flash).content.also(::println)
+    }
+}
Two binary image files added (529 KB and 231 KB); not shown.
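
This example corresponds to the `runExampleInstagramPostDescriber` task registered in `examples/build.gradle.kts` above, so it can presumably be launched through that Gradle run task once `OPENAI_API_KEY` is set.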

prompt/prompt-executor/prompt-executor-clients/Module.md

Lines changed: 13 additions & 5 deletions
@@ -104,7 +104,9 @@ val response = client.execute(
     prompt = prompt {
         user {
             text("What do you see in this image?")
-            image("/path/to/image.jpg")
+            attachments {
+                image("/path/to/image.jpg")
+            }
         }
     },
     model = visionModel
@@ -115,7 +117,9 @@ val response = client.execute(
     prompt = prompt {
         user {
             text("Summarize this document")
-            document("/path/to/document.pdf")
+            attachments {
+                document("/path/to/document.pdf")
+            }
         }
     },
     model = documentModel
@@ -127,7 +131,9 @@ val response = client.execute(
     prompt = prompt {
         user {
             text("Transcribe this audio")
-            audio(audioData, "mp3")
+            attachments {
+                audio(audioData, "mp3")
+            }
         }
     },
     model = audioModel
@@ -138,8 +144,10 @@ val response = client.execute(
     prompt = prompt {
         user {
             text("Compare the image with the document content:")
-            image("/path/to/screenshot.png")
-            document("/path/to/report.pdf")
+            attachments {
+                image("/path/to/screenshot.png")
+                document("/path/to/report.pdf")
+            }
             text("What are the key differences?")
         }
     },
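
The snippets above show only the `prompt = ...` fragment of each `client.execute(...)` call. As a runnable counterpart, here is a small sketch that assembles the image case using the `simpleOpenAIExecutor` helper and the `execute` extension seen in the new example added by this commit; the prompt id, model choice, and file path are placeholders:

```kotlin
import ai.koog.prompt.dsl.prompt
import ai.koog.prompt.executor.clients.openai.OpenAIModels
import ai.koog.prompt.executor.llms.all.simpleOpenAIExecutor
import ai.koog.prompt.executor.model.PromptExecutorExt.execute
import kotlinx.coroutines.runBlocking

fun main() = runBlocking {
    // Executor built from an environment variable, mirroring ApiKeyService in this commit.
    val executor = simpleOpenAIExecutor(System.getenv("OPENAI_API_KEY"))

    val visionPrompt = prompt("vision-doc-example") {
        user {
            text("What do you see in this image?")
            // The attachments block wraps media content, as documented above.
            attachments {
                image("/path/to/image.jpg")
            }
        }
    }

    // execute(prompt, model) returns a response whose text is exposed via .content.
    println(executor.execute(visionPrompt, OpenAIModels.Chat.GPT4_1).content)
}
```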

prompt/prompt-executor/prompt-executor-clients/prompt-executor-anthropic-client/src/commonMain/kotlin/ai/koog/prompt/executor/clients/anthropic/AnthropicLLMClient.kt

Lines changed: 42 additions & 42 deletions
@@ -276,58 +276,58 @@ public open class AnthropicLLMClient(
         )
     }

-    private fun Message.User.toAnthropicUserMessage(model: LLModel): AnthropicMessage =
-        when (val media = mediaContent) {
-            null -> AnthropicMessage(role = "user", content = listOf(AnthropicContent.Text(content)))
-            is MediaContent.Image -> {
-                require(model.capabilities.contains(LLMCapability.Vision.Image)) {
-                    "Model ${model.id} does not support image"
-                }
-                val listOfContent = buildList {
-                    if (content.isNotEmpty()) {
-                        add(AnthropicContent.Text(content))
-                    }
-                    if (media.isUrl()) {
-                        add(AnthropicContent.Image(ImageSource.Url(media.source)))
-                    } else {
-                        require(media.format in listOf("png", "jpg", "jpeg", "webp", "gif")) {
-                            "Image format ${media.format} not supported"
-                        }
-                        add(
-                            AnthropicContent.Image(
-                                ImageSource.Base64(
-                                    data = media.toBase64(),
-                                    mediaType = media.getMimeType()
-                                )
-                            )
-                        )
-                    }
-                }
-                AnthropicMessage(role = "user", content = listOfContent)
-            }
-
-            is MediaContent.File -> {
-                require(model.capabilities.contains(LLMCapability.Vision.Image)) {
-                    "Model ${model.id} does not support files"
-                }
-                val listOfContent = buildList {
-                    if (content.isNotEmpty()) {
-                        add(AnthropicContent.Text(content))
-                    }
-                    val docSource = when {
-                        media.isUrl() -> DocumentSource.PDFUrl(media.source)
-                        media.format == "pdf" -> DocumentSource.PDFBase64(media.toBase64())
-                        media.format == "txt" || media.format == "md" -> DocumentSource.PlainText(media.readText())
-                        else -> throw IllegalArgumentException("File format ${media.format} not supported. Supported formats: `pdf`, `text`")
-                    }
-                    add(AnthropicContent.Document(docSource))
-                }
-                AnthropicMessage(role = "user", content = listOfContent)
-            }
-
-            else -> throw IllegalArgumentException("Media content not supported: $media")
-        }
+    private fun Message.User.toAnthropicUserMessage(model: LLModel): AnthropicMessage {
+        val listOfContent = buildList {
+            if (content.isNotEmpty() || mediaContent.isEmpty()) {
+                add(AnthropicContent.Text(content))
+            }
+
+            mediaContent.forEach { media ->
+                when (media) {
+                    is MediaContent.Image -> {
+                        require(model.capabilities.contains(LLMCapability.Vision.Image)) {
+                            "Model ${model.id} does not support image"
+                        }
+
+                        if (media.isUrl()) {
+                            add(AnthropicContent.Image(ImageSource.Url(media.source)))
+                        } else {
+                            require(media.format in listOf("png", "jpg", "jpeg", "webp", "gif")) {
+                                "Image format ${media.format} not supported"
+                            }
+                            add(
+                                AnthropicContent.Image(
+                                    ImageSource.Base64(
+                                        data = media.toBase64(),
+                                        mediaType = media.getMimeType()
+                                    )
+                                )
+                            )
+                        }
+                    }
+
+                    is MediaContent.File -> {
+                        require(model.capabilities.contains(LLMCapability.Vision.Image)) {
+                            "Model ${model.id} does not support files"
+                        }
+
+                        val docSource = when {
+                            media.isUrl() -> DocumentSource.PDFUrl(media.source)
+                            media.format == "pdf" -> DocumentSource.PDFBase64(media.toBase64())
+                            media.format == "txt" || media.format == "md" -> DocumentSource.PlainText(media.readText())
+                            else -> throw IllegalArgumentException("File format ${media.format} not supported. Supported formats: `pdf`, `text`")
+                        }
+                        add(AnthropicContent.Document(docSource))
+                    }
+
+                    else -> throw IllegalArgumentException("Media content not supported: $media")
+                }
+            }
+        }
+
+        return AnthropicMessage(role = "user", content = listOfContent)
+    }

     private fun processAnthropicResponse(response: AnthropicResponse): List<Message.Response> {
         // Extract token count from the response
         val inputTokensCount = response.usage?.inputTokens
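
The refactor above replaces the single `mediaContent` property match with iteration over a list, so one user message can now carry several attachments; the mapper emits one `AnthropicContent` entry per attachment, preceded by a `Text` entry when the message has text (or when there are no attachments at all). Below is a rough sketch of a prompt that exercises this path, using the `AttachmentBuilder` DSL from the other files in this commit and placeholder file paths:

```kotlin
import ai.koog.prompt.dsl.prompt

// Sketch only: a single user message with text plus two attachments, which the
// refactored toAnthropicUserMessage should flatten into one AnthropicMessage
// containing a Text block, an Image block, and a Document block.
val comparisonPrompt = prompt("multi-attachment-example") {
    user {
        text("Compare the screenshot with the report and list the differences.")
        attachments {
            image("/path/to/screenshot.png")
            document("/path/to/report.pdf")
        }
    }
}
```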
