Skip to content

Commit 6f0532f

Browse files
authored
[prompt] Fix LLM clients after #195, make LLM request construction again more explicit in LLM clients (#229)
1 parent 8a2d21f commit 6f0532f

File tree

6 files changed

+352
-266
lines changed
  • examples/src/main/kotlin/ai/koog/agents/example/media
  • prompt/prompt-executor/prompt-executor-clients
    • prompt-executor-anthropic-client/src/commonMain/kotlin/ai/koog/prompt/executor/clients/anthropic
    • prompt-executor-google-client/src/commonMain/kotlin/ai/koog/prompt/executor/clients/google
    • prompt-executor-openai-client/src/commonMain/kotlin/ai/koog/prompt/executor/clients/openai
    • prompt-executor-openrouter-client/src/commonMain/kotlin/ai/koog/prompt/executor/clients/openrouter

6 files changed

+352
-266
lines changed

examples/src/main/kotlin/ai/koog/agents/example/media/InstagramPostDescriber.kt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,11 @@ fun main() {
4747
}
4848

4949
runBlocking {
50+
println("OpenAI response:")
5051
openaiExecutor.execute(prompt, OpenAIModels.Chat.GPT4_1).content.also(::println)
52+
// println("Anthropic response:")
5153
// anthropicExecutor.execute(prompt, AnthropicModels.Sonnet_4).content.also(::println)
54+
// println("Google response:")
5255
// googleExecutor.execute(prompt, GoogleModels.Gemini2_0Flash).content.also(::println)
5356
}
5457
}

prompt/prompt-executor/prompt-executor-clients/prompt-executor-anthropic-client/src/commonMain/kotlin/ai/koog/prompt/executor/clients/anthropic/AnthropicLLMClient.kt

Lines changed: 35 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -190,13 +190,21 @@ public open class AnthropicLLMClient(
190190
model: LLModel,
191191
stream: Boolean
192192
): AnthropicMessageRequest {
193-
val (systemMessages, convMessages) = prompt.messages.partition { it is Message.System }
193+
val systemMessage = mutableListOf<SystemAnthropicMessage>()
194+
val messages = mutableListOf<AnthropicMessage>()
194195

195-
val messages = convMessages.fold(mutableListOf<AnthropicMessage>()) { acc, message ->
196+
for (message in prompt.messages) {
196197
when (message) {
197-
is Message.User -> acc.add(message.toAnthropicUserMessage(model))
198+
is Message.System -> {
199+
systemMessage.add(SystemAnthropicMessage(message.content))
200+
}
201+
202+
is Message.User -> {
203+
messages.add(message.toAnthropicUserMessage(model))
204+
}
205+
198206
is Message.Assistant -> {
199-
acc.add(
207+
messages.add(
200208
AnthropicMessage(
201209
role = "assistant",
202210
content = listOf(AnthropicContent.Text(message.content))
@@ -205,31 +213,35 @@ public open class AnthropicLLMClient(
205213
}
206214

207215
is Message.Tool.Result -> {
208-
val toolResult = AnthropicContent.ToolResult(
209-
toolUseId = message.id.orEmpty(),
210-
content = message.content
216+
messages.add(
217+
AnthropicMessage(
218+
role = "user",
219+
content = listOf(
220+
AnthropicContent.ToolResult(
221+
toolUseId = message.id ?: "",
222+
content = message.content
223+
)
224+
)
225+
)
211226
)
212-
acc.lastOrNull { it.role == "user" }?.let { lastUserMessage ->
213-
acc[acc.lastIndex] = lastUserMessage.copy(content = lastUserMessage.content + toolResult)
214-
} ?: acc.add(AnthropicMessage(role = "user", content = listOf(toolResult)))
215227
}
216228

217229
is Message.Tool.Call -> {
218-
val toolUse = AnthropicContent.ToolUse(
219-
id = message.id ?: Uuid.random().toString(),
220-
name = message.tool,
221-
input = Json.parseToJsonElement(message.content).jsonObject
230+
// Create a new assistant message with the tool call
231+
messages.add(
232+
AnthropicMessage(
233+
role = "assistant",
234+
content = listOf(
235+
AnthropicContent.ToolUse(
236+
id = message.id ?: Uuid.random().toString(),
237+
name = message.tool,
238+
input = Json.parseToJsonElement(message.content).jsonObject
239+
)
240+
)
241+
)
222242
)
223-
acc.lastOrNull { it.role == "assistant" }?.let { lastAssistantMessage ->
224-
acc[acc.lastIndex] = lastAssistantMessage.copy(content = lastAssistantMessage.content + toolUse)
225-
} ?: acc.add(AnthropicMessage(role = "assistant", content = listOf(toolUse)))
226-
}
227-
228-
is Message.System -> {
229-
logger.warn { "System messages already prepares for Anthropic. Ignoring: ${message.content}" }
230243
}
231244
}
232-
acc
233245
}
234246

235247
val anthropicTools = tools.map { tool ->
@@ -269,7 +281,7 @@ public open class AnthropicLLMClient(
269281
maxTokens = 2048, // This is required by the API
270282
// TODO why 0.7 and not 0.0?
271283
temperature = prompt.params.temperature ?: 0.7, // Default temperature if not provided
272-
system = systemMessages.map { SystemAnthropicMessage(it.content) },
284+
system = systemMessage,
273285
tools = if (tools.isNotEmpty()) anthropicTools else emptyList(), // Always provide a list for tools
274286
stream = stream,
275287
toolChoice = toolChoice,

prompt/prompt-executor/prompt-executor-clients/prompt-executor-google-client/src/commonMain/kotlin/ai/koog/prompt/executor/clients/google/GoogleLLMClient.kt

Lines changed: 99 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ public open class GoogleLLMClient(
190190
* @return A formatted GoogleAI request
191191
*/
192192
private fun createGoogleRequest(prompt: Prompt, model: LLModel, tools: List<ToolDescriptor>): GoogleRequest {
193-
val (systemMessages, convMessages) = prompt.messages.partition { it is Message.System }
193+
val systemMessageParts = mutableListOf<GooglePart.Text>()
194194
val contents = mutableListOf<GoogleContent>()
195195
val pendingCalls = mutableListOf<GooglePart.FunctionCall>()
196196

@@ -201,18 +201,55 @@ public open class GoogleLLMClient(
201201
}
202202
}
203203

204-
convMessages.forEach { message ->
205-
if (message is Message.Tool.Call) {
206-
pendingCalls += GooglePart.FunctionCall(
207-
functionCall = GoogleData.FunctionCall(
208-
id = message.id,
209-
name = message.tool,
210-
args = json.decodeFromString(message.content)
204+
for (message in prompt.messages) {
205+
when (message) {
206+
is Message.System -> {
207+
systemMessageParts.add(GooglePart.Text(message.content))
208+
}
209+
210+
is Message.User -> {
211+
flushCalls()
212+
// User messages become 'user' role content
213+
contents.add(message.toGoogleContent(model))
214+
}
215+
216+
is Message.Assistant -> {
217+
flushCalls()
218+
contents.add(
219+
GoogleContent(
220+
role = "model",
221+
parts = listOf(GooglePart.Text(message.content))
222+
)
211223
)
212-
)
213-
} else {
214-
flushCalls()
215-
contents += message.toGoogleContent(model) ?: return@forEach
224+
}
225+
226+
is Message.Tool.Result -> {
227+
flushCalls()
228+
contents.add(
229+
GoogleContent(
230+
role = "user",
231+
parts = listOf(
232+
GooglePart.FunctionResponse(
233+
functionResponse = GoogleData.FunctionResponse(
234+
id = message.id,
235+
name = message.tool,
236+
response = buildJsonObject { put("result", message.content) }
237+
)
238+
)
239+
)
240+
)
241+
)
242+
}
243+
244+
is Message.Tool.Call -> {
245+
pendingCalls += GooglePart.FunctionCall(
246+
functionCall = GoogleData.FunctionCall(
247+
id = message.id,
248+
name = message.tool,
249+
args = json.decodeFromString(message.content)
250+
)
251+
)
252+
}
216253
}
217254
}
218255
flushCalls()
@@ -236,9 +273,9 @@ public open class GoogleLLMClient(
236273
.takeIf { it.isNotEmpty() }
237274
?.let { declarations -> listOf(GoogleTool(functionDeclarations = declarations)) }
238275

239-
val googleSystemInstruction = systemMessages
276+
val googleSystemInstruction = systemMessageParts
240277
.takeIf { it.isNotEmpty() }
241-
?.let { GoogleContent(parts = it.map { message -> GooglePart.Text(message.content) }) }
278+
?.let { GoogleContent(parts = it) }
242279

243280
val generationConfig = GoogleGenerationConfig(
244281
temperature = if (model.capabilities.contains(LLMCapability.Temperature)) prompt.params.temperature else null,
@@ -269,84 +306,69 @@ public open class GoogleLLMClient(
269306
)
270307
}
271308

272-
private fun Message.toGoogleContent(model: LLModel): GoogleContent? = when (this) {
273-
is Message.User -> {
274-
val contentParts = buildList {
275-
if (content.isNotEmpty() || mediaContent.isEmpty()) {
276-
add(GooglePart.Text(content))
277-
}
278-
mediaContent.forEach { media ->
279-
when (media) {
280-
is MediaContent.Image -> {
281-
require(model.capabilities.contains(LLMCapability.Vision.Image)) {
282-
"Model ${model.id} does not support image"
283-
}
284-
if (media.isUrl()) {
285-
throw IllegalArgumentException("URL images not supported for Gemini models")
286-
}
287-
require(media.format in listOf("png", "jpg", "jpeg", "webp", "heic", "heif")) {
288-
"Image format ${media.format} not supported"
289-
}
290-
add(
291-
GooglePart.InlineData(
292-
GoogleData.Blob(
293-
mimeType = media.getMimeType(),
294-
data = media.toBase64()
295-
)
309+
private fun Message.User.toGoogleContent(model: LLModel): GoogleContent {
310+
val contentParts = buildList {
311+
if (content.isNotEmpty() || mediaContent.isEmpty()) {
312+
add(GooglePart.Text(content))
313+
}
314+
mediaContent.forEach { media ->
315+
when (media) {
316+
is MediaContent.Image -> {
317+
require(model.capabilities.contains(LLMCapability.Vision.Image)) {
318+
"Model ${model.id} does not support image"
319+
}
320+
if (media.isUrl()) {
321+
throw IllegalArgumentException("URL images not supported for Gemini models")
322+
}
323+
require(media.format in listOf("png", "jpg", "jpeg", "webp", "heic", "heif")) {
324+
"Image format ${media.format} not supported"
325+
}
326+
add(
327+
GooglePart.InlineData(
328+
GoogleData.Blob(
329+
mimeType = media.getMimeType(),
330+
data = media.toBase64()
296331
)
297332
)
333+
)
298334

299-
}
335+
}
300336

301-
is MediaContent.Audio -> {
302-
require(model.capabilities.contains(LLMCapability.Audio)) {
303-
"Model ${model.id} does not support audio"
304-
}
305-
require(media.format in listOf("wav", "mp3", "aiff", "aac", "ogg", "flac")) {
306-
"Audio format ${media.format} not supported"
307-
}
308-
add(GooglePart.InlineData(GoogleData.Blob(media.getMimeType(), media.toBase64())))
337+
is MediaContent.Audio -> {
338+
require(model.capabilities.contains(LLMCapability.Audio)) {
339+
"Model ${model.id} does not support audio"
309340
}
341+
require(media.format in listOf("wav", "mp3", "aiff", "aac", "ogg", "flac")) {
342+
"Audio format ${media.format} not supported"
343+
}
344+
add(GooglePart.InlineData(GoogleData.Blob(media.getMimeType(), media.toBase64())))
345+
}
310346

311-
is MediaContent.File -> {
312-
if (media.isUrl()) {
313-
throw IllegalArgumentException("URL files not supported for Gemini models")
314-
}
315-
add(
316-
GooglePart.InlineData(
317-
GoogleData.Blob(
318-
mimeType = media.getMimeType(),
319-
data = media.toBase64()
320-
)
347+
is MediaContent.File -> {
348+
if (media.isUrl()) {
349+
throw IllegalArgumentException("URL files not supported for Gemini models")
350+
}
351+
add(
352+
GooglePart.InlineData(
353+
GoogleData.Blob(
354+
mimeType = media.getMimeType(),
355+
data = media.toBase64()
321356
)
322357
)
323-
}
358+
)
359+
}
324360

325-
is MediaContent.Video -> {
326-
require(model.capabilities.contains(LLMCapability.Vision.Video)) {
327-
"Model ${model.id} does not support video"
328-
}
329-
add(GooglePart.InlineData(GoogleData.Blob(media.getMimeType(), media.toBase64())))
361+
is MediaContent.Video -> {
362+
require(model.capabilities.contains(LLMCapability.Vision.Video)) {
363+
"Model ${model.id} does not support video"
330364
}
365+
add(GooglePart.InlineData(GoogleData.Blob(media.getMimeType(), media.toBase64())))
331366
}
332367
}
333368
}
334-
GoogleContent(role = "user", parts = contentParts)
335369
}
336370

337-
is Message.Assistant -> GoogleContent(role = "model", parts = listOf(GooglePart.Text(content)))
338-
is Message.Tool.Result -> GoogleContent(
339-
role = "user",
340-
parts = listOf(
341-
GooglePart.FunctionResponse(
342-
functionResponse = GoogleData.FunctionResponse(
343-
id = id, name = tool, response = buildJsonObject { put("result", content) })
344-
)
345-
)
346-
)
347-
348-
is Message.Tool.Call -> null
349-
is Message.System -> null
371+
return GoogleContent(role = "user", parts = contentParts)
350372
}
351373

352374
/**

prompt/prompt-executor/prompt-executor-clients/prompt-executor-openai-client/src/commonMain/kotlin/ai/koog/prompt/executor/clients/openai/DataModel.kt

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,13 @@
11
package ai.koog.prompt.executor.clients.openai
22

3-
import kotlinx.serialization.InternalSerializationApi
4-
import kotlinx.serialization.KSerializer
5-
import kotlinx.serialization.SerialName
6-
import kotlinx.serialization.Serializable
7-
import kotlinx.serialization.SerializationException
3+
import kotlinx.serialization.*
84
import kotlinx.serialization.builtins.ListSerializer
95
import kotlinx.serialization.descriptors.PolymorphicKind
106
import kotlinx.serialization.descriptors.SerialDescriptor
117
import kotlinx.serialization.descriptors.buildSerialDescriptor
128
import kotlinx.serialization.encoding.Decoder
139
import kotlinx.serialization.encoding.Encoder
14-
import kotlinx.serialization.json.JsonArray
15-
import kotlinx.serialization.json.JsonDecoder
16-
import kotlinx.serialization.json.JsonEncoder
17-
import kotlinx.serialization.json.JsonNull
18-
import kotlinx.serialization.json.JsonObject
19-
import kotlinx.serialization.json.JsonPrimitive
10+
import kotlinx.serialization.json.*
2011
import kotlin.jvm.JvmInline
2112

2213
@Serializable
@@ -25,12 +16,20 @@ internal data class OpenAIRequest(
2516
val messages: List<OpenAIMessage>,
2617
val temperature: Double? = null,
2718
val tools: List<OpenAITool>? = null,
28-
val modalities: List<String>? = null,
19+
val modalities: List<OpenAIModalities>? = null,
2920
val audio: OpenAIAudioConfig? = null,
3021
val stream: Boolean = false,
3122
val toolChoice: OpenAIToolChoice? = null
3223
)
3324

25+
@Serializable
26+
internal enum class OpenAIModalities {
27+
@SerialName("text")
28+
Text,
29+
@SerialName("audio")
30+
Audio,
31+
}
32+
3433
@Serializable
3534
internal data class OpenAIMessage(
3635
val role: String,
@@ -206,10 +205,24 @@ internal sealed interface OpenAIToolChoice {
206205

207206
@Serializable
208207
internal data class OpenAIAudioConfig(
209-
val format: String = "wav",
210-
val voice: String = "alloy"
208+
val format: OpenAIAudioFormat,
209+
val voice: OpenAIAudioVoice,
211210
)
212211

212+
@Serializable
213+
internal enum class OpenAIAudioFormat {
214+
@SerialName("wav")
215+
WAV,
216+
@SerialName("pcm16")
217+
PCM16,
218+
}
219+
220+
@Serializable
221+
internal enum class OpenAIAudioVoice {
222+
@SerialName("alloy")
223+
Alloy,
224+
}
225+
213226
@Serializable
214227
internal data class OpenAIAudio(
215228
val data: String,

0 commit comments

Comments (0)