
Commit ebd08c8

nomisRev and Bruno Lannoo authored and committed
Removed suspend modifier from LLMClient.executeStreaming (JetBrains#240)
1 parent 7042544 commit ebd08c8

9 files changed: +121 -123 lines changed
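The change repeated across these files is a single API adjustment: LLMClient.executeStreaming loses its suspend modifier and becomes a plain function returning a cold Flow<String>, so obtaining the stream no longer requires a coroutine, while collecting it still does. The following stand-alone Kotlin sketch (stand-in types, not the Koog classes) illustrates the before/after shape of the signature and what it means at a call site.

// A minimal, self-contained sketch (not the Koog sources): StreamingClient stands in for
// LLMClient, and String stands in for Prompt/LLModel, to show only the shape of the change.
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.flow
import kotlinx.coroutines.runBlocking

// Before: obtaining the Flow itself required a suspend context.
interface OldStreamingClient {
    suspend fun executeStreaming(prompt: String, model: String): Flow<String>
}

// After: executeStreaming is a plain function returning a cold Flow;
// only collecting the flow needs a coroutine.
interface NewStreamingClient {
    fun executeStreaming(prompt: String, model: String): Flow<String>
}

class EchoClient : NewStreamingClient {
    override fun executeStreaming(prompt: String, model: String): Flow<String> = flow {
        // Work happens lazily, when the flow is collected.
        emit("echo: ")
        emit(prompt)
    }
}

fun main() = runBlocking {
    val stream = EchoClient().executeStreaming("hello", "demo-model") // no suspend needed here
    stream.collect { chunk -> print(chunk) }                          // collection still suspends
    println()
}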

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/KotlinAIAgentWithMultipleLLMIntegrationTest.kt

Lines changed: 6 additions & 3 deletions

@@ -32,7 +32,9 @@ import ai.koog.prompt.params.LLMParams
 import kotlinx.coroutines.CoroutineScope
 import kotlinx.coroutines.DelicateCoroutinesApi
 import kotlinx.coroutines.channels.Channel
+import kotlinx.coroutines.coroutineScope
 import kotlinx.coroutines.flow.Flow
+import kotlinx.coroutines.flow.flow
 import kotlinx.coroutines.launch
 import kotlinx.coroutines.runBlocking
 import kotlinx.coroutines.test.runTest
@@ -78,8 +80,8 @@ internal class ReportingLLMLLMClient(
         return underlyingClient.execute(prompt, model, tools)
     }

-    override suspend fun executeStreaming(prompt: Prompt, model: LLModel): Flow<String> {
-        CoroutineScope(coroutineContext).launch {
+    override fun executeStreaming(prompt: Prompt, model: LLModel): Flow<String> = flow {
+        coroutineScope {
             eventsChannel.send(
                 Event.Message(
                     llmClient = underlyingClient::class.simpleName ?: "null",
@@ -90,7 +92,8 @@ internal class ReportingLLMLLMClient(
                 )
             )
         }
-        return underlyingClient.executeStreaming(prompt, model)
+        underlyingClient.executeStreaming(prompt, model)
+            .collect(this)
     }
 }

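In the test wrapper above, the old implementation launched a reporting coroutine and returned the underlying flow; the new one builds a cold flow that sends the reporting event when collection starts and then forwards every chunk from the underlying client with collect(this). A minimal, self-contained sketch of that wrapper pattern (stand-in names, not the Koog test code):

// Stand-alone sketch of the wrapper pattern used above: a cold flow performs a side
// effect when collection starts, then forwards every chunk from the underlying flow
// to its own collector (emitAll is equivalent to underlying.collect(this)).
import kotlinx.coroutines.channels.Channel
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.emitAll
import kotlinx.coroutines.flow.flow
import kotlinx.coroutines.flow.flowOf
import kotlinx.coroutines.runBlocking

class ReportingFlow(
    private val underlying: Flow<String>,
    private val events: Channel<String>,
) {
    fun streaming(): Flow<String> = flow {
        events.send("streaming started")   // side effect runs per collection, not per call
        emitAll(underlying)                // forward all chunks to this flow's collector
    }
}

fun main() = runBlocking {
    val events = Channel<String>(capacity = Channel.UNLIMITED)
    val reporting = ReportingFlow(flowOf("a", "b", "c"), events)
    val chunks = mutableListOf<String>()
    reporting.streaming().collect { chunks += it }
    println(chunks)                           // [a, b, c]
    println(events.tryReceive().getOrNull())  // streaming started
}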
prompt/prompt-executor/prompt-executor-clients/prompt-executor-anthropic-client/src/commonMain/kotlin/ai/koog/prompt/executor/clients/anthropic/AnthropicLLMClient.kt

Lines changed: 25 additions & 27 deletions

@@ -142,44 +142,42 @@ public open class AnthropicLLMClient(
         }
     }

-    override suspend fun executeStreaming(prompt: Prompt, model: LLModel): Flow<String> {
+    override fun executeStreaming(prompt: Prompt, model: LLModel): Flow<String> = flow {
         logger.debug { "Executing streaming prompt: $prompt with model: $model without tools" }
         require(model.capabilities.contains(LLMCapability.Completion)) {
             "Model ${model.id} does not support chat completions"
         }

         val request = createAnthropicRequest(prompt, emptyList(), model, true)

-        return flow {
-            try {
-                httpClient.sse(
-                    urlString = DEFAULT_MESSAGE_PATH,
-                    request = {
-                        method = HttpMethod.Post
-                        accept(ContentType.Text.EventStream)
-                        headers {
-                            append(HttpHeaders.CacheControl, "no-cache")
-                            append(HttpHeaders.Connection, "keep-alive")
-                        }
-                        setBody(request)
-                    }
-                ) {
-                    incoming.collect { event ->
-                        event
-                            .takeIf { it.event == "content_block_delta" }
-                            ?.data?.trim()?.let { json.decodeFromString<AnthropicStreamResponse>(it) }
-                            ?.delta?.text?.let { emit(it) }
+        try {
+            httpClient.sse(
+                urlString = DEFAULT_MESSAGE_PATH,
+                request = {
+                    method = HttpMethod.Post
+                    accept(ContentType.Text.EventStream)
+                    headers {
+                        append(HttpHeaders.CacheControl, "no-cache")
+                        append(HttpHeaders.Connection, "keep-alive")
                     }
+                    setBody(request)
                 }
-            } catch (e: SSEClientException) {
-                e.response?.let { response ->
-                    logger.error { "Error from Anthropic API: ${response.status}: ${e.message}" }
-                    error("Error from Anthropic API: ${response.status}: ${e.message}")
+            ) {
+                incoming.collect { event ->
+                    event
+                        .takeIf { it.event == "content_block_delta" }
+                        ?.data?.trim()?.let { json.decodeFromString<AnthropicStreamResponse>(it) }
+                        ?.delta?.text?.let { emit(it) }
                 }
-            } catch (e: Exception) {
-                logger.error { "Exception during streaming: $e" }
-                error(e.message ?: "Unknown error during streaming")
             }
+        } catch (e: SSEClientException) {
+            e.response?.let { response ->
+                logger.error { "Error from Anthropic API: ${response.status}: ${e.message}" }
+                error("Error from Anthropic API: ${response.status}: ${e.message}")
+            }
+        } catch (e: Exception) {
+            logger.error { "Exception during streaming: $e" }
+            error(e.message ?: "Unknown error during streaming")
         }
     }

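In this and the following client files the streaming pattern itself is unchanged apart from moving the SSE call directly into the flow builder: incoming events are filtered, decoded with kotlinx.serialization, and only the text deltas are emitted. The sketch below shows just that decoding step over a plain Flow<String> of raw payloads; the event shape ("type", "delta.text") is a simplified assumption for illustration, not the exact Anthropic wire format or the AnthropicStreamResponse type used above.

// Sketch of the event-decoding step only (assumed shapes, not the Anthropic wire format):
// keep "content_block_delta"-like events, decode them with kotlinx.serialization,
// and emit just the text deltas.
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.flow
import kotlinx.coroutines.flow.flowOf
import kotlinx.coroutines.runBlocking
import kotlinx.serialization.Serializable
import kotlinx.serialization.decodeFromString
import kotlinx.serialization.json.Json

@Serializable
data class Delta(val text: String? = null)

@Serializable
data class StreamEvent(val type: String, val delta: Delta? = null)

private val json = Json { ignoreUnknownKeys = true }

fun textDeltas(rawEvents: Flow<String>): Flow<String> = flow {
    rawEvents.collect { data ->
        json.decodeFromString<StreamEvent>(data)
            .takeIf { it.type == "content_block_delta" }
            ?.delta?.text
            ?.let { emit(it) }
    }
}

fun main() = runBlocking {
    val raw = flowOf(
        """{"type":"message_start"}""",
        """{"type":"content_block_delta","delta":{"text":"Hel"}}""",
        """{"type":"content_block_delta","delta":{"text":"lo"}}""",
    )
    textDeltas(raw).collect { print(it) }  // prints: Hello
    println()
}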
prompt/prompt-executor/prompt-executor-clients/prompt-executor-google-client/src/commonMain/kotlin/ai/koog/prompt/executor/clients/google/GoogleLLMClient.kt

Lines changed: 27 additions & 29 deletions

@@ -138,46 +138,44 @@ public open class GoogleLLMClient(
         }
     }

-    override suspend fun executeStreaming(prompt: Prompt, model: LLModel): Flow<String> {
+    override fun executeStreaming(prompt: Prompt, model: LLModel): Flow<String> = flow {
         logger.debug { "Executing streaming prompt: $prompt with model: $model" }
         require(model.capabilities.contains(LLMCapability.Completion)) {
             "Model ${model.id} does not support chat completions"
         }

         val request = createGoogleRequest(prompt, model, emptyList())

-        return flow {
-            try {
-                httpClient.sse(
-                    urlString = "$DEFAULT_PATH/${model.id}:$DEFAULT_METHOD_STREAM_GENERATE_CONTENT",
-                    request = {
-                        method = HttpMethod.Post
-                        parameter("alt", "sse")
-                        accept(ContentType.Text.EventStream)
-                        headers {
-                            append(HttpHeaders.CacheControl, "no-cache")
-                            append(HttpHeaders.Connection, "keep-alive")
-                        }
-                        setBody(request)
-                    }
-                ) {
-                    incoming.collect { event ->
-                        event
-                            .takeIf { it.data != "[DONE]" }
-                            ?.data?.trim()?.let { json.decodeFromString<GoogleResponse>(it) }
-                            ?.candidates?.firstOrNull()?.content
-                            ?.parts?.forEach { part -> if (part is GooglePart.Text) emit(part.text) }
+        try {
+            httpClient.sse(
+                urlString = "$DEFAULT_PATH/${model.id}:$DEFAULT_METHOD_STREAM_GENERATE_CONTENT",
+                request = {
+                    method = HttpMethod.Post
+                    parameter("alt", "sse")
+                    accept(ContentType.Text.EventStream)
+                    headers {
+                        append(HttpHeaders.CacheControl, "no-cache")
+                        append(HttpHeaders.Connection, "keep-alive")
                     }
+                    setBody(request)
                 }
-            } catch (e: SSEClientException) {
-                e.response?.let { response ->
-                    logger.error { "Error from GoogleAI API: ${response.status}: ${e.message}" }
-                    error("Error from GoogleAI API: ${response.status}: ${e.message}")
+            ) {
+                incoming.collect { event ->
+                    event
+                        .takeIf { it.data != "[DONE]" }
+                        ?.data?.trim()?.let { json.decodeFromString<GoogleResponse>(it) }
+                        ?.candidates?.firstOrNull()?.content
+                        ?.parts?.forEach { part -> if (part is GooglePart.Text) emit(part.text) }
                 }
-            } catch (e: Exception) {
-                logger.error { "Exception during streaming: $e" }
-                error(e.message ?: "Unknown error during streaming")
             }
+        } catch (e: SSEClientException) {
+            e.response?.let { response ->
+                logger.error { "Error from GoogleAI API: ${response.status}: ${e.message}" }
+                error("Error from GoogleAI API: ${response.status}: ${e.message}")
+            }
+        } catch (e: Exception) {
+            logger.error { "Exception during streaming: $e" }
+            error(e.message ?: "Unknown error during streaming")
         }
     }

prompt/prompt-executor/prompt-executor-clients/prompt-executor-ollama-client/src/commonMain/kotlin/ai/koog/prompt/executor/ollama/client/OllamaClient.kt

Lines changed: 1 addition & 1 deletion

@@ -144,7 +144,7 @@ public class OllamaClient(
         }
     }

-    override suspend fun executeStreaming(
+    override fun executeStreaming(
         prompt: Prompt, model: LLModel
     ): Flow<String> = flow {
         require(model.provider == LLMProvider.Ollama) { "Model not supported by Ollama" }

prompt/prompt-executor/prompt-executor-clients/prompt-executor-openai-client/src/commonMain/kotlin/ai/koog/prompt/executor/clients/openai/OpenAILLMClient.kt

Lines changed: 30 additions & 29 deletions

@@ -145,45 +145,43 @@ public open class OpenAILLMClient(
         }
     }

-    override suspend fun executeStreaming(prompt: Prompt, model: LLModel): Flow<String> {
+    override fun executeStreaming(prompt: Prompt, model: LLModel): Flow<String> = flow {
         logger.debug { "Executing streaming prompt: $prompt with model: $model" }
         require(model.capabilities.contains(LLMCapability.Completion)) {
             "Model ${model.id} does not support chat completions"
         }

         val request = createOpenAIRequest(prompt, emptyList(), model, true)

-        return flow {
-            try {
-                httpClient.sse(
-                    urlString = settings.chatCompletionsPath,
-                    request = {
-                        method = HttpMethod.Post
-                        accept(ContentType.Text.EventStream)
-                        headers {
-                            append(HttpHeaders.CacheControl, "no-cache")
-                            append(HttpHeaders.Connection, "keep-alive")
-                        }
-                        setBody(request)
-                    }
-                ) {
-                    incoming.collect { event ->
-                        event
-                            .takeIf { it.data != "[DONE]" }
-                            ?.data?.trim()?.let { json.decodeFromString<OpenAIStreamResponse>(it) }
-                            ?.choices?.forEach { choice -> choice.delta.content?.let { emit(it) } }
+        try {
+            httpClient.sse(
+                urlString = settings.chatCompletionsPath,
+                request = {
+                    method = HttpMethod.Post
+                    accept(ContentType.Text.EventStream)
+                    headers {
+                        append(HttpHeaders.CacheControl, "no-cache")
+                        append(HttpHeaders.Connection, "keep-alive")
                     }
+                    setBody(request)
                 }
-            } catch (e: SSEClientException) {
-                e.response?.let { response ->
-                    val body = response.readRawBytes().decodeToString()
-                    logger.error(e) { "Error from OpenAI API: ${response.status}: ${e.message}.\nBody:\n$body" }
-                    error("Error from OpenAI API: ${response.status}: ${e.message}")
+            ) {
+                incoming.collect { event ->
+                    event
+                        .takeIf { it.data != "[DONE]" }
+                        ?.data?.trim()?.let { json.decodeFromString<OpenAIStreamResponse>(it) }
+                        ?.choices?.forEach { choice -> choice.delta.content?.let { emit(it) } }
                 }
-            } catch (e: Exception) {
-                logger.error { "Exception during streaming: $e" }
-                error(e.message ?: "Unknown error during streaming")
             }
+        } catch (e: SSEClientException) {
+            e.response?.let { response ->
+                val body = response.readRawBytes().decodeToString()
+                logger.error(e) { "Error from OpenAI API: ${response.status}: ${e.message}.\nBody:\n$body" }
+                error("Error from OpenAI API: ${response.status}: ${e.message}")
+            }
+        } catch (e: Exception) {
+            logger.error { "Exception during streaming: $e" }
+            error(e.message ?: "Unknown error during streaming")
         }
     }
@@ -330,7 +328,10 @@ public open class OpenAILLMClient(
             null -> null
         }

-        val modalities = if (model.capabilities.contains(LLMCapability.Audio)) listOf(OpenAIModalities.Text, OpenAIModalities.Audio) else null
+        val modalities = if (model.capabilities.contains(LLMCapability.Audio)) listOf(
+            OpenAIModalities.Text,
+            OpenAIModalities.Audio
+        ) else null
         // TODO allow passing this externally and actually controlling this behavior
         val audio = modalities?.let {
             OpenAIAudioConfig(

prompt/prompt-executor/prompt-executor-clients/prompt-executor-openrouter-client/src/commonMain/kotlin/ai/koog/prompt/executor/clients/openrouter/OpenRouterLLMClient.kt

Lines changed: 25 additions & 27 deletions

@@ -123,44 +123,42 @@ public class OpenRouterLLMClient(
         }
     }

-    override suspend fun executeStreaming(prompt: Prompt, model: LLModel): Flow<String> {
+    override fun executeStreaming(prompt: Prompt, model: LLModel): Flow<String> = flow {
         logger.debug { "Executing streaming prompt: $prompt" }
         require(model.capabilities.contains(LLMCapability.Completion)) {
             "Model ${model.id} does not support chat completions"
         }

         val request = createOpenRouterRequest(prompt, model, emptyList(), true)

-        return flow {
-            try {
-                httpClient.sse(
-                    urlString = DEFAULT_MESSAGE_PATH,
-                    request = {
-                        method = HttpMethod.Post
-                        accept(ContentType.Text.EventStream)
-                        headers {
-                            append(HttpHeaders.CacheControl, "no-cache")
-                            append(HttpHeaders.Connection, "keep-alive")
-                        }
-                        setBody(request)
-                    }
-                ) {
-                    incoming.collect { event ->
-                        event
-                            .takeIf { it.data != "[DONE]" }
-                            ?.data?.trim()?.let { json.decodeFromString<OpenRouterStreamResponse>(it) }
-                            ?.choices?.forEach { choice -> choice.delta.content?.let { emit(it) } }
+        try {
+            httpClient.sse(
+                urlString = DEFAULT_MESSAGE_PATH,
+                request = {
+                    method = HttpMethod.Post
+                    accept(ContentType.Text.EventStream)
+                    headers {
+                        append(HttpHeaders.CacheControl, "no-cache")
+                        append(HttpHeaders.Connection, "keep-alive")
                     }
+                    setBody(request)
                 }
-            } catch (e: SSEClientException) {
-                e.response?.let { response ->
-                    logger.error { "Error from OpenRouter API: ${response.status}: ${e.message}" }
-                    error("Error from OpenRouter API: ${response.status}: ${e.message}")
+            ) {
+                incoming.collect { event ->
+                    event
+                        .takeIf { it.data != "[DONE]" }
+                        ?.data?.trim()?.let { json.decodeFromString<OpenRouterStreamResponse>(it) }
+                        ?.choices?.forEach { choice -> choice.delta.content?.let { emit(it) } }
                 }
-            } catch (e: Exception) {
-                logger.error { "Exception during streaming: $e" }
-                error(e.message ?: "Unknown error during streaming")
             }
+        } catch (e: SSEClientException) {
+            e.response?.let { response ->
+                logger.error { "Error from OpenRouter API: ${response.status}: ${e.message}" }
+                error("Error from OpenRouter API: ${response.status}: ${e.message}")
+            }
+        } catch (e: Exception) {
+            logger.error { "Exception during streaming: $e" }
+            error(e.message ?: "Unknown error during streaming")
         }
     }

prompt/prompt-executor/prompt-executor-clients/src/commonMain/kotlin/ai/koog/prompt/executor/clients/LLMClient.kt

Lines changed: 1 addition & 1 deletion

@@ -32,7 +32,7 @@ public interface LLMClient {
     * @param model The LLM model to use
     * @return Flow of response chunks
     */
-    public suspend fun executeStreaming(prompt: Prompt, model: LLModel): Flow<String>
+    public fun executeStreaming(prompt: Prompt, model: LLModel): Flow<String>
 }

 /**

prompt/prompt-executor/prompt-executor-llms-all/src/jvmTest/kotlin/ai/koog/prompt/executor/llms/all/MultipleLLMPromptExecutorMockTest.kt

Lines changed: 3 additions & 3 deletions

@@ -42,7 +42,7 @@ class MultipleLLMPromptExecutorMockTest {
            return listOf(Message.Assistant("OpenAI response", ResponseMetaInfo.create(mockClock)))
        }

-       override suspend fun executeStreaming(prompt: Prompt, model: LLModel): Flow<String> {
+       override fun executeStreaming(prompt: Prompt, model: LLModel): Flow<String> {
            return flowOf("OpenAI", " streaming", " response")
        }
    }
@@ -57,7 +57,7 @@ class MultipleLLMPromptExecutorMockTest {
            return listOf(Message.Assistant("Anthropic response", ResponseMetaInfo.create(mockClock)))
        }

-       override suspend fun executeStreaming(prompt: Prompt, model: LLModel): Flow<String> {
+       override fun executeStreaming(prompt: Prompt, model: LLModel): Flow<String> {
            return flowOf("Anthropic", " streaming", " response")
        }
    }
@@ -72,7 +72,7 @@ class MultipleLLMPromptExecutorMockTest {
            return listOf(Message.Assistant("Gemini response", ResponseMetaInfo.create(mockClock)))
        }

-       override suspend fun executeStreaming(prompt: Prompt, model: LLModel): Flow<String> {
+       override fun executeStreaming(prompt: Prompt, model: LLModel): Flow<String> {
            return flowOf("Gemini", " streaming", " response")
        }
    }

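Because executeStreaming is no longer suspending, a mock like the ones above can build its Flow eagerly with flowOf, and the test only needs a coroutine to collect it. A sketch of such a test (assumed shape, not the actual MultipleLLMPromptExecutorMockTest) could look like this:

// Sketch of verifying a non-suspending streaming mock: the flow is built eagerly with
// flowOf and collected inside runTest.
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.flowOf
import kotlinx.coroutines.flow.toList
import kotlinx.coroutines.test.runTest
import kotlin.test.Test
import kotlin.test.assertEquals

class StreamingMockSketchTest {

    // Stand-in for an LLMClient mock: no suspend modifier is needed to hand back the Flow.
    private fun executeStreaming(): Flow<String> =
        flowOf("OpenAI", " streaming", " response")

    @Test
    fun streamingChunksAreReturnedInOrder() = runTest {
        val chunks = executeStreaming().toList()
        assertEquals(listOf("OpenAI", " streaming", " response"), chunks)
    }
}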