|
| 1 | +package ai.koog.prompt.executor.clients.retry |
| 2 | + |
| 3 | +import kotlin.time.Duration |
| 4 | +import kotlin.time.Duration.Companion.milliseconds |
| 5 | +import kotlin.time.Duration.Companion.seconds |
| 6 | + |
/**
 * Configuration for retry behavior in LLM client operations.
 *
 * All constraints listed below are enforced at construction time (and therefore also by
 * `copy`); a violation raises [IllegalArgumentException].
 *
 * @property maxAttempts Maximum number of attempts, including the initial one. Must be at least 1;
 *   a value of 1 means the operation is never retried.
 * @property initialDelay Delay before the first retry. Must not be negative and must not exceed [maxDelay].
 * @property maxDelay Upper bound for the delay between retries.
 * @property backoffMultiplier Multiplier for exponential backoff. Must be at least 1.0.
 * @property jitterFactor Random jitter factor. Must lie in the closed range 0.0..1.0.
 * @property retryablePatterns Patterns used to identify retryable errors.
 * @property retryAfterExtractor Optional extractor for retry-after hints; `null` disables hint extraction.
 */
public data class RetryConfig(
    val maxAttempts: Int = 3,
    val initialDelay: Duration = 1.seconds,
    val maxDelay: Duration = 30.seconds,
    val backoffMultiplier: Double = 2.0,
    val jitterFactor: Double = 0.1,
    val retryablePatterns: List<RetryablePattern> = DEFAULT_PATTERNS,
    val retryAfterExtractor: RetryAfterExtractor? = DefaultRetryAfterExtractor
) {
    init {
        require(maxAttempts >= 1) { "maxAttempts must be at least 1" }
        // NaN fails both comparisons below, so it is rejected as well.
        require(backoffMultiplier >= 1.0) { "backoffMultiplier must be at least 1.0" }
        require(jitterFactor in 0.0..1.0) { "jitterFactor must be between 0.0 and 1.0" }
        // A negative delay is meaningless for backoff. Together with the ordering check
        // below, this also guarantees that maxDelay is non-negative.
        require(!initialDelay.isNegative()) { "initialDelay ($initialDelay) must not be negative" }
        require(initialDelay <= maxDelay) { "initialDelay ($initialDelay) must not be greater than maxDelay ($maxDelay)" }
    }

    public companion object {
        /**
         * Default retry patterns that work across all providers.
         *
         * NOTE: this property must stay declared before the presets below. Companion
         * properties are initialized in textual order, and each preset invokes the
         * constructor, whose default for `retryablePatterns` reads this value.
         */
        public val DEFAULT_PATTERNS: List<RetryablePattern> = listOf(
            // HTTP status codes
            RetryablePattern.Status(429), // Rate limit
            RetryablePattern.Status(500), // Internal server error
            RetryablePattern.Status(502), // Bad gateway
            RetryablePattern.Status(503), // Service unavailable
            RetryablePattern.Status(504), // Gateway timeout
            RetryablePattern.Status(529), // Anthropic overloaded

            // Error keywords
            RetryablePattern.Keyword("rate limit"),
            RetryablePattern.Keyword("too many requests"),
            RetryablePattern.Keyword("overloaded"),
            RetryablePattern.Keyword("request timeout"),
            RetryablePattern.Keyword("connection timeout"),
            RetryablePattern.Keyword("read timeout"),
            RetryablePattern.Keyword("write timeout"),
            RetryablePattern.Keyword("connection reset by peer"),
            RetryablePattern.Keyword("connection refused"),
            RetryablePattern.Keyword("temporarily unavailable"),
            RetryablePattern.Keyword("service unavailable")
        )

        /**
         * Conservative configuration: 3 attempts, starting at 2 seconds and capped at 30 seconds.
         * Multiplier and jitter keep the constructor defaults.
         */
        public val CONSERVATIVE: RetryConfig = RetryConfig(
            maxAttempts = 3,
            initialDelay = 2.seconds,
            maxDelay = 30.seconds
        )

        /**
         * Aggressive configuration: 5 attempts, starting at 500 milliseconds, capped at
         * 20 seconds, with a gentler 1.5x backoff. Jitter keeps the constructor default.
         */
        public val AGGRESSIVE: RetryConfig = RetryConfig(
            maxAttempts = 5,
            initialDelay = 500.milliseconds,
            maxDelay = 20.seconds,
            backoffMultiplier = 1.5
        )

        /**
         * Production configuration: 3 attempts, starting at 1 second, capped at 20 seconds,
         * 2x backoff and a jitter factor of 0.2.
         */
        public val PRODUCTION: RetryConfig = RetryConfig(
            maxAttempts = 3,
            initialDelay = 1.seconds,
            maxDelay = 20.seconds,
            backoffMultiplier = 2.0,
            jitterFactor = 0.2
        )

        /**
         * No retry: a single attempt, which effectively disables retry logic.
         */
        public val DISABLED: RetryConfig = RetryConfig(maxAttempts = 1)
    }
}
| 97 | + |
/**
 * Pattern for identifying retryable errors.
 *
 * Each variant decides solely from the text of the error message handed to [matches].
 */
public sealed class RetryablePattern {
    /**
     * Returns `true` when [message] is recognised by this pattern.
     */
    public abstract fun matches(message: String): Boolean

    /**
     * Matches an HTTP status [code] mentioned in an error message.
     *
     * A message matches when it contains [code] as a standalone number, or directly
     * after `status` / `error` (case-insensitive, optional colon and whitespace), provided
     * the code is not merely the prefix of a longer number. For example, `Status(500)`
     * matches "HTTP 500" and "Error: 500" but not "error: 5003".
     *
     * Note that the standalone-number rule also matches unrelated numbers that happen to
     * equal [code].
     */
    public data class Status(val code: Int) : RetryablePattern() {
        // kotlin.text.Regex is spelled out because the sibling RetryablePattern.Regex
        // shadows the short name inside this class.
        private val patterns: List<kotlin.text.Regex> = listOf(
            kotlin.text.Regex("\\b$code\\b"),
            // (?!\d) stops the code from matching as the prefix of a longer number.
            kotlin.text.Regex("status:?\\s*$code(?!\\d)", RegexOption.IGNORE_CASE),
            kotlin.text.Regex("error:?\\s*$code(?!\\d)", RegexOption.IGNORE_CASE)
        )

        override fun matches(message: String): Boolean =
            patterns.any { it.containsMatchIn(message) }
    }

    /**
     * Matches when [keyword] occurs anywhere in the message, ignoring case.
     */
    public data class Keyword(val keyword: String) : RetryablePattern() {
        override fun matches(message: String): Boolean =
            message.contains(keyword, ignoreCase = true)
    }

    /**
     * Matches when the custom regex [pattern] is found anywhere in the message.
     */
    public data class Regex(val pattern: kotlin.text.Regex) : RetryablePattern() {
        override fun matches(message: String): Boolean =
            pattern.containsMatchIn(message)
    }

    /**
     * Custom matching logic: delegates the decision to the supplied predicate.
     */
    public class Custom(private val matcher: (String) -> Boolean) : RetryablePattern() {
        override fun matches(message: String): Boolean = matcher(message)
    }
}
| 141 | + |
/**
 * Strategy for reading a retry-after hint out of an error message.
 */
public fun interface RetryAfterExtractor {
    /**
     * Returns the delay hinted at by [message], or `null` when no hint is found.
     */
    public fun extract(message: String): Duration?
}

/**
 * Built-in [RetryAfterExtractor] that recognises three case-insensitive phrasings:
 * `retry after N second…`, `retry-after: N` and `wait N second…`, where `N` is a run of digits
 * interpreted as whole seconds.
 */
public object DefaultRetryAfterExtractor : RetryAfterExtractor {
    private val hintRegexes = listOf(
        Regex("retry\\s+after\\s+(\\d+)\\s+second", RegexOption.IGNORE_CASE),
        Regex("retry-after:\\s*(\\d+)", RegexOption.IGNORE_CASE),
        Regex("wait\\s+(\\d+)\\s+second", RegexOption.IGNORE_CASE)
    )

    /**
     * Tries each phrasing in declaration order and returns the first number that parses as a
     * `Long`, converted to seconds. A phrasing whose digits do not fit a `Long` is skipped
     * and the next one is tried; `null` is returned when none yields a value.
     */
    override fun extract(message: String): Duration? =
        hintRegexes.firstNotNullOfOrNull { regex ->
            regex.find(message)
                ?.groupValues
                ?.getOrNull(1)
                ?.toLongOrNull()
                ?.seconds
        }
}
0 commit comments