@@ -55,100 +55,8 @@ public virtual async ValueTask<EvaluationResult> EvaluateAsync(
return result;
}

(ChatMessage? userRequest, List<ChatMessage> history) = GetUserRequestAndHistory(messages);

int inputTokenLimit = 0;
int ignoredMessagesCount = 0;

if (chatConfiguration.TokenCounter is not null)
{
IEvaluationTokenCounter tokenCounter = chatConfiguration.TokenCounter;
inputTokenLimit = tokenCounter.InputTokenLimit;
int tokenBudget = inputTokenLimit;

void OnTokenBudgetExceeded()
{
EvaluationDiagnostic tokenBudgetExceeded =
EvaluationDiagnostic.Error(
$"Evaluation failed because the specified limit of {inputTokenLimit} input tokens was exceeded.");

result.AddDiagnosticsToAllMetrics(tokenBudgetExceeded);
}

if (!string.IsNullOrWhiteSpace(SystemPrompt))
{
tokenBudget -= tokenCounter.CountTokens(SystemPrompt!);
if (tokenBudget < 0)
{
OnTokenBudgetExceeded();
return result;
}
}

string baseEvaluationPrompt =
await RenderEvaluationPromptAsync(
userRequest,
modelResponse,
includedHistory: [],
additionalContext,
cancellationToken).ConfigureAwait(false);

tokenBudget -= tokenCounter.CountTokens(baseEvaluationPrompt);
if (tokenBudget < 0)
{
OnTokenBudgetExceeded();
return result;
}

if (history.Count > 0 && !IgnoresHistory)
{
if (history.Count == 1)
{
(bool canRender, tokenBudget) =
await CanRenderAsync(
history[0],
tokenBudget,
chatConfiguration,
cancellationToken).ConfigureAwait(false);

if (!canRender)
{
ignoredMessagesCount = 1;
history = [];
}
}
else
{
int totalMessagesCount = history.Count;
int includedMessagesCount = 0;

history.Reverse();

foreach (ChatMessage message in history)
{
cancellationToken.ThrowIfCancellationRequested();

(bool canRender, tokenBudget) =
await CanRenderAsync(
message,
tokenBudget,
chatConfiguration,
cancellationToken).ConfigureAwait(false);

if (!canRender)
{
ignoredMessagesCount = totalMessagesCount - includedMessagesCount;
history.RemoveRange(index: includedMessagesCount, count: ignoredMessagesCount);
break;
}

includedMessagesCount++;
}

history.Reverse();
}
}
}
(ChatMessage? userRequest, List<ChatMessage> conversationHistory) =
GetUserRequestAndConversationHistory(messages);

var evaluationMessages = new List<ChatMessage>();
if (!string.IsNullOrWhiteSpace(SystemPrompt))
@@ -160,7 +68,7 @@ await CanRenderAsync(
await RenderEvaluationPromptAsync(
userRequest,
modelResponse,
includedHistory: history,
conversationHistory,
additionalContext,
cancellationToken).ConfigureAwait(false);

@@ -172,84 +80,9 @@ await PerformEvaluationAsync(
result,
cancellationToken).ConfigureAwait(false);

if (inputTokenLimit > 0 && ignoredMessagesCount > 0)
{
#pragma warning disable S103 // Lines should not be too long
result.AddDiagnosticsToAllMetrics(
EvaluationDiagnostic.Warning(
$"The evaluation may be inconclusive because the oldest {ignoredMessagesCount} messages in the supplied conversation history were ignored in order to stay under the specified limit of {inputTokenLimit} input tokens."));
#pragma warning restore S103
}

return result;
}

/// <summary>
/// Determines if there is sufficient <paramref name="tokenBudget"/> remaining to render the
/// supplied <paramref name="message"/> as part of the evaluation prompt that this <see cref="IEvaluator"/> uses.
/// </summary>
/// <param name="message">
/// A message that is part of the conversation history for the response being evaluated and that is to be rendered
/// as part of the evaluation prompt.
/// </param>
/// <param name="tokenBudget">
/// The number of tokens available for the rendering additional content as part of the evaluation prompt.
/// </param>
/// <param name="chatConfiguration">
/// A <see cref="ChatConfiguration"/> that specifies the <see cref="IChatClient"/> and the
/// <see cref="IEvaluationTokenCounter"/> that this <see cref="IEvaluator"/> uses to perform the evaluation.
/// </param>
/// <param name="cancellationToken">A <see cref="CancellationToken"/> that can cancel the operation.</param>
/// <returns>
/// A tuple containing a <see langword="bool"/> indicating whether there is sufficient
/// <paramref name="tokenBudget"/> remaining to render the supplied <paramref name="message"/> as part of the
/// evaluation prompt, and an <see langword="int"/> containing the remaining token budget that would be available
/// once this <paramref name="message"/> is rendered.
/// </returns>
protected virtual ValueTask<(bool canRender, int remainingTokenBudget)> CanRenderAsync(
ChatMessage message,
int tokenBudget,
ChatConfiguration chatConfiguration,
CancellationToken cancellationToken)
{
_ = Throw.IfNull(message);
_ = Throw.IfNull(chatConfiguration);

IEvaluationTokenCounter? tokenCounter = chatConfiguration.TokenCounter;
if (tokenCounter is null)
{
return new ValueTask<(bool, int)>((true, tokenBudget));
}

string? author = message.AuthorName;
string role = message.Role.Value;
string content = message.Text ?? string.Empty;

int tokenCount =
string.IsNullOrWhiteSpace(author)
? tokenCounter.CountTokens("[") +
tokenCounter.CountTokens(role) +
tokenCounter.CountTokens("] ") +
tokenCounter.CountTokens(content) +
tokenCounter.CountTokens("\n")
: tokenCounter.CountTokens("[") +
tokenCounter.CountTokens(author!) +
tokenCounter.CountTokens(" (") +
tokenCounter.CountTokens(role) +
tokenCounter.CountTokens(")] ") +
tokenCounter.CountTokens(content) +
tokenCounter.CountTokens("\n");

if (tokenCount > tokenBudget)
{
return new ValueTask<(bool, int)>((false, tokenBudget));
}
else
{
return new ValueTask<(bool, int)>((true, tokenBudget - tokenCount));
}
}

/// <summary>
/// Renders the supplied <paramref name="response"/> to a string that can be included as part of the evaluation
/// prompt that this <see cref="IEvaluator"/> uses.
@@ -313,21 +146,21 @@ protected virtual ValueTask<string> RenderAsync(ChatMessage message, Cancellatio
/// The request that produced the <paramref name="modelResponse"/> that is to be evaluated.
/// </param>
/// <param name="modelResponse">The response that is to be evaluated.</param>
/// <param name="includedHistory">
/// <param name="conversationHistory">
/// The conversation history (excluding the <paramref name="userRequest"/> and <paramref name="modelResponse"/>)
/// that is to be included as part of the evaluation prompt.
/// </param>
/// <param name="additionalContext">
/// Additional contextual information (beyond that which is available in the <paramref name="userRequest"/> and
/// <paramref name="includedHistory"/>) that this <see cref="IEvaluator"/> may need to accurately evaluate the
/// <paramref name="conversationHistory"/>) that this <see cref="IEvaluator"/> may need to accurately evaluate the
/// supplied <paramref name="modelResponse"/>.
/// </param>
/// <param name="cancellationToken">A <see cref="CancellationToken"/> that can cancel the operation.</param>
/// <returns>The evaluation prompt.</returns>
protected abstract ValueTask<string> RenderEvaluationPromptAsync(
ChatMessage? userRequest,
ChatResponse modelResponse,
IEnumerable<ChatMessage>? includedHistory,
IEnumerable<ChatMessage>? conversationHistory,
IEnumerable<EvaluationContext>? additionalContext,
CancellationToken cancellationToken);

@@ -351,8 +184,8 @@ protected abstract ValueTask<string> RenderEvaluationPromptAsync(
/// <see cref="EvaluationMetric"/>s in the supplied <paramref name="result"/>.
/// </summary>
/// <param name="chatConfiguration">
/// A <see cref="ChatConfiguration"/> that specifies the <see cref="IChatClient"/> and the
/// <see cref="IEvaluationTokenCounter"/> that this <see cref="IEvaluator"/> uses to perform the evaluation.
/// A <see cref="ChatConfiguration"/> that specifies the <see cref="IChatClient"/> that should be used if one or
/// more composed <see cref="IEvaluator"/>s use an AI model to perform evaluation.
/// </param>
/// <param name="evaluationMessages">
/// The set of messages that are to be sent to the supplied <see cref="ChatConfiguration.ChatClient"/> to perform
Expand All @@ -370,11 +203,11 @@ protected abstract ValueTask PerformEvaluationAsync(
EvaluationResult result,
CancellationToken cancellationToken);

private (ChatMessage? userRequest, List<ChatMessage> history) GetUserRequestAndHistory(
private (ChatMessage? userRequest, List<ChatMessage> conversationHistory) GetUserRequestAndConversationHistory(
IEnumerable<ChatMessage> messages)
{
ChatMessage? userRequest = null;
List<ChatMessage> history;
List<ChatMessage> conversationHistory;

if (IgnoresHistory)
{
@@ -383,22 +216,22 @@ protected abstract ValueTask PerformEvaluationAsync(
? lastMessage
: null;

history = [];
conversationHistory = [];
}
else
{
history = [.. messages];
int lastMessageIndex = history.Count - 1;
conversationHistory = [.. messages];
int lastMessageIndex = conversationHistory.Count - 1;

if (lastMessageIndex >= 0 &&
history[lastMessageIndex] is ChatMessage lastMessage &&
conversationHistory[lastMessageIndex] is ChatMessage lastMessage &&
lastMessage.Role == ChatRole.User)
{
userRequest = lastMessage;
history.RemoveAt(lastMessageIndex);
conversationHistory.RemoveAt(lastMessageIndex);
}
}

return (userRequest, history);
return (userRequest, conversationHistory);
}
}
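
The private helper above performs the request/history split that replaces the removed token-budget trimming: the trailing user message becomes the request to evaluate, and everything before it becomes the conversation history. The following standalone sketch (illustrative only, not the library's code; it mirrors the non-IgnoresHistory branch and assumes only the Microsoft.Extensions.AI ChatMessage and ChatRole types) shows the same split in isolation:

using System.Collections.Generic;
using System.Linq;
using Microsoft.Extensions.AI;

static class ConversationSplitSketch
{
    // Splits a conversation into the trailing user request (if any) and the
    // preceding messages, which become the conversation history.
    public static (ChatMessage? userRequest, List<ChatMessage> conversationHistory) Split(
        IEnumerable<ChatMessage> messages)
    {
        List<ChatMessage> conversationHistory = messages.ToList();
        ChatMessage? userRequest = null;

        int lastIndex = conversationHistory.Count - 1;
        if (lastIndex >= 0 && conversationHistory[lastIndex].Role == ChatRole.User)
        {
            // The final user turn is treated as the request being evaluated and
            // is therefore excluded from the history.
            userRequest = conversationHistory[lastIndex];
            conversationHistory.RemoveAt(lastIndex);
        }

        return (userRequest, conversationHistory);
    }
}
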
@@ -50,7 +50,7 @@ public sealed class CoherenceEvaluator : SingleNumericMetricEvaluator
protected override async ValueTask<string> RenderEvaluationPromptAsync(
ChatMessage? userRequest,
ChatResponse modelResponse,
IEnumerable<ChatMessage>? includedHistory,
IEnumerable<ChatMessage>? conversationHistory,
IEnumerable<EvaluationContext>? additionalContext,
CancellationToken cancellationToken)
{
@@ -75,7 +75,7 @@ await base.EvaluateAsync(
protected override async ValueTask<string> RenderEvaluationPromptAsync(
ChatMessage? userRequest,
ChatResponse modelResponse,
IEnumerable<ChatMessage>? includedHistory,
IEnumerable<ChatMessage>? conversationHistory,
IEnumerable<EvaluationContext>? additionalContext,
CancellationToken cancellationToken)
{
@@ -49,7 +49,7 @@ public sealed class FluencyEvaluator : SingleNumericMetricEvaluator
protected override async ValueTask<string> RenderEvaluationPromptAsync(
ChatMessage? userRequest,
ChatResponse modelResponse,
IEnumerable<ChatMessage>? includedHistory,
IEnumerable<ChatMessage>? conversationHistory,
IEnumerable<EvaluationContext>? additionalContext,
CancellationToken cancellationToken)
{
@@ -77,7 +77,7 @@ await base.EvaluateAsync(
protected override async ValueTask<string> RenderEvaluationPromptAsync(
ChatMessage? userRequest,
ChatResponse modelResponse,
IEnumerable<ChatMessage>? includedHistory,
IEnumerable<ChatMessage>? conversationHistory,
IEnumerable<EvaluationContext>? additionalContext,
CancellationToken cancellationToken)
{
@@ -99,9 +99,9 @@ userRequest is not null
_ = builder.AppendLine();
}

if (includedHistory is not null)
if (conversationHistory is not null)
{
foreach (ChatMessage message in includedHistory)
foreach (ChatMessage message in conversationHistory)
{
_ = builder.Append(await RenderAsync(message, cancellationToken).ConfigureAwait(false));
}
@@ -93,7 +93,7 @@ protected override EvaluationResult InitializeResult()
protected override async ValueTask<string> RenderEvaluationPromptAsync(
ChatMessage? userRequest,
ChatResponse modelResponse,
IEnumerable<ChatMessage>? includedHistory,
IEnumerable<ChatMessage>? conversationHistory,
IEnumerable<EvaluationContext>? additionalContext,
CancellationToken cancellationToken)
{
@@ -107,9 +107,9 @@ userRequest is not null
: string.Empty;

var builder = new StringBuilder();
if (includedHistory is not null)
if (conversationHistory is not null)
{
foreach (ChatMessage message in includedHistory)
foreach (ChatMessage message in conversationHistory)
{
_ = builder.Append(await RenderAsync(message, cancellationToken).ConfigureAwait(false));
}
@@ -24,7 +24,7 @@ internal static ReadOnlySpan<char> TrimMarkdownDelimiters(string json)
// Trim 'json' marker from markdown if it exists.
const string JsonMarker = "json";
int markerLength = JsonMarker.Length;
if (trimmed.Length > markerLength && trimmed[0..markerLength].SequenceEqual(JsonMarker.AsSpan()))
if (trimmed.Length > markerLength && trimmed.Slice(0, markerLength).SequenceEqual(JsonMarker.AsSpan()))
{
trimmed = trimmed.Slice(markerLength);
}
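
For context on the hunk above: on a ReadOnlySpan&lt;char&gt;, trimmed.Slice(0, markerLength) is equivalent to the range indexer trimmed[0..markerLength]; a plausible motivation for the change (an assumption here, the diff does not state one) is that Slice compiles on older target frameworks that lack the Range and Index types backing the indexer syntax. A minimal self-contained sketch of the marker-trimming step, assuming nothing beyond System:

using System;

static class MarkdownTrimSketch
{
    // Removes a leading "json" language marker from a span whose markdown
    // code-fence delimiters have already been trimmed, mirroring the hunk above.
    public static ReadOnlySpan<char> TrimJsonMarker(ReadOnlySpan<char> trimmed)
    {
        const string JsonMarker = "json";
        int markerLength = JsonMarker.Length;

        // Slice(start, length) works wherever spans do; the [0..markerLength]
        // indexer additionally requires the Range/Index types.
        if (trimmed.Length > markerLength &&
            trimmed.Slice(0, markerLength).SequenceEqual(JsonMarker.AsSpan()))
        {
            trimmed = trimmed.Slice(markerLength);
        }

        return trimmed;
    }
}
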
@@ -29,10 +29,9 @@ public static class AzureStorageReportingConfiguration
/// survive in the cache before they are considered expired and evicted.
/// </param>
/// <param name="chatConfiguration">
/// A <see cref="ChatConfiguration"/> that specifies the <see cref="IChatClient"/> and the
/// <see cref="IEvaluationTokenCounter"/> that are used by AI-based <paramref name="evaluators"/> included in the
/// returned <see cref="ReportingConfiguration"/>. Can be omitted if none of the included
/// <paramref name="evaluators"/> are AI-based.
/// A <see cref="ChatConfiguration"/> that specifies the <see cref="IChatClient"/> that is used by AI-based
/// <paramref name="evaluators"/> included in the returned <see cref="ReportingConfiguration"/>. Can be omitted if
/// none of the included <paramref name="evaluators"/> are AI-based.
/// </param>
/// <param name="enableResponseCaching">
/// <see langword="true"/> to enable caching of AI responses; <see langword="false"/> otherwise.