Skip to content

Commit f090ca1

Browse files
vertex-sdk-bot and copybara-github
authored and committed
feat: GenAI Evaluation: Release GenAI Evaluation SDK multimodal evaluation to vertexai.preview module.
PiperOrigin-RevId: 741668994
1 parent 30f0fcf commit f090ca1

File tree

4 files changed

+226
-13
lines changed

4 files changed

+226
-13
lines changed

tests/unit/vertexai/test_evaluation.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from google.cloud.aiplatform_v1beta1.services import (
3737
evaluation_service as gapic_evaluation_services_preview,
3838
)
39+
from google.cloud.aiplatform_v1beta1.types import content
3940
from google.cloud.aiplatform_v1beta1.types import (
4041
evaluation_service as gapic_evaluation_service_types_preview,
4142
)
@@ -70,6 +71,10 @@
7071
PairwisePreview = (
7172
evaluation_preview.metrics.metric_prompt_template_examples.MetricPromptTemplateExamples.Pairwise
7273
)
74+
ContentMap = gapic_evaluation_service_types_preview.ContentMap
75+
Content = content.Content
76+
Part = content.Part
77+
7378

7479
_TEST_PROJECT = "test-project"
7580
_TEST_LOCATION = "us-central1"
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# Copyright 2024 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
"""Unit tests for multimodal utils."""
18+
19+
from google.cloud.aiplatform_v1beta1.types import content
20+
from google.cloud.aiplatform_v1beta1.types import (
21+
evaluation_service as gapic_eval_service_types,
22+
)
23+
from vertexai.preview.evaluation import (
24+
multimodal_utils,
25+
)
26+
27+
28+
# Short aliases for the message types compared against in the assertions.
ContentMap = gapic_eval_service_types.ContentMap
Content = content.Content
Part = content.Part

_TEST_PROJECT = "test-project"
_TEST_LOCATION = "us-central1"

# Every value is a JSON serialization of ``ContentMap.Contents``.
_MODEL_BASED_METRIC_INSTANCE_INPUT = {
    "prompt": '{"contents": [{"parts": [{"text": "test prompt"}]}]}',
    "response": (
        '{"contents": [{"parts": [{"file_data": {"mime_type": "image/png",'
        ' "file_uri": "gs://test-bucket/image1.png"}}]}]}'
    ),
    "baseline_response": (
        '{"contents": [{"parts": [{"file_data": {"mime_type": "image/jpeg",'
        ' "file_uri": "gs://test-bucket/image2.png"}}]}]}'
    ),
}
# No value is valid JSON: two are plain text and one has unbalanced braces.
_INVALID_MODEL_BASED_METRIC_INSTANCE_INPUT = {
    "prompt": "test prompt",
    "invalid_response_format": (
        '{"contents": [{{{{"parts": [{"file_data": {"mime_type": "image/png",'
        ' "file_uri": "gs://test-bucket/image1.png"}}]}]}'
    ),
    "baseline_response": "test image",
}


class TestMultimodalUtils:
    """Unit tests for multimodal utils."""

    def test_is_multimodal_instance(self):
        """An instance made of serialized contents is detected as multimodal."""
        assert multimodal_utils.is_multimodal_instance(
            _MODEL_BASED_METRIC_INSTANCE_INPUT
        )

    def test_not_multimodal_instance(self):
        """Plain text and malformed JSON are not detected as multimodal."""
        assert not multimodal_utils.is_multimodal_instance(
            _INVALID_MODEL_BASED_METRIC_INSTANCE_INPUT
        )

    def test_convert_multimodal_response_to_content_map(self):
        """Test convert_multimodal_response_to_content_map."""
        content_map = multimodal_utils.convert_multimodal_response_to_content_map(
            _MODEL_BASED_METRIC_INSTANCE_INPUT
        )
        assert content_map.values["prompt"] == ContentMap.Contents(
            contents=[Content(parts=[Part(text="test prompt")])]
        )
        assert content_map.values["response"] == ContentMap.Contents(
            contents=[
                Content(
                    parts=[
                        Part(
                            file_data={
                                "mime_type": "image/png",
                                "file_uri": "gs://test-bucket/image1.png",
                            }
                        )
                    ]
                )
            ]
        )
        assert content_map.values["baseline_response"] == ContentMap.Contents(
            contents=[
                Content(
                    parts=[
                        Part(
                            file_data={
                                "mime_type": "image/jpeg",
                                "file_uri": "gs://test-bucket/image2.png",
                            }
                        )
                    ]
                )
            ]
        )

    def test_convert_unparseable_values_fall_back_to_text_part(self):
        """Values that are not serialized contents are wrapped as text parts."""
        content_map = multimodal_utils.convert_multimodal_response_to_content_map(
            _INVALID_MODEL_BASED_METRIC_INSTANCE_INPUT
        )
        for placeholder, raw in _INVALID_MODEL_BASED_METRIC_INSTANCE_INPUT.items():
            assert content_map.values[placeholder] == ContentMap.Contents(
                contents=[Content(parts=[Part(text=raw)])]
            )

vertexai/preview/evaluation/metrics/_instance_evaluation.py

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@
3030
)
3131
from vertexai.preview.evaluation import _base as eval_base
3232
from vertexai.preview.evaluation import constants
33+
from vertexai.preview.evaluation import (
34+
multimodal_utils,
35+
)
3336
from vertexai.preview.evaluation import (
3437
prompt_template as prompt_template_base,
3538
)
@@ -46,7 +49,6 @@
4649

4750
from google.protobuf import json_format
4851

49-
5052
_LOGGER = base.Logger(__name__)
5153
_METRIC_NAME_TO_METRIC_SPEC = {
5254
# Automatic Metrics.
@@ -317,24 +319,44 @@ def build_request(
317319
tool_parameter_kv_match_input=instance,
318320
)
319321
elif metric_name == constants.Metric.POINTWISE_METRIC:
320-
instance = gapic_eval_service_types.PointwiseMetricInput(
321-
metric_spec=metric_spec,
322-
instance=gapic_eval_service_types.PointwiseMetricInstance(
323-
json_instance=json.dumps(model_based_metric_instance_input),
324-
),
325-
)
322+
if multimodal_utils.is_multimodal_instance(model_based_metric_instance_input):
323+
instance = gapic_eval_service_types.PointwiseMetricInput(
324+
metric_spec=metric_spec,
325+
instance=gapic_eval_service_types.PointwiseMetricInstance(
326+
content_map_instance=multimodal_utils.convert_multimodal_response_to_content_map(
327+
model_based_metric_instance_input
328+
),
329+
),
330+
)
331+
else:
332+
instance = gapic_eval_service_types.PointwiseMetricInput(
333+
metric_spec=metric_spec,
334+
instance=gapic_eval_service_types.PointwiseMetricInstance(
335+
json_instance=json.dumps(model_based_metric_instance_input),
336+
),
337+
)
326338
return gapic_eval_service_types.EvaluateInstancesRequest(
327339
location=location_path,
328340
pointwise_metric_input=instance,
329341
autorater_config=evaluation_run_config.autorater_config,
330342
)
331343
elif metric_name == constants.Metric.PAIRWISE_METRIC:
332-
instance = gapic_eval_service_types.PairwiseMetricInput(
333-
metric_spec=metric_spec,
334-
instance=gapic_eval_service_types.PairwiseMetricInstance(
335-
json_instance=json.dumps(model_based_metric_instance_input),
336-
),
337-
)
344+
if multimodal_utils.is_multimodal_instance(model_based_metric_instance_input):
345+
instance = gapic_eval_service_types.PairwiseMetricInput(
346+
metric_spec=metric_spec,
347+
instance=gapic_eval_service_types.PairwiseMetricInstance(
348+
content_map_instance=multimodal_utils.convert_multimodal_response_to_content_map(
349+
model_based_metric_instance_input
350+
),
351+
),
352+
)
353+
else:
354+
instance = gapic_eval_service_types.PairwiseMetricInput(
355+
metric_spec=metric_spec,
356+
instance=gapic_eval_service_types.PairwiseMetricInstance(
357+
json_instance=json.dumps(model_based_metric_instance_input),
358+
),
359+
)
338360
return gapic_eval_service_types.EvaluateInstancesRequest(
339361
location=location_path,
340362
pairwise_metric_input=instance,
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
"""Utility functions for multimodal evaluation."""
2+
3+
import logging
4+
from typing import Dict
5+
6+
from google.cloud.aiplatform_v1beta1.types import content
7+
from google.cloud.aiplatform_v1beta1.types import (
8+
evaluation_service as gapic_eval_service_types,
9+
)
10+
from google.protobuf import json_format
11+
12+
13+
# Short aliases for the message types used throughout this module.
ContentMap = gapic_eval_service_types.ContentMap
Content = content.Content
Part = content.Part

# Substrings looked for in input that failed to parse; a warning is logged
# only when both are present.
# NOTE(review): these look like protobuf text-format fragments rather than
# JSON keys -- confirm that is the intended detection target.
_CONTENTS_DETECTOR = "contents {"
_PARTS_DETECTOR = "parts {"
18+
19+
20+
def _string_to_content_list(input_str: str) -> ContentMap.Contents:
    """Parses a JSON string into a ``ContentMap.Contents`` message.

    Args:
        input_str: Candidate JSON serialization of ``ContentMap.Contents``.

    Returns:
        The message populated by ``json_format.Parse`` (the protobuf message
        obtained from ``ContentMap.Contents.pb``), or ``None`` when
        ``input_str`` cannot be parsed into that message type.
    """
    target = ContentMap.Contents.pb(ContentMap.Contents())
    try:
        return json_format.Parse(input_str, target)
    # TypeError is handled alongside ParseError: text such as "42", "true" or
    # "null" is valid JSON but not a JSON object, and json_format may fail
    # while iterating the decoded scalar instead of raising ParseError.
    except (json_format.ParseError, TypeError) as e:
        if _CONTENTS_DETECTOR in input_str and _PARTS_DETECTOR in input_str:
            logging.warning(
                "Failed to parse %s to ContentMap.Contents: %s", input_str, e
            )
        return None
33+
34+
35+
def _is_multimodal_response(response: str) -> bool:
    """Reports whether ``response`` parses as serialized ``ContentMap.Contents``.

    Args:
        response: A single placeholder value from an evaluation instance.

    Returns:
        ``True`` when ``_string_to_content_list`` yields a message, ``False``
        otherwise. On failure a warning is logged if the text contains both
        detector substrings.
    """
    if _string_to_content_list(response) is not None:
        return True

    resembles_contents = (
        _CONTENTS_DETECTOR in response and _PARTS_DETECTOR in response
    )
    if resembles_contents:
        logging.warning(
            "Response contains multimodal input: %s. Please check whether"
            " the response format conforms to ContentMap type.",
            response,
        )
    return False
48+
49+
50+
def is_multimodal_instance(
    model_based_metric_instance_input: Dict[str, str],
) -> bool:
    """Checks if the evaluation instance contains multimodal input.

    Args:
        model_based_metric_instance_input: Mapping from placeholder name to
            the value supplied for it.

    Returns:
        ``True`` as soon as one value is recognised by
        ``_is_multimodal_response``; ``False`` otherwise, including for an
        empty mapping.
    """
    # Only text can hold serialized contents. Skipping other values keeps a
    # numeric or missing entry from reaching the JSON parser, which expects
    # text and raises on anything else.
    # NOTE(review): assumes callers may pass non-string values despite the
    # Dict[str, str] annotation -- confirm against build_request.
    return any(
        isinstance(value, (str, bytes)) and _is_multimodal_response(value)
        for value in model_based_metric_instance_input.values()
    )
58+
59+
60+
def convert_multimodal_response_to_content_map(
    model_based_metric_instance_input: Dict[str, str],
) -> ContentMap:
    """Converts an evaluation instance to a ``ContentMap``.

    Args:
        model_based_metric_instance_input: Mapping from placeholder name to
            the value supplied for it.

    Returns:
        A ``ContentMap`` with one entry per placeholder. A value that parses
        as serialized ``ContentMap.Contents`` is stored as parsed; any other
        value is wrapped in a single ``Content`` holding one text ``Part``.
    """
    content_map = ContentMap()
    for placeholder, value in model_based_metric_instance_input.items():
        # Only text is handed to the parser; it raises on other types.
        if isinstance(value, (str, bytes)):
            content_list = _string_to_content_list(value)
        else:
            content_list = None

        if content_list is None:
            # Non-string values are rendered with str() so they can be
            # carried as a text part instead of aborting the conversion.
            text = value if isinstance(value, str) else str(value)
            content_list = ContentMap.Contents(
                contents=[Content(parts=[Part(text=text)])]
            )
        content_map.values[placeholder] = content_list

    return content_map

0 commit comments

Comments
 (0)