|
24 | 24 | from ducktape.utils.util import wait_until
|
25 | 25 | from rptest.services.metrics_check import MetricCheck
|
26 | 26 |
|
27 |
| -NO_SCHEMA_ERRORS = [ |
28 |
| - r'Must have parsed schema when using structured data mode', |
29 |
| - r'Error translating data to binary record' |
30 |
| -] |
31 |
| - |
32 | 27 |
|
33 | 28 | class DatalakeE2ETests(RedpandaTest):
|
34 | 29 | def __init__(self, test_ctx, *args, **kwargs):
|
@@ -190,45 +185,79 @@ def table_deleted():
|
190 | 185 | dl.produce_to_topic(self.topic_name, 1024, count)
|
191 | 186 | dl.wait_for_translation(self.topic_name, msg_count=count)
|
192 | 187 |
|
193 |
| - @cluster(num_nodes=3, log_allow_list=NO_SCHEMA_ERRORS) |
194 |
| - @matrix(cloud_storage_type=supported_storage_types()) |
195 |
| - def test_metrics(self, cloud_storage_type): |
196 | 188 |
|
197 |
| - commit_lag = 'vectorized_cluster_partition_iceberg_offsets_pending_commit' |
198 |
| - translation_lag = 'vectorized_cluster_partition_iceberg_offsets_pending_translation' |
| 189 | +class DatalakeMetricsTest(RedpandaTest): |
| 190 | + |
| 191 | + commit_lag = 'vectorized_cluster_partition_iceberg_offsets_pending_commit' |
| 192 | + translation_lag = 'vectorized_cluster_partition_iceberg_offsets_pending_translation' |
| 193 | + |
| 194 | + def __init__(self, test_ctx, *args, **kwargs): |
| 195 | + super(DatalakeMetricsTest, |
| 196 | + self).__init__(test_ctx, |
| 197 | + num_brokers=3, |
| 198 | + si_settings=SISettings(test_context=test_ctx), |
| 199 | + extra_rp_conf={ |
| 200 | + "iceberg_enabled": "true", |
| 201 | + "iceberg_catalog_commit_interval_ms": "5000", |
| 202 | + "enable_leader_balancer": False |
| 203 | + }, |
| 204 | + schema_registry_config=SchemaRegistryConfig(), |
| 205 | + pandaproxy_config=PandaproxyConfig(), |
| 206 | + *args, |
| 207 | + **kwargs) |
| 208 | + self.test_ctx = test_ctx |
| 209 | + self.topic_name = "test" |
| 210 | + |
| 211 | + def setUp(self): |
| 212 | + pass |
| 213 | + |
| 214 | + def wait_for_lag(self, metric_check: MetricCheck, metric_name: str, |
| 215 | + count: int): |
| 216 | + wait_until( |
| 217 | + lambda: metric_check.evaluate([(metric_name, lambda _, val: val == |
| 218 | + count)]), |
| 219 | + timeout_sec=30, |
| 220 | + backoff_sec=5, |
| 221 | + err_msg=f"Timed out waiting for {metric_name} to reach: {count}") |
| 222 | + |
| 223 | + @cluster(num_nodes=5) |
| 224 | + @matrix(cloud_storage_type=supported_storage_types()) |
| 225 | + def test_lag_metrics(self, cloud_storage_type): |
199 | 226 |
|
200 | 227 | with DatalakeServices(self.test_ctx,
|
201 | 228 | redpanda=self.redpanda,
|
202 | 229 | filesystem_catalog_mode=False,
|
203 | 230 | include_query_engines=[]) as dl:
|
204 | 231 |
|
205 |
| - dl.create_iceberg_enabled_topic( |
206 |
| - self.topic_name, |
207 |
| - partitions=1, |
208 |
| - replicas=1, |
209 |
| - iceberg_mode="value_schema_id_prefix") |
| 232 | + # Stop the catalog to halt the translation flow |
| 233 | + dl.catalog_service.stop() |
| 234 | + |
| 235 | + dl.create_iceberg_enabled_topic(self.topic_name, |
| 236 | + partitions=1, |
| 237 | + replicas=3) |
| 238 | + topic_leader = self.redpanda.partitions(self.topic_name)[0].leader |
210 | 239 | count = randint(12, 21)
|
211 |
| - # Populate schemaless messages in schema-ed mode, this should |
212 |
| - # hold up translation and commits |
213 |
| - dl.produce_to_topic(self.topic_name, 1024, msg_count=count) |
| 240 | + dl.produce_to_topic(self.topic_name, 1, msg_count=count) |
214 | 241 |
|
215 | 242 | m = MetricCheck(self.redpanda.logger,
|
216 | 243 | self.redpanda,
|
217 |
| - self.redpanda.nodes[0], |
218 |
| - [commit_lag, translation_lag], |
| 244 | + topic_leader, [ |
| 245 | + DatalakeMetricsTest.commit_lag, |
| 246 | + DatalakeMetricsTest.translation_lag |
| 247 | + ], |
219 | 248 | labels={
|
220 | 249 | 'namespace': 'kafka',
|
221 | 250 | 'topic': self.topic_name,
|
222 | 251 | 'partition': '0'
|
223 | 252 | },
|
224 | 253 | reduce=sum)
|
225 |
| - expectations = [] |
226 |
| - for metric in [commit_lag, translation_lag]: |
227 |
| - expectations.append([metric, lambda _, val: val == count]) |
228 |
| - |
229 |
| - # Ensure lag metric builds up as expected. |
230 |
| - wait_until( |
231 |
| - lambda: m.evaluate(expectations), |
232 |
| - timeout_sec=30, |
233 |
| - backoff_sec=5, |
234 |
| - err_msg=f"Timed out waiting for metrics to reach: {count}") |
| 254 | + |
| 255 | + # Wait for lag build up |
| 256 | + self.wait_for_lag(m, DatalakeMetricsTest.translation_lag, count) |
| 257 | + self.wait_for_lag(m, DatalakeMetricsTest.commit_lag, count) |
| 258 | + |
| 259 | + # Resume iceberg translation |
| 260 | + dl.catalog_service.start() |
| 261 | + |
| 262 | + self.wait_for_lag(m, DatalakeMetricsTest.translation_lag, 0) |
| 263 | + self.wait_for_lag(m, DatalakeMetricsTest.commit_lag, 0) |
0 commit comments