@@ -945,15 +945,12 @@ bool DAGStorageInterpreter::checkRetriableForBatchCopOrMPP(
     const TableID & table_id,
     const SelectQueryInfo & query_info,
     const RegionException & e,
-    int num_allow_retry)
+    const Int32 num_allow_retry)
 {
     const DAGContext & dag_context = *context.getDAGContext();
     assert((dag_context.isBatchCop() || dag_context.isMPPTask()));
     const auto & dag_regions = dag_context.getTableRegionsInfoByTableID(table_id).local_regions;
     FmtBuffer buffer;
-    // Normally there is only few regions need to retry when super batch is enabled. Retry to read
-    // from local first. However, too many retry in different places may make the whole process
-    // time out of control. We limit the number of retries to 1 now.
     if (likely(num_allow_retry > 0))
     {
         auto & regions_query_info = query_info.mvcc_query_info->regions_query_info;
@@ -967,16 +964,22 @@ bool DAGStorageInterpreter::checkRetriableForBatchCopOrMPP(
                     region_retry_from_local_region.emplace_back(region_iter->second);
                     buffer.fmtAppend("{},", region_iter->first);
                 }
+                // remove the unavailable region for next local read attempt
                 iter = regions_query_info.erase(iter);
             }
             else
             {
                 ++iter;
             }
         }
+        // `tot_num_remote_region` is the total number of regions that we will retry from other tiflash nodes among all retries
+        // `current_retry_regions` is the number of regions that we will retry from other tiflash nodes in this retry
         LOG_WARNING(
             log,
-            "RegionException after read from storage, regions [{}], message: {}{}",
+            "RegionException after read from storage, tot_num_remote_region={} cur_retry_regions={}"
+            " regions [{}], message: {}{}",
+            region_retry_from_local_region.size(),
+            e.unavailable_region.size(),
             buffer.toString(),
             e.message(),
             (regions_query_info.empty() ? "" : ", retry to read from local"));
@@ -996,15 +999,44 @@ bool DAGStorageInterpreter::checkRetriableForBatchCopOrMPP(
                 buffer.fmtAppend("{},", iter->first);
             }
         }
+        // `tot_num_remote_region` is the total number of regions that we will retry from other tiflash nodes among all retries
+        // `current_retry_regions` is the number of regions that we will retry from other tiflash nodes in this retry
         LOG_WARNING(
             log,
-            "RegionException after read from storage, regions [{}], message: {}",
+            "RegionException after read from storage, tot_num_remote_region={} cur_retry_regions={}"
+            " regions [{}], message: {}",
+            region_retry_from_local_region.size(),
+            e.unavailable_region.size(),
             buffer.toString(),
             e.message());
         return false; // break retry loop
     }
 }
 
+namespace
+{
+Int32 getMaxAllowRetryForLocalRead(const SelectQueryInfo & query_info)
+{
+    size_t region_num = query_info.mvcc_query_info->regions_query_info.size();
+    if (region_num > 1000)
+    {
+        // 1000 regions is about 93GB for 96MB region size / 250GB for 256MB region size.
+        return 10;
+    }
+    else if (region_num > 500)
+    {
+        // 500 regions is about 46.5GB for 96MB region size / 125GB for 256MB region size.
+        return 8;
+    }
+    else if (region_num > 100)
+    {
+        // 100 regions is about 9.3GB for 96MB region size / 25GB for 256MB region size.
+        return 5;
+    }
+    return 1;
+}
+} // namespace
+
 DM::Remote::DisaggPhysicalTableReadSnapshotPtr DAGStorageInterpreter::buildLocalStreamsForPhysicalTable(
     const TableID & table_id,
     const SelectQueryInfo & query_info,
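
Aside: the retry budgets above follow from the size estimates in the comments (for example, 1000 regions at 96 MB each is roughly 93 GB, and 250 GB at 256 MB per region). A minimal standalone sketch of the same mapping is shown below so the thresholds can be checked in isolation; the free function maxAllowRetryForLocalRead and the assert harness are illustrative only and are not part of the patch.

// Standalone sketch of the retry-budget mapping introduced by getMaxAllowRetryForLocalRead.
// The thresholds and return values mirror the diff; this harness is illustrative only.
#include <cassert>
#include <cstddef>
#include <cstdint>

static std::int32_t maxAllowRetryForLocalRead(std::size_t region_num)
{
    if (region_num > 1000)
        return 10; // ~93 GB at 96 MB/region, ~250 GB at 256 MB/region
    if (region_num > 500)
        return 8; // ~46.5 GB at 96 MB/region, ~125 GB at 256 MB/region
    if (region_num > 100)
        return 5; // ~9.3 GB at 96 MB/region, ~25 GB at 256 MB/region
    return 1; // small tables keep the previous single-retry behavior
}

int main()
{
    assert(maxAllowRetryForLocalRead(50) == 1);
    assert(maxAllowRetryForLocalRead(101) == 5);
    assert(maxAllowRetryForLocalRead(501) == 8);
    assert(maxAllowRetryForLocalRead(1001) == 10);
    return 0;
}
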
@@ -1021,7 +1053,14 @@ DM::Remote::DisaggPhysicalTableReadSnapshotPtr DAGStorageInterpreter::buildLocal
 
     const DAGContext & dag_context = *context.getDAGContext();
     const auto keyspace_id = dag_context.getKeyspaceID();
-    for (int num_allow_retry = 1; num_allow_retry >= 0; --num_allow_retry)
+    // Normally only a few regions need to retry when super batch is enabled. Retry to read
+    // from local first.
+    // When the table is large and too hot for writing, the number of regions may be large
+    // and region splits are frequent. In this case, we allow more retries for building
+    // the inputstream from local in order to avoid a large number of RemoteRead requests.
+    // However, too many retries may make the whole execution time grow out of control.
+    Int32 num_allow_retry = getMaxAllowRetryForLocalRead(query_info);
+    for (; num_allow_retry >= 0; --num_allow_retry)
     {
         try
         {
@@ -1063,7 +1102,7 @@ DM::Remote::DisaggPhysicalTableReadSnapshotPtr DAGStorageInterpreter::buildLocal
             // clean all streams from local because we are not sure the correctness of those streams
             pipeline.streams.clear();
            if (likely(checkRetriableForBatchCopOrMPP(table_id, query_info, e, num_allow_retry)))
-                continue;
+                continue; // next retry to read from local storage
            else
                break;
         }
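
For context, the shape of the retry loop after this change is roughly: build local input streams, and on a RegionException move the unavailable regions to the remote-read list and retry the remainder locally while the budget from getMaxAllowRetryForLocalRead lasts; once the budget is exhausted, everything still pending is handed to remote read. The sketch below illustrates that control flow under assumed names: RegionError, tryReadLocal, and readWithLocalRetry are placeholders rather than the TiFlash API, and the snippet is compile-only (tryReadLocal is left as a declaration since the real work happens in the storage layer).

// Hedged sketch of the local-retry / remote-fallback loop; placeholder names only.
#include <algorithm>
#include <cstdint>
#include <vector>

struct RegionError
{
    std::vector<std::uint64_t> unavailable_region; // region ids that failed the local read
};

// Placeholder: tries to read the given regions locally; returns the regions that
// turned out to be unavailable, or an empty list when everything succeeded.
RegionError tryReadLocal(const std::vector<std::uint64_t> & regions);

std::vector<std::uint64_t> readWithLocalRetry(std::vector<std::uint64_t> regions, std::int32_t num_allow_retry)
{
    std::vector<std::uint64_t> retry_from_remote; // regions handed over to the remote-read path
    for (; num_allow_retry >= 0; --num_allow_retry)
    {
        auto err = tryReadLocal(regions);
        if (err.unavailable_region.empty())
            break; // local read succeeded for all remaining regions

        if (num_allow_retry > 0)
        {
            // Move only the unavailable regions to the remote list and retry the rest locally.
            for (std::uint64_t id : err.unavailable_region)
            {
                retry_from_remote.push_back(id);
                regions.erase(std::remove(regions.begin(), regions.end(), id), regions.end());
            }
        }
        else
        {
            // Retry budget exhausted: hand everything that is still pending to remote read.
            retry_from_remote.insert(retry_from_remote.end(), regions.begin(), regions.end());
            break;
        }
    }
    return retry_from_remote;
}
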