// Copyright 2021 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package core

import (
	"context"
	"time"

	"github.com/pingcap/failpoint"
	"github.com/pingcap/tidb/pkg/domain"
	"github.com/pingcap/tidb/pkg/infoschema"
	"github.com/pingcap/tidb/pkg/parser/model"
	"github.com/pingcap/tidb/pkg/sessionctx/variable"
	"github.com/pingcap/tidb/pkg/statistics"
	"github.com/pingcap/tidb/pkg/table"
	"github.com/pingcap/tidb/pkg/util/logutil"
	"go.uber.org/zap"
)

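// collectPredicateColumnsPoint is a logical optimization rule that records the columns used in predicates
// for stats usage tracking, and sends sync-load requests for the column/index stats needed by the plan.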
type collectPredicateColumnsPoint struct{}

func (collectPredicateColumnsPoint) optimize(_ context.Context, plan LogicalPlan, _ *logicalOptimizeOp) (LogicalPlan, bool, error) {
	planChanged := false
	if plan.SCtx().GetSessionVars().InRestrictedSQL {
		return plan, planChanged, nil
	}
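	// Predicate columns are collected only when column tracking is enabled, and histogram-needed
	// columns are collected only when the stats load sync wait is positive.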
	predicateNeeded := variable.EnableColumnTracking.Load()
	syncWait := plan.SCtx().GetSessionVars().StatsLoadSyncWait
	histNeeded := syncWait > 0
	predicateColumns, histNeededColumns := CollectColumnStatsUsage(plan, predicateNeeded, histNeeded)
	if len(predicateColumns) > 0 {
		plan.SCtx().UpdateColStatsUsage(predicateColumns)
	}
	if !histNeeded {
		return plan, planChanged, nil
	}

	// Prepare the table metadata to avoid repeatedly fetching it from the infoSchema below.
	is := plan.SCtx().GetInfoSchema().(infoschema.InfoSchema)
	tblID2Tbl := make(map[int64]table.Table)
	for _, neededCol := range histNeededColumns {
		tbl, _ := infoschema.FindTableByTblOrPartID(is, neededCol.TableID)
		if tbl == nil {
			continue
		}
		tblID2Tbl[neededCol.TableID] = tbl
	}

	// Collect the needed virtual columns from the already needed columns.
	// Note that dependingVirtualCols are used only to collect the needed index stats; they don't trigger stats loading on
	// the virtual columns themselves, because virtual columns have no statistics of their own, while expression
	// indexes, which are indexes on virtual columns, do. This way we don't waste resources on stats that don't exist.
	dependingVirtualCols := CollectDependingVirtualCols(tblID2Tbl, histNeededColumns)

	histNeededIndices := collectSyncIndices(plan.SCtx(), append(histNeededColumns, dependingVirtualCols...), tblID2Tbl)
	histNeededItems := collectHistNeededItems(histNeededColumns, histNeededIndices)
	if len(histNeededItems) > 0 {
		err := RequestLoadStats(plan.SCtx(), histNeededItems, syncWait)
		return plan, planChanged, err
	}
	return plan, planChanged, nil
}

func (collectPredicateColumnsPoint) name() string {
	return "collect_predicate_columns_point"
}

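// syncWaitStatsLoadPoint is a logical optimization rule that blocks until the stats requested by
// collectPredicateColumnsPoint are loaded, or the sync wait times out.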
type syncWaitStatsLoadPoint struct{}

func (syncWaitStatsLoadPoint) optimize(_ context.Context, plan LogicalPlan, _ *logicalOptimizeOp) (LogicalPlan, bool, error) {
	planChanged := false
	if plan.SCtx().GetSessionVars().InRestrictedSQL {
		return plan, planChanged, nil
	}
	if plan.SCtx().GetSessionVars().StmtCtx.IsSyncStatsFailed {
		return plan, planChanged, nil
	}
	err := SyncWaitStatsLoad(plan)
	return plan, planChanged, err
}

func (syncWaitStatsLoadPoint) name() string {
	return "sync_wait_stats_load_point"
}

// RequestLoadStats sends load requests for the needed column/index stats to the stats handle.
func RequestLoadStats(ctx PlanContext, neededHistItems []model.TableItemID, syncWait int64) error {
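	// Cap the sync wait time at the statement's max execution time, when one is set.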
	maxExecutionTime := ctx.GetSessionVars().GetMaxExecutionTime()
	if maxExecutionTime > 0 && maxExecutionTime < uint64(syncWait) {
		syncWait = int64(maxExecutionTime)
	}
	failpoint.Inject("assertSyncWaitFailed", func(val failpoint.Value) {
		if val.(bool) {
			if syncWait != 1 {
				panic("syncWait should be 1(ms)")
			}
		}
	})
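	// syncWait is in milliseconds; convert it to a time.Duration for the load request.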
	timeout := time.Duration(syncWait) * time.Millisecond
	stmtCtx := ctx.GetSessionVars().StmtCtx
	err := domain.GetDomain(ctx).StatsHandle().SendLoadRequests(stmtCtx, neededHistItems, timeout)
	if err != nil {
		stmtCtx.IsSyncStatsFailed = true
		if variable.StatsLoadPseudoTimeout.Load() {
			logutil.BgLogger().Warn("RequestLoadStats failed", zap.Error(err))
			stmtCtx.AppendWarning(err)
			return nil
		}
		logutil.BgLogger().Error("RequestLoadStats failed", zap.Error(err))
		return err
	}
	return nil
}

// SyncWaitStatsLoad waits, until timeout, for the previously requested stats to be loaded.
func SyncWaitStatsLoad(plan LogicalPlan) error {
	stmtCtx := plan.SCtx().GetSessionVars().StmtCtx
	if len(stmtCtx.StatsLoad.NeededItems) <= 0 {
		return nil
	}
	err := domain.GetDomain(plan.SCtx()).StatsHandle().SyncWaitStatsLoad(stmtCtx)
	if err != nil {
		stmtCtx.IsSyncStatsFailed = true
		if variable.StatsLoadPseudoTimeout.Load() {
			logutil.BgLogger().Warn("SyncWaitStatsLoad failed", zap.Error(err))
			stmtCtx.AppendWarning(err)
			return nil
		}
		logutil.BgLogger().Error("SyncWaitStatsLoad failed", zap.Error(err))
		return err
	}
	return nil
}

// CollectDependingVirtualCols collects the virtual columns that depend on the needed columns and returns them in a new slice.
//
// Why do we need this?
// It's mainly for stats sync loading.
// Currently, virtual columns themselves don't have statistics, but expression indexes, which are indexes on virtual
// columns, do. We need to collect the needed virtual columns so that the needed expression index stats can be
// collected for sync loading.
// In normal cases, if a virtual column can be used (meaning its related statistics may be needed), the corresponding
// expressions in the query have already been replaced with the virtual column before this point. So we only need to treat
// virtual columns like normal columns in stats sync loading: we extract the Column from the expressions, and the
// virtual columns we want will be there.
// However, in some cases (currently the MV index case), the expressions are not replaced with the virtual columns before
// this point. Instead, the expression in the query is matched against the expression behind the virtual column later, when
// building the access paths. This means we cannot know which virtual columns will be needed by just extracting
// the Column from the expressions here, so we need to collect the potentially needed virtual columns manually.
//
// Note 1: As long as a virtual column depends on the needed columns, it will be collected, which may include virtual
// columns that are not actually needed.
// That's OK, because it's how sync loading is expected to work: sync loading only needs to ensure that all actually
// needed stats are triggered to be loaded, and other parts of sync loading behave the same way.
// Collecting only the virtual columns that are actually needed would require the checking logic here to exactly mirror
// the logic for generating the access paths, which would make the logic here very complicated.
//
// Note 2: Only direct dependencies are considered here.
// If a virtual column depends on another virtual column, and the latter depends on the needed columns, the former
// will not be collected.
// For example, given create table t(a int, b int, c int as (a+b), d int as (c+1)), if a is needed, then c will be
// collected, but d will not.
// This is because, currently, statistics related to indirectly depending columns can never be actually needed.
// If we need to check indirect dependencies some day, the logic here can easily be extended.
func CollectDependingVirtualCols(tblID2Tbl map[int64]table.Table, neededItems []model.TableItemID) []model.TableItemID {
	generatedCols := make([]model.TableItemID, 0)

	// group the neededItems by table id
	tblID2neededColIDs := make(map[int64][]int64, len(tblID2Tbl))
	for _, item := range neededItems {
		if item.IsIndex {
			continue
		}
		tblID2neededColIDs[item.TableID] = append(tblID2neededColIDs[item.TableID], item.ID)
	}

	// process them by table id
	for tblID, colIDs := range tblID2neededColIDs {
		tbl := tblID2Tbl[tblID]
		if tbl == nil {
			continue
		}
		// collect the needed columns of this table into a set for faster lookup
		colNameSet := make(map[string]struct{}, len(colIDs))
		for _, colID := range colIDs {
			name := tbl.Meta().FindColumnNameByID(colID)
			if name == "" {
				continue
			}
			colNameSet[name] = struct{}{}
		}
		// iterate over the columns of this table and collect the virtual columns that depend on the needed columns
		for _, col := range tbl.Cols() {
			// only handle virtual columns
			if !col.IsVirtualGenerated() {
				continue
			}
			// skip the column if it is already needed
			if _, ok := colNameSet[col.Name.L]; ok {
				continue
			}
			// if this virtual column depends on any needed column, consider it needed as well
			for depCol := range col.Dependences {
				if _, ok := colNameSet[depCol]; ok {
					generatedCols = append(generatedCols, model.TableItemID{TableID: tblID, ID: col.ID, IsIndex: false})
					break
				}
			}
		}
	}
	return generatedCols
}

// collectSyncIndices collects the indices that satisfy both of the following conditions:
//  1. The index contains at least one of the histNeededColumns. For example, if histNeededColumns contains
//     columns A and B, and idx_a is built on column A, then idx_a should be collected.
//  2. The stats of the index are not fully loaded (IsFullLoad is false), which means they were evicted previously.
func collectSyncIndices(ctx PlanContext,
	histNeededColumns []model.TableItemID,
	tblID2Tbl map[int64]table.Table,
) map[model.TableItemID]struct{} {
	histNeededIndices := make(map[model.TableItemID]struct{})
	stats := domain.GetDomain(ctx).StatsHandle()
	for _, column := range histNeededColumns {
		if column.IsIndex {
			continue
		}
		tbl := tblID2Tbl[column.TableID]
		if tbl == nil {
			continue
		}
		colName := tbl.Meta().FindColumnNameByID(column.ID)
		if colName == "" {
			continue
		}
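		// Check each public index containing this column; its stats need to be sync-loaded
		// if they were initialized but are no longer fully loaded (i.e., they were evicted).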
		for _, idx := range tbl.Indices() {
			if idx.Meta().State != model.StatePublic {
				continue
			}
			idxCol := idx.Meta().FindColumnByName(colName)
			idxID := idx.Meta().ID
			if idxCol != nil {
				tblStats := stats.GetTableStats(tbl.Meta())
				if tblStats == nil || tblStats.Pseudo {
					continue
				}
				idxStats, ok := tblStats.Indices[idxID]
				if ok && idxStats.IsStatsInitialized() && !idxStats.IsFullLoad() {
					histNeededIndices[model.TableItemID{TableID: column.TableID, ID: idxID, IsIndex: true}] = struct{}{}
				}
			}
		}
	}
	return histNeededIndices
}

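// collectHistNeededItems merges the histogram-needed columns and indices into a single slice of items to request.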
func collectHistNeededItems(histNeededColumns []model.TableItemID, histNeededIndices map[model.TableItemID]struct{}) (histNeededItems []model.TableItemID) {
	for idx := range histNeededIndices {
		histNeededItems = append(histNeededItems, idx)
	}
	histNeededItems = append(histNeededItems, histNeededColumns...)
	return
}

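// recordTableRuntimeStats fetches the statistics of the given tables and stores them in the
// statement context's TableStats map.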
func recordTableRuntimeStats(sctx PlanContext, tbls map[int64]struct{}) {
	tblStats := sctx.GetSessionVars().StmtCtx.TableStats
	if tblStats == nil {
		tblStats = map[int64]any{}
	}
	for tblID := range tbls {
		tblJSONStats, skip, err := recordSingleTableRuntimeStats(sctx, tblID)
		if err != nil {
			logutil.BgLogger().Warn("record table json stats failed", zap.Int64("tblID", tblID), zap.Error(err))
		}
		if tblJSONStats == nil && !skip {
			logutil.BgLogger().Warn("record table json stats failed due to empty", zap.Int64("tblID", tblID))
		}
		tblStats[tblID] = tblJSONStats
	}
	sctx.GetSessionVars().StmtCtx.TableStats = tblStats
}

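// recordSingleTableRuntimeStats returns the current statistics of a single table. skip is true when
// the table is a temporary table, since temporary tables have no statistics.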
func recordSingleTableRuntimeStats(sctx PlanContext, tblID int64) (stats *statistics.Table, skip bool, err error) {
	dom := domain.GetDomain(sctx)
	statsHandle := dom.StatsHandle()
	is := sctx.GetDomainInfoSchema().(infoschema.InfoSchema)
	tbl, ok := is.TableByID(tblID)
	if !ok {
		return nil, false, nil
	}
	tableInfo := tbl.Meta()
	stats = statsHandle.GetTableStats(tableInfo)
	// Skip the warning if the table is a temporary table, because temporary tables don't have stats.
	skip = tableInfo.TempTableType != model.TempTableNone
	return stats, skip, nil
}