diff --git a/pkg/planner/core/casetest/vectorsearch/BUILD.bazel b/pkg/planner/core/casetest/vectorsearch/BUILD.bazel index 35815a7250a08..ca2f66ab0a98c 100644 --- a/pkg/planner/core/casetest/vectorsearch/BUILD.bazel +++ b/pkg/planner/core/casetest/vectorsearch/BUILD.bazel @@ -9,21 +9,25 @@ go_test( ], data = glob(["testdata/**"]), flaky = True, - shard_count = 4, + shard_count = 5, deps = [ "//pkg/config", "//pkg/domain", "//pkg/domain/infosync", "//pkg/meta/model", "//pkg/parser/model", + "//pkg/planner", "//pkg/planner/core", "//pkg/planner/core/base", + "//pkg/planner/core/resolve", + "//pkg/session", "//pkg/store/mockstore", "//pkg/testkit", "//pkg/testkit/testdata", "//pkg/testkit/testfailpoint", "//pkg/testkit/testmain", "//pkg/testkit/testsetup", + "//pkg/types", "//pkg/util/plancodec", "@com_github_pingcap_tipb//go-tipb", "@com_github_stretchr_testify//require", diff --git a/pkg/planner/core/casetest/vectorsearch/vector_index_test.go b/pkg/planner/core/casetest/vectorsearch/vector_index_test.go index 0329332a12de2..a7b1231e36310 100644 --- a/pkg/planner/core/casetest/vectorsearch/vector_index_test.go +++ b/pkg/planner/core/casetest/vectorsearch/vector_index_test.go @@ -24,12 +24,16 @@ import ( "github.com/pingcap/tidb/pkg/domain/infosync" "github.com/pingcap/tidb/pkg/meta/model" pmodel "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/planner" "github.com/pingcap/tidb/pkg/planner/core" "github.com/pingcap/tidb/pkg/planner/core/base" + "github.com/pingcap/tidb/pkg/planner/core/resolve" + "github.com/pingcap/tidb/pkg/session" "github.com/pingcap/tidb/pkg/store/mockstore" "github.com/pingcap/tidb/pkg/testkit" "github.com/pingcap/tidb/pkg/testkit/testdata" "github.com/pingcap/tidb/pkg/testkit/testfailpoint" + "github.com/pingcap/tidb/pkg/types" "github.com/pingcap/tidb/pkg/util/plancodec" "github.com/pingcap/tipb/go-tipb" "github.com/stretchr/testify/require" @@ -244,3 +248,67 @@ func TestANNInexWithSimpleCBO(t *testing.T) { testkit.SetTiFlashReplica(t, dom, "test", "t1") tk.MustUseIndex("select * from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 1", "vector_index") } + +func TestANNIndexWithNonIntClusteredPk(t *testing.T) { + store := testkit.CreateMockStoreWithSchemaLease(t, 1*time.Second, mockstore.WithMockTiFlash(2)) + + tk := testkit.NewTestKit(t, store) + + tiflash := infosync.NewMockTiFlash() + infosync.SetMockTiFlash(tiflash) + defer func() { + tiflash.Lock() + tiflash.StatusServer.Close() + tiflash.Unlock() + }() + + testfailpoint.Enable(t, "github.com/pingcap/tidb/pkg/ddl/MockCheckVectorIndexProcess", `return(1)`) + + tk.MustExec("use test") + tk.MustExec("drop table if exists t1") + tk.MustExec(` + create table t1 ( + vec vector(3), + a int, + b int, + c vector(3), + d vector, + primary key (a, b) + ) + `) + tk.MustExec("alter table t1 set tiflash replica 1;") + tk.MustExec("alter table t1 add vector index ((vec_cosine_distance(vec))) USING HNSW;") + tk.MustExec("insert into t1 values ('[1,1,1]', 1, 1, '[1,1,1]', '[1,1,1]')") + dom := domain.GetDomain(tk.Session()) + testkit.SetTiFlashReplica(t, dom, "test", "t1") + sctx := tk.Session() + stmts, err := session.Parse(sctx, "select * from t1 use index(vector_index) order by vec_cosine_distance(vec, '[1,1,1]') limit 1") + require.NoError(t, err) + require.Len(t, stmts, 1) + stmt := stmts[0] + ret := &core.PreprocessorReturn{} + nodeW := resolve.NewNodeW(stmt) + err = core.Preprocess(context.Background(), sctx, nodeW, core.WithPreprocessorReturn(ret)) + require.NoError(t, err) + var finalPlanTree base.Plan + finalPlanTree, _, err = planner.Optimize(context.Background(), sctx, nodeW, ret.InfoSchema) + require.NoError(t, err) + physicalTree, ok := finalPlanTree.(base.PhysicalPlan) + require.True(t, ok) + // Find the PhysicalTableReader node. + tableReader := physicalTree + for ; len(tableReader.Children()) > 0; tableReader = tableReader.Children()[0] { + } + castedTableReader, ok := tableReader.(*core.PhysicalTableReader) + require.True(t, ok) + tableScan, err := castedTableReader.GetTableScan() + require.NoError(t, err) + // Check that it has the extra vector index information. + require.NotNil(t, tableScan.AnnIndexExtra) + require.Len(t, tableScan.Ranges, 1) + // Check that it's full scan. + require.Equal(t, "[-inf,+inf]", tableScan.Ranges[0].String()) + // Check that the -inf and +inf are the correct types. + require.Equal(t, types.KindMinNotNull, tableScan.Ranges[0].LowVal[0].Kind()) + require.Equal(t, types.KindMaxValue, tableScan.Ranges[0].HighVal[0].Kind()) +} diff --git a/pkg/planner/core/find_best_task.go b/pkg/planner/core/find_best_task.go index 32038b7762f62..3601d43a30d81 100644 --- a/pkg/planner/core/find_best_task.go +++ b/pkg/planner/core/find_best_task.go @@ -761,6 +761,19 @@ func compareCandidates(sctx base.PlanContext, prop *property.PhysicalProperty, l } func isMatchProp(ds *logicalop.DataSource, path *util.AccessPath, prop *property.PhysicalProperty) bool { + if prop.VectorProp.VectorHelper != nil && path.Index != nil && path.Index.VectorInfo != nil { + if path.Index == nil || path.Index.VectorInfo == nil { + return false + } + if ds.TableInfo.Columns[path.Index.Columns[0].Offset].ID != prop.VectorProp.Column.ID { + return false + } + + if model.IndexableFnNameToDistanceMetric[prop.VectorProp.DistanceFnName.L] != path.Index.VectorInfo.DistanceMetric { + return false + } + return true + } var isMatchProp bool if path.IsIntHandlePath { pkCol := ds.GetPKIsHandleCol() @@ -808,19 +821,6 @@ func isMatchProp(ds *logicalop.DataSource, path *util.AccessPath, prop *property } } } - if prop.VectorProp.VectorHelper != nil && path.Index.VectorInfo != nil { - if path.Index == nil || path.Index.VectorInfo == nil { - return false - } - if ds.TableInfo.Columns[path.Index.Columns[0].Offset].ID != prop.VectorProp.Column.ID { - return false - } - - if model.IndexableFnNameToDistanceMetric[prop.VectorProp.DistanceFnName.L] != path.Index.VectorInfo.DistanceMetric { - return false - } - return true - } return isMatchProp } diff --git a/pkg/planner/core/planbuilder.go b/pkg/planner/core/planbuilder.go index 9558a0417fd03..2fb0daf4b6b08 100644 --- a/pkg/planner/core/planbuilder.go +++ b/pkg/planner/core/planbuilder.go @@ -1194,15 +1194,19 @@ func getPossibleAccessPaths(ctx base.PlanContext, tableHints *hint.PlanHints, in continue } } - path := &util.AccessPath{Index: index} if index.VectorInfo != nil { // Because the value of `TiFlashReplica.Available` changes as the user modify replica, it is not ideal if the state of index changes accordingly. // So the current way to use the vector indexes is to require the TiFlash Replica to be available. if !tblInfo.TiFlashReplica.Available { continue } + path := genTiFlashPath(tblInfo) path.StoreType = kv.TiFlash + path.Index = index + publicPaths = append(publicPaths, path) + continue } + path := &util.AccessPath{Index: index} publicPaths = append(publicPaths, path) } } diff --git a/pkg/planner/util/path.go b/pkg/planner/util/path.go index 92da155765238..f01f1ac1caa5d 100644 --- a/pkg/planner/util/path.go +++ b/pkg/planner/util/path.go @@ -156,7 +156,7 @@ func (path *AccessPath) IsTiKVTablePath() bool { // IsTiFlashSimpleTablePath returns true if it's a TiFlash path and will not use any special indexes like vector index. func (path *AccessPath) IsTiFlashSimpleTablePath() bool { - return (path.IsIntHandlePath || path.IsCommonHandlePath) && path.StoreType == kv.TiFlash + return path.StoreType == kv.TiFlash && path.Index == nil } // SplitCorColAccessCondFromFilters move the necessary filter in the form of index_col = corrlated_col to access conditions.