From 2fd5ce33f9dc86711e29d42e61c61fb74d94f830 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20van=20Eeden?= Date: Sat, 20 Apr 2024 15:10:06 +0200 Subject: [PATCH 01/13] expression: JSON_SCHEMA_VALID() --- go.mod | 2 + go.sum | 5 +++ pkg/expression/builtin.go | 1 + pkg/expression/builtin_json.go | 68 +++++++++++++++++++++++++++++ pkg/parser/ast/functions.go | 1 + pkg/sessionctx/variable/varsutil.go | 1 + 6 files changed, 78 insertions(+) diff --git a/go.mod b/go.mod index 5bd9c8be1a6fc..2729558d5d08f 100644 --- a/go.mod +++ b/go.mod @@ -149,6 +149,8 @@ require ( require ( github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06 // indirect github.com/getsentry/sentry-go v0.27.0 // indirect + github.com/qri-io/jsonpointer v0.1.1 // indirect + github.com/qri-io/jsonschema v0.2.1 // indirect ) require ( diff --git a/go.sum b/go.sum index 8c1719a41b5c4..1540daa38600d 100644 --- a/go.sum +++ b/go.sum @@ -683,6 +683,10 @@ github.com/prometheus/procfs v0.13.0 h1:GqzLlQyfsPbaEHaQkO7tbDlriv/4o5Hudv6OXHGK github.com/prometheus/procfs v0.13.0/go.mod h1:cd4PFCR54QLnGKPaKGA6l+cfuNXtht43ZKY6tow0Y1g= github.com/prometheus/prometheus v0.50.1 h1:N2L+DYrxqPh4WZStU+o1p/gQlBaqFbcLBTjlp3vpdXw= github.com/prometheus/prometheus v0.50.1/go.mod h1:FvE8dtQ1Ww63IlyKBn1V4s+zMwF9kHkVNkQBR1pM4CU= +github.com/qri-io/jsonpointer v0.1.1 h1:prVZBZLL6TW5vsSB9fFHFAMBLI4b0ri5vribQlTJiBA= +github.com/qri-io/jsonpointer v0.1.1/go.mod h1:DnJPaYgiKu56EuDp8TU5wFLdZIcAnb/uH9v37ZaMV64= +github.com/qri-io/jsonschema v0.2.1 h1:NNFoKms+kut6ABPf6xiKNM5214jzxAhDBrPHCJ97Wg0= +github.com/qri-io/jsonschema v0.2.1/go.mod h1:g7DPkiOsK1xv6T/Ao5scXRkd+yTFygcANPBaaqW+VrI= github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475 h1:N/ElC8H3+5XpJzTSTfLsJV/mx9Q9g7kxmchpfZyxgzM= github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= @@ -708,6 +712,7 @@ github.com/scalalang2/golang-fifo v0.1.5 h1:cl70TQhlMGGpI2DZGcr+7/GFTJOjHMeor0t7 github.com/scalalang2/golang-fifo v0.1.5/go.mod h1:IK3OZBg7iHbVdQVGPDjcW1MWPb6JcWjaS/w0iRBS8gs= github.com/segmentio/asm v1.2.0 h1:9BQrFxC+YOHJlTlHGkTrFWf59nbL3XnCoFLTwDCI7ys= github.com/segmentio/asm v1.2.0/go.mod h1:BqMnlJP91P8d+4ibuonYZw9mfnzI9HfxselHZr5aAcs= +github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/shabbyrobe/gocovmerge v0.0.0-20190829150210-3e036491d500 h1:WnNuhiq+FOY3jNj6JXFT+eLN3CQ/oPIsDPRanvwsmbI= github.com/shabbyrobe/gocovmerge v0.0.0-20190829150210-3e036491d500/go.mod h1:+njLrG5wSeoG4Ds61rFgEzKvenR2UHbjMoDHsczxly0= diff --git a/pkg/expression/builtin.go b/pkg/expression/builtin.go index d4f1e2c71f169..fe9fc937ba23d 100644 --- a/pkg/expression/builtin.go +++ b/pkg/expression/builtin.go @@ -902,6 +902,7 @@ var funcs = map[string]functionClass{ ast.JSONMergePreserve: &jsonMergePreserveFunctionClass{baseFunctionClass{ast.JSONMergePreserve, 2, -1}}, ast.JSONPretty: &jsonPrettyFunctionClass{baseFunctionClass{ast.JSONPretty, 1, 1}}, ast.JSONQuote: &jsonQuoteFunctionClass{baseFunctionClass{ast.JSONQuote, 1, 1}}, + ast.JSONSchemaValid: &jsonSchemaValidFunctionClass{baseFunctionClass{ast.JSONSchemaValid, 2, 2}}, ast.JSONSearch: &jsonSearchFunctionClass{baseFunctionClass{ast.JSONSearch, 3, -1}}, ast.JSONStorageFree: &jsonStorageFreeFunctionClass{baseFunctionClass{ast.JSONStorageFree, 1, 1}}, ast.JSONStorageSize: &jsonStorageSizeFunctionClass{baseFunctionClass{ast.JSONStorageSize, 1, 1}}, diff --git a/pkg/expression/builtin_json.go b/pkg/expression/builtin_json.go index 44c752dd31766..e64a29d60385c 100644 --- a/pkg/expression/builtin_json.go +++ b/pkg/expression/builtin_json.go @@ -16,6 +16,8 @@ package expression import ( "bytes" + "context" + "encoding/json" goJSON "encoding/json" "strconv" "strings" @@ -28,6 +30,7 @@ import ( "github.com/pingcap/tidb/pkg/util/chunk" "github.com/pingcap/tidb/pkg/util/hack" "github.com/pingcap/tipb/go-tipb" + "github.com/qri-io/jsonschema" ) var ( @@ -53,6 +56,7 @@ var ( _ functionClass = &jsonMergePreserveFunctionClass{} _ functionClass = &jsonPrettyFunctionClass{} _ functionClass = &jsonQuoteFunctionClass{} + _ functionClass = &jsonSchemaValidFunctionClass{} _ functionClass = &jsonSearchFunctionClass{} _ functionClass = &jsonStorageSizeFunctionClass{} _ functionClass = &jsonDepthFunctionClass{} @@ -77,6 +81,7 @@ var ( _ builtinFunc = &builtinJSONOverlapsSig{} _ builtinFunc = &builtinJSONStorageSizeSig{} _ builtinFunc = &builtinJSONDepthSig{} + _ builtinFunc = &builtinJSONSchemaValidSig{} _ builtinFunc = &builtinJSONSearchSig{} _ builtinFunc = &builtinJSONKeysSig{} _ builtinFunc = &builtinJSONKeys2ArgsSig{} @@ -1796,3 +1801,66 @@ func (b *builtinJSONLengthSig) evalInt(ctx EvalContext, row chunk.Row) (res int6 } return int64(obj.GetElemCount()), false, nil } + +type jsonSchemaValidFunctionClass struct { + baseFunctionClass +} + +func (c *jsonSchemaValidFunctionClass) getFunction(ctx BuildContext, args []Expression) (builtinFunc, error) { + if err := c.verifyArgs(args); err != nil { + return nil, err + } + bf, err := newBaseBuiltinFuncWithTp(ctx, c.funcName, args, types.ETInt, types.ETJson, types.ETJson) + if err != nil { + return nil, err + } + + sig := &builtinJSONSchemaValidSig{bf} + return sig, nil +} + +type builtinJSONSchemaValidSig struct { + baseBuiltinFunc +} + +func (b *builtinJSONSchemaValidSig) Clone() builtinFunc { + newSig := &builtinJSONSchemaValidSig{} + newSig.cloneFrom(&b.baseBuiltinFunc) + return newSig +} + +func (b *builtinJSONSchemaValidSig) evalInt(ctx EvalContext, row chunk.Row) (res int64, isNull bool, err error) { + schema := &jsonschema.Schema{} + + // First argument is the schema + schemaData, isNull, err := b.args[0].EvalJSON(ctx, row) + if err != nil { + return res, false, err + } + dataBin, err := schemaData.MarshalJSON() + if err != nil { + return res, false, err + } + if err := json.Unmarshal(dataBin, schema); err != nil { + return res, false, err + } + + // Second argument is the JSON document + docData, _, err := b.args[1].EvalJSON(ctx, row) + if err != nil { + return res, false, err + } + docDataBin, err := docData.MarshalJSON() + if err != nil { + return res, false, err + } + errs, err := schema.ValidateBytes(context.Background(), docDataBin) + if err != nil { + return res, false, err + } + if len(errs) > 0 { + return res, false, nil + } + res = 1 + return res, false, nil +} diff --git a/pkg/parser/ast/functions.go b/pkg/parser/ast/functions.go index db6625e14135a..b1ba7be0fc81e 100644 --- a/pkg/parser/ast/functions.go +++ b/pkg/parser/ast/functions.go @@ -348,6 +348,7 @@ const ( JSONMergePreserve = "json_merge_preserve" JSONPretty = "json_pretty" JSONQuote = "json_quote" + JSONSchemaValid = "json_schema_valid" JSONSearch = "json_search" JSONStorageFree = "json_storage_free" JSONStorageSize = "json_storage_size" diff --git a/pkg/sessionctx/variable/varsutil.go b/pkg/sessionctx/variable/varsutil.go index 8d3262786bcc3..8d4625e8de4d8 100644 --- a/pkg/sessionctx/variable/varsutil.go +++ b/pkg/sessionctx/variable/varsutil.go @@ -542,6 +542,7 @@ var GAFunction4ExpressionIndex = map[string]struct{}{ ast.JSONMergePreserve: {}, ast.JSONPretty: {}, ast.JSONQuote: {}, + ast.JSONSchemaValid: {}, ast.JSONSearch: {}, ast.JSONStorageSize: {}, ast.JSONDepth: {}, From cf70c8d20b55dcf1981346c7b908e3ce76c63ca0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20van=20Eeden?= Date: Sat, 20 Apr 2024 15:20:36 +0200 Subject: [PATCH 02/13] Add deps to Bazel --- DEPS.bzl | 26 ++++++++++++++++++++++++++ pkg/expression/BUILD.bazel | 1 + 2 files changed, 27 insertions(+) diff --git a/DEPS.bzl b/DEPS.bzl index 9238d8a494955..ab57630a46ca2 100644 --- a/DEPS.bzl +++ b/DEPS.bzl @@ -5932,6 +5932,32 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/prometheus/prometheus/com_github_prometheus_prometheus-v0.50.1.zip", ], ) + go_repository( + name = "com_github_qri_io_jsonpointer", + build_file_proto_mode = "disable_global", + importpath = "github.com/qri-io/jsonpointer", + sha256 = "6870d4b9fc5ac8efb9226447975fecfb07241133e23c7e661f5aac1a3088f338", + strip_prefix = "github.com/qri-io/jsonpointer@v0.1.1", + urls = [ + "http://bazel-cache.pingcap.net:8080/gomod/github.com/qri-io/jsonpointer/com_github_qri_io_jsonpointer-v0.1.1.zip", + "http://ats.apps.svc/gomod/github.com/qri-io/jsonpointer/com_github_qri_io_jsonpointer-v0.1.1.zip", + "https://cache.hawkingrei.com/gomod/github.com/qri-io/jsonpointer/com_github_qri_io_jsonpointer-v0.1.1.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/qri-io/jsonpointer/com_github_qri_io_jsonpointer-v0.1.1.zip", + ], + ) + go_repository( + name = "com_github_qri_io_jsonschema", + build_file_proto_mode = "disable_global", + importpath = "github.com/qri-io/jsonschema", + sha256 = "51305cc45fd383b24de94e2eb421ffba8d83679520c18348842c4255025c5940", + strip_prefix = "github.com/qri-io/jsonschema@v0.2.1", + urls = [ + "http://bazel-cache.pingcap.net:8080/gomod/github.com/qri-io/jsonschema/com_github_qri_io_jsonschema-v0.2.1.zip", + "http://ats.apps.svc/gomod/github.com/qri-io/jsonschema/com_github_qri_io_jsonschema-v0.2.1.zip", + "https://cache.hawkingrei.com/gomod/github.com/qri-io/jsonschema/com_github_qri_io_jsonschema-v0.2.1.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/qri-io/jsonschema/com_github_qri_io_jsonschema-v0.2.1.zip", + ], + ) go_repository( name = "com_github_quasilyte_go_ruleguard", build_file_proto_mode = "disable_global", diff --git a/pkg/expression/BUILD.bazel b/pkg/expression/BUILD.bazel index 03284c4e945fa..2361363275616 100644 --- a/pkg/expression/BUILD.bazel +++ b/pkg/expression/BUILD.bazel @@ -123,6 +123,7 @@ go_library( "@com_github_pingcap_errors//:errors", "@com_github_pingcap_failpoint//:failpoint", "@com_github_pingcap_tipb//go-tipb", + "@com_github_qri_io_jsonschema//:jsonschema", "@com_github_tikv_client_go_v2//oracle", "@org_golang_x_tools//container/intsets", "@org_uber_go_atomic//:atomic", From c967c7aae05d9e867c4a8cc29b1c84efcb84f7c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20van=20Eeden?= Date: Sat, 20 Apr 2024 15:36:30 +0200 Subject: [PATCH 03/13] Fix duplicate import --- pkg/expression/builtin_json.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/expression/builtin_json.go b/pkg/expression/builtin_json.go index e64a29d60385c..e52616935d11e 100644 --- a/pkg/expression/builtin_json.go +++ b/pkg/expression/builtin_json.go @@ -17,7 +17,6 @@ package expression import ( "bytes" "context" - "encoding/json" goJSON "encoding/json" "strconv" "strings" @@ -1841,7 +1840,7 @@ func (b *builtinJSONSchemaValidSig) evalInt(ctx EvalContext, row chunk.Row) (res if err != nil { return res, false, err } - if err := json.Unmarshal(dataBin, schema); err != nil { + if err := goJSON.Unmarshal(dataBin, schema); err != nil { return res, false, err } From 08206dc2e263d393149742da27c21e9541327fa0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20van=20Eeden?= Date: Sat, 20 Apr 2024 15:37:41 +0200 Subject: [PATCH 04/13] Tidy go mods --- go.mod | 4 ++-- go.sum | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index 2729558d5d08f..06514aa199053 100644 --- a/go.mod +++ b/go.mod @@ -93,6 +93,7 @@ require ( github.com/prometheus/client_model v0.6.1 github.com/prometheus/common v0.52.2 github.com/prometheus/prometheus v0.50.1 + github.com/qri-io/jsonschema v0.2.1 github.com/robfig/cron/v3 v3.0.1 github.com/sasha-s/go-deadlock v0.3.1 github.com/scalalang2/golang-fifo v0.1.5 @@ -150,7 +151,6 @@ require ( github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06 // indirect github.com/getsentry/sentry-go v0.27.0 // indirect github.com/qri-io/jsonpointer v0.1.1 // indirect - github.com/qri-io/jsonschema v0.2.1 // indirect ) require ( @@ -303,7 +303,7 @@ require ( google.golang.org/genproto/googleapis/rpc v0.0.0-20240308144416-29370a3891b7 // indirect google.golang.org/protobuf v1.33.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect - gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect + gopkg.in/natefinch/lumberjack.v2 v2.2.1 gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/apimachinery v0.28.6 // indirect k8s.io/klog/v2 v2.120.1 // indirect diff --git a/go.sum b/go.sum index 1540daa38600d..a08c9deb4ad86 100644 --- a/go.sum +++ b/go.sum @@ -713,6 +713,7 @@ github.com/scalalang2/golang-fifo v0.1.5/go.mod h1:IK3OZBg7iHbVdQVGPDjcW1MWPb6Jc github.com/segmentio/asm v1.2.0 h1:9BQrFxC+YOHJlTlHGkTrFWf59nbL3XnCoFLTwDCI7ys= github.com/segmentio/asm v1.2.0/go.mod h1:BqMnlJP91P8d+4ibuonYZw9mfnzI9HfxselHZr5aAcs= github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= +github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/shabbyrobe/gocovmerge v0.0.0-20190829150210-3e036491d500 h1:WnNuhiq+FOY3jNj6JXFT+eLN3CQ/oPIsDPRanvwsmbI= github.com/shabbyrobe/gocovmerge v0.0.0-20190829150210-3e036491d500/go.mod h1:+njLrG5wSeoG4Ds61rFgEzKvenR2UHbjMoDHsczxly0= From d4b571b5fcdde3eee397c4f9e2e64e69edd3f8c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20van=20Eeden?= Date: Sat, 20 Apr 2024 16:04:29 +0200 Subject: [PATCH 05/13] Update tests --- tests/integrationtest/r/executor/show.result | 1 + .../integrationtest/r/explain_generate_column_substitute.result | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integrationtest/r/executor/show.result b/tests/integrationtest/r/executor/show.result index 1864df7315998..2d031feb62ad7 100644 --- a/tests/integrationtest/r/executor/show.result +++ b/tests/integrationtest/r/executor/show.result @@ -726,6 +726,7 @@ json_pretty json_quote json_remove json_replace +json_schema_valid json_search json_set json_storage_free diff --git a/tests/integrationtest/r/explain_generate_column_substitute.result b/tests/integrationtest/r/explain_generate_column_substitute.result index f80b514fad34e..b244be8f8f539 100644 --- a/tests/integrationtest/r/explain_generate_column_substitute.result +++ b/tests/integrationtest/r/explain_generate_column_substitute.result @@ -505,7 +505,7 @@ a b select @@tidb_allow_function_for_expression_index; @@tidb_allow_function_for_expression_index -json_array, json_array_append, json_array_insert, json_contains, json_contains_path, json_depth, json_extract, json_insert, json_keys, json_length, json_merge_patch, json_merge_preserve, json_object, json_pretty, json_quote, json_remove, json_replace, json_search, json_set, json_storage_size, json_type, json_unquote, json_valid, lower, md5, reverse, tidb_shard, upper, vitess_hash +json_array, json_array_append, json_array_insert, json_contains, json_contains_path, json_depth, json_extract, json_insert, json_keys, json_length, json_merge_patch, json_merge_preserve, json_object, json_pretty, json_quote, json_remove, json_replace, json_schema_valid, json_search, json_set, json_storage_size, json_type, json_unquote, json_valid, lower, md5, reverse, tidb_shard, upper, vitess_hash CREATE TABLE `PK_S_MULTI_30_tmp` ( `COL1` double NOT NULL, `COL2` double NOT NULL, From 7102e6e6e3ea798294ee9261d59490375050ce9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20van=20Eeden?= Date: Sun, 21 Apr 2024 13:16:10 +0200 Subject: [PATCH 06/13] Add tests and handling of NULLs --- pkg/expression/builtin_json.go | 10 ++++++++-- tests/integrationtest/t/expression/json.test | 14 ++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/pkg/expression/builtin_json.go b/pkg/expression/builtin_json.go index e52616935d11e..fe0797fdf724a 100644 --- a/pkg/expression/builtin_json.go +++ b/pkg/expression/builtin_json.go @@ -1832,10 +1832,13 @@ func (b *builtinJSONSchemaValidSig) evalInt(ctx EvalContext, row chunk.Row) (res schema := &jsonschema.Schema{} // First argument is the schema - schemaData, isNull, err := b.args[0].EvalJSON(ctx, row) + schemaData, schemaIsNull, err := b.args[0].EvalJSON(ctx, row) if err != nil { return res, false, err } + if schemaIsNull { + return res, true, err + } dataBin, err := schemaData.MarshalJSON() if err != nil { return res, false, err @@ -1845,10 +1848,13 @@ func (b *builtinJSONSchemaValidSig) evalInt(ctx EvalContext, row chunk.Row) (res } // Second argument is the JSON document - docData, _, err := b.args[1].EvalJSON(ctx, row) + docData, docIsNull, err := b.args[1].EvalJSON(ctx, row) if err != nil { return res, false, err } + if docIsNull { + return res, true, err + } docDataBin, err := docData.MarshalJSON() if err != nil { return res, false, err diff --git a/tests/integrationtest/t/expression/json.test b/tests/integrationtest/t/expression/json.test index 714aab87f2edd..fcef59e3bc2f7 100644 --- a/tests/integrationtest/t/expression/json.test +++ b/tests/integrationtest/t/expression/json.test @@ -393,3 +393,17 @@ select 1 from t where cast(vc as json) = '1'; select 1 from t where cast(c as json) = '1'; select 1 from t where cast(BINARY vc as json) = '1'; select 1 from t where cast(BINARY c as json) = '1'; + +# TestJSONSchemaValid +SELECT JSON_SCHEMA_VALID(NULL, NULL); +SELECT JSON_SCHEMA_VALID('{}', NULL); +SELECT JSON_SCHEMA_VALID(NULL, '{}'); +SELECT JSON_SCHEMA_VALID('{"required": ["a","b"]}', '{"a": 5,"b": 6}'); +SELECT JSON_SCHEMA_VALID('{"required": ["a","b"]}', '{"a": 5,"c": 6}'); +SELECT JSON_SCHEMA_VALID('{"type": "object"}', '{}'); +SELECT JSON_SCHEMA_VALID('{"type": "object"}', '"foo"'); +SELECT JSON_SCHEMA_VALID('{"properties": {"a": {"type": "number"}}}', '{}'); +SELECT JSON_SCHEMA_VALID('{"properties": {"a": {"type": "number"}}}', '{"a": "foo"}'); +SELECT JSON_SCHEMA_VALID('{"properties": {"a": {"type": "number"}}}', '{"a": 5}'); +SELECT JSON_SCHEMA_VALID('{"properties": {"a": {"type": "number", "minimum": 5}}}', '{"a": 5}'); +SELECT JSON_SCHEMA_VALID('{"properties": {"a": {"type": "number", "minimum": 5}}}', '{"a": 6}'); From ebd7afb027605ce0ffeae3a1c77678221092e150 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20van=20Eeden?= Date: Sun, 21 Apr 2024 13:21:59 +0200 Subject: [PATCH 07/13] Add result for expression/json test --- .../integrationtest/r/expression/json.result | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/integrationtest/r/expression/json.result b/tests/integrationtest/r/expression/json.result index 2f1cfdece1660..676a3581d96e5 100644 --- a/tests/integrationtest/r/expression/json.result +++ b/tests/integrationtest/r/expression/json.result @@ -649,3 +649,39 @@ select 1 from t where cast(BINARY vc as json) = '1'; 1 select 1 from t where cast(BINARY c as json) = '1'; 1 +SELECT JSON_SCHEMA_VALID(NULL, NULL); +JSON_SCHEMA_VALID(NULL, NULL) +NULL +SELECT JSON_SCHEMA_VALID('{}', NULL); +JSON_SCHEMA_VALID('{}', NULL) +NULL +SELECT JSON_SCHEMA_VALID(NULL, '{}'); +JSON_SCHEMA_VALID(NULL, '{}') +NULL +SELECT JSON_SCHEMA_VALID('{"required": ["a","b"]}', '{"a": 5,"b": 6}'); +JSON_SCHEMA_VALID('{"required": ["a","b"]}', '{"a": 5,"b": 6}') +1 +SELECT JSON_SCHEMA_VALID('{"required": ["a","b"]}', '{"a": 5,"c": 6}'); +JSON_SCHEMA_VALID('{"required": ["a","b"]}', '{"a": 5,"c": 6}') +0 +SELECT JSON_SCHEMA_VALID('{"type": "object"}', '{}'); +JSON_SCHEMA_VALID('{"type": "object"}', '{}') +1 +SELECT JSON_SCHEMA_VALID('{"type": "object"}', '"foo"'); +JSON_SCHEMA_VALID('{"type": "object"}', '"foo"') +0 +SELECT JSON_SCHEMA_VALID('{"properties": {"a": {"type": "number"}}}', '{}'); +JSON_SCHEMA_VALID('{"properties": {"a": {"type": "number"}}}', '{}') +1 +SELECT JSON_SCHEMA_VALID('{"properties": {"a": {"type": "number"}}}', '{"a": "foo"}'); +JSON_SCHEMA_VALID('{"properties": {"a": {"type": "number"}}}', '{"a": "foo"}') +0 +SELECT JSON_SCHEMA_VALID('{"properties": {"a": {"type": "number"}}}', '{"a": 5}'); +JSON_SCHEMA_VALID('{"properties": {"a": {"type": "number"}}}', '{"a": 5}') +1 +SELECT JSON_SCHEMA_VALID('{"properties": {"a": {"type": "number", "minimum": 5}}}', '{"a": 5}'); +JSON_SCHEMA_VALID('{"properties": {"a": {"type": "number", "minimum": 5}}}', '{"a": 5}') +1 +SELECT JSON_SCHEMA_VALID('{"properties": {"a": {"type": "number", "minimum": 5}}}', '{"a": 6}'); +JSON_SCHEMA_VALID('{"properties": {"a": {"type": "number", "minimum": 5}}}', '{"a": 6}') +1 From f9f82e7e102f02bf17b4ff91713e78cfb63f8e07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20van=20Eeden?= Date: Sun, 21 Apr 2024 13:26:54 +0200 Subject: [PATCH 08/13] Add test for JSON_SCHEMA_VALID with pattern --- tests/integrationtest/r/expression/json.result | 6 ++++++ tests/integrationtest/t/expression/json.test | 2 ++ 2 files changed, 8 insertions(+) diff --git a/tests/integrationtest/r/expression/json.result b/tests/integrationtest/r/expression/json.result index 676a3581d96e5..7cef6d82d6ba3 100644 --- a/tests/integrationtest/r/expression/json.result +++ b/tests/integrationtest/r/expression/json.result @@ -685,3 +685,9 @@ JSON_SCHEMA_VALID('{"properties": {"a": {"type": "number", "minimum": 5}}}', '{" SELECT JSON_SCHEMA_VALID('{"properties": {"a": {"type": "number", "minimum": 5}}}', '{"a": 6}'); JSON_SCHEMA_VALID('{"properties": {"a": {"type": "number", "minimum": 5}}}', '{"a": 6}') 1 +SELECT JSON_SCHEMA_VALID('{"properties": {"a": {"pattern": "^a"}}}', '{"a": "abc"}'); +JSON_SCHEMA_VALID('{"properties": {"a": {"pattern": "^a"}}}', '{"a": "abc"}') +1 +SELECT JSON_SCHEMA_VALID('{"properties": {"a": {"pattern": "^a"}}}', '{"a": "cba"}'); +JSON_SCHEMA_VALID('{"properties": {"a": {"pattern": "^a"}}}', '{"a": "cba"}') +0 diff --git a/tests/integrationtest/t/expression/json.test b/tests/integrationtest/t/expression/json.test index fcef59e3bc2f7..eecfcdef0f405 100644 --- a/tests/integrationtest/t/expression/json.test +++ b/tests/integrationtest/t/expression/json.test @@ -407,3 +407,5 @@ SELECT JSON_SCHEMA_VALID('{"properties": {"a": {"type": "number"}}}', '{"a": "fo SELECT JSON_SCHEMA_VALID('{"properties": {"a": {"type": "number"}}}', '{"a": 5}'); SELECT JSON_SCHEMA_VALID('{"properties": {"a": {"type": "number", "minimum": 5}}}', '{"a": 5}'); SELECT JSON_SCHEMA_VALID('{"properties": {"a": {"type": "number", "minimum": 5}}}', '{"a": 6}'); +SELECT JSON_SCHEMA_VALID('{"properties": {"a": {"pattern": "^a"}}}', '{"a": "abc"}'); +SELECT JSON_SCHEMA_VALID('{"properties": {"a": {"pattern": "^a"}}}', '{"a": "cba"}'); From 6d51db8c1461d282c2a725784b05ea1e517f6927 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20van=20Eeden?= Date: Sun, 21 Apr 2024 14:20:28 +0200 Subject: [PATCH 09/13] Mark JSON_SCHEMA_VALID as Boolean Function --- pkg/expression/function_traits.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/expression/function_traits.go b/pkg/expression/function_traits.go index dc9cbca9a48e3..bdd3e9acbbddf 100644 --- a/pkg/expression/function_traits.go +++ b/pkg/expression/function_traits.go @@ -279,6 +279,7 @@ var booleanFunctions = map[string]struct{}{ ast.IsIPv4Compat: {}, ast.IsIPv4Mapped: {}, ast.IsIPv6: {}, + ast.JSONSchemaValid: {}, ast.JSONValid: {}, ast.RegexpLike: {}, } From 5089ea219204b0a18c1940d7a8cded14cc381af0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20van=20Eeden?= Date: Wed, 24 Apr 2024 09:51:44 +0100 Subject: [PATCH 10/13] Updates --- .../reload_expr_pushdown_blacklist.go | 1 + pkg/expression/builtin_json_test.go | 59 +++++++++++++++++++ .../tiflash_selection_late_materialization.go | 2 + 3 files changed, 62 insertions(+) diff --git a/pkg/executor/reload_expr_pushdown_blacklist.go b/pkg/executor/reload_expr_pushdown_blacklist.go index 8cb6f62a07ac5..c751ec3f9623a 100644 --- a/pkg/executor/reload_expr_pushdown_blacklist.go +++ b/pkg/executor/reload_expr_pushdown_blacklist.go @@ -347,6 +347,7 @@ var funcName2Alias = map[string]string{ "json_merge_preserve": ast.JSONMergePreserve, "json_pretty": ast.JSONPretty, "json_quote": ast.JSONQuote, + "json_schema_valid": ast.JSONSchemaValid, "json_search": ast.JSONSearch, "json_storage_size": ast.JSONStorageSize, "json_depth": ast.JSONDepth, diff --git a/pkg/expression/builtin_json_test.go b/pkg/expression/builtin_json_test.go index 60561566730f7..49ea739cb5811 100644 --- a/pkg/expression/builtin_json_test.go +++ b/pkg/expression/builtin_json_test.go @@ -1342,3 +1342,62 @@ func TestJSONMergePatch(t *testing.T) { } } } + +func TestJSONSchemaValid(t *testing.T) { + ctx := createContext(t) + fc := funcs[ast.JSONSchemaValid] + tbl := []struct { + Input any + Expected any + }{ + // nulls + {[]any{nil, `{}`}, nil}, + {[]any{`{}`, nil}, nil}, + {[]any{nil, nil}, nil}, + + // empty + {[]any{`{}`, `{}`}, 1}, + + // required + {[]any{`{"required": ["a","b"]}`, `{"a": 5}`}, 0}, + {[]any{`{"required": ["a","b"]}`, `{"a": 5, "b": 6}`}, 1}, + + // type + {[]any{`{"type": ["string"]}`, `{}`}, 0}, + {[]any{`{"type": ["string"]}`, `"foobar"`}, 1}, + {[]any{`{"type": ["object"]}`, `{}`}, 1}, + {[]any{`{"type": ["object"]}`, `"foobar"`}, 0}, + + // properties, type + {[]any{`{"properties": {"a": {"type": "number"}}}`, `{}`}, 1}, + {[]any{`{"properties": {"a": {"type": "number"}}}`, `{"a": "foobar"}`}, 0}, + {[]any{`{"properties": {"a": {"type": "number"}}}`, `{"a": 5}`}, 1}, + + // properties, minimum + {[]any{`{"properties": {"a": {"type": "number", "minimum": 6}}}`, `{"a": 5}`}, 0}, + + // properties, pattern + {[]any{`{"properties": {"a": {"type": "string", "pattern": "^a"}}}`, `{"a": "abc"}`}, 1}, + {[]any{`{"properties": {"a": {"type": "string", "pattern": "^a"}}}`, `{"a": "cba"}`}, 0}, + } + dtbl := tblToDtbl(tbl) + for _, tt := range dtbl { + f, err := fc.getFunction(ctx, datumsToConstants(tt["Input"])) + require.NoError(t, err) + d, err := evalBuiltinFunc(f, ctx, chunk.Row{}) + require.NoError(t, err) + if tt["Expected"][0].IsNull() { + require.True(t, d.IsNull()) + } else { + testutil.DatumEqual( + t, tt["Expected"][0], d, + fmt.Sprintf("JSON_SCHEMA_VALID(%s,%s) = %d (expected: %d)", + tt["Input"][0].GetString(), + tt["Input"][1].GetString(), + d.GetInt64(), + tt["Expected"][0].GetInt64(), + ), + ) + } + } +} diff --git a/pkg/planner/core/tiflash_selection_late_materialization.go b/pkg/planner/core/tiflash_selection_late_materialization.go index da09afef5dbe8..b48c5ce9b17be 100644 --- a/pkg/planner/core/tiflash_selection_late_materialization.go +++ b/pkg/planner/core/tiflash_selection_late_materialization.go @@ -170,6 +170,8 @@ func withHeavyCostFunctionForTiFlashPrefetch(cond expression.Expression) bool { return true case ast.JSONStorageFree, ast.JSONStorageSize, ast.JSONDepth, ast.JSONKeys, ast.JSONLength, ast.JSONContains, ast.JSONSearch, ast.JSONOverlaps, ast.JSONQuote: return true + case ast.JSONSchemaValid: + return true // some time functions case ast.AddDate, ast.AddTime, ast.ConvertTz, ast.DateLiteral, ast.DateAdd, ast.DateFormat, ast.FromUnixTime, ast.GetFormat, ast.UTCTimestamp: return true From 3c00ec06a3e2e3f31fde484ea17d0ccd672cc953 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20van=20Eeden?= Date: Wed, 8 May 2024 07:13:19 +0200 Subject: [PATCH 11/13] Update based on review --- .../tiflash_selection_late_materialization.go | 35 +++++++++++++++---- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/pkg/planner/core/tiflash_selection_late_materialization.go b/pkg/planner/core/tiflash_selection_late_materialization.go index b48c5ce9b17be..f5b1f1e503d9a 100644 --- a/pkg/planner/core/tiflash_selection_late_materialization.go +++ b/pkg/planner/core/tiflash_selection_late_materialization.go @@ -164,13 +164,34 @@ func withHeavyCostFunctionForTiFlashPrefetch(cond expression.Expression) bool { if binop, ok := cond.(*expression.ScalarFunction); ok { switch binop.FuncName.L { // JSON functions - case ast.JSONType, ast.JSONExtract, ast.JSONUnquote, ast.JSONArray, ast.JSONObject, ast.JSONMerge, ast.JSONSet, ast.JSONInsert, ast.JSONReplace, ast.JSONRemove: - return true - case ast.JSONMemberOf, ast.JSONContainsPath, ast.JSONValid, ast.JSONArrayAppend, ast.JSONArrayInsert, ast.JSONMergePatch, ast.JSONMergePreserve, ast.JSONPretty: - return true - case ast.JSONStorageFree, ast.JSONStorageSize, ast.JSONDepth, ast.JSONKeys, ast.JSONLength, ast.JSONContains, ast.JSONSearch, ast.JSONOverlaps, ast.JSONQuote: - return true - case ast.JSONSchemaValid: + case ast.JSONArray, + ast.JSONArrayAppend, + ast.JSONArrayInsert, + ast.JSONContains, + ast.JSONContainsPath, + ast.JSONDepth, + ast.JSONExtract, + ast.JSONInsert, + ast.JSONKeys, + ast.JSONLength, + ast.JSONMemberOf, + ast.JSONMerge, + ast.JSONMergePatch, + ast.JSONMergePreserve, + ast.JSONObject, + ast.JSONOverlaps, + ast.JSONPretty, + ast.JSONQuote, + ast.JSONRemove, + ast.JSONReplace, + ast.JSONSchemaValid, + ast.JSONSearch, + ast.JSONSet, + ast.JSONStorageFree, + ast.JSONStorageSize, + ast.JSONType, + ast.JSONUnquote, + ast.JSONValid: return true // some time functions case ast.AddDate, ast.AddTime, ast.ConvertTz, ast.DateLiteral, ast.DateAdd, ast.DateFormat, ast.FromUnixTime, ast.GetFormat, ast.UTCTimestamp: From c5c2feb06dfe2a6e13d9d7f3d7942bac7cd61901 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20van=20Eeden?= Date: Wed, 8 May 2024 09:28:19 +0200 Subject: [PATCH 12/13] Use builtinFuncCache for static JSON schema --- pkg/expression/builtin_json.go | 35 ++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/pkg/expression/builtin_json.go b/pkg/expression/builtin_json.go index 064a69501d543..89070d2278917 100644 --- a/pkg/expression/builtin_json.go +++ b/pkg/expression/builtin_json.go @@ -1814,12 +1814,14 @@ func (c *jsonSchemaValidFunctionClass) getFunction(ctx BuildContext, args []Expr return nil, err } - sig := &builtinJSONSchemaValidSig{bf} + sig := &builtinJSONSchemaValidSig{baseBuiltinFunc: bf} return sig, nil } type builtinJSONSchemaValidSig struct { baseBuiltinFunc + + schemaCache builtinFuncCache[jsonschema.Schema] } func (b *builtinJSONSchemaValidSig) Clone() builtinFunc { @@ -1829,7 +1831,7 @@ func (b *builtinJSONSchemaValidSig) Clone() builtinFunc { } func (b *builtinJSONSchemaValidSig) evalInt(ctx EvalContext, row chunk.Row) (res int64, isNull bool, err error) { - schema := &jsonschema.Schema{} + var schema jsonschema.Schema // First argument is the schema schemaData, schemaIsNull, err := b.args[0].EvalJSON(ctx, row) @@ -1839,12 +1841,29 @@ func (b *builtinJSONSchemaValidSig) evalInt(ctx EvalContext, row chunk.Row) (res if schemaIsNull { return res, true, err } - dataBin, err := schemaData.MarshalJSON() - if err != nil { - return res, false, err - } - if err := goJSON.Unmarshal(dataBin, schema); err != nil { - return res, false, err + + if b.args[0].ConstLevel() >= ConstOnlyInContext { + schema, err = b.schemaCache.getOrInitCache(ctx, func() (jsonschema.Schema, error) { + dataBin, err := schemaData.MarshalJSON() + if err != nil { + return jsonschema.Schema{}, err + } + if err := goJSON.Unmarshal(dataBin, &schema); err != nil { + return jsonschema.Schema{}, err + } + return schema, nil + }) + if err != nil { + return res, false, err + } + } else { + dataBin, err := schemaData.MarshalJSON() + if err != nil { + return res, false, err + } + if err := goJSON.Unmarshal(dataBin, &schema); err != nil { + return res, false, err + } } // Second argument is the JSON document From d87f556fa5cb542416e324d18d2bece60ac7cbd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20van=20Eeden?= Date: Tue, 28 May 2024 21:08:27 +0200 Subject: [PATCH 13/13] Add TestJSONSchemaValidCache --- pkg/expression/builtin_json.go | 4 +++ pkg/expression/builtin_json_test.go | 39 +++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/pkg/expression/builtin_json.go b/pkg/expression/builtin_json.go index 89070d2278917..18eaf24005120 100644 --- a/pkg/expression/builtin_json.go +++ b/pkg/expression/builtin_json.go @@ -22,6 +22,7 @@ import ( "strings" "github.com/pingcap/errors" + "github.com/pingcap/failpoint" "github.com/pingcap/tidb/pkg/parser/ast" "github.com/pingcap/tidb/pkg/parser/charset" "github.com/pingcap/tidb/pkg/parser/mysql" @@ -1844,6 +1845,9 @@ func (b *builtinJSONSchemaValidSig) evalInt(ctx EvalContext, row chunk.Row) (res if b.args[0].ConstLevel() >= ConstOnlyInContext { schema, err = b.schemaCache.getOrInitCache(ctx, func() (jsonschema.Schema, error) { + failpoint.Inject("jsonSchemaValidDisableCacheRefresh", func() { + failpoint.Return(jsonschema.Schema{}, errors.New("Cache refresh disabled by failpoint")) + }) dataBin, err := schemaData.MarshalJSON() if err != nil { return jsonschema.Schema{}, err diff --git a/pkg/expression/builtin_json_test.go b/pkg/expression/builtin_json_test.go index 49ea739cb5811..928c2ed28fd80 100644 --- a/pkg/expression/builtin_json_test.go +++ b/pkg/expression/builtin_json_test.go @@ -18,6 +18,7 @@ import ( "fmt" "testing" + "github.com/pingcap/failpoint" "github.com/pingcap/tidb/pkg/parser/ast" "github.com/pingcap/tidb/pkg/parser/mysql" "github.com/pingcap/tidb/pkg/parser/terror" @@ -1401,3 +1402,41 @@ func TestJSONSchemaValid(t *testing.T) { } } } + +// TestJSONSchemaValidCache is to test if the cached schema is used +func TestJSONSchemaValidCache(t *testing.T) { + ctx := createContext(t) + fc := funcs[ast.JSONSchemaValid] + tbl := []struct { + Input any + Expected any + }{ + {[]any{`{}`, `{}`}, 1}, + } + dtbl := tblToDtbl(tbl) + + for _, tt := range dtbl { + // Get the function and eval once, ensuring it is cached + f, err := fc.getFunction(ctx, datumsToConstants(tt["Input"])) + require.NoError(t, err) + _, err = evalBuiltinFunc(f, ctx, chunk.Row{}) + require.NoError(t, err) + + // Disable the cache function + require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/pkg/expression/jsonSchemaValidDisableCacheRefresh", `return(true)`)) + + // This eval should use the cache and not call the function. + _, err = evalBuiltinFunc(f, ctx, chunk.Row{}) + require.NoError(t, err) + + // Now get a new cache by getting the function again. + f, err = fc.getFunction(ctx, datumsToConstants(tt["Input"])) + require.NoError(t, err) + + // Empty cache, we call the function. This should return an error. + _, err = evalBuiltinFunc(f, ctx, chunk.Row{}) + require.Error(t, err) + } + + require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/expression/jsonSchemaValidDisableCacheRefresh")) +}