Skip to content

Commit 381d1ff

Browse files
Defined2014 authored and Benjamin2037 committed
expression: let cast function supports explicit set charset (pingcap#55724)
close pingcap#55677
1 parent 468d429 commit 381d1ff

File tree

13 files changed

+138
-17
lines changed

13 files changed

+138
-17
lines changed

pkg/expression/bench_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1449,7 +1449,7 @@ func genVecBuiltinFuncBenchCase(ctx BuildContext, funcName string, testCase vecE
14491449
case types.ETJson:
14501450
fc = &castAsJSONFunctionClass{baseFunctionClass{ast.Cast, 1, 1}, tp}
14511451
case types.ETString:
1452-
fc = &castAsStringFunctionClass{baseFunctionClass{ast.Cast, 1, 1}, tp}
1452+
fc = &castAsStringFunctionClass{baseFunctionClass{ast.Cast, 1, 1}, tp, false}
14531453
}
14541454
baseFunc, err = fc.getFunction(ctx, cols)
14551455
} else if funcName == ast.GetVar {

pkg/expression/builtin.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,35 @@ func newBaseBuiltinCastFunc(builtinFunc baseBuiltinFunc, inUnion bool) baseBuilt
464464
}
465465
}
466466

467+
func newBaseBuiltinCastFunc4String(ctx BuildContext, funcName string, args []Expression, tp *types.FieldType, isExplicitCharset bool) (baseBuiltinFunc, error) {
468+
var bf baseBuiltinFunc
469+
var err error
470+
if isExplicitCharset {
471+
bf = baseBuiltinFunc{
472+
bufAllocator: newLocalColumnPool(),
473+
childrenVectorizedOnce: new(sync.Once),
474+
475+
args: args,
476+
tp: tp,
477+
}
478+
bf.SetCharsetAndCollation(tp.GetCharset(), tp.GetCollate())
479+
bf.setCollator(collate.GetCollator(tp.GetCollate()))
480+
bf.SetCoercibility(CoercibilityExplicit)
481+
bf.SetExplicitCharset(true)
482+
if tp.GetCharset() == charset.CharsetASCII {
483+
bf.SetRepertoire(ASCII)
484+
} else {
485+
bf.SetRepertoire(UNICODE)
486+
}
487+
} else {
488+
bf, err = newBaseBuiltinFunc(ctx, funcName, args, tp)
489+
if err != nil {
490+
return baseBuiltinFunc{}, err
491+
}
492+
}
493+
return bf, nil
494+
}
495+
467496
// vecBuiltinFunc contains all vectorized methods for a builtin function.
468497
type vecBuiltinFunc interface {
469498
// vectorized returns if this builtin function itself supports vectorized evaluation.

pkg/expression/builtin_cast.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -288,14 +288,15 @@ func (c *castAsDecimalFunctionClass) getFunction(ctx BuildContext, args []Expres
288288
type castAsStringFunctionClass struct {
289289
baseFunctionClass
290290

291-
tp *types.FieldType
291+
tp *types.FieldType
292+
isExplicitCharset bool
292293
}
293294

294295
func (c *castAsStringFunctionClass) getFunction(ctx BuildContext, args []Expression) (sig builtinFunc, err error) {
295296
if err := c.verifyArgs(args); err != nil {
296297
return nil, err
297298
}
298-
bf, err := newBaseBuiltinFunc(ctx, c.funcName, args, c.tp)
299+
bf, err := newBaseBuiltinCastFunc4String(ctx, c.funcName, args, c.tp, c.isExplicitCharset)
299300
if err != nil {
300301
return nil, err
301302
}
@@ -2265,7 +2266,7 @@ func CanImplicitEvalReal(expr Expression) bool {
22652266
// BuildCastFunction4Union build a implicitly CAST ScalarFunction from the Union
22662267
// Expression.
22672268
func BuildCastFunction4Union(ctx BuildContext, expr Expression, tp *types.FieldType) (res Expression) {
2268-
res, err := BuildCastFunctionWithCheck(ctx, expr, tp, true)
2269+
res, err := BuildCastFunctionWithCheck(ctx, expr, tp, true, false)
22692270
terror.Log(err)
22702271
return
22712272
}
@@ -2302,13 +2303,13 @@ func BuildCastCollationFunction(ctx BuildContext, expr Expression, ec *ExprColla
23022303

23032304
// BuildCastFunction builds a CAST ScalarFunction from the Expression.
23042305
func BuildCastFunction(ctx BuildContext, expr Expression, tp *types.FieldType) (res Expression) {
2305-
res, err := BuildCastFunctionWithCheck(ctx, expr, tp, false)
2306+
res, err := BuildCastFunctionWithCheck(ctx, expr, tp, false, false)
23062307
terror.Log(err)
23072308
return
23082309
}
23092310

23102311
// BuildCastFunctionWithCheck builds a CAST ScalarFunction from the Expression and return error if any.
2311-
func BuildCastFunctionWithCheck(ctx BuildContext, expr Expression, tp *types.FieldType, inUnion bool) (res Expression, err error) {
2312+
func BuildCastFunctionWithCheck(ctx BuildContext, expr Expression, tp *types.FieldType, inUnion bool, isExplicitCharset bool) (res Expression, err error) {
23122313
argType := expr.GetType(ctx.GetEvalCtx())
23132314
// If source argument's nullable, then target type should be nullable
23142315
if !mysql.HasNotNullFlag(argType.GetFlag()) {
@@ -2336,7 +2337,7 @@ func BuildCastFunctionWithCheck(ctx BuildContext, expr Expression, tp *types.Fie
23362337
case types.ETVectorFloat32:
23372338
fc = &castAsVectorFloat32FunctionClass{baseFunctionClass{ast.Cast, 1, 1}, tp}
23382339
case types.ETString:
2339-
fc = &castAsStringFunctionClass{baseFunctionClass{ast.Cast, 1, 1}, tp}
2340+
fc = &castAsStringFunctionClass{baseFunctionClass{ast.Cast, 1, 1}, tp, isExplicitCharset}
23402341
if expr.GetType(ctx.GetEvalCtx()).GetType() == mysql.TypeBit {
23412342
tp.SetFlen((expr.GetType(ctx.GetEvalCtx()).GetFlen() + 7) / 8)
23422343
}

pkg/expression/builtin_cast_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -655,7 +655,7 @@ func TestCastFuncSig(t *testing.T) {
655655
tp := types.NewFieldType(mysql.TypeVarString)
656656
tp.SetCharset(charset.CharsetBin)
657657
args := []Expression{c.before}
658-
stringFunc, err := newBaseBuiltinFunc(ctx, "", args, tp)
658+
stringFunc, err := newBaseBuiltinCastFunc4String(ctx, "", args, tp, false)
659659
require.NoError(t, err)
660660
switch i {
661661
case 0:
@@ -742,7 +742,7 @@ func TestCastFuncSig(t *testing.T) {
742742
tp := types.NewFieldType(mysql.TypeVarString)
743743
tp.SetFlen(c.flen)
744744
tp.SetCharset(charset.CharsetBin)
745-
stringFunc, err := newBaseBuiltinFunc(ctx, "", args, tp)
745+
stringFunc, err := newBaseBuiltinCastFunc4String(ctx, "", args, tp, false)
746746
require.NoError(t, err)
747747
switch i {
748748
case 0:
@@ -1099,7 +1099,7 @@ func TestCastFuncSig(t *testing.T) {
10991099
// null case
11001100
args := []Expression{&Column{RetType: types.NewFieldType(mysql.TypeDouble), Index: 0}}
11011101
row := chunk.MutRowFromDatums([]types.Datum{types.NewDatum(nil)})
1102-
bf, err := newBaseBuiltinFunc(ctx, "", args, types.NewFieldType(mysql.TypeVarString))
1102+
bf, err := newBaseBuiltinCastFunc4String(ctx, "", args, types.NewFieldType(mysql.TypeVarString), false)
11031103
require.NoError(t, err)
11041104
sig = &builtinCastRealAsStringSig{bf}
11051105
sRes, err := evalBuiltinFunc(sig, ctx, row.ToRow())
@@ -1694,7 +1694,7 @@ func TestCastArrayFunc(t *testing.T) {
16941694
},
16951695
}
16961696
for _, tt := range tbl {
1697-
f, err := BuildCastFunctionWithCheck(ctx, datumsToConstants(types.MakeDatums(types.CreateBinaryJSON(tt.input)))[0], tt.tp, false)
1697+
f, err := BuildCastFunctionWithCheck(ctx, datumsToConstants(types.MakeDatums(types.CreateBinaryJSON(tt.input)))[0], tt.tp, false, false)
16981698
if !tt.buildFuncSuccess {
16991699
require.Error(t, err, tt.input)
17001700
continue

pkg/expression/collation.go

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ type collationInfo struct {
4646

4747
charset string
4848
collation string
49+
50+
isExplicitCharset bool
4951
}
5052

5153
// Hash64 implements the base.Hasher.<0th> interface.
@@ -55,6 +57,7 @@ func (c *collationInfo) Hash64(h base.Hasher) {
5557
h.HashInt(int(c.repertoire))
5658
h.HashString(c.charset)
5759
h.HashString(c.collation)
60+
h.HashBool(c.isExplicitCharset)
5861
}
5962

6063
// Equals implements the base.Hasher.<1th> interface.
@@ -76,7 +79,8 @@ func (c *collationInfo) Equals(other any) bool {
7679
c.coerInit.Load() == c2.coerInit.Load() &&
7780
c.repertoire == c2.repertoire &&
7881
c.charset == c2.charset &&
79-
c.collation == c2.collation
82+
c.collation == c2.collation &&
83+
c.isExplicitCharset == c2.isExplicitCharset
8084
}
8185

8286
func (c *collationInfo) HasCoercibility() bool {
@@ -109,6 +113,14 @@ func (c *collationInfo) CharsetAndCollation() (string, string) {
109113
return c.charset, c.collation
110114
}
111115

116+
func (c *collationInfo) IsExplicitCharset() bool {
117+
return c.isExplicitCharset
118+
}
119+
120+
func (c *collationInfo) SetExplicitCharset(explicit bool) {
121+
c.isExplicitCharset = explicit
122+
}
123+
112124
// CollationInfo contains all interfaces about dealing with collation.
113125
type CollationInfo interface {
114126
// HasCoercibility returns if the Coercibility value is initialized.
@@ -131,6 +143,12 @@ type CollationInfo interface {
131143

132144
// SetCharsetAndCollation sets charset and collation.
133145
SetCharsetAndCollation(chs, coll string)
146+
147+
// IsExplicitCharset return the charset is explicit set or not.
148+
IsExplicitCharset() bool
149+
150+
// SetExplicitCharset set the charset is explicit or not.
151+
SetExplicitCharset(bool)
134152
}
135153

136154
// Coercibility values are used to check whether the collation of one item can be coerced to
@@ -279,9 +297,8 @@ func deriveCollation(ctx BuildContext, funcName string, args []Expression, retTy
279297
case ast.Cast:
280298
// We assume all the cast are implicit.
281299
ec = &ExprCollation{args[0].Coercibility(), args[0].Repertoire(), args[0].GetType(ctx.GetEvalCtx()).GetCharset(), args[0].GetType(ctx.GetEvalCtx()).GetCollate()}
282-
// Non-string type cast to string type should use @@character_set_connection and @@collation_connection.
283-
// String type cast to string type should keep its original charset and collation. It should not happen.
284-
if retType == types.ETString && argTps[0] != types.ETString {
300+
// Cast to string type should use @@character_set_connection and @@collation_connection.
301+
if retType == types.ETString {
285302
ec.Charset, ec.Collation = ctx.GetCharsetInfo()
286303
}
287304
return ec, nil

pkg/expression/scalar_function.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -893,6 +893,16 @@ func (sf *ScalarFunction) SetRepertoire(r Repertoire) {
893893
sf.Function.SetRepertoire(r)
894894
}
895895

896+
// IsExplicitCharset return the charset is explicit set or not.
897+
func (sf *ScalarFunction) IsExplicitCharset() bool {
898+
return sf.Function.IsExplicitCharset()
899+
}
900+
901+
// SetExplicitCharset set the charset is explicit or not.
902+
func (sf *ScalarFunction) SetExplicitCharset(explicit bool) {
903+
sf.Function.SetExplicitCharset(explicit)
904+
}
905+
896906
const emptyScalarFunctionSize = int64(unsafe.Sizeof(ScalarFunction{}))
897907

898908
// MemoryUsage return the memory usage of ScalarFunction

pkg/expression/util.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -454,8 +454,10 @@ func ColumnSubstituteImpl(ctx BuildContext, expr Expression, schema *Schema, new
454454
if substituted {
455455
flag := v.RetType.GetFlag()
456456
var e Expression
457+
var err error
457458
if v.FuncName.L == ast.Cast {
458-
e = BuildCastFunction(ctx, newArg, v.RetType)
459+
e, err = BuildCastFunctionWithCheck(ctx, newArg, v.RetType, false, v.Function.IsExplicitCharset())
460+
terror.Log(err)
459461
} else {
460462
// for grouping function recreation, use clone (meta included) instead of newFunction
461463
e = v.Clone()

pkg/expression/util_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -650,6 +650,8 @@ func (m *MockExpr) Coercibility() Coercibility { return
650650
func (m *MockExpr) SetCoercibility(Coercibility) {}
651651
func (m *MockExpr) Repertoire() Repertoire { return UNICODE }
652652
func (m *MockExpr) SetRepertoire(Repertoire) {}
653+
func (m *MockExpr) IsExplicitCharset() bool { return false }
654+
func (m *MockExpr) SetExplicitCharset(bool) {}
653655

654656
func (m *MockExpr) CharsetAndCollation() (string, string) {
655657
return "", ""

pkg/planner/core/expression_rewriter.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1507,7 +1507,7 @@ func (er *expressionRewriter) Leave(originInNode ast.Node) (retNode ast.Node, ok
15071507
return retNode, false
15081508
}
15091509

1510-
castFunction, err := expression.BuildCastFunctionWithCheck(er.sctx, arg, v.Tp, false)
1510+
castFunction, err := expression.BuildCastFunctionWithCheck(er.sctx, arg, v.Tp, false, v.ExplicitCharSet)
15111511
if err != nil {
15121512
er.err = err
15131513
return retNode, false

tests/integrationtest/r/executor/executor.result

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4379,4 +4379,5 @@ LOCK TABLE executor__executor.t WRITE, test2.t2 WRITE;
43794379
LOCK TABLE executor__executor.t WRITE, test2.t2 WRITE;
43804380
Error 8020 (HY000): Table 't' was locked in WRITE by server: <server> session: <session>
43814381
unlock tables;
4382+
unlock tables;
43824383
drop user 'testuser'@'localhost';

0 commit comments

Comments
 (0)