Skip to content

Commit 4521592

Browse files
committed
fix the length of multiple types
Signed-off-by: Yang Keao <[email protected]>
1 parent c56481e commit 4521592

File tree

9 files changed

+319
-50
lines changed

9 files changed

+319
-50
lines changed

pkg/expression/builtin_cast.go

Lines changed: 188 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ import (
3737
"github.com/pingcap/tidb/pkg/sessionctx/stmtctx"
3838
"github.com/pingcap/tidb/pkg/types"
3939
"github.com/pingcap/tidb/pkg/util/chunk"
40+
"github.com/pingcap/tidb/pkg/util/intest"
4041
"github.com/pingcap/tipb/go-tipb"
4142
)
4243

@@ -114,6 +115,18 @@ var (
114115
_ builtinFunc = &builtinCastVectorFloat32AsUnsupportedSig{}
115116
)
116117

118+
const (
	// Size limits named after the tiny/regular/medium/long blob classes,
	// each written as 2^n - 1.
	maxTinyBlobSize   = 1<<8 - 1  // 255
	maxBlobSize       = 1<<16 - 1 // 65535
	maxMediumBlobSize = 1<<24 - 1 // 16777215
	maxLongBlobSize   = 1<<32 - 1 // 4294967295

	// Magic lengths kept for MySQL compatibility: four times maxBlobSize and
	// four times maxMediumBlobSize. The factor of 4 (mblen) is not strictly
	// required here, but a larger length is always the safer choice against
	// truncation, so the values are left as they are.
	castBlobFlen       = 4 * maxBlobSize
	castMediumBlobFlen = 4 * maxMediumBlobSize
)
129+
117130
type castAsIntFunctionClass struct {
118131
baseFunctionClass
119132

@@ -314,26 +327,11 @@ func (c *castAsStringFunctionClass) getFunction(ctx BuildContext, args []Express
314327
tp.AddFlag(mysql.BinaryFlag)
315328
args[0] = BuildCastFunction(ctx, args[0], tp)
316329
}
317-
argTp := args[0].GetType(ctx.GetEvalCtx()).EvalType()
318-
switch argTp {
330+
argFt := args[0].GetType(ctx.GetEvalCtx())
331+
adjustRetFtForCastString(bf.tp, argFt)
332+
333+
switch argFt.EvalType() {
319334
case types.ETInt:
320-
if bf.tp.GetFlen() == types.UnspecifiedLength {
321-
// check https://github.com/pingcap/tidb/issues/44786
322-
// set flen from integers may truncate integers, e.g. char(1) can not display -1[int(1)]
323-
switch args[0].GetType(ctx.GetEvalCtx()).GetType() {
324-
case mysql.TypeTiny:
325-
bf.tp.SetFlen(4)
326-
case mysql.TypeShort:
327-
bf.tp.SetFlen(6)
328-
case mysql.TypeInt24:
329-
bf.tp.SetFlen(9)
330-
case mysql.TypeLong:
331-
// set it to 11 as mysql
332-
bf.tp.SetFlen(11)
333-
default:
334-
bf.tp.SetFlen(args[0].GetType(ctx.GetEvalCtx()).GetFlen())
335-
}
336-
}
337335
sig = &builtinCastIntAsStringSig{bf}
338336
sig.setPbCode(tipb.ScalarFuncSig_CastIntAsString)
339337
case types.ETReal:
@@ -361,11 +359,156 @@ func (c *castAsStringFunctionClass) getFunction(ctx BuildContext, args []Express
361359
sig = &builtinCastStringAsStringSig{bf}
362360
sig.setPbCode(tipb.ScalarFuncSig_CastStringAsString)
363361
default:
364-
return nil, errors.Errorf("cannot cast from %s to %s", argTp, "String")
362+
return nil, errors.Errorf("cannot cast from %s to %s", argFt.EvalType(), "String")
365363
}
366364
return sig, nil
367365
}
368366

367+
func adjustRetFtForCastString(retFt, argFt *types.FieldType) {
368+
originalFlen := retFt.GetFlen()
369+
370+
// Only estimate the length for variable length string types, because different length for fixed
371+
// length string types will have different behaviors and may cause compatibility issues.
372+
if retFt.GetType() == mysql.TypeString {
373+
return
374+
}
375+
376+
if argFt.GetType() == mysql.TypeNull {
377+
return
378+
}
379+
380+
argTp := argFt.EvalType()
381+
switch argTp {
382+
case types.ETInt:
383+
if originalFlen == types.UnspecifiedLength {
384+
// check https://github.com/pingcap/tidb/issues/44786
385+
// set flen from integers may truncate integers, e.g. char(1) can not display -1[int(1)]
386+
switch argFt.GetType() {
387+
case mysql.TypeTiny:
388+
if mysql.HasUnsignedFlag(argFt.GetFlag()) {
389+
retFt.SetFlen(3)
390+
} else {
391+
retFt.SetFlen(4)
392+
}
393+
case mysql.TypeShort:
394+
if mysql.HasUnsignedFlag(argFt.GetFlag()) {
395+
retFt.SetFlen(5)
396+
} else {
397+
retFt.SetFlen(6)
398+
}
399+
case mysql.TypeInt24:
400+
if mysql.HasUnsignedFlag(argFt.GetFlag()) {
401+
retFt.SetFlen(8)
402+
} else {
403+
retFt.SetFlen(9)
404+
}
405+
case mysql.TypeLong:
406+
if mysql.HasUnsignedFlag(argFt.GetFlag()) {
407+
retFt.SetFlen(10)
408+
} else {
409+
retFt.SetFlen(11)
410+
}
411+
case mysql.TypeLonglong:
412+
// the length of BIGINT is always 20 without considering the unsigned flag, because the
413+
// bigint range from -9223372036854775808 to 9223372036854775807, and unsigned bigint range
414+
// from 0 to 18446744073709551615, they are all 20 characters long.
415+
retFt.SetFlen(20)
416+
case mysql.TypeYear:
417+
retFt.SetFlen(4)
418+
case mysql.TypeBit:
419+
retFt.SetFlen(argFt.GetFlen())
420+
case mysql.TypeEnum:
421+
intest.Assert(false, "cast Enum to String should not set mysql.EnumSetAsIntFlag")
422+
return
423+
case mysql.TypeSet:
424+
intest.Assert(false, "cast Set to String should not set mysql.EnumSetAsIntFlag")
425+
return
426+
default:
427+
intest.Assert(false, "unknown type %d for INT", argFt.GetType())
428+
return
429+
}
430+
}
431+
case types.ETReal:
432+
// MySQL used 12/22 for float/double, it's because MySQL turns float/double into scientific notation
433+
// in some situations. TiDB choose to use 'f' format for all the cases, so TiDB needs much longer length
434+
// for float/double.
435+
//
436+
// The largest float/double value is around `3.40e38`/`1.79e308`, and the smallest positive float/double value
437+
// is around `1.40e-45`/`4.94e-324`. Therefore, we need at least `1 (sign) + 1 (integer) + 1 (dot) + (45 + 39) (decimal) = 87`
438+
// for float and `1 (sign) + 1 (integer) + 1 (dot) + (324 + 43) (decimal) = 370` for double.
439+
//
440+
// Actually, the golang will usually generate a much smaller string. It used ryu algorithm to generate the shortest
441+
// decimal representation. It's not necessary to keep all decimals. Ref:
442+
// - https://github.com/ulfjack/ryu
443+
// - https://dl.acm.org/doi/10.1145/93548.93559
444+
// So maybe 48/327 is enough for float/double, but we still set 87/370 for safety.
445+
if originalFlen == types.UnspecifiedLength {
446+
if argFt.GetType() == mysql.TypeFloat {
447+
retFt.SetFlen(87)
448+
} else if argFt.GetType() == mysql.TypeDouble {
449+
retFt.SetFlen(370)
450+
}
451+
}
452+
case types.ETDecimal:
453+
if originalFlen == types.UnspecifiedLength {
454+
retFt.SetFlen(decimalPrecisionToLength(argFt))
455+
}
456+
case types.ETDatetime, types.ETTimestamp:
457+
if originalFlen == types.UnspecifiedLength {
458+
if argFt.GetType() == mysql.TypeDate {
459+
retFt.SetFlen(mysql.MaxDateWidth)
460+
} else {
461+
retFt.SetFlen(mysql.MaxDatetimeWidthNoFsp)
462+
}
463+
464+
// Theoretically, the decimal of `DATE` will never be greater than 0.
465+
decimal := argFt.GetDecimal()
466+
if decimal > 0 {
467+
// If the type is datetime or timestamp with fractional seconds, we need to set the length to
468+
// accommodate the fractional seconds part.
469+
retFt.SetFlen(retFt.GetFlen() + 1 + decimal)
470+
}
471+
}
472+
case types.ETDuration:
473+
if originalFlen == types.UnspecifiedLength {
474+
retFt.SetFlen(mysql.MaxDurationWidthNoFsp)
475+
decimal := argFt.GetDecimal()
476+
if decimal > 0 {
477+
// If the type is time with fractional seconds, we need to set the length to
478+
// accommodate the fractional seconds part.
479+
retFt.SetFlen(retFt.GetFlen() + 1 + decimal)
480+
}
481+
}
482+
case types.ETJson:
483+
if originalFlen == types.UnspecifiedLength {
484+
retFt.SetFlen(mysql.MaxLongBlobWidth)
485+
retFt.SetType(mysql.TypeLongBlob)
486+
}
487+
case types.ETVectorFloat32:
488+
489+
case types.ETString:
490+
if originalFlen == types.UnspecifiedLength {
491+
switch argFt.GetType() {
492+
case mysql.TypeString, mysql.TypeVarchar, mysql.TypeVarString:
493+
if argFt.GetFlen() > 0 {
494+
retFt.SetFlen(argFt.GetFlen())
495+
}
496+
case mysql.TypeTinyBlob:
497+
retFt.SetFlen(maxTinyBlobSize)
498+
case mysql.TypeBlob:
499+
retFt.SetFlen(castBlobFlen)
500+
case mysql.TypeMediumBlob:
501+
retFt.SetFlen(castMediumBlobFlen)
502+
case mysql.TypeLongBlob:
503+
retFt.SetFlen(maxLongBlobSize)
504+
default:
505+
intest.Assert(false, "unknown type %d for String", argFt.GetType())
506+
return
507+
}
508+
}
509+
}
510+
}
511+
369512
type castAsTimeFunctionClass struct {
370513
baseFunctionClass
371514

@@ -1307,6 +1450,7 @@ func (b *builtinCastRealAsStringSig) evalString(ctx EvalContext, row chunk.Row)
13071450
// If we strconv.FormatFloat the value with 64bits, the result is incorrect!
13081451
bits = 32
13091452
}
1453+
13101454
res, err = types.ProduceStrWithSpecifiedTp(strconv.FormatFloat(val, 'f', -1, bits), b.tp, typeCtx(ctx), false)
13111455
if err != nil {
13121456
return res, false, err
@@ -2834,3 +2978,27 @@ func TryPushCastIntoControlFunctionForHybridType(ctx BuildContext, expr Expressi
28342978
}
28352979
return expr
28362980
}
2981+
2982+
func decimalPrecisionToLength(ft *types.FieldType) int {
2983+
precision := ft.GetFlen()
2984+
scale := ft.GetDecimal()
2985+
unsigned := mysql.HasUnsignedFlag(ft.GetFlag())
2986+
2987+
if precision == types.UnspecifiedLength || scale == types.UnspecifiedLength {
2988+
return types.UnspecifiedLength
2989+
}
2990+
2991+
ret := precision
2992+
if scale > 0 {
2993+
ret++
2994+
}
2995+
2996+
if !unsigned && precision > 0 {
2997+
ret++ // for negative sign
2998+
}
2999+
3000+
if ret == 0 {
3001+
return 1
3002+
}
3003+
return ret
3004+
}

0 commit comments

Comments
 (0)