Skip to content

Commit 1ec09e6

Browse files
authored
*: migrate to the new parser redact function (pingcap#51590)
close pingcap#51588
1 parent 56e5009 commit 1ec09e6

File tree

2 files changed

+60
-15
lines changed

2 files changed

+60
-15
lines changed

digester.go

Lines changed: 42 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -81,11 +81,17 @@ func DigestNormalized(normalized string) (digest *Digest) {
8181
// Normalize generates the normalized statements.
8282
// it will get normalized form of statement text
8383
// which removes general property of a statement but keeps specific property.
84+
// possible values for 'redact' are "OFF", "ON" or "MARKER". Passing "" is treated as "OFF".
8485
//
85-
// for example: Normalize('select 1 from b where a = 1') => 'select ? from b where a = ?'
86-
func Normalize(sql string) (result string) {
86+
// when "OFF", the input sql is returned as is, without any normalization
87+
// for example, when "ON": Normalize('select 1 from b where a = 1', "ON") => 'select ? from `b` where `a` = ?'
88+
// for example, when "MARKER": Normalize('select 1 from b where a = 1', "MARKER") => 'select ‹1› from `b` where `a` = ‹1›'
89+
func Normalize(sql string, redact string) (result string) {
90+
if redact == "" || redact == "OFF" {
91+
return sql
92+
}
8793
d := digesterPool.Get().(*sqlDigester)
88-
result = d.doNormalize(sql, false)
94+
result = d.doNormalize(sql, redact, false)
8995
digesterPool.Put(d)
9096
return
9197
}
@@ -109,7 +115,7 @@ func NormalizeForBinding(sql string, forPlanReplayerReload bool) (result string)
109115
// for example: NormalizeKeepHint('select /*+ use_index(t, primary) */ 1 from b where a = 1') => 'select /*+ use_index(t, primary) */ ? from b where a = ?'
110116
func NormalizeKeepHint(sql string) (result string) {
111117
d := digesterPool.Get().(*sqlDigester)
112-
result = d.doNormalize(sql, true)
118+
result = d.doNormalize(sql, "ON", true)
113119
digesterPool.Put(d)
114120
return
115121
}
@@ -161,30 +167,30 @@ func (d *sqlDigester) doDigestNormalized(normalized string) (digest *Digest) {
161167
}
162168

163169
func (d *sqlDigester) doDigest(sql string) (digest *Digest) {
164-
d.normalize(sql, false, false, false)
170+
d.normalize(sql, "ON", false, false, false)
165171
d.hasher.Write(d.buffer.Bytes())
166172
d.buffer.Reset()
167173
digest = NewDigest(d.hasher.Sum(nil))
168174
d.hasher.Reset()
169175
return
170176
}
171177

172-
func (d *sqlDigester) doNormalize(sql string, keepHint bool) (result string) {
173-
d.normalize(sql, keepHint, false, false)
178+
func (d *sqlDigester) doNormalize(sql string, redact string, keepHint bool) (result string) {
179+
d.normalize(sql, redact, keepHint, false, false)
174180
result = d.buffer.String()
175181
d.buffer.Reset()
176182
return
177183
}
178184

179185
func (d *sqlDigester) doNormalizeForBinding(sql string, keepHint bool, forPlanReplayerReload bool) (result string) {
180-
d.normalize(sql, keepHint, true, forPlanReplayerReload)
186+
d.normalize(sql, "ON", keepHint, true, forPlanReplayerReload)
181187
result = d.buffer.String()
182188
d.buffer.Reset()
183189
return
184190
}
185191

186192
func (d *sqlDigester) doNormalizeDigest(sql string) (normalized string, digest *Digest) {
187-
d.normalize(sql, false, false, false)
193+
d.normalize(sql, "ON", false, false, false)
188194
normalized = d.buffer.String()
189195
d.hasher.Write(d.buffer.Bytes())
190196
d.buffer.Reset()
@@ -194,7 +200,7 @@ func (d *sqlDigester) doNormalizeDigest(sql string) (normalized string, digest *
194200
}
195201

196202
func (d *sqlDigester) doNormalizeDigestForBinding(sql string) (normalized string, digest *Digest) {
197-
d.normalize(sql, false, true, false)
203+
d.normalize(sql, "ON", false, true, false)
198204
normalized = d.buffer.String()
199205
d.hasher.Write(d.buffer.Bytes())
200206
d.buffer.Reset()
@@ -212,7 +218,7 @@ const (
212218
genericSymbolList = -2
213219
)
214220

215-
func (d *sqlDigester) normalize(sql string, keepHint bool, forBinding bool, forPlanReplayerReload bool) {
221+
func (d *sqlDigester) normalize(sql string, redact string, keepHint bool, forBinding bool, forPlanReplayerReload bool) {
216222
d.lexer.reset(sql)
217223
d.lexer.setKeepHint(keepHint)
218224
for {
@@ -229,7 +235,7 @@ func (d *sqlDigester) normalize(sql string, keepHint bool, forBinding bool, forP
229235
continue
230236
}
231237

232-
d.reduceLit(&currTok, forBinding)
238+
d.reduceLit(&currTok, redact, forBinding, forPlanReplayerReload)
233239
if forPlanReplayerReload {
234240
// Apply for plan replayer to match specific rules, changing IN (...) to IN (?). This can avoid plan replayer load failures caused by parse errors.
235241
d.replaceSingleLiteralWithInList(&currTok)
@@ -313,10 +319,33 @@ func (d *sqlDigester) reduceOptimizerHint(tok *token) (reduced bool) {
313319
return
314320
}
315321

316-
func (d *sqlDigester) reduceLit(currTok *token, forBinding bool) {
322+
func (d *sqlDigester) reduceLit(currTok *token, redact string, forBinding bool, forPlanReplayer bool) {
317323
if !d.isLit(*currTok) {
318324
return
319325
}
326+
327+
if redact == "MARKER" && !forBinding && !forPlanReplayer {
328+
switch currTok.lit {
329+
case "?", "*":
330+
return
331+
}
332+
input := currTok.lit
333+
b := &strings.Builder{}
334+
b.Grow(len(input))
335+
_, _ = b.WriteRune('‹')
336+
for _, c := range input {
337+
if c == '‹' || c == '›' {
338+
_, _ = b.WriteRune(c)
339+
_, _ = b.WriteRune(c)
340+
} else {
341+
_, _ = b.WriteRune(c)
342+
}
343+
}
344+
_, _ = b.WriteRune('›')
345+
currTok.lit = b.String()
346+
return
347+
}
348+
320349
// count(*) => count(?)
321350
if currTok.lit == "*" {
322351
if d.isStarParam() {

digester_test.go

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ func TestNormalize(t *testing.T) {
7070
{"insert into t values (1)", "insert into `t` values ( ? )"},
7171
}
7272
for _, test := range tests_for_generic_normalization_rules {
73-
normalized := parser.Normalize(test.input)
73+
normalized := parser.Normalize(test.input, "ON")
7474
digest := parser.DigestNormalized(normalized)
7575
require.Equal(t, test.expect, normalized)
7676

@@ -102,6 +102,22 @@ func TestNormalize(t *testing.T) {
102102
}
103103
}
104104

105+
func TestNormalizeRedact(t *testing.T) {
106+
cases := []struct {
107+
input string
108+
expect string
109+
}{
110+
{"select * from t where a in (1)", "select * from `t` where `a` in ( ‹1› )"},
111+
{"select * from t where a in (1, 3)", "select * from `t` where `a` in ( ‹1› , ‹3› )"},
112+
{"select ? from b order by 2", "select ? from `b` order by ‹2›"},
113+
}
114+
115+
for _, c := range cases {
116+
normalized := parser.Normalize(c.input, "MARKER")
117+
require.Equal(t, c.expect, normalized)
118+
}
119+
}
120+
105121
func TestNormalizeKeepHint(t *testing.T) {
106122
tests := []struct {
107123
input string
@@ -162,7 +178,7 @@ func TestNormalizeDigest(t *testing.T) {
162178
require.Equal(t, test.normalized, normalized)
163179
require.Equal(t, test.digest, digest.String())
164180

165-
normalized = parser.Normalize(test.sql)
181+
normalized = parser.Normalize(test.sql, "ON")
166182
digest = parser.DigestNormalized(normalized)
167183
require.Equal(t, test.normalized, normalized)
168184
require.Equal(t, test.digest, digest.String())

0 commit comments

Comments
 (0)