Skip to content

Commit 70e5da7

Browse files
authored
executor: handle collate for min/max(enum/set column) (#31819) (#31857)
close #31638
1 parent ca51058 commit 70e5da7

File tree

4 files changed: +414 -6 lines changed
Lines changed: 302 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,302 @@
1+
create database collation_agg_func;
2+
use collation_agg_func;
3+
create table t(id int, value varchar(20) charset utf8mb4 collate utf8mb4_general_ci, value1 varchar(20) charset utf8mb4 collate utf8mb4_bin);
4+
insert into t values (1, 'abc', 'abc '),(4, 'Abc', 'abc'),(3,'def', 'def '), (5, 'abc', 'ABC');
5+
desc format='brief' select group_concat(value order by 1) from t;
6+
id estRows task access object operator info
7+
HashAgg 1.00 root funcs:group_concat(collation_agg_func.t.value order by collation_agg_func.t.value separator ",")->Column#5
8+
└─TableReader 10000.00 root data:TableFullScan
9+
└─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
10+
select group_concat(value order by 1) from t;
11+
group_concat(value order by 1)
12+
Abc,abc,abc,def
13+
desc format='brief' select group_concat(value) from t;
14+
id estRows task access object operator info
15+
HashAgg 1.00 root funcs:group_concat(collation_agg_func.t.value separator ",")->Column#5
16+
└─TableReader 10000.00 root data:TableFullScan
17+
└─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
18+
select group_concat(value) from t;
19+
group_concat(value)
20+
abc,Abc,def,abc
21+
desc format='brief' select group_concat(value collate utf8mb4_bin) from t;
22+
id estRows task access object operator info
23+
HashAgg 1.00 root funcs:group_concat(Column#6 separator ",")->Column#5
24+
└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#6
25+
└─TableReader 10000.00 root data:TableFullScan
26+
└─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
27+
select group_concat(value collate utf8mb4_bin) from t;
28+
group_concat(value collate utf8mb4_bin)
29+
abc,Abc,def,abc
30+
desc format='brief' select group_concat(distinct value order by 1) from t;
31+
id estRows task access object operator info
32+
StreamAgg 1.00 root funcs:group_concat(distinct collation_agg_func.t.value order by collation_agg_func.t.value separator ",")->Column#5
33+
└─TableReader 10000.00 root data:TableFullScan
34+
└─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
35+
select upper(group_concat(distinct value order by 1)) from t;
36+
upper(group_concat(distinct value order by 1))
37+
ABC,ABC,DEF
38+
desc format='brief' select group_concat(distinct value collate utf8mb4_bin order by 1) from t;
39+
id estRows task access object operator info
40+
StreamAgg 1.00 root funcs:group_concat(distinct Column#6 order by Column#7 separator ",")->Column#5
41+
└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#6, cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#7
42+
└─TableReader 10000.00 root data:TableFullScan
43+
└─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
44+
select upper(group_concat(distinct value collate utf8mb4_bin order by 1)) from t;
45+
upper(group_concat(distinct value collate utf8mb4_bin order by 1))
46+
ABC,ABC,DEF
47+
desc format='brief' select group_concat(distinct value) from t;
48+
id estRows task access object operator info
49+
StreamAgg 1.00 root funcs:group_concat(distinct collation_agg_func.t.value separator ",")->Column#5
50+
└─TableReader 10000.00 root data:TableFullScan
51+
└─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
52+
select upper(group_concat(distinct value)) from t;
53+
upper(group_concat(distinct value))
54+
ABC,ABC,DEF
55+
desc format='brief' select group_concat(distinct value collate utf8mb4_bin) from t;
56+
id estRows task access object operator info
57+
StreamAgg 1.00 root funcs:group_concat(distinct Column#6 separator ",")->Column#5
58+
└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#6
59+
└─TableReader 10000.00 root data:TableFullScan
60+
└─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
61+
select upper(group_concat(distinct value collate utf8mb4_bin)) from t;
62+
upper(group_concat(distinct value collate utf8mb4_bin))
63+
ABC,ABC,DEF
64+
desc format='brief' select count(distinct value) from t;
65+
id estRows task access object operator info
66+
StreamAgg 1.00 root funcs:count(distinct collation_agg_func.t.value)->Column#5
67+
└─TableReader 10000.00 root data:TableFullScan
68+
└─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
69+
select count(distinct value) from t;
70+
count(distinct value)
71+
3
72+
desc format='brief' select count(distinct value collate utf8mb4_bin) from t;
73+
id estRows task access object operator info
74+
StreamAgg 1.00 root funcs:count(distinct Column#6)->Column#5
75+
└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#6
76+
└─TableReader 10000.00 root data:TableFullScan
77+
└─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
78+
select count(distinct value collate utf8mb4_bin) from t;
79+
count(distinct value collate utf8mb4_bin)
80+
3
81+
desc format='brief' select count(distinct value, value1) from t;
82+
id estRows task access object operator info
83+
StreamAgg 1.00 root funcs:count(distinct collation_agg_func.t.value, collation_agg_func.t.value1)->Column#5
84+
└─TableReader 10000.00 root data:TableFullScan
85+
└─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
86+
select count(distinct value, value1) from t;
87+
count(distinct value, value1)
88+
4
89+
desc format='brief' select count(distinct value collate utf8mb4_bin, value1) from t;
90+
id estRows task access object operator info
91+
StreamAgg 1.00 root funcs:count(distinct Column#6, Column#7)->Column#5
92+
└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#6, collation_agg_func.t.value1
93+
└─TableReader 10000.00 root data:TableFullScan
94+
└─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
95+
select count(distinct value collate utf8mb4_bin, value1) from t;
96+
count(distinct value collate utf8mb4_bin, value1)
97+
4
98+
desc format='brief' select approx_count_distinct(value) from t;
99+
id estRows task access object operator info
100+
HashAgg 1.00 root funcs:approx_count_distinct(collation_agg_func.t.value)->Column#5
101+
└─TableReader 10000.00 root data:TableFullScan
102+
└─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
103+
select approx_count_distinct(value) from t;
104+
approx_count_distinct(value)
105+
3
106+
desc format='brief' select approx_count_distinct(value collate utf8mb4_bin) from t;
107+
id estRows task access object operator info
108+
HashAgg 1.00 root funcs:approx_count_distinct(Column#6)->Column#5
109+
└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#6
110+
└─TableReader 10000.00 root data:TableFullScan
111+
└─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
112+
select approx_count_distinct(value collate utf8mb4_bin) from t;
113+
approx_count_distinct(value collate utf8mb4_bin)
114+
3
115+
desc format='brief' select approx_count_distinct(value, value1) from t;
116+
id estRows task access object operator info
117+
HashAgg 1.00 root funcs:approx_count_distinct(collation_agg_func.t.value, collation_agg_func.t.value1)->Column#5
118+
└─TableReader 10000.00 root data:TableFullScan
119+
└─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
120+
select approx_count_distinct(value, value1) from t;
121+
approx_count_distinct(value, value1)
122+
4
123+
desc format='brief' select approx_count_distinct(value collate utf8mb4_bin, value1) from t;
124+
id estRows task access object operator info
125+
HashAgg 1.00 root funcs:approx_count_distinct(Column#6, Column#7)->Column#5
126+
└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#6, collation_agg_func.t.value1
127+
└─TableReader 10000.00 root data:TableFullScan
128+
└─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
129+
select approx_count_distinct(value collate utf8mb4_bin, value1) from t;
130+
approx_count_distinct(value collate utf8mb4_bin, value1)
131+
4
132+
create table tt(a char(10), b enum('a', 'B', 'c'), c set('a', 'B', 'c'), d json) collate utf8mb4_general_ci;
133+
insert into tt values ("a", "a", "a", JSON_OBJECT("a", "a"));
134+
insert into tt values ("A", "A", "A", JSON_OBJECT("A", "A"));
135+
Error 1265: Data truncated for column 'b' at row 1
136+
insert into tt values ("b", "b", "b", JSON_OBJECT("b", "b"));
137+
Error 1265: Data truncated for column 'b' at row 1
138+
insert into tt values ("B", "B", "B", JSON_OBJECT("B", "B"));
139+
insert into tt values ("c", "c", "c", JSON_OBJECT("c", "c"));
140+
insert into tt values ("C", "C", "C", JSON_OBJECT("C", "C"));
141+
Error 1265: Data truncated for column 'b' at row 1
142+
split table tt by (0), (1), (2), (3), (4), (5);
143+
desc format='brief' select min(a) from tt;
144+
id estRows task access object operator info
145+
StreamAgg 1.00 root funcs:min(collation_agg_func.tt.a)->Column#6
146+
└─TopN 1.00 root collation_agg_func.tt.a, offset:0, count:1
147+
└─TableReader 1.00 root data:TopN
148+
└─TopN 1.00 cop[tikv] collation_agg_func.tt.a, offset:0, count:1
149+
└─Selection 9990.00 cop[tikv] not(isnull(collation_agg_func.tt.a))
150+
└─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo
151+
select min(a) from tt;
152+
min(a)
153+
B
154+
desc format='brief' select min(a collate utf8mb4_bin) from tt;
155+
id estRows task access object operator info
156+
StreamAgg 1.00 root funcs:min(Column#8)->Column#6
157+
└─Projection 1.00 root cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#8
158+
└─Projection 1.00 root collation_agg_func.tt.a
159+
└─TopN 1.00 root Column#7, offset:0, count:1
160+
└─Projection 1.00 root collation_agg_func.tt.a, cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#7
161+
└─TableReader 1.00 root data:TopN
162+
└─TopN 1.00 cop[tikv] cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin), offset:0, count:1
163+
└─Selection 8000.00 cop[tikv] not(isnull(cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)))
164+
└─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo
165+
select min(a collate utf8mb4_bin) from tt;
166+
min(a collate utf8mb4_bin)
167+
B
168+
desc format='brief' select max(a) from tt;
169+
id estRows task access object operator info
170+
StreamAgg 1.00 root funcs:max(collation_agg_func.tt.a)->Column#6
171+
└─TopN 1.00 root collation_agg_func.tt.a:desc, offset:0, count:1
172+
└─TableReader 1.00 root data:TopN
173+
└─TopN 1.00 cop[tikv] collation_agg_func.tt.a:desc, offset:0, count:1
174+
└─Selection 9990.00 cop[tikv] not(isnull(collation_agg_func.tt.a))
175+
└─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo
176+
select max(a) from tt;
177+
max(a)
178+
c
179+
desc format='brief' select max(a collate utf8mb4_bin) from tt;
180+
id estRows task access object operator info
181+
StreamAgg 1.00 root funcs:max(Column#8)->Column#6
182+
└─Projection 1.00 root cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#8
183+
└─Projection 1.00 root collation_agg_func.tt.a
184+
└─TopN 1.00 root Column#7:desc, offset:0, count:1
185+
└─Projection 1.00 root collation_agg_func.tt.a, cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#7
186+
└─TableReader 1.00 root data:TopN
187+
└─TopN 1.00 cop[tikv] cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin):desc, offset:0, count:1
188+
└─Selection 8000.00 cop[tikv] not(isnull(cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)))
189+
└─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo
190+
select max(a collate utf8mb4_bin) from tt;
191+
max(a collate utf8mb4_bin)
192+
c
193+
desc format='brief' select min(b) from tt;
194+
id estRows task access object operator info
195+
StreamAgg 1.00 root funcs:min(Column#8)->Column#6
196+
└─TableReader 1.00 root data:StreamAgg
197+
└─StreamAgg 1.00 cop[tikv] funcs:min(collation_agg_func.tt.b)->Column#8
198+
└─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo
199+
select min(b) from tt;
200+
min(b)
201+
B
202+
desc format='brief' select min(b collate utf8mb4_bin) from tt;
203+
id estRows task access object operator info
204+
StreamAgg 1.00 root funcs:min(Column#8)->Column#6
205+
└─TableReader 1.00 root data:StreamAgg
206+
└─StreamAgg 1.00 cop[tikv] funcs:min(cast(collation_agg_func.tt.b, enum('a','B','c')))->Column#8
207+
└─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo
208+
desc format='brief' select max(b) from tt;
209+
id estRows task access object operator info
210+
StreamAgg 1.00 root funcs:max(Column#8)->Column#6
211+
└─TableReader 1.00 root data:StreamAgg
212+
└─StreamAgg 1.00 cop[tikv] funcs:max(collation_agg_func.tt.b)->Column#8
213+
└─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo
214+
select max(b) from tt;
215+
max(b)
216+
c
217+
desc format='brief' select max(b collate utf8mb4_bin) from tt;
218+
id estRows task access object operator info
219+
StreamAgg 1.00 root funcs:max(Column#8)->Column#6
220+
└─TableReader 1.00 root data:StreamAgg
221+
└─StreamAgg 1.00 cop[tikv] funcs:max(cast(collation_agg_func.tt.b, enum('a','B','c')))->Column#8
222+
└─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo
223+
desc format='brief' select min(c) from tt;
224+
id estRows task access object operator info
225+
HashAgg 1.00 root funcs:min(collation_agg_func.tt.c)->Column#6
226+
└─TableReader 10000.00 root data:TableFullScan
227+
└─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo
228+
select min(c) from tt;
229+
min(c)
230+
B
231+
desc format='brief' select min(c collate utf8mb4_bin) from tt;
232+
id estRows task access object operator info
233+
HashAgg 1.00 root funcs:min(Column#7)->Column#6
234+
└─Projection 10000.00 root cast(collation_agg_func.tt.c, set('a','B','c'))->Column#7
235+
└─TableReader 10000.00 root data:TableFullScan
236+
└─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo
237+
desc format='brief' select max(c) from tt;
238+
id estRows task access object operator info
239+
HashAgg 1.00 root funcs:max(collation_agg_func.tt.c)->Column#6
240+
└─TableReader 10000.00 root data:TableFullScan
241+
└─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo
242+
select max(c) from tt;
243+
max(c)
244+
c
245+
desc format='brief' select max(c collate utf8mb4_bin) from tt;
246+
id estRows task access object operator info
247+
HashAgg 1.00 root funcs:max(Column#7)->Column#6
248+
└─Projection 10000.00 root cast(collation_agg_func.tt.c, set('a','B','c'))->Column#7
249+
└─TableReader 10000.00 root data:TableFullScan
250+
└─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo
251+
desc format='brief' select min(d) from tt;
252+
id estRows task access object operator info
253+
StreamAgg 1.00 root funcs:min(collation_agg_func.tt.d)->Column#6
254+
└─TopN 1.00 root collation_agg_func.tt.d, offset:0, count:1
255+
└─TableReader 1.00 root data:TopN
256+
└─TopN 1.00 cop[tikv] collation_agg_func.tt.d, offset:0, count:1
257+
└─Selection 8000.00 cop[tikv] not(isnull(cast(collation_agg_func.tt.d, var_string(4294967295))))
258+
└─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo
259+
select min(d) from tt;
260+
min(d)
261+
{"B": "B"}
262+
desc format='brief' select min(d collate utf8mb4_bin) from tt;
263+
id estRows task access object operator info
264+
StreamAgg 1.00 root funcs:min(Column#8)->Column#6
265+
└─Projection 1.00 root cast(collation_agg_func.tt.d, json BINARY)->Column#8
266+
└─Projection 1.00 root collation_agg_func.tt.d
267+
└─TopN 1.00 root Column#7, offset:0, count:1
268+
└─Projection 1.00 root collation_agg_func.tt.d, cast(collation_agg_func.tt.d, json BINARY)->Column#7
269+
└─TableReader 1.00 root data:TopN
270+
└─TopN 1.00 cop[tikv] cast(collation_agg_func.tt.d, json BINARY), offset:0, count:1
271+
└─Selection 8000.00 cop[tikv] not(isnull(cast(cast(collation_agg_func.tt.d, json BINARY), var_string(4294967295))))
272+
└─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo
273+
select min(d collate utf8mb4_bin) from tt;
274+
min(d collate utf8mb4_bin)
275+
{"B": "B"}
276+
desc format='brief' select max(d) from tt;
277+
id estRows task access object operator info
278+
StreamAgg 1.00 root funcs:max(collation_agg_func.tt.d)->Column#6
279+
└─TopN 1.00 root collation_agg_func.tt.d:desc, offset:0, count:1
280+
└─TableReader 1.00 root data:TopN
281+
└─TopN 1.00 cop[tikv] collation_agg_func.tt.d:desc, offset:0, count:1
282+
└─Selection 8000.00 cop[tikv] not(isnull(cast(collation_agg_func.tt.d, var_string(4294967295))))
283+
└─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo
284+
select max(d) from tt;
285+
max(d)
286+
{"c": "c"}
287+
desc format='brief' select max(d collate utf8mb4_bin) from tt;
288+
id estRows task access object operator info
289+
StreamAgg 1.00 root funcs:max(Column#8)->Column#6
290+
└─Projection 1.00 root cast(collation_agg_func.tt.d, json BINARY)->Column#8
291+
└─Projection 1.00 root collation_agg_func.tt.d
292+
└─TopN 1.00 root Column#7:desc, offset:0, count:1
293+
└─Projection 1.00 root collation_agg_func.tt.d, cast(collation_agg_func.tt.d, json BINARY)->Column#7
294+
└─TableReader 1.00 root data:TopN
295+
└─TopN 1.00 cop[tikv] cast(collation_agg_func.tt.d, json BINARY):desc, offset:0, count:1
296+
└─Selection 8000.00 cop[tikv] not(isnull(cast(cast(collation_agg_func.tt.d, json BINARY), var_string(4294967295))))
297+
└─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo
298+
select max(d collate utf8mb4_bin) from tt;
299+
max(d collate utf8mb4_bin)
300+
{"c": "c"}
301+
drop database collation_agg_func;
302+
use test

0 commit comments

Comments (0)