Skip to content

Commit 952393d

Browse files
authored
fix(parser): extract the column of a BETWEEN predicate into filter_column_list (#44)
1 parent abac908 commit 952393d

File tree

4 files changed

+48
-36
lines changed

4 files changed

+48
-36
lines changed

parser/mysql_parser/lexer.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
reserved = sorted(set(presto_tokens).difference(presto_nonreserved))
1818

19-
tokens = ['INTEGER', 'DECIMAL', 'NUMBER',
19+
tokens = ['INTEGER', 'NUMBER', 'DOUBLE',
2020
'IDENTIFIER', 'DIGIT_IDENTIFIER',
2121
'QUOTED_IDENTIFIER', 'BACKQUOTED_IDENTIFIER',
2222
'STRING', 'PERIOD',
@@ -56,6 +56,15 @@
5656
t_ignore = ' \t'
5757

5858

59+
def t_DOUBLE(t):
60+
r"""(\d+(?:\.\d*)?(?:[eE][+-]?\d+)?|\d*(?:\.\d+)(?:[eE][+-]?\d+)?)"""
61+
if 'e' in t.value or 'E' in t.value or '.' in t.value:
62+
t.type = 'DOUBLE'
63+
else:
64+
t.type = "INTEGER"
65+
return t
66+
67+
5968
def t_INTEGER(t):
6069
r'\d+'
6170
t.type = "INTEGER"

parser/mysql_parser/parser.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1089,9 +1089,9 @@ def p_quoted_identifier(p):
10891089

10901090

10911091
def p_number(p):
1092-
r"""number : DECIMAL
1092+
r"""number : DOUBLE
10931093
| INTEGER"""
1094-
if p.slice[1].type == "DECIMAL":
1094+
if p.slice[1].type == "DOUBLE":
10951095
p[0] = DoubleLiteral(p.lineno(1), p.lexpos(1), p[1])
10961096
else:
10971097
p[0] = LongLiteral(p.lineno(1), p.lexpos(1), p[1])

parser/parser_utils.py

Lines changed: 26 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -108,24 +108,8 @@ def visit_like_predicate(self, node, context):
108108
if isinstance(pattern, StringLiteral):
109109
if not pattern.value.startswith('%'):
110110
can_query_range = True
111-
qualifed_name = node.value.name
112111
if can_query_range:
113-
if len(qualifed_name.parts) == 2:
114-
table_or_alias_name = qualifed_name.parts[0]
115-
for _table in self.table_list:
116-
if _table['alias'] == table_or_alias_name or _table[
117-
'table_name'] == table_or_alias_name:
118-
filter_column_list = _table['filter_column_list']
119-
filter_column_list.append({
120-
'column_name': qualifed_name.parts[1],
121-
'opt': 'like'
122-
})
123-
else:
124-
filter_column_list = self.table_list[-1]['filter_column_list']
125-
filter_column_list.append({
126-
'column_name': qualifed_name.parts[0],
127-
'opt': 'like'
128-
})
112+
self.add_filter_column_with_qualified_name_reference(node.value, 'like')
129113

130114
return self.visit_expression(node, context)
131115

@@ -141,22 +125,7 @@ def visit_in_predicate(self, node, context):
141125
self.in_count_list.append(len(node.value_list.values))
142126

143127
if isinstance(value, QualifiedNameReference):
144-
if len(value.name.parts) == 2:
145-
table_or_alias_name = value.name.parts[0]
146-
for _table in self.table_list:
147-
if _table['alias'] == table_or_alias_name or _table[
148-
'table_name'] == table_or_alias_name:
149-
filter_column_list = _table['filter_column_list']
150-
filter_column_list.append({
151-
'column_name': value.name.parts[1],
152-
'opt': 'in'
153-
})
154-
else:
155-
filter_column_list = self.table_list[-1]['filter_column_list']
156-
filter_column_list.append({
157-
'column_name': value.name.parts[0],
158-
'opt': 'in'
159-
})
128+
self.add_filter_column_with_qualified_name_reference(value, 'in')
160129

161130
self.process(node.value, None)
162131
self.process(node.value_list, None)
@@ -258,6 +227,30 @@ def visit_delete(self, node, context):
258227
self.process(node.where, context)
259228
return None
260229

230+
def visit_between_predicate(self, node, context):
231+
if isinstance(node.value, QualifiedNameReference):
232+
self.add_filter_column_with_qualified_name_reference(node.value, 'between')
233+
return None
234+
235+
def add_filter_column_with_qualified_name_reference(self, qualified_name_reference: QualifiedNameReference,
236+
opt):
237+
if len(qualified_name_reference.name.parts) == 2:
238+
table_or_alias_name = qualified_name_reference.name.parts[0]
239+
for _table in self.table_list:
240+
if _table['alias'] == table_or_alias_name or _table[
241+
'table_name'] == table_or_alias_name:
242+
filter_column_list = _table['filter_column_list']
243+
filter_column_list.append({
244+
'column_name': qualified_name_reference.name.parts[1],
245+
'opt': opt
246+
})
247+
else:
248+
filter_column_list = self.table_list[-1]['filter_column_list']
249+
filter_column_list.append({
250+
'column_name': qualified_name_reference.name.parts[0],
251+
'opt': opt
252+
})
253+
261254
visitor = FormatVisitor()
262255
visitor.process(statement, None)
263256
return visitor

test/parser/test_parser_utils.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,16 @@ def test_update_parameterize(self):
219219
statement = format_sql(statement_node, 0)
220220
assert statement == """UPDATE t1 SET c = ? WHERE id = ?"""
221221

222+
def test_between(self):
223+
sql = """
224+
select max(`successRate`) AS `successRate` from `table_850d` where `period` between '2022-07-11 00:00:00' and '2022-07-11 23:59:59' and `successRate` < 0.35;
225+
"""
226+
visitor = ParserUtils.format_statement(parser.parse(sql))
227+
table_list = visitor.table_list
228+
assert table_list == [{'table_name': 'table_850d', 'alias': '',
229+
'filter_column_list': [{'column_name': 'period', 'opt': 'between'},
230+
{'column_name': 'successRate', 'opt': '<'}]}]
231+
222232

223233
if __name__ == '__main__':
224234
unittest.main()

0 commit comments

Comments
 (0)