Skip to content

Commit 3f362c9

Browse files
zhangstar333dataroaring
authored and committed
[improve](function) opt aes_encrypt/decrypt function to handle const column (#37194)
## Proposed changes ``` select to_base64(aes_encrypt(column,'F3229A0B371ED2D9441B830D21A390C3', '0123456789')); In most user queries, the second and third arguments are constant literals, so this special case is handled directly. mysql [test]>select count(aes_encrypt(a, 'seed1seed2seed3','0123456789')) from json_table_2; +-----------------------------------------------------------+ | count(aes_encrypt(a, '***', '0123456789', 'AES_256_CBC')) | +-----------------------------------------------------------+ | 10000001 | +-----------------------------------------------------------+ 1 row in set (5.20 sec) mysql [test]>select count(aes_encrypt(a, 'seed1seed2seed3','0123456789')) from json_table_2; +-----------------------------------------------------------+ | count(aes_encrypt(a, '***', '0123456789', 'AES_256_CBC')) | +-----------------------------------------------------------+ | 10000001 | +-----------------------------------------------------------+ 1 row in set (7.05 sec) ```
1 parent 30053c5 commit 3f362c9

File tree

1 file changed

+205
-60
lines changed

1 file changed

+205
-60
lines changed

be/src/vec/functions/function_encryption.cpp

Lines changed: 205 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include "vec/columns/columns_number.h"
3737
#include "vec/common/assert_cast.h"
3838
#include "vec/common/pod_array.h"
39+
#include "vec/common/string_ref.h"
3940
#include "vec/core/block.h"
4041
#include "vec/core/column_numbers.h"
4142
#include "vec/core/column_with_type_and_name.h"
@@ -110,54 +111,44 @@ class FunctionEncryptionAndDecrypt : public IFunction {
110111

111112
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
112113
size_t result, size_t input_rows_count) const override {
113-
size_t argument_size = arguments.size();
114-
std::vector<ColumnPtr> argument_columns(argument_size);
115-
std::vector<const ColumnString::Offsets*> offsets_list(argument_size);
116-
std::vector<const ColumnString::Chars*> chars_list(argument_size);
117-
118-
auto result_null_map = ColumnUInt8::create(input_rows_count, 0);
119-
auto result_data_column = ColumnString::create();
120-
121-
auto& result_data = result_data_column->get_chars();
122-
auto& result_offset = result_data_column->get_offsets();
123-
result_offset.resize(input_rows_count);
124-
125-
for (int i = 0; i < argument_size; ++i) {
126-
argument_columns[i] =
127-
block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
128-
if (auto* nullable = check_and_get_column<ColumnNullable>(*argument_columns[i])) {
129-
VectorizedUtils::update_null_map(result_null_map->get_data(),
130-
nullable->get_null_map_data());
131-
argument_columns[i] = nullable->get_nested_column_ptr();
132-
}
133-
}
134-
135-
for (size_t i = 0; i < argument_size; ++i) {
136-
auto col_str = assert_cast<const ColumnString*>(argument_columns[i].get());
137-
offsets_list[i] = &col_str->get_offsets();
138-
chars_list[i] = &col_str->get_chars();
139-
}
140-
141-
RETURN_IF_ERROR(Impl::vector_vector(offsets_list, chars_list, input_rows_count, result_data,
142-
result_offset, result_null_map->get_data()));
143-
block.get_by_position(result).column =
144-
ColumnNullable::create(std::move(result_data_column), std::move(result_null_map));
145-
return Status::OK();
114+
return Impl::execute_impl_inner(context, block, arguments, result, input_rows_count);
146115
}
147116
};
148117

149118
template <typename Impl, bool is_encrypt>
150-
void exectue_result(std::vector<const ColumnString::Offsets*>& offsets_list,
151-
std::vector<const ColumnString::Chars*>& chars_list, size_t i,
152-
EncryptionMode& encryption_mode, const char* iv_raw, int iv_length,
153-
ColumnString::Chars& result_data, ColumnString::Offsets& result_offset,
154-
NullMap& null_map) {
119+
void execute_result_vector(std::vector<const ColumnString::Offsets*>& offsets_list,
120+
std::vector<const ColumnString::Chars*>& chars_list, size_t i,
121+
EncryptionMode& encryption_mode, const char* iv_raw, int iv_length,
122+
ColumnString::Chars& result_data, ColumnString::Offsets& result_offset,
123+
NullMap& null_map) {
155124
int src_size = (*offsets_list[0])[i] - (*offsets_list[0])[i - 1];
156-
const auto src_raw =
125+
const auto* src_raw =
157126
reinterpret_cast<const char*>(&(*chars_list[0])[(*offsets_list[0])[i - 1]]);
158127
int key_size = (*offsets_list[1])[i] - (*offsets_list[1])[i - 1];
159-
const auto key_raw =
128+
const auto* key_raw =
160129
reinterpret_cast<const char*>(&(*chars_list[1])[(*offsets_list[1])[i - 1]]);
130+
execute_result<Impl, is_encrypt>(src_raw, src_size, key_raw, key_size, i, encryption_mode,
131+
iv_raw, iv_length, result_data, result_offset, null_map);
132+
}
133+
134+
template <typename Impl, bool is_encrypt>
135+
void execute_result_const(const ColumnString::Offsets* offsets_column,
136+
const ColumnString::Chars* chars_column, StringRef key_arg, size_t i,
137+
EncryptionMode& encryption_mode, const char* iv_raw, int iv_length,
138+
ColumnString::Chars& result_data, ColumnString::Offsets& result_offset,
139+
NullMap& null_map) {
140+
int src_size = (*offsets_column)[i] - (*offsets_column)[i - 1];
141+
const auto* src_raw = reinterpret_cast<const char*>(&(*chars_column)[(*offsets_column)[i - 1]]);
142+
execute_result<Impl, is_encrypt>(src_raw, src_size, key_arg.data, key_arg.size, i,
143+
encryption_mode, iv_raw, iv_length, result_data, result_offset,
144+
null_map);
145+
}
146+
147+
template <typename Impl, bool is_encrypt>
148+
void execute_result(const char* src_raw, int src_size, const char* key_raw, int key_size, size_t i,
149+
EncryptionMode& encryption_mode, const char* iv_raw, int iv_length,
150+
ColumnString::Chars& result_data, ColumnString::Offsets& result_offset,
151+
NullMap& null_map) {
161152
if (src_size == 0) {
162153
StringOP::push_null_string(i, result_data, result_offset, null_map);
163154
return;
@@ -170,7 +161,7 @@ void exectue_result(std::vector<const ColumnString::Offsets*>& offsets_list,
170161
p.reset(new char[cipher_len]);
171162
int ret_code = 0;
172163

173-
ret_code = Impl::exectue_impl(encryption_mode, (unsigned char*)src_raw, src_size,
164+
ret_code = Impl::execute_impl(encryption_mode, (unsigned char*)src_raw, src_size,
174165
(unsigned char*)key_raw, key_size, iv_raw, iv_length, true,
175166
(unsigned char*)p.get());
176167

@@ -189,18 +180,90 @@ struct EncryptionAndDecryptTwoImpl {
189180
std::make_shared<DataTypeString>()};
190181
}
191182

192-
static Status vector_vector(std::vector<const ColumnString::Offsets*>& offsets_list,
193-
std::vector<const ColumnString::Chars*>& chars_list,
194-
size_t input_rows_count, ColumnString::Chars& result_data,
195-
ColumnString::Offsets& result_offset, NullMap& null_map) {
183+
/// Entry point for the three-argument form: (source, key, mode).
///
/// Builds a nullable string result column. When both the key and the mode columns are
/// constant, the per-row work is delegated to vector_const with the single key/mode value;
/// otherwise every argument is treated as a full ColumnString and vector_vector is used.
///
/// @param context           function context (not used here).
/// @param block             block holding the argument columns and receiving the result.
/// @param arguments         positions of the three argument columns inside `block`.
/// @param result            position of the result column inside `block`.
/// @param input_rows_count  number of rows to produce.
/// @return always Status::OK().
static Status execute_impl_inner(FunctionContext* context, Block& block,
                                 const ColumnNumbers& arguments, size_t result,
                                 size_t input_rows_count) {
    auto result_column = ColumnString::create();
    auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0);
    DCHECK_EQ(3, arguments.size());

    const size_t argument_size = 3;
    bool col_const[argument_size];
    ColumnPtr argument_columns[argument_size];
    for (size_t idx = 0; idx < argument_size; ++idx) {
        col_const[idx] = is_column_const(*block.get_by_position(arguments[idx]).column);
    }

    // The source column is always processed row by row, so a constant source is expanded.
    const auto& source_column = block.get_by_position(arguments[0]).column;
    if (col_const[0]) {
        argument_columns[0] =
                static_cast<const ColumnConst&>(*source_column).convert_to_full_column();
    } else {
        argument_columns[0] = source_column;
    }

    // Key (1) and mode (2) go through the shared parameter-column preprocessing helper.
    default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments);

    for (size_t idx = 0; idx < argument_size; ++idx) {
        check_set_nullable(argument_columns[idx], result_null_map_column, col_const[idx]);
    }

    auto& result_data = result_column->get_chars();
    auto& result_offset = result_column->get_offsets();
    result_offset.resize(input_rows_count);

    const bool params_are_const = col_const[1] && col_const[2];
    if (!params_are_const) {
        std::vector<const ColumnString::Offsets*> offsets_list(argument_size);
        std::vector<const ColumnString::Chars*> chars_list(argument_size);
        for (size_t idx = 0; idx < argument_size; ++idx) {
            const auto* str_column =
                    assert_cast<const ColumnString*>(argument_columns[idx].get());
            offsets_list[idx] = &str_column->get_offsets();
            chars_list[idx] = &str_column->get_chars();
        }
        vector_vector(offsets_list, chars_list, input_rows_count, result_data, result_offset,
                      result_null_map_column->get_data());
    } else {
        const auto* source_strings =
                assert_cast<const ColumnString*>(argument_columns[0].get());
        vector_const(source_strings, argument_columns[1]->get_data_at(0),
                     argument_columns[2]->get_data_at(0), input_rows_count, result_data,
                     result_offset, result_null_map_column->get_data());
    }

    block.get_by_position(result).column =
            ColumnNullable::create(std::move(result_column), std::move(result_null_map_column));
    return Status::OK();
}
229+
230+
static void vector_const(const ColumnString* column, StringRef key_arg, StringRef mode_arg,
231+
size_t input_rows_count, ColumnString::Chars& result_data,
232+
ColumnString::Offsets& result_offset, NullMap& null_map) {
233+
EncryptionMode encryption_mode = mode;
234+
std::string mode_str(mode_arg.data, mode_arg.size);
235+
bool all_insert_null = false;
236+
if (mode_arg.size != 0) {
237+
if (!aes_mode_map.contains(mode_str)) {
238+
all_insert_null = true;
239+
}
240+
encryption_mode = aes_mode_map.at(mode_str);
241+
}
242+
const ColumnString::Offsets* offsets_column = &column->get_offsets();
243+
const ColumnString::Chars* chars_column = &column->get_chars();
244+
for (int i = 0; i < input_rows_count; ++i) {
245+
if (all_insert_null || null_map[i]) {
246+
StringOP::push_null_string(i, result_data, result_offset, null_map);
247+
continue;
248+
}
249+
execute_result_const<Impl, is_encrypt>(offsets_column, chars_column, key_arg, i,
250+
encryption_mode, nullptr, 0, result_data,
251+
result_offset, null_map);
252+
}
253+
}
254+
255+
static void vector_vector(std::vector<const ColumnString::Offsets*>& offsets_list,
256+
std::vector<const ColumnString::Chars*>& chars_list,
257+
size_t input_rows_count, ColumnString::Chars& result_data,
258+
ColumnString::Offsets& result_offset, NullMap& null_map) {
196259
for (int i = 0; i < input_rows_count; ++i) {
197260
if (null_map[i]) {
198261
StringOP::push_null_string(i, result_data, result_offset, null_map);
199262
continue;
200263
}
201264
EncryptionMode encryption_mode = mode;
202265
int mode_size = (*offsets_list[2])[i] - (*offsets_list[2])[i - 1];
203-
const auto mode_raw =
266+
const auto* mode_raw =
204267
reinterpret_cast<const char*>(&(*chars_list[2])[(*offsets_list[2])[i - 1]]);
205268
if (mode_size != 0) {
206269
std::string mode_str(mode_raw, mode_size);
@@ -210,10 +273,10 @@ struct EncryptionAndDecryptTwoImpl {
210273
}
211274
encryption_mode = aes_mode_map.at(mode_str);
212275
}
213-
exectue_result<Impl, is_encrypt>(offsets_list, chars_list, i, encryption_mode, nullptr,
214-
0, result_data, result_offset, null_map);
276+
execute_result_vector<Impl, is_encrypt>(offsets_list, chars_list, i, encryption_mode,
277+
nullptr, 0, result_data, result_offset,
278+
null_map);
215279
}
216-
return Status::OK();
217280
}
218281
};
219282

@@ -224,10 +287,92 @@ struct EncryptionAndDecryptFourImpl {
224287
std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
225288
}
226289

227-
static Status vector_vector(std::vector<const ColumnString::Offsets*>& offsets_list,
228-
std::vector<const ColumnString::Chars*>& chars_list,
229-
size_t input_rows_count, ColumnString::Chars& result_data,
230-
ColumnString::Offsets& result_offset, NullMap& null_map) {
290+
/// Entry point for the four-argument form: (source, key, iv, mode).
///
/// Builds a nullable string result column. When key, iv and mode are all constant columns,
/// the per-row work is delegated to vector_const with the single key/iv/mode value;
/// otherwise every argument is treated as a full ColumnString and vector_vector is used.
///
/// Argument roles follow the per-row path: vector_vector reads the iv from argument 2 and
/// the mode from argument 3, and execute_result_vector reads the key from argument 1.
///
/// @param context           function context (not used here).
/// @param block             block holding the argument columns and receiving the result.
/// @param arguments         positions of the four argument columns inside `block`.
/// @param result            position of the result column inside `block`.
/// @param input_rows_count  number of rows to produce.
/// @return always Status::OK().
static Status execute_impl_inner(FunctionContext* context, Block& block,
                                 const ColumnNumbers& arguments, size_t result,
                                 size_t input_rows_count) {
    auto result_column = ColumnString::create();
    auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0);
    DCHECK_EQ(4, arguments.size());
    const size_t argument_size = 4;
    bool col_const[argument_size];
    ColumnPtr argument_columns[argument_size];
    for (size_t i = 0; i < argument_size; ++i) {
        col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
    }
    // The source column is always processed row by row, so a constant source is expanded.
    argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
                                                 *block.get_by_position(arguments[0]).column)
                                                 .convert_to_full_column()
                                       : block.get_by_position(arguments[0]).column;

    // Key (1), iv (2) and mode (3) go through the shared parameter-column preprocessing helper.
    default_preprocess_parameter_columns(argument_columns, col_const, {1, 2, 3}, block,
                                         arguments);

    for (size_t i = 0; i < argument_size; ++i) {
        check_set_nullable(argument_columns[i], result_null_map_column, col_const[i]);
    }
    auto& result_data = result_column->get_chars();
    auto& result_offset = result_column->get_offsets();
    result_offset.resize(input_rows_count);

    if (col_const[1] && col_const[2] && col_const[3]) {
        // vector_const's parameter order is (column, iv_arg, key_arg, mode_arg). Argument 1
        // is the key and argument 2 is the iv, so they are passed in that swapped order.
        // Passing them positionally (1, 2, 3) fed the key in as the iv and the iv in as the
        // key, making the constant path disagree with vector_vector.
        vector_const(assert_cast<const ColumnString*>(argument_columns[0].get()),
                     /*iv_arg=*/argument_columns[2]->get_data_at(0),
                     /*key_arg=*/argument_columns[1]->get_data_at(0),
                     /*mode_arg=*/argument_columns[3]->get_data_at(0), input_rows_count,
                     result_data, result_offset, result_null_map_column->get_data());
    } else {
        std::vector<const ColumnString::Offsets*> offsets_list(argument_size);
        std::vector<const ColumnString::Chars*> chars_list(argument_size);
        for (size_t i = 0; i < argument_size; ++i) {
            const auto* col_str = assert_cast<const ColumnString*>(argument_columns[i].get());
            offsets_list[i] = &col_str->get_offsets();
            chars_list[i] = &col_str->get_chars();
        }
        vector_vector(offsets_list, chars_list, input_rows_count, result_data, result_offset,
                      result_null_map_column->get_data());
    }
    block.get_by_position(result).column =
            ColumnNullable::create(std::move(result_column), std::move(result_null_map_column));
    return Status::OK();
}
337+
338+
static void vector_const(const ColumnString* column, StringRef iv_arg, StringRef key_arg,
339+
StringRef mode_arg, size_t input_rows_count,
340+
ColumnString::Chars& result_data, ColumnString::Offsets& result_offset,
341+
NullMap& null_map) {
342+
EncryptionMode encryption_mode = mode;
343+
bool all_insert_null = false;
344+
if (mode_arg.size != 0) {
345+
std::string mode_str(mode_arg.data, mode_arg.size);
346+
if constexpr (is_sm_mode) {
347+
if (sm4_mode_map.count(mode_str) == 0) {
348+
all_insert_null = true;
349+
}
350+
encryption_mode = sm4_mode_map.at(mode_str);
351+
} else {
352+
if (aes_mode_map.count(mode_str) == 0) {
353+
all_insert_null = true;
354+
}
355+
encryption_mode = aes_mode_map.at(mode_str);
356+
}
357+
}
358+
359+
const ColumnString::Offsets* offsets_column = &column->get_offsets();
360+
const ColumnString::Chars* chars_column = &column->get_chars();
361+
for (int i = 0; i < input_rows_count; ++i) {
362+
if (all_insert_null || null_map[i]) {
363+
StringOP::push_null_string(i, result_data, result_offset, null_map);
364+
continue;
365+
}
366+
execute_result_const<Impl, is_encrypt>(offsets_column, chars_column, key_arg, i,
367+
encryption_mode, iv_arg.data, iv_arg.size,
368+
result_data, result_offset, null_map);
369+
}
370+
}
371+
372+
static void vector_vector(std::vector<const ColumnString::Offsets*>& offsets_list,
373+
std::vector<const ColumnString::Chars*>& chars_list,
374+
size_t input_rows_count, ColumnString::Chars& result_data,
375+
ColumnString::Offsets& result_offset, NullMap& null_map) {
231376
for (int i = 0; i < input_rows_count; ++i) {
232377
if (null_map[i]) {
233378
StringOP::push_null_string(i, result_data, result_offset, null_map);
@@ -237,9 +382,9 @@ struct EncryptionAndDecryptFourImpl {
237382
EncryptionMode encryption_mode = mode;
238383
int mode_size = (*offsets_list[3])[i] - (*offsets_list[3])[i - 1];
239384
int iv_size = (*offsets_list[2])[i] - (*offsets_list[2])[i - 1];
240-
const auto mode_raw =
385+
const auto* mode_raw =
241386
reinterpret_cast<const char*>(&(*chars_list[3])[(*offsets_list[3])[i - 1]]);
242-
const auto iv_raw =
387+
const auto* iv_raw =
243388
reinterpret_cast<const char*>(&(*chars_list[2])[(*offsets_list[2])[i - 1]]);
244389
if (mode_size != 0) {
245390
std::string mode_str(mode_raw, mode_size);
@@ -258,15 +403,15 @@ struct EncryptionAndDecryptFourImpl {
258403
}
259404
}
260405

261-
exectue_result<Impl, is_encrypt>(offsets_list, chars_list, i, encryption_mode, iv_raw,
262-
iv_size, result_data, result_offset, null_map);
406+
execute_result_vector<Impl, is_encrypt>(offsets_list, chars_list, i, encryption_mode,
407+
iv_raw, iv_size, result_data, result_offset,
408+
null_map);
263409
}
264-
return Status::OK();
265410
}
266411
};
267412

268413
struct EncryptImpl {
269-
static int exectue_impl(EncryptionMode mode, const unsigned char* source,
414+
static int execute_impl(EncryptionMode mode, const unsigned char* source,
270415
uint32_t source_length, const unsigned char* key, uint32_t key_length,
271416
const char* iv, int iv_length, bool padding, unsigned char* encrypt) {
272417
return EncryptionUtil::encrypt(mode, source, source_length, key, key_length, iv, iv_length,
@@ -275,7 +420,7 @@ struct EncryptImpl {
275420
};
276421

277422
struct DecryptImpl {
278-
static int exectue_impl(EncryptionMode mode, const unsigned char* source,
423+
static int execute_impl(EncryptionMode mode, const unsigned char* source,
279424
uint32_t source_length, const unsigned char* key, uint32_t key_length,
280425
const char* iv, int iv_length, bool padding, unsigned char* encrypt) {
281426
return EncryptionUtil::decrypt(mode, source, source_length, key, key_length, iv, iv_length,

0 commit comments

Comments
 (0)