Skip to content

Commit 41fa7bc

Browse files
authored
[bugfix](paimon)Fixed the reading of timestamp with time zone type data for 2.1 (#37716) (#38592)
bp: #37716
1 parent 9f1e41c commit 41fa7bc

File tree

7 files changed

+400
-51
lines changed

7 files changed

+400
-51
lines changed

be/src/vec/exec/scan/vfile_scanner.cpp

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -823,20 +823,9 @@ Status VFileScanner::_get_next_reader() {
823823
break;
824824
}
825825
case TFileFormatType::FORMAT_PARQUET: {
826-
static const cctz::time_zone utc0 = cctz::utc_time_zone();
827-
cctz::time_zone* tz;
828-
if (range.__isset.table_format_params &&
829-
range.table_format_params.table_format_type == "paimon") {
830-
// The timestmap generated by paimon does not carry metadata information (e.g., isAdjustToUTC, etc.),
831-
// and the stored data is UTC0 by default, so it is directly set to the UTC time zone.
832-
// In version 0.7, paimon fixed this issue and can remove the judgment here
833-
tz = const_cast<cctz::time_zone*>(&utc0);
834-
} else {
835-
tz = const_cast<cctz::time_zone*>(&_state->timezone_obj());
836-
}
837826
std::unique_ptr<ParquetReader> parquet_reader = ParquetReader::create_unique(
838-
_profile, *_params, range, _state->query_options().batch_size, tz,
839-
_io_ctx.get(), _state,
827+
_profile, *_params, range, _state->query_options().batch_size,
828+
const_cast<cctz::time_zone*>(&_state->timezone_obj()), _io_ctx.get(), _state,
840829
_shoudl_enable_file_meta_cache() ? ExecEnv::GetInstance()->file_meta_cache()
841830
: nullptr,
842831
_state->query_options().enable_parquet_lazy_mat);

docker/thirdparties/docker-compose/iceberg/tools/save_docker.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,7 @@ docker exec iceberg-rest bash -c 'cp /tmp/iceberg_rest_mode\=memory /mnt/data/in
2121

2222
# save iceberg from s3
2323
docker exec mc bash -c 'mc cp -r minio/warehouse /mnt/data/input/minio'
24+
25+
# package zip
26+
cp -r data iceberg_data
27+
zip -rq iceberg_data.zip iceberg_data

fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonColumnValue.java

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,33 +24,43 @@
2424
import org.apache.paimon.data.InternalArray;
2525
import org.apache.paimon.data.InternalMap;
2626
import org.apache.paimon.data.InternalRow;
27+
import org.apache.paimon.data.Timestamp;
28+
import org.apache.paimon.types.ArrayType;
29+
import org.apache.paimon.types.DataType;
30+
import org.apache.paimon.types.LocalZonedTimestampType;
31+
import org.apache.paimon.types.MapType;
32+
import org.apache.paimon.types.RowType;
2733
import org.slf4j.Logger;
2834
import org.slf4j.LoggerFactory;
2935

3036
import java.math.BigDecimal;
3137
import java.math.BigInteger;
3238
import java.time.LocalDate;
3339
import java.time.LocalDateTime;
40+
import java.time.ZoneId;
3441
import java.util.List;
3542

3643
public class PaimonColumnValue implements ColumnValue {
3744
private static final Logger LOG = LoggerFactory.getLogger(PaimonColumnValue.class);
3845
private int idx;
3946
private DataGetters record;
4047
private ColumnType dorisType;
48+
private DataType dataType;
4149

4250
public PaimonColumnValue() {
4351
}
4452

45-
public PaimonColumnValue(DataGetters record, int idx, ColumnType columnType) {
53+
public PaimonColumnValue(DataGetters record, int idx, ColumnType columnType, DataType dataType) {
4654
this.idx = idx;
4755
this.record = record;
4856
this.dorisType = columnType;
57+
this.dataType = dataType;
4958
}
5059

51-
public void setIdx(int idx, ColumnType dorisType) {
60+
public void setIdx(int idx, ColumnType dorisType, DataType dataType) {
5261
this.idx = idx;
5362
this.dorisType = dorisType;
63+
this.dataType = dataType;
5464
}
5565

5666
public void setOffsetRow(InternalRow record) {
@@ -124,7 +134,12 @@ public LocalDate getDate() {
124134

125135
@Override
126136
public LocalDateTime getDateTime() {
127-
return record.getTimestamp(idx, dorisType.getPrecision()).toLocalDateTime();
137+
Timestamp ts = record.getTimestamp(idx, dorisType.getPrecision());
138+
if (dataType instanceof LocalZonedTimestampType) {
139+
return LocalDateTime.ofInstant(ts.toInstant(), ZoneId.systemDefault());
140+
} else {
141+
return ts.toLocalDateTime();
142+
}
128143
}
129144

130145
@Override
@@ -142,7 +157,7 @@ public void unpackArray(List<ColumnValue> values) {
142157
InternalArray recordArray = record.getArray(idx);
143158
for (int i = 0; i < recordArray.size(); i++) {
144159
PaimonColumnValue arrayColumnValue = new PaimonColumnValue((DataGetters) recordArray, i,
145-
dorisType.getChildTypes().get(0));
160+
dorisType.getChildTypes().get(0), ((ArrayType) dataType).getElementType());
146161
values.add(arrayColumnValue);
147162
}
148163
}
@@ -153,13 +168,13 @@ public void unpackMap(List<ColumnValue> keys, List<ColumnValue> values) {
153168
InternalArray key = map.keyArray();
154169
for (int i = 0; i < key.size(); i++) {
155170
PaimonColumnValue keyColumnValue = new PaimonColumnValue((DataGetters) key, i,
156-
dorisType.getChildTypes().get(0));
171+
dorisType.getChildTypes().get(0), ((MapType) dataType).getKeyType());
157172
keys.add(keyColumnValue);
158173
}
159174
InternalArray value = map.valueArray();
160175
for (int i = 0; i < value.size(); i++) {
161176
PaimonColumnValue valueColumnValue = new PaimonColumnValue((DataGetters) value, i,
162-
dorisType.getChildTypes().get(1));
177+
dorisType.getChildTypes().get(1), ((MapType) dataType).getValueType());
163178
values.add(valueColumnValue);
164179
}
165180
}
@@ -169,7 +184,8 @@ public void unpackStruct(List<Integer> structFieldIndex, List<ColumnValue> value
169184
// todo: support pruned struct fields
170185
InternalRow row = record.getRow(idx, structFieldIndex.size());
171186
for (int i : structFieldIndex) {
172-
values.add(new PaimonColumnValue(row, i, dorisType.getChildTypes().get(i)));
187+
values.add(new PaimonColumnValue(row, i, dorisType.getChildTypes().get(i),
188+
((RowType) dataType).getFields().get(i).type()));
173189
}
174190
}
175191
}

fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ public class PaimonJniScanner extends JniScanner {
5353
private RecordReader<InternalRow> reader;
5454
private final PaimonColumnValue columnValue = new PaimonColumnValue();
5555
private List<String> paimonAllFieldNames;
56+
private List<DataType> paimonDataTypeList;
5657

5758
private long ctlId;
5859
private long dbId;
@@ -99,7 +100,7 @@ public void open() throws IOException {
99100
initTable();
100101
initReader();
101102
resetDatetimeV2Precision();
102-
} catch (Exception e) {
103+
} catch (Throwable e) {
103104
LOG.warn("Failed to open paimon_scanner: " + e.getMessage(), e);
104105
throw e;
105106
}
@@ -114,9 +115,12 @@ private void initReader() throws IOException {
114115
+ " Please refresh table and try again",
115116
fields.length, paimonAllFieldNames.size()));
116117
}
117-
readBuilder.withProjection(getProjected());
118+
int[] projected = getProjected();
119+
readBuilder.withProjection(projected);
118120
readBuilder.withFilter(getPredicates());
119121
reader = readBuilder.newRead().createReader(getSplit());
122+
paimonDataTypeList =
123+
Arrays.stream(projected).mapToObj(i -> table.rowType().getTypeAt(i)).collect(Collectors.toList());
120124
}
121125

122126
private int[] getProjected() {
@@ -175,7 +179,7 @@ protected int getNext() throws IOException {
175179
while ((record = recordIterator.next()) != null) {
176180
columnValue.setOffsetRow(record);
177181
for (int i = 0; i < fields.length; i++) {
178-
columnValue.setIdx(i, types[i]);
182+
columnValue.setIdx(i, types[i], paimonDataTypeList.get(i));
179183
appendData(i, columnValue);
180184
}
181185
rows++;
@@ -189,8 +193,8 @@ protected int getNext() throws IOException {
189193
} catch (Exception e) {
190194
close();
191195
LOG.warn("Failed to get the next batch of paimon. "
192-
+ "split: {}, requiredFieldNames: {}, paimonAllFieldNames: {}",
193-
getSplit(), params.get("required_fields"), paimonAllFieldNames, e);
196+
+ "split: {}, requiredFieldNames: {}, paimonAllFieldNames: {}, dataType: {}",
197+
getSplit(), params.get("required_fields"), paimonAllFieldNames, paimonDataTypeList, e);
194198
throw new IOException(e);
195199
}
196200
return rows;

fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ public Optional<SchemaCacheValue> initSchema() {
8888
}
8989

9090
private Type paimonPrimitiveTypeToDorisType(org.apache.paimon.types.DataType dataType) {
91+
int tsScale = 3; // default
9192
switch (dataType.getTypeRoot()) {
9293
case BOOLEAN:
9394
return Type.BOOLEAN;
@@ -114,20 +115,26 @@ private Type paimonPrimitiveTypeToDorisType(org.apache.paimon.types.DataType dat
114115
case DATE:
115116
return ScalarType.createDateV2Type();
116117
case TIMESTAMP_WITHOUT_TIME_ZONE:
117-
case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
118-
int scale = 3; // default
119118
if (dataType instanceof org.apache.paimon.types.TimestampType) {
120-
scale = ((org.apache.paimon.types.TimestampType) dataType).getPrecision();
121-
if (scale > 6) {
122-
scale = 6;
119+
tsScale = ((org.apache.paimon.types.TimestampType) dataType).getPrecision();
120+
if (tsScale > 6) {
121+
tsScale = 6;
123122
}
124123
} else if (dataType instanceof org.apache.paimon.types.LocalZonedTimestampType) {
125-
scale = ((org.apache.paimon.types.LocalZonedTimestampType) dataType).getPrecision();
126-
if (scale > 6) {
127-
scale = 6;
124+
tsScale = ((org.apache.paimon.types.LocalZonedTimestampType) dataType).getPrecision();
125+
if (tsScale > 6) {
126+
tsScale = 6;
127+
}
128+
}
129+
return ScalarType.createDatetimeV2Type(tsScale);
130+
case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
131+
if (dataType instanceof org.apache.paimon.types.LocalZonedTimestampType) {
132+
tsScale = ((org.apache.paimon.types.LocalZonedTimestampType) dataType).getPrecision();
133+
if (tsScale > 6) {
134+
tsScale = 6;
128135
}
129136
}
130-
return ScalarType.createDatetimeV2Type(scale);
137+
return ScalarType.createDatetimeV2Type(tsScale);
131138
case ARRAY:
132139
ArrayType arrayType = (ArrayType) dataType;
133140
Type innerType = paimonPrimitiveTypeToDorisType(arrayType.getElementType());
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
-- This file is automatically generated. You should know what you did if you want to edit this
2+
-- !c1 --
3+
1 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456
4+
5+
-- !c2 --
6+
1 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456
7+
8+
-- !ltz_ntz_simple2 --
9+
1 {"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"} {"2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456", "2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456"} ["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] ["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] {"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 10:12:34.123456"}
10+
11+
-- !ltz_ntz_simple3 --
12+
{"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"}
13+
14+
-- !ltz_ntz_simple4 --
15+
2024-01-02T10:12:34.123456
16+
17+
-- !ltz_ntz_simple5 --
18+
2024-01-04T10:12:34.123456
19+
20+
-- !ltz_ntz_simple6 --
21+
{"2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456", "2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456"}
22+
23+
-- !ltz_ntz_simple7 --
24+
2024-01-02T10:12:34.123456
25+
26+
-- !ltz_ntz_simple8 --
27+
2024-01-04T10:12:34.123456
28+
29+
-- !ltz_ntz_simple9 --
30+
["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"]
31+
32+
-- !ltz_ntz_simple10 --
33+
2024-01-02T10:12:34.123456
34+
35+
-- !ltz_ntz_simple11 --
36+
["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"]
37+
38+
-- !ltz_ntz_simple12 --
39+
2024-01-02T10:12:34.123456
40+
41+
-- !ltz_ntz_simple13 --
42+
{"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 10:12:34.123456"}
43+
44+
-- !ltz_ntz_simple14 --
45+
2024-01-01T10:12:34.123456
46+
47+
-- !ltz_ntz_simple15 --
48+
2024-01-02T10:12:34.123456
49+
50+
-- !c1 --
51+
1 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T02:04:05.100 2024-01-02T02:04:05.120 2024-01-02T02:04:05.123 2024-01-02T02:04:05.123400 2024-01-02T02:04:05.123450 2024-01-02T02:04:05.123456 2024-01-02T02:04:05.123456 2024-01-02T02:04:05.123456 2024-01-02T02:04:05.123456
52+
53+
-- !c2 --
54+
1 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T18:04:05.123456 2024-01-02T18:04:05.123456 2024-01-02T18:04:05.123456 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456
55+
56+
-- !ltz_ntz_simple2 --
57+
1 {"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"} {"2024-01-03 02:12:34.123456":"2024-01-04 02:12:34.123456", "2024-01-01 02:12:34.123456":"2024-01-02 02:12:34.123456"} ["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] ["2024-01-01 02:12:34.123456", "2024-01-02 02:12:34.123456", "2024-01-03 02:12:34.123456"] {"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 02:12:34.123456"}
58+
59+
-- !ltz_ntz_simple3 --
60+
{"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"}
61+
62+
-- !ltz_ntz_simple4 --
63+
2024-01-02T10:12:34.123456
64+
65+
-- !ltz_ntz_simple5 --
66+
2024-01-04T10:12:34.123456
67+
68+
-- !ltz_ntz_simple6 --
69+
{"2024-01-03 02:12:34.123456":"2024-01-04 02:12:34.123456", "2024-01-01 02:12:34.123456":"2024-01-02 02:12:34.123456"}
70+
71+
-- !ltz_ntz_simple7 --
72+
\N
73+
74+
-- !ltz_ntz_simple8 --
75+
\N
76+
77+
-- !ltz_ntz_simple9 --
78+
["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"]
79+
80+
-- !ltz_ntz_simple10 --
81+
2024-01-02T10:12:34.123456
82+
83+
-- !ltz_ntz_simple11 --
84+
["2024-01-01 02:12:34.123456", "2024-01-02 02:12:34.123456", "2024-01-03 02:12:34.123456"]
85+
86+
-- !ltz_ntz_simple12 --
87+
2024-01-02T02:12:34.123456
88+
89+
-- !ltz_ntz_simple13 --
90+
{"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 02:12:34.123456"}
91+
92+
-- !ltz_ntz_simple14 --
93+
2024-01-01T10:12:34.123456
94+
95+
-- !ltz_ntz_simple15 --
96+
2024-01-02T02:12:34.123456
97+
98+
-- !ltz_ntz_simple2 --
99+
1 {"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"} {"2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456", "2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456"} ["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] ["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] {"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 10:12:34.123456"}
100+
101+
-- !ltz_ntz_simple3 --
102+
{"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"}
103+
104+
-- !ltz_ntz_simple4 --
105+
2024-01-02T10:12:34.123456
106+
107+
-- !ltz_ntz_simple5 --
108+
2024-01-04T10:12:34.123456
109+
110+
-- !ltz_ntz_simple6 --
111+
{"2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456", "2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456"}
112+
113+
-- !ltz_ntz_simple7 --
114+
2024-01-02T10:12:34.123456
115+
116+
-- !ltz_ntz_simple8 --
117+
2024-01-04T10:12:34.123456
118+
119+
-- !ltz_ntz_simple9 --
120+
["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"]
121+
122+
-- !ltz_ntz_simple10 --
123+
2024-01-02T10:12:34.123456
124+
125+
-- !ltz_ntz_simple11 --
126+
["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"]
127+
128+
-- !ltz_ntz_simple12 --
129+
2024-01-02T10:12:34.123456
130+
131+
-- !ltz_ntz_simple13 --
132+
{"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 10:12:34.123456"}
133+
134+
-- !ltz_ntz_simple14 --
135+
2024-01-01T10:12:34.123456
136+
137+
-- !ltz_ntz_simple15 --
138+
2024-01-02T10:12:34.123456
139+

0 commit comments

Comments
 (0)