@@ -1098,7 +1098,7 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error {
1098
1098
Compression : compressTp ,
1099
1099
Type : sourceType ,
1100
1100
}
1101
- fileMeta .RealSize = e . getFileRealSize (ctx , fileMeta , s )
1101
+ fileMeta .RealSize = mydump . EstimateRealSizeForFile (ctx , fileMeta , s )
1102
1102
dataFiles = append (dataFiles , & fileMeta )
1103
1103
totalSize = size
1104
1104
} else {
@@ -1112,28 +1112,38 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error {
1112
1112
// access, else walkDir will fail
1113
1113
// we only support '*'; to reuse the glob library, we manually escape the path
1114
1114
escapedPath := stringutil .EscapeGlobQuestionMark (fileNameKey )
1115
- err := s .WalkDir (ctx , & storage.WalkOption {ObjPrefix : commonPrefix , SkipSubDir : true },
1115
+
1116
+ allFiles := make ([]mydump.RawFile , 0 , 16 )
1117
+ if err := s .WalkDir (ctx , & storage.WalkOption {ObjPrefix : commonPrefix , SkipSubDir : true },
1116
1118
func (remotePath string , size int64 ) error {
1117
1119
// we have checked in LoadDataExec.Next
1118
1120
//nolint: errcheck
1119
1121
match , _ := filepath .Match (escapedPath , remotePath )
1120
1122
if ! match {
1121
1123
return nil
1122
1124
}
1123
- compressTp := mydump .ParseCompressionOnFileExtension (remotePath )
1125
+ allFiles = append (allFiles , mydump.RawFile {Path : remotePath , Size : size })
1126
+ totalSize += size
1127
+ return nil
1128
+ }); err != nil {
1129
+ return exeerrors .ErrLoadDataCantRead .GenWithStackByArgs (GetMsgFromBRError (err ), "failed to walk dir" )
1130
+ }
1131
+
1132
+ var err error
1133
+ if dataFiles , err = mydump .ParallelProcess (ctx , allFiles , e .ThreadCnt * 2 ,
1134
+ func (ctx context.Context , f mydump.RawFile ) (* mydump.SourceFileMeta , error ) {
1135
+ path , size := f .Path , f .Size
1136
+ compressTp := mydump .ParseCompressionOnFileExtension (path )
1124
1137
fileMeta := mydump.SourceFileMeta {
1125
- Path : remotePath ,
1138
+ Path : path ,
1126
1139
FileSize : size ,
1127
1140
Compression : compressTp ,
1128
1141
Type : sourceType ,
1129
1142
}
1130
- fileMeta .RealSize = e .getFileRealSize (ctx , fileMeta , s )
1131
- dataFiles = append (dataFiles , & fileMeta )
1132
- totalSize += size
1133
- return nil
1134
- })
1135
- if err != nil {
1136
- return exeerrors .ErrLoadDataCantRead .GenWithStackByArgs (GetMsgFromBRError (err ), "failed to walk dir" )
1143
+ fileMeta .RealSize = mydump .EstimateRealSizeForFile (ctx , fileMeta , s )
1144
+ return & fileMeta , nil
1145
+ }); err != nil {
1146
+ return err
1137
1147
}
1138
1148
}
1139
1149
@@ -1142,19 +1152,6 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error {
1142
1152
return nil
1143
1153
}
1144
1154
1145
- func (e * LoadDataController ) getFileRealSize (ctx context.Context ,
1146
- fileMeta mydump.SourceFileMeta , store storage.ExternalStorage ) int64 {
1147
- if fileMeta .Compression == mydump .CompressionNone {
1148
- return fileMeta .FileSize
1149
- }
1150
- compressRatio , err := mydump .SampleFileCompressRatio (ctx , fileMeta , store )
1151
- if err != nil {
1152
- e .logger .Warn ("failed to get compress ratio" , zap .String ("file" , fileMeta .Path ), zap .Error (err ))
1153
- return fileMeta .FileSize
1154
- }
1155
- return int64 (compressRatio * float64 (fileMeta .FileSize ))
1156
- }
1157
-
1158
1155
func (e * LoadDataController ) getSourceType () mydump.SourceType {
1159
1156
switch e .Format {
1160
1157
case DataFormatParquet :
0 commit comments