@@ -1059,15 +1059,18 @@ def _may_extract_zip(self, files, zip_dir: str):
1059
1059
"""Handle zip files"""
1060
1060
zip_files = [file for file in files if file .endswith (".zip" )]
1061
1061
remaining_files = [file for file in files if not file .endswith ("zip" )]
1062
+ errors : list [str ] = []
1062
1063
1063
1064
# Clean-up <zip_dir> before unzip to remove old files
1064
1065
shutil .rmtree (zip_dir , ignore_errors = True )
1065
1066
1067
+ # Unzip
1066
1068
for zip_file in zip_files :
1067
1069
# Prepare new zip output dir, separated for each files
1068
1070
basename = os .path .splitext (os .path .basename (zip_file ))[0 ]
1069
1071
zip_out_dir = os .path .join (zip_dir , basename )
1070
1072
os .makedirs (zip_out_dir , exist_ok = True )
1073
+
1071
1074
with zipfile .ZipFile (zip_file , "r" ) as zip_ref :
1072
1075
zip_ref .extractall (zip_out_dir )
1073
1076
@@ -1084,7 +1087,7 @@ def _may_extract_zip(self, files, zip_dir: str):
1084
1087
if n_zip_file > 0 :
1085
1088
print (f"Update zip files: { n_zip_file } " )
1086
1089
1087
- return remaining_files
1090
+ return remaining_files , errors
1088
1091
1089
1092
def index_fn (
1090
1093
self , files , urls , reindex : bool , settings , user_id
@@ -1100,20 +1103,22 @@ def index_fn(
1100
1103
"""
1101
1104
if urls :
1102
1105
files = [it .strip () for it in urls .split ("\n " )]
1103
- errors = []
1106
+ errors = self . validate_urls ( files )
1104
1107
else :
1105
1108
if not files :
1106
1109
gr .Info ("No uploaded file" )
1107
1110
yield "" , ""
1108
1111
return
1112
+ files , unzip_errors = self ._may_extract_zip (
1113
+ files , flowsettings .KH_ZIP_INPUT_DIR
1114
+ )
1115
+ errors = self .validate_files (files )
1116
+ errors .extend (unzip_errors )
1109
1117
1110
- files = self ._may_extract_zip (files , flowsettings .KH_ZIP_INPUT_DIR )
1111
-
1112
- errors = self .validate (files )
1113
- if errors :
1114
- gr .Warning (", " .join (errors ))
1115
- yield "" , ""
1116
- return
1118
+ if errors :
1119
+ gr .Warning (", " .join (errors ))
1120
+ yield "" , ""
1121
+ return
1117
1122
1118
1123
gr .Info (f"Start indexing { len (files )} files..." )
1119
1124
@@ -1569,7 +1574,7 @@ def interact_group_list(self, list_groups, ev: gr.SelectData):
1569
1574
selected_item ["files" ],
1570
1575
)
1571
1576
1572
- def validate (self , files : list [str ]):
1577
+ def validate_files (self , files : list [str ]):
1573
1578
"""Validate if the files are valid"""
1574
1579
paths = [Path (file ) for file in files ]
1575
1580
errors = []
@@ -1598,6 +1603,14 @@ def validate(self, files: list[str]):
1598
1603
1599
1604
return errors
1600
1605
1606
def validate_urls(self, urls: list[str]) -> list[str]:
    """Check that every entry looks like an HTTP(S) URL.

    Args:
        urls: candidate URL strings, one entry per URL.

    Returns:
        A list of error messages, one per rejected entry, in input order.
        The list is empty when every entry is accepted.
    """
    # Require the full scheme separator: a bare "http" prefix test would
    # accept strings such as "httpfoo", and testing "https" separately is
    # redundant because every "https" string already starts with "http".
    allowed_schemes = ("http://", "https://")
    return [
        f"Invalid url `{url}`"
        for url in urls
        if not url.startswith(allowed_schemes)
    ]
1613
+
1601
1614
1602
1615
class FileSelector (BasePage ):
1603
1616
"""File selector UI in the Chat page"""
0 commit comments