Skip to content

Commit 37cdc28

Browse files
cin-nikophv2312Copilot
authored
fix: add validation to avoid path-traversal vulnerabilities (#755)
* fix: add validation to avoid path-traversal vulnerabilities * fix: update init value is_safe Co-authored-by: Copilot <[email protected]> * refactor: extract zip check * fix: don't need to check relative path * fix: disable zip file check (zipfile already handles it) --------- Co-authored-by: kan_cin <[email protected]> Co-authored-by: Copilot <[email protected]> Co-authored-by: phv2312 <[email protected]>
1 parent ec1f6ab commit 37cdc28

File tree

1 file changed

+23
-10
lines changed
  • libs/ktem/ktem/index/file

1 file changed

+23
-10
lines changed

libs/ktem/ktem/index/file/ui.py

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1059,15 +1059,18 @@ def _may_extract_zip(self, files, zip_dir: str):
10591059
"""Handle zip files"""
10601060
zip_files = [file for file in files if file.endswith(".zip")]
10611061
remaining_files = [file for file in files if not file.endswith("zip")]
1062+
errors: list[str] = []
10621063

10631064
# Clean-up <zip_dir> before unzip to remove old files
10641065
shutil.rmtree(zip_dir, ignore_errors=True)
10651066

1067+
# Unzip
10661068
for zip_file in zip_files:
10671069
# Prepare new zip output dir, separated for each files
10681070
basename = os.path.splitext(os.path.basename(zip_file))[0]
10691071
zip_out_dir = os.path.join(zip_dir, basename)
10701072
os.makedirs(zip_out_dir, exist_ok=True)
1073+
10711074
with zipfile.ZipFile(zip_file, "r") as zip_ref:
10721075
zip_ref.extractall(zip_out_dir)
10731076

@@ -1084,7 +1087,7 @@ def _may_extract_zip(self, files, zip_dir: str):
10841087
if n_zip_file > 0:
10851088
print(f"Update zip files: {n_zip_file}")
10861089

1087-
return remaining_files
1090+
return remaining_files, errors
10881091

10891092
def index_fn(
10901093
self, files, urls, reindex: bool, settings, user_id
@@ -1100,20 +1103,22 @@ def index_fn(
11001103
"""
11011104
if urls:
11021105
files = [it.strip() for it in urls.split("\n")]
1103-
errors = []
1106+
errors = self.validate_urls(files)
11041107
else:
11051108
if not files:
11061109
gr.Info("No uploaded file")
11071110
yield "", ""
11081111
return
1112+
files, unzip_errors = self._may_extract_zip(
1113+
files, flowsettings.KH_ZIP_INPUT_DIR
1114+
)
1115+
errors = self.validate_files(files)
1116+
errors.extend(unzip_errors)
11091117

1110-
files = self._may_extract_zip(files, flowsettings.KH_ZIP_INPUT_DIR)
1111-
1112-
errors = self.validate(files)
1113-
if errors:
1114-
gr.Warning(", ".join(errors))
1115-
yield "", ""
1116-
return
1118+
if errors:
1119+
gr.Warning(", ".join(errors))
1120+
yield "", ""
1121+
return
11171122

11181123
gr.Info(f"Start indexing {len(files)} files...")
11191124

@@ -1569,7 +1574,7 @@ def interact_group_list(self, list_groups, ev: gr.SelectData):
15691574
selected_item["files"],
15701575
)
15711576

1572-
def validate(self, files: list[str]):
1577+
def validate_files(self, files: list[str]):
15731578
"""Validate if the files are valid"""
15741579
paths = [Path(file) for file in files]
15751580
errors = []
@@ -1598,6 +1603,14 @@ def validate(self, files: list[str]):
15981603

15991604
return errors
16001605

1606+
def validate_urls(self, urls: list[str]):
    """Validate that every entry is an HTTP or HTTPS URL.

    Args:
        urls: Candidate URL strings to check.

    Returns:
        A list of error messages, one per rejected entry, in input order.
        The list is empty when every entry is accepted.
    """
    # Include the "://" separator in the accepted prefixes: a bare "http"
    # prefix test also lets through strings such as "httpfoo", and testing
    # "https" separately adds nothing because it already starts with "http".
    allowed_prefixes = ("http://", "https://")
    return [
        f"Invalid url `{url}`"
        for url in urls
        if not url.startswith(allowed_prefixes)
    ]
1613+
16011614

16021615
class FileSelector(BasePage):
16031616
"""File selector UI in the Chat page"""

0 commit comments

Comments
 (0)