Skip to content

Commit 6229330

Browse files
authored
Merge pull request #81 from davidkastner/repo2markdown
Script that converts a python repo to markdown
2 parents dad8e44 + 0e64cfc commit 6229330

File tree

2 files changed

+139
-0
lines changed

2 files changed

+139
-0
lines changed

pyqmmm/cli.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,14 @@ def cli():
4141
@click.option("--delete_pdb_atoms", "-dpa", is_flag=True, help="Deletes atoms from PDB trajectory.")
4242
@click.option("--translate_pdb_to_center", "-tc", is_flag=True, help="Translates PDB traj to new center.")
4343
@click.option("--xyz2pdb", "-x2p", is_flag=True, help="Converts an xyz file or traj to a PDB.")
44+
@click.option("--repo2markdown", "-r2m", is_flag=True, help="Converts python package to markdown file.")
4445
def io(
4546
ppm2png,
4647
delete_xyz_atoms,
4748
delete_pdb_atoms,
4849
translate_pdb_to_center,
4950
xyz2pdb,
51+
repo2markdown
5052
):
5153
"""
5254
Tools for useful manipulations of common file types.
@@ -114,6 +116,12 @@ def io(
114116
output_pdb = "pdb_trajectory.pdb"
115117
pyqmmm.io.xyz2pdb.xyz2pdb_traj(xyz_traj, output_pdb, template)
116118

119+
elif repo2markdown:
120+
click.echo("Converts a Python package to a single markdown file")
121+
click.echo("Loading...")
122+
import pyqmmm.io.repo2markdown
123+
pyqmmm.io.repo2markdown.main()
124+
117125

118126
@cli.command()
119127
@click.option("--gbsa_submit", "-gs", is_flag=True, help="Prepares and submits a mmGBSA job.")

pyqmmm/io/repo2markdown.py

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
import os
2+
import ast
3+
from pathlib import Path
4+
5+
# Configuration
# Everything below is evaluated once, at import time, relative to the
# current working directory of the process.

# Absolute path of the directory that gets converted.
ROOT_DIR = Path(".").resolve()

# Name of the generated Markdown file (written relative to the CWD).
OUTPUT_FILE = ROOT_DIR.name + "_LLM_readable.md"

# File suffixes (lowercase, leading dot included) that are exported.
INCLUDE_EXTENSIONS = {
    ".py",
    ".md",
    ".txt",
    ".yaml",
    ".yml",
    ".json",
    ".toml",
    ".csv",
}

# Directory names that are never descended into, at any depth.
SKIP_DIRS = {
    ".git",
    "__pycache__",
    ".mypy_cache",
    ".venv",
    "env",
    "venv",
    ".idea",
    ".vscode",
}

# Indentation unit; not referenced by any function visible in this file.
INDENT = " "
12+
13+
14+
def should_include_file(file_path: Path) -> bool:
    """Return True when *file_path* has an extension listed in INCLUDE_EXTENSIONS.

    The comparison ignores case, so ``NOTES.TXT`` is treated like
    ``notes.txt``. Only the final suffix is considered (``a.tar.gz`` is
    judged by ``.gz``); files without a suffix are never included.

    Args:
        file_path: Path to classify. It does not need to exist; only its
            name is inspected.

    Returns:
        True if the file should be exported, False otherwise.
    """
    # INCLUDE_EXTENSIONS holds lowercase suffixes, so normalise before lookup.
    return file_path.suffix.lower() in INCLUDE_EXTENSIONS
16+
17+
18+
def generate_tree_structure(root: Path) -> str:
    """Render the directory tree below *root* as a Markdown section.

    Directories named in ``SKIP_DIRS`` are pruned and only files accepted by
    ``should_include_file`` are listed. Entries of a directory are sorted
    case-insensitively by name with files and directories interleaved;
    directory names carry a trailing ``/``.

    Args:
        root: Directory whose contents are drawn. The root itself is not
            emitted as a tree entry.

    Returns:
        A string that starts with the ``# Repository Structure`` heading,
        followed by one line per entry, and ends with a newline. If *root*
        cannot be listed, only the heading is returned.
    """
    tree_lines = ["# Repository Structure\n"]

    def _walk(path: Path, prefix: str = "") -> None:
        """Append the entries of *path* (recursively) to ``tree_lines``."""
        try:
            children = list(path.iterdir())
        except OSError:
            # An unreadable directory is drawn as empty instead of aborting
            # the whole export; os.walk in the file-content section is
            # equally forgiving by default.
            return

        # Classify each child once instead of listing the directory twice.
        entries = []
        for child in children:
            if child.is_dir():
                if child.name not in SKIP_DIRS:
                    entries.append((child, True))
            elif child.is_file() and should_include_file(child):
                entries.append((child, False))
        # On equal lowercase names, directories sort ahead of files.
        entries.sort(key=lambda item: (item[0].name.lower(), not item[1]))

        last_index = len(entries) - 1
        for i, (entry, is_dir) in enumerate(entries):
            is_last = i == last_index
            connector = "└── " if is_last else "├── "
            tree_lines.append(f"{prefix}{connector}{entry.name}{'/' if is_dir else ''}")
            if is_dir:
                # The continuation must be exactly as wide as the connector
                # (4 columns), otherwise nested levels drift out of alignment.
                _walk(entry, prefix + ("    " if is_last else "│   "))

    _walk(root)
    return "\n".join(tree_lines) + "\n"
34+
35+
36+
def get_readme_content(root: Path) -> str:
    """Return the top-level README of *root* as a Markdown section.

    Any regular file directly inside *root* whose name equals ``readme.md``
    ignoring case is accepted (``README.md``, ``ReadMe.md``, ``README.MD``
    and so on). When several spellings coexist, ``README.md``, ``readme.md``
    and ``Readme.md`` are preferred in that order, then the alphabetically
    first remaining name.

    Args:
        root: Directory to search. Sub-directories are not searched.

    Returns:
        ``"# README\\n\\n<content>\\n\\n"`` when a README was read, a section
        carrying a warning when it exists but cannot be read or decoded as
        UTF-8, or an empty string when no README is found or *root* cannot
        be listed.
    """
    try:
        candidates = [
            entry
            for entry in root.iterdir()
            if entry.is_file() and entry.name.lower() == "readme.md"
        ]
    except OSError:
        return ""
    if not candidates:
        return ""

    preferred = ("README.md", "readme.md", "Readme.md")

    def _rank(path: Path):
        """Sort key: conventional spellings first, then by name."""
        position = preferred.index(path.name) if path.name in preferred else len(preferred)
        return (position, path.name)

    readme_path = min(candidates, key=_rank)
    try:
        with open(readme_path, "r", encoding="utf-8") as f:
            return "# README\n\n" + f.read() + "\n\n"
    except (OSError, UnicodeError) as e:
        return f"# README\n\n⚠️ Could not read README: {e}\n\n"
46+
47+
48+
def get_imported_modules(file_path: Path) -> list:
    """Return the sorted, de-duplicated module names imported by a Python file.

    The file is parsed with ``ast`` and never executed. Imports at any
    nesting level (inside functions, classes, conditionals) are collected.

    * ``import a.b as c`` contributes ``"a.b"``.
    * ``from ..pkg import x`` contributes ``"..pkg"``; the leading dots
      encode the relative-import level.
    * ``from . import x, y`` contributes ``".x"`` and ``".y"``.
    * ``from . import *`` contributes ``"."``.

    Args:
        file_path: Path of the Python source file to inspect.

    Returns:
        A sorted list of module name strings. The list is empty when the
        file cannot be read, is not valid UTF-8, or cannot be parsed.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            tree = ast.parse(f.read(), filename=str(file_path))
    except (OSError, SyntaxError, ValueError, RecursionError):
        # Best effort: an unreadable or unparsable file simply has no
        # imports to report. UnicodeDecodeError is covered by ValueError.
        return []

    imported = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            imported.update(alias.name for alias in node.names)
        elif isinstance(node, ast.ImportFrom):
            dots = "." * node.level
            if node.module:
                imported.add(dots + node.module)
            else:
                # "from . import x": there is no module part, so the
                # imported names themselves identify what is pulled in.
                for alias in node.names:
                    imported.add(dots if alias.name == "*" else dots + alias.name)
    return sorted(imported)
64+
65+
66+
def write_imports_section(root_dir: Path) -> str:
    """Build the "Module Imports" Markdown section for every Python file.

    Files located under a directory named in ``SKIP_DIRS`` (at any depth)
    are ignored, so virtual environments, VCS metadata and caches do not
    flood the listing. Files are ordered by their path relative to
    *root_dir*.

    Args:
        root_dir: Directory that is searched recursively for ``*.py`` files.

    Returns:
        A string that starts with the ``# Module Imports`` heading and holds
        one ``##`` sub-section per file, listing the modules reported by
        ``get_imported_modules`` or the text ``No imported modules.``.
    """
    markdown_lines = ["# Module Imports\n"]

    py_files = sorted(
        (
            path
            for path in root_dir.rglob("*.py")
            # parts[:-1] are the directory components of the relative path;
            # none of them may be a skipped directory.
            if SKIP_DIRS.isdisjoint(path.relative_to(root_dir).parts[:-1])
            and path.is_file()
        ),
        key=lambda p: str(p.relative_to(root_dir)),
    )

    for file_path in py_files:
        # Honour the configured extension list even though the glob
        # already restricts the search to *.py.
        if not should_include_file(file_path):
            continue
        rel_path = file_path.relative_to(root_dir)
        imported = get_imported_modules(file_path)
        markdown_lines.append(f"\n## `{rel_path}`\n")
        if imported:
            markdown_lines.append("Imported modules:\n")
            markdown_lines.extend(f"- {mod}" for mod in imported)
        else:
            markdown_lines.append("No imported modules.")
        markdown_lines.append("\n")
    return "\n".join(markdown_lines) + "\n"
83+
84+
85+
def write_file_contents(root_dir: Path) -> str:
    """Build the "File Contents" Markdown section.

    Walks *root_dir* with ``os.walk``, pruning directories named in
    ``SKIP_DIRS``, and emits every file accepted by ``should_include_file``
    as a ``###`` heading followed by a fenced code block. Directories and
    files are visited in sorted order.

    Two files are deliberately left out:

    * the top-level README (any capitalisation of ``readme.md``);
    * the file at ``OUTPUT_FILE``, i.e. the result of an earlier run, which
      would otherwise be embedded again and grow the output on every run.

    Args:
        root_dir: Directory whose files are exported.

    Returns:
        A string that starts with the ``# File Contents`` heading and ends
        with a newline. Files that cannot be read or decoded as UTF-8 are
        represented by a warning line inside their code block.
    """
    markdown_lines = ["# File Contents\n"]
    previous_output = Path(OUTPUT_FILE).resolve()

    for dirpath, dirnames, filenames in os.walk(root_dir):
        # In-place assignment prunes skipped directories and fixes the
        # order in which os.walk descends.
        dirnames[:] = sorted(d for d in dirnames if d not in SKIP_DIRS)
        current_dir = Path(dirpath)
        at_root = current_dir.relative_to(root_dir) == Path(".")

        for filename in sorted(filenames):
            if at_root and filename.lower() == "readme.md":
                continue  # Skip README since it's added separately
            file_path = current_dir / filename
            if not should_include_file(file_path):
                continue
            # Cheap name comparison first; resolve() only on a name match.
            if filename == previous_output.name and file_path.resolve() == previous_output:
                continue

            markdown_lines.append(f"\n### `{file_path.relative_to(root_dir)}`\n")

            lang = file_path.suffix[1:] if file_path.suffix else "text"
            if lang == "md":
                lang = "markdown"
            elif lang == "txt":
                lang = "text"

            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    body = f.read()
            except (OSError, UnicodeError) as e:
                body = f"⚠️ Could not read file: {e}"

            # A fence is closed by any backtick run at least as long as the
            # opener, so lengthen it until it cannot occur in the content
            # (Markdown files routinely contain ``` themselves).
            fence = "```"
            while fence in body:
                fence += "`"

            markdown_lines.append(f"{fence}{lang}")
            markdown_lines.append(body)
            markdown_lines.append(fence)
    return "\n".join(markdown_lines) + "\n"
112+
113+
114+
def main():
    """Generate the single-file Markdown export of ROOT_DIR.

    All four sections (README, repository tree, module imports, file
    contents) are built in that order and then written back-to-back to
    OUTPUT_FILE, replacing any existing file of that name. Progress is
    reported on standard output.
    """
    print(f"Generating markdown from: {ROOT_DIR}")

    # Every section is rendered before the output file is opened.
    sections = (
        get_readme_content(ROOT_DIR),
        generate_tree_structure(ROOT_DIR),
        write_imports_section(ROOT_DIR),
        write_file_contents(ROOT_DIR),
    )

    with open(OUTPUT_FILE, "w", encoding="utf-8") as out:
        out.writelines(sections)

    print(f"✅ Output written to {OUTPUT_FILE}")


# Allow running the module directly as a script.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)