# dependencies.py
#
# Scan a directory of local repositories for common dependency manifests
# (requirements.txt, pyproject.toml, setup.py, setup.cfg, package.json)
# and build a consolidated dependency table.
import os
import re
import json
import configparser
from pathlib import Path

import pandas as pd
import toml

# Regex pattern to extract a package name and an optional version constraint.
# The name class includes '.' so dotted names like zope.interface parse whole.
version_pattern = re.compile(r'([a-zA-Z0-9._\-]+)([<>=!~]*[0-9a-zA-Z\-.]+)?')
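
# Illustrative matches (the sample strings below are hypothetical, not
# taken from any real manifest):
#   version_pattern.match('requests==2.31.0').groups() -> ('requests', '==2.31.0')
#   version_pattern.match('numpy').groups()            -> ('numpy', None)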


def extract_requirements_txt(filepath):
    """Return the non-empty, non-comment lines of a requirements.txt file."""
    with open(filepath, 'r') as f:
        return [line.strip() for line in f if line.strip() and not line.startswith('#')]


def extract_pyproject_toml(filepath):
    """Extract dependencies from pyproject.toml, checking common layouts."""
    with open(filepath, 'r') as f:
        pyproject_data = toml.load(f)
    # Attempt to find dependencies in various possible locations
    possible_keys = [
        'dependencies',              # Non-standard direct dependencies list
        'tool.poetry.dependencies',  # Poetry standard
        'tool.pdm.dependencies',     # PDM standard
        'project.dependencies',      # PEP 621 standard under [project]
    ]
    for key in possible_keys:
        keys = key.split('.')
        data = pyproject_data
        for subkey in keys:
            if subkey in data:
                data = data[subkey]
            else:
                data = None
                break
        if data:
            parsed_deps = {}
            if isinstance(data, dict):  # Dictionary-style dependencies (Poetry/PDM)
                for package, version in data.items():
                    parsed_deps[package] = version
            elif isinstance(data, list):  # List-style dependencies with version constraints
                for dep in data:
                    package, version = parse_dependency(dep)
                    parsed_deps[package] = version
            return parsed_deps
    return {}
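
# Example (hypothetical pyproject.toml contents):
#
#   [tool.poetry.dependencies]
#   python = "^3.10"
#   requests = "^2.31"
#
# yields {'python': '^3.10', 'requests': '^2.31'}.  Note that Poetry
# values may also be inline tables such as {version = "^1.0"}, in which
# case the table itself is stored as the version.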


def extract_setup_py(filepath):
    """Extract install_requires entries from setup.py.

    Note: this is a line-based heuristic; it only handles an
    install_requires list written on a single line.
    """
    dependencies = []
    with open(filepath, 'r') as f:
        for line in f:
            if 'install_requires' in line:
                start = line.find('[')
                end = line.find(']')
                if start != -1 and end != -1:
                    deps = line[start + 1:end].split(',')
                    dependencies.extend([dep.strip().strip("'\"") for dep in deps])
    return dependencies
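
# This handles a single-line declaration such as (hypothetical):
#
#   install_requires=['requests>=2.28', 'toml'],
#
# yielding ['requests>=2.28', 'toml'].  Multi-line install_requires
# lists are not captured, since '[' and ']' must appear on the same
# line as the 'install_requires' keyword.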


def extract_setup_cfg(filepath):
    """Extract dependencies from setup.cfg."""
    config = configparser.ConfigParser()
    config.read(filepath)
    if 'options' in config and 'install_requires' in config['options']:
        return [dep.strip() for dep in config['options']['install_requires'].splitlines() if dep]
    return []
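
# Example (hypothetical setup.cfg):
#
#   [options]
#   install_requires =
#       requests>=2.28
#       toml
#
# configparser exposes the multi-line value with a leading newline, so
# splitlines() yields ['', 'requests>=2.28', 'toml'] and the `if dep`
# filter drops the empty first entry.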


def extract_package_json(filepath):
    """Return the 'dependencies' mapping from a package.json file."""
    with open(filepath, 'r') as f:
        package_data = json.load(f)
    return package_data.get('dependencies', {})
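
# Example (hypothetical package.json):
#
#   {"dependencies": {"express": "^4.18.2"}, "devDependencies": {"jest": "^29.0.0"}}
#
# returns {'express': '^4.18.2'}; devDependencies are not read.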


def parse_dependency(dep):
    """Extract package name and version constraint from a string
    (e.g., 'package==1.0.0' or 'package>=1.2.3b1')."""
    match = version_pattern.match(dep)
    if match:
        package = match.group(1)
        version_constraint = match.group(2) or ''
        return package, version_constraint
    return dep, ''  # If no match, return the dep as it is (could be a special case)
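
# Worked examples (illustrative):
#   parse_dependency('pandas>=1.5') -> ('pandas', '>=1.5')
#   parse_dependency('toml')        -> ('toml', '')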


def get_dependencies_from_repo(repo_name, repo_path):
    """Collect (repo, package, version) tuples from every manifest found in repo_path."""
    dependencies = []
    # Check for requirements.txt
    req_file = Path(repo_path) / 'requirements.txt'
    if req_file.exists():
        for dep in extract_requirements_txt(req_file):
            package, version = parse_dependency(dep)
            dependencies.append((repo_name, package, version))
    # Check for pyproject.toml
    toml_file = Path(repo_path) / 'pyproject.toml'
    if toml_file.exists():
        py_deps = extract_pyproject_toml(toml_file)
        for dep, version in py_deps.items():
            dependencies.append((repo_name, dep, version))
    # Check for setup.py
    setup_file = Path(repo_path) / 'setup.py'
    if setup_file.exists():
        for dep in extract_setup_py(setup_file):
            package, version = parse_dependency(dep)
            dependencies.append((repo_name, package, version))
    # Check for setup.cfg
    cfg_file = Path(repo_path) / 'setup.cfg'
    if cfg_file.exists():
        for dep in extract_setup_cfg(cfg_file):
            package, version = parse_dependency(dep)
            dependencies.append((repo_name, package, version))
    # Check for package.json (for Node.js projects)
    package_json_file = Path(repo_path) / 'package.json'
    if package_json_file.exists():
        node_deps = extract_package_json(package_json_file)
        for dep, version in node_deps.items():
            dependencies.append((repo_name, dep, version))
    return dependencies
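
# Example of the returned shape (repo name and path are hypothetical):
#   get_dependencies_from_repo('my-repo', '/repos/my-repo')
#   -> [('my-repo', 'requests', '>=2.28'), ('my-repo', 'toml', '')]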


def process_repositories(repos_base_path):
    """Walk every subdirectory of repos_base_path and gather its dependencies."""
    all_dependencies = []
    for repo_name in os.listdir(repos_base_path):
        repo_path = os.path.join(repos_base_path, repo_name)
        if os.path.isdir(repo_path):
            dependencies = get_dependencies_from_repo(repo_name, repo_path)
            all_dependencies.extend(dependencies)
    return all_dependencies


def generate_dependency_table(repos_base_path):
    """Build, print, and save a table of all dependencies across repositories."""
    dependencies = process_repositories(repos_base_path)
    # Convert the list of dependencies into a DataFrame for better readability
    df = pd.DataFrame(dependencies, columns=["Repository", "Dependency", "Version/Constraint"])
    # Ensure that all columns are treated as strings before dropping duplicates
    # (pyproject values may be tables/dicts, which are unhashable otherwise)
    df = df.astype(str)
    # Drop duplicate rows
    df = df.drop_duplicates()
    # Print the table (or save it to CSV)
    print(df.to_string(index=False))
    # Save to CSV without modifying the values
    df.to_csv('dependency_list.csv', index=False)


# Example usage:
if __name__ == "__main__":
    base_path = "/path/to/your/local/repositories"  # Update this to your repos' location
    generate_dependency_table(base_path)