-
Notifications
You must be signed in to change notification settings - Fork 20
/
generate_numpy2_patch.py
224 lines (183 loc) · 8.42 KB
/
generate_numpy2_patch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
import requests
import logging
import json
import re
from collections import defaultdict
from pathlib import Path
# Packages whose numpy dependency must be left alone when the package is at
# exactly the listed version.
# Format: "package_name": "protected_version"
numpy2_protect_dict = {
    "hypothesis": "6.111.0",
}

# Module-level list, initially empty; nothing in this part of the file fills it.
proposed_changes = []

# Root logging setup: DEBUG and above, with timestamp, logger name and level.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)

# Logger used by every function in this module.
logger = logging.getLogger(__name__)

# Channel whose repodata is inspected, and the base URL it is served from.
CHANNEL_NAME = "main"
CHANNEL_ALIAS = "https://repo.anaconda.com/pkgs"

# Platform subdirectories processed, in this order.
SUBDIRS = (
    "noarch",
    "linux-64",
    "linux-aarch64",
    "linux-s390x",
    "osx-64",
    "osx-arm64",
    "win-64",
)

# Collected changes, addressed as NUMPY_2_CHANGES[subdir][filename]; missing
# levels are created on first access.
NUMPY_2_CHANGES = defaultdict(lambda: defaultdict(dict))
def collect_proposed_change(subdirectory, filename, change_type, original_dependency, updated_dependency, reason):
    """
    Record one dependency rewrite in NUMPY_2_CHANGES and log it.

    The entry is stored at ``NUMPY_2_CHANGES[subdirectory][filename]`` and
    replaces anything stored there earlier for the same file, so only the
    most recent change per file is kept.

    Parameters:
    - subdirectory: The platform subdirectory the package file belongs to.
    - filename: The package filename the change applies to.
    - change_type: Label saved under the "type" key of the entry.
    - original_dependency: The dependency string before the change.
    - updated_dependency: The dependency string after the change.
    - reason: Short description of the change; only used in the log line.
    """
    change_record = {
        "type": change_type,
        "original": original_dependency,
        "updated": updated_dependency
    }
    NUMPY_2_CHANGES[subdirectory][filename] = change_record

    message = (f"numpy 2.0.0: {reason} for {filename}. "
               f"Original: '{original_dependency}' -> New: '{updated_dependency}' ({reason})")
    logger.info(message)
def parse_version(version_str):
    """
    Pull the first dotted run of digits out of a version constraint.

    Parameters:
    - version_str: Text such as ">=1.21.5" or "<2.0a0".

    Returns:
    The first match of digits optionally followed by ".digits" groups
    (for example "1.21.5" or "2.0"), or None when the text has no digit.
    """
    found = re.search(r'(\d+(\.\d+)*)', version_str)
    if found is None:
        return None
    return found.group(1)
def has_upper_bound(dependency):
    """
    Check whether a dependency string contains an upper bound.

    The string is split on commas, and each piece is split on whitespace.
    Any resulting token that starts with '<' (covering both '<' and '<=')
    counts as an upper bound. Looking at every token, not only the start of
    each comma-separated piece, is what makes "numpy <2.0" register as
    bounded: there the bound follows the package name directly, with no
    comma in front of it.

    Parameters:
    - dependency: The dependency string to check, e.g. "numpy >=1.21,<2.0a0".

    Returns:
    True if an upper bound is found, False otherwise (including for an
    empty string or a bare package name).
    """
    return any(
        token.startswith('<')
        for part in dependency.split(',')
        for token in part.split()
    )
def patch_record_with_fixed_deps(dependency, parts):
    """
    Adds a "<2.0a0" upper bound to a dependency if necessary.

    The decision is based on the second whitespace-separated field of the
    dependency (the version constraint):
    - no digit in it (e.g. "*"): returned unchanged;
    - starts with "==", "<" or a digit (already pinned or bounded):
      returned unchanged;
    - anything else (">", ">=", "!=", "~=", ...): ",<2.0a0" is appended.

    The bound is always joined with a comma. A space-separated
    "name constraint <2.0a0" would put the bound in the build-string
    position of a conda match spec instead of the version position.

    Parameters:
    - dependency: The original dependency string.
    - parts: The parts of the dependency string, split by spaces.

    Returns:
    The potentially modified dependency string. When ``parts`` has no
    version field the dependency is returned unchanged; bare package names
    are expected to be handled by the caller.
    """
    if len(parts) < 2:
        # No version field to inspect.
        return dependency

    version_str = parts[1]
    if not parse_version(version_str):
        # Constraint carries no version number at all.
        return dependency

    if version_str.startswith(('==', '<')) or version_str[0].isdigit():
        # Exact pin or existing upper bound: nothing to add.
        return dependency

    return f"{dependency},<2.0a0"
def update_numpy_dependencies(dependencies_list, package_record, dependency_type, package_subdir, filename):
    """
    Adds upper bounds to numpy dependencies as needed.

    Walks the list and, for every entry whose package name is exactly
    "numpy" or "numpy-base" and which has no upper bound yet, either logs
    that the package is protected or hands the entry to
    _handle_unspecified_dependency. Names are compared exactly so that
    unrelated packages that merely contain "numpy" in their name are never
    touched. The list itself is not modified.

    Parameters:
    - dependencies_list: Dependency strings to check; empty or None entries
      are skipped.
    - package_record: Metadata about the current package; its "name" and
      "version" are compared against numpy2_protect_dict.
    - dependency_type: Label forwarded with each recorded change
      ('depends' or 'constrains' for the calls made in this file).
    - package_subdir: Package location subdirectory.
    - filename: Package filename.
    """
    # Flag to determine if unspecified dependencies should get an upper bound
    add_bound_to_unspecified = True
    numpy_package_names = ("numpy", "numpy-base")

    for dependency in dependencies_list:
        if not dependency:
            continue
        parts = dependency.split()
        if not parts or parts[0] not in numpy_package_names:
            continue
        if has_upper_bound(dependency):
            continue

        record_name = package_record["name"]
        if record_name in numpy2_protect_dict and \
                package_record["version"] == numpy2_protect_dict[record_name]:
            # Protected package/version pair: report it and leave it as is.
            logger.info(f"numpy 2.0.0: {package_record['name']} is protected at {package_record['version']}")
        elif add_bound_to_unspecified:
            _handle_unspecified_dependency(parts, dependency, package_subdir, filename, dependency_type)
def _handle_unspecified_dependency(parts, dependency, package_subdir, filename, dependency_type):
    """
    Work out the bounded form of a dependency and record it if it changed.

    A bare package name (a single part) always gets " <2.0a0" appended.
    A dependency with more parts is passed to patch_record_with_fixed_deps,
    and the result is recorded only when it differs from the input.
    """
    if len(parts) <= 1:
        # Only the package name is present, so bound it directly.
        bounded = f"{dependency} <2.0a0"
    else:
        bounded = patch_record_with_fixed_deps(dependency, parts)
        if bounded == dependency:
            # Left untouched: nothing to record.
            return

    collect_proposed_change(package_subdir, filename, dependency_type,
                            dependency, bounded, "Upper bound added")
def _load_repodata(subdir, base_dir):
    """Return the parsed repodata for one subdir, reading the local copy if present, else downloading and saving it."""
    repodata_path = base_dir / subdir / "repodata_from_packages.json"

    if repodata_path.is_file():
        with repodata_path.open(encoding="utf-8") as fh:
            return json.load(fh)

    repodata_url = f"{CHANNEL_ALIAS}/{CHANNEL_NAME}/{subdir}/repodata_from_packages.json"
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(repodata_url, timeout=60)
    response.raise_for_status()
    repodata = response.json()

    # Keep a local copy so later runs do not need the network.
    repodata_path.parent.mkdir(parents=True, exist_ok=True)
    with repodata_path.open('w', encoding="utf-8") as fh:
        json.dump(
            repodata,
            fh,
            indent=2,
            sort_keys=True,
            separators=(",", ": "),
        )
    return repodata


def _mentions_numpy(specs):
    """Return True if any spec in the list names exactly numpy or numpy-base."""
    for spec in specs:
        fields = spec.split()
        if fields and fields[0] in ("numpy", "numpy-base"):
            return True
    return False


def main():
    """
    Build numpy2_patch.json from the channel's repodata.

    Loads repodata for every entry of SUBDIRS (local copy next to this file,
    or downloaded and saved), scans the records whose filename contains one
    of the handled Python tags, collects upper-bound changes for their numpy
    dependencies and constraints, and writes the collected changes to
    "numpy2_patch.json" in the current working directory.

    Raises:
    Whatever the download raises when a repodata file cannot be fetched
    (including the HTTP error from raise_for_status). Errors while
    processing a single record are logged and do not stop the run.
    """
    base_dir = Path(__file__).parent / CHANNEL_NAME
    repodatas = {subdir: _load_repodata(subdir, base_dir) for subdir in SUBDIRS}

    python_tags = ("py39", "py310", "py311", "py312")
    skipped_names = ("anaconda", "_anaconda_depends", "__anaconda_core_depends", "_anaconda_core")

    for subdir in SUBDIRS:
        # NOTE(review): only the "packages" section is scanned here; any
        # "packages.conda" section is not looked at - confirm this is intended.
        index = repodatas[subdir]["packages"]
        for fn, record in index.items():
            name = record["name"]
            # Filter out None dependencies
            depends = [dep for dep in record["depends"] if dep is not None]
            constrains = record.get("constrains", [])

            # Only packages built for the handled Python versions are patched.
            if not any(py_ver in fn for py_ver in python_tags):
                continue
            if name in skipped_names:
                continue

            try:
                # One pass per list is enough: update_numpy_dependencies
                # already walks every entry of the list it is given.
                if _mentions_numpy(depends):
                    update_numpy_dependencies(depends, record, "depends", subdir, fn)
                if _mentions_numpy(constrains):
                    update_numpy_dependencies(constrains, record, "constrains", subdir, fn)
            except Exception as e:
                # A bad record must not abort the whole run; log and move on.
                logger.error(f"numpy 2.0.0 error {fn}: {e}")

    # Write the proposed changes to a JSON file
    json_filename = Path("numpy2_patch.json")
    json_filename.write_text(json.dumps(dict(NUMPY_2_CHANGES), indent=2))
    logger.info(f"Proposed changes have been written to {json_filename}")


if __name__ == "__main__":
    main()