Coverage for src/gitlabracadabra/packages/pypi.py: 80%

119 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-04-14 23:10 +0200

1# 

2# Copyright (C) 2019-2025 Mathieu Parent <math.parent@gmail.com> 

3# 

4# This program is free software: you can redistribute it and/or modify 

5# it under the terms of the GNU Lesser General Public License as published by 

6# the Free Software Foundation, either version 3 of the License, or 

7# (at your option) any later version. 

8# 

9# This program is distributed in the hope that it will be useful, 

10# but WITHOUT ANY WARRANTY; without even the implied warranty of 

11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

12# GNU Lesser General Public License for more details. 

13# 

14# You should have received a copy of the GNU Lesser General Public License 

15# along with this program. If not, see <http://www.gnu.org/licenses/>. 

16 

17from __future__ import annotations 

18 

19from html import unescape 

20from logging import getLogger 

21from posixpath import join as posixpath_join 

22from typing import TYPE_CHECKING, Any 

23from urllib.parse import quote as urlquote 

24from urllib.parse import urljoin, urlparse, urlunparse 

25from urllib.request import parse_keqv_list 

26 

27from html5lib import parse as html5lib_parse 

28from packaging.requirements import InvalidRequirement, Requirement 

29from packaging.utils import canonicalize_name 

30from packaging.version import InvalidVersion, Version 

31from requests import codes 

32 

33from gitlabracadabra.packages.package_file import PackageFile 

34from gitlabracadabra.packages.pip import extract_version_from_fragment 

35from gitlabracadabra.packages.source import Source 

36 

37if TYPE_CHECKING: 37 ↛ 38line 37 didn't jump to line 38 because the condition on line 37 was never true

38 from requests.models import Response 

39 

# parse_wheel_filename is an optional capability: when the installed
# "packaging" does not provide it, the flag below is set to False and the
# PyPI source reports an error instead of mirroring.
try:
    from packaging.utils import parse_wheel_filename
except ImportError:  # packaging << 20.9
    HAS_PACKAGING_PARSERS = False
else:
    HAS_PACKAGING_PARSERS = True

# Module-level logger, named after this module.
logger = getLogger(__name__)

48 

49 

class PyPI(Source):
    """PyPI repository.

    Resolves Python requirement strings against a "simple" HTML index and
    returns, for each requirement, the files of the highest matching version.
    """

    def __init__(
        self,
        *,
        log_prefix: str = "",
        index_url: str | None = None,
        requirements: str | list[str],
    ) -> None:
        """Initialize a PyPI repository object.

        Args:
            log_prefix: Log prefix.
            index_url: index-url (default to https://pypi.org/simple).
            requirements: Python requirements as list or string. Every string
                is split on line boundaries, one requirement per line.
        """
        super().__init__()
        self._log_prefix = log_prefix
        self._index_url = index_url or "https://pypi.org/simple"
        if isinstance(requirements, str):
            self._requirements = requirements.splitlines()
        else:
            self._requirements = [req for reqs in requirements for req in reqs.splitlines()]

    def __str__(self) -> str:
        """Return string representation.

        Returns:
            A string.
        """
        return "PyPI repository"

    @property
    def package_files(self) -> list[PackageFile]:
        """Return list of package files.

        Lines starting with "#" are skipped. A warning is logged for every
        requirement that yields no package file.

        Returns:
            List of package files (empty when the wheel filename parser from
            "packaging" is unavailable).
        """
        package_files: list[PackageFile] = []
        if not HAS_PACKAGING_PARSERS:
            logger.error(
                "%sPyPI packages mirroring requires packaging >= 20.9",
                self._log_prefix,
            )
            return package_files
        for requirement_string in self._requirements:
            if requirement_string.lstrip().startswith("#"):
                continue
            package_files_from_requirement_string = self._package_files_from_requirement_string(requirement_string)
            if not package_files_from_requirement_string:
                logger.warning(
                    "%sNo package files matching found for requirement: %s",
                    self._log_prefix,
                    requirement_string,
                )
            package_files.extend(package_files_from_requirement_string)
        return package_files

    def _package_files_from_requirement_string(self, requirement_string: str) -> list[PackageFile]:
        """Parse one requirement line and return its matching package files.

        Args:
            requirement_string: A single requirement line.

        Returns:
            Matching package files, or an empty list (after logging a warning)
            when the line is not a valid requirement.
        """
        try:
            req = Requirement(requirement_string)
        except InvalidRequirement:
            logger.warning(
                '%sInvalid requirement "%s"',
                self._log_prefix,
                requirement_string,
            )
            return []
        return self._package_files_from_requirement(req)

    def _package_files_from_requirement(self, req: Requirement) -> list[PackageFile]:
        """Fetch the project index page and return the matching package files.

        Args:
            req: Parsed requirement.

        Returns:
            Matching package files, or an empty list (after logging a warning)
            when the index does not answer with HTTP 200.
        """
        index_url = self._get_index_url(req.name)
        index_response = self.session.request("get", index_url)
        if index_response.status_code != codes["ok"]:
            logger.warning(
                "%sUnexpected HTTP status for PyPI index %s: received %i %s",
                self._log_prefix,
                index_url,
                index_response.status_code,
                index_response.reason,
            )
            return []
        return self._package_files_from_requirement_and_response(req, index_response)

    def _get_index_url(self, project_name: str) -> str:
        """Build the index page URL of a project.

        Args:
            project_name: Project name (canonicalized and URL-quoted here).

        Returns:
            The project URL under the index URL, always ending with "/".
        """
        loc = posixpath_join(
            self._index_url,
            urlquote(canonicalize_name(project_name)),
        )
        return loc if loc.endswith("/") else f"{loc}/"

    def _package_files_from_requirement_and_response(
        self,
        req: Requirement,
        response: Response,
    ) -> list[PackageFile]:
        """Select the files of the best matching version from an index page.

        Args:
            req: Parsed requirement.
            response: HTTP response holding the HTML index page.

        Returns:
            Package files of the highest version accepted by the requirement,
            or an empty list when no anchor matches.
        """
        document = html5lib_parse(
            response.content,
            transport_encoding=response.encoding,
            namespaceHTMLElements=False,
        )

        base_url = self._get_base_url(response, document)

        # Group candidate files by version so that every file of the selected
        # version (wheels and sdist alike) is returned together.
        files_by_version: dict[Version, list[PackageFile]] = {}
        for anchor in document.findall(".//a"):
            version, package_file = self._package_file_from_requirement_and_anchor(req, anchor, base_url)
            if version and package_file:
                files_by_version.setdefault(version, []).append(package_file)

        if not files_by_version:
            return []
        return files_by_version[max(files_by_version)]

    def _get_base_url(self, response: Response, document: Any) -> str:
        """Return the URL against which anchors of the page are resolved.

        Args:
            response: HTTP response of the index page.
            document: Parsed HTML document.

        Returns:
            The href of the first <base> element having one, otherwise the
            response URL.
        """
        for base in document.findall(".//base"):
            href = base.get("href")
            if href is not None:
                return href
        return response.url

    def _package_file_from_requirement_and_anchor(
        self,
        req: Requirement,
        anchor: Any,
        base_url: str,
    ) -> tuple[Version | None, PackageFile | None]:
        """Turn one index anchor into a package file, if it matches.

        Args:
            req: Parsed requirement.
            anchor: <a> element of the index page.
            base_url: URL used to resolve relative hrefs.

        Returns:
            A (version, package file) pair, or (None, None) when the anchor
            has no href, is yanked (unless the specifier starts with "=="),
            has an unparsable filename, or does not satisfy the requirement.
        """
        href = anchor.get("href")
        if href is None:
            return None, None
        # PEP 592 allows data-yanked without a value, so test for presence of
        # the attribute: an empty string still means the file is yanked.
        is_yanked = anchor.get("data-yanked") is not None
        if is_yanked and not str(req.specifier).startswith("=="):
            return None, None

        parsed_url = urlparse(urljoin(base_url, href))

        filename = parsed_url.path.split("/")[-1]
        try:
            name, ver = self._parse_filename(filename, canonicalize_name(req.name))
        except InvalidVersion:
            # Ignore invalid versions, like in pbr-0.5.2.5.g5b3e942.tar.gz
            logger.debug(
                "%sIgnoring invalid version for filename %s",
                self._log_prefix,
                filename,
            )
            return None, None

        if name is None or ver is None or ver not in req.specifier:
            return None, None

        # The fragment usually carries a hash ("sha256=..."). Keep only
        # "key=value" items with a non-empty value: parse_keqv_list raises on
        # anything else, including the empty string produced by a link
        # without any fragment.
        fragment_items = [item for item in parsed_url.fragment.split("&") if item.partition("=")[2]]
        metadata = parse_keqv_list(fragment_items)

        requires_python = anchor.get("data-requires-python")
        if requires_python is not None:
            metadata["requires-python"] = unescape(requires_python)

        return ver, PackageFile(
            urlunparse(parsed_url._replace(fragment="")),
            "pypi",
            name,
            str(ver),
            filename,
            metadata=metadata,
        )

    def _parse_filename(self, filename: str, canonical_name: str) -> tuple[str | None, Version | None]:
        """Extract the project name and version from a distribution filename.

        Only wheels (".whl") and ".tar.gz" source archives are recognised;
        ".egg" files are deliberately ignored.

        Args:
            filename: Last path segment of the file URL.
            canonical_name: Canonicalized name of the required project.

        Returns:
            A (name, version) pair, or (None, None) for unsupported files.

        Raises:
            InvalidVersion: When the filename cannot be parsed into a valid
                version.
        """
        if filename.endswith(".whl"):
            try:
                name, ver, _, _ = parse_wheel_filename(filename)
            except ValueError as err:
                # packaging reports malformed wheel names with
                # InvalidWheelFilename, a ValueError subclass. Normalise it to
                # InvalidVersion, the error the caller already skips, so one
                # bad file does not abort the whole requirement.
                raise InvalidVersion(filename) from err
            return name, ver
        if filename.endswith(".egg"):
            # Ignore egg files for now
            return None, None
        if filename.endswith(".tar.gz"):
            # Strip the ".tar.gz" suffix (7 characters) before extracting.
            ver_str = extract_version_from_fragment(filename[:-7], canonical_name)
            if ver_str:
                return canonical_name, Version(ver_str)
        return None, None