Coverage for src/gitlabracadabra/containers/registry_importer.py: 85%

155 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-04-14 23:10 +0200

1# 

2# Copyright (C) 2019-2025 Mathieu Parent <math.parent@gmail.com> 

3# 

4# This program is free software: you can redistribute it and/or modify 

5# it under the terms of the GNU Lesser General Public License as published by 

6# the Free Software Foundation, either version 3 of the License, or 

7# (at your option) any later version. 

8# 

9# This program is distributed in the hope that it will be useful, 

10# but WITHOUT ANY WARRANTY; without even the implied warranty of 

11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

12# GNU Lesser General Public License for more details. 

13# 

14# You should have received a copy of the GNU Lesser General Public License 

15# along with this program. If not, see <http://www.gnu.org/licenses/>. 

16 

17from logging import getLogger 

18 

19from requests import HTTPError, codes 

20 

21from gitlabracadabra.containers.blob import Blob 

22from gitlabracadabra.containers.const import ( 

23 DOCKER_MANIFEST_SCHEMA1, 

24 DOCKER_MANIFEST_SCHEMA1_SIGNED, 

25 DOCKER_MANIFEST_SCHEMA2, 

26 DOCKER_MANIFEST_SCHEMA2_LIST, 

27 OCI_IMAGE_INDEX, 

28 OCI_IMAGE_MANIFEST, 

29) 

30from gitlabracadabra.containers.manifest import Manifest 

31from gitlabracadabra.containers.registry_session import RegistrySession 

32from gitlabracadabra.containers.scope import PULL, PUSH_PULL 

33from gitlabracadabra.containers.with_blobs import WithBlobs 

34 

35logger = getLogger(__name__) 

36 

37 

class ImportStats:
    """Counters collected while importing a manifest and its blobs."""

    __slots__ = (
        "uploaded_count",
        "mounted_count",
        "existing_count",
        "uploaded_size",
        "mounted_size",
        "existing_size",
        "uploaded_manifests_count",
        "existing_manifests_count",
    )

    def __init__(self) -> None:
        """Start every counter at zero."""
        # Blob counters.
        self.uploaded_count = self.mounted_count = self.existing_count = 0
        # Blob sizes.
        self.uploaded_size = self.mounted_size = self.existing_size = 0
        # Manifest counters.
        self.uploaded_manifests_count = self.existing_manifests_count = 0

    @property
    def blobs_count(self) -> int:
        """Total number of blobs accounted for.

        Returns:
            The sum of the uploaded, mounted and existing blob counts.
        """
        return sum((self.uploaded_count, self.mounted_count, self.existing_count))

    @property
    def blobs_size(self) -> int:
        """Total size of the blobs accounted for.

        Returns:
            The sum of the uploaded, mounted and existing blob sizes.
        """
        return sum((self.uploaded_size, self.mounted_size, self.existing_size))

    @property
    def manifests_count(self) -> int:
        """Total number of manifests accounted for.

        Returns:
            The sum of the uploaded and existing manifest counts.
        """
        return sum((self.uploaded_manifests_count, self.existing_manifests_count))

    def any_stat(self) -> bool:
        """Tell whether anything was uploaded or mounted.

        The ``existing_*`` counters are not consulted.

        Returns:
            True if any uploaded or mounted counter (count or size) is non-zero.
        """
        return bool(
            self.uploaded_count
            or self.mounted_count
            or self.uploaded_size
            or self.mounted_size
            or self.uploaded_manifests_count
        )

109 

110 

class RegistryImporter(RegistrySession, WithBlobs):
    """Container registry importer methods."""

    # Size (50 MiB) of the chunks read from the source while streaming a blob
    # or a manifest to this registry.
    _UPLOAD_CHUNK_SIZE = 52428800

    def blob_from_digest(self, digest: str, *, preferred_manifest_name: str) -> Blob | None:
        """Return a Blob with the given digest, within the preferred manifest if possible.

        Args:
            digest: Digest of Blob to look for.
            preferred_manifest_name: Preferred manifest name.

        Returns:
            A Blob with the expected digest, or None when the digest is not
            registered under any manifest name.
        """
        manifest_names = self._blobs.get(digest, [])
        if not manifest_names:
            return None
        # Fall back to the first registered manifest name when the preferred
        # one does not hold the blob.
        if preferred_manifest_name in manifest_names:
            manifest_name = preferred_manifest_name
        else:
            manifest_name = manifest_names[0]
        return Blob(self, manifest_name, digest, size=self._sizes[digest])

    def import_manifest(
        self,
        source: Manifest,
        manifest_name: str | None = None,
        tag: str | None = None,
        *,
        platform: dict | str | None = None,
        log_prefix: str = "",
        dry_run: bool = False,
    ) -> None:
        """Import a manifest.

        HTTP errors raised while reading the source or pushing to this registry
        are logged as warnings and the import is abandoned.

        Args:
            source: Source manifest.
            manifest_name: Target manifest name (defaults to source's).
            tag: Target manifest tag (defaults to source's).
            platform: 'all' or a specific platform, defaults to linux/amd64.
            log_prefix: Log prefix.
            dry_run: Dry run.

        Raises:
            ValueError: Unsupported media type, or the requested platform is
                not present in the source manifest list.
        """
        stats = ImportStats()
        if manifest_name is None:
            manifest_name = source.manifest_name
        if tag is None:
            tag = source.tag
        if platform is None:
            platform = {"architecture": "amd64", "os": "linux"}
        try:
            source_mime_type = source.mime_type
            if source_mime_type in {DOCKER_MANIFEST_SCHEMA2_LIST, OCI_IMAGE_INDEX}:
                self._import_manifest_list(source, manifest_name, tag, platform=platform, stats=stats, dry_run=dry_run)
            elif source_mime_type in {DOCKER_MANIFEST_SCHEMA2, OCI_IMAGE_MANIFEST}:
                self._import_manifest(source, manifest_name, tag, stats=stats, dry_run=dry_run)
            elif source_mime_type in {DOCKER_MANIFEST_SCHEMA1, DOCKER_MANIFEST_SCHEMA1_SIGNED}:
                self._import_manifest_v1(source, manifest_name, tag, stats=stats, dry_run=dry_run)
            else:
                msg = f"Unsupported media type: {source_mime_type}"
                raise ValueError(msg)
        except HTTPError as err:
            logger.warning(
                "%s%s NOT imported as %s:%s: %s",
                log_prefix,
                str(source),
                manifest_name,
                tag,
                str(err),
            )
            return

        # Stay silent when nothing was uploaded or mounted.
        if stats.any_stat():
            logger.info(
                "%s%s %simported as %s:%s (%s, %s, %s)",
                log_prefix,
                str(source),
                "NOT " if dry_run else "",
                manifest_name,
                tag,
                f"{stats.uploaded_manifests_count}+{stats.existing_manifests_count}={stats.manifests_count} uploaded+existing manifests",
                f"{stats.uploaded_count}+{stats.mounted_count}+{stats.existing_count}={stats.blobs_count} uploaded+mounted+existing blobs",
                f"{stats.uploaded_size}+{stats.mounted_size}+{stats.existing_size}={stats.blobs_size} uploaded+mounted+existing blobs size",
            )

    def _import_manifest_list(
        self,
        source: Manifest,
        manifest_name: str,
        tag: str,
        *,
        platform: dict | str,
        stats: ImportStats,
        dry_run: bool,
    ) -> None:
        """Import a manifest list, entirely or for a single platform.

        Args:
            source: Source manifest list.
            manifest_name: Target manifest name.
            tag: Target manifest tag.
            platform: 'all' or the platform to pick from the list.
            stats: Import statistics.
            dry_run: Dry run.

        Raises:
            ValueError: No manifest of the list matches the platform.
        """
        if platform == "all" or source.forced_digest:
            self._import_manifest_list_all(source, manifest_name, tag=tag, stats=stats, dry_run=dry_run)
        else:
            # Import the first manifest matching the requested platform only.
            for manifest in source.manifests():
                if manifest.platform == platform:
                    self._import_manifest(manifest, manifest_name, tag, stats=stats, dry_run=dry_run)
                    return
            msg = f"Platform {platform} not found in manifest {source}"
            raise ValueError(msg)

    def _import_manifest_list_all(
        self,
        source: Manifest,
        manifest_name: str,
        *,
        tag: str | None = None,
        stats: ImportStats,
        dry_run: bool,
    ) -> None:
        """Import every manifest of a manifest list, then the list itself.

        Args:
            source: Source manifest list.
            manifest_name: Target manifest name.
            tag: Target manifest tag (defaults to source's).
            stats: Import statistics.
            dry_run: Dry run.
        """
        dest = Manifest(
            self,
            manifest_name,
            size=source.size,
            mime_type=source.mime_type,
            # Honour the requested target tag; fall back to the source tag.
            tag=source.tag if tag is None else tag,
        )
        if dest.exists() and dest.digest == source.digest:
            # Short path if manifest already exists
            stats.existing_manifests_count += 1
            self.register_manifest_blobs(dest)
            return
        # Each child manifest is imported under its own digest.
        for manifest in source.manifests():
            self._import_manifest(manifest, manifest_name, manifest.digest, stats=stats, dry_run=dry_run)
        if not dry_run:
            self._upload_manifest(source, dest)
        stats.uploaded_manifests_count += 1

    def _import_manifest(
        self,
        source: Manifest,
        manifest_name: str,
        tag: str,
        *,
        stats: ImportStats,
        dry_run: bool,
    ) -> None:
        """Import a single image manifest with its config and layers.

        Args:
            source: Source manifest.
            manifest_name: Target manifest name.
            tag: Target manifest tag.
            stats: Import statistics.
            dry_run: Dry run.
        """
        # https://docs.docker.com/registry/spec/api/#pushing-an-image
        manifest = Manifest(
            self,
            manifest_name,
            size=source.size,
            mime_type=source.mime_type,
            tag=tag,
        )
        if manifest.exists() and manifest.digest == source.digest:
            # Short path if manifest already exists
            stats.existing_manifests_count += 1
            self.register_manifest_blobs(manifest)
            return

        # Config blob first, then every layer, then the manifest itself.
        self._upload_blob_if_needed(
            source,
            manifest_name,
            source.json.get("config"),
            stats=stats,
            dry_run=dry_run,
        )
        for layer_json in source.json.get("layers"):
            self._upload_blob_if_needed(source, manifest_name, layer_json, stats=stats, dry_run=dry_run)
        if not dry_run:
            self._upload_manifest(source, manifest)
        stats.uploaded_manifests_count += 1

    def _import_manifest_v1(
        self,
        source: Manifest,
        manifest_name: str,
        tag: str,
        *,
        stats: ImportStats,
        dry_run: bool,
    ) -> None:
        """Import a schema 1 manifest with its layers.

        Args:
            source: Source manifest.
            manifest_name: Target manifest name.
            tag: Target manifest tag.
            stats: Import statistics.
            dry_run: Dry run.
        """
        # https://docs.docker.com/registry/spec/api/#pushing-an-image
        manifest = Manifest(
            self,
            manifest_name,
            size=source.size,
            mime_type=source.mime_type,
            tag=tag,
        )
        if manifest.exists() and manifest.digest == source.digest:
            # Short path if manifest already exists
            stats.existing_manifests_count += 1
            self.register_manifest_blobs(manifest)
            return

        for fs_layer_json in source.json.get("fsLayers"):
            # Only the digest is read from the fsLayers entry; size and media
            # type are filled with placeholders.
            layer_json = {
                "digest": fs_layer_json["blobSum"],
                "size": -1,
                "mediaType": None,
            }
            self._upload_blob_if_needed(source, manifest_name, layer_json, stats=stats, dry_run=dry_run)
        if not dry_run:
            self._upload_manifest(source, manifest)
        stats.uploaded_manifests_count += 1

    def _upload_blob_if_needed(
        self,
        source: Manifest,
        manifest_name: str,
        json: dict,
        *,
        stats: ImportStats,
        dry_run: bool,
    ) -> None:
        """Upload or mount a blob as needed.

        A blob already registered under the destination manifest name, or
        reported as existing by the registry, is only counted. Otherwise it is
        uploaded from the source when no registered copy is known, or mounted
        from the registered copy. In dry-run mode the operation is counted but
        not performed.

        Args:
            source: Source manifest.
            manifest_name: Destination manifest name.
            json: Blob json (as dict).
            stats: Import statistics.
            dry_run: Dry run.
        """
        blob = self._blob_from_json(manifest_name, json)
        existing_blob = self.blob_from_digest(blob.digest, preferred_manifest_name=manifest_name)
        if (existing_blob is not None and existing_blob.manifest_name == manifest_name) or blob.exists():
            stats.existing_count += 1
            stats.existing_size += blob.size
        elif existing_blob is None:
            if not dry_run:
                self._upload_blob(source, blob)
            stats.uploaded_size += blob.size
            stats.uploaded_count += 1
        else:
            if not dry_run:
                self._mount_blob(existing_blob, blob)
            stats.mounted_size += existing_blob.size
            stats.mounted_count += 1
        self.register_blob(blob)

    def _blob_from_json(self, manifest_name: str, json: dict) -> Blob:
        """Build a Blob of this registry from a manifest descriptor.

        Args:
            manifest_name: Manifest name the blob belongs to.
            json: Blob json (as dict) with "digest", "size" and "mediaType" keys.

        Returns:
            The corresponding Blob.
        """
        return Blob(
            self,
            manifest_name,
            json["digest"],
            size=json["size"],
            mime_type=json["mediaType"],
        )

    def _upload_blob(self, source: Manifest, blob: Blob) -> None:
        """Upload a blob.

        Nothing is done when the destination blob already exists.

        Args:
            source: Source manifest.
            blob: Destination blob to upload to.

        Raises:
            ValueError: Unexpected HTTP status when starting the upload.
        """
        if blob.exists():
            return
        upload_url = self._start_upload(blob)
        # The upload location may already carry a query string.
        separator = "&" if "?" in upload_url else "?"
        blob_upload_url = f"{upload_url}{separator}digest={blob.digest}"
        orig_blob = Blob(
            source.registry,
            source.manifest_name,
            blob.digest,
        )
        with orig_blob:
            chunk_size = self._UPLOAD_CHUNK_SIZE
            self.request(
                "put",
                blob_upload_url,
                # Stream the source blob chunk by chunk until read() returns b"".
                data=iter(lambda: orig_blob.read(chunk_size), b""),
                scopes={blob.scope(PUSH_PULL)},
            )

    def _start_upload(self, blob: Blob) -> str:
        """Start a blob upload.

        Args:
            blob: Destination blob to upload to.

        Returns:
            The value of the Location header of the response.

        Raises:
            ValueError: The response status is not "accepted".
        """
        response = self.request(
            "post",
            f"/v2/{blob.manifest_name}/blobs/uploads/",
            scopes={blob.scope(PUSH_PULL)},
        )
        if response.status_code != codes["accepted"]:
            msg = f"Unexpected status {response.status_code}"
            raise ValueError(msg)
        return response.headers["Location"]

    def _mount_blob(self, existing_blob: Blob, blob: Blob) -> None:
        """Mount a blob.

        Args:
            existing_blob: Existing blob.
            blob: Destination blob to mount to.

        Raises:
            ValueError: Unexpected HTTP status.
        """
        response = self.request(
            "post",
            f"/v2/{blob.manifest_name}/blobs/uploads/?mount={existing_blob.digest}&from={existing_blob.manifest_name}",
            scopes={
                blob.scope(PUSH_PULL),
                existing_blob.scope(PULL),
            },
        )
        if response.status_code != codes["created"]:
            msg = f"Unexpected HTTP status {response.status_code}"
            raise ValueError(msg)

    def _upload_manifest(self, source: Manifest, manifest: Manifest) -> None:
        """Upload a manifest.

        Args:
            source: Source manifest, whose content is streamed.
            manifest: Destination manifest (name and tag to push to).
        """
        with source:
            chunk_size = self._UPLOAD_CHUNK_SIZE
            registry_path = f"/v2/{manifest.manifest_name}/manifests/{manifest.tag}"
            self.request(
                "put",
                registry_path,
                scopes={manifest.scope(PUSH_PULL)},
                # Stream the source manifest chunk by chunk until read() returns b"".
                data=iter(lambda: source.read(chunk_size), b""),
                content_type=source.mime_type,
            )

438 data=iter(lambda: source.read(chunk_size), b""), 

439 content_type=source.mime_type, 

440 )