Coverage for src/gitlabracadabra/containers/registry_importer.py: 85%
155 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-14 23:10 +0200
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-14 23:10 +0200
1#
2# Copyright (C) 2019-2025 Mathieu Parent <math.parent@gmail.com>
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Lesser General Public License as published by
6# the Free Software Foundation, either version 3 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Lesser General Public License for more details.
13#
14# You should have received a copy of the GNU Lesser General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
17from logging import getLogger
19from requests import HTTPError, codes
21from gitlabracadabra.containers.blob import Blob
22from gitlabracadabra.containers.const import (
23 DOCKER_MANIFEST_SCHEMA1,
24 DOCKER_MANIFEST_SCHEMA1_SIGNED,
25 DOCKER_MANIFEST_SCHEMA2,
26 DOCKER_MANIFEST_SCHEMA2_LIST,
27 OCI_IMAGE_INDEX,
28 OCI_IMAGE_MANIFEST,
29)
30from gitlabracadabra.containers.manifest import Manifest
31from gitlabracadabra.containers.registry_session import RegistrySession
32from gitlabracadabra.containers.scope import PULL, PUSH_PULL
33from gitlabracadabra.containers.with_blobs import WithBlobs
35logger = getLogger(__name__)
class ImportStats:
    """Counters accumulated while importing a manifest.

    Blobs are tracked by count and by cumulative size in three buckets
    (uploaded, mounted, existing); manifests are tracked by count in two
    buckets (uploaded, existing).
    """

    __slots__ = (
        "uploaded_count",
        "mounted_count",
        "existing_count",
        "uploaded_size",
        "mounted_size",
        "existing_size",
        "uploaded_manifests_count",
        "existing_manifests_count",
    )

    def __init__(self) -> None:
        """Start with every counter at zero."""
        for counter_name in ImportStats.__slots__:
            setattr(self, counter_name, 0)

    @property
    def blobs_count(self) -> int:
        """Get total blob count.

        Returns:
            The number of blobs uploaded + mounted + existing.
        """
        per_bucket = (self.uploaded_count, self.mounted_count, self.existing_count)
        return sum(per_bucket)

    @property
    def blobs_size(self) -> int:
        """Get total blob size.

        Returns:
            The size of blobs uploaded + mounted + existing.
        """
        per_bucket = (self.uploaded_size, self.mounted_size, self.existing_size)
        return sum(per_bucket)

    @property
    def manifests_count(self) -> int:
        """Get total manifest count.

        Returns:
            The number of manifests uploaded + existing.
        """
        return sum((self.uploaded_manifests_count, self.existing_manifests_count))

    def any_stat(self) -> bool:
        """Test whether anything was uploaded or mounted.

        The ``existing_*`` counters are not consulted: a run that only found
        already-present blobs and manifests reports False.

        Returns:
            True if any uploaded/mounted counter is non-zero.
        """
        first_non_zero = (
            self.uploaded_count
            or self.mounted_count
            or self.uploaded_size
            or self.mounted_size
            or self.uploaded_manifests_count
        )
        return bool(first_non_zero)
class RegistryImporter(RegistrySession, WithBlobs):
    """Container registry importer methods."""

    # Chunk size (50 MiB) used when streaming blobs and manifests to the registry.
    _UPLOAD_CHUNK_SIZE = 50 * 1024 * 1024

    def blob_from_digest(self, digest: str, *, preferred_manifest_name: str) -> Blob | None:
        """Return a Blob with the given digest, within the preferred manifest if possible.

        Args:
            digest: Digest of Blob to look for.
            preferred_manifest_name: Preferred manifest name.

        Returns:
            A Blob with the expected digest, or None.
        """
        manifest_names = self._blobs.get(digest, [])
        if not manifest_names:
            return None
        if preferred_manifest_name in manifest_names:
            chosen_name = preferred_manifest_name
        else:
            # Any manifest name known to hold the digest will do.
            chosen_name = manifest_names[0]
        return Blob(self, chosen_name, digest, size=self._sizes[digest])

    def import_manifest(
        self,
        source: Manifest,
        manifest_name: str | None = None,
        tag: str | None = None,
        *,
        platform: dict | str | None = None,
        log_prefix: str = "",
        dry_run: bool = False,
    ) -> None:
        """Import a manifest.

        HTTP errors raised while reading the source or pushing to this
        registry are logged as a warning and the import is abandoned.

        Args:
            source: Source manifest.
            manifest_name: Target manifest name (defaults to source's).
            tag: Target manifest tag (defaults to source's).
            platform: 'all' or a specific platform, defaults to linux/amd64.
            log_prefix: Log prefix.
            dry_run: Dry run.

        Raises:
            ValueError: Unsupported media type, requested platform missing
                from a manifest list, or unexpected HTTP status when
                starting an upload or mounting a blob.
        """
        stats = ImportStats()
        if manifest_name is None:
            manifest_name = source.manifest_name
        if tag is None:
            tag = source.tag
        if platform is None:
            platform = {"architecture": "amd64", "os": "linux"}
        try:
            source_mime_type = source.mime_type
            if source_mime_type in {DOCKER_MANIFEST_SCHEMA2_LIST, OCI_IMAGE_INDEX}:
                self._import_manifest_list(source, manifest_name, tag, platform=platform, stats=stats, dry_run=dry_run)
            elif source_mime_type in {DOCKER_MANIFEST_SCHEMA2, OCI_IMAGE_MANIFEST}:
                self._import_manifest(source, manifest_name, tag, stats=stats, dry_run=dry_run)
            elif source_mime_type in {DOCKER_MANIFEST_SCHEMA1, DOCKER_MANIFEST_SCHEMA1_SIGNED}:
                self._import_manifest_v1(source, manifest_name, tag, stats=stats, dry_run=dry_run)
            else:
                msg = f"Unsupported media type: {source_mime_type}"
                raise ValueError(msg)
        except HTTPError as err:
            logger.warning(
                "%s%s NOT imported as %s:%s: %s",
                log_prefix,
                str(source),
                manifest_name,
                tag,
                str(err),
            )
            return

        # Stay silent when nothing was uploaded or mounted.
        if stats.any_stat():
            logger.info(
                "%s%s %simported as %s:%s (%s, %s, %s)",
                log_prefix,
                str(source),
                "NOT " if dry_run else "",
                manifest_name,
                tag,
                f"{stats.uploaded_manifests_count}+{stats.existing_manifests_count}={stats.manifests_count} uploaded+existing manifests",
                f"{stats.uploaded_count}+{stats.mounted_count}+{stats.existing_count}={stats.blobs_count} uploaded+mounted+existing blobs",
                f"{stats.uploaded_size}+{stats.mounted_size}+{stats.existing_size}={stats.blobs_size} uploaded+mounted+existing blobs size",
            )

    def _import_manifest_list(
        self,
        source: Manifest,
        manifest_name: str,
        tag: str,
        *,
        platform: dict | str,
        stats: ImportStats,
        dry_run: bool,
    ) -> None:
        """Import a manifest list (or image index).

        The whole list is imported when ``platform`` is 'all' or the source
        has a forced digest; otherwise only the first entry whose platform
        equals ``platform`` is imported.

        Args:
            source: Source manifest list.
            manifest_name: Target manifest name.
            tag: Target manifest tag.
            platform: 'all' or a specific platform.
            stats: Import statistics.
            dry_run: Dry run.

        Raises:
            ValueError: Platform not found in the manifest list.
        """
        if platform == "all" or source.forced_digest:
            # Forward the requested tag so the list is pushed where the caller asked.
            self._import_manifest_list_all(source, manifest_name, tag, stats=stats, dry_run=dry_run)
            return
        for manifest in source.manifests():
            if manifest.platform == platform:
                self._import_manifest(manifest, manifest_name, tag, stats=stats, dry_run=dry_run)
                return
        msg = f"Platform {platform} not found in manifest {source}"
        raise ValueError(msg)

    def _import_manifest_list_all(
        self,
        source: Manifest,
        manifest_name: str,
        tag: str | None = None,
        *,
        stats: ImportStats,
        dry_run: bool,
    ) -> None:
        """Import every manifest of a manifest list, then the list itself.

        Args:
            source: Source manifest list.
            manifest_name: Target manifest name.
            tag: Target manifest tag (defaults to source's).
            stats: Import statistics.
            dry_run: Dry run.
        """
        dest = self._dest_manifest(source, manifest_name, source.tag if tag is None else tag)
        if self._already_imported(source, dest, stats=stats):
            return
        for manifest in source.manifests():
            # Each listed manifest is pushed under its own digest.
            self._import_manifest(manifest, manifest_name, manifest.digest, stats=stats, dry_run=dry_run)
        if not dry_run:
            self._upload_manifest(source, dest)
        stats.uploaded_manifests_count += 1

    def _import_manifest(
        self,
        source: Manifest,
        manifest_name: str,
        tag: str,
        *,
        stats: ImportStats,
        dry_run: bool,
    ) -> None:
        """Import an image manifest: its config blob, its layers, then the manifest.

        Args:
            source: Source manifest.
            manifest_name: Target manifest name.
            tag: Target manifest tag.
            stats: Import statistics.
            dry_run: Dry run.
        """
        # https://docs.docker.com/registry/spec/api/#pushing-an-image
        manifest = self._dest_manifest(source, manifest_name, tag)
        if self._already_imported(source, manifest, stats=stats):
            return

        self._upload_blob_if_needed(
            source,
            manifest_name,
            source.json.get("config"),
            stats=stats,
            dry_run=dry_run,
        )
        for layer_json in source.json.get("layers"):
            self._upload_blob_if_needed(source, manifest_name, layer_json, stats=stats, dry_run=dry_run)
        if not dry_run:
            self._upload_manifest(source, manifest)
        stats.uploaded_manifests_count += 1

    def _import_manifest_v1(
        self,
        source: Manifest,
        manifest_name: str,
        tag: str,
        *,
        stats: ImportStats,
        dry_run: bool,
    ) -> None:
        """Import a schema 1 manifest: its ``fsLayers`` blobs, then the manifest.

        Args:
            source: Source manifest.
            manifest_name: Target manifest name.
            tag: Target manifest tag.
            stats: Import statistics.
            dry_run: Dry run.
        """
        # https://docs.docker.com/registry/spec/api/#pushing-an-image
        manifest = self._dest_manifest(source, manifest_name, tag)
        if self._already_imported(source, manifest, stats=stats):
            return

        for fs_layer_json in source.json.get("fsLayers"):
            # fsLayers entries only provide "blobSum": size and media type
            # are filled with placeholders.
            layer_json = {
                "digest": fs_layer_json["blobSum"],
                "size": -1,
                "mediaType": None,
            }
            self._upload_blob_if_needed(source, manifest_name, layer_json, stats=stats, dry_run=dry_run)
        if not dry_run:
            self._upload_manifest(source, manifest)
        stats.uploaded_manifests_count += 1

    def _dest_manifest(self, source: Manifest, manifest_name: str, tag: str) -> Manifest:
        """Build the destination manifest mirroring the source's size and media type.

        Args:
            source: Source manifest.
            manifest_name: Target manifest name.
            tag: Target manifest tag.

        Returns:
            A Manifest bound to this registry.
        """
        return Manifest(
            self,
            manifest_name,
            size=source.size,
            mime_type=source.mime_type,
            tag=tag,
        )

    def _already_imported(self, source: Manifest, dest: Manifest, *, stats: ImportStats) -> bool:
        """Take the short path when the destination already matches the source.

        When the destination exists with the same digest as the source, it is
        counted as existing and passed to ``register_manifest_blobs``.

        Args:
            source: Source manifest.
            dest: Destination manifest.
            stats: Import statistics.

        Returns:
            True if nothing needs to be imported.
        """
        if dest.exists() and dest.digest == source.digest:
            stats.existing_manifests_count += 1
            self.register_manifest_blobs(dest)
            return True
        return False

    def _upload_blob_if_needed(
        self,
        source: Manifest,
        manifest_name: str,
        json: dict,
        *,
        stats: ImportStats,
        dry_run: bool,
    ) -> None:
        """Upload or mount a blob as needed.

        Args:
            source: Source manifest.
            manifest_name: Destination manifest name.
            json: Blob json (as dict).
            stats: Import statistics.
            dry_run: Dry run.
        """
        blob = self._blob_from_json(manifest_name, json)
        existing_blob = self.blob_from_digest(blob.digest, preferred_manifest_name=manifest_name)
        if (existing_blob is not None and existing_blob.manifest_name == manifest_name) or blob.exists():
            # Already registered under the destination name, or found by blob.exists().
            stats.existing_count += 1
            stats.existing_size += blob.size
        elif existing_blob is None:
            # Not known under any manifest name: copy it from the source.
            if not dry_run:
                self._upload_blob(source, blob)
            stats.uploaded_size += blob.size
            stats.uploaded_count += 1
        else:
            # Known under another manifest name: mount instead of uploading.
            if not dry_run:
                self._mount_blob(existing_blob, blob)
            stats.mounted_size += existing_blob.size
            stats.mounted_count += 1
        # Registered even in dry-run mode.
        self.register_blob(blob)

    def _blob_from_json(self, manifest_name: str, json: dict) -> Blob:
        """Build a destination Blob from a manifest descriptor.

        Args:
            manifest_name: Destination manifest name.
            json: Blob json (as dict) with "digest", "size" and "mediaType" keys.

        Returns:
            A Blob bound to this registry.
        """
        return Blob(
            self,
            manifest_name,
            json["digest"],
            size=json["size"],
            mime_type=json["mediaType"],
        )

    def _upload_blob(self, source: Manifest, blob: Blob) -> None:
        """Upload a blob.

        Args:
            source: Source manifest.
            blob: Destination blob to upload to.
        """
        if blob.exists():
            return
        upload_url = self._start_upload(blob)
        # The upload location may already carry a query string.
        separator = "&" if "?" in upload_url else "?"
        blob_upload_url = f"{upload_url}{separator}digest={blob.digest}"
        orig_blob = Blob(
            source.registry,
            source.manifest_name,
            blob.digest,
        )
        with orig_blob:
            chunk_size = self._UPLOAD_CHUNK_SIZE
            self.request(
                "put",
                blob_upload_url,
                # Stream the source blob chunk by chunk until read() returns b"".
                data=iter(lambda: orig_blob.read(chunk_size), b""),
                scopes={blob.scope(PUSH_PULL)},
            )

    def _start_upload(self, blob: Blob) -> str:
        """Start a blob upload.

        Args:
            blob: Destination blob to upload to.

        Returns:
            The upload URL taken from the response's Location header.

        Raises:
            ValueError: Unexpected HTTP status.
        """
        response = self.request(
            "post",
            f"/v2/{blob.manifest_name}/blobs/uploads/",
            scopes={blob.scope(PUSH_PULL)},
        )
        if response.status_code != codes["accepted"]:
            msg = f"Unexpected status {response.status_code}"
            raise ValueError(msg)
        return response.headers["Location"]

    def _mount_blob(self, existing_blob: Blob, blob: Blob) -> None:
        """Mount a blob.

        Args:
            existing_blob: Existing blob.
            blob: Destination blob to mount to.

        Raises:
            ValueError: Unexpected HTTP status.
        """
        response = self.request(
            "post",
            f"/v2/{blob.manifest_name}/blobs/uploads/?mount={existing_blob.digest}&from={existing_blob.manifest_name}",
            scopes={
                blob.scope(PUSH_PULL),
                existing_blob.scope(PULL),
            },
        )
        if response.status_code != codes["created"]:
            msg = f"Unexpected HTTP status {response.status_code}"
            raise ValueError(msg)

    def _upload_manifest(self, source: Manifest, manifest: Manifest) -> None:
        """Upload a manifest.

        Args:
            source: Source manifest, streamed as the request body.
            manifest: Destination manifest (name and tag to push to).
        """
        with source:
            chunk_size = self._UPLOAD_CHUNK_SIZE
            registry_path = f"/v2/{manifest.manifest_name}/manifests/{manifest.tag}"
            self.request(
                "put",
                registry_path,
                scopes={manifest.scope(PUSH_PULL)},
                # Stream the source manifest chunk by chunk until read() returns b"".
                data=iter(lambda: source.read(chunk_size), b""),
                content_type=source.mime_type,
            )