Coverage for src/gitlabracadabra/containers/with_digest.py: 86%
138 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-14 23:10 +0200
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-14 23:10 +0200
1#
2# Copyright (C) 2019-2025 Mathieu Parent <math.parent@gmail.com>
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Lesser General Public License as published by
6# the Free Software Foundation, either version 3 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Lesser General Public License for more details.
13#
14# You should have received a copy of the GNU Lesser General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
17from __future__ import annotations
19from hashlib import sha256
20from logging import getLogger
21from os.path import getsize, isfile
22from shutil import copy, copyfileobj
23from tempfile import NamedTemporaryFile
24from typing import IO, TYPE_CHECKING, BinaryIO
25from urllib.parse import quote
27from requests import HTTPError, Response, codes
29from gitlabracadabra.containers.const import DIGEST_HEADER, DOCKER_MANIFEST_SCHEMA1_SIGNED
30from gitlabracadabra.containers.scope import PULL, Scope
31from gitlabracadabra.disk_cache import cache_dir
33if TYPE_CHECKING: 33 ↛ 34line 33 didn't jump to line 34 because the condition on line 33 was never true
34 from typing import Self
36 from gitlabracadabra.containers.registry_importer import RegistryImporter
39logger = getLogger(__name__)
class WithDigest:
    """An object with a digest.

    Metadata (digest, size, MIME type) is either given at construction time or
    retrieved lazily from the registry on first access. Content is downloaded
    into the local ``containers_cache`` directory and exposed through the
    context-manager protocol and ``read()``.
    """

    # MIME types sent as ``accept`` when requesting the object (None: unset here).
    supported_mime_types: tuple[str, ...] | None = None

    def __init__(
        self,
        registry: RegistryImporter,
        manifest_name: str,
        digest: str | None = None,
        *,
        size: int | None = None,
        mime_type: str | None = None,
    ) -> None:
        """Initialize an object with a digest.

        Args:
            registry: Registry.
            manifest_name: Manifest name (Example: library/debian).
            digest: Digest (Example: sha256:5890f8ba95f680c87fcf89e51190098641b4f646102ce7ca906e7f83c84874dc).
            size: Size (Example: 42).
            mime_type: Content-Type / mediaType.
        """
        self._registry = registry
        self._manifest_name = manifest_name
        self._digest = digest
        self._size = size
        self._mime_type = mime_type
        # Cached result of exists(); None until the registry has been asked.
        self._exists: bool | None = None
        # Handle on the cached file while inside a ``with`` block.
        self._fd: BinaryIO | None = None
        # HTTP method used for metadata-only retrieval (attribute name kept as is).
        self._retrieve_mehod = "head"
        self.forced_digest = False

    def __eq__(self, other: object) -> bool:
        """Compare.

        Args:
            other: Object to compare with.

        Returns:
            True if one object is an instance of the other's type and all
            instance attributes (registry, manifest name, digest, size,
            MIME type, ...) are equal.
        """
        related = isinstance(self, type(other)) or isinstance(other, type(self))
        if not related:
            return False
        # Every instance is an ``object``, so values without an instance
        # dictionary (e.g. ``object()``) pass the check above. Treat them as
        # different instead of failing with AttributeError.
        return self.__dict__ == getattr(other, "__dict__", None)

    @property
    def registry(self) -> RegistryImporter:
        """Get the registry.

        Returns:
            The registry.
        """
        return self._registry

    @property
    def manifest_name(self) -> str:
        """Get the manifest name.

        Returns:
            The manifest name.
        """
        return self._manifest_name

    @property
    def digest(self) -> str:
        """Get the digest, retrieving it from the registry when unknown.

        Returns:
            The digest.

        Raises:
            ValueError: Unable to get digest.
        """
        if self._digest is None:
            self._retrieve()
        if self._digest is None:
            msg = "Unable to get digest"
            raise ValueError(msg)
        return self._digest

    @property
    def size(self) -> int:
        """Get the size.

        The size of the locally cached file is preferred; when there is no
        cached file, metadata is retrieved from the registry.

        Returns:
            The size.

        Raises:
            ValueError: Unable to get size.
        """
        if self._size is None:
            try:
                # Resolving cache_path reads self.digest, which itself triggers
                # a retrieval when the digest is still unknown.
                self._size = getsize(self.cache_path)
            except FileNotFoundError:
                self._retrieve()
        if self._size is None:
            msg = "Unable to get size"
            raise ValueError(msg)
        return self._size

    @property
    def mime_type(self) -> str | None:
        """Get the MIME type (mediaType), retrieving it when unknown.

        Returns:
            The MIME type, or None when it is still unknown after retrieval.
        """
        if self._mime_type is None:
            self._retrieve()
        return self._mime_type

    @property
    def cache_path(self) -> str:
        """Get the cache path (local).

        Returns:
            Local path: the URL-quoted digest inside the containers cache directory.
        """
        return str(cache_dir("containers_cache") / quote(self.digest, safe=""))

    @property
    def registry_path(self) -> str:
        """Get the registry path.

        Raises:
            NotImplementedError: Needs to be implemented in subclasses.
        """
        raise NotImplementedError

    def __enter__(self) -> Self:
        """Open the cached file, downloading it first when needed.

        Returns:
            self.

        Raises:
            RuntimeError: File already opened.
        """
        self._ensure_cached()
        if self._fd is not None:
            msg = "File already opened"
            raise RuntimeError(msg)
        self._fd = open(self.cache_path, "rb")  # noqa: SIM115
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:  # type: ignore
        """Close the cached file.

        Args:
            exc_type: Exception type.
            exc_val: Exception value.
            exc_tb: Exception traceback.
        """
        if self._fd is not None:
            self._fd.close()
            self._fd = None

    def read(self, n: int = -1) -> bytes:
        """Read the cached file.

        Args:
            n: buffer size.

        Returns:
            Bytes.

        Raises:
            ValueError: File is not opened.
        """
        if self._fd is None:
            msg = "File is not opened"
            raise ValueError(msg)
        return self._fd.read(n)

    def scope(self, actions: str = PULL) -> Scope:
        """Get a scope.

        Args:
            actions: Scope action.

        Returns:
            A scope for this object's manifest name.
        """
        return Scope(self.manifest_name, actions)

    def exists(self) -> bool:
        """Get Blob/Manifest existence in the associated registry.

        The registry is only asked once; the answer is cached. Whenever the
        object exists, register() is called before returning.

        Returns:
            True or False.

        Raises:
            HTTPError: Error when fetching existence.
        """
        if self._exists is None:
            try:
                self._retrieve()
                self._exists = True
            except HTTPError as err:
                # Only "not found" means the object is absent; anything else is a real error.
                if (err.response is None) or (err.response.status_code != codes["not_found"]):
                    raise
                self._exists = False
        if self._exists:
            self.register()
        return self._exists

    def register(self) -> None:
        """Notify the registry that the Digest exists."""
        # Overridden in Blob

    def _ensure_cached(self) -> None:
        """Download the content unless the digest is known and already cached locally."""
        if self._digest is None or not isfile(self.cache_path):
            self._retrieve(with_content=True)

    def _retrieve(self, *, with_content: bool = False) -> None:
        """Retrieve metadata, and optionally content, from the registry.

        Args:
            with_content: Force a "get" request so the body is downloaded too.

        Raises:
            ValueError: The digest announced by the registry differs from the known one.
        """
        method = "get" if with_content else self._retrieve_mehod
        with self._request(method) as response:
            headers = response.headers
            announced_digest = headers.get(DIGEST_HEADER)
            if self._digest is None:
                self._digest = announced_digest
            elif DIGEST_HEADER in headers and self._digest != announced_digest:
                msg = f"Retrieved digest does not match {announced_digest} != {self._digest}"
                raise ValueError(msg)
            if "Content-Type" in headers:
                self._mime_type = headers.get("Content-Type")
            # Content-Length may be absent (e.g. chunked responses); keep the
            # current size in that case so the size property can fall back to
            # the cached file or report "Unable to get size".
            if "Content-Length" in headers:
                self._size = int(headers["Content-Length"])
            if method != "head":
                self._download_and_verify(response)

    def _request(self, method: str) -> Response:
        """Send a streamed request for this object to the registry.

        Args:
            method: HTTP method name (Example: head).

        Returns:
            The registry response.
        """
        return self.registry.request(
            method,
            self.registry_path,
            scopes={self.scope()},
            accept=self.supported_mime_types,
            stream=True,
        )

    def _download_and_verify(self, response: Response) -> None:
        """Store the response body in the cache after checking its digest.

        Args:
            response: Streamed response whose raw body is the content.
        """
        # The temporary file lives in the cache directory and is removed on exit.
        with NamedTemporaryFile(dir=cache_dir("containers_cache")) as fp:
            copyfileobj(response.raw, fp)
            downloaded_digest = self._compute_digest(fp)
            if self._digest is None:
                self._digest = downloaded_digest
            else:
                self._verify_digest(downloaded_digest)
            copy(fp.name, self.cache_path)

    def _verify_digest(self, digest: str) -> None:
        """Check a computed digest against the expected one.

        Args:
            digest: Digest computed from the downloaded content.

        Raises:
            ValueError: Checksum mismatch (except for signed schema 1 manifests).
        """
        if digest != self._digest:
            if self._mime_type == DOCKER_MANIFEST_SCHEMA1_SIGNED:
                # https://docs.docker.com/registry/spec/api/#content-digests
                # "manifest body without the signature content, also known as the JWS payload"
                logger.info(
                    "Ignoring checksum mismatch for signed manifest %s: %s ! %s",
                    str(self),
                    digest,
                    self._digest,
                )
            else:
                msg = f"Checksum mismatch: {digest} != {self._digest}"
                raise ValueError(msg)

    def _compute_digest(self, fp: IO[bytes]) -> str:
        """Compute the SHA-256 digest of a file object, read from its start.

        Args:
            fp: Seekable binary file object.

        Returns:
            The digest, prefixed with "sha256:".
        """
        sha256_hash = sha256()
        buf_len = 4096
        fp.seek(0)
        for byte_block in iter(lambda: fp.read(buf_len), b""):
            sha256_hash.update(byte_block)
        return f"sha256:{sha256_hash.hexdigest()}"