Coverage for pyguymer3/sha256_of_GZ.py: 95%
20 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-08 18:47 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-08 18:47 +0000
1#!/usr/bin/env python3
3# Define function ...
def sha256_of_GZ(
    fname,
    /,
    *,
    chunksize = 1048576,
    ignoreModificationTime = True,
):
    """Find the SHA-256 hash of a GZ file

    This function returns the SHA-256 hash of the passed GZ file as if the first
    "Modification Time" field is set to zero. Using this function it is possible
    to discover that the only binary difference between two different GZ files
    is the first "Modification Time" field.

    If this function is told not to ignore the first "Modification Time" field
    then this function will return the SHA-256 identically to any other method.

    Parameters
    ----------
    fname : str
        the input GZ file name
    chunksize : int, optional
        the size of the chunks of any files which are read in (in bytes)
    ignoreModificationTime : bool, optional
        ignore the first "Modification Time" field

    Returns
    -------
    hexdigest : str
        The hash hexdigest of the input GZ file.

    Raises
    ------
    Exception
        If the file does not start with the GZ magic number (``1f 8b``), or if
        the first "Modification Time" field is to be ignored but the file ends
        before that field is complete.

    Notes
    -----
    The following websites have some very useful information on how to parse GZ
    files:

    - https://en.wikipedia.org/wiki/Gzip#File_format
    - https://tools.ietf.org/html/rfc1952.html#page-5

    The first 8 bytes of a GZ member are: 2 bytes of magic number, 1 byte of
    "Compression Method", 1 byte of "Flags" and then the 4-byte little-endian
    "Modification Time" field. Only those last 4 bytes are ever replaced.

    Copyright 2017 Thomas Guymer [1]_

    References
    ----------
    .. [1] PyGuymer3, https://github.com/Guymer/PyGuymer3
    """

    # Import standard modules ...
    import hashlib
    import struct

    # **************************************************************************

    # Construct a hash object ...
    hObj = hashlib.sha256()

    # Open input GZ file read-only ...
    with open(fname, "rb") as fObj:
        # Attempt to read the 2-byte magic number and pass it to the hash
        # object ...
        src = fObj.read(2)
        hObj.update(src)

        # Check that this is a GZ file ...
        if src != b"\x1f\x8b":
            raise Exception(f"\"{fname}\" is not a GZ") from None

        # Pass the "Compression Method" and "Flags" bytes to the hash object ...
        hObj.update(fObj.read(2))

        # Read the 4-byte "Modification Time" field ...
        mtime = fObj.read(4)

        # Check what the user wants to do ...
        if ignoreModificationTime:
            # Refuse to substitute a field which is not fully present, as the
            # resulting hash would describe more bytes than the file holds ...
            if len(mtime) != 4:
                raise Exception(f"\"{fname}\" is a truncated GZ") from None

            # Pass 0 as a little-endian un-signed 32-bit integer to the hash
            # object (instead of the field which was just read) ...
            hObj.update(struct.pack("<I", 0))
        else:
            # Pass the 4 bytes, exactly as read, to the hash object ...
            hObj.update(mtime)

        # Pass the rest of the file to the hash object using chunks (an empty
        # read signals the end of the file) ...
        while chunk := fObj.read(chunksize):
            hObj.update(chunk)

    # Return hash hexdigest ...
    return hObj.hexdigest()