Coverage for pyguymer3/sha512_of_MP4.py: 69%
96 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-08 18:47 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-08 18:47 +0000
1#!/usr/bin/env python3
3# Define function ...
def sha512_of_MP4(
    fname,
    /,
    *,
    chunksize = 1048576,
    ignoreModificationTime = True,
):
    """Find the SHA-512 hash of an MP4 file

    This function returns the SHA-512 hash of the passed MP4 file as if the
    "Modification Time" field (in the "mvhd" atom in the "moov" atom) is set to
    zero. Using this function it is possible to discover that the only binary
    difference between two different MP4 files is the "Modification Time" field
    (in the "mvhd" atom in the "moov" atom).

    If this function is told not to ignore the "Modification Time" field (in the
    "mvhd" atom in the "moov" atom) then this function will return the SHA-512
    identically to any other method.

    Parameters
    ----------
    fname : str
        the input MP4 file name
    chunksize : int, optional
        the size of the chunks of any files which are read in (in bytes)
    ignoreModificationTime : bool, optional
        ignore the "Modification Time" field (in the "mvhd" atom in the "moov"
        atom)

    Returns
    -------
    hexdigest : str
        The hash hexdigest of the input MP4 file.

    Raises
    ------
    Exception
        If an atom name is not four lower-case ASCII letters, if the first
        atom is not "ftyp", if the "mvhd" atom does not have a 100 byte
        payload, or if no "moov" atom or no "mvhd" atom is found.

    Notes
    -----
    Only the top-level atoms and the direct children of the "moov" atom are
    parsed; the contents of every other atom are hashed verbatim.

    The following websites have some very useful information on how to parse MP4
    files - the first just forgot to say that integers are big-endian:

    - http://atomicparsley.sourceforge.net/mpeg-4files.html
    - https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html
    - https://wiki.multimedia.cx/index.php/QuickTime_container

    Copyright 2017 Thomas Guymer [1]_

    References
    ----------
    .. [1] PyGuymer3, https://github.com/Guymer/PyGuymer3
    """

    # Import standard modules ...
    import hashlib
    import os
    import re
    import struct

    # **************************************************************************

    # Construct a hash object ...
    hObj = hashlib.sha512()

    # Create short-hand ...
    fsize = os.path.getsize(fname)  # [B]

    # Set triggers ...
    foundFTYP = False
    foundMOOV = False
    foundMVHD = False

    # Open input MP4 file read-only ...
    with open(fname, "rb") as fObj:
        # Loop over entire contents of MP4 ...
        while fObj.tell() < fsize:
            # Attempt to read 4 bytes as a big-endian un-signed 32-bit integer
            # and pass it to the hash object ...
            src = fObj.read(4)
            val, = struct.unpack(">I", src)  # [B]
            hObj.update(src)
            off1 = 4  # [B]

            # Extract atom name and pass it to the hash object ...
            src = fObj.read(4)
            name = src.decode("utf-8")
            hObj.update(src)
            off1 += 4  # [B]

            # Check that the atom name matches the pattern ...
            if re.match(r"[a-z][a-z][a-z][a-z]", name) is None:
                raise Exception(f"\"{name}\" is not an atom name in \"{fname}\"") from None

            # Check that the input MP4 file is a MP4 file ...
            if not foundFTYP and name != "ftyp":
                raise Exception(f"\"{fname}\" is not a MP4") from None

            # Set trigger ...
            foundFTYP = True

            # Check the length ...
            if val == 0:
                # NOTE: This atom runs until EOF.

                # Pass the rest of the atom to the hash object using chunks ...
                while True:
                    chunk = fObj.read(chunksize)
                    if not chunk:
                        break
                    hObj.update(chunk)

                # Stop looping ...
                break

            # Check the length ...
            if val == 1:
                # NOTE: This atom has 64-bit sizes.

                # Attempt to read 8 bytes as a big-endian un-signed 64-bit
                # integer and pass it to the hash object ...
                src = fObj.read(8)
                val, = struct.unpack(">Q", src)  # [B]
                hObj.update(src)
                off1 += 8  # [B]

            # Create short-hand for the number of payload bytes which follow
            # the header of this atom ...
            rem1 = val - off1  # [B]

            # Check if it is the MOOV atom ...
            if name == "moov":
                # Set trigger ...
                foundMOOV = True

                # Save starting position ...
                pos = fObj.tell()

                # Loop over remaining contents of MOOV atom ...
                while fObj.tell() - pos < rem1:
                    # Attempt to read 4 bytes as a big-endian un-signed 32-bit
                    # integer and pass it to the hash object ...
                    src = fObj.read(4)
                    subVal, = struct.unpack(">I", src)  # [B]
                    hObj.update(src)
                    off2 = 4  # [B]

                    # Extract atom name and pass it to the hash object ...
                    src = fObj.read(4)
                    subName = src.decode("utf-8")
                    hObj.update(src)
                    off2 += 4  # [B]

                    # Check that the atom name matches the pattern ...
                    if re.match(r"[a-z][a-z][a-z][a-z]", subName) is None:
                        raise Exception(f"\"{subName}\" is not an atom name in \"{fname}\"") from None

                    # Check the length ...
                    if subVal == 0:
                        # NOTE: This atom runs until EOF.

                        # Pass the rest of the atom to the hash object using
                        # chunks ...
                        while True:
                            chunk = fObj.read(chunksize)
                            if not chunk:
                                break
                            hObj.update(chunk)

                        # Stop looping ...
                        break

                    # Check the length ...
                    if subVal == 1:
                        # NOTE: This atom has 64-bit sizes.

                        # Attempt to read 8 bytes as a big-endian un-signed
                        # 64-bit integer and pass it to the hash object ...
                        src = fObj.read(8)
                        subVal, = struct.unpack(">Q", src)  # [B]
                        hObj.update(src)
                        off2 += 8  # [B]

                    # Create short-hand ...
                    # NOTE: The header length of *this* child atom must be
                    #       subtracted (not the header length of the parent
                    #       "moov" atom) as the two differ whenever only one of
                    #       them uses a 64-bit size.
                    rem2 = subVal - off2  # [B]

                    # Check if it is the MVHD atom ...
                    if subName == "mvhd":
                        # Set trigger ...
                        foundMVHD = True

                        # Check that it is the correct size ...
                        if rem2 != 100:
                            raise Exception(f"the \"mvhd\" atom in \"{fname}\" is not the correct size") from None

                        # Pass the rest of the atom to the hash object (except
                        # the "Modification Time" for which instead pass 0 as a
                        # big-endian un-signed 32-bit integer) ...
                        # NOTE: See Figure 2-3 of https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html
                        hObj.update(fObj.read(8))
                        if ignoreModificationTime:
                            # Skip over the real bytes and hash zeros instead ...
                            fObj.read(4)
                            hObj.update(struct.pack(">I", 0))
                        else:
                            hObj.update(fObj.read(4))
                        hObj.update(fObj.read(88))
                    else:
                        # Pass the rest of the atom to the hash object using
                        # chunks ...
                        while True:
                            if rem2 == 0:
                                break
                            if rem2 <= chunksize:
                                hObj.update(fObj.read(rem2))
                                break
                            hObj.update(fObj.read(chunksize))
                            rem2 -= chunksize  # [B]
            else:
                # Pass the rest of the atom to the hash object using chunks ...
                while True:
                    if rem1 == 0:
                        break
                    if rem1 <= chunksize:
                        hObj.update(fObj.read(rem1))
                        break
                    hObj.update(fObj.read(chunksize))
                    rem1 -= chunksize  # [B]

    # Catch possible errors ...
    if not foundMOOV:
        raise Exception(f"did not find \"moov\" atom in \"{fname}\"") from None
    if not foundMVHD:
        raise Exception(f"did not find \"mvhd\" atom in \"{fname}\"") from None

    # Return hash hexdigest ...
    return hObj.hexdigest()