Coverage for pyguymer3/sha512_of_MP4.py: 69%

96 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-08 18:47 +0000

1#!/usr/bin/env python3 

2 

3# Define function ... 

def sha512_of_MP4(
    fname,
    /,
    *,
    chunksize = 1048576,
    ignoreModificationTime = True,
):
    """Find the SHA-512 hash of a MP4 file

    This function returns the SHA-512 hash of the passed MP4 file as if the
    "Modification Time" field (in the "mvhd" atom in the "moov" atom) is set to
    zero. Using this function it is possible to discover that the only binary
    difference between two different MP4 files is the "Modification Time" field
    (in the "mvhd" atom in the "moov" atom).

    If this function is told not to ignore the "Modification Time" field (in the
    "mvhd" atom in the "moov" atom) then this function will return the SHA-512
    identically to any other method.

    Parameters
    ----------
    fname : str
        the input MP4 file name
    chunksize : int, optional
        the size of the chunks of any files which are read in (in bytes)
    ignoreModificationTime : bool, optional
        ignore the "Modification Time" field (in the "mvhd" atom in the "moov"
        atom)

    Returns
    -------
    hexdigest : str
        The hash hexdigest of the input MP4 file.

    Raises
    ------
    Exception
        If an atom name is not four lower-case ASCII letters, if the first atom
        is not "ftyp", if an atom declares a size which is smaller than its own
        header, if the "mvhd" atom does not have a 100 byte payload or if no
        "moov"/"mvhd" atom is found.
    struct.error
        If the file ends part-way through an atom size field.

    Notes
    -----
    Only the top-level atoms and the direct children of the "moov" atom are
    parsed; the payloads of all other atoms are hashed verbatim.

    The following websites have some very useful information on how to parse MP4
    files - the first just forgot to say that integers are big-endian:

    - http://atomicparsley.sourceforge.net/mpeg-4files.html
    - https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html
    - https://wiki.multimedia.cx/index.php/QuickTime_container

    Copyright 2017 Thomas Guymer [1]_

    References
    ----------
    .. [1] PyGuymer3, https://github.com/Guymer/PyGuymer3
    """

    # Import standard modules ...
    import hashlib
    import os
    import re
    import struct

    # **************************************************************************

    # Construct a hash object ...
    hObj = hashlib.sha512()

    # Create short-hands ...
    fsize = os.path.getsize(fname)                                              # [B]
    atomPattern = re.compile(r"[a-z][a-z][a-z][a-z]")

    # Open input MP4 file read-only ...
    with open(fname, "rb") as fObj:
        # Define helper: read the 4 byte size and the 4 byte name of the next
        # atom, pass both to the hash object and return them ...
        def readHeader():
            # Attempt to read 4 bytes as a big-endian un-signed 32-bit integer
            # and pass it to the hash object ...
            src = fObj.read(4)
            size, = struct.unpack(">I", src)                                    # [B]
            hObj.update(src)

            # Extract atom name and pass it to the hash object ...
            # NOTE: Undecodable bytes are replaced (rather than raising a
            #       UnicodeDecodeError) so that binary junk is reported by the
            #       atom name check below.
            src = fObj.read(4)
            name = src.decode("utf-8", errors = "replace")
            hObj.update(src)

            # Check that the atom name matches the pattern ...
            if atomPattern.match(name) is None:
                raise Exception(f"\"{name}\" is not an atom name in \"{fname}\"") from None

            # Return answer ...
            return size, name

        # Define helper: read the 8 byte extended size of an atom, pass it to
        # the hash object and return it ...
        def readSize64():
            # Attempt to read 8 bytes as a big-endian un-signed 64-bit integer
            # and pass it to the hash object ...
            src = fObj.read(8)
            size, = struct.unpack(">Q", src)                                    # [B]
            hObj.update(src)

            # Return answer ...
            return size

        # Define helper: pass everything up to EOF to the hash object using
        # chunks ...
        def hashToEOF():
            while chunk := fObj.read(chunksize):
                hObj.update(chunk)

        # Define helper: pass the next "nbytes" bytes to the hash object using
        # chunks ...
        def hashBytes(nbytes):
            while nbytes > 0:
                chunk = fObj.read(min(nbytes, chunksize))
                if not chunk:
                    # NOTE: The file is shorter than the atom claims.
                    break
                hObj.update(chunk)
                nbytes -= len(chunk)                                            # [B]

        # Set triggers ...
        foundFTYP = False
        foundMOOV = False
        foundMVHD = False

        # Loop over entire contents of MP4 ...
        while fObj.tell() < fsize:
            # Read the header of the top-level atom ...
            size1, name1 = readHeader()
            off1 = 8                                                            # [B]

            # Check that the input MP4 file is a MP4 file ...
            if not foundFTYP and name1 != "ftyp":
                raise Exception(f"\"{fname}\" is not a MP4") from None

            # Set trigger ...
            foundFTYP = True

            # Check the length ...
            if size1 == 0:
                # NOTE: This atom runs until EOF.

                # Pass the rest of the atom to the hash object and stop
                # looping ...
                hashToEOF()
                break

            # Check the length ...
            if size1 == 1:
                # NOTE: This atom has 64-bit sizes.
                size1 = readSize64()                                            # [B]
                off1 += 8                                                       # [B]

            # Create short-hand for the payload size and check it ...
            rem1 = size1 - off1                                                 # [B]
            if rem1 < 0:
                raise Exception(f"the \"{name1}\" atom in \"{fname}\" has an invalid size") from None

            # Check if it is not the MOOV atom ...
            if name1 != "moov":
                # Pass the rest of the atom to the hash object and move on to
                # the next top-level atom ...
                hashBytes(rem1)
                continue

            # Set trigger ...
            foundMOOV = True

            # Save starting position ...
            pos = fObj.tell()

            # Loop over remaining contents of MOOV atom ...
            while fObj.tell() - pos < rem1:
                # Read the header of the child atom ...
                size2, name2 = readHeader()
                off2 = 8                                                        # [B]

                # Check the length ...
                if size2 == 0:
                    # NOTE: This atom runs until EOF.

                    # Pass the rest of the atom to the hash object and stop
                    # looping ...
                    hashToEOF()
                    break

                # Check the length ...
                if size2 == 1:
                    # NOTE: This atom has 64-bit sizes.
                    size2 = readSize64()                                        # [B]
                    off2 += 8                                                   # [B]

                # Create short-hand for the payload size and check it ...
                # NOTE: The child's own header length ("off2") must be used
                #       here, not the header length of the parent MOOV atom, as
                #       the two differ when only one of them uses 64-bit sizes.
                rem2 = size2 - off2                                             # [B]
                if rem2 < 0:
                    raise Exception(f"the \"{name2}\" atom in \"{fname}\" has an invalid size") from None

                # Check if it is not the MVHD atom ...
                if name2 != "mvhd":
                    # Pass the rest of the atom to the hash object and move on
                    # to the next child atom ...
                    hashBytes(rem2)
                    continue

                # Set trigger ...
                foundMVHD = True

                # Check that it is the correct size ...
                if rem2 != 100:
                    raise Exception(f"the \"mvhd\" atom in \"{fname}\" is not the correct size") from None

                # Pass the rest of the atom to the hash object (except the
                # "Modification Time" for which instead pass 0 as a big-endian
                # un-signed 32-bit integer) ...
                # NOTE: See Figure 2-3 of https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html
                hObj.update(fObj.read(8))
                if ignoreModificationTime:
                    fObj.read(4)
                    hObj.update(struct.pack(">I", 0))
                else:
                    hObj.update(fObj.read(4))
                hObj.update(fObj.read(88))

    # Catch possible errors ...
    if not foundMOOV:
        raise Exception(f"did not find \"moov\" atom in \"{fname}\"") from None
    if not foundMVHD:
        raise Exception(f"did not find \"mvhd\" atom in \"{fname}\"") from None

    # Return hash hexdigest ...
    return hObj.hexdigest()