@@ -161,6 +161,7 @@ def __init__(
161161 self .dldir .mkdir (exist_ok = True )
162162 self .to_download : list ["sdlclient.RemoteFile" ] = []
163163 self .rs = session
164+ self .known_hashes = {}
164165
def register(self, files: list["sdlclient.RemoteFile"]):
    """
    Queue ``files`` for a later download run.

    The incoming iterable is copied before it is appended, so the queue
    never aliases the caller's container.
    """
    snapshot = [*files]
    self.to_download.extend(snapshot)
def stat(self):
    """
    Summarise the registered downloads, counting each distinct hash once.

    A file counts as "cached" when a regular file with its filename exists
    in the download directory; its content is not verified here.

    Returns a tuple (files to download, total size, cached files, cached bytes)
    """
    # Later entries win when several registered files share one hash.
    by_hash = {}
    for entry in self.to_download:
        by_hash[entry.hash] = entry
    unique_dl = list(by_hash.values())

    nbytes = sum(entry.size for entry in unique_dl)
    cached = [entry for entry in unique_dl if Path(self.dldir / entry.filename).is_file()]
    cbytes = sum(entry.size for entry in cached)
    return (len(unique_dl), nbytes, len(cached), cbytes)
176178
177179 def download (self , progress_cb = None ):
178180 for idx , f in enumerate (self .to_download ):
179181 if progress_cb :
180182 progress_cb (idx , len (self .to_download ), f .filename )
181183 target = Path (self .dldir / f .filename )
184+ # check if we have the file under the exact filename
182185 if target .is_file ():
183186 with open (target , "rb" ) as fd :
184187 digest = hashlib .file_digest (fd , "sha1" )
185188 if digest .hexdigest () == f .hash :
189+ self .known_hashes [f .hash ] = f .filename
186190 continue
187191 else :
188192 print (f"Checksum mismatch on { f .filename } . Download again." , file = sys .stderr )
193+ # check if we have a file with the same hash and link to it
194+ o_filename = self .known_hashes .get (f .hash )
195+ if o_filename :
196+ o_path = Path (self .dldir / o_filename ).resolve ()
197+ target .symlink_to (o_path .relative_to (self .dldir .resolve ()))
198+ continue
199+
189200 fdst = target .with_suffix (target .suffix + ".tmp" )
190201 with self .rs .get (f .downloadurl , stream = True ) as r :
191202 r .raise_for_status ()
192- with open (fdst , "wb" ) as f :
193- shutil .copyfileobj (r .raw , f )
203+ with open (fdst , "wb" ) as fp :
204+ shutil .copyfileobj (r .raw , fp )
194205 fdst .rename (target )
206+ self .known_hashes [f .hash ] = f .filename
0 commit comments