1515import os
1616import re
1717import shutil
18+ import posixpath
1819
1920import numpy as np
2021
@@ -31,31 +32,6 @@ class FileNotFoundError(Exception):
3132 pass
3233
3334
def nipype_hardlink_wrapper(raw_src, raw_dst):
    """Attempt to use a hard link instead of a file copy.

    The intent is to avoid unnecessary duplication of large files when
    using a DataSink. Hard links are not supported on all file systems or
    OS environments, and will not succeed if the source and destination
    are not on the same partition. If the hard link cannot be created,
    fall back to a standard copy.

    Parameters
    ----------
    raw_src : str
        path of the existing file; symbolic links are resolved so the
        link is made to the real file
    raw_dst : str
        path of the hard link (or copy) to create; an existing entry at
        this path is removed first

    Returns
    -------
    None

    """
    # Use realpath to avoid hardlinking symlinks
    src = os.path.realpath(raw_src)
    # Use normpath (not realpath), in case destination is a symlink:
    # the symlink itself must be replaced, not the file it points to
    dst = os.path.normpath(raw_dst)
    # lexists rather than exists, so that a dangling symlink at dst is
    # removed too; otherwise the fallback copy would write through it
    if src != dst and os.path.lexists(dst):
        os.unlink(dst)  # First remove destination
    try:
        os.link(src, dst)  # Reference same inode to avoid duplication
    except (OSError, AttributeError, NotImplementedError):
        # OSError: cross-device link, unsupported file system, permissions
        # AttributeError/NotImplementedError: os.link unavailable here
        shutil.copyfile(src, dst)  # Fall back to traditional copy
57-
58-
5935def split_filename (fname ):
6036 """Split a filename into parts: path, base filename and extension.
6137
@@ -201,7 +177,13 @@ def hash_timestamp(afile):
201177
202178def copyfile (originalfile , newfile , copy = False , create_new = False ,
203179 hashmethod = None , use_hardlink = False ):
204- """Copy or symlink ``originalfile`` to ``newfile``.
180+ """Copy or link ``originalfile`` to ``newfile``.
181+
182+ If ``use_hardlink`` is True, and the file can be hard-linked, then a
183+ link is created, instead of copying the file.
184+
185+ If a hard link is not created and ``copy`` is False, then a symbolic
186+ link is created.
205187
206188 Parameters
207189 ----------
@@ -212,6 +194,9 @@ def copyfile(originalfile, newfile, copy=False, create_new=False,
212194 copy : Bool
213195 specifies whether to copy or symlink files
214196 (default=False) but only for POSIX systems
197+ use_hardlink : Bool
198+ specifies whether to hard-link files, when able
199+ (Default=False), taking precedence over copy
215200
216201 Returns
217202 -------
@@ -237,67 +222,84 @@ def copyfile(originalfile, newfile, copy=False, create_new=False,
237222 if hashmethod is None :
238223 hashmethod = config .get ('execution' , 'hash_method' ).lower ()
239224
240- elif os .path .exists (newfile ):
241- if hashmethod == 'timestamp' :
242- newhash = hash_timestamp (newfile )
243- elif hashmethod == 'content' :
244- newhash = hash_infile (newfile )
245- fmlogger .debug ("File: %s already exists,%s, copy:%d"
246- % (newfile , newhash , copy ))
247- # the following seems unnecessary
248- # if os.name is 'posix' and copy:
249- # if os.path.lexists(newfile) and os.path.islink(newfile):
250- # os.unlink(newfile)
251- # newhash = None
252- if os .name is 'posix' and not copy :
253- if os .path .lexists (newfile ):
254- if hashmethod == 'timestamp' :
255- orighash = hash_timestamp (originalfile )
256- elif hashmethod == 'content' :
257- orighash = hash_infile (originalfile )
258- fmlogger .debug ('Original hash: %s, %s' % (originalfile , orighash ))
259- if newhash != orighash :
260- os .unlink (newfile )
261- if (newhash is None ) or (newhash != orighash ):
262- try :
263- os .symlink (originalfile , newfile )
264- except OSError :
265- return copyfile (originalfile , newfile , True , create_new ,
266- hashmethod , use_hardlink )
267- else :
268- if newhash :
225+ # Existing file
226+ # -------------
227+ # Options:
228+ # symlink
229+ # to originalfile (keep if not (use_hardlink or copy))
230+ # to other file (unlink)
231+ # regular file
232+ # hard link to originalfile (keep)
233+ # copy of file (same hash) (keep)
234+ # different file (diff hash) (unlink)
235+ keep = False
236+ if os .path .lexists (newfile ):
237+ if os .path .islink (newfile ):
238+ if all (os .path .readlink (newfile ) == originalfile , not use_hardlink ,
239+ not copy ):
240+ keep = True
241+ elif posixpath .samefile (newfile , originalfile ):
242+ keep = True
243+ else :
269244 if hashmethod == 'timestamp' :
270- orighash = hash_timestamp ( originalfile )
245+ hashfn = hash_timestamp
271246 elif hashmethod == 'content' :
272- orighash = hash_infile (originalfile )
273- if (newhash is None ) or (newhash != orighash ):
274- try :
275- fmlogger .debug ("Copying File: %s->%s" %
276- (newfile , originalfile ))
277- if use_hardlink :
278- nipype_hardlink_wrapper (originalfile , newfile )
279- else :
280- shutil .copyfile (originalfile , newfile )
281- except shutil .Error as e :
282- fmlogger .warn (e .message )
283- else :
247+ hashfn = hash_infile
248+ newhash = hashfn (newfile )
249+ fmlogger .debug ("File: %s already exists,%s, copy:%d"
250+ % (newfile , newhash , copy ))
251+ orighash = hashfn (originalfile )
252+ keep = hashfn (newfile ) == hashfn (originalfile )
253+ if keep :
284254 fmlogger .debug ("File: %s already exists, not overwriting, copy:%d"
285255 % (newfile , copy ))
256+ else :
257+ os .unlink (newfile )
258+
259+ # New file
260+ # --------
261+ # use_hardlink & can_hardlink => hardlink
262+ # ~hardlink & ~copy & can_symlink => symlink
263+ # ~hardlink & ~symlink => copy
264+ if not keep and use_hardlink :
265+ try :
266+ fmlogger .debug ("Linking File: %s->%s" % (newfile , originalfile ))
267+ # Use realpath to avoid hardlinking symlinks
268+ os .link (os .path .realpath (originalfile ), newfile )
269+ except OSError :
270+ use_hardlink = False # Disable for associated files
271+ else :
272+ keep = True
273+
274+ if not keep and not copy and os .name == 'posix' :
275+ try :
276+ fmlogger .debug ("Symlinking File: %s->%s" % (newfile , originalfile ))
277+ os .symlink (originalfile , newfile )
278+ except OSError :
279+ copy = True # Disable for associated files
280+ else :
281+ keep = True
282+
283+ if not keep :
284+ try :
285+ fmlogger .debug ("Copying File: %s->%s" % (newfile , originalfile ))
286+ shutil .copyfile (originalfile , newfile )
287+ except shutil .Error as e :
288+ fmlogger .warn (e .message )
289+
290+ # Associated files
286291 if originalfile .endswith (".img" ):
287292 hdrofile = originalfile [:- 4 ] + ".hdr"
288293 hdrnfile = newfile [:- 4 ] + ".hdr"
289294 matofile = originalfile [:- 4 ] + ".mat"
290295 if os .path .exists (matofile ):
291296 matnfile = newfile [:- 4 ] + ".mat"
292- copyfile (matofile , matnfile , copy , create_new , hashmethod ,
293- use_hardlink )
294- copyfile (hdrofile , hdrnfile , copy , create_new , hashmethod ,
295- use_hardlink )
297+ copyfile (matofile , matnfile , copy , use_hardlink = use_hardlink )
298+ copyfile (hdrofile , hdrnfile , copy , use_hardlink = use_hardlink )
296299 elif originalfile .endswith (".BRIK" ):
297300 hdrofile = originalfile [:- 5 ] + ".HEAD"
298301 hdrnfile = newfile [:- 5 ] + ".HEAD"
299- copyfile (hdrofile , hdrnfile , copy , create_new , hashmethod ,
300- use_hardlink )
302+ copyfile (hdrofile , hdrnfile , copy , use_hardlink = use_hardlink )
301303
302304 return newfile
303305
0 commit comments