For the files that have already been copied we keep a map from their
identity (st_dev, st_ino)
to their destination file name. Before
each copy we consult the map to see if a file with the same identity
was already copied. If that’s the case we do a hard link on the
destination file name instead of redoing the copy. To minimize the
size of the map we remember only the files which have more than one
name, i.e. those for which st_nlink > 1
.
let copied_files = (Hashtbl.create 53 : ((int * int), string) Hashtbl.t)
let rec copy source dest =
let infos = lstat source in
match infos.st_kind with
S_REG ->
if infos.st_nlink > 1 then begin
try
let dest' =
Hashtbl.find copied_files (infos.st_dev, infos.st_ino)
in link dest' dest
with Not_found ->
Hashtbl.add copied_files (infos.st_dev, infos.st_ino) dest;
file_copy source dest;
set_infos dest infos
end else begin
file_copy source dest;
set_infos dest infos
end
| S_LNK -> ...
* * *