We reuse the data structures already defined above and collect them in
a Tarlib
module. We define a warning function which does not stop
the program or alter the return code of the program.
open Sys
open Unix
open Tarlib
let warning path message = prerr_endline (path ^ ": " ^ message)
We start with the function that writes a record header in a
buffer. It’s a tedious function but it must be done with care as a
single error in a header can corrupt the entire archive. In particular
we must pay attention to the limits imposed by the file format. For
example the size of paths is limited to 99 bytes (There are extensions
to the format to handle longer path but it’s not the goal of this
project).
let write_header_to_buffer source infos kind =
let size = if kind = REG then infos.st_size else 0 in
String.fill buffer 0 block_size '\000';
let put len string offset =
String.blit string 0 buffer offset (min (String.length string) len) in
let put_int8 x = put 7 (Printf.sprintf "%07o" x) in
let put_int12 x = put 11 (Printf.sprintf "%011o" x) in
let put_char c offset = buffer.[offset] <- c in
let put_path s offset =
if String.length s <= 99 then put 99 s offset
else raise (Error ("path too long", s)) in
put_path (if kind = DIR then source ^ "/" else source) 0;
put_int8 infos.st_perm 100;
put_int8 infos.st_uid 108;
put_int8 infos.st_gid 116;
put_int12 size 124;
put_int12 (int_of_float infos.st_mtime) 136;
put 7 "ustar " 257;
put 31 (getpwuid infos.st_uid).pw_name 265;
put 31 (getgrgid infos.st_gid).gr_name 297;
(* Fields dev and rdev are only used for special files, which we omit *)
put_char
begin match kind with
| REG -> '0'
| LINK s -> put_path s 157; '1'
| LNK s -> put_path s 157; '2'
| DIR -> '5'
| _ -> failwith "Special files not implemented"
end 156;
let rec sum s i =
if i < 0 then s else sum (s + Char.code buffer.[i]) (pred i) in
let checksum = sum (Char.code ' ' * 8) (block_size - 1) in
put 8 (Printf.sprintf "%06o\000 " checksum) 148;;
The following function creates a record header for a file. source
is
the file name, infos
is the stats information of
the file and kind
is the type of file.
let header source infos kind = {
name = source;
size = if kind = REG then infos.st_size else 0;
perm = infos.st_perm;
mtime = int_of_float infos.st_mtime;
uid = infos.st_uid;
gid = infos.st_gid;
user = (getpwuid infos.st_uid).pw_name;
group = (getgrgid infos.st_gid).gr_name;
kind = kind }
To write a file in the archive, we define a variant of file_copy
which takes as an argument the number of bytes to copy and verifies
that the end of file corresponds to that size. Otherwise, an error is
raised: this handles the abnormal case where a file is modified during
the archival process. To limit the archive’s corruption to a single
file we do not write beyond that size.
let write_file len source fdout =
let fdin = openfile source [O_RDONLY] 0 in
let error () = raise (Error ("File changed size", source)) in
let rec copy_loop len =
match read fdin buffer 0 buffer_size with
0 ->
close fdin; if len > 0 then error ()
| r ->
let len = len - r in
if len < 0 then (close fdin; error ());
ignore (write fdout buffer 0 r); copy_loop len in
copy_loop len;;
let padding fd len =
if len > 0 then ignore (write fd (String.make len '\000') 0 len);;
We now tackle the creation of the archive. The files already written
in the archive are stored in a hashtable with their path so that they
are not copied more than once. We also store the directories that were
already written so as not to copy them again: it can happen that the
archival root is already contained in another and we don’t want to
copy it again (even though that would be harmless).
The data needed to write an archive is a file descriptor pointing on
the file to write, the file and directory cache (see above) and a size
variable that remembers the current archive size (to pad it to a
minimal size if needed). The archive
type collects all this
information in a record:
type archive =
{ regfiles : (int * int, string) Hashtbl.t;
dirfiles : (int * int, bool) Hashtbl.t;
fd : file_descr; st : stats; mutable size : int }
let try_new_dir archive dir =
try Hashtbl.find archive.dirfiles dir
with Not_found -> Hashtbl.add archive.dirfiles dir false; true
Here is the main function that writes an entire hierarchy starting
from a file
path given on the command line. This function is not
difficult but needs some care with pathological cases. In particular
we saw how to detect when a file is modified the archival. A sub case
of this when the archive is being archived itself…
let verbose = ref true;;
let write_from archive file =
if not (Filename.is_relative file) then
raise (Error ("absolute path", file));
let rec write_rec archive file =
let source =
if Filename.basename file = "" then Filename.dirname file else file in
if !verbose then begin prerr_endline source end;
let st = lstat source in
if st.st_ino = archive.st.st_ino && st.st_dev = archive.st.st_dev
then warning source "Skipping archive itself!"
else
let write_header kind =
write_header_to_buffer source st kind;
ignore (write archive.fd buffer 0 block_size) in
match st.st_kind with
S_REG ->
begin try
if st.st_nlink = 1 then raise Not_found;
let path =
Hashtbl.find archive.regfiles (st.st_ino, st.st_dev) in
write_header (LINK path);
with Not_found ->
if st.st_nlink > 1 then
Hashtbl.add archive.regfiles (st.st_ino, st.st_dev) source;
write_header REG;
write_file st.st_size source archive.fd;
let t =
(block_size-1 + st.st_size) / block_size * block_size in
padding archive.fd (t - st.st_size);
archive.size <- archive.size + t + block_size;
end
| S_LNK ->
write_header (LNK (readlink source));
| S_DIR when try_new_dir archive (st.st_ino, st.st_dev) ->
write_header DIR;
Misc.iter_dir
begin
fun file ->
if file = Filename.current_dir_name then ()
else if file = Filename.parent_dir_name then ()
else write_rec archive (source ^ "/" ^ file)
end
source
| S_DIR ->
warning source "Ignoring directory already in archive."
| _ ->
prerr_endline ("Can't cope with special file " ^ source) in
write_rec archive file;;
We keep track of regular files that may have hard links in the
regfiles
table. It’s not necessary for files that have a single
link.
Here’s the main function. In case of error, it is better to remove the
erroneous archive.
let min_archive_size = 20 * block_size;;
let build tarfile files =
let fd, remove =
if tarfile = "-" then stdout, ignore
else openfile tarfile [ O_WRONLY; O_CREAT; O_TRUNC ] 0o666, unlink in
try
let arch =
{ regfiles = Hashtbl.create 13; dirfiles = Hashtbl.create 13;
st = fstat fd; fd = fd; size =0 } in
Array.iter (write_from arch) files;
padding fd (min_archive_size - arch.size);
close fd
with z ->
remove tarfile; close fd; raise z;;
We end by parsing the command line arguments.
let usage () =
prerr_endline "Usage: tar -cvf tarfile file1 [ file2 ... ] ";
exit 2;;
let tar () =
let argn = Array.length Sys.argv in
if argn > 3 && Sys.argv.(1) = "-cvf" then
build Sys.argv.(2) (Array.sub Sys.argv 3 (argn-3))
else usage ();;
let _ =
try handle_unix_error tar ()
with Error (mes, s) ->
prerr_endline ("Error: " ^ mes ^ ": " ^ s); exit 1;;
* * *