Behind this apparently trivial requirement are hidden difficulties.
Symbolic links are arbitrary paths, they can point on directories
(which is not allowed for hard links) and they may not correspond to
files contained in the archive.
A simple solution is to recreate, in memory, the file hierarchy
contained in the archive.
type info = File | Link of string list | Dir of (string * inode) list
and inode = { mutable record : record option; mutable info : info;}
Nodes of this in-memory file system are described by the inode
type. The info
field describes the file type, limited to ordinary
files, symbolic links and directories. Paths are represented by lists
of strings and directories by lists that associate a node to each file
name in the directory. The record
field stores the tar
record associated to the node. This field is optional because
intermediate directories are not always present in the archive; it is
mutable because a file may appear more than once in the archive and
the last occurrence takes precedence over the other.
let root () =
let rec i =
{ record = None; info = Dir [ Filename.current_dir_name, i ] }
in i
let link inode name nod = match inode.info with
| File | Link _ -> error name "Not a directory"
| Dir list ->
try let _ = List.assoc name list in error name "Already exists"
with Not_found -> inode.info <- Dir ((name, nod) :: list)
let mkfile inode name r =
let f = { record = r; info = File } in
link inode name f; f
let symlink inode name r path =
let s = { record = r; info = Link path } in
link inode name s; s
let mkdir inode name r =
let d = mkfile inode name r in
d.info <-
Dir [ Filename.current_dir_name, d; Filename.parent_dir_name, inode ];
d
As in Unix, each directory contains a link to itself and
to its parent, except for the root directory (in contrast to Unix
where it is its own parent). This allows us to detect and
forbid any access outside the hierarchy contained in the archive.
let rec find link inode path = match inode.info, path with
| _, [] -> inode
| Dir list, name :: rest ->
let subnode = List.assoc name list in
let subnode =
match subnode.info with
Link q ->
if link && rest = [] then subnode else find false inode q
| _ -> subnode in
find link subnode rest
| _, _ -> raise Not_found;;
The function find
finds in the archive the node corresponding to
path
by starting from the initial node inode
. If the search
result is a link, the flag link
indicates whether the link itself
should be returned (true) or the file pointed by the link (false).
let rec mkpath inode path =
match inode.info, path with
| _, [] -> inode
| Dir list, name :: rest ->
let subnode =
try List.assoc name list
with Not_found -> mkdir inode name None in
mkpath subnode rest
| _, _ -> raise Not_found;;
The function mkpath
traverses the path path
creating missing
nodes along the path.
let explode f =
let rec dec f p =
if f = Filename.current_dir_name then p
else dec (Filename.dirname f) (Filename.basename f :: p) in
dec (if Filename.basename f = "" then Filename.dirname f else f) [];;
The function explode
parses a Unix path into a list of strings.
It removes the end “/
” of directory names which are allowed
in archives.
let add archive r =
match r.header.kind with
| CHR (_,_) | BLK (_,_) | FIFO -> ()
| kind ->
match List.rev (explode r.header.name) with
| [] -> ()
| name :: parent_rev ->
let inode = mkpath archive (List.rev parent_rev) in
match kind with
| DIR -> ignore (mkdir inode name (Some r))
| REG | CONT -> ignore (mkfile inode name (Some r))
| LNK f -> ignore (symlink inode name (Some r) (explode f))
| LINK f -> link inode name (find true archive (explode f))
| _ -> assert false;;
The function add
adds the record r
to
the archive. The archive, represented by its root node, is modified by a
side effect.
let find_and_copy tarfile filename =
let fd = openfile tarfile [ O_RDONLY ] 0 in
let records = List.rev (fold (fun x y -> x :: y) [] fd) in
let archive = root () in
List.iter (add archive) records;
let inode =
try find false archive (explode filename)
with Not_found -> error filename "File not found" in
begin match inode.record with
| Some ({ header = { kind = (REG | CONT) }} as r) -> copy_file r stdout
| Some _ -> error filename "Not a regular file"
| None -> error filename "Not found"
end;
close fd;;
We end as before.
let readtar () =
let nargs = Array.length Sys.argv in
if nargs = 2 then list Sys.argv.(1)
else if nargs = 3 then find_and_copy Sys.argv.(1) Sys.argv.(2)
else prerr_endline ("Usage: " ^Sys.argv.(0)^ " <tarfile> [ <source> ]");;
Printexc.print (handle_unix_error (handle_error readtar)) ();;
* * *