Answer of exercise 7

Behind this apparently trivial requirement are hidden difficulties. Symbolic links are arbitrary paths, they can point on directories (which is not allowed for hard links) and they may not correspond to files contained in the archive.

A simple solution is to recreate, in memory, the file hierarchy contained in the archive.

type info = File | Link of string list | Dir of (string * inode) list and inode = { mutable record : record option; mutable info : info;}

Nodes of this in-memory file system are described by the inode type. The info field describes the file type, limited to ordinary files, symbolic links and directories. Paths are represented by lists of strings and directories by lists that associate a node to each file name in the directory. The record field stores the tar record associated to the node. This field is optional because intermediate directories are not always present in the archive; it is mutable because a file may appear more than once in the archive and the last occurrence takes precedence over the other.

let root () = let rec i = { record = None; info = Dir [ Filename.current_dir_name, i ] } in i let link inode name nod = match with | File | Link _ -> error name "Not a directory" | Dir list -> try let _ = List.assoc name list in error name "Already exists" with Not_found -> <- Dir ((name, nod) :: list) let mkfile inode name r = let f = { record = r; info = File } in link inode name f; f let symlink inode name r path = let s = { record = r; info = Link path } in link inode name s; s let mkdir inode name r = let d = mkfile inode name r in <- Dir [ Filename.current_dir_name, d; Filename.parent_dir_name, inode ]; d

As in Unix, each directory contains a link to itself and to its parent, except for the root directory (in contrast to Unix where it is its own parent). This allows us to detect and forbid any access outside the hierarchy contained in the archive.

let rec find link inode path = match, path with | _, [] -> inode | Dir list, name :: rest -> let subnode = List.assoc name list in let subnode = match with Link q -> if link && rest = [] then subnode else find false inode q | _ -> subnode in find link subnode rest | _, _ -> raise Not_found;;

The function find finds in the archive the node corresponding to path by starting from the initial node inode. If the search result is a link, the flag link indicates whether the link itself should be returned (true) or the file pointed by the link (false).

let rec mkpath inode path = match, path with | _, [] -> inode | Dir list, name :: rest -> let subnode = try List.assoc name list with Not_found -> mkdir inode name None in mkpath subnode rest | _, _ -> raise Not_found;;

The function mkpath traverses the path path creating missing nodes along the path.

let explode f = let rec dec f p = if f = Filename.current_dir_name then p else dec (Filename.dirname f) (Filename.basename f :: p) in dec (if Filename.basename f = "" then Filename.dirname f else f) [];;

The function explode parses a Unix path into a list of strings. It removes the end “/” of directory names which are allowed in archives.

let add archive r = match r.header.kind with | CHR (_,_) | BLK (_,_) | FIFO -> () | kind -> match List.rev (explode with | [] -> () | name :: parent_rev -> let inode = mkpath archive (List.rev parent_rev) in match kind with | DIR -> ignore (mkdir inode name (Some r)) | REG | CONT -> ignore (mkfile inode name (Some r)) | LNK f -> ignore (symlink inode name (Some r) (explode f)) | LINK f -> link inode name (find true archive (explode f)) | _ -> assert false;;

The function add adds the record r to the archive. The archive, represented by its root node, is modified by a side effect.

let find_and_copy tarfile filename = let fd = openfile tarfile [ O_RDONLY ] 0 in let records = List.rev (fold (fun x y -> x :: y) [] fd) in let archive = root () in List.iter (add archive) records; let inode = try find false archive (explode filename) with Not_found -> error filename "File not found" in begin match inode.record with | Some ({ header = { kind = (REG | CONT) }} as r) -> copy_file r stdout | Some _ -> error filename "Not a regular file" | None -> error filename "Not found" end; close fd;;

We end as before.

let readtar () = let nargs = Array.length Sys.argv in if nargs = 2 then list Sys.argv.(1) else if nargs = 3 then find_and_copy Sys.argv.(1) Sys.argv.(2) else prerr_endline ("Usage: " ^Sys.argv.(0)^ " <tarfile> [ <source> ]");; Printexc.print (handle_unix_error (handle_error readtar)) ();;
* * *