(* Re-export the sibling modules under this module's namespace. *)
module Ast = Ast
module Loc = Loc
module Warning = Warning

(* The result of parsing one comment; built by [parse_comment]. *)
type t = {
  ast : Ast.t;
      (* The syntax tree returned by [Syntax.parse]. *)
  warnings : Warning.t list;
      (* The warnings returned by [Syntax.parse] alongside the tree. *)
  reversed_newlines : (int * int) list;
      (* The (line number, line start offset) table computed from the comment
         text; kept so that [position_of_point] can map points back to
         positions. *)
  original_pos : Lexing.position;
      (* The [~location] that was passed to [parse_comment]. *)
}

(* odoc uses an ocamllex lexer. The "engine" for such lexers is the standard
[Lexing] module.
As the [Lexing] module reads the input, it keeps track of only the byte
offset into the input. It is normally the job of each particular lexer
implementation to decide which character sequences count as newlines, and
keep track of line/column locations. This is usually done by writing several
extra regular expressions, and calling [Lexing.new_line] at the right time.
Keeping track of newlines like this makes the odoc lexer somewhat too
difficult to read, however. To factor the aspect of keeping track of newlines
fully out of the odoc lexer, instead of having it keep track of newlines as
it's scanning the input, the input is pre-scanned before feeding it into the
lexer. A table of all the newlines is assembled, and used to convert offsets
into line/column pairs after the lexer emits tokens.
[reversed_newlines ~input] returns a list of pairs of (line number * byte
offset at which that line starts), ordered last line first. Combined with the
location of the comment, it allows the easy conversion from the byte
[offset], relative to the beginning of a comment, into a location, relative
to the beginning of the file containing the comment. This can then be used
to convert from byte offset to line number / column number - a Loc.point,
and additionally for converting back from a Loc.point to a Lexing.position.
*)
let reversed_newlines : input:string -> (int * int) list =
 fun ~input ->
  (* Line 1 always starts at offset 0, so the table is seeded with it. New
     entries are pushed on the front, hence the "reversed" order. *)
  let table = ref [ (1, 0) ] in
  let current_line = ref 1 in
  String.iteri
    (fun index character ->
      (* Looking for LF alone is sufficient: a CR-LF pair also ends in LF. *)
      if character = '\n' then begin
        incr current_line;
        (* The new line begins on the byte right after the LF. *)
        table := (!current_line, index + 1) :: !table
      end)
    input;
  !table

(* [offset_to_location] converts from an offset within the comment text, where
[reversed_newlines] is the result of the above function and [comment_location]
is the location of the comment within its file. The function is meant to be
partially applied to its first two arguments, at which point it is passed to
the lexer, so it can apply the table to its emitted tokens. *)
let offset_to_location :
    reversed_newlines:(int * int) list ->
    comment_location:Lexing.position ->
    int ->
    Loc.point =
 fun ~reversed_newlines ~comment_location byte_offset ->
  let rec locate = function
    | [] ->
        (* The table always contains the entry for line 1 at offset 0, so
           running off the end is not expected. *)
        assert false
    | (_, start_offset) :: earlier_lines when start_offset > byte_offset ->
        (* This line starts after the offset; try the preceding line. *)
        locate earlier_lines
    | (line_in_comment, start_offset) :: _ ->
        let column_in_comment = byte_offset - start_offset in
        let column =
          match line_in_comment with
          | 1 ->
              (* The first line of the comment does not start at column 0 of
                 the file: shift by the column of the comment's location. *)
              column_in_comment
              + (comment_location.Lexing.pos_cnum
               - comment_location.Lexing.pos_bol)
          | _ -> column_in_comment
        in
        {
          Loc.line = line_in_comment + comment_location.Lexing.pos_lnum - 1;
          column;
        }
  in
  locate reversed_newlines

(* Given a Loc.point and the result of [parse_comment], this function returns
a valid Lexing.position *)
let position_of_point : t -> Loc.point -> Lexing.position =
 fun v point ->
  let origin = v.original_pos in
  (* Translate the file-relative line back to a 1-based line in the comment. *)
  let target_line = point.Loc.line - origin.Lexing.pos_lnum + 1 in
  let rec line_start = function
    | [] -> assert false
    | [ _ ] ->
        (* Only the first line's entry is left: reuse the original
           beginning-of-line offset. *)
        origin.Lexing.pos_bol
    | (line_number, _) :: earlier_lines when line_number > target_line ->
        line_start earlier_lines
    | (_, start_offset) :: _ -> start_offset + origin.Lexing.pos_cnum
  in
  let pos_bol = line_start v.reversed_newlines in
  {
    Lexing.pos_fname = origin.Lexing.pos_fname;
    pos_lnum = point.Loc.line;
    pos_bol;
    pos_cnum = point.Loc.column + pos_bol;
  }

(* The main entry point for this module *)
let parse_comment ~location ~text =
  let newline_table = reversed_newlines ~input:text in
  (* Shared between the lexer input and the parser. *)
  let warning_sink = ref [] in
  let lexbuf = Lexing.from_string text in
  let lexer_input : Lexer.input =
    {
      file = location.Lexing.pos_fname;
      (* Partially applied, so the lexer only has to supply byte offsets. *)
      offset_to_location =
        offset_to_location ~reversed_newlines:newline_table
          ~comment_location:location;
      warnings = warning_sink;
      lexbuf;
    }
  in
  (* Each pull on the stream asks the lexer for one more token. *)
  let next_token _token_index = Some (Lexer.token lexer_input lexbuf) in
  let ast, warnings = Syntax.parse warning_sink (Stream.from next_token) in
  { ast; warnings; reversed_newlines = newline_table; original_pos = location }

(* Accessor functions, as [t] is opaque *)
let warnings { warnings; _ } = warnings
let ast { ast; _ } = ast