123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521(**************************************************************************)(* *)(* OCaml *)(* *)(* Pierre Weis, projet Cristal, INRIA Rocquencourt *)(* *)(* Copyright 2002 Institut National de Recherche en Informatique et *)(* en Automatique. *)(* *)(* All rights reserved. This file is distributed under the terms of *)(* the GNU Lesser General Public License version 2.1, with the *)(* special exception on linking described in the file LICENSE. *)(* *)(**************************************************************************)openCamlinternalFormatBasicsopenCamlinternalFormat(* alias to avoid warning for ambiguity between
Stdlib.format6
and CamlinternalFormatBasics.format6
(the former is in fact an alias for the latter,
but the ambiguity warning doesn't care)
*)type('a,'b,'c,'d,'e,'f)format6=('a,'b,'c,'d,'e,'f)Stdlib.format6(* The run-time library for scanners. *)(* Scanning buffers. *)moduletypeSCANNING=sigtypein_channeltypescanbuf=in_channeltypefile_name=stringvalstdin:in_channel(* The scanning buffer reading from [Stdlib.stdin]. *)valnext_char:scanbuf->char(* [Scanning.next_char ib] advance the scanning buffer for
one character.
If no more character can be read, sets a end of file condition and
returns '\000'. *)valinvalidate_current_char:scanbuf->unit(* [Scanning.invalidate_current_char ib] mark the current_char as already
scanned. *)valpeek_char:scanbuf->char(* [Scanning.peek_char ib] returns the current char available in
the buffer or reads one if necessary (when the current character is
already scanned).
If no character can be read, sets an end of file condition and
returns '\000'. *)valchecked_peek_char:scanbuf->char(* Same as [Scanning.peek_char] above but always returns a valid char or
fails: instead of returning a null char when the reading method of the
input buffer has reached an end of file, the function raises exception
[End_of_file]. *)valstore_char:int->scanbuf->char->int(* [Scanning.store_char lim ib c] adds [c] to the token buffer
of the scanning buffer [ib]. It also advances the scanning buffer for
one character and returns [lim - 1], indicating the new limit for the
length of the current token. *)valskip_char:int->scanbuf->int(* [Scanning.skip_char lim ib] ignores the current character. *)valignore_char:int->scanbuf->int(* [Scanning.ignore_char ib lim] ignores the current character and
decrements the limit. *)valtoken:scanbuf->string(* [Scanning.token ib] returns the string stored into the token
buffer of the scanning buffer: it returns the token matched by the
format. *)valreset_token:scanbuf->unit(* [Scanning.reset_token ib] resets the token buffer of
the given scanning buffer. *)valchar_count:scanbuf->int(* [Scanning.char_count ib] returns the number of characters
read so far from the given buffer. *)valline_count:scanbuf->int(* [Scanning.line_count ib] returns the number of new line
characters read so far from the given buffer. *)valtoken_count:scanbuf->int(* [Scanning.token_count ib] returns the number of tokens read
so far from [ib]. *)valeof:scanbuf->bool(* [Scanning.eof ib] returns the end of input condition
of the given buffer. *)valend_of_input:scanbuf->bool(* [Scanning.end_of_input ib] tests the end of input condition
of the given buffer (if no char has ever been read, an attempt to
read one is performed). *)valbeginning_of_input:scanbuf->bool(* [Scanning.beginning_of_input ib] tests the beginning of input
condition of the given buffer. *)valname_of_input:scanbuf->string(* [Scanning.name_of_input ib] returns the name of the character
source for input buffer [ib]. *)valopen_in:file_name->in_channelvalopen_in_bin:file_name->in_channelvalfrom_file:file_name->in_channelvalfrom_file_bin:file_name->in_channelvalfrom_string:string->in_channelvalfrom_function:(unit->char)->in_channelvalfrom_channel:Stdlib.in_channel->in_channelvalclose_in:in_channel->unitendmoduleScanning:SCANNING=struct(* The run-time library for scanf. *)typefile_name=stringtypein_channel_name=|From_channelofStdlib.in_channel|From_fileoffile_name*Stdlib.in_channel|From_function|From_stringtypein_channel={mutableic_eof:bool;mutableic_current_char:char;mutableic_current_char_is_valid:bool;mutableic_char_count:int;mutableic_line_count:int;mutableic_token_count:int;ic_get_next_char:unit->char;ic_token_buffer:Buffer.t;ic_input_name:in_channel_name;}typescanbuf=in_channelletnull_char='\000'(* Reads a new character from input buffer.
Next_char never fails, even in case of end of input:
it then simply sets the end of file condition. *)letnext_charib=tryletc=ib.ic_get_next_char()inib.ic_current_char<-c;ib.ic_current_char_is_valid<-true;ib.ic_char_count<-succib.ic_char_count;ifc='\n'thenib.ic_line_count<-succib.ic_line_count;cwith|End_of_file->letc=null_charinib.ic_current_char<-c;ib.ic_current_char_is_valid<-false;ib.ic_eof<-true;cletpeek_charib=ifib.ic_current_char_is_validthenib.ic_current_charelsenext_charib(* Returns a valid current char for the input buffer. In particular
no irrelevant null character (as set by [next_char] in case of end
of input) is returned, since [End_of_file] is raised when
[next_char] sets the end of file condition while trying to read a
new character. *)letchecked_peek_charib=letc=peek_charibinifib.ic_eofthenraiseEnd_of_file;cletend_of_inputib=ignore(peek_charib);ib.ic_eofleteofib=ib.ic_eofletbeginning_of_inputib=ib.ic_char_count=0letname_of_inputib=matchib.ic_input_namewith|From_channel_ic->"unnamed Stdlib input channel"|From_file(fname,_ic)->fname|From_function->"unnamed function"|From_string->"unnamed character string"letchar_countib=ifib.ic_current_char_is_validthenib.ic_char_count-1elseib.ic_char_countletline_countib=ib.ic_line_countletreset_tokenib=Buffer.resetib.ic_token_bufferletinvalidate_current_charib=ib.ic_current_char_is_valid<-falselettokenib=lettoken_buffer=ib.ic_token_bufferinlettok=Buffer.contentstoken_bufferinBuffer.cleartoken_buffer;ib.ic_token_count<-succib.ic_token_count;toklettoken_countib=ib.ic_token_countletskip_charwidthib=invalidate_current_charib;widthletignore_charwidthib=skip_char(width-1)ibletstore_charwidthibc=Buffer.add_charib.ic_token_bufferc;ignore_charwidthibletdefault_token_buffer_size=1024letcreateinamenext={ic_eof=false;ic_current_char=null_char;ic_current_char_is_valid=false;ic_char_count=0;ic_line_count=0;ic_token_count=0;ic_get_next_char=next;ic_token_buffer=Buffer.createdefault_token_buffer_size;ic_input_name=iname;}letfrom_strings=leti=ref0inletlen=String.lengthsinletnext()=if!i>=lenthenraiseEnd_of_fileelseletc=s.[!i]inincri;cincreateFrom_stringnextletfrom_function=createFrom_function(* Scanning from an input channel. *)(* Position of the problem:
We cannot prevent the scanning mechanism to use one lookahead character,
if needed by the semantics of the format string specifications (e.g. a
trailing 'skip space' specification in the format string); in this case,
the mandatory lookahead character is indeed read from the input and not
used to return the token read. It is thus mandatory to be able to store
an unused lookahead character somewhere to get it as the first character
of the next scan.
To circumvent this problem, all the scanning functions get a low level
input buffer argument where they store the lookahead character when
needed; additionally, the input buffer is the only source of character of
a scanner. The [scanbuf] input buffers are defined in module {!Scanning}.
Now we understand that it is extremely important that related and
successive calls to scanners indeed read from the same input buffer.
In effect, if a scanner [scan1] is reading from [ib1] and stores an
unused lookahead character [c1] into its input buffer [ib1], then
another scanner [scan2] not reading from the same buffer [ib1] will miss
the character [c1], seemingly vanished in the air from the point of view
of [scan2].
This mechanism works perfectly to read from strings, from files, and from
functions, since in those cases, allocating two buffers reading from the
same source is unnatural.
Still, there is a difficulty in the case of scanning from an input
channel. In effect, when scanning from an input channel [ic], this channel
may not have been allocated from within this library. Hence, it may be
shared (two functions of the user's program may successively read from
[ic]). This is highly error prone since, one of the function may seek the
input channel, while the other function has still an unused lookahead
character in its input buffer. In conclusion, you should never mix direct
low level reading and high level scanning from the same input channel.
*)(* Perform bufferized input to improve efficiency. *)letfile_buffer_size=ref1024(* The scanner closes the input channel at end of input. *)letscan_close_at_endic=Stdlib.close_inic;raiseEnd_of_file(* The scanner does not close the input channel at end of input:
it just raises [End_of_file]. *)letscan_raise_at_end_ic=raiseEnd_of_fileletfrom_icscan_close_icinameic=letlen=!file_buffer_sizeinletbuf=Bytes.createleninleti=ref0inletlim=ref0inleteof=reffalseinletnext()=if!i<!limthenbeginletc=Bytes.getbuf!iinincri;cendelseif!eofthenraiseEnd_of_fileelsebeginlim:=inputicbuf0len;if!lim=0thenbegineof:=true;scan_close_icicendelsebegini:=1;Bytes.getbuf0endendincreateinamenextletfrom_ic_close_at_end=from_icscan_close_at_endletfrom_ic_raise_at_end=from_icscan_raise_at_end(* The scanning buffer reading from [Stdlib.stdin].
One could try to define [stdin] as a scanning buffer reading a character
at a time (no bufferization at all), but unfortunately the top-level
interaction would be wrong. This is due to some kind of
'race condition' when reading from [Stdlib.stdin],
since the interactive compiler and [Scanf.scanf] will simultaneously
read the material they need from [Stdlib.stdin]; then, confusion
will result from what should be read by the top-level and what should be
read by [Scanf.scanf].
This is even more complicated by the one character lookahead that
[Scanf.scanf] is sometimes obliged to maintain: the lookahead character
will be available for the next [Scanf.scanf] entry, seemingly coming from
nowhere.
Also no [End_of_file] is raised when reading from stdin: if not enough
characters have been read, we simply ask to read more. *)letstdin=from_icscan_raise_at_end(From_file("-",Stdlib.stdin))Stdlib.stdinletopen_in_fileopen_infname=matchfnamewith|"-"->stdin|fname->letic=open_infnameinfrom_ic_close_at_end(From_file(fname,ic))icletopen_in=open_in_fileStdlib.open_inletopen_in_bin=open_in_fileStdlib.open_in_binletfrom_file=open_inletfrom_file_bin=open_in_binletfrom_channelic=from_ic_raise_at_end(From_channelic)icletclose_inib=matchib.ic_input_namewith|From_channelic->Stdlib.close_inic|From_file(_fname,ic)->Stdlib.close_inic|From_function|From_string->()end(* Formatted input functions. *)type('a,'b,'c,'d)scanner=('a,Scanning.in_channel,'b,'c,'a->'d,'d)format6->'ctype('a,'b,'c,'d)scanner_opt=('a,Scanning.in_channel,'b,'c,'a->'doption,'d)format6->'c(* Reporting errors. *)exceptionScan_failureofstringletbad_inputs=raise(Scan_failures)letbad_input_escapec=bad_input(Printf.sprintf"illegal escape character %C"c)letbad_token_lengthmessage=bad_input(Printf.sprintf"scanning of %s failed: \
the specified length was too short for token"message)letbad_end_of_inputmessage=bad_input(Printf.sprintf"scanning of %s failed: \
premature end of file occurred before end of token"message)letbad_float()=bad_input"no dot or exponent part found in float token"letbad_hex_float()=bad_input"not a valid float in hexadecimal notation"letcharacter_mismatch_errcci=Printf.sprintf"looking for %C, found %C"cciletcharacter_mismatchcci=bad_input(character_mismatch_errcci)letrecskip_whitesib=letc=Scanning.peek_charibinifnot(Scanning.eofib)thenbeginmatchcwith|' '|'\t'|'\n'|'\r'->Scanning.invalidate_current_charib;skip_whitesib|_->()end(* Checking that [c] is indeed in the input, then skips it.
In this case, the character [c] has been explicitly specified in the
format as being mandatory in the input; hence we should fail with
[End_of_file] in case of end_of_input.
(Remember that [Scan_failure] is raised only when (we can prove by
evidence) that the input does not match the format string given. We must
thus differentiate [End_of_file] as an error due to lack of input, and
[Scan_failure] which is due to provably wrong input. I am not sure this is
worth the burden: it is complex and somehow subliminal; should be clearer
to fail with Scan_failure "Not enough input to complete scanning"!)
That's why, waiting for a better solution, we use checked_peek_char here.
We are also careful to treat "\r\n" in the input as an end of line marker:
it always matches a '\n' specification in the input format string. *)letreccheck_charibc=matchcwith|' '->skip_whitesib|'\n'->check_newlineib|c->check_this_charibcandcheck_this_charibc=letci=Scanning.checked_peek_charibinifci=cthenScanning.invalidate_current_charibelsecharacter_mismatchcciandcheck_newlineib=letci=Scanning.checked_peek_charibinmatchciwith|'\n'->Scanning.invalidate_current_charib|'\r'->Scanning.invalidate_current_charib;check_this_charib'\n'|_->character_mismatch'\n'ci(* Extracting tokens from the output token buffer. *)lettoken_charib=(Scanning.tokenib).[0]lettoken_string=Scanning.tokenlettoken_boolib=matchScanning.tokenibwith|"true"->true|"false"->false|s->bad_input(Printf.sprintf"invalid boolean '%s'"s)(* The type of integer conversions. *)typeinteger_conversion=|B_conversion(* Unsigned binary conversion *)|D_conversion(* Signed decimal conversion *)|I_conversion(* Signed integer conversion *)|O_conversion(* Unsigned octal conversion *)|U_conversion(* Unsigned decimal conversion *)|X_conversion(* Unsigned hexadecimal conversion *)letinteger_conversion_of_char=function|'b'->B_conversion|'d'->D_conversion|'i'->I_conversion|'o'->O_conversion|'u'->U_conversion|'x'|'X'->X_conversion|_->assertfalse(* Extract an integer literal token.
Since the functions Stdlib.*int*_of_string do not accept a leading +,
we skip it if necessary. *)lettoken_int_literalconvib=lettok=matchconvwith|D_conversion|I_conversion->Scanning.tokenib|U_conversion->"0u"^Scanning.tokenib|O_conversion->"0o"^Scanning.tokenib|X_conversion->"0x"^Scanning.tokenib|B_conversion->"0b"^Scanning.tokenibinletl=String.lengthtokinifl=0||tok.[0]<>'+'thentokelseString.subtok1(l-1)(* All the functions that convert a string to a number raise the exception
Failure when the conversion is not possible.
This exception is then trapped in [kscanf]. *)lettoken_intconvib=int_of_string(token_int_literalconvib)lettoken_floatib=float_of_string(Scanning.tokenib)(* To scan native ints, int32 and int64 integers.
We cannot access to conversions to/from strings for those types,
Nativeint.of_string, Int32.of_string, and Int64.of_string,
since those modules are not available to [Scanf].
However, we can bind and use the corresponding primitives that are
available in the runtime. *)externalnativeint_of_string:string->nativeint="caml_nativeint_of_string"externalint32_of_string:string->int32="caml_int32_of_string"externalint64_of_string:string->int64="caml_int64_of_string"lettoken_nativeintconvib=nativeint_of_string(token_int_literalconvib)lettoken_int32convib=int32_of_string(token_int_literalconvib)lettoken_int64convib=int64_of_string(token_int_literalconvib)(* Scanning numbers. *)(* Digits scanning functions suppose that one character has been checked and
is available, since they return at end of file with the currently found
token selected.
Put it in another way, the digits scanning functions scan for a possibly
empty sequence of digits, (hence, a successful scanning from one of those
functions does not imply that the token is a well-formed number: to get a
true number, it is mandatory to check that at least one valid digit is
available before calling one of the digit scanning functions). *)(* The decimal case is treated especially for optimization purposes. *)letrecscan_decimal_digit_starwidthib=ifwidth=0thenwidthelseletc=Scanning.peek_charibinifScanning.eofibthenwidthelsematchcwith|'0'..'9'asc->letwidth=Scanning.store_charwidthibcinscan_decimal_digit_starwidthib|'_'->letwidth=Scanning.ignore_charwidthibinscan_decimal_digit_starwidthib|_->widthletscan_decimal_digit_pluswidthib=ifwidth=0thenbad_token_length"decimal digits"elseletc=Scanning.checked_peek_charibinmatchcwith|'0'..'9'->letwidth=Scanning.store_charwidthibcinscan_decimal_digit_starwidthib|c->bad_input(Printf.sprintf"character %C is not a decimal digit"c)(* To scan numbers from other bases, we use a predicate argument to
scan digits. *)letscan_digit_stardigitpwidthib=letrecscan_digitswidthib=ifwidth=0thenwidthelseletc=Scanning.peek_charibinifScanning.eofibthenwidthelsematchcwith|cwhendigitpc->letwidth=Scanning.store_charwidthibcinscan_digitswidthib|'_'->letwidth=Scanning.ignore_charwidthibinscan_digitswidthib|_->widthinscan_digitswidthibletscan_digit_plusbasisdigitpwidthib=(* Ensure we have got enough width left,
and read at least one digit. *)ifwidth=0thenbad_token_length"digits"elseletc=Scanning.checked_peek_charibinifdigitpcthenletwidth=Scanning.store_charwidthibcinscan_digit_stardigitpwidthibelsebad_input(Printf.sprintf"character %C is not a valid %s digit"cbasis)letis_binary_digit=function|'0'..'1'->true|_->falseletscan_binary_int=scan_digit_plus"binary"is_binary_digitletis_octal_digit=function|'0'..'7'->true|_->falseletscan_octal_int=scan_digit_plus"octal"is_octal_digitletis_hexa_digit=function|'0'..'9'|'a'..'f'|'A'..'F'->true|_->falseletscan_hexadecimal_int=scan_digit_plus"hexadecimal"is_hexa_digit(* Scan a decimal integer. *)letscan_unsigned_decimal_int=scan_decimal_digit_plusletscan_signwidthib=letc=Scanning.checked_peek_charibinmatchcwith|'+'->Scanning.store_charwidthibc|'-'->Scanning.store_charwidthibc|_->widthletscan_optionally_signed_decimal_intwidthib=letwidth=scan_signwidthibinscan_unsigned_decimal_intwidthib(* Scan an unsigned integer that could be given in any (common) basis.
If digits are prefixed by one of 0x, 0X, 0o, or 0b, the number is
assumed to be written respectively in hexadecimal, hexadecimal,
octal, or binary. *)letscan_unsigned_intwidthib=matchScanning.checked_peek_charibwith|'0'asc->letwidth=Scanning.store_charwidthibcinifwidth=0thenwidthelseletc=Scanning.peek_charibinifScanning.eofibthenwidthelsebeginmatchcwith|'x'|'X'->scan_hexadecimal_int(Scanning.store_charwidthibc)ib|'o'->scan_octal_int(Scanning.store_charwidthibc)ib|'b'->scan_binary_int(Scanning.store_charwidthibc)ib|_->scan_decimal_digit_starwidthibend|_->scan_unsigned_decimal_intwidthibletscan_optionally_signed_intwidthib=letwidth=scan_signwidthibinscan_unsigned_intwidthibletscan_int_conversionconvwidthib=matchconvwith|B_conversion->scan_binary_intwidthib|D_conversion->scan_optionally_signed_decimal_intwidthib|I_conversion->scan_optionally_signed_intwidthib|O_conversion->scan_octal_intwidthib|U_conversion->scan_unsigned_decimal_intwidthib|X_conversion->scan_hexadecimal_intwidthib(* Scanning floating point numbers. *)(* Fractional part is optional and can be reduced to 0 digits. *)letscan_fractional_partwidthib=ifwidth=0thenwidthelseletc=Scanning.peek_charibinifScanning.eofibthenwidthelsematchcwith|'0'..'9'asc->scan_decimal_digit_star(Scanning.store_charwidthibc)ib|_->width(* Exp part is optional and can be reduced to 0 digits. *)letscan_exponent_partwidthib=ifwidth=0thenwidthelseletc=Scanning.peek_charibinifScanning.eofibthenwidthelsematchcwith|'e'|'E'asc->scan_optionally_signed_decimal_int(Scanning.store_charwidthibc)ib|_->width(* Scan the integer part of a floating point number, (not using the
OCaml lexical convention since the integer part can be empty):
an optional sign, followed by a possibly empty sequence of decimal
digits (e.g. -.1). *)letscan_integer_partwidthib=letwidth=scan_signwidthibinscan_decimal_digit_starwidthib(*
For the time being we have (as found in scanf.mli):
the field width is composed of an optional integer literal
indicating the maximal width of the token to read.
Unfortunately, the type-checker let the user write an optional precision,
since this is valid for printf format strings.
Thus, the next step for Scanf is to support a full width and precision
indication, more or less similar to the one for printf, possibly extended
to the specification of a [max, min] range for the width of the token read
for strings. Something like the following spec for scanf.mli:
The optional [width] is an integer indicating the maximal
width of the token read. For instance, [%6d] reads an integer,
having at most 6 characters.
The optional [precision] is a dot [.] followed by an integer:
- in the floating point number conversions ([%f], [%e], [%g], [%F], [%E],
and [%F] conversions, the [precision] indicates the maximum number of
digits that may follow the decimal point. For instance, [%.4f] reads a
[float] with at most 4 fractional digits,
- in the string conversions ([%s], [%S], [%\[ range \]]), and in the
integer number conversions ([%i], [%d], [%u], [%x], [%o], and their
[int32], [int64], and [native_int] correspondent), the [precision]
indicates the required minimum width of the token read,
- on all other conversions, the width and precision specify the [max, min]
range for the width of the token read.
*)letscan_floatwidthprecisionib=letwidth=scan_integer_partwidthibinifwidth=0thenwidth,precisionelseletc=Scanning.peek_charibinifScanning.eofibthenwidth,precisionelsematchcwith|'.'->letwidth=Scanning.store_charwidthibcinletprecision=Int.minwidthprecisioninletwidth=width-(precision-scan_fractional_partprecisionib)inscan_exponent_partwidthib,precision|_->scan_exponent_partwidthib,precisionletcheck_case_insensitive_stringwidthiberrorstr=letlowercasec=matchcwith|'A'..'Z'->char_of_int(int_of_charc-int_of_char'A'+int_of_char'a')|_->cinletlen=String.lengthstrinletwidth=refwidthinfori=0tolen-1doletc=Scanning.peek_charibiniflowercasec<>lowercasestr.[i]thenerror();if!width=0thenerror();width:=Scanning.store_char!widthibc;done;!widthletscan_hex_floatwidthprecisionib=ifwidth=0||Scanning.end_of_inputibthenbad_hex_float();letwidth=scan_signwidthibinifwidth=0||Scanning.end_of_inputibthenbad_hex_float();matchScanning.peek_charibwith|'0'asc->(letwidth=Scanning.store_charwidthibcinifwidth=0||Scanning.end_of_inputibthenbad_hex_float();letwidth=check_case_insensitive_stringwidthibbad_hex_float"x"inifwidth=0||Scanning.end_of_inputibthenwidthelseletwidth=matchScanning.peek_charibwith|'.'|'p'|'P'->width|_->scan_hexadecimal_intwidthibinifwidth=0||Scanning.end_of_inputibthenwidthelseletwidth=matchScanning.peek_charibwith|'.'asc->(letwidth=Scanning.store_charwidthibcinifwidth=0||Scanning.end_of_inputibthenwidthelsematchScanning.peek_charibwith|'p'|'P'->width|_->letprecision=Int.minwidthprecisioninwidth-(precision-scan_hexadecimal_intprecisionib))|_->widthinifwidth=0||Scanning.end_of_inputibthenwidthelsematchScanning.peek_charibwith|'p'|'P'asc->letwidth=Scanning.store_charwidthibcinifwidth=0||Scanning.end_of_inputibthenbad_hex_float();scan_optionally_signed_decimal_intwidthib|_->width)|'n'|'N'asc->letwidth=Scanning.store_charwidthibcinifwidth=0||Scanning.end_of_inputibthenbad_hex_float();check_case_insensitive_stringwidthibbad_hex_float"an"|'i'|'I'asc->letwidth=Scanning.store_charwidthibcinifwidth=0||Scanning.end_of_inputibthenbad_hex_float();check_case_insensitive_stringwidthibbad_hex_float"nfinity"|_->bad_hex_float()letscan_caml_float_restwidthprecisionib=ifwidth=0||Scanning.end_of_inputibthenbad_float();letwidth=scan_decimal_digit_starwidthibinifwidth=0||Scanning.end_of_inputibthenbad_float();letc=Scanning.peek_charibinmatchcwith|'.'->letwidth=Scanning.store_charwidthibcin(* The effective width available for scanning the fractional part is
the minimum of declared precision and width left. *)letprecision=Int.minwidthprecisionin(* After scanning the fractional part with [precision] provisional width,
[width_precision] is left. *)letwidth_precision=scan_fractional_partprecisionibin(* Hence, scanning the fractional part took exactly
[precision - width_precision] chars. *)letfrac_width=precision-width_precisionin(* And new provisional width is [width - width_precision. *)letwidth=width-frac_widthinscan_exponent_partwidthib|'e'|'E'->scan_exponent_partwidthib|_->bad_float()letscan_caml_floatwidthprecisionib=ifwidth=0||Scanning.end_of_inputibthenbad_float();letwidth=scan_signwidthibinifwidth=0||Scanning.end_of_inputibthenbad_float();matchScanning.peek_charibwith|'0'asc->(letwidth=Scanning.store_charwidthibcinifwidth=0||Scanning.end_of_inputibthenbad_float();matchScanning.peek_charibwith|'x'|'X'asc->(letwidth=Scanning.store_charwidthibcinifwidth=0||Scanning.end_of_inputibthenbad_float();letwidth=scan_hexadecimal_intwidthibinifwidth=0||Scanning.end_of_inputibthenbad_float();letwidth=matchScanning.peek_charibwith|'.'asc->(letwidth=Scanning.store_charwidthibcinifwidth=0||Scanning.end_of_inputibthenwidthelsematchScanning.peek_charibwith|'p'|'P'->width|_->letprecision=Int.minwidthprecisioninwidth-(precision-scan_hexadecimal_intprecisionib))|'p'|'P'->width|_->bad_float()inifwidth=0||Scanning.end_of_inputibthenwidthelsematchScanning.peek_charibwith|'p'|'P'asc->letwidth=Scanning.store_charwidthibcinifwidth=0||Scanning.end_of_inputibthenbad_hex_float();scan_optionally_signed_decimal_intwidthib|_->width)|_->scan_caml_float_restwidthprecisionib)|'1'..'9'asc->letwidth=Scanning.store_charwidthibcinifwidth=0||Scanning.end_of_inputibthenbad_float();scan_caml_float_restwidthprecisionib(* Special case of nan and infinity:
| 'i' ->
| 'n' ->
*)|_->bad_float()(* Scan a regular string:
stops when encountering a space, if no scanning indication has been given;
otherwise, stops when encountering the characters in the scanning
indication [stp].
It also stops at end of file or when the maximum number of characters has
been read. *)letscan_stringstpwidthib=letrecloopwidth=ifwidth=0thenwidthelseletc=Scanning.peek_charibinifScanning.eofibthenwidthelsematchstpwith|Somec'whenc=c'->Scanning.skip_charwidthib|Some_->loop(Scanning.store_charwidthibc)|None->matchcwith|' '|'\t'|'\n'|'\r'->width|_->loop(Scanning.store_charwidthibc)inloopwidth(* Scan a char: peek strictly one character in the input, whatsoever. *)letscan_charwidthib=(* The case width = 0 could not happen here, since it is tested before
calling scan_char, in the main scanning function.
if width = 0 then bad_token_length "a character" else *)Scanning.store_charwidthib(Scanning.checked_peek_charib)letchar_for_backslash=function|'n'->'\010'|'r'->'\013'|'b'->'\008'|'t'->'\009'|c->c(* The integer value corresponding to the facial value of a valid
decimal digit character. *)letdecimal_value_of_charc=int_of_charc-int_of_char'0'letchar_for_decimal_codec0c1c2=letc=100*decimal_value_of_charc0+10*decimal_value_of_charc1+decimal_value_of_charc2inifc<0||c>255thenbad_input(Printf.sprintf"bad character decimal encoding \\%c%c%c"c0c1c2)elsechar_of_intc(* The integer value corresponding to the facial value of a valid
hexadecimal digit character. *)lethexadecimal_value_of_charc=letd=int_of_charcin(* Could also be:
if d <= int_of_char '9' then d - int_of_char '0' else
if d <= int_of_char 'F' then 10 + d - int_of_char 'A' else
if d <= int_of_char 'f' then 10 + d - int_of_char 'a' else assert false
*)ifd>=int_of_char'a'thend-87(* 10 + int_of_char c - int_of_char 'a' *)elseifd>=int_of_char'A'thend-55(* 10 + int_of_char c - int_of_char 'A' *)elsed-int_of_char'0'letchar_for_hexadecimal_codec1c2=letc=16*hexadecimal_value_of_charc1+hexadecimal_value_of_charc2inifc<0||c>255thenbad_input(Printf.sprintf"bad character hexadecimal encoding \\%c%c"c1c2)elsechar_of_intc(* Called in particular when encountering '\\' as starter of a char.
Stops before the corresponding '\''. *)letcheck_next_charmessagewidthib=ifwidth=0thenbad_token_lengthmessageelseletc=Scanning.peek_charibinifScanning.eofibthenbad_end_of_inputmessageelsecletcheck_next_char_for_char=check_next_char"a Char"letcheck_next_char_for_string=check_next_char"a String"letscan_backslash_charwidthib=matchcheck_next_char_for_charwidthibwith|'\\'|'\''|'\"'|'n'|'t'|'b'|'r'asc->Scanning.store_charwidthib(char_for_backslashc)|'0'..'9'asc->letget_digit()=letc=Scanning.next_charibinmatchcwith|'0'..'9'asc->c|c->bad_input_escapecinletc0=cinletc1=get_digit()inletc2=get_digit()inScanning.store_char(width-2)ib(char_for_decimal_codec0c1c2)|'x'->letget_digit()=letc=Scanning.next_charibinmatchcwith|'0'..'9'|'A'..'F'|'a'..'f'asc->c|c->bad_input_escapecinletc1=get_digit()inletc2=get_digit()inScanning.store_char(width-2)ib(char_for_hexadecimal_codec1c2)|c->bad_input_escapec(* Scan a character (an OCaml token). *)letscan_caml_charwidthib=letrecfind_startwidth=matchScanning.checked_peek_charibwith|'\''->find_char(Scanning.ignore_charwidthib)|c->character_mismatch'\''candfind_charwidth=matchcheck_next_char_for_charwidthibwith|'\\'->find_stop(scan_backslash_char(Scanning.ignore_charwidthib)ib)|c->find_stop(Scanning.store_charwidthibc)andfind_stopwidth=matchcheck_next_char_for_charwidthibwith|'\''->Scanning.ignore_charwidthib|c->character_mismatch'\''cinfind_startwidth(* Scan a delimited string (an OCaml token). *)letscan_caml_stringwidthib=letrecfind_startwidth=matchScanning.checked_peek_charibwith|'\"'->find_stop(Scanning.ignore_charwidthib)|c->character_mismatch'\"'candfind_stopwidth=matchcheck_next_char_for_stringwidthibwith|'\"'->Scanning.ignore_charwidthib|'\\'->scan_backslash(Scanning.ignore_charwidthib)|c->find_stop(Scanning.store_charwidthibc)andscan_backslashwidth=matchcheck_next_char_for_stringwidthibwith|'\r'->skip_newline(Scanning.ignore_charwidthib)|'\n'->skip_spaces(Scanning.ignore_charwidthib)|_->find_stop(scan_backslash_charwidthib)andskip_newlinewidth=matchcheck_next_char_for_stringwidthibwith|'\n'->skip_spaces(Scanning.ignore_charwidthib)|_->find_stop(Scanning.store_charwidthib'\r')andskip_spaceswidth=matchcheck_next_char_for_stringwidthibwith|' '->skip_spaces(Scanning.ignore_charwidthib)|_->find_stopwidthinfind_startwidth(* Scan a boolean (an OCaml token). *)letscan_boolib=letc=Scanning.checked_peek_charibinletm=matchcwith|'t'->4|'f'->5|c->bad_input(Printf.sprintf"the character %C cannot start a boolean"c)inscan_stringNonemib(* Scan a string containing elements in char_set and terminated by scan_indic
if provided. *)letscan_chars_in_char_setchar_setscan_indicwidthib=letrecscan_charsistp=letc=Scanning.peek_charibinifi>0&¬(Scanning.eofib)&&is_in_char_setchar_setc&&int_of_charc<>stpthenlet_=Scanning.store_charmax_intibcinscan_chars(i-1)stpinmatchscan_indicwith|None->scan_charswidth(-1);|Somec->scan_charswidth(int_of_charc);ifnot(Scanning.eofib)thenletci=Scanning.peek_charibinifc=cithenScanning.invalidate_current_charibelsecharacter_mismatchcci(* The global error report function for [Scanf]. *)letscanf_bad_inputib=function|Scan_failures|Failures->leti=Scanning.char_countibinbad_input(Printf.sprintf"scanf: bad input at char number %i: %s"is)|x->raisex(* Get the content of a counter from an input buffer. *)letget_counteribcounter=matchcounterwith|Line_counter->Scanning.line_countib|Char_counter->Scanning.char_countib|Token_counter->Scanning.token_countib(* Compute the width of a padding option (see "%42{" and "%123("). *)letwidth_of_pad_optpad_opt=matchpad_optwith|None->max_int|Somewidth->widthletstopper_of_formatting_litfmting=iffmting=Escaped_percentthen'%',""elseletstr=string_of_formatting_litfmtinginletstp=str.[1]inletsub_str=String.substr2(String.lengthstr-2)instp,sub_str(******************************************************************************)(* Reader management *)(* A call to take_format_readers on a format is evaluated into functions
taking readers as arguments and aggregate them into an heterogeneous list *)(* When all readers are taken, finally pass the list of the readers to the
continuation k. *)letrectake_format_readers:typeacdef.((d,e)heter_list->e)->(a,Scanning.in_channel,c,d,e,f)fmt->d=funkfmt->matchfmtwith|Readerfmt_rest->funreader->letnew_kreaders_rest=k(Cons(reader,readers_rest))intake_format_readersnew_kfmt_rest|Charrest->take_format_readerskrest|Caml_charrest->take_format_readerskrest|String(_,rest)->take_format_readerskrest|Caml_string(_,rest)->take_format_readerskrest|Int(_,_,_,rest)->take_format_readerskrest|Int32(_,_,_,rest)->take_format_readerskrest|Nativeint(_,_,_,rest)->take_format_readerskrest|Int64(_,_,_,rest)->take_format_readerskrest|Float(_,_,_,rest)->take_format_readerskrest|Bool(_,rest)->take_format_readerskrest|Alpharest->take_format_readerskrest|Thetarest->take_format_readerskrest|Flushrest->take_format_readerskrest|String_literal(_,rest)->take_format_readerskrest|Char_literal(_,rest)->take_format_readerskrest|Custom(_,_,rest)->take_format_readerskrest|Scan_char_set(_,_,rest)->take_format_readerskrest|Scan_get_counter(_,rest)->take_format_readerskrest|Scan_next_charrest->take_format_readerskrest|Formatting_lit(_,rest)->take_format_readerskrest|Formatting_gen(Open_tag(Format(fmt,_)),rest)->take_format_readersk(concat_fmtfmtrest)|Formatting_gen(Open_box(Format(fmt,_)),rest)->take_format_readersk(concat_fmtfmtrest)|Format_arg(_,_,rest)->take_format_readerskrest|Format_subst(_,fmtty,rest)->take_fmtty_format_readersk(erase_rel(symmfmtty))rest|Ignored_param(ign,rest)->take_ignored_format_readerskignrest|End_of_format->kNil(* Take readers associated to an fmtty coming from a Format_subst "%(...%)". *)andtake_fmtty_format_readers:typexyacdef.((d,e)heter_list->e)->(a,Scanning.in_channel,c,d,x,y)fmtty->(y,Scanning.in_channel,c,x,e,f)fmt->d=funkfmttyfmt->matchfmttywith|Reader_tyfmt_rest->funreader->letnew_kreaders_rest=k(Cons(reader,readers_rest))intake_fmtty_format_readersnew_kfmt_restfmt|Ignored_reader_tyfmt_rest->funreader->letnew_kreaders_rest=k(Cons(reader,readers_rest))intake_fmtty_format_readersnew_kfmt_restfmt|Char_tyrest->take_fmtty_format_readerskrestfmt|String_tyrest->take_fmtty_format_readerskrestfmt|Int_tyrest->take_fmtty_format_readerskrestfmt|Int32_tyrest->take_fmtty_format_readerskrestfmt|Nativeint_tyrest->take_fmtty_format_readerskrestfmt|Int64_tyrest->take_fmtty_format_readerskrestfmt|Float_tyrest->take_fmtty_format_readerskrestfmt|Bool_tyrest->take_fmtty_format_readerskrestfmt|Alpha_tyrest->take_fmtty_format_readerskrestfmt|Theta_tyrest->take_fmtty_format_readerskrestfmt|Any_tyrest->take_fmtty_format_readerskrestfmt|Format_arg_ty(_,rest)->take_fmtty_format_readerskrestfmt|End_of_fmtty->take_format_readerskfmt|Format_subst_ty(ty1,ty2,rest)->letty=trans(symmty1)ty2intake_fmtty_format_readersk(concat_fmttytyrest)fmt(* Take readers associated to an ignored parameter. *)andtake_ignored_format_readers:typexyacdef.((d,e)heter_list->e)->(a,Scanning.in_channel,c,d,x,y)ignored->(y,Scanning.in_channel,c,x,e,f)fmt->d=funkignfmt->matchignwith|Ignored_reader->funreader->letnew_kreaders_rest=k(Cons(reader,readers_rest))intake_format_readersnew_kfmt|Ignored_char->take_format_readerskfmt|Ignored_caml_char->take_format_readerskfmt|Ignored_string_->take_format_readerskfmt|Ignored_caml_string_->take_format_readerskfmt|Ignored_int(_,_)->take_format_readerskfmt|Ignored_int32(_,_)->take_format_readerskfmt|Ignored_nativeint(_,_)->take_format_readerskfmt|Ignored_int64(_,_)->take_format_readerskfmt|Ignored_float(_,_)->take_format_readerskfmt|Ignored_bool_->take_format_readerskfmt|Ignored_format_arg_->take_format_readerskfmt|Ignored_format_subst(_,fmtty)->take_fmtty_format_readerskfmttyfmt|Ignored_scan_char_set_->take_format_readerskfmt|Ignored_scan_get_counter_->take_format_readerskfmt|Ignored_scan_next_char->take_format_readerskfmt(******************************************************************************)(* Generic scanning *)(* Make a generic scanning function. *)(* Scan a stream according to a format and readers obtained by
take_format_readers, and aggregate scanned values into an
heterogeneous list. *)(* Return the heterogeneous list of scanned values. *)letrecmake_scanf:typeacdef.Scanning.in_channel->(a,Scanning.in_channel,c,d,e,f)fmt->(d,e)heter_list->(a,f)heter_list=funibfmtreaders->matchfmtwith|Charrest->let_=scan_char0ibinletc=token_charibinCons(c,make_scanfibrestreaders)|Caml_charrest->let_=scan_caml_char0ibinletc=token_charibinCons(c,make_scanfibrestreaders)|String(pad,Formatting_lit(fmting_lit,rest))->letstp,str=stopper_of_formatting_litfmting_litinletscanwidth_ib=scan_string(Somestp)widthibinletstr_rest=String_literal(str,rest)inpad_prec_scanfibstr_restreaderspadNo_precisionscantoken_string|String(pad,Formatting_gen(Open_tag(Format(fmt',_)),rest))->letscanwidth_ib=scan_string(Some'{')widthibinpad_prec_scanfib(concat_fmtfmt'rest)readerspadNo_precisionscantoken_string|String(pad,Formatting_gen(Open_box(Format(fmt',_)),rest))->letscanwidth_ib=scan_string(Some'[')widthibinpad_prec_scanfib(concat_fmtfmt'rest)readerspadNo_precisionscantoken_string|String(pad,rest)->letscanwidth_ib=scan_stringNonewidthibinpad_prec_scanfibrestreaderspadNo_precisionscantoken_string|Caml_string(pad,rest)->letscanwidth_ib=scan_caml_stringwidthibinpad_prec_scanfibrestreaderspadNo_precisionscantoken_string|Int(iconv,pad,prec,rest)->letc=integer_conversion_of_char(char_of_iconviconv)inletscanwidth_ib=scan_int_conversioncwidthibinpad_prec_scanfibrestreaderspadprecscan(token_intc)|Int32(iconv,pad,prec,rest)->letc=integer_conversion_of_char(char_of_iconviconv)inletscanwidth_ib=scan_int_conversioncwidthibinpad_prec_scanfibrestreaderspadprecscan(token_int32c)|Nativeint(iconv,pad,prec,rest)->letc=integer_conversion_of_char(char_of_iconviconv)inletscanwidth_ib=scan_int_conversioncwidthibinpad_prec_scanfibrestreaderspadprecscan(token_nativeintc)|Int64(iconv,pad,prec,rest)->letc=integer_conversion_of_char(char_of_iconviconv)inletscanwidth_ib=scan_int_conversioncwidthibinpad_prec_scanfibrestreaderspadprecscan(token_int64c)|Float((_,(Float_F|Float_CF)),pad,prec,rest)->pad_prec_scanfibrestreaderspadprecscan_caml_floattoken_float|Float((_,(Float_f|Float_e|Float_E|Float_g|Float_G)),pad,prec,rest)->pad_prec_scanfibrestreaderspadprecscan_floattoken_float|Float((_,(Float_h|Float_H)),pad,prec,rest)->pad_prec_scanfibrestreaderspadprecscan_hex_floattoken_float|Bool(pad,rest)->letscan__ib=scan_boolibinpad_prec_scanfibrestreaderspadNo_precisionscantoken_bool|Alpha_->invalid_arg"scanf: bad conversion \"%a\""|Theta_->invalid_arg"scanf: bad conversion \"%t\""|Custom_->invalid_arg"scanf: bad conversion \"%?\" (custom converter)"|Readerfmt_rest->beginmatchreaderswith|Cons(reader,readers_rest)->letx=readeribinCons(x,make_scanfibfmt_restreaders_rest)|Nil->invalid_arg"scanf: missing reader"end|Flushrest->ifScanning.end_of_inputibthenmake_scanfibrestreaderselsebad_input"end of input not found"|String_literal(str,rest)->String.iter(check_charib)str;make_scanfibrestreaders|Char_literal(chr,rest)->check_charibchr;make_scanfibrestreaders|Format_arg(pad_opt,fmtty,rest)->let_=scan_caml_string(width_of_pad_optpad_opt)ibinlets=token_stringibinletfmt=tryformat_of_string_fmttysfmttywithFailuremsg->bad_inputmsginCons(fmt,make_scanfibrestreaders)|Format_subst(pad_opt,fmtty,rest)->let_=scan_caml_string(width_of_pad_optpad_opt)ibinlets=token_stringibinletfmt,fmt'=tryletFmt_EBBfmt=fmt_ebb_of_stringsinletFmt_EBBfmt'=fmt_ebb_of_stringsin(* TODO: find a way to avoid reparsing twice *)(* TODO: these type-checks below *can* fail because of type
ambiguity in presence of ignored-readers: "%_r%d" and "%d%_r"
are typed in the same way.
# Scanf.sscanf "\"%_r%d\"3" "%(%d%_r%)" ignore
(fun fmt n -> string_of_format fmt, n)
Exception: CamlinternalFormat.Type_mismatch.
We should properly catch this exception.
*)type_formatfmt(erase_relfmtty),type_formatfmt'(erase_rel(symmfmtty))withFailuremsg->bad_inputmsginCons(Format(fmt,s),make_scanfib(concat_fmtfmt'rest)readers)|Scan_char_set(width_opt,char_set,Formatting_lit(fmting_lit,rest))->letstp,str=stopper_of_formatting_litfmting_litinletwidth=width_of_pad_optwidth_optinscan_chars_in_char_setchar_set(Somestp)widthib;lets=token_stringibinletstr_rest=String_literal(str,rest)inCons(s,make_scanfibstr_restreaders)|Scan_char_set(width_opt,char_set,rest)->letwidth=width_of_pad_optwidth_optinscan_chars_in_char_setchar_setNonewidthib;lets=token_stringibinCons(s,make_scanfibrestreaders)|Scan_get_counter(counter,rest)->letcount=get_counteribcounterinCons(count,make_scanfibrestreaders)|Scan_next_charrest->letc=Scanning.checked_peek_charibinCons(c,make_scanfibrestreaders)|Formatting_lit(formatting_lit,rest)->String.iter(check_charib)(string_of_formatting_litformatting_lit);make_scanfibrestreaders|Formatting_gen(Open_tag(Format(fmt',_)),rest)->check_charib'@';check_charib'{';make_scanfib(concat_fmtfmt'rest)readers|Formatting_gen(Open_box(Format(fmt',_)),rest)->check_charib'@';check_charib'[';make_scanfib(concat_fmtfmt'rest)readers|Ignored_param(ign,rest)->letParam_format_EBBfmt'=param_format_of_ignored_formatignrestinbeginmatchmake_scanfibfmt'readerswith|Cons(_,arg_rest)->arg_rest|Nil->assertfalseend|End_of_format->Nil(* Case analysis on padding and precision. *)(* Reject formats containing "%*" or "%.*". *)(* Pass padding and precision to the generic scanner `scan'. *)andpad_prec_scanf:typeacdefxyzt.Scanning.in_channel->(a,Scanning.in_channel,c,d,e,f)fmt->(d,e)heter_list->(x,y)padding->(y,z->a)precision->(int->int->Scanning.in_channel->t)->(Scanning.in_channel->z)->(x,f)heter_list=funibfmtreaderspadprecscantoken->matchpad,precwith|No_padding,No_precision->let_=scanmax_intmax_intibinletx=tokenibinCons(x,make_scanfibfmtreaders)|No_padding,Lit_precisionp->let_=scanmax_intpibinletx=tokenibinCons(x,make_scanfibfmtreaders)|Lit_padding((Right|Zeros),w),No_precision->let_=scanwmax_intibinletx=tokenibinCons(x,make_scanfibfmtreaders)|Lit_padding((Right|Zeros),w),Lit_precisionp->let_=scanwpibinletx=tokenibinCons(x,make_scanfibfmtreaders)|Lit_padding(Left,_),_->invalid_arg"scanf: bad conversion \"%-\""|Lit_padding((Right|Zeros),_),Arg_precision->invalid_arg"scanf: bad conversion \"%*\""|Arg_padding_,_->invalid_arg"scanf: bad conversion \"%*\""|No_padding,Arg_precision->invalid_arg"scanf: bad conversion \"%*\""(******************************************************************************)(* Defining [scanf] and various flavors of [scanf] *)letkscanf_genibefaf(Format(fmt,str))=letrecapply:typeab.a->(a,b)heter_list->b=funfargs->matchargswith|Cons(x,r)->apply(fx)r|Nil->finletkreadersf=Scanning.reset_tokenib;matchmake_scanfibfmtreaderswith|exception(Scan_failure_|Failure_|End_of_fileasexc)->efibexc|exceptionInvalid_argumentmsg->invalid_arg(msg^" in format \""^String.escapedstr^"\"")|args->af(applyfargs)intake_format_readerskfmtletkscanfibeffmt=kscanf_genibef(funx->x)fmtletkscanf_optibfmt=kscanf_genib(fun__->None)(funx->Somex)fmt(***)letkbscanf=kscanfletbscanfibfmt=kbscanfibscanf_bad_inputfmtletbscanf_optibfmt=kscanf_optibfmtletksscanfseffmt=kbscanf(Scanning.from_strings)effmtletsscanfsfmt=kbscanf(Scanning.from_strings)scanf_bad_inputfmtletsscanf_optsfmt=kscanf_opt(Scanning.from_strings)fmtletscanffmt=kscanfScanning.stdinscanf_bad_inputfmtletscanf_optfmt=kscanf_optScanning.stdinfmt(***)(* Scanning format strings. *)letbscanf_format:Scanning.in_channel->('a,'b,'c,'d,'e,'f)format6->(('a,'b,'c,'d,'e,'f)format6->'g)->'g=funibformatf->let_=scan_caml_stringmax_intibinletstr=token_stringibinletfmt'=tryformat_of_string_formatstrformatwithFailuremsg->bad_inputmsginffmt'letsscanf_format:string->('a,'b,'c,'d,'e,'f)format6->(('a,'b,'c,'d,'e,'f)format6->'g)->'g=funsformatf->bscanf_format(Scanning.from_strings)formatfletformat_from_stringsfmt=sscanf_format("\""^String.escapeds^"\"")fmt(funx->x)letunescapeds=sscanf("\""^s^"\"")"%S%!"(funx->x)