(**************************************************************************)
(*                                                                        *)
(*                                 OCaml                                  *)
(*                                                                        *)
(*                           Daniel C. Buenzli                            *)
(*                                                                        *)
(*   Copyright 2014 Institut National de Recherche en Informatique et     *)
(*     en Automatique.                                                    *)
(*                                                                        *)
(*   All rights reserved.  This file is distributed under the terms of    *)
(*   the GNU Lesser General Public License version 2.1, with the          *)
(*   special exception on linking described in the file LICENSE.          *)
(*                                                                        *)
(**************************************************************************)

(* Unicode scalar values represented as plain integers, together with a
   packed integer encoding of UTF decode results. *)

(* Runtime primitive used to format an integer with a printf-like format
   string. It is only used here to build error messages. *)
external format_int : string -> int -> string = "caml_format_int"

(* Error messages carried by the [Invalid_argument] exceptions raised
   below. *)
let err_no_pred = "U+0000 has no predecessor"
let err_no_succ = "U+10FFFF has no successor"
let err_not_sv i = format_int "%X" i ^ " is not an Unicode scalar value"
let err_not_latin1 u = "U+" ^ format_int "%04X" u ^ " is not a latin1 character"

(* A scalar value is stored directly as its integer code point. *)
type t = int

(* Smallest and largest scalar values. *)
let min = 0x0000
let max = 0x10FFFF

(* Edges of the gap rejected by [is_valid]: [lo_bound] is the last valid
   value below the gap and [hi_bound] the first valid value above it.
   The integers strictly in between (0xD800 to 0xDFFF, the surrogate
   range) are not scalar values. *)
let lo_bound = 0xD7FF
let hi_bound = 0xE000

(* U+FEFF (byte order mark) and U+FFFD (replacement character). *)
let bom = 0xFEFF
let rep = 0xFFFD

(* [succ u] is the scalar value after [u]. It jumps over the gap between
   [lo_bound] and [hi_bound].
   Raises [Invalid_argument] if [u] is [max]. *)
let succ u =
  if u = lo_bound then hi_bound else
  if u = max then invalid_arg err_no_succ else
  u + 1

(* [pred u] is the scalar value before [u]. It jumps over the gap between
   [hi_bound] and [lo_bound].
   Raises [Invalid_argument] if [u] is [min]. *)
let pred u =
  if u = hi_bound then lo_bound else
  if u = min then invalid_arg err_no_pred else
  u - 1

(* [is_valid i] is [true] iff [i] lies in [min;lo_bound] or in
   [hi_bound;max]. *)
let is_valid i = (min <= i && i <= lo_bound) || (hi_bound <= i && i <= max)

(* [of_int i] is [i] as a scalar value.
   Raises [Invalid_argument] if [is_valid i] is [false]. *)
let of_int i = if is_valid i then i else invalid_arg (err_not_sv i)

(* Unchecked conversions: both are the identity on the representation. *)
external unsafe_of_int : int -> t = "%identity"
external to_int : t -> int = "%identity"

(* [is_char u] is [true] iff [u] is below 256, that is iff it fits in a
   [char]. *)
let is_char u = u < 256

(* [of_char c] is the scalar value whose integer is the code of [c]. *)
let of_char c = Char.code c

(* [to_char u] is [u] as a [char].
   Raises [Invalid_argument] if [u] is greater than 255. *)
let to_char u =
  if u > 255 then invalid_arg (err_not_latin1 u) else
  Char.unsafe_chr u

(* Same as [to_char] but without the range check. *)
let unsafe_to_char = Char.unsafe_chr

(* Equality, ordering and hashing, all on the integer representation.
   The type annotations specialise the polymorphic primitives to [int]. *)
let equal : int -> int -> bool = ( = )
let compare : int -> int -> int = Stdlib.compare
let hash = to_int

(* UTF codecs tools *)

type utf_decode = int
(* This is an int [0xDUUUUUU] decomposed as follows:
   - [D] is four bits for decode information, the highest bit is set if the
     decode is valid. The three lower bits indicate the number of elements
     from the source that were consumed by the decode.
   - [UUUUUU] is the decoded Unicode character or the Unicode replacement
     character U+FFFD for invalid decodes. *)

(* Bit positions inside a [utf_decode]: [valid_bit] is the index of the
   validity flag (the top bit of [D]) and [decode_bits] is the index of
   the lowest bit of [D], i.e. the width of the character payload. *)
let valid_bit = 27
let decode_bits = 24

(* [utf_decode_is_valid d] is [true] iff the validity flag of [d] is set.
   The test shifts right by [valid_bit] and compares with 1, so it also
   requires every bit above the flag to be zero. *)
let[@inline] utf_decode_is_valid d = (d lsr valid_bit) = 1

(* [utf_decode_length d] is the three-bit element count stored in [d]. *)
let[@inline] utf_decode_length d = (d lsr decode_bits) land 0b111

(* [utf_decode_uchar d] is the character stored in the low 24 bits of
   [d]. *)
let[@inline] utf_decode_uchar d = unsafe_of_int (d land 0xFFFFFF)

(* [utf_decode n u] packs a valid decode of [u] that consumed [n]
   elements. [8 lor n] sets the top bit of [D] and stores [n] in its low
   bits. No range check is made on [n]: callers are presumably expected to
   pass a count that fits in three bits. *)
let[@inline] utf_decode n u = ((8 lor n) lsl decode_bits) lor (to_int u)

(* [utf_decode_invalid n] packs an invalid decode that consumed [n]
   elements. The validity flag stays clear and the character is [rep]. *)
let[@inline] utf_decode_invalid n = (n lsl decode_bits) lor rep

(* [utf_8_byte_length u] is 1, 2, 3 or 4 depending on which of the ranges
   up to 0x007F, 0x07FF, 0xFFFF and 0x10FFFF contains [u]. Values outside
   [0;0x10FFFF] trigger an assertion failure. *)
let utf_8_byte_length u = match to_int u with
| u when u < 0 -> assert false
| u when u <= 0x007F -> 1
| u when u <= 0x07FF -> 2
| u when u <= 0xFFFF -> 3
| u when u <= 0x10FFFF -> 4
| _ -> assert false

(* [utf_16_byte_length u] is 2 for [u] up to 0xFFFF and 4 for [u] up to
   0x10FFFF. Values outside [0;0x10FFFF] trigger an assertion failure. *)
let utf_16_byte_length u = match to_int u with
| u when u < 0 -> assert false
| u when u <= 0xFFFF -> 2
| u when u <= 0x10FFFF -> 4
| _ -> assert false