(**************************************************************************)
(*                                                                        *)
(*                                 OCaml                                  *)
(*                                                                        *)
(*           Damien Doligez, projet Gallium, INRIA Rocquencourt           *)
(*                                                                        *)
(*   Copyright 2014 Institut National de Recherche en Informatique et     *)
(*     en Automatique.                                                    *)
(*                                                                        *)
(*   All rights reserved.  This file is distributed under the terms of    *)
(*   the GNU Lesser General Public License version 2.1, with the          *)
(*   special exception on linking described in the file LICENSE.          *)
(*                                                                        *)
(**************************************************************************)

(* String operations, based on byte sequence operations *)

(* WARNING: Some functions in this file are duplicated in bytes.ml for
   efficiency reasons. When you modify the one in this file you need to
   modify its duplicate in bytes.ml.
   These functions have a "duplicated" comment above their definition.
*)

(* Compiler primitives and C stubs used directly by this module. *)
external length : string -> int = "%string_length"
external get : string -> int -> char = "%string_safe_get"
external unsafe_get : string -> int -> char = "%string_unsafe_get"
external unsafe_blit : string -> int -> bytes -> int -> int -> unit
  = "caml_blit_string" [@@noalloc]

module B = Bytes

(* Short aliases for the unsafe bytes <-> string conversions of [Bytes].
   [bts] turns a byte sequence into a string, [bos] does the reverse. *)
let bts = B.unsafe_to_string
let bos = B.unsafe_of_string

(* [make n c] builds the result with [B.make n c] and views it as a string. *)
let make n c =
  B.make n c |> bts

(* [init n f] builds the result with [B.init n f] and views it as a string. *)
let init n f =
  B.init n f |> bts

(* The empty string. *)
let empty = ""

(* Conversions from and to byte sequences, delegated to [Bytes]. *)
let of_bytes = B.to_string
let to_bytes = B.of_string

(* [sub s ofs len] delegates to [B.sub] on the bytes view of [s]. *)
let sub s ofs len =
  B.sub (bos s) ofs len |> bts

(* [blit] is [B.blit_string]. *)
let blit =
  B.blit_string

(* [ensure_ge x y] returns [x] when [x >= y]; otherwise it raises
   [Invalid_argument "String.concat"]. *)
let ensure_ge (x : int) y =
  if x >= y then x else invalid_arg "String.concat"

(* [sum_lengths acc seplen l] adds to [acc] the length of every string in
   [l], plus [seplen] after every element except the last one.
   Each intermediate total is checked with [ensure_ge] against the previous
   accumulator, so a total that falls below it (as happens when the
   addition wraps around) is rejected. *)
let rec sum_lengths acc seplen = function
  | [] -> acc
  | hd :: [] -> length hd + acc
  | hd :: tl ->
      sum_lengths (ensure_ge (length hd + seplen + acc) acc) seplen tl

(* [unsafe_blits dst pos sep seplen l] copies the strings of [l] into [dst]
   starting at offset [pos], writing [sep] (whose length is [seplen])
   between consecutive elements, and returns [dst].
   [concat] below is the only caller visible here; it sizes [dst] with
   [sum_lengths]. *)
let rec unsafe_blits dst pos sep seplen = function
  | [] -> dst
  | hd :: [] ->
      unsafe_blit hd 0 dst pos (length hd);
      dst
  | hd :: tl ->
      unsafe_blit hd 0 dst pos (length hd);
      unsafe_blit sep 0 dst (pos + length hd) seplen;
      unsafe_blits dst (pos + length hd + seplen) sep seplen tl

(* [concat sep l] joins the strings of [l], inserting [sep] between
   consecutive elements. The empty list gives [""].
   Raises [Invalid_argument "String.concat"] when [sum_lengths] rejects the
   total length. *)
let concat sep = function
  | [] -> ""
  | l ->
      let seplen = length sep in
      bts @@
        unsafe_blits
          (B.create (sum_lengths 0 seplen l))
          0 sep seplen l

(* [cat] is the string concatenation operator. *)
let cat = ( ^ )

(* duplicated in bytes.ml *)
(* [iter f s] applies [f] to every character of [s], in increasing index
   order. *)
let iter f s =
  for i = 0 to length s - 1 do f (unsafe_get s i) done

(* duplicated in bytes.ml *)
(* [iteri f s] is like [iter], but [f] also receives the index as its first
   argument. *)
let iteri f s =
  for i = 0 to length s - 1 do f i (unsafe_get s i) done

(* The following traversals delegate to the function of the same name in
   [Bytes], applied to the bytes view of the string. *)
let map f s =
  B.map f (bos s) |> bts
let mapi f s =
  B.mapi f (bos s) |> bts
let fold_right f x a =
  B.fold_right f (bos x) a
let fold_left f a x =
  B.fold_left f a (bos x)
let exists f s =
  B.exists f (bos s)
let for_all f s =
  B.for_all f (bos s)

(* Beware: we cannot use B.trim or B.escape because they always make a
   copy, but String.mli spells out some cases where we are not allowed
   to make a copy. *)

(* [is_space c] is true for space, form feed ('\012'), newline, carriage
   return and tab. *)
let is_space = function
  | ' ' | '\012' | '\n' | '\r' | '\t' -> true
  | _ -> false

(* [trim s] returns [s] itself when it is empty or when neither its first
   nor its last character satisfies [is_space]; otherwise the work is
   delegated to [B.trim]. *)
let trim s =
  if s = "" then s
  else if is_space (unsafe_get s 0) || is_space (unsafe_get s (length s - 1))
  then bts (B.trim (bos s))
  else s

(* [escaped s] delegates to [B.unsafe_escape] on the bytes view of [s]. *)
let escaped s =
  let b = bos s in
  (* We satisfy [unsafe_escape]'s precondition by passing an
     immutable byte sequence [b]. *)
  bts (B.unsafe_escape b)

(* duplicated in bytes.ml *)
(* [index_rec s lim i c] scans forward from index [i] and returns the first
   index below [lim] holding [c]. Raises [Not_found] once [i >= lim]. *)
let rec index_rec s lim i c =
  if i >= lim then raise Not_found else
  if unsafe_get s i = c then i else index_rec s lim (i + 1) c

(* duplicated in bytes.ml *)
(* [index s c] is the index of the first occurrence of [c] in [s].
   Raises [Not_found] if there is none. *)
let index s c = index_rec s (length s) 0 c

(* duplicated in bytes.ml *)
(* Same scan as [index_rec], returning an option instead of raising. *)
let rec index_rec_opt s lim i c =
  if i >= lim then None else
  if unsafe_get s i = c then Some i else index_rec_opt s lim (i + 1) c

(* duplicated in bytes.ml *)
(* [index_opt s c] is [Some i] for the first occurrence of [c] in [s], or
   [None] if there is none. *)
let index_opt s c = index_rec_opt s (length s) 0 c

(* duplicated in bytes.ml *)
(* [index_from s i c] searches forward starting at index [i].
   Raises [Invalid_argument] unless [0 <= i <= length s]; [i = length s] is
   accepted and leads to [Not_found]. *)
let index_from s i c =
  let l = length s in
  if i < 0 || i > l then invalid_arg "String.index_from / Bytes.index_from" else
  index_rec s l i c

(* duplicated in bytes.ml *)
(* Option-returning variant of [index_from], with the same bounds check. *)
let index_from_opt s i c =
  let l = length s in
  if i < 0 || i > l then
    invalid_arg "String.index_from_opt / Bytes.index_from_opt"
  else
    index_rec_opt s l i c

(* duplicated in bytes.ml *)
(* [rindex_rec s i c] scans backward from index [i] and returns the first
   index found holding [c]. Raises [Not_found] once [i < 0]. *)
let rec rindex_rec s i c =
  if i < 0 then raise Not_found else
  if unsafe_get s i = c then i else rindex_rec s (i - 1) c

(* duplicated in bytes.ml *)
(* [rindex s c] is the index of the last occurrence of [c] in [s].
   Raises [Not_found] if there is none. *)
let rindex s c = rindex_rec s (length s - 1) c

(* duplicated in bytes.ml *)
(* [rindex_from s i c] searches backward starting at index [i].
   Raises [Invalid_argument] unless [-1 <= i < length s]; [i = -1] is
   accepted and leads to [Not_found]. *)
let rindex_from s i c =
  if i < -1 || i >= length s then
    invalid_arg "String.rindex_from / Bytes.rindex_from"
  else
    rindex_rec s i c

(* duplicated in bytes.ml *)
(* Same scan as [rindex_rec], returning an option instead of raising. *)
let rec rindex_rec_opt s i c =
  if i < 0 then None else
  if unsafe_get s i = c then Some i else rindex_rec_opt s (i - 1) c

(* duplicated in bytes.ml *)
(* [rindex_opt s c] is [Some i] for the last occurrence of [c] in [s], or
   [None] if there is none. *)
let rindex_opt s c = rindex_rec_opt s (length s - 1) c

(* duplicated in bytes.ml *)
(* Option-returning variant of [rindex_from], with the same bounds check. *)
let rindex_from_opt s i c =
  if i < -1 || i >= length s then
    invalid_arg "String.rindex_from_opt / Bytes.rindex_from_opt"
  else
    rindex_rec_opt s i c

(* duplicated in bytes.ml *)
(* [contains_from s i c] tells whether [c] occurs in [s] at index [i] or
   later. Raises [Invalid_argument] unless [0 <= i <= length s]. *)
let contains_from s i c =
  let l = length s in
  if i < 0 || i > l then
    invalid_arg "String.contains_from / Bytes.contains_from"
  else
    try ignore (index_rec s l i c); true with Not_found -> false

(* duplicated in bytes.ml *)
(* [contains s c] tells whether [c] occurs anywhere in [s]. *)
let contains s c = contains_from s 0 c

(* duplicated in bytes.ml *)
(* [rcontains_from s i c] tells whether [c] occurs in [s] at index [i] or
   earlier. Raises [Invalid_argument] unless [0 <= i < length s]. *)
let rcontains_from s i c =
  if i < 0 || i >= length s then
    invalid_arg "String.rcontains_from / Bytes.rcontains_from"
  else
    try ignore (rindex_rec s i c); true with Not_found -> false

(* ASCII case conversions, delegated to the function of the same name in
   [Bytes]. *)
let uppercase_ascii s =
  B.uppercase_ascii (bos s) |> bts
let lowercase_ascii s =
  B.lowercase_ascii (bos s) |> bts
let capitalize_ascii s =
  B.capitalize_ascii (bos s) |> bts
let uncapitalize_ascii s =
  B.uncapitalize_ascii (bos s) |> bts

(* duplicated in bytes.ml *)
(* [starts_with ~prefix s] is true when [s] is at least as long as [prefix]
   and its first [length prefix] characters equal those of [prefix]. *)
let starts_with ~prefix s =
  let len_s = length s
  and len_pre = length prefix in
  let rec aux i =
    if i = len_pre then true
    else if unsafe_get s i <> unsafe_get prefix i then false
    else aux (i + 1)
  in
  (* The length test comes first so that [aux] never reads past the end of
     [s]. *)
  len_s >= len_pre && aux 0

(* duplicated in bytes.ml *)
(* [ends_with ~suffix s] is true when [s] is at least as long as [suffix]
   and its last [length suffix] characters equal those of [suffix]. *)
let ends_with ~suffix s =
  let len_s = length s
  and len_suf = length suffix in
  (* Offset in [s] at which the candidate suffix starts. *)
  let diff = len_s - len_suf in
  let rec aux i =
    if i = len_suf then true
    else if unsafe_get s (diff + i) <> unsafe_get suffix i then false
    else aux (i + 1)
  in
  diff >= 0 && aux 0

(* Hashing: [hash] is [seeded_hash] with seed [0]. *)
external seeded_hash : int -> string -> int = "caml_string_hash" [@@noalloc]
let hash x = seeded_hash 0 x

(* duplicated in bytes.ml *)
(* [split_on_char sep s] cuts [s] at every occurrence of [sep] and returns
   the pieces in order. The result is never empty: a string without [sep]
   gives a one-element list.
   The scan runs from right to left so the list is built in order; [j] is
   the index just past the end of the piece currently being delimited. *)
let split_on_char sep s =
  let r = ref [] in
  let j = ref (length s) in
  for i = length s - 1 downto 0 do
    if unsafe_get s i = sep then begin
      r := sub s (i + 1) (!j - i - 1) :: !r;
      j := i
    end
  done;
  sub s 0 !j :: !r

type t = string

(* [compare] is [Stdlib.compare] with both arguments constrained to
   strings. *)
let compare (x : t) (y : t) = Stdlib.compare x y
external equal : string -> string -> bool = "caml_string_equal" [@@noalloc]

(** {1 Iterators} *)

let to_seq s = bos s |> B.to_seq

let to_seqi s = bos s |> B.to_seqi

let of_seq g = B.of_seq g |> bts

(* UTF decoders and validators *)

let get_utf_8_uchar s i = B.get_utf_8_uchar (bos s) i
let is_valid_utf_8 s = B.is_valid_utf_8 (bos s)

let get_utf_16be_uchar s i = B.get_utf_16be_uchar (bos s) i
let is_valid_utf_16be s = B.is_valid_utf_16be (bos s)

let get_utf_16le_uchar s i = B.get_utf_16le_uchar (bos s) i
let is_valid_utf_16le s = B.is_valid_utf_16le (bos s)

(** {6 Binary encoding/decoding of integers} *)

external get_uint8 : string -> int -> int = "%string_safe_get"
external get_uint16_ne : string -> int -> int = "%caml_string_get16"
external get_int32_ne : string -> int -> int32 = "%caml_string_get32"
external get_int64_ne : string -> int -> int64 = "%caml_string_get64"

(* The remaining accessors delegate to the function of the same name in
   [Bytes], applied to the bytes view of the string. *)
let get_int8 s i = B.get_int8 (bos s) i
let get_uint16_le s i = B.get_uint16_le (bos s) i
let get_uint16_be s i = B.get_uint16_be (bos s) i
let get_int16_ne s i = B.get_int16_ne (bos s) i
let get_int16_le s i = B.get_int16_le (bos s) i
let get_int16_be s i = B.get_int16_be (bos s) i
let get_int32_le s i = B.get_int32_le (bos s) i
let get_int32_be s i = B.get_int32_be (bos s) i
let get_int64_le s i = B.get_int64_le (bos s) i
let get_int64_be s i = B.get_int64_be (bos s) i