Module Zed_utf8
type t = string
Type of UTF-8 encoded strings.
exception Invalid of string * string
Invalid(error, text)
Exception raised when an invalid UTF-8
encoded string is encountered. text
is the faulty text and
error
is a description of the first error in text
.
exception Out_of_bounds
Exception raised when trying to access a character which is
outside the bounds of a string.
type check_result =
| Correct of int (* The string is correctly UTF-8 encoded, and the parameter is
the length of the string. *)
| Message of string (* The string is invalid and the parameter is an error
message. *)
Result of checking a string for correct UTF-8.
val check : t -> check_result
check str
checks that str
is a valid UTF-8 encoded
string.
val validate : t -> int
Same as check but raises an exception in case the argument is
not a valid text, otherwise returns the length of the string.
val next_error : t -> int -> (int * int * string)
next_error str ofs
returns (ofs', count, msg)
where ofs'
is the offset of the start of the first invalid sequence after
ofs
(inclusive) in str
, count
is the number of Unicode
characters between ofs
and ofs'
(exclusive) and msg
is an
error message. If there is no error until the end of string then
ofs'
is String.length str
and msg
is the empty string.
val singleton : CamomileLibrary.UChar.t -> t
singleton ch
creates a string of length 1 containing only the
given character.
val make : int -> CamomileLibrary.UChar.t -> t
make n ch
creates a string of length n
filled with ch
.
val init : int -> (int -> CamomileLibrary.UChar.t) -> t
init n f
returns the concatenation of singleton (f 0)
,
singleton (f 1)
, ..., singleton (f (n - 1))
.
val rev_init : int -> (int -> CamomileLibrary.UChar.t) -> t
rev_init n f
returns the concatenation of singleton (f (n -
1))
, ..., singleton (f 1)
, singleton (f 0)
.
val length : t -> int
Returns the length of the given string.
val compare : t -> t -> int
Compares two strings (in code point order).
val get : t -> int -> CamomileLibrary.UChar.t
get str idx
returns the character at index idx
in
str
.
val sub : t -> int -> int -> t
sub str ofs len
Returns the sub-string of str
starting at
ofs
and of length len
.
val break : t -> int -> (t * t)
break str pos
returns the sub-strings before and after pos
in str
. It is more efficient than creating two sub-strings
with sub.
val before : t -> int -> t
before str pos
returns the sub-string before pos
in str
val after : t -> int -> t
after str pos
returns the sub-string after pos
in str
val insert : t -> int -> t -> t
insert str pos sub
inserts sub
in str
at position
pos
.
val remove : t -> int -> int -> t
remove str pos len
removes the len
characters at position
pos
in str
val replace : t -> int -> int -> t -> t
replace str pos len repl
replaces the len
characters at
position pos
in str
by repl
.
val rev : t -> t
rev str
reverses all characters of str
.
val concat : t -> t list -> t
concat sep l
returns the concatenation of all strings of l
separated by sep
.
val rev_concat : t -> t list -> t
rev_concat sep l
returns the concatenation of all strings of l
in reverse order separated by sep
.
val explode : t -> CamomileLibrary.UChar.t list
explode str
returns the list of all characters of str
.
val rev_explode : t -> CamomileLibrary.UChar.t list
rev_explode str
returns the list of all characters of str
in
reverse order.
val implode : CamomileLibrary.UChar.t list -> t
implode l
returns the concatenation of all characters of l
.
val rev_implode : CamomileLibrary.UChar.t list -> t
rev_implode l
is the same as implode (List.rev l)
but more
efficient.
val iter : (CamomileLibrary.UChar.t -> unit) -> t -> unit
iter f str
applies f
on all characters of str
starting
from the left.
val rev_iter : (CamomileLibrary.UChar.t -> unit) -> t -> unit
rev_iter f str
applies f
on all characters of str
starting
from the right.
val fold : (CamomileLibrary.UChar.t -> 'a -> 'a) -> t -> 'a -> 'a
fold f str acc
applies f
on all characters of str
starting from the left, accumulating a value.
val rev_fold : (CamomileLibrary.UChar.t -> 'a -> 'a) -> t -> 'a -> 'a
rev_fold f str acc
applies f
on all characters of str
starting from the right, accumulating a value.
val map : (CamomileLibrary.UChar.t -> CamomileLibrary.UChar.t) -> t -> t
map f str
maps all characters of str
with f
.
val rev_map : (CamomileLibrary.UChar.t -> CamomileLibrary.UChar.t) -> t -> t
rev_map f str
maps all characters of str
with f
in reverse
order.
val map_concat : (CamomileLibrary.UChar.t -> t) -> t -> t
map_concat f str
maps all characters of str
with f
and
concatenate the result.
val rev_map_concat : (CamomileLibrary.UChar.t -> t) -> t -> t
rev_map_concat f str
maps all characters of str
with f
in reverse
order and concatenate the result.
val filter : (CamomileLibrary.UChar.t -> bool) -> t -> t
filter f str
filters characters of str
with f
.
val rev_filter : (CamomileLibrary.UChar.t -> bool) -> t -> t
rev_filter f str
filters characters of str
with f
in
reverse order.
val filter_map : (CamomileLibrary.UChar.t -> CamomileLibrary.UChar.t option) -> t -> t
filter_map f str
filters and maps characters of str
with
f
.
val rev_filter_map : (CamomileLibrary.UChar.t -> CamomileLibrary.UChar.t option) -> t -> t
rev_filter_map f str
filters and maps characters of str
with
f
in reverse order.
val filter_map_concat : (CamomileLibrary.UChar.t -> t option) -> t -> t
filter_map_concat f str
filters and maps characters of str
with f
and concatenate the result.
val rev_filter_map_concat : (CamomileLibrary.UChar.t -> t option) -> t -> t
rev_filter_map_concat f str
filters and maps characters of str
with
f
in reverse order and concatenate the result.
val for_all : (CamomileLibrary.UChar.t -> bool) -> t -> bool
for_all f text
returns whether all characters of text
verify
the predicate f
.
val exists : (CamomileLibrary.UChar.t -> bool) -> t -> bool
exists f text
returns whether at least one character of text
verifies f
.
val count : (CamomileLibrary.UChar.t -> bool) -> t -> int
count f text
returns the number of characters of text
verifying f
.
val contains : t -> t -> bool
contains text sub
returns whether sub
appears in text
val starts_with : t -> t -> bool
starts_with text prefix
returns true
iff text
starts with
prefix
.
val ends_with : t -> t -> bool
ends_with text suffix
returns true
iff text
ends with
suffix
.
val strip : ?predicate:(CamomileLibrary.UChar.t -> bool) -> t -> t
strip ?predicate text
returns text
without its first and
last characters that match predicate
. predicate
defaults to
testing whether the given character has the `White_Space
unicode property. For example: strip "\n foo\n " = "foo"
val lstrip : ?predicate:(CamomileLibrary.UChar.t -> bool) -> t -> t
lstrip ?predicate text
is the same as strip but it only
removes characters at the left of text
.
val rstrip : ?predicate:(CamomileLibrary.UChar.t -> bool) -> t -> t
rstrip ?predicate text
is the same as strip but it only
removes characters at the right of text
.
val lchop : t -> t
lchop t
returns t
without its first character. Returns ""
if t = ""
val rchop : t -> t
rchop t
returns t
without its last character. Returns ""
if
t = ""
.
val add : Buffer.t -> CamomileLibrary.UChar.t -> unit
add buf ch
is the same as Buffer.add_string buf (singleton
ch)
but is more efficient.
val escaped_char : CamomileLibrary.UChar.t -> t
escaped_char ch
returns a string containing ch
or an escaped
version of ch
if:
- ch is a control character (code < 32)
- ch is the character with code 127
- ch is a non-ASCII, non-alphabetic character
It uses the syntax \xXX
, \uXXXX
, \UXXXXXX
or a specific
escape sequence \n, \r, ...
.
val add_escaped_char : Buffer.t -> CamomileLibrary.UChar.t -> unit
add_escaped_char buf ch
is the same as Buffer.add_string buf
(escaped_char ch)
but a bit more efficient.
val escaped : t -> t
escaped text
escapes all characters of text
as with
escaped_char
.
val escaped_string : CamomileLibraryDyn.Camomile.CharEncoding.t -> string -> t
escaped_string enc str
escapes the string str
which is
encoded with encoding enc
. If decoding str
with enc
fails,
it escapes all non-printable bytes of str
with the syntax
\yAB
.
val add_escaped_string : Buffer.t -> CamomileLibraryDyn.Camomile.CharEncoding.t -> string -> unit
add_escaped_string buf enc text
is the same as
Buffer.add_string buf (escaped_string enc text)
but a bit more
efficient.
val next : t -> int -> int
next str ofs
returns the offset of the next character in
str
.
val prev : t -> int -> int
prev str ofs
returns the offset of the previous character in
str
.
val extract : t -> int -> CamomileLibrary.UChar.t
extract str ofs
returns the code-point at offset ofs
in
str
.
val extract_next : t -> int -> (CamomileLibrary.UChar.t * int)
extract_next str ofs
returns the code-point at offset ofs
in
str
and the offset of the next character.
val extract_prev : t -> int -> (CamomileLibrary.UChar.t * int)
extract_prev str ofs
returns the code-point at the previous
offset in str
and this offset.
val unsafe_next : t -> int -> int
unsafe_next str ofs
returns the offset of the next character
in str
.
val unsafe_prev : t -> int -> int
unsafe_prev str ofs
returns the offset of the previous
character in str
.
val unsafe_extract : t -> int -> CamomileLibrary.UChar.t
unsafe_extract str ofs
returns the code-point at offset ofs
in str
.
val unsafe_extract_next : t -> int -> (CamomileLibrary.UChar.t * int)
unsafe_extract_next str ofs
returns the code-point at offset
ofs
in str
and the offset of the next character.
val unsafe_extract_prev : t -> int -> (CamomileLibrary.UChar.t * int)
unsafe_extract_prev str ofs
returns the code-point at the
previous offset in str
and this offset.