Module CamomileLibrary
module ConfigInt : sig
Type of configuration parameters
module type Type = sig
Configuration values
val datadir : string
Directory of compiled Unicode data
val charmapdir : string
Directory of compiled character mapping tables a la ISO
val unimapdir : string
Directory of camomile-style compiled character mapping table
val localedir : string
Directory of compiled locale data
end
end
module DefaultConfig : ConfigInt.Type
Default configuration.
module OOChannel : sig
TODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannel
TODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannel
TODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannelTODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannel
TODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannel
TODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannel
TODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannelTODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannel
TODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannelTODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannel
TODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannelTODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannel
TODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannelTODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannel
TODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannelTODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannel
TODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannelTODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannel
type 'a obj_input_channel = TODO: b
type 'a #obj_input_channel = TODO: b
type 'a obj_output_channel = TODO: b
type 'a #obj_output_channel = TODO: b
type 'a channel_of_stream = TODO: b
type 'a #channel_of_stream = TODO: b
type char_input_channel = TODO: a
type #char_input_channel = TODO: a
type char_output_channel = TODO: a
type #char_output_channel = TODO: a
type char_input_channel_of = TODO: a
type #char_input_channel_of = TODO: a
type char_obj_input_channel_of = TODO: a
type #char_obj_input_channel_of = TODO: a
type char_output_channel_of = TODO: a
type #char_output_channel_of = TODO: a
type char_obj_output_channel_of = TODO: a
type #char_obj_output_channel_of = TODO: a
type of_in_channel = TODO: a
type #of_in_channel = TODO: a
type of_out_channel = TODO: a
type #of_out_channel = TODO: a
end
module UChar : sig
type t
Unicode characters. All 31bit code points are allowed.
exception Out_of_range
val char_of : t -> char
char_of u
returns the Latin-1 representation of u
.
If u
can not be represented by Latin-1, raises Out_of_range
val of_char : char -> t
of_char c
returns the Unicode character of the Latin-1 character c
val code : t -> int
code u
returns the Unicode code number of u
.
If the value can not be represented by a positive integer,
raise Out_of_range
val chr : int -> t
chr n
returns the Unicode character with the code number n
.
If n >= 2^32 or n < 0, raises invalid_arg
val uint_code : t -> int
uint_code u
returns the Unicode code number of u
.
The returned int is unsigned, that is, on 32-bits platforms,
the sign bit is used for storing the 31-th bit of the code number.
val chr_of_uint : int -> t
chr_of_uint n
returns the Unicode character of the code number n
.
n
is interpreted as unsigned, that is, on 32-bits platforms,
the sign bit is treated as the 31-th bit of the code number.
If n exceeds 31-bit values, then raises invalid_arg
.
val eq : t -> t -> bool
Equality by code point comparison
val compare : t -> t -> int
compare u1 u2
returns,
a value > 0 if u1
has a larger Unicode code number than u2
,
0 if u1
and u2
are the same Unicode character,
a value < 0 if u1
has a smaller Unicode code number than u2
.
type uchar = t
Aliases of
type t
val int_of : uchar -> int
Alias of
uint_code
val of_int : int -> uchar
Alias of
chr_of_uint
end
module USet : sig
type t
val empty : t
val is_empty : t -> bool
val mem : UChar.t -> t -> bool
val add : UChar.t -> t -> t
val add_range : UChar.t -> UChar.t -> t -> t
add_range u1 u2 s
adds the characters in the range u1
- u2
to s
. The range is determined by the code point order.
val singleton : UChar.t -> t
val remove : UChar.t -> t -> t
val remove_range : UChar.t -> UChar.t -> t -> t
remove_range u1 u2 s
removes the characters in the range u1
- u2
from s
. The range is determined by the code point order.
val union : t -> t -> t
val inter : t -> t -> t
val diff : t -> t -> t
val compl : t -> t
compl s
returns the complement of s
.
val compare : t -> t -> int
val equal : t -> t -> bool
val subset : t -> t -> bool
val from : UChar.t -> t -> t
from u s
returns the set of elements of s
whose code points are equal or greater than u
.
val after : UChar.t -> t -> t
after u s
returns the set of elements of s
whose code points are greater than u
.
val until : UChar.t -> t -> t
until u s
returns the set of elements of s
whose code points are equal or smaller than u
.
val before : UChar.t -> t -> t
before u s
returns the set of elements of s
whose code points are smaller than u
.
val iter : (UChar.t -> unit) -> t -> unit
val iter_range : (UChar.t -> UChar.t -> unit) -> t -> unit
iter_range proc s
feeds the intervals contained in s
to
proc
in increasing order. The intervals given to proc
are always separated by the character not in s
.
val fold : (UChar.t -> 'a -> 'a) -> t -> 'a -> 'a
val fold_range : (UChar.t -> UChar.t -> 'a -> 'a) -> t -> 'a -> 'a
fold_range f s x
is equivalent to
f u_i u_(i+1) (... (f u_3 u_4 (f u_1 u_2 x)))
if s
consists of
the intervals u1
-u2
, u3
-u4
, ..., u_i
-u_(i + 1)
in increasing order. The intervals given to f
are always separated by the character not in s
.
val for_all : (UChar.t -> bool) -> t -> bool
val exists : (UChar.t -> bool) -> t -> bool
val filter : (UChar.t -> bool) -> t -> t
val partition : (UChar.t -> bool) -> t -> (t * t)
val cardinal : t -> int
val elements : t -> UChar.t list
val ranges : t -> (UChar.t * UChar.t) list
The list of the intervals contained in the set.
The returned intervals are always separated
by the character not in
s
.
val min_elt : t -> UChar.t
val max_elt : t -> UChar.t
val choose : t -> UChar.t
Returns an element roughly in the middle of the set.
It is not guaranteed to return the same element for
the sets with the same elements
val uset_of_iset : ISet.t -> t
val iset_of_uset : t -> ISet.t
end
module UMap : sig
type 'a t
Maps over Unicode characters.
val empty : 'a t
val is_empty : 'a t -> bool
val add : ?eq:('a -> 'a -> bool) -> UChar.t -> 'a -> 'a t -> 'a t
add ?eq u v m
returns the new map which is the same as m
except it maps u
to some value v'
which satisfies eq v v'
.
If eq
is not supplied, structural equality is used.
val add_range : ?eq:('a -> 'a -> bool) -> UChar.t -> UChar.t -> 'a -> 'a t -> 'a t
add_range ?eq u1 u2 v m
returns the new map which is the same as m
except it maps characters in the range u1
-u2
to some value v'
which satisfies eq v v'
.
If eq
is not supplied, structural equality is used.
val find : UChar.t -> 'a t -> 'a
val remove : UChar.t -> 'a t -> 'a t
val remove_range : UChar.t -> UChar.t -> 'a t -> 'a t
remove_range u1 u2 m
removes u1
-u2
from the domain of m
val from : UChar.t -> 'a t -> 'a t
from u m
restricts the domain of m
to the characters whose
code points are equal or greater than u
.
val after : UChar.t -> 'a t -> 'a t
after u m
restricts the domain of m
to the characters whose
code points are greater than u
.
val until : UChar.t -> 'a t -> 'a t
until u m
restricts the domain of m
to the characters whose
code points are equal or smaller than u
.
val before : UChar.t -> 'a t -> 'a t
before u m
restricts the domain of m
to the characters whose
code points are smaller than u
.
val mem : UChar.t -> 'a t -> bool
val iter : (UChar.t -> 'a -> unit) -> 'a t -> unit
val iter_range : (UChar.t -> UChar.t -> 'a -> unit) -> 'a t -> unit
iter_range proc m
: For each contiguous region u1
-u2
that is mapped to a constant v
, proc u1 u2 v
is called.
The order of call is determined by increasing order on u1
.
val map : ?eq:('b -> 'b -> bool) -> ('a -> 'b) -> 'a t -> 'b t
val mapi : ?eq:('b -> 'b -> bool) -> (UChar.t -> 'a -> 'b) -> 'a t -> 'b t
val fold : (UChar.t -> 'b -> 'a -> 'a) -> 'b t -> 'a -> 'a
val fold_range : (UChar.t -> UChar.t -> 'b -> 'a -> 'a) -> 'b t -> 'a -> 'a
fold_range f m x
is equivalent to
f u_(2n) u_(2n+1) v_n (... (f u_1 u_2 v_1 x))
where all characters in
the range u_(2k)
-u_(2k+1)
are mapped to v_k
and
u_1
< u_3
< ... in code point order.
For each range u_(2k)
-u_(2k+1)
is separated by a character
which is not mapped to v_k
.
val set_to_map : USet.t -> 'a -> 'a t
Constant map.
val domain : 'a t -> USet.t
Domain.
val map_to_set : ('a -> bool) -> 'a t -> USet.t
map_to_set p m
returns the set of characters which are mapped
to values satisfying the predicate p
by m
.
val umap_of_imap : 'a IMap.t -> 'a t
val imap_of_umap : 'a t -> 'a IMap.t
end
module UCharTbl : sig
type 'a tbl
Fast lookup tables. Accessible in constant time.
type 'a t = 'a tbl
val get : 'a tbl -> UChar.t -> 'a
module type Type = sig
type elt
type t = elt tbl
val get : elt tbl -> UChar.t -> elt
val of_map : elt -> elt UMap.t -> t
of_map def m
creates the table which has the same values as m
.
The table returns def
for the characters for which m
is undefined.
end
module Bool : sig
Tables for boolean values.
type t
val get : t -> UChar.t -> bool
val of_set : USet.t -> t
end
module Bits : sig
Tables for small (< 256, >=0) integers
type t
val of_map : int -> int UMap.t -> t
val get : t -> UChar.t -> int
end
module Bytes : sig
Tables for integers. If the integers do not span the whole 31-bit or
63-bit range,
Bytes.t
is more space efficient than int tbl
.
type t
val of_map : int -> int UMap.t -> t
val get : t -> UChar.t -> int
end
module Char : sig
Tables for bytes.
type t
val of_map : char -> char UMap.t -> t
val get : t -> UChar.t -> char
end
end
module UnicodeString : sig
module type Type = sig
type t
The type of string.
val get : t -> int -> UChar.t
get t i
: i
-th character of the storage.
val init : int -> (int -> UChar.t) -> t
init len f
creates a new storage.
the returned storage has length len
, its nth-element is f n
.
f
is called with integers 0 ... len - 1
, only once for each integer.
The call is in the increasing order f 0, f 1, f 2, ...
val length : t -> int
The number of Unicode characters in the storage
type index
locations in storages.
val look : t -> index -> UChar.t
look t i
: The character in the location i
of t
.
val nth : t -> int -> index
nth t n
: the location of the n
-th character in t
.
val next : t -> index -> index
val prev : t -> index -> index
val out_of_range : t -> index -> bool
val iter : (UChar.t -> unit) -> t -> unit
val compare : t -> t -> int
val first : t -> index
The location of the first character in the storage.
val last : t -> index
The location of the last character in the storage.
val move : t -> index -> int -> index
move t i n
:
if n
>= 0, then returns n
-th character after i
and
otherwise returns -n
-th character before i
.
If there is no such character, or i
does not point
a valid character, the result is unspecified.
val compare_index : t -> index -> index -> int
compare_index t i j
returns
a positive integer if i
is the location placed after j
in t
,
0 if i
and j
point the same location, and
a negative integer if i
is the location placed before j
in t
.
module Buf : sig
Character buffers. Similar to Buffer.
type buf
val create : int -> buf
create n
creates the buffer. n
is used to determine
the initial size of the buffer. The meaning of n
differs from
module to module.
val contents : buf -> t
val clear : buf -> unit
val reset : buf -> unit
val add_char : buf -> UChar.t -> unit
val add_string : buf -> t -> unit
val add_buffer : buf -> buf -> unit
end
end
end
module UText : sig
type mutability = TODO: a
Phantom type for distinguishing mutability
type 'a text
type utext = TODO: a text
type ustring = TODO: a text
type t = utext
val utext_of_ustring : ustring -> utext
val ustring_of_utext : utext -> ustring
val get : 'a text -> int -> UChar.t
val set : ustring -> int -> UChar.t -> unit
set s i u
sets the i
-th character in s
to u
.
type index
val look : 'a text -> index -> UChar.t
val nth : 'a text -> int -> index
val first : 'a text -> index
val last : 'a text -> index
val out_of_range : 'a text -> index -> bool
val compare_index : 'a text -> index -> index -> int
val next : 'a text -> index -> index
val prev : 'a text -> index -> index
val move : 'a text -> index -> int -> index
val length : 'a text -> int
val of_string : string -> utext
Conversion from Latin-1 strings.
val init : int -> (int -> UChar.t) -> utext
val init_ustring : int -> (int -> UChar.t) -> ustring
val make : int -> UChar.t -> ustring
The semantics of these functions are similar to
the equivalents of string.
val copy : ustring -> ustring
val sub : 'a text -> int -> int -> 'a text
val fill : ustring -> int -> int -> UChar.t -> unit
val blit : 'a text -> int -> ustring -> int -> int -> unit
val append : 'a text -> 'b text -> 'a text
val iter : (UChar.t -> unit) -> 'a text -> unit
val compare : 'a text -> 'b text -> int
module Buf : sig
type buf
val create : int -> buf
create n
creates the buffer which initially can contain
n
Unicode characters.
val contents : buf -> t
val contents_string : buf -> ustring
val length : buf -> int
val clear : buf -> unit
val reset : buf -> unit
val add_char : buf -> UChar.t -> unit
val add_string : buf -> 'a text -> unit
val add_buffer : buf -> buf -> unit
end
end
module XString : sig
type xstring
type t = xstring
val get : xstring -> int -> UChar.t
val set : xstring -> int -> UChar.t -> unit
val length : xstring -> int
val init : int -> (int -> UChar.t) -> xstring
type index
val look : xstring -> index -> UChar.t
val nth : xstring -> int -> index
val first : xstring -> index
val last : xstring -> index
val out_of_range : xstring -> index -> bool
val next : xstring -> index -> index
val prev : xstring -> index -> index
val move : xstring -> index -> int -> index
val compare_index : xstring -> index -> index -> int
val make : ?bufsize:int -> int -> UChar.t -> xstring
val clear : xstring -> unit
val reset : xstring -> unit
val copy : xstring -> xstring
val sub : xstring -> int -> int -> xstring
val add_char : xstring -> UChar.t -> unit
val add_text : xstring -> 'a UText.text -> unit
val add_xstring : xstring -> xstring -> unit
val shrink : xstring -> int -> unit
val append : xstring -> xstring -> xstring
val utext_of : xstring -> UText.t
val ustring_of : xstring -> UText.ustring
val iter : (UChar.t -> unit) -> xstring -> unit
val compare : t -> t -> int
module Buf : sig
type buf
val create : int -> buf
val contents : buf -> t
val length : buf -> int
val clear : buf -> unit
val reset : buf -> unit
val add_char : buf -> UChar.t -> unit
val add_string : buf -> t -> unit
val add_buffer : buf -> buf -> unit
end
end
module SubText : sig
module type Type = sig
Sub-texts, parts of original (ur-) texts.
The signature and semantics match those of UStorage.
type t
val get : t -> int -> UChar.t
val init : int -> (int -> UChar.t) -> t
val length : t -> int
type index
val look : t -> index -> UChar.t
val nth : t -> int -> index
val first : t -> index
val last : t -> index
val next : t -> index -> index
val prev : t -> index -> index
val move : t -> index -> int -> index
val out_of_range : t -> index -> bool
val compare_index : t -> index -> index -> int
val iter : (UChar.t -> unit) -> t -> unit
val compare : t -> t -> int
module Buf : sig
type buf
val create : int -> buf
val contents : buf -> t
val clear : buf -> unit
val reset : buf -> unit
val add_char : buf -> UChar.t -> unit
val add_string : buf -> t -> unit
val add_buffer : buf -> buf -> unit
end
type ur_text
The type of original texts.
type ur_index
The type of indexes of original texts.
val refer : ur_text -> ur_index -> ur_index -> t
refer t i j
returns the part of t
from i
until j
.
The character pointed by j
is not included in the result.
If j
is equal to i
or located before i
, the result is
an empty string.
val excerpt : t -> ur_text
excerpt t
copies the contents of t
as a new ur_text.
val context : t -> (ur_text * ur_index * ur_index)
context t
returns the tuple (s, i, j)
such that
t = refer s i j
.
val ur_index_of : t -> index -> ur_index
Conversion from indexes of sub-texts to ur_texts.
end
end
module ULine : sig
type separator = TODO: a
Line separators.
- `CR specifies carriage return.
- `LF specifies linefeed.
- `CRLF specifies the sequence of carriage return and linefeed.
- `NEL specifies next line (\u0085).
- `LS specifies Unicode line separator (\u2028).
- `PS specifies Unicode paragraph separator (\u2029).
type input = TODO: a
type #input = TODO: a
type output = TODO: a
type #output = TODO: a
module type Type = sig
type text
type input_line = TODO: a
type #input_line = TODO: a
type output_line = TODO: a
type #output_line = TODO: a
end
end
module Locale : sig
type t = string
Type of locales.
val read : string -> string -> (Pervasives.in_channel -> 'a) -> string -> 'a
read root suffix reader locale
reads locale information using reader
.
Locale data is supposed to reside in root
directory with
the name locale
.suffix
.
reader
takes in_channel
as an argument and read data from in_channel.
If data is not found, then reader
should raise Not_found.
If the file is not found or reader
raises Not_found, then
more generic locales are tried.
For example, if fr_CA.suffix
is not found, then read
tries fr.suffix
.
If fr.suffix
is also not found, then the file root
.suffix
is tried.
If the data is still not found, then Not_found
is raised.
val contain : string -> string -> bool
contain loc1 loc2
:
If loc1
is contained in loc2
then true otherwise false.
For example, "fr" is contained in "fr_CA" while "en_CA"
does not contain "fr"
end
module UTF8 : sig
type t = string
UTF-8 encoded Unicode strings. The type is normal string.
exception Malformed_code
val validate : t -> unit
validate s
succeeds if s is valid UTF-8, otherwise raises Malformed_code.
Other functions assume strings are valid UTF-8, so it is prudent
to test their validity for strings from untrusted origins.
val get : t -> int -> UChar.t
get s n
returns n
-th Unicode character of s
.
The call requires O(n)-time.
val init : int -> (int -> UChar.t) -> t
init len f
returns a new string which contains len
Unicode characters.
The i-th Unicode character is initialized by f i
val length : t -> int
length s
returns the number of Unicode characters contained in s
type index = int
Positions in the string represented by the number of bytes from the head.
The location of the first character is
0
val nth : t -> int -> index
nth s n
returns the position of the n
-th Unicode character.
The call requires O(n)-time
val first : t -> index
The position of the head of the first Unicode character.
val last : t -> index
The position of the head of the last Unicode character.
val look : t -> index -> UChar.t
look s i
returns the Unicode character of the location i
in the string s
.
val out_of_range : t -> index -> bool
out_of_range s i
tests whether i
is a position inside of s
.
val compare_index : t -> index -> index -> int
compare_index s i1 i2
returns
a value < 0 if i1
is the position located before i2
,
0 if i1
and i2
points the same location,
a value > 0 if i1
is the position located after i2
.
val next : t -> index -> index
next s i
returns the position of the head of the Unicode character
located immediately after i
.
If i
is inside of s
, the function always succeeds.
If i
is inside of s
and there is no Unicode character after i
,
the position outside s
is returned.
If i
is not inside of s
, the behaviour is unspecified.
val prev : t -> index -> index
prev s i
returns the position of the head of the Unicode character
located immediately before i
.
If i
is inside of s
, the function always succeeds.
If i
is inside of s
and there is no Unicode character before i
,
the position outside s
is returned.
If i
is not inside of s
, the behaviour is unspecified.
val move : t -> index -> int -> index
move s i n
returns n
-th Unicode character after i
if n >= 0,
n
-th Unicode character before i
if n < 0.
If there is no such character, the result is unspecified.
val iter : (UChar.t -> unit) -> t -> unit
iter f s
applies f
to all Unicode characters in s
.
The order of application is the same as the order
of the Unicode characters in s
.
val compare : t -> t -> int
Code point comparison by the lexicographic order.
compare s1 s2
returns
a positive integer if s1
> s2
,
0 if s1
= s2
,
a negative integer if s1
< s2
.
module Buf : sig
Buffer module for UTF-8 strings
val create : int -> buf
create n
creates the buffer with the initial size n
-bytes.
val contents : buf -> t
contents buf
returns the contents of the buffer.
val clear : buf -> unit
Empty the buffer,
but retains the internal storage which was holding the contents
val reset : buf -> unit
Empty the buffer and de-allocate the internal storage.
val add_char : buf -> UChar.t -> unit
Add one Unicode character to the buffer.
val add_string : buf -> t -> unit
Add the UTF-8 string to the buffer.
val add_buffer : buf -> buf -> unit
add_buffer b1 b2
adds the contents of b2
to b1
.
The contents of b2
is not changed.
end
end
module UTF16 : sig
type t = (int, Bigarray.int16_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t
UTF-16 encoded string. the type is the bigarray of 16-bit integers.
The characters must be 21-bits code points, and not surrogate points,
0xfffe, 0xffff.
Bigarray.cma or Bigarray.cmxa must be linked when this module is used.
exception Malformed_code
val validate : t -> unit
validate s
If s
is valid UTF-16 then succeeds, otherwise raises Malformed_code
.
Other functions assume strings are valid UTF-16, so it is prudent
to test their validity for strings from untrusted origins.
val get : t -> int -> UChar.t
get s n
returns n
-th Unicode character of s
.
The call requires O(n)-time.
exception Out_of_range
val init : int -> (int -> UChar.t) -> t
init len f
returns a new string which contains len
Unicode characters.
The i-th Unicode character is initialized by f i
if the character is not representable, raise Out_of_range
.
val length : t -> int
length s
returns the number of Unicode characters contained in s
type index = int
Positions in the string represented by the number of 16-bit unit
from the head.
The location of the first character is
0
val nth : t -> int -> index
nth s n
returns the position of the n
-th Unicode character.
The call requires O(n)-time
val first : t -> index
first s
: The position of the head of the first Unicode character.
val last : t -> index
last s
: The position of the head of the last Unicode character.
val look : t -> index -> UChar.t
look s i
returns the Unicode character of the location i
in the string s
.
val out_of_range : t -> index -> bool
out_of_range s i
tests whether i
is inside of s
.
val compare_index : t -> index -> index -> int
compare_index s i1 i2
returns
- If i1 is the position located before i2, a value < 0,
- If i1 and i2 point to the same location, 0,
- If i1 is the position located after i2, a value > 0.
val next : t -> index -> index
next s i
returns the position of the head of the Unicode character
located immediately after i
.
- If i is a valid position, the function always succeeds.
- If i is a valid position and there is no Unicode character after i, the position outside s is returned.
- If i is not a valid position, the behaviour is undefined.
val prev : t -> index -> index
prev s i
returns the position of the head of the Unicode character
located immediately before i
.
- If i is a valid position, the function always succeeds.
- If i is a valid position and there is no Unicode character before i, the position outside s is returned.
- If i is not a valid position, the behaviour is undefined.
val move : t -> index -> int -> index
val iter : (UChar.t -> unit) -> t -> unit
iter f s
Apply f
to all Unicode characters in s
.
The order of application is the same as the order
of the Unicode characters in s
.
val compare : t -> t -> int
Code point comparison
module Buf : sig
Buffer module for UTF-16
type buf
val create : int -> buf
create n : creates the buffer with the initial size
n
.
val contents : buf -> t
val clear : buf -> unit
val reset : buf -> unit
val add_char : buf -> UChar.t -> unit
if the character is not representable, raise Out_of_range
val add_string : buf -> t -> unit
val add_buffer : buf -> buf -> unit
end
end
module UCS4 : sig
exception Malformed_code
val validate : t -> unit
validate s
If s
is valid UCS4 then succeeds, otherwise raises Malformed_code
.
Other functions assume strings are valid UCS4, so it is prudent
to test their validity for strings from untrusted origins.
val get : t -> int -> UChar.t
get s n
returns n
-th Unicode character of s
.
val init : int -> (int -> UChar.t) -> t
init len f
returns a new string which contains len
Unicode characters.
The i-th Unicode character is initialised by f i
val length : t -> int
length s
returns the number of Unicode characters contained in s
type index = int
Positions in the string represented by the number of characters
from the head.
The location of the first character is
0
val nth : t -> int -> index
nth s n
returns the position of the n
-th Unicode character.
The call requires O(n)-time
val first : t -> index
first s
: The position of the head of the first Unicode character.
val last : t -> index
last s
: The position of the head of the last Unicode character.
val look : t -> index -> UChar.t
look s i
returns the Unicode character of the location i
in the string s
.
val out_of_range : t -> index -> bool
out_of_range s i
tests whether i
points the valid position of s
.
val compare_index : t -> index -> index -> int
compare_index s i1 i2
returns
If i1
is the position located before i2
, a value < 0,
If i1
and i2
points the same location, 0,
If i1
is the position located after i2
, a value > 0.
val next : t -> index -> index
next s i
returns the position of the head of the Unicode character
located immediately after i
.
If i
is a valid position, the function always succeeds.
If i
is a valid position and there is no Unicode character after i
,
the position outside s
is returned.
If i
is not a valid position, the behaviour is undefined.
val prev : t -> index -> index
prev s i
returns the position of the head of the Unicode character
located immediately before i
.
If i
is a valid position, the function always succeeds.
If i
is a valid position and there is no Unicode character before i
,
the position outside s
is returned.
If i
is not a valid position, the behaviour is undefined.
val move : t -> index -> int -> index
move s i n
:
If n >= 0, returns n
-th Unicode character after i
.
If n < 0, returns -n
-th Unicode character before i
.
If there is no such character, the result is unspecified.
val iter : (UChar.t -> unit) -> t -> unit
iter f s
:
Apply f
to all Unicode characters in s
.
The order of application is the same as the order
of the Unicode characters in s
.
val compare : t -> t -> int
Code point comparison
module Buf : sig
Buffer module for UCS4
type buf
create n
creates the buffer with the initial size n
.
val create : int -> buf
val contents : buf -> t
val clear : buf -> unit
val reset : buf -> unit
val add_char : buf -> UChar.t -> unit
val add_string : buf -> t -> unit
val add_buffer : buf -> buf -> unit
end
end
module URe : sig
type regexp = TODO: a
Abstract syntax trees of regular expressions.
type match_semantics = TODO: a
Match semantics.
val no_group : regexp -> regexp
Remove
`Group
from the regular expressions.
module type Type = sig
type text
type index
type compiled_regexp
module SubText : sig
type t
val get : t -> int -> UChar.t
val init : int -> (int -> UChar.t) -> t
val length : t -> int
type index
val look : t -> index -> UChar.t
val nth : t -> int -> index
val first : t -> index
val last : t -> index
val next : t -> index -> index
val prev : t -> index -> index
val move : t -> index -> int -> index
val out_of_range : t -> index -> bool
val compare_index : t -> index -> index -> int
val iter : (UChar.t -> unit) -> t -> unit
val compare : t -> t -> int
module Buf : sig
type buf
val create : int -> buf
val contents : buf -> t
val clear : buf -> unit
val reset : buf -> unit
val add_char : buf -> UChar.t -> unit
val add_string : buf -> t -> unit
val add_buffer : buf -> buf -> unit
end
type ur_text = text
type ur_index = index
val refer : ur_text -> ur_index -> ur_index -> t
refer t i j
returns the part of t
from i
until j
.
The character pointed by j
is not included in the result.
If j
is equal to i
or located before i
, the result is
an empty string.
val excerpt : t -> ur_text
excerpt t
copies the contents of t
as a new ur_text.
val context : t -> (ur_text * ur_index * ur_index)
context t
returns the tuple (s, i, j)
such that
t = refer s i j
.
val ur_index_of : t -> index -> ur_index
Conversion from indexes of sub-texts to ur_texts.
end
val compile : regexp -> compiled_regexp
Compile regular expressions.
val regexp_match : ?sem:match_semantics -> compiled_regexp -> text -> index -> SubText.t option array option
regexp_match ?sem r t i
tries matching r
and substrings
of t
beginning from i
. If the match succeeds, Some g
is
returned where g
is the array containing the matched
string of n
-th group in the n
-element.
The matched string of the whole r
is stored in the 0
-th element.
If matching fails, None
is returned.
val string_match : compiled_regexp -> text -> index -> bool
string_match r t i
tests whether r
can match a substring
of t
beginning from i
.
val search_forward : ?sem:match_semantics -> compiled_regexp -> text -> index -> SubText.t option array option
search_forward ?sem r t i
searches a substring of t
matching r
from i
. The returned value is similar to
URe.Type.regexp_match.
end
end
module CharEncoding : sig
end
module UCharInfo : sig
end
module UNF : sig
module type Type = UNF.Type
end
module UCol : sig
type variable_option = TODO: a
How variables are handled
type precision = TODO: a
Strength of comparison. For European languages, each strength
roughly means as
`Primary : Ignore accents and case
`Secondary : Ignore case but accents are counted in.
`Tertiary : Accents and case are counted in.
For the case of `Shifted, `Shift_Trimmed, there is the fourth strength.
`Quaternary : Variables such as - (hyphen) are counted in.
module type Type = UCol.Type
end
module CaseMap : sig
module type Type = CaseMap.Type
end
module UReStr : sig
end
module StringPrep : sig
module type Type = StringPrep.Type
end
module type Type = sig
module OOChannel : sig
TODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannel
TODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannel
TODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannelTODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannel
TODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannel
TODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannel
TODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannelTODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannel
TODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannelTODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannel
TODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannelTODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannel
TODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannelTODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannel
TODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannelTODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannel
TODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannelTODO: camomile.0.8.5/camomile/CamomileLibrary.OOChannel
type 'a obj_input_channel = TODO: b
type 'a #obj_input_channel = TODO: b
type 'a obj_output_channel = TODO: b
type 'a #obj_output_channel = TODO: b
type 'a channel_of_stream = TODO: b
type 'a #channel_of_stream = TODO: b
type char_input_channel = TODO: a
type #char_input_channel = TODO: a
type char_output_channel = TODO: a
type #char_output_channel = TODO: a
type char_input_channel_of = TODO: a
type #char_input_channel_of = TODO: a
type char_obj_input_channel_of = TODO: a
type #char_obj_input_channel_of = TODO: a
type char_output_channel_of = TODO: a
type #char_output_channel_of = TODO: a
type char_obj_output_channel_of = TODO: a
type #char_obj_output_channel_of = TODO: a
type of_in_channel = TODO: a
type #of_in_channel = TODO: a
type of_out_channel = TODO: a
type #of_out_channel = TODO: a
end
module UChar : sig
type t
Unicode characters. All 31bit code points are allowed.
exception Out_of_range
val char_of : t -> char
char_of u
returns the Latin-1 representation of u
.
If u
can not be represented by Latin-1, raises Out_of_range
val of_char : char -> t
of_char c
returns the Unicode character of the Latin-1 character c
val code : t -> int
code u
returns the Unicode code number of u
.
If the value can not be represented by a positive integer,
raise Out_of_range
val chr : int -> t
chr n
returns the Unicode character with the code number n
.
If n >= 2^32 or n < 0, raises invalid_arg
val uint_code : t -> int
uint_code u
returns the Unicode code number of u
.
The returned int is unsigned, that is, on 32-bits platforms,
the sign bit is used for storing the 31-th bit of the code number.
val chr_of_uint : int -> t
chr_of_uint n
returns the Unicode character of the code number n
.
n
is interpreted as unsigned, that is, on 32-bits platforms,
the sign bit is treated as the 31-th bit of the code number.
If n exceeds the 31-bit range, raises invalid_arg
.
val eq : t -> t -> bool
Equality by code point comparison
val compare : t -> t -> int
compare u1 u2
returns,
a value > 0 if u1
has a larger Unicode code number than u2
,
0 if u1
and u2
are the same Unicode character,
a value < 0 if u1
has a smaller Unicode code number than u2
.
type uchar = t
Alias of
type t
val int_of : uchar -> int
Alias of
uint_code
val of_int : int -> uchar
Alias of
chr_of_uint
end
module USet : sig
type t
val empty : t
val is_empty : t -> bool
val mem : UChar.t -> t -> bool
val add : UChar.t -> t -> t
val add_range : UChar.t -> UChar.t -> t -> t
add_range u1 u2 s
adds the characters in the range u1
- u2
to s
. The range is determined by the code point order.
val singleton : UChar.t -> t
val remove : UChar.t -> t -> t
val remove_range : UChar.t -> UChar.t -> t -> t
remove_range u1 u2 s
removes the characters in the range u1
- u2
from s
. The range is determined by the code point order.
val union : t -> t -> t
val inter : t -> t -> t
val diff : t -> t -> t
val compl : t -> t
compl s
returns the complement of s
.
val compare : t -> t -> int
val equal : t -> t -> bool
val subset : t -> t -> bool
val from : UChar.t -> t -> t
from u s
returns the set of elements of s
whose code points are equal or greater than u
.
val after : UChar.t -> t -> t
after u s
returns the set of elements of s
whose code points are greater than u
.
val until : UChar.t -> t -> t
until u s
returns the set of elements of s
whose code points are equal or smaller than u
.
val before : UChar.t -> t -> t
before u s
returns the set of elements of s
whose code points are smaller than u
.
val iter : UChar.t -> unit -> t -> unit
val iter_range : UChar.t -> UChar.t -> unit -> t -> unit
iter_range proc s
feeds the intervals contained in s
to
proc
in increasing order. The intervals given to proc
are always separated by characters not in s
.
val fold : UChar.t -> 'a -> 'a -> t -> 'a -> 'a
val fold_range : UChar.t -> UChar.t -> 'a -> 'a -> t -> 'a -> 'a
fold_range f s x
is equivalent to
f u_i u_(i+1) (... (f u_3 u_4 (f u_1 u_2 x)))
if s
consists of
the intervals u1
-u2
, u3
-u4
, ..., u_i
-u_(i + 1)
in increasing order. The intervals given to f
are always separated by characters not in s
.
val for_all : UChar.t -> bool -> t -> bool
val exists : UChar.t -> bool -> t -> bool
val filter : UChar.t -> bool -> t -> t
val partition : UChar.t -> bool -> t -> (t * t)
val cardinal : t -> int
val elements : t -> UChar.t list
val ranges : t -> (UChar.t * UChar.t) list
The list of the intervals contained in the set.
The returned intervals are always separated
by the character not in
s
.
val min_elt : t -> UChar.t
val max_elt : t -> UChar.t
val choose : t -> UChar.t
Returns an element roughly in the middle of the set.
It is not guaranteed to return the same element for
the sets with the same elements
val uset_of_iset : ISet.t -> t
val iset_of_uset : t -> ISet.t
end
module UMap : sig
type 'a t
Maps over Unicode characters.
val empty : 'a t
val is_empty : 'a t -> bool
val add : ?eq:'a -> 'a -> bool -> UChar.t -> 'a -> 'a t -> 'a t
add ?eq u v m
returns the new map which is the same as m
except it maps u
to some value v'
which satisfies eq v v'
.
If eq
is not supplied, structural equality is used.
val add_range : ?eq:'a -> 'a -> bool -> UChar.t -> UChar.t -> 'a -> 'a t -> 'a t
add_range ?eq u1 u2 v m
returns the new map which is the same as m
except it maps characters in the range u1
-u2
to some value v'
which satisfies eq v v'
.
If eq
is not supplied, structural equality is used.
val find : UChar.t -> 'a t -> 'a
val remove : UChar.t -> 'a t -> 'a t
val remove_range : UChar.t -> UChar.t -> 'a t -> 'a t
remove_range u1 u2 m
removes u1
-u2
from the domain of m
val from : UChar.t -> 'a t -> 'a t
from u m
restricts the domain of m
to the characters whose
code points are equal or greater than u
.
val after : UChar.t -> 'a t -> 'a t
after u m
restricts the domain of m
to the characters whose
code points are greater than u
.
val until : UChar.t -> 'a t -> 'a t
until u m
restricts the domain of m
to the characters whose
code points are equal or smaller than u
.
val before : UChar.t -> 'a t -> 'a t
before u m
restricts the domain of m
to the characters whose
code points are smaller than u
.
val mem : UChar.t -> 'a t -> bool
val iter : UChar.t -> 'a -> unit -> 'a t -> unit
val iter_range : UChar.t -> UChar.t -> 'a -> unit -> 'a t -> unit
iter_range proc m
: For each contiguous region u1
-u2
that is mapped to a constant v
, proc u1 u2 v
is called.
The order of call is determined by increasing order on u1
.
val map : ?eq:'b -> 'b -> bool -> 'a -> 'b -> 'a t -> 'b t
val mapi : ?eq:'b -> 'b -> bool -> UChar.t -> 'a -> 'b -> 'a t -> 'b t
val fold : UChar.t -> 'b -> 'a -> 'a -> 'b t -> 'a -> 'a
val fold_range : UChar.t -> UChar.t -> 'b -> 'a -> 'a -> 'b t -> 'a -> 'a
fold_range f m x
is equivalent to
f u_(2n) u_(2n+1) v_n (... (f u_1 u_2 v_1 x))
where all characters in
the range u_(2k)
-u_(2k+1)
are mapped to v_k
and
u_1
< u_3
< ... in code point order.
Each range u_(2k)
-u_(2k+1)
is separated by a character
which is not mapped to v_k
.
val set_to_map : USet.t -> 'a -> 'a t
Constant map.
val domain : 'a t -> USet.t
Domain.
val map_to_set : 'a -> bool -> 'a t -> USet.t
map_to_set p m
returns the set of characters which are mapped
to values satisfying the predicate p
by m
.
val umap_of_imap : 'a IMap.t -> 'a t
val imap_of_umap : 'a t -> 'a IMap.t
end
module UCharTbl : sig
type 'a tbl
Fast lookup tables. Lookup takes constant time.
type 'a t = 'a tbl
val get : 'a tbl -> UChar.t -> 'a
module type Type = sig
type elt
type t = elt tbl
val get : elt tbl -> UChar.t -> elt
val of_map : elt -> elt UMap.t -> t
of_map def m
creates the table which has the same values as m
.
The table returns def
for the characters for which m
is undefined.
end
module Bool : sig
Tables for boolean values.
type t
val get : t -> UChar.t -> bool
val of_set : USet.t -> t
end
module Bits : sig
Tables for small (< 256, >=0) integers
type t
val of_map : int -> int UMap.t -> t
val get : t -> UChar.t -> int
end
module Bytes : sig
Tables for integers. If the integers do not span the whole 31-bit or
63-bit range,
Bytes.t
is more space efficient than int tbl
.
type t
val of_map : int -> int UMap.t -> t
val get : t -> UChar.t -> int
end
module Char : sig
Tables for bytes.
type t
val of_map : char -> char UMap.t -> t
val get : t -> UChar.t -> char
end
end
module UnicodeString : sig
module type Type = sig
type t
The type of string.
val get : t -> int -> UChar.t
get t i
: i
-th character of the storage.
val init : int -> int -> UChar.t -> t
init len f
creates a new storage.
the returned storage has length len
, its nth-element is f n
.
f
is called with integers 0 ... len - 1
, only once for each integer.
The call is in the increasing order f 0, f 1, f 2, ...
val length : t -> int
The number of Unicode characters in the storage
type index
locations in storages.
val look : t -> index -> UChar.t
look t i
: The character in the location i
of t
.
val nth : t -> int -> index
nth t n
: the location of the n
-th character in t
.
val next : t -> index -> index
val prev : t -> index -> index
val out_of_range : t -> index -> bool
val iter : UChar.t -> unit -> t -> unit
val compare : t -> t -> int
val first : t -> index
The location of the first character in the storage.
val last : t -> index
The location of the last character in the storage.
val move : t -> index -> int -> index
move t i n
:
if n
>= 0, then returns n
-th character after i
and
otherwise returns -n
-th character before i
.
If there is no such character, or i
does not point
a valid character, the result is unspecified.
val compare_index : t -> index -> index -> int
compare_index t i j
returns
a positive integer if i
is the location placed after j
in t
,
0 if i
and j
point the same location, and
a negative integer if i
is the location placed before j
in t
.
module Buf : sig
Character buffers. Similar to Buffer.
type buf
val create : int -> buf
create n
creates the buffer. n
is used to determine
the initial size of the buffer. The meaning of n
differs from
module to module.
val contents : buf -> t
val clear : buf -> unit
val reset : buf -> unit
val add_char : buf -> UChar.t -> unit
val add_string : buf -> t -> unit
val add_buffer : buf -> buf -> unit
end
end
end
module UText : sig
type mutability = TODO: a
Phantom type for distinguishing mutability
type 'a text
type utext = TODO: a text
type ustring = TODO: a text
type t = utext
val utext_of_ustring : ustring -> utext
val ustring_of_utext : utext -> ustring
val get : 'a text -> int -> UChar.t
val set : ustring -> int -> UChar.t -> unit
set s i u
sets the i
-th character in s
to u
.
type index
val look : 'a text -> index -> UChar.t
val nth : 'a text -> int -> index
val first : 'a text -> index
val last : 'a text -> index
val out_of_range : 'a text -> index -> bool
val compare_index : 'a text -> index -> index -> int
val next : 'a text -> index -> index
val prev : 'a text -> index -> index
val move : 'a text -> index -> int -> index
val length : 'a text -> int
val of_string : string -> utext
Conversion from Latin-1 strings.
val init : int -> int -> UChar.t -> utext
val init_ustring : int -> int -> UChar.t -> ustring
val make : int -> UChar.t -> ustring
The semantics of these functions are similar to
the equivalents of string.
val copy : ustring -> ustring
val sub : 'a text -> int -> int -> 'a text
val fill : ustring -> int -> int -> UChar.t -> unit
val blit : 'a text -> int -> ustring -> int -> int -> unit
val append : 'a text -> 'b text -> 'a text
val iter : UChar.t -> unit -> 'a text -> unit
val compare : 'a text -> 'b text -> int
module Buf : sig
type buf
val create : int -> buf
create n
creates the buffer which initially can contain
n
Unicode characters.
val contents : buf -> t
val contents_string : buf -> ustring
val length : buf -> int
val clear : buf -> unit
val reset : buf -> unit
val add_char : buf -> UChar.t -> unit
val add_string : buf -> 'a text -> unit
val add_buffer : buf -> buf -> unit
end
end
module XString : sig
type xstring
type t = xstring
val get : xstring -> int -> UChar.t
val set : xstring -> int -> UChar.t -> unit
val length : xstring -> int
val init : int -> int -> UChar.t -> xstring
type index
val look : xstring -> index -> UChar.t
val nth : xstring -> int -> index
val first : xstring -> index
val last : xstring -> index
val out_of_range : xstring -> index -> bool
val next : xstring -> index -> index
val prev : xstring -> index -> index
val move : xstring -> index -> int -> index
val compare_index : xstring -> index -> index -> int
val make : ?bufsize:int -> int -> UChar.t -> xstring
val clear : xstring -> unit
val reset : xstring -> unit
val copy : xstring -> xstring
val sub : xstring -> int -> int -> xstring
val add_char : xstring -> UChar.t -> unit
val add_text : xstring -> 'a UText.text -> unit
val add_xstring : xstring -> xstring -> unit
val shrink : xstring -> int -> unit
val append : xstring -> xstring -> xstring
val utext_of : xstring -> UText.t
val ustring_of : xstring -> UText.ustring
val iter : UChar.t -> unit -> xstring -> unit
val compare : t -> t -> int
module Buf : sig
type buf
val create : int -> buf
val contents : buf -> t
val length : buf -> int
val clear : buf -> unit
val reset : buf -> unit
val add_char : buf -> UChar.t -> unit
val add_string : buf -> t -> unit
val add_buffer : buf -> buf -> unit
end
end
module SubText : sig
module type Type = sig
Sub-texts, parts of original (ur-) texts.
The signature and semantics match those of UStorage.
type t
val get : t -> int -> UChar.t
val init : int -> int -> UChar.t -> t
val length : t -> int
type index
val look : t -> index -> UChar.t
val nth : t -> int -> index
val first : t -> index
val last : t -> index
val next : t -> index -> index
val prev : t -> index -> index
val move : t -> index -> int -> index
val out_of_range : t -> index -> bool
val compare_index : t -> index -> index -> int
val iter : UChar.t -> unit -> t -> unit
val compare : t -> t -> int
module Buf : sig
type buf
val create : int -> buf
val contents : buf -> t
val clear : buf -> unit
val reset : buf -> unit
val add_char : buf -> UChar.t -> unit
val add_string : buf -> t -> unit
val add_buffer : buf -> buf -> unit
end
type ur_text
The type of original texts.
type ur_index
The type of indexes of original texts.
val refer : ur_text -> ur_index -> ur_index -> t
refer t i j
returns the part of t
from i
until j
.
The character pointed by j
is not included in the result.
If j
is equal to i
or located before i
, the result is
an empty string.
val excerpt : t -> ur_text
excerpt t
copies the contents of t
as a new ur_text.
val context : t -> (ur_text * ur_index * ur_index)
context t
returns the tuple (s, i, j)
such that
t = refer s i j
.
val ur_index_of : t -> index -> ur_index
Conversion from indexes of sub-texts to ur_texts.
end
end
module ULine : sig
type separator = TODO: a
Line separators.
`CR
specifies carriage return.
`LF
specifies linefeed.
`CRLF
specifies the sequence of carriage return and linefeed.
`NEL
specifies next line (\u0085).
`LS
specifies Unicode line separator (\u2028).
`PS
specifies Unicode paragraph separator (\u2029).
type input = TODO: a
type #input = TODO: a
type output = TODO: a
type #output = TODO: a
module type Type = sig
type text
type input_line = TODO: a
type #input_line = TODO: a
type output_line = TODO: a
type #output_line = TODO: a
end
end
module Locale : sig
type t = string
Type of locales.
val read : string -> string -> Pervasives.in_channel -> 'a -> string -> 'a
read root suffix reader locale
reads locale information using reader
.
Locale data is supposed to reside in root
directory with
the name locale
.suffix
.
reader
takes in_channel
as an argument and read data from in_channel.
If data is not found, then reader
should raise Not_found.
If the file is not found or reader
raises Not_found, then
more generic locales are tried.
For example, if fr_CA.suffix
is not found, then read
tries fr.suffix
.
If fr.suffix
is also not found, then the file root
.suffix
is tried.
If the data is still not found, then Not_found
is raised.
val contain : string -> string -> bool
contain loc1 loc2
:
If loc1
is contained in loc2
then true otherwise false.
For example, "fr" is contained in "fr_CA" while "en_CA"
does not contain "fr"
end
module CharEncoding : CharEncoding.Interface
module UTF8 : sig
type t = string
UTF-8 encoded Unicode strings. The type is normal string.
exception Malformed_code
val validate : t -> unit
validate s
succeeds if s is valid UTF-8, otherwise raises Malformed_code.
Other functions assume strings are valid UTF-8, so it is prudent
to test their validity for strings from untrusted origins.
val get : t -> int -> UChar.t
get s n
returns n
-th Unicode character of s
.
The call requires O(n)-time.
val init : int -> int -> UChar.t -> t
init len f
returns a new string which contains len
Unicode characters.
The i-th Unicode character is initialized by f i
val length : t -> int
length s
returns the number of Unicode characters contained in s
type index = int
Positions in the string represented by the number of bytes from the head.
The location of the first character is
0
val nth : t -> int -> index
nth s n
returns the position of the n
-th Unicode character.
The call requires O(n)-time
val first : t -> index
The position of the head of the first Unicode character.
val last : t -> index
The position of the head of the last Unicode character.
val look : t -> index -> UChar.t
look s i
returns the Unicode character of the location i
in the string s
.
val out_of_range : t -> index -> bool
out_of_range s i
tests whether i
is a position outside of s
.
val compare_index : t -> index -> index -> int
compare_index s i1 i2
returns
a value < 0 if i1
is the position located before i2
,
0 if i1
and i2
point to the same location,
a value > 0 if i1
is the position located after i2
.
val next : t -> index -> index
next s i
returns the position of the head of the Unicode character
located immediately after i
.
If i
is inside of s
, the function always succeeds.
If i
is inside of s
and there is no Unicode character after i
,
the position outside s
is returned.
If i
is not inside of s
, the behaviour is unspecified.
val prev : t -> index -> index
prev s i
returns the position of the head of the Unicode character
located immediately before i
.
If i
is inside of s
, the function always succeeds.
If i
is inside of s
and there is no Unicode character before i
,
the position outside s
is returned.
If i
is not inside of s
, the behaviour is unspecified.
val move : t -> index -> int -> index
move s i n
returns n
-th Unicode character after i
if n >= 0,
n
-th Unicode character before i
if n < 0.
If there is no such character, the result is unspecified.
val iter : UChar.t -> unit -> t -> unit
iter f s
applies f
to all Unicode characters in s
.
The order of application is the same as the order
of the Unicode characters in s
.
val compare : t -> t -> int
Code point comparison by the lexicographic order.
compare s1 s2
returns
a positive integer if s1
> s2
,
0 if s1
= s2
,
a negative integer if s1
< s2
.
module Buf : sig
Buffer module for UTF-8 strings
val create : int -> buf
create n
creates the buffer with the initial size n
-bytes.
val contents : buf -> t
contents buf
returns the contents of the buffer.
val clear : buf -> unit
Empty the buffer,
but retains the internal storage which was holding the contents
val reset : buf -> unit
Empty the buffer and de-allocate the internal storage.
val add_char : buf -> UChar.t -> unit
Add one Unicode character to the buffer.
val add_string : buf -> t -> unit
Add the UTF-8 string to the buffer.
val add_buffer : buf -> buf -> unit
add_buffer b1 b2
adds the contents of b2
to b1
.
The contents of b2
is not changed.
end
end
module UTF16 : sig
type t = (int, Bigarray.int16_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t
UTF-16 encoded string. the type is the bigarray of 16-bit integers.
The characters must be 21-bits code points, and not surrogate points,
0xfffe, 0xffff.
Bigarray.cma or Bigarray.cmxa must be linked when this module is used.
exception Malformed_code
val validate : t -> unit
validate s
If s
is valid UTF-16 then succeeds, otherwise raises Malformed_code
.
Other functions assume strings are valid UTF-16, so it is prudent
to test their validity for strings from untrusted origins.
val get : t -> int -> UChar.t
get s n
returns n
-th Unicode character of s
.
The call requires O(n)-time.
exception Out_of_range
val init : int -> int -> UChar.t -> t
init len f
returns a new string which contains len
Unicode characters.
The i-th Unicode character is initialized by f i
if the character is not representable, raise Out_of_range
.
val length : t -> int
length s
returns the number of Unicode characters contained in s
type index = int
Positions in the string represented by the number of 16-bit unit
from the head.
The location of the first character is
0
val nth : t -> int -> index
nth s n
returns the position of the n
-th Unicode character.
The call requires O(n)-time
val first : t -> index
first s
: The position of the head of the first Unicode character.
val last : t -> index
last s
: The position of the head of the last Unicode character.
val look : t -> index -> UChar.t
look s i
returns the Unicode character of the location i
in the string s
.
val out_of_range : t -> index -> bool
out_of_range s i
tests whether i
is outside of s
.
val compare_index : t -> index -> index -> int
compare_index s i1 i2
returns
- If i1
is the position located before i2
, a value < 0,
- If i1
and i2
point to the same location, 0,
- If i1
is the position located after i2
, a value > 0.
val next : t -> index -> index
next s i
returns the position of the head of the Unicode character
located immediately after i
.
- If i
is a valid position, the function always succeeds.
- If i
is a valid position and there is no Unicode character after i
, the position outside s
is returned.
- If i
is not a valid position, the behaviour is undefined.
val prev : t -> index -> index
prev s i
returns the position of the head of the Unicode character
located immediately before i
.
- If i
is a valid position, the function always succeeds.
- If i
is a valid position and there is no Unicode character before i
, the position outside s
is returned.
- If i
is not a valid position, the behaviour is undefined.
val move : t -> index -> int -> index
val iter : UChar.t -> unit -> t -> unit
iter f s
Apply f
to all Unicode characters in s
.
The order of application is the same as the order
of the Unicode characters in s
.
val compare : t -> t -> int
Code point comparison
module Buf : sig
Buffer module for UTF-16
type buf
val create : int -> buf
create n : creates the buffer with the initial size
n
.
val contents : buf -> t
val clear : buf -> unit
val reset : buf -> unit
val add_char : buf -> UChar.t -> unit
if the character is not representable, raise Out_of_range
val add_string : buf -> t -> unit
val add_buffer : buf -> buf -> unit
end
end
module UCS4 : sig
exception Malformed_code
val validate : t -> unit
validate s
If s
is valid UCS4 then succeeds, otherwise raises Malformed_code
.
Other functions assume strings are valid UCS4, so it is prudent
to test their validity for strings from untrusted origins.
val get : t -> int -> UChar.t
get s n
returns n
-th Unicode character of s
.
val init : int -> int -> UChar.t -> t
init len f
returns a new string which contains len
Unicode characters.
The i-th Unicode character is initialised by f i
val length : t -> int
length s
returns the number of Unicode characters contained in s
type index = int
Positions in the string represented by the number of characters
from the head.
The location of the first character is
0
val nth : t -> int -> index
nth s n
returns the position of the n
-th Unicode character.
The call requires O(n)-time
val first : t -> index
first s
: The position of the head of the first Unicode character.
val last : t -> index
last s
: The position of the head of the last Unicode character.
val look : t -> index -> UChar.t
look s i
returns the Unicode character of the location i
in the string s
.
val out_of_range : t -> index -> bool
out_of_range s i
tests whether i
does not point to a valid position of s
.
val compare_index : t -> index -> index -> int
compare_index s i1 i2
returns
If i1
is the position located before i2
, a value < 0,
If i1
and i2
point to the same location, 0,
If i1
is the position located after i2
, a value > 0.
val next : t -> index -> index
next s i
returns the position of the head of the Unicode character
located immediately after i
.
If i
is a valid position, the function always succeeds.
If i
is a valid position and there is no Unicode character after i
,
the position outside s
is returned.
If i
is not a valid position, the behaviour is undefined.
val prev : t -> index -> index
prev s i
returns the position of the head of the Unicode character
located immediately before i
.
If i
is a valid position, the function always succeeds.
If i
is a valid position and there is no Unicode character before i
,
the position outside s
is returned.
If i
is not a valid position, the behaviour is undefined.
val move : t -> index -> int -> index
move s i n
:
If n >= 0, returns n
-th Unicode character after i
.
If n < 0, returns -n
-th Unicode character before i
.
If there is no such character, the result is unspecified.
val iter : UChar.t -> unit -> t -> unit
iter f s
:
Apply f
to all Unicode characters in s
.
The order of application is the same as the order
of the Unicode characters in s
.
val compare : t -> t -> int
Code point comparison
module Buf : sig
Buffer module for UCS4
type buf
create n
creates the buffer with the initial size n
.
val create : int -> buf
val contents : buf -> t
val clear : buf -> unit
val reset : buf -> unit
val add_char : buf -> UChar.t -> unit
val add_string : buf -> t -> unit
val add_buffer : buf -> buf -> unit
end
end
module URe : sig
type regexp = TODO: a
Abstract syntax trees of regular expressions.
type match_semantics = TODO: a
Match semantics.
val no_group : regexp -> regexp
Remove
`Group
from the regular expressions.
module type Type = sig
type text
type index
type compiled_regexp
module SubText : sig
type t
val get : t -> int -> UChar.t
val init : int -> int -> UChar.t -> t
val length : t -> int
type index
val look : t -> index -> UChar.t
val nth : t -> int -> index
val first : t -> index
val last : t -> index
val next : t -> index -> index
val prev : t -> index -> index
val move : t -> index -> int -> index
val out_of_range : t -> index -> bool
val compare_index : t -> index -> index -> int
val iter : UChar.t -> unit -> t -> unit
val compare : t -> t -> int
module Buf : sig
type buf
val create : int -> buf
val contents : buf -> t
val clear : buf -> unit
val reset : buf -> unit
val add_char : buf -> UChar.t -> unit
val add_string : buf -> t -> unit
val add_buffer : buf -> buf -> unit
end
type ur_text = text
type ur_index = index
val refer : ur_text -> ur_index -> ur_index -> t
refer t i j
returns the part of t
from i
until j
.
The character pointed by j
is not included in the result.
If j
is equal to i
or located before i
, the result is
an empty string.
val excerpt : t -> ur_text
excerpt t
copies the contents of t
as a new ur_text.
val context : t -> (ur_text * ur_index * ur_index)
context t
returns the tuple (s, i, j)
such that
t = refer s i j
.
val ur_index_of : t -> index -> ur_index
Conversion from indexes of sub-texts to ur_texts.
end
val compile : regexp -> compiled_regexp
Compile regular expressions.
val regexp_match : ?sem:match_semantics -> compiled_regexp -> text -> index -> SubText.t option array option
regexp_match ?sem r t i
tries matching r
and substrings
of t
beginning from i
. If the match succeeds, Some g
is
returned where g
is the array containing the matched
string of n
-th group in the n
-th element.
The matched string of the whole r
is stored in the 0
-th element.
If matching fails, None
is returned.
val string_match : compiled_regexp -> text -> index -> bool
string_match r t i
tests whether r
can match a substring
of t
beginning from i
.
val search_forward : ?sem:match_semantics -> compiled_regexp -> text -> index -> SubText.t option array option
search_forward ?sem r t i
searches a substring of t
matching r
from i
. The returned value is similar to
URe.Type.regexp_match.
end
end
module UCharInfo : UCharInfo.Type
module UNF : sig
module type Type = UNF.Type
end
module UCol : sig
type variable_option = TODO: a
How variables are handled
type precision = TODO: a
Strength of comparison. For European languages, each strength
roughly means as
`Primary : Ignore accents and case
`Secondary : Ignore case but accents are counted in.
`Tertiary : Accents and case are counted in.
For the case of `Shifted, `Shift_Trimmed, there is the fourth strength.
`Quaternary : Variables such as - (hyphen) are counted in.
module type Type = UCol.Type
end
module CaseMap : sig
module type Type = CaseMap.Type
end
module UReStr : UReStr.Interface
module StringPrep : sig
module type Type = StringPrep.Type
end
end