Source file core.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
(*
   RE - A regular expression library

   Copyright (C) 2001 Jerome Vouillon
   email: Jerome.Vouillon@pps.jussieu.fr

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation, with
   linking exception; either version 2.1 of the License, or (at
   your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*)

open Import

include struct
  (* Turns a raw character set into a regular expression (see [Ast.cset]). *)
  let cset = Ast.cset

  (* [char c] wraps the set [Cset.csingle c]; [rg c c'] wraps the set
     [Cset.cseq c c']. *)
  let char c = Cset.csingle c |> cset
  let rg c c' = Cset.cseq c c' |> cset

  (* Predefined character classes. Each one wraps the like-named [Cset]
     value, except [any] (from [Cset.cany]) and [digit] (from
     [Cset.cdigit]). *)
  let any = Cset.cany |> cset
  let notnl = Cset.notnl |> cset
  let lower = Cset.lower |> cset
  let upper = Cset.upper |> cset
  let alpha = Cset.alpha |> cset
  let digit = Cset.cdigit |> cset
  let alnum = Cset.alnum |> cset
  let wordc = Cset.wordc |> cset
  let ascii = Cset.ascii |> cset
  let blank = Cset.blank |> cset
  let cntrl = Cset.cntrl |> cset
  let graph = Cset.graph |> cset
  let print = Cset.print |> cset
  let punct = Cset.punct |> cset
  let space = Cset.space |> cset
  let xdigit = Cset.xdigit |> cset
end

include Ast.Export

(* Common entry point of the [exec*] functions: fills in the defaults
   [pos = 0] and [len = -1], then delegates to [Compile.match_str].
   NOTE(review): [len = -1] presumably means "up to the end of [s]" --
   confirm in [Compile]. *)
let exec_internal ?(pos = 0) ?(len = -1) ~partial ~groups re s =
  Compile.match_str ~pos ~len ~groups ~partial re s
;;

(* [exec ?pos ?len re s] runs a non-partial match with groups enabled and
   returns the matched groups.
   Raises [Not_found] for every outcome other than [Match]. *)
let exec ?pos ?len re s =
  match exec_internal ~partial:false ~groups:true ?pos ?len re s with
  | Match substr -> substr
  | Running _ | Failed -> raise Not_found
;;

(* Option-returning counterpart of [exec]: [Some groups] when the matcher
   reports [Match], [None] for every other outcome. *)
let exec_opt ?pos ?len re s =
  match exec_internal ~partial:false ~groups:true ?pos ?len re s with
  | Match substr -> Some substr
  | Running _ | Failed -> None
;;

(* Boolean match test: runs a non-partial match with groups disabled and
   returns [true] exactly when the matcher reports [Match]. *)
let execp ?pos ?len re s =
  match exec_internal ?pos ?len ~partial:false ~groups:false re s with
  | Match _ -> true
  | Running _ | Failed -> false
;;

(* Partial-match test (groups disabled). The matcher outcome is mapped to a
   polymorphic variant: [Match] gives [`Full], [Running] gives [`Partial]
   and [Failed] gives [`Mismatch]. *)
let exec_partial ?pos ?len re s =
  match exec_internal ?pos ?len ~partial:true ~groups:false re s with
  | Failed -> `Mismatch
  | Running _ -> `Partial
  | Match _ -> `Full
;;

(* Like [exec_partial] but with groups enabled and with payloads:
   [`Full] carries the matched groups and [`Partial] carries the
   [no_match_starts_before] value reported by the matcher. *)
let exec_partial_detailed ?pos ?len re s =
  match exec_internal ?pos ?len ~partial:true ~groups:true re s with
  | Failed -> `Mismatch
  | Running { no_match_starts_before = earliest } -> `Partial earliest
  | Match group -> `Full group
;;

module Mark = struct
  type t = Pmark.t

  module Set = Pmark.Set

  (* The set of marks stored in the [pmarks] field of a match result. *)
  let all (group : Group.t) = group.pmarks

  (* [test group mark] is [true] when [mark] is a member of
     [group.pmarks]. *)
  let test (group : Group.t) mark = Set.mem mark group.pmarks

  let compare = Pmark.compare
  let equal = Pmark.equal
end

(* A token of a split: either a piece of text ([`Text]) or the match group
   of a delimiter ([`Delim]).
   NOTE(review): presumably the element type produced by the [split_full]
   family -- confirm against [Search.split_full]. *)
type split_token =
  [ `Text of string
  | `Delim of Group.t
  ]

module Gen = struct
  (* A generator: each call yields the next element, or [None] once the
     underlying sequence is exhausted. *)
  type 'a gen = unit -> 'a option

  (* Adapts a [Seq.t] to a generator. The not-yet-consumed tail is kept in a
     mutable cell that advances by one node on every successful call. *)
  let gen_of_seq (s : 'a Seq.t) : 'a gen =
    let remaining = ref s in
    let next () =
      match !remaining () with
      | Seq.Cons (item, rest) ->
        remaining := rest;
        Some item
      | Seq.Nil -> None
    in
    next
  ;;

  (* Generator versions of the corresponding [Search] functions. *)
  let split ?pos ?len re s : _ gen = gen_of_seq (Search.split ?pos ?len re s)
  let split_full ?pos ?len re s : _ gen = gen_of_seq (Search.split_full ?pos ?len re s)
  let all ?pos ?len re s = gen_of_seq (Search.all ?pos ?len re s)
  let matches ?pos ?len re s = gen_of_seq (Search.matches ?pos ?len re s)
end

(* Re-export [Group] as a submodule of this module under the same name. *)
module Group = Group

(** {2 Deprecated functions} *)

(* [*_seq] names: plain aliases for the sequence-producing functions of
   [Search]. *)
let split_full_seq = Search.split_full
let split_seq = Search.split
let matches_seq = Search.matches
let all_seq = Search.all

(* Top-level alias for the generator type defined in [Gen]. *)
type 'a gen = 'a Gen.gen

(* [*_gen] names: plain aliases for the functions of [Gen]. *)
let all_gen = Gen.all
let matches_gen = Gen.matches
let split_gen = Gen.split
let split_full_gen = Gen.split_full

(* Alias for the match-group type [Group.t]. *)
type substrings = Group.t

(* Aliases for the [Group] accessors; note that [get_ofs] and [get_all_ofs]
   map to [Group.offset] and [Group.all_offset] respectively. *)
let get = Group.get
let get_ofs = Group.offset
let get_all = Group.all
let get_all_ofs = Group.all_offset
let test = Group.test

(* Alias for the mark type [Mark.t]. *)
type markid = Mark.t

(* Aliases for [Mark.test] and [Mark.all]. *)
let marked = Mark.test
let mark_set = Mark.all

(**********************************)

(*
   Information about the previous character:
   - does not exist
   - is a letter
   - is not a letter
   - is a newline
   - is last newline

   Beginning of word:
   - previous is not a letter or does not exist
   - current is a letter or does not exist

   End of word:
   - previous is a letter or does not exist
   - current is not a letter or does not exist

   Beginning of line:
   - previous is a newline or does not exist

   Beginning of buffer:
   - previous does not exist

   End of buffer:
   - current does not exist

   End of line:
   - current is a newline or does not exist
*)

(*
   Rep: e = T,e | ()
  - semantics of the comma (shortest/longest/first)
  - semantics of the union (greedy/non-greedy)

Bounded repetition
  a{0,3} = (a,(a,a?)?)?
*)

(* Alias for the match-group type [Group.t]. *)
type groups = Group.t

module List = struct
  (* Collects a sequence into a list, keeping the elements in sequence
     order: they are accumulated in reverse and flipped at the end. Here
     [List.rev] is the [List] module that was in scope before this
     definition. *)
  let list_of_seq (s : 'a Seq.t) : 'a list =
    let reversed = Seq.fold_left (fun acc item -> item :: acc) [] s in
    List.rev reversed
  ;;

  (* List-returning versions of the corresponding [Search] functions. *)
  let all ?pos ?len re s = list_of_seq (Search.all ?pos ?len re s)
  let matches ?pos ?len re s = list_of_seq (Search.matches ?pos ?len re s)
  let split_full ?pos ?len re s = list_of_seq (Search.split_full ?pos ?len re s)
  let split ?pos ?len re s = list_of_seq (Search.split ?pos ?len re s)
  let split_delim ?pos ?len re s = list_of_seq (Search.split_delim ?pos ?len re s)
end

(* Expose the contents of the [List] module defined just above ([all],
   [matches], [split_full], [split], [split_delim], [list_of_seq]) at the
   top level of this module as well. *)
include List

(* Re-export the compiled-regex type of [Compile] together with its
   compilation, printing and group-inspection functions. *)
include struct
  type re = Compile.re

  let compile = Compile.compile
  let pp_re = Compile.pp_re
  let print_re = Compile.print_re
  let group_names = Compile.group_names
  let group_count = Compile.group_count
end

(* Expose [Search] under the name [Seq]. From this point on, [Seq] refers
   to [Search] rather than to the [Seq] module that was in scope above. *)
module Seq = Search