2010-04-01
Added tag release-6.1.0 for changeset 3dd5b45ff844
1 (*
2 PCRE-OCAML - Perl Compatibility Regular Expressions for OCaml
4 Copyright (C) 1999- Markus Mottl
5 email: markus.mottl@gmail.com
6 WWW: http://www.ocaml.info
8 This library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public
10 License as published by the Free Software Foundation; either
11 version 2 of the License, or (at your option) any later version.
13 This library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with this library; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 *)
23 (** Perl Compatibility Regular Expressions *)
26 (** {6 Exceptions} *)
28 type error =
29 | Partial (** String only matched the pattern partially *)
30 | BadPartial (** Pattern contains items that cannot be used together
31 with partial matching. *)
32 | BadPattern of string * int (** [BadPattern (msg, pos)] regular
33 expression is malformed. The reason
34 is in [msg], the position of the
35 error in the pattern in [pos]. *)
36 | BadUTF8 (** UTF8 string being matched is invalid *)
37 | BadUTF8Offset (** Gets raised when a UTF8 string being matched with
38 offset is invalid. *)
39 | MatchLimit (** Maximum allowed number of match attempts with
40 backtracking or recursion is reached during matching.
41 ALL FUNCTIONS CALLING THE MATCHING ENGINE MAY RAISE
42 IT!!! *)
43 | RecursionLimit (** NOTE(review): not documented in this interface; presumably raised when the matching engine's recursion limit is reached (cf. [MatchLimit]) - confirm against the C stubs. *)
44 | InternalError of string
45 (** [InternalError msg] C-library exhibits unknown/undefined
46 behaviour. The reason is in [msg]. *)
48 (** Exception indicating PCRE errors. *)
49 exception Error of error
51 (** [Backtrack] used in callout functions to force backtracking. *)
52 exception Backtrack
54 (** [Regexp_or (pat, error)] gets raised for sub-pattern [pat] by [regexp_or]
55 if it failed to compile. *)
56 exception Regexp_or of string * error
58 (** {6 Compilation and runtime flags and their conversion functions} *)
60 type icflag (** Internal representation of compilation flags *)
61 and irflag (** Internal representation of runtime flags *)
63 (** Compilation flags *)
64 and cflag =
65 [ `CASELESS (** Case insensitive matching *)
66 | `MULTILINE (** '^' and '$' match before/after newlines,
67 not just at the beginning/end of a string *)
68 | `DOTALL (** '.' matches all characters (newlines, too) *)
69 | `EXTENDED (** Ignores whitespace and PERL-comments. Behaves
70 like the '/x'-option in PERL *)
71 | `ANCHORED (** Pattern matches only at start of string *)
72 | `DOLLAR_ENDONLY (** '$' in pattern matches only at end of string *)
73 | `EXTRA (** Reserved for future extensions of PCRE *)
74 | `UNGREEDY (** Quantifiers not greedy anymore, only
75 if followed by '?' *)
76 | `UTF8 (** Treats patterns and strings as UTF8 characters. *)
77 | `NO_UTF8_CHECK (** Turns off validity checks on UTF8 strings for
78 efficiency reasons. WARNING: invalid UTF8
79 strings may cause a crash then! *)
80 | `NO_AUTO_CAPTURE (** Disables the use of numbered capturing parentheses *)
81 | `AUTO_CALLOUT (** Automatically inserts callouts with id 255
82 before each pattern item *)
83 | `FIRSTLINE (** Unanchored patterns must match before/at first NL *)
84 ]
86 val cflags : cflag list -> icflag
87 (** [cflags cflag_list] converts a list of compilation flags to
88 their internal representation. *)
90 val cflag_list : icflag -> cflag list
91 (** [cflag_list cflags] converts internal representation of
92 compilation flags to a list. *)
94 (** Runtime flags *)
95 type rflag =
96 [ `ANCHORED (** Treats pattern as if it were anchored *)
97 | `NOTBOL (** Beginning of string is not treated as beginning of line *)
98 | `NOTEOL (** End of string is not treated as end of line *)
99 | `NOTEMPTY (** Empty strings are not considered to be a valid match *)
100 | `PARTIAL (** Turns on partial matching *)
101 ]
103 val rflags : rflag list -> irflag
104 (** [rflags rflag_list] converts a list of runtime flags to
105 their internal representation. *)
107 val rflag_list : irflag -> rflag list
108 (** [rflag_list rflags] converts internal representation of
109 runtime flags to a list. *)
112 (** {6 Information on the PCRE-configuration (build-time options)} *)
114 (** Version information *)
115 val version : string (** Version of the PCRE-C-library *)
117 (** Indicates whether UTF8-support is enabled *)
118 val config_utf8 : bool
120 (** Character used as newline *)
121 val config_newline : char
123 (** Number of bytes used for internal linkage of regular expressions *)
124 val config_link_size : int
126 (** Default limit for calls to internal matching function *)
127 val config_match_limit : int
129 (** Indicates use of stack recursion in matching function *)
130 val config_stackrecurse : bool
133 (** {6 Information on patterns} *)
135 (** Information on matching of "first chars" in patterns *)
136 type firstbyte_info =
137 [ `Char of char (** Fixed first character *)
138 | `Start_only (** Pattern matches at beginning and end of newlines *)
139 | `ANCHORED (** Pattern is anchored *)
140 ]
142 (** Information on the study status of patterns *)
143 type study_stat =
144 [ `Not_studied (** Pattern has not yet been studied *)
145 | `Studied (** Pattern has been studied successfully *)
146 | `Optimal (** Pattern could not be improved by studying *)
147 ]
149 type regexp (** Compiled regular expressions *)
151 (** [options regexp] @return compilation flags of [regexp]. *)
152 external options : regexp -> icflag = "pcre_options_stub"
154 (** [size regexp] @return memory size of [regexp]. *)
155 external size : regexp -> int = "pcre_size_stub"
157 (** [studysize regexp] @return memory size of study information of [regexp]. *)
158 external studysize : regexp -> int = "pcre_studysize_stub"
160 (** [capturecount regexp] @return number of capturing subpatterns in
161 [regexp]. *)
162 external capturecount : regexp -> int = "pcre_capturecount_stub"
164 (** [backrefmax regexp] @return number of highest backreference in [regexp]. *)
165 external backrefmax : regexp -> int = "pcre_backrefmax_stub"
167 (** [namecount regexp] @return number of named subpatterns in [regexp]. *)
168 external namecount : regexp -> int = "pcre_namecount_stub"
170 (** [names regexp] @return array of names of named substrings in [regexp]. *)
171 external names : regexp -> string array = "pcre_names_stub"
173 (** [nameentrysize regexp] @return size of longest name of named
174 subpatterns in [regexp] + 3. *)
175 external nameentrysize : regexp -> int = "pcre_nameentrysize_stub"
177 (** [firstbyte regexp] @return firstbyte info on [regexp]. *)
178 external firstbyte : regexp -> firstbyte_info = "pcre_firstbyte_stub"
180 (** [firsttable regexp] @return some 256-bit (32-byte) fixed set table in
181 form of a string for [regexp] if available, [None] otherwise. *)
182 external firsttable : regexp -> string option = "pcre_firsttable_stub"
184 (** [lastliteral regexp] @return some last matching character of [regexp]
185 if available, [None] otherwise. *)
186 external lastliteral : regexp -> char option = "pcre_lastliteral_stub"
188 (** [study_stat regexp] @return study status of [regexp]. *)
189 external study_stat : regexp -> study_stat = "pcre_study_stat_stub" "noalloc"
191 val get_stringnumber : regexp -> string -> int
192 (** [get_stringnumber rex name] @return the index of the named substring
193 [name] in regular expression [rex]. This index can then be used with
194 [get_substring].
196 @raise Invalid_argument if there is no such named substring. *)
198 external get_match_limit : regexp -> int option = "pcre_get_match_limit_stub"
199 (** [get_match_limit rex] @return some match limit of regular expression
200 [rex] or [None]. *)
203 (** {6 Compilation of patterns} *)
205 type chtables (** Alternative set of char tables for pattern matching *)
207 external maketables : unit -> chtables = "pcre_maketables_stub"
208 (** Generates new set of char tables for the current locale. *)
210 val regexp :
211 ?study : bool ->
212 ?limit : int ->
213 ?iflags : icflag ->
214 ?flags : cflag list ->
215 ?chtables : chtables ->
216 string -> regexp
217 (** [regexp ?study ?limit ?iflags ?flags ?chtables pattern] compiles
218 [pattern] with [flags] when given, with [iflags] otherwise, and
219 with char tables [chtables]. If [study] is true, then the resulting
220 regular expression will be studied. If [limit] is specified, this
221 sets a limit to the amount of recursion and backtracking (only lower
222 than the builtin default!). If this limit is exceeded, [MatchLimit]
223 will be raised during matching.
225 @param study default = true
226 @param limit default = no extra limit other than default
227 @param iflags default = no extra flags
228 @param flags default = ignored
229 @param chtables default = builtin char tables
231 @return the regular expression.
233 For detailed documentation on how you can specify PERL-style regular
234 expressions (= patterns), please consult the PCRE-documentation
235 ("man pcrepattern") or PERL-manuals.
236 @see <http://www.perl.com> www.perl.com *)
238 val regexp_or :
239 ?study : bool ->
240 ?limit : int ->
241 ?iflags : icflag ->
242 ?flags : cflag list ->
243 ?chtables : chtables ->
244 string list -> regexp
245 (** [regexp_or ?study ?limit ?iflags ?flags ?chtables patterns] like {!regexp},
246 but combines [patterns] as alternatives (or-patterns) into one regular
247 expression. *)
249 val quote : string -> string
250 (** [quote str] @return the quoted string of [str]. *)
253 (** {6 Subpattern extraction} *)
255 type substrings (** Information on substrings after pattern matching *)
257 val get_subject : substrings -> string
258 (** [get_subject substrings] @return the subject string of [substrings]. *)
260 val num_of_subs : substrings -> int
261 (** [num_of_subs substrings] @return number of strings in [substrings]
262 (whole match inclusive). *)
264 val get_substring : substrings -> int -> string
265 (** [get_substring substrings n] @return the [n]th substring
266 (0 is whole match) of [substrings].
268 @raise Invalid_argument if [n] is not in the range of the number of
269 substrings.
270 @raise Not_found if the corresponding subpattern did not capture
271 a substring. *)
273 val get_substring_ofs : substrings -> int -> int * int
274 (** [get_substring_ofs substrings n] @return the offset tuple of the
275 [n]th substring of [substrings] (0 is whole match).
277 @raise Invalid_argument if [n] is not in the range of the number
278 of substrings.
279 @raise Not_found if the corresponding subpattern did not capture
280 a substring. *)
282 val get_substrings :
283 ?full_match : bool ->
284 substrings -> string array
285 (** [get_substrings ?full_match substrings] @return the array of
286 substrings in [substrings]. It includes the full match at index 0
287 when [full_match] is [true], the captured substrings only when it
288 is [false]. If a subpattern did not capture a substring, the empty
289 string is returned in the corresponding position instead.
291 @param full_match default = true *)
293 val get_opt_substrings :
294 ?full_match : bool ->
295 substrings -> string option array
296 (** [get_opt_substrings ?full_match substrings] @return the array of
297 optional substrings in [substrings]. It includes [Some full_match_str]
298 at index 0 when [full_match] is [true], [Some captured_substrings]
299 only when it is [false]. If a subpattern did not capture a substring,
300 [None] is returned in the corresponding position instead.
302 @param full_match default = true *)
304 val get_named_substring : regexp -> string -> substrings -> string
305 (** [get_named_substring rex name substrings] @return the named substring
306 [name] in regular expression [rex] and [substrings].
308 @raise Invalid_argument if there is no such named substring.
309 @raise Not_found if the corresponding subpattern did not capture
310 a substring. *)
312 val get_named_substring_ofs : regexp -> string -> substrings -> int * int
313 (** [get_named_substring_ofs rex name substrings] @return the offset
314 tuple of the named substring [name] in regular expression [rex] and
315 [substrings].
317 @raise Invalid_argument if there is no such named substring.
318 @raise Not_found if the corresponding subpattern did not capture
319 a substring. *)
322 (** {6 Callouts} *)
324 type callout_data =
325 {
326 callout_number : int; (** Callout number *)
327 substrings : substrings; (** Substrings matched so far *)
328 start_match : int; (** Subject start offset of current match attempt *)
329 current_position : int; (** Subject offset of current match pointer *)
330 capture_top : int; (** Number of the highest captured substring so far *)
331 capture_last : int; (** Number of the most recently captured substring *)
332 pattern_position : int; (** Offset of next match item in pattern string *)
333 next_item_length : int; (** Length of next match item in pattern string *)
334 }
336 (** Type of callout functions *)
337 type callout = callout_data -> unit
338 (** Callouts are referred to in patterns as "(?Cn)" where "n" is a
339 [callout_number] ranging from 0 to 255. Substrings captured so far
340 are accessible as usual via [substrings]. You will have to consider
341 [capture_top] and [capture_last] to know about the current state of
342 valid substrings.
344 By raising exception [Backtrack] within a callout function, the user
345 can force the pattern matching engine to backtrack to other possible
346 solutions. Other exceptions will terminate matching immediately
347 and return control to OCaml.
348 *)
351 (** {6 Matching of patterns and subpattern extraction} *)
353 val pcre_exec :
354 ?iflags : irflag ->
355 ?flags : rflag list ->
356 ?rex : regexp ->
357 ?pat : string ->
358 ?pos : int ->
359 ?callout : callout ->
360 string -> int array
361 (** [pcre_exec ?iflags ?flags ?rex ?pat ?pos ?callout subj] @return an
362 array of offsets that describe the position of matched subpatterns in
363 the string [subj] starting at position [pos] with pattern [pat] when
364 given, regular expression [rex] otherwise. The array also contains
365 additional workspace needed by the match engine. Uses [flags] when
366 given, the precompiled [iflags] otherwise. Callouts are handled by
367 [callout].
369 @param iflags default = no extra flags
370 @param flags default = ignored
371 @param rex default = matches whitespace
372 @param pat default = ignored
373 @param pos default = 0
374 @param callout default = ignore callouts
376 @raise Not_found if pattern does not match. *)
378 val exec :
379 ?iflags : irflag ->
380 ?flags : rflag list ->
381 ?rex : regexp ->
382 ?pat : string ->
383 ?pos : int ->
384 ?callout : callout ->
385 string -> substrings
386 (** [exec ?iflags ?flags ?rex ?pat ?pos ?callout subj] @return substring
387 information on string [subj] starting at position [pos] with pattern
388 [pat] when given, regular expression [rex] otherwise. Uses [flags]
389 when given, the precompiled [iflags] otherwise. Callouts are handled
390 by [callout].
392 @param iflags default = no extra flags
393 @param flags default = ignored
394 @param rex default = matches whitespace
395 @param pat default = ignored
396 @param pos default = 0
397 @param callout default = ignore callouts
399 @raise Not_found if pattern does not match. *)
401 val exec_all :
402 ?iflags : irflag ->
403 ?flags : rflag list ->
404 ?rex : regexp ->
405 ?pat : string ->
406 ?pos : int ->
407 ?callout : callout ->
408 string -> substrings array
409 (** [exec_all ?iflags ?flags ?rex ?pat ?pos ?callout subj] @return
410 an array of substring information of all matching substrings in
411 string [subj] starting at position [pos] with pattern [pat] when
412 given, regular expression [rex] otherwise. Uses [flags] when given,
413 the precompiled [iflags] otherwise. Callouts are handled by [callout].
415 @param iflags default = no extra flags
416 @param flags default = ignored
417 @param rex default = matches whitespace
418 @param pat default = ignored
419 @param pos default = 0
420 @param callout default = ignore callouts
422 @raise Not_found if pattern does not match. *)
424 val next_match :
425 ?iflags : irflag ->
426 ?flags : rflag list ->
427 ?rex : regexp ->
428 ?pat : string ->
429 ?pos : int ->
430 ?callout : callout ->
431 substrings -> substrings
432 (** [next_match ?iflags ?flags ?rex ?pat ?pos ?callout substrs] @return
433 substring information on the match that follows on the last
434 match denoted by [substrs], jumping over [pos] characters (also
435 backwards!), using pattern [pat] when given, regular expression
436 [rex] otherwise. Uses [flags] when given, the precompiled [iflags]
437 otherwise. Callouts are handled by [callout].
439 @param iflags default = no extra flags
440 @param flags default = ignored
441 @param rex default = matches whitespace
442 @param pat default = ignored
443 @param pos default = 0
444 @param callout default = ignore callouts
446 @raise Not_found if pattern does not match.
447 @raise Invalid_argument if [pos] lets matching start outside of
448 the subject string. *)
450 val extract :
451 ?iflags : irflag ->
452 ?flags : rflag list ->
453 ?rex : regexp ->
454 ?pat : string ->
455 ?pos : int ->
456 ?full_match : bool ->
457 ?callout : callout ->
458 string -> string array
459 (** [extract ?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj]
460 @return the array of substrings that match [subj] starting at
461 position [pos], using pattern [pat] when given, regular expression
462 [rex] otherwise. Uses [flags] when given, the precompiled [iflags]
463 otherwise. It includes the full match at index 0 when [full_match] is
464 [true], the captured substrings only when it is [false]. Callouts are
465 handled by [callout]. If a subpattern did not capture a substring,
466 the empty string is returned in the corresponding position instead.
468 @param iflags default = no extra flags
469 @param flags default = ignored
470 @param rex default = matches whitespace
471 @param pat default = ignored
472 @param pos default = 0
473 @param full_match default = true
474 @param callout default = ignore callouts
476 @raise Not_found if pattern does not match. *)
478 val extract_opt :
479 ?iflags : irflag ->
480 ?flags : rflag list ->
481 ?rex : regexp ->
482 ?pat : string ->
483 ?pos : int ->
484 ?full_match : bool ->
485 ?callout : callout ->
486 string -> string option array
487 (** [extract_opt ?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj]
488 @return the array of optional substrings that match [subj] starting
489 at position [pos], using pattern [pat] when given, regular expression
490 [rex] otherwise. Uses [flags] when given, the precompiled [iflags]
491 otherwise. It includes [Some full_match_str] at index 0 when
492 [full_match] is [true], [Some captured_substrings] only when it is
493 [false]. Callouts are handled by [callout]. If a subpattern did
494 not capture a substring, [None] is returned in the corresponding
495 position instead.
497 @param iflags default = no extra flags
498 @param flags default = ignored
499 @param rex default = matches whitespace
500 @param pat default = ignored
501 @param pos default = 0
502 @param full_match default = true
503 @param callout default = ignore callouts
505 @raise Not_found if pattern does not match. *)
507 val extract_all :
508 ?iflags : irflag ->
509 ?flags : rflag list ->
510 ?rex : regexp ->
511 ?pat : string ->
512 ?pos : int ->
513 ?full_match : bool ->
514 ?callout : callout ->
515 string -> string array array
516 (** [extract_all ?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj]
517 @return an array of arrays of all matching substrings that match
518 [subj] starting at position [pos], using pattern [pat] when given,
519 regular expression [rex] otherwise. Uses [flags] when given, the
520 precompiled [iflags] otherwise. It includes the full match at index
521 0 of the extracted string arrays when [full_match] is [true], the
522 captured substrings only when it is [false]. Callouts are handled by
523 [callout].
525 @param iflags default = no extra flags
526 @param flags default = ignored
527 @param rex default = matches whitespace
528 @param pat default = ignored
529 @param pos default = 0
530 @param full_match default = true
531 @param callout default = ignore callouts
533 @raise Not_found if pattern does not match. *)
535 val extract_all_opt :
536 ?iflags : irflag ->
537 ?flags : rflag list ->
538 ?rex : regexp ->
539 ?pat : string ->
540 ?pos : int ->
541 ?full_match : bool ->
542 ?callout : callout ->
543 string -> string option array array
544 (** [extract_all_opt
545 ?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj]
546 @return an array of arrays of all optional matching substrings that
547 match [subj] starting at position [pos], using pattern [pat] when
548 given, regular expression [rex] otherwise. Uses [flags] when given,
549 the precompiled [iflags] otherwise. It includes [Some full_match_str]
550 at index 0 of the extracted string arrays when [full_match] is [true],
551 [Some captured_substrings] only when it is [false]. Callouts are
552 handled by [callout]. If a subpattern did not capture a substring,
553 [None] is returned in the corresponding position instead.
555 @param iflags default = no extra flags
556 @param flags default = ignored
557 @param rex default = matches whitespace
558 @param pat default = ignored
559 @param pos default = 0
560 @param full_match default = true
561 @param callout default = ignore callouts
563 @raise Not_found if pattern does not match. *)
565 val pmatch :
566 ?iflags : irflag ->
567 ?flags : rflag list ->
568 ?rex : regexp ->
569 ?pat : string ->
570 ?pos : int ->
571 ?callout : callout ->
572 string -> bool
573 (** [pmatch ?iflags ?flags ?rex ?pat ?pos ?callout subj] @return [true]
574 if [subj] is matched by pattern [pat] when given, regular expression
575 [rex] otherwise, starting at position [pos]. Uses [flags] when given,
576 the precompiled [iflags] otherwise. Callouts are handled by [callout].
578 @param iflags default = no extra flags
579 @param flags default = ignored
580 @param rex default = matches whitespace
581 @param pat default = ignored
582 @param pos default = 0
583 @param callout default = ignore callouts *)
586 (** {6 String substitution} *)
588 (** Information on substitution patterns *)
589 type substitution
591 val subst : string -> substitution
592 (** [subst str] converts the string [str] representing a
593 substitution pattern to the internal representation.
595 The contents of the substitution string [str] can be normal text
596 mixed with any of the following (mostly as in PERL):
598 - {e $\[0-9\]+} - a "$" immediately followed by an arbitrary number.
599 "$0" stands for the name of the executable,
600 any other number for the n-th backreference.
601 - {e $&} - the whole matched pattern
602 - {e $`} - the text before the match
603 - {e $'} - the text after the match
604 - {e $+} - the last group that matched
605 - {e $$} - a single "$"
606 - {e $!} - delimiter which does not appear in the substitution.
607 Can be used to part "$[0-9]+" from an immediately
608 following other number. *)
610 val replace :
611 ?iflags : irflag ->
612 ?flags : rflag list ->
613 ?rex : regexp ->
614 ?pat : string ->
615 ?pos : int ->
616 ?itempl : substitution ->
617 ?templ : string ->
618 ?callout : callout ->
619 string -> string
620 (** [replace ?iflags ?flags ?rex ?pat ?pos ?itempl ?templ ?callout subj]
621 replaces all substrings of [subj] matching pattern [pat] when given,
622 regular expression [rex] otherwise, starting at position [pos] with
623 the substitution string [templ] when given, [itempl] otherwise. Uses
624 [flags] when given, the precompiled [iflags] otherwise. Callouts
625 are handled by [callout].
627 @param iflags default = no extra flags
628 @param flags default = ignored
629 @param rex default = matches whitespace
630 @param pat default = ignored
631 @param pos default = 0
632 @param itempl default = empty string
633 @param templ default = ignored
634 @param callout default = ignore callouts
636 @raise Failure if there are backreferences to nonexistent subpatterns. *)
638 val qreplace :
639 ?iflags : irflag ->
640 ?flags : rflag list ->
641 ?rex : regexp ->
642 ?pat : string ->
643 ?pos : int ->
644 ?templ : string ->
645 ?callout : callout ->
646 string -> string
647 (** [qreplace ?iflags ?flags ?rex ?pat ?pos ?templ ?callout subj]
648 replaces all substrings of [subj] matching pattern [pat] when given,
649 regular expression [rex] otherwise, starting at position [pos]
650 with the string [templ]. Uses [flags] when given, the precompiled
651 [iflags] otherwise. Callouts are handled by [callout].
653 @param iflags default = no extra flags
654 @param flags default = ignored
655 @param rex default = matches whitespace
656 @param pat default = ignored
657 @param pos default = 0
658 @param templ default = ignored
659 @param callout default = ignore callouts *)
661 val substitute_substrings :
662 ?iflags : irflag ->
663 ?flags : rflag list ->
664 ?rex : regexp ->
665 ?pat : string ->
666 ?pos : int ->
667 ?callout : callout ->
668 subst : (substrings -> string) ->
669 string -> string
670 (** [substitute_substrings ?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj]
671 replaces all substrings of [subj] matching pattern [pat] when given,
672 regular expression [rex] otherwise, starting at position [pos]
673 with the result of function [subst] applied to the substrings
674 of the match. Uses [flags] when given, the precompiled [iflags]
675 otherwise. Callouts are handled by [callout].
677 @param iflags default = no extra flags
678 @param flags default = ignored
679 @param rex default = matches whitespace
680 @param pat default = ignored
681 @param pos default = 0
682 @param callout default = ignore callouts *)
684 val substitute :
685 ?iflags : irflag ->
686 ?flags : rflag list ->
687 ?rex : regexp ->
688 ?pat : string ->
689 ?pos : int ->
690 ?callout : callout ->
691 subst : (string -> string) ->
692 string -> string
693 (** [substitute ?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj]
694 replaces all substrings of [subj] matching pattern [pat] when given,
695 regular expression [rex] otherwise, starting at position [pos] with
696 the result of function [subst] applied to the match. Uses [flags]
697 when given, the precompiled [iflags] otherwise. Callouts are handled
698 by [callout].
700 @param iflags default = no extra flags
701 @param flags default = ignored
702 @param rex default = matches whitespace
703 @param pat default = ignored
704 @param pos default = 0
705 @param callout default = ignore callouts *)
707 val replace_first :
708 ?iflags : irflag ->
709 ?flags : rflag list ->
710 ?rex : regexp ->
711 ?pat : string ->
712 ?pos : int ->
713 ?itempl : substitution ->
714 ?templ : string ->
715 ?callout : callout ->
716 string -> string
717 (** [replace_first ?iflags ?flags ?rex ?pat ?pos ?itempl ?templ ?callout subj]
718 replaces the first substring of [subj] matching pattern [pat] when
719 given, regular expression [rex] otherwise, starting at position
720 [pos] with the substitution string [templ] when given, [itempl]
721 otherwise. Uses [flags] when given, the precompiled [iflags]
722 otherwise. Callouts are handled by [callout].
724 @param iflags default = no extra flags
725 @param flags default = ignored
726 @param rex default = matches whitespace
727 @param pat default = ignored
728 @param pos default = 0
729 @param itempl default = empty string
730 @param templ default = ignored
731 @param callout default = ignore callouts
733 @raise Failure if there are backreferences to nonexistent subpatterns. *)
735 val qreplace_first :
736 ?iflags : irflag ->
737 ?flags : rflag list ->
738 ?rex : regexp ->
739 ?pat : string ->
740 ?pos : int ->
741 ?templ : string ->
742 ?callout : callout ->
743 string -> string
744 (** [qreplace_first ?iflags ?flags ?rex ?pat ?pos ?templ ?callout subj]
745 replaces the first substring of [subj] matching pattern [pat] when
746 given, regular expression [rex] otherwise, starting at position [pos]
747 with the string [templ]. Uses [flags] when given, the precompiled
748 [iflags] otherwise. Callouts are handled by [callout].
750 @param iflags default = no extra flags
751 @param flags default = ignored
752 @param rex default = matches whitespace
753 @param pat default = ignored
754 @param pos default = 0
755 @param templ default = ignored
756 @param callout default = ignore callouts *)
758 val substitute_substrings_first :
759 ?iflags : irflag ->
760 ?flags : rflag list ->
761 ?rex : regexp ->
762 ?pat : string ->
763 ?pos : int ->
764 ?callout : callout ->
765 subst : (substrings -> string) ->
766 string -> string
767 (** [substitute_substrings_first
768 ?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj]
769 replaces the first substring of [subj] matching pattern [pat] when
770 given, regular expression [rex] otherwise, starting at position
771 [pos] with the result of function [subst] applied to the substrings
772 of the match. Uses [flags] when given, the precompiled [iflags]
773 otherwise. Callouts are handled by [callout].
775 @param iflags default = no extra flags
776 @param flags default = ignored
777 @param rex default = matches whitespace
778 @param pat default = ignored
779 @param pos default = 0
780 @param callout default = ignore callouts *)
782 val substitute_first :
783 ?iflags : irflag ->
784 ?flags : rflag list ->
785 ?rex : regexp ->
786 ?pat : string ->
787 ?pos : int ->
788 ?callout : callout ->
789 subst : (string -> string) ->
790 string -> string
791 (** [substitute_first ?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj]
792 replaces the first substring of [subj] matching pattern [pat] when
793 given, regular expression [rex] otherwise, starting at position
794 [pos] with the result of function [subst] applied to the match. Uses
795 [flags] when given, the precompiled [iflags] otherwise. Callouts
796 are handled by [callout].
798 @param iflags default = no extra flags
799 @param flags default = ignored
800 @param rex default = matches whitespace
801 @param pat default = ignored
802 @param pos default = 0
803 @param callout default = ignore callouts *)
806 (** {6 Splitting} *)
808 val split :
809 ?iflags : irflag ->
810 ?flags : rflag list ->
811 ?rex : regexp ->
812 ?pat : string ->
813 ?pos : int ->
814 ?max : int ->
815 ?callout : callout ->
816 string -> string list
817 (** [split ?iflags ?flags ?rex ?pat ?pos ?max ?callout subj] splits [subj]
818 into a list of at most [max] strings, using as delimiter pattern
819 [pat] when given, regular expression [rex] otherwise, starting at
820 position [pos]. Uses [flags] when given, the precompiled [iflags]
821 otherwise. If [max] is zero, trailing empty fields are stripped. If
822 it is negative, it is treated as arbitrarily large. If neither [pat]
823 nor [rex] are specified, leading whitespace will be stripped! Should
824 behave exactly as in PERL. Callouts are handled by [callout].
826 @param iflags default = no extra flags
827 @param flags default = ignored
828 @param rex default = matches whitespace
829 @param pat default = ignored
830 @param pos default = 0
831 @param max default = 0
832 @param callout default = ignore callouts *)
834 val asplit :
835 ?iflags : irflag ->
836 ?flags : rflag list ->
837 ?rex : regexp ->
838 ?pat : string ->
839 ?pos : int ->
840 ?max : int ->
841 ?callout : callout ->
842 string -> string array
843 (** [asplit ?iflags ?flags ?rex ?pat ?pos ?max ?callout subj] same as
844 {!Pcre.split} but @return an array instead of a list. *)
846 (** Result of a {!Pcre.full_split} *)
847 type split_result = Text of string (** Text part of split string *)
848 | Delim of string (** Delimiter part of split
849 string *)
850 | Group of int * string (** Subgroup of matched delimiter
851 (subgroup_nr, subgroup_str) *)
852 | NoGroup (** Unmatched subgroup *)
854 val full_split :
855 ?iflags : irflag ->
856 ?flags : rflag list ->
857 ?rex : regexp ->
858 ?pat : string ->
859 ?pos : int ->
860 ?max : int ->
861 ?callout : callout ->
862 string -> split_result list
863 (** [full_split ?iflags ?flags ?rex ?pat ?pos ?max ?callout subj] splits
864 [subj] into a list of at most [max] elements of type "split_result",
865 using as delimiter pattern [pat] when given, regular expression
866 [rex] otherwise, starting at position [pos]. Uses [flags] when given,
867 the precompiled [iflags] otherwise. If [max] is zero, trailing empty
868 fields are stripped. If it is negative, it is treated as arbitrarily
869 large. Should behave exactly as in PERL. Callouts are handled by
870 [callout].
872 @param iflags default = no extra flags
873 @param flags default = ignored
874 @param rex default = matches whitespace
875 @param pat default = ignored
876 @param pos default = 0
877 @param max default = 0
878 @param callout default = ignore callouts *)
881 (** {6 Additional convenience functions} *)
883 val foreach_line :
884 ?ic : in_channel ->
885 (string -> unit) -> unit
886 (** [foreach_line ?ic f] applies [f] to each line in input channel [ic] until
887 the end-of-file is reached.
889 @param ic default = stdin *)
891 val foreach_file : string list -> (string -> in_channel -> unit) -> unit
892 (** [foreach_file filenames f] opens each file in the list [filenames]
893 for input and applies [f] to each filename and the corresponding
894 channel. Channels are closed after each operation (even when
895 exceptions occur - they get reraised afterwards!). *)
898 (** {6 {b UNSAFE STUFF - USE WITH CAUTION!}} *)
900 external unsafe_pcre_exec :
901 irflag -> regexp -> int -> string ->
902 int -> int array -> callout option
903 -> unit = "pcre_exec_stub_bc" "pcre_exec_stub"
904 (** [unsafe_pcre_exec flags rex pos subject subgroup_offsets offset_vector
905 callout]. You should read the C-source to know what happens.
906 If you do not understand it - {b don't use this function!} *)
908 val make_ovector : regexp -> int * int array
909 (** [make_ovector regexp] calculates the tuple (subgroups2, ovector)
910 which is the number of subgroup offsets and the offset array. *)