lib/pcre.mli

2010-04-01

author
Markus Mottl <mmottl@janestreet.com>
date
Thu Apr 01 14:54:00 2010 -0400
changeset 51
8393f8f80c40
parent 43
030b178dd70b
child 54
2fbb7a4c884a
permissions
-rw-r--r--

Added tag release-6.1.0 for changeset 3dd5b45ff844

     1 (*
     2    PCRE-OCAML - Perl Compatibility Regular Expressions for OCaml
     4    Copyright (C) 1999-  Markus Mottl
     5    email: markus.mottl@gmail.com
     6    WWW:   http://www.ocaml.info
     8    This library is free software; you can redistribute it and/or
     9    modify it under the terms of the GNU Lesser General Public
    10    License as published by the Free Software Foundation; either
    11    version 2 of the License, or (at your option) any later version.
    13    This library is distributed in the hope that it will be useful,
    14    but WITHOUT ANY WARRANTY; without even the implied warranty of
    15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    16    Lesser General Public License for more details.
    18    You should have received a copy of the GNU Lesser General Public
    19    License along with this library; if not, write to the Free Software
    20    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    21 *)
    23 (** Perl Compatibility Regular Expressions *)
    26 (** {6 Exceptions} *)
    28 type error =
    29   | Partial  (** String only matched the pattern partially *)
    30   | BadPartial  (** Pattern contains items that cannot be used together
    31                     with partial matching. *)
    32   | BadPattern of string * int  (** [BadPattern (msg, pos)] regular
    33                                     expression is malformed.  The reason
    34                                     is in [msg], the position of the
    35                                     error in the pattern in [pos]. *)
    36   | BadUTF8  (** UTF8 string being matched is invalid *)
    37   | BadUTF8Offset  (** Gets raised when a UTF8 string being matched with
    38                        offset is invalid. *)
    39   | MatchLimit  (** Maximum allowed number of match attempts with
    40                     backtracking or recursion is reached during matching.
    41                     ALL FUNCTIONS CALLING THE MATCHING ENGINE MAY RAISE
    42                     IT!!! *)
    43   | RecursionLimit  (** Presumably: the matching engine's internal recursion limit was reached during matching -- TODO confirm against the C stubs *)
    44   | InternalError of string
    45       (** [InternalError msg] C-library exhibits unknown/undefined
    46           behaviour.  The reason is in [msg]. *)
    48 (** Exception indicating PCRE errors. *)
    49 exception Error of error
    51 (** [Backtrack] used in callout functions to force backtracking. *)
    52 exception Backtrack
    54 (** [Regexp_or (pat, error)] gets raised for sub-pattern [pat] by [regexp_or]
    55     if it failed to compile. *)
    56 exception Regexp_or of string * error
    58 (** {6 Compilation and runtime flags and their conversion functions} *)
    60 type icflag (** Internal representation of compilation flags *)
    61 and  irflag (** Internal representation of runtime flags *)
    63 (** Compilation flags *)
    64 and cflag =
    65   [ `CASELESS        (** Case insensitive matching *)
    66   | `MULTILINE       (** '^' and '$' match before/after newlines,
    67                          not just at the beginning/end of a string *)
    68   | `DOTALL          (** '.' matches all characters (newlines, too) *)
    69   | `EXTENDED        (** Ignores whitespace and PERL-comments. Behaves
    70                          like the '/x'-option in PERL *)
    71   | `ANCHORED        (** Pattern matches only at start of string *)
    72   | `DOLLAR_ENDONLY  (** '$' in pattern matches only at end of string *)
    73   | `EXTRA           (** Reserved for future extensions of PCRE *)
    74   | `UNGREEDY        (** Quantifiers not greedy anymore, only
    75                          if followed by '?' *)
    76   | `UTF8            (** Treats patterns and strings as UTF8 characters. *)
    77   | `NO_UTF8_CHECK   (** Turns off validity checks on UTF8 strings for
    78                          efficiency reasons. WARNING: invalid UTF8
    79                          strings may cause a crash then! *)
    80   | `NO_AUTO_CAPTURE (** Disables the use of numbered capturing parentheses *)
    81   | `AUTO_CALLOUT    (** Automatically inserts callouts with id 255
    82                          before each pattern item *)
    83   | `FIRSTLINE       (** Unanchored patterns must match before/at first NL *)
    84   ]
    86 val cflags : cflag list -> icflag
    87 (** [cflags cflag_list] converts a list of compilation flags to
    88     their internal representation. *)
    90 val cflag_list : icflag -> cflag list
    91 (** [cflag_list cflags] converts internal representation of
    92     compilation flags to a list. *)
    94 (** Runtime flags *)
    95 type rflag =
    96   [ `ANCHORED  (** Treats pattern as if it were anchored *)
    97   | `NOTBOL    (** Beginning of string is not treated as beginning of line *)
    98   | `NOTEOL    (** End of string is not treated as end of line *)
    99   | `NOTEMPTY  (** Empty strings are not considered to be a valid match *)
   100   | `PARTIAL   (** Turns on partial matching *)
   101   ]
   103 val rflags : rflag list -> irflag
   104 (** [rflags rflag_list] converts a list of runtime flags to
   105     their internal representation. *)
   107 val rflag_list : irflag -> rflag list
   108 (** [rflag_list rflags] converts internal representation of
   109     runtime flags to a list. *)
   112 (** {6 Information on the PCRE-configuration (build-time options)} *)
   114 (** Version information *)
   115 val version : string  (** Version of the PCRE-C-library *)
   117 (** Indicates whether UTF8-support is enabled *)
   118 val config_utf8 : bool
   120 (** Character used as newline *)
   121 val config_newline : char
   123 (** Number of bytes used for internal linkage of regular expressions *)
   124 val config_link_size : int
   126 (** Default limit for calls to internal matching function *)
   127 val config_match_limit : int
   129 (** Indicates use of stack recursion in matching function *)
   130 val config_stackrecurse : bool
   133 (** {6 Information on patterns} *)
   135 (** Information on matching of "first chars" in patterns *)
   136 type firstbyte_info =
   137   [ `Char of char  (** Fixed first character *)
   138   | `Start_only    (** Pattern matches at beginning and end of newlines *)
   139   | `ANCHORED      (** Pattern is anchored *)
   140   ]
   142 (** Information on the study status of patterns *)
   143 type study_stat =
   144   [ `Not_studied (** Pattern has not yet been studied *)
   145   | `Studied     (** Pattern has been studied successfully *)
   146   | `Optimal     (** Pattern could not be improved by studying *)
   147   ]
   149 type regexp (** Compiled regular expressions *)
   151 (** [options regexp] @return compilation flags of [regexp]. *)
   152 external options : regexp -> icflag = "pcre_options_stub"
   154 (** [size regexp] @return memory size of [regexp]. *)
   155 external size : regexp -> int = "pcre_size_stub"
   157 (** [studysize regexp] @return memory size of study information of [regexp]. *)
   158 external studysize : regexp -> int = "pcre_studysize_stub"
   160 (** [capturecount regexp] @return number of capturing subpatterns in
   161     [regexp]. *)
   162 external capturecount : regexp -> int = "pcre_capturecount_stub"
   164 (** [backrefmax regexp] @return number of highest backreference in [regexp]. *)
   165 external backrefmax : regexp -> int = "pcre_backrefmax_stub"
   167 (** [namecount regexp] @return number of named subpatterns in [regexp]. *)
   168 external namecount : regexp -> int = "pcre_namecount_stub"
   170 (** [names regexp] @return array of names of named substrings in [regexp]. *)
   171 external names : regexp -> string array = "pcre_names_stub"
   173 (** [nameentrysize regexp] @return size of longest name of named
   174     subpatterns in [regexp] + 3. *)
   175 external nameentrysize : regexp -> int = "pcre_nameentrysize_stub"
   177 (** [firstbyte regexp] @return firstbyte info on [regexp]. *)
   178 external firstbyte : regexp -> firstbyte_info = "pcre_firstbyte_stub"
   180 (** [firsttable regexp] @return some 256-bit (32-byte) fixed set table in
   181     form of a string for [regexp] if available, [None] otherwise. *)
   182 external firsttable : regexp -> string option = "pcre_firsttable_stub"
   184 (** [lastliteral regexp] @return some last matching character of [regexp]
   185     if available, [None] otherwise. *)
   186 external lastliteral : regexp -> char option = "pcre_lastliteral_stub"
   188 (** [study_stat regexp] @return study status of [regexp]. *)
   189 external study_stat : regexp -> study_stat = "pcre_study_stat_stub" "noalloc"
   191 val get_stringnumber : regexp -> string -> int
   192 (** [get_stringnumber rex name] @return the index of the named substring
   193     [name] in regular expression [rex]. This index can then be used with
   194     [get_substring].
   196     @raise Invalid_argument if there is no such named substring. *)
   198 external get_match_limit : regexp -> int option = "pcre_get_match_limit_stub"
   199 (** [get_match_limit rex] @return some match limit of regular expression
   200     [rex] or [None]. *)
   203 (** {6 Compilation of patterns} *)
   205 type chtables (** Alternative set of char tables for pattern matching *)
   207 external maketables : unit -> chtables = "pcre_maketables_stub"
   208 (** Generates new set of char tables for the current locale. *)
   210 val regexp :
   211   ?study : bool ->
   212   ?limit : int ->
   213   ?iflags : icflag ->
   214   ?flags : cflag list ->
   215   ?chtables : chtables ->
   216   string -> regexp
   217 (** [regexp ?study ?limit ?iflags ?flags ?chtables pattern] compiles
   218     [pattern] with [flags] when given, with [iflags] otherwise, and
   219     with char tables [chtables]. If [study] is true, then the resulting
   220     regular expression will be studied. If [limit] is specified, this
   221     sets a limit to the amount of recursion and backtracking (only lower
   222     than the builtin default!). If this limit is exceeded, [MatchLimit]
   223     will be raised during matching.
   225     @param study default = true
   226     @param limit default = no extra limit other than default
   227     @param iflags default = no extra flags
   228     @param flags default = ignored
   229     @param chtables default = builtin char tables
   231     @return the regular expression.
   233     For detailed documentation on how you can specify PERL-style regular
   234     expressions (= patterns), please consult the PCRE-documentation
   235     ("man pcrepattern") or PERL-manuals.
   236     @see <http://www.perl.com> www.perl.com *)
   238 val regexp_or :
   239   ?study : bool ->
   240   ?limit : int ->
   241   ?iflags : icflag ->
   242   ?flags : cflag list ->
   243   ?chtables : chtables ->
   244   string list -> regexp
   245 (** [regexp_or ?study ?limit ?iflags ?flags ?chtables patterns] like {!regexp},
   246     but combines [patterns] as alternatives (or-patterns) into one regular
   247     expression. *)
   249 val quote : string -> string
   250 (** [quote str] @return the quoted string of [str]. *)
   253 (** {6 Subpattern extraction} *)
   255 type substrings (** Information on substrings after pattern matching *)
   257 val get_subject : substrings -> string
   258 (** [get_subject substrings] @return the subject string of [substrings]. *)
   260 val num_of_subs : substrings -> int
   261 (** [num_of_subs substrings] @return number of strings in [substrings]
   262     (whole match inclusive). *)
   264 val get_substring : substrings -> int -> string
   265 (** [get_substring substrings n] @return the [n]th substring
   266     (0 is whole match) of [substrings].
   268     @raise Invalid_argument if [n] is not in the range of the number of
   269     substrings.
   270     @raise Not_found if the corresponding subpattern did not capture
   271            a substring. *)
   273 val get_substring_ofs : substrings -> int -> int * int
   274 (** [get_substring_ofs substrings n] @return the offset tuple of the
   275     [n]th substring of [substrings] (0 is whole match).
   277     @raise Invalid_argument if [n] is not in the range of the number
   278            of substrings.
   279     @raise Not_found if the corresponding subpattern did not capture
   280            a substring. *)
   282 val get_substrings :
   283   ?full_match : bool ->
   284   substrings -> string array
   285 (** [get_substrings ?full_match substrings] @return the array of
   286     substrings in [substrings]. It includes the full match at index 0
   287     when [full_match] is [true], the captured substrings only when it
   288     is [false]. If a subpattern did not capture a substring, the empty
   289     string is returned in the corresponding position instead.
   291     @param full_match default = true *)
   293 val get_opt_substrings :
   294   ?full_match : bool ->
   295   substrings -> string option array
   296 (** [get_opt_substrings ?full_match substrings] @return the array of
   297     optional substrings in [substrings]. It includes [Some full_match_str]
   298     at index 0 when [full_match] is [true], [Some captured_substrings]
   299     only when it is [false]. If a subpattern did not capture a substring,
   300     [None] is returned in the corresponding position instead.
   302     @param full_match default = true *)
   304 val get_named_substring : regexp -> string -> substrings -> string
   305 (** [get_named_substring rex name substrings] @return the named substring
   306     [name] in regular expression [rex] and [substrings].
   308     @raise Invalid_argument if there is no such named substring.
   309     @raise Not_found if the corresponding subpattern did not capture
   310            a substring. *)
   312 val get_named_substring_ofs : regexp -> string -> substrings -> int * int
   313 (** [get_named_substring_ofs rex name substrings] @return the offset
   314     tuple of the named substring [name] in regular expression [rex] and
   315     [substrings].
   317     @raise Invalid_argument if there is no such named substring.
   318     @raise Not_found if the corresponding subpattern did not capture
   319            a substring. *)
   322 (** {6 Callouts} *)
   324 type callout_data =
   325   {
   326     callout_number : int; (** Callout number *)
   327     substrings : substrings; (** Substrings matched so far *)
   328     start_match : int;  (** Subject start offset of current match attempt *)
   329     current_position : int;  (** Subject offset of current match pointer *)
   330     capture_top : int;  (** Number of the highest captured substring so far *)
   331     capture_last : int;  (** Number of the most recently captured substring *)
   332     pattern_position : int;  (** Offset of next match item in pattern string *)
   333     next_item_length : int;  (** Length of next match item in pattern string *)
   334   }
   336 (** Type of callout functions *)
   337 type callout = callout_data -> unit
   338 (** Callouts are referred to in patterns as "(?Cn)" where "n" is a
   339     [callout_number] ranging from 0 to 255.  Substrings captured so far
   340     are accessible as usual via [substrings].  You will have to consider
   341     [capture_top] and [capture_last] to know about the current state of
   342     valid substrings.
   344     By raising exception [Backtrack] within a callout function, the user
   345     can force the pattern matching engine to backtrack to other possible
   346     solutions.  Other exceptions will terminate matching immediately
   347     and return control to OCaml.
   348 *)
   351 (** {6 Matching of patterns and subpattern extraction} *)
   353 val pcre_exec :
   354   ?iflags : irflag ->
   355   ?flags : rflag list ->
   356   ?rex : regexp ->
   357   ?pat : string ->
   358   ?pos : int ->
   359   ?callout : callout ->
   360   string -> int array
   361 (** [pcre_exec ?iflags ?flags ?rex ?pat ?pos ?callout subj] @return an
   362     array of offsets that describe the position of matched subpatterns in
   363     the string [subj] starting at position [pos] with pattern [pat] when
   364     given, regular expression [rex] otherwise. The array also contains
   365     additional workspace needed by the match engine. Uses [flags] when
   366     given, the precompiled [iflags] otherwise. Callouts are handled by
   367     [callout].
   369     @param iflags default = no extra flags
   370     @param flags default = ignored
   371     @param rex default = matches whitespace
   372     @param pat default = ignored
   373     @param pos default = 0
   374     @param callout default = ignore callouts
   376     @raise Not_found if pattern does not match. *)
   378 val exec :
   379   ?iflags : irflag ->
   380   ?flags : rflag list ->
   381   ?rex : regexp ->
   382   ?pat : string ->
   383   ?pos : int ->
   384   ?callout : callout ->
   385   string -> substrings
   386 (** [exec ?iflags ?flags ?rex ?pat ?pos ?callout subj] @return substring
   387     information on string [subj] starting at position [pos] with pattern
   388     [pat] when given, regular expression [rex] otherwise. Uses [flags]
   389     when given, the precompiled [iflags] otherwise. Callouts are handled
   390     by [callout].
   392     @param iflags default = no extra flags
   393     @param flags default = ignored
   394     @param rex default = matches whitespace
   395     @param pat default = ignored
   396     @param pos default = 0
   397     @param callout default = ignore callouts
   399     @raise Not_found if pattern does not match. *)
   401 val exec_all :
   402   ?iflags : irflag ->
   403   ?flags : rflag list ->
   404   ?rex : regexp ->
   405   ?pat : string ->
   406   ?pos : int ->
   407   ?callout : callout ->
   408   string -> substrings array
   409 (** [exec_all ?iflags ?flags ?rex ?pat ?pos ?callout subj] @return
   410     an array of substring information of all matching substrings in
   411     string [subj] starting at position [pos] with pattern [pat] when
   412     given, regular expression [rex] otherwise. Uses [flags] when given,
   413     the precompiled [iflags] otherwise. Callouts are handled by [callout].
   415     @param iflags default = no extra flags
   416     @param flags default = ignored
   417     @param rex default = matches whitespace
   418     @param pat default = ignored
   419     @param pos default = 0
   420     @param callout default = ignore callouts
   422     @raise Not_found if pattern does not match. *)
   424 val next_match :
   425   ?iflags : irflag ->
   426   ?flags : rflag list ->
   427   ?rex : regexp ->
   428   ?pat : string ->
   429   ?pos : int ->
   430   ?callout : callout ->
   431   substrings -> substrings
   432 (** [next_match ?iflags ?flags ?rex ?pat ?pos ?callout substrs] @return
   433     substring information on the match that follows on the last
   434     match denoted by [substrs], jumping over [pos] characters (also
   435     backwards!), using pattern [pat] when given, regular expression
   436     [rex] otherwise. Uses [flags] when given, the precompiled [iflags]
   437     otherwise. Callouts are handled by [callout].
   439     @param iflags default = no extra flags
   440     @param flags default = ignored
   441     @param rex default = matches whitespace
   442     @param pat default = ignored
   443     @param pos default = 0
   444     @param callout default = ignore callouts
   446     @raise Not_found if pattern does not match.
   447     @raise Invalid_argument if [pos] lets matching start outside of
   448            the subject string. *)
   450 val extract :
   451   ?iflags : irflag ->
   452   ?flags : rflag list ->
   453   ?rex : regexp ->
   454   ?pat : string ->
   455   ?pos : int ->
   456   ?full_match : bool ->
   457   ?callout : callout ->
   458   string -> string array
   459 (** [extract ?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj]
   460     @return the array of substrings that match [subj] starting at
   461     position [pos], using pattern [pat] when given, regular expression
   462     [rex] otherwise. Uses [flags] when given, the precompiled [iflags]
   463     otherwise. It includes the full match at index 0 when [full_match] is
   464     [true], the captured substrings only when it is [false]. Callouts are
   465     handled by [callout].  If a subpattern did not capture a substring,
   466     the empty string is returned in the corresponding position instead.
   468     @param iflags default = no extra flags
   469     @param flags default = ignored
   470     @param rex default = matches whitespace
   471     @param pat default = ignored
   472     @param pos default = 0
   473     @param full_match default = true
   474     @param callout default = ignore callouts
   476     @raise Not_found if pattern does not match. *)
   478 val extract_opt :
   479   ?iflags : irflag ->
   480   ?flags : rflag list ->
   481   ?rex : regexp ->
   482   ?pat : string ->
   483   ?pos : int ->
   484   ?full_match : bool ->
   485   ?callout : callout ->
   486   string -> string option array
   487 (** [extract_opt ?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj]
   488     @return the array of optional substrings that match [subj] starting
   489     at position [pos], using pattern [pat] when given, regular expression
   490     [rex] otherwise. Uses [flags] when given, the precompiled [iflags]
   491     otherwise. It includes [Some full_match_str] at index 0 when
   492     [full_match] is [true], [Some captured_substrings] only when it is
   493     [false]. Callouts are handled by [callout].  If a subpattern did
   494     not capture a substring, [None] is returned in the corresponding
   495     position instead.
   497     @param iflags default = no extra flags
   498     @param flags default = ignored
   499     @param rex default = matches whitespace
   500     @param pat default = ignored
   501     @param pos default = 0
   502     @param full_match default = true
   503     @param callout default = ignore callouts
   505     @raise Not_found if pattern does not match. *)
   507 val extract_all :
   508   ?iflags : irflag ->
   509   ?flags : rflag list ->
   510   ?rex : regexp ->
   511   ?pat : string ->
   512   ?pos : int ->
   513   ?full_match : bool ->
   514   ?callout : callout ->
   515   string -> string array array
   516 (** [extract_all ?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj]
   517     @return an array of arrays of all matching substrings that match
   518     [subj] starting at position [pos], using pattern [pat] when given,
   519     regular expression [rex] otherwise. Uses [flags] when given, the
   520     precompiled [iflags] otherwise. It includes the full match at index
   521     0 of the extracted string arrays when [full_match] is [true], the
   522     captured substrings only when it is [false]. Callouts are handled by
   523     [callout].
   525     @param iflags default = no extra flags
   526     @param flags default = ignored
   527     @param rex default = matches whitespace
   528     @param pat default = ignored
   529     @param pos default = 0
   530     @param full_match default = true
   531     @param callout default = ignore callouts
   533     @raise Not_found if pattern does not match. *)
   535 val extract_all_opt :
   536   ?iflags : irflag ->
   537   ?flags : rflag list ->
   538   ?rex : regexp ->
   539   ?pat : string ->
   540   ?pos : int ->
   541   ?full_match : bool ->
   542   ?callout : callout ->
   543   string -> string option array array
   544 (** [extract_all_opt
   545       ?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj]
   546     @return an array of arrays of all optional matching substrings that
   547     match [subj] starting at position [pos], using pattern [pat] when
   548     given, regular expression [rex] otherwise. Uses [flags] when given,
   549     the precompiled [iflags] otherwise. It includes [Some full_match_str]
   550     at index 0 of the extracted string arrays when [full_match] is [true],
   551     [Some captured_substrings] only when it is [false]. Callouts are
   552     handled by [callout].  If a subpattern did not capture a substring,
   553     [None] is returned in the corresponding position instead.
   555     @param iflags default = no extra flags
   556     @param flags default = ignored
   557     @param rex default = matches whitespace
   558     @param pat default = ignored
   559     @param pos default = 0
   560     @param full_match default = true
   561     @param callout default = ignore callouts
   563     @raise Not_found if pattern does not match. *)
   565 val pmatch :
   566   ?iflags : irflag ->
   567   ?flags : rflag list ->
   568   ?rex : regexp ->
   569   ?pat : string ->
   570   ?pos : int ->
   571   ?callout : callout ->
   572   string -> bool
   573 (** [pmatch ?iflags ?flags ?rex ?pat ?pos ?callout subj] @return [true]
   574     if [subj] is matched by pattern [pat] when given, regular expression
   575     [rex] otherwise, starting at position [pos]. Uses [flags] when given,
   576     the precompiled [iflags] otherwise. Callouts are handled by [callout].
   578     @param iflags default = no extra flags
   579     @param flags default = ignored
   580     @param rex default = matches whitespace
   581     @param pat default = ignored
   582     @param pos default = 0
   583     @param callout default = ignore callouts *)
   586 (** {6 String substitution} *)
   588 (** Information on substitution patterns *)
   589 type substitution
   591 val subst : string -> substitution
   592 (** [subst str] converts the string [str] representing a
   593     substitution pattern to the internal representation.
   595     The contents of the substitution string [str] can be normal text
   596     mixed with any of the following (mostly as in PERL):
   598     - {e $\[0-9\]+}  - a "$" immediately followed by an arbitrary number.
   599                        "$0" stands for the name of the executable,
   600                        any other number for the n-th backreference.
   601     - {e $&}         - the whole matched pattern
   602     - {e $`}         - the text before the match
   603     - {e $'}         - the text after the match
   604     - {e $+}         - the last group that matched
   605     - {e $$}         - a single "$"
   606     - {e $!}         - delimiter which does not appear in the substitution.
   607                        Can be used to separate "$\[0-9\]+" from an immediately
   608                        following other number. *)
   610 val replace :
   611   ?iflags : irflag ->
   612   ?flags : rflag list ->
   613   ?rex : regexp ->
   614   ?pat : string ->
   615   ?pos : int ->
   616   ?itempl : substitution ->
   617   ?templ : string ->
   618   ?callout : callout ->
   619   string -> string
   620 (** [replace ?iflags ?flags ?rex ?pat ?pos ?itempl ?templ ?callout subj]
   621     replaces all substrings of [subj] matching pattern [pat] when given,
   622     regular expression [rex] otherwise, starting at position [pos] with
   623     the substitution string [templ] when given, [itempl] otherwise. Uses
   624     [flags] when given, the precompiled [iflags] otherwise. Callouts
   625     are handled by [callout].
   627     @param iflags default = no extra flags
   628     @param flags default = ignored
   629     @param rex default = matches whitespace
   630     @param pat default = ignored
   631     @param pos default = 0
   632     @param itempl default = empty string
   633     @param templ default = ignored
   634     @param callout default = ignore callouts
   636     @raise Failure if there are backreferences to nonexistent subpatterns. *)
   638 val qreplace :
   639   ?iflags : irflag ->
   640   ?flags : rflag list ->
   641   ?rex : regexp ->
   642   ?pat : string ->
   643   ?pos : int ->
   644   ?templ : string ->
   645   ?callout : callout ->
   646   string -> string
   647 (** [qreplace ?iflags ?flags ?rex ?pat ?pos ?templ ?callout subj]
   648     replaces all substrings of [subj] matching pattern [pat] when given,
   649     regular expression [rex] otherwise, starting at position [pos]
   650     with the string [templ]. Uses [flags] when given, the precompiled
   651     [iflags] otherwise. Callouts are handled by [callout].
   653     @param iflags default = no extra flags
   654     @param flags default = ignored
   655     @param rex default = matches whitespace
   656     @param pat default = ignored
   657     @param pos default = 0
   658     @param templ default = ignored
   659     @param callout default = ignore callouts *)
   661 val substitute_substrings :
   662   ?iflags : irflag ->
   663   ?flags : rflag list ->
   664   ?rex : regexp ->
   665   ?pat : string ->
   666   ?pos : int ->
   667   ?callout : callout ->
   668   subst : (substrings -> string) ->
   669   string -> string
   670 (** [substitute_substrings ?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj]
   671     replaces all substrings of [subj] matching pattern [pat] when given,
   672     regular expression [rex] otherwise, starting at position [pos]
   673     with the result of function [subst] applied to the substrings
   674     of the match. Uses [flags] when given, the precompiled [iflags]
   675     otherwise. Callouts are handled by [callout].
   677     @param iflags default = no extra flags
   678     @param flags default = ignored
   679     @param rex default = matches whitespace
   680     @param pat default = ignored
   681     @param pos default = 0
   682     @param callout default = ignore callouts *)
   684 val substitute :
   685   ?iflags : irflag ->
   686   ?flags : rflag list ->
   687   ?rex : regexp ->
   688   ?pat : string ->
   689   ?pos : int ->
   690   ?callout : callout ->
   691   subst : (string -> string) ->
   692   string -> string
   693 (** [substitute ?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj]
   694     replaces all substrings of [subj] matching pattern [pat] when given,
   695     regular expression [rex] otherwise, starting at position [pos] with
   696     the result of function [subst] applied to the match. Uses [flags]
   697     when given, the precompiled [iflags] otherwise. Callouts are handled
   698     by [callout].
   700     @param iflags default = no extra flags
   701     @param flags default = ignored
   702     @param rex default = matches whitespace
   703     @param pat default = ignored
   704     @param pos default = 0
   705     @param callout default = ignore callouts *)
   707 val replace_first :
   708   ?iflags : irflag ->
   709   ?flags : rflag list ->
   710   ?rex : regexp ->
   711   ?pat : string ->
   712   ?pos : int ->
   713   ?itempl : substitution ->
   714   ?templ : string ->
   715   ?callout : callout ->
   716   string -> string
   717 (** [replace_first ?iflags ?flags ?rex ?pat ?pos ?itempl ?templ ?callout subj]
   718     replaces the first substring of [subj] matching pattern [pat] when
   719     given, regular expression [rex] otherwise, starting at position
   720     [pos]. The replacement is the substitution string [templ] when given,
   721     [itempl] otherwise. Uses [flags] when given, the precompiled [iflags]
   722     otherwise. Callouts are handled by [callout].
   724     @param iflags default = no extra flags
   725     @param flags default = ignored
   726     @param rex default = matches whitespace
   727     @param pat default = ignored
   728     @param pos default = 0
   729     @param itempl default = empty string
   730     @param templ default = ignored
   731     @param callout default = ignore callouts
   733     @raise Failure if there are backreferences to nonexistent subpatterns. *)
   735 val qreplace_first :
   736   ?iflags : irflag ->
   737   ?flags : rflag list ->
   738   ?rex : regexp ->
   739   ?pat : string ->
   740   ?pos : int ->
   741   ?templ : string ->
   742   ?callout : callout ->
   743   string -> string
   744 (** [qreplace_first ?iflags ?flags ?rex ?pat ?pos ?templ ?callout subj]
   745     replaces the first substring of [subj] matching pattern [pat] when
   746     given, regular expression [rex] otherwise, starting at position
   747     [pos]. The replacement is the string [templ]. Uses [flags] when given,
   748     the precompiled [iflags] otherwise. Callouts are handled by [callout].
   750     @param iflags default = no extra flags
   751     @param flags default = ignored
   752     @param rex default = matches whitespace
   753     @param pat default = ignored
   754     @param pos default = 0
   755     @param templ default = ignored (NOTE(review): no [itempl] fallback here - confirm)
   756     @param callout default = ignore callouts *)
   758 val substitute_substrings_first :
   759   ?iflags : irflag ->
   760   ?flags : rflag list ->
   761   ?rex : regexp ->
   762   ?pat : string ->
   763   ?pos : int ->
   764   ?callout : callout ->
   765   subst : (substrings -> string) ->
   766   string -> string
   767 (** [substitute_substrings_first
   768        ?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj]
   769     replaces the first substring of [subj] matching pattern [pat] when
   770     given, regular expression [rex] otherwise, starting at position
   771     [pos]. The replacement is the result of applying [subst] to the
   772     substrings of the match. Uses [flags] when given, the precompiled
   773     [iflags] otherwise. Callouts are handled by [callout].
   775     @param iflags default = no extra flags
   776     @param flags default = ignored
   777     @param rex default = matches whitespace
   778     @param pat default = ignored
   779     @param pos default = 0
   780     @param callout default = ignore callouts *)
   782 val substitute_first :
   783   ?iflags : irflag ->
   784   ?flags : rflag list ->
   785   ?rex : regexp ->
   786   ?pat : string ->
   787   ?pos : int ->
   788   ?callout : callout ->
   789   subst : (string -> string) ->
   790   string -> string
   791 (** [substitute_first ?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj]
   792     replaces the first substring of [subj] matching pattern [pat] when
   793     given, regular expression [rex] otherwise, starting at position
   794     [pos]. The replacement is the result of applying [subst] to the
   795     matched string. Uses [flags] when given, the precompiled [iflags]
   796     otherwise. Callouts are handled by [callout].
   798     @param iflags default = no extra flags
   799     @param flags default = ignored
   800     @param rex default = matches whitespace
   801     @param pat default = ignored
   802     @param pos default = 0
   803     @param callout default = ignore callouts *)
   806 (** {6 Splitting} *)
   808 val split :
   809   ?iflags : irflag ->
   810   ?flags : rflag list ->
   811   ?rex : regexp ->
   812   ?pat : string ->
   813   ?pos : int ->
   814   ?max : int ->
   815   ?callout : callout ->
   816   string -> string list
   817 (** [split ?iflags ?flags ?rex ?pat ?pos ?max ?callout subj] splits [subj]
   818     into a list of at most [max] strings, using as delimiter pattern
   819     [pat] when given, regular expression [rex] otherwise, starting at
   820     position [pos]. Uses [flags] when given, the precompiled [iflags]
   821     otherwise. If [max] is zero, trailing empty fields are stripped. If
   822     it is negative, it is treated as arbitrarily large. If neither [pat]
   823     nor [rex] is specified, leading whitespace will be stripped! Should
   824     behave exactly as in Perl. Callouts are handled by [callout].
   826     @param iflags default = no extra flags
   827     @param flags default = ignored
   828     @param rex default = matches whitespace
   829     @param pat default = ignored
   830     @param pos default = 0
   831     @param max default = 0
   832     @param callout default = ignore callouts *)
   834 val asplit :
   835   ?iflags : irflag ->
   836   ?flags : rflag list ->
   837   ?rex : regexp ->
   838   ?pat : string ->
   839   ?pos : int ->
   840   ?max : int ->
   841   ?callout : callout ->
   842   string -> string array
   843 (** [asplit ?iflags ?flags ?rex ?pat ?pos ?max ?callout subj] same as
   844     {!Pcre.split}, but returns an array instead of a list. *)
   846 (** Element of the result list of {!Pcre.full_split} *)
   847 type split_result = Text of string        (** Text part of split string *)
   848                   | Delim of string       (** Delimiter part of split
   849                                               string *)
   850                   | Group of int * string (** Subgroup of matched delimiter
   851                                               (subgroup_nr, subgroup_str) *)
   852                   | NoGroup               (** Unmatched subgroup *)
   854 val full_split :
   855   ?iflags : irflag ->
   856   ?flags : rflag list ->
   857   ?rex : regexp ->
   858   ?pat : string ->
   859   ?pos : int ->
   860   ?max : int ->
   861   ?callout : callout ->
   862   string -> split_result list
   863 (** [full_split ?iflags ?flags ?rex ?pat ?pos ?max ?callout subj] splits
   864     [subj] into a list of at most [max] elements of type {!Pcre.split_result},
   865     using as delimiter pattern [pat] when given, regular expression
   866     [rex] otherwise, starting at position [pos]. Uses [flags] when given,
   867     the precompiled [iflags] otherwise. If [max] is zero, trailing empty
   868     fields are stripped. If it is negative, it is treated as arbitrarily
   869     large. Should behave exactly as in Perl. Callouts are handled by
   870     [callout].
   872     @param iflags default = no extra flags
   873     @param flags default = ignored
   874     @param rex default = matches whitespace
   875     @param pat default = ignored
   876     @param pos default = 0
   877     @param max default = 0
   878     @param callout default = ignore callouts *)
   881 (** {6 Additional convenience functions} *)
   883 val foreach_line :
   884   ?ic : in_channel ->
   885   (string -> unit) -> unit
   886 (** [foreach_line ?ic f] applies [f] to each line in input channel [ic] until
   887     the end-of-file is reached.
   889     @param ic default = stdin *)
   891 val foreach_file : string list -> (string -> in_channel -> unit) -> unit
   892 (** [foreach_file filenames f] opens each file in the list [filenames]
   893     for input and applies [f] to each filename and the corresponding
   894     channel. Each channel is closed after its operation, even when an
   895     exception occurs; the exception is then reraised. *)
   898 (** {6 {b UNSAFE STUFF - USE WITH CAUTION!}} *)
   900 external unsafe_pcre_exec :
   901   irflag -> regexp -> int -> string ->
   902   int -> int array -> callout option
   903   -> unit = "pcre_exec_stub_bc" "pcre_exec_stub"
   904 (** [unsafe_pcre_exec flags rex pos subject subgroup_offsets offset_vector
   905     callout]. You should read the C-source to know what happens.
   906     If you do not understand it - {b don't use this function!} *)
   908 val make_ovector : regexp -> int * int array
   909 (** [make_ovector regexp] calculates the pair [(subgroups2, ovector)]:
   910     the number of subgroup offsets and the offset array. *)

mercurial