module String: Core_string
type t = string
val typerep_of_t : t Typerep_lib.Std.Typerep.t
val typename_of_t : t Typerep_lib.Std.Typename.t
module Caseless : sig .. end
Caseless compares and hashes strings ignoring case, so that for example
Caseless.equal "OCaml" "ocaml" and Caseless.("apple" < "Banana") are true, and
Caseless.Map, Caseless.Table lookup and Caseless.Set membership are
case-insensitive.
include Blit.S
include Container.S0
include Identifiable.S
val max_length : int
val length : t -> int
val get : t -> int -> char
val set : t -> int -> char -> unit
val create : int -> t
val make : int -> char -> t
val copy : t -> t
val init : int -> f:(int -> char) -> t
val fill : t -> pos:int -> len:int -> char -> unit
val (^) : t -> t -> t
Note that a ^ b
must copy both a
and b
into a newly-allocated result string, so
a ^ b ^ c ^ ... ^ z
is quadratic in the number of strings. String.concat
does not
have this problem -- it allocates the result buffer only once. The Rope
module
provides a data structure which uses a similar trick to achieve fast concatenation at
either end of a string.
val concat : ?sep:t -> t list -> t
Concatenate all strings in the list using separator sep (default sep "").
val escaped : t -> t
val contains : ?pos:int -> ?len:int -> t -> char -> bool
val uppercase : t -> t
val lowercase : t -> t
val capitalize : t -> t
val uncapitalize : t -> t
val index : t -> char -> int option
val index_exn : t -> char -> int
val rindex : t -> char -> int option
val rindex_exn : t -> char -> int
val index_from : t -> int -> char -> int option
val index_from_exn : t -> int -> char -> int
val rindex_from : t -> int -> char -> int option
val rindex_from_exn : t -> int -> char -> int
module Search_pattern:sig
..end
val substr_index : ?pos:int -> t -> pattern:t -> int option
Substring search and replace convenience functions. They call Search_pattern.create
and then forget the preprocessed pattern when the search is complete. pos < 0 or
pos >= length t result in no match (hence substr_index returns None and
substr_index_exn raises). may_overlap indicates whether to report overlapping
matches, see Search_pattern.index_all.
val substr_index_exn : ?pos:int -> t -> pattern:t -> int
val substr_index_all : t -> may_overlap:bool -> pattern:t -> int list
val substr_replace_first : ?pos:int ->
t ->
pattern:t -> with_:t -> t
As with Search_pattern.replace_all, the result may still contain pattern.
val substr_replace_all : t -> pattern:t -> with_:t -> t
val is_substring : t -> substring:t -> bool
is_substring ~substring:"bar" "foo bar baz" is true.
val slice : t -> int -> int -> t
slice s start stop gets a slice of s between start and stop. start and stop
will be normalized before the access (viz. Core_array.normalize).
val to_list_rev : t -> char list
val rev : t -> t
rev t returns t in reverse order.
val nget : t -> int -> char
nget s i gets the char at normalized position i in s.
val nset : t -> int -> char -> unit
nset s i c sets the char at normalized position i to c.
val is_suffix : t -> suffix:t -> bool
is_suffix s ~suffix returns true if s ends with suffix.
val is_prefix : t -> prefix:t -> bool
is_prefix s ~prefix returns true if s starts with prefix.
val lsplit2_exn : t -> on:char -> t * t
If the string s contains the character on, then lsplit2_exn s ~on returns a pair
containing s split around the first appearance of on (from the left).
Raises Not_found when on cannot be found in s.
val rsplit2_exn : t -> on:char -> t * t
If the string s contains the character on, then rsplit2_exn s ~on returns a pair
containing s split around the first appearance of on (from the right).
Raises Not_found when on cannot be found in s.
val lsplit2 : t -> on:char -> (t * t) option
lsplit2 line ~on optionally returns line split into two strings around the
first appearance of on from the left.
val rsplit2 : t -> on:char -> (t * t) option
rsplit2 line ~on optionally returns line split into two strings around the
first appearance of on from the right.
val split : t -> on:char -> t list
split s ~on returns a list of substrings of s that are separated by on.
Consecutive on characters will cause multiple empty strings in the result.
Splitting the empty string returns a list of the empty string, not the empty list.
val split_on_chars : t -> on:char list -> t list
split_on_chars s ~on returns a list of all substrings of s that are separated by
one of the chars from on. The chars in on are not grouped, so a grouping of on
chars in the source string will produce multiple empty string splits in the result.
val split_lines : t -> t list
split_lines t
returns the list of lines that comprise t
. The lines do
not include the trailing "\n"
or "\r\n"
.val lfindi : ?pos:int -> t -> f:(int -> char -> bool) -> int option
lfindi ?pos t ~f
returns the smallest i >= pos
such that f i t.[i]
, if there is
such an i
. By default, pos = 0
.val rfindi : ?pos:int -> t -> f:(int -> char -> bool) -> int option
rfindi ?pos t ~f
returns the largest i <= pos
such that f i t.[i]
, if there is
such an i
. By default pos = length t - 1
.val lstrip : ?drop:(char -> bool) -> t -> t
lstrip ?drop s
returns a string with consecutive chars satisfying drop
(by default
white space, e.g. tabs, spaces, newlines, and carriage returns) stripped from the
beginning of s
.val rstrip : ?drop:(char -> bool) -> t -> t
rstrip ?drop s
returns a string with consecutive chars satisfying drop
(by default
white space, e.g. tabs, spaces, newlines, and carriage returns) stripped from the end
of s
.val strip : ?drop:(char -> bool) -> t -> t
strip ?drop s
returns a string with consecutive chars satisfying drop
(by default
white space, e.g. tabs, spaces, newlines, and carriage returns) stripped from the
beginning and end of s
.val map : t -> f:(char -> char) -> t
map f s
applies f
to each character in s
, and returns the
resulting string.val mapi : t -> f:(int -> char -> char) -> t
mapi f s
applies f
to each character in s
and its index, and returns the
resulting string.val foldi : t -> init:'a -> f:(int -> 'a -> char -> 'a) -> 'a
foldi works similarly to fold, but also passes the index of each character to f.
val concat_map : ?sep:t -> t -> f:(char -> t) -> t
Like map, but allows replacement of a single character with zero or two or more
characters.
val filter : t -> f:(char -> bool) -> t
filter s ~f:predicate
discards characters not satisfying predicate
val tr : target:char -> replacement:char -> t -> t
tr target replacement s
replaces every instance of target
in s
with
replacement
.val tr_inplace : target:char -> replacement:char -> t -> unit
tr_inplace target replacement s
destructively modifies s (in place!)
replacing every instance of target
in s
with replacement
.val chop_suffix_exn : t -> suffix:t -> t
chop_suffix_exn s ~suffix returns a copy of s without the trailing suffix.
Raises Invalid_argument if suffix is not a suffix of s.
val chop_prefix_exn : t -> prefix:t -> t
chop_prefix_exn s ~prefix returns a copy of s without the leading prefix.
Raises Invalid_argument if prefix is not a prefix of s.
val chop_suffix : t -> suffix:t -> t option
val chop_prefix : t -> prefix:t -> t option
val suffix : t -> int -> t
suffix s n
returns the longest suffix of s
of length less than or equal to n
val prefix : t -> int -> t
prefix s n
returns the longest prefix of s
of length less than or equal to n
val drop_suffix : t -> int -> t
drop_suffix s n
drops the longest suffix of s
of length less than or equal to n
val drop_prefix : t -> int -> t
drop_prefix s n
drops the longest prefix of s
of length less than or equal to n
val concat_array : ?sep:t -> t array -> t
concat_array ?sep ar is like String.concat, but operates on arrays.
val hash : t -> int
val equal : t -> t -> bool
val is_empty : t -> bool
is_empty s returns true iff s is empty (i.e. its length is 0).
module Infix : sig .. end
val of_char : char -> t
val of_char_list : char list -> t
module Escaping:sig
..end
val unsafe_get : string -> int -> char
val unsafe_set : string -> int -> char -> unit
val t_of_sexp : Sexplib.Sexp.t -> t
val sexp_of_t : t -> Sexplib.Sexp.t
val bin_t : t Bin_prot.Type_class.t
val bin_read_t : t Bin_prot.Read.reader
val __bin_read_t__ : (int -> t) Bin_prot.Read.reader
val bin_reader_t : t Bin_prot.Type_class.reader
val bin_size_t : t Bin_prot.Size.sizer
val bin_write_t : t Bin_prot.Write.writer
val bin_writer_t : t Bin_prot.Type_class.writer
Caseless
compares and hashes strings ignoring case, so that for example
Caseless.equal "OCaml" "ocaml"
and Caseless.("apple" < "Banana")
are true
, and
Caseless.Map
, Caseless.Table
lookup and Caseless.Set
membership is
case-insensitive.
Note that a ^ b
must copy both a
and b
into a newly-allocated result string, so
a ^ b ^ c ^ ... ^ z
is quadratic in the number of strings. String.concat
does not
have this problem -- it allocates the result buffer only once. The Rope
module
provides a data structure which uses a similar trick to achieve fast concatenation at
either end of a string.
Concatenate all strings in the list using separator sep (default sep "").
Warning: Only returns a copy if changes are necessary! Special characters are
represented by escape sequences, following the lexical conventions of Objective
Caml.
Substring search and replace functions. They use the Knuth-Morris-Pratt algorithm
(KMP) under the hood.
The functions in the Search_pattern
module allow the program to preprocess the
searched pattern once and then use it many times without further allocations.
create pattern
preprocesses pattern
as per KMP, building an int array
of
length length pattern
. All inputs are valid.
pos < 0
or pos >= length string
result in no match (hence index
returns
None
and index_exn
raises).
may_overlap
determines whether after a successful match, index_all
should start
looking for another one at the very next position (~may_overlap:true
), or jump to
the end of that match and continue from there (~may_overlap:false
), e.g.:
index_all (create "aaa") ~may_overlap:false ~in_:"aaaaBaaaaaa" = [0; 5; 8]
index_all (create "aaa") ~may_overlap:true ~in_:"aaaaBaaaaaa" = [0; 1; 5; 6; 7; 8]
E.g. replace_all internally calls index_all ~may_overlap:false.
Note that the result of replace_all pattern ~in_:text ~with_:r may still contain
pattern, e.g.
replace_all (create "bc") ~in_:"aabbcc" ~with_:"cb" = "aabcbc"
Search_pattern.create
and then forget the preprocessed pattern when the search is complete. pos < 0
or
pos >= length t
result in no match (hence substr_index
returns None
and
substr_index_exn
raises). may_overlap
indicates whether to report overlapping
matches, see Search_pattern.index_all
.Search_pattern.replace_all
, the result may still contain pattern
.is_substring ~substring:"bar" "foo bar baz"
is trueslice s start stop
gets a slice of s
between start
and stop
.
start
and stop
will be normalized before the access.
(viz. Core_array.normalize).rev t
returns t
in reverse order.nget s i
Gets the char at normalized position i
in s
.nset s i c
Sets the char at normalized position i
to c
.is_suffix s ~suffix
returns true
if s
ends with suffix
.is_prefix s ~prefix
returns true
if s
starts with prefix
.
If the string s contains the character on, then lsplit2_exn s ~on returns a pair
containing s split around the first appearance of on (from the left).
If the string s contains the character on, then rsplit2_exn s ~on returns a pair
containing s split around the first appearance of on (from the right).
lsplit2 line ~on
optionally returns line
split into two strings around the
first appearance of on
from the leftrsplit2 line ~on
optionally returns line
split into two strings around the
first appearance of on
from the rightsplit s ~on
split_on_chars s ~on
split_lines t
returns the list of lines that comprise t
. The lines do
not include the trailing "\n"
or "\r\n"
.lfindi ?pos t ~f
returns the smallest i >= pos
such that f i t.[i]
, if there is
such an i
. By default, pos = 0
.rfindi ?pos t ~f
returns the largest i <= pos
such that f i t.[i]
, if there is
such an i
. By default pos = length t - 1
.lstrip ?drop s
returns a string with consecutive chars satisfying drop
(by default
white space, e.g. tabs, spaces, newlines, and carriage returns) stripped from the
beginning of s
.rstrip ?drop s
returns a string with consecutive chars satisfying drop
(by default
white space, e.g. tabs, spaces, newlines, and carriage returns) stripped from the end
of s
.strip ?drop s
returns a string with consecutive chars satisfying drop
(by default
white space, e.g. tabs, spaces, newlines, and carriage returns) stripped from the
beginning and end of s
.map f s
applies f
to each character in s
, and returns the
resulting string.mapi f s
applies f
to each character in s
and its index, and returns the
resulting string.foldi
works similarly to fold
, but also pass in index of each character to f
Like map, but allows replacement of a single character with zero or two or more
characters.
filter s ~f:predicate discards characters not satisfying predicate.
tr target replacement s
replaces every instance of target
in s
with
replacement
.tr_inplace target replacement s
destructively modifies s (in place!)
replacing every instance of target
in s
with replacement
.
chop_suffix_exn s ~suffix returns a copy of s without the trailing suffix.
chop_prefix_exn s ~prefix returns a copy of s without the leading prefix.
suffix s n
returns the longest suffix of s
of length less than or equal to n
prefix s n
returns the longest prefix of s
of length less than or equal to n
drop_suffix s n
drops the longest suffix of s
of length less than or equal to n
drop_prefix s n
drops the longest prefix of s
of length less than or equal to n
concat_array sep ar
like String.concat
, but operates on arraysis_empty s
returns true
iff s
is empty (i.e. its length is 0).
escape_gen_exn ~escapeworthy_map ~escape_char returns a function that will escape a
string s as follows: if (c1,c2) is in escapeworthy_map, then all occurrences of
c1 are replaced by escape_char concatenated to c2.
Raises an exception if escapeworthy_map is not one-to-one. If escape_char is
not in escapeworthy_map, then it will be escaped to itself.
escape ~escapeworthy ~escape_char s is
escape_gen_exn ~escapeworthy_map:(List.zip_exn escapeworthy escapeworthy)
~escape_char.
Duplicates and escape_char will be removed from escapeworthy, so no exception
will be raised.
unescape_gen_exn is the inverse operation of escape_gen_exn. That is,
let escape = Staged.unstage (escape_gen_exn ~escapeworthy_map ~escape_char) in
let unescape = Staged.unstage (unescape_gen_exn ~escapeworthy_map ~escape_char) in
assert (s = unescape (escape s))
always succeeds when ~escapeworthy_map does not cause exceptions.
unescape ~escape_char is defined as
unescape_gen_exn ~escapeworthy_map:[] ~escape_char.
is_char_escaping s ~escape_char pos returns true if the char at pos is escaping,
false otherwise.
is_char_escaped s ~escape_char pos returns true if the char at pos is escaped,
false otherwise.
is_literal s ~escape_char pos returns true if the char at pos is not escaped or
escaping.
index s ~escape_char char finds the first literal (not escaped) instance of
char in s starting from 0.
rindex s ~escape_char char finds the first literal (not escaped) instance of
char in s starting from the end of s and proceeding towards 0.
index_from s ~escape_char pos char finds the first literal (not escaped)
instance of char in s starting from pos and proceeding towards the end of s.
rindex_from s ~escape_char pos char finds the first literal (not escaped)
instance of char in s starting from pos and proceeding towards 0.
split s ~escape_char ~on
split_on_chars s ~on
lsplit2 s on escape_char
splits s into a pair on the first literal instance of
on
(meaning the first unescaped instance) starting from the left.
rsplit2 s on escape_char
splits s
into a pair on the first literal instance of
on
(meaning the first unescaped instance) starting from the right.