From c9469311c35f1cd24871682d343a86bea7491962 Mon Sep 17 00:00:00 2001
From: Paul Guyot <pguyot@kallisys.net>
Date: Sun, 29 Sep 2024 12:21:18 +0200
Subject: [PATCH] Add `binary:split/3` and `string:find/2,3`

Also update `string:split/2,3` to work with unicode binaries and fix specs.

Signed-off-by: Paul Guyot <pguyot@kallisys.net>
---
 CHANGELOG.md                       |   1 +
 libs/estdlib/src/binary.erl        |  18 ++++-
 libs/estdlib/src/string.erl        | 102 +++++++++++++++++++++++++---
 src/libAtomVM/defaultatoms.c       |   4 ++
 src/libAtomVM/defaultatoms.h       |   6 +-
 src/libAtomVM/nifs.c               | 104 +++++++++++++++++++++--------
 src/libAtomVM/nifs.gperf           |   1 +
 tests/libs/estdlib/CMakeLists.txt  |   1 +
 tests/libs/estdlib/test_binary.erl |  36 ++++++++++
 tests/libs/estdlib/test_string.erl |  31 +++++++++
 10 files changed, 266 insertions(+), 38 deletions(-)
 create mode 100644 tests/libs/estdlib/test_binary.erl

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7bffe7a1d..9ee4586a5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -27,6 +27,7 @@ also non string parameters (e.g. `Enum.join([1, 2], ",")`
 - Support for `code:ensure_loaded/1`
 - Support for `io_lib:latin1_char_list/1`
 - Add support to Elixir for `Keyword.split/2`
+- Support for `binary:split/3` and `string:find/2,3`
 
 ### Changed
 
diff --git a/libs/estdlib/src/binary.erl b/libs/estdlib/src/binary.erl
index cb93f9044..2254232c4 100644
--- a/libs/estdlib/src/binary.erl
+++ b/libs/estdlib/src/binary.erl
@@ -24,7 +24,7 @@
 %%-----------------------------------------------------------------------------
 -module(binary).
 
--export([at/2, part/3, split/2]).
+-export([at/2, part/3, split/2, split/3]).
 
 %%-----------------------------------------------------------------------------
 %% @param   Binary binary to get a byte from
@@ -51,6 +51,7 @@ part(_Binary, _Pos, _Len) ->
     erlang:nif_error(undefined).
 
 %%-----------------------------------------------------------------------------
+%% @equiv split(Binary, Pattern, [])
 %% @param   Binary  binary to split
 %% @param   Pattern pattern to perform the split
 %% @return a list composed of one or two binaries
@@ -62,3 +63,18 @@ part(_Binary, _Pos, _Len) ->
 -spec split(Binary :: binary(), Pattern :: binary()) -> [binary()].
 split(_Binary, _Pattern) ->
     erlang:nif_error(undefined).
+
+%%-----------------------------------------------------------------------------
+%% @param   Binary  binary to split
+%% @param   Pattern pattern to perform the split
+%% @param   Options options for the split
+%% @return a list composed of one or two binaries
+%% @doc Split a binary according to pattern.
+%% If pattern is not found, returns a singleton list with the passed binary.
+%% Unlike Erlang/OTP, pattern must be a binary.
+%% Only implemented option is `global'
+%% @end
+%%-----------------------------------------------------------------------------
+-spec split(Binary :: binary(), Pattern :: binary(), Option :: [global]) -> [binary()].
+split(_Binary, _Pattern, _Option) ->
+    erlang:nif_error(undefined).
diff --git a/libs/estdlib/src/string.erl b/libs/estdlib/src/string.erl
index ac701f207..63ee7a896 100644
--- a/libs/estdlib/src/string.erl
+++ b/libs/estdlib/src/string.erl
@@ -27,7 +27,7 @@
 %%-----------------------------------------------------------------------------
 -module(string).
 
--export([to_upper/1, to_lower/1, split/2, split/3, trim/1, trim/2]).
+-export([to_upper/1, to_lower/1, split/2, split/3, trim/1, trim/2, find/2, find/3]).
 
 %%-----------------------------------------------------------------------------
 %% @param Input a string or character to convert
@@ -76,7 +76,7 @@ lower_char(C) when is_integer(C) ->
 %% @returns chardata
 %% @end
 %%-----------------------------------------------------------------------------
--spec split(String :: string(), Pattern :: string()) -> string() | char().
+-spec split(String :: unicode:chardata(), Pattern :: unicode:chardata()) -> [unicode:chardata()].
 split(String, Pattern) ->
     split(String, Pattern, leading).
 
@@ -98,25 +98,50 @@ split(String, Pattern) ->
 %% [<<"ab">>,<<"bc">>,<<>>,<<"cd">>]'''
 %% @end
 %%-----------------------------------------------------------------------------
--spec split(String :: string(), Pattern :: string() | char(), Where :: atom()) -> string() | char().
-split(String, Pattern, Where) ->
-    split(String, Pattern, Where, [], []).
+-spec split(
+    String :: unicode:chardata(), Pattern :: unicode:chardata(), Where :: leading | trailing | all
+) -> [unicode:chardata()].
+split(String, Pattern, Where) when is_binary(String) andalso is_list(Pattern) ->
+    split_binary(String, unicode:characters_to_binary(Pattern), Where);
+split(String, Pattern, Where) when is_binary(String) andalso is_binary(Pattern) ->
+    split_binary(String, Pattern, Where);
+split(String, Pattern, Where) when is_list(String) andalso is_binary(Pattern) ->
+    split_list(String, unicode:characters_to_list(Pattern), Where);
+split(String, Pattern, Where) when is_list(String) andalso is_list(Pattern) ->
+    split_list(String, Pattern, Where).
 
 %% @private
-split([], _Pattern, _Where, Token, Accum) ->
+split_binary(String, Pattern, leading) ->
+    binary:split(String, Pattern);
+split_binary(String, Pattern, all) ->
+    binary:split(String, Pattern, [global]);
+split_binary(String, Pattern, trailing) ->
+    case find_binary(String, Pattern, trailing) of
+        nomatch ->
+            [String];
+        Rest ->
+            [binary:part(String, 0, byte_size(String) - byte_size(Rest) - byte_size(Pattern)), Rest]
+    end.
+
+%% @private
+split_list(String, Pattern, Where) ->
+    split_list(String, Pattern, Where, [], []).
+
+%% @private
+split_list([], _Pattern, _Where, Token, Accum) ->
     lists:reverse([lists:reverse(Token) | Accum]);
-split(String, Pattern, Where, Token, Accum) ->
+split_list(String, Pattern, Where, Token, Accum) ->
     case prefix_match(String, Pattern) of
         {ok, Rest} ->
             case Where of
                 leading ->
                     [lists:reverse(Token), Rest];
                 all ->
-                    split(Rest, Pattern, Where, [], [lists:reverse(Token) | Accum])
+                    split_list(Rest, Pattern, Where, [], [lists:reverse(Token) | Accum])
             end;
         no ->
             [Char | Rest] = String,
-            split(Rest, Pattern, Where, [Char | Token], Accum)
+            split_list(Rest, Pattern, Where, [Char | Token], Accum)
     end.
 
 %% @private
@@ -167,3 +192,62 @@ triml([$\s | R]) ->
     triml(R);
 triml(R) ->
     R.
+
+%%-----------------------------------------------------------------------------
+%% @equiv find(String, SearchPattern, leading)
+%% @param String string to search in
+%% @param SearchPattern pattern to search
+%% @returns remainder of String starting from first occurrence of SearchPattern
+%% or `nomatch' if SearchPattern cannot be found in String
+%% @end
+%%-----------------------------------------------------------------------------
+-spec find(String :: unicode:chardata(), SearchPattern :: unicode:chardata()) ->
+    unicode:chardata() | nomatch.
+find(String, SearchPattern) ->
+    find(String, SearchPattern, leading).
+
+%%-----------------------------------------------------------------------------
+%% @param String string to search in
+%% @param SearchPattern pattern to search
+%% @param Direction direction to search, `leading' or `trailing'
+%% @returns remainder of String starting from first or last occurrence of
+%% SearchPattern or `nomatch' if SearchPattern cannot be found in String
+%% @end
+%%-----------------------------------------------------------------------------
+-spec find(
+    String :: unicode:chardata(),
+    SearchPattern :: unicode:chardata(),
+    Direction :: leading | trailing
+) -> unicode:chardata() | nomatch.
+find(String, "", _Direction) ->
+    String;
+find(String, <<>>, _Direction) ->
+    String;
+find(String, SearchPattern, Direction) when is_binary(String) andalso is_list(SearchPattern) ->
+    find_binary(String, unicode:characters_to_binary(SearchPattern), Direction);
+find(String, SearchPattern, Direction) when is_binary(String) andalso is_binary(SearchPattern) ->
+    find_binary(String, SearchPattern, Direction);
+find(String, SearchPattern, Direction) when is_list(String) andalso is_binary(SearchPattern) ->
+    find_list(String, unicode:characters_to_list(SearchPattern), Direction);
+find(String, SearchPattern, Direction) when is_list(String) andalso is_list(SearchPattern) ->
+    find_list(String, SearchPattern, Direction).
+
+%% @private
+find_binary(<<_C, Rest/binary>> = String, SearchPattern, leading) when
+    byte_size(String) >= byte_size(SearchPattern)
+->
+    case binary:part(String, 0, byte_size(SearchPattern)) =:= SearchPattern of
+        true -> String;
+        false -> find_binary(Rest, SearchPattern, leading)
+    end;
+find_binary(_Sring, _SearchPattern, leading) ->
+    nomatch.
+
+%% @private
+find_list([_C | Rest] = String, SearchPattern, leading) ->
+    case prefix_match(String, SearchPattern) of
+        {ok, _Rest} -> String;
+        no -> find_list(Rest, SearchPattern, leading)
+    end;
+find_list([], _SearchPattern, leading) ->
+    nomatch.
diff --git a/src/libAtomVM/defaultatoms.c b/src/libAtomVM/defaultatoms.c
index a8645adfd..f10d24293 100644
--- a/src/libAtomVM/defaultatoms.c
+++ b/src/libAtomVM/defaultatoms.c
@@ -160,6 +160,8 @@ static const char *const cast_atom = "\x5" "$cast";
 
 static const char *const unicode_atom = "\x7" "unicode";
 
+static const char *const global_atom = "\x6" "global";
+
 void defaultatoms_init(GlobalContext *glb)
 {
     int ok = 1;
@@ -304,6 +306,8 @@ void defaultatoms_init(GlobalContext *glb)
 
     ok &= globalcontext_insert_atom(glb, unicode_atom) == UNICODE_ATOM_INDEX;
 
+    ok &= globalcontext_insert_atom(glb, global_atom) == GLOBAL_ATOM_INDEX;
+
     if (!ok) {
         AVM_ABORT();
     }
diff --git a/src/libAtomVM/defaultatoms.h b/src/libAtomVM/defaultatoms.h
index eb0a4ff2c..285ee4f0e 100644
--- a/src/libAtomVM/defaultatoms.h
+++ b/src/libAtomVM/defaultatoms.h
@@ -169,7 +169,9 @@ extern "C" {
 
 #define UNICODE_ATOM_INDEX 110
 
-#define PLATFORM_ATOMS_BASE_INDEX 111
+#define GLOBAL_ATOM_INDEX 111
+
+#define PLATFORM_ATOMS_BASE_INDEX 112
 
 #define FALSE_ATOM TERM_FROM_ATOM_INDEX(FALSE_ATOM_INDEX)
 #define TRUE_ATOM TERM_FROM_ATOM_INDEX(TRUE_ATOM_INDEX)
@@ -313,6 +315,8 @@ extern "C" {
 
 #define UNICODE_ATOM TERM_FROM_ATOM_INDEX(UNICODE_ATOM_INDEX)
 
+#define GLOBAL_ATOM TERM_FROM_ATOM_INDEX(GLOBAL_ATOM_INDEX)
+
 void defaultatoms_init(GlobalContext *glb);
 
 void platform_defaultatoms_init(GlobalContext *glb);
diff --git a/src/libAtomVM/nifs.c b/src/libAtomVM/nifs.c
index 9a4732254..ea202c77f 100644
--- a/src/libAtomVM/nifs.c
+++ b/src/libAtomVM/nifs.c
@@ -91,7 +91,7 @@ static term nif_binary_at_2(Context *ctx, int argc, term argv[]);
 static term nif_binary_first_1(Context *ctx, int argc, term argv[]);
 static term nif_binary_last_1(Context *ctx, int argc, term argv[]);
 static term nif_binary_part_3(Context *ctx, int argc, term argv[]);
-static term nif_binary_split_2(Context *ctx, int argc, term argv[]);
+static term nif_binary_split(Context *ctx, int argc, term argv[]);
 static term nif_calendar_system_time_to_universal_time_2(Context *ctx, int argc, term argv[]);
 static term nif_erlang_delete_element_2(Context *ctx, int argc, term argv[]);
 static term nif_erlang_atom_to_binary(Context *ctx, int argc, term argv[]);
@@ -232,7 +232,7 @@ static const struct Nif binary_part_nif =
 static const struct Nif binary_split_nif =
 {
     .base.type = NIFFunctionType,
-    .nif_ptr = nif_binary_split_2
+    .nif_ptr = nif_binary_split
 };
 
 static const struct Nif make_ref_nif =
@@ -3007,16 +3007,33 @@ static term nif_binary_part_3(Context *ctx, int argc, term argv[])
     return term_maybe_create_sub_binary(bin_term, pos, len, &ctx->heap, ctx->global);
 }
 
-static term nif_binary_split_2(Context *ctx, int argc, term argv[])
+static term nif_binary_split(Context *ctx, int argc, term argv[])
 {
-    UNUSED(argc);
-
     term bin_term = argv[0];
     term pattern_term = argv[1];
 
     VALIDATE_VALUE(bin_term, term_is_binary);
     VALIDATE_VALUE(pattern_term, term_is_binary);
 
+    bool global = false;
+    if (argc == 3) {
+        term options = argv[2];
+        if (UNLIKELY(!term_is_list(options))) {
+            RAISE_ERROR(BADARG_ATOM);
+        }
+        if (term_is_nonempty_list(options)) {
+            term head = term_get_list_head(options);
+            term tail = term_get_list_tail(options);
+            if (UNLIKELY(head != GLOBAL_ATOM)) {
+                RAISE_ERROR(BADARG_ATOM);
+            }
+            if (UNLIKELY(!term_is_nil(tail))) {
+                RAISE_ERROR(BADARG_ATOM);
+            }
+            global = true;
+        }
+    }
+
     int bin_size = term_binary_size(bin_term);
     int pattern_size = term_binary_size(pattern_term);
 
@@ -3027,38 +3044,71 @@ static term nif_binary_split_2(Context *ctx, int argc, term argv[])
     const char *bin_data = term_binary_data(bin_term);
     const char *pattern_data = term_binary_data(pattern_term);
 
-    const char *found = (const char *) memmem(bin_data, bin_size, pattern_data, pattern_size);
+    // Count segments first to allocate memory once.
+    size_t num_segments = 1;
+    const char *temp_bin_data = bin_data;
+    int temp_bin_size = bin_size;
+    do {
+        const char *found = (const char *) memmem(temp_bin_data, temp_bin_size, pattern_data, pattern_size);
+        if (!found) break;
+        num_segments++;
+        int next_search_offset = found - temp_bin_data + pattern_size;
+        temp_bin_data += next_search_offset;
+        temp_bin_size -= next_search_offset;
+    } while (global && temp_bin_size >= pattern_size);
+
+    term result_list = term_nil();
+
+    if (num_segments == 1) {
+        // not found
+        if (UNLIKELY(memory_ensure_free_with_roots(ctx, 2, 1, argv, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) {
+            RAISE_ERROR(OUT_OF_MEMORY_ATOM);
+        }
 
-    int offset = found - bin_data;
+        return term_list_prepend(argv[0], result_list, &ctx->heap);
+    }
 
-    if (found) {
-        int tok_size = offset;
-        size_t tok_size_in_terms = term_sub_binary_heap_size(bin_term, tok_size);
+    // binary:split/2,3 always return sub binaries, except when copied binaries are as small as sub-binaries.
+    if (UNLIKELY(memory_ensure_free_with_roots(ctx, LIST_SIZE(num_segments, TERM_BOXED_SUB_BINARY_SIZE), 2, argv, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) {
+        RAISE_ERROR(OUT_OF_MEMORY_ATOM);
+    }
 
-        int rest_size = bin_size - offset - pattern_size;
-        size_t rest_size_in_terms = term_sub_binary_heap_size(bin_term, rest_size);
+    // Allocate list first
+    for (size_t index_segments = 0; index_segments < num_segments; index_segments++) {
+        result_list = term_list_prepend(term_nil(), result_list, &ctx->heap);
+    }
 
-        // + 4 which is the result cons
-        if (UNLIKELY(memory_ensure_free_with_roots(ctx, tok_size_in_terms + rest_size_in_terms + 4, 1, argv, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) {
-            RAISE_ERROR(OUT_OF_MEMORY_ATOM);
-        }
+    // Reset pointers after allocation
+    bin_data = term_binary_data(argv[0]);
+    pattern_data = term_binary_data(argv[1]);
+
+    term list_cursor = result_list;
+    temp_bin_data = bin_data;
+    temp_bin_size = bin_size;
+    term *list_ptr = term_get_list_ptr(list_cursor);
+    do {
+        const char *found = (const char *) memmem(temp_bin_data, temp_bin_size, pattern_data, pattern_size);
 
-        bin_term = argv[0];
-        term tok = term_maybe_create_sub_binary(bin_term, 0, tok_size, &ctx->heap, ctx->global);
-        term rest = term_maybe_create_sub_binary(bin_term, offset + pattern_size, rest_size, &ctx->heap, ctx->global);
+        if (found) {
+            term tok = term_maybe_create_sub_binary(argv[0], temp_bin_data - bin_data, found - temp_bin_data, &ctx->heap, ctx->global);
+            list_ptr[LIST_HEAD_INDEX] = tok;
 
-        term result_list = term_list_prepend(rest, term_nil(), &ctx->heap);
-        result_list = term_list_prepend(tok, result_list, &ctx->heap);
+            list_cursor = list_ptr[LIST_TAIL_INDEX];
+            list_ptr = term_get_list_ptr(list_cursor);
 
-        return result_list;
+            int next_search_offset = found - temp_bin_data + pattern_size;
+            temp_bin_data += next_search_offset;
+            temp_bin_size -= next_search_offset;
+        }
 
-    } else {
-        if (UNLIKELY(memory_ensure_free_with_roots(ctx, 2, 1, argv, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) {
-            RAISE_ERROR(OUT_OF_MEMORY_ATOM);
+        if (!found || !global) {
+            term rest = term_maybe_create_sub_binary(argv[0], temp_bin_data - bin_data, temp_bin_size, &ctx->heap, ctx->global);
+            list_ptr[LIST_HEAD_INDEX] = rest;
+            break;
         }
+    } while (!term_is_nil(list_cursor));
 
-        return term_list_prepend(argv[0], term_nil(), &ctx->heap);
-    }
+    return result_list;
 }
 
 static term nif_erlang_throw(Context *ctx, int argc, term argv[])
diff --git a/src/libAtomVM/nifs.gperf b/src/libAtomVM/nifs.gperf
index 49b1530e1..5852e86bf 100644
--- a/src/libAtomVM/nifs.gperf
+++ b/src/libAtomVM/nifs.gperf
@@ -36,6 +36,7 @@ binary:first/1, &binary_first_nif
 binary:last/1, &binary_last_nif
 binary:part/3, &binary_part_nif
 binary:split/2, &binary_split_nif
+binary:split/3, &binary_split_nif
 calendar:system_time_to_universal_time/2, &system_time_to_universal_time_nif
 erlang:atom_to_binary/1, &atom_to_binary_nif
 erlang:atom_to_binary/2, &atom_to_binary_nif
diff --git a/tests/libs/estdlib/CMakeLists.txt b/tests/libs/estdlib/CMakeLists.txt
index 2dfe988bc..ebc4754f4 100644
--- a/tests/libs/estdlib/CMakeLists.txt
+++ b/tests/libs/estdlib/CMakeLists.txt
@@ -24,6 +24,7 @@ include(BuildErlang)
 
 set(ERLANG_MODULES
     test_apply
+    test_binary
     test_calendar
     test_gen_event
     test_gen_server
diff --git a/tests/libs/estdlib/test_binary.erl b/tests/libs/estdlib/test_binary.erl
new file mode 100644
index 000000000..2549e045d
--- /dev/null
+++ b/tests/libs/estdlib/test_binary.erl
@@ -0,0 +1,36 @@
+%
+% This file is part of AtomVM.
+%
+% Copyright 2024 Paul Guyot <pguyot@kallisys.net>
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%    http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+%
+% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+%
+
+-module(test_binary).
+
+-export([test/0]).
+
+-include("etest.hrl").
+
+test() ->
+    ok = test_split(),
+    ok.
+
+test_split() ->
+    ?ASSERT_MATCH(binary:split(<<"foobar">>, <<"oo">>), [<<"f">>, <<"bar">>]),
+    ?ASSERT_MATCH(binary:split(<<"foobar">>, <<"ooz">>), [<<"foobar">>]),
+    ?ASSERT_MATCH(binary:split(<<"foobar">>, <<"o">>), [<<"f">>, <<"obar">>]),
+    ?ASSERT_MATCH(binary:split(<<"foobar">>, <<"o">>, [global]), [<<"f">>, <<>>, <<"bar">>]),
+    ok.
diff --git a/tests/libs/estdlib/test_string.erl b/tests/libs/estdlib/test_string.erl
index 178df5562..3006dab40 100644
--- a/tests/libs/estdlib/test_string.erl
+++ b/tests/libs/estdlib/test_string.erl
@@ -28,6 +28,7 @@ test() ->
     ok = test_to_upper(),
     ok = test_split(),
     ok = test_trim(),
+    ok = test_find(),
     ok.
 
 test_to_upper() ->
@@ -47,6 +48,18 @@ test_split() ->
     ?ASSERT_MATCH(string:split("fooXXXbar", "XXX"), ["foo", "bar"]),
     ?ASSERT_MATCH(string:split("foo barXXXtapas", "XXX"), ["foo bar", "tapas"]),
     ?ASSERT_MATCH(string:split("foo barXXXXXXtapas", "XXX", all), ["foo bar", [], "tapas"]),
+
+    ?ASSERT_MATCH(string:split("ab..bc..cd", ".."), ["ab", "bc..cd"]),
+    ?ASSERT_MATCH(string:split(<<"ab..bc..cd">>, ".."), [<<"ab">>, <<"bc..cd">>]),
+    ?ASSERT_MATCH(string:split(<<"ab..bc..cd">>, "..", leading), [<<"ab">>, <<"bc..cd">>]),
+    %   ?ASSERT_MATCH(string:split(<<"ab..bc..cd">>, "..", trailing), [<<"ab..bc">>, <<"cd">>]),
+    ?ASSERT_MATCH(string:split(<<"ab..bc..cd">>, "..", all), [<<"ab">>, <<"bc">>, <<"cd">>]),
+
+    ?ASSERT_MATCH(string:split("ab..bc..cd", <<"..">>), ["ab", "bc..cd"]),
+    ?ASSERT_MATCH(string:split(<<"ab..bc..cd">>, <<"..">>, leading), [<<"ab">>, <<"bc..cd">>]),
+    %   ?ASSERT_MATCH(string:split(<<"ab..bc..cd">>, <<"..">>, trailing), [<<"ab..bc">>, <<"cd">>]),
+    ?ASSERT_MATCH(string:split(<<"ab..bc..cd">>, <<"..">>, all), [<<"ab">>, <<"bc">>, <<"cd">>]),
+
     ok.
 
 test_trim() ->
@@ -60,4 +73,22 @@ test_trim() ->
     ?ASSERT_MATCH(string:trim("      foo bar      ", both), "foo bar"),
     ok.
 
+test_find() ->
+    ?ASSERT_MATCH(string:find("", ""), ""),
+    ?ASSERT_MATCH(string:find("foo", ""), "foo"),
+    ?ASSERT_MATCH(string:find("", "foo"), nomatch),
+    ?ASSERT_MATCH(string:find(<<>>, <<>>), <<>>),
+    ?ASSERT_MATCH(string:find(<<>>, ""), <<>>),
+    ?ASSERT_MATCH(string:find(<<"foo">>, <<"">>), <<"foo">>),
+    ?ASSERT_MATCH(string:find(<<"foo">>, ""), <<"foo">>),
+    ?ASSERT_MATCH(string:find(<<"">>, <<"foo">>), nomatch),
+    ?ASSERT_MATCH(string:find(<<"">>, "foo"), nomatch),
+
+    ?ASSERT_MATCH(string:find("foobar", "ba"), "bar"),
+    ?ASSERT_MATCH(string:find(<<"foobar">>, "ba"), <<"bar">>),
+    ?ASSERT_MATCH(string:find("foobar", <<"ba">>), "bar"),
+    ?ASSERT_MATCH(string:find(<<"foobar">>, <<"ba">>), <<"bar">>),
+
+    ok.
+
 id(X) -> X.