-module(word_split).
-export([words/1, to_list/1, words_from_file/1]).

%% data WordState = {chunk, S} | {segment, L, C, R}

%% Conc-list (binary tree) of words, used so partial results can be
%% combined associatively and flattened at the end.
nil() -> nil.
singleton(S) -> {single, S}.
conc(A, B) -> {conc, A, B}.

to_list(nil) -> [];
to_list({single, S}) -> [S];
to_list({conc, A, B}) -> to_list(A) ++ to_list(B).

%% An empty string contributes no word.
maybe_word("") -> nil();
maybe_word(S) -> singleton(S).

%% Map step: a whitespace character starts a fresh segment, any other
%% character becomes a one-character chunk.
process_char(C) ->
    io:format("~p: process_char start~n", [node()]),
    Start = erlang:monotonic_time(microsecond),
    Result = case lists:member(C, " \t\n\f") of
                 true  -> {segment, "", nil(), ""};
                 false -> {chunk, [C]}
             end,
    Finish = erlang:monotonic_time(microsecond),
    io:format("~p: process_char finish, time ~p~n", [node(), Finish - Start]),
    Result.

combine_timed(Arg1, Arg2) ->
    io:format("~p: combine start~n", [node()]),
    Start = erlang:monotonic_time(microsecond),
    Result = combine(Arg1, Arg2),
    Finish = erlang:monotonic_time(microsecond),
    io:format("~p: combine finish, time ~p~n", [node(), Finish - Start]),
    Result.

%% Reduce step: associative combination of partial results.
combine({chunk, S1}, {chunk, S2}) ->
    {chunk, S1 ++ S2};
combine({chunk, S}, {segment, L, C, R}) ->
    {segment, S ++ L, C, R};
combine({segment, L, C, R}, {chunk, S}) ->
    {segment, L, C, R ++ S};
combine({segment, L1, C1, R1}, {segment, L2, C2, R2}) ->
    {segment, L1, conc(C1, conc(maybe_word(R1 ++ L2), C2)), R2}.

words(Str) ->
    T = ebb_flow:map_reduce(fun(X) -> process_char(X) end,
                            fun(X, Y) -> combine_timed(X, Y) end,
                            Str),
    case ebb_run:run_distributed(T) of
        {ok, [{chunk, S}]} ->
            to_list(maybe_word(S));
        {ok, [{segment, L, C, R}]} ->
            to_list(conc(maybe_word(L), conc(C, maybe_word(R))));
        _ ->
            error
    end.

words_from_file(Str) ->
    case file:read_file(Str) of
        {ok, Data} ->
            io:format("Number of words: ~w~n",
                      [length(words(binary_to_list(Data)))]);
        _ ->
            io:format("File not found!~n")
    end.
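
%% Example usage, as a sketch: it assumes the ebb framework's ebb_flow and
%% ebb_run applications are available and that any worker nodes are already
%% connected; "input.txt" is a hypothetical file name, and the per-character
%% debug traces printed by process_char/1 and combine_timed/2 are omitted.
%%
%%   1> word_split:words("hello  world").
%%   ["hello","world"]
%%
%%   2> word_split:words_from_file("input.txt").
%%   Number of words: 2
%%   ok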