%% Splits a string into whitespace-separated words by mapping every character
%% to a partial result and merging neighbouring partial results with an
%% associative combine step, so the work can be handed to a map/reduce runner.
-module(word_split).

-export([words/1, to_list/1, words_from_file/1]).
-export([words_traced/1]).

%% The traced wrappers below are not called anywhere in this module.
%% NOTE(review): presumably kept so they can be swapped into words/1 when
%% profiling - confirm before removing. Silence the unused-function warnings
%% in the meantime.
-compile({nowarn_unused_function,
          [process_char_traced/1, combine_traced/2, trace_usec/3]}).

%% Concatenation tree holding the complete words found so far.
-type word_list() :: nil
                   | {single, string()}
                   | {conc, word_list(), word_list()}.

%% data WordState = {chunk, S} | {segment, L, C, R}
%%   {chunk, S}         - characters with no whitespace seen yet.
%%   {segment, L, C, R} - L is the partial word before the first whitespace,
%%                        C the complete words in between, R the partial word
%%                        after the last whitespace.
-type word_state() :: {chunk, string()}
                    | {segment, string(), word_list(), string()}.

%%--------------------------------------------------------------------
%% Word list constructors
%%--------------------------------------------------------------------

%% Empty word list.
-spec nil() -> word_list().
nil() -> nil.

%% Word list holding exactly one word.
-spec singleton(string()) -> word_list().
singleton(S) -> {single, S}.

%% Concatenation of two word lists (no flattening is done here).
-spec conc(word_list(), word_list()) -> word_list().
conc(A, B) -> {conc, A, B}.

%% Flattens a word list tree into a plain list of words, left to right.
%% Uses an accumulator so that each word is consed exactly once instead of
%% being re-copied by `++' at every level of the tree.
-spec to_list(word_list()) -> [string()].
to_list(Tree) ->
    to_list(Tree, []).

%% Prepends the words of the given tree to Acc, preserving their order.
-spec to_list(word_list(), [string()]) -> [string()].
to_list(nil, Acc) -> Acc;
to_list({single, S}, Acc) -> [S | Acc];
to_list({conc, A, B}, Acc) -> to_list(A, to_list(B, Acc)).

%% Turns a possibly empty string into a word list: "" is no word at all.
-spec maybe_word(string()) -> word_list().
maybe_word("") -> nil();
maybe_word(S) -> singleton(S).

%%--------------------------------------------------------------------
%% Map and reduce steps
%%--------------------------------------------------------------------

%% Map step: a whitespace character (space, tab, newline, form feed) becomes
%% an empty segment, any other character becomes a one-character chunk.
-spec process_char(char()) -> word_state().
process_char(C) ->
    case lists:member(C, " \t\n\f") of
        true -> {segment, "", nil(), ""};
        false -> {chunk, [C]}
    end.

%% Reduce step: merges two adjacent partial results, left one first.
-spec combine(word_state(), word_state()) -> word_state().
combine({chunk, S1}, {chunk, S2}) ->
    {chunk, S1 ++ S2};
combine({chunk, S}, {segment, L, C, R}) ->
    {segment, S ++ L, C, R};
combine({segment, L, C, R}, {chunk, S}) ->
    {segment, L, C, R ++ S};
combine({segment, L1, C1, R1}, {segment, L2, C2, R2}) ->
    %% R1 and L2 touch each other, so together they form one complete word
    %% (or nothing when both are empty).
    {segment, L1, conc(C1, conc(maybe_word(R1 ++ L2), C2)), R2}.

%%--------------------------------------------------------------------
%% Tracing helpers
%%--------------------------------------------------------------------

%% Applies Func to Args, printing a start line and a finish line with the
%% elapsed time. Time is a zero-arity fun returning the current time as a
%% number; Units is the label printed next to the elapsed value.
%% Returns whatever Func returns.
-spec trace(string(), fun(() -> integer()), string(), fun(), [term()]) ->
          term().
trace(Name, Time, Units, Func, Args) ->
    io:format("~p<< ~s start~n", [node(), Name]),
    Start = Time(),
    Result = apply(Func, Args),
    Finish = Time(),
    io:format("~p>> ~s finish (~s ~p)~n",
              [node(), Name, Units, Finish - Start]),
    Result.

%% Traces a call with microsecond resolution.
%% Monotonic time is used so the difference is a true elapsed duration; a
%% single field of the erlang:now/0 tuple wraps around every second.
-spec trace_usec(string(), fun(), [term()]) -> term().
trace_usec(Name, Func, Args) ->
    trace(Name,
          fun() -> erlang:monotonic_time(microsecond) end,
          "usec", Func, Args).

%% Traces a call with second resolution (monotonic clock, see trace_usec/3).
-spec trace_sec(string(), fun(), [term()]) -> term().
trace_sec(Name, Func, Args) ->
    trace(Name,
          fun() -> erlang:monotonic_time(second) end,
          "sec", Func, Args).

%% process_char/1 wrapped in microsecond tracing.
-spec process_char_traced(char()) -> word_state().
process_char_traced(C) ->
    trace_usec("word_split:process_char/1", fun process_char/1, [C]).

%% combine/2 wrapped in microsecond tracing.
-spec combine_traced(word_state(), word_state()) -> word_state().
combine_traced(Piece1, Piece2) ->
    trace_usec("word_split:combine/2", fun combine/2, [Piece1, Piece2]).

%%--------------------------------------------------------------------
%% Public API
%%--------------------------------------------------------------------

%% Splits Str into words using ebb_flow:map_reduce/3 and
%% ebb_run:run_distributed/1.
%% Returns the list of words, or the atom `error' when the run does not
%% yield `{ok, [State]}' with a single chunk or segment.
-spec words(string()) -> [string()] | error.
words(Str) ->
    T = ebb_flow:map_reduce(fun(X) -> process_char(X) end,
                            fun(X, Y) -> combine(X, Y) end,
                            Str),
    case ebb_run:run_distributed(T) of
        {ok, [{chunk, S}]} ->
            to_list(maybe_word(S));
        {ok, [{segment, L, C, R}]} ->
            %% The outer partial words are complete now that nothing more
            %% can be attached to them.
            to_list(conc(maybe_word(L), conc(C, maybe_word(R))));
        _ ->
            error
    end.

%% Same as words/1, with start/finish lines and elapsed seconds printed.
-spec words_traced(string()) -> [string()] | error.
words_traced(Str) ->
    trace_sec("words", fun words/1, [Str]).

%% Reads the file named Str and prints the number of words it contains.
%% Prints a diagnostic instead when the file cannot be read or when
%% words/1 reports `error'. Always returns `ok'.
-spec words_from_file(file:name_all()) -> ok.
words_from_file(Str) ->
    case file:read_file(Str) of
        {ok, Data} ->
            case words(binary_to_list(Data)) of
                Words when is_list(Words) ->
                    io:format("Number of words: ~w\n", [length(Words)]);
                error ->
                    %% words/1 signals failure with a bare atom; calling
                    %% length/1 on it would crash with badarg.
                    io:format("Word splitting failed!~n")
            end;
        {error, enoent} ->
            io:format("File not found!");
        {error, Reason} ->
            %% Permission problems, directories etc. are not "not found".
            io:format("Cannot read file: ~s~n", [file:format_error(Reason)])
    end.