1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677 |
- -module(word_split).
- -export([words/1, to_list/1, words_from_file/1]).
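%% WordState = {chunk, S} | {segment, L, C, R}
%%   {chunk, S}         - a run of non-whitespace characters S seen so far
%%   {segment, L, C, R} - partial word L on the left, tree C of complete
%%                        words in the middle, partial word R on the right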
%% @doc Build the empty word tree.
nil() -> nil.
%% @doc Build a word tree holding exactly one word.
singleton(Word) -> {single, Word}.
%% @doc Join two word trees into one; the words of Left come before the
%% words of Right. No flattening happens here, only a new tree node.
conc(Left, Right) -> {conc, Left, Right}.
%% @doc Flatten a word tree into the list of its words, left to right.
%%
%% Tree is `nil', `{single, Word}' or `{conc, Left, Right}'.
%% Returns a list with one element per `single' node.
to_list(Tree) ->
    to_list(Tree, []).

%% Prepend the words of the tree to Acc. The right subtree is flattened
%% first so that every word is consed exactly once; the previous `++' based
%% version re-copied the left result at each `conc' node, which is
%% quadratic on left-nested trees.
to_list(nil, Acc) ->
    Acc;
to_list({single, Word}, Acc) ->
    [Word | Acc];
to_list({conc, Left, Right}, Acc) ->
    to_list(Left, to_list(Right, Acc)).
%% @doc Turn a candidate word into a word tree: the empty string yields the
%% empty tree, anything else yields a one-word tree.
maybe_word([]) -> nil();
maybe_word(Word) -> singleton(Word).
%% @doc Map step of the word split: classify a single character.
%%
%% A space, tab, newline or form feed becomes an empty segment
%% `{segment, "", nil(), ""}'; any other character becomes `{chunk, [C]}'.
%% Start/finish trace lines tagged with node() are printed, the latter with
%% the elapsed time in microseconds.
process_char(C) ->
    io:format("~p: process_char start~n", [node()]),
    %% Use the monotonic clock: subtracting only the microsecond field of
    %% erlang:now() gave wrong (even negative) durations whenever a second
    %% boundary was crossed, and erlang:now/0 is deprecated.
    Start = erlang:monotonic_time(microsecond),
    Result =
        case lists:member(C, " \t\n\f") of
            true ->
                {segment, "", nil(), ""};
            false ->
                {chunk, [C]}
        end,
    Elapsed = erlang:monotonic_time(microsecond) - Start,
    io:format("~p: process_char finish, time ~p~n", [node(), Elapsed]),
    Result.
%% @doc Reduce step wrapper: merge two partial results with combine/2 and
%% print start/finish trace lines tagged with node(), the latter with the
%% elapsed time in microseconds. Returns whatever combine/2 returns.
combine_timed(Arg1, Arg2) ->
    io:format("~p: combine start~n", [node()]),
    %% Use the monotonic clock: subtracting only the microsecond field of
    %% erlang:now() gave wrong (even negative) durations whenever a second
    %% boundary was crossed, and erlang:now/0 is deprecated.
    Start = erlang:monotonic_time(microsecond),
    Result = combine(Arg1, Arg2),
    Elapsed = erlang:monotonic_time(microsecond) - Start,
    io:format("~p: combine finish, time ~p~n", [node(), Elapsed]),
    Result.
-
%% @doc Merge the states of two adjacent pieces of text (left piece first).
%%
%% Two chunks concatenate into a longer chunk. A chunk next to a segment is
%% absorbed into that segment's nearest partial word. Two segments join by
%% turning the text between them (right edge of the first plus left edge of
%% the second) into a word, if it is non-empty, placed between their trees.
combine({chunk, Front}, {chunk, Back}) ->
    {chunk, Front ++ Back};
combine({chunk, Front}, {segment, Left, Words, Right}) ->
    {segment, Front ++ Left, Words, Right};
combine({segment, Left, Words, Right}, {chunk, Back}) ->
    {segment, Left, Words, Right ++ Back};
combine({segment, LeftA, WordsA, RightA}, {segment, LeftB, WordsB, RightB}) ->
    %% The inner edges meet and form at most one complete word.
    Middle = maybe_word(RightA ++ LeftB),
    {segment, LeftA, conc(WordsA, conc(Middle, WordsB)), RightB}.
%% @doc Split the string Str into a list of whitespace-separated words.
%%
%% Each character is mapped with process_char/1 and the results are reduced
%% with combine_timed/2 through ebb_flow:map_reduce/3; the resulting task is
%% executed with ebb_run:run_distributed/1 (presumably across nodes - confirm
%% against the ebb modules). Returns the list of words, or the atom `error'
%% when the run does not yield `{ok, [State]}' with a single final state.
words(Str) ->
    MapFun = fun(Char) -> process_char(Char) end,
    ReduceFun = fun(Left, Right) -> combine_timed(Left, Right) end,
    Flow = ebb_flow:map_reduce(MapFun, ReduceFun, Str),
    finish_words(ebb_run:run_distributed(Flow)).

%% Convert the outcome of the distributed run into the final word list.
finish_words({ok, [{chunk, Text}]}) ->
    to_list(maybe_word(Text));
finish_words({ok, [{segment, Left, Words, Right}]}) ->
    %% The outer partial words are complete once the whole input is consumed.
    to_list(conc(maybe_word(Left), conc(Words, maybe_word(Right))));
finish_words(_Other) ->
    error.
%% @doc Read the file named Str, split its contents into words with words/1
%% and print the word count. Returns `ok' (the result of io:format).
%%
%% The file is processed as a list of bytes (binary_to_list/1).
words_from_file(Str) ->
    case file:read_file(Str) of
        {ok, Data} ->
            case words(binary_to_list(Data)) of
                error ->
                    %% words/1 signals a failed run with the atom `error';
                    %% calling length/1 on it would crash with badarg.
                    io:format("Word splitting failed!~n");
                Words ->
                    io:format("Number of words: ~w\n", [length(Words)])
            end;
        {error, enoent} ->
            io:format("File not found!");
        {error, Reason} ->
            %% Other failures (permissions, directory, ...) are not
            %% "not found"; report the actual cause.
            io:format("Could not read file: ~ts~n", [file:format_error(Reason)])
    end.
-
|