|
@@ -0,0 +1,64 @@
|
|
|
|
+-module(word_split).
|
|
|
|
+-export([words/1, to_list/1, words_from_file/1]).
|
|
|
|
+
|
|
|
|
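%% WordState = {chunk, S} | {segment, L, C, R}
%%   {chunk, S}         - characters S with no separator seen so far.
%%   {segment, L, C, R} - L: partial word before the first separator,
%%                        C: conc-tree of the complete words in between,
%%                        R: partial word after the last separator.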
|
|
|
|
+
|
|
|
|
%% Constructor for the empty conc-tree (a tree holding no words).
-spec nil() -> nil.
nil() -> nil.
|
|
|
|
+
|
|
|
|
%% Constructor for a conc-tree leaf holding exactly one element.
-spec singleton(term()) -> {single, term()}.
singleton(Word) -> {single, Word}.
|
|
|
|
+
|
|
|
|
%% Constructor for an inner conc-tree node joining a left and a right
%% subtree. No balancing or simplification is performed.
-spec conc(term(), term()) -> {conc, term(), term()}.
conc(Left, Right) -> {conc, Left, Right}.
|
|
|
|
+
|
|
|
|
%% @doc Flatten a conc-tree into the list of its elements, left to right.
%%
%% Accepts `nil', `{single, S}' and `{conc, A, B}' trees. `nil' nodes
%% contribute nothing, each `{single, S}' contributes `S'.
%%
%% The traversal threads an accumulator through the tree instead of
%% appending sub-results with `++'. Appending recopies the left result at
%% every node, which is quadratic for left-nested trees; this version
%% visits each node once.
to_list(Tree) ->
    to_list(Tree, []).

%% Prepend the elements of the tree to Acc, preserving left-to-right order.
%% The right subtree is flattened first so that the left subtree's elements
%% end up in front of it.
to_list(nil, Acc) ->
    Acc;
to_list({single, S}, Acc) ->
    [S | Acc];
to_list({conc, A, B}, Acc) ->
    to_list(A, to_list(B, Acc)).
|
|
|
|
+
|
|
|
|
%% Turn a run of characters into a conc-tree: the empty string yields the
%% empty tree (so it contributes no word), anything else becomes a
%% single-word leaf.
maybe_word([]) -> nil();
maybe_word(Chars) -> singleton(Chars).
|
|
|
|
+
|
|
|
|
%% Map one character to its initial word-splitting state.
%%
%% A whitespace character becomes an empty segment
%% `{segment, "", nil(), ""}' (a separator with no partial words on either
%% side); any other character becomes a one-character chunk `{chunk, [C]}'.
%%
%% The separator set is the ASCII whitespace characters: space, tab,
%% newline, vertical tab, form feed and carriage return. The previous
%% literal contained `\l', which is not an Erlang escape sequence and
%% denotes the plain letter `l', so every lowercase "l" was treated as a
%% word separator.
process_char(C) ->
    case lists:member(C, " \t\n\v\f\r") of
        true ->
            {segment, "", nil(), ""};
        false ->
            {chunk, [C]}
    end.
|
|
|
|
+
|
|
|
|
%% Merge the states of two adjacent pieces of text (left piece first).
%%
%% - chunk + chunk: still no separator, the characters are concatenated.
%% - chunk + segment: the chunk extends the segment's leading partial word.
%% - segment + chunk: the chunk extends the segment's trailing partial word.
%% - segment + segment: the left trailing partial word and the right
%%   leading partial word are joined into one word (dropped when empty)
%%   that sits between the two word trees.
combine({chunk, Left}, {chunk, Right}) ->
    {chunk, Left ++ Right};
combine({chunk, Prefix}, {segment, Head, Words, Tail}) ->
    {segment, Prefix ++ Head, Words, Tail};
combine({segment, Head, Words, Tail}, {chunk, Suffix}) ->
    {segment, Head, Words, Tail ++ Suffix};
combine({segment, Head, LeftWords, LeftTail}, {segment, RightHead, RightWords, Tail}) ->
    %% The word straddling the boundary between the two segments.
    Bridge = maybe_word(LeftTail ++ RightHead),
    {segment, Head, conc(LeftWords, conc(Bridge, RightWords)), Tail}.
|
|
|
|
+
|
|
|
|
%% @doc Split Str into its list of words.
%%
%% Builds an `ebb_flow:map_reduce/3' task that maps every character through
%% `process_char/1' and reduces the results with `combine/2', runs it with
%% `ebb_run:run_distributed/1', and converts the final state into a list of
%% words. Returns the atom `error' when the run does not yield
%% `{ok, [State]}' with a single chunk or segment state.
words(Str) ->
    Flow = ebb_flow:map_reduce(fun process_char/1, fun combine/2, Str),
    collect_words(ebb_run:run_distributed(Flow)).

%% Convert the outcome of the distributed run into the word list.
collect_words({ok, [{chunk, Word}]}) ->
    %% No separator anywhere: the whole input is at most one word.
    to_list(maybe_word(Word));
collect_words({ok, [{segment, Head, Words, Tail}]}) ->
    %% The leading and trailing partial words are complete words now.
    Leading = maybe_word(Head),
    Trailing = maybe_word(Tail),
    to_list(conc(Leading, conc(Words, Trailing)));
collect_words(_Other) ->
    error.
|
|
|
|
+
|
|
|
|
%% @doc Read the file named Str and print the number of words it contains.
%%
%% Str is the path passed to `file:read_file/1'. The file contents are
%% converted to a list of bytes and split with `words/1'.
%%
%% Prints one line to standard output and returns `ok':
%%   - the word count on success;
%%   - "File not found!" when the file does not exist;
%%   - the `file:format_error/1' description for any other read failure
%%     (previously every failure was reported as "File not found!");
%%   - a failure message when `words/1' returns `error' (previously this
%%     crashed with `badarg' in `length/1').
words_from_file(Str) ->
    case file:read_file(Str) of
        {ok, Data} ->
            case words(binary_to_list(Data)) of
                error ->
                    io:format("Could not split words!\n");
                Words ->
                    io:format("Number of words: ~w\n", [length(Words)])
            end;
        {error, enoent} ->
            io:format("File not found!\n");
        {error, Reason} ->
            io:format("Could not read file: ~s\n", [file:format_error(Reason)])
    end.
|
|
|
|
+
|