word_split.erl 1.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. -module(word_split).
  2. -export([words/1, to_list/1, words_from_file/1]).
  3. % data WordState = {chunk, S} | {segment, L, C, R}
  %% Constructor for the empty word tree, represented by the atom `nil`.
  nil() -> nil.
  %% Constructor for a word tree that holds exactly one item, `Word`.
  singleton(Word) -> {single, Word}.
  %% Constructor for a tree node joining two subtrees. No flattening or
  %% simplification is done here; the node simply records both children.
  conc(Left, Right) -> {conc, Left, Right}.
  %% Flattens a word tree into the list of its items, in left-to-right order.
  %%
  %% Accepted shapes: `nil` (no items), `{single, S}` (one item) and
  %% `{conc, A, B}` (items of A followed by items of B). Any other term
  %% raises `function_clause`.
  %%
  %% The traversal threads an accumulator instead of using `++`, so each
  %% item is consed exactly once regardless of how left-deep the tree is.
  -spec to_list(nil | tuple()) -> list().
  to_list(Tree) ->
      to_list(Tree, []).

  %% Prepends the items of the tree (first argument) to `Acc`. The right
  %% subtree is processed first so that its items end up behind the left
  %% subtree's items without any list concatenation.
  to_list(nil, Acc) ->
      Acc;
  to_list({single, S}, Acc) ->
      [S | Acc];
  to_list({conc, A, B}, Acc) ->
      to_list(A, to_list(B, Acc)).
  %% Turns a run of characters into a word tree: the empty run yields the
  %% empty tree, anything else yields a one-item tree holding that run.
  maybe_word([]) ->
      nil();
  maybe_word(Chars) ->
      singleton(Chars).
  %% Classifies a single character for the word-splitting reduction and
  %% logs start/finish (with the elapsed time in microseconds) to stdout.
  %%
  %% Returns `{segment, "", nil(), ""}` when `C` is one of space, tab,
  %% newline or form feed, and `{chunk, [C]}` for any other character.
  %%
  %% Timing uses the monotonic clock: the previous approach subtracted only
  %% the microsecond field of `erlang:now()`, which gives a wrong (possibly
  %% negative) duration whenever a second boundary is crossed.
  process_char(C) ->
      io:format("~p: process_char start~n", [node()]),
      Start = erlang:monotonic_time(microsecond),
      Result =
          case lists:member(C, " \t\n\f") of
              true ->
                  {segment, "", nil(), ""};
              false ->
                  {chunk, [C]}
          end,
      Elapsed = erlang:monotonic_time(microsecond) - Start,
      io:format("~p: process_char finish, time ~p~n", [node(), Elapsed]),
      Result.
  %% Wrapper around combine/2 that logs start/finish (with the elapsed time
  %% in microseconds) to stdout and returns combine/2's result unchanged.
  %%
  %% Timing uses the monotonic clock: the previous approach subtracted only
  %% the microsecond field of `erlang:now()`, which gives a wrong (possibly
  %% negative) duration whenever a second boundary is crossed.
  combine_timed(Arg1, Arg2) ->
      io:format("~p: combine start~n", [node()]),
      Start = erlang:monotonic_time(microsecond),
      Result = combine(Arg1, Arg2),
      Elapsed = erlang:monotonic_time(microsecond) - Start,
      io:format("~p: combine finish, time ~p~n", [node(), Elapsed]),
      Result.
  %% Merges the states of two adjacent pieces of text (left piece first).
  %%
  %% A `{chunk, S}` carries characters with no separator seen; a
  %% `{segment, L, C, R}` carries the characters before its first separator
  %% (L), a tree of complete words (C) and the characters after its last
  %% separator (R).
  %%
  %%   chunk   + chunk   -> the two character runs are joined
  %%   chunk   + segment -> the chunk extends the segment's left edge
  %%   segment + chunk   -> the chunk extends the segment's right edge
  %%   segment + segment -> the touching edges form at most one new word,
  %%                        placed between the two word trees
  combine({chunk, Left}, {chunk, Right}) ->
      {chunk, Left ++ Right};
  combine({chunk, Prefix}, {segment, Head, Words, Tail}) ->
      {segment, Prefix ++ Head, Words, Tail};
  combine({segment, Head, Words, Tail}, {chunk, Suffix}) ->
      {segment, Head, Words, Tail ++ Suffix};
  combine({segment, HeadA, WordsA, TailA}, {segment, HeadB, WordsB, TailB}) ->
      Middle = maybe_word(TailA ++ HeadB),
      {segment, HeadA, conc(WordsA, conc(Middle, WordsB)), TailB}.
  %% Splits `Str` into words.
  %%
  %% Builds a task with ebb_flow:map_reduce/3 from the per-character
  %% classifier (process_char/1), the pairwise combiner (combine_timed/2)
  %% and `Str`, then hands it to ebb_run:run_distributed/1.
  %%
  %% Returns the list of words when the run yields `{ok, [State]}` with a
  %% single chunk or segment state, and the atom `error` for any other
  %% result.
  words(Str) ->
      MapFun = fun(Char) -> process_char(Char) end,
      ReduceFun = fun(Left, Right) -> combine_timed(Left, Right) end,
      Task = ebb_flow:map_reduce(MapFun, ReduceFun, Str),
      words_result(ebb_run:run_distributed(Task)).

  %% Converts the outcome of the distributed run into words/1's return value.
  words_result({ok, [{chunk, Chars}]}) ->
      to_list(maybe_word(Chars));
  words_result({ok, [{segment, Head, Words, Tail}]}) ->
      to_list(conc(maybe_word(Head), conc(Words, maybe_word(Tail))));
  words_result(_Other) ->
      error.
  %% Reads the file named `Str`, splits its contents with words/1 and prints
  %% the number of words to stdout. Always returns `ok`.
  %%
  %% A failed read is reported with the actual reason (not every failure is
  %% a missing file), and a failed split (words/1 returning `error`) is
  %% reported instead of crashing in length/1.
  -spec words_from_file(file:name_all()) -> ok.
  words_from_file(Str) ->
      case file:read_file(Str) of
          {ok, Data} ->
              report_word_count(words(binary_to_list(Data)));
          {error, Reason} ->
              io:format("Cannot read ~tp: ~ts~n", [Str, file:format_error(Reason)])
      end.

  %% Prints the outcome of words/1: a failure notice for `error`, otherwise
  %% the length of the returned word list.
  report_word_count(error) ->
      io:format("Word splitting failed~n");
  report_word_count(Words) when is_list(Words) ->
      io:format("Number of words: ~w\n", [length(Words)]).