word_split.erl 2.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788
  1. -module(word_split).
  2. -export([words/1, to_list/1, words_from_file/1]).
  3. -export([words_traced/1]).
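  %% WordState = {chunk, S} | {segment, L, C, R}
  %%   {chunk, S}         - S is a run of characters with no separator seen yet.
  %%   {segment, L, C, R} - at least one separator was seen: L is the partial
  %%                        word before the first separator, C is the word list
  %%                        (nil | {single, _} | {conc, _, _}) of complete words
  %%                        in between, and R is the partial word after the
  %%                        last separator.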
  %% Constructor for the empty word list; it is represented by the atom `nil`.
  nil() -> nil.
  %% Constructor for a word list holding exactly one word.
  %% Returns `{single, Word}`.
  singleton(Word) -> {single, Word}.
  %% Constructor for the concatenation of two word lists.
  %% Builds the node `{conc, Left, Right}`; neither argument is traversed.
  conc(Left, Right) -> {conc, Left, Right}.
  %% Flattens a word list (`nil | {single, S} | {conc, A, B}`) into a plain
  %% Erlang list of its `S` elements, in left-to-right order.
  %%
  %% The traversal threads an accumulator instead of using `++`, so each
  %% element is consed exactly once even when `conc` nodes are nested deeply on
  %% the left (the `++` form re-copied the left result at every level).
  to_list(Tree) ->
      to_list(Tree, []).

  %% to_list(Tree, Acc): the elements of Tree, in order, followed by Acc.
  to_list(nil, Acc) ->
      Acc;
  to_list({single, S}, Acc) ->
      [S | Acc];
  to_list({conc, A, B}, Acc) ->
      %% Flatten the right subtree first so the left one is prepended to it.
      to_list(A, to_list(B, Acc)).
  %% Turns a (possibly empty) string into a word list: the empty string yields
  %% the empty word list, anything else becomes a single-word list.
  maybe_word([]) ->
      nil();
  maybe_word(Word) ->
      singleton(Word).
  %% Maps one input character to its WordState.
  %% A separator (space, tab, newline or form feed) becomes an empty segment
  %% `{segment, "", nil(), ""}`; any other character becomes a one-character
  %% chunk `{chunk, [C]}`.
  process_char(C) when C =:= $\s; C =:= $\t; C =:= $\n; C =:= $\f ->
      {segment, "", nil(), ""};
  process_char(C) ->
      {chunk, [C]}.
  %% Merges the WordStates of two adjacent pieces of input (left piece first).
  %%   chunk   + chunk   -> one longer chunk
  %%   chunk   + segment -> the chunk extends the segment's left partial word
  %%   segment + chunk   -> the chunk extends the segment's right partial word
  %%   segment + segment -> the right partial of the first and the left partial
  %%                        of the second meet and form a complete word (if
  %%                        non-empty), placed between the two word lists.
  combine({chunk, Left}, {chunk, Right}) ->
      {chunk, Left ++ Right};
  combine({chunk, Prefix}, {segment, Head, Words, Tail}) ->
      {segment, Prefix ++ Head, Words, Tail};
  combine({segment, Head, Words, Tail}, {chunk, Suffix}) ->
      {segment, Head, Words, Tail ++ Suffix};
  combine({segment, Head1, Words1, Tail1}, {segment, Head2, Words2, Tail2}) ->
      %% The word straddling the boundary between the two pieces.
      Bridge = maybe_word(Tail1 ++ Head2),
      {segment, Head1, conc(Words1, conc(Bridge, Words2)), Tail2}.
  %% Runs `apply(Func, Args)` and prints a start line and a finish line around
  %% it, each tagged with the current node.
  %%   Name  - label printed with ~s
  %%   Time  - zero-arity fun sampled immediately before and after the call; the
  %%           difference of the two samples is printed on the finish line
  %%   Units - label for that difference, printed with ~s
  %% Returns whatever the applied function returns.
  trace(Name, Time, Units, Func, Args) ->
      io:format("~p<< ~s start~n", [node(), Name]),
      T0 = Time(),
      Outcome = apply(Func, Args),
      T1 = Time(),
      Elapsed = T1 - T0,
      io:format("~p>> ~s finish (~s ~p)~n", [node(), Name, Units, Elapsed]),
      Outcome.
  %% Traces `apply(Func, Args)` via trace/5, reporting the elapsed time in
  %% microseconds. Returns the result of the traced call.
  %%
  %% The clock is erlang:monotonic_time/1. The previous implementation sampled
  %% only the microsecond field of erlang:now/0, which wraps every second and so
  %% produced negative or truncated durations whenever the call crossed a second
  %% boundary.
  trace_usec(Name, Func, Args) ->
      Clock = fun() -> erlang:monotonic_time(microsecond) end,
      trace(Name, Clock, "usec", Func, Args).
  %% Traces `apply(Func, Args)` via trace/5, reporting the elapsed time in whole
  %% seconds. Returns the result of the traced call.
  %%
  %% The clock is erlang:monotonic_time/1. The previous implementation sampled
  %% only the seconds field of erlang:now/0, which wraps every 1,000,000 seconds
  %% and is not a full timestamp.
  trace_sec(Name, Func, Args) ->
      Clock = fun() -> erlang:monotonic_time(second) end,
      trace(Name, Clock, "sec", Func, Args).
  %% Same result as process_char/1, but the call is wrapped in trace_usec/3 so
  %% its start, finish and duration are printed.
  process_char_traced(Char) ->
      Label = "word_split:process_char/1",
      trace_usec(Label, fun process_char/1, [Char]).
  %% Same result as combine/2, but the call is wrapped in trace_usec/3 so its
  %% start, finish and duration are printed.
  combine_traced(Left, Right) ->
      Label = "word_split:combine/2",
      trace_usec(Label, fun combine/2, [Left, Right]).
  %% Splits the string `Str` into its words.
  %%
  %% Builds an ebb_flow map/reduce over `Str` with process_char/1 as the map
  %% step and combine/2 as the reduce step, then runs it with
  %% ebb_run:run_distributed/1.
  %% NOTE(review): presumably the flow maps every character and folds the
  %% results pairwise in input order; confirm against ebb_flow.
  %%
  %% Returns the list of words when the run yields `{ok, [WordState]}`, and the
  %% atom `error` for any other result.
  words(Str) ->
      MapFun = fun(Char) -> process_char(Char) end,
      ReduceFun = fun(Left, Right) -> combine(Left, Right) end,
      Flow = ebb_flow:map_reduce(MapFun, ReduceFun, Str),
      case ebb_run:run_distributed(Flow) of
          {ok, [{segment, Head, Middle, Tail}]} ->
              %% Head and Tail are partial words at the edges of the input;
              %% they count as words when non-empty.
              to_list(conc(maybe_word(Head), conc(Middle, maybe_word(Tail))));
          {ok, [{chunk, Whole}]} ->
              %% No separator anywhere: the entire input is at most one word.
              to_list(maybe_word(Whole));
          _ ->
              error
      end.
  %% Same result as words/1, but the call is wrapped in trace_sec/3 so its
  %% start, finish and duration in seconds are printed.
  words_traced(Str) ->
      Label = "words",
      trace_sec(Label, fun words/1, [Str]).
  %% Reads the file named `Str`, splits its contents with words/1 and prints
  %% the number of words. Always returns `ok` (the result of io:format).
  %%
  %% Failure handling:
  %%   - a missing file prints "File not found!" as before;
  %%   - any other read error (permissions, path is a directory, ...) is now
  %%     reported with its own description instead of being mislabelled as
  %%     "not found";
  %%   - words/1 returning `error` is reported instead of crashing in length/1
  %%     with badarg.
  words_from_file(Str) ->
      case file:read_file(Str) of
          {ok, Data} ->
              report_word_count(words(binary_to_list(Data)));
          {error, enoent} ->
              io:format("File not found!");
          {error, Reason} ->
              io:format("Could not read file: ~s\n", [file:format_error(Reason)])
      end.

  %% Prints the outcome of words/1: the word count for a list, or a failure
  %% message for the atom `error`.
  report_word_count(Words) when is_list(Words) ->
      io:format("Number of words: ~w\n", [length(Words)]);
  report_word_count(error) ->
      io:format("Could not split the file into words\n").
  72. end.