Bladeren bron

Better add this; need to refactor/test a lot

getty 13 jaren geleden
bovenliggende
commit
45679b7be3
2 gewijzigde bestanden met toevoegingen van 271 en 0 verwijderingen
  1. 187 0
      compiler/scheme.erl
  2. 84 0
      compiler/scheme_prelude.erl

+ 187 - 0
compiler/scheme.erl

@@ -0,0 +1,187 @@
+-module(scheme).
+-export([compile/1, lex/1, parse/1, is_string/1]).
+
+%%% CONSTANTS %%%
+% Mostly used for built-in Scheme functions.
+
+-define(PRELUDE, ["+", "-", "*", "/", "=", "<", ">", "<=", ">=",
+		  "cons", "list", "append", "car", "cdr", "map",
+		  "filter", "member", "list-ref"]).
+
+%%% LEXING %%%
+
+% Lex a string representing Scheme code into a list.
+lex(S) -> lists:filter(fun(Match) -> (Match /= "") and
+                                     (re:run(Match, "[ \n]+") == nomatch) end,
+                       re:split(S, "(\()|(\))|(')|([^ \n()']+)", [{return, list}])).
+
+
+%%% PARSING %%%
+
+% String -> Regex -> Bool
+% true if Regex matches the entirety of Str; false otherwise.
+string_matches(Str, Regex) -> case re:run(Str, Regex, [{capture, first, list}]) of
+                           {match, [S]} -> S == Str;
+                           _ -> false
+                       end.
+
+% String -> Bool
+% true if Str consists entirely of number characters; false otherwise.
+is_number_string(Str) -> string_matches(Str, "[0-9]+").
+
+% [String] -> Heterogeneous List
+% Returns the tuple { Parsed, Unparsed } consisting of a deep list of
+% parsed tokens and a flat list of unparsed tokens.
+parse([]) -> [];
+parse(["(" | Rest]) -> parse_list(Rest);
+parse([")" | _]) -> error;
+parse(["'" | Rest]) -> case parse(Rest) of
+			   {Dat, R} ->
+			       {[{atom, "quote"}, Dat], R};
+			   error ->
+			       error
+		       end;
+parse([S | Rest]) -> case is_number_string(S) of
+			 true ->
+			     {{number, S}, Rest};
+			 false ->
+			     {{atom, S}, Rest}
+		     end.
+
+
+parse_list([]) -> {[], []};
+parse_list([")" | Rest]) -> {[], Rest};
+parse_list(S) -> case parse(S) of
+                     {L, R} -> case parse_list(R) of
+                                       {List, Rest} -> {lists:append([L], List),
+                                                        Rest};
+                                       error -> error
+                               end;
+                     error -> error
+                 end.
+
+%%% COMPILATION %%%
+
+% Char -> String
+% converts a Scheme character to an Erlang character for identifiers; converts
+% certain special characters to three-character equivalents.
+char_convert(C) -> case C of
+                       $* -> "_ml";
+                       $+ -> "_pl";
+                       $/ -> "_dv";
+                       $- -> "_mn";
+                       $_ -> "_us";
+                       $< -> "_lt";
+		       $> -> "_gt";
+		       $= -> "_eq";
+		       $! -> "_ex";
+		       $? -> "_qs";
+		       X -> [X]
+                   end.
+
+% a -> Bool
+% Well; "string" is more properly "is a list of non-lists", which also (in this case)
+% includes strings and lists of integers indistinguishably. With that caveat--
+% recogizes whether something is a "string".
+is_string(S) ->
+  is_list(S) and lists:foldl(fun(X, Y) -> X and Y end, true,
+                             lists:map(fun(X) -> not(is_list(X)) end, S)).
+
+% Heterogeneous List of Strings -> String
+deep_string_append(L) ->
+  string:join(lists:map(fun(M) -> case is_string(M) of
+                                      true -> M;
+                                      false -> deep_string_append(M)
+                                  end end, L),
+	      "").
+
+% String -> String
+% Convert a string representing a Scheme identifier to a string representing
+% an Erlang function identifier or an atom, i.e. lower-case
+to_erlang_atom([S|XS]) ->
+  lists:foldr(fun(X, Y) -> X ++ Y end, "",
+    lists:map(fun(C) -> char_convert(C) end,
+              [string:to_lower(S)|XS]));
+to_erlang_atom({atom, X}) -> to_erlang_atom(X).
+
+% Convert a string representing a Scheme identifier to a string representing
+% an Erlang variable, i.e. upper-case.
+to_erlang_variable([S|XS]) ->
+  lists:foldr(fun(X, Y) -> X ++ Y end, "",
+    lists:map(fun(C) -> char_convert(C) end,
+              [string:to_upper(S)|XS]));
+to_erlang_variable({atom, X}) -> to_erlang_variable(X).
+
+% Heterogeneous List -> String
+generate([{atom, "define"} |
+	  [ [Name | Args] | Body]], Vars, Variadics) ->
+    [to_erlang_atom(Name),
+     " ( ", string:join(
+	      lists:map(fun(X) -> to_erlang_variable(X) end, Args),
+	      " , "), " ) -> ",
+     string:join(lists:map(fun(X) -> generate(X, Vars ++ Args, Variadics) end,
+			   Body), " ; "),
+    " . "];
+generate([{atom, "lambda"} | [Args | Body]], Vars, Variadics) ->
+    ["fun (",
+     string:join(
+       lists:map(fun(X) -> to_erlang_variable(X) end, Args),
+       " , "),
+     ") -> ",
+     string:join(lists:map(fun(X) -> generate(X, Vars ++ Args, Variadics) end,
+			   Body), " ; "),
+     "end"];
+generate([{atom, "if"}
+	  | [ Condition 
+	     | [ Then_Case
+		| [ Else_Case ]]]], Vars, Variadics) ->
+    ["case scheme_prelude:is_true( ",
+     generate(Condition, Vars, Variadics),
+     ") of true -> ",
+     generate(Then_Case, Vars, Variadics),
+     " ; false -> ",
+     generate(Else_Case, Vars, Variadics),
+     " end "];
+generate([{atom, "quote"} | [Rest]], _, _) ->
+    generate_quoted(Rest);
+generate([{atom, Func} | Args], Vars, Variadics) ->
+    case lists:member(Func, Variadics) of
+	true -> [generate({atom, Func}, Vars, Variadics),
+		 " ([ ",
+		 string:join(lists:map(fun(X) -> generate(X, Vars, Variadics) end, Args),
+			     [" , "]),
+		 " ]) "];
+	false -> [generate({atom, Func}, Vars, Variadics),
+		  " ( ",
+		  string:join(lists:map(fun(X) -> generate(X, Vars, Variadics) end, Args),
+			      [" , "]),
+		  " ) "]
+    end;
+generate({number, X}, _, _) -> [X];
+generate({atom, X}, Vars, _) ->
+    [case lists:member(X, Vars) of
+	 true -> to_erlang_variable(X);
+	 false -> case lists:member(X, ?PRELUDE) of
+		      true -> ["scheme_prelude:", to_erlang_variable(X)];
+		      false -> to_erlang_variable(X)
+		  end
+     end].
+
+generate_quoted({atom, X}) ->
+    [to_erlang_atom(X)];
+generate_quoted({number, X}) ->
+    [X];
+generate_quoted(List) ->
+    [" [ ", string:join(lists:map(fun(X) -> generate_quoted(X) end,
+			  List), [" , "]),
+    " ] "].
+
+% String -> String
+% Compiles a string representing Scheme code into a string representing an
+% Erlang module.
+compile(S) -> case parse_list(lex(S)) of
+                  {[X|_], _} -> deep_string_append(generate(X,
+							    [],
+							    ["-", "+", "*", "list"]));
+                  _ -> "NOTHING"
+              end.

+ 84 - 0
compiler/scheme_prelude.erl

@@ -0,0 +1,84 @@
+-module(scheme_prelude).
+-export([cons/2, car/1, cdr/1, list_p/1]).
+
+% Scheme prelude for programs compiled  from Scheme to Erlang.
+% A few conventions apply:
+%  - question marks become _p, so even? becomes even_p
+%  - exclamation marks are outlawed because the Scheme subset
+%    is pure-functional, so set! and its ilk are not present.
+%  - hyphens become _mn, so list-ref becomes list_mnref
+%  - underscores will become double underscores.
+% At present, this still leaves an ambiguity between even-p and
+% even?, but we will ignore that. All that aside, operator
+% symbols become three-character abbreviations whose names all
+% begin with an underscore. For example:
+%   + -> _pl
+%   * -> _st
+%   - -> _mn
+%   / -> _dv
+% Therefore, symbols can still be used in variable names, such
+% as *some-variable* which becomes _stsome_mnvariable_st. This
+% would be hideous to work with, but for our purposes is
+% transparent enough.
+
+% variadic functions
+_pl([A]) -> A;
+_pl([A|B]) -> A + _pl(B).
+_st([A]) -> A;
+_st([A|B]) -> A * _st(B).
+_mn([A]) -> A;
+_mn([A|B]) -> A - _mn(B).
+list(L) -> L.
+
+
+_eq(A, B) -> A == B.
+cons(A, B) -> [A|B].
+car([A|_]) -> A.
+cdr([_|B]) -> B.
+append(A, B) -> lists:append(A, B).
+filter(Proc, L) -> lists:filter(Proc, L).
+map(Proc, L) -> lists:map(Proc, L).
+
+member(X, [X|L]) -> [X|L];
+member(X, [_|L]) -> member(X, L);
+member(_, []) -> false.
+
+assoc(X, [{X, A}|L]) -> {X, A};
+assoc(X, [{_, _}|L]) -> scheme_prelude:assoc(X, L);
+assoc(_, []) -> false.
+
+reverse(L) -> lists:reverse(L).
+
+reduce(Proc, [X|L]) -> lists:foldl(Proc, X, L).
+
+list_qs([_|B]) -> scheme_prelude:list_qs(B);
+list_qs([]) -> true;
+list_qs(_) -> false.
+
+null_qs([]) -> true;
+null_qs(_) -> false.
+
+pair_qs([_|_]) -> true;
+pair_qs(_) -> false.
+
+number_qs(X) -> is_number(X).
+
+equals_qs(X, Y) -> X == Y.
+
+length([X|Y]) -> 1 + scheme_prelude:length(Y);
+length([]) -> 0.
+
+symbol_qs(X) -> is_atom(X).
+
+boolean_qs(true) -> true.
+boolean_qs(false) -> true.
+boolean_qs(_) -> false.
+
+and(L) -> all(scheme_prelude:is_true, L).
+or(L) -> any(scheme_prelude:is_true, L).
+
+not(false) -> true;
+not(_) -> false.
+
+is_true(false) -> false;
+is_true(X) -> X.