diff --git a/libnixf/include/nixf/Basic/Nodes/Op.h b/libnixf/include/nixf/Basic/Nodes/Op.h index 23350fb44..a1597846b 100644 --- a/libnixf/include/nixf/Basic/Nodes/Op.h +++ b/libnixf/include/nixf/Basic/Nodes/Op.h @@ -1,9 +1,9 @@ #pragma once #include "Basic.h" +#include "Tokens.h" #include "nixf/Basic/Nodes/Attrs.h" -#include "nixf/Basic/TokenKinds.h" #include diff --git a/libnixf/include/nixf/Basic/TokenKinds.h b/libnixf/include/nixf/Basic/TokenKinds.h deleted file mode 100644 index e9648e125..000000000 --- a/libnixf/include/nixf/Basic/TokenKinds.h +++ /dev/null @@ -1,11 +0,0 @@ -#pragma once - -namespace nixf::tok { - -enum TokenKind { -#define TOK(NAME) tok_##NAME, -#include "nixf/Basic/Tokens.inc" -#undef TOK -}; - -} // namespace nixf::tok diff --git a/libnixf/include/nixf/Basic/TokenKinds.inc b/libnixf/include/nixf/Basic/TokenKinds.inc deleted file mode 100644 index b04fc57d6..000000000 --- a/libnixf/include/nixf/Basic/TokenKinds.inc +++ /dev/null @@ -1,100 +0,0 @@ -/// TokenKinds.inc, token names. - -/// provides: TOK(NAME), for all tokens. - -/// note: keyword tokens will be prefixed with kw_ and they are declared in -/// TokenKeywords.inc - -// Keywords goes here, they are special identifiers. -#ifdef TOK_KEYWORD - -TOK_KEYWORD(if) -TOK_KEYWORD(then) -TOK_KEYWORD(else) -TOK_KEYWORD(assert) -TOK_KEYWORD(with) -TOK_KEYWORD(let) -TOK_KEYWORD(in) -TOK_KEYWORD(rec) -TOK_KEYWORD(inherit) -TOK_KEYWORD(or) - -#endif // TOK_KEYWORD - -#ifdef TOK -TOK(eof) - -// Identifier. -TOK(id) - -// Numbers. -TOK(int) -TOK(float) - -// Strings. -TOK(dquote) -TOK(string_part) -TOK(string_escape) // escaped string, e.g. \r \n \x \" - -TOK(quote2) // '' - -// Path. -TOK(path_fragment) - -// -TOK(spath) - -// URI. -TOK(uri) - -// Interpolation. -TOK(r_curly) // { -TOK(dollar_curly) // ${ - -TOK(ellipsis) // ... -TOK(comma) // , -TOK(dot) // . -TOK(semi_colon) // ; -TOK(eq) // = -TOK(l_curly) // { -TOK(l_paren) // ( -TOK(r_paren) // ) -TOK(l_bracket) // [ -TOK(r_bracket) // ] - -TOK(question) // ? -TOK(at) // @ -TOK(colon) // : - -TOK(unknown) - -TOK(path_end) // emtpy token, notifies the parser it should exit parsing path. - -TOK(op_not) // ! - -#endif // TOK - -// Binary Ops -#ifdef TOK_BIN_OP - -TOK_BIN_OP(impl) // -> -TOK_BIN_OP(or) // || -TOK_BIN_OP(and) // && -TOK_BIN_OP(eq) // == -TOK_BIN_OP(neq) // != -TOK_BIN_OP(lt) // < -TOK_BIN_OP(gt) // > -TOK_BIN_OP(le) // <= -TOK_BIN_OP(ge) // >= -TOK_BIN_OP(update) // // -TOK_BIN_OP(add) // + -TOK_BIN_OP(negate) // - -TOK_BIN_OP(mul) // * -TOK_BIN_OP(div) // / -TOK_BIN_OP(concat) // ++ - -// [RFC 0418 Pipe operator](https://github.com/NixOS/rfcs/pull/148) -TOK_BIN_OP(pipe_into) // |> -TOK_BIN_OP(pipe_from) // <| - -#endif // TOK_BIN_OP diff --git a/libnixf/include/nixf/Basic/Tokens.inc b/libnixf/include/nixf/Basic/Tokens.inc deleted file mode 100644 index f35a08fea..000000000 --- a/libnixf/include/nixf/Basic/Tokens.inc +++ /dev/null @@ -1,9 +0,0 @@ -/// Tokens.inc, merge all tokens under "TOK" - -#ifdef TOK -#define TOK_BIN_OP(NAME) TOK(op_##NAME) -#define TOK_KEYWORD(NAME) TOK(kw_##NAME) -#include "TokenKinds.inc" -#undef TOK_KEYWORD -#undef TOK_BIN_OP -#endif diff --git a/libnixf/src/Parse/Lexer.cpp b/libnixf/src/Parse/Lexer.cpp index 7047937c5..15ce653f2 100644 --- a/libnixf/src/Parse/Lexer.cpp +++ b/libnixf/src/Parse/Lexer.cpp @@ -320,7 +320,7 @@ void Lexer::maybeKW() { Tok = tok_kw_##NAME; \ return; \ } -#include "nixf/Basic/TokenKinds.inc" +#include "TokenKinds.inc" #undef TOK_KEYWORD } diff --git a/libnixf/src/Parse/ParseOp.cpp b/libnixf/src/Parse/ParseOp.cpp index 772a6cf7e..9f48ea332 100644 --- a/libnixf/src/Parse/ParseOp.cpp +++ b/libnixf/src/Parse/ParseOp.cpp @@ -108,7 +108,7 @@ std::shared_ptr Parser::parseExprOpBP(unsigned LeftRBP) { for (;;) { switch (Token Tok = peek(); Tok.kind()) { #define TOK_BIN_OP(NAME) case tok_op_##NAME: -#include "nixf/Basic/TokenKinds.inc" +#include "TokenKinds.inc" #undef TOK_BIN_OP { // For all binary ops: diff --git a/libnixf/src/Parse/ParseSupport.cpp b/libnixf/src/Parse/ParseSupport.cpp index 995ddf3d8..48a8d365e 100644 --- a/libnixf/src/Parse/ParseSupport.cpp +++ b/libnixf/src/Parse/ParseSupport.cpp @@ -3,7 +3,7 @@ #include "Parser.h" -#include "nixf/Basic/TokenKinds.h" +#include "Tokens.h" #include "nixf/Parse/Parser.h" using namespace nixf; diff --git a/libnixf/src/Parse/Token.h b/libnixf/src/Parse/Token.h index 3ab4c647d..768084306 100644 --- a/libnixf/src/Parse/Token.h +++ b/libnixf/src/Parse/Token.h @@ -1,54 +1,13 @@ #pragma once +#include "Tokens.h" #include "nixf/Basic/Range.h" -#include "nixf/Basic/TokenKinds.h" #include #include namespace nixf { -namespace tok { - -constexpr std::string_view spelling(TokenKind Kind) { - switch (Kind) { -#define TOK_KEYWORD(NAME) \ - case tok_kw_##NAME: \ - return #NAME; -#include "nixf/Basic/TokenKinds.inc" -#undef TOK_KEYWORD - case tok_dquote: - return "\""; - case tok_quote2: - return "''"; - case tok_dollar_curly: - return "${"; - case tok_l_curly: - return "{"; - case tok_r_curly: - return "}"; - case tok_l_paren: - return "("; - case tok_r_paren: - return ")"; - case tok_eq: - return "="; - case tok_semi_colon: - return ";"; - case tok_l_bracket: - return "["; - case tok_r_bracket: - return "]"; - case tok_colon: - return ":"; - default: - assert(false && "Not yet implemented!"); - } - __builtin_unreachable(); -} - -} // namespace tok - /// \brief A token. With it's kind, and the range in source code. /// /// This class is trivially copyable. diff --git a/libnixf/src/Parse/TokenKinds.inc.py b/libnixf/src/Parse/TokenKinds.inc.py new file mode 100644 index 000000000..fdfcd6e40 --- /dev/null +++ b/libnixf/src/Parse/TokenKinds.inc.py @@ -0,0 +1,29 @@ +from tokens import bin_op_tokens, keyword_tokens, tokens + + +def generate_token_section(section_name: str, tokens: list) -> str: + if not tokens: + return "" + + section = [f"#ifdef {section_name}"] + section.extend(f"{section_name}({token.name})" for token in tokens) + section.append(f"#endif // {section_name}\n") + + return "\n".join(section) + + +def generate_token_kinds_inc() -> str: + sections = [ + generate_token_section("TOK_KEYWORD", keyword_tokens), + generate_token_section("TOK", tokens), + generate_token_section("TOK_BIN_OP", bin_op_tokens), + ] + + return "\n".join(filter(None, sections)).strip() + + +if __name__ == "__main__": + import sys + + with open(sys.argv[1], "w") as f: + f.write(generate_token_kinds_inc()) diff --git a/libnixf/src/Parse/Tokens.h.py b/libnixf/src/Parse/Tokens.h.py new file mode 100644 index 000000000..b929e5e67 --- /dev/null +++ b/libnixf/src/Parse/Tokens.h.py @@ -0,0 +1,49 @@ +import tokens + + +def tok_id(tok: tokens.Token): + prefix = "tok" + if isinstance(tok, tokens.KwToken): + return f"{prefix}_kw_{tok.name}" + if isinstance(tok, tokens.OpToken): + return f"{prefix}_op_{tok.name}" + return f"{prefix}_{tok.name}" + + +def generate_tokens_h() -> str: + header = """#pragma once + +#include + +namespace nixf::tok { + +enum TokenKind { +""" + for token in tokens.tokens: + header += f" {tok_id(token)},\n" + + header += "};\n\n" + + header += """constexpr std::string_view spelling(int Kind) { + using namespace std::literals; + switch (Kind) { +""" + + for token in tokens.tokens: + header += f' case {tok_id(token)}: return R"({token.spelling})"sv;\n' + + header += """ default: return ""sv; + } +} +""" + + header += "} // namespace nixf::tok" + + return header + + +if __name__ == "__main__": + import sys + + with open(sys.argv[1], "w") as f: + f.write(generate_tokens_h()) diff --git a/libnixf/src/Parse/tokens.py b/libnixf/src/Parse/tokens.py new file mode 100644 index 000000000..1834c6347 --- /dev/null +++ b/libnixf/src/Parse/tokens.py @@ -0,0 +1,87 @@ +from dataclasses import dataclass +from typing import List + + +@dataclass +class Token: + name: str + spelling: str + + +class KwToken(Token): + def __init__(self, name): + self.name = name + self.spelling = name + + +keyword_tokens: List[Token] = [ + KwToken("if"), + KwToken("then"), + KwToken("else"), + KwToken("assert"), + KwToken("with"), + KwToken("let"), + KwToken("in"), + KwToken("rec"), + KwToken("inherit"), + KwToken("or"), +] + + +class OpToken(Token): + pass + + +bin_op_tokens: List[Token] = [ + OpToken("not", "!"), + OpToken("impl", "->"), + OpToken("or", "||"), + OpToken("and", "&&"), + OpToken("eq", "=="), + OpToken("neq", "!="), + OpToken("lt", "<"), + OpToken("gt", ">"), + OpToken("le", "<="), + OpToken("ge", ">="), + OpToken("update", "//"), + OpToken("add", "+"), + OpToken("negate", "-"), + OpToken("mul", "*"), + OpToken("div", "/"), + OpToken("concat", "++"), + OpToken("pipe_into", "|>"), + OpToken("pipe_from", "<|"), +] + +tokens: List[Token] = [ + *keyword_tokens, + Token("eof", "eof"), + Token("id", "id"), + Token("int", "int"), + Token("float", "float"), + Token("dquote", '"'), + Token("string_part", "string_part"), + Token("string_escape", "string_escape"), + Token("quote2", "''"), + Token("path_fragment", "path_fragment"), + Token("spath", ""), + Token("uri", "uri"), + Token("r_curly", "}"), + Token("dollar_curly", "${"), + Token("ellipsis", "..."), + Token("comma", ","), + Token("dot", "."), + Token("semi_colon", ";"), + Token("eq", "="), + Token("l_curly", "{"), + Token("l_paren", "("), + Token("r_paren", ")"), + Token("l_bracket", "["), + Token("r_bracket", "]"), + Token("question", "?"), + Token("at", "@"), + Token("colon", ":"), + Token("unknown", "unknown"), + Token("path_end", "path_end"), + *bin_op_tokens, +] diff --git a/libnixf/src/meson.build b/libnixf/src/meson.build index 909372668..d905072b2 100644 --- a/libnixf/src/meson.build +++ b/libnixf/src/meson.build @@ -19,6 +19,22 @@ diagnostic_cpp = custom_target( command: [python3, '@INPUT0@', '@OUTPUT@'] ) +tokens_h = custom_target( + input: ['Parse/Tokens.h.py', 'Parse/tokens.py'], + output: 'Tokens.h', + command: [python3, '@INPUT0@', '@OUTPUT@'], + install: true, + install_dir: 'include/nixf/Parse/' +) + +tokens_kinds_inc = custom_target( + input: ['Parse/TokenKinds.inc.py', 'Parse/tokens.py'], + output: 'TokenKinds.inc', + command: [python3, '@INPUT0@', '@OUTPUT@'], + install: true, + install_dir: 'include/nixf/Parse/' +) + libnixf = library( 'nixf', 'Basic/Nodes.cpp', @@ -37,6 +53,8 @@ libnixf = library( 'Sema/VariableLookup.cpp', diagnostic_enum_h, diagnostic_cpp, + tokens_h, + tokens_kinds_inc, include_directories: libnixf_inc, dependencies: libnixf_deps, install: true, diff --git a/libnixf/test/Parse/ParseOp.cpp b/libnixf/test/Parse/ParseOp.cpp index 1661297da..cc9bd751e 100644 --- a/libnixf/test/Parse/ParseOp.cpp +++ b/libnixf/test/Parse/ParseOp.cpp @@ -156,4 +156,19 @@ TEST(Parser, Op_PipeOperator_NonAssociative) { ASSERT_EQ(Diags[0].kind(), nixf::Diagnostic::DK_OperatorNotAssociative); } +TEST(Parser, Op_Issue647) { + auto Src = R"(!)"sv; + + std::vector Diags; + Parser P(Src, Diags); + auto AST = P.parseExpr(); + + ASSERT_TRUE(AST); + + ASSERT_EQ(AST->kind(), Node::NK_ExprUnaryOp); + + ASSERT_EQ(Diags.size(), 1); + ASSERT_EQ(Diags[0].kind(), nixf::Diagnostic::DK_Expected); +} + } // namespace