The middle of the parser

This commit is contained in:
skythedragon
2025-06-29 11:53:50 -07:00
parent 1cfb6f4185
commit 17d7a8bfbe
11 changed files with 652 additions and 58 deletions

3
.gitignore vendored
View File

@@ -1,3 +1,4 @@
cmake-build-*
tfc
.idea
.idea
reference

40
grammar/t.bnf Normal file
View File

@@ -0,0 +1,40 @@
// Reference grammar for the language: arithmetic on numeric literals plus an
// `exit` statement. Tokens are declared in the attribute block below; the
// start rule is `grammar` (a sequence of statements).
{
tokens = [
OP_MUL='*'
OP_DIV='/'
OP_ADD='+'
OP_SUB='-'
STAT_TERM=';'
EXIT="exit"
INT="[0-9]+"
DEC="[0-9]+\.[0-9]+"
]
}
// NOTE(review): `letter` is not referenced by any other rule in this file.
letter ::= "A" | "B" | "C" | "D" | "E" | "F" | "G"
| "H" | "I" | "J" | "K" | "L" | "M" | "N"
| "O" | "P" | "Q" | "R" | "S" | "T" | "U"
| "V" | "W" | "X" | "Y" | "Z" | "a" | "b"
| "c" | "d" | "e" | "f" | "g" | "h" | "i"
| "j" | "k" | "l" | "m" | "n" | "o" | "p"
| "q" | "r" | "s" | "t" | "u" | "v" | "w"
| "x" | "y" | "z" ;
num_lit ::= INT | DEC;
// l1 operators (* and /) are used by `term`; l2 operators (+ and -) by `expr`.
l1_operator ::= OP_MUL | OP_DIV;
l2_operator ::= OP_ADD | OP_SUB;
// NOTE(review): `math_operator` is not referenced by any other rule in this file.
math_operator ::= l1_operator | l2_operator;
// `expr` can already derive a lone num_lit through expr_val, so the second
// alternative here overlaps with the first.
val ::= expr | num_lit;
expr_val ::= num_lit;
// A term is a right-recursive chain of l1 operations.
term ::= (expr_val) l1_operator (expr_val | term);
// An expression is a value or term, optionally followed by one l2 operation
// whose right-hand side may itself be a value, expression or term.
expr ::= (expr_val | term) [l2_operator (expr_val | expr | term)];
// `exit` takes an optional value and must end with the statement terminator.
exit_stat ::= EXIT [val] STAT_TERM;
// A bare STAT_TERM is an (empty) statement in its own right.
statement ::= (val STAT_TERM) | exit_stat | STAT_TERM;
grammar ::= {statement};

2
install.sh Normal file
View File

@@ -0,0 +1,2 @@
#!/usr/bin/env sh
# Installs the release build by copying ./cmake-build-release/TFC to
# ~/local/bin/tcs. Must be run from the directory that contains
# cmake-build-release, because the source path is relative.
# NOTE(review): the destination is ~/local/bin (no leading dot) and the
# installed name is "tcs" -- confirm both are intended.
cp ./cmake-build-release/TFC ~/local/bin/tcs

View File

@@ -0,0 +1 @@
LALR(1) parsers, recursive descent parser, EBNF (Extended Backus-Naur Form).

40
src/alib/optional_ptr.hpp Normal file
View File

@@ -0,0 +1,40 @@
//
// Created by skythedragon on 6/25/25.
//
#pragma once
#include <memory>
#include <utility>
namespace skythedragon
{
    /// Tag type that stands for "no object"; converts to an empty optional_ptr.
    struct null_optional_ptr
    {
        void* ptr = nullptr;
    };

    /// Ready-made tag value, so callers can write `return null_opt;`.
    inline constexpr null_optional_ptr null_opt{};

    /// An owning, nullable pointer to a single heap-allocated T.
    ///
    /// Thin wrapper around std::unique_ptr<T> that can be built in place from
    /// constructor arguments, copied from an existing T, or emptied with
    /// `null_opt`. Move-only, like the unique_ptr it wraps.
    template <typename T>
    class optional_ptr
    {
        std::unique_ptr<T> ptr_;

    public:
        /// Creates an empty pointer.
        optional_ptr() : ptr_(nullptr) {}

        /// Takes ownership of an already allocated object (may be null).
        explicit optional_ptr(std::unique_ptr<T> ptr) : ptr_(std::move(ptr)) {}

        /// Allocates a new T, perfectly forwarding `args` to its constructor.
        template <typename... Args>
        explicit optional_ptr(Args&&... args) : ptr_(std::make_unique<T>(std::forward<Args>(args)...)) {}

        /// Allocates a copy of `instance`.
        optional_ptr(T& instance) : ptr_(std::make_unique<T>(instance)) {}

        /// Creates an empty pointer from the `null_opt` tag.
        optional_ptr(null_optional_ptr) : ptr_(nullptr) {}

        /// @return true when no object is owned.
        bool is_null() const { return ptr_ == nullptr; }

        /// Accesses the owned object. Precondition: !is_null().
        T& operator*() const { return *ptr_; }
        T* operator->() const { return ptr_.get(); }

        /// Replaces the owned object with `ptr` (which may be null).
        optional_ptr& operator=(std::unique_ptr<T> ptr) { ptr_ = std::move(ptr); return *this; }

        /// Stores a copy of `t`: assigns into the owned object when there is
        /// one, otherwise allocates a fresh copy (previously this dereferenced
        /// a null pointer).
        optional_ptr& operator=(T& t)
        {
            if (ptr_)
            {
                *ptr_ = t;
            }
            else
            {
                ptr_ = std::make_unique<T>(t);
            }
            return *this;
        }

        /// Destroys the owned object, if any, leaving the pointer empty.
        optional_ptr& operator=(null_optional_ptr) { ptr_.reset(); return *this; }

        /// @return true when an object is owned, so `if (p)` / `if (!p)` read
        /// the same way they do for raw and smart pointers.
        operator bool() const { return ptr_ != nullptr; }

        /// Gives up ownership; this pointer is empty afterwards.
        std::unique_ptr<T> release() { return std::move(ptr_); }
    };
}

View File

@@ -6,7 +6,16 @@
#include <iostream>
inline std::string process_octal_escape(std::ifstream& file, char c)
/// Tells whether `c` is one of the eight bracket characters
/// ( ) { } [ ] < >.
static bool is_bracket(const int c)
{
    switch (c)
    {
    case '(':
    case ')':
    case '{':
    case '}':
    case '[':
    case ']':
    case '<':
    case '>':
        return true;
    default:
        return false;
    }
}
static std::string process_octal_escape(std::ifstream& file, char c)
{
std::string oct;
oct += c;
@@ -35,7 +44,7 @@ inline std::string process_octal_escape(std::ifstream& file, char c)
}
}
inline char process_hex_escape(std::ifstream& file)
static char process_hex_escape(std::ifstream& file)
{
std::string hex;
@@ -68,13 +77,13 @@ void Lexer::lex()
{
char c = file_.get();
if (isalpha(c))
if (std::isalpha(c))
{
std::string lexeme;
lexeme += c;
while (!std::isspace(file_.peek()) && file_.peek() != EOF)
while (std::isalnum(file_.peek()) || file_.peek() == '_')
{
lexeme += file_.get();
}
@@ -87,7 +96,7 @@ void Lexer::lex()
}
}
if (isdigit(c))
if (std::isdigit(c))
{
std::string lexeme;
lexeme += c;
@@ -113,7 +122,7 @@ void Lexer::lex()
c = file_.get();
if (isalpha(c))
if (std::isalpha(c))
{
std::cerr << "ERR: Literal operators are not yet supported.\n";
exit(1);
@@ -126,7 +135,7 @@ void Lexer::lex()
}
if (isnum) {
tokens_.emplace_back(TokenType::number, lexeme);
tokens_.emplace_back(TokenType::decimal, lexeme);
} else {
tokens_.emplace_back(TokenType::integer, lexeme);
}
@@ -152,6 +161,11 @@ void Lexer::lex()
if (c == '\\')
{
if (file_.peek() == EOF)
{
std::cerr << "ERR: Unterminated string\n";
}
c = file_.get();
switch (c)
@@ -218,5 +232,117 @@ void Lexer::lex()
{
tokens_.emplace_back(TokenType::statement_term);
}
if (is_bracket(c))
{
std::cerr << "ERR: None of these things are yet supported.\n";
exit(1);
}
if (c == '+')
tokens_.emplace_back(TokenType::op_plus);
if (c == '-')
tokens_.emplace_back(TokenType::op_minus);
if (c == '*')
tokens_.emplace_back(TokenType::op_multiply);
if (c == '/')
tokens_.emplace_back(TokenType::op_divide);
}
}
/// Reports whether the token `in` positions ahead of the cursor lies at or
/// beyond the end of the token list.
///
/// The original test used `==`, so a look-ahead that overshot the end by more
/// than zero was reported as "not done" and a following peek() indexed out of
/// range. The comparison is written so it cannot overflow.
///
/// @param in  look-ahead distance from the current token.
/// @return true when no token exists at that position.
bool Lexer::done(const size_t in)
{
    const auto total = tokens_.size();
    if (current_token_ >= total)
    {
        return true;
    }
    return in >= total - current_token_;
}
/// Returns a copy of the token under the cursor and advances the cursor by
/// one. No bounds check is made; callers are expected to test done() first.
Token Lexer::consume()
{
    return tokens_[current_token_++];
}
/// Returns a copy of the token `ahead` positions past the cursor without
/// moving the cursor. No bounds check is made.
Token Lexer::peek(const size_t ahead)
{
    const auto index = current_token_ + ahead;
    return tokens_[index];
}
void Lexer::reset()
{
current_token_ = 0;
}
/// Moves the cursor back by `by` tokens.
///
/// The cursor is unsigned, so stepping back further than the current position
/// used to wrap around to a huge index; the move is now clamped at the first
/// token instead.
///
/// @param by  number of tokens to step back.
void Lexer::backtrack(const size_t by)
{
    current_token_ = by > current_token_ ? 0 : current_token_ - by;
}
/// Renders a token as its type name, followed by ": <lexeme>" when the token
/// carries a lexeme.
std::string to_string(const Token& token)
{
    std::string text = to_string(token.type);
    if (!token.lexeme)
    {
        return text;
    }
    text += ": " + *(token.lexeme);
    return text;
}
/// Returns the printable name of a token type; any type without an explicit
/// case is reported as "error".
///
/// TokenType::decimal used to print as "number", the enumerator's name before
/// it was renamed. It now prints as "decimal", so diagnostics match the enum.
std::string to_string(const TokenType& type)
{
    switch (type)
    {
    case TokenType::exit:
        return "exit";
    case TokenType::decimal:
        return "decimal";
    case TokenType::integer:
        return "integer";
    case TokenType::string:
        return "string";
    case TokenType::op_plus:
        return "op_plus";
    case TokenType::op_minus:
        return "op_minus";
    case TokenType::op_multiply:
        return "op_multiply";
    case TokenType::op_divide:
        return "op_divide";
    case TokenType::statement_term:
        return "statement_term";
    default:
        return "error";
    }
}
/// Streams the to_string() form of a token.
std::ostream& operator<<(std::ostream& os, const Token& token)
{
    return os << to_string(token);
}
/// Streams the to_string() name of a token type.
std::ostream& operator<<(std::ostream& os, const TokenType& type)
{
    return os << to_string(type);
}

View File

@@ -13,7 +13,7 @@ enum class TokenType
statement_term,
exit,
integer,
number,
decimal,
string,
op_plus,
op_minus,
@@ -32,9 +32,21 @@ public:
#endif
std::ifstream file_;
std::vector<Token> tokens_;
std::vector<Token>::size_type current_token_;
public:
Lexer(std::ifstream& file);
void lex();
[[nodiscard]] Token consume();
Token peek(size_t ahead = 0);
void backtrack(size_t by);
bool done(size_t in = 0);
void reset();
};
std::string to_string(const Token& token);
std::string to_string(const TokenType& type);
std::ostream& operator<<(std::ostream& os, const Token& token);
std::ostream& operator<<(std::ostream& os, const TokenType& type);

View File

@@ -3,3 +3,292 @@
//
#include "Parser.hpp"
#include <iostream>
#include <cassert>
static bool typeIsMathOperator(TokenType type);
static bool typeIsLiteral(TokenType type);
static bool typeIsArithmeticOperator(TokenType type);
static bool typeIsGeometricOperator(TokenType type);
/// Builds a parser that reads its tokens from `lexer`, which is held by
/// reference.
Parser::Parser(Lexer& lexer)
    : lexer_(lexer)
{
}
/// Abandons the innermost parse frame: rewinds the lexer by the number of
/// tokens recorded for that frame and removes the frame from the stack.
void Parser::backtrack()
{
    const size_t consumed = start_indices_.back();
    start_indices_.pop_back();
    lexer_.backtrack(consumed);
}
/// Opens a new parse frame whose consumed-token count starts at zero.
void Parser::start_parse()
{
    start_indices_.push_back(size_t{0});
}
/// Closes the innermost parse frame successfully and adds its consumed-token
/// count to the enclosing frame, so a later backtrack() of the enclosing frame
/// also rewinds these tokens.
///
/// Calling this with no open frame, or on the outermost frame, used to call
/// back() on an empty vector; both cases are now handled without touching a
/// missing element.
void Parser::end_parse()
{
    if (start_indices_.empty())
    {
        return;
    }
    const size_t consumed = start_indices_.back();
    start_indices_.pop_back();
    if (!start_indices_.empty())
    {
        start_indices_.back() += consumed;
    }
}
/// Takes the next token from the lexer and counts it against the innermost
/// parse frame. Prints an error and terminates the program when no token is
/// left.
Token Parser::consume()
{
    size_t& consumed = start_indices_.back();
    ++consumed;
    if (!at_end())
    {
        return lexer_.consume();
    }
    std::cerr << "ERR: expected token, not EOF\n";
    exit(1);
}
/// Looks at the token `count` positions ahead without consuming it.
/// @return the token, or std::nullopt when the lexer reports that position as
///         done.
std::optional<Token> Parser::peek(const size_t count) const
{
    if (!lexer_.done(count))
    {
        return lexer_.peek(count);
    }
    return std::nullopt;
}
/// Verifies that the token `ahead` positions from the cursor has type
/// `expected`; prints an error and terminates the program otherwise. The token
/// is not consumed.
///
/// The end-of-input test was inverted: it reported "not EOF" whenever a token
/// was present and went on to dereference an empty optional at real EOF.
///
/// @param expected  token type that must be present.
/// @param ahead     look-ahead distance.
void Parser::expect(TokenType expected, size_t ahead)
{
    const std::optional<Token> token = peek(ahead);
    if (!token)
    {
        std::cerr << "ERR: expected " << expected << ", not EOF\n";
        exit(1);
    }
    if (token->type != expected)
    {
        std::cerr << "ERR: expected " << expected << ", not " << token->type << '\n';
        exit(1);
    }
}
/// Forwards to Lexer::done(): tells whether the position `in` tokens ahead is
/// reported as the end of the token list.
bool Parser::at_end(const size_t in) const
{
    const bool finished = lexer_.done(in);
    return finished;
}
/// Parses statements one after another and appends them to root_.statements.
///
/// The loop ends as soon as `!statement` holds for the pointer returned by
/// parseStatement(). Statements of type `empty` only produce a warning and are
/// not stored. Afterwards the frame stack is cleared and the lexer is reset.
void Parser::parse()
{
    start_parse();
    for (;;)
    {
        optional_ptr<Statement> statement = parseStatement();
        if (!statement)
        {
            break;
        }
        const bool is_empty = statement->type == StatementType::empty;
        if (is_empty)
        {
            std::cout << "WARN: empty statement\n";
            continue;
        }
        // Stored by value in a std::vector<Statement>: only the Statement
        // base part of the parsed node is copied.
        root_.statements.emplace_back(*statement);
    }
    start_indices_.clear();
    lexer_.reset();
}
/// Parses one statement: `exit_stat`, `val ;` or a lone `;`.
///
/// Fixes over the previous version:
///  - the parse frame is closed on the end-of-input path instead of leaking;
///  - a lone `;` is consumed and returned as an empty statement;
///  - anything that is not `exit` or `;` is tried as a value statement;
///  - the terminating `;` is consumed here, because parseExitStatement() and
///    parseValueStatement() only look at it;
///  - an invalid statement terminates the program; before, no token was
///    consumed and the caller's loop never advanced.
/// Null checks use is_null() so they do not depend on optional_ptr's bool
/// conversion.
///
/// @return the parsed statement, or an empty pointer at end of input.
optional_ptr<Statement> Parser::parseStatement()
{
    start_parse();
    if (at_end())
    {
        end_parse(); // nothing was consumed; just drop this frame
        return skythedragon::null_opt;
    }

    optional_ptr<Statement> result(std::make_unique<Statement>(Statement{StatementType::empty}));
    const TokenType first = peek()->type;

    if (first == TokenType::statement_term)
    {
        // A lone terminator is an empty statement; the caller warns about it.
        (void)consume();
        end_parse();
        return result;
    }

    if (first == TokenType::exit)
    {
        result = parseExitStatement().release();
        if (result.is_null())
        {
            std::cerr << "ERR: invalid exit statement\n";
            exit(1);
        }
    }
    else
    {
        result = parseValueStatement().release();
        if (result.is_null())
        {
            std::cerr << "ERR: invalid statement\n";
            exit(1);
        }
    }

    // Every non-empty statement must be closed by a terminator.
    const std::optional<Token> terminator = peek();
    if (!terminator)
    {
        std::cerr << "ERR: expected " << TokenType::statement_term << ", not EOF\n";
        exit(1);
    }
    if (terminator->type != TokenType::statement_term)
    {
        std::cerr << "ERR: expected " << TokenType::statement_term << ", not " << terminator->type << '\n';
        exit(1);
    }
    (void)consume();

    end_parse();
    return result;
}
/// Parses `exit [value]`, starting at the `exit` keyword.
///
/// When the keyword is directly followed by a statement terminator the exit
/// code defaults to the integer literal "0". The terminator itself is only
/// inspected, never consumed, by this function.
///
/// The result node is now allocated before it is written to; previously the
/// fields were assigned through an empty optional_ptr. The null check on the
/// parsed value uses is_null() so it does not depend on optional_ptr's bool
/// conversion.
///
/// @return the exit statement, or an empty pointer (after backtracking) when
///         no value could be parsed.
optional_ptr<ExitStatement> Parser::parseExitStatement()
{
    start_parse();
    if (consume().type != TokenType::exit)
    {
        assert(false && "parseExitStatement called erroneously, tell the devs, and give them your code, or don't and the problem won't get fixed\n");
    }
    if (at_end())
    {
        std::cerr << "ERR: invalid exit statement, expected semicolon, value or expression not EOF\n";
        exit(1);
    }

    optional_ptr<ExitStatement> result(std::make_unique<ExitStatement>());
    result->type = StatementType::exit;

    if (peek()->type == TokenType::statement_term)
    {
        // Bare `exit;` means exit code 0.
        result->exitcode = {ValueType::literal, "0", LiteralType::integer};
        end_parse();
        return result;
    }

    optional_ptr<ValueNode> exitcode = parseValue();
    if (exitcode.is_null())
    {
        backtrack();
        return skythedragon::null_opt;
    }
    result->exitcode = std::move(*exitcode.release());
    end_parse();
    return result;
}
/// Parses a value that must be followed by a statement terminator. The
/// terminator is only inspected, never consumed, by this function.
///
/// Fixes over the previous version: the result node is allocated before use
/// (it was written through an empty optional_ptr), a missing token after the
/// value no longer dereferences an empty optional, and the function returns
/// its result (it ended in a bare `return;`).
///
/// @return the value statement, or an empty pointer (after backtracking) when
///         no value was found or it is not followed by a terminator.
optional_ptr<ValueStatement> Parser::parseValueStatement()
{
    start_parse();
    optional_ptr<ValueNode> value = parseValue();
    if (value.is_null())
    {
        backtrack();
        return skythedragon::null_opt;
    }

    const std::optional<Token> terminator = peek();
    if (!terminator || terminator->type != TokenType::statement_term)
    {
        backtrack();
        return skythedragon::null_opt;
    }

    optional_ptr<ValueStatement> result(std::make_unique<ValueStatement>());
    result->type = StatementType::value;
    result->value = std::move(*value.release());
    std::cout << "WARN: expression result unused.\n";
    end_parse();
    return result;
}
/// Parses a value: an expression when parseExpression() yields one, otherwise
/// a single integer, decimal or string literal.
///
/// Terminates the program when called at end of input. Fixes over the
/// previous version: the result node is allocated before it is written to,
/// the expression check uses is_null(), and a token that is neither an
/// expression nor a literal now backtracks and returns an empty pointer
/// instead of returning an unfilled node.
///
/// @return the value node, or an empty pointer (after backtracking).
optional_ptr<ValueNode> Parser::parseValue()
{
    start_parse();
    if (!peek())
    {
        std::cerr << "ERR: expected value-returning expression, but got EOF.\n";
        exit(1);
    }

    optional_ptr<ValueNode> result(std::make_unique<ValueNode>());
    {
        optional_ptr<ExpressionNode> expression = parseExpression();
        if (!expression.is_null())
        {
            result->type = ValueType::expression;
            result->expression = expression.release();
            end_parse();
            return result;
        }
    }

    // Not an expression: look at the current token again and accept a literal.
    const std::optional<Token> literal = peek();
    if (!literal || !typeIsLiteral(literal->type))
    {
        backtrack();
        return skythedragon::null_opt;
    }

    result->type = ValueType::literal;
    switch (literal->type)
    {
    case TokenType::decimal:
        result->literal_type = LiteralType::decimal;
        break;
    case TokenType::integer:
        result->literal_type = LiteralType::integer;
        break;
    case TokenType::string:
        result->literal_type = LiteralType::string;
        break;
    default:
        backtrack();
        return skythedragon::null_opt;
    }
    result->value = *consume().lexeme;

    end_parse();
    return result;
}
/// Tries to parse an expression, deciding by the token one past the cursor.
///
/// Only the "term" form (second token is `*` or `/`) is attempted.
/// TODO: the `+` / `-` form is not implemented yet and is reported as "no
/// expression" so callers can fall back to a plain literal.
///
/// Fixes over the previous version: a missing second token no longer
/// dereferences an empty optional, the result node is allocated before it is
/// written to, and every path closes its parse frame and returns a value
/// (control used to run off the end of the function).
///
/// @return the expression node, or an empty pointer (after backtracking).
optional_ptr<ExpressionNode> Parser::parseExpression()
{
    start_parse();
    const std::optional<Token> op = peek(1);
    if (!op || !typeIsGeometricOperator(op->type))
    {
        // Covers end of input, `+` / `-` (not implemented) and everything
        // that is not an operator at all.
        backtrack();
        return skythedragon::null_opt;
    }

    optional_ptr<TermNode> term = parseTerm();
    if (term.is_null())
    {
        backtrack();
        return skythedragon::null_opt;
    }

    optional_ptr<ExpressionNode> result(std::make_unique<ExpressionNode>());
    result->sub1_type = SubExpressionType::term;
    result->sub1_term = term.release();
    end_parse();
    return result;
}
/// Placeholder for parsing `term ::= expr_val l1_operator (expr_val | term)`.
///
/// TODO: not implemented. Returns an empty pointer without consuming any
/// token, so callers treat the input as "not a term". The body used to be
/// empty, which is undefined behaviour for a function that returns a value.
optional_ptr<TermNode> Parser::parseTerm()
{
    return skythedragon::null_opt;
}
/// True for the four operator token types: plus, minus, multiply, divide.
static bool typeIsMathOperator(TokenType type)
{
    switch (type)
    {
    case TokenType::op_plus:
    case TokenType::op_minus:
    case TokenType::op_multiply:
    case TokenType::op_divide:
        return true;
    default:
        return false;
    }
}
/// True for the literal token types: integer, decimal and string.
static bool typeIsLiteral(TokenType type)
{
    switch (type)
    {
    case TokenType::integer:
    case TokenType::decimal:
    case TokenType::string:
        return true;
    default:
        return false;
    }
}
/// True for the additive operator token types: plus and minus.
bool typeIsArithmeticOperator(TokenType type)
{
    switch (type)
    {
    case TokenType::op_plus:
    case TokenType::op_minus:
        return true;
    default:
        return false;
    }
}
/// True for the multiplicative operator token types: multiply and divide.
bool typeIsGeometricOperator(TokenType type)
{
    switch (type)
    {
    case TokenType::op_multiply:
    case TokenType::op_divide:
        return true;
    default:
        return false;
    }
}

View File

@@ -3,16 +3,131 @@
//
#pragma once
#include <memory>
#include <vector>
#include "../lexer/Lexer.hpp"
#include "../alib/optional_ptr.hpp"
struct ParseNode {
using skythedragon::optional_ptr;
/// Kind tag stored in Statement::type.
enum class StatementType
{
// Set by Parser::parseValueStatement().
value,
// Set by Parser::parseExitStatement().
exit,
// Initial kind in Parser::parseStatement(); Parser::parse() warns about it
// and does not store such statements.
empty
};
struct StatementNode : ParseNode {};
/// Kind of a literal value; Parser::parseValue() derives it from the token
/// type of the same name.
enum class LiteralType
{
integer,
decimal,
string
};
struct RootNode : ParseNode {};
/// Common base of all parsed statements; `type` records which kind it is.
/// NOTE(review): Parser::parseStatement() hands an ExitStatement to an
/// optional_ptr<Statement>. This base has no virtual destructor, so the
/// derived object is destroyed through a base pointer -- confirm and fix.
struct Statement
{
StatementType type;
};
/// Top-level parse result: the statements collected by Parser::parse().
/// Elements are stored by value as Statement, so only the base part of a
/// derived statement is kept.
struct RootNode
{
std::vector<Statement> statements;
};
/// Tag used by TermNode and ExpressionNode (sub1_type / sub2_type) to describe
/// an operand. Parser::parseExpression() sets `term` together with sub1_term;
/// presumably each tag names the operand member that is populated -- TODO
/// confirm for `expression` and `value`.
enum class SubExpressionType
{
expression,
term,
value
};
/// Tells whether a value node holds a literal or an expression; set by
/// Parser::parseValue().
enum class ValueType
{
literal,
expression
};
/// Operand value inside a term or expression; carries the same first three
/// fields as ValueNode but no nested expression.
/// NOTE(review): presumably `value` holds the literal's text and
/// `literal_type` its kind, as in ValueNode -- no code shown here fills it.
struct ExprValueNode
{
ValueType type;
std::string value;
std::optional<LiteralType> literal_type;
};
/// Node for a term: an operator token type, a tag for each of the two
/// operands, and storage for either inline operand values or a nested term.
/// NOTE(review): presumably mirrors the grammar rule
/// `term ::= expr_val l1_operator (expr_val | term)` -- confirm which members
/// are populated for each tag once Parser::parseTerm() is implemented.
struct TermNode
{
TokenType operator_;
SubExpressionType sub1_type;
SubExpressionType sub2_type;
optional_ptr<TermNode> subterm;
std::optional<ExprValueNode> sub1_value;
std::optional<ExprValueNode> sub2_value;
};
/// Node for an expression: an operator token type, a tag for each of the two
/// operands, and owning pointers for the possible operand forms (nested
/// expression, term, or value).
/// Parser::parseExpression() currently fills only sub1_type and sub1_term.
struct ExpressionNode
{
TokenType operator_;
SubExpressionType sub1_type;
SubExpressionType sub2_type;
optional_ptr<ExpressionNode> subexpr;
optional_ptr<TermNode> sub1_term;
optional_ptr<TermNode> sub2_term;
optional_ptr<ExprValueNode> sub1_value;
optional_ptr<ExprValueNode> sub2_value;
};
/// A parsed value, filled in by Parser::parseValue().
/// For ValueType::literal, `value` is the token's lexeme and `literal_type`
/// its kind; for ValueType::expression, `expression` owns the parsed
/// expression.
struct ValueNode
{
ValueType type;
std::string value;
std::optional<LiteralType> literal_type;
optional_ptr<ExpressionNode> expression;
};
/// Statement subtype that adds no members of its own.
struct EmptyStatement : Statement {};
/// `exit` statement; `exitcode` is the parsed value, or the integer literal
/// "0" when Parser::parseExitStatement() sees a bare `exit;`.
struct ExitStatement : Statement
{
ValueNode exitcode;
};
/// Statement consisting of a value followed by a terminator; `value` is the
/// node produced by Parser::parseValue().
struct ValueStatement : Statement
{
ValueNode value;
};
/// Backtracking parser that turns the token list of a Lexer into a RootNode.
///
/// The constructor, parse() and flatten() are now explicitly public. Before,
/// every member was private unless TEST was defined, so code outside the
/// class could not even construct a Parser. State and helper functions stay
/// private, and are exposed only in TEST builds as before.
class Parser {
#ifdef TEST
public:
#endif
    RootNode root_;                      ///< Statements collected by parse().
    Lexer& lexer_;                       ///< Token source; not owned.
    std::vector<size_t> start_indices_;  ///< Per-frame count of consumed tokens.

    /// Rewinds the lexer by the innermost frame's count and drops the frame.
    void backtrack();
    /// Consumes one token and counts it against the innermost frame.
    Token consume();
    /// Looks `count` tokens ahead; std::nullopt past the end.
    std::optional<Token> peek(size_t count = 0) const;
    /// Terminates the program unless the token `ahead` has type `expected`.
    void expect(TokenType expected, size_t ahead = 0);
    /// True when the position `in` tokens ahead is the end of input.
    [[nodiscard]] bool at_end(size_t in = 0) const;

    /// Opens a parse frame.
    void start_parse();
    /// Closes the innermost frame and merges its count into the parent.
    void end_parse();

    [[nodiscard]] optional_ptr<Statement> parseStatement();
    [[nodiscard]] optional_ptr<ExitStatement> parseExitStatement();
    [[nodiscard]] optional_ptr<ValueStatement> parseValueStatement();
    [[nodiscard]] optional_ptr<ValueNode> parseValue();
    [[nodiscard]] optional_ptr<ExprValueNode> parseExprValue();
    [[nodiscard]] optional_ptr<ExpressionNode> parseExpression();
    [[nodiscard]] optional_ptr<TermNode> parseTerm();

public:
    /// Creates a parser reading from `lexer`, which must outlive the parser.
    Parser(Lexer& lexer);

    /// Parses all statements into root_.
    void parse();
    /// NOTE(review): declared here; no definition is visible in this change.
    void flatten();
};

View File

@@ -6,6 +6,7 @@
#include "alib/colors.h"
#include "lexer/Lexer.hpp"
#include "parser/Parser.hpp"
using str = std::string;
@@ -13,6 +14,8 @@ int main(int argc, char** argv)
{
std::vector<str> args;
int a = 1 - - 2;
for (int i = 1; i < argc; i++)
{
args.emplace_back(argv[i]);
@@ -26,58 +29,17 @@ int main(int argc, char** argv)
std::ifstream input(args[0]);
Lexer lexer(input);
Lexer lexer = input;
lexer.lex();
for (Token& token : lexer.tokens_)
{
str o;
switch (token.type)
{
case TokenType::exit:
o = "exit";
break;
case TokenType::number:
o = "number";
break;
case TokenType::integer:
o = "integer";
break;
case TokenType::op_divide:
o = "op_divide";
break;
case TokenType::string:
o = "string";
break;
case TokenType::op_minus:
o = "op_minus";
break;
case TokenType::op_multiply:
o = "op_multiply";
break;
case TokenType::op_plus:
o = "op_plus";
break;
case TokenType::statement_term:
o = "statement_term";
break;
default:
o = "error";
break;
}
std::cout << o;
if (token.lexeme)
{
std::cout << ": " << *token.lexeme;
}
std::cout << '\n';
std::cout << token << '\n';
}
Parser parser(lexer);
std::cout << RESET;
return 0;

8
test.t
View File

@@ -1,2 +1,8 @@
exit 0;
exit 2;
exit;
2 * 3 + 4 / 3 * 2 - 2;
exit 2 * 3;
exit 2 *;
;
*