ALib C++ Framework
by
Library Version: 2511 R0
Documentation generated by doxygen
Loading...
Searching...
No Matches
parser_impl.inl
Go to the documentation of this file.
1//==================================================================================================
2/// \file
3/// This header-file is part of module \alib_expressions of the \aliblong.
4///
5/// \emoji :copyright: 2013-2025 A-Worx GmbH, Germany.
6/// Published under #"mainpage_license".
7//==================================================================================================
8ALIB_EXPORT namespace alib { namespace expressions { namespace detail {
9
10//==================================================================================================
11/// Implementation of the default parser of module \alib_expressions_nl.
12///
13/// This internal class is not too well documented. Nevertheless, it is designed so that its
14/// behavior can be tweaked slightly and, if needed, a custom parser class can be derived and
15/// used. For doing so, please consult the source code of this class.
16/// A custom parser might be set to the protected field #"Compiler::parser;*" by a derived
17/// compiler type before compiling a first expression.
18///
19/// It is possible to define scannable custom unary and binary operators. Definitions of binary
20/// operators include a "precedence value" that allows aligning them with the built-in types.
21/// Also, built-in operators can be \em removed if wanted.
22//==================================================================================================
23class ParserImpl : public Parser
24{
25 protected:
26 /// Types of tokens. The underlying type is \c char and each enumerator is assigned a character value.
27 enum class Tokens : char
28 {
29 EOT = '\0', ///< End of tokens. (No next token available.)
30 SymbolicOp = 'O' , ///< A symbolic operator. Can be unary or binary.
31 AlphaUnOp = 'U' , ///< An alphabetic unary operator.
32 AlphaBinOp = 'B' , ///< An alphabetic binary operator.
33
34 LitString = 'S' , ///< A string literal.
35 LitInteger = 'I' , ///< An integer literal.
36 LitFloat = 'F' , ///< A floating-point literal.
37
38 Identifier = 'A' , ///< An identifier.
39
40 BraceOpen = '(' , ///< An opening brace.
41 BraceClose = ')' , ///< A closing brace.
42 Comma = ',' , ///< A comma.
43
44 SubscriptOpen = '[' , ///< An opening subscript brace.
45 SubscriptClose = ']' , ///< A closing subscript brace.
46 };
47
48 /// Memory for temporary allocations, like AST objects or literal strings with converted
49 /// escape sequences. Provided by the compiler with the method #".Parse".
51
52 /// The actual token type.
54
55 /// The literal hint of the actual token, of type \c ASTLiteral::NFHint (if applicable).
57
58 /// Integer value of token (if applicable).
60
61 /// Float value of token (if applicable).
62 double tokFloat;
63
64 /// String value of token (if applicable).
66
67 /// The position of the token in #".expression".
69
70 /// The compiler that this parser works for.
72
73 /// Used for scanning literals. Provided to this class with each parse request.
75
76
77 /// The given expression to parse.
79
80 /// The rest of #".expression".
82
83 /// Lists single characters that get directly converted into tokens of corresponding type
84 /// when found in the expression string. Tokens are <c>"()[],"</c>.
86
87 /// Lists single characters that were found in operator strings which have been registered
88 /// with
89 /// #"Compiler::AddUnaryOperator;*" and
90 /// #"Compiler::AddBinaryOperator;*".
91 ///
92 /// Used by the internal token scanner (lexer) when found in the expression string.
93 /// By default, this will become something like
94 /// <c>"=+-*%/?:~!|&^!<>/%"</c>.
96
97 /// Hash set of unary operators. The key of the table is the operator string, which usually
98 /// consists of one character, like <c>'-'</c> or <c>'!'</c>.
99 ///
100 /// This table is filled in the constructor of the class with the values stored in
101 /// #"Compiler::UnaryOperators;*" and used for testing of existence.
103 String,
104 alib::hash_string_ignore_case <character>,
105 alib::equal_to_string_ignore_case<character> > unaryOperators;
106
107 /// Hash set of binary operators. The key of the table is the operator string, which usually
108 /// consists of one to three characters, like <c>'+'</c> or <c>'<<='</c>.
109 ///
110 /// This table is filled in the constructor of the class with the values stored in
111 /// #"Compiler::BinaryOperators;*" and used for testing of existence.
113 String,
114 alib::hash_string_ignore_case <character>,
115 alib::equal_to_string_ignore_case<character> > binaryOperators;
116
117 /// List of ASTs currently created in recursion.
118 /// \note
119 /// This vector is created in the monotonic allocator and never even deleted, as all
120 /// inserted \b AST elements exclusively allocate from the same temporary allocator.
122
123 //################################################################################################
124 // Constructor/destructor, interface
125 //################################################################################################
126 public:
127 /// Constructor.
128 /// @param compiler The compiler that this parser works for.
129 /// @param allocator A monotonic allocator for permanent allocations.
131
132 /// Virtual destructor. The body is empty.
133 virtual ~ParserImpl() override {}
134
135 /// Parses the given expression string.
136 /// \note
137 /// The return value is hidden by using <c>void*</c>. This is to avoid flooding the code
138 /// entities that use module \alib_expressions_nl with \c boost header include files.
139 /// NOTE(review): the declaration below returns <c>detail::AST*</c>, not <c>void*</c> - confirm whether this note is outdated.
140 /// @param exprString The string to parse.
141 /// @param nf Used to scan number literals.
142 /// @return The abstract syntax tree representing the expression.
143 ALIB_DLL virtual
144 detail::AST* Parse( const String& exprString, NumberFormat* nf ) override;
145
146
147 protected:
148 //################################################################################################
149 // Lexer
150 //################################################################################################
151 /// This is the "scanner" or "lexer" method. It takes no arguments and returns no value; presumably the scanned token is stored in fields of this class (to be confirmed in the implementation).
152 void NextToken();
153
154
155 //################################################################################################
156 // Parser
157 //################################################################################################
158
159 /// Tests if the actual token represents a known unary operator.
160 /// @return Returns the unary operator symbol, respectively a \e nulled string on failure.
163
164 /// Tests if the actual token represents a known binary operator.
165 /// @return Returns the binary operator symbol, respectively a \e nulled string on failure.
168
169 /// Internal method that optionally parses a conditional operator (<c>Q ? T : F</c>).
170 ///
171 /// @return The resulting AST node.
173
174 /// Internal method that optionally parses a binary operator and levels (recursively)
175 /// trees of such according to operator precedence and brackets given.
176 /// @return Pointer to the resulting AST node. NOTE(review): the previous text was just "T." - confirm the intended wording.
177 AST* parseBinary();
178
179 /// Parses unary operators, literals, identifiers, functions, and expressions surrounded by
180 /// brackets.
181 /// @return The abstract syntax tree node parsed.
182 AST* parseSimple();
183
184 /// Invoked after an identifier or function was parsed. Tests for a subscript
185 /// operator; if none is found, the given AST node is returned as is.
186 /// @param function The identifier or function parsed.
187 /// @return Either the given node or a node of type #"detail::ASTBinaryOp"
188 /// with \c lhs set to \p{function}, \c rhs set to the parsed subscript argument, and
189 /// the operator set to <c>'[]'</c>.
190 AST* parseSubscript( AST* function );
191
192
193
194 /// Simple shortcut that removes the last AST from the current list (\c ASTs) and returns it. No emptiness check is performed here.
195 /// @return The popped AST object.
196 AST* pop() {
197 AST* ast= ASTs->back();
198 ASTs->pop_back();
199 return ast;
200 }
201
202 /// Simple shortcut that appends an AST to the current list (\c ASTs) and returns it.
203 /// @param ast The AST node to push.
204 /// @return The given AST object.
205 AST* push( AST* ast ) { ASTs->emplace_back(ast); return ast; }
206
207 /// Simple shortcut to the topmost AST, i.e., the last element of the current list (\c ASTs). The list is not modified.
208 /// @return The topmost AST object.
209 AST* top() { return ASTs->back(); }
210
211 /// Simple shortcut that overwrites the topmost entry of the current list (\c ASTs) with the given AST.
212 /// @param ast The new AST node to replace the existing one with.
213 /// @return The given object.
214 AST* replace( AST* ast ) { ASTs->back()= ast; return ast; }
215}; // class ParserImpl
216
217
218}}} // namespace [alib::expressions::detail]
#define ALIB_DLL
Definition alib.inl:573
#define ALIB_EXPORT
Definition alib.inl:562
Tokens token
The actual token type.
HashSet< MonoAllocator, String, alib::hash_string_ignore_case< character >, alib::equal_to_string_ignore_case< character > > unaryOperators
Compiler & compiler
The compiler that this parser works for.
ASTLiteral::NFHint tokLiteralHint
The actual token type.
double tokFloat
Float value of token (if applicable).
virtual detail::AST * Parse(const String &exprString, NumberFormat *nf) override
String expression
The given expression to parse.
integer tokPosition
The position of the token in #".expression".
String tokString
String value of token (if applicable).
integer tokInteger
Integer value of token (if applicable).
ParserImpl(Compiler &compiler, MonoAllocator &allocator)
virtual ~ParserImpl() override
Virtual destructor.
@ SubscriptClose
A closing subscript brace.
@ EOT
End of tokens. (No next token available.).
@ AlphaBinOp
An alphabetic binary operator.
@ SymbolicOp
A symbolic operator. Can be unary or binary.
@ AlphaUnOp
An alphabetic unary operator.
@ SubscriptOpen
An opening subscript brace.
HashSet< MonoAllocator, String, alib::hash_string_ignore_case< character >, alib::equal_to_string_ignore_case< character > > binaryOperators
void NextToken()
This is the "scanner" or "lexer" method.
Substring scanner
The rest of #".expression".
NumberFormat * numberFormat
Used for scanning literals. Provided to this class with each parse request.
monomem::TMonoAllocator< lang::HeapAllocator > MonoAllocator
strings::TNumberFormat< character > NumberFormat
Type alias in namespace alib.
containers::HashSet< TAllocator, T, THash, TEqual, THashCaching, TRecycling > HashSet
Type alias in namespace alib. See type definition #"alib::containers::HashSet".
lang::integer integer
Type alias in namespace alib.
Definition integers.inl:149
strings::TString< character > String
Type alias in namespace alib.
Definition string.inl:2172
strings::TSubstring< character > Substring
Type alias in namespace alib.
lang::TBitSet< int, TEnd, TBegin > BitSet
Type alias in namespace alib.
Definition bitset.inl:815
std::vector< T, StdMA< T > > StdVectorMA
Type alias in namespace alib.
This detail class constitutes an abstract base class for expression parsers.
Definition parser.inl:15