initial commit
This commit is contained in:
commit
5210d78d7d
969 changed files with 223828 additions and 0 deletions
476
kirby/src/Query/Parser/Parser.php
Normal file
476
kirby/src/Query/Parser/Parser.php
Normal file
|
|
@ -0,0 +1,476 @@
|
|||
<?php
|
||||
|
||||
namespace Kirby\Query\Parser;
|
||||
|
||||
use Exception;
|
||||
use Iterator;
|
||||
use Kirby\Query\AST\ArgumentListNode;
|
||||
use Kirby\Query\AST\ArithmeticNode;
|
||||
use Kirby\Query\AST\ArrayListNode;
|
||||
use Kirby\Query\AST\ClosureNode;
|
||||
use Kirby\Query\AST\CoalesceNode;
|
||||
use Kirby\Query\AST\ComparisonNode;
|
||||
use Kirby\Query\AST\GlobalFunctionNode;
|
||||
use Kirby\Query\AST\LiteralNode;
|
||||
use Kirby\Query\AST\LogicalNode;
|
||||
use Kirby\Query\AST\MemberAccessNode;
|
||||
use Kirby\Query\AST\Node;
|
||||
use Kirby\Query\AST\TernaryNode;
|
||||
use Kirby\Query\AST\VariableNode;
|
||||
|
||||
/**
 * Parses query string by first splitting it into tokens
 * and then matching and consuming tokens to create
 * an abstract syntax tree (AST) of matching nodes
 *
 * The parser is a recursive descent parser. Each precedence level
 * is one method that delegates to the next tighter-binding level:
 *
 * expression → ternary → coalesce → logical → comparison
 * → arithmetic → term → memberAccess → atomic
 *
 * @package   Kirby Query
 * @author    Roman Steiner <roman@toastlab.ch>,
 *            Nico Hoffmann <nico@getkirby.com>
 * @link      https://getkirby.com
 * @license   https://opensource.org/licenses/MIT
 * @since     5.1.0
 * @unstable
 */
class Parser
{
    /**
     * Token the parser is currently looking at
     */
    protected Token $current;

    /**
     * Token that was consumed last (`null` before the first advance)
     */
    protected Token|null $previous = null;

    /**
     * @var Iterator<Token>
     */
    protected Iterator $tokens;

    /**
     * @param string|Iterator $query Raw query string (will be run
     *                               through the `Tokenizer`) or an
     *                               iterator that yields tokens
     */
    public function __construct(string|Iterator $query)
    {
        if (is_string($query) === true) {
            $query = (new Tokenizer($query))->tokens();
        }

        $this->tokens  = $query;
        $this->current = $this->tokens->current();
    }

    /**
     * Move to the next token
     *
     * Does nothing once the end-of-file token has been reached.
     *
     * @return Token|null The token that was current before advancing
     */
    protected function advance(): Token|null
    {
        if ($this->isAtEnd() === true) {
            return $this->previous;
        }

        $this->previous = $this->current;
        $this->tokens->next();
        $this->current = $this->tokens->current();

        return $this->previous;
    }

    /**
     * Parses an array
     *
     * @return ArrayListNode|null `null` if the current token
     *                            does not open an array
     */
    private function array(): ArrayListNode|null
    {
        if ($this->consume(TokenType::T_OPEN_BRACKET)) {
            $elements = $this->consumeList(TokenType::T_CLOSE_BRACKET);

            return new ArrayListNode(elements: $elements);
        }

        return null;
    }

    /**
     * Parses a list of arguments
     * (the opening parenthesis must already have been consumed)
     */
    private function argumentList(): ArgumentListNode
    {
        $arguments = $this->consumeList(TokenType::T_CLOSE_PAREN);

        return new ArgumentListNode(arguments: $arguments);
    }

    /**
     * Checks for and parses several atomic expressions:
     * scalars, arrays, identifiers and groupings (in that order)
     *
     * @throws \Exception when none of the atomic expressions matches
     */
    private function atomic(): Node
    {
        // `??` short-circuits, so later alternatives are only
        // tried when the earlier ones did not match
        return $this->scalar()
            ?? $this->array()
            ?? $this->identifier()
            ?? $this->grouping()
            ?? throw new Exception('Expect expression'); // @codeCoverageIgnore
    }

    /**
     * Checks for and parses a coalesce expression (`a ?? b`)
     */
    private function coalesce(): Node
    {
        $node = $this->logical();

        while ($this->consume(TokenType::T_COALESCE)) {
            $node = new CoalesceNode(
                left: $node,
                right: $this->logical()
            );
        }

        return $node;
    }

    /**
     * Collect the next token of a type
     *
     * @param string|false $error Message for the exception that is thrown
     *                            on a mismatch; `false` to return `false`
     *                            instead of throwing
     * @return Token|false The consumed token or `false` on a mismatch
     *
     * @throws \Exception when next token is not of specified type
     *                    and an error message has been passed
     */
    protected function consume(
        TokenType $type,
        string|false $error = false
    ): Token|false {
        if ($this->is($type) === true) {
            return $this->advance();
        }

        if (is_string($error) === true) {
            throw new Exception($error);
        }

        return false;
    }

    /**
     * Move to next token if of any specific type
     *
     * @param array<TokenType> $types
     * @return Token|false The consumed token or `false`
     *                     if none of the types matched
     */
    protected function consumeAny(array $types): Token|false
    {
        foreach ($types as $type) {
            if ($this->is($type) === true) {
                return $this->advance();
            }
        }

        return false;
    }

    /**
     * Collect all list elements until closing token
     * (the closing token itself gets consumed as well)
     *
     * @return array<Node>
     *
     * @throws \Exception when the closing token is missing
     */
    private function consumeList(TokenType $until): array
    {
        $elements = [];

        while (
            $this->isAtEnd() === false &&
            $this->is($until) === false
        ) {
            $elements[] = $this->expression();

            // elements must be separated by commas;
            // a trailing comma before the closing token is accepted
            if ($this->consume(TokenType::T_COMMA) === false) {
                break;
            }
        }

        // consume the closing token
        $this->consume($until, 'Expect closing bracket after list');

        return $elements;
    }

    /**
     * Returns the current token
     */
    public function current(): Token
    {
        return $this->current;
    }

    /**
     * Convert a full query expression into a node
     */
    private function expression(): Node
    {
        // Top-level expression should be ternary
        return $this->ternary();
    }

    /**
     * Parses comparison expressions (left-associative);
     * operands are arithmetic expressions
     */
    private function comparison(): Node
    {
        $operators = [
            TokenType::T_EQUAL,
            TokenType::T_IDENTICAL,
            TokenType::T_NOT_EQUAL,
            TokenType::T_NOT_IDENTICAL,
            TokenType::T_LESS_THAN,
            TokenType::T_LESS_EQUAL,
            TokenType::T_GREATER_THAN,
            TokenType::T_GREATER_EQUAL
        ];

        $left = $this->arithmetic();

        while ($token = $this->consumeAny($operators)) {
            $left = new ComparisonNode(
                left: $left,
                operator: $token->lexeme,
                right: $this->arithmetic()
            );
        }

        return $left;
    }

    /**
     * Parses a parenthesized expression: either a closure
     * (`(a, b) => expression`) or a plain grouping (`(expression)`)
     *
     * @return ClosureNode|Node|null `null` if the current token
     *                               is not an opening parenthesis
     *
     * @throws \Exception for empty groupings, for multiple grouped
     *                    expressions without `=>` and for closure
     *                    arguments that are not plain variables
     */
    private function grouping(): ClosureNode|Node|null
    {
        if (!$this->consume(TokenType::T_OPEN_PAREN)) {
            return null;
        }

        $list = $this->consumeList(TokenType::T_CLOSE_PAREN);

        if ($this->consume(TokenType::T_ARROW)) {
            $body      = $this->expression();
            $arguments = [];

            // every closure argument must be a plain variable
            foreach ($list as $element) {
                if ($element instanceof VariableNode === false) {
                    throw new Exception('Expecting only variables in closure argument list');
                }

                $arguments[] = $element->name;
            }

            return new ClosureNode(
                arguments: $arguments,
                body: $body
            );
        }

        return match (count($list)) {
            // `()` without `=>` groups nothing; fail explicitly
            // instead of reading a non-existing list element
            0       => throw new Exception('Expect expression'),
            // this is just a grouping
            1       => $list[0],
            default => throw new Exception('Expecting "=>" after closure argument list'),
        };
    }

    /**
     * Parses an identifier (global functions or variables)
     *
     * @return GlobalFunctionNode|VariableNode|null `null` if the current
     *                                              token is no identifier
     */
    private function identifier(): GlobalFunctionNode|VariableNode|null
    {
        $token = $this->consume(TokenType::T_IDENTIFIER);

        if (!$token) {
            return null;
        }

        // an identifier directly followed by `(` is a function call
        if ($this->consume(TokenType::T_OPEN_PAREN)) {
            return new GlobalFunctionNode(
                name: $token->lexeme,
                arguments: $this->argumentList()
            );
        }

        return new VariableNode(name: $token->lexeme);
    }

    /**
     * Whether the current token is of a specific type
     * (always `false` once the end of the query is reached)
     */
    protected function is(TokenType $type): bool
    {
        return
            $this->isAtEnd() === false &&
            $this->current->is($type);
    }

    /**
     * Whether the parser has reached the end of the query
     */
    protected function isAtEnd(): bool
    {
        return $this->current->is(TokenType::T_EOF);
    }

    /**
     * Checks for and parses a member access expression
     * (`a.b`, `a?.b`, `a[b]`, each optionally followed
     * by an argument list for method calls)
     *
     * @throws \Exception when no valid member follows the access operator
     */
    private function memberAccess(): Node
    {
        $object = $this->atomic();

        while ($token = $this->consumeAny([
            TokenType::T_DOT,
            TokenType::T_NULLSAFE,
            TokenType::T_OPEN_BRACKET
        ])) {
            if ($token->is(TokenType::T_OPEN_BRACKET) === true) {
                // For subscript notation, parse the inside as expression…
                $member = $this->expression();

                // …and ensure consuming the closing bracket
                $this->consume(
                    TokenType::T_CLOSE_BRACKET,
                    'Expect subscript closing bracket'
                );
            } elseif ($name = $this->consume(TokenType::T_IDENTIFIER)) {
                $member = new LiteralNode($name->lexeme);
            } elseif ($index = $this->consume(TokenType::T_INTEGER)) {
                $member = new LiteralNode($index->literal);
            } else {
                throw new Exception('Expect property name after "."');
            }

            // an opening parenthesis turns the access into a method call
            $arguments = $this->consume(TokenType::T_OPEN_PAREN)
                ? $this->argumentList()
                : null;

            $object = new MemberAccessNode(
                object: $object,
                member: $member,
                arguments: $arguments,
                nullSafe: $token->is(TokenType::T_NULLSAFE)
            );
        }

        return $object;
    }

    /**
     * Parses addition and subtraction expressions (left-associative);
     * operands are terms, so `*`, `/` and `%` bind tighter
     */
    private function arithmetic(): Node
    {
        $left = $this->term();

        while ($token = $this->consumeAny([
            TokenType::T_PLUS,
            TokenType::T_MINUS
        ])) {
            $left = new ArithmeticNode(
                left: $left,
                operator: $token->lexeme,
                right: $this->term()
            );
        }

        return $left;
    }

    /**
     * Parses multiplication, division, and modulo expressions
     * (left-associative); operands are member access expressions
     */
    private function term(): Node
    {
        $left = $this->memberAccess();

        while ($token = $this->consumeAny([
            TokenType::T_MULTIPLY,
            TokenType::T_DIVIDE,
            TokenType::T_MODULO
        ])) {
            $left = new ArithmeticNode(
                left: $left,
                operator: $token->lexeme,
                right: $this->memberAccess()
            );
        }

        return $left;
    }

    /**
     * Parses logical expressions; operands are comparisons
     *
     * AND and OR share this one precedence level and are
     * combined strictly from left to right.
     */
    private function logical(): Node
    {
        $left = $this->comparison();

        while ($token = $this->consumeAny([
            TokenType::T_AND,
            TokenType::T_OR
        ])) {
            $left = new LogicalNode(
                left: $left,
                operator: $token->lexeme,
                right: $this->comparison()
            );
        }

        return $left;
    }

    /**
     * Parses the tokenized query into AST node tree
     *
     * @throws \Exception when tokens are left over after the expression
     */
    public function parse(): Node
    {
        // Start parsing chain
        $expression = $this->expression();

        // Ensure that we consumed all tokens:
        // `is()` never matches once past the end check, so this
        // call throws with the given message for any leftover token
        if ($this->isAtEnd() === false) {
            $this->consume(TokenType::T_EOF, 'Expect end of expression'); // @codeCoverageIgnore
        }

        return $expression;
    }

    /**
     * Parses a scalar literal
     * (boolean, null, string, integer or float)
     *
     * @return LiteralNode|null `null` if the current token is no scalar
     */
    private function scalar(): LiteralNode|null
    {
        $token = $this->consumeAny([
            TokenType::T_TRUE,
            TokenType::T_FALSE,
            TokenType::T_NULL,
            TokenType::T_STRING,
            TokenType::T_INTEGER,
            TokenType::T_FLOAT,
        ]);

        if (!$token) {
            return null;
        }

        return new LiteralNode(value: $token->literal);
    }

    /**
     * Checks for and parses a ternary expression
     * (full `a ? b : c` or elvis shorthand `a ?: c`)
     *
     * @throws \Exception when the `:` of a full ternary is missing
     */
    private function ternary(): Node
    {
        $condition = $this->coalesce();

        $token = $this->consumeAny([
            TokenType::T_QUESTION_MARK,
            TokenType::T_TERNARY_DEFAULT
        ]);

        if (!$token) {
            return $condition;
        }

        // the elvis shorthand has no explicit true branch
        $true = null;

        if ($token->is(TokenType::T_TERNARY_DEFAULT) === false) {
            $true = $this->expression();
            $this->consume(
                type: TokenType::T_COLON,
                error: 'Expect ":" after true branch'
            );
        }

        return new TernaryNode(
            condition: $condition,
            true: $true,
            false: $this->expression()
        );
    }
}
|
||||
30
kirby/src/Query/Parser/Token.php
Normal file
30
kirby/src/Query/Parser/Token.php
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
<?php
|
||||
|
||||
namespace Kirby\Query\Parser;
|
||||
|
||||
/**
 * Represents a single token of a particular type
 * within a query
 *
 * @package   Kirby Query
 * @author    Roman Steiner <roman@toastlab.ch>,
 *            Nico Hoffmann <nico@getkirby.com>
 * @link      https://getkirby.com
 * @license   https://opensource.org/licenses/MIT
 * @since     5.1.0
 * @unstable
 */
class Token
{
    /**
     * @param TokenType $type Kind of the token
     * @param string $lexeme Text of the token
     * @param mixed $literal Value attached to the token (defaults to `null`)
     */
    public function __construct(
        public TokenType $type,
        public string $lexeme,
        public mixed $literal = null,
    ) {
    }

    /**
     * Whether the token is of the given type
     */
    public function is(TokenType $type): bool
    {
        return $type === $this->type;
    }
}
|
||||
61
kirby/src/Query/Parser/TokenType.php
Normal file
61
kirby/src/Query/Parser/TokenType.php
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
<?php
|
||||
|
||||
namespace Kirby\Query\Parser;
|
||||
|
||||
/**
 * All token types that can be part of a query
 *
 * @package   Kirby Query
 * @author    Roman Steiner <roman@toastlab.ch>,
 *            Nico Hoffmann <nico@getkirby.com>
 * @link      https://getkirby.com
 * @license   https://opensource.org/licenses/MIT
 * @since     5.1.0
 * @unstable
 */
enum TokenType
{
    // Punctuation and structural tokens
    case T_DOT;
    case T_COLON;
    case T_QUESTION_MARK;
    case T_OPEN_PAREN;
    case T_CLOSE_PAREN;
    case T_OPEN_BRACKET;
    case T_CLOSE_BRACKET;
    case T_TERNARY_DEFAULT; // ?:
    case T_NULLSAFE;        // ?.
    case T_COALESCE;        // ??
    case T_COMMA;
    case T_ARROW;
    case T_WHITESPACE;
    case T_EOF;

    // Comparison operators
    case T_EQUAL;         // ==
    case T_IDENTICAL;     // ===
    case T_NOT_EQUAL;     // !=
    case T_NOT_IDENTICAL; // !==
    case T_LESS_THAN;     // <
    case T_LESS_EQUAL;    // <=
    case T_GREATER_THAN;  // >
    case T_GREATER_EQUAL; // >=

    // Math operators
    case T_PLUS;     // +
    case T_MINUS;    // -
    case T_MULTIPLY; // *
    case T_DIVIDE;   // /
    case T_MODULO;   // %

    // Logical operators
    case T_AND; // AND or &&
    case T_OR;  // OR or ||

    // Literals
    case T_STRING;
    case T_INTEGER;
    case T_FLOAT;
    case T_TRUE;
    case T_FALSE;
    case T_NULL;

    case T_IDENTIFIER;
}
|
||||
256
kirby/src/Query/Parser/Tokenizer.php
Normal file
256
kirby/src/Query/Parser/Tokenizer.php
Normal file
|
|
@ -0,0 +1,256 @@
|
|||
<?php
|
||||
|
||||
namespace Kirby\Query\Parser;
|
||||
|
||||
use Exception;
|
||||
use Generator;
|
||||
|
||||
/**
 * Parses a query string into its individual tokens
 *
 * All positions used by this class are byte offsets into the
 * query string, as that is what `preg_match()` and string
 * offset access (`$query[$i]`) operate on.
 *
 * @package   Kirby Query
 * @author    Roman Steiner <roman@toastlab.ch>,
 *            Nico Hoffmann <nico@getkirby.com>
 * @link      https://getkirby.com
 * @license   https://opensource.org/licenses/MIT
 * @since     5.1.0
 * @unstable
 */
class Tokenizer
{
    /**
     * Length of the query string in bytes
     */
    private int $length = 0;

    /**
     * The more complex regexes are written here in nowdoc format
     * so we don't need to double or triple escape backslashes
     * (that becomes ridiculous rather fast).
     *
     * Identifiers can contain letters, numbers and underscores.
     * The regex itself does not forbid a leading digit; plain numbers
     * are however claimed by the float/integer checks that run first.
     * For more complex identifier strings, subscript member access
     * should be used. With `this` to access the global context.
     */
    private const IDENTIFIER_REGEX = <<<'REGEX'
    (?:[\p{L}\p{N}_])*
    REGEX;

    private const SINGLEQUOTE_STRING_REGEX = <<<'REGEX'
    '([^'\\]*(?:\\.[^'\\]*)*)'
    REGEX;

    private const DOUBLEQUOTE_STRING_REGEX = <<<'REGEX'
    "([^"\\]*(?:\\.[^"\\]*)*)"
    REGEX;

    /**
     * Negative lookahead that keeps a keyword from matching
     * when it is only the prefix of a longer identifier
     * (e.g. `null` in `nullable` or `OR` in `ORDER`)
     */
    private const WORD_END = '(?![\p{L}\p{N}_])';

    /**
     * Operator tokens, mapped from their regex to the token type.
     * Order matters: longer operators must be checked before
     * their shorter prefixes (`===` before `==`, `<=` before `<`).
     */
    private const OPERATORS = [
        '\?\?'                     => TokenType::T_COALESCE,
        '\?\s*\.'                  => TokenType::T_NULLSAFE,
        '\?\s*:'                   => TokenType::T_TERNARY_DEFAULT,
        '=>'                       => TokenType::T_ARROW,
        // Logical operators (check before comparison operators)
        '&&|AND(?![\p{L}\p{N}_])'  => TokenType::T_AND,
        '\|\||OR(?![\p{L}\p{N}_])' => TokenType::T_OR,
        // Comparison operators (three characters first, then two)
        '==='                      => TokenType::T_IDENTICAL,
        '!=='                      => TokenType::T_NOT_IDENTICAL,
        '<='                       => TokenType::T_LESS_EQUAL,
        '>='                       => TokenType::T_GREATER_EQUAL,
        '=='                       => TokenType::T_EQUAL,
        '!='                       => TokenType::T_NOT_EQUAL,
    ];

    /**
     * Single character tokens (checked after all longer tokens)
     */
    private const SINGLE_CHARACTERS = [
        '.' => TokenType::T_DOT,
        '(' => TokenType::T_OPEN_PAREN,
        ')' => TokenType::T_CLOSE_PAREN,
        '[' => TokenType::T_OPEN_BRACKET,
        ']' => TokenType::T_CLOSE_BRACKET,
        ',' => TokenType::T_COMMA,
        ':' => TokenType::T_COLON,
        '+' => TokenType::T_PLUS,
        '-' => TokenType::T_MINUS,
        '*' => TokenType::T_MULTIPLY,
        '/' => TokenType::T_DIVIDE,
        '%' => TokenType::T_MODULO,
        '?' => TokenType::T_QUESTION_MARK,
        '<' => TokenType::T_LESS_THAN,
        '>' => TokenType::T_GREATER_THAN,
    ];

    public function __construct(
        private readonly string $query,
    ) {
        // byte length, to stay in sync with the byte
        // offsets that `preg_match()` works with
        $this->length = strlen($query);
    }

    /**
     * Matches a regex pattern at the current position in the query string.
     *
     * @param int $offset Current position (byte offset) in the query string
     * @param string $regex Regex pattern without delimiters/flags
     * @param bool $caseInsensitive Whether to match case-insensitively
     * @return string|null The matched lexeme or `null` if the pattern
     *                     does not match right at the offset
     */
    public static function match(
        string $query,
        int $offset,
        string $regex,
        bool $caseInsensitive = false
    ): string|null {
        // Add delimiters and flags to the regex.
        // The pattern is wrapped in a group so that the `\G` anchor
        // applies to every alternative of patterns like `&&|AND`;
        // otherwise later alternatives would match anywhere
        // in the remaining query string.
        $regex = '/\G(?:' . $regex . ')/u';

        if ($caseInsensitive === true) {
            $regex .= 'i';
        }

        if (preg_match($regex, $query, $matches, 0, $offset) !== 1) {
            return null;
        }

        return $matches[0];
    }

    /**
     * Scans the source string for a next token
     * starting from the given position
     *
     * NOTE: a `-` directly followed by digits is always read as part
     * of the number, so `5-3` yields the tokens `5` and `-3`.
     *
     * @param int $current The current position (byte offset) in the source string
     *
     * @throws \Exception If an unexpected character is encountered
     */
    public static function token(string $query, int $current): Token
    {
        // Multi character tokens (check these first):
        // Whitespace
        if ($lex = static::match($query, $current, '\s+')) {
            return new Token(TokenType::T_WHITESPACE, $lex);
        }

        // true
        if ($lex = static::match($query, $current, 'true' . self::WORD_END, true)) {
            return new Token(TokenType::T_TRUE, $lex, true);
        }

        // false
        if ($lex = static::match($query, $current, 'false' . self::WORD_END, true)) {
            return new Token(TokenType::T_FALSE, $lex, false);
        }

        // null
        if ($lex = static::match($query, $current, 'null' . self::WORD_END, true)) {
            return new Token(TokenType::T_NULL, $lex, null);
        }

        // "string" and 'string'
        $strings = [
            self::DOUBLEQUOTE_STRING_REGEX,
            self::SINGLEQUOTE_STRING_REGEX
        ];

        foreach ($strings as $regex) {
            if ($lex = static::match($query, $current, $regex)) {
                return new Token(
                    TokenType::T_STRING,
                    $lex,
                    // drop the surrounding quotes and resolve escapes
                    stripcslashes(substr($lex, 1, -1))
                );
            }
        }

        // float (check before single character tokens);
        // compared against null as a lexeme like `0` is falsy
        $lex = static::match($query, $current, '-?\d+\.\d+\b');

        if ($lex !== null) {
            return new Token(TokenType::T_FLOAT, $lex, (float)$lex);
        }

        // int (check before single character tokens)
        $lex = static::match($query, $current, '-?\d+\b');

        if ($lex !== null) {
            return new Token(TokenType::T_INTEGER, $lex, (int)$lex);
        }

        // Two and three character operators
        foreach (self::OPERATORS as $regex => $type) {
            $lex = static::match($query, $current, $regex);

            if ($lex !== null) {
                return new Token($type, $lex);
            }
        }

        // Single character tokens (check these last):
        $char = $query[$current];
        $type = self::SINGLE_CHARACTERS[$char] ?? null;

        if ($type !== null) {
            return new Token($type, $char);
        }

        // Identifier (the regex can match an empty string,
        // which must not be treated as a token)
        if ($lex = static::match($query, $current, self::IDENTIFIER_REGEX)) {
            return new Token(TokenType::T_IDENTIFIER, $lex);
        }

        // Unknown token: report the whole (multibyte) character
        $invalid = mb_substr(substr($query, $current), 0, 1);

        throw new Exception('Invalid character in query: ' . $invalid);
    }

    /**
     * Tokenizes the query string and returns a generator of tokens.
     * Whitespace is skipped; the last token is always `T_EOF`.
     *
     * @return Generator<Token>
     *
     * @throws \Exception If an unexpected character is encountered
     */
    public function tokens(): Generator
    {
        $offset = 0;

        while ($offset < $this->length) {
            $token = static::token($this->query, $offset);

            // Don't yield whitespace tokens (ignore them)
            if ($token->type !== TokenType::T_WHITESPACE) {
                yield $token;
            }

            // advance in bytes to match the offsets used by `token()`
            $offset += strlen($token->lexeme);
        }

        yield new Token(TokenType::T_EOF, '', null);
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue