<'; const OPERATORS_POSTFIX = '%'; /** * Formula. * * @var string */ private $formula; /** * Tokens. * * @var FormulaToken[] */ private $tokens = []; /** * Create a new FormulaParser. * * @param string $pFormula Formula to parse * * @throws Exception */ public function __construct($pFormula = '') { // Check parameters if (is_null($pFormula)) { throw new Exception('Invalid parameter passed: formula'); } // Initialise values $this->formula = trim($pFormula); // Parse! $this->parseToTokens(); } /** * Get Formula. * * @return string */ public function getFormula() { return $this->formula; } /** * Get Token. * * @param int $pId Token id * * @throws Exception * * @return string */ public function getToken($pId = 0) { if (isset($this->tokens[$pId])) { return $this->tokens[$pId]; } throw new Exception("Token with id $pId does not exist."); } /** * Get Token count. * * @return int */ public function getTokenCount() { return count($this->tokens); } /** * Get Tokens. * * @return FormulaToken[] */ public function getTokens() { return $this->tokens; } /** * Parse to tokens. */ private function parseToTokens() { // No attempt is made to verify formulas; assumes formulas are derived from Excel, where // they can only exist if valid; stack overflows/underflows sunk as nulls without exceptions. // Check if the formula has a valid starting = $formulaLength = strlen($this->formula); if ($formulaLength < 2 || $this->formula[0] != '=') { return; } // Helper variables $tokens1 = $tokens2 = $stack = []; $inString = $inPath = $inRange = $inError = false; $token = $previousToken = $nextToken = null; $index = 1; $value = ''; $ERRORS = ['#NULL!', '#DIV/0!', '#VALUE!', '#REF!', '#NAME?', '#NUM!', '#N/A']; $COMPARATORS_MULTI = ['>=', '<=', '<>']; while ($index < $formulaLength) { // state-dependent character evaluation (order is important) // double-quoted strings // embeds are doubled // end marks token if ($inString) { if ($this->formula[$index] == self::QUOTE_DOUBLE) { if ((($index + 2) <= $formulaLength) && ($this->formula[$index + 1] == self::QUOTE_DOUBLE)) { $value .= self::QUOTE_DOUBLE; ++$index; } else { $inString = false; $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND, FormulaToken::TOKEN_SUBTYPE_TEXT); $value = ''; } } else { $value .= $this->formula[$index]; } ++$index; continue; } // single-quoted strings (links) // embeds are double // end does not mark a token if ($inPath) { if ($this->formula[$index] == self::QUOTE_SINGLE) { if ((($index + 2) <= $formulaLength) && ($this->formula[$index + 1] == self::QUOTE_SINGLE)) { $value .= self::QUOTE_SINGLE; ++$index; } else { $inPath = false; } } else { $value .= $this->formula[$index]; } ++$index; continue; } // bracked strings (R1C1 range index or linked workbook name) // no embeds (changed to "()" by Excel) // end does not mark a token if ($inRange) { if ($this->formula[$index] == self::BRACKET_CLOSE) { $inRange = false; } $value .= $this->formula[$index]; ++$index; continue; } // error values // end marks a token, determined from absolute list of values if ($inError) { $value .= $this->formula[$index]; ++$index; if (in_array($value, $ERRORS)) { $inError = false; $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND, FormulaToken::TOKEN_SUBTYPE_ERROR); $value = ''; } continue; } // scientific notation check if (strpos(self::OPERATORS_SN, $this->formula[$index]) !== false) { if (strlen($value) > 1) { if (preg_match("/^[1-9]{1}(\.[0-9]+)?E{1}$/", $this->formula[$index]) != 0) { $value .= $this->formula[$index]; ++$index; continue; } } } // independent character evaluation (order not important) // establish state-dependent character evaluations if ($this->formula[$index] == self::QUOTE_DOUBLE) { if (strlen($value > 0)) { // unexpected $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN); $value = ''; } $inString = true; ++$index; continue; } if ($this->formula[$index] == self::QUOTE_SINGLE) { if (strlen($value) > 0) { // unexpected $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN); $value = ''; } $inPath = true; ++$index; continue; } if ($this->formula[$index] == self::BRACKET_OPEN) { $inRange = true; $value .= self::BRACKET_OPEN; ++$index; continue; } if ($this->formula[$index] == self::ERROR_START) { if (strlen($value) > 0) { // unexpected $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN); $value = ''; } $inError = true; $value .= self::ERROR_START; ++$index; continue; } // mark start and end of arrays and array rows if ($this->formula[$index] == self::BRACE_OPEN) { if (strlen($value) > 0) { // unexpected $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN); $value = ''; } $tmp = new FormulaToken('ARRAY', FormulaToken::TOKEN_TYPE_FUNCTION, FormulaToken::TOKEN_SUBTYPE_START); $tokens1[] = $tmp; $stack[] = clone $tmp; $tmp = new FormulaToken('ARRAYROW', FormulaToken::TOKEN_TYPE_FUNCTION, FormulaToken::TOKEN_SUBTYPE_START); $tokens1[] = $tmp; $stack[] = clone $tmp; ++$index; continue; } if ($this->formula[$index] == self::SEMICOLON) { if (strlen($value) > 0) { $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); $value = ''; } $tmp = array_pop($stack); $tmp->setValue(''); $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP); $tokens1[] = $tmp; $tmp = new FormulaToken(',', FormulaToken::TOKEN_TYPE_ARGUMENT); $tokens1[] = $tmp; $tmp = new FormulaToken('ARRAYROW', FormulaToken::TOKEN_TYPE_FUNCTION, FormulaToken::TOKEN_SUBTYPE_START); $tokens1[] = $tmp; $stack[] = clone $tmp; ++$index; continue; } if ($this->formula[$index] == self::BRACE_CLOSE) { if (strlen($value) > 0) { $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); $value = ''; } $tmp = array_pop($stack); $tmp->setValue(''); $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP); $tokens1[] = $tmp; $tmp = array_pop($stack); $tmp->setValue(''); $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP); $tokens1[] = $tmp; ++$index; continue; } // trim white-space if ($this->formula[$index] == self::WHITESPACE) { if (strlen($value) > 0) { $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); $value = ''; } $tokens1[] = new FormulaToken('', FormulaToken::TOKEN_TYPE_WHITESPACE); ++$index; while (($this->formula[$index] == self::WHITESPACE) && ($index < $formulaLength)) { ++$index; } continue; } // multi-character comparators if (($index + 2) <= $formulaLength) { if (in_array(substr($this->formula, $index, 2), $COMPARATORS_MULTI)) { if (strlen($value) > 0) { $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); $value = ''; } $tokens1[] = new FormulaToken(substr($this->formula, $index, 2), FormulaToken::TOKEN_TYPE_OPERATORINFIX, FormulaToken::TOKEN_SUBTYPE_LOGICAL); $index += 2; continue; } } // standard infix operators if (strpos(self::OPERATORS_INFIX, $this->formula[$index]) !== false) { if (strlen($value) > 0) { $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); $value = ''; } $tokens1[] = new FormulaToken($this->formula[$index], FormulaToken::TOKEN_TYPE_OPERATORINFIX); ++$index; continue; } // standard postfix operators (only one) if (strpos(self::OPERATORS_POSTFIX, $this->formula[$index]) !== false) { if (strlen($value) > 0) { $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); $value = ''; } $tokens1[] = new FormulaToken($this->formula[$index], FormulaToken::TOKEN_TYPE_OPERATORPOSTFIX); ++$index; continue; } // start subexpression or function if ($this->formula[$index] == self::PAREN_OPEN) { if (strlen($value) > 0) { $tmp = new FormulaToken($value, FormulaToken::TOKEN_TYPE_FUNCTION, FormulaToken::TOKEN_SUBTYPE_START); $tokens1[] = $tmp; $stack[] = clone $tmp; $value = ''; } else { $tmp = new FormulaToken('', FormulaToken::TOKEN_TYPE_SUBEXPRESSION, FormulaToken::TOKEN_SUBTYPE_START); $tokens1[] = $tmp; $stack[] = clone $tmp; } ++$index; continue; } // function, subexpression, or array parameters, or operand unions if ($this->formula[$index] == self::COMMA) { if (strlen($value) > 0) { $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); $value = ''; } $tmp = array_pop($stack); $tmp->setValue(''); $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP); $stack[] = $tmp; if ($tmp->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) { $tokens1[] = new FormulaToken(',', FormulaToken::TOKEN_TYPE_OPERATORINFIX, FormulaToken::TOKEN_SUBTYPE_UNION); } else { $tokens1[] = new FormulaToken(',', FormulaToken::TOKEN_TYPE_ARGUMENT); } ++$index; continue; } // stop subexpression if ($this->formula[$index] == self::PAREN_CLOSE) { if (strlen($value) > 0) { $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); $value = ''; } $tmp = array_pop($stack); $tmp->setValue(''); $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP); $tokens1[] = $tmp; ++$index; continue; } // token accumulation $value .= $this->formula[$index]; ++$index; } // dump remaining accumulation if (strlen($value) > 0) { $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); } // move tokenList to new set, excluding unnecessary white-space tokens and converting necessary ones to intersections $tokenCount = count($tokens1); for ($i = 0; $i < $tokenCount; ++$i) { $token = $tokens1[$i]; if (isset($tokens1[$i - 1])) { $previousToken = $tokens1[$i - 1]; } else { $previousToken = null; } if (isset($tokens1[$i + 1])) { $nextToken = $tokens1[$i + 1]; } else { $nextToken = null; } if (is_null($token)) { continue; } if ($token->getTokenType() != FormulaToken::TOKEN_TYPE_WHITESPACE) { $tokens2[] = $token; continue; } if (is_null($previousToken)) { continue; } if (!( (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) && ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) || (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) && ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) || ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND) )) { continue; } if (is_null($nextToken)) { continue; } if (!( (($nextToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) && ($nextToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_START)) || (($nextToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) && ($nextToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_START)) || ($nextToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND) )) { continue; } $tokens2[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERATORINFIX, FormulaToken::TOKEN_SUBTYPE_INTERSECTION); } // move tokens to final list, switching infix "-" operators to prefix when appropriate, switching infix "+" operators // to noop when appropriate, identifying operand and infix-operator subtypes, and pulling "@" from function names $this->tokens = []; $tokenCount = count($tokens2); for ($i = 0; $i < $tokenCount; ++$i) { $token = $tokens2[$i]; if (isset($tokens2[$i - 1])) { $previousToken = $tokens2[$i - 1]; } else { $previousToken = null; } if (isset($tokens2[$i + 1])) { $nextToken = $tokens2[$i + 1]; } else { $nextToken = null; } if (is_null($token)) { continue; } if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORINFIX && $token->getValue() == '-') { if ($i == 0) { $token->setTokenType(FormulaToken::TOKEN_TYPE_OPERATORPREFIX); } elseif ((($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) && ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) || (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) && ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) || ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORPOSTFIX) || ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND)) { $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_MATH); } else { $token->setTokenType(FormulaToken::TOKEN_TYPE_OPERATORPREFIX); } $this->tokens[] = $token; continue; } if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORINFIX && $token->getValue() == '+') { if ($i == 0) { continue; } elseif ((($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) && ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) || (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) && ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) || ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORPOSTFIX) || ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND)) { $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_MATH); } else { continue; } $this->tokens[] = $token; continue; } if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORINFIX && $token->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_NOTHING) { if (strpos('<>=', substr($token->getValue(), 0, 1)) !== false) { $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_LOGICAL); } elseif ($token->getValue() == '&') { $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_CONCATENATION); } else { $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_MATH); } $this->tokens[] = $token; continue; } if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND && $token->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_NOTHING) { if (!is_numeric($token->getValue())) { if (strtoupper($token->getValue()) == 'TRUE' || strtoupper($token->getValue() == 'FALSE')) { $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_LOGICAL); } else { $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_RANGE); } } else { $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_NUMBER); } $this->tokens[] = $token; continue; } if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) { if (strlen($token->getValue() > 0)) { if (substr($token->getValue(), 0, 1) == '@') { $token->setValue(substr($token->getValue(), 1)); } } } $this->tokens[] = $token; } } }