Skip to content

Commit f583f4c

Browse files
committed
Optimize getDiagnostics for speed
Add a simple benchmarking script Extreme optimization, sacrificing code style in some areas. - This is meant for discussion, since the project is in the optimization phase. This commit reduces the time needed to getDiagnostics from 0.013 to 0.007 seconds. Average time to generate diagnostics (before): 0.013... Average time to generate diagnostics (after): 0.006921 time to parse (Without diagnostics): 0.036374 (E.g. getDiagnostics previously took 36% of the time compared to generating an AST, and now it takes 19% of that time.)
1 parent 0e936a1 commit f583f4c

File tree

6 files changed

+230
-100
lines changed

6 files changed

+230
-100
lines changed
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
<?php
2+
// Autoload required classes
3+
require dirname(__DIR__) . "/vendor/autoload.php";
4+
5+
use Microsoft\PhpParser\{DiagnosticsProvider, Node, Parser, PositionUtilities};
6+
7+
8+
const ITERATIONS = 100;
9+
// Return and print an AST from string contents
10+
$main = function() {
11+
// Instantiate new parser instance
12+
// TODO: Multiple source files to be realistic.
13+
$parser = new Parser();
14+
$t0 = microtime(true);
15+
$astNode = $parser->parseSourceFile(file_get_contents(__DIR__ . '/../src/Parser.php'));
16+
$t1 = microtime(true);
17+
for ($i = 0; $i < ITERATIONS; $i++) {
18+
$diagnostics = DiagnosticsProvider::getDiagnostics($astNode);
19+
}
20+
$t2 = microtime(true);
21+
printf("Average time to generate diagnostics: %.6f\n", ($t2 - $t1) / ITERATIONS);
22+
printf("time to parse: %.6f\n", ($t1 - $t0));
23+
global $__counts;
24+
var_export($__counts);
25+
};
26+
$main();

src/DiagnosticsProvider.php

Lines changed: 57 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -12,123 +12,79 @@ class DiagnosticsProvider {
1212

1313
private static $tokenKindToText;
1414

15+
/**
16+
* @param int $kind (must be a valid token kind)
17+
* @return string
18+
*/
19+
public static function getTextForTokenKind($kind) {
20+
if (!isset(self::$tokenKindToText)) {
21+
self::initTokenKindToText();
22+
}
23+
return self::$tokenKindToText[$kind];
24+
}
25+
26+
/**
27+
* @return string[]
28+
*/
29+
private static function initTokenKindToText() {
30+
self::$tokenKindToText = \array_flip(\array_merge(
31+
TokenStringMaps::OPERATORS_AND_PUNCTUATORS,
32+
TokenStringMaps::KEYWORDS,
33+
TokenStringMaps::RESERVED_WORDS
34+
));
35+
}
36+
1537
/**
1638
* Returns the diagnostic for $node, or null.
17-
* @param \Microsoft\PhpParser\Node $node
39+
* @param \Microsoft\PhpParser\Node|\Microsoft\PhpParser\Token $node
1840
* @return Diagnostic|null
1941
*/
2042
public static function checkDiagnostics($node) {
21-
if (!isset(self::$tokenKindToText)) {
22-
self::$tokenKindToText = \array_flip(\array_merge(
23-
TokenStringMaps::OPERATORS_AND_PUNCTUATORS,
24-
TokenStringMaps::KEYWORDS,
25-
TokenStringMaps::RESERVED_WORDS
26-
));
43+
if ($node instanceof Token) {
44+
if (\get_class($node) === Token::class) {
45+
return null;
46+
}
47+
return self::checkDiagnosticForUnexpectedToken($node);
2748
}
2849

29-
if ($node instanceof SkippedToken) {
50+
if ($node instanceof Node) {
51+
return $node->getDiagnosticForNode();
52+
}
53+
return null;
54+
}
55+
56+
/**
57+
* @param Token $token
58+
* @return Diagnostic|null
59+
*/
60+
private static function checkDiagnosticForUnexpectedToken($token) {
61+
if (!isset(self::$tokenKindToText)) {
62+
self::initTokenKindToText();
63+
}
64+
if ($token instanceof SkippedToken) {
3065
// TODO - consider also attaching parse context information to skipped tokens
3166
// this would allow us to provide more helpful error messages that inform users what to do
3267
// about the problem rather than simply pointing out the mistake.
3368
return new Diagnostic(
3469
DiagnosticKind::Error,
3570
"Unexpected '" .
36-
(self::$tokenKindToText[$node->kind]
37-
?? Token::getTokenKindNameFromValue($node->kind)) .
71+
(self::$tokenKindToText[$token->kind]
72+
?? Token::getTokenKindNameFromValue($token->kind)) .
3873
"'",
39-
$node->start,
40-
$node->getEndPosition() - $node->start
74+
$token->start,
75+
$token->getEndPosition() - $token->start
4176
);
42-
} elseif ($node instanceof MissingToken) {
77+
} elseif ($token instanceof MissingToken) {
4378
return new Diagnostic(
4479
DiagnosticKind::Error,
4580
"'" .
46-
(self::$tokenKindToText[$node->kind]
47-
?? Token::getTokenKindNameFromValue($node->kind)) .
81+
(self::$tokenKindToText[$token->kind]
82+
?? Token::getTokenKindNameFromValue($token->kind)) .
4883
"' expected.",
49-
$node->start,
50-
$node->getEndPosition() - $node->start
84+
$token->start,
85+
$token->getEndPosition() - $token->start
5186
);
5287
}
53-
54-
if ($node === null || $node instanceof Token) {
55-
return null;
56-
}
57-
58-
if ($node instanceof Node) {
59-
if ($node instanceof Node\MethodDeclaration) {
60-
foreach ($node->modifiers as $modifier) {
61-
if ($modifier->kind === TokenKind::VarKeyword) {
62-
return new Diagnostic(
63-
DiagnosticKind::Error,
64-
"Unexpected modifier '" . self::$tokenKindToText[$modifier->kind] . "'",
65-
$modifier->start,
66-
$modifier->length
67-
);
68-
}
69-
}
70-
}
71-
elseif ($node instanceof Node\Statement\NamespaceUseDeclaration) {
72-
if (
73-
$node->useClauses != null
74-
&& \count($node->useClauses->children) > 1
75-
) {
76-
foreach ($node->useClauses->children as $useClause) {
77-
if($useClause instanceof Node\NamespaceUseClause && !is_null($useClause->openBrace)) {
78-
return new Diagnostic(
79-
DiagnosticKind::Error,
80-
"; expected.",
81-
$useClause->getEndPosition(),
82-
1
83-
);
84-
}
85-
}
86-
}
87-
}
88-
else if ($node instanceof Node\Statement\BreakOrContinueStatement) {
89-
if ($node->breakoutLevel === null) {
90-
return null;
91-
}
92-
93-
$breakoutLevel = $node->breakoutLevel;
94-
while ($breakoutLevel instanceof Node\Expression\ParenthesizedExpression) {
95-
$breakoutLevel = $breakoutLevel->expression;
96-
}
97-
98-
if (
99-
$breakoutLevel instanceof Node\NumericLiteral
100-
&& $breakoutLevel->children->kind === TokenKind::IntegerLiteralToken
101-
) {
102-
$literalString = $breakoutLevel->getText();
103-
$firstTwoChars = \substr($literalString, 0, 2);
104-
105-
if ($firstTwoChars === '0b' || $firstTwoChars === '0B') {
106-
if (\bindec(\substr($literalString, 2)) > 0) {
107-
return null;
108-
}
109-
}
110-
else if (\intval($literalString, 0) > 0) {
111-
return null;
112-
}
113-
}
114-
115-
if ($breakoutLevel instanceof Token) {
116-
$start = $breakoutLevel->getStartPosition();
117-
}
118-
else {
119-
$start = $breakoutLevel->getStart();
120-
}
121-
$end = $breakoutLevel->getEndPosition();
122-
123-
return new Diagnostic(
124-
DiagnosticKind::Error,
125-
"Positive integer literal expected.",
126-
$start,
127-
$end - $start
128-
);
129-
}
130-
}
131-
return null;
13288
}
13389

13490
/**
@@ -139,11 +95,14 @@ public static function checkDiagnostics($node) {
13995
public static function getDiagnostics(Node $n) : array {
14096
$diagnostics = [];
14197

142-
foreach ($n->getDescendantNodesAndTokens() as $node) {
98+
/**
99+
* @param \Microsoft\PhpParser\Node|\Microsoft\PhpParser\Token $node
100+
*/
101+
$n->walkDescendantNodesAndTokens(function($node) use (&$diagnostics) {
143102
if (($diagnostic = self::checkDiagnostics($node)) !== null) {
144103
$diagnostics[] = $diagnostic;
145104
}
146-
}
105+
});
147106

148107
return $diagnostics;
149108
}

src/Node.php

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,17 +158,54 @@ public function getDescendantNodesAndTokens(callable $shouldDescendIntoChildrenF
158158
// TODO - write unit tests to prove invariants
159159
// (concatenating all descendant Tokens should produce document, concatenating all Nodes should produce document)
160160
foreach ($this->getChildNodesAndTokens() as $child) {
161+
// Check possible types of $child, most frequent first
161162
if ($child instanceof Node) {
162163
yield $child;
163-
if ($shouldDescendIntoChildrenFn == null || $shouldDescendIntoChildrenFn($child)) {
164-
yield from $child->getDescendantNodesAndTokens($shouldDescendIntoChildrenFn);
164+
if ($shouldDescendIntoChildrenFn === null || $shouldDescendIntoChildrenFn($child)) {
165+
yield from $child->getDescendantNodesAndTokens($shouldDescendIntoChildrenFn);
165166
}
166167
} elseif ($child instanceof Token) {
167168
yield $child;
168169
}
169170
}
170171
}
171172

173+
/**
174+
* Iterate over all descendant Nodes and Tokens, calling $callback.
175+
* This can often be faster than getDescendantNodesAndTokens if you just need to call something and don't be a generator.
176+
*
177+
* @param callable $callback a callback that accepts Node|Token
178+
* @param callable|null $shouldDescendIntoChildrenFn
179+
* @return void
180+
*/
181+
public function walkDescendantNodesAndTokens(\Closure $callback, callable $shouldDescendIntoChildrenFn = null) {
182+
// TODO - write unit tests to prove invariants
183+
// (concatenating all descendant Tokens should produce document, concatenating all Nodes should produce document)
184+
foreach (static::CHILD_NAMES as $name) {
185+
$child = $this->$name;
186+
// Check possible types of $child, most frequent first
187+
if ($child instanceof Token) {
188+
$callback($child);
189+
} elseif ($child instanceof Node) {
190+
$callback($child);
191+
if ($shouldDescendIntoChildrenFn === null || $shouldDescendIntoChildrenFn($child)) {
192+
$child->walkDescendantNodesAndTokens($callback, $shouldDescendIntoChildrenFn);
193+
}
194+
} elseif (\is_array($child)) {
195+
foreach ($child as $childElement) {
196+
if ($childElement instanceof Token) {
197+
$callback($childElement);
198+
} elseif ($childElement instanceof Node) {
199+
$callback($childElement);
200+
if ($shouldDescendIntoChildrenFn === null || $shouldDescendIntoChildrenFn($childElement)) {
201+
$childElement->walkDescendantNodesAndTokens($callback, $shouldDescendIntoChildrenFn);
202+
}
203+
}
204+
}
205+
}
206+
}
207+
}
208+
172209
/**
173210
* Gets a generator containing all descendant Nodes.
174211
* @param callable|null $shouldDescendIntoChildrenFn
@@ -639,4 +676,13 @@ private function addToImportTable($alias, $functionOrConst, $namespaceNameParts,
639676
}
640677
return array($namespaceImportTable, $functionImportTable, $constImportTable);
641678
}
679+
680+
/**
681+
* This is overridden in subclasses
682+
* @return Diagnostic|null - Callers should use DiagnosticsProvider::getDiagnostics instead
683+
* @internal
684+
*/
685+
public function getDiagnosticForNode() {
686+
return null;
687+
}
642688
}

src/Node/MethodDeclaration.php

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66

77
namespace Microsoft\PhpParser\Node;
88

9+
use Microsoft\PhpParser\Diagnostic;
10+
use Microsoft\PhpParser\DiagnosticKind;
11+
use Microsoft\PhpParser\DiagnosticsProvider;
912
use Microsoft\PhpParser\FunctionLike;
1013
use Microsoft\PhpParser\Node;
1114
use Microsoft\PhpParser\Token;
@@ -52,4 +55,22 @@ public function isStatic() : bool {
5255
public function getName() {
5356
return $this->name->getText($this->getFileContents());
5457
}
58+
59+
/**
60+
* @return Diagnostic|null - Callers should use DiagnosticsProvider::getDiagnostics instead
61+
* @internal
62+
* @override
63+
*/
64+
public function getDiagnosticForNode() {
65+
foreach ($this->modifiers as $modifier) {
66+
if ($modifier->kind === TokenKind::VarKeyword) {
67+
return new Diagnostic(
68+
DiagnosticKind::Error,
69+
"Unexpected modifier '" . DiagnosticsProvider::getTextForTokenKind($modifier->kind) . "'",
70+
$modifier->start,
71+
$modifier->length
72+
);
73+
}
74+
}
75+
}
5576
}

src/Node/Statement/BreakOrContinueStatement.php

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,13 @@
66

77
namespace Microsoft\PhpParser\Node\Statement;
88

9+
use Microsoft\PhpParser\Diagnostic;
10+
use Microsoft\PhpParser\DiagnosticKind;
11+
use Microsoft\PhpParser\Node;
912
use Microsoft\PhpParser\Node\Expression;
1013
use Microsoft\PhpParser\Node\StatementNode;
1114
use Microsoft\PhpParser\Token;
15+
use Microsoft\PhpParser\TokenKind;
1216

1317
class BreakOrContinueStatement extends StatementNode {
1418
/** @var Token */
@@ -23,4 +27,52 @@ class BreakOrContinueStatement extends StatementNode {
2327
'breakoutLevel',
2428
'semicolon'
2529
];
30+
31+
/**
32+
* @return Diagnostic|null - Callers should use DiagnosticsProvider::getDiagnostics instead
33+
* @internal
34+
* @override
35+
*/
36+
public function getDiagnosticForNode() {
37+
if ($this->breakoutLevel === null) {
38+
return null;
39+
}
40+
41+
$breakoutLevel = $this->breakoutLevel;
42+
while ($breakoutLevel instanceof Node\Expression\ParenthesizedExpression) {
43+
$breakoutLevel = $breakoutLevel->expression;
44+
}
45+
46+
if (
47+
$breakoutLevel instanceof Node\NumericLiteral
48+
&& $breakoutLevel->children->kind === TokenKind::IntegerLiteralToken
49+
) {
50+
$literalString = $breakoutLevel->getText();
51+
$firstTwoChars = \substr($literalString, 0, 2);
52+
53+
if ($firstTwoChars === '0b' || $firstTwoChars === '0B') {
54+
if (\bindec(\substr($literalString, 2)) > 0) {
55+
return null;
56+
}
57+
}
58+
else if (\intval($literalString, 0) > 0) {
59+
return null;
60+
}
61+
}
62+
63+
if ($breakoutLevel instanceof Token) {
64+
$start = $breakoutLevel->getStartPosition();
65+
}
66+
else {
67+
$start = $breakoutLevel->getStart();
68+
}
69+
$end = $breakoutLevel->getEndPosition();
70+
71+
return new Diagnostic(
72+
DiagnosticKind::Error,
73+
"Positive integer literal expected.",
74+
$start,
75+
$end - $start
76+
);
77+
}
2678
}

0 commit comments

Comments
 (0)