Skip to content

Commit

Permalink
Merge pull request #17875 from github/tausbn/python-improve-parser-logging-and-timing
Browse files Browse the repository at this point in the history

Python: Improve parser logging/timing/customisability
  • Loading branch information
tausbn authored Nov 1, 2024
2 parents 2892f0f + 2ef3ae9 commit 0bb5b4b
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 6 deletions.
1 change: 1 addition & 0 deletions python/extractor/semmle/python/imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def get_imports(self, module, loaded_module):
def get_import_nodes(self, loaded_module):
'Return list of AST nodes representing imports'
try:
self.logger.debug("Looking for imports in %s", loaded_module.path)
return imports_from_ast(loaded_module.py_ast)
except Exception as ex:
if isinstance(ex, SyntaxError):
Expand Down
25 changes: 21 additions & 4 deletions python/extractor/semmle/python/modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,14 +109,25 @@ def ast(self):
def old_py_ast(self):
# The py_ast is the raw ast from the Python parser.
if self._py_ast is None:
self._py_ast = semmle.python.parser.parse(self.tokens, self.logger)
with timers["old_py_ast"]:
self.logger.debug("Trying old parser on %s", self.path)
self._py_ast = semmle.python.parser.parse(self.tokens, self.logger)
self.logger.debug("Old parser successful on %s", self.path)
else:
self.logger.debug("Found (during old_py_ast) parse tree for %s in cache", self.path)
return self._py_ast

@property
def py_ast(self):
try:
# First, try to parse the source with the old Python parser.
return self.old_py_ast
# If the `CODEQL_PYTHON_DISABLE_OLD_PARSER` flag is present, we do not try to use the
# old parser, and instead jump straight to the exception handler.
if os.environ.get("CODEQL_PYTHON_DISABLE_OLD_PARSER"):
self.logger.debug("Old parser disabled, skipping old parse attempt for %s", self.path)
raise Exception("Skipping old parser")
# Otherwise, we first try to parse the source with the old Python parser.
self._py_ast = self.old_py_ast
return self._py_ast
except Exception as ex:
# If that fails, try to parse the source with the new Python parser (unless it has been
# explicitly disabled).
Expand All @@ -131,7 +142,13 @@ def py_ast(self):
raise SyntaxError("Exception %s while parsing %s" % (ex, self.path))
else:
try:
self._py_ast = semmle.python.parser.tsg_parser.parse(self.path, self.logger)
with timers["tsg_py_ast"]:
if self._py_ast is None:
self.logger.debug("Trying tsg-python on %s", self.path)
self._py_ast = semmle.python.parser.tsg_parser.parse(self.path, self.logger)
self.logger.debug("tsg-python successful on %s", self.path)
else:
self.logger.debug("Found (during py_ast) parse tree for %s in cache", self.path)
return self._py_ast
except SyntaxError as ex:
raise ex
Expand Down
2 changes: 1 addition & 1 deletion python/extractor/semmle/python/parser/tsg_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def read_tsg_python_output(path, logger):
p.stdout.close()
p.terminate()
p.wait()
logger.info("Read {} nodes and {} edges from TSG output".format(len(node_attr), len(edge_attr)))
logger.debug("Read {} nodes and {} edges from TSG output".format(len(node_attr), len(edge_attr)))
return node_attr, edge_attr

def evaluate_string(s):
Expand Down
2 changes: 1 addition & 1 deletion python/extractor/semmle/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

#Semantic version of extractor.
#Update this if any changes are made
VERSION = "7.1.0"
VERSION = "7.1.1"

PY_EXTENSIONS = ".py", ".pyw"

Expand Down

0 comments on commit 0bb5b4b

Please sign in to comment.