Skip to content

Commit

Permalink
Merge pull request #48 from regisb/regisb/multiline-with-singlelineco…
Browse files Browse the repository at this point in the history
…mment

[BD-21] Multiline annotations with single-line comment prefix ("#")
  • Loading branch information
robrap authored Sep 2, 2020
2 parents 12dc1a7 + 097db99 commit f61565d
Show file tree
Hide file tree
Showing 6 changed files with 95 additions and 24 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ Change Log

.. There should always be an "Unreleased" section for changes pending release.
[0.6.0] - 2020-08-27
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

* Add support for multiline annotations for lines prefixed with single-line comment signs ("#")

[0.5.1] - 2020-08-25
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Expand Down
2 changes: 1 addition & 1 deletion code_annotations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
Extensible tools for parsing annotations in codebases.
"""

__version__ = '0.5.1'
__version__ = '0.6.0'
80 changes: 57 additions & 23 deletions code_annotations/extensions/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,22 +42,27 @@ class SimpleRegexAnnotationExtension(AnnotationExtension, metaclass=ABCMeta):
# Javascript and Python extensions for examples.
lang_comment_definition = None

r"""
This format string/regex finds all comments in the file. The format tokens will be replaced with the
language-specific comment definitions defined in the sub-classes.
{multi_start} - start of the language-specific multi-line comment (ex. /*)
([\d\D]*?) - capture all of the characters...
{multi_end} - until you find the end of the language-specific multi-line comment (ex. */)
| - If you don't find any of those...
{single} - start by finding the single-line comment token (ex. //)
(.*) - and capture all characters until the end of the line
Returns a 2-tuple of:
- ("Comment text", None) in the case of a multi-line comment OR
- (None, "Comment text") in the case of a single-line comment
# This format string/regex finds all comments in the file. The format tokens will be replaced with the
# language-specific comment definitions defined in the sub-classes.
#
# Match groupdict will contain two named subgroups: 'comment' and 'prefixed_comment', of which at most
# one will be non-None.
comment_regex_fmt = r"""
{multi_start} # start of the language-specific multi-line comment (ex. /*)
(?P<comment> # Look for a multiline comment
[\d\D]*? # capture all of the characters...
)
{multi_end} # until you find the end of the language-specific multi-line comment (ex. */)
| # If you don't find any of those...
(?P<prefixed_comment> # Look for a group of single-line comments
(?: # Non-capture mode
{single} # start by finding the single-line comment token (ex. //)
.* # and capture all characters until the end of the line
\n? # followed by an optional newline
\ * # and some empty space
)* # multiple times
)
"""
comment_regex_fmt = r'{multi_start}([\d\D]*?){multi_end}|{single}(.*)'

def __init__(self, config, echo):
"""
Expand All @@ -74,7 +79,12 @@ def __init__(self, config, echo):

# pylint: disable=not-a-mapping
self.comment_regex = re.compile(
self.comment_regex_fmt.format(**self.lang_comment_definition)
self.comment_regex_fmt.format(**self.lang_comment_definition),
flags=re.VERBOSE
)
self.prefixed_comment_regex = re.compile(
r"^ *{single}".format(**self.lang_comment_definition),
flags=re.MULTILINE
)

# Parent class will allow this class to populate self.strings_to_search via
Expand Down Expand Up @@ -102,15 +112,15 @@ def search(self, file_handle):
if any(anno in txt for anno in self.config.annotation_tokens):
fname = clean_abs_path(file_handle.name, self.config.source_path)

# Iterate on all comments: both prefixed- and non-prefixed.
for match in self.comment_regex.finditer(txt):
# Should only be one match
comment_content = [item for item in match.groups() if item is not None][0]
for inner_match in self.query.finditer(comment_content):
# Get the line number by counting newlines + 1 (for the first line).
# Note that this is the line number of the beginning of the comment, not the
# annotation token itself.
line = txt.count('\n', 0, match.start()) + 1
# Get the line number by counting newlines + 1 (for the first line).
# Note that this is the line number of the beginning of the comment, not the
# annotation token itself.
line = txt.count('\n', 0, match.start()) + 1

comment_content = self._find_comment_content(match)
for inner_match in self.query.finditer(comment_content):
try:
annotation_token = inner_match.group('token')
annotation_data = inner_match.group('data')
Expand All @@ -131,3 +141,27 @@ def search(self, file_handle):
})

return found_annotations

def _find_comment_content(self, match):
    """
    Return the comment content as text.

    Args:
        match (re.Match): one of the matches of the self.comment_regex regular expression. Its
            groupdict must provide the "comment" and "prefixed_comment" named groups.

    Returns:
        str: the body of a delimited multi-line comment, unchanged, when the "comment" group
            took part in the match; otherwise the "prefixed_comment" text with its single-line
            comment tokens stripped.
    """
    groups = match.groupdict()

    # Compare against None instead of relying on truthiness: an empty delimited comment
    # yields "" for "comment" while "prefixed_comment" is None, and None must not be handed
    # to the token-stripping regex.
    comment_content = groups["comment"]
    if comment_content is not None:
        return comment_content

    # Find single-line comments and strip comment tokens
    return self._strip_single_line_comment_tokens(groups["prefixed_comment"])

def _strip_single_line_comment_tokens(self, content):
    """
    Remove the single-line comment tokens from a comment text.

    Args:
        content (str): token-prefixed multi-line comment string.

    Returns:
        str: the text with every match of self.prefixed_comment_regex removed.
    """
    token_pattern = self.prefixed_comment_regex
    stripped = token_pattern.sub("", content)
    return stripped
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Docstring
#.. pii: A long description that
# spans multiple
# lines
# A comment that is not indented and not part of the above multi-line annotation
#.. pii_types: id, name
# Some comment that comes after the multiple-line annotation
16 changes: 16 additions & 0 deletions tests/extensions/test_base_extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,19 @@ def test_nothing_found():
r = FakeExtension(config, VerboseEcho())
with open('tests/extensions/base_test_files/empty.foo') as f:
r.search(f)


def test_strip_single_line_comment_tokens():
    """
    Check that the single-line comment token is stripped from the start of every line.
    """
    config = FakeConfig()
    extension = FakeExtension(config, VerboseEcho())

    # The fixtures use explicit "\n" escapes rather than triple-quoted blocks so that the
    # significant leading whitespace of each line cannot be lost to re-indentation.
    text = "baz line1\nbaz line2\nbazline3\nbaz line4"

    # NOTE(review): presumably FakeExtension's single-line comment token is "baz" -- confirm
    # against its lang_comment_definition. Only the token is removed, so the space that
    # separates it from the text is expected to remain on lines 1, 2 and 4.
    expected_result = " line1\n line2\nline3\n line4"

    # pylint: disable=protected-access
    assert expected_result == extension._strip_single_line_comment_tokens(text)
9 changes: 9 additions & 0 deletions tests/extensions/test_extension_python.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,15 @@ def test_grouping_and_choice_failures(test_file, expected_exit_code, expected_me
Multi-line and multi-paragraph.""")
]
),
(
'multiline_singlelinecomment.pyt',
[
('.. pii:', """A long description that
spans multiple
lines"""),
('.. pii_types:', 'id, name'),
]
),
])
def test_multi_line_annotations(test_file, annotations):
config = AnnotationConfig('tests/test_configurations/.annotations_test')
Expand Down

0 comments on commit f61565d

Please sign in to comment.