Skip to content

Commit

Permalink
[BD-21] Implement multi-line code annotations
Browse files Browse the repository at this point in the history
Make it possible (again) to write multi-line code annotations, such as:

    .. pi: This is an annotation that
      spans multiple lines and allows developers to
      write more extensive docs.

The only problem with multi-line annotations is that we did not find a
way to get rid of empty spaces that prefix every new line after the
first.
  • Loading branch information
regisb committed Jul 22, 2020
1 parent bb3f1aa commit 4cfcf96
Show file tree
Hide file tree
Showing 11 changed files with 202 additions and 64 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ Unreleased

*

[0.4.0] - 2020-07-22
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

* Add support for multi-line code annotations

[0.3.4] - 2020-05-06
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Expand Down
2 changes: 1 addition & 1 deletion code_annotations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
Extensible tools for parsing annotations in codebases.
"""

__version__ = '0.3.4'
__version__ = '0.4.0'
39 changes: 11 additions & 28 deletions code_annotations/extensions/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import re
from abc import ABCMeta, abstractmethod

from code_annotations.helpers import clean_abs_path, get_annotation_regex
from code_annotations.helpers import clean_abs_path, clean_annotation, get_annotation_regex


class AnnotationExtension(object, metaclass=ABCMeta):
Expand Down Expand Up @@ -56,24 +56,9 @@ class SimpleRegexAnnotationExtension(AnnotationExtension, metaclass=ABCMeta):
Returns a 2-tuple of:
- ("Comment text", None) in the case of a multi-line comment OR
- (None, "Comment text") in the case of a single-line comment
TODO: Make this handle multi-line annotation comments again.
"""
comment_regex_fmt = r'{multi_start}([\d\D]*?){multi_end}|{single}(.*)'

r"""
This format string/regex finds our annotation token and choices / comments inside a comment:
[\s\S]*? - Strip out any characters between the start of the comment and the annotation
({}) - {} is a Python format string that will be replaced with a regex escaped and
then or-joined to make a list of the annotation tokens we're looking for
Ex: (\.\.\ pii\:\:|\.\.\ pii\_types\:\:)
(.*) - and capture all characters until the end of the line
Returns a 2-tuple of found annotation token and annotation comment
TODO: Make multi line annotation comments work again.
"""
def __init__(self, config, echo):
"""
Set up the extension and create the regexes used to do searches.
Expand Down Expand Up @@ -126,25 +111,23 @@ def search(self, file_handle):
# annotation token itself.
line = txt.count('\n', 0, match.start()) + 1

# No matter how long the regex is, there should only be 2 non-None items,
# with the first being the annotation token and the 2nd being the comment.
cleaned_groups = [item for item in inner_match.groups() if item is not None]

if len(cleaned_groups) != 2: # pragma: no cover
raise Exception('{}::{}: Number of found items in the list is not 2. Found: {}'.format(
try:
annotation_token = inner_match.group('token')
annotation_data = inner_match.group('data')
except IndexError:
# pragma: no cover
raise ValueError('{}::{}: Could not find "data" or "token" groups. Found: {}'.format(
fname,
line,
cleaned_groups
inner_match.groupdict()
))

annotation, comment = cleaned_groups

annotation_token, annotation_data = clean_annotation(annotation_token, annotation_data)
found_annotations.append({
'found_by': self.extension_name,
'filename': fname,
'line_number': line,
'annotation_token': annotation.strip(),
'annotation_data': comment.strip()
'annotation_token': annotation_token,
'annotation_data': annotation_data,
})

return found_annotations
39 changes: 18 additions & 21 deletions code_annotations/find_django.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from django.db import models

from code_annotations.base import BaseSearch
from code_annotations.helpers import fail, get_annotation_regex
from code_annotations.helpers import clean_annotation, fail, get_annotation_regex

DEFAULT_SAFELIST_FILE_PATH = '.annotation_safe_list.yml'

Expand Down Expand Up @@ -108,33 +108,30 @@ def _append_model_annotations(self, model_type, model_id, query, model_annotatio
with open(filename, 'r') as file_handle:
txt = file_handle.read()

for inner_match in query.finditer(model_type.__doc__):
# TODO: This is duplicated code with extensions/base.py
# No matter how long the regex is, there should only be 2 non-None items,
# with the first being the annotation token and the 2nd being the comment.
cleaned_groups = [item for item in inner_match.groups() if item is not None]
# Get the line number by counting newlines + 1 (for the first line).
# Note that this is the line number of the beginning of the comment, not the
# annotation token itself. We find based on the entire code content of the model
# as that seems to be the only way to be sure we're getting the correct line number.
# It is slow and should be replaced if we can find a better way that is accurate.
line = txt.count('\n', 0, txt.find(inspect.getsource(model_type))) + 1

if len(cleaned_groups) != 2: # pragma: no cover
raise Exception('{}: Number of found items in the list is not 2. Found: {}'.format(
for inner_match in query.finditer(model_type.__doc__):
try:
annotation_token = inner_match.group('token')
annotation_data = inner_match.group('data')
except IndexError:
# pragma: no cover
raise ValueError('{}: Could not find "data" or "token" groups. Found: {}'.format(
self.get_model_id(model_type),
cleaned_groups
inner_match.groupdict()
))

annotation, comment = cleaned_groups

# Get the line number by counting newlines + 1 (for the first line).
# Note that this is the line number of the beginning of the comment, not the
# annotation token itself. We find based on the entire code content of the model
# as that seems to be the only way to be sure we're getting the correct line number.
# It is slow and should be replaced if we can find a better way that is accurate.
line = txt.count('\n', 0, txt.find(inspect.getsource(model_type))) + 1

annotation_token, annotation_data = clean_annotation(annotation_token, annotation_data)
model_annotations.append({
'found_by': "django",
'filename': filename,
'line_number': line,
'annotation_token': annotation.strip(),
'annotation_data': comment.strip(),
'annotation_token': annotation_token,
'annotation_data': annotation_data,
'extra': {
'object_id': model_id,
'full_comment': model_type.__doc__.strip()
Expand Down
58 changes: 46 additions & 12 deletions code_annotations/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,25 +113,59 @@ def clean_abs_path(filename_to_clean, parent_path):
def get_annotation_regex(annotation_regexes):
"""
Return the full regex to search inside comments for configured annotations.
A match against the regex will returns a 2-tuple of found annotation token and annotation comment
A successful match against the regex will return two groups of interest: 'token'
and 'data'.
This regular expression supports annotation data that spans multiple lines. To do
so, indent each line after the first by at least two additional leading spaces
(relative to the whitespace that precedes the annotation token). E.g:
.. pii: First line
second line
Unfortunately, the indenting spaces will find their way to the content of the "data" group.
Args:
annotation_regexes: List of re.escaped annotation tokens to search for.
Returns:
Regex ready for searching comments for annotations.
"""
# pylint: disable=pointless-string-statement
r"""
This format string/regex finds our annotation token and choices / comments inside a comment:
"""
annotation_regex = r"""
[\s\S]*? # Strip out any characters between the start of the comment and the annotation
({}) # Python format string that will be replaced with a regex escaped and
# then or-joined to make a list of the annotation tokens we're looking for
# Ex: (\.\.\ pii\:\:|\.\.\ pii\_types\:\:)
(.*) # capture all characters until the end of the line
(?P<space>[\ \t]*) # Leading empty spaces
(?P<token>{tokens}) # Python format string that will be replaced with a
# regex, escaped and then or-joined to make a list
# of the annotation tokens we're looking for
# Ex: (\.\.\ pii\:\:|\.\.\ pii\_types\:\:)
(?P<data> # Captured annotation data
(?: # non-capture mode
. # any non-newline character
| # or new line of multi-line annotation data
(?: # non-capture mode
\n{{1,}} # at least one newline,
(?P=space) # followed by as much space as the prefix,
(?P<indent>\ {{2,}}) # at least two spaces,
(?=[^\ ]) # and a non-space character (look-ahead)
(?!{tokens}) # that does not match any of the token regexes
) #
)* # any number of times
)
"""
annotation_regex = annotation_regex.format('|'.join(annotation_regexes))
annotation_regex = annotation_regex.format(tokens='|'.join(annotation_regexes))
return re.compile(annotation_regex, flags=re.VERBOSE)


def clean_annotation(token: str, data: str) -> tuple:
    """
    Clean an annotation token and its data by stripping surrounding whitespace.

    Only leading and trailing whitespace is removed. Whitespace inside the
    strings (for instance the indentation at the start of the continuation
    lines of multi-line annotation data) is left untouched.

    Args:
        token (str): Annotation token as captured by the annotation regex.
        data (str): Annotation data as captured by the annotation regex.

    Returns:
        (str, str): Tuple of cleaned token, data
    """
    return token.strip(), data.strip()
30 changes: 28 additions & 2 deletions docs/writing_annotations.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@ comments into two parts- the annotation token, and the annotation data.

- Annotation data
Annotation data can either be a simple free text comment that follows the token, or a choice list.
The choices in a choice list are configured in the configuration file and can be separated by spaces or commas when
used in comments. As such, the choices themselves should not contain spaces or commas.
Free text annotations can span multiple lines, provided all lines after the first
are indented by at least two spaces. The choices in a choice list are configured in
the configuration file and can be separated by spaces or commas when used in
comments. As such, the choices themselves should not contain spaces or commas.

The information below applies to both the Static Search and Django Model Search tools, with the exception that the
Django Model Search only looks in model docstrings.
Expand Down Expand Up @@ -51,6 +53,30 @@ When a report is run against this code an entry like this will be generated in t
*Note that the rest of the comment is ignored in the report.*

An annotation can also span multiple lines. For instance:

.. code-block:: python
"""
This function handles setting the price on an item in the database.
.. fun_fact: This code is the only remaining piece of our first commit!
To write long texts, prepend at least two additional spaces at the start
of every line after the first.
"""
This code would result in the following report:

.. code-block:: yaml
- annotation_data: "This code is the only remaining piece of our first commit!\n \
\ To write long texts, prepend at least two additional spaces at the start\n\
\ of every line after the first."
annotation_token: '.. fun_fact:'
filename: foo/bar/something.py
found_by: python
line_number: 1
Configuration for an "async" annotation type, denoted by the annotation token ``.. async:`` and choices denoting the
types of asynchronous processors hooked up to it:

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""
.. pii:
This is an annotation that
spans multiple lines and allows developers to
write more extensive docs.
Comment after annotation and being annotated
"""
6 changes: 6 additions & 0 deletions tests/extensions/python_test_files/multiline_indented.pyt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""
.. pii: A long description that
spans multiple indented
lines
.. pii_types: id, name
"""
15 changes: 15 additions & 0 deletions tests/extensions/python_test_files/multiline_paragraphs.pyt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""
.. pii: This is an annotation that
spans multiple paragraphs.
This allows developers to write even more
extensive docs.
Comment after annotation and being annotated
"""

"""
Docstring
.. pii: Annotation 1 with:
Multi-line and multi-paragraph.
"""
8 changes: 8 additions & 0 deletions tests/extensions/python_test_files/multiline_simple.pyt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""
Docstring
.. pii: A long description that
spans multiple
lines
.. pii_types: id, name
"""
57 changes: 57 additions & 0 deletions tests/extensions/test_extension_python.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
"""
import pytest

from code_annotations.base import AnnotationConfig
from code_annotations.extensions.python import PythonAnnotationExtension
from code_annotations.helpers import VerboseEcho
from tests.helpers import EXIT_CODE_FAILURE, EXIT_CODE_SUCCESS, call_script


Expand Down Expand Up @@ -31,3 +34,57 @@ def test_grouping_and_choice_failures(test_file, expected_exit_code, expected_me

if expected_exit_code == EXIT_CODE_FAILURE:
assert "Search failed due to linting errors!" in result.output


@pytest.mark.parametrize('test_file,annotations', [
(
'multiline_simple.pyt',
[
('.. pii:', """A long description that
spans multiple
lines"""),
('.. pii_types:', 'id, name'),
]
),
(
'multiline_indented.pyt',
[
('.. pii:', """A long description that
spans multiple indented
lines"""),
('.. pii_types:', 'id, name'),
]
),
(
'multiline_empty_first_line.pyt',
[
('.. pii:', """This is an annotation that
spans multiple lines and allows developers to
write more extensive docs."""),
]
),
(
'multiline_paragraphs.pyt',
[
('.. pii:', """This is an annotation that
spans multiple paragraphs.
This allows developers to write even more
extensive docs."""),
('.. pii:', """Annotation 1 with:
Multi-line and multi-paragraph.""")
]
),
])
def test_multi_line_annotations(test_file, annotations):
    """
    Search a Python test file and compare the found annotations with the expected ones.

    Args:
        test_file (str): Name of a file in tests/extensions/python_test_files/.
        annotations (list): Expected (annotation_token, annotation_data) pairs,
            in the order in which they should be found.
    """
    config = AnnotationConfig('tests/test_configurations/.annotations_test')
    annotator = PythonAnnotationExtension(config, VerboseEcho())

    with open('tests/extensions/python_test_files/{}'.format(test_file)) as fi:
        result_annotations = annotator.search(fi)

    # Check the count first: zip() below would silently ignore extra items.
    assert len(annotations) == len(result_annotations)
    for annotation, result_annotation in zip(annotations, result_annotations):
        assert result_annotation['annotation_token'] == annotation[0]
        assert result_annotation['annotation_data'] == annotation[1]

0 comments on commit 4cfcf96

Please sign in to comment.