Compute TokenList.value dynamically #623

Closed
1 change: 1 addition & 0 deletions sqlparse/__init__.py
@@ -38,6 +38,7 @@ def parsestream(stream, encoding=None):
     :returns: A generator of :class:`~sqlparse.sql.Statement` instances.
     """
     stack = engine.FilterStack()
+    stack.stmtprocess.append(stack.grouping_filter)
     stack.enable_grouping()
     return stack.run(stream, encoding)

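Callers of the public API are unaffected: parsestream() still yields grouped Statement objects, grouping is just routed through the new filter object. A quick sketch of that unchanged behavior (standard sqlparse usage):

    import io

    import sqlparse

    # parsestream() still yields grouped Statement instances.
    for stmt in sqlparse.parsestream(io.StringIO('select 1; select 2;')):
        print(stmt.get_type(), repr(str(stmt)))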
9 changes: 3 additions & 6 deletions sqlparse/engine/filter_stack.py
@@ -7,8 +7,8 @@
 
 """filter"""
 
+from sqlparse import filters
 from sqlparse import lexer
-from sqlparse.engine import grouping
 from sqlparse.engine.statement_splitter import StatementSplitter


@@ -17,10 +17,10 @@ def __init__(self):
         self.preprocess = []
         self.stmtprocess = []
         self.postprocess = []
-        self._grouping = False
+        self.grouping_filter = filters.GroupingFilter()
 
     def enable_grouping(self):
-        self._grouping = True
+        self.grouping_filter.enable()

A reviewer commented on the new grouping_filter attribute:

I was working off of this change today and found myself wanting to initialize it to None on line 20, then build the GroupingFilter and append it to stmtprocess here if it is None.

Contributor Author replied:

The reason I went with the approach that I did is that I wanted to ensure that grouping continues to happen before the other filters are run. Looking at the usages within sqlparse, implementing your approach would keep the existing behavior. I also know that Django Debug Toolbar calls .enable_grouping() manually, but it shouldn't be affected by your approach either.

The only concern would be if some other code appended a filter that requires grouping and called .enable_grouping() only after appending that filter; in that case, things would break.
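For concreteness, a minimal sketch of the suggested alternative (hypothetical code reconstructed from the comment above, not part of this PR; it assumes this branch's GroupingFilter):

    from sqlparse import filters

    class LazyGroupingFilterStack:
        # Hypothetical variant: build the GroupingFilter and append it to
        # stmtprocess on the first enable_grouping() call.
        def __init__(self):
            self.preprocess = []
            self.stmtprocess = []
            self.postprocess = []
            self.grouping_filter = None

        def enable_grouping(self):
            if self.grouping_filter is None:
                self.grouping_filter = filters.GroupingFilter()
                self.stmtprocess.append(self.grouping_filter)
            self.grouping_filter.enable()

Under this variant, the grouping filter's position in stmtprocess is fixed at the moment enable_grouping() is first called, which is exactly the ordering concern raised above.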


     def run(self, sql, encoding=None):
         stream = lexer.tokenize(sql, encoding)
 
@@ -32,9 +32,6 @@ def run(self, sql, encoding=None):
 
         # Output: Stream processed Statements
         for stmt in stream:
-            if self._grouping:
-                stmt = grouping.group(stmt)
-
             for filter_ in self.stmtprocess:
                 filter_.process(stmt)
 
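For reference, a small end-to-end sketch of the new pipeline (assuming this branch; it mirrors the parsestream() change in sqlparse/__init__.py above):

    from sqlparse import engine

    stack = engine.FilterStack()
    # Grouping is now an ordinary statement filter; appending it first
    # preserves the old "group before the other filters" ordering.
    stack.stmtprocess.append(stack.grouping_filter)
    stack.enable_grouping()
    for stmt in stack.run('select 1; select 2;'):
        print(stmt.get_type(), repr(str(stmt)))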
2 changes: 2 additions & 0 deletions sqlparse/filters/__init__.py
@@ -5,6 +5,7 @@
 # This module is part of python-sqlparse and is released under
 # the BSD License: https://opensource.org/licenses/BSD-3-Clause
 
+from sqlparse.filters.others import GroupingFilter
 from sqlparse.filters.others import SerializerUnicode
 from sqlparse.filters.others import StripCommentsFilter
 from sqlparse.filters.others import StripWhitespaceFilter
@@ -22,6 +23,7 @@
 from sqlparse.filters.aligned_indent import AlignedIndentFilter
 
 __all__ = [
+    'GroupingFilter',
     'SerializerUnicode',
     'StripCommentsFilter',
     'StripWhitespaceFilter',
35 changes: 24 additions & 11 deletions sqlparse/filters/others.py
@@ -8,16 +8,30 @@
 import re
 
 from sqlparse import sql, tokens as T
+from sqlparse.engine import grouping
 from sqlparse.utils import split_unquoted_newlines
 
 
-class StripCommentsFilter:
+class GroupingFilter:
+    def __init__(self):
+        self._enabled = False
+
+    def enable(self):
+        self._enabled = True
+
+    def process(self, stmt):
+        if self._enabled:
+            return grouping.group(stmt)
+        else:
+            return stmt
+
+
+class StripCommentsFilter:
     @staticmethod
-    def _process(tlist):
+    def process(stmt):
         def get_next_comment():
             # TODO(andi) Comment types should be unified, see related issue38
-            return tlist.token_next_by(i=sql.Comment, t=T.Comment)
+            return stmt.token_next_by(i=sql.Comment, t=T.Comment)
 
         def _get_insert_token(token):
             """Returns either a whitespace or the line breaks from token."""
@@ -28,10 +42,12 @@ def _get_insert_token(token):
             else:
                 return sql.Token(T.Whitespace, ' ')
 
+        grouping.group_comments(stmt)
+
         tidx, token = get_next_comment()
         while token:
-            pidx, prev_ = tlist.token_prev(tidx, skip_ws=False)
-            nidx, next_ = tlist.token_next(tidx, skip_ws=False)
+            pidx, prev_ = stmt.token_prev(tidx, skip_ws=False)
+            nidx, next_ = stmt.token_next(tidx, skip_ws=False)
             # Replace by whitespace if prev and next exist and if they're not
             # whitespaces. This doesn't apply if prev or next is a parenthesis.
             if (prev_ is None or next_ is None
@@ -40,16 +56,13 @@ def _get_insert_token(token):
             # Insert a whitespace to ensure the following SQL produces
             # a valid SQL (see #425).
             if prev_ is not None and not prev_.match(T.Punctuation, '('):
-                tlist.tokens.insert(tidx, _get_insert_token(token))
-                tlist.tokens.remove(token)
+                stmt.tokens.insert(tidx, _get_insert_token(token))
+                stmt.tokens.remove(token)
             else:
-                tlist.tokens[tidx] = _get_insert_token(token)
+                stmt.tokens[tidx] = _get_insert_token(token)
 
             tidx, token = get_next_comment()
 
-    def process(self, stmt):
-        [self.process(sgroup) for sgroup in stmt.get_sublists()]
-        StripCommentsFilter._process(stmt)
         return stmt


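Standalone, the new filter is inert until enabled. A rough sketch of that behavior (assuming this branch; StatementSplitter and lexer are used here the same way FilterStack.run uses them):

    from sqlparse import filters, lexer
    from sqlparse.engine.statement_splitter import StatementSplitter

    stmt = next(StatementSplitter().process(lexer.tokenize('select 1')))
    f = filters.GroupingFilter()
    f.process(stmt)   # disabled: returns stmt untouched
    f.enable()
    f.process(stmt)   # enabled: delegates to grouping.group(stmt)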
13 changes: 8 additions & 5 deletions sqlparse/formatter.py
@@ -149,14 +149,17 @@ def build_filter_stack(stack, options):
     stack.preprocess.append(filters.TruncateStringFilter(
         width=options['truncate_strings'], char=options['truncate_char']))
 
-    if options.get('use_space_around_operators', False):
-        stack.enable_grouping()
-        stack.stmtprocess.append(filters.SpacesAroundOperatorsFilter())
+    # Before grouping
+    if options.get('strip_comments'):
+        stack.stmtprocess.append(filters.StripCommentsFilter())
 
-    if options.get('strip_comments'):
+    # Grouping
+    stack.stmtprocess.append(stack.grouping_filter)
+
+    # After grouping
+    if options.get('use_space_around_operators', False):
         stack.enable_grouping()
-        stack.stmtprocess.append(filters.StripCommentsFilter())
+        stack.stmtprocess.append(filters.SpacesAroundOperatorsFilter())
 
     if options.get('strip_whitespace') or options.get('reindent'):
         stack.enable_grouping()
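The reordering works because StripCommentsFilter now runs before general grouping and calls grouping.group_comments() itself, while SpacesAroundOperatorsFilter still requires fully grouped statements and therefore keeps enabling grouping. The public API should behave as before, for example:

    import sqlparse

    # Comments are stripped before grouping; operator spacing runs after.
    print(sqlparse.format('select 1+2 -- answer',
                          strip_comments=True,
                          use_space_around_operators=True))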
75 changes: 46 additions & 29 deletions sqlparse/sql.py
@@ -36,26 +36,14 @@ def get_alias(self):
         return self._get_first_name(reverse=True)
 
 
-class Token:
-    """Base class for all other classes in this module.
-
-    It represents a single token and has two instance attributes:
-    ``value`` is the unchanged value of the token and ``ttype`` is
-    the type of the token.
-    """
+class TokenBase:
+    """Base class for all other classes in this module."""
 
-    __slots__ = ('value', 'ttype', 'parent', 'normalized', 'is_keyword',
-                 'is_group', 'is_whitespace')
+    __slots__ = ('parent')
 
-    def __init__(self, ttype, value):
-        value = str(value)
-        self.value = value
-        self.ttype = ttype
+    def __init__(self):
         self.parent = None
-        self.is_group = False
-        self.is_keyword = ttype in T.Keyword
-        self.is_whitespace = self.ttype in T.Whitespace
-        self.normalized = value.upper() if self.is_keyword else value
 
     def __str__(self):
         return self.value
@@ -72,19 +60,12 @@ def __repr__(self):
         return "<{cls} {q}{value}{q} at 0x{id:2X}>".format(
             id=id(self), **locals())
 
-    def _get_repr_name(self):
-        return str(self.ttype).split('.')[-1]
-
     def _get_repr_value(self):
         raw = str(self)
         if len(raw) > 7:
             raw = raw[:6] + '...'
         return re.sub(r'\s+', ' ', raw)
 
-    def flatten(self):
-        """Resolve subgroups."""
-        yield self
-
     def match(self, ttype, values, regex=False):
         """Checks whether the token matches the given arguments.
@@ -146,24 +127,61 @@ def has_ancestor(self, other):
         return False
 
 
-class TokenList(Token):
+class Token(TokenBase):
+    """A single token.
+
+    It has two instance attributes:
+    ``value`` is the unchanged value of the token and ``ttype`` is
+    the type of the token.
+    """
+    is_group = False
+
+    __slots__ = ('value', 'ttype', 'normalized', 'is_keyword', 'is_whitespace')
+
+    def __init__(self, ttype, value):
+        super().__init__()
+        value = str(value)
+        self.value = value
+        self.ttype = ttype
+        self.is_keyword = ttype in T.Keyword
+        self.is_whitespace = ttype in T.Whitespace
+        self.normalized = value.upper() if self.is_keyword else value
+
+    def _get_repr_name(self):
+        return str(self.ttype).split('.')[-1]
+
+    def flatten(self):
+        """Resolve subgroups."""
+        yield self
+
+
+class TokenList(TokenBase):
     """A group of tokens.
 
-    It has an additional instance attribute ``tokens`` which holds a
-    list of child-tokens.
+    It has one instance attribute ``tokens`` which holds a list of
+    child-tokens.
     """
 
     __slots__ = 'tokens'
 
+    is_group = True
+    ttype = None
+    is_keyword = False
+    is_whitespace = False
+
     def __init__(self, tokens=None):
+        super().__init__()
         self.tokens = tokens or []
         [setattr(token, 'parent', self) for token in self.tokens]
-        super().__init__(None, str(self))
-        self.is_group = True
 
-    def __str__(self):
+    @property
+    def value(self):
         return ''.join(token.value for token in self.flatten())
 
+    @property
+    def normalized(self):
+        return self.value
+
     # weird bug
     # def __len__(self):
     #     return len(self.tokens)
@@ -322,7 +340,6 @@ def group_tokens(self, grp_cls, start, end, include_end=True,
             grp = start
             grp.tokens.extend(subtokens)
             del self.tokens[start_idx + 1:end_idx]
-            grp.value = str(start)
         else:
             subtokens = self.tokens[start_idx:end_idx]
             grp = grp_cls(subtokens)
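What the PR title means in practice: TokenList.value and TokenList.normalized are now properties recomputed from the children on every access, so group_tokens() no longer needs to refresh a cached grp.value, and mutating a leaf token is immediately visible on its ancestors. A short sketch (assuming this branch is installed):

    import sqlparse

    stmt = sqlparse.parse('select 1')[0]   # a Statement is a TokenList
    leaf = next(stmt.flatten())            # the leading 'select' keyword token
    leaf.value = 'SELECT'                  # mutate the leaf in place
    print(stmt.value)                      # recomputed on access: 'SELECT 1'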