Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(chartdata): disable sqlparse calls for chart data requests to improve querying performance #19572

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
6 changes: 5 additions & 1 deletion superset/common/query_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,11 @@ def validate(
try:
self._validate_there_are_no_missing_series()
self._validate_no_have_duplicate_labels()
self._sanitize_filters()
# problematic line where calling sqlparser.parse() causes quadratic
# performance for WHERE ... IN (...) clauses
# Clauses are anyway checked for their validity in
# e.g., connectors/sqla/models/get_query_str_extended
# self._sanitize_filters()
return None
except QueryObjectValidationError as ex:
if raise_exceptions:
Expand Down
2 changes: 1 addition & 1 deletion superset/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1607,7 +1607,7 @@ def EMAIL_HEADER_MUTATOR( # pylint: disable=invalid-name,unused-argument
# def DATASET_HEALTH_CHECK(datasource: SqlaTable) -> Optional[str]:
# if (
# datasource.sql and
# len(sql_parse.ParsedQuery(datasource.sql, strip_comments=True).tables) == 1
# len(sql_parse.ParsedQuery(datasource.sql).tables) == 1
# ):
# return (
# "This virtual dataset queries only one table and therefore could be "
Expand Down
3 changes: 1 addition & 2 deletions superset/db_engine_specs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1090,8 +1090,7 @@ def apply_top_to_sql(cls, sql: str, limit: int) -> str:

cte = None
sql_remainder = None
sql = sql.strip(" \t\n;")
sql_statement = sqlparse.format(sql, strip_comments=True)
sql_statement = sql.strip(" \t\n;")
query_limit: int | None = sql_parse.extract_top_from_query(
sql_statement, cls.top_keywords
)
Expand Down
11 changes: 9 additions & 2 deletions superset/models/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
from sqlalchemy.pool import NullPool
from sqlalchemy.schema import UniqueConstraint
from sqlalchemy.sql import ColumnElement, expression, Select
from sqlglot import parse

from superset import app, db_engine_specs
from superset.commands.database.exceptions import DatabaseInvalidError
Expand Down Expand Up @@ -573,7 +574,11 @@ def get_df( # pylint: disable=too-many-locals
schema: str | None = None,
mutator: Callable[[pd.DataFrame], None] | None = None,
) -> pd.DataFrame:
sqls = self.db_engine_spec.parse_sql(sql)
# before we split sqls using sql parse, however this core code is only reachable
# with single sql queries. Thus, we remove the engine spec parser here
# sqls = self.db_engine_spec.parse_sql(sql)
sqls = parse(sql)

with self.get_sqla_engine_with_context(schema) as engine:
engine_url = engine.url
mutate_after_split = config["MUTATE_AFTER_SPLIT"]
Expand All @@ -598,7 +603,9 @@ def _log_query(sql: str) -> None:

with self.get_raw_connection(schema=schema) as conn:
cursor = conn.cursor()

for sql_ in sqls[:-1]:
sql_ = str(sql_)
if mutate_after_split:
sql_ = sql_query_mutator(
sql_,
Expand All @@ -611,7 +618,7 @@ def _log_query(sql: str) -> None:

if mutate_after_split:
last_sql = sql_query_mutator(
sqls[-1],
str(sqls[-1]),
security_manager=security_manager,
database=None,
)
Expand Down
4 changes: 0 additions & 4 deletions superset/sql_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -719,12 +719,8 @@ class ParsedQuery:
def __init__(
self,
sql_statement: str,
strip_comments: bool = False,
engine: str | None = None,
):
if strip_comments:
sql_statement = sqlparse.format(sql_statement, strip_comments=True)

self.sql: str = sql_statement
self._dialect = SQLGLOT_DIALECTS.get(engine) if engine else None
self._tables: set[Table] = set()
Expand Down
1 change: 0 additions & 1 deletion superset/sqllab/query_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ def render(self, execution_context: SqlJsonExecutionContext) -> str:

parsed_query = ParsedQuery(
query_model.sql,
strip_comments=True,
engine=query_model.database.db_engine_spec.engine,
)
rendered_query = sql_template_processor.process_template(
Expand Down
1 change: 1 addition & 0 deletions tests/example_data/test-long-where-clause-1k.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Liam,James,Noah,Wyatt,Gabriel,Lucas,Ethan,Alexander,Joseph,Benjamin,William,Logan,Mason,Jack,John,Asher,Elijah,Daniel,Henry,Jacob,Jaxon,Michael,Oliver,Hunter,David,Levi,Matthew,Landon,Aiden,Isaac,Jackson,Caleb,Ryan,Elias,Connor,Evan,Joshua,Samuel,Christian,Jayden,Jeremiah,Cooper,Eli,Robert,Ryder,Christopher,Colton,Josiah,Andrew,Austin,Carson,Jaxson,Jonathan,Luke,Malachi,Nathan,Owen,Blake,Lincoln,Ezra,Gavin,Thomas,Dylan,Grayson,Kai,Ryker,Zachary,Anthony,Isaiah,Jase,Jason,Micah,Sebastian,Silas,Titus,Bentley,Brody,Cameron,Carter,Chase,Gideon,Jace,Sawyer,Tristan,Tyler,Weston,Adam,Charles,Everett,Wesley,Xander,Brandon,Brayden,Nathaniel,Theodore,Xavier,Ashton,Avery,Dominic,Easton,Finn,George,Hudson,Ian,Jasper,Kayden,Marshall,Max,Maxwell,Miles,Orion,Richard,Timothy,Abel,Drake,Garrett,Jameson,Jayce,Joel,Kenneth,Maximus,Nicholas,Parker,Travis,Cody,Dean,Declan,Elliot,Ezekiel,Karter,Nolan,Patrick,Riley,Seth,Solomon,Steven,Victor,Waylon,Aaron,August,Bradley,Braxton,Bryce,Calvin,Camden,Cayden,Charlie,Cole,Damian,Dawson,Eric,Greyson,Jake,Jeffrey,Jesse,Jonah,Julian,Kaiden,Killian,Kingston,Maddox,Matthias,Maverick,Odin,Paul,Peter,Roman,Trevor,Zane,Alex,Archer,Caden,Collin,Colt,Edward,Gage,Gunner,Harrison,Ivan,Jax,Leo,Lukas,Marcus,Paxton,Soren,Sullivan,Tanner,Trenton,Troy,Tucker,Vincent,Walter,Warren,Adrian,Augustus,Axel,Beckett,Cade,Clayton,Dante,Emmett,Felix,Grant,Hatcher,Jordan,Jude,Julius,Kaleb,Kevin,Remington,Rowan,Russell,Simon,Skyler,Stephen,Sterling,Talon,Westin,Aidan,Alden,Angelo,Arthur,Atticus,Barrett,Brantley,Brooks,Bruce,Cash,Cyrus,Derek,Desmond,Erik,Griffin,Hayden,Jeremy,Justice,Kaden,Kane,Kyle,Leif,Mark,Mateo,Miguel,Myles,Nikolai,Paxson,Peyton,Raymond,Reid,River,Rodney,Roland,Ronin,Rylan,Shawn,Spencer,Thaddeus,Tobias,Tyson,Walker,William,James,John,Mason,Elijah,Noah,Jackson,Aiden,Jacob,Joshua,Samuel,Michael,Ethan,Carter,Liam,Grayson,Christopher,Jayden,Logan,Hunter,Andrew,Landon,Caleb,Gabriel,Joseph,Charles,David,Jaxon,Colton,Luke,Ayden,Levi,Cameron,Matthew,Christian,Carson,Wyatt,Alexander,Jordan,Benjamin,Robert,Daniel,Kayden,Lucas,Easton,Bentley,Eli,Brantley,Braxton,Brayden,Jonathan,Henry,Anthony,Thomas,Jeremiah,Jace,Connor,Sawyer,Austin,Bryson,Oliver,Parker,Cooper,Hudson,Dylan,Isaac,Tyler,Isaiah,Josiah,Nathan,Karter,Brandon,Jase,Timothy,Jack,Kingston,Nicholas,Gavin,Tucker,Ashton,Tristan,Asher,Justin,Kaleb,Chase,Brody,Jayceon,Owen,Kaiden,Jaxson,Kevin,Ryan,Aaron,Cayden,Ian,Jason,Jayce,Micah,Ryder,Evan,Greyson,Hayden,Kaden,Harrison,Karson,Camden,Braylen,Rylan,Zachary,Zayden,Adam,Jose,Kameron,Wesley,Barrett,Marcus,Nolan,Blake,Dallas,Gunner,Kenneth,Malachi,Preston,Silas,Bryce,Collin,Antonio,Caden,Jonah,King,Peyton,Adrian,Maddox,Messiah,Brooks,Dawson,Trenton,Avery,Bennett,Lincoln,Steven,Alex,Richard,Sebastian,Weston,Braylon,Brian,Dalton,Ezra,Jaden,Jaiden,Riley,Tanner,Zion,Corbin,Walker,Xavier,Amari,Judah,Lawson,Maxwell,River,Jude,Angel,Carlos,Eric,Kason,Griffin,Juan,Princeton,Ryker,Abel,Bryan,Cason,Derrick,George,Roman,Aidan,Conner,Everett,Garrett,Jaylen,Miles,Stephen,Dominic,Emmett,Jameson,Johnny,Julian,Kyler,Zane,Bradley,Cole,Edward,Elias,Graham,Jesse,Kai,Nathaniel,Paul,Bryant,Caiden,Clayton,Gregory,Jeremy,Judson,Kylan,Patrick,Walter,Alan,Charlie,Colin,Landen,Chandler,Luis,Titus,Waylon,Anderson,Axel,Cody,Ezekiel,Grant,Jax,Kayson,Miguel,Tyson,Chance,Declan,Jett,Kyle,Vincent,Amir,Braydon,Johnathan,Seth,Travis,Brentley,Case,Devin,Jasper,Jeffery,Joel,Keegan,Marshall,Skyler,Cade,Fisher,Jensen,Max,Paxton,Rhett,Spencer,Trevor,Calvin,Cohen,Colt,Kendrick,Major,Taylor,Xander,Zander,Beau,Braden,Brycen,Curtis,Finn,Iker,Ivan,Jamari,Jesus,Mark,Maverick,Myles,Reid,Troy,Victor,Zaiden,Casen,Cash,Colby,Damien,Ellis,Grady,Houston,Kamden,Kolton,Landyn,Malcolm,Phillip,Warren,Brodie,Corey,Cristian,Dustin,Elliott,Jeffrey,Kasen,Raylan,Rowan,Wilson,Aden,Cedric,Coleman,Diego,Edwin,Gage,Jake,Kash,Prince,Ronald,Russell,Ryland,Theodore,Abram,Ace,Damian,Demarcus,Hayes,Holden,Jamarcus,Javier,Mateo,Maurice,Nicolas,Remington,Terry,Tony,Abraham,Brantlee,Carmelo,Demetrius,Dillon,Emmanuel,Israel,Jamichael,Jerry,Kobe,Kylen,Lane,Phoenix,Willie,August,Baylor,Channing,Daxton,Desmond,Donald,Donovan,Eduardo,Emerson,Jimmy,Kamari,Kendall,Porter,Skylar,Alejandro,Asa,Beckett,Bentlee,Bo,Brady,Brendan,Brock,Cannon,Casey,Courtney,Danny,Deacon,Dean,Elliot,Gannon,Jakobe,Jamarion,Jaxton,Jermaine,Jon,Leonardo,Luca,Morgan,Nehemiah,Pedro,Sylas,Tripp,Triston,Turner,Alexis,Andre,Brennan,Bruce,Cayson,Cornelius,Crimson,Davion,Davis,Deandre,Dennis,Franklin,Frederick,Gary,Harley,Jalen,Justice,Kade,Kaison,Kyson,Layton,Legend,Malik,Manuel,Micheal,Miller,Oakley,Omar,Reed,Ricardo,Ricky,Simon,Terrance,Trent,Zechariah,Archer,Dakota,Derek,Dexter,Erick,Felix,Jagger,Jasiah,Jorge,Keaton,Khalil,Kyrie,Ladarius,Leland,Leo,London,Lorenzo,Lukas,Maximus,Omari,Oscar,Quinton,Reese,Reginald,Rodney,Samson,Sean,Shane,Trace,Zachariah,Zaylen,Adan,Atticus,Branson,Braylin,Damon,Darius,Dominick,Dorian,Fletcher,Francisco,Issac,Jamison,Jared,Javion,Jay,Joe,Jordyn,Josue,Kamryn,Knox,Kolten,Kristian,Lance,Lathan,Louis,Lyric,Maison,Mitchell,Neymar,Peter,Randall,Reece,Scott,Shawn,Solomon,Tatum,Terrence,Tylan,Ahmad,Alijah,Andres,Andy,Beckham,Benton,Billy,Blaine,Bowen,Brenton,Byron,Camdyn,Cullen,Deangelo,Devon,Drake,Drew,Emory,Finley,Gael,Gerald,Gideon,Gunnar,Jacoby,Jakaiden,Jaydon,Jayson,Karsyn,Keagan,Keelan,Keith,Kristopher,Landan,Leon,Macon,Mario,Markell,Martin,Marvin,Raymond,Roderick,Sutton,Uriah,Allen,Amarion,Amos,Brayan,Brayson,Brenden,Callen,Camron,Chevy,Colten,Cortez,Dante,Deshawn,Eddie,Emanuel,Fernando,Foster,Giovanni,Hector,Izaiah,Jabari,Jakob,Jamir,Jaxen,Jayveon,Jayvion,Jorden,Kane,Kaysen,Keandre,Keller,Kenny,Kody,Konnor,Lamar,Langston,Larry,Lewis,Malakai,Mayson,Mohammed,Payton,Philip,Pierce,Quentin,Ridge,Ronan,Ronnie,Roy,Rylee,Santiago,Shepherd,Sullivan,Talon,Thaddeus,Toby,Tristen,Tristin,Will,Xzavier,Zackary,Zackery,Zayne,Zyon,Aiyden,Albert,Ali,Alton,Anakin,Angelo,Armani,Arthur,Aston,Barron,Ben,Bishop,Brent,Briar,Brylon,Camren,Canaan,Carl,Cashton,Caysen,Clark,Clinton,Damion,Darren,Daylen,Dayton,Douglas,Elisha,Enoch,Erik,Felipe,Forrest,Frank,Gatlin,Graysen,Isiah,Jacen,Jaceyon,Jaiceon,Jakaden,Jakari,Jakayden,Jamie,Jaquan,Jarvis,Jasen,Jaylon,Jerome,Justus,Kaidyn,Kalel,Kamron,Kashton,Kellan,Kelvin,Kemari,Kendarius,Kharter,Kohen,Kooper,Kymani,Lamarcus,Lawrence,Layne,Leonel,Marquis,Memphis,Nash,Nigel,Omarion,Pablo,Pierson,Randy,Ray,Rayden,Roland,Ruben,Stanley,Stephon,Sterling,Taylon,Tommy,Tristian,Wade,Wallace,Wells,Zamarion,Zayvion,Adrien,Aidyn,Alden,Alec,Alvin,Antwan,Armando,Aubrey,Aydon,Benson,Braylan,Brentlee,Brylan,Carsen,Chris,Christon,Collins,Conor,Corbyn,Coy,Crew,Dakari,Darrell,Darrion,Darryl,Dax,Deanthony,Demari,Deon,Dewayne,Dontavious,Draven,Eden,Emery,Ezequiel,Finnegan,Francis,Gauge,Gentry,Hank,Harlem,Harper,Harris,Hollis,Hosea,Jessie,Johan,Johnathon,Julien,Julio,Julius,Junior,Kace,Kaisen,Karsen,Kaydon,
37 changes: 37 additions & 0 deletions tests/integration_tests/charts/data/api_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@
# isort:skip_file
"""Unit tests for Superset"""
import json
import time
import unittest
import copy
import numpy as np
from datetime import datetime
from io import BytesIO
from typing import Any, Optional
Expand Down Expand Up @@ -643,6 +645,41 @@ def test_with_invalid_where_parameter_closing_unclosed__400(self):

assert rv.status_code == 400

@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
def test_with_long_where_parameter(self):
long_clause = np.loadtxt(
"tests/example_data/test-long-where-clause-1k.txt", delimiter=",", dtype=str
).tolist()
self.query_context_payload["queries"][0]["filters"][2]["val"] = long_clause
start_a = time.time()
rv = self.post_assert_metric(CHART_DATA_URI, self.query_context_payload, "data")
compute_time_a = time.time() - start_a
table = self.get_table_by_id(1)
del self.query_context_payload["queries"][0]["time_range"]
sqla_query = table.get_sqla_query(
**{
"granularity": None,
"from_dttm": None,
"to_dttm": None,
"groupby": ["gender"],
"metrics": ["count"],
"is_timeseries": False,
"filter": self.query_context_payload["queries"][0]["filters"],
"extras": {},
}
)
start_b = time.time()
sql = table.database.compile_sqla_query(sqla_query.sqla_query)
result = table.database.get_df(sql, schema=table.schema)
compute_time_b = time.time() - start_b

buffer_time = 2
self.assertLess(
compute_time_a,
compute_time_b + buffer_time,
f"computation of post query payload: {compute_time_a} sec is higher than unparsed sqla querying time: {compute_time_b} sec plus {buffer_time} sec buffer time",
)

@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
def test_with_where_parameter_including_comment___200(self):
self.query_context_payload["queries"][0]["filters"] = []
Expand Down
16 changes: 2 additions & 14 deletions tests/unit_tests/sql_parse_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -902,9 +902,7 @@ def test_is_valid_ctas() -> None:

A valid CTAS has a ``SELECT`` as its last statement.
"""
assert (
ParsedQuery("SELECT * FROM table", strip_comments=True).is_valid_ctas() is True
)
assert ParsedQuery("SELECT * FROM table").is_valid_ctas() is True

assert (
ParsedQuery(
Expand All @@ -913,7 +911,6 @@ def test_is_valid_ctas() -> None:
SELECT * FROM table
-- comment 2
""",
strip_comments=True,
).is_valid_ctas()
is True
)
Expand All @@ -926,7 +923,6 @@ def test_is_valid_ctas() -> None:
SELECT @value as foo;
-- comment 2
""",
strip_comments=True,
).is_valid_ctas()
is True
)
Expand All @@ -938,7 +934,6 @@ def test_is_valid_ctas() -> None:
EXPLAIN SELECT * FROM table
-- comment 2
""",
strip_comments=True,
).is_valid_ctas()
is False
)
Expand All @@ -949,7 +944,6 @@ def test_is_valid_ctas() -> None:
SELECT * FROM table;
INSERT INTO TABLE (foo) VALUES (42);
""",
strip_comments=True,
).is_valid_ctas()
is False
)
Expand All @@ -961,9 +955,7 @@ def test_is_valid_cvas() -> None:

A valid CVAS has a single ``SELECT`` statement.
"""
assert (
ParsedQuery("SELECT * FROM table", strip_comments=True).is_valid_cvas() is True
)
assert ParsedQuery("SELECT * FROM table").is_valid_cvas() is True

assert (
ParsedQuery(
Expand All @@ -972,7 +964,6 @@ def test_is_valid_cvas() -> None:
SELECT * FROM table
-- comment 2
""",
strip_comments=True,
).is_valid_cvas()
is True
)
Expand All @@ -985,7 +976,6 @@ def test_is_valid_cvas() -> None:
SELECT @value as foo;
-- comment 2
""",
strip_comments=True,
).is_valid_cvas()
is False
)
Expand All @@ -997,7 +987,6 @@ def test_is_valid_cvas() -> None:
EXPLAIN SELECT * FROM table
-- comment 2
""",
strip_comments=True,
).is_valid_cvas()
is False
)
Expand All @@ -1008,7 +997,6 @@ def test_is_valid_cvas() -> None:
SELECT * FROM table;
INSERT INTO TABLE (foo) VALUES (42);
""",
strip_comments=True,
).is_valid_cvas()
is False
)
Expand Down