diff --git a/superset/common/query_object.py b/superset/common/query_object.py index a16166134408d..f75044825e2bd 100644 --- a/superset/common/query_object.py +++ b/superset/common/query_object.py @@ -272,7 +272,11 @@ def validate( try: self._validate_there_are_no_missing_series() self._validate_no_have_duplicate_labels() - self._sanitize_filters() + # problematic line where calling sqlparser.parse() causes quadratic + # performance for WHERE ... IN (...) clauses + # Clauses are anyway checked for their validity in + # e.g., connectors/sqla/models/get_query_str_extended + # self._sanitize_filters() return None except QueryObjectValidationError as ex: if raise_exceptions: diff --git a/superset/config.py b/superset/config.py index 1b06f96db8e71..fc4035fa5b551 100644 --- a/superset/config.py +++ b/superset/config.py @@ -1607,7 +1607,7 @@ def EMAIL_HEADER_MUTATOR( # pylint: disable=invalid-name,unused-argument # def DATASET_HEALTH_CHECK(datasource: SqlaTable) -> Optional[str]: # if ( # datasource.sql and -# len(sql_parse.ParsedQuery(datasource.sql, strip_comments=True).tables) == 1 +# len(sql_parse.ParsedQuery(datasource.sql).tables) == 1 # ): # return ( # "This virtual dataset queries only one table and therefore could be " diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py index bcb4035c9c672..35ac09616e78e 100644 --- a/superset/db_engine_specs/base.py +++ b/superset/db_engine_specs/base.py @@ -1090,8 +1090,7 @@ def apply_top_to_sql(cls, sql: str, limit: int) -> str: cte = None sql_remainder = None - sql = sql.strip(" \t\n;") - sql_statement = sqlparse.format(sql, strip_comments=True) + sql_statement = sql.strip(" \t\n;") query_limit: int | None = sql_parse.extract_top_from_query( sql_statement, cls.top_keywords ) diff --git a/superset/models/core.py b/superset/models/core.py index 92f6946f1e0e2..d0c157f902d76 100755 --- a/superset/models/core.py +++ b/superset/models/core.py @@ -57,6 +57,7 @@ from sqlalchemy.pool import NullPool from sqlalchemy.schema import UniqueConstraint from sqlalchemy.sql import ColumnElement, expression, Select +from sqlglot import parse from superset import app, db_engine_specs from superset.commands.database.exceptions import DatabaseInvalidError @@ -573,7 +574,11 @@ def get_df( # pylint: disable=too-many-locals schema: str | None = None, mutator: Callable[[pd.DataFrame], None] | None = None, ) -> pd.DataFrame: - sqls = self.db_engine_spec.parse_sql(sql) + # before we split sqls using sql parse, however this core code is only reachable + # with single sql queries. Thus, we remove the engine spec parser here + # sqls = self.db_engine_spec.parse_sql(sql) + sqls = parse(sql) + with self.get_sqla_engine_with_context(schema) as engine: engine_url = engine.url mutate_after_split = config["MUTATE_AFTER_SPLIT"] @@ -598,7 +603,9 @@ def _log_query(sql: str) -> None: with self.get_raw_connection(schema=schema) as conn: cursor = conn.cursor() + for sql_ in sqls[:-1]: + sql_ = str(sql_) if mutate_after_split: sql_ = sql_query_mutator( sql_, @@ -611,7 +618,7 @@ def _log_query(sql: str) -> None: if mutate_after_split: last_sql = sql_query_mutator( - sqls[-1], + str(sqls[-1]), security_manager=security_manager, database=None, ) diff --git a/superset/sql_parse.py b/superset/sql_parse.py index c8316dd6cc2c7..4203d5401c6c3 100644 --- a/superset/sql_parse.py +++ b/superset/sql_parse.py @@ -719,12 +719,8 @@ class ParsedQuery: def __init__( self, sql_statement: str, - strip_comments: bool = False, engine: str | None = None, ): - if strip_comments: - sql_statement = sqlparse.format(sql_statement, strip_comments=True) - self.sql: str = sql_statement self._dialect = SQLGLOT_DIALECTS.get(engine) if engine else None self._tables: set[Table] = set() diff --git a/superset/sqllab/query_render.py b/superset/sqllab/query_render.py index caf9a3cb2b206..67bea2b1fb4cd 100644 --- a/superset/sqllab/query_render.py +++ b/superset/sqllab/query_render.py @@ -60,7 +60,6 @@ def render(self, execution_context: SqlJsonExecutionContext) -> str: parsed_query = ParsedQuery( query_model.sql, - strip_comments=True, engine=query_model.database.db_engine_spec.engine, ) rendered_query = sql_template_processor.process_template( diff --git a/tests/example_data/test-long-where-clause-1k.txt b/tests/example_data/test-long-where-clause-1k.txt new file mode 100644 index 0000000000000..78b51f3fb3cd0 --- /dev/null +++ b/tests/example_data/test-long-where-clause-1k.txt @@ -0,0 +1 @@ +Liam,James,Noah,Wyatt,Gabriel,Lucas,Ethan,Alexander,Joseph,Benjamin,William,Logan,Mason,Jack,John,Asher,Elijah,Daniel,Henry,Jacob,Jaxon,Michael,Oliver,Hunter,David,Levi,Matthew,Landon,Aiden,Isaac,Jackson,Caleb,Ryan,Elias,Connor,Evan,Joshua,Samuel,Christian,Jayden,Jeremiah,Cooper,Eli,Robert,Ryder,Christopher,Colton,Josiah,Andrew,Austin,Carson,Jaxson,Jonathan,Luke,Malachi,Nathan,Owen,Blake,Lincoln,Ezra,Gavin,Thomas,Dylan,Grayson,Kai,Ryker,Zachary,Anthony,Isaiah,Jase,Jason,Micah,Sebastian,Silas,Titus,Bentley,Brody,Cameron,Carter,Chase,Gideon,Jace,Sawyer,Tristan,Tyler,Weston,Adam,Charles,Everett,Wesley,Xander,Brandon,Brayden,Nathaniel,Theodore,Xavier,Ashton,Avery,Dominic,Easton,Finn,George,Hudson,Ian,Jasper,Kayden,Marshall,Max,Maxwell,Miles,Orion,Richard,Timothy,Abel,Drake,Garrett,Jameson,Jayce,Joel,Kenneth,Maximus,Nicholas,Parker,Travis,Cody,Dean,Declan,Elliot,Ezekiel,Karter,Nolan,Patrick,Riley,Seth,Solomon,Steven,Victor,Waylon,Aaron,August,Bradley,Braxton,Bryce,Calvin,Camden,Cayden,Charlie,Cole,Damian,Dawson,Eric,Greyson,Jake,Jeffrey,Jesse,Jonah,Julian,Kaiden,Killian,Kingston,Maddox,Matthias,Maverick,Odin,Paul,Peter,Roman,Trevor,Zane,Alex,Archer,Caden,Collin,Colt,Edward,Gage,Gunner,Harrison,Ivan,Jax,Leo,Lukas,Marcus,Paxton,Soren,Sullivan,Tanner,Trenton,Troy,Tucker,Vincent,Walter,Warren,Adrian,Augustus,Axel,Beckett,Cade,Clayton,Dante,Emmett,Felix,Grant,Hatcher,Jordan,Jude,Julius,Kaleb,Kevin,Remington,Rowan,Russell,Simon,Skyler,Stephen,Sterling,Talon,Westin,Aidan,Alden,Angelo,Arthur,Atticus,Barrett,Brantley,Brooks,Bruce,Cash,Cyrus,Derek,Desmond,Erik,Griffin,Hayden,Jeremy,Justice,Kaden,Kane,Kyle,Leif,Mark,Mateo,Miguel,Myles,Nikolai,Paxson,Peyton,Raymond,Reid,River,Rodney,Roland,Ronin,Rylan,Shawn,Spencer,Thaddeus,Tobias,Tyson,Walker,William,James,John,Mason,Elijah,Noah,Jackson,Aiden,Jacob,Joshua,Samuel,Michael,Ethan,Carter,Liam,Grayson,Christopher,Jayden,Logan,Hunter,Andrew,Landon,Caleb,Gabriel,Joseph,Charles,David,Jaxon,Colton,Luke,Ayden,Levi,Cameron,Matthew,Christian,Carson,Wyatt,Alexander,Jordan,Benjamin,Robert,Daniel,Kayden,Lucas,Easton,Bentley,Eli,Brantley,Braxton,Brayden,Jonathan,Henry,Anthony,Thomas,Jeremiah,Jace,Connor,Sawyer,Austin,Bryson,Oliver,Parker,Cooper,Hudson,Dylan,Isaac,Tyler,Isaiah,Josiah,Nathan,Karter,Brandon,Jase,Timothy,Jack,Kingston,Nicholas,Gavin,Tucker,Ashton,Tristan,Asher,Justin,Kaleb,Chase,Brody,Jayceon,Owen,Kaiden,Jaxson,Kevin,Ryan,Aaron,Cayden,Ian,Jason,Jayce,Micah,Ryder,Evan,Greyson,Hayden,Kaden,Harrison,Karson,Camden,Braylen,Rylan,Zachary,Zayden,Adam,Jose,Kameron,Wesley,Barrett,Marcus,Nolan,Blake,Dallas,Gunner,Kenneth,Malachi,Preston,Silas,Bryce,Collin,Antonio,Caden,Jonah,King,Peyton,Adrian,Maddox,Messiah,Brooks,Dawson,Trenton,Avery,Bennett,Lincoln,Steven,Alex,Richard,Sebastian,Weston,Braylon,Brian,Dalton,Ezra,Jaden,Jaiden,Riley,Tanner,Zion,Corbin,Walker,Xavier,Amari,Judah,Lawson,Maxwell,River,Jude,Angel,Carlos,Eric,Kason,Griffin,Juan,Princeton,Ryker,Abel,Bryan,Cason,Derrick,George,Roman,Aidan,Conner,Everett,Garrett,Jaylen,Miles,Stephen,Dominic,Emmett,Jameson,Johnny,Julian,Kyler,Zane,Bradley,Cole,Edward,Elias,Graham,Jesse,Kai,Nathaniel,Paul,Bryant,Caiden,Clayton,Gregory,Jeremy,Judson,Kylan,Patrick,Walter,Alan,Charlie,Colin,Landen,Chandler,Luis,Titus,Waylon,Anderson,Axel,Cody,Ezekiel,Grant,Jax,Kayson,Miguel,Tyson,Chance,Declan,Jett,Kyle,Vincent,Amir,Braydon,Johnathan,Seth,Travis,Brentley,Case,Devin,Jasper,Jeffery,Joel,Keegan,Marshall,Skyler,Cade,Fisher,Jensen,Max,Paxton,Rhett,Spencer,Trevor,Calvin,Cohen,Colt,Kendrick,Major,Taylor,Xander,Zander,Beau,Braden,Brycen,Curtis,Finn,Iker,Ivan,Jamari,Jesus,Mark,Maverick,Myles,Reid,Troy,Victor,Zaiden,Casen,Cash,Colby,Damien,Ellis,Grady,Houston,Kamden,Kolton,Landyn,Malcolm,Phillip,Warren,Brodie,Corey,Cristian,Dustin,Elliott,Jeffrey,Kasen,Raylan,Rowan,Wilson,Aden,Cedric,Coleman,Diego,Edwin,Gage,Jake,Kash,Prince,Ronald,Russell,Ryland,Theodore,Abram,Ace,Damian,Demarcus,Hayes,Holden,Jamarcus,Javier,Mateo,Maurice,Nicolas,Remington,Terry,Tony,Abraham,Brantlee,Carmelo,Demetrius,Dillon,Emmanuel,Israel,Jamichael,Jerry,Kobe,Kylen,Lane,Phoenix,Willie,August,Baylor,Channing,Daxton,Desmond,Donald,Donovan,Eduardo,Emerson,Jimmy,Kamari,Kendall,Porter,Skylar,Alejandro,Asa,Beckett,Bentlee,Bo,Brady,Brendan,Brock,Cannon,Casey,Courtney,Danny,Deacon,Dean,Elliot,Gannon,Jakobe,Jamarion,Jaxton,Jermaine,Jon,Leonardo,Luca,Morgan,Nehemiah,Pedro,Sylas,Tripp,Triston,Turner,Alexis,Andre,Brennan,Bruce,Cayson,Cornelius,Crimson,Davion,Davis,Deandre,Dennis,Franklin,Frederick,Gary,Harley,Jalen,Justice,Kade,Kaison,Kyson,Layton,Legend,Malik,Manuel,Micheal,Miller,Oakley,Omar,Reed,Ricardo,Ricky,Simon,Terrance,Trent,Zechariah,Archer,Dakota,Derek,Dexter,Erick,Felix,Jagger,Jasiah,Jorge,Keaton,Khalil,Kyrie,Ladarius,Leland,Leo,London,Lorenzo,Lukas,Maximus,Omari,Oscar,Quinton,Reese,Reginald,Rodney,Samson,Sean,Shane,Trace,Zachariah,Zaylen,Adan,Atticus,Branson,Braylin,Damon,Darius,Dominick,Dorian,Fletcher,Francisco,Issac,Jamison,Jared,Javion,Jay,Joe,Jordyn,Josue,Kamryn,Knox,Kolten,Kristian,Lance,Lathan,Louis,Lyric,Maison,Mitchell,Neymar,Peter,Randall,Reece,Scott,Shawn,Solomon,Tatum,Terrence,Tylan,Ahmad,Alijah,Andres,Andy,Beckham,Benton,Billy,Blaine,Bowen,Brenton,Byron,Camdyn,Cullen,Deangelo,Devon,Drake,Drew,Emory,Finley,Gael,Gerald,Gideon,Gunnar,Jacoby,Jakaiden,Jaydon,Jayson,Karsyn,Keagan,Keelan,Keith,Kristopher,Landan,Leon,Macon,Mario,Markell,Martin,Marvin,Raymond,Roderick,Sutton,Uriah,Allen,Amarion,Amos,Brayan,Brayson,Brenden,Callen,Camron,Chevy,Colten,Cortez,Dante,Deshawn,Eddie,Emanuel,Fernando,Foster,Giovanni,Hector,Izaiah,Jabari,Jakob,Jamir,Jaxen,Jayveon,Jayvion,Jorden,Kane,Kaysen,Keandre,Keller,Kenny,Kody,Konnor,Lamar,Langston,Larry,Lewis,Malakai,Mayson,Mohammed,Payton,Philip,Pierce,Quentin,Ridge,Ronan,Ronnie,Roy,Rylee,Santiago,Shepherd,Sullivan,Talon,Thaddeus,Toby,Tristen,Tristin,Will,Xzavier,Zackary,Zackery,Zayne,Zyon,Aiyden,Albert,Ali,Alton,Anakin,Angelo,Armani,Arthur,Aston,Barron,Ben,Bishop,Brent,Briar,Brylon,Camren,Canaan,Carl,Cashton,Caysen,Clark,Clinton,Damion,Darren,Daylen,Dayton,Douglas,Elisha,Enoch,Erik,Felipe,Forrest,Frank,Gatlin,Graysen,Isiah,Jacen,Jaceyon,Jaiceon,Jakaden,Jakari,Jakayden,Jamie,Jaquan,Jarvis,Jasen,Jaylon,Jerome,Justus,Kaidyn,Kalel,Kamron,Kashton,Kellan,Kelvin,Kemari,Kendarius,Kharter,Kohen,Kooper,Kymani,Lamarcus,Lawrence,Layne,Leonel,Marquis,Memphis,Nash,Nigel,Omarion,Pablo,Pierson,Randy,Ray,Rayden,Roland,Ruben,Stanley,Stephon,Sterling,Taylon,Tommy,Tristian,Wade,Wallace,Wells,Zamarion,Zayvion,Adrien,Aidyn,Alden,Alec,Alvin,Antwan,Armando,Aubrey,Aydon,Benson,Braylan,Brentlee,Brylan,Carsen,Chris,Christon,Collins,Conor,Corbyn,Coy,Crew,Dakari,Darrell,Darrion,Darryl,Dax,Deanthony,Demari,Deon,Dewayne,Dontavious,Draven,Eden,Emery,Ezequiel,Finnegan,Francis,Gauge,Gentry,Hank,Harlem,Harper,Harris,Hollis,Hosea,Jessie,Johan,Johnathon,Julien,Julio,Julius,Junior,Kace,Kaisen,Karsen,Kaydon, diff --git a/tests/integration_tests/charts/data/api_tests.py b/tests/integration_tests/charts/data/api_tests.py index 4eabd16eaa803..6d98a686c89af 100644 --- a/tests/integration_tests/charts/data/api_tests.py +++ b/tests/integration_tests/charts/data/api_tests.py @@ -17,8 +17,10 @@ # isort:skip_file """Unit tests for Superset""" import json +import time import unittest import copy +import numpy as np from datetime import datetime from io import BytesIO from typing import Any, Optional @@ -643,6 +645,41 @@ def test_with_invalid_where_parameter_closing_unclosed__400(self): assert rv.status_code == 400 + @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices") + def test_with_long_where_parameter(self): + long_clause = np.loadtxt( + "tests/example_data/test-long-where-clause-1k.txt", delimiter=",", dtype=str + ).tolist() + self.query_context_payload["queries"][0]["filters"][2]["val"] = long_clause + start_a = time.time() + rv = self.post_assert_metric(CHART_DATA_URI, self.query_context_payload, "data") + compute_time_a = time.time() - start_a + table = self.get_table_by_id(1) + del self.query_context_payload["queries"][0]["time_range"] + sqla_query = table.get_sqla_query( + **{ + "granularity": None, + "from_dttm": None, + "to_dttm": None, + "groupby": ["gender"], + "metrics": ["count"], + "is_timeseries": False, + "filter": self.query_context_payload["queries"][0]["filters"], + "extras": {}, + } + ) + start_b = time.time() + sql = table.database.compile_sqla_query(sqla_query.sqla_query) + result = table.database.get_df(sql, schema=table.schema) + compute_time_b = time.time() - start_b + + buffer_time = 2 + self.assertLess( + compute_time_a, + compute_time_b + buffer_time, + f"computation of post query payload: {compute_time_a} sec is higher than unparsed sqla querying time: {compute_time_b} sec plus {buffer_time} sec buffer time", + ) + @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices") def test_with_where_parameter_including_comment___200(self): self.query_context_payload["queries"][0]["filters"] = [] diff --git a/tests/unit_tests/sql_parse_tests.py b/tests/unit_tests/sql_parse_tests.py index 593085c4ea568..fc8edef986c91 100644 --- a/tests/unit_tests/sql_parse_tests.py +++ b/tests/unit_tests/sql_parse_tests.py @@ -902,9 +902,7 @@ def test_is_valid_ctas() -> None: A valid CTAS has a ``SELECT`` as its last statement. """ - assert ( - ParsedQuery("SELECT * FROM table", strip_comments=True).is_valid_ctas() is True - ) + assert ParsedQuery("SELECT * FROM table").is_valid_ctas() is True assert ( ParsedQuery( @@ -913,7 +911,6 @@ def test_is_valid_ctas() -> None: SELECT * FROM table -- comment 2 """, - strip_comments=True, ).is_valid_ctas() is True ) @@ -926,7 +923,6 @@ def test_is_valid_ctas() -> None: SELECT @value as foo; -- comment 2 """, - strip_comments=True, ).is_valid_ctas() is True ) @@ -938,7 +934,6 @@ def test_is_valid_ctas() -> None: EXPLAIN SELECT * FROM table -- comment 2 """, - strip_comments=True, ).is_valid_ctas() is False ) @@ -949,7 +944,6 @@ def test_is_valid_ctas() -> None: SELECT * FROM table; INSERT INTO TABLE (foo) VALUES (42); """, - strip_comments=True, ).is_valid_ctas() is False ) @@ -961,9 +955,7 @@ def test_is_valid_cvas() -> None: A valid CVAS has a single ``SELECT`` statement. """ - assert ( - ParsedQuery("SELECT * FROM table", strip_comments=True).is_valid_cvas() is True - ) + assert ParsedQuery("SELECT * FROM table").is_valid_cvas() is True assert ( ParsedQuery( @@ -972,7 +964,6 @@ def test_is_valid_cvas() -> None: SELECT * FROM table -- comment 2 """, - strip_comments=True, ).is_valid_cvas() is True ) @@ -985,7 +976,6 @@ def test_is_valid_cvas() -> None: SELECT @value as foo; -- comment 2 """, - strip_comments=True, ).is_valid_cvas() is False ) @@ -997,7 +987,6 @@ def test_is_valid_cvas() -> None: EXPLAIN SELECT * FROM table -- comment 2 """, - strip_comments=True, ).is_valid_cvas() is False ) @@ -1008,7 +997,6 @@ def test_is_valid_cvas() -> None: SELECT * FROM table; INSERT INTO TABLE (foo) VALUES (42); """, - strip_comments=True, ).is_valid_cvas() is False )