-
Notifications
You must be signed in to change notification settings - Fork 3.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
How to recognize schema name at CreateTable #4113
Comments
should add schema compatible here grammars-v4/sql/mysql/Oracle/MySQLParser.g4 Line 4345 in b91b6da
|
I would not do that. This grammar was just added and is maintained by @mike-lischke who works at Oracle on MySQL! |
A table name can be qualified, e.g. |
parse is borken at schemaName and then the DOT_SYMBOL is treated as a ErrorSymbol |
FYI https://dev.mysql.com/doc/refman/8.0/en/create-table.html should a nulllable schemaName or databaseName here |
It's not the dot which is an error but your comment for the foreign key, which is invalid syntax. For the correct syntax I get this parse tree:
That contains the part The query I used was: CREATE TABLE if not exists sample.sample_table (
id INT AUTO_INCREMENT PRIMARY KEY COMMENT 'Primary Key - Unique ID',
name VARCHAR(255) NOT NULL COMMENT 'Name of the entity',
description TEXT COMMENT 'Detailed description',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT 'Record creation timestamp',
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'Record last update timestamp' ,
email VARCHAR(255) UNIQUE COMMENT 'Unique email address',
phone_number VARCHAR(20) DEFAULT NULL COMMENT 'Phone number',
status ENUM('active', 'inactive', 'suspended') NOT NULL DEFAULT 'active' COMMENT 'Status of the record',
balance DECIMAL(10,2) DEFAULT 0.00 COMMENT 'Account balance',
json_data JSON COMMENT 'Additional data in JSON format',
metadata BLOB COMMENT 'Binary data for metadata',
FOREIGN KEY (status) REFERENCES status_table(status_id)
ON DELETE CASCADE
) ENGINE=InnoDB
DEFAULT CHARSET=utf8mb4
COMMENT='A sample table with various features'; |
the sql is use chatgpt generated, but parse the schema is down, I want to know the reason, Can you try this on CSharp perhaps the custom MySQLBaseLexer cause the problem? without schema I use CSharp to implement MySQLBaseLexer.cs MySQLBaseRecognizer.cs follow TypeScript Implement using Antlr4.Runtime;
namespace MySql.Antlr4.gen;
public abstract class MySQLBaseLexer : Lexer
{
protected MySQLBaseLexer(ICharStream input) : base(input)
{
}
protected MySQLBaseLexer(ICharStream input, TextWriter output, TextWriter errorOutput) : base(input, output, errorOutput)
{
}
public int serverVersion = 0;
public HashSet<SqlMode> sqlModes = new();
/** Enable Multi Language Extension support. */
public bool supportMle = true;
public readonly HashSet<string> charSets = new(); // Used to check repertoires.
protected bool inVersionComment = false;
private readonly Stack<IToken> pendingTokens = new();
public const string longString = "2147483647";
public const int longLength = 10;
public const string signedLongString = "-2147483648";
public const string longLongString = "9223372036854775807";
public const int longLongLength = 19;
public const string signedLongLongString = "-9223372036854775808";
public const int signedLongLongLength = 19;
public const string unsignedLongLongString = "18446744073709551615";
public const int unsignedLongLongLength = 20;
public bool isSqlModeActive(SqlMode sqlMode)
{
return sqlModes.Contains(sqlMode);
}
/**
* Converts a mode string into individual mode flags.
*
* @param modes The input string to parse.
*/
public void sqlModeFromString(string modes) {
sqlModes = new HashSet<SqlMode>();
var parts = modes.ToUpper().Split(",").ToList();
parts.ForEach(mode => {
if (mode == "ANSI" || mode == "DB2" || mode == "MAXDB" || mode == "MSSQL" || mode == "ORACLE" ||
mode == "POSTGRESQL")
{
sqlModes.Add(SqlMode.AnsiQuotes);
sqlModes.Add(SqlMode.PipesAsConcat);
sqlModes.Add(SqlMode.IgnoreSpace);
} else if (mode == "ANSI_QUOTES") {
sqlModes.Add(SqlMode.AnsiQuotes);
} else if (mode == "PIPES_AS_CONCAT") {
sqlModes.Add(SqlMode.PipesAsConcat);
} else if (mode == "NO_BACKSLASH_ESCAPES") {
sqlModes.Add(SqlMode.NoBackslashEscapes);
} else if (mode == "IGNORE_SPACE") {
sqlModes.Add(SqlMode.IgnoreSpace);
} else if (mode == "HIGH_NOT_PRECEDENCE" || mode == "MYSQL323" || mode == "MYSQL40") {
sqlModes.Add(SqlMode.HighNotPrecedence);
}
});
}
/**
* Resets the lexer by setting initial values to transient member, resetting the input stream position etc.
*/
public override void Reset() {
inVersionComment = false;
base.Reset();
}
/**
* Implements the multi token feature required in our lexer.
* A lexer rule can emit more than a single token, if needed.
*
* @returns The next token in the token stream.
*/
public IToken? nextToken() {
// First respond with pending tokens to the next token request, if there are any.
if (pendingTokens.TryPop(out var token)) {
return token;
}
// Let the main lexer class run the next token recognition.
// This might create additional tokens again.
var next = base.NextToken();
if (pendingTokens.TryPop(out token)) {
pendingTokens.Push(next);
return token;
}
return next;
}
protected bool checkMySQLVersion(string text) {
if (text.Length < 8) {// Minimum is: /*!12345
return false;
}
// Skip version comment introducer.
var version = int.Parse(text[3..]);
if (version <= serverVersion) {
inVersionComment = true;
return true;
}
return false;
}
/**
* Called when a keyword was consumed that represents an internal MySQL function and checks if that keyword is
* followed by an open parenthesis. If not then it is not considered a keyword but treated like a normal identifier.
*
* @param proposed The token type to use if the check succeeds.
*
* @returns If a function call is found then return the proposed token type, otherwise just IDENTIFIER.
*/
protected int determineFunction(int proposed) {
// Skip any whitespace character if the sql mode says they should be ignored,
// before actually trying to match the open parenthesis.
// Ascii code int to string
var input = Convert.ToChar(InputStream.LA(1));
if (this.isSqlModeActive(SqlMode.IgnoreSpace)) {
while (input is ' ' or '\t' or '\r' or '\n') {
Interpreter.Consume(((ITokenSource)this).InputStream);
Channel = Lexer.Hidden;
Type = MySQLLexer.WHITESPACE;
input = Convert.ToChar(InputStream.LA(1));
}
}
return input == '(' ? proposed : MySQLLexer.IDENTIFIER;
}
/**
* Checks the given text and determines the smallest number type from it. Code has been taken from sql_lex.cc.
*
* @param text The text to parse (which must be a number).
*
* @returns The token type for that text.
*/
protected int determineNumericType(string text) {
// The original code checks for leading +/- but actually that can never happen, neither in the
// server parser (as a digit is used to trigger processing in the lexer) nor in our parser
// as our rules are defined without signs. But we do it anyway for maximum compatibility.
var length = text.Length - 1;
if (length < longLength) { // quick normal case
return MySQLLexer.INT_NUMBER;
}
var negative = false;
var index = 0;
if (text[index] == '+') { // Remove sign and pre-zeros
++index;
--length;
} else if (text[index] == '-') {
++index;
--length;
negative = true;
}
while (text[index] == '0' && length > 0) {
++index;
--length;
}
if (length < longLength) {
return MySQLLexer.INT_NUMBER;
}
int smaller;
int bigger;
string cmp;
if (negative) {
if (length == longLength) {
cmp = signedLongString[1..];
smaller = MySQLLexer.INT_NUMBER; // If <= signed_long_str
bigger = MySQLLexer.LONG_NUMBER; // If >= signed_long_str
} else if (length < signedLongLongLength) {
return MySQLLexer.LONG_NUMBER;
} else if (length > signedLongLongLength) {
return MySQLLexer.DECIMAL_NUMBER;
} else {
cmp = signedLongLongString[1..];
smaller = MySQLLexer.LONG_NUMBER; // If <= signed_longlong_str
bigger = MySQLLexer.DECIMAL_NUMBER;
}
} else {
if (length == longLength) {
cmp = longString;
smaller = MySQLLexer.INT_NUMBER;
bigger = MySQLLexer.LONG_NUMBER;
} else if (length < longLongLength) {
return MySQLLexer.LONG_NUMBER;
} else if (length > longLongLength) {
if (length > unsignedLongLongLength) {
return MySQLLexer.DECIMAL_NUMBER;
}
cmp = unsignedLongLongString;
smaller = MySQLLexer.ULONGLONG_NUMBER;
bigger = MySQLLexer.DECIMAL_NUMBER;
} else {
cmp = longLongString;
smaller = MySQLLexer.LONG_NUMBER;
bigger = MySQLLexer.ULONGLONG_NUMBER;
}
}
var otherIndex = 0;
while (index < text.Length && cmp[otherIndex++] == text[index++]) {
//
}
return text[index - 1] <= cmp[otherIndex - 1] ? smaller : bigger;
}
/**
* Checks if the given text corresponds to a charset defined in the server (text is preceded by an underscore).
*
* @param text The text to check.
*
* @returns UNDERSCORE_CHARSET if so, otherwise IDENTIFIER.
*/
protected int checkCharset(string text) {
return charSets.Contains(text) ? MySQLLexer.UNDERSCORE_CHARSET : MySQLLexer.IDENTIFIER;
}
/**
* Creates a DOT token in the token stream.
*/
protected void emitDot() {
this.pendingTokens.Push(TokenFactory.Create(new Tuple<ITokenSource, ICharStream>(this, ((ITokenSource)this).InputStream), MySQLLexer.DOT_SYMBOL,
Text, Channel, TokenStartCharIndex, TokenStartCharIndex, Line,
Column
));
++this.Column;
this.InputStream.Seek(TokenStartCharIndex + 1);
}
}
using Antlr4.Runtime;
namespace MySql.Antlr4.gen;
public abstract class MySQLBaseRecognizer : Parser
{
public int serverVersion = 0;
public bool supportMle = true;
public HashSet<SqlMode> SqlModes { get; set; }
public bool isSqlModeActive(SqlMode sqlMode)
{
return SqlModes.Contains(sqlMode);
}
protected MySQLBaseRecognizer(ITokenStream input) : base(input)
{
}
protected MySQLBaseRecognizer(ITokenStream input, TextWriter output, TextWriter errorOutput) : base(input, output, errorOutput)
{
}
} |
You are not giving us complete, factual code. The grammars-v4/sql/mysql/Oracle/*.g4 grammar cannot be used as is with the CSharp target. There are many Antlr4ng target-specific codes in the grammar. (NB: this grammar is not Antlr4 TypeScript, but Antlr4ng TypeScript. There is a huge difference.)
Further, your base classes for lexer and parser (MySQLBaseLexer and MySQLBaseRecognizer) don't compile.
But, even after correcting all that, it still does not work. It is not clear whether I will port the grammar to the other targets because it should be written in target-agnostic format for this repo. That is the current best solution so that the .g4's are not duplicated 8 times (one for each target), and cause a maintenance issue (because people only change the target they want). |
Um... Find the problem at the NextToken and the emitDot method in MySQLBaseLexer.cs I try to refactor the emitDot implement in CSharp protected void emitDot() {
pendingTokens.Push(TokenFactory.Create(MySQLLexer.DOT_SYMBOL, Text));
} It seems to be working now. Here is the full implement MySqlBaseLexer.cs using Antlr4.Runtime;
namespace MySql.Antlr4.gen;
public abstract class MySQLBaseLexer : Lexer
{
protected MySQLBaseLexer(ICharStream input) : base(input)
{
}
protected MySQLBaseLexer(ICharStream input, TextWriter output, TextWriter errorOutput) : base(input, output, errorOutput)
{
}
public int serverVersion = 0;
public HashSet<SqlMode> sqlModes = new();
/** Enable Multi Language Extension support. */
public bool supportMle = true;
public readonly HashSet<string> charSets = new(); // Used to check repertoires.
protected bool inVersionComment = false;
private readonly Stack<IToken> pendingTokens = new();
public const string longString = "2147483647";
public const int longLength = 10;
public const string signedLongString = "-2147483648";
public const string longLongString = "9223372036854775807";
public const int longLongLength = 19;
public const string signedLongLongString = "-9223372036854775808";
public const int signedLongLongLength = 19;
public const string unsignedLongLongString = "18446744073709551615";
public const int unsignedLongLongLength = 20;
public bool isSqlModeActive(SqlMode sqlMode)
{
return sqlModes.Contains(sqlMode);
}
/**
* Converts a mode string into individual mode flags.
*
* @param modes The input string to parse.
*/
public void sqlModeFromString(string modes) {
sqlModes = new HashSet<SqlMode>();
var parts = modes.ToUpper().Split(",").ToList();
parts.ForEach(mode => {
if (mode == "ANSI" || mode == "DB2" || mode == "MAXDB" || mode == "MSSQL" || mode == "ORACLE" ||
mode == "POSTGRESQL")
{
sqlModes.Add(SqlMode.AnsiQuotes);
sqlModes.Add(SqlMode.PipesAsConcat);
sqlModes.Add(SqlMode.IgnoreSpace);
} else if (mode == "ANSI_QUOTES") {
sqlModes.Add(SqlMode.AnsiQuotes);
} else if (mode == "PIPES_AS_CONCAT") {
sqlModes.Add(SqlMode.PipesAsConcat);
} else if (mode == "NO_BACKSLASH_ESCAPES") {
sqlModes.Add(SqlMode.NoBackslashEscapes);
} else if (mode == "IGNORE_SPACE") {
sqlModes.Add(SqlMode.IgnoreSpace);
} else if (mode == "HIGH_NOT_PRECEDENCE" || mode == "MYSQL323" || mode == "MYSQL40") {
sqlModes.Add(SqlMode.HighNotPrecedence);
}
});
}
/**
* Resets the lexer by setting initial values to transient member, resetting the input stream position etc.
*/
public override void Reset() {
inVersionComment = false;
base.Reset();
}
/**
* Implements the multi token feature required in our lexer.
* A lexer rule can emit more than a single token, if needed.
*
* @returns The next token in the token stream.
*/
public override IToken? NextToken() {
// First respond with pending tokens to the next token request, if there are any.
if (pendingTokens.TryPop(out var token)) {
return token;
}
// Let the main lexer class run the next token recognition.
// This might create additional tokens again.
var next = base.NextToken();
if (pendingTokens.TryPop(out token)) {
pendingTokens.Push(next);
return token;
}
return next;
}
protected bool checkMySQLVersion(string text) {
if (text.Length < 8) {// Minimum is: /*!12345
return false;
}
// Skip version comment introducer.
var version = int.Parse(text[3..]);
if (version <= serverVersion) {
inVersionComment = true;
return true;
}
return false;
}
/**
* Called when a keyword was consumed that represents an internal MySQL function and checks if that keyword is
* followed by an open parenthesis. If not then it is not considered a keyword but treated like a normal identifier.
*
* @param proposed The token type to use if the check succeeds.
*
* @returns If a function call is found then return the proposed token type, otherwise just IDENTIFIER.
*/
protected int determineFunction(int proposed) {
// Skip any whitespace character if the sql mode says they should be ignored,
// before actually trying to match the open parenthesis.
// Ascii code int to string
var input = Convert.ToChar(InputStream.LA(1));
if (isSqlModeActive(SqlMode.IgnoreSpace)) {
while (input is ' ' or '\t' or '\r' or '\n') {
Interpreter.Consume(((ITokenSource)this).InputStream);
Channel = Lexer.Hidden;
Type = MySQLLexer.WHITESPACE;
input = Convert.ToChar(InputStream.LA(1));
}
}
return input == '(' ? proposed : MySQLLexer.IDENTIFIER;
}
/**
* Checks the given text and determines the smallest number type from it. Code has been taken from sql_lex.cc.
*
* @param text The text to parse (which must be a number).
*
* @returns The token type for that text.
*/
protected int determineNumericType(string text) {
// The original code checks for leading +/- but actually that can never happen, neither in the
// server parser (as a digit is used to trigger processing in the lexer) nor in our parser
// as our rules are defined without signs. But we do it anyway for maximum compatibility.
var length = text.Length - 1;
if (length < longLength) { // quick normal case
return MySQLLexer.INT_NUMBER;
}
var negative = false;
var index = 0;
if (text[index] == '+') { // Remove sign and pre-zeros
++index;
--length;
} else if (text[index] == '-') {
++index;
--length;
negative = true;
}
while (text[index] == '0' && length > 0) {
++index;
--length;
}
if (length < longLength) {
return MySQLLexer.INT_NUMBER;
}
int smaller;
int bigger;
string cmp;
if (negative) {
if (length == longLength) {
cmp = signedLongString[1..];
smaller = MySQLLexer.INT_NUMBER; // If <= signed_long_str
bigger = MySQLLexer.LONG_NUMBER; // If >= signed_long_str
} else if (length < signedLongLongLength) {
return MySQLLexer.LONG_NUMBER;
} else if (length > signedLongLongLength) {
return MySQLLexer.DECIMAL_NUMBER;
} else {
cmp = signedLongLongString[1..];
smaller = MySQLLexer.LONG_NUMBER; // If <= signed_longlong_str
bigger = MySQLLexer.DECIMAL_NUMBER;
}
} else {
if (length == longLength) {
cmp = longString;
smaller = MySQLLexer.INT_NUMBER;
bigger = MySQLLexer.LONG_NUMBER;
} else if (length < longLongLength) {
return MySQLLexer.LONG_NUMBER;
} else if (length > longLongLength) {
if (length > unsignedLongLongLength) {
return MySQLLexer.DECIMAL_NUMBER;
}
cmp = unsignedLongLongString;
smaller = MySQLLexer.ULONGLONG_NUMBER;
bigger = MySQLLexer.DECIMAL_NUMBER;
} else {
cmp = longLongString;
smaller = MySQLLexer.LONG_NUMBER;
bigger = MySQLLexer.ULONGLONG_NUMBER;
}
}
var otherIndex = 0;
while (index < text.Length && cmp[otherIndex++] == text[index++]) {
//
}
return text[index - 1] <= cmp[otherIndex - 1] ? smaller : bigger;
}
/**
* Checks if the given text corresponds to a charset defined in the server (text is preceded by an underscore).
*
* @param text The text to check.
*
* @returns UNDERSCORE_CHARSET if so, otherwise IDENTIFIER.
*/
protected int checkCharset(string text) {
return charSets.Contains(text) ? MySQLLexer.UNDERSCORE_CHARSET : MySQLLexer.IDENTIFIER;
}
/**
* Creates a DOT token in the token stream.
*/
protected void emitDot() {
pendingTokens.Push(TokenFactory.Create(MySQLLexer.DOT_SYMBOL, Text));
}
} |
There is a problem at TableName definitiion at |
Try to remove DOT_IDENTIFIER in MySQLLexer.g4 and then add strong type for tableName
|
I still have no idea, what you are after @huiyuanai709. What's wrong with the existing |
A few days ago, someone on StackOverflow asked about porting the grammars-v4/sql/mysql/Oracle/ grammar to CSharp (https://stackoverflow.com/q/78558707/4779853). His port contained some bugs, but I fixed it, and posted the complete program here: https://github.com/kaby76/MySqlParser. @huiyuanai709 We need to see your program and input. For an example of what we want, see https://github.com/kaby76/MySqlParser. The input string ( We can then see what you are trying to parse and then discuss the parse tree. That is the level of detail required. |
grammars-v4/sql/mysql/Oracle/MySQLParser.g4
Line 450 in b91b6da
eg: create table if not exists payment.example
The text was updated successfully, but these errors were encountered: