From bb23b5dd2b9645ba4fca30c18e83fd6ffcf6915e Mon Sep 17 00:00:00 2001 From: spongedc Date: Mon, 13 Apr 2020 11:21:12 +0800 Subject: [PATCH 1/3] 1. Support charset gbk in parser; 2. fix description for charsetlatin1 and charsetbin --- charset/charset.go | 20 +++++++++++------ parser_test.go | 56 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 7 deletions(-) diff --git a/charset/charset.go b/charset/charset.go index 1758569ac..2a2e2cc62 100644 --- a/charset/charset.go +++ b/charset/charset.go @@ -56,17 +56,19 @@ var charsetInfos = []*Charset{ {CharsetUTF8, CollationUTF8, make(map[string]*Collation), "UTF-8 Unicode", 3}, {CharsetUTF8MB4, CollationUTF8MB4, make(map[string]*Collation), "UTF-8 Unicode", 4}, {CharsetASCII, CollationASCII, make(map[string]*Collation), "US ASCII", 1}, - {CharsetLatin1, CollationLatin1, make(map[string]*Collation), "Latin1", 1}, - {CharsetBin, CollationBin, make(map[string]*Collation), "binary", 1}, + {CharsetLatin1, CollationLatin1, make(map[string]*Collation), "cp1252 West European", 1}, + {CharsetBin, CollationBin, make(map[string]*Collation), "Binary pseudo charset", 1}, + {CharsetGBK, CollationGBKChineseCi, make(map[string]*Collation), " GBK Simplified Chinese", 1}, } // All the names supported collations should be in the following table. var supportedCollationNames = map[string]struct{}{ - CollationUTF8: {}, - CollationUTF8MB4: {}, - CollationASCII: {}, - CollationLatin1: {}, - CollationBin: {}, + CollationUTF8: {}, + CollationUTF8MB4: {}, + CollationASCII: {}, + CollationLatin1: {}, + CollationBin: {}, + CollationGBKChineseCi: {}, } // Desc is a charset description. @@ -186,6 +188,10 @@ const ( CharsetUTF8 = "utf8" // CollationUTF8 is the default collation for CharsetUTF8. CollationUTF8 = "utf8_bin" + // CharsetGBK is an extension of the GB2312 character set for simplified Chinese characters. + CharsetGBK = "gbk" + // CollationGBKChineseCi is the default collation for CharsetGBK. + CollationGBKChineseCi = "gbk_chinese_ci" // CharsetUTF8MB4 represents 4 bytes utf8, which works the same way as utf8 in Go. CharsetUTF8MB4 = "utf8mb4" // CollationUTF8MB4 is the default collation for CharsetUTF8MB4. diff --git a/parser_test.go b/parser_test.go index abbb8cf2c..b0cbeebdf 100644 --- a/parser_test.go +++ b/parser_test.go @@ -209,6 +209,62 @@ func (s *testParserSuite) TestSimple(c *C) { _, err = parser.ParseOneStmt(src, "", "") c.Assert(err, IsNil) + src = "CREATE TABLE t38 (c1 CHAR(20) CHARACTER SET gbk);" + _, err = parser.ParseOneStmt(src, "", "") + c.Assert(err, IsNil) + + src = "CREATE TABLE t39 (c1 CHAR(20) CHARACTER SET gbk COLLATE gbk_bin);" + _, err = parser.ParseOneStmt(src, "", "") + c.Assert(err, IsNil) + + src = "CREATE TABLE t219 (c1 TEXT CHARACTER SET gbk COLLATE gbk_chinese_ci)DEFAULT CHARACTER SET=utf8 COLLATE=utf8_general_ci;" + _, err = parser.ParseOneStmt(src, "", "") + c.Assert(err, IsNil) + + src = "CREATE TABLE t218 (c1 TEXT CHARACTER SET gbk)DEFAULT CHARACTER SET=utf8 COLLATE=utf8_general_ci;" + _, err = parser.ParseOneStmt(src, "", "") + c.Assert(err, IsNil) + + src = "CREATE TABLE t211 (c1 TINYTEXT CHARACTER SET gbk COLLATE gbk_chinese_ci)DEFAULT CHARACTER SET=utf8 COLLATE=utf8_general_ci;" + _, err = parser.ParseOneStmt(src, "", "") + c.Assert(err, IsNil) + + src = "CREATE TABLE t210 (c1 TINYTEXT CHARACTER SET gbk)DEFAULT CHARACTER SET=utf8 COLLATE=utf8_general_ci;" + _, err = parser.ParseOneStmt(src, "", "") + c.Assert(err, IsNil) + + src = "CREATE TABLE t203 (c1 VARCHAR(20) CHARACTER SET gbk COLLATE gbk_chinese_ci)DEFAULT CHARACTER SET=utf8 COLLATE=utf8_general_ci;" + _, err = parser.ParseOneStmt(src, "", "") + c.Assert(err, IsNil) + + src = "CREATE TABLE t197 (c1 CHAR(20) CHARACTER SET gbk COLLATE gbk_bin) DEFAULT CHARACTER SET=utf8 COLLATE=utf8_general_ci;" + _, err = parser.ParseOneStmt(src, "", "") + c.Assert(err, IsNil) + + src = "CREATE TABLE t196 (c1 CHAR(20) CHARACTER SET gbk) DEFAULT CHARACTER SET=utf8 COLLATE=utf8_general_ci;" + _, err = parser.ParseOneStmt(src, "", "") + c.Assert(err, IsNil) + + src = "CREATE TABLE `c_s*s.` (a INT, v VARCHAR(255));" + _, err = parser.ParseOneStmt(src, "", "") + c.Assert(err, IsNil) + + src = "CREATE TABLE `\"$c_s*s.` (a INT, v VARCHAR(255));" + _, err = parser.ParseOneStmt(src, "", "") + c.Assert(err, IsNil) + + src = "CREATE TABLE `'<$c_s*s.` (a INT, v VARCHAR(255));" + _, err = parser.ParseOneStmt(src, "", "") + c.Assert(err, IsNil) + + src = "CREATE TABLE `'c_s*s.` (a INT, v VARCHAR(255));" + _, err = parser.ParseOneStmt(src, "", "") + c.Assert(err, IsNil) + + src = "CREATE TABLE `@.` (a INT, v VARCHAR(255));" + _, err = parser.ParseOneStmt(src, "", "") + c.Assert(err, IsNil) + // for #7371, support NATIONAL CHARACTER // reference link: https://dev.mysql.com/doc/refman/5.7/en/charset-national.html src = "CREATE TABLE t(c1 NATIONAL CHARACTER(10));" From 72cf9d6db0b7daedc30540d61566d701401aae1d Mon Sep 17 00:00:00 2001 From: Du Chuan Date: Mon, 13 Apr 2020 14:15:15 +0800 Subject: [PATCH 2/3] Update charset/charset.go Co-Authored-By: kennytm --- charset/charset.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charset/charset.go b/charset/charset.go index 2a2e2cc62..a0f6ecd93 100644 --- a/charset/charset.go +++ b/charset/charset.go @@ -58,7 +58,7 @@ var charsetInfos = []*Charset{ {CharsetASCII, CollationASCII, make(map[string]*Collation), "US ASCII", 1}, {CharsetLatin1, CollationLatin1, make(map[string]*Collation), "cp1252 West European", 1}, {CharsetBin, CollationBin, make(map[string]*Collation), "Binary pseudo charset", 1}, - {CharsetGBK, CollationGBKChineseCi, make(map[string]*Collation), " GBK Simplified Chinese", 1}, + {CharsetGBK, CollationGBKChineseCi, make(map[string]*Collation), "GBK Simplified Chinese", 1}, } // All the names supported collations should be in the following table. From 824320ba6c4aa5c468fb2d06127dbaed1cfbb7b6 Mon Sep 17 00:00:00 2001 From: Du Chuan Date: Tue, 21 Apr 2020 20:27:08 +0800 Subject: [PATCH 3/3] Update charset/charset.go Co-Authored-By: tangenta --- charset/charset.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charset/charset.go b/charset/charset.go index a0f6ecd93..b115b18b5 100644 --- a/charset/charset.go +++ b/charset/charset.go @@ -58,7 +58,7 @@ var charsetInfos = []*Charset{ {CharsetASCII, CollationASCII, make(map[string]*Collation), "US ASCII", 1}, {CharsetLatin1, CollationLatin1, make(map[string]*Collation), "cp1252 West European", 1}, {CharsetBin, CollationBin, make(map[string]*Collation), "Binary pseudo charset", 1}, - {CharsetGBK, CollationGBKChineseCi, make(map[string]*Collation), "GBK Simplified Chinese", 1}, + {CharsetGBK, CollationGBKChineseCi, make(map[string]*Collation), "GBK Simplified Chinese", 2}, } // All the names supported collations should be in the following table.