Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support gbk charset #808

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
20 changes: 13 additions & 7 deletions charset/charset.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,19 @@ var charsetInfos = []*Charset{
{CharsetUTF8, CollationUTF8, make(map[string]*Collation), "UTF-8 Unicode", 3},
{CharsetUTF8MB4, CollationUTF8MB4, make(map[string]*Collation), "UTF-8 Unicode", 4},
{CharsetASCII, CollationASCII, make(map[string]*Collation), "US ASCII", 1},
{CharsetLatin1, CollationLatin1, make(map[string]*Collation), "Latin1", 1},
{CharsetBin, CollationBin, make(map[string]*Collation), "binary", 1},
{CharsetLatin1, CollationLatin1, make(map[string]*Collation), "cp1252 West European", 1},
{CharsetBin, CollationBin, make(map[string]*Collation), "Binary pseudo charset", 1},
{CharsetGBK, CollationGBKChineseCi, make(map[string]*Collation), " GBK Simplified Chinese", 1},
spongedu marked this conversation as resolved.
Show resolved Hide resolved
}

// supportedCollationNames is the set of collation names treated as supported.
// The name of every supported collation should be listed in this table.
// NOTE(review): the first five names appear twice below; this looks like the
// removed lines and the re-aligned added lines of a diff shown together.
// Confirm that only one copy of each key exists in the real file.
var supportedCollationNames = map[string]struct{}{
CollationUTF8: {},
CollationUTF8MB4: {},
CollationASCII: {},
CollationLatin1: {},
CollationBin: {},
CollationUTF8: {},
CollationUTF8MB4: {},
CollationASCII: {},
CollationLatin1: {},
CollationBin: {},
CollationGBKChineseCi: {},
}

// Desc is a charset description.
Expand Down Expand Up @@ -186,6 +188,10 @@ const (
CharsetUTF8 = "utf8"
// CollationUTF8 is the default collation for CharsetUTF8.
CollationUTF8 = "utf8_bin"
// CharsetGBK is an extension of the GB2312 character set for simplified Chinese characters.
CharsetGBK = "gbk"
// CollationGBKChineseCi is the default collation for CharsetGBK.
CollationGBKChineseCi = "gbk_chinese_ci"
// CharsetUTF8MB4 represents 4 bytes utf8, which works the same way as utf8 in Go.
CharsetUTF8MB4 = "utf8mb4"
// CollationUTF8MB4 is the default collation for CharsetUTF8MB4.
Expand Down
56 changes: 56 additions & 0 deletions parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,62 @@ func (s *testParserSuite) TestSimple(c *C) {
_, err = parser.ParseOneStmt(src, "", "")
c.Assert(err, IsNil)

src = "CREATE TABLE t38 (c1 CHAR(20) CHARACTER SET gbk);"
_, err = parser.ParseOneStmt(src, "", "")
c.Assert(err, IsNil)

src = "CREATE TABLE t39 (c1 CHAR(20) CHARACTER SET gbk COLLATE gbk_bin);"
_, err = parser.ParseOneStmt(src, "", "")
c.Assert(err, IsNil)

src = "CREATE TABLE t219 (c1 TEXT CHARACTER SET gbk COLLATE gbk_chinese_ci)DEFAULT CHARACTER SET=utf8 COLLATE=utf8_general_ci;"
_, err = parser.ParseOneStmt(src, "", "")
c.Assert(err, IsNil)

src = "CREATE TABLE t218 (c1 TEXT CHARACTER SET gbk)DEFAULT CHARACTER SET=utf8 COLLATE=utf8_general_ci;"
_, err = parser.ParseOneStmt(src, "", "")
c.Assert(err, IsNil)

src = "CREATE TABLE t211 (c1 TINYTEXT CHARACTER SET gbk COLLATE gbk_chinese_ci)DEFAULT CHARACTER SET=utf8 COLLATE=utf8_general_ci;"
_, err = parser.ParseOneStmt(src, "", "")
c.Assert(err, IsNil)

src = "CREATE TABLE t210 (c1 TINYTEXT CHARACTER SET gbk)DEFAULT CHARACTER SET=utf8 COLLATE=utf8_general_ci;"
_, err = parser.ParseOneStmt(src, "", "")
c.Assert(err, IsNil)

src = "CREATE TABLE t203 (c1 VARCHAR(20) CHARACTER SET gbk COLLATE gbk_chinese_ci)DEFAULT CHARACTER SET=utf8 COLLATE=utf8_general_ci;"
_, err = parser.ParseOneStmt(src, "", "")
c.Assert(err, IsNil)

src = "CREATE TABLE t197 (c1 CHAR(20) CHARACTER SET gbk COLLATE gbk_bin) DEFAULT CHARACTER SET=utf8 COLLATE=utf8_general_ci;"
_, err = parser.ParseOneStmt(src, "", "")
c.Assert(err, IsNil)

src = "CREATE TABLE t196 (c1 CHAR(20) CHARACTER SET gbk) DEFAULT CHARACTER SET=utf8 COLLATE=utf8_general_ci;"
_, err = parser.ParseOneStmt(src, "", "")
c.Assert(err, IsNil)

src = "CREATE TABLE `c_s*s.` (a INT, v VARCHAR(255));"
_, err = parser.ParseOneStmt(src, "", "")
c.Assert(err, IsNil)

src = "CREATE TABLE `\"$c_s*s.` (a INT, v VARCHAR(255));"
_, err = parser.ParseOneStmt(src, "", "")
c.Assert(err, IsNil)

src = "CREATE TABLE `'<$c_s*s.` (a INT, v VARCHAR(255));"
_, err = parser.ParseOneStmt(src, "", "")
c.Assert(err, IsNil)

src = "CREATE TABLE `'c_s*s.` (a INT, v VARCHAR(255));"
_, err = parser.ParseOneStmt(src, "", "")
c.Assert(err, IsNil)

src = "CREATE TABLE `@.` (a INT, v VARCHAR(255));"
_, err = parser.ParseOneStmt(src, "", "")
c.Assert(err, IsNil)

// for #7371, support NATIONAL CHARACTER
// reference link: https://dev.mysql.com/doc/refman/5.7/en/charset-national.html
src = "CREATE TABLE t(c1 NATIONAL CHARACTER(10));"
Expand Down