From 88cbdf3dd3178a2f965145642ff9855ae9656950 Mon Sep 17 00:00:00 2001 From: Onur Cinar Date: Sun, 25 Jun 2023 17:45:03 +0000 Subject: [PATCH] HTML escape and unescape normalizer. Fixes #104 --- README.md | 2 ++ checker.go | 56 +++++++++++++++++--------------- doc/normalizers/html_escape.md | 19 +++++++++++ doc/normalizers/html_unescape.md | 22 +++++++++++++ html_escape.go | 26 +++++++++++++++ html_escape_test.go | 38 ++++++++++++++++++++++ html_unescape.go | 26 +++++++++++++++ html_unescape_test.go | 38 ++++++++++++++++++++++ 8 files changed, 200 insertions(+), 27 deletions(-) create mode 100644 doc/normalizers/html_escape.md create mode 100644 doc/normalizers/html_unescape.md create mode 100644 html_escape.go create mode 100644 html_escape_test.go create mode 100644 html_unescape.go create mode 100644 html_unescape_test.go diff --git a/README.md b/README.md index b9afa74..20bc66a 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,8 @@ This package currently provides the following checkers: This package currently provides the following normalizers. They can be mixed with the checkers when defining the validation steps for user data. +- [html_escape](doc/normalizers/html_escape.md) applies HTML escaping to special characters. +- [html_unescape](doc/normalizers/html_unescape.md) applies HTML unescaping to special characters. - [lower](doc/normalizers/lower.md) maps all Unicode letters in the given value to their lower case. - [upper](doc/normalizers/upper.md) maps all Unicode letters in the given value to their upper case. - [title](doc/normalizers/title.md) maps the first letter of each word to their upper case. diff --git a/checker.go b/checker.go index cfe586a..98ff61b 100644 --- a/checker.go +++ b/checker.go @@ -37,33 +37,35 @@ const ResultValid Result = "VALID" // makers provides mapping to maker function for the checkers. 
var makers = map[string]MakeFunc{ - CheckerAlphanumeric: makeAlphanumeric, - CheckerASCII: makeASCII, - CheckerCreditCard: makeCreditCard, - CheckerCidr: makeCidr, - CheckerDigits: makeDigits, - CheckerEmail: makeEmail, - CheckerFqdn: makeFqdn, - CheckerIP: makeIP, - CheckerIPV4: makeIPV4, - CheckerIPV6: makeIPV6, - CheckerISBN: makeISBN, - CheckerLuhn: makeLuhn, - CheckerMac: makeMac, - CheckerMax: makeMax, - CheckerMaxLength: makeMaxLength, - CheckerMin: makeMin, - CheckerMinLength: makeMinLength, - CheckerRegexp: makeRegexp, - CheckerRequired: makeRequired, - CheckerSame: makeSame, - CheckerURL: makeURL, - NormalizerLower: makeLower, - NormalizerUpper: makeUpper, - NormalizerTitle: makeTitle, - NormalizerTrim: makeTrim, - NormalizerTrimLeft: makeTrimLeft, - NormalizerTrimRight: makeTrimRight, + CheckerAlphanumeric: makeAlphanumeric, + CheckerASCII: makeASCII, + CheckerCreditCard: makeCreditCard, + CheckerCidr: makeCidr, + CheckerDigits: makeDigits, + CheckerEmail: makeEmail, + CheckerFqdn: makeFqdn, + CheckerIP: makeIP, + CheckerIPV4: makeIPV4, + CheckerIPV6: makeIPV6, + CheckerISBN: makeISBN, + CheckerLuhn: makeLuhn, + CheckerMac: makeMac, + CheckerMax: makeMax, + CheckerMaxLength: makeMaxLength, + CheckerMin: makeMin, + CheckerMinLength: makeMinLength, + CheckerRegexp: makeRegexp, + CheckerRequired: makeRequired, + CheckerSame: makeSame, + CheckerURL: makeURL, + NormalizerHTMLEscape: makeHTMLEscape, + NormalizerHTMLUnescape: makeHTMLUnescape, + NormalizerLower: makeLower, + NormalizerUpper: makeUpper, + NormalizerTitle: makeTitle, + NormalizerTrim: makeTrim, + NormalizerTrimLeft: makeTrimLeft, + NormalizerTrimRight: makeTrimRight, } // Register registers the given checker name and the maker function. 
diff --git a/doc/normalizers/html_escape.md b/doc/normalizers/html_escape.md new file mode 100644 index 0000000..a8f9f9f --- /dev/null +++ b/doc/normalizers/html_escape.md @@ -0,0 +1,19 @@ +# HTML Escape Normalizer + +The `html-escape` normalizer uses [html.EscapeString](https://pkg.go.dev/html#EscapeString) to escape special characters like "<" to become "&lt;". It escapes only five such characters: <, >, &, ' and ". + +```golang +type Comment struct { + Body string `checkers:"html-escape"` +} + +comment := &Comment{ + Body: "<tag> \"Checker\" & 'Library' </tag>", +} + +checker.Check(comment) + +// Outputs: +// &lt;tag&gt; &#34;Checker&#34; &amp; &#39;Library&#39; &lt;/tag&gt; +fmt.Println(comment.Body) +``` diff --git a/doc/normalizers/html_unescape.md b/doc/normalizers/html_unescape.md new file mode 100644 index 0000000..ae64303 --- /dev/null +++ b/doc/normalizers/html_unescape.md @@ -0,0 +1,22 @@ +# HTML Unescape Normalizer + +The `html-unescape` normalizer uses [html.UnescapeString](https://pkg.go.dev/html#UnescapeString) to unescape entities like "&lt;" to become "<". It unescapes a larger range of entities than EscapeString escapes. For example, "&aacute;" unescapes to "á", as does "&#225;" and "&#xE1;". + +```golang +type Comment struct { + Body string `checkers:"html-unescape"` +} + +comment := &Comment{ + Body: "&lt;tag&gt; &#34;Checker&#34; &amp; &#39;Library&#39; &lt;/tag&gt;", +} + +_, valid := checker.Check(comment) +if !valid { + t.Fail() +} + +// Outputs: +// <tag> "Checker" & 'Library' </tag> +fmt.Println(comment.Body) +``` diff --git a/html_escape.go b/html_escape.go new file mode 100644 index 0000000..ed540ad --- /dev/null +++ b/html_escape.go @@ -0,0 +1,26 @@ +package checker + +import ( + "html" + "reflect" +) + +// NormalizerHTMLEscape is the name of the normalizer. +const NormalizerHTMLEscape = "html-escape" + +// makeHTMLEscape makes a normalizer function for the HTML escape normalizer. +func makeHTMLEscape(_ string) CheckFunc { + return normalizeHTMLEscape +} + +// normalizeHTMLEscape applies HTML escaping to special characters. 
+// Uses html.EscapeString for the actual escape operation. +func normalizeHTMLEscape(value, _ reflect.Value) Result { + if value.Kind() != reflect.String { + panic("string expected") + } + + value.SetString(html.EscapeString(value.String())) + + return ResultValid +} diff --git a/html_escape_test.go b/html_escape_test.go new file mode 100644 index 0000000..3ce5ba5 --- /dev/null +++ b/html_escape_test.go @@ -0,0 +1,38 @@ +package checker_test + +import ( + "testing" + + "github.com/cinar/checker" +) + +func TestNormalizeHTMLEscapeNonString(t *testing.T) { + defer checker.FailIfNoPanic(t) + + type Comment struct { + Body int `checkers:"html-escape"` + } + + comment := &Comment{} + + checker.Check(comment) +} + +func TestNormalizeHTMLEscape(t *testing.T) { + type Comment struct { + Body string `checkers:"html-escape"` + } + + comment := &Comment{ + Body: "<tag> \"Checker\" & 'Library' </tag>", + } + + _, valid := checker.Check(comment) + if !valid { + t.Fail() + } + + if comment.Body != "&lt;tag&gt; &#34;Checker&#34; &amp; &#39;Library&#39; &lt;/tag&gt;" { + t.Fail() + } +} diff --git a/html_unescape.go b/html_unescape.go new file mode 100644 index 0000000..f348805 --- /dev/null +++ b/html_unescape.go @@ -0,0 +1,26 @@ +package checker + +import ( + "html" + "reflect" +) + +// NormalizerHTMLUnescape is the name of the normalizer. +const NormalizerHTMLUnescape = "html-unescape" + +// makeHTMLUnescape makes a normalizer function for the HTML unescape normalizer. +func makeHTMLUnescape(_ string) CheckFunc { + return normalizeHTMLUnescape +} + +// normalizeHTMLUnescape applies HTML unescaping to special characters. +// Uses html.UnescapeString for the actual unescape operation. 
+func normalizeHTMLUnescape(value, _ reflect.Value) Result { + if value.Kind() != reflect.String { + panic("string expected") + } + + value.SetString(html.UnescapeString(value.String())) + + return ResultValid +} diff --git a/html_unescape_test.go b/html_unescape_test.go new file mode 100644 index 0000000..a459736 --- /dev/null +++ b/html_unescape_test.go @@ -0,0 +1,38 @@ +package checker_test + +import ( + "testing" + + "github.com/cinar/checker" +) + +func TestNormalizeHTMLUnescapeNonString(t *testing.T) { + defer checker.FailIfNoPanic(t) + + type Comment struct { + Body int `checkers:"html-unescape"` + } + + comment := &Comment{} + + checker.Check(comment) +} + +func TestNormalizeHTMLUnescape(t *testing.T) { + type Comment struct { + Body string `checkers:"html-unescape"` + } + + comment := &Comment{ + Body: "&lt;tag&gt; &#34;Checker&#34; &amp; &#39;Library&#39; &lt;/tag&gt;", + } + + _, valid := checker.Check(comment) + if !valid { + t.Fail() + } + + if comment.Body != "<tag> \"Checker\" & 'Library' </tag>" { + t.Fail() + } +}