Skip to content

Commit

Permalink
HTML escape and unescape normalizer. Fixes #104
Browse files Browse the repository at this point in the history
  • Loading branch information
cinar committed Jun 25, 2023
1 parent 4bf287b commit 88cbdf3
Show file tree
Hide file tree
Showing 8 changed files with 200 additions and 27 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ This package currently provides the following checkers:

This package currently provides the following normalizers. They can be mixed with the checkers when defining the validation steps for user data.

- [html-escape](doc/normalizers/html_escape.md) applies HTML escaping to special characters.
- [html-unescape](doc/normalizers/html_unescape.md) applies HTML unescaping to special characters.
- [lower](doc/normalizers/lower.md) maps all Unicode letters in the given value to their lower case.
- [upper](doc/normalizers/upper.md) maps all Unicode letters in the given value to their upper case.
- [title](doc/normalizers/title.md) maps the first letter of each word to their upper case.
Expand Down
56 changes: 29 additions & 27 deletions checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,33 +37,35 @@ const ResultValid Result = "VALID"

// makers maps each checker and normalizer name to the maker function
// registered for it.
//
// NOTE(review): every key from CheckerAlphanumeric through NormalizerTrimRight
// is listed twice below. This looks like the before and after sides of a
// change that adds NormalizerHTMLEscape and NormalizerHTMLUnescape. Assuming
// these names are constants, the Go compiler rejects duplicate keys in a map
// literal, so confirm that only one set of entries is kept.
var makers = map[string]MakeFunc{
CheckerAlphanumeric: makeAlphanumeric,
CheckerASCII: makeASCII,
CheckerCreditCard: makeCreditCard,
CheckerCidr: makeCidr,
CheckerDigits: makeDigits,
CheckerEmail: makeEmail,
CheckerFqdn: makeFqdn,
CheckerIP: makeIP,
CheckerIPV4: makeIPV4,
CheckerIPV6: makeIPV6,
CheckerISBN: makeISBN,
CheckerLuhn: makeLuhn,
CheckerMac: makeMac,
CheckerMax: makeMax,
CheckerMaxLength: makeMaxLength,
CheckerMin: makeMin,
CheckerMinLength: makeMinLength,
CheckerRegexp: makeRegexp,
CheckerRequired: makeRequired,
CheckerSame: makeSame,
CheckerURL: makeURL,
NormalizerLower: makeLower,
NormalizerUpper: makeUpper,
NormalizerTitle: makeTitle,
NormalizerTrim: makeTrim,
NormalizerTrimLeft: makeTrimLeft,
NormalizerTrimRight: makeTrimRight,
CheckerAlphanumeric: makeAlphanumeric,
CheckerASCII: makeASCII,
CheckerCreditCard: makeCreditCard,
CheckerCidr: makeCidr,
CheckerDigits: makeDigits,
CheckerEmail: makeEmail,
CheckerFqdn: makeFqdn,
CheckerIP: makeIP,
CheckerIPV4: makeIPV4,
CheckerIPV6: makeIPV6,
CheckerISBN: makeISBN,
CheckerLuhn: makeLuhn,
CheckerMac: makeMac,
CheckerMax: makeMax,
CheckerMaxLength: makeMaxLength,
CheckerMin: makeMin,
CheckerMinLength: makeMinLength,
CheckerRegexp: makeRegexp,
CheckerRequired: makeRequired,
CheckerSame: makeSame,
CheckerURL: makeURL,
NormalizerHTMLEscape: makeHTMLEscape,
NormalizerHTMLUnescape: makeHTMLUnescape,
NormalizerLower: makeLower,
NormalizerUpper: makeUpper,
NormalizerTitle: makeTitle,
NormalizerTrim: makeTrim,
NormalizerTrimLeft: makeTrimLeft,
NormalizerTrimRight: makeTrimRight,
}

// Register registers the given checker name and the maker function.
Expand Down
19 changes: 19 additions & 0 deletions doc/normalizers/html_escape.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# HTML Escape Normalizer

The `html-escape` normalizer uses [html.EscapeString](https://pkg.go.dev/html#EscapeString) to escape special characters like "<" to become "&lt;". It escapes only five such characters: <, >, &, ' and ".

```golang
type Comment struct {
Body string `checkers:"html-escape"`
}

comment := &Comment{
Body: "<tag> \"Checker\" & 'Library' </tag>",
}

checker.Check(comment)

// Outputs:
// &lt;tag&gt; &#34;Checker&#34; &amp; &#39;Library&#39; &lt;/tag&gt;
fmt.Println(comment.Body)
```
22 changes: 22 additions & 0 deletions doc/normalizers/html_unescape.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# HTML Unescape Normalizer

The `html-unescape` normalizer uses [html.UnescapeString](https://pkg.go.dev/html#UnescapeString) to unescape entities like "&lt;" to become "<". It unescapes a larger range of entities than EscapeString escapes. For example, "&aacute;" unescapes to "á", as does "&#225;" and "&#xE1;".

```golang
type Comment struct {
Body string `checkers:"html-unescape"`
}

comment := &Comment{
Body: "&lt;tag&gt; &#34;Checker&#34; &amp; &#39;Library&#39; &lt;/tag&gt;",
}

_, valid := checker.Check(comment)
if !valid {
t.Fail()
}

// Outputs:
// <tag> "Checker" & 'Library' </tag>
fmt.Println(comment.Body)
```
26 changes: 26 additions & 0 deletions html_escape.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package checker

import (
"html"
"reflect"
)

// NormalizerHTMLEscape is the name of the normalizer.
const NormalizerHTMLEscape = "html-escape"

// makeHTMLEscape makes a normalizer function for the HTML escape normalizer.
// The string argument is ignored; the returned function is always
// normalizeHTMLEscape.
func makeHTMLEscape(_ string) CheckFunc {
return normalizeHTMLEscape
}

// normalizeHTMLEscape replaces the string held by value with its HTML-escaped
// form, as produced by html.EscapeString. The second argument is unused.
//
// It panics with "string expected" when value is not of kind string;
// otherwise it always returns ResultValid.
func normalizeHTMLEscape(value, _ reflect.Value) Result {
	if kind := value.Kind(); kind != reflect.String {
		panic("string expected")
	}

	// Compute the escaped text first, then write it back into the field.
	escaped := html.EscapeString(value.String())
	value.SetString(escaped)

	return ResultValid
}
38 changes: 38 additions & 0 deletions html_escape_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package checker_test

import (
"testing"

"github.com/cinar/checker"
)

// TestNormalizeHTMLEscapeNonString applies the html-escape normalizer to an
// int field, which is expected to panic. It relies on checker.FailIfNoPanic
// to flag the case where no panic happens.
func TestNormalizeHTMLEscapeNonString(t *testing.T) {
	type Comment struct {
		Body int `checkers:"html-escape"`
	}

	// Deferred before Check runs so it observes whether Check panicked.
	defer checker.FailIfNoPanic(t)

	checker.Check(new(Comment))
}

// TestNormalizeHTMLEscape verifies that the html-escape normalizer rewrites a
// string field in place, escaping <, >, &, ' and ".
func TestNormalizeHTMLEscape(t *testing.T) {
	type Comment struct {
		Body string `checkers:"html-escape"`
	}

	const (
		input = "<tag> \"Checker\" & 'Library' </tag>"
		want  = "&lt;tag&gt; &#34;Checker&#34; &amp; &#39;Library&#39; &lt;/tag&gt;"
	)

	comment := &Comment{Body: input}

	// Keep the first result so a failure reports what Check complained about.
	result, valid := checker.Check(comment)
	if !valid {
		t.Errorf("Check reported the comment as invalid: %v", result)
	}

	if comment.Body != want {
		t.Errorf("Body = %q, want %q", comment.Body, want)
	}
}
26 changes: 26 additions & 0 deletions html_unescape.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package checker

import (
"html"
"reflect"
)

// NormalizerHTMLUnescape is the name of the normalizer.
const NormalizerHTMLUnescape = "html-unescape"

// makeHTMLUnescape makes a normalizer function for the HTML unescape normalizer.
// The string argument is ignored; the returned function is always
// normalizeHTMLUnescape.
func makeHTMLUnescape(_ string) CheckFunc {
return normalizeHTMLUnescape
}

// normalizeHTMLUnescape replaces the string held by value with its
// HTML-unescaped form, as produced by html.UnescapeString. The second
// argument is unused.
//
// It panics with "string expected" when value is not of kind string;
// otherwise it always returns ResultValid.
func normalizeHTMLUnescape(value, _ reflect.Value) Result {
	if kind := value.Kind(); kind != reflect.String {
		panic("string expected")
	}

	// Compute the unescaped text first, then write it back into the field.
	unescaped := html.UnescapeString(value.String())
	value.SetString(unescaped)

	return ResultValid
}
38 changes: 38 additions & 0 deletions html_unescape_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package checker_test

import (
"testing"

"github.com/cinar/checker"
)

// TestNormalizeHTMLUnescapeNonString applies the html-unescape normalizer to
// an int field, which is expected to panic. It relies on
// checker.FailIfNoPanic to flag the case where no panic happens.
func TestNormalizeHTMLUnescapeNonString(t *testing.T) {
	type Comment struct {
		Body int `checkers:"html-unescape"`
	}

	// Deferred before Check runs so it observes whether Check panicked.
	defer checker.FailIfNoPanic(t)

	checker.Check(new(Comment))
}

// TestNormalizeHTMLUnescape verifies that the html-unescape normalizer
// rewrites a string field in place, turning HTML entities back into the
// characters they stand for.
func TestNormalizeHTMLUnescape(t *testing.T) {
	type Comment struct {
		Body string `checkers:"html-unescape"`
	}

	const (
		input = "&lt;tag&gt; &#34;Checker&#34; &amp; &#39;Library&#39; &lt;/tag&gt;"
		want  = "<tag> \"Checker\" & 'Library' </tag>"
	)

	comment := &Comment{Body: input}

	// Keep the first result so a failure reports what Check complained about.
	result, valid := checker.Check(comment)
	if !valid {
		t.Errorf("Check reported the comment as invalid: %v", result)
	}

	if comment.Body != want {
		t.Errorf("Body = %q, want %q", comment.Body, want)
	}
}

0 comments on commit 88cbdf3

Please sign in to comment.