-
Notifications
You must be signed in to change notification settings - Fork 55
/
Copy pathutf8_test.go
66 lines (58 loc) · 1.9 KB
/
utf8_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
package transformer
import (
"testing"
tt "github.com/ampproject/amppackager/transformer/internal/testing"
)
// minimumValidAMP is the HTML document fed to the benchmarks in this file.
// It is stitched together from the shared test fixtures: a doctype, an
// <html ⚡> root, the head elements listed below, and an empty body.
// NOTE(review): going by the name, this is presumably the smallest document
// that passes AMP validation; confirm against the fixture package.
var minimumValidAMP = tt.Concat(
	tt.Doctype,
	"<html ⚡><head>",
	tt.MetaCharset,
	tt.MetaViewport,
	tt.ScriptAMPRuntime,
	tt.LinkFavicon,
	tt.LinkCanonical,
	tt.StyleAMPBoilerplate,
	tt.NoscriptAMPBoilerplate,
	"</head><body></body></html>",
)
// isHTMLInvalid reports whether r is U+0000 NULL or a code point known to
// cause a parse error while the HTML input stream is preprocessed; see
// https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream.
//
// Each case below mirrors one category from the specs, which keeps the
// function easy to compare against them by eye. It is used as the slower
// implementation to test against.
func isHTMLInvalid(r rune) bool {
	switch {
	case r <= 0x1F:
		// U+0000 NULL and the C0 controls
		// (https://infra.spec.whatwg.org/#control); anything negative lands
		// here as well. TAB, LF, FF and CR are the only values let through.
		switch r {
		case 0x9, 0xA, 0xC, 0xD:
			return false
		}
		return true
	case 0x7F <= r && r <= 0x9F:
		// DELETE and the C1 controls (https://infra.spec.whatwg.org/#control).
		return true
	case 0xD800 <= r && r <= 0xDFFF:
		// https://infra.spec.whatwg.org/#surrogate
		return true
	case 0xFDD0 <= r && r <= 0xFDEF:
		// https://infra.spec.whatwg.org/#noncharacter (contiguous range).
		return true
	case r > 0x10FFFF:
		// Outside the codespace: http://unicode.org/glossary/#codespace
		return true
	}
	// https://infra.spec.whatwg.org/#noncharacter (last two code points of
	// every plane, U+xxFFFE and U+xxFFFF). At this point r is known to be at
	// most U+10FFFF, so only the lower bound and the bit pattern remain.
	return r >= 0xFFFE && r&0xFFFE == 0xFFFE
}
// TestIsHTMLValid walks every rune from U+0000 through 0x110000 inclusive
// (one past the last Unicode code point) and checks that isHTMLValid agrees
// with the negation of the reference predicate isHTMLInvalid.
func TestIsHTMLValid(t *testing.T) {
	const lastChecked = 0x110000 // first value outside the codespace
	for cp := rune(0); cp <= lastChecked; cp++ {
		expected, actual := !isHTMLInvalid(cp), isHTMLValid(cp)
		if actual == expected {
			continue
		}
		t.Errorf("IsHTMLValid(U+%06x) got=%t, want=%t", cp, actual, expected)
	}
}
// TestValidateUTF8ForHTMLAllowsReplacementCharacter checks that a string
// consisting solely of U+FFFD REPLACEMENT CHARACTER is accepted, i.e. that
// validateUTF8ForHTML returns a nil error for it.
func TestValidateUTF8ForHTMLAllowsReplacementCharacter(t *testing.T) {
	input := "\uFFFD"
	err := validateUTF8ForHTML(input)
	if err == nil {
		return
	}
	t.Errorf("validateUTF8ForHTML(U+FFFD) error=%q", err)
}
// BenchmarkIsHTMLValid times validateUTF8ForHTML on the minimumValidAMP
// document with whatever isHTMLValid implementation is currently installed.
// The validation result is discarded; only the running time is of interest.
func BenchmarkIsHTMLValid(b *testing.B) {
	for remaining := b.N; remaining > 0; remaining-- {
		validateUTF8ForHTML(minimumValidAMP)
	}
}
// BenchmarkIsHTMLInvalid times validateUTF8ForHTML on the minimumValidAMP
// document while the package-level isHTMLValid hook is pointed at the
// negated reference predicate isHTMLInvalid, so the result can be compared
// with BenchmarkIsHTMLValid.
func BenchmarkIsHTMLInvalid(b *testing.B) {
	// Put isHTMLValidInternal back on exit so later tests and benchmarks are
	// not left running against the reference predicate.
	restore := func() { isHTMLValid = isHTMLValidInternal }
	defer restore()
	isHTMLValid = func(r rune) bool { return !isHTMLInvalid(r) }

	for remaining := b.N; remaining > 0; remaining-- {
		validateUTF8ForHTML(minimumValidAMP)
	}
}