// XML escaping helpers (post-patch contents of utils/string.go).
// They rely on the "strings" and "unicode/utf8" imports.

// Replacement strings emitted by XmlEscape. Numeric character references are
// used where they are shorter than the equivalent named entity.
const (
	escQuot = "&#34;" // shorter than "&quot;"
	escApos = "&#39;" // shorter than "&apos;"
	escAmp  = "&amp;"
	escLT   = "&lt;"
	escGT   = "&gt;"
	escTab  = "&#x9;"
	escNL   = "&#xA;"
	escCR   = "&#xD;"
	escFFFD = "\uFFFD" // Unicode replacement character
)

// isInCharacterRange reports whether r is one of the code points accepted by
// XmlEscape without substitution: tab, line feed, carriage return, or a rune
// in 0x20-0xD7FF, 0xE000-0xFFFD, or 0x10000-0x10FFFF.
func isInCharacterRange(r rune) bool {
	return r == 0x09 ||
		r == 0x0A ||
		r == 0x0D ||
		r >= 0x20 && r <= 0xD7FF ||
		r >= 0xE000 && r <= 0xFFFD ||
		r >= 0x10000 && r <= 0x10FFFF
}

// XmlEscape returns s with XML-significant characters replaced by character
// references: the quote characters, '&', '<', '>', tab, newline and carriage
// return. Runes outside the accepted character range, and bytes that are not
// valid UTF-8, are replaced with U+FFFD. All other text is copied unchanged.
func XmlEscape(s string) string {
	var sb strings.Builder
	sb.Grow(len(s))
	last := 0 // start of the pending run of bytes that need no escaping
	for i := 0; i < len(s); {
		// Decode explicitly so that width is the number of bytes actually
		// consumed. utf8.RuneLen(r) would report 3 for the U+FFFD produced by
		// a single invalid byte, which hides invalid input.
		r, width := utf8.DecodeRuneInString(s[i:])
		start := i
		i += width

		var esc string
		switch r {
		case '"':
			esc = escQuot
		case '\'':
			esc = escApos
		case '&':
			esc = escAmp
		case '<':
			esc = escLT
		case '>':
			esc = escGT
		case '\t':
			esc = escTab
		case '\n':
			esc = escNL
		case '\r':
			esc = escCR
		default:
			// A RuneError of width 1 marks an invalid byte; a genuine U+FFFD
			// in the input is three bytes wide and is kept as-is.
			if isInCharacterRange(r) && !(r == utf8.RuneError && width == 1) {
				continue
			}
			esc = escFFFD
		}
		sb.WriteString(s[last:start])
		sb.WriteString(esc)
		last = i
	}
	sb.WriteString(s[last:])
	return sb.String()
}