ref: 96375d3675de6ca7e7cc4d3a841d0c6a61cd7d73
parent: 910bfe3e9d2177b8efde268281233d5d1ed333cd
author: Philip Silva <philip.silva@protonmail.com>
date: Mon Apr 26 13:55:12 EDT 2021
Map full width runes to canonical widths
--- a/nodes/nodes.go
+++ b/nodes/nodes.go
@@ -4,6 +4,7 @@
"bytes"
"fmt"
"golang.org/x/net/html"
+ "golang.org/x/text/width"
"github.com/chris-ramon/douceur/css"
"github.com/psilva261/opossum/logger"
"github.com/psilva261/opossum/style"
@@ -77,9 +78,10 @@
return
}
-// filterText removes line break runes (TODO: add this later but handle properly)
-func filterText(t string) (text string) {
- return strings.ReplaceAll(t, "", "")
+// filterText removes line break runes (TODO: add this later but handle properly) and maps runes to canonical widths
+func filterText(t string) string {
+ t = strings.ReplaceAll(t, "", "")
+ return width.Fold.String(t)
}
func (n Node) Type() html.NodeType {
--- a/nodes/nodes_test.go
+++ b/nodes/nodes_test.go
@@ -17,6 +17,14 @@
}
}
+func TestFilterTextFw(t *testing.T) {
+ const s = "（１９９９）"
+ f := filterText(s)
+ if f != "(1999)" {
+ t.Errorf("%v", f)
+ }
+}
+
func TestQueryRef(t *testing.T) {
buf := strings.NewReader(`
<html>