ref: a0b30cd29a72e5544f92e0f8dd1fe0fbc55b941d
parent: aa74144236a4014954fd58c94758a4cec6fe3094
author: Philip Silva <philip.silva@protonmail.com>
date: Fri May 7 13:11:02 EDT 2021
Encode non-utf8 form input - encode into current Content Type charset - html-escape runes outside of that charset
--- a/browser/browser.go
+++ b/browser/browser.go
@@ -174,7 +174,7 @@
var cached bool
src := attr(*n.DomSubtree, "src")
log.Printf("newImage: src: %v", src)
-
+
if src == img.SrcZero {
return
}
@@ -646,8 +646,7 @@
log.Infof("click processed")
offset := scroller.Offset
- browser.Website.html = res
- browser.Website.layout(browser, ClickRelayout)
+ browser.Website.layout(browser, res, ClickRelayout)
scroller.Offset = offset
dui.MarkLayout(dui.Top.UI)
dui.MarkDraw(dui.Top.UI)
@@ -1317,11 +1316,10 @@
}
b.History.Push(u, 0)
- buf, _, err := b.get(u, true)
+ buf, ct, err := b.get(u, true)
if err != nil {
log.Fatalf("get: %v", err)
}
- b.Website.html = string(buf)
browser = b
style.SetFetcher(b)
@@ -1332,7 +1330,7 @@
}
display = dui.Display
- b.Website.layout(b, InitialLayout)
+ b.render(ct, buf)
return
}
@@ -1420,7 +1418,7 @@
return
}
if contentType.IsHTML() || contentType.IsPlain() || contentType.IsEmpty() {
- b.render(buf)
+ b.render(contentType, buf)
} else {
done := make(chan int)
res := b.Download(done)
@@ -1442,13 +1440,14 @@
}
}
-func (b *Browser) render(buf []byte) {
+func (b *Browser) render(ct opossum.ContentType, buf []byte) {
log.Printf("Empty some cache...")
cache.Tidy()
imageCache = make(map[string]*draw.Image)
- b.Website.html = string(buf) // TODO: correctly interpret UTF8
- b.Website.layout(b, InitialLayout)
+ b.Website.ContentType = ct
+ htm := ct.Utf8(buf)
+ b.Website.layout(b, htm, InitialLayout)
log.Printf("Render...")
dui.Call <- func() {
@@ -1525,10 +1524,6 @@
return nil, opossum.ContentType{}, fmt.Errorf("error reading")
}
contentType, err = opossum.NewContentType(resp.Header.Get("Content-Type"), resp.Request.URL)
- log.Printf("%v\n", resp.Header)
- if err == nil && (contentType.IsHTML() || contentType.IsCSS() || contentType.IsPlain()) {
- buf = contentType.Utf8(buf)
- }
if isNewOrigin {
of := 0
if scroller != nil {
@@ -1546,12 +1541,13 @@
dui.MarkLayout(dui.Top.UI)
dui.MarkDraw(dui.Top.UI)
dui.Render()
- req, err := http.NewRequest("POST", uri.String(), strings.NewReader(data.Encode()))
+ fb := strings.NewReader(escapeValues(b.Website.ContentType, data).Encode())
+ req, err := http.NewRequest("POST", uri.String(), fb)
if err != nil {
return
}
req.Header.Add("User-Agent", "opossum")
- req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+ req.Header.Set("Content-Type", fmt.Sprintf("application/x-www-form-urlencoded; charset=%v", b.Website.Charset()))
resp, err := b.client.Do(req)
if err != nil {
return nil, opossum.ContentType{}, fmt.Errorf("error loading %v: %w", uri, err)
--- a/browser/website.go
+++ b/browser/website.go
@@ -3,6 +3,7 @@
import (
"github.com/mjl-/duit"
"golang.org/x/net/html"
+ "golang.org/x/text/encoding"
"io/ioutil"
"net/url"
"github.com/psilva261/opossum"
@@ -19,11 +20,11 @@
type Website struct {
duit.UI
- html string
+ opossum.ContentType
d *domino.Domino
}
-func (w *Website) layout(f opossum.Fetcher, layouting int) {
+func (w *Website) layout(f opossum.Fetcher, htm string, layouting int) {
defer func() {
browser.statusBarMsg("", false)
}()
@@ -69,12 +70,12 @@
}
log.Printf("1st pass")
- doc, _ := pass(w.html)
+ doc, _ := pass(htm)
log.Printf("2nd pass")
log.Printf("Download style...")
csss := cssSrcs(f, doc)
- doc, nodeMap := pass(w.html, csss...)
+ doc, nodeMap := pass(htm, csss...)
// 3rd pass is only needed initially to load the scripts and set the goja VM
// state. During subsequent calls from click handlers that state is kept.
@@ -103,15 +104,15 @@
log.Infof("Stop existing JS instance")
w.d.Stop()
}
- w.d = domino.NewDomino(w.html, browser, nt)
+ w.d = domino.NewDomino(htm, browser, nt)
w.d.Start()
jsProcessed, changed, err := processJS2(w.d, codes)
if changed && err == nil {
- w.html = jsProcessed
+ htm = jsProcessed
if debugPrintHtml {
log.Printf("%v\n", jsProcessed)
}
- doc, nodeMap = pass(w.html, csss...)
+ doc, nodeMap = pass(htm, csss...)
} else if err != nil {
log.Errorf("JS error: %v", err)
}
@@ -205,7 +206,9 @@
for c := n.FirstChild; c != nil; c = c.NextSibling {
for k, vs := range formData(c, submitBtn) {
- data.Set(k, vs[0]) // TODO: what aboot the rest?
+ for _, v := range vs {
+ data.Add(k, v)
+ }
}
}
@@ -212,6 +215,29 @@
return
}
+// escapeValues returns a new url.Values in which every key and value of q
+// has been passed through an encoder for ct's encoding. The encoder is
+// wrapped with encoding.HTMLEscapeUnsupported, so runes the target charset
+// cannot represent are written as HTML escape sequences.
+// If encoding a key or value fails, the error is logged and the original
+// string is used unchanged.
+func escapeValues(ct opossum.ContentType, q url.Values) (qe url.Values) {
+ qe = make(url.Values)
+ enc := encoding.HTMLEscapeUnsupported(ct.Encoding().NewEncoder())
+
+ for k, vs := range q {
+ ke, err := enc.String(k)
+ if err != nil {
+ log.Errorf("string: %v", err)
+ // Best effort: keep the unencoded key.
+ ke = k
+ }
+ for _, v := range vs {
+ ve, err := enc.String(v)
+ if err != nil {
+ log.Errorf("string: %v", err)
+ // Best effort: keep the unencoded value.
+ ve = v
+ }
+ // Add (not Set) so repeated values for one key are all kept.
+ qe.Add(ke, ve)
+ }
+ }
+
+ return
+}
+
func (b *Browser) submit(form *html.Node, submitBtn *html.Node) {
var err error
var buf []byte
@@ -235,7 +261,7 @@
for k, vs := range formData(form, submitBtn) {
q.Set(k, vs[0]) // TODO: what is with the rest?
}
- uri.RawQuery = q.Encode()
+ uri.RawQuery = escapeValues(b.Website.ContentType, q).Encode()
buf, contentType, err = b.get(uri, true)
} else {
buf, contentType, err = b.PostForm(uri, formData(form, submitBtn))
@@ -251,5 +277,5 @@
return
}
- b.render(buf)
+ b.render(contentType, buf)
}
--- a/browser/website_test.go
+++ b/browser/website_test.go
@@ -1,7 +1,9 @@
package browser
import (
+ "github.com/psilva261/opossum"
"golang.org/x/net/html"
+ "net/url"
"strings"
"testing"
)
@@ -21,5 +23,49 @@
data := formData(f, nil)
if len(data) != 2 {
t.Fatalf("%+v", f)
+ }
+}
+
+// TestPercentEncoding checks the query string produced by escapeValues
+// followed by Encode for a form with one non-ASCII value: with a UTF-8
+// charset the value is percent-encoded as its UTF-8 bytes, with ISO-8859-1
+// it is percent-encoded as an HTML numeric character reference.
+func TestPercentEncoding(t *testing.T) {
+ htm := `<form>
+ <input name=a value=ツ>
+ </form>`
+ doc, err := html.Parse(
+ strings.NewReader(htm),
+ )
+ if err != nil {
+ // Pass the error as a value, never as the format string.
+ t.Fatal(err)
+ }
+ f := grep(doc, "form")
+ data := formData(f, nil)
+ if len(data) != 1 {
+ t.Fatalf("%+v", f)
+ }
+
+ uri, err := url.Parse("http://example.com")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ q := uri.Query()
+ for k, vs := range data {
+ q.Set(k, vs[0])
+ }
+
+ ct := opossum.ContentType{
+ MediaType: "text/html",
+ Params: map[string]string{
+ "charset": "UTF-8",
+ },
+ }
+ res := escapeValues(ct, q).Encode()
+ if res != "a=%E3%83%84" {
+ t.Errorf("%v", res)
+ }
+
+ // Same input, but the charset cannot represent the rune.
+ ct.Params["charset"] = "ISO-8859-1"
+ res = escapeValues(ct, q).Encode()
+ if res != "a=%26%2312484%3B" {
+ t.Errorf("%v", res)
}
}
--- a/opossum.go
+++ b/opossum.go
@@ -2,7 +2,9 @@
import (
"bytes"
- "golang.org/x/text/encoding/ianaindex"
+ "golang.org/x/text/encoding"
+ "golang.org/x/text/encoding/htmlindex"
+ "golang.org/x/text/encoding/unicode"
"io/ioutil"
"mime"
"github.com/psilva261/opossum/logger"
@@ -84,16 +86,34 @@
return c.MediaType == "image/svg+xml"
}
-func (c ContentType) Utf8(buf []byte) []byte {
+// Charset returns the value of the "charset" parameter of the content
+// type, or "UTF-8" when that parameter is not present.
+func (c ContentType) Charset() (cs string) {
+ cs, ok := c.Params["charset"]
+ if !ok {
+ return "UTF-8"
+ }
+ return
+}
+
+// Encoding returns the text encoding for the "charset" parameter of the
+// content type. A missing parameter and the values "utf8" and "utf-8"
+// yield unicode.UTF8. Any other value is looked up with htmlindex.Get; if
+// that lookup fails or returns nil, the error is logged and unicode.UTF8
+// is returned.
+func (c ContentType) Encoding() (e encoding.Encoding) {
charset, ok := c.Params["charset"]
if !ok || charset == "utf8" || charset == "utf-8" {
- return buf
+ return unicode.UTF8
}
- e, err := ianaindex.IANA.Encoding(charset)
- if err != nil {
- log.Errorf("get encoding %v: %v", charset, err)
- return buf
+ // e is the named result, so := only declares err here and the bare
+ // return below hands back the looked-up encoding.
+ e, err := htmlindex.Get(charset)
+ if err != nil || e == nil {
+ log.Errorf("encoding %v: %v", charset, err)
+ return unicode.UTF8
}
+ return
+}
+
+func (c ContentType) Utf8(buf []byte) string {
+ e := c.Encoding()
+
+ if e == unicode.UTF8 {
+ return string(buf)
+ }
+
r := bytes.NewReader(buf)
cr := e.NewDecoder().Reader(r)
@@ -101,8 +121,8 @@
if err == nil {
buf = updated
} else {
- log.Errorf("utf8: unable to decode to %v: %v", charset, err)
+ log.Errorf("utf8: unable to decode to %v: %v", e, err)
}
- return buf
+ return string(buf)
}
\ No newline at end of file
--- a/style/stylesheets.go
+++ b/style/stylesheets.go
@@ -281,6 +281,7 @@
if ok {
return font
}
+ log.Infof("call dui.Display.OpenFont(%v)", fn)
font, err := dui.Display.OpenFont(fn)
if err != nil {
log.Printf("%v is not avail", fn)