shithub: opossum

Download patch

ref: a0b30cd29a72e5544f92e0f8dd1fe0fbc55b941d
parent: aa74144236a4014954fd58c94758a4cec6fe3094
author: Philip Silva <philip.silva@protonmail.com>
date: Fri May 7 13:11:02 EDT 2021

Encode form input for non-UTF-8 pages

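- encode submitted form data (GET query and POST body) into the charset of the current page's Content-Type
- HTML-escape, as numeric character references, any runes that charset cannot represent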

--- a/browser/browser.go
+++ b/browser/browser.go
@@ -174,7 +174,7 @@
 	var cached bool
 	src := attr(*n.DomSubtree, "src")
 	log.Printf("newImage: src: %v", src)
- 
+
 	if src == img.SrcZero {
 		return
 	}
@@ -646,8 +646,7 @@
 	log.Infof("click processed")
 
 	offset := scroller.Offset
-	browser.Website.html = res
-	browser.Website.layout(browser, ClickRelayout)
+	browser.Website.layout(browser, res, ClickRelayout)
 	scroller.Offset = offset
 	dui.MarkLayout(dui.Top.UI)
 	dui.MarkDraw(dui.Top.UI)
@@ -1317,11 +1316,10 @@
 	}
 	b.History.Push(u, 0)
 
-	buf, _, err := b.get(u, true)
+	buf, ct, err := b.get(u, true)
 	if err != nil {
 		log.Fatalf("get: %v", err)
 	}
-	b.Website.html = string(buf)
 
 	browser = b
 	style.SetFetcher(b)
@@ -1332,7 +1330,7 @@
 	}
 	display = dui.Display
 
-	b.Website.layout(b, InitialLayout)
+	b.render(ct, buf)
 
 	return
 }
@@ -1420,7 +1418,7 @@
 		return
 	}
 	if contentType.IsHTML() || contentType.IsPlain() || contentType.IsEmpty() {
-		b.render(buf)
+		b.render(contentType, buf)
 	} else {
 		done := make(chan int)
 		res := b.Download(done)
@@ -1442,13 +1440,14 @@
 	}
 }
 
-func (b *Browser) render(buf []byte) {
+func (b *Browser) render(ct opossum.ContentType, buf []byte) {
 	log.Printf("Empty some cache...")
 	cache.Tidy()
 	imageCache = make(map[string]*draw.Image)
 
-	b.Website.html = string(buf) // TODO: correctly interpret UTF8
-	b.Website.layout(b, InitialLayout)
+	b.Website.ContentType = ct
+	htm := ct.Utf8(buf)
+	b.Website.layout(b, htm, InitialLayout)
 
 	log.Printf("Render...")
 	dui.Call <- func() {
@@ -1525,10 +1524,6 @@
 		return nil, opossum.ContentType{}, fmt.Errorf("error reading")
 	}
 	contentType, err = opossum.NewContentType(resp.Header.Get("Content-Type"), resp.Request.URL)
-	log.Printf("%v\n", resp.Header)
-	if err == nil && (contentType.IsHTML() || contentType.IsCSS() || contentType.IsPlain()) {
-		buf = contentType.Utf8(buf)
-	}
 	if isNewOrigin {
 		of := 0
 		if scroller != nil {
@@ -1546,12 +1541,13 @@
 	dui.MarkLayout(dui.Top.UI)
 	dui.MarkDraw(dui.Top.UI)
 	dui.Render()
-	req, err := http.NewRequest("POST", uri.String(), strings.NewReader(data.Encode()))
+	fb := strings.NewReader(escapeValues(b.Website.ContentType, data).Encode())
+	req, err := http.NewRequest("POST", uri.String(), fb)
 	if err != nil {
 		return
 	}
 	req.Header.Add("User-Agent", "opossum")
-	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+	req.Header.Set("Content-Type", fmt.Sprintf("application/x-www-form-urlencoded; charset=%v", b.Website.Charset()))
 	resp, err := b.client.Do(req)
 	if err != nil {
 		return nil, opossum.ContentType{}, fmt.Errorf("error loading %v: %w", uri, err)
--- a/browser/website.go
+++ b/browser/website.go
@@ -3,6 +3,7 @@
 import (
 	"github.com/mjl-/duit"
 	"golang.org/x/net/html"
+	"golang.org/x/text/encoding"
 	"io/ioutil"
 	"net/url"
 	"github.com/psilva261/opossum"
@@ -19,11 +20,11 @@
 
 type Website struct {
 	duit.UI
-	html      string
+	opossum.ContentType
 	d *domino.Domino
 }
 
-func (w *Website) layout(f opossum.Fetcher, layouting int) {
+func (w *Website) layout(f opossum.Fetcher, htm string, layouting int) {
 	defer func() {
 		browser.statusBarMsg("", false)
 	}()
@@ -69,12 +70,12 @@
 	}
 
 	log.Printf("1st pass")
-	doc, _ := pass(w.html)
+	doc, _ := pass(htm)
 
 	log.Printf("2nd pass")
 	log.Printf("Download style...")
 	csss := cssSrcs(f, doc)
-	doc, nodeMap := pass(w.html, csss...)
+	doc, nodeMap := pass(htm, csss...)
 
 	// 3rd pass is only needed initially to load the scripts and set the goja VM
 	// state. During subsequent calls from click handlers that state is kept.
@@ -103,15 +104,15 @@
 			log.Infof("Stop existing JS instance")
 			w.d.Stop()
 		}
-		w.d = domino.NewDomino(w.html, browser, nt)
+		w.d = domino.NewDomino(htm, browser, nt)
 		w.d.Start()
 		jsProcessed, changed, err := processJS2(w.d, codes)
 		if changed && err == nil {
-			w.html = jsProcessed
+			htm = jsProcessed
 			if debugPrintHtml {
 				log.Printf("%v\n", jsProcessed)
 			}
-			doc, nodeMap = pass(w.html, csss...)
+			doc, nodeMap = pass(htm, csss...)
 		} else if err != nil {
 			log.Errorf("JS error: %v", err)
 		}
@@ -205,7 +206,9 @@
 
 	for c := n.FirstChild; c != nil; c = c.NextSibling {
 		for k, vs := range formData(c, submitBtn) {
-			data.Set(k, vs[0]) // TODO: what aboot the rest?
+			for _, v := range vs {
+				data.Add(k, v)
+			}
 		}
 	}
 
@@ -212,6 +215,29 @@
 	return
 }
 
+func escapeValues(ct opossum.ContentType, q url.Values) (qe url.Values) {
+	qe = make(url.Values)
+	enc := encoding.HTMLEscapeUnsupported(ct.Encoding().NewEncoder())
+
+	for k, vs := range q {
+		ke, err := enc.String(k)
+		if err != nil {
+			log.Errorf("string: %v", err)
+			ke = k
+		}
+		for _, v := range vs {
+			ve, err := enc.String(v)
+			if err != nil {
+				log.Errorf("string: %v", err)
+				ve = v
+			}
+			qe.Add(ke, ve)
+		}
+	}
+
+	return
+}
+
 func (b *Browser) submit(form *html.Node, submitBtn *html.Node) {
 	var err error
 	var buf []byte
@@ -235,7 +261,7 @@
 		for k, vs := range formData(form, submitBtn) {
 			q.Set(k, vs[0]) // TODO: what is with the rest?
 		}
-		uri.RawQuery = q.Encode()
+		uri.RawQuery = escapeValues(b.Website.ContentType, q).Encode()
 		buf, contentType, err = b.get(uri, true)
 	} else {
 		buf, contentType, err = b.PostForm(uri, formData(form, submitBtn))
@@ -251,5 +277,5 @@
 		return
 	}
 
-	b.render(buf)
+	b.render(contentType, buf)
 }
--- a/browser/website_test.go
+++ b/browser/website_test.go
@@ -1,7 +1,9 @@
 package browser
 
 import (
+	"github.com/psilva261/opossum"
 	"golang.org/x/net/html"
+	"net/url"
 	"strings"
 	"testing"
 )
@@ -21,5 +23,49 @@
 	data := formData(f, nil)
 	if len(data) != 2 {
 		t.Fatalf("%+v", f)
+	}
+}
+
+func TestPercentEncoding(t *testing.T) {
+	htm := `<form>
+		<input name=a value=ツ>
+	</form>`
+	doc, err := html.Parse(
+		strings.NewReader(string(htm)),
+	)
+	if err != nil {
+		t.Fatalf(err.Error())
+	}
+	f := grep(doc, "form")
+	data := formData(f, nil)
+	if len(data) != 1 {
+		t.Fatalf("%+v", f)
+	}
+
+	uri, err := url.Parse("http://example.com")
+	if err != nil {
+		t.Fatalf(err.Error())
+	}
+
+	q := uri.Query()
+	for k, vs := range data {
+		q.Set(k, vs[0])
+	}
+
+	ct := opossum.ContentType{
+		MediaType: "text/html",
+		Params: map[string]string{
+			"charset": "UTF-8",
+		},
+	}
+	res := escapeValues(ct, q).Encode()
+	if res != "a=%E3%83%84"  {
+		t.Errorf("%v", res)
+	}
+
+	ct.Params["charset"] = "ISO-8859-1"
+	res = escapeValues(ct, q).Encode()
+	if res != "a=%26%2312484%3B"  {
+		t.Errorf("%v", res)
 	}
 }
--- a/opossum.go
+++ b/opossum.go
@@ -2,7 +2,9 @@
 
 import (
 	"bytes"
-	"golang.org/x/text/encoding/ianaindex"
+	"golang.org/x/text/encoding"
+	"golang.org/x/text/encoding/htmlindex"
+	"golang.org/x/text/encoding/unicode"
 	"io/ioutil"
 	"mime"
 	"github.com/psilva261/opossum/logger"
@@ -84,16 +86,34 @@
 	return c.MediaType == "image/svg+xml"
 }
 
-func (c ContentType) Utf8(buf []byte) []byte {
+func (c ContentType) Charset() (cs string) {
+	cs, ok := c.Params["charset"]
+	if !ok {
+		return "UTF-8"
+	}
+	return
+}
+
+func (c ContentType) Encoding() (e encoding.Encoding) {
 	charset, ok := c.Params["charset"]
 	if !ok || charset == "utf8" || charset == "utf-8" {
-		return buf
+		return unicode.UTF8
 	}
-	e, err := ianaindex.IANA.Encoding(charset)
-	if err != nil {
-		log.Errorf("get encoding %v: %v", charset, err)
-		return buf
+	e, err := htmlindex.Get(charset)
+	if err != nil || e == nil {
+		log.Errorf("encoding %v: %v", charset, err)
+		return unicode.UTF8
 	}
+	return
+}
+
+func (c ContentType) Utf8(buf []byte) string {
+	e := c.Encoding()
+
+	if e == unicode.UTF8 {
+		return string(buf)
+	}
+
 	r := bytes.NewReader(buf)
 	cr := e.NewDecoder().Reader(r)
 
@@ -101,8 +121,8 @@
 	if err == nil {
 		buf = updated
 	} else {
-		log.Errorf("utf8: unable to decode to %v: %v", charset, err)
+		log.Errorf("utf8: unable to decode to %v: %v", e, err)
 	}
 
-	return buf
+	return string(buf)
 }
\ No newline at end of file
--- a/style/stylesheets.go
+++ b/style/stylesheets.go
@@ -281,6 +281,7 @@
 	if ok {
 		return font
 	}
+	log.Infof("call dui.Display.OpenFont(%v)", fn)
 	font, err := dui.Display.OpenFont(fn)
 	if err != nil {
 		log.Printf("%v is not avail", fn)