shithub: rrss

Download patch

ref: 9f1f55d40192f27dec4cd6dc2a24631c8921db48
parent: 37acbad25a1283e2e4cc63ec8ba36482f0c4a45b
author: telephil9 <telephil9@gmail.com>
date: Wed Mar 11 13:44:11 EDT 2020

Fix and enhancements

Fix feed fetching not working with certain sites that treat the requests as bot traffic (e.g. Reddit)
Add the ability to filter article bodies by implementing custom functions (quick and dirty hack :))

--- a/main.go	Thu Feb  6 15:14:54 2020
+++ b/main.go	Wed Mar 11 13:44:11 2020
@@ -5,8 +5,10 @@
 	"bufio"
 	"flag"
 	"fmt"
+	"html"
 	"io/ioutil"
 	"log"
+	"net/http"
 	"os"
 	"path"
 	"sort"
@@ -17,6 +19,17 @@
 	"github.com/SlyMarbo/rss"
 )
 
+type Article struct {
+	Title	string
+	Link	string
+	Date	time.Time
+	Content	string
+	Tags	[]string
+}
+
+type renderer func(articles []Article)
+type filter func(article *Article)
+
 var (
 	debug	= flag.Bool("d", false, "print debug msgs to stderr")
 	format	= flag.String("f", "", "output format")
@@ -24,16 +37,10 @@
 	dest	string
 	links 	string
 )
-
-type Article struct {
-	Title	string
-	Link	string
-	Date	time.Time
-	Content	string
-	Tags	[]string
+var filters = map[string]filter {
+	"http://feeds.feedburner.com/Explosm": filterexplosm,
 }
 
-type renderfn func(articles []Article)
 
 func usage() {
 	os.Stderr.WriteString("usage: rrss [-d] [-f barf|blagh] [-r root] <feed file>\n")
@@ -47,6 +54,16 @@
 	}
 }
 
+func fetchfeed(url string) (resp *http.Response, err error) {
+	client := http.DefaultClient
+	req, err := http.NewRequest("GET", url, nil)
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Add("User-Agent", "Mozilla/5.0 (compatible; hjdicks)")
+	return client.Do(req)
+}
+
 func isold(link string, path string) bool {
 	file, err := os.OpenFile(path, os.O_CREATE|os.O_RDONLY, 0775)
 	if err != nil {
@@ -151,12 +168,29 @@
 	return n
 }
 
+func filterexplosm(article *Article) {
+	r, err := http.Get(article.Link)
+	if err != nil {
+		fmt.Printf("Error: %v\n", err)
+		os.Exit(1)
+	}
+	defer r.Body.Close()
+	scanner := bufio.NewScanner(r.Body)
+	for scanner.Scan() {
+		if strings.Contains(scanner.Text(), "main-comic") {
+			s := strings.Replace(scanner.Text(), "src=\"", "src=\"http:", 1)
+			article.Content = s
+			break;
+		}
+	}
+}
+
 func loadfeed(url string, tags []string) []Article {
 	var articles []Article
 	if *debug {
 		log.Printf("Fetching feed '%s'", url)
 	}
-	feed, err := rss.Fetch(url)
+	feed, err := rss.FetchByFunc(fetchfeed, url)
 	if err != nil {
 		log.Printf("Cannot load feed '%s': %v", url, err)
 		return nil
@@ -166,6 +200,9 @@
 			continue
 		}
 		a := Article{i.Title, i.Link, i.Date, conorsum(i), tags}
+		if f, ok := filters[url]; ok {
+			f(&a)
+		}
 		articles = append(articles, a)
 	}
 	if *debug {
@@ -175,13 +212,14 @@
 }
 
 func conorsum(i *rss.Item) string {
-	if len(i.Content) > 0 {
-		return i.Content
+	s := ""
+	switch{
+	case len(i.Content) > 0:
+		s = i.Content
+	case len(i.Summary) > 0:
+		s = i.Summary
 	}
-	if len(i.Summary) > 0 {
-		return i.Summary
-	}
-	return ""
+	return html.UnescapeString(s)
 }
 
 func main() {
@@ -190,7 +228,7 @@
 	if flag.Arg(0) == "" {
 		usage()
 	}
-	var render renderfn
+	var render renderer
 	switch *format {
 	case "barf":
 		render = barf
@@ -209,7 +247,11 @@
 	var articles []Article
 	var tags []string
 	for scanner.Scan() {
-		l := strings.Split(scanner.Text(), " ")
+		t := scanner.Text()
+		if len(t) <= 0 {
+			continue
+		}
+		l := strings.Split(t, " ")
 		if len(l) > 1 {
 			tags = l[1:]
 		}