shithub: rrss

Download patch

ref: 37acbad25a1283e2e4cc63ec8ba36482f0c4a45b
author: telephil9 <telephil9@gmail.com>
date: Thu Feb 6 15:14:54 EST 2020

Initial import

--- /dev/null	Thu Dec 31 12:39:09 2020
+++ b/README	Thu Feb  6 15:14:54 2020
@@ -0,0 +1,6 @@
+This is a fork of rrss (http://code.9front.org/hg/rrss) by Stanley Lieber.
+This version is modified to handle multiple feeds at once.
+When run, the program expects a text file with the following format:
+<feed url> [tag1 [tag2 ... [tagN]]]
+
+For documentation refer to rrss.txt which is the original README of the program.
--- /dev/null	Thu Dec 31 12:39:09 2020
+++ b/main.go	Thu Feb  6 15:14:54 2020
@@ -0,0 +1,223 @@
+// RSS feed reader that outputs plain text, werc/apps/barf
+package main
+
+import (
+	"bufio"
+	"flag"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"os"
+	"path"
+	"sort"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/SlyMarbo/rss"
+)
+
+var (
+	debug  = flag.Bool("d", false, "print debug msgs to stderr")
+	format = flag.String("f", "", "output format")
+	root   = flag.String("r", "", "output root")
+	dest   string // output directory for the selected format; set by the render funcs
+	links  string // path of the dedup file (<root>/links) listing already-seen item URLs
+)
+
+type Article struct { // Article is one normalized feed item, independent of the output format.
+	Title	string    // item title
+	Link	string    // item URL; also used as the dedup key in the links file
+	Date	time.Time // publication date reported by the feed
+	Content	string    // item content, falling back to summary (see conorsum)
+	Tags	[]string  // tags from the feed-file line; nil when the line had none
+}
+
+type renderfn func(articles []Article)
+
+func usage() { // usage prints the synopsis plus flag defaults to stderr and exits with status 2.
+	os.Stderr.WriteString("usage: rrss [-d] [-f barf|blagh] [-r root] <feed file>\n")
+	flag.PrintDefaults()
+	os.Exit(2)
+}
+
+func check(err error) { // check aborts the whole program (log.Fatal) on any non-nil error.
+	if err != nil {
+		log.Fatal(err)
+	}
+}
+
+func isold(link string, path string) bool { // isold reports whether link was already recorded in the dedup file at path.
+	file, err := os.OpenFile(path, os.O_CREATE|os.O_RDONLY, 0775) // O_CREATE: a missing file is created empty
+	if err != nil {
+		return true // unreadable dedup file: claim "old" so nothing gets duplicated
+	}
+	defer file.Close()
+	scanner := bufio.NewScanner(file)
+	for scanner.Scan() {
+		if strings.Contains(link, scanner.Text()) { // NOTE(review): substring match (stored line inside link), not equality — confirm intended
+			return true
+		}
+	}
+	return false // NOTE(review): scanner.Err() is ignored, so a read error makes an item look new
+}
+
+func makeold(link string, path string) (int, error) { // makeold appends link to the dedup file at path.
+	f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0775)
+	check(err)      // check BEFORE deferring Close: f is nil when OpenFile fails
+	defer f.Close()
+	if link == "" {
+		link = "empty" // keep one line per item even when a feed omits the link
+	}
+	return f.WriteString(link + "\n")
+}
+
+func writef(dir, filename, content string) { // writef writes content (newline-terminated) to dir/filename; fatal on error.
+	err := ioutil.WriteFile(path.Join(dir, filename), []byte(content+"\n"), 0775)
+	check(err)
+}
+
+func ensuredir(dir string) { // ensuredir is mkdir -p for dir; fatal on failure.
+	err := os.MkdirAll(dir, 0775)
+	check(err)
+}
+
+// barf writes articles as numbered post directories under <root>/src.
+func barf(articles []Article) {
+	dest = path.Join(*root, "src")
+	ensuredir(dest)
+	n := lastarticle(dest) // continue numbering after the highest existing post
+	for _, a := range articles {
+		n = n + 1
+		d := fmt.Sprintf("%s/%d", dest, n)
+		ensuredir(d)
+		writef(d, "title", a.Title)
+		writef(d, "link", a.Link)
+		writef(d, "date", a.Date.String())
+		writef(d, "body", a.Content)
+		if a.Tags != nil {
+			ensuredir(path.Join(d, "tags"))
+			for _, j := range a.Tags {
+				f, err := os.Create(d + "/tags/" + j) // each tag is an empty marker file
+				check(err)                            // check BEFORE Close: f is nil when Create fails
+				f.Close()
+			}
+		}
+		_, err := makeold(a.Link, links) // record the link so later runs skip this item
+		check(err)
+	}
+}
+
+// blagh writes articles as <root>/YYYY/MM/DD/<n>/index.md (http://werc.cat-v.org/apps/blagh).
+func blagh(articles []Article) {
+	var err error
+	for _, a := range articles {
+		dest = path.Join(*root, fmt.Sprintf("%d/%02d/%02d", a.Date.Year(), a.Date.Month(), a.Date.Day()))
+		ensuredir(dest)
+		f, _ := os.Open(dest)     // directory was just created above, so Open should succeed
+		n, _ := f.Readdirnames(0) // existing posts for that day pick the next index
+		f.Close()                 // close now — a defer inside the loop would pile up open handles
+		d := fmt.Sprintf("%s/%d", dest, len(n))
+		ensuredir(d)
+		writef(d, "index.md", fmt.Sprintf("%s\n===\n\n%s\n", a.Title, a.Content))
+		_, err = makeold(a.Link, links) // record the link so later runs skip this item
+		check(err)
+	}
+}
+
+func stdout(articles []Article) { // stdout prints each article as a plain-text record (default format).
+	for _, a := range articles {
+		fmt.Printf("title: %s\nlink: %s\ndate: %s\n%s\n\n",
+			a.Title, a.Link, a.Date, a.Content)
+	}
+}
+
+func lastarticle(dir string) int { // lastarticle returns the highest numeric entry name in dir, or 0 if none.
+	f, err := os.Open(dir)
+	check(err)      // check BEFORE deferring Close: f is nil when Open fails
+	defer f.Close()
+	dn, err := f.Readdirnames(0)
+	check(err)
+	var di []int
+	for _, j := range dn {
+		k, _ := strconv.Atoi(j) // non-numeric names become 0 and never win the max
+		di = append(di, k)
+	}
+	sort.Ints(di)
+	n := 0
+	if di != nil {
+		n = di[len(di)-1] // largest value after the sort
+	}
+	return n
+}
+
+func loadfeed(url string, tags []string) []Article { // loadfeed fetches url and returns its not-yet-seen items tagged with tags.
+	var articles []Article
+	if *debug {
+		log.Printf("Fetching feed '%s'", url)
+	}
+	feed, err := rss.Fetch(url)
+	if err != nil {
+		log.Printf("Cannot load feed '%s': %v", url, err) // a broken feed is logged and skipped, not fatal
+		return nil
+	}
+	for _, i := range feed.Items {
+		if isold(i.Link, links) { // skip items already recorded in the dedup file
+			continue
+		}
+		a := Article{i.Title, i.Link, i.Date, conorsum(i), tags}
+		articles = append(articles, a)
+	}
+	if *debug {
+		log.Printf("Loaded %d items", len(articles))
+	}
+	return articles
+}
+
+func conorsum(i *rss.Item) string { // conorsum returns the best body text: content, else summary, else "".
+	if len(i.Content) > 0 {
+		return i.Content
+	}
+	if len(i.Summary) > 0 {
+		return i.Summary
+	}
+	return ""
+}
+
+func main() {
+	flag.Usage = usage
+	flag.Parse()
+	if flag.Arg(0) == "" {
+		usage() // the feed-file argument is mandatory
+	}
+	var render renderfn
+	switch *format {
+	case "barf":
+		render = barf
+	case "blagh":
+		render = blagh
+	case "":
+		render = stdout // default: plain text on stdout
+	default:
+		usage()
+	}
+	links = path.Join(*root, "links") // dedup file shared by every feed
+	file, err := os.Open(flag.Arg(0))
+	check(err)
+	defer file.Close()
+	scanner := bufio.NewScanner(file)
+	var articles []Article
+	for scanner.Scan() {
+		l := strings.Split(scanner.Text(), " ") // line format: <feed url> [tag1 ... tagN]
+		var tags []string                       // reset per line so tags can't leak onto later feeds
+		if len(l) > 1 {
+			tags = l[1:]
+		}
+		a := loadfeed(l[0], tags)
+		if a != nil {
+			articles = append(articles, a...)
+		}
+	}
+	sort.Slice(articles, func(i, j int) bool { return articles[i].Date.Before(articles[j].Date) })
+	render(articles)
+}
--- /dev/null	Thu Dec 31 12:39:09 2020
+++ b/rrss.txt	Thu Feb  6 15:14:54 2020
@@ -0,0 +1,41 @@
+     RRSS(1)                                                       RRSS(1)
+
+     NAME
+          rrss, trrss - RSS feed readers
+
+     SYNOPSIS
+          rrss [-f barf|blagh] [-r root] [-t tag] [-u url]
+
+          trrss [-f barf|blagh] [-r root] [-t tag] [-u url]
+
+     DESCRIPTION
+          Rrss pulls and parses an RSS feed.
+
+          There are a number of options:
+
+          -f   Place output in formatted directories for one
+               of two werc apps: barf or blagh. In the absence
+               of the -f flag, formatted output is placed on
+               stdout.
+
+               A file, links, is created in the root and is populated
+               with the URL of each feed item acquired. On sub-
+               sequent runs, URLs that appear in the links file are
+               not duplicated as new directories.
+
+          -r   Optionally, create barf or blagh directories
+               under root. Default is the current directory.
+
+          -t   Create tag for each post (barf only).
+
+          -u   The feed URL.
+
+          Trrss is a shell script that wraps the rrss program,
+          outputting plain text but preserving link URLs.
+
+     SOURCE
+          http://plan9.stanleylieber.com/src/rrss.tgz
+     SEE ALSO
+          http://werc.cat-v.org
+          http://werc.cat-v.org/apps/blagh
+          https://code.9front.org/hg/barf
--- /dev/null	Thu Dec 31 12:39:09 2020
+++ b/trrss	Thu Feb  6 15:14:54 2020
@@ -0,0 +1,8 @@
+#!/bin/rc
+# Run rrss and convert HTML to plain text, retaining link URLs.
+# NOTE: Requires plan9port or 9base. Fix the shebang path to rc.
+rrss $* | sed '
+	s/^title:.*$/<p>&/g
+	s/^link:.*$/<br>&/g
+	s/^date:.*$/<br>&<br>/g
+	' | tcs -t html | htmlfmt -a -c utf-8 | uhtml