shithub: rc

Download patch

ref: 0dac38003861b6a485dab758a9437f2ea800990c
parent: 6b5f42d34e85c479e9c6dfeca31f78055de1727e
author: qwx <qwx@sciops.net>
date: Thu Jan 9 00:10:38 EST 2020

add some web crawlers

--- /dev/null
+++ b/dmlotr
@@ -1,0 +1,25 @@
+#!/bin/rc -xe
+# dmlotr: crawl blog posts starting at the url in f. Every image url under
+# /images/ found on a post is handed to dw; the crawl then moves on to the
+# post's fifth ?p= link and stops when a post has fewer than five such links.
+
+t=/tmp/dmlotr.$pid
+# Remove the scratch file on every exit path, including an -e abort after a
+# failed hget, and pass the exit status of the script through unchanged.
+fn sigexit{
+	s=$status
+	rm -f $t
+	exit $s
+}
+
+f='http://www.shamusyoung.com/twentysidedtale/?p=612'
+while(){
+	# break the page at every < and > so each tag sits on its own line
+	hget $f | sed 's/[<>]/\n/g' >$t
+	for(i in `{sed -n 's/.*img src=''(http.*\/images\/[^'']+)''.*/\1/p' $t})
+		dw $i
+	# presumably the "next post" link; confirm against the page layout
+	f=`{sed -n 's/.*a href="(http[^\?]+\?p=[^"]+)".*/\1/p' $t | sed -n '5{p;q;}'}
+	if(~ $#f 0)
+		exit
+}
--- /dev/null
+++ b/khinmidi
@@ -1,0 +1,25 @@
+#!/bin/rc
+# usage: khinmidi url ...
+# For each url, create a directory named after the url's last path component
+# and run dw inside it on every link taken from the page's .mid lines.
+flag e +
+
+for(i in $*){
+	x=`{echo $i | sed -n 's/.*\/([^\/]+)$/\1/p'}
+	# a url ending in / yields no name; stop instead of downloading into the
+	# current directory
+	if(~ $#x 0){
+		echo 'khinmidi: cannot derive a directory name from' $i >[1=2]
+		exit 'bad url'
+	}
+	mkdir -p $x
+	# the subshell confines the cd, so every url starts from the original
+	# directory without needing a trailing cd ..
+	@{
+		cd $x
+		hget $i |\
+			sed -n '/\.mid/s/^.*<a href="(.*)".*/\1/p' |\
+			uniq |\
+			xargs dw
+	}
+}
--- /dev/null
+++ b/khinsider
@@ -1,0 +1,28 @@
+#!/bin/rc
+# usage: khinsider url ...
+# For each url, create a directory named after the url's last path component.
+# Inside it, fetch every link taken from the page's .mp3 lines, pull the .mp3
+# src out of each fetched page's <audio> line, and hand those urls to dw.
+flag e +
+
+for(i in $*){
+	x=`{echo $i | sed -n 's/.*\/([^\/]+)$/\1/p'}
+	# a url ending in / yields no name; stop instead of downloading into the
+	# current directory
+	if(~ $#x 0){
+		echo 'khinsider: cannot derive a directory name from' $i >[1=2]
+		exit 'bad url'
+	}
+	mkdir -p $x
+	# the subshell confines the cd, so every url starts from the original
+	# directory without needing a trailing cd ..
+	@{
+		cd $x
+		hget $i |\
+			sed -n '/\.mp3/s/^.*<a href="(.*)".*/\1/p' |\
+			uniq |\
+			xargs -n 1 hget |\
+			sed -n '/^<audio/s/<audio id=".*" src="(.*\.mp3)".*/\1/p' |\
+			xargs dw
+	}
+}
--- /dev/null
+++ b/oots
@@ -1,0 +1,24 @@
+#!/bin/rc
+# oots: starting from the comic linked on the giantitp.com front page (taken to
+# be the latest one), follow each page's "back" link and save every comic image
+# into the current directory, stopping at the first comic that already has a file
+# here or when the back link is '#'. (author's note: it's filthy.)
+flag e +
+# uncomment to start from a fixed comic page instead of asking the front page:
+#x=(/comics/oots0002.html)
+x=(`{hget http://www.giantitp.com \
+| sed -n 's/<B>Order of the Stick <\/B><A href="([^"]+)".*/\1/p'})
+n=`{echo $x | sed 's/.*(oots[0-9]+\.)html/\1/'}	# file name stem with its trailing dot, "ootsNNNN."
+while(! ~ $n '#' && ! test -e $n^*){	# run until n is '#' or a file named n* already exists
+	x=(`{hget http://www.giantitp.com/$x(1) \
+	| sed -n '
+		/IMG src="\/comics/{
+			s/.*src="([^"]+)".*/\1/p
+			q
+		}
+		s/.*<A href="([^"]+)"><IMG src="\/Images\/redesign\/ComicNav_Back.gif".*/\1/p'
+	})	# used below as x(1) = back link href, x(2) = image src; relies on the back link coming first in the page
+	n=$n^`{echo $x(2) | sed 's/.*\.([^\/\.]+)$/\1/'}	# append the image's file extension to the stem
+	hget http://www.giantitp.com/$x(2) >$n	# NOTE(review): a failed hget may leave a partial $n that a later run takes for a finished download - confirm
+	n=`{echo $x(1) | sed 's/.*(oots[0-9]+\.)html/\1/'}	# stem of the previous comic; a link without ootsNNNN.html (e.g. '#') passes through unchanged
+}