ref: 0dac38003861b6a485dab758a9437f2ea800990c
parent: 6b5f42d34e85c479e9c6dfeca31f78055de1727e
author: qwx <qwx@sciops.net>
date: Thu Jan 9 00:10:38 EST 2020
add some web crawlers
--- /dev/null
+++ b/dmlotr
@@ -1,0 +1,16 @@
+#!/bin/rc -xe
+# Walks a chain of blog posts beginning at a fixed start page.  Every image
+# URL under /images/ on a page is passed to dw (not defined here), then the
+# fifth "?p=" link found on the page becomes the next page to visit.  The
+# walk ends when a page yields no such link.
+page=/tmp/dmlotr.$pid
+url='http://www.shamusyoung.com/twentysidedtale/?p=612'
+while(! ~ $#url 0){
+	# break the page at every < and > so the line-based sed expressions below can match
+	hget $url | sed 's/[<>]/\n/g' >$page
+	imgs=`{sed -n 's/.*img src=''(http.*\/images\/[^'']+)''.*/\1/p' $page}
+	for(img in $imgs)
+		dw $img
+	url=`{sed -n 's/.*a href="(http[^\?]+\?p=[^"]+)".*/\1/p' $page | sed -n '5{p;q;}'}
+}
+rm $page
--- /dev/null
+++ b/khinmidi
@@ -1,0 +1,25 @@
+#!/bin/rc
+# khinmidi url...
+# For each page URL: make a directory named after the last path component of
+# the URL, enter it, and pass every href found on page lines mentioning .mid
+# to dw (not defined here).  flag e makes rc exit when a command fails.
+flag e +
+
+for(i in $*){
+	# last path component; empty when the URL ends in a slash
+	x=`{echo $i | sed -n 's/.*\/([^\/]+)$/\1/p'}
+	if(~ $#x 0){
+		echo 'khinmidi: cannot derive a directory name from' $i >[1=2]
+		exit 'bad url'
+	}
+	# mkdir and cd are separate commands on purpose: with mkdir && cd, a
+	# failed mkdir only skipped the cd instead of tripping flag e, and the
+	# cd .. below then left the starting directory.
+	mkdir -p $x
+	cd $x
+	hget $i |\
+	sed -n '/\.mid/s/^.*<a href="(.*)".*/\1/p' |\
+	uniq |\
+	xargs dw
+	cd ..
+}
--- /dev/null
+++ b/khinsider
@@ -1,0 +1,14 @@
+#!/bin/rc
+flag e +
+
+for(i in $*){
+ x=`{echo $i | sed -n 's/.*\/([^\/]+)$/\1/p'}
+ mkdir -p $x && cd $x
+ hget $i |\
+ sed -n '/\.mp3/s/^.*<a href="(.*)".*/\1/p' |\
+ uniq |\
+ xargs -n 1 hget |\
+ sed -n '/^<audio/s/<audio id=".*" src="(.*\.mp3)".*/\1/p' |\
+ xargs dw
+ cd ..
+}
--- /dev/null
+++ b/oots
@@ -1,0 +1,24 @@
+#!/bin/rc
+# oots: finds the newest comic linked from the giantitp.com front page, then follows the
+# "back" navigation links, saving each comic image into the current directory; stops at
+# the first comic that already has a file here, or when the back link is #. it is filthy.
+
+flag e +
+
+#x=(/comics/oots0002.html)	# leftover override: start from a fixed page (presumably for testing)
+x=(`{hget http://www.giantitp.com \
+| sed -n 's/<B>Order of the Stick <\/B><A href="([^"]+)".*/\1/p'})
+n=`{echo $x | sed 's/.*(oots[0-9]+\.)html/\1/'}	# file name stem "ootsNNNN." (trailing dot kept)
+while(! ~ $n '#' && ! test -e $n^*){	# until the stem is # or a file starting with the stem exists
+ x=(`{hget http://www.giantitp.com/$x(1) \
+ | sed -n '
+ /IMG src="\/comics/{
+ s/.*src="([^"]+)".*/\1/p
+ q
+ }
+ s/.*<A href="([^"]+)"><IMG src="\/Images\/redesign\/ComicNav_Back.gif".*/\1/p'
+ })	# the code below uses x(1) as the previous page href and x(2) as the image path
+ n=$n^`{echo $x(2) | sed 's/.*\.([^\/\.]+)$/\1/'}	# append the image file extension to the stem
+ hget http://www.giantitp.com/$x(2) >$n	# save the image under that name in the current directory
+ n=`{echo $x(1) | sed 's/.*(oots[0-9]+\.)html/\1/'}	# stem for the previous page; non-matching input (e.g. #) passes through unchanged
+}