ref: 03baa934bacdbf48e9e8e94711e6687e6bb1fb30
parent: 05158bda287766580ed5e8e3416d0046678c7d3b
author: qwx <qwx@sciops.net>
date: Fri Jan 5 03:50:11 EST 2024
add scrape(1): save web posts
--- /dev/null
+++ b/bin/scrape
@@ -1,0 +1,30 @@
+#!/bin/rc
+# scrape: fetch URL with hget, pass it through htmlfmt, save the result
+# under $root and record it in $root/index.
+# usage: scrape URL [DESC]
+rfork e
+root=/sys/lib/extra/web
+if(~ $1 ''){
+	echo usage: $0 URL '[DESC]' >[1=2]
+	exit usage
+}
+mkdir -p $root
+
+# d: URL without scheme and trailing slashes; r: everything before the
+# first slash of d; f: last path element of d, used as the file name
+d=`{echo $1 | sed 's,^.*://,,;s,/+$,,'}
+r=`{echo $d | sed 's,/.*,,'}
+f=`{basename $d}
+# from here on d is the destination directory, already prefixed with $root
+d=$root/`{basename -d $d}
+# an earlier run may have saved a page at the path now needed as a directory
+if(test -f $d){
+	echo 'cannot mkdir over existing file '^$d >[1=2]
+	exit 'not a directory'
+}
+mkdir -p $d
+hget $1 | htmlfmt > $d/$f
+# index line: $r, saved file, URL, then any remaining arguments (DESC)
+echo $r ' ' $d/$f ' ' $1 ' ' $*(2-) >> $root/index
+# redirecting straight onto $root/index would truncate it before sort reads it
+sort -du $root/index > /tmp/scrape.$pid && mv /tmp/scrape.$pid $root/index
--
⑨