commit 00d849e516d237161c4962bacbd5b2aae109bec9 (patch)
parent 22b08b005b84af5244fd9eeca8ed6f656b901977
Author: alex <alex@garbash.com>
Date: Fri, 7 Oct 2022 21:14:54 -0400
Use ids instead of timestamp to decide what to notify
The timestamp approach was problematic because some feeds (read: mine)
don't accurately report time--in my case I truncate the time, which
totally breaks the "newer than now" approach.
Collecting each item's "id" (its link and title) on every run and diffing
that list against the one saved from the previous run is a surefire way to
make sure nothing goes missing, even if it's old (or new).
Diffstat:
2 files changed, 18 insertions(+), 14 deletions(-)
diff --git a/README b/README
@@ -5,9 +5,10 @@ Ping IRC on updates to RSS feeds.
Meant to be run in cron, irl.sh will read all the URLs
in /etc/irl/feeds and use sfeed(1) to parse them and
-awk(1) to determine which ones are new. It then uses
-nc(1) to connect to the IRC server running on localhost:6667
-(which happens to be the case for our garbash tilde).
+diff(1) to determine which ones are new (since the last run).
+It then uses nc(1) to connect to the IRC server running on
+localhost:6667 (which happens to be the case for our garbash
+tilde).
TODO:
diff --git a/irl.sh b/irl.sh
@@ -17,24 +17,27 @@ EOM
}
feeds=/etc/irl/feeds
-ts=/tmp/irl.ts
+known_ids=/tmp/irl-known.ids
+all_ids=/tmp/irl-all.ids
+new_ids=/tmp/irl-new.ids
[ ! -e $feeds ] && die "Create $feeds to begin!"
command -v sfeed >/dev/null || die "sfeed not installed"
-if [ ! -e "$ts" ]; then
- date "+%s" >"$ts"
-fi
-
-last_run=$(cat $ts)
+# Truncate last pull
+>"$all_ids"
for feed in $(cat "$feeds"); do
curl -sS "$feed" | sfeed | \
- awk -F' ' "\$1 > $last_run { print \$3, \$2 }" |
- while read url subj; do
- notify "$subj" "$url"
- done
+ awk -F' ' "{ print \$3, \$2 }" >>"$all_ids"
done
+if [ -e "$known_ids" ]; then
+ diff "$known_ids" "$all_ids" | grep '^>' | sed 's/^..//' > "$new_ids"
+ while read url subj; do
+ notify "$subj" "$url"
+ done < "$new_ids"
+fi
+
# Update
-date "+%s" >"$ts"
+cp -f "$all_ids" "$known_ids"