all repos — aaoth.xyz @ main

aaoth.xyz website

bin/rssg (view raw)

  1#!/bin/sh
  2#
  3# https://www.romanzolotarev.com/bin/rssg
  4# Copyright 2018 Roman Zolotarev <hi@romanzolotarev.com>
  5# Copyright 2022 la-ninpre <aaoth@aaoth.xyz>
  6#
  7# Permission to use, copy, modify, and/or distribute this software for any
  8# purpose with or without fee is hereby granted, provided that the above
  9# copyright notice and this permission notice appear in all copies.
 10#
 11# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 12# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 13# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 14# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 15# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 16# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 17# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 18#
 19set -e
 20# set -x
 21
 22[ -z "$RSSG_DATE_IMPL" ] && RSSG_DATE_IMPL="GNU"
 23
 24main () {
 25	test -n "$1" || usage
 26	test -n "$2" || usage
 27	test -f "$1" || no_file "$1"
 28
 29
 30	index_file=$(readlink -f "$1")
 31	test -z "${index_file##*html}" && html=$(cat "$index_file")
 32	test -z "${index_file##*md}" && html=$(md_to_html "$index_file")
 33	test -n "$html" || usage
 34
 35	base="${index_file%/*}"
 36	base_url="$(echo "$html" | get_url | sed 's#\(.*\)/[^/]*#\1#')"
 37
 38	url=$(		echo "$html" | get_url)
 39
 40	title="$2"
 41
 42	description=$(	echo "$html" | get_description |
 43			remove_tags |
 44			remove_nbsp )
 45
 46	items=$(	echo "$html" | get_items)
 47
 48	rss=$(		echo "$items" |
 49			render_items "$base" "$base_url" |
 50			render_feed "$url" "$title" "$description")
 51
 52	>&2 echo "[rssg] ${index_file##$(pwd)/} $(echo "$rss" | grep -c '<item>') items"
 53	echo "$rss"
 54}
 55
 56
 57usage() {
 58	echo "usage: ${0##*/} index.{html,md} title > rss.xml" >&2
 59	exit 1
 60}
 61
 62
 63no_file() {
 64	echo "${0##*/}: $1: No such file" >&2
 65	exit 2
 66}
 67
 68
 69md_to_html() {
 70	test -x "$(which lowdown)" || exit 3
 71	lowdown \
 72    --html-no-escapehtml \
 73    --html-no-skiphtml \
 74    --parse-no-metadata \
 75    --parse-no-autolink "$1"
 76}
 77
 78
 79get_title() {
 80	awk 'tolower($0)~/^<h1/{gsub(/<[^>]*>/,"",$0);print;exit}'
 81}
 82
 83
 84get_url() {
 85	grep -i '<a .*rss.xml"' | head -1 |
 86	sed 's#.*href="\(.*\)".*#\1#'
 87}
 88
 89
 90get_items() {
 91	grep -i 'href=".*" title="' |
 92	sed 's#.*href="\(.*\)" title="\(.*\)">\(.*\)</a>.*#\1 \2 \3#'
 93}
 94
 95
 96get_description() {
 97	start='sub("^.*<"s"*"t"("s"[^>]*)?>","")'
 98	stop='sub("</"s"*"t""s"*>.*","")&&x=1'
 99	awk -v 's=[[:space:]]' -v 't=[Pp]' "$start,$stop;x{exit}"
100}
101
# remove_tags
# Filter: delete every <...> sequence from stdin. Closing tags such as </p>
# are matched by the same pattern, so a single expression covers both.
remove_tags() {
	sed -e 's/<[^>]*>//g'
}
105
106
# remove_nbsp
# Filter: turn every "&nbsp;" entity on stdin into a plain space.
remove_nbsp() {
	sed -e 's/&nbsp;/ /g'
}
110
111
# rel_to_abs_urls SITE_URL BASE_URL
# Filter: rewrite src="..." and href="..." attribute values on stdin.
#   $1  prefix for root-relative values  ("/x"  -> "$1/x")
#   $2  prefix for values that contain neither ":" nor "/"  ("x" -> "$2/x")
# Values that already carry a scheme, and protocol-relative values
# ("//host/x"), are left untouched.
rel_to_abs_urls() {
	# Both URLs end up inside a sed replacement that uses "#" as its
	# delimiter, so "\", "&" and "#" in them must be escaped first.
	_site=$(printf '%s\n' "$1" | sed 's/[\\&#]/\\&/g')
	_base=$(printf '%s\n' "$2" | sed 's/[\\&#]/\\&/g')

	# The first character after the leading "/" must not be another "/":
	# "//host/path" is already absolute apart from the scheme.
	_abs='s#(src|href)="/([^/"][^"]*)?"#\1="'"$_site"'/\2"#g'
	_rel='s#(src|href)="([^:/"]*)"#\1="'"$_base"'/\2"#g'
	sed -E "$_abs;$_rel"
}
120
121
# date_rfc_822 DATE
# Print DATE in the RFC 822 layout used by RSS, for example
# "Sun, 02 Jan 2022 00:00:00 +0000".
#   $1  date string; how it is read depends on RSSG_DATE_IMPL:
#         GNU  passed as-is to "date -d"
#         BSD  reduced to its digits, "0000" appended, passed to "date -j"
# Returns non-zero with a message on stderr when RSSG_DATE_IMPL is neither
# of the two.
date_rfc_822() {
	_format='+%a, %d %b %Y %H:%M:%S %z'
	case "$RSSG_DATE_IMPL" in
		BSD)
			_date="$(printf '%s' "$1" | tr -cd '[:digit:]')0000"
			# LC_ALL=C: %a and %b must be the English abbreviations
			# regardless of the locale the script runs under.
			LC_ALL=C date -j "$_format" "$_date"
			;;
		GNU)
			LC_ALL=C date -d "$1" "$_format"
			;;
		*)
			# An empty date would silently produce an invalid feed;
			# say what is wrong instead.
			printf '%s: unsupported RSSG_DATE_IMPL: %s\n' \
				"${0##*/}" "$RSSG_DATE_IMPL" >&2
			return 1
			;;
	esac
}
137
138
# render_items BASE BASE_URL
# Read item lines from stdin and call render_item once per line, passing
# $1 and $2 through followed by the line itself.
render_items() {
	while read -r entry; do
		render_item "$1" "$2" "$entry"
	done
}
144
145
# render_item BASE BASE_URL ITEM
# Print one RSS <item> element on stdout.
#   $1  directory that holds the post files
#   $2  URL prefix for the posts; also used to build guid and link
#   $3  item line: first field is the post path, second field its date
# The post body is read from "$1/<path>"; when a file with the same name but
# an .md extension in place of .html exists, it is rendered with md_to_html
# and takes precedence.
render_item() {
	base="$1"
	base_url="$2"
	item="$3"

	# scheme://host part of the URL (the same derivation render_feed uses),
	# so root-relative links resolve against the site root.
	site_url="$(printf '%s\n' "$base_url" | cut -d '/' -f1-3)"

	date=$(printf '%s\n' "$item" | awk '{print $2}')
	url=$(printf '%s\n' "$item" | awk '{print $1}')

	f="$base/$url"

	# Reset first: without this, a missing post file would silently reuse
	# whatever "html" held before this call.
	html=""
	test -f "$f" && html=$(cat "$f")
	test -f "${f%.html}.md" && html=$(md_to_html "${f%.html}.md")
	test -n "$html" ||
		printf '%s: %s: no content found for item\n' "${0##*/}" "$f" >&2

	# printf rather than echo so backslashes in the post survive intact.
	description=$(
		printf '%s\n' "$html" |
		rel_to_abs_urls "$site_url" "$base_url" |
		remove_nbsp
	)
	title=$(printf '%s\n' "$description" | get_title)
	guid="$base_url/$(printf '%s\n' "$url" | sed 's#^/##')"

	# The date call stays inside the here-document so that an unparsable
	# date leaves an empty <pubDate> instead of aborting under set -e.
	cat <<EOF

<item>
<guid>$guid</guid>
<link>$guid</link>
<pubDate>$(date_rfc_822 "$date")</pubDate>
<title>$title</title>
<description><![CDATA[

$description

]]></description>
</item>
EOF
}
181
182
# render_feed URL TITLE DESCRIPTION
# Wrap the <item> elements read from stdin in an RSS 2.0 channel and print
# the complete document on stdout.
#   $1  URL of the feed itself (atom:link); its scheme://host part becomes
#       the channel <link>
#   $2  channel title ("&nbsp;" entities are replaced with spaces)
#   $3  channel description, inserted as given
render_feed() {
	url="$1"
	title=$(printf '%s\n' "$2" | remove_nbsp)
	description="$3"

	base_url="$(printf '%s\n' "$url" | cut -d '/' -f1-3)"

	# Everything on stdin is the already rendered item list.
	_items=$(cat)

	# "date" is quoted so that a value containing blanks is passed as one
	# argument. In main's pipeline this function runs in its own process
	# and "date" is normally unset there, so date_rfc_822 receives an empty
	# string, which GNU date documents as the beginning of today.
	cat <<EOF
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
<atom:link href="$url" rel="self" type="application/rss+xml" />
<title>$title</title>
<description>$description</description>
<link>$base_url/</link>
<lastBuildDate>$(date_rfc_822 "${date-}")</lastBuildDate>
$_items
</channel></rss>
EOF
}
201
202
203main "$@"