all repos — aaoth.xyz @ 8d962aabf74c42c3afdc38f2f85fa7b06fd04ef0

aaoth.xyz website

bin/rssg (view raw)

  1#!/bin/sh
  2#
  3# https://www.romanzolotarev.com/bin/rssg
  4# Copyright 2018 Roman Zolotarev <hi@romanzolotarev.com>
  5#
  6# Permission to use, copy, modify, and/or distribute this software for any
  7# purpose with or without fee is hereby granted, provided that the above
  8# copyright notice and this permission notice appear in all copies.
  9#
 10# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 11# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 12# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 13# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 14# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 15# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 16# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 17#
 18set -e
 19set -x
 20
 21
 22main () {
 23	test -n "$1" || usage
 24	test -n "$2" || usage
 25	test -f "$1" || no_file "$1"
 26
 27
 28	index_file=$(readlink -f "$1")
 29	test -z "${index_file##*html}" && html=$(cat "$index_file")
 30	test -z "${index_file##*md}" && html=$(md_to_html "$index_file")
 31	test -n "$html" || usage
 32
 33	base="${index_file%/*}"
 34	base_url="$(echo "$html" | get_url | sed 's#\(.*\)/[^/]*#\1#')"
 35
 36	url=$(		echo "$html" | get_url)
 37
 38	title="$2"
 39
 40	description=$(	echo "$html" | get_description |
 41			remove_tags |
 42			remove_nbsp )
 43
 44	items=$(	echo "$html" | get_items)
 45
 46	rss=$(		echo "$items" |
 47			render_items "$base" "$base_url" |
 48			render_feed "$url" "$title" "$description")
 49
 50	>&2 echo "[rssg] ${index_file##$(pwd)/} $(echo "$rss" | grep -c '<item>') items"
 51	echo "$rss"
 52}
 53
 54
 55usage() {
 56	echo "usage: ${0##*/} index.{html,md} title > rss.xml" >&2
 57	exit 1
 58}
 59
 60
 61no_file() {
 62	echo "${0##*/}: $1: No such file" >&2
 63	exit 2
 64}
 65
 66
 67md_to_html() {
 68	test -x "$(which lowdown)" || exit 3
 69	lowdown \
 70    --html-no-escapehtml \
 71    --html-no-skiphtml \
 72    --parse-no-metadata \
 73    --parse-no-autolink "$1"
 74}
 75
 76
 77get_title() {
 78	awk 'tolower($0)~/^<h1/{gsub(/<[^>]*>/,"",$0);print;exit}'
 79}
 80
 81
 82get_url() {
 83	grep -i '<a .*rss.xml"' | head -1 |
 84	sed 's#.*href="\(.*\)".*#\1#'
 85}
 86
 87
 88get_items() {
 89	grep -i 'href=".*" title="' |
 90	sed 's#.*href="\(.*\)" title="\(.*\)">\(.*\)</a>.*#\1 \2 \3#'
 91}
 92
 93
 94get_description() {
 95	start='sub("^.*<"s"*"t"("s"[^>]*)?>","")'
 96	stop='sub("</"s"*"t""s"*>.*","")&&x=1'
 97	awk -v 's=[[:space:]]' -v 't=[Pp]' "$start,$stop;x{exit}"
 98}
 99
# Filter stdin to stdout, deleting everything that looks like a tag:
# any run from "<" up to the next ">" on the same line.
remove_tags() {
	sed \
		-e 's#<[^>]*>##g' \
		-e 's#</[^>]*>##g'
}
103
104
# Filter stdin to stdout, replacing every "&nbsp;" entity with a space.
remove_nbsp() {
	# "&" is an ordinary character on the pattern side of s///; escaping
	# it ("\&") has undefined meaning in a POSIX basic regular expression.
	sed 's#&nbsp;# #g'
}
108
109
# rel_to_abs_urls SITE_URL BASE_URL
#
# Filter HTML from stdin to stdout, making src= and href= values absolute:
#   - values starting with a single "/" are prefixed with SITE_URL;
#   - values containing neither ":" nor "/" are prefixed with BASE_URL/.
# Values that already carry a scheme, and protocol-relative values
# ("//host/..."), are left untouched.
rel_to_abs_urls() {
	site_url="$1"
	base_url="$2"

	# The URLs end up in the replacement part of s#...#...#, where "&",
	# "\" and the "#" delimiter are special; escape them first.
	rta_esc='s/[\\&#]/\\&/g'
	rta_site=$(printf '%s\n' "$site_url" | sed "$rta_esc")
	rta_base=$(printf '%s\n' "$base_url" | sed "$rta_esc")

	# The first character after the leading "/" must not be another "/",
	# otherwise "//cdn.example.org/x" would turn into SITE_URL//cdn...
	abs='s#(src|href)="/([^/"][^"]*)?"#\1="'"$rta_site"'/\2"#g'
	rel='s#(src|href)="([^:/"]*)"#\1="'"$rta_base"'/\2"#g'
	sed -E "$abs;$rel"
}
118
119
# date_rfc_822 STRING
#
# Print an RFC 822 timestamp for midnight (local time) of the day named
# by STRING. Only the digits of STRING are used, so "2018-01-02" and
# "20180102" are equivalent; a STRING without digits means today.
date_rfc_822() {
	rfc822_fmt='+%a, %d %b %Y %H:%M:%S %z'
	rfc822_digits=$(printf '%s' "$1" | tr -cd '[:digit:]')

	# LC_ALL=C keeps day and month names in English, as RFC 822 requires.

	# BSD date: -j parses the [[[[cc]yy]mm]dd]HHMM operand for display
	# only. Without -j the same operand asks date to SET the system
	# clock. GNU and busybox date reject -j and fall through.
	if LC_ALL=C date -j "$rfc822_fmt" "${rfc822_digits}0000" 2>/dev/null
	then
		return 0
	fi

	# GNU / busybox date: the time to display is passed with -d.
	case "$rfc822_digits" in
	'')
		LC_ALL=C date -d '00:00' "$rfc822_fmt"
		;;
	????????)
		# ccyymmdd -> "ccyy-mm-dd 00:00"
		rfc822_md=${rfc822_digits#????}
		LC_ALL=C date \
			-d "${rfc822_digits%????}-${rfc822_md%??}-${rfc822_md#??} 00:00" \
			"$rfc822_fmt"
		;;
	*)
		LC_ALL=C date -d "$rfc822_digits" "$rfc822_fmt"
		;;
	esac
}
124
125
# render_items BASE BASE_URL
#
# Read item records from stdin, one per line, and call render_item for
# each of them with BASE and BASE_URL passed through unchanged.
render_items() {
	while read -r entry; do
		render_item "$1" "$2" "$entry"
	done
}
131
132
# render_item BASE BASE_URL ITEM
#
# Print one RSS <item> element.
#   BASE      directory on disk that item paths are relative to
#   BASE_URL  URL prefix that corresponds to BASE
#   ITEM      "URL DATE ..." record; the first field is the page path,
#             the second the publication date
# The item body is the page itself (the Markdown source next to it wins
# over the .html file when both exist) with links made absolute.
render_item() {
	base="$1"
	base_url="$2"
	item="$3"

	# scheme://host part of BASE_URL, used for root-relative links. The
	# first three "/"-separated fields are correct for any path depth.
	site_url=$(printf '%s\n' "$base_url" | cut -d '/' -f1-3)

	# printf instead of echo throughout: page content may contain
	# backslashes, which some sh implementations of echo would interpret.
	date=$(printf '%s\n' "$item" | awk '{print $2}')
	url=$(printf '%s\n' "$item" | awk '{print $1}')

	f="$base/$url"

	# Reset first: without this an item whose source is missing would be
	# published with whatever page was loaded into $html before it.
	html=
	if test -f "$f"; then
		html=$(cat "$f")
	fi
	if test -f "${f%.html}.md"; then
		html=$(md_to_html "${f%.html}.md")
	fi
	test -n "$html" || echo "[rssg] $f: no content found" >&2

	description=$(
		printf '%s\n' "$html" |
		rel_to_abs_urls "$site_url" "$base_url" |
		remove_nbsp
	)
	title=$(printf '%s\n' "$description" | get_title)
	guid="$base_url/${url#/}"

	printf '%s\n' \
		'' \
		'<item>' \
		"<guid>$guid</guid>" \
		"<link>$guid</link>" \
		"<pubDate>$(date_rfc_822 "$date")</pubDate>" \
		"<title>$title</title>" \
		'<description><![CDATA[' \
		'' \
		"$description" \
		'' \
		']]></description>' \
		'</item>'
}
168
169
# render_feed URL TITLE DESCRIPTION
#
# Print a complete RSS 2.0 document on stdout.
#   URL          address of the feed itself (atom:link rel="self")
#   TITLE        channel title; "&nbsp;" entities are turned into spaces
#   DESCRIPTION  channel description
# Already rendered <item> elements are read from stdin and placed inside
# the channel. The channel link is the scheme://host part of URL.
render_feed() {
	url="$1"
	title=$(printf '%s\n' "$2" | remove_nbsp)
	description="$3"

	base_url=$(printf '%s\n' "$url" | cut -d '/' -f1-3)
	feed_items=$(cat)

	# printf '%s\n' writes its arguments verbatim; echo would interpret
	# backslashes in post content under shells with an XSI-style echo.
	printf '%s\n' \
		'<?xml version="1.0" encoding="UTF-8"?>' \
		'<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">' \
		'<channel>' \
		"<atom:link href=\"$url\" rel=\"self\" type=\"application/rss+xml\" />" \
		"<title>$title</title>" \
		"<description>$description</description>" \
		"<link>$base_url/</link>" \
		"<lastBuildDate>$(date_rfc_822 date)</lastBuildDate>" \
		"$feed_items" \
		'</channel></rss>'
}
188
189
190main "$@"