bin/rssg (view raw)
1#!/bin/sh
2#
3# https://www.romanzolotarev.com/bin/rssg
4# Copyright 2018 Roman Zolotarev <hi@romanzolotarev.com>
5# Copyright 2022 la-ninpre <aaoth@aaoth.xyz>
6#
7# Permission to use, copy, modify, and/or distribute this software for any
8# purpose with or without fee is hereby granted, provided that the above
9# copyright notice and this permission notice appear in all copies.
10#
11# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18#
# Abort as soon as an unguarded command fails.
set -e
# set -x   # uncomment to trace every command while debugging

# Flavour of date(1) that date_rfc_822 drives: "GNU" or "BSD".
# Falls back to GNU when the variable is unset or empty.
: "${RSSG_DATE_IMPL:=GNU}"
23
# Entry point: build an RSS feed from an index page and print it on stdout.
#
# Arguments:
#   $1 - path to the index file; its name must end in "html" or "md"
#   $2 - feed title
# Outputs:
#   stdout - the rendered feed
#   stderr - a one-line "[rssg] <file> <n> items" summary
# Exits via usage (missing argument or unsupported file type) or via
# no_file (index file does not exist).
main() {
  test -n "$1" || usage
  test -n "$2" || usage
  test -f "$1" || no_file "$1"

  index_file=$(readlink -f "$1")

  # Start from a known-empty value so that an inherited $html can never
  # make an unsupported file type pass the check below.
  html=
  case "$index_file" in
    *html) html=$(cat "$index_file") ;;
    *md) html=$(md_to_html "$index_file") ;;
  esac
  test -n "$html" || usage

  base="${index_file%/*}"

  # printf instead of echo: the page content is arbitrary text and some
  # /bin/sh implementations make echo interpret backslash sequences.
  url=$(printf '%s\n' "$html" | get_url)
  # Feed URL minus its last path component.
  base_url=$(printf '%s\n' "$url" | sed 's#\(.*\)/[^/]*#\1#')

  title="$2"

  description=$(printf '%s\n' "$html" |
    get_description |
    remove_tags |
    remove_nbsp)

  items=$(printf '%s\n' "$html" | get_items)

  rss=$(printf '%s\n' "$items" |
    render_items "$base" "$base_url" |
    render_feed "$url" "$title" "$description")

  # Quote the directory inside the expansion so it is stripped literally
  # instead of being treated as a glob pattern.
  cwd=$(pwd)
  printf '%s\n' "[rssg] ${index_file##"$cwd"/} $(printf '%s\n' "$rss" | grep -c '<item>') items" >&2
  printf '%s\n' "$rss"
}
55
56
# Print the command synopsis on stderr and terminate with status 1.
usage() {
  printf 'usage: %s index.{html,md} title > rss.xml\n' "${0##*/}" >&2
  exit 1
}
61
62
# Report a missing input file on stderr and terminate with status 2.
#
# Arguments:
#   $1 - the path that could not be found
no_file() {
  # printf '%s' prints the path verbatim; echo may interpret backslash
  # sequences in it depending on the shell.
  printf '%s: %s: No such file\n' "${0##*/}" "$1" >&2
  exit 2
}
67
68
# Convert a Markdown file to HTML with lowdown and print it on stdout.
#
# Arguments:
#   $1 - path to the Markdown file
# Exits with status 3 (after a message on stderr) when lowdown is not
# available.
md_to_html() {
  # command -v is the POSIX way to probe for a command; which(1) is not
  # standardised and its output differs between systems.
  if ! command -v lowdown >/dev/null 2>&1; then
    printf '%s: lowdown: command not found\n' "${0##*/}" >&2
    exit 3
  fi
  lowdown \
    --html-no-escapehtml \
    --html-no-skiphtml \
    --parse-no-metadata \
    --parse-no-autolink "$1"
}
77
78
# Read HTML on stdin and print the text of the first line that begins with
# an <h1 tag (matched case-insensitively), with every tag removed.
# Prints nothing when no such line exists.
get_title() {
  awk '
    tolower($0) ~ /^<h1/ {
      gsub(/<[^>]*>/, "")
      print
      exit
    }
  '
}
82
83
# Read HTML on stdin and print the href value from the first line that
# contains an anchor ending in rss.xml" (matched case-insensitively).
get_url() {
  # [^"]* stops at the closing quote of the href value, so attributes that
  # follow it (class, title, ...) are not dragged into the URL.
  grep -i '<a .*rss.xml"' | head -n 1 |
    sed 's#.*href="\([^"]*\)".*#\1#'
}
88
89
# Read HTML on stdin and, for every line holding an anchor whose href
# attribute is directly followed by a title attribute, print
# "<href> <title attribute> <link text>" on one line.
get_items() {
  grep -i -e 'href=".*" title="' |
    sed -e 's|.*href="\(.*\)" title="\(.*\)">\(.*\)</a>.*|\1 \2 \3|'
}
94
95
# Read HTML on stdin and print the content of the first <p> element.
#
# The awk program is a range pattern with the default print action:
#   - the range opens on the line where the text up to and including an
#     opening <p ...> tag can be stripped;
#   - it closes on the line where a closing </p> tag and everything after
#     it can be stripped, which also sets x;
#   - once x is set the program exits, so only one paragraph is printed.
get_description() {
  awk -v 'ws=[[:space:]]' -v 'tag=[Pp]' '
    sub("^.*<" ws "*" tag "(" ws "[^>]*)?>", ""), sub("</" ws "*" tag ws "*>.*", "") && x = 1
    x { exit }
  '
}
101
# Filter stdin to stdout, deleting every <...> sequence (opening and
# closing tags alike) on each line.
remove_tags() {
  sed -e 's/<[^>]*>//g'
}
105
106
# Filter stdin to stdout, turning non-breaking spaces into ordinary spaces.
#
# Both spellings are handled:
#   - the HTML entity &nbsp; (not a predefined XML entity, so it must not
#     reach the feed);
#   - the literal U+00A0 character, written here as its UTF-8 bytes.
remove_nbsp() {
  # "&" has no special meaning on the pattern side of s///, so it needs no
  # escaping there.
  sed -e 's#&nbsp;# #g' -e "s#$(printf '\302\240')# #g"
}
110
111
# Filter HTML from stdin to stdout, making src/href attribute values
# absolute.
#
# Arguments:
#   $1 - site URL, prefixed to values that start with a single "/"
#   $2 - base URL, prefixed to relative values (no leading "/", no ":")
#
# Values starting with "//" (protocol-relative) and values containing ":"
# (for example "https://..." or "mailto:...") are left untouched.
#
# The body runs in a subshell so that its helper variables never overwrite
# variables of the caller.
rel_to_abs_urls() (
  # Escape the characters that are special in the replacement part of the
  # sed "s#...#...#" commands below: "&", the "#" delimiter and "\".
  site=$(printf '%s' "$1" | sed 's/[&#\\]/\\&/g')
  base=$(printf '%s' "$2" | sed 's/[&#\\]/\\&/g')

  # Root-relative: "/" followed by nothing or by a path that does not begin
  # with a second "/".
  abs='s#(src|href)="/(([^/"][^"]*)?)"#\1="'"$site"'/\2"#g'
  # Relative: first character is neither ":" nor "/", no ":" anywhere;
  # sub-directories ("img/a.png") are accepted.
  rel='s#(src|href)="(([^:/"][^:"]*)?)"#\1="'"$base"'/\2"#g'
  sed -E "$abs;$rel"
)
120
121
# Print a date in RFC 822 form, e.g. "Thu, 02 Jan 2020 00:00:00 +0000".
#
# Arguments:
#   $1 - the date to format
# Globals:
#   RSSG_DATE_IMPL (read) - "GNU" or "BSD"; selects how date(1) is called
#
# With "GNU" the argument is handed to `date -d` unchanged. With "BSD" all
# non-digits are dropped and "0000" is appended before it is handed to
# `date -j`. Any other value prints a warning on stderr and no date.
date_rfc_822() {
  _format='+%a, %d %b %Y %H:%M:%S %z'
  _date="$1"
  case "$RSSG_DATE_IMPL" in
    "BSD")
      _date="$(printf '%s\n' "$1" | tr -cd '[:digit:]')0000"
      # LC_ALL=C: %a and %b must yield English names whatever the locale
      # of the user is.
      LC_ALL=C date -j "$_format" "$_date"
      ;;
    "GNU")
      LC_ALL=C date -d "$_date" "$_format"
      ;;
    *)
      printf '%s: unsupported RSSG_DATE_IMPL "%s" (expected GNU or BSD)\n' \
        "${0##*/}" "$RSSG_DATE_IMPL" >&2
      ;;
  esac
}
137
138
# Read item lines on stdin and hand each one to render_item.
#
# Arguments:
#   $1, $2 - forwarded unchanged as the first two arguments of render_item;
#            the line that was read becomes the third
render_items() {
  while read -r entry; do
    render_item "$1" "$2" "$entry"
  done
}
144
145
# Print one RSS <item> element for an entry of the index page.
#
# Arguments:
#   $1 - directory that holds the source files
#   $2 - base URL used for the item link and for relative URLs
#   $3 - item line: "<url> <date> <title...>"; only the first two
#        whitespace-separated fields are used here
#
# The item body comes from "$1/<url>", or from the matching .md file
# (same name with a trailing .html replaced by .md) when that exists.
# An empty item line is ignored; an item without any source file is
# skipped with a warning on stderr.
render_item() {
  base="$1"
  base_url="$2"
  item="$3"

  # printf instead of echo throughout: the data is arbitrary text and some
  # /bin/sh implementations make echo interpret backslash sequences.
  url=$(printf '%s\n' "$item" | awk '{print $1}')
  date=$(printf '%s\n' "$item" | awk '{print $2}')

  # Nothing to render for a blank line (e.g. an index without items).
  test -n "$url" || return 0

  f="$base/$url"

  # Always start from an empty value: without this, an item whose source
  # is missing would be rendered with whatever $html held before.
  html=
  if test -f "${f%.html}.md"; then
    html=$(md_to_html "${f%.html}.md")
  elif test -f "$f"; then
    html=$(cat "$f")
  else
    printf '%s: %s: no source file for item, skipping\n' "${0##*/}" "$f" >&2
    return 0
  fi

  # Scheme and host of the base URL ("https://host"), derived the same way
  # render_feed does it, so root-relative links resolve against the site
  # root however deep the base URL path is.
  site_url=$(printf '%s\n' "$base_url" | cut -d '/' -f1-3)

  description=$(
    printf '%s\n' "$html" |
      rel_to_abs_urls "$site_url" "$base_url" |
      remove_nbsp
  )
  title=$(printf '%s\n' "$description" | get_title)
  guid="$base_url/${url#/}"

  printf '%s\n' '
<item>
<guid>'"$guid"'</guid>
<link>'"$guid"'</link>
<pubDate>'"$(date_rfc_822 "$date")"'</pubDate>
<title>'"$title"'</title>
<description><![CDATA[

'"$description"'

]]></description>
</item>'
}
181
182
# Print the complete RSS document, wrapping the items read from stdin.
#
# Arguments:
#   $1 - URL of the feed itself (used for atom:link and to derive <link>)
#   $2 - feed title
#   $3 - feed description
# Globals:
#   date (read) - passed to date_rfc_822 to produce <lastBuildDate>
# Input:
#   stdin - already rendered <item> elements, inserted verbatim
render_feed() {
  url="$1"
  # printf instead of echo: arguments are arbitrary text and some /bin/sh
  # implementations make echo interpret backslash sequences.
  title=$(printf '%s\n' "$2" | remove_nbsp)
  description="$3"

  # First three "/"-separated fields, i.e. "scheme://host".
  base_url="$(printf '%s\n' "$url" | cut -d '/' -f1-3)"

  # "$date" is quoted so that it reaches date_rfc_822 as one argument and
  # is neither word-split nor glob-expanded.
  printf '%s\n' '<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
<atom:link href="'"$url"'" rel="self" type="application/rss+xml" />
<title>'"$title"'</title>
<description>'"$description"'</description>
<link>'"$base_url"'/</link>
<lastBuildDate>'"$(date_rfc_822 "$date")"'</lastBuildDate>
'"$(cat)"'
</channel></rss>'
}
201
202
203main "$@"