all repos — telebonk @ main

reposter from honk to telegram

main.go (view raw)

  1// Telebonk is a reposter from Honk to Telegram.
  2package main
  3
  4import (
  5	"bytes"
  6	"encoding/json"
  7	"flag"
  8	"fmt"
  9	"io"
 10	"log"
 11	"net/http"
 12	"net/url"
 13	"regexp"
 14	"sort"
 15	"strconv"
 16	"strings"
 17	"time"
 18
 19	"git.aaoth.xyz/la-ninpre/telebonk/config"
 20)
 21
 22// A Honk is a post from honk.
 23type Honk struct {
 24	ID     int       // unique id of a post
 25	What   string    // type of an action (post, repost, reply)
 26	Oondle string    // e-mail style handle of the original author of a post
 27	Oonker string    // url of the original author of a post
 28	XID    string    // url of a post, also unique
 29	RID    string    // url of a post that current post is replying to
 30	Date   time.Time // datetime of a post
 31	Precis string    // post summary
 32	Noise  string    // contents of a post
 33	Onts   []string  // a slice of tags in a post
 34	Donks  []*Donk   // a slice of attachments to a post
 35
 36	MessID    int // telegram message_id
 37	ReplyToID int // telegram message_id of a message to reply to
 38
 39	Action HonkAction
 40}
 41
 42// A HonkAction tells what to do with the saved honk.
 43type HonkAction int
 44
 45const (
 46	HonkNotSet HonkAction = iota
 47	HonkIgnore
 48	HonkSend
 49	HonkEdit
 50)
 51
 52// A Donk stores metadata for media files.
 53type Donk struct {
 54	URL   string
 55	Media string // mime-type of an attachment
 56	Desc  string
 57}
 58
 59// Check performs some checks on a Honk to filter out what's not going to be posted.
 60//
 61// It rejects honk if if falls into one of these categories:
 62//   - it is posted before the telebonk started
 63//   - it is replying to a honk that's not posted by telebonk (either a remote honk or old honk)
 64//   - it is of unsupported type (not a regular honk, reply or bonk)
 65//   - it contains a `#notg` tag.
 66//   - it is empty
 67func (h *Honk) Check() error {
 68	log.Print("check: checking honk #", h.ID) // info
 69
 70	switch h.What {
 71	case "honked", "bonked":
 72		break
 73	case "honked back":
 74		hi, ok := honkMap[h.RID]
 75		if !ok {
 76			return fmt.Errorf("cannot reply to nonexisting telebonk")
 77		}
 78		h.ReplyToID = hi.MessID
 79	default:
 80		return fmt.Errorf("unsupported honk type: %s", h.What)
 81	}
 82
 83	for _, ont := range h.Onts {
 84		if strings.ToLower(ont) == "#notg" {
 85			return fmt.Errorf("skipping #notg honk")
 86		}
 87	}
 88
 89	if h.Noise == emptyNoise && len(h.Donks) == 0 {
 90		return fmt.Errorf("empty honk")
 91	}
 92	return nil
 93}
 94
 95// Decide sets the Action of a Honk.
 96//
 97// It sets HonkIgnore to those honks that are: 1) old; 2) already sent and aren't edits.
 98func (h *Honk) Decide() {
 99	oldhonk, ok := honkMap[h.XID]
100	if ok {
101		if oldhonk.MessID == 0 || h.Date.Equal(oldhonk.Date) {
102			h.Action = HonkIgnore
103			h.save(oldhonk.MessID)
104			return
105		}
106		log.Print("decide: honk #", h.XID, " is to be edited")
107		h.Action = HonkEdit
108		h.MessID = oldhonk.MessID
109		return
110	}
111	if h.Date.Before(now) {
112		h.Action = HonkIgnore
113		h.save(0)
114		return
115	}
116	log.Print("decide: honk #", h.ID, " is to be sent")
117	h.Action = HonkSend
118}
119
// save records a Honk to the honkMap.
//
// It stores messID in h.MessID and then registers h under its XID, replacing
// any honk previously stored under that key.
func (h *Honk) save(messID int) {
	h.MessID = messID
	honkMap[h.XID] = h
}
125
126// forget unchecks a Honk from the honkMap
127func (h *Honk) forget() {
128	oldhonk, ok := honkMap[h.XID]
129	if !ok {
130		return
131	}
132	oldhonk.MessID = 0
133	oldhonk.ReplyToID = 0
134	honkMap[oldhonk.XID] = oldhonk
135}
136
137// A Mess holds data for a message to be sent to Telegram.
138type Mess struct {
139	Text             string `json:"text"`
140	ChatID           string `json:"chat_id"`
141	ParseMode        string `json:"parse_mode,omitempty"`
142	MessageID        int    `json:"message_id,omitempty"`
143	ReplyToMessageID int    `json:"reply_to_message_id,omitempty"`
144
145	Document string `json:"document,omitempty"`
146	Photo    string `json:"photo,omitempty"`
147	Caption  string `json:"caption,omitempty"`
148
149	kind messKind
150}
151
// A TelegramResponse is a response from Telegram API.
//
// Only the parts telebonk looks at are declared.
type TelegramResponse struct {
	// Ok reports whether the request succeeded.
	Ok bool
	// Description carries the explanation Send reports when Ok is false.
	Description string
	// Result is the payload of the response.
	Result struct {
		// MessageID is the message_id of the message in the response.
		MessageID int `json:"message_id"`
	}
}
160
161// NewMess creates and populates a new Mess with default values.
162func NewMess(parseMode, chatID string) *Mess {
163	return &Mess{
164		ParseMode: parseMode,
165		ChatID:    chatID,
166		kind:      messHonk,
167	}
168}
169
170// NewMessFromHonk creates a slice of Mess objects from existing Honk.
171func NewMessFromHonk(honk *Honk) []*Mess {
172	var truncateWith = "...\n\nfull honk: " + honk.XID // hardcoded == bad
173	// donks should be sent as a separate messages, so need to create all of 'em
174	// cap(messes) = 1 for honk + 1 for each donk
175	var messes = make([]*Mess, 0, 1+len(honk.Donks))
176
177	messes = append(messes, NewMess("html"))
178	for _, donk := range honk.Donks {
179		donkMess := NewMess("") // donks don't contain html
180		donkMess.Caption = TruncateNoise(donk.Desc, truncateWith, 1024)
181		switch {
182		case strings.HasPrefix(donk.Media, "image/"):
183			donkMess.Photo = donk.URL
184			donkMess.kind = messDonkPht
185		case donk.Media == "application/pdf", donk.Media == "text/plain":
186			donkMess.Document = donk.URL
187			donkMess.kind = messDonkDoc
188		}
189		messes = append(messes, donkMess)
190	}
191	if honk.Noise == emptyNoise {
192		messes = messes[1:] // just donks
193	}
194
195	if honk.Action == HonkEdit {
196		// TODO: implement editing documents and photos
197		messes[0].kind = messEdit
198		messes[0].MessageID = honk.MessID
199		messes = messes[:1] // don't donk if editing
200	}
201
202	var text = CalmNoise(honk.Noise)
203	text = TruncateNoise(text, truncateWith, 4096)
204	// bonk, then honk back - ok
205	// honk back, then bonk - not gonna sync, is it ok?
206	// upd: bonks work really confusing
207	switch honk.What {
208	case "honked":
209		break
210	case "honked back":
211		messes[0].ReplyToMessageID = honk.ReplyToID
212	case "bonked":
213		oonker := fmt.Sprintf("<a href=\"%s\">%s</a>:", honk.Oonker, honk.Oondle)
214		text = oonker + "\n" + text
215	}
216
217	// danger zone handling
218	if strings.HasPrefix(honk.Precis, "DZ:") {
219		text = strings.Join([]string{"<tg-spoiler>", "</tg-spoiler>"}, text)
220		text = honk.Precis + "\n" + text
221	}
222	messes[0].Text = text
223
224	return messes
225}
226
227// Send sends a Mess to Telegram.
228func (m *Mess) Send() (*TelegramResponse, error) {
229	var apiURL = botAPIMethod(tgSendMessage)
230
231	switch m.kind {
232	case messHonk:
233		// noop
234	case messEdit:
235		apiURL = botAPIMethod(tgEditMessageText)
236	case messDonkPht:
237		apiURL = botAPIMethod(tgSendPhoto)
238	case messDonkDoc:
239		apiURL = botAPIMethod(tgSendDocument)
240	}
241
242	junk, err := json.Marshal(m)
243	if err != nil {
244		return nil, err
245	}
246	buf := bytes.NewBuffer(junk)
247	req, err := http.NewRequest("POST", apiURL, buf)
248	if err != nil {
249		return nil, err
250	}
251	req.Header.Add("Content-type", "application/json")
252	req.Header.Add("Content-length", strconv.Itoa(buf.Len()))
253
254	resp, err := client.Do(req)
255	if err != nil {
256		return nil, err
257	}
258	defer resp.Body.Close()
259
260	var res TelegramResponse
261	json.NewDecoder(resp.Body).Decode(&res)
262	if !res.Ok {
263		return nil, fmt.Errorf("mess send: %s", res.Description)
264	}
265
266	return &res, nil
267}
268
// A messKind tells Send which Bot API method a Mess is meant for.
type messKind int

// The kinds of Mess.
const (
	// messHonk is a plain text message; it is the zero value.
	messHonk messKind = iota
	// messEdit is an edit of the text of an already sent message.
	messEdit
	// messDonkPht is an attachment sent as a photo.
	messDonkPht
	// messDonkDoc is an attachment sent as a document.
	messDonkDoc
)
277
// botAPIMethod builds the URL of a Bot API method: `url/bot<token>/method`.
//
// XXX: figure out how to retrieve these args without passing config object thousand times
func botAPIMethod(url, token, method string) string {
	// every operand is a string, so Sprint joins them without separators
	return fmt.Sprint(url, "/bot", token, "/", method)
}
282
283func checkTgAPI() error {
284	var apiURL = botAPIMethod(tgGetMe)
285	resp, err := client.Get(apiURL)
286	if err != nil {
287		return err
288	}
289	if resp.StatusCode != 200 {
290		status, _ := io.ReadAll(resp.Body)
291		return fmt.Errorf("status: %d: %s", resp.StatusCode, status)
292	}
293	return nil
294}
295
// Telegram Bot API methods, as they appear in the last segment of the URL
// built by botAPIMethod.
const (
	// tgGetMe is used by checkTgAPI to test the connection.
	tgGetMe = "getMe"
	// tgSendMessage posts a text message.
	tgSendMessage = "sendMessage"
	// tgEditMessageText edits the text of a posted message.
	tgEditMessageText = "editMessageText"
	// tgSendPhoto posts a photo.
	tgSendPhoto = "sendPhoto"
	// tgSendDocument posts a document.
	tgSendDocument = "sendDocument"
)
304
305// getHonks receives and unmarshals some honks from a Honk instance.
306func getHonks(from, page, token string, after int) ([]*Honk, error) {
307	query := url.Values{}
308	query.Set("action", "gethonks")
309	query.Set("page", page)
310	query.Set("after", strconv.Itoa(after))
311	apiurl := from + "/api?" + query.Encode()
312
313	req, err := http.NewRequest("GET", apiurl, nil)
314	if err != nil {
315		return nil, err
316	}
317	req.Header.Add("Authorization", "Bearer "+token)
318
319	resp, err := client.Do(req)
320	if err != nil {
321		return nil, err
322	}
323	defer resp.Body.Close()
324
325	// honk outputs junk like `{ "honks": [ ... ] }`, need to get into the list
326	var honkJunk map[string][]*Honk
327	err = json.NewDecoder(resp.Body).Decode(&honkJunk)
328	if err != nil {
329		// XXX: honk tokens last for a week or so. when one expires, shouldn't this say something meaningful instead of `unexpected v in blah-blah'?
330		log.Print("gethonks: ", resp.Status)
331		return nil, err
332	}
333
334	honks := honkJunk["honks"]
335	// honk.ID monotonically increases, so it can be used to sort honks
336	sort.Slice(honks, func(i, j int) bool { return honks[i].ID < honks[j].ID })
337
338	return honks, nil
339}
340
// Patterns used by CalmNoise to find html tags that Telegram does not support.
//
// Patterns that span from an opening to a closing tag are non-greedy, so that
// two elements on the same line are matched separately instead of as one
// stretch from the first opening tag to the last closing tag.
var (
	// tags that are deleted
	rePTags  = regexp.MustCompile(`</?p>`)
	reHlTags = regexp.MustCompile(`</?span( class="[a-z]{2}")?>`)
	reBrTags = regexp.MustCompile(`<br>`)
	// a single <img> tag with a src attribute; never reaches past the tag's own `>`
	reImgTags = regexp.MustCompile(`<img [^>]*src="([^"]*)"[^>]*>`)
	reUlTags  = regexp.MustCompile(`</?ul>`)
	reTbTags  = regexp.MustCompile(`<table>.*?</table>`)

	// tags that are rewritten
	reLiTags = regexp.MustCompile(`<li>([^<>/]*)</li>`)
	reHnTags = regexp.MustCompile(`<h[1-6]>(.*?)</h[1-6]>`)
	reHrTags = regexp.MustCompile(`<hr>.*?</hr>`)
	reBqTags = regexp.MustCompile(`<blockquote>(.*?)</blockquote>`)
)
354
// emptyNoise is the Noise value that Check and NewMessFromHonk treat as a
// honk without any text.
const emptyNoise = "<p></p>\n"
356
357// CalmNoise erases and rewrites html tags that are not supported by Telegram.
358func CalmNoise(s string) string {
359	// delete these
360	s = rePTags.ReplaceAllString(s, "")
361	s = reHlTags.ReplaceAllString(s, "")
362	s = reBrTags.ReplaceAllString(s, "")
363	s = reImgTags.ReplaceAllString(s, "")
364	s = reUlTags.ReplaceAllString(s, "")
365	s = reTbTags.ReplaceAllString(s, "")
366
367	// these can be repurposed
368	s = reHrTags.ReplaceAllString(s, "---\n")
369	s = reHnTags.ReplaceAllString(s, "<b>$1</b>\n\n")
370	s = reBqTags.ReplaceAllString(s, "| <i>$1</i>")
371	s = reLiTags.ReplaceAllString(s, "* $1\n")
372
373	return strings.TrimSpace(s)
374}
375
// TruncateNoise shortens s to fit into length bytes and adds `with` to the end.
//
// A string of at most length bytes is returned unchanged. Otherwise s is cut
// at the last rune boundary that is not beyond `length - 1 - len(with)` bytes
// and `with` is appended, so the result is shorter than length and never ends
// in half a rune. If `with` alone does not fit, the result is just `with`.
func TruncateNoise(s, with string, length int) string {
	// telegram can handle posts no longer than 4096 (or 1024) characters _after_ the parsing of entities.
	// we could be clever and calculate the true length of text, but let's keep it simple and stupid.
	// counting bytes errs on the safe side: a character is never shorter than a byte.
	if len(s) <= length {
		return s
	}

	end := length - 1 - len(with)
	if end < 0 {
		end = 0
	}
	// range yields the byte offset of every rune start; remember the last one
	// within the budget so that the cut never lands inside a rune
	cut := 0
	for i := range s {
		if i > end {
			break
		}
		cut = i
	}
	return s[:cut] + with
}
396
// conf holds the runtime configuration; init binds its fields to
// command-line flags and validates them.
// XXX: global and mutable
var conf = &config.Config{}
399
400func init() {
401	flag.StringVar(&conf.TgBotToken, "bot_token", "", "Telegram bot token")
402	flag.StringVar(&conf.TgChatID, "chat_id", "", "Telegram chat_id")
403	flag.StringVar(&conf.TgApiURL, "tgapi_url", "https://api.telegram.org", "Telegram API URL")
404	flag.StringVar(&conf.HonkAuthToken, "honk_token", "", "Honk auth token")
405	flag.StringVar(&conf.HonkPage, "honk_page", "myhonks", "Page to get honks from. Should be one of [atme, longago, home, myhonks]")
406	flag.StringVar(&conf.HonkURL, "honk_url", "", "URL of a Honk instance")
407
408	flag.Parse()
409
410	if err := conf.Check(); err != nil {
411		log.Fatal("conf:", err) // fail
412	}
413	conf.TgApiURL = strings.TrimRight(conf.TgApiURL, "/")
414	if err := checkTgAPI(); err != nil {
415		log.Fatal("tgAPI:", err) // fail
416	}
417}
418
419var client = http.DefaultClient
420var honkMap = make(map[string]*Honk) // XXX: not safe for use by multiple goroutines!
421var now = time.Now()
422
423func main() {
424	var retry = 5
425
426	log.Print("starting telebonk") // info
427	for {
428		honks, err := getHonks(conf.HonkURL, conf.HonkPage, conf.HonkAuthToken, 0)
429		if err != nil {
430			log.Print("gethonks:", err) // error
431			retry--
432			if retry == 0 {
433				log.Fatal("gethonks: giving up") // fail
434			}
435			time.Sleep(5 * time.Second)
436			continue
437		}
438	HonkLoop:
439		for _, honk := range honks {
440			honk.Decide()
441			switch honk.Action {
442			case HonkIgnore:
443				continue
444			case HonkSend, HonkEdit:
445				if err := honk.Check(); err != nil {
446					log.Print("honk check:", err) // error
447					continue
448				}
449			}
450			messes := NewMessFromHonk(honk)
451			// messes[0] is a honk or a donk to be sent
452			resp, err := messes[0].Send()
453			if err != nil {
454				log.Print("mess send", err) // error
455				honk.forget()               // retry
456				continue
457			}
458			// remember only the first mess' response
459			honk.save(resp.Result.MessageID)
460			for _, mess := range messes[1:] {
461				if _, err := mess.Send(); err != nil {
462					log.Print("mess send", err) // error
463					continue HonkLoop
464				}
465			}
466		}
467		time.Sleep(30 * time.Second)
468	}
469}