all repos — telebonk @ 0d5906ba5db5237375240909ef5f55f5f1efa305

reposter from honk to telegram

main.go (view raw)

  1// Telebonk is a reposter from Honk to Telegram.
  2package main
  3
  4import (
  5	"bytes"
  6	"encoding/json"
  7	"flag"
  8	"fmt"
  9	"io"
 10	"log"
 11	"net/http"
 12	"net/url"
 13	"regexp"
 14	"sort"
 15	"strconv"
 16	"strings"
 17	"time"
 18)
 19
 20// A Config is holding configuration of telebonk.
 21type Config struct {
 22	TgBotToken    string
 23	TgChatID      string
 24	TgApiURL      string
 25	HonkAuthToken string
 26	HonkURL       string
 27}
 28
 29// Check makes sure that no Config fields are set to empty strings.
 30func (c *Config) Check() error {
 31	var what string
 32	if c.TgBotToken == "" {
 33		what = "bot_token"
 34	}
 35	if c.TgChatID == "" {
 36		what = "chat_id"
 37	}
 38	if c.TgApiURL == "" {
 39		what = "tgapi_url"
 40	}
 41	if c.HonkAuthToken == "" {
 42		what = "honk_token"
 43	}
 44	if c.HonkURL == "" {
 45		what = "honk_url"
 46	}
 47	if what != "" {
 48		return fmt.Errorf("'%s' shouldn't be empty", what)
 49	}
 50	return nil
 51}
 52
 53var config = &Config{}
 54
 55// A Honk is a post from honk.
 56type Honk struct {
 57	ID     int       // unique id of a post
 58	What   string    // type of an action (post, repost, reply)
 59	Oondle string    // e-mail style handle of the original author of a post
 60	Oonker string    // url of the original author of a post
 61	XID    string    // url of a post, also unique
 62	RID    string    // url of a post that current post is replying to
 63	Date   time.Time // datetime of a post
 64	Precis string    // post summary
 65	Noise  string    // contents of a post
 66	Onts   []string  // a slice of tags in a post
 67	Donks  []*Donk   // a slice of attachments to a post
 68
 69	MessID    int // telegram message_id
 70	ReplyToID int // telegram message_id of a message to reply to
 71	Action    HonkAction
 72}
 73
 74// A HonkAction tells what to do with the saved honk.
 75type HonkAction int
 76
 77const (
 78	HonkIgnore HonkAction = iota
 79	HonkSend
 80	HonkEdit
 81)
 82
 83// A Donk stores metadata for media files.
 84type Donk struct {
 85	URL   string
 86	Media string // mime-type of an attachment
 87	Desc  string
 88}
 89
 90// Check performs some checks on a Honk to filter out what's not going to be posted.
 91//
 92// It rejects honk if if falls into one of these categories:
 93//   - it is posted before the telebonk started
 94//   - it is replying to a honk that's not posted by telebonk (either a remote honk or old honk)
 95//   - it is of unsupported type (not a regular honk, reply or bonk)
 96//   - it contains a `#notg` tag.
 97//   - it is empty
 98func (h *Honk) Check() error {
 99	log.Print("checking honk #", h.ID) // info
100	if h.Date.Before(now) {
101		return fmt.Errorf("honk #%d is old", h.ID)
102	}
103	switch h.What {
104	case "honked", "bonked":
105		break
106	case "honked back":
107		hi, ok := honkMap[h.RID]
108		if !ok {
109			return fmt.Errorf("cannot reply to nonexisting telebonk")
110		}
111		h.ReplyToID = hi.MessID
112	default:
113		return fmt.Errorf("unsupported honk type: %s", h.What)
114	}
115
116	for _, ont := range h.Onts {
117		if strings.ToLower(ont) == "#notg" {
118			return fmt.Errorf("skipping #notg honk")
119		}
120	}
121
122	if h.Noise == emptyNoise && len(h.Donks) == 0 {
123		return fmt.Errorf("empty honk")
124	}
125	return nil
126}
127
128func (h *Honk) Decide() {
129	hi, ok := honkMap[h.XID]
130	if ok {
131		if hi.MessID == 0 || hi.Date.Equal(h.Date) {
132			h.save(hi.MessID)
133			h.Action = HonkIgnore
134		}
135		h.Action = HonkEdit
136	}
137	h.Action = HonkSend
138}
139
140// save records a Honk to a honkMap
141func (h *Honk) save(messID int) {
142	h.MessID = messID
143	honkMap[h.XID] = h
144}
145
146// forget removes a Honk from a honkMap
147func (h *Honk) forget() {
148	delete(honkMap, h.XID)
149}
150
151// A Mess holds data for a message to be sent to Telegram.
152type Mess struct {
153	Text             string `json:"text"`
154	ParseMode        string `json:"parse_mode"`
155	ChatID           string `json:"chat_id"`
156	MessageID        int    `json:"message_id,omitempty"`
157	ReplyToMessageID int    `json:"reply_to_message_id,omitempty"`
158
159	Document string `json:"document,omitempty"`
160	Photo    string `json:"photo,omitempty"`
161	Caption  string `json:"caption,omitempty"`
162
163	kind messKind
164}
165
// TelegramResponse is the envelope the Telegram API wraps its replies in.
type TelegramResponse struct {
	// Ok reports whether the request succeeded.
	Ok bool
	// Description is the human-readable text sent along with the reply;
	// Send uses it as the error message when Ok is false.
	Description string
	// Result is the payload of the reply.
	Result telegramResponseResult
}

// telegramResponseResult is the only part of the reply payload telebonk
// cares about: the id of the message the request dealt with.
type telegramResponseResult struct {
	MessageID int `json:"message_id"`
}
176
177// NewMess creates and populates a new Mess with default values.
178func NewMess(parseMode string) *Mess {
179	return &Mess{
180		ParseMode: parseMode,
181		ChatID:    config.TgChatID,
182		kind:      messHonk,
183	}
184}
185
186// NewMessFromHonk creates a slice of Mess objects from existing Honk.
187func NewMessFromHonk(honk *Honk) []*Mess {
188	var truncateWith = "...\n\nfull honk: " + honk.XID // hardcoded == bad
189	// donks should be sent as a separate messages, so need to create all of 'em
190	// cap(messes) = 1 for honk + 1 for each donk
191	var messes = make([]*Mess, 0, 1+len(honk.Donks))
192
193	messes = append(messes, NewMess("html"))
194	for _, donk := range honk.Donks {
195		donkMess := NewMess("MarkdownV2") // donks don't contain html
196		donkMess.Caption = TruncateNoise(donk.Desc, truncateWith, 1024)
197		switch {
198		case strings.HasPrefix(donk.Media, "image/"):
199			donkMess.Photo = donk.URL
200			donkMess.kind = messDonkPht
201		case donk.Media == "application/pdf", donk.Media == "text/plain":
202			donkMess.Document = donk.URL
203			donkMess.kind = messDonkDoc
204		}
205		messes = append(messes, donkMess)
206	}
207	if honk.Noise == emptyNoise {
208		messes = messes[1:] // just donks
209	}
210
211	if honk.Action == HonkEdit {
212		// TODO: implement editing documents and photos
213		messes[0].kind = messEdit
214		messes[0].MessageID = honk.MessID
215		messes = messes[:1] // don't donk if editing
216	}
217
218	var text = CalmNoise(honk.Noise)
219	text = TruncateNoise(text, truncateWith, 4096)
220	// bonk, then honk back - ok
221	// honk back, then bonk - not gonna sync, is it ok?
222	// upd: bonks work really confusing
223	switch honk.What {
224	case "honked":
225		break
226	case "honked back":
227		messes[0].ReplyToMessageID = honk.ReplyToID
228	case "bonked":
229		oonker := fmt.Sprintf("<a href=\"%s\">%s</a>:", honk.Oonker, honk.Oondle)
230		text = oonker + "\n" + text
231	}
232
233	// danger zone handling
234	if strings.HasPrefix(honk.Precis, "DZ:") {
235		text = strings.Join([]string{"<tg-spoiler>", "</tg-spoiler>"}, text)
236		text = honk.Precis + "\n" + text
237	}
238	messes[0].Text = text
239
240	return messes
241}
242
243// Send sends a Mess to Telegram.
244func (m *Mess) Send() (*TelegramResponse, error) {
245	var apiURL = botAPIMethod(tgSendMessage)
246
247	switch m.kind {
248	case messHonk:
249		// noop
250	case messEdit:
251		apiURL = botAPIMethod(tgEditMessageText)
252	case messDonkPht:
253		apiURL = botAPIMethod(tgSendPhoto)
254	case messDonkDoc:
255		apiURL = botAPIMethod(tgSendDocument)
256	}
257
258	junk, err := json.Marshal(m)
259	if err != nil {
260		return nil, err
261	}
262	buf := bytes.NewBuffer(junk)
263	req, err := http.NewRequest("POST", apiURL, buf)
264	if err != nil {
265		return nil, err
266	}
267	req.Header.Add("Content-type", "application/json")
268	req.Header.Add("Content-length", strconv.Itoa(buf.Len()))
269
270	resp, err := client.Do(req)
271	if err != nil {
272		return nil, err
273	}
274	defer resp.Body.Close()
275
276	var res TelegramResponse
277	json.NewDecoder(resp.Body).Decode(&res)
278	if !res.Ok {
279		return nil, fmt.Errorf("mess send: %s", res.Description)
280	}
281
282	return &res, nil
283}
284
// messKind tells Send which Bot API method a Mess is meant for.
type messKind int

// Possible values of messKind.
const (
	// messHonk is a plain text message; it is the zero value.
	messHonk messKind = iota
	// messEdit is an edit of the text of an existing message.
	messEdit
	// messDonkPht is a photo attachment.
	messDonkPht
	// messDonkDoc is a document attachment.
	messDonkDoc
)
293
294func botAPIMethod(method string) string {
295	return fmt.Sprintf("%s/bot%s/%s", config.TgApiURL, config.TgBotToken, method)
296}
297
298func checkTgAPI() error {
299	var apiURL = botAPIMethod(tgGetMe)
300	resp, err := client.Get(apiURL)
301	if err != nil {
302		return err
303	}
304	if resp.StatusCode != 200 {
305		status, _ := io.ReadAll(resp.Body)
306		return fmt.Errorf("status: %d: %s", resp.StatusCode, status)
307	}
308	return nil
309}
310
// Names of the Telegram Bot API methods telebonk calls.
const (
	// tgGetMe is called once at startup to check the API URL and the token.
	tgGetMe = "getMe"
	// tgSendMessage posts a text message.
	tgSendMessage = "sendMessage"
	// tgEditMessageText replaces the text of an existing message.
	tgEditMessageText = "editMessageText"
	// tgSendPhoto posts a photo.
	tgSendPhoto = "sendPhoto"
	// tgSendDocument posts a document.
	tgSendDocument = "sendDocument"
)
319
320// getHonks receives and unmarshals some honks from a Honk instance.
321func getHonks(after int) ([]*Honk, error) {
322	query := url.Values{}
323	query.Set("token", config.HonkAuthToken)
324	query.Set("action", "gethonks")
325	query.Set("page", "home")
326	query.Set("after", strconv.Itoa(after))
327
328	resp, err := client.Get(fmt.Sprint(config.HonkURL, "/api?", query.Encode()))
329	if err != nil {
330		return nil, err
331	}
332	defer resp.Body.Close()
333
334	// honk outputs junk like `{ "honks": [ ... ] }`, need to get into the list
335	var honkJunk map[string][]*Honk
336	err = json.NewDecoder(resp.Body).Decode(&honkJunk)
337	if err != nil {
338		return nil, err
339	}
340
341	honks := honkJunk["honks"]
342	// honk.ID monotonically increases, so it can be used to sort honks
343	sort.Slice(honks, func(i, j int) bool { return honks[i].ID < honks[j].ID })
344
345	return honks, nil
346}
347
// Patterns of the html tags that Telegram does not support.
var (
	// Tags that are removed altogether.
	rePTags  = regexp.MustCompile(`<\/?p>`)
	reHlTags = regexp.MustCompile(`<\/?span( class="[a-z]{2}")?>`)
	reBrTags = regexp.MustCompile(`<br>`)
	// Every part of the pattern stays within a single tag, so that whatever
	// follows the image on the same line is left alone.
	reImgTags = regexp.MustCompile(`<img [^>]*src="[^"]*"[^>]*>`)
	reUlTags  = regexp.MustCompile(`<\/?ul>`)
	reTbTags  = regexp.MustCompile(`<table>.*</table>`)

	// Tags that are rewritten into something Telegram can render.
	reLiTags = regexp.MustCompile(`<li>([^<>\/]*)<\/li>`)
	reHnTags = regexp.MustCompile(`<h[1-6]>(.*)</h[1-6]>`)
	// hr is a void element, so the closing tag is optional here.
	reHrTags = regexp.MustCompile(`<hr>(?:.*<\/hr>)?`)
	reBqTags = regexp.MustCompile(`<blockquote>(.*)<\/blockquote>`)
)

// emptyNoise is the Noise of a honk that has no text.
const emptyNoise = "<p></p>\n"

// CalmNoise erases and rewrites html tags that are not supported by Telegram
// and returns the result with the surrounding white space trimmed.
//
// Paragraph, highlighting span, line break, image, list and table tags are
// deleted. Horizontal rules become `---`, headings become bold text, block
// quotes become italic text prefixed with `| `, and list items become lines
// prefixed with `* `.
func CalmNoise(s string) string {
	// delete these
	for _, re := range []*regexp.Regexp{rePTags, reHlTags, reBrTags, reImgTags, reUlTags, reTbTags} {
		s = re.ReplaceAllString(s, "")
	}

	// these can be repurposed
	s = reHrTags.ReplaceAllString(s, "---\n")
	s = reHnTags.ReplaceAllString(s, "<b>$1</b>\n\n")
	s = reBqTags.ReplaceAllString(s, "| <i>$1</i>")
	s = reLiTags.ReplaceAllString(s, "* $1\n")

	return strings.TrimSpace(s)
}
382
// TruncateNoise shortens s so that it can be sent to Telegram and marks the
// cut by appending `with`.
//
// A string of at most `length` bytes is returned untouched. Anything longer
// is reduced to the runes that start before byte offset
// `length - 1 - len(with)`, followed by `with`.
func TruncateNoise(s, with string, length int) string {
	// telegram can handle posts no longer than 4096 (or 1024) characters _after_ the parsing of entities.
	// we could be clever and calculate the true length of text, but let's keep it simple and stupid.
	if len(s) <= length {
		return s
	}

	limit := length - len(with) - 1

	var out strings.Builder
	out.Grow(length)
	for offset, char := range s {
		if offset < limit {
			out.WriteRune(char)
			continue
		}
		break
	}
	out.WriteString(with)
	return out.String()
}
403
404func init() {
405	flag.StringVar(&config.TgBotToken, "bot_token", "", "Telegram bot token")
406	flag.StringVar(&config.TgChatID, "chat_id", "", "Telegram chat_id")
407	flag.StringVar(&config.TgApiURL, "tgapi_url", "https://api.telegram.org", "Telegram API URL")
408	flag.StringVar(&config.HonkAuthToken, "honk_token", "", "Honk auth token")
409	flag.StringVar(&config.HonkURL, "honk_url", "", "URL of a Honk instance")
410
411	flag.Parse()
412
413	if err := config.Check(); err != nil {
414		log.Fatal("config:", err) // fail
415	}
416	config.TgApiURL = strings.TrimRight(config.TgApiURL, "/")
417	if err := checkTgAPI(); err != nil {
418		log.Fatal(err) // fail
419	}
420}
421
// client is the HTTP client used for every request to Telegram and Honk.
// The timeout bounds a whole request, so that a single stalled connection
// cannot block the polling loop forever.
var client = &http.Client{Timeout: 30 * time.Second}
423var honkMap = make(map[string]*Honk) // FIXME: not safe for use by multiple goroutines!
424var now = time.Now()
425
426func main() {
427	var retry = 5
428
429	for {
430		honks, err := getHonks(0)
431		if err != nil {
432			log.Print("gethonks:", err) // error
433			retry--
434			if retry == 0 {
435				log.Fatal("gethonks: giving up") // fail
436			}
437			time.Sleep(5 * time.Second)
438			continue
439		}
440
441	HonkLoop:
442		for _, honk := range honks {
443			honk.Decide()
444			switch honk.Action {
445			case HonkIgnore:
446				continue
447			case HonkSend, HonkEdit:
448				if err := honk.Check(); err != nil {
449					log.Print(err) // error
450					continue
451				}
452			}
453			messes := NewMessFromHonk(honk)
454			// messes[0] is a honk or a donk to be sent
455			resp, err := messes[0].Send()
456			if err != nil {
457				log.Print(err) // error
458				honk.forget()  // retry
459				continue
460			}
461			// remember only the first mess' response
462			honk.save(resp.Result.MessageID)
463			for _, mess := range messes[1:] {
464				if _, err := mess.Send(); err != nil {
465					log.Print(err) // error
466					continue HonkLoop
467				}
468			}
469		}
470		time.Sleep(30 * time.Second)
471	}
472}