all repos — telebonk @ cd0118a677e8f1f3b1f6a4a89311a3fae9d7c349

reposter from honk to telegram

main.go (view raw)

  1// Telebonk is a reposter from Honk to Telegram.
  2package main
  3
  4import (
  5	"bytes"
  6	"encoding/json"
  7	"flag"
  8	"fmt"
  9	"io"
 10	"log"
 11	"net/http"
 12	"net/url"
 13	"regexp"
 14	"sort"
 15	"strconv"
 16	"strings"
 17	"time"
 18)
 19
 20// A Config is holding configuration of telebonk.
 21type Config struct {
 22	TgBotToken    string
 23	TgChatID      string
 24	TgApiURL      string
 25	HonkAuthToken string
 26	HonkPage      string
 27	HonkURL       string
 28}
 29
 30// Check makes sure that no Config fields are set to empty strings.
 31func (c *Config) Check() error {
 32	var what string
 33	if c.TgBotToken == "" {
 34		what = "bot_token"
 35	}
 36	if c.TgChatID == "" {
 37		what = "chat_id"
 38	}
 39	if c.TgApiURL == "" {
 40		what = "tgapi_url"
 41	}
 42	if c.HonkAuthToken == "" {
 43		what = "honk_token"
 44	}
 45	if c.HonkURL == "" {
 46		what = "honk_url"
 47	}
 48	switch c.HonkPage {
 49	case "atme", "longago", "home", "myhonks":
 50	default:
 51		return fmt.Errorf("bad page type: %s", c.HonkPage)
 52	}
 53	if what != "" {
 54		return fmt.Errorf("'%s' shouldn't be empty", what)
 55	}
 56	return nil
 57}
 58
 59var config = &Config{}
 60
 61// A Honk is a post from honk.
 62type Honk struct {
 63	ID     int       // unique id of a post
 64	What   string    // type of an action (post, repost, reply)
 65	Oondle string    // e-mail style handle of the original author of a post
 66	Oonker string    // url of the original author of a post
 67	XID    string    // url of a post, also unique
 68	RID    string    // url of a post that current post is replying to
 69	Date   time.Time // datetime of a post
 70	Precis string    // post summary
 71	Noise  string    // contents of a post
 72	Onts   []string  // a slice of tags in a post
 73	Donks  []*Donk   // a slice of attachments to a post
 74
 75	MessID    int // telegram message_id
 76	ReplyToID int // telegram message_id of a message to reply to
 77
 78	Action HonkAction
 79}
 80
 81// A HonkAction tells what to do with the saved honk.
 82type HonkAction int
 83
 84const (
 85	HonkNotSet HonkAction = iota
 86	HonkIgnore
 87	HonkSend
 88	HonkEdit
 89)
 90
 91// A Donk stores metadata for media files.
 92type Donk struct {
 93	URL   string
 94	Media string // mime-type of an attachment
 95	Desc  string
 96}
 97
 98// Check performs some checks on a Honk to filter out what's not going to be posted.
 99//
100// It rejects honk if if falls into one of these categories:
101//   - it is posted before the telebonk started
102//   - it is replying to a honk that's not posted by telebonk (either a remote honk or old honk)
103//   - it is of unsupported type (not a regular honk, reply or bonk)
104//   - it contains a `#notg` tag.
105//   - it is empty
106func (h *Honk) Check() error {
107	log.Print("check: checking honk #", h.ID) // info
108
109	switch h.What {
110	case "honked", "bonked":
111		break
112	case "honked back":
113		hi, ok := honkMap[h.RID]
114		if !ok {
115			return fmt.Errorf("cannot reply to nonexisting telebonk")
116		}
117		h.ReplyToID = hi.MessID
118	default:
119		return fmt.Errorf("unsupported honk type: %s", h.What)
120	}
121
122	for _, ont := range h.Onts {
123		if strings.ToLower(ont) == "#notg" {
124			return fmt.Errorf("skipping #notg honk")
125		}
126	}
127
128	if h.Noise == emptyNoise && len(h.Donks) == 0 {
129		return fmt.Errorf("empty honk")
130	}
131	return nil
132}
133
134// Decide sets the Action of a Honk.
135//
136// It sets HonkIgnore to those honks that are: 1) old; 2) already sent and not edits.
137func (h *Honk) Decide() {
138	oldhonk, ok := honkMap[h.XID]
139	if ok {
140		if oldhonk.MessID == 0 || h.Date.Equal(oldhonk.Date) {
141			h.Action = HonkIgnore
142			h.save(oldhonk.MessID)
143			return
144		}
145		log.Print("decide: honk #", h.XID, " is to be edited")
146		h.Action = HonkEdit
147		h.MessID = oldhonk.MessID
148		return
149	}
150	if h.Date.Before(now) {
151		h.Action = HonkIgnore
152		h.save(0)
153		return
154	}
155	log.Print("decide: honk #", h.ID, " is to be sent")
156	h.Action = HonkSend
157}
158
159// save records a Honk to the honkMap
160func (h *Honk) save(messID int) {
161	h.MessID = messID
162	honkMap[h.XID] = h
163}
164
165// forget unchecks a Honk from the honkMap
166func (h *Honk) forget() {
167	oldhonk, ok := honkMap[h.XID]
168	if !ok {
169		return
170	}
171	oldhonk.MessID = 0
172	oldhonk.ReplyToID = 0
173	honkMap[oldhonk.XID] = oldhonk
174}
175
176// A Mess holds data for a message to be sent to Telegram.
177type Mess struct {
178	Text             string `json:"text"`
179	ChatID           string `json:"chat_id"`
180	ParseMode        string `json:"parse_mode,omitempty"`
181	MessageID        int    `json:"message_id,omitempty"`
182	ReplyToMessageID int    `json:"reply_to_message_id,omitempty"`
183
184	Document string `json:"document,omitempty"`
185	Photo    string `json:"photo,omitempty"`
186	Caption  string `json:"caption,omitempty"`
187
188	kind messKind
189}
190
191// A TelegramResponse is a response from Telegram API.
192type TelegramResponse struct {
193	Ok          bool
194	Description string
195	Result      struct {
196		MessageID int `json:"message_id"`
197	}
198}
199
200// NewMess creates and populates a new Mess with default values.
201func NewMess(parseMode string) *Mess {
202	return &Mess{
203		ParseMode: parseMode,
204		ChatID:    config.TgChatID,
205		kind:      messHonk,
206	}
207}
208
209// NewMessFromHonk creates a slice of Mess objects from existing Honk.
210func NewMessFromHonk(honk *Honk) []*Mess {
211	var truncateWith = "...\n\nfull honk: " + honk.XID // hardcoded == bad
212	// donks should be sent as a separate messages, so need to create all of 'em
213	// cap(messes) = 1 for honk + 1 for each donk
214	var messes = make([]*Mess, 0, 1+len(honk.Donks))
215
216	messes = append(messes, NewMess("html"))
217	for _, donk := range honk.Donks {
218		donkMess := NewMess("") // donks don't contain html
219		donkMess.Caption = TruncateNoise(donk.Desc, truncateWith, 1024)
220		switch {
221		case strings.HasPrefix(donk.Media, "image/"):
222			donkMess.Photo = donk.URL
223			donkMess.kind = messDonkPht
224		case donk.Media == "application/pdf", donk.Media == "text/plain":
225			donkMess.Document = donk.URL
226			donkMess.kind = messDonkDoc
227		}
228		messes = append(messes, donkMess)
229	}
230	if honk.Noise == emptyNoise {
231		messes = messes[1:] // just donks
232	}
233
234	if honk.Action == HonkEdit {
235		// TODO: implement editing documents and photos
236		messes[0].kind = messEdit
237		messes[0].MessageID = honk.MessID
238		messes = messes[:1] // don't donk if editing
239	}
240
241	var text = CalmNoise(honk.Noise)
242	text = TruncateNoise(text, truncateWith, 4096)
243	// bonk, then honk back - ok
244	// honk back, then bonk - not gonna sync, is it ok?
245	// upd: bonks work really confusing
246	switch honk.What {
247	case "honked":
248		break
249	case "honked back":
250		messes[0].ReplyToMessageID = honk.ReplyToID
251	case "bonked":
252		oonker := fmt.Sprintf("<a href=\"%s\">%s</a>:", honk.Oonker, honk.Oondle)
253		text = oonker + "\n" + text
254	}
255
256	// danger zone handling
257	if strings.HasPrefix(honk.Precis, "DZ:") {
258		text = strings.Join([]string{"<tg-spoiler>", "</tg-spoiler>"}, text)
259		text = honk.Precis + "\n" + text
260	}
261	messes[0].Text = text
262
263	return messes
264}
265
266// Send sends a Mess to Telegram.
267func (m *Mess) Send() (*TelegramResponse, error) {
268	var apiURL = botAPIMethod(tgSendMessage)
269
270	switch m.kind {
271	case messHonk:
272		// noop
273	case messEdit:
274		apiURL = botAPIMethod(tgEditMessageText)
275	case messDonkPht:
276		apiURL = botAPIMethod(tgSendPhoto)
277	case messDonkDoc:
278		apiURL = botAPIMethod(tgSendDocument)
279	}
280
281	junk, err := json.Marshal(m)
282	if err != nil {
283		return nil, err
284	}
285	buf := bytes.NewBuffer(junk)
286	req, err := http.NewRequest("POST", apiURL, buf)
287	if err != nil {
288		return nil, err
289	}
290	req.Header.Add("Content-type", "application/json")
291	req.Header.Add("Content-length", strconv.Itoa(buf.Len()))
292
293	resp, err := client.Do(req)
294	if err != nil {
295		return nil, err
296	}
297	defer resp.Body.Close()
298
299	var res TelegramResponse
300	json.NewDecoder(resp.Body).Decode(&res)
301	if !res.Ok {
302		return nil, fmt.Errorf("mess send: %s", res.Description)
303	}
304
305	return &res, nil
306}
307
// A messKind tells Send which Telegram Bot API method a Mess is meant for.
type messKind int

const (
	messHonk    = messKind(iota) // text message
	messEdit                     // edit of a text message
	messDonkPht                  // photo attachment
	messDonkDoc                  // document attachment
)
316
317func botAPIMethod(method string) string {
318	return fmt.Sprintf("%s/bot%s/%s", config.TgApiURL, config.TgBotToken, method)
319}
320
321func checkTgAPI() error {
322	var apiURL = botAPIMethod(tgGetMe)
323	resp, err := client.Get(apiURL)
324	if err != nil {
325		return err
326	}
327	if resp.StatusCode != 200 {
328		status, _ := io.ReadAll(resp.Body)
329		return fmt.Errorf("status: %d: %s", resp.StatusCode, status)
330	}
331	return nil
332}
333
// Names of the Telegram Bot API methods used by telebonk, as they appear in
// the request URL.
const (
	// posting and editing text
	tgSendMessage     = "sendMessage"
	tgEditMessageText = "editMessageText"

	// posting attachments
	tgSendPhoto    = "sendPhoto"
	tgSendDocument = "sendDocument"

	// checking the bot token
	tgGetMe = "getMe"
)
342
343// getHonks receives and unmarshals some honks from a Honk instance.
344func getHonks(page string, after int) ([]*Honk, error) {
345	query := url.Values{}
346	query.Set("action", "gethonks")
347	query.Set("page", page)
348	query.Set("after", strconv.Itoa(after))
349	apiurl := config.HonkURL + "/api?" + query.Encode()
350
351	req, err := http.NewRequest("GET", apiurl, nil)
352	if err != nil {
353		return nil, err
354	}
355	req.Header.Add("Authorization", "Bearer " + config.HonkAuthToken)
356
357	resp, err := client.Do(req)
358	if err != nil {
359		return nil, err
360	}
361	defer resp.Body.Close()
362
363	// honk outputs junk like `{ "honks": [ ... ] }`, need to get into the list
364	var honkJunk map[string][]*Honk
365	err = json.NewDecoder(resp.Body).Decode(&honkJunk)
366	if err != nil {
367		// FIXME: honk tokens last for a week or so. when one expires, shouldn't this say something meaningful instead of `unexpected v in blah-blah'?
368		log.Print("gethonks: ", resp.Status)
369		return nil, err
370	}
371
372	honks := honkJunk["honks"]
373	// honk.ID monotonically increases, so it can be used to sort honks
374	sort.Slice(honks, func(i, j int) bool { return honks[i].ID < honks[j].ID })
375
376	return honks, nil
377}
378
// Patterns used by CalmNoise. Wildcards are lazy so that each pattern matches
// one element at a time: with greedy wildcards two elements on the same line
// would be matched as a single span, swallowing whatever stands between them.
var (
	// tags that are deleted
	rePTags   = regexp.MustCompile(`<\/?p>`)
	reHlTags  = regexp.MustCompile(`<\/?span( class="[a-z]{2}")?>`)
	reBrTags  = regexp.MustCompile(`<br>`)
	reImgTags = regexp.MustCompile(`<img .*?src="(.*?)">`)
	reUlTags  = regexp.MustCompile(`<\/?ul>`)
	reTbTags  = regexp.MustCompile(`<table>.*?</table>`)

	// tags that are rewritten
	reLiTags = regexp.MustCompile(`<li>([^<>\/]*)<\/li>`)
	reHnTags = regexp.MustCompile(`<h[1-6]>(.*?)</h[1-6]>`)
	reHrTags = regexp.MustCompile(`<hr>.*?<\/hr>`)
	reBqTags = regexp.MustCompile(`<blockquote>(.*?)<\/blockquote>`)
)

// emptyNoise is the Noise of a honk that has no text.
const emptyNoise = "<p></p>\n"

// CalmNoise erases and rewrites html tags that are not supported by Telegram.
//
// Paragraph, span, br, img, ul and table markup is removed. Headings become
// bold text followed by a blank line, block quotes become italic text
// prefixed with "| ", list items become "* item" lines and hr elements become
// "---". Leading and trailing white space is trimmed from the result.
//
// The patterns do not match across line breaks.
func CalmNoise(s string) string {
	// delete these
	for _, re := range []*regexp.Regexp{rePTags, reHlTags, reBrTags, reImgTags, reUlTags, reTbTags} {
		s = re.ReplaceAllString(s, "")
	}

	// these can be repurposed
	s = reHrTags.ReplaceAllString(s, "---\n")
	s = reHnTags.ReplaceAllString(s, "<b>$1</b>\n\n")
	s = reBqTags.ReplaceAllString(s, "| <i>$1</i>")
	s = reLiTags.ReplaceAllString(s, "* $1\n")

	return strings.TrimSpace(s)
}
413
// TruncateNoise shortens s so that it can carry the suffix `with`.
//
// A string of at most `length` bytes is returned unchanged. Otherwise the
// result is made of the runes of s that start before byte offset
// `length - 1 - len(with)`, followed by `with`.
//
// Telegram limits posts to 4096 (captions to 1024) characters counted _after_
// entity parsing; measuring the raw text in bytes is a deliberately simple
// over-estimate of that.
func TruncateNoise(s, with string, length int) string {
	if len(s) <= length {
		return s
	}

	limit := length - len(with) - 1

	var out strings.Builder
	out.Grow(length)
	for offset, ch := range s {
		if offset < limit {
			out.WriteRune(ch)
			continue
		}
		break
	}
	out.WriteString(with)
	return out.String()
}
434
435func init() {
436	flag.StringVar(&config.TgBotToken, "bot_token", "", "Telegram bot token")
437	flag.StringVar(&config.TgChatID, "chat_id", "", "Telegram chat_id")
438	flag.StringVar(&config.TgApiURL, "tgapi_url", "https://api.telegram.org", "Telegram API URL")
439	flag.StringVar(&config.HonkAuthToken, "honk_token", "", "Honk auth token")
440	flag.StringVar(&config.HonkPage, "honk_page", "myhonks", "Page to get honks from. Should be one of [atme, longago, home, myhonks]")
441	flag.StringVar(&config.HonkURL, "honk_url", "", "URL of a Honk instance")
442
443	flag.Parse()
444
445	if err := config.Check(); err != nil {
446		log.Fatal("config:", err) // fail
447	}
448	config.TgApiURL = strings.TrimRight(config.TgApiURL, "/")
449	if err := checkTgAPI(); err != nil {
450		log.Fatal("tgAPI:", err) // fail
451	}
452}
453
// client performs every HTTP request made by telebonk. It carries an overall
// timeout: http.DefaultClient has none, so a single stalled connection would
// block the polling loop indefinitely.
var client = &http.Client{Timeout: time.Minute}
455var honkMap = make(map[string]*Honk) // FIXME: not safe for use by multiple goroutines!
456var now = time.Now()
457
458func main() {
459	var retry = 5
460
461	log.Print("starting telebonk") // info
462	for {
463		honks, err := getHonks(config.HonkPage, 0)
464		if err != nil {
465			log.Print("gethonks:", err) // error
466			retry--
467			if retry == 0 {
468				log.Fatal("gethonks: giving up") // fail
469			}
470			time.Sleep(5 * time.Second)
471			continue
472		}
473	HonkLoop:
474		for _, honk := range honks {
475			honk.Decide()
476			switch honk.Action {
477			case HonkIgnore:
478				continue
479			case HonkSend, HonkEdit:
480				if err := honk.Check(); err != nil {
481					log.Print("honk check:", err) // error
482					continue
483				}
484			}
485			messes := NewMessFromHonk(honk)
486			// messes[0] is a honk or a donk to be sent
487			resp, err := messes[0].Send()
488			if err != nil {
489				log.Print("mess send", err) // error
490				honk.forget()               // retry
491				continue
492			}
493			// remember only the first mess' response
494			honk.save(resp.Result.MessageID)
495			for _, mess := range messes[1:] {
496				if _, err := mess.Send(); err != nil {
497					log.Print("mess send", err) // error
498					continue HonkLoop
499				}
500			}
501		}
502		time.Sleep(30 * time.Second)
503	}
504}