main.go (view raw)
1// Telebonk is a reposter from Honk to Telegram.
2package main
3
4import (
5 "bytes"
6 "encoding/json"
7 "flag"
8 "fmt"
9 "io"
10 "log"
11 "net/http"
12 "net/url"
13 "regexp"
14 "sort"
15 "strconv"
16 "strings"
17 "time"
18)
19
// Config carries every runtime setting of telebonk.
type Config struct {
	// Telegram side: bot token, target chat and base URL of the Bot API.
	TgBotToken, TgChatID, TgApiURL string
	// Honk side: auth token, page to poll and URL of the instance.
	HonkAuthToken, HonkPage, HonkURL string
}
29
30// Check makes sure that no Config fields are set to empty strings.
31func (c *Config) Check() error {
32 var what string
33 if c.TgBotToken == "" {
34 what = "bot_token"
35 }
36 if c.TgChatID == "" {
37 what = "chat_id"
38 }
39 if c.TgApiURL == "" {
40 what = "tgapi_url"
41 }
42 if c.HonkAuthToken == "" {
43 what = "honk_token"
44 }
45 if c.HonkURL == "" {
46 what = "honk_url"
47 }
48 switch c.HonkPage {
49 case "atme", "longago", "home", "myhonks":
50 default:
51 return fmt.Errorf("bad page type: %s", c.HonkPage)
52 }
53 if what != "" {
54 return fmt.Errorf("'%s' shouldn't be empty", what)
55 }
56 return nil
57}
58
59var config = &Config{}
60
61// A Honk is a post from honk.
62type Honk struct {
63 ID int // unique id of a post
64 What string // type of an action (post, repost, reply)
65 Oondle string // e-mail style handle of the original author of a post
66 Oonker string // url of the original author of a post
67 XID string // url of a post, also unique
68 RID string // url of a post that current post is replying to
69 Date time.Time // datetime of a post
70 Precis string // post summary
71 Noise string // contents of a post
72 Onts []string // a slice of tags in a post
73 Donks []*Donk // a slice of attachments to a post
74
75 MessID int // telegram message_id
76 ReplyToID int // telegram message_id of a message to reply to
77
78 Action HonkAction
79}
80
// HonkAction is the verdict reached about a saved honk: what should be done with it.
type HonkAction int

// Possible values of HonkAction.
const (
	// HonkNotSet is the zero value: nothing has been decided yet.
	HonkNotSet HonkAction = iota
	// HonkIgnore marks a honk that must not be posted.
	HonkIgnore
	// HonkSend marks a honk that is to be posted as a new message.
	HonkSend
	// HonkEdit marks a honk whose already posted message is to be edited.
	HonkEdit
)
90
// Donk describes one media file attached to a honk.
type Donk struct {
	// URL is where the file can be fetched from.
	URL string
	// Media is the mime-type of the attachment.
	Media string
	// Desc is the description of the attachment.
	Desc string
}
97
98// Check performs some checks on a Honk to filter out what's not going to be posted.
99//
100// It rejects honk if if falls into one of these categories:
101// - it is posted before the telebonk started
102// - it is replying to a honk that's not posted by telebonk (either a remote honk or old honk)
103// - it is of unsupported type (not a regular honk, reply or bonk)
104// - it contains a `#notg` tag.
105// - it is empty
106func (h *Honk) Check() error {
107 log.Print("check: checking honk #", h.ID) // info
108
109 switch h.What {
110 case "honked", "bonked":
111 break
112 case "honked back":
113 hi, ok := honkMap[h.RID]
114 if !ok {
115 return fmt.Errorf("cannot reply to nonexisting telebonk")
116 }
117 h.ReplyToID = hi.MessID
118 default:
119 return fmt.Errorf("unsupported honk type: %s", h.What)
120 }
121
122 for _, ont := range h.Onts {
123 if strings.ToLower(ont) == "#notg" {
124 return fmt.Errorf("skipping #notg honk")
125 }
126 }
127
128 if h.Noise == emptyNoise && len(h.Donks) == 0 {
129 return fmt.Errorf("empty honk")
130 }
131 return nil
132}
133
134// Decide sets the Action of a Honk.
135//
136// It sets HonkIgnore to those honks that are: 1) old; 2) already sent and not edits.
137func (h *Honk) Decide() {
138 oldhonk, ok := honkMap[h.XID]
139 if ok {
140 if oldhonk.MessID == 0 || h.Date.Equal(oldhonk.Date) {
141 h.Action = HonkIgnore
142 h.save(oldhonk.MessID)
143 return
144 }
145 log.Print("decide: honk #", h.XID, " is to be edited")
146 h.Action = HonkEdit
147 h.MessID = oldhonk.MessID
148 return
149 }
150 if h.Date.Before(now) {
151 h.Action = HonkIgnore
152 h.save(0)
153 return
154 }
155 log.Print("decide: honk #", h.ID, " is to be sent")
156 h.Action = HonkSend
157}
158
159// save records a Honk to the honkMap
160func (h *Honk) save(messID int) {
161 h.MessID = messID
162 honkMap[h.XID] = h
163}
164
165// forget unchecks a Honk from the honkMap
166func (h *Honk) forget() {
167 oldhonk, ok := honkMap[h.XID]
168 if !ok {
169 return
170 }
171 oldhonk.MessID = 0
172 oldhonk.ReplyToID = 0
173 honkMap[oldhonk.XID] = oldhonk
174}
175
176// A Mess holds data for a message to be sent to Telegram.
177type Mess struct {
178 Text string `json:"text"`
179 ChatID string `json:"chat_id"`
180 ParseMode string `json:"parse_mode,omitempty"`
181 MessageID int `json:"message_id,omitempty"`
182 ReplyToMessageID int `json:"reply_to_message_id,omitempty"`
183
184 Document string `json:"document,omitempty"`
185 Photo string `json:"photo,omitempty"`
186 Caption string `json:"caption,omitempty"`
187
188 kind messKind
189}
190
// TelegramResponse holds the parts of a Telegram Bot API reply that telebonk
// looks at.
type TelegramResponse struct {
	// Ok reports whether the request succeeded.
	Ok bool
	// Description explains the failure when Ok is false.
	Description string
	// Result is the object returned by the method; only the id of the
	// resulting message is decoded.
	Result struct {
		MessageID int `json:"message_id"`
	}
}
199
200// NewMess creates and populates a new Mess with default values.
201func NewMess(parseMode string) *Mess {
202 return &Mess{
203 ParseMode: parseMode,
204 ChatID: config.TgChatID,
205 kind: messHonk,
206 }
207}
208
209// NewMessFromHonk creates a slice of Mess objects from existing Honk.
210func NewMessFromHonk(honk *Honk) []*Mess {
211 var truncateWith = "...\n\nfull honk: " + honk.XID // hardcoded == bad
212 // donks should be sent as a separate messages, so need to create all of 'em
213 // cap(messes) = 1 for honk + 1 for each donk
214 var messes = make([]*Mess, 0, 1+len(honk.Donks))
215
216 messes = append(messes, NewMess("html"))
217 for _, donk := range honk.Donks {
218 donkMess := NewMess("") // donks don't contain html
219 donkMess.Caption = TruncateNoise(donk.Desc, truncateWith, 1024)
220 switch {
221 case strings.HasPrefix(donk.Media, "image/"):
222 donkMess.Photo = donk.URL
223 donkMess.kind = messDonkPht
224 case donk.Media == "application/pdf", donk.Media == "text/plain":
225 donkMess.Document = donk.URL
226 donkMess.kind = messDonkDoc
227 }
228 messes = append(messes, donkMess)
229 }
230 if honk.Noise == emptyNoise {
231 messes = messes[1:] // just donks
232 }
233
234 if honk.Action == HonkEdit {
235 // TODO: implement editing documents and photos
236 messes[0].kind = messEdit
237 messes[0].MessageID = honk.MessID
238 messes = messes[:1] // don't donk if editing
239 }
240
241 var text = CalmNoise(honk.Noise)
242 text = TruncateNoise(text, truncateWith, 4096)
243 // bonk, then honk back - ok
244 // honk back, then bonk - not gonna sync, is it ok?
245 // upd: bonks work really confusing
246 switch honk.What {
247 case "honked":
248 break
249 case "honked back":
250 messes[0].ReplyToMessageID = honk.ReplyToID
251 case "bonked":
252 oonker := fmt.Sprintf("<a href=\"%s\">%s</a>:", honk.Oonker, honk.Oondle)
253 text = oonker + "\n" + text
254 }
255
256 // danger zone handling
257 if strings.HasPrefix(honk.Precis, "DZ:") {
258 text = strings.Join([]string{"<tg-spoiler>", "</tg-spoiler>"}, text)
259 text = honk.Precis + "\n" + text
260 }
261 messes[0].Text = text
262
263 return messes
264}
265
266// Send sends a Mess to Telegram.
267func (m *Mess) Send() (*TelegramResponse, error) {
268 var apiURL = botAPIMethod(tgSendMessage)
269
270 switch m.kind {
271 case messHonk:
272 // noop
273 case messEdit:
274 apiURL = botAPIMethod(tgEditMessageText)
275 case messDonkPht:
276 apiURL = botAPIMethod(tgSendPhoto)
277 case messDonkDoc:
278 apiURL = botAPIMethod(tgSendDocument)
279 }
280
281 junk, err := json.Marshal(m)
282 if err != nil {
283 return nil, err
284 }
285 buf := bytes.NewBuffer(junk)
286 req, err := http.NewRequest("POST", apiURL, buf)
287 if err != nil {
288 return nil, err
289 }
290 req.Header.Add("Content-type", "application/json")
291 req.Header.Add("Content-length", strconv.Itoa(buf.Len()))
292
293 resp, err := client.Do(req)
294 if err != nil {
295 return nil, err
296 }
297 defer resp.Body.Close()
298
299 var res TelegramResponse
300 json.NewDecoder(resp.Body).Decode(&res)
301 if !res.Ok {
302 return nil, fmt.Errorf("mess send: %s", res.Description)
303 }
304
305 return &res, nil
306}
307
// messKind tells Send which Telegram Bot API method a Mess is meant for.
type messKind int

// Possible values of messKind.
const (
	// messHonk is a new text message (sendMessage); it is the zero value.
	messHonk messKind = iota
	// messEdit is an edit of an existing text message (editMessageText).
	messEdit
	// messDonkPht is an attachment sent as a photo (sendPhoto).
	messDonkPht
	// messDonkDoc is an attachment sent as a document (sendDocument).
	messDonkDoc
)
316
317func botAPIMethod(method string) string {
318 return fmt.Sprintf("%s/bot%s/%s", config.TgApiURL, config.TgBotToken, method)
319}
320
321func checkTgAPI() error {
322 var apiURL = botAPIMethod(tgGetMe)
323 resp, err := client.Get(apiURL)
324 if err != nil {
325 return err
326 }
327 if resp.StatusCode != 200 {
328 status, _ := io.ReadAll(resp.Body)
329 return fmt.Errorf("status: %d: %s", resp.StatusCode, status)
330 }
331 return nil
332}
333
// Names of the Telegram Bot API methods telebonk calls.
const (
	// tgGetMe is used to probe the API at startup.
	tgGetMe = "getMe"
	// tgSendMessage posts a new text message.
	tgSendMessage = "sendMessage"
	// tgEditMessageText edits the text of a posted message.
	tgEditMessageText = "editMessageText"
	// tgSendPhoto posts an attachment as a photo.
	tgSendPhoto = "sendPhoto"
	// tgSendDocument posts an attachment as a document.
	tgSendDocument = "sendDocument"
)
342
343// getHonks receives and unmarshals some honks from a Honk instance.
344func getHonks(page string, after int) ([]*Honk, error) {
345 query := url.Values{}
346 query.Set("action", "gethonks")
347 query.Set("page", page)
348 query.Set("after", strconv.Itoa(after))
349 apiurl := config.HonkURL + "/api?" + query.Encode()
350
351 req, err := http.NewRequest("GET", apiurl, nil)
352 if err != nil {
353 return nil, err
354 }
355 req.Header.Add("Authorization", "Bearer " + config.HonkAuthToken)
356
357 resp, err := client.Do(req)
358 if err != nil {
359 return nil, err
360 }
361 defer resp.Body.Close()
362
363 // honk outputs junk like `{ "honks": [ ... ] }`, need to get into the list
364 var honkJunk map[string][]*Honk
365 err = json.NewDecoder(resp.Body).Decode(&honkJunk)
366 if err != nil {
367 // FIXME: honk tokens last for a week or so. when one expires, shouldn't this say something meaningful instead of `unexpected v in blah-blah'?
368 log.Print("gethonks: ", resp.Status)
369 return nil, err
370 }
371
372 honks := honkJunk["honks"]
373 // honk.ID monotonically increases, so it can be used to sort honks
374 sort.Slice(honks, func(i, j int) bool { return honks[i].ID < honks[j].ID })
375
376 return honks, nil
377}
378
// Patterns used by CalmNoise. Quantifiers are lazy so that one match never
// swallows the text between two separate tags on the same line.
var (
	// tags that are deleted
	rePTags   = regexp.MustCompile(`</?p>`)
	reHlTags  = regexp.MustCompile(`</?span( class="[a-z]{2}")?>`)
	reBrTags  = regexp.MustCompile(`<br>`)
	reImgTags = regexp.MustCompile(`<img\b[^>]*>`)          // the tag only, nothing after it
	reUlTags  = regexp.MustCompile(`</?ul>`)
	reTbTags  = regexp.MustCompile(`(?s)<table>.*?</table>`) // a table may span several lines

	// tags that are rewritten
	reLiTags = regexp.MustCompile(`(?s)<li>(.*?)</li>`) // items may contain inline tags
	reHnTags = regexp.MustCompile(`<h[1-6]>(.*?)</h[1-6]>`)
	reHrTags = regexp.MustCompile(`<hr>(?:.*?</hr>)?`) // with or without a closing tag
	reBqTags = regexp.MustCompile(`(?s)<blockquote>(.*?)</blockquote>`)
)

// emptyNoise is the Noise of a honk that has no text.
const emptyNoise = "<p></p>\n"

// CalmNoise erases and rewrites html tags that are not supported by Telegram.
//
// Paragraph, highlighting span, line break, image and list container tags are
// removed, tables are removed together with their contents. Horizontal rules
// become "---", headings become bold text, blockquotes become italic text
// prefixed with "| " and list items become lines prefixed with "* ". The
// result has leading and trailing whitespace trimmed.
func CalmNoise(s string) string {
	// delete these
	for _, re := range []*regexp.Regexp{rePTags, reHlTags, reBrTags, reImgTags, reUlTags, reTbTags} {
		s = re.ReplaceAllString(s, "")
	}

	// these can be repurposed
	s = reHrTags.ReplaceAllString(s, "---\n")
	s = reHnTags.ReplaceAllString(s, "<b>$1</b>\n\n")
	s = reBqTags.ReplaceAllString(s, "| <i>$1</i>")
	s = reLiTags.ReplaceAllString(s, "* $1\n")

	return strings.TrimSpace(s)
}
413
// TruncateNoise shortens s so that it fits into length characters and marks
// the cut by appending with.
//
// Lengths are counted in Unicode code points, not bytes, so text in non-Latin
// scripts is not cut earlier than necessary. A string that already fits is
// returned unchanged. Otherwise the first `length - 1 - len(with)` characters
// of s are kept and with is appended. If with itself does not fit, s is simply
// cut to length characters so that the result never exceeds the limit.
func TruncateNoise(s, with string, length int) string {
	// telegram can handle posts no longer than 4096 (or 1024) characters _after_ the parsing of entities.
	// we could be clever and calculate the true length of text, but let's keep it simple and stupid.
	if len(s) <= length {
		return s // fits even when counted in bytes
	}
	runes := []rune(s)
	if len(runes) <= length {
		return s
	}

	keep := length - 1 - len([]rune(with))
	if keep <= 0 {
		if length < 0 {
			length = 0
		}
		return string(runes[:length])
	}
	return string(runes[:keep]) + with
}
434
435func init() {
436 flag.StringVar(&config.TgBotToken, "bot_token", "", "Telegram bot token")
437 flag.StringVar(&config.TgChatID, "chat_id", "", "Telegram chat_id")
438 flag.StringVar(&config.TgApiURL, "tgapi_url", "https://api.telegram.org", "Telegram API URL")
439 flag.StringVar(&config.HonkAuthToken, "honk_token", "", "Honk auth token")
440 flag.StringVar(&config.HonkPage, "honk_page", "myhonks", "Page to get honks from. Should be one of [atme, longago, home, myhonks]")
441 flag.StringVar(&config.HonkURL, "honk_url", "", "URL of a Honk instance")
442
443 flag.Parse()
444
445 if err := config.Check(); err != nil {
446 log.Fatal("config:", err) // fail
447 }
448 config.TgApiURL = strings.TrimRight(config.TgApiURL, "/")
449 if err := checkTgAPI(); err != nil {
450 log.Fatal("tgAPI:", err) // fail
451 }
452}
453
// client is the HTTP client used for all requests to Honk and Telegram.
// Unlike http.DefaultClient it has a timeout, so that a stalled connection
// cannot block the polling loop forever.
var client = &http.Client{Timeout: 30 * time.Second}
455var honkMap = make(map[string]*Honk) // FIXME: not safe for use by multiple goroutines!
456var now = time.Now()
457
458func main() {
459 var retry = 5
460
461 log.Print("starting telebonk") // info
462 for {
463 honks, err := getHonks(config.HonkPage, 0)
464 if err != nil {
465 log.Print("gethonks:", err) // error
466 retry--
467 if retry == 0 {
468 log.Fatal("gethonks: giving up") // fail
469 }
470 time.Sleep(5 * time.Second)
471 continue
472 }
473 HonkLoop:
474 for _, honk := range honks {
475 honk.Decide()
476 switch honk.Action {
477 case HonkIgnore:
478 continue
479 case HonkSend, HonkEdit:
480 if err := honk.Check(); err != nil {
481 log.Print("honk check:", err) // error
482 continue
483 }
484 }
485 messes := NewMessFromHonk(honk)
486 // messes[0] is a honk or a donk to be sent
487 resp, err := messes[0].Send()
488 if err != nil {
489 log.Print("mess send", err) // error
490 honk.forget() // retry
491 continue
492 }
493 // remember only the first mess' response
494 honk.save(resp.Result.MessageID)
495 for _, mess := range messes[1:] {
496 if _, err := mess.Send(); err != nil {
497 log.Print("mess send", err) // error
498 continue HonkLoop
499 }
500 }
501 }
502 time.Sleep(30 * time.Second)
503 }
504}