main.go (view raw)
1// Telebonk is a reposter from Honk to Telegram.
2package main
3
4import (
5 "bytes"
6 "encoding/json"
7 "flag"
8 "fmt"
9 "io"
10 "log"
11 "net/http"
12 "net/url"
13 "regexp"
14 "sort"
15 "strconv"
16 "strings"
17 "time"
18
19 "git.aaoth.xyz/la-ninpre/telebonk/config"
20)
21
// A Honk is a post from honk.
//
// Honks are decoded from the honk API response by getHonks. MessID,
// ReplyToID and Action are telebonk's own bookkeeping: they are filled in by
// save, Check and Decide respectively.
type Honk struct {
	ID     int       // unique id of a post
	What   string    // type of an action: "honked" (post), "bonked" (repost) or "honked back" (reply)
	Oondle string    // e-mail style handle of the original author of a post
	Oonker string    // url of the original author of a post
	XID    string    // url of a post, also unique; used as the key in honkMap
	RID    string    // url of a post that current post is replying to
	Date   time.Time // datetime of a post
	Precis string    // post summary
	Noise  string    // contents of a post
	Onts   []string  // a slice of tags in a post
	Donks  []*Donk   // a slice of attachments to a post

	MessID    int // telegram message_id
	ReplyToID int // telegram message_id of a message to reply to

	Action HonkAction // what to do with this honk; set by Decide
}
41
// A HonkAction tells what to do with the saved honk.
type HonkAction int

// Possible values of Honk.Action, assigned by Honk.Decide.
const (
	HonkNotSet HonkAction = iota // zero value: Decide has not run yet
	HonkIgnore                   // skip the honk
	HonkSend                     // post the honk as a new message
	HonkEdit                     // update the message that was posted for this honk earlier
)
51
// A Donk stores metadata for media files.
type Donk struct {
	URL   string // location of the attachment; passed to Telegram as the photo or document
	Media string // mime-type of an attachment
	Desc  string // description; becomes the caption of the Telegram message
}
58
59// Check performs some checks on a Honk to filter out what's not going to be posted.
60//
61// It rejects honk if if falls into one of these categories:
62// - it is posted before the telebonk started
63// - it is replying to a honk that's not posted by telebonk (either a remote honk or old honk)
64// - it is of unsupported type (not a regular honk, reply or bonk)
65// - it contains a `#notg` tag.
66// - it is empty
67func (h *Honk) Check() error {
68 log.Print("check: checking honk #", h.ID) // info
69
70 switch h.What {
71 case "honked", "bonked":
72 break
73 case "honked back":
74 hi, ok := honkMap[h.RID]
75 if !ok {
76 return fmt.Errorf("cannot reply to nonexisting telebonk")
77 }
78 h.ReplyToID = hi.MessID
79 default:
80 return fmt.Errorf("unsupported honk type: %s", h.What)
81 }
82
83 for _, ont := range h.Onts {
84 if strings.ToLower(ont) == "#notg" {
85 return fmt.Errorf("skipping #notg honk")
86 }
87 }
88
89 if h.Noise == emptyNoise && len(h.Donks) == 0 {
90 return fmt.Errorf("empty honk")
91 }
92 return nil
93}
94
95// Decide sets the Action of a Honk.
96//
97// It sets HonkIgnore to those honks that are: 1) old; 2) already sent and aren't edits.
98func (h *Honk) Decide() {
99 oldhonk, ok := honkMap[h.XID]
100 if ok {
101 if oldhonk.MessID == 0 || h.Date.Equal(oldhonk.Date) {
102 h.Action = HonkIgnore
103 h.save(oldhonk.MessID)
104 return
105 }
106 log.Print("decide: honk #", h.XID, " is to be edited")
107 h.Action = HonkEdit
108 h.MessID = oldhonk.MessID
109 return
110 }
111 if h.Date.Before(now) {
112 h.Action = HonkIgnore
113 h.save(0)
114 return
115 }
116 log.Print("decide: honk #", h.ID, " is to be sent")
117 h.Action = HonkSend
118}
119
120// save records a Honk to the honkMap
121func (h *Honk) save(messID int) {
122 h.MessID = messID
123 honkMap[h.XID] = h
124}
125
126// forget unchecks a Honk from the honkMap
127func (h *Honk) forget() {
128 oldhonk, ok := honkMap[h.XID]
129 if !ok {
130 return
131 }
132 oldhonk.MessID = 0
133 oldhonk.ReplyToID = 0
134 honkMap[oldhonk.XID] = oldhonk
135}
136
// A Mess holds data for a message to be sent to Telegram.
//
// Send marshals it to JSON and uses the result as the request body, so the
// json tags are the parameter names the Telegram Bot API receives.
type Mess struct {
	Text             string `json:"text"`
	ChatID           string `json:"chat_id"`
	ParseMode        string `json:"parse_mode,omitempty"`
	MessageID        int    `json:"message_id,omitempty"`          // message to edit
	ReplyToMessageID int    `json:"reply_to_message_id,omitempty"` // message to reply to

	Document string `json:"document,omitempty"` // url of a document attachment
	Photo    string `json:"photo,omitempty"`    // url of a photo attachment
	Caption  string `json:"caption,omitempty"`  // caption of an attachment

	kind messKind // selects the API method in Send; unexported, so never marshalled
}
151
// A TelegramResponse is a response from Telegram API.
//
// Send decodes the response body into it.
type TelegramResponse struct {
	Ok          bool   // Send treats a false value as a failure
	Description string // used by Send as the error text when Ok is false
	Result      struct {
		MessageID int `json:"message_id"` // main stores it as the MessID of the honk
	}
}
160
161// NewMess creates and populates a new Mess with default values.
162func NewMess(parseMode, chatID string) *Mess {
163 return &Mess{
164 ParseMode: parseMode,
165 ChatID: chatID,
166 kind: messHonk,
167 }
168}
169
170// NewMessFromHonk creates a slice of Mess objects from existing Honk.
171func NewMessFromHonk(honk *Honk) []*Mess {
172 var truncateWith = "...\n\nfull honk: " + honk.XID // hardcoded == bad
173 // donks should be sent as a separate messages, so need to create all of 'em
174 // cap(messes) = 1 for honk + 1 for each donk
175 var messes = make([]*Mess, 0, 1+len(honk.Donks))
176
177 messes = append(messes, NewMess("html"))
178 for _, donk := range honk.Donks {
179 donkMess := NewMess("") // donks don't contain html
180 donkMess.Caption = TruncateNoise(donk.Desc, truncateWith, 1024)
181 switch {
182 case strings.HasPrefix(donk.Media, "image/"):
183 donkMess.Photo = donk.URL
184 donkMess.kind = messDonkPht
185 case donk.Media == "application/pdf", donk.Media == "text/plain":
186 donkMess.Document = donk.URL
187 donkMess.kind = messDonkDoc
188 }
189 messes = append(messes, donkMess)
190 }
191 if honk.Noise == emptyNoise {
192 messes = messes[1:] // just donks
193 }
194
195 if honk.Action == HonkEdit {
196 // TODO: implement editing documents and photos
197 messes[0].kind = messEdit
198 messes[0].MessageID = honk.MessID
199 messes = messes[:1] // don't donk if editing
200 }
201
202 var text = CalmNoise(honk.Noise)
203 text = TruncateNoise(text, truncateWith, 4096)
204 // bonk, then honk back - ok
205 // honk back, then bonk - not gonna sync, is it ok?
206 // upd: bonks work really confusing
207 switch honk.What {
208 case "honked":
209 break
210 case "honked back":
211 messes[0].ReplyToMessageID = honk.ReplyToID
212 case "bonked":
213 oonker := fmt.Sprintf("<a href=\"%s\">%s</a>:", honk.Oonker, honk.Oondle)
214 text = oonker + "\n" + text
215 }
216
217 // danger zone handling
218 if strings.HasPrefix(honk.Precis, "DZ:") {
219 text = strings.Join([]string{"<tg-spoiler>", "</tg-spoiler>"}, text)
220 text = honk.Precis + "\n" + text
221 }
222 messes[0].Text = text
223
224 return messes
225}
226
227// Send sends a Mess to Telegram.
228func (m *Mess) Send() (*TelegramResponse, error) {
229 var apiURL = botAPIMethod(tgSendMessage)
230
231 switch m.kind {
232 case messHonk:
233 // noop
234 case messEdit:
235 apiURL = botAPIMethod(tgEditMessageText)
236 case messDonkPht:
237 apiURL = botAPIMethod(tgSendPhoto)
238 case messDonkDoc:
239 apiURL = botAPIMethod(tgSendDocument)
240 }
241
242 junk, err := json.Marshal(m)
243 if err != nil {
244 return nil, err
245 }
246 buf := bytes.NewBuffer(junk)
247 req, err := http.NewRequest("POST", apiURL, buf)
248 if err != nil {
249 return nil, err
250 }
251 req.Header.Add("Content-type", "application/json")
252 req.Header.Add("Content-length", strconv.Itoa(buf.Len()))
253
254 resp, err := client.Do(req)
255 if err != nil {
256 return nil, err
257 }
258 defer resp.Body.Close()
259
260 var res TelegramResponse
261 json.NewDecoder(resp.Body).Decode(&res)
262 if !res.Ok {
263 return nil, fmt.Errorf("mess send: %s", res.Description)
264 }
265
266 return &res, nil
267}
268
// A messKind tells Mess.Send which Telegram Bot API method to call.
type messKind int

const (
	messHonk    messKind = iota // text message; sent with sendMessage
	messEdit                    // edit of a posted text message; sent with editMessageText
	messDonkPht                 // photo attachment; sent with sendPhoto
	messDonkDoc                 // document attachment; sent with sendDocument
)
277
// botAPIMethod builds the address of a Telegram Bot API method in the form
// "<url>/bot<token>/<method>".
//
// XXX: figure out how to retrieve these args without passing config object thousand times
func botAPIMethod(url, token, method string) string {
	// all operands are strings, so Sprint joins them without adding spaces
	return fmt.Sprint(url, "/bot", token, "/", method)
}
282
283func checkTgAPI() error {
284 var apiURL = botAPIMethod(tgGetMe)
285 resp, err := client.Get(apiURL)
286 if err != nil {
287 return err
288 }
289 if resp.StatusCode != 200 {
290 status, _ := io.ReadAll(resp.Body)
291 return fmt.Errorf("status: %d: %s", resp.StatusCode, status)
292 }
293 return nil
294}
295
// Telegram Bot API methods
const (
	tgGetMe           = "getMe"           // called by checkTgAPI
	tgSendMessage     = "sendMessage"     // default method of Mess.Send
	tgEditMessageText = "editMessageText" // used by Mess.Send for messEdit
	tgSendPhoto       = "sendPhoto"       // used by Mess.Send for messDonkPht
	tgSendDocument    = "sendDocument"    // used by Mess.Send for messDonkDoc
)
304
305// getHonks receives and unmarshals some honks from a Honk instance.
306func getHonks(from, page, token string, after int) ([]*Honk, error) {
307 query := url.Values{}
308 query.Set("action", "gethonks")
309 query.Set("page", page)
310 query.Set("after", strconv.Itoa(after))
311 apiurl := from + "/api?" + query.Encode()
312
313 req, err := http.NewRequest("GET", apiurl, nil)
314 if err != nil {
315 return nil, err
316 }
317 req.Header.Add("Authorization", "Bearer "+token)
318
319 resp, err := client.Do(req)
320 if err != nil {
321 return nil, err
322 }
323 defer resp.Body.Close()
324
325 // honk outputs junk like `{ "honks": [ ... ] }`, need to get into the list
326 var honkJunk map[string][]*Honk
327 err = json.NewDecoder(resp.Body).Decode(&honkJunk)
328 if err != nil {
329 // XXX: honk tokens last for a week or so. when one expires, shouldn't this say something meaningful instead of `unexpected v in blah-blah'?
330 log.Print("gethonks: ", resp.Status)
331 return nil, err
332 }
333
334 honks := honkJunk["honks"]
335 // honk.ID monotonically increases, so it can be used to sort honks
336 sort.Slice(honks, func(i, j int) bool { return honks[i].ID < honks[j].ID })
337
338 return honks, nil
339}
340
// Patterns for the html tags that CalmNoise deletes or rewrites.
//
// Wildcards are non-greedy so that a match ends at the first possible
// closing tag instead of swallowing everything up to the last one on the
// line. Note that `.` does not match a newline, so an element has to open
// and close on the same line to be matched.
var (
	rePTags   = regexp.MustCompile(`<\/?p>`)
	reHlTags  = regexp.MustCompile(`<\/?span( class="[a-z]{2}")?>`)
	reBrTags  = regexp.MustCompile(`<br>`)
	reImgTags = regexp.MustCompile(`<img .*?src="(.*?)">`)
	reUlTags  = regexp.MustCompile(`<\/?ul>`)
	reTbTags  = regexp.MustCompile(`<table>.*?</table>`)

	reLiTags = regexp.MustCompile(`<li>([^<>\/]*)<\/li>`)
	reHnTags = regexp.MustCompile(`<h[1-6]>(.*?)</h[1-6]>`)
	reHrTags = regexp.MustCompile(`<hr>.*?<\/hr>`)
	reBqTags = regexp.MustCompile(`<blockquote>(.*?)<\/blockquote>`)
)

// emptyNoise is the Noise of a honk that has no text.
const emptyNoise = "<p></p>\n"

// CalmNoise erases and rewrites html tags that are not supported by Telegram.
//
// Paragraph, highlighting span, br, img and ul tags are removed, and so are
// whole tables. Rules become "---", headings become bold text, blockquotes
// become italic text prefixed with "| " and list items become "* " lines.
// Leading and trailing whitespace is trimmed from the result.
func CalmNoise(s string) string {
	// delete these
	for _, re := range []*regexp.Regexp{rePTags, reHlTags, reBrTags, reImgTags, reUlTags, reTbTags} {
		s = re.ReplaceAllString(s, "")
	}

	// these can be repurposed
	s = reHrTags.ReplaceAllString(s, "---\n")
	s = reHnTags.ReplaceAllString(s, "<b>$1</b>\n\n")
	s = reBqTags.ReplaceAllString(s, "| <i>$1</i>")
	s = reLiTags.ReplaceAllString(s, "* $1\n")

	return strings.TrimSpace(s)
}
375
// TruncateNoise shortens s so that, with `with` appended, it is less than
// `length` bytes long.
//
// A string of at most `length` bytes is returned unchanged. Otherwise s is
// cut at the last rune boundary that leaves room for `with` (so a multi-byte
// character is never split and never pushes the result over the limit) and
// `with` is added to the end. If `with` alone does not fit, the result is
// just `with`.
func TruncateNoise(s, with string, length int) string {
	// telegram can handle posts no longer than 4096 (or 1024) characters _after_ the parsing of entities.
	// we could be clever and calculate the true length of text, but let's keep it simple and stupid.
	if len(s) <= length {
		return s
	}

	end := length - 1 - len(with)
	// ranging over a string yields the byte offset of every rune, so cut
	// ends up at the last rune boundary that is not past end
	cut := 0
	for i := range s {
		if i > end {
			break
		}
		cut = i
	}
	return s[:cut] + with
}
396
// conf holds the settings telebonk runs with; init fills it in from the
// command-line flags.
//
// XXX: global and mutable
var conf = &config.Config{}
399
400func init() {
401 flag.StringVar(&conf.TgBotToken, "bot_token", "", "Telegram bot token")
402 flag.StringVar(&conf.TgChatID, "chat_id", "", "Telegram chat_id")
403 flag.StringVar(&conf.TgApiURL, "tgapi_url", "https://api.telegram.org", "Telegram API URL")
404 flag.StringVar(&conf.HonkAuthToken, "honk_token", "", "Honk auth token")
405 flag.StringVar(&conf.HonkPage, "honk_page", "myhonks", "Page to get honks from. Should be one of [atme, longago, home, myhonks]")
406 flag.StringVar(&conf.HonkURL, "honk_url", "", "URL of a Honk instance")
407
408 flag.Parse()
409
410 if err := conf.Check(); err != nil {
411 log.Fatal("conf:", err) // fail
412 }
413 conf.TgApiURL = strings.TrimRight(conf.TgApiURL, "/")
414 if err := checkTgAPI(); err != nil {
415 log.Fatal("tgAPI:", err) // fail
416 }
417}
418
419var client = http.DefaultClient
420var honkMap = make(map[string]*Honk) // XXX: not safe for use by multiple goroutines!
421var now = time.Now()
422
423func main() {
424 var retry = 5
425
426 log.Print("starting telebonk") // info
427 for {
428 honks, err := getHonks(conf.HonkURL, conf.HonkPage, conf.HonkAuthToken, 0)
429 if err != nil {
430 log.Print("gethonks:", err) // error
431 retry--
432 if retry == 0 {
433 log.Fatal("gethonks: giving up") // fail
434 }
435 time.Sleep(5 * time.Second)
436 continue
437 }
438 HonkLoop:
439 for _, honk := range honks {
440 honk.Decide()
441 switch honk.Action {
442 case HonkIgnore:
443 continue
444 case HonkSend, HonkEdit:
445 if err := honk.Check(); err != nil {
446 log.Print("honk check:", err) // error
447 continue
448 }
449 }
450 messes := NewMessFromHonk(honk)
451 // messes[0] is a honk or a donk to be sent
452 resp, err := messes[0].Send()
453 if err != nil {
454 log.Print("mess send", err) // error
455 honk.forget() // retry
456 continue
457 }
458 // remember only the first mess' response
459 honk.save(resp.Result.MessageID)
460 for _, mess := range messes[1:] {
461 if _, err := mess.Send(); err != nil {
462 log.Print("mess send", err) // error
463 continue HonkLoop
464 }
465 }
466 }
467 time.Sleep(30 * time.Second)
468 }
469}