From 856df0ec5823ae12682fe4c4bcd4c15a062be1dc Mon Sep 17 00:00:00 2001 From: la-ninpre Date: Wed, 14 Jul 2021 19:35:12 +0300 Subject: add support for other file types (to be improved) currently images that were sent as files are supported, but their files land in `files` directory. text file and additional image used for test are stored as symlinks now, because i don't want them to consume too much space. --- tests/files/LICENSE.md | 1 + tests/files/dice.svg | 603 +++++++++++++++++++++++++++ tests/files/dice.svg_thumb.jpg | Bin 0 -> 8124 bytes tests/photos/photo_3@14-07-2021_18-52-46.jpg | 1 + tests/result.json | 57 +++ tg2md.py | 67 ++- 6 files changed, 718 insertions(+), 11 deletions(-) create mode 120000 tests/files/LICENSE.md create mode 100644 tests/files/dice.svg create mode 100644 tests/files/dice.svg_thumb.jpg create mode 120000 tests/photos/photo_3@14-07-2021_18-52-46.jpg diff --git a/tests/files/LICENSE.md b/tests/files/LICENSE.md new file mode 120000 index 0000000..f0608a6 --- /dev/null +++ b/tests/files/LICENSE.md @@ -0,0 +1 @@ +../../LICENSE.md \ No newline at end of file diff --git a/tests/files/dice.svg b/tests/files/dice.svg new file mode 100644 index 0000000..07a9b09 --- /dev/null +++ b/tests/files/dice.svg @@ -0,0 +1,603 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/files/dice.svg_thumb.jpg b/tests/files/dice.svg_thumb.jpg new file mode 100644 index 0000000..bf2a611 Binary files /dev/null and b/tests/files/dice.svg_thumb.jpg differ diff --git a/tests/photos/photo_3@14-07-2021_18-52-46.jpg b/tests/photos/photo_3@14-07-2021_18-52-46.jpg new file mode 120000 index 0000000..d8a69f8 --- /dev/null +++ b/tests/photos/photo_3@14-07-2021_18-52-46.jpg @@ -0,0 +1 @@ +photo_1@12-11-2020_12-57-31.jpg \ No newline at end of file diff --git a/tests/result.json b/tests/result.json index 30e65f8..8853cda 100644 --- a/tests/result.json +++ b/tests/result.json @@ -217,6 +217,63 @@ "from": "test", "from_id": "channel1292077344", "text": "тестирование utf-8." + }, + { + "id": 22, + "type": "message", + "date": "2021-07-14T18:40:10", + "edited": "2021-07-14T18:40:24", + "from": "test", + "from_id": "channel1292077344", + "file": "files/dice.svg", + "thumbnail": "files/dice.svg_thumb.jpg", + "mime_type": "image/svg+xml", + "width": 622, + "height": 406, + "text": "test file other than sound" + }, + { + "id": 23, + "type": "message", + "date": "2021-07-14T18:52:46", + "from": "test", + "from_id": "channel1292077344", + "photo": "photos/photo_3@14-07-2021_18-52-46.jpg", + "width": 801, + "height": 526, + "text": "multiple photos" + }, + { + "id": 24, + "type": "message", + "date": "2021-07-14T18:52:46", + "from": "test", + "from_id": "channel1292077344", + "photo": "photos/photo_3@14-07-2021_18-52-46.jpg", + "width": 801, + "height": 526, + "text": "" + }, + { + "id": 25, + "type": "message", + "date": "2021-07-14T18:52:46", + "from": "test", + "from_id": "channel1292077344", + "photo": "photos/photo_3@14-07-2021_18-52-46.jpg", + "width": 801, + "height": 526, + "text": "" + }, + { + "id": 26, + "type": "message", + "date": "2021-07-14T19:20:23", + "from": "test", + "from_id": "channel1292077344", + "file": "files/LICENSE.md", + "mime_type": "text/markdown", + "text": "test text file" } ] } \ No newline at end of file diff --git a/tg2md.py b/tg2md.py index 706ee85..85c7c1a 100644 --- a/tg2md.py +++ b/tg2md.py @@ -8,14 +8,15 @@ # - single/muliple tags # - forwarded posts # - custom post header +# - multiple photos in one post import os import argparse import json from datetime import datetime -def print_default_post_header(post_title, post_date, post_tag): +def print_default_post_header(post_title, post_date, post_tag): ''' returns default post header @@ -59,6 +60,28 @@ def parse_post_photo(post, photo_dir): return post_photo +def parse_post_photo_as_file(post, media_dir): + + ''' + converts file tag with thumbnail to image and a link + ''' + + # links to files are currently broken, because these files are + # going to `files` directory, not `photos`. + # need to track down any files with thumbnails and then to move them + # to a photos directory. + post_photo_file_src = os.path.basename(post['file']) + post_photo_file_src = os.path.join(media_dir, post_photo_file_src) + post_photo_thumbnail_src = os.path.basename(post['thumbnail']) + post_photo_thumbnail_src = os.path.join(media_dir, + post_photo_thumbnail_src) + + post_photo_as_file = '![image]({thumb})\n[full size]({file})\n\n'\ + .format(thumb=post_photo_thumbnail_src, file=post_photo_file_src) + + return post_photo_as_file + + def text_format(string, fmt): ''' @@ -159,21 +182,37 @@ def parse_post_text(post): def parse_post_media(post, media_dir): ''' - wraps file links into html tags + wraps media files into html tags ''' - # get filename without parent directory - post_media_src = os.path.basename(post['file']) + post_media_file = os.path.basename(post['file']) + post_media_ext = post_media_file.split(".")[-1] + post_media_src = os.path.join(media_dir, post_media_file) - # add parent directory - post_media_src = os.path.join(media_dir, post_media_src) - post_media = '\n'.format(src=post_media_src, mime_type=post['mime_type']) + # audiofiles can be presented as audioplayers and other media types + # could be left as just links to them + # ??? + post_media = '\n'.format(src=post_media_src, mime_type=post['mime_type']) return post_media +def parse_post_file(post, media_dir): + + ''' + wrap files into link tags + ''' + + post_file_src = os.path.basename(post['file']) + post_file_ext = post_file_src.split('.')[-1] + post_file_name = post_file_src.removesuffix('.' + post_file_ext) + + post_file = f'\n\n[{post_file_name}]({post_file_src})\n\n' + + return post_file + def parse_post(post, photo_dir, media_dir): ''' @@ -183,15 +222,21 @@ def parse_post(post, photo_dir, media_dir): post_output = '' # optional image + # TODO: handle multiple photos in one post (maybe by comparing timestamps) if 'photo' in post: post_output += str(parse_post_photo(post, photo_dir)) + if all(['file' in post, 'thumbnail' in post]): + post_output += str(parse_post_photo_as_file(post, media_dir)) + # post text post_output += str(parse_post_text(post)) # optional media if 'media_type' in post: post_output += str(parse_post_media(post, media_dir)) + elif 'file' in post and not 'thumbnail' in post: + post_output += str(parse_post_file(post, media_dir)) return post_output @@ -256,8 +301,8 @@ def main(): post_path = os.path.join(args.out_dir, post_filename) with open(post_path, 'w', encoding='utf-8') as f: - print(print_default_post_header( - post_id, post_date, None), file=f) + print(print_default_post_header(post_id, post_date, None), + file=f) print(parse_post(post, args.photo_dir, args.media_dir), file=f) -- cgit v1.2.3