diff options
| -rw-r--r-- | parse.py | 44 | ||||
| -rw-r--r-- | tests/files/test-sound.ogg | bin | 0 -> 9060 bytes | |||
| -rw-r--r-- | tests/photos/photo_1@12-11-2020_12-57-31.jpg | bin | 0 -> 8469 bytes | |||
| -rw-r--r-- | tests/result.json | 214 | ||||
| -rw-r--r-- | tests/voice_messages/audio_1@12-11-2020_13-01-05.ogg | bin | 0 -> 19338 bytes |
5 files changed, 243 insertions, 15 deletions
@@ -13,7 +13,7 @@ # - forwarded posts import os -# import sys +import sys import json from datetime import datetime @@ -36,8 +36,8 @@ tag: {tag}\nlayout: post\n---\n'.format(\ def parse_post_photo(post, media_dir): post_photo_src = post['photo'][7:] - post_photo_src = media_dir + post_photo_src - post_photo = '\n\n'.format(\ + post_photo_src = media_dir + '/' + post_photo_src + post_photo = '\n\n'.format(\ src=post_photo_src) return post_photo @@ -80,23 +80,26 @@ def parse_text_object(obj): elif obj_type == 'phone': return obj_text + # output = '*{str}*'.format(str=string.strip()) + # output += '\n' * string.count('\n') * string.endswith('\n') + elif obj_type == 'bold': - post_inline_bold = '**{text}**'.format(text=obj_text) + post_inline_bold = '**{text}**'.format(text=obj_text.strip()) return post_inline_bold elif obj_type == 'italic': - post_inline_italic = '*{text}*'.format(text=obj_text) + post_inline_italic = '*{text}*'.format(text=obj_text.strip()) return post_inline_italic elif obj_type == 'underline': - post_inline_underline = '<u>{text}</u>'.format(text=obj_text) + post_inline_underline = '<u>{text}</u>'.format(text=obj_text.strip()) return post_inline_underline elif obj_type == 'strikethrough': - post_inline_strike = '<s>{text}</s>'.format(text=obj_text) + post_inline_strike = '<s>{text}</s>'.format(text=obj_text.strip()) return post_inline_strike - elif obj_type == 'code': + elif obj_type == 'code' or obj_type == 'pre': post_inline_code = '```\n{text}\n```'.format(text=obj_text) return post_inline_code @@ -124,7 +127,7 @@ def parse_post_media(post, media_dir): post_media_src = post['file'][post['file'].rfind("/") + 1:] # add parent directory - post_media_src = media_dir + post_media_src + post_media_src = media_dir + '/' + post_media_src post_media = '\n<audio controls>\n \ <source src="{src}" type="{mime_type}">\n \ </audio>'.format(src=post_media_src, mime_type=post['mime_type']) @@ -136,15 +139,15 @@ def parse_post(post): post_output = '' # optional image - photo_dir = '/assets/img/posts/' + photo_dir = '/photos' if 'photo' in post: post_output += str(parse_post_photo(post, photo_dir)) # post text - post_output += md_str(parse_post_text(post)) + post_output += str(parse_post_text(post)) # optional media - media_dir = '/assets/sound/posts/' + media_dir = '/files' if 'media_type' in post: post_output += str(parse_post_media(post, media_dir)) @@ -152,16 +155,27 @@ def parse_post(post): def main(): + # try directory from first argument + try: + input_dir = sys.argv[1] + except IndexError as e: + # if it's not specified, use current directory + input_dir = '.' + # create output directory - out_dir = './formatted_posts' + out_dir = input_dir + '/' + 'formatted_posts' try: os.mkdir(out_dir) except FileExistsError as e: pass # load json file - with open('result.json', 'r') as f: - data = json.load(f) + json_path = input_dir + '/' + 'result.json' + try: + with open(json_path, 'r') as f: + data = json.load(f) + except FileNotFoundError as e: + sys.exit('result.json not found.\nPlease, specify right directory') # load only messages raw_posts = data['messages'] diff --git a/tests/files/test-sound.ogg b/tests/files/test-sound.ogg Binary files differnew file mode 100644 index 0000000..c8391c6 --- /dev/null +++ b/tests/files/test-sound.ogg diff --git a/tests/photos/photo_1@12-11-2020_12-57-31.jpg b/tests/photos/photo_1@12-11-2020_12-57-31.jpg Binary files differnew file mode 100644 index 0000000..42965b4 --- /dev/null +++ b/tests/photos/photo_1@12-11-2020_12-57-31.jpg diff --git a/tests/result.json b/tests/result.json new file mode 100644 index 0000000..cd0281e --- /dev/null +++ b/tests/result.json @@ -0,0 +1,214 @@ +{ + "name": "test", + "type": "private_channel", + "id": 9882011936, + "messages": [ + { + "id": 1, + "type": "service", + "date": "2020-11-12T12:53:52", + "actor": "test", + "actor_id": 9882011936, + "action": "create_channel", + "title": "test", + "text": "" + }, + { + "id": 2, + "type": "message", + "date": "2020-11-12T12:54:07", + "from": "test", + "from_id": 9882011936, + "text": "test text post" + }, + { + "id": 3, + "type": "message", + "date": "2020-11-12T12:57:31", + "from": "test", + "from_id": 9882011936, + "photo": "photos/photo_1@12-11-2020_12-57-31.jpg", + "width": 801, + "height": 526, + "text": "" + }, + { + "id": 4, + "type": "message", + "date": "2020-11-12T12:57:40", + "from": "test", + "from_id": 9882011936, + "photo": "photos/photo_1@12-11-2020_12-57-31.jpg", + "width": 801, + "height": 526, + "text": "photo with text" + }, + { + "id": 5, + "type": "message", + "date": "2020-11-12T12:58:18", + "edited": "2020-11-12T13:03:00", + "from": "test", + "from_id": 9882011936, + "text": [ + { + "type": "italic", + "text": "italic\n\n" + }, + { + "type": "bold", + "text": "bold\n\n" + }, + { + "type": "underline", + "text": "underline\n\n" + }, + { + "type": "strikethrough", + "text": "strikethrough\n\n" + }, + { + "type": "pre", + "text": "monospace", + "language": "" + } + ] + }, + { + "id": 7, + "type": "message", + "date": "2020-11-12T13:01:05", + "from": "test", + "from_id": 9882011936, + "file": "voice_messages/audio_1@12-11-2020_13-01-05.ogg", + "media_type": "voice_message", + "mime_type": "audio/ogg", + "duration_seconds": 2, + "text": "" + }, + { + "id": 8, + "type": "message", + "date": "2020-11-12T13:02:35", + "from": "test", + "from_id": 9882011936, + "file": "files/test-sound.ogg", + "media_type": "audio_file", + "mime_type": "audio/x-vorbis+ogg", + "duration_seconds": 1, + "text": "" + }, + { + "id": 9, + "type": "message", + "date": "2020-11-12T13:43:23", + "from": "test", + "from_id": 9882011936, + "text": [ + { + "type": "italic", + "text": "italic" + }, + " in text\n\n", + { + "type": "italic", + "text": "italic on whole line" + } + ] + }, + { + "id": 10, + "type": "message", + "date": "2020-11-12T14:11:35", + "edited": "2020-11-12T14:18:17", + "from": "test", + "from_id": 9882011936, + "text": [ + { + "type": "bold", + "text": "bold" + }, + " in text\n", + { + "type": "bold", + "text": "bold below\n\n" + }, + "normal text\n\n", + { + "type": "bold", + "text": "bold with one line gap\n" + }, + "normal text" + ] + }, + { + "id": 11, + "type": "message", + "date": "2020-11-12T14:34:47", + "from": "test", + "from_id": 9882011936, + "text": [ + { + "type": "bold", + "text": "multiline\nbold\n\ntext" + } + ] + }, + { + "id": 13, + "type": "message", + "date": "2020-11-12T14:45:43", + "edited": "2020-11-12T14:45:55", + "from": "test", + "from_id": 9882011936, + "text": [ + { + "type": "code", + "text": "monospace with one baktick\n\n" + }, + { + "type": "pre", + "text": "multiline\nmonospace", + "language": "" + } + ] + }, + { + "id": 14, + "type": "message", + "date": "2020-11-12T14:48:48", + "edited": "2020-11-12T14:49:39", + "from": "test", + "from_id": 9882011936, + "text": [ + { + "type": "text_link", + "text": "text link\n\n", + "href": "http://example.com/" + }, + { + "type": "link", + "text": "example.com" + }, + "\n\n", + { + "type": "email", + "text": "example@example.com" + } + ] + }, + { + "id": 15, + "type": "message", + "date": "2020-11-12T15:05:32", + "from": "test", + "from_id": 9882011936, + "text": [ + { + "type": "italic", + "text": "bold italic" + } + ] + } + ] +}
\ No newline at end of file diff --git a/tests/voice_messages/audio_1@12-11-2020_13-01-05.ogg b/tests/voice_messages/audio_1@12-11-2020_13-01-05.ogg Binary files differnew file mode 100644 index 0000000..ffa2fd0 --- /dev/null +++ b/tests/voice_messages/audio_1@12-11-2020_13-01-05.ogg |
