From 767dc7f6954a66e187e5eefc36c21868048e04fc Mon Sep 17 00:00:00 2001 From: la-ninpre Date: Thu, 12 Nov 2020 22:38:08 +0300 Subject: add tests i've found some issues with formatted text; they can be seen in the output (which is not included for now). there are some complications with parsing newline characters, but i'll fix them later. also, the script is now usable as a command-line application: you can specify the directory in which the source files are located. to run the tests, use $ python parse.py tests this will create the directory tests/formatted_posts and populate it with markdown posts. if the directory is not specified, the current directory is used (i.e. the script is assumed to be run from the source folder). if the required json file cannot be found, it exits with code 1. --- parse.py | 44 +++-- tests/files/test-sound.ogg | Bin 0 -> 9060 bytes tests/photos/photo_1@12-11-2020_12-57-31.jpg | Bin 0 -> 8469 bytes tests/result.json | 214 +++++++++++++++++++++ .../voice_messages/audio_1@12-11-2020_13-01-05.ogg | Bin 0 -> 19338 bytes 5 files changed, 243 insertions(+), 15 deletions(-) create mode 100644 tests/files/test-sound.ogg create mode 100644 tests/photos/photo_1@12-11-2020_12-57-31.jpg create mode 100644 tests/result.json create mode 100644 tests/voice_messages/audio_1@12-11-2020_13-01-05.ogg diff --git a/parse.py b/parse.py index 61edae9..098af81 100644 --- a/parse.py +++ b/parse.py @@ -13,7 +13,7 @@ # - forwarded posts import os -# import sys +import sys import json from datetime import datetime @@ -36,8 +36,8 @@ tag: {tag}\nlayout: post\n---\n'.format(\ def parse_post_photo(post, media_dir): post_photo_src = post['photo'][7:] - post_photo_src = media_dir + post_photo_src - post_photo = '![image](/assets/img/posts/{src})\n\n'.format(\ + post_photo_src = media_dir + '/' + post_photo_src + post_photo = '![image]({src})\n\n'.format(\ src=post_photo_src) return post_photo @@ -80,23 +80,26 @@ def parse_text_object(obj): elif obj_type == 'phone': return obj_text + # output = 
'*{str}*'.format(str=string.strip()) + # output += '\n' * string.count('\n') * string.endswith('\n') + elif obj_type == 'bold': - post_inline_bold = '**{text}**'.format(text=obj_text) + post_inline_bold = '**{text}**'.format(text=obj_text.strip()) return post_inline_bold elif obj_type == 'italic': - post_inline_italic = '*{text}*'.format(text=obj_text) + post_inline_italic = '*{text}*'.format(text=obj_text.strip()) return post_inline_italic elif obj_type == 'underline': - post_inline_underline = '{text}'.format(text=obj_text) + post_inline_underline = '{text}'.format(text=obj_text.strip()) return post_inline_underline elif obj_type == 'strikethrough': - post_inline_strike = '{text}'.format(text=obj_text) + post_inline_strike = '{text}'.format(text=obj_text.strip()) return post_inline_strike - elif obj_type == 'code': + elif obj_type == 'code' or obj_type == 'pre': post_inline_code = '```\n{text}\n```'.format(text=obj_text) return post_inline_code @@ -124,7 +127,7 @@ def parse_post_media(post, media_dir): post_media_src = post['file'][post['file'].rfind("/") + 1:] # add parent directory - post_media_src = media_dir + post_media_src + post_media_src = media_dir + '/' + post_media_src post_media = '\n'.format(src=post_media_src, mime_type=post['mime_type']) @@ -136,15 +139,15 @@ def parse_post(post): post_output = '' # optional image - photo_dir = '/assets/img/posts/' + photo_dir = '/photos' if 'photo' in post: post_output += str(parse_post_photo(post, photo_dir)) # post text - post_output += md_str(parse_post_text(post)) + post_output += str(parse_post_text(post)) # optional media - media_dir = '/assets/sound/posts/' + media_dir = '/files' if 'media_type' in post: post_output += str(parse_post_media(post, media_dir)) @@ -152,16 +155,27 @@ def parse_post(post): def main(): + # try directory from first argument + try: + input_dir = sys.argv[1] + except IndexError as e: + # if it's not specified, use current directory + input_dir = '.' 
+ # create output directory - out_dir = './formatted_posts' + out_dir = input_dir + '/' + 'formatted_posts' try: os.mkdir(out_dir) except FileExistsError as e: pass # load json file - with open('result.json', 'r') as f: - data = json.load(f) + json_path = input_dir + '/' + 'result.json' + try: + with open(json_path, 'r') as f: + data = json.load(f) + except FileNotFoundError as e: + sys.exit('result.json not found.\nPlease, specify right directory') # load only messages raw_posts = data['messages'] diff --git a/tests/files/test-sound.ogg b/tests/files/test-sound.ogg new file mode 100644 index 0000000..c8391c6 Binary files /dev/null and b/tests/files/test-sound.ogg differ diff --git a/tests/photos/photo_1@12-11-2020_12-57-31.jpg b/tests/photos/photo_1@12-11-2020_12-57-31.jpg new file mode 100644 index 0000000..42965b4 Binary files /dev/null and b/tests/photos/photo_1@12-11-2020_12-57-31.jpg differ diff --git a/tests/result.json b/tests/result.json new file mode 100644 index 0000000..cd0281e --- /dev/null +++ b/tests/result.json @@ -0,0 +1,214 @@ +{ + "name": "test", + "type": "private_channel", + "id": 9882011936, + "messages": [ + { + "id": 1, + "type": "service", + "date": "2020-11-12T12:53:52", + "actor": "test", + "actor_id": 9882011936, + "action": "create_channel", + "title": "test", + "text": "" + }, + { + "id": 2, + "type": "message", + "date": "2020-11-12T12:54:07", + "from": "test", + "from_id": 9882011936, + "text": "test text post" + }, + { + "id": 3, + "type": "message", + "date": "2020-11-12T12:57:31", + "from": "test", + "from_id": 9882011936, + "photo": "photos/photo_1@12-11-2020_12-57-31.jpg", + "width": 801, + "height": 526, + "text": "" + }, + { + "id": 4, + "type": "message", + "date": "2020-11-12T12:57:40", + "from": "test", + "from_id": 9882011936, + "photo": "photos/photo_1@12-11-2020_12-57-31.jpg", + "width": 801, + "height": 526, + "text": "photo with text" + }, + { + "id": 5, + "type": "message", + "date": "2020-11-12T12:58:18", + "edited": 
"2020-11-12T13:03:00", + "from": "test", + "from_id": 9882011936, + "text": [ + { + "type": "italic", + "text": "italic\n\n" + }, + { + "type": "bold", + "text": "bold\n\n" + }, + { + "type": "underline", + "text": "underline\n\n" + }, + { + "type": "strikethrough", + "text": "strikethrough\n\n" + }, + { + "type": "pre", + "text": "monospace", + "language": "" + } + ] + }, + { + "id": 7, + "type": "message", + "date": "2020-11-12T13:01:05", + "from": "test", + "from_id": 9882011936, + "file": "voice_messages/audio_1@12-11-2020_13-01-05.ogg", + "media_type": "voice_message", + "mime_type": "audio/ogg", + "duration_seconds": 2, + "text": "" + }, + { + "id": 8, + "type": "message", + "date": "2020-11-12T13:02:35", + "from": "test", + "from_id": 9882011936, + "file": "files/test-sound.ogg", + "media_type": "audio_file", + "mime_type": "audio/x-vorbis+ogg", + "duration_seconds": 1, + "text": "" + }, + { + "id": 9, + "type": "message", + "date": "2020-11-12T13:43:23", + "from": "test", + "from_id": 9882011936, + "text": [ + { + "type": "italic", + "text": "italic" + }, + " in text\n\n", + { + "type": "italic", + "text": "italic on whole line" + } + ] + }, + { + "id": 10, + "type": "message", + "date": "2020-11-12T14:11:35", + "edited": "2020-11-12T14:18:17", + "from": "test", + "from_id": 9882011936, + "text": [ + { + "type": "bold", + "text": "bold" + }, + " in text\n", + { + "type": "bold", + "text": "bold below\n\n" + }, + "normal text\n\n", + { + "type": "bold", + "text": "bold with one line gap\n" + }, + "normal text" + ] + }, + { + "id": 11, + "type": "message", + "date": "2020-11-12T14:34:47", + "from": "test", + "from_id": 9882011936, + "text": [ + { + "type": "bold", + "text": "multiline\nbold\n\ntext" + } + ] + }, + { + "id": 13, + "type": "message", + "date": "2020-11-12T14:45:43", + "edited": "2020-11-12T14:45:55", + "from": "test", + "from_id": 9882011936, + "text": [ + { + "type": "code", + "text": "monospace with one baktick\n\n" + }, + { + "type": "pre", 
+ "text": "multiline\nmonospace", + "language": "" + } + ] + }, + { + "id": 14, + "type": "message", + "date": "2020-11-12T14:48:48", + "edited": "2020-11-12T14:49:39", + "from": "test", + "from_id": 9882011936, + "text": [ + { + "type": "text_link", + "text": "text link\n\n", + "href": "http://example.com/" + }, + { + "type": "link", + "text": "example.com" + }, + "\n\n", + { + "type": "email", + "text": "example@example.com" + } + ] + }, + { + "id": 15, + "type": "message", + "date": "2020-11-12T15:05:32", + "from": "test", + "from_id": 9882011936, + "text": [ + { + "type": "italic", + "text": "bold italic" + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/voice_messages/audio_1@12-11-2020_13-01-05.ogg b/tests/voice_messages/audio_1@12-11-2020_13-01-05.ogg new file mode 100644 index 0000000..ffa2fd0 Binary files /dev/null and b/tests/voice_messages/audio_1@12-11-2020_13-01-05.ogg differ -- cgit v1.2.3