about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- parse.py 44
-rw-r--r-- tests/files/test-sound.ogg bin 0 -> 9060 bytes
-rw-r--r-- tests/photos/photo_1@12-11-2020_12-57-31.jpg bin 0 -> 8469 bytes
-rw-r--r-- tests/result.json 214
-rw-r--r-- tests/voice_messages/audio_1@12-11-2020_13-01-05.ogg bin 0 -> 19338 bytes
5 files changed, 243 insertions, 15 deletions
diff --git a/parse.py b/parse.py
index 61edae9..098af81 100644
--- a/parse.py
+++ b/parse.py
@@ -13,7 +13,7 @@
# - forwarded posts
import os
-# import sys
+import sys
import json
from datetime import datetime
@@ -36,8 +36,8 @@ tag: {tag}\nlayout: post\n---\n'.format(\
def parse_post_photo(post, media_dir):
post_photo_src = post['photo'][7:]
- post_photo_src = media_dir + post_photo_src
- post_photo = '![image](/assets/img/posts/{src})\n\n'.format(\
+ post_photo_src = media_dir + '/' + post_photo_src
+ post_photo = '![image]({src})\n\n'.format(\
src=post_photo_src)
return post_photo
@@ -80,23 +80,26 @@ def parse_text_object(obj):
elif obj_type == 'phone':
return obj_text
+ # output = '*{str}*'.format(str=string.strip())
+ # output += '\n' * string.count('\n') * string.endswith('\n')
+
elif obj_type == 'bold':
- post_inline_bold = '**{text}**'.format(text=obj_text)
+ post_inline_bold = '**{text}**'.format(text=obj_text.strip())
return post_inline_bold
elif obj_type == 'italic':
- post_inline_italic = '*{text}*'.format(text=obj_text)
+ post_inline_italic = '*{text}*'.format(text=obj_text.strip())
return post_inline_italic
elif obj_type == 'underline':
- post_inline_underline = '<u>{text}</u>'.format(text=obj_text)
+ post_inline_underline = '<u>{text}</u>'.format(text=obj_text.strip())
return post_inline_underline
elif obj_type == 'strikethrough':
- post_inline_strike = '<s>{text}</s>'.format(text=obj_text)
+ post_inline_strike = '<s>{text}</s>'.format(text=obj_text.strip())
return post_inline_strike
- elif obj_type == 'code':
+ elif obj_type == 'code' or obj_type == 'pre':
post_inline_code = '```\n{text}\n```'.format(text=obj_text)
return post_inline_code
@@ -124,7 +127,7 @@ def parse_post_media(post, media_dir):
post_media_src = post['file'][post['file'].rfind("/") + 1:]
# add parent directory
- post_media_src = media_dir + post_media_src
+ post_media_src = media_dir + '/' + post_media_src
post_media = '\n<audio controls>\n \
<source src="{src}" type="{mime_type}">\n \
</audio>'.format(src=post_media_src, mime_type=post['mime_type'])
@@ -136,15 +139,15 @@ def parse_post(post):
post_output = ''
# optional image
- photo_dir = '/assets/img/posts/'
+ photo_dir = '/photos'
if 'photo' in post:
post_output += str(parse_post_photo(post, photo_dir))
# post text
- post_output += md_str(parse_post_text(post))
+ post_output += str(parse_post_text(post))
# optional media
- media_dir = '/assets/sound/posts/'
+ media_dir = '/files'
if 'media_type' in post:
post_output += str(parse_post_media(post, media_dir))
@@ -152,16 +155,27 @@ def parse_post(post):
def main():
+ # try directory from first argument
+ try:
+ input_dir = sys.argv[1]
+ except IndexError as e:
+ # if it's not specified, use current directory
+ input_dir = '.'
+
# create output directory
- out_dir = './formatted_posts'
+ out_dir = input_dir + '/' + 'formatted_posts'
try:
os.mkdir(out_dir)
except FileExistsError as e:
pass
# load json file
- with open('result.json', 'r') as f:
- data = json.load(f)
+ json_path = input_dir + '/' + 'result.json'
+ try:
+ with open(json_path, 'r') as f:
+ data = json.load(f)
+ except FileNotFoundError as e:
+ sys.exit('result.json not found.\nPlease, specify right directory')
# load only messages
raw_posts = data['messages']
diff --git a/tests/files/test-sound.ogg b/tests/files/test-sound.ogg
new file mode 100644
index 0000000..c8391c6
--- /dev/null
+++ b/tests/files/test-sound.ogg
Binary files differ
diff --git a/tests/photos/photo_1@12-11-2020_12-57-31.jpg b/tests/photos/photo_1@12-11-2020_12-57-31.jpg
new file mode 100644
index 0000000..42965b4
--- /dev/null
+++ b/tests/photos/photo_1@12-11-2020_12-57-31.jpg
Binary files differ
diff --git a/tests/result.json b/tests/result.json
new file mode 100644
index 0000000..cd0281e
--- /dev/null
+++ b/tests/result.json
@@ -0,0 +1,214 @@
+{
+ "name": "test",
+ "type": "private_channel",
+ "id": 9882011936,
+ "messages": [
+ {
+ "id": 1,
+ "type": "service",
+ "date": "2020-11-12T12:53:52",
+ "actor": "test",
+ "actor_id": 9882011936,
+ "action": "create_channel",
+ "title": "test",
+ "text": ""
+ },
+ {
+ "id": 2,
+ "type": "message",
+ "date": "2020-11-12T12:54:07",
+ "from": "test",
+ "from_id": 9882011936,
+ "text": "test text post"
+ },
+ {
+ "id": 3,
+ "type": "message",
+ "date": "2020-11-12T12:57:31",
+ "from": "test",
+ "from_id": 9882011936,
+ "photo": "photos/photo_1@12-11-2020_12-57-31.jpg",
+ "width": 801,
+ "height": 526,
+ "text": ""
+ },
+ {
+ "id": 4,
+ "type": "message",
+ "date": "2020-11-12T12:57:40",
+ "from": "test",
+ "from_id": 9882011936,
+ "photo": "photos/photo_1@12-11-2020_12-57-31.jpg",
+ "width": 801,
+ "height": 526,
+ "text": "photo with text"
+ },
+ {
+ "id": 5,
+ "type": "message",
+ "date": "2020-11-12T12:58:18",
+ "edited": "2020-11-12T13:03:00",
+ "from": "test",
+ "from_id": 9882011936,
+ "text": [
+ {
+ "type": "italic",
+ "text": "italic\n\n"
+ },
+ {
+ "type": "bold",
+ "text": "bold\n\n"
+ },
+ {
+ "type": "underline",
+ "text": "underline\n\n"
+ },
+ {
+ "type": "strikethrough",
+ "text": "strikethrough\n\n"
+ },
+ {
+ "type": "pre",
+ "text": "monospace",
+ "language": ""
+ }
+ ]
+ },
+ {
+ "id": 7,
+ "type": "message",
+ "date": "2020-11-12T13:01:05",
+ "from": "test",
+ "from_id": 9882011936,
+ "file": "voice_messages/audio_1@12-11-2020_13-01-05.ogg",
+ "media_type": "voice_message",
+ "mime_type": "audio/ogg",
+ "duration_seconds": 2,
+ "text": ""
+ },
+ {
+ "id": 8,
+ "type": "message",
+ "date": "2020-11-12T13:02:35",
+ "from": "test",
+ "from_id": 9882011936,
+ "file": "files/test-sound.ogg",
+ "media_type": "audio_file",
+ "mime_type": "audio/x-vorbis+ogg",
+ "duration_seconds": 1,
+ "text": ""
+ },
+ {
+ "id": 9,
+ "type": "message",
+ "date": "2020-11-12T13:43:23",
+ "from": "test",
+ "from_id": 9882011936,
+ "text": [
+ {
+ "type": "italic",
+ "text": "italic"
+ },
+ " in text\n\n",
+ {
+ "type": "italic",
+ "text": "italic on whole line"
+ }
+ ]
+ },
+ {
+ "id": 10,
+ "type": "message",
+ "date": "2020-11-12T14:11:35",
+ "edited": "2020-11-12T14:18:17",
+ "from": "test",
+ "from_id": 9882011936,
+ "text": [
+ {
+ "type": "bold",
+ "text": "bold"
+ },
+ " in text\n",
+ {
+ "type": "bold",
+ "text": "bold below\n\n"
+ },
+ "normal text\n\n",
+ {
+ "type": "bold",
+ "text": "bold with one line gap\n"
+ },
+ "normal text"
+ ]
+ },
+ {
+ "id": 11,
+ "type": "message",
+ "date": "2020-11-12T14:34:47",
+ "from": "test",
+ "from_id": 9882011936,
+ "text": [
+ {
+ "type": "bold",
+ "text": "multiline\nbold\n\ntext"
+ }
+ ]
+ },
+ {
+ "id": 13,
+ "type": "message",
+ "date": "2020-11-12T14:45:43",
+ "edited": "2020-11-12T14:45:55",
+ "from": "test",
+ "from_id": 9882011936,
+ "text": [
+ {
+ "type": "code",
+ "text": "monospace with one baktick\n\n"
+ },
+ {
+ "type": "pre",
+ "text": "multiline\nmonospace",
+ "language": ""
+ }
+ ]
+ },
+ {
+ "id": 14,
+ "type": "message",
+ "date": "2020-11-12T14:48:48",
+ "edited": "2020-11-12T14:49:39",
+ "from": "test",
+ "from_id": 9882011936,
+ "text": [
+ {
+ "type": "text_link",
+ "text": "text link\n\n",
+ "href": "http://example.com/"
+ },
+ {
+ "type": "link",
+ "text": "example.com"
+ },
+ "\n\n",
+ {
+ "type": "email",
+ "text": "example@example.com"
+ }
+ ]
+ },
+ {
+ "id": 15,
+ "type": "message",
+ "date": "2020-11-12T15:05:32",
+ "from": "test",
+ "from_id": 9882011936,
+ "text": [
+ {
+ "type": "italic",
+ "text": "bold italic"
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/tests/voice_messages/audio_1@12-11-2020_13-01-05.ogg b/tests/voice_messages/audio_1@12-11-2020_13-01-05.ogg
new file mode 100644
index 0000000..ffa2fd0
--- /dev/null
+++ b/tests/voice_messages/audio_1@12-11-2020_13-01-05.ogg
Binary files differ