From 767dc7f6954a66e187e5eefc36c21868048e04fc Mon Sep 17 00:00:00 2001
From: la-ninpre <leobrekalini@gmail.com>
Date: Thu, 12 Nov 2020 22:38:08 +0300
Subject: add tests

i've found some issues with formatted text; these can be seen in the
output (which is not included for now). there are some complications with
parsing newline characters, but i'll fix them later.

also, the script is now usable as a command-line application: you can
specify the directory in which the source files are located.

to run the tests, execute:
$ python parse.py tests

this will create the directory tests/formatted_posts and populate it with
the generated markdown posts.

if no directory is specified, the script assumes that the source files are
in the current directory. if the required json file (result.json) cannot
be found, it exits with code 1.
---
 parse.py                                           |  44 +++--
 tests/files/test-sound.ogg                         | Bin 0 -> 9060 bytes
 tests/photos/photo_1@12-11-2020_12-57-31.jpg       | Bin 0 -> 8469 bytes
 tests/result.json                                  | 214 +++++++++++++++++++++
 .../voice_messages/audio_1@12-11-2020_13-01-05.ogg | Bin 0 -> 19338 bytes
 5 files changed, 243 insertions(+), 15 deletions(-)
 create mode 100644 tests/files/test-sound.ogg
 create mode 100644 tests/photos/photo_1@12-11-2020_12-57-31.jpg
 create mode 100644 tests/result.json
 create mode 100644 tests/voice_messages/audio_1@12-11-2020_13-01-05.ogg
diff --git a/parse.py b/parse.py
index 61edae9..098af81 100644
--- a/parse.py
+++ b/parse.py
@@ -13,7 +13,7 @@
 # - forwarded posts
 
 import os
-# import sys
+import sys
 import json
 from datetime import datetime
 
@@ -36,8 +36,8 @@ tag: {tag}\nlayout: post\n---\n'.format(\
 
 def parse_post_photo(post, media_dir):
     post_photo_src = post['photo'][7:]
-    post_photo_src = media_dir + post_photo_src
-    post_photo = '![image](/assets/img/posts/{src})\n\n'.format(\
+    post_photo_src = media_dir + '/' + post_photo_src
+    post_photo = '![image]({src})\n\n'.format(\
             src=post_photo_src)
 
     return post_photo
@@ -80,23 +80,26 @@ def parse_text_object(obj):
     elif obj_type == 'phone':
         return obj_text
 
+    # output = '*{str}*'.format(str=string.strip())
+    # output += '\n' * string.count('\n') * string.endswith('\n')
+
     elif obj_type == 'bold':
-        post_inline_bold = '**{text}**'.format(text=obj_text)
+        post_inline_bold = '**{text}**'.format(text=obj_text.strip())
         return post_inline_bold
 
     elif obj_type == 'italic':
-        post_inline_italic = '*{text}*'.format(text=obj_text)
+        post_inline_italic = '*{text}*'.format(text=obj_text.strip())
         return post_inline_italic
 
     elif obj_type == 'underline':
-        post_inline_underline = '<u>{text}</u>'.format(text=obj_text)
+        post_inline_underline = '<u>{text}</u>'.format(text=obj_text.strip())
         return post_inline_underline
 
     elif obj_type == 'strikethrough':
-        post_inline_strike = '<s>{text}</s>'.format(text=obj_text)
+        post_inline_strike = '<s>{text}</s>'.format(text=obj_text.strip())
         return post_inline_strike
 
-    elif obj_type == 'code':
+    elif obj_type == 'code' or obj_type == 'pre':
         post_inline_code = '```\n{text}\n```'.format(text=obj_text)
         return post_inline_code
 
@@ -124,7 +127,7 @@ def parse_post_media(post, media_dir):
     post_media_src = post['file'][post['file'].rfind("/") + 1:]
 
     # add parent directory
-    post_media_src = media_dir + post_media_src
+    post_media_src = media_dir + '/' + post_media_src
     post_media = '\n<audio controls>\n \
         <source src="{src}" type="{mime_type}">\n \
         </audio>'.format(src=post_media_src, mime_type=post['mime_type'])
@@ -136,15 +139,15 @@ def parse_post(post):
     post_output = ''
     
     # optional image
-    photo_dir = '/assets/img/posts/'
+    photo_dir = '/photos'
     if 'photo' in post:
         post_output += str(parse_post_photo(post, photo_dir))
 
     # post text
-    post_output += md_str(parse_post_text(post))
+    post_output += str(parse_post_text(post))
 
     # optional media
-    media_dir = '/assets/sound/posts/'
+    media_dir = '/files'
     if 'media_type' in post:
         post_output += str(parse_post_media(post, media_dir))
 
@@ -152,16 +155,27 @@ def parse_post(post):
 
 
 def main():
+    # try directory from first argument
+    try:
+        input_dir = sys.argv[1]
+    except IndexError as e:
+        # if it's not specified, use current directory
+        input_dir = '.'
+
     # create output directory
-    out_dir = './formatted_posts'
+    out_dir = input_dir + '/' + 'formatted_posts'
     try:
         os.mkdir(out_dir)
     except FileExistsError as e:
         pass
 
     # load json file
-    with open('result.json', 'r') as f:
-        data = json.load(f)
+    json_path = input_dir + '/' + 'result.json'
+    try:
+        with open(json_path, 'r') as f:
+            data = json.load(f)
+    except FileNotFoundError as e:
+        sys.exit('result.json not found.\nPlease, specify right directory')
 
     # load only messages
     raw_posts = data['messages']
diff --git a/tests/files/test-sound.ogg b/tests/files/test-sound.ogg
new file mode 100644
index 0000000..c8391c6
Binary files /dev/null and b/tests/files/test-sound.ogg differ
diff --git a/tests/photos/photo_1@12-11-2020_12-57-31.jpg b/tests/photos/photo_1@12-11-2020_12-57-31.jpg
new file mode 100644
index 0000000..42965b4
Binary files /dev/null and b/tests/photos/photo_1@12-11-2020_12-57-31.jpg differ
diff --git a/tests/result.json b/tests/result.json
new file mode 100644
index 0000000..cd0281e
--- /dev/null
+++ b/tests/result.json
@@ -0,0 +1,214 @@
+{
+ "name": "test",
+ "type": "private_channel",
+ "id": 9882011936,
+ "messages": [
+  {
+   "id": 1,
+   "type": "service",
+   "date": "2020-11-12T12:53:52",
+   "actor": "test",
+   "actor_id": 9882011936,
+   "action": "create_channel",
+   "title": "test",
+   "text": ""
+  },
+  {
+   "id": 2,
+   "type": "message",
+   "date": "2020-11-12T12:54:07",
+   "from": "test",
+   "from_id": 9882011936,
+   "text": "test text post"
+  },
+  {
+   "id": 3,
+   "type": "message",
+   "date": "2020-11-12T12:57:31",
+   "from": "test",
+   "from_id": 9882011936,
+   "photo": "photos/photo_1@12-11-2020_12-57-31.jpg",
+   "width": 801,
+   "height": 526,
+   "text": ""
+  },
+  {
+   "id": 4,
+   "type": "message",
+   "date": "2020-11-12T12:57:40",
+   "from": "test",
+   "from_id": 9882011936,
+   "photo": "photos/photo_1@12-11-2020_12-57-31.jpg",
+   "width": 801,
+   "height": 526,
+   "text": "photo with text"
+  },
+  {
+   "id": 5,
+   "type": "message",
+   "date": "2020-11-12T12:58:18",
+   "edited": "2020-11-12T13:03:00",
+   "from": "test",
+   "from_id": 9882011936,
+   "text": [
+    {
+     "type": "italic",
+     "text": "italic\n\n"
+    },
+    {
+     "type": "bold",
+     "text": "bold\n\n"
+    },
+    {
+     "type": "underline",
+     "text": "underline\n\n"
+    },
+    {
+     "type": "strikethrough",
+     "text": "strikethrough\n\n"
+    },
+    {
+     "type": "pre",
+     "text": "monospace",
+     "language": ""
+    }
+   ]
+  },
+  {
+   "id": 7,
+   "type": "message",
+   "date": "2020-11-12T13:01:05",
+   "from": "test",
+   "from_id": 9882011936,
+   "file": "voice_messages/audio_1@12-11-2020_13-01-05.ogg",
+   "media_type": "voice_message",
+   "mime_type": "audio/ogg",
+   "duration_seconds": 2,
+   "text": ""
+  },
+  {
+   "id": 8,
+   "type": "message",
+   "date": "2020-11-12T13:02:35",
+   "from": "test",
+   "from_id": 9882011936,
+   "file": "files/test-sound.ogg",
+   "media_type": "audio_file",
+   "mime_type": "audio/x-vorbis+ogg",
+   "duration_seconds": 1,
+   "text": ""
+  },
+  {
+   "id": 9,
+   "type": "message",
+   "date": "2020-11-12T13:43:23",
+   "from": "test",
+   "from_id": 9882011936,
+   "text": [
+    {
+     "type": "italic",
+     "text": "italic"
+    },
+    " in text\n\n",
+    {
+     "type": "italic",
+     "text": "italic on whole line"
+    }
+   ]
+  },
+  {
+   "id": 10,
+   "type": "message",
+   "date": "2020-11-12T14:11:35",
+   "edited": "2020-11-12T14:18:17",
+   "from": "test",
+   "from_id": 9882011936,
+   "text": [
+    {
+     "type": "bold",
+     "text": "bold"
+    },
+    " in text\n",
+    {
+     "type": "bold",
+     "text": "bold below\n\n"
+    },
+    "normal text\n\n",
+    {
+     "type": "bold",
+     "text": "bold with one line gap\n"
+    },
+    "normal text"
+   ]
+  },
+  {
+   "id": 11,
+   "type": "message",
+   "date": "2020-11-12T14:34:47",
+   "from": "test",
+   "from_id": 9882011936,
+   "text": [
+    {
+     "type": "bold",
+     "text": "multiline\nbold\n\ntext"
+    }
+   ]
+  },
+  {
+   "id": 13,
+   "type": "message",
+   "date": "2020-11-12T14:45:43",
+   "edited": "2020-11-12T14:45:55",
+   "from": "test",
+   "from_id": 9882011936,
+   "text": [
+    {
+     "type": "code",
+     "text": "monospace with one baktick\n\n"
+    },
+    {
+     "type": "pre",
+     "text": "multiline\nmonospace",
+     "language": ""
+    }
+   ]
+  },
+  {
+   "id": 14,
+   "type": "message",
+   "date": "2020-11-12T14:48:48",
+   "edited": "2020-11-12T14:49:39",
+   "from": "test",
+   "from_id": 9882011936,
+   "text": [
+    {
+     "type": "text_link",
+     "text": "text link\n\n",
+     "href": "http://example.com/"
+    },
+    {
+     "type": "link",
+     "text": "example.com"
+    },
+    "\n\n",
+    {
+     "type": "email",
+     "text": "example@example.com"
+    }
+   ]
+  },
+  {
+   "id": 15,
+   "type": "message",
+   "date": "2020-11-12T15:05:32",
+   "from": "test",
+   "from_id": 9882011936,
+   "text": [
+    {
+     "type": "italic",
+     "text": "bold italic"
+    }
+   ]
+  }
+ ]
+}
\ No newline at end of file
diff --git a/tests/voice_messages/audio_1@12-11-2020_13-01-05.ogg b/tests/voice_messages/audio_1@12-11-2020_13-01-05.ogg
new file mode 100644
index 0000000..ffa2fd0
Binary files /dev/null and b/tests/voice_messages/audio_1@12-11-2020_13-01-05.ogg differ
-- 
cgit v1.2.3