From 2d26d28c8f891bae27bf075b443b1e82185e4a37 Mon Sep 17 00:00:00 2001
From: la-ninpre
Date: Mon, 16 Nov 2020 19:45:19 +0300
Subject: fix formatted text parsing

earlier, strings like 'bold\ntext\n' that needed to be treated as bold
text were converted in a wrong way (e.g. to '**bold\ntext\n**', which
breaks the markup because of the newline characters before the closing
asterisks), but now it's kind of fixed.

also there's now support for emails and `pre` blocks.
---
 parse.py                               | 80 +++++++++++++++++-----------------
 tests/formatted_posts/2020-11-12-10.md | 14 ++++++
 tests/formatted_posts/2020-11-12-11.md | 11 +++++
 tests/formatted_posts/2020-11-12-13.md | 11 +++++
 tests/formatted_posts/2020-11-12-14.md | 12 +++++
 tests/formatted_posts/2020-11-12-15.md |  8 ++++
 tests/formatted_posts/2020-11-12-2.md  |  8 ++++
 tests/formatted_posts/2020-11-12-3.md  | 10 +++++
 tests/formatted_posts/2020-11-12-4.md  | 10 +++++
 tests/formatted_posts/2020-11-12-5.md  | 16 +++++++
 tests/formatted_posts/2020-11-12-7.md  | 11 +++++
 tests/formatted_posts/2020-11-12-8.md  | 11 +++++
 tests/formatted_posts/2020-11-12-9.md  | 10 +++++
 13 files changed, 171 insertions(+), 41 deletions(-)
 create mode 100644 tests/formatted_posts/2020-11-12-10.md
 create mode 100644 tests/formatted_posts/2020-11-12-11.md
 create mode 100644 tests/formatted_posts/2020-11-12-13.md
 create mode 100644 tests/formatted_posts/2020-11-12-14.md
 create mode 100644 tests/formatted_posts/2020-11-12-15.md
 create mode 100644 tests/formatted_posts/2020-11-12-2.md
 create mode 100644 tests/formatted_posts/2020-11-12-3.md
 create mode 100644 tests/formatted_posts/2020-11-12-4.md
 create mode 100644 tests/formatted_posts/2020-11-12-5.md
 create mode 100644 tests/formatted_posts/2020-11-12-7.md
 create mode 100644 tests/formatted_posts/2020-11-12-8.md
 create mode 100644 tests/formatted_posts/2020-11-12-9.md

diff --git a/parse.py b/parse.py
index 098af81..4a768d0 100644
--- a/parse.py
+++ b/parse.py
@@ -11,29 +11,24 @@
 # - replies
 # - single/muliple tags
 # - forwarded posts
+# - custom post header
 
 import os
 import sys
 import json
 from datetime import datetime
 
-# post:
-# header
-# [photo?]
-# text
-# [media?]
-
-# text:
-# [str|list(str|obj, ...)]
 
 def print_post_header(post_title, post_date, post_tag):
     # TODO: handle post tag/tags
+    # TODO: support for custom header
     post_header = '---\ntitle: {title}\ndate: {date}\n\
 tag: {tag}\nlayout: post\n---\n'.format(\
             title=post_title, date=post_date, tag=post_tag)
 
     return post_header
 
+
 def parse_post_photo(post, media_dir):
     post_photo_src = post['photo'][7:]
     post_photo_src = media_dir + '/' + post_photo_src
@@ -42,22 +37,34 @@ def parse_post_photo(post, media_dir):
 
     return post_photo
 
-def md_str(string):
-    string = string.replace('\n','\n\n')
-    string = string.replace('. ', '.\n')
 
-    return string
+# def md_str(string):
+    # string = string.replace('\n','\n\n')
+    # string = string.replace('. ', '.\n')
 
+    # return string
 
-def parse_text_object(obj):
-    '''
-    Parse object from post text.
 
-    Objects are text links, plain links, underlined text, strikethrough text,
-    italic text, bold text, code blocks and hashtags.
+def text_format(string, fmt):
+    if fmt in ('*', '**', '***', '`', '```'):
+        output = '{fmt}{txt}{fmt}'
+    elif fmt == '```':
+        output = '{fmt}\n{txt}\n{fmt}'
+    else:
+        output = '<{fmt}>{txt}</{fmt}>'
+
+    output = output.format(fmt=fmt, txt=string.strip())
+    output += '\n' * string.split('\n').count('') * string.endswith('\n')
+    return output
+
+def text_link_format(text, link):
+    link_fmt = '[{text}]({href})'
+    link_fmt = link_fmt.format(text=text.strip(), href=link)
+    link_fmt += '\n' * text.count('\n') * text.endswith('\n')
+    return link_fmt
 
-    This is a mess, but what is better?
-    '''
+
+def parse_text_object(obj):
 
     obj_type = obj['type']
     obj_text = obj['text']
@@ -67,41 +74,32 @@ def parse_text_object(obj):
         return post_tag
 
     elif obj_type == 'text_link':
-        post_text_link = '[{text}]({href})'.format(text=obj_text, \
-                href=obj['href'])
-        return post_text_link
+        return text_link_format(obj_text, obj['href'])
 
-    elif obj_type == 'link':
-        post_link = '[link]({href})'.format(href=obj_text)
+    elif obj_type == 'link' or obj_type == 'email':
+        post_link = '<{href}>'.format(href=obj_text.strip())
         return post_link
 
-    # I dunno how this appeared, but it seems like hyphenated numbers
-    # are treated as phone numbers, so return them as plain text.
     elif obj_type == 'phone':
         return obj_text
 
-    # output = '*{str}*'.format(str=string.strip())
-    # output += '\n' * string.count('\n') * string.endswith('\n')
+    elif obj_type == 'italic':
+        return text_format(obj_text, '*')
 
     elif obj_type == 'bold':
-        post_inline_bold = '**{text}**'.format(text=obj_text.strip())
-        return post_inline_bold
+        return text_format(obj_text, '**')
 
-    elif obj_type == 'italic':
-        post_inline_italic = '*{text}*'.format(text=obj_text.strip())
-        return post_inline_italic
+    elif obj_type == 'code':
+        return text_format(obj_text, '`')
+
+    elif obj_type == 'pre':
+        return text_format(obj_text, '```')
 
     elif obj_type == 'underline':
-        post_inline_underline = '{text}'.format(text=obj_text.strip())
-        return post_inline_underline
+        return text_format(obj_text, 'u')
 
     elif obj_type == 'strikethrough':
-        post_inline_strike = '{text}'.format(text=obj_text.strip())
-        return post_inline_strike
-
-    elif obj_type == 'code' or obj_type == 'pre':
-        post_inline_code = '```\n{text}\n```'.format(text=obj_text)
-        return post_inline_code
+        return text_format(obj_text, 's')
 
 
 def parse_post_text(post):
diff --git a/tests/formatted_posts/2020-11-12-10.md b/tests/formatted_posts/2020-11-12-10.md
new file mode 100644
index 0000000..dab9ba4
--- /dev/null
+++ b/tests/formatted_posts/2020-11-12-10.md
@@ -0,0 +1,14 @@
+---
+title: 10
+date: 2020-11-12 14:11:35
+tag: None
+layout: post
+---
+
+**bold** in text
+**bold below**
+
+normal text
+
+**bold with one line gap**
+normal text
diff --git a/tests/formatted_posts/2020-11-12-11.md b/tests/formatted_posts/2020-11-12-11.md
new file mode 100644
index 0000000..2e4b2c2
--- /dev/null
+++ b/tests/formatted_posts/2020-11-12-11.md
@@ -0,0 +1,11 @@
+---
+title: 11
+date: 2020-11-12 14:34:47
+tag: None
+layout: post
+---
+
+**multiline
+bold
+
+text**
diff --git a/tests/formatted_posts/2020-11-12-13.md b/tests/formatted_posts/2020-11-12-13.md
new file mode 100644
index 0000000..01fa3c6
--- /dev/null
+++ b/tests/formatted_posts/2020-11-12-13.md
@@ -0,0 +1,11 @@
+---
+title: 13
+date: 2020-11-12 14:45:43
+tag: None
+layout: post
+---
+
+`monospace with one baktick`
+
+```multiline
+monospace```
diff --git a/tests/formatted_posts/2020-11-12-14.md b/tests/formatted_posts/2020-11-12-14.md
new file mode 100644
index 0000000..092be7d
--- /dev/null
+++ b/tests/formatted_posts/2020-11-12-14.md
@@ -0,0 +1,12 @@
+---
+title: 14
+date: 2020-11-12 14:48:48
+tag: None
+layout: post
+---
+
+[text link](http://example.com/)
+
+
+
+
diff --git a/tests/formatted_posts/2020-11-12-15.md b/tests/formatted_posts/2020-11-12-15.md
new file mode 100644
index 0000000..b8e65e0
--- /dev/null
+++ b/tests/formatted_posts/2020-11-12-15.md
@@ -0,0 +1,8 @@
+---
+title: 15
+date: 2020-11-12 15:05:32
+tag: None
+layout: post
+---
+
+*bold italic*
diff --git a/tests/formatted_posts/2020-11-12-2.md b/tests/formatted_posts/2020-11-12-2.md
new file mode 100644
index 0000000..ded2627
--- /dev/null
+++ b/tests/formatted_posts/2020-11-12-2.md
@@ -0,0 +1,8 @@
+---
+title: 2
+date: 2020-11-12 12:54:07
+tag: None
+layout: post
+---
+
+test text post
diff --git a/tests/formatted_posts/2020-11-12-3.md b/tests/formatted_posts/2020-11-12-3.md
new file mode 100644
index 0000000..f9bfe3d
--- /dev/null
+++ b/tests/formatted_posts/2020-11-12-3.md
@@ -0,0 +1,10 @@
+---
+title: 3
+date: 2020-11-12 12:57:31
+tag: None
+layout: post
+---
+
+![image](/photos/photo_1@12-11-2020_12-57-31.jpg)
+
+
diff --git a/tests/formatted_posts/2020-11-12-4.md b/tests/formatted_posts/2020-11-12-4.md
new file mode 100644
index 0000000..8beb106
--- /dev/null
+++ b/tests/formatted_posts/2020-11-12-4.md
@@ -0,0 +1,10 @@
+---
+title: 4
+date: 2020-11-12 12:57:40
+tag: None
+layout: post
+---
+
+![image](/photos/photo_1@12-11-2020_12-57-31.jpg)
+
+photo with text
diff --git a/tests/formatted_posts/2020-11-12-5.md b/tests/formatted_posts/2020-11-12-5.md
new file mode 100644
index 0000000..13e734e
--- /dev/null
+++ b/tests/formatted_posts/2020-11-12-5.md
@@ -0,0 +1,16 @@
+---
+title: 5
+date: 2020-11-12 12:58:18
+tag: None
+layout: post
+---
+
+*italic*
+
+**bold**
+
+<u>underline</u>
+
+<s>strikethrough</s>
+
+```monospace```
diff --git a/tests/formatted_posts/2020-11-12-7.md b/tests/formatted_posts/2020-11-12-7.md
new file mode 100644
index 0000000..5a9e6ec
--- /dev/null
+++ b/tests/formatted_posts/2020-11-12-7.md
@@ -0,0 +1,11 @@
+---
+title: 7
+date: 2020-11-12 13:01:05
+tag: None
+layout: post
+---
+
+
+
diff --git a/tests/formatted_posts/2020-11-12-8.md b/tests/formatted_posts/2020-11-12-8.md
new file mode 100644
index 0000000..0b30a03
--- /dev/null
+++ b/tests/formatted_posts/2020-11-12-8.md
@@ -0,0 +1,11 @@
+---
+title: 8
+date: 2020-11-12 13:02:35
+tag: None
+layout: post
+---
+
+
+
diff --git a/tests/formatted_posts/2020-11-12-9.md b/tests/formatted_posts/2020-11-12-9.md
new file mode 100644
index 0000000..cbf1a9f
--- /dev/null
+++ b/tests/formatted_posts/2020-11-12-9.md
@@ -0,0 +1,10 @@
+---
+title: 9
+date: 2020-11-12 13:43:23
+tag: None
+layout: post
+---
+
+*italic* in text
+
+*italic on whole line*
-- 
cgit v1.2.3