fix formatted text parsing. Earlier, strings like 'bold\ntext\n' that needed to be treated as bold text were converted incorrectly (e.g. to '**bold\ntext\n**', which breaks the markup because of the newline characters before the closing asterisks); this is now mostly fixed. There is also now support for emails and <pre> blocks.
la-ninpre leobrekalini@gmail.com
Mon, 16 Nov 2020 19:45:19 +0300
13 files changed,
171 insertions(+),
41 deletions(-)
jump to
M
parse.py
→
parse.py
@@ -11,29 +11,24 @@ # TODO summary:
# - replies # - single/muliple tags # - forwarded posts +# - custom post header import os import sys import json from datetime import datetime -# post: -# header -# [photo?] -# text -# [media?] - -# text: -# [str|list(str|obj, ...)] def print_post_header(post_title, post_date, post_tag): # TODO: handle post tag/tags + # TODO: support for custom header post_header = '---\ntitle: {title}\ndate: {date}\n\ tag: {tag}\nlayout: post\n---\n'.format(\ title=post_title, date=post_date, tag=post_tag) return post_header + def parse_post_photo(post, media_dir): post_photo_src = post['photo'][7:] post_photo_src = media_dir + '/' + post_photo_src@@ -42,22 +37,34 @@ src=post_photo_src)
return post_photo -def md_str(string): - string = string.replace('\n','\n\n') - string = string.replace('. ', '.\n') - return string +# def md_str(string): + # string = string.replace('\n','\n\n') + # string = string.replace('. ', '.\n') + # return string -def parse_text_object(obj): - ''' - Parse object from post text. - Objects are text links, plain links, underlined text, strikethrough text, - italic text, bold text, code blocks and hashtags. +def text_format(string, fmt): + if fmt in ('*', '**', '***', '`', '```'): + output = '{fmt}{txt}{fmt}' + elif fmt == '```': + output = '{fmt}\n{txt}\n{fmt}' + else: + output = '<{fmt}>{txt}</{fmt}>' - This is a mess, but what is better? - ''' + output = output.format(fmt=fmt, txt=string.strip()) + output += '\n' * string.split('\n').count('') * string.endswith('\n') + return output + +def text_link_format(text, link): + link_fmt = '[{text}]({href})' + link_fmt = link_fmt.format(text=text.strip(), href=link) + link_fmt += '\n' * text.count('\n') * text.endswith('\n') + return link_fmt + + +def parse_text_object(obj): obj_type = obj['type'] obj_text = obj['text']@@ -67,41 +74,32 @@ post_tag = obj_text
return post_tag elif obj_type == 'text_link': - post_text_link = '[{text}]({href})'.format(text=obj_text, \ - href=obj['href']) - return post_text_link + return text_link_format(obj_text, obj['href']) - elif obj_type == 'link': - post_link = '[link]({href})'.format(href=obj_text) + elif obj_type == 'link' or obj_type == 'email': + post_link = '<{href}>'.format(href=obj_text.strip()) return post_link - # I dunno how this appeared, but it seems like hyphenated numbers - # are treated as phone numbers, so return them as plain text. elif obj_type == 'phone': return obj_text - # output = '*{str}*'.format(str=string.strip()) - # output += '\n' * string.count('\n') * string.endswith('\n') + elif obj_type == 'italic': + return text_format(obj_text, '*') elif obj_type == 'bold': - post_inline_bold = '**{text}**'.format(text=obj_text.strip()) - return post_inline_bold + return text_format(obj_text, '**') - elif obj_type == 'italic': - post_inline_italic = '*{text}*'.format(text=obj_text.strip()) - return post_inline_italic + elif obj_type == 'code': + return text_format(obj_text, '`') + + elif obj_type == 'pre': + return text_format(obj_text, '```') elif obj_type == 'underline': - post_inline_underline = '<u>{text}</u>'.format(text=obj_text.strip()) - return post_inline_underline + return text_format(obj_text, 'u') elif obj_type == 'strikethrough': - post_inline_strike = '<s>{text}</s>'.format(text=obj_text.strip()) - return post_inline_strike - - elif obj_type == 'code' or obj_type == 'pre': - post_inline_code = '```\n{text}\n```'.format(text=obj_text) - return post_inline_code + return text_format(obj_text, 's') def parse_post_text(post):
A
tests/formatted_posts/2020-11-12-10.md
@@ -0,0 +1,14 @@
+--- +title: 10 +date: 2020-11-12 14:11:35 +tag: None +layout: post +--- + +**bold** in text +**bold below** + +normal text + +**bold with one line gap** +normal text
A
tests/formatted_posts/2020-11-12-11.md
@@ -0,0 +1,11 @@
+--- +title: 11 +date: 2020-11-12 14:34:47 +tag: None +layout: post +--- + +**multiline +bold + +text**
A
tests/formatted_posts/2020-11-12-13.md
@@ -0,0 +1,11 @@
+--- +title: 13 +date: 2020-11-12 14:45:43 +tag: None +layout: post +--- + +`monospace with one baktick` + +```multiline +monospace```
A
tests/formatted_posts/2020-11-12-14.md
@@ -0,0 +1,12 @@
+--- +title: 14 +date: 2020-11-12 14:48:48 +tag: None +layout: post +--- + +[text link](http://example.com/) + +<example.com> + +<example@example.com>
A
tests/formatted_posts/2020-11-12-15.md
@@ -0,0 +1,8 @@
+--- +title: 15 +date: 2020-11-12 15:05:32 +tag: None +layout: post +--- + +*bold italic*
A
tests/formatted_posts/2020-11-12-2.md
@@ -0,0 +1,8 @@
+--- +title: 2 +date: 2020-11-12 12:54:07 +tag: None +layout: post +--- + +test text post
A
tests/formatted_posts/2020-11-12-3.md
@@ -0,0 +1,10 @@
+--- +title: 3 +date: 2020-11-12 12:57:31 +tag: None +layout: post +--- + + + +
A
tests/formatted_posts/2020-11-12-4.md
@@ -0,0 +1,10 @@
+--- +title: 4 +date: 2020-11-12 12:57:40 +tag: None +layout: post +--- + + + +photo with text
A
tests/formatted_posts/2020-11-12-5.md
@@ -0,0 +1,16 @@
+--- +title: 5 +date: 2020-11-12 12:58:18 +tag: None +layout: post +--- + +*italic* + +**bold** + +<u>underline</u> + +<s>strikethrough</s> + +```monospace```
A
tests/formatted_posts/2020-11-12-7.md
@@ -0,0 +1,11 @@
+--- +title: 7 +date: 2020-11-12 13:01:05 +tag: None +layout: post +--- + + +<audio controls> + <source src="/files/audio_1@12-11-2020_13-01-05.ogg" type="audio/ogg"> + </audio>
A
tests/formatted_posts/2020-11-12-8.md
@@ -0,0 +1,11 @@
+--- +title: 8 +date: 2020-11-12 13:02:35 +tag: None +layout: post +--- + + +<audio controls> + <source src="/files/test-sound.ogg" type="audio/x-vorbis+ogg"> + </audio>
A
tests/formatted_posts/2020-11-12-9.md
@@ -0,0 +1,10 @@
+--- +title: 9 +date: 2020-11-12 13:43:23 +tag: None +layout: post +--- + +*italic* in text + +*italic on whole line*