diff options
| author | la-ninpre <leobrekalini@gmail.com> | 2020-11-16 19:45:19 +0300 |
|---|---|---|
| committer | la-ninpre <leobrekalini@gmail.com> | 2020-11-16 19:45:19 +0300 |
| commit | 2d26d28c8f891bae27bf075b443b1e82185e4a37 (patch) | |
| tree | c8e136490e16f2388e5927f9a54738978b0987fb | |
| parent | 767dc7f6954a66e187e5eefc36c21868048e04fc (diff) | |
| download | tg2md-2d26d28c8f891bae27bf075b443b1e82185e4a37.tar.gz tg2md-2d26d28c8f891bae27bf075b443b1e82185e4a37.zip | |
fix formatted text parsing
earlier, strings like 'bold\ntext\n' that needed to be treated as bold
text were converted incorrectly (e.g. to '**bold\ntext\n**', which
breaks the markup because of the newline character before the closing
asterisks), but now it's kind of fixed.
also, there is now support for emails and <pre> blocks.
| -rw-r--r-- | parse.py | 80 | ||||
| -rw-r--r-- | tests/formatted_posts/2020-11-12-10.md | 14 | ||||
| -rw-r--r-- | tests/formatted_posts/2020-11-12-11.md | 11 | ||||
| -rw-r--r-- | tests/formatted_posts/2020-11-12-13.md | 11 | ||||
| -rw-r--r-- | tests/formatted_posts/2020-11-12-14.md | 12 | ||||
| -rw-r--r-- | tests/formatted_posts/2020-11-12-15.md | 8 | ||||
| -rw-r--r-- | tests/formatted_posts/2020-11-12-2.md | 8 | ||||
| -rw-r--r-- | tests/formatted_posts/2020-11-12-3.md | 10 | ||||
| -rw-r--r-- | tests/formatted_posts/2020-11-12-4.md | 10 | ||||
| -rw-r--r-- | tests/formatted_posts/2020-11-12-5.md | 16 | ||||
| -rw-r--r-- | tests/formatted_posts/2020-11-12-7.md | 11 | ||||
| -rw-r--r-- | tests/formatted_posts/2020-11-12-8.md | 11 | ||||
| -rw-r--r-- | tests/formatted_posts/2020-11-12-9.md | 10 |
13 files changed, 171 insertions, 41 deletions
@@ -11,29 +11,24 @@ # - replies # - single/muliple tags # - forwarded posts +# - custom post header import os import sys import json from datetime import datetime -# post: -# header -# [photo?] -# text -# [media?] - -# text: -# [str|list(str|obj, ...)] def print_post_header(post_title, post_date, post_tag): # TODO: handle post tag/tags + # TODO: support for custom header post_header = '---\ntitle: {title}\ndate: {date}\n\ tag: {tag}\nlayout: post\n---\n'.format(\ title=post_title, date=post_date, tag=post_tag) return post_header + def parse_post_photo(post, media_dir): post_photo_src = post['photo'][7:] post_photo_src = media_dir + '/' + post_photo_src @@ -42,22 +37,34 @@ def parse_post_photo(post, media_dir): return post_photo -def md_str(string): - string = string.replace('\n','\n\n') - string = string.replace('. ', '.\n') - return string +# def md_str(string): + # string = string.replace('\n','\n\n') + # string = string.replace('. ', '.\n') + # return string -def parse_text_object(obj): - ''' - Parse object from post text. - Objects are text links, plain links, underlined text, strikethrough text, - italic text, bold text, code blocks and hashtags. +def text_format(string, fmt): + if fmt in ('*', '**', '***', '`', '```'): + output = '{fmt}{txt}{fmt}' + elif fmt == '```': + output = '{fmt}\n{txt}\n{fmt}' + else: + output = '<{fmt}>{txt}</{fmt}>' + + output = output.format(fmt=fmt, txt=string.strip()) + output += '\n' * string.split('\n').count('') * string.endswith('\n') + return output + +def text_link_format(text, link): + link_fmt = '[{text}]({href})' + link_fmt = link_fmt.format(text=text.strip(), href=link) + link_fmt += '\n' * text.count('\n') * text.endswith('\n') + return link_fmt - This is a mess, but what is better? 
- ''' + +def parse_text_object(obj): obj_type = obj['type'] obj_text = obj['text'] @@ -67,41 +74,32 @@ def parse_text_object(obj): return post_tag elif obj_type == 'text_link': - post_text_link = '[{text}]({href})'.format(text=obj_text, \ - href=obj['href']) - return post_text_link + return text_link_format(obj_text, obj['href']) - elif obj_type == 'link': - post_link = '[link]({href})'.format(href=obj_text) + elif obj_type == 'link' or obj_type == 'email': + post_link = '<{href}>'.format(href=obj_text.strip()) return post_link - # I dunno how this appeared, but it seems like hyphenated numbers - # are treated as phone numbers, so return them as plain text. elif obj_type == 'phone': return obj_text - # output = '*{str}*'.format(str=string.strip()) - # output += '\n' * string.count('\n') * string.endswith('\n') + elif obj_type == 'italic': + return text_format(obj_text, '*') elif obj_type == 'bold': - post_inline_bold = '**{text}**'.format(text=obj_text.strip()) - return post_inline_bold + return text_format(obj_text, '**') - elif obj_type == 'italic': - post_inline_italic = '*{text}*'.format(text=obj_text.strip()) - return post_inline_italic + elif obj_type == 'code': + return text_format(obj_text, '`') + + elif obj_type == 'pre': + return text_format(obj_text, '```') elif obj_type == 'underline': - post_inline_underline = '<u>{text}</u>'.format(text=obj_text.strip()) - return post_inline_underline + return text_format(obj_text, 'u') elif obj_type == 'strikethrough': - post_inline_strike = '<s>{text}</s>'.format(text=obj_text.strip()) - return post_inline_strike - - elif obj_type == 'code' or obj_type == 'pre': - post_inline_code = '```\n{text}\n```'.format(text=obj_text) - return post_inline_code + return text_format(obj_text, 's') def parse_post_text(post): diff --git a/tests/formatted_posts/2020-11-12-10.md b/tests/formatted_posts/2020-11-12-10.md new file mode 100644 index 0000000..dab9ba4 --- /dev/null +++ b/tests/formatted_posts/2020-11-12-10.md @@ -0,0 +1,14 
@@ +--- +title: 10 +date: 2020-11-12 14:11:35 +tag: None +layout: post +--- + +**bold** in text +**bold below** + +normal text + +**bold with one line gap** +normal text diff --git a/tests/formatted_posts/2020-11-12-11.md b/tests/formatted_posts/2020-11-12-11.md new file mode 100644 index 0000000..2e4b2c2 --- /dev/null +++ b/tests/formatted_posts/2020-11-12-11.md @@ -0,0 +1,11 @@ +--- +title: 11 +date: 2020-11-12 14:34:47 +tag: None +layout: post +--- + +**multiline +bold + +text** diff --git a/tests/formatted_posts/2020-11-12-13.md b/tests/formatted_posts/2020-11-12-13.md new file mode 100644 index 0000000..01fa3c6 --- /dev/null +++ b/tests/formatted_posts/2020-11-12-13.md @@ -0,0 +1,11 @@ +--- +title: 13 +date: 2020-11-12 14:45:43 +tag: None +layout: post +--- + +`monospace with one baktick` + +```multiline +monospace``` diff --git a/tests/formatted_posts/2020-11-12-14.md b/tests/formatted_posts/2020-11-12-14.md new file mode 100644 index 0000000..092be7d --- /dev/null +++ b/tests/formatted_posts/2020-11-12-14.md @@ -0,0 +1,12 @@ +--- +title: 14 +date: 2020-11-12 14:48:48 +tag: None +layout: post +--- + +[text link](http://example.com/) + +<example.com> + +<example@example.com> diff --git a/tests/formatted_posts/2020-11-12-15.md b/tests/formatted_posts/2020-11-12-15.md new file mode 100644 index 0000000..b8e65e0 --- /dev/null +++ b/tests/formatted_posts/2020-11-12-15.md @@ -0,0 +1,8 @@ +--- +title: 15 +date: 2020-11-12 15:05:32 +tag: None +layout: post +--- + +*bold italic* diff --git a/tests/formatted_posts/2020-11-12-2.md b/tests/formatted_posts/2020-11-12-2.md new file mode 100644 index 0000000..ded2627 --- /dev/null +++ b/tests/formatted_posts/2020-11-12-2.md @@ -0,0 +1,8 @@ +--- +title: 2 +date: 2020-11-12 12:54:07 +tag: None +layout: post +--- + +test text post diff --git a/tests/formatted_posts/2020-11-12-3.md b/tests/formatted_posts/2020-11-12-3.md new file mode 100644 index 0000000..f9bfe3d --- /dev/null +++ b/tests/formatted_posts/2020-11-12-3.md @@ 
-0,0 +1,10 @@ +--- +title: 3 +date: 2020-11-12 12:57:31 +tag: None +layout: post +--- + + + + diff --git a/tests/formatted_posts/2020-11-12-4.md b/tests/formatted_posts/2020-11-12-4.md new file mode 100644 index 0000000..8beb106 --- /dev/null +++ b/tests/formatted_posts/2020-11-12-4.md @@ -0,0 +1,10 @@ +--- +title: 4 +date: 2020-11-12 12:57:40 +tag: None +layout: post +--- + + + +photo with text diff --git a/tests/formatted_posts/2020-11-12-5.md b/tests/formatted_posts/2020-11-12-5.md new file mode 100644 index 0000000..13e734e --- /dev/null +++ b/tests/formatted_posts/2020-11-12-5.md @@ -0,0 +1,16 @@ +--- +title: 5 +date: 2020-11-12 12:58:18 +tag: None +layout: post +--- + +*italic* + +**bold** + +<u>underline</u> + +<s>strikethrough</s> + +```monospace``` diff --git a/tests/formatted_posts/2020-11-12-7.md b/tests/formatted_posts/2020-11-12-7.md new file mode 100644 index 0000000..5a9e6ec --- /dev/null +++ b/tests/formatted_posts/2020-11-12-7.md @@ -0,0 +1,11 @@ +--- +title: 7 +date: 2020-11-12 13:01:05 +tag: None +layout: post +--- + + +<audio controls> + <source src="/files/audio_1@12-11-2020_13-01-05.ogg" type="audio/ogg"> + </audio> diff --git a/tests/formatted_posts/2020-11-12-8.md b/tests/formatted_posts/2020-11-12-8.md new file mode 100644 index 0000000..0b30a03 --- /dev/null +++ b/tests/formatted_posts/2020-11-12-8.md @@ -0,0 +1,11 @@ +--- +title: 8 +date: 2020-11-12 13:02:35 +tag: None +layout: post +--- + + +<audio controls> + <source src="/files/test-sound.ogg" type="audio/x-vorbis+ogg"> + </audio> diff --git a/tests/formatted_posts/2020-11-12-9.md b/tests/formatted_posts/2020-11-12-9.md new file mode 100644 index 0000000..cbf1a9f --- /dev/null +++ b/tests/formatted_posts/2020-11-12-9.md @@ -0,0 +1,10 @@ +--- +title: 9 +date: 2020-11-12 13:43:23 +tag: None +layout: post +--- + +*italic* in text + +*italic on whole line* |
