aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorla-ninpre <leobrekalini@gmail.com>2020-12-10 20:09:50 +0300
committerla-ninpre <leobrekalini@gmail.com>2020-12-10 20:09:50 +0300
commit4c57d96e545b73e0423566f22695179f58341abf (patch)
tree191d0875e8ab2655a9f325040b836fd50cab81a9
parent63d6cfec84bfa750604734e269d3c1c9e3c1fc04 (diff)
downloadtg2md-4c57d96e545b73e0423566f22695179f58341abf.tar.gz
tg2md-4c57d96e545b73e0423566f22695179f58341abf.zip
add argparse
and do some refactoring
-rw-r--r--tg2md.py156
1 files changed, 111 insertions, 45 deletions
diff --git a/tg2md.py b/tg2md.py
index ca4ef35..90e219d 100644
--- a/tg2md.py
+++ b/tg2md.py
@@ -11,37 +11,61 @@
import os
import sys
+import argparse
import json
from datetime import datetime
-def print_post_header(post_title, post_date, post_tag):
+def print_default_post_header(post_title, post_date, post_tag):
+
+ '''
+ returns default post header
+ '''
+
# TODO: handle post tag/tags
# TODO: support for custom header
- post_header = '---\ntitle: {title}\ndate: {date}\n\
-tag: {tag}\nlayout: post\n---\n'.format(\
- title=post_title, date=post_date, tag=post_tag)
+ post_header = '---\n'\
+ 'title: {title}\n'\
+ 'date: {date}\n'\
+ 'tags: {tag}\n'\
+ 'layout: post\n'\
+ '---\n'.format(title=post_title, date=post_date, tag=post_tag)
return post_header
-def parse_post_photo(post, media_dir):
- post_photo_src = post['photo'][7:]
- post_photo_src = media_dir + '/' + post_photo_src
- post_photo = '![image]({src})\n\n'.format(\
- src=post_photo_src)
+def print_custom_post_header(post_header_file, *args):
+
+ '''
+ now unusable (i dunno how it may work)
+ '''
+
+ with post_header_file as f:
+ post_header_content = read(post_header_file)
+ for arg in args:
+ pass
+ return post_header_content
- return post_photo
+def parse_post_photo(post, photo_dir):
-# def md_str(string):
- # string = string.replace('\n','\n\n')
- # string = string.replace('. ', '.\n')
+ '''
+ converts photo tag to markdown image link
+ '''
- # return string
+ post_photo_src = os.path.basename(post['photo'])
+ post_photo_src = os.path.join(photo_dir, post_photo_src)
+ post_photo = '![image]({src})\n\n'.format(src=post_photo_src)
+
+ return post_photo
def text_format(string, fmt):
+
+ '''
+ wraps string in markdown-styled formatting
+ '''
+
if fmt in ('*', '**', '***', '`', '```'):
output = '{fmt}{txt}{fmt}'
elif fmt == '```':
@@ -53,7 +77,13 @@ def text_format(string, fmt):
output += '\n' * string.split('\n').count('') * string.endswith('\n')
return output
+
def text_link_format(text, link):
+
+ '''
+ formats links
+ '''
+
# convert telegram links to anchors
# this implies that telegram links are pointing to the same channel
if link.startswith('https://t.me/c/'):
@@ -66,6 +96,10 @@ def text_link_format(text, link):
def parse_text_object(obj):
+ '''
+ detects type of text object and wraps it in corresponding formatting
+ '''
+
obj_type = obj['type']
obj_text = obj['text']
@@ -79,7 +113,7 @@ def parse_text_object(obj):
elif obj_type == 'link' or obj_type == 'email':
link = obj_text.strip()
link = 'https://' * (obj_type == 'link') * \
- (1 - link.startswith('https://')) + link
+ (1 - link.startswith('https://')) + link
post_link = '<{href}>'.format(href=link)
return post_link
@@ -124,23 +158,32 @@ def parse_post_text(post):
def parse_post_media(post, media_dir):
+
+ '''
+ wraps file links into html tags
+ '''
+
# get filename without parent directory
- post_media_src = post['file'][post['file'].rfind("/") + 1:]
+ post_media_src = os.path.basename(post['file'])
# add parent directory
- post_media_src = media_dir + '/' + post_media_src
+ post_media_src = os.path.join(media_dir, post_media_src)
post_media = '\n<audio controls>\n \
<source src="{src}" type="{mime_type}">\n \
</audio>'.format(src=post_media_src, mime_type=post['mime_type'])
return post_media
-
-def parse_post(post):
+
+def parse_post(post, photo_dir, media_dir):
+
+ '''
+ converts post object to formatted text
+ '''
+
post_output = ''
-
+
# optional image
- photo_dir = '/photos'
if 'photo' in post:
post_output += str(parse_post_photo(post, photo_dir))
@@ -148,7 +191,6 @@ def parse_post(post):
post_output += str(parse_post_text(post))
# optional media
- media_dir = '/files'
if 'media_type' in post:
post_output += str(parse_post_media(post, media_dir))
@@ -156,47 +198,71 @@ def parse_post(post):
def main():
- # try directory from first argument
- try:
- input_dir = sys.argv[1]
- except IndexError as e:
- # if it's not specified, use current directory
- input_dir = '.'
- # create output directory
- out_dir = input_dir + '/' + 'formatted_posts'
+ parser = argparse.ArgumentParser(
+ usage='%(prog)s [options] json_file',
+ description='Convert exported Telegram channel data json to \
+ bunch of markdown posts ready to use with jekyll')
+ parser.add_argument(
+ 'json', metavar='json_file',
+ type=open,
+ help='result.json file from telegram export')
+ parser.add_argument(
+ '--out-dir', metavar='out_dir',
+ nargs='?', default='formatted_posts',
+ help='output directory for markdown files\
+ (default: formatted_posts)')
+ parser.add_argument(
+ '--photo-dir', metavar='photo_dir',
+ nargs='?', default='photos',
+ help='location of image files. this changes only links\
+ to photos in markdown text, so specify your\
+ desired location (default: photos)')
+ parser.add_argument(
+ '--media-dir', metavar='media_dir',
+ nargs='?', default='files',
+ help='location of media files. this changes only links\
+ to files in markdown text, so specify your \
+ desired location (default: files)')
+ args_wip = parser.add_argument_group('work in progress')
+ args_wip.add_argument(
+ '--post-header', metavar='post_header',
+ nargs='?',
+ help='yaml front matter for your posts \
+ (now doesn\'t work)')
+
+ args = parser.parse_args()
+
try:
- os.mkdir(out_dir)
- except FileExistsError as e:
+ os.mkdir(args.out_dir)
+ except FileExistsError:
pass
# load json file
- json_path = input_dir + '/' + 'result.json'
try:
- with open(json_path, 'r') as f:
+ with args.json as f:
data = json.load(f)
- except FileNotFoundError as e:
- sys.exit('result.json not found.\nPlease, specify right directory')
+ except FileNotFoundError:
+ sys.exit('result.json not found.\nPlease, specify right file')
# load only messages
raw_posts = data['messages']
for post in raw_posts:
- # TODO: handle forwarded posts
+ # TODO: handle forwarded posts
if post['type'] == 'message' and 'forwarded_from' not in post:
post_date = datetime.fromisoformat(post['date'])
post_id = post['id']
- post_filename = out_dir + '/' + str(post_date.date()) + '-' \
- + str(post_id) + '.md'
+ post_filename = str(post_date.date()) + '-' + str(post_id) + '.md'
+ post_path = os.path.join(args.out_dir, post_filename)
- with open (post_filename, 'w') as f:
- print(print_post_header(
- post_id, post_date, None),
- file=f)
- print(parse_post(post), file=f)
+ with open(post_path, 'w') as f:
+ print(print_default_post_header(
+ post_id, post_date, None),
+ file=f)
+ print(parse_post(post, args.photo_dir, args.media_dir), file=f)
if __name__ == '__main__':
main()
-