From 48d5e233fda099780ab6705505f4154b0cefa11b Mon Sep 17 00:00:00 2001
From: la-ninpre <leobrekalini@gmail.com>
Date: Thu, 26 Nov 2020 23:56:43 +0300
Subject: make a rename

Note that the repository name has also changed.
---
 README.md |   3 +-
 parse.py  | 195 -----------------------------------------------------------
 tg2md.py  | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 204 insertions(+), 196 deletions(-)
 delete mode 100644 parse.py
 create mode 100644 tg2md.py

diff --git a/README.md b/README.md
index 2eb604e..959d9b3 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# telegram2md
+# tg2md
 
 This script parses output from Telegram channel and converts each post to 
 jekyll-applicable post in markdown.
@@ -37,6 +37,7 @@ and `files`, you should change `photo_dir` and `media_dir` variables.
 
 ## todo's
 
+- use argparse module instead of manual `sys.argv` handling
 - replies
 - single/muliple tags
 - forwarded posts
diff --git a/parse.py b/parse.py
deleted file mode 100644
index a19cf0b..0000000
--- a/parse.py
+++ /dev/null
@@ -1,195 +0,0 @@
-#!/usr/bin/env python
-
-# parse.py - converts telegram json to jekyll md.
-# Copyright (c) 2020, Lev Brekalov
-
-# TODO summary:
-# - replies
-# - single/muliple tags
-# - forwarded posts
-# - custom post header
-
-import os
-import sys
-import json
-from datetime import datetime
-
-
-def print_post_header(post_title, post_date, post_tag):
-    # TODO: handle post tag/tags
-    # TODO: support for custom header
-    post_header = '---\ntitle: {title}\ndate: {date}\n\
-tag: {tag}\nlayout: post\n---\n'.format(\
-            title=post_title, date=post_date, tag=post_tag)
-
-    return post_header
-
-
-def parse_post_photo(post, media_dir):
-    post_photo_src = post['photo'][7:]
-    post_photo_src = media_dir + '/' + post_photo_src
-    post_photo = '![image]({src})\n\n'.format(\
-            src=post_photo_src)
-
-    return post_photo
-
-
-# def md_str(string):
-    # string = string.replace('\n','\n\n')
-    # string = string.replace('. ', '.\n')
-
-    # return string
-
-
-def text_format(string, fmt):
-    if fmt in ('*', '**', '***', '`', '```'):
-        output = '{fmt}{txt}{fmt}'
-    elif fmt == '```':
-        output = '{fmt}\n{txt}\n{fmt}'
-    else:
-        output = '<{fmt}>{txt}</{fmt}>'
-
-    output = output.format(fmt=fmt, txt=string.strip())
-    output += '\n' * string.split('\n').count('') * string.endswith('\n')
-    return output
-
-def text_link_format(text, link):
-    link_fmt = '[{text}]({href})'
-    link_fmt = link_fmt.format(text=text.strip(), href=link)
-    link_fmt += '\n' * text.count('\n') * text.endswith('\n')
-    return link_fmt
-
-
-def parse_text_object(obj):
-
-    obj_type = obj['type']
-    obj_text = obj['text']
-
-    if obj_type == 'hashtag':
-        post_tag = obj_text
-        return post_tag
-
-    elif obj_type == 'text_link':
-        return text_link_format(obj_text, obj['href'])
-
-    elif obj_type == 'link' or obj_type == 'email':
-        post_link = '<{href}>'.format(href=obj_text.strip())
-        return post_link
-
-    elif obj_type == 'phone':
-        return obj_text
-
-    elif obj_type == 'italic':
-        return text_format(obj_text, '*')
-
-    elif obj_type == 'bold':
-        return text_format(obj_text, '**')
-
-    elif obj_type == 'code':
-        return text_format(obj_text, '`')
-
-    elif obj_type == 'pre':
-        return text_format(obj_text, '```')
-
-    elif obj_type == 'underline':
-        return text_format(obj_text, 'u')
-
-    elif obj_type == 'strikethrough':
-        return text_format(obj_text, 's')
-
-
-def parse_post_text(post):
-    # TODO: handle reply-to
-    post_raw_text = post['text']
-    post_parsed_text = ''
-
-    if type(post_raw_text) == str:
-        return str(post_raw_text)
-
-    else:
-        for obj in post_raw_text:
-            if type(obj) == str:
-                post_parsed_text += obj
-            else:
-                post_parsed_text += str(parse_text_object(obj))
-
-        return post_parsed_text
-
-
-def parse_post_media(post, media_dir):
-    # get filename without parent directory
-    post_media_src = post['file'][post['file'].rfind("/") + 1:]
-
-    # add parent directory
-    post_media_src = media_dir + '/' + post_media_src
-    post_media = '\n<audio controls>\n \
-        <source src="{src}" type="{mime_type}">\n \
-        </audio>'.format(src=post_media_src, mime_type=post['mime_type'])
-
-    return post_media
-    
-
-def parse_post(post):
-    post_output = ''
-    
-    # optional image
-    photo_dir = '/photos'
-    if 'photo' in post:
-        post_output += str(parse_post_photo(post, photo_dir))
-
-    # post text
-    post_output += str(parse_post_text(post))
-
-    # optional media
-    media_dir = '/files'
-    if 'media_type' in post:
-        post_output += str(parse_post_media(post, media_dir))
-
-    return post_output
-
-
-def main():
-    # try directory from first argument
-    try:
-        input_dir = sys.argv[1]
-    except IndexError as e:
-        # if it's not specified, use current directory
-        input_dir = '.'
-
-    # create output directory
-    out_dir = input_dir + '/' + 'formatted_posts'
-    try:
-        os.mkdir(out_dir)
-    except FileExistsError as e:
-        pass
-
-    # load json file
-    json_path = input_dir + '/' + 'result.json'
-    try:
-        with open(json_path, 'r') as f:
-            data = json.load(f)
-    except FileNotFoundError as e:
-        sys.exit('result.json not found.\nPlease, specify right directory')
-
-    # load only messages
-    raw_posts = data['messages']
-
-    for post in raw_posts:
-    # TODO: handle forwarded posts
-        if post['type'] == 'message' and 'forwarded_from' not in post:
-
-            post_date = datetime.fromisoformat(post['date'])
-            post_id = post['id']
-            post_filename = out_dir + '/' + str(post_date.date()) + '-' \
-                    + str(post_id) + '.md'
-
-            with open (post_filename, 'w') as f:
-                print(print_post_header(
-                    post_id, post_date, None), 
-                    file=f)
-                print(parse_post(post), file=f)
-
-
-if __name__ == '__main__':
-    main()
-
diff --git a/tg2md.py b/tg2md.py
new file mode 100644
index 0000000..ca4ef35
--- /dev/null
+++ b/tg2md.py
@@ -0,0 +1,202 @@
+#!/usr/bin/env python
+
+# tg2md.py - converts telegram json to jekyll md.
+# Copyright (c) 2020, Lev Brekalov
+
+# TODO summary:
+# - replies
+# - single/multiple tags
+# - forwarded posts
+# - custom post header
+
+import os
+import sys
+import json
+from datetime import datetime
+
+
+def print_post_header(post_title, post_date, post_tag):
+    # TODO: handle post tag/tags
+    # TODO: support for custom header
+    post_header = '---\ntitle: {title}\ndate: {date}\n\
+tag: {tag}\nlayout: post\n---\n'.format(\
+            title=post_title, date=post_date, tag=post_tag)
+
+    return post_header
+
+
+def parse_post_photo(post, media_dir):
+    post_photo_src = post['photo'][7:]
+    post_photo_src = media_dir + '/' + post_photo_src
+    post_photo = '![image]({src})\n\n'.format(\
+            src=post_photo_src)
+
+    return post_photo
+
+
+# def md_str(string):
+    # string = string.replace('\n','\n\n')
+    # string = string.replace('. ', '.\n')
+
+    # return string
+
+
+def text_format(string, fmt):
+    if fmt in ('*', '**', '***', '`', '```'):
+        output = '{fmt}{txt}{fmt}'
+    elif fmt == '```':
+        output = '{fmt}\n{txt}\n{fmt}'
+    else:
+        output = '<{fmt}>{txt}</{fmt}>'
+
+    output = output.format(fmt=fmt, txt=string.strip())
+    output += '\n' * string.split('\n').count('') * string.endswith('\n')
+    return output
+
+def text_link_format(text, link):
+    # convert telegram links to anchors
+    # this assumes that telegram links point to posts in the same channel
+    if link.startswith('https://t.me/c/'):
+        link = '#' + link.split('/')[-1]
+    link_fmt = '[{text}]({href})'
+    link_fmt = link_fmt.format(text=text.strip(), href=link)
+    link_fmt += '\n' * text.count('\n') * text.endswith('\n')
+    return link_fmt
+
+
+def parse_text_object(obj):
+
+    obj_type = obj['type']
+    obj_text = obj['text']
+
+    if obj_type == 'hashtag':
+        post_tag = obj_text
+        return post_tag
+
+    elif obj_type == 'text_link':
+        return text_link_format(obj_text, obj['href'])
+
+    elif obj_type == 'link' or obj_type == 'email':
+        link = obj_text.strip()
+        link = 'https://' * (obj_type == 'link') * \
+                (1 - link.startswith('https://')) + link
+        post_link = '<{href}>'.format(href=link)
+        return post_link
+
+    elif obj_type == 'phone':
+        return obj_text
+
+    elif obj_type == 'italic':
+        return text_format(obj_text, '*')
+
+    elif obj_type == 'bold':
+        return text_format(obj_text, '**')
+
+    elif obj_type == 'code':
+        return text_format(obj_text, '`')
+
+    elif obj_type == 'pre':
+        return text_format(obj_text, '```')
+
+    elif obj_type == 'underline':
+        return text_format(obj_text, 'u')
+
+    elif obj_type == 'strikethrough':
+        return text_format(obj_text, 's')
+
+
+def parse_post_text(post):
+    # TODO: handle reply-to
+    post_raw_text = post['text']
+    post_parsed_text = ''
+
+    if type(post_raw_text) == str:
+        return str(post_raw_text)
+
+    else:
+        for obj in post_raw_text:
+            if type(obj) == str:
+                post_parsed_text += obj
+            else:
+                post_parsed_text += str(parse_text_object(obj))
+
+        return post_parsed_text
+
+
+def parse_post_media(post, media_dir):
+    # get filename without parent directory
+    post_media_src = post['file'][post['file'].rfind("/") + 1:]
+
+    # add parent directory
+    post_media_src = media_dir + '/' + post_media_src
+    post_media = '\n<audio controls>\n \
+        <source src="{src}" type="{mime_type}">\n \
+        </audio>'.format(src=post_media_src, mime_type=post['mime_type'])
+
+    return post_media
+    
+
+def parse_post(post):
+    post_output = ''
+    
+    # optional image
+    photo_dir = '/photos'
+    if 'photo' in post:
+        post_output += str(parse_post_photo(post, photo_dir))
+
+    # post text
+    post_output += str(parse_post_text(post))
+
+    # optional media
+    media_dir = '/files'
+    if 'media_type' in post:
+        post_output += str(parse_post_media(post, media_dir))
+
+    return post_output
+
+
+def main():
+    # try directory from first argument
+    try:
+        input_dir = sys.argv[1]
+    except IndexError as e:
+        # if it's not specified, use current directory
+        input_dir = '.'
+
+    # create output directory
+    out_dir = input_dir + '/' + 'formatted_posts'
+    try:
+        os.mkdir(out_dir)
+    except FileExistsError as e:
+        pass
+
+    # load json file
+    json_path = input_dir + '/' + 'result.json'
+    try:
+        with open(json_path, 'r') as f:
+            data = json.load(f)
+    except FileNotFoundError as e:
+        sys.exit('result.json not found.\nPlease, specify right directory')
+
+    # load only messages
+    raw_posts = data['messages']
+
+    for post in raw_posts:
+    # TODO: handle forwarded posts
+        if post['type'] == 'message' and 'forwarded_from' not in post:
+
+            post_date = datetime.fromisoformat(post['date'])
+            post_id = post['id']
+            post_filename = out_dir + '/' + str(post_date.date()) + '-' \
+                    + str(post_id) + '.md'
+
+            with open (post_filename, 'w') as f:
+                print(print_post_header(
+                    post_id, post_date, None), 
+                    file=f)
+                print(parse_post(post), file=f)
+
+
+if __name__ == '__main__':
+    main()
+
-- 
cgit v1.2.3