#!/usr/bin/env python

# parse.py - converts telegram json to jekyll md.
# Copyright (c) 2020, Lev Brekalov

# TODO summary:
# - replies
# - single/muliple tags
# - forwarded posts
# - custom post header

import os
import sys
import json
from datetime import datetime


def print_post_header(post_title, post_date, post_tag):
    # TODO: handle post tag/tags
    # TODO: support for custom header
    post_header = '---\ntitle: {title}\ndate: {date}\n\
tag: {tag}\nlayout: post\n---\n'.format(\
            title=post_title, date=post_date, tag=post_tag)

    return post_header


def parse_post_photo(post, media_dir):
    post_photo_src = post['photo'][7:]
    post_photo_src = media_dir + '/' + post_photo_src
    post_photo = '![image]({src})\n\n'.format(\
            src=post_photo_src)

    return post_photo


# def md_str(string):
    # string = string.replace('\n','\n\n')
    # string = string.replace('. ', '.\n')

    # return string


def text_format(string, fmt):
    if fmt in ('*', '**', '***', '`', '```'):
        output = '{fmt}{txt}{fmt}'
    elif fmt == '```':
        output = '{fmt}\n{txt}\n{fmt}'
    else:
        output = '<{fmt}>{txt}</{fmt}>'

    output = output.format(fmt=fmt, txt=string.strip())
    output += '\n' * string.split('\n').count('') * string.endswith('\n')
    return output

def text_link_format(text, link):
    # convert telegram links to anchors
    # this implies that telegram links are pointing to the same channel
    if link.startswith('https://t.me/c/'):
        link = '#' + link.split('/')[-1]
    link_fmt = '[{text}]({href})'
    link_fmt = link_fmt.format(text=text.strip(), href=link)
    link_fmt += '\n' * text.count('\n') * text.endswith('\n')
    return link_fmt


def parse_text_object(obj):

    obj_type = obj['type']
    obj_text = obj['text']

    if obj_type == 'hashtag':
        post_tag = obj_text
        return post_tag

    elif obj_type == 'text_link':
        return text_link_format(obj_text, obj['href'])

    elif obj_type == 'link' or obj_type == 'email':
        link = obj_text.strip()
        link = 'https://' * (obj_type == 'link') * \
                (1 - link.startswith('https://')) + link
        post_link = '<{href}>'.format(href=link)
        return post_link

    elif obj_type == 'phone':
        return obj_text

    elif obj_type == 'italic':
        return text_format(obj_text, '*')

    elif obj_type == 'bold':
        return text_format(obj_text, '**')

    elif obj_type == 'code':
        return text_format(obj_text, '`')

    elif obj_type == 'pre':
        return text_format(obj_text, '```')

    elif obj_type == 'underline':
        return text_format(obj_text, 'u')

    elif obj_type == 'strikethrough':
        return text_format(obj_text, 's')


def parse_post_text(post):
    # TODO: handle reply-to
    post_raw_text = post['text']
    post_parsed_text = ''

    if type(post_raw_text) == str:
        return str(post_raw_text)

    else:
        for obj in post_raw_text:
            if type(obj) == str:
                post_parsed_text += obj
            else:
                post_parsed_text += str(parse_text_object(obj))

        return post_parsed_text


def parse_post_media(post, media_dir):
    # get filename without parent directory
    post_media_src = post['file'][post['file'].rfind("/") + 1:]

    # add parent directory
    post_media_src = media_dir + '/' + post_media_src
    post_media = '\n<audio controls>\n \
        <source src="{src}" type="{mime_type}">\n \
        </audio>'.format(src=post_media_src, mime_type=post['mime_type'])

    return post_media
    

def parse_post(post):
    post_output = ''
    
    # optional image
    photo_dir = '/photos'
    if 'photo' in post:
        post_output += str(parse_post_photo(post, photo_dir))

    # post text
    post_output += str(parse_post_text(post))

    # optional media
    media_dir = '/files'
    if 'media_type' in post:
        post_output += str(parse_post_media(post, media_dir))

    return post_output


def main():
    # try directory from first argument
    try:
        input_dir = sys.argv[1]
    except IndexError as e:
        # if it's not specified, use current directory
        input_dir = '.'

    # create output directory
    out_dir = input_dir + '/' + 'formatted_posts'
    try:
        os.mkdir(out_dir)
    except FileExistsError as e:
        pass

    # load json file
    json_path = input_dir + '/' + 'result.json'
    try:
        with open(json_path, 'r') as f:
            data = json.load(f)
    except FileNotFoundError as e:
        sys.exit('result.json not found.\nPlease, specify right directory')

    # load only messages
    raw_posts = data['messages']

    for post in raw_posts:
    # TODO: handle forwarded posts
        if post['type'] == 'message' and 'forwarded_from' not in post:

            post_date = datetime.fromisoformat(post['date'])
            post_id = post['id']
            post_filename = out_dir + '/' + str(post_date.date()) + '-' \
                    + str(post_id) + '.md'

            with open (post_filename, 'w') as f:
                print(print_post_header(
                    post_id, post_date, None), 
                    file=f)
                print(parse_post(post), file=f)


if __name__ == '__main__':
    main()