#!/usr/bin/env python3
#
# This file is part of Step.
# SPDX-FileCopyrightText: 2009 Vladimir Kuznetsov <ks.vladimir@gmail.com>
#
# SPDX-License-Identifier: GPL-2.0-or-later

import xml.parsers.expat
import xml.sax.saxutils
from io import BytesIO
import optparse
import tempfile
import gettext
import locale
import copy
import sys
import re
import os

# The gettext module only gained message-context lookups (pgettext) in
# Python 3.8, so context support is implemented here ourselves.
class GNUTranslations(gettext.GNUTranslations):
    """GNU .mo catalog extended with a context-aware lookup method."""

    # The encoding of a msgctxt and a msgid in a .mo file is
    # msgctxt + "\x04" + msgid (gettext version >= 0.15)
    CONTEXT_ENCODING = "%s\x04%s"

    def upgettext(self, context, message):
        """Return the translation of message within the given context.

        The catalog is searched for the combined context/message key.  When
        the key is absent the lookup is delegated to the fallback catalog,
        if there is one; otherwise message itself is returned as a str.
        """
        ctxt_message_id = self.CONTEXT_ENCODING % (context, message)
        missing = object()
        tmsg = self._catalog.get(ctxt_message_id, missing)
        if tmsg is not missing:
            return tmsg

        if self._fallback:
            # The fallback is not necessarily an instance of this class: a
            # plain gettext translations object has no upgettext() and only
            # offers pgettext() on new enough Python versions.
            for lookup_name in ('upgettext', 'pgettext'):
                lookup = getattr(self._fallback, lookup_name, None)
                if lookup is not None:
                    return lookup(context, message)
        return str(message)

class XmlFileTranslator(object):
    def __init__(self, opt):
        self.opt = opt
        self.tag_regex = []
        for r in self.opt.tag_regex:
            self.tag_regex.append(re.compile(r))

    def init_parser(self):
        """Create a new expat parser bound to this object's handlers.

        The parser is stored in self.parser, replacing any previous one.
        """
        parser = xml.parsers.expat.ParserCreate()
        # start_element_handler walks the attributes as a flat
        # [name, value, name, value, ...] sequence
        parser.ordered_attributes = 1
        parser.StartElementHandler = self.start_element_handler
        parser.EndElementHandler = self.end_element_handler
        parser.DefaultHandler = self.default_handler
        self.parser = parser

    def parse(self, infile):
        try:
            self.parser.ParseFile(infile)
        except xml.parsers.expat.ExpatError as e:
            raise

    def translate(self, infile_name, infile, outfile,
                translation, i18n_stack_base=[], i18n_line_base=0):
        self.i18n_file = infile_name
        self.outfile = outfile
        self.translation = translation

        self.i18n_line_base = i18n_line_base
        self.i18n_stack_base = i18n_stack_base
        self.i18n_stack = []
        self.i18n_save = False
        self.i18n_string = ''

        self.init_parser()
        self.parse(infile)

    def extract(self, infile_name, infile, outfile,
                i18n_stack_base=[], i18n_line_base=0):
        self.i18n_file = infile_name
        self.outfile = outfile
        self.translation = None

        self.i18n_line_base = i18n_line_base
        self.i18n_stack_base = []
        self.i18n_stack = []
        self.i18n_save = False
        self.i18n_string = ''

        self.init_parser()
        self.parse(infile)

    def parse_unquoted_substring(self, string):
        infile = BytesIO(string.encode('UTF-8'))
        translator1 = XmlFileTranslator(self.opt.parse_unquoted)
        if self.opt.extract:
            translator1.extract(self.i18n_file, infile,
                    self.outfile, self.i18n_stack, self.i18n_stack[-1]['line'])
        else:
            outfile = BytesIO()
            translator1.translate(self.i18n_file, infile,
                outfile, self.translation, self.i18n_stack, self.i18n_stack[-1]['line'])
            string = outfile.getvalue().decode()
            if self.opt.unquote:
                string = self.quote_str(string)
            self.outfile.write(self.encode_utf8(string))

    def quote_str(self, s):
        """Return s with the characters & " > < replaced by XML entities."""
        entities = {'&': '&amp;', '"': '&quot;', '>': '&gt;', '<': '&lt;'}
        # One pass over the characters: an ampersand that belongs to a
        # freshly inserted entity is never escaped a second time
        return ''.join(entities.get(char, char) for char in s)

    def unquote_str(self, s):
        """Return s with the entities for < > " & replaced by the characters."""
        # The ampersand entity goes last, so that an escaped entity such as
        # '&amp;lt;' yields the text '&lt;' and is not unescaped twice
        replacements = (
            ('&lt;', '<'),
            ('&gt;', '>'),
            ('&quot;', '"'),
            ('&amp;', '&'),
        )
        for entity, char in replacements:
            s = s.replace(entity, char)
        return s

    def encode_str(self, s):
        """Return s as a double-quoted string literal for the extracted file.

        Backslashes and double quotes are escaped, carriage returns become
        an escape sequence, and every newline becomes an escape sequence
        followed by a closing quote, a real line break and an opening quote,
        so that multi-line text turns into adjacent literals.
        """
        escapes = str.maketrans({
            '\\': '\\\\',
            '"': '\\"',
            '\r': '\\r',
            '\n': '\\n"\n"',
        })
        return '"%s"' % (s.translate(escapes),)

    def encode_utf8(self, s):
        """Return s encoded to bytes when it is a str, and s itself otherwise."""
        return s.encode() if isinstance(s, str) else s

    def select_context(self, patterns, attr):
        """Return the first pattern that can be formatted with attr.

        Each pattern is a %-format string applied to the mapping attr.
        Patterns that raise KeyError or ValueError are skipped.  None is
        returned when no pattern can be formatted.
        """
        for template in patterns:
            try:
                rendered = template % attr
            except (KeyError, ValueError):
                continue
            return rendered
        return None

    def write_data(self, data):
        if self.i18n_save:
            self.i18n_string += data
        elif self.translation is not None:
            self.outfile.write(self.encode_utf8(data))

    def write_i18n(self):
        string = self.i18n_string
        if self.opt.unquote:
            string = self.unquote_str(string)

        if self.opt.unquote and self.opt.parse_unquoted \
                and string.lstrip().startswith('<'):
            self.parse_unquoted_substring(string)

        else:
            if self.opt.strip:
                string0 = self.i18n_string.lstrip()
                begin_string = self.i18n_string[:-len(string0)]
                string = string0.rstrip()
                end_string = string0[len(string):]
            else:
                string = self.i18n_string
                begin_string = end_string = ''

            if not string:
                return

            info = {'file': self.i18n_file, \
                    'filename': os.path.basename(self.i18n_file)}
            for n in range(2):
                try:
                    d = self.i18n_stack[-1-n]
                except IndexError:
                    break
                p = '../'*n
                info[p+'tag'] = d['name']
                info[p+'line'] = d['line']
                for aname, avalue in d['attr'].items():
                    info[p+'attr/'+aname] = avalue

            ectx = self.select_context(self.opt.ectx, info)
            context = self.select_context(self.opt.context, info)

            if self.translation is not None:
                if context:
                    string = self.translation.upgettext(context, string)
                else:
                    string = self.translation.ugettext(string)

                if self.opt.unquote:
                    string = self.quote_str(string)
                self.outfile.write(self.encode_utf8(begin_string + string + end_string))

            else:
                self.outfile.write(self.encode_utf8('%s i18n: file: %s:%d\n' % \
                         (self.opt.cstart, self.i18n_file, info['line'])))

                if ectx:
                    self.outfile.write(self.encode_utf8('%s i18n: ectx: %s\n' % \
                         (self.opt.cstart, ectx)))

                if context:
                    self.outfile.write(self.encode_utf8('i18nc(%s, %s)\n' % \
                         (self.encode_str(context), self.encode_str(string))))
                else:
                    self.outfile.write(self.encode_utf8('i18n(%s)\n' % \
                        (self.encode_str(string),)))

    def default_handler(self, data):
        self.write_data(data)

    def start_element_handler(self, name, attr):
        data = '<' + name
        attr_dict = {}
        for n in range(0, len(attr), 2):
            attr_dict[attr[n]] = attr[n+1]
            data += ' %s=%s' % (attr[n], xml.sax.saxutils.quoteattr(attr[n+1]))
        data += '>'

        match = False
        if name in self.opt.tag:
            match = True
        else:
            for regex in self.tag_regex:
                if regex.search(name):
                    match = True
                    break

        if self.i18n_stack and self.opt.recursive:
            if match:
                self.write_i18n()
                self.i18n_string = ''
                self.i18n_save = False

        self.write_data(data)

        if match:
            self.i18n_stack.append(dict(name=name, attr=attr_dict,
                    line=self.i18n_line_base+self.parser.CurrentLineNumber))
            self.i18n_save = True

    def end_element_handler(self, name):
        if self.i18n_stack and self.i18n_stack[-1]['name'] == name:
            if self.opt.recursive or len(self.i18n_stack) == 1:
                self.write_i18n()
                self.i18n_string = ''
                self.i18n_save = False
            self.i18n_stack.pop()

        self.write_data('</%s>' % (name,))

        if self.i18n_stack:
            self.i18n_stack[-1]['line'] = self.i18n_line_base + \
                                    self.parser.CurrentLineNumber
            self.i18n_save = True

def safe_remove(fname):
    """Delete the file fname; a failure to do so is silently ignored."""
    try:
        os.remove(fname)
    except OSError:
        # Best effort only.  IOError is an alias of OSError in Python 3, so
        # this single class covers both names.
        return

def open_mo_file(opt, mo_file_name, remove=False):
    """Load the compiled catalog mo_file_name and return it.

    opt          -- parsed command-line options (not used here)
    mo_file_name -- path of the .mo file to read
    remove       -- if true, delete the file afterwards, whether loading
                    succeeded or not

    On failure a message is written to stderr and the program exits with
    status 1.
    """
    try:
        try:
            mo_file = open(mo_file_name, 'rb')
        except IOError as e:
            # There is no file object to close here: open() itself failed
            sys.stderr.write('Cannot open .mo file: %s\n' % (str(e),))
            sys.exit(1)

        with mo_file:
            try:
                return GNUTranslations(mo_file)
            except IOError as e:
                sys.stderr.write('Cannot parse .mo file: %s\n' % (str(e),))
                sys.exit(1)
    finally:
        # Runs after the file has been closed, on success and on exit alike
        if remove:
            safe_remove(mo_file_name)

def compile_po_file(opt, po_file_name):
    """Compile po_file_name with msgfmt and return the loaded catalog.

    The catalog is compiled into a temporary .mo file, which is deleted
    again once it has been loaded or when msgfmt fails.  If msgfmt cannot be
    run or reports an error, a message is written to stderr and the program
    exits with status 1.
    """
    import subprocess

    (mo_file_id, mo_file_name) = tempfile.mkstemp(suffix='.mo')
    os.close(mo_file_id)

    # Pass the file names as separate arguments instead of building a shell
    # command line, so names containing quotes or shell metacharacters work
    try:
        failed = subprocess.call(
                ['msgfmt', po_file_name, '-o', mo_file_name]) != 0
    except OSError:
        # msgfmt could not be started at all
        failed = True

    if failed:
        safe_remove(mo_file_name)
        sys.stderr.write('Error running msgfmt\n')
        sys.exit(1)

    return open_mo_file(opt, mo_file_name, remove=True)

def decode_options(options, str_options):
    """Normalise the string-valued options named in str_options.

    options     -- object holding the parsed options as attributes
    str_options -- names of the attributes to process

    Option values are already text, so nothing has to be decoded: string
    values are left untouched and list values are replaced by a shallow
    copy.  The locale encoding is deliberately not queried any more, since
    its value was never used.
    """
    for name in str_options:
        value = getattr(options, name)
        if isinstance(value, list):
            setattr(options, name, list(value))

def main():
    """Command-line entry point.

    Parses the command line and then either
    * extracts the i18n strings of all given XML files into one output
      (optionally post-processed by xgettext), or
    * writes a translated copy of every given XML file into the output
      directory, using a .po or .mo catalog.

    Usage and I/O errors are reported on stderr and end the program with a
    non-zero exit status.
    """
    format_options = [
        optparse.make_option('-n', '--tag', action='append', default=[],
                help='Extract TAG constants as i18n string. ' +
                     'Repeat this option to specify multiple tags'),
        optparse.make_option('-x', '--tag-regex', action='append', default=[],
                help='Extract contents of all tags matching TAG_REGEX as i18n string. ' +
                     'Repeat this option to specify multiple regex'),
        optparse.make_option('-r', '--recursive', action='store_true', default=False,
                help='Recursively pass i18n tags. This means that children tags ' +
                     'will be extracted separately even if parent is also i18n-enabled'),
        optparse.make_option('-s', '--strip', action='store_true', default=False,
                help='Strip leading and trailing whitespaces of i18n strings'),
        optparse.make_option('-q', '--unquote', action='store_true', default=False,
                help='Unquote XML-quoted entities on extraction ' +
                     'and quote them back when translating'),
        optparse.make_option('--parse-unquoted', default=None, metavar='PARSE_UNQUOTED_OPTIONS',
                help='Parse unquoted strings using PARSE_UNQUOTED_OPTIONS as options. '
                     'This option is useful when XML file contains quoted HTML fragments')
    ]
    context_options = [
        optparse.make_option('--context', action='append', default=[],
                help='Pattern to generate context. ' +
                     'TODO: pattern syntax. ' +
                     'If specified multiple times, the first matching pattern will be used'),
        optparse.make_option('--ectx', action='append', default=[],
                help='Pattern to generate ectx. Format is the same as in --context')
    ]

    optparser = optparse.OptionParser(usage='\n\t%prog --extract [options] XML_FILE...\n' +
                                     '\t%prog --translate [options] XML_FILE...')

    optparser.add_option('-e', '--extract', action='store_true', default=False,
                help='Extract i18n strings from xml files')
    optparser.add_option('-t', '--translate', action='store_true', default=False,
                help='Translate i18n strings in xml files')

    # The option lists are reused for the --parse-unquoted sub-parser below,
    # so every parser gets its own copies of the Option objects
    optgroup_format = optparse.OptionGroup(optparser, 'Formatting options')
    for option in copy.deepcopy(format_options):
        optgroup_format.add_option(option)
    for option in copy.deepcopy(context_options):
        optgroup_format.add_option(option)
    optparser.add_option_group(optgroup_format)

    optgroup_extract = optparse.OptionGroup(optparser, 'Options for extracting messages')
    optgroup_extract.add_option('--cstart', default='//',
                help='A string to used to start the comment')
    optgroup_extract.add_option('--output', help='Output file for extracted messages')
    optgroup_extract.add_option('--xgettext', action='store_true', help='Execute xgettext after extracting messages')
    optgroup_extract.add_option('--xgettext-args',
                default='-ki18n -ki18nc:1c,2 -ci18n --no-location --from-code=UTF-8',
                help='Arguments for xgettext (overrides the defaults)')
    optgroup_extract.add_option('--xgettext-extra-args', default='',
                help='Additional arguments for xgettext (appends to the defaults)')
    optparser.add_option_group(optgroup_extract)

    optgroup_translate = optparse.OptionGroup(optparser, 'Options for translating messages')
    optgroup_translate.add_option('--po-file', help='A file with translations')
    optgroup_translate.add_option('--mo-file', help='A file with translations')
    optgroup_translate.add_option('--output-dir', default='./i18n',
                help='A directory to output translated files')
    optparser.add_option_group(optgroup_translate)

    opt, args = optparser.parse_args()
    decode_options(opt, ('tag', 'tag_regex', 'context', 'ectx'))

    if not args:
        optparser.error('no xml files was specified')

    if opt.extract and opt.translate:
        optparser.error('options --extract and --translate are mutually exclusive')

    if not opt.extract and not opt.translate:
        optparser.error('please specify either --extract or --translate option')

    if opt.parse_unquoted is not None:
        # --parse-unquoted carries a nested command line.  Parse it with the
        # format/context options only and build a second options object that
        # equals the main one except for those options.
        optparser1 = optparse.OptionParser(usage='%prog --parse-unquoted="[options]"')
        options = copy.deepcopy(format_options+context_options)
        for option in options:
            optparser1.add_option(option)
        opt1, args1 = optparser1.parse_args(opt.parse_unquoted.split(' '))
        decode_options(opt1, ('tag', 'tag_regex', 'context', 'ectx'))
        if args1:
            optparser1.error('unexpected argument')
        opt.parse_unquoted = copy.deepcopy(opt)
        for option in options:
            setattr(opt.parse_unquoted, option.dest,
                    getattr(opt1, option.dest))
        # Nested fragments are not parsed for further nested fragments
        opt.parse_unquoted.parse_unquoted = None

    outfile = None
    tmp_fname = None
    if opt.extract:
        if opt.xgettext:
            # Messages go to a temporary file that is fed to xgettext later
            (tmp_id, tmp_fname) = tempfile.mkstemp(suffix='.cc')
            os.close(tmp_id)
            outfile = open(tmp_fname, 'wb')
        else:
            if opt.output:
                try:
                    outfile = open(opt.output, 'wb')
                except IOError as e:
                    optparser.error('can not open output file: ' + str(e))
            else:
                outfile = sys.stdout.buffer
    else:
        if not opt.po_file and not opt.mo_file:
            optparser.error('please specify either --po-file or --mo-file option for translation')

        if opt.po_file:
            gnutranslation = compile_po_file(opt, opt.po_file)
        else:
            gnutranslation = open_mo_file(opt, opt.mo_file)

        if not os.path.isdir(opt.output_dir):
            try:
                os.mkdir(opt.output_dir)
            except IOError as e:
                sys.stderr.write('Cannot create output directory: %s\n' % (str(e),))
                sys.exit(1)

    translator = XmlFileTranslator(opt)
    try:
        for fname in args:
            try:
                infile = open(fname, 'rb')
            except IOError as e:
                sys.stderr.write('can not open input file: %s\n' % (str(e),))
                sys.exit(1)

            with infile:
                if opt.extract:
                    try:
                        translator.extract(fname, infile, outfile)
                    except xml.parsers.expat.ExpatError as e:
                        sys.stderr.write('cannot parse file %s: %s\n' % (fname, str(e)))
                        sys.exit(1)
                    continue

                outfile_name = os.path.join(opt.output_dir, os.path.basename(fname))
                try:
                    translated_file = open(outfile_name, 'wb')
                except IOError as e:
                    sys.stderr.write('cannot open output file: %s\n' % (str(e),))
                    sys.exit(1)

                # Every translated file is closed as soon as it is complete,
                # not only the last one
                with translated_file:
                    try:
                        translator.translate(fname, infile, translated_file,
                                gnutranslation)
                    except xml.parsers.expat.ExpatError as e:
                        sys.stderr.write('can not parse file %s: %s\n' % (fname, str(e)))
                        sys.exit(1)

        if opt.extract:
            # All inputs share one output; close it before xgettext reads it
            outfile.close()

        if opt.extract and opt.xgettext:
            xgettext_cmd = 'xgettext ' + opt.xgettext_args
            xgettext_cmd += ' ' + opt.xgettext_extra_args
            if opt.output:
                xgettext_cmd += ' --output="' + opt.output + '"'
            else:
                xgettext_cmd += ' --output=-'
            xgettext_cmd += ' "' + tmp_fname + '"'
            ret = os.system(xgettext_cmd)
            if ret != 0:
                sys.stderr.write('error running xgettext: exit code = %d\n' % (ret,))
                sys.exit(1)
    finally:
        # Do not leave the intermediate xgettext input file behind
        if tmp_fname is not None:
            safe_remove(tmp_fname)

# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

