/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.time;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.time.TimeAnnotations;
import edu.stanford.nlp.time.Timex;
import edu.stanford.nlp.time.XMLUtils;
import edu.stanford.nlp.util.ArrayCoreMap;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.DataFilePaths;
import edu.stanford.nlp.util.SystemUtils;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.regex.Pattern;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;

/**
 * Annotator that finds temporal expressions by running an external
 * {@code heideltime.jar} in a child JVM and converting the TIMEX3 elements of
 * its XML output into {@link CoreMap}s.
 *
 * <p>The resulting list is stored under
 * {@code TimeAnnotations.TimexAnnotations} on the document, and each sentence
 * receives the sub-list of expressions that fall inside its character span.
 *
 * <p>The directory containing {@code heideltime.jar} is taken, in order of
 * precedence, from the {@value #HEIDELTIME_PATH_PROPERTY} entry of the supplied
 * {@link Properties}, the {@code heideltime} system property, or the default
 * location under {@code $NLP_DATA_HOME}.
 */
public class HeidelTimeAnnotator
implements Annotator {
    private static final String BASE_PATH = "$NLP_DATA_HOME/packages/heideltime/";
    private static final String DEFAULT_PATH = DataFilePaths.convert(BASE_PATH);

    /** Matches the closing DOC tag and everything after it (DOTALL, so newlines too). */
    private static final Pattern DOC_CLOSE = Pattern.compile("</DOC>.*", Pattern.DOTALL);
    /** Literal patterns for two malformed tag interleavings that are repaired before parsing. */
    private static final Pattern BAD_NESTED_TIMEX = Pattern.compile(Pattern.quote("<T</TIMEX3>IMEX3"));
    private static final Pattern BAD_NESTED_TIMEX_2 = Pattern.compile(Pattern.quote("<TI</TIMEX3>MEX3"));

    private final File heideltimePath;
    private final boolean outputResults;

    /** Properties key naming the directory that contains {@code heideltime.jar}. */
    public static final String HEIDELTIME_PATH_PROPERTY = "heideltime.path";
    /** Properties key; when {@code "true"}, the extracted expressions are printed to stdout. */
    public static final String HEIDELTIME_OUTPUT_RESULTS = "heideltime.outputResults";

    /**
     * Creates an annotator using the {@code heideltime} system property, or the
     * default data path when that property is not set.
     */
    public HeidelTimeAnnotator() {
        this(new File(System.getProperty("heideltime", DEFAULT_PATH)));
    }

    /**
     * Creates an annotator that looks for {@code heideltime.jar} in the given
     * directory. Result printing is disabled.
     *
     * @param heideltimePath directory containing {@code heideltime.jar}
     */
    public HeidelTimeAnnotator(File heideltimePath) {
        this.heideltimePath = heideltimePath;
        this.outputResults = false;
    }

    /**
     * Creates an annotator configured from properties.
     *
     * @param name  annotator name; not used by this constructor
     * @param props source of {@link #HEIDELTIME_PATH_PROPERTY} and
     *              {@link #HEIDELTIME_OUTPUT_RESULTS}
     */
    public HeidelTimeAnnotator(String name, Properties props) {
        String path = props.getProperty(HEIDELTIME_PATH_PROPERTY, System.getProperty("heideltime", DEFAULT_PATH));
        this.heideltimePath = new File(path);
        this.outputResults = Boolean.parseBoolean(props.getProperty(HEIDELTIME_OUTPUT_RESULTS, "false"));
    }

    /**
     * Annotates the document, converting any {@link IOException} into a
     * {@link RuntimeIOException}.
     *
     * @param annotation document to annotate
     */
    @Override
    public void annotate(Annotation annotation) {
        try {
            this.annotate((CoreMap)annotation);
        }
        catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }

    /**
     * Runs HeidelTime over the document text and attaches the resulting timex
     * annotations to the document and, when present, to its sentences.
     *
     * @param document document carrying a text annotation and either a
     *                 Calendar or a DocDate annotation
     * @throws IOException              if the temporary input file cannot be
     *                                  created, written or read back
     * @throws IllegalArgumentException if the document has neither a Calendar
     *                                  nor a DocDate annotation
     * @throws RuntimeException         if the tool output cannot be parsed as XML
     */
    public void annotate(CoreMap document) throws IOException {
        // Validate before touching the file system so a rejected document leaves no temp file behind.
        if (!document.has(CoreAnnotations.CalendarAnnotation.class) && !document.has(CoreAnnotations.DocDateAnnotation.class)) {
            throw new IllegalArgumentException("CoreMap must have either a Calendar or DocDate annotation");
        }
        String pubDate = HeidelTimeAnnotator.publicationDate(document);

        Element outputXML;
        File inputFile = File.createTempFile("heideltime", ".input");
        try {
            HeidelTimeAnnotator.writeInput(inputFile, document.get(CoreAnnotations.TextAnnotation.class));
            String output = HeidelTimeAnnotator.cleanOutput(this.runHeidelTime(inputFile, pubDate));
            try {
                outputXML = XMLUtils.parseElement(output);
            }
            catch (Exception ex) {
                throw new RuntimeException(String.format("error:\n%s\ninput:\n%s\noutput:\n%s", ex, IOUtils.slurpFile(inputFile), output), ex);
            }
        }
        finally {
            // Best-effort cleanup on every path, including process and parse failures.
            inputFile.delete();
        }

        List<CoreMap> timexAnns = HeidelTimeAnnotator.toTimexCoreMaps(outputXML, document);
        document.set(TimeAnnotations.TimexAnnotations.class, timexAnns);
        if (this.outputResults) {
            System.out.println(timexAnns);
        }
        HeidelTimeAnnotator.distributeToSentences(document, timexAnns);
    }

    /**
     * Returns the publication date to pass to the tool: the Calendar annotation
     * formatted as {@code yyyy-MM-dd} if present, otherwise the DocDate string
     * (which may be {@code null}).
     */
    private static String publicationDate(CoreMap document) {
        Calendar dateCalendar = document.get(CoreAnnotations.CalendarAnnotation.class);
        if (dateCalendar != null) {
            // Locale.ROOT keeps the digits ASCII regardless of the JVM's default locale.
            return String.format(Locale.ROOT, "%tF", dateCalendar);
        }
        return document.get(CoreAnnotations.DocDateAnnotation.class);
    }

    /**
     * Writes the text followed by a line separator to the file, surfacing write
     * failures that {@link PrintWriter} would otherwise swallow.
     */
    private static void writeInput(File inputFile, String text) throws IOException {
        PrintWriter inputWriter = new PrintWriter(inputFile);
        try {
            inputWriter.println(text);
            if (inputWriter.checkError()) {
                throw new IOException("failed to write HeidelTime input file " + inputFile);
            }
        }
        finally {
            inputWriter.close();
        }
    }

    /**
     * Launches {@code java -jar heideltime.jar -t NEWS [-dct pubDate] inputFile}
     * and returns everything the process wrote to standard output.
     */
    private String runHeidelTime(File inputFile, String pubDate) {
        List<String> args = new ArrayList<String>();
        args.add("java");
        args.add("-jar");
        args.add(new File(this.heideltimePath, "heideltime.jar").getPath());
        args.add("-t");
        args.add("NEWS");
        if (pubDate != null) {
            args.add("-dct");
            args.add(pubDate);
        }
        args.add(inputFile.getPath());
        ProcessBuilder process = new ProcessBuilder(args);
        StringWriter outputWriter = new StringWriter();
        SystemUtils.run(process, outputWriter, null);
        return outputWriter.getBuffer().toString();
    }

    /**
     * Normalizes the raw tool output so it can be parsed as a single XML
     * element: drops anything after the closing DOC tag, removes the DOCTYPE
     * declaration, repairs two known malformed TIMEX3 interleavings, and strips
     * the blank lines around the opening TimeML tag.
     */
    private static String cleanOutput(String output) {
        output = DOC_CLOSE.matcher(output).replaceAll("</DOC>");
        output = output.replace("<!DOCTYPE TimeML SYSTEM \"TimeML.dtd\">", "");
        output = BAD_NESTED_TIMEX.matcher(output).replaceAll("</TIMEX3><TIMEX3");
        output = BAD_NESTED_TIMEX_2.matcher(output).replaceAll("</TIMEX3><TIMEX3");
        return output.replace("\n\n<TimeML>\n\n", "<TimeML>");
    }

    /**
     * Gives each sentence the contiguous run of timexes lying inside its
     * character span. Relies on {@code timexAnns} being in document order, and
     * is a no-op when the document has no sentences annotation.
     */
    private static void distributeToSentences(CoreMap document, List<CoreMap> timexAnns) {
        List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
        if (sentences == null) {
            return;
        }
        int timexIndex = 0;
        for (CoreMap sentence : sentences) {
            int sentBegin = HeidelTimeAnnotator.beginOffset(sentence);
            int sentEnd = HeidelTimeAnnotator.endOffset(sentence);
            // Skip expressions that start before this sentence (e.g. ones that straddled the previous boundary).
            while (timexIndex < timexAnns.size() && HeidelTimeAnnotator.beginOffset(timexAnns.get(timexIndex)) < sentBegin) {
                ++timexIndex;
            }
            int sublistBegin = timexIndex;
            while (timexIndex < timexAnns.size() && sentBegin <= HeidelTimeAnnotator.beginOffset(timexAnns.get(timexIndex)) && HeidelTimeAnnotator.endOffset(timexAnns.get(timexIndex)) <= sentEnd) {
                ++timexIndex;
            }
            sentence.set(TimeAnnotations.TimexAnnotations.class, timexAnns.subList(sublistBegin, timexIndex));
        }
    }

    /** Returns the character begin offset stored on the annotation. */
    private static int beginOffset(CoreMap ann) {
        return ann.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
    }

    /** Returns the character end offset stored on the annotation. */
    private static int endOffset(CoreMap ann) {
        return ann.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
    }

    /**
     * Converts the children of the parsed output element into timex CoreMaps.
     *
     * <p>Character offsets are derived by accumulating the length of the text
     * seen so far. When every token of the original document carries token
     * begin/end annotations, each timex is also given the token offsets of the
     * token whose character boundary is nearest to its own.
     *
     * @param docElem          parsed output element whose children are text and TIMEX3 elements
     * @param originalDocument document supplying sentence and token annotations
     * @return timex CoreMaps in document order
     * @throws RuntimeException if a child is neither text nor a TIMEX3 element,
     *                          or a TIMEX3 does not have exactly one child node
     */
    private static List<CoreMap> toTimexCoreMaps(Element docElem, CoreMap originalDocument) {
        Map<Integer, Integer> beginMap = new HashMap<Integer, Integer>();
        Map<Integer, Integer> endMap = new HashMap<Integer, Integer>();
        boolean haveTokenOffsets = true;
        List<CoreMap> sentences = originalDocument.get(CoreAnnotations.SentencesAnnotation.class);
        if (sentences != null) {
            for (CoreMap sent : sentences) {
                List<CoreLabel> tokens = sent.get(CoreAnnotations.TokensAnnotation.class);
                if (tokens == null) {
                    continue;
                }
                for (CoreLabel token : tokens) {
                    Integer tokBegin = token.get(CoreAnnotations.TokenBeginAnnotation.class);
                    Integer tokEnd = token.get(CoreAnnotations.TokenEndAnnotation.class);
                    if (tokBegin == null || tokEnd == null) {
                        haveTokenOffsets = false;
                    }
                    int charBegin = token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
                    int charEnd = token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
                    beginMap.put(charBegin, tokBegin);
                    endMap.put(charEnd, tokEnd);
                }
            }
        }
        // With nothing indexed, the nearest-token search below could never terminate.
        if (beginMap.isEmpty() || endMap.isEmpty()) {
            haveTokenOffsets = false;
        }

        List<CoreMap> timexMaps = new ArrayList<CoreMap>();
        int offset = 0;
        NodeList docNodes = docElem.getChildNodes();
        for (int i = 0; i < docNodes.getLength(); ++i) {
            Node content = docNodes.item(i);
            if (content instanceof Text) {
                // getData() is this node's own text; getWholeText() would also include
                // adjacent text nodes and count them again when they are visited.
                offset += ((Text)content).getData().length();
                continue;
            }
            if (!(content instanceof Element)) {
                throw new RuntimeException("unexpected content " + content);
            }
            Element child = (Element)content;
            if (!child.getNodeName().equals("TIMEX3")) {
                throw new RuntimeException("unexpected element " + child);
            }
            Timex timex = new Timex(child);
            if (child.getChildNodes().getLength() != 1) {
                throw new RuntimeException("TIMEX3 should only contain text " + child);
            }
            String timexText = child.getTextContent();
            int charBegin = offset;
            offset += timexText.length();
            int charEnd = offset;

            ArrayCoreMap timexMap = new ArrayCoreMap();
            timexMap.set(TimeAnnotations.TimexAnnotation.class, timex);
            timexMap.set(CoreAnnotations.TextAnnotation.class, timexText);
            timexMap.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, charBegin);
            timexMap.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, charEnd);
            if (haveTokenOffsets) {
                timexMap.set(CoreAnnotations.TokenBeginAnnotation.class, HeidelTimeAnnotator.nearestTokenOffset(beginMap, charBegin));
                timexMap.set(CoreAnnotations.TokenEndAnnotation.class, HeidelTimeAnnotator.nearestTokenOffset(endMap, charEnd));
            }
            timexMaps.add(timexMap);
        }
        return timexMaps;
    }

    /**
     * Looks up the token offset recorded for {@code charOffset}, widening the
     * search one character at a time (lower side first) until a recorded
     * boundary is found.
     *
     * <p>The map must be non-empty and hold only non-null values; otherwise the
     * search would not terminate. The caller guarantees this.
     */
    private static Integer nearestTokenOffset(Map<Integer, Integer> charToToken, int charOffset) {
        Integer tokenOffset = charToToken.get(charOffset);
        for (int searchStep = 1; tokenOffset == null; ++searchStep) {
            tokenOffset = charToToken.get(charOffset - searchStep);
            if (tokenOffset == null) {
                tokenOffset = charToToken.get(charOffset + searchStep);
            }
        }
        return tokenOffset;
    }
}

