/*
 * Decompiled with CFR 0.152.
 */
package opennlp.grok.preprocess.chunk;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.text.DecimalFormat;
import java.text.FieldPosition;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
import java.util.StringTokenizer;
import java.util.TreeSet;
import opennlp.common.preprocess.POSTagger;
import opennlp.common.util.FilterFcn;
import opennlp.common.util.Pair;
import opennlp.common.xml.NLPDocument;
import opennlp.common.xml.XmlUtils;
import opennlp.grok.preprocess.chunk.ChunkerContextGenerator;
import opennlp.grok.preprocess.chunk.ChunkerEventCollector;
import opennlp.maxent.ContextGenerator;
import opennlp.maxent.Evalable;
import opennlp.maxent.EventCollector;
import opennlp.maxent.MaxentModel;
import opennlp.maxent.TrainEval;
import opennlp.maxent.io.SuffixSensitiveGISModelReader;
import org.jdom.Element;

public class ChunkerME
implements Evalable,
POSTagger {
    protected MaxentModel _chunkModel;
    protected ContextGenerator _contextGen = new ChunkerContextGenerator();
    protected FilterFcn _closedClassTagsFilter;
    protected boolean _useClosedClassTagsFilter = false;
    static /* synthetic */ Class class$opennlp$common$preprocess$Tokenizer;

    protected ChunkerME() {
    }

    public ChunkerME(MaxentModel mod) {
        this(mod, new ChunkerContextGenerator());
    }

    public ChunkerME(MaxentModel mod, ContextGenerator cg) {
        this._chunkModel = mod;
        this._contextGen = cg;
    }

    public String getNegativeOutcome() {
        return "";
    }

    public EventCollector getEventCollector(Reader r) {
        return new ChunkerEventCollector(r, this._contextGen);
    }

    public void process(NLPDocument doc) {
        Iterator sentIt = doc.sentenceIterator();
        while (sentIt.hasNext()) {
            Element sentEl = (Element)sentIt.next();
            List wordEls = doc.getWordElements(sentEl);
            ArrayList<String> words = new ArrayList<String>(wordEls.size());
            ArrayList<String> pos = new ArrayList<String>(wordEls.size());
            Iterator wordIt = wordEls.iterator();
            while (wordIt.hasNext()) {
                Element word = (Element)wordIt.next();
                words.add(word.getText());
                pos.add(word.getAttributeValue("pos"));
            }
            List tags = this.bestSequence(words, pos);
            int i = 0;
            while (i < tags.size()) {
                String tag = (String)tags.get(i);
                if (tag.charAt(0) == 'B' || tag.charAt(0) == 'I') {
                    Element wordEl = (Element)wordEls.get(i);
                    Element parentToken = wordEl.getParent();
                    int split = tag.lastIndexOf("-");
                    if (split > 0) {
                        String type = tag.substring(split + 1);
                        Element chunkEl = new Element("chunk");
                        chunkEl.setAttribute("type", type);
                        Element token = new Element("t");
                        token.addContent(wordEl.detach());
                        chunkEl.addContent(token);
                        while (++i < tags.size() && ((String)tags.get(i)).charAt(0) == 'I') {
                            wordEl = (Element)wordEls.get(i);
                            chunkEl.addContent(wordEl.getParent().detach());
                        }
                        XmlUtils.replace((Element)parentToken, (Element)chunkEl);
                        --i;
                    }
                }
                ++i;
            }
        }
    }

    public void processSimple(NLPDocument doc) {
        Iterator sentIt = doc.sentenceIterator();
        while (sentIt.hasNext()) {
            Element sentEl = (Element)sentIt.next();
            List wordEls = doc.getWordElements(sentEl);
            ArrayList<String> words = new ArrayList<String>(wordEls.size());
            ArrayList<String> pos = new ArrayList<String>(wordEls.size());
            Iterator wordIt = wordEls.iterator();
            while (wordIt.hasNext()) {
                Element word = (Element)wordIt.next();
                words.add(word.getText());
                pos.add(word.getAttributeValue("pos"));
            }
            List tags = this.bestSequence(words, pos);
            int index = 0;
            Iterator wordIt2 = wordEls.iterator();
            while (wordIt2.hasNext()) {
                ((Element)wordIt2.next()).setAttribute("chunk", (String)tags.get(index++));
            }
        }
    }

    private static Pair split(String s) {
        int split = s.lastIndexOf("/");
        if (split == -1) {
            System.out.println("There is a problem in your training data: " + s + " does not conform to the format WORD/TAG.");
            return new Pair((Object)s, (Object)"UNKNOWN");
        }
        return new Pair((Object)s.substring(0, split), (Object)s.substring(split + 1));
    }

    public List tag(List sentence, List pos) {
        return this.bestSequence(sentence, pos);
    }

    public List tag(List sentence) {
        ArrayList<Object> s = new ArrayList<Object>();
        ArrayList<Object> p = new ArrayList<Object>();
        int i = 0;
        while (i < sentence.size()) {
            Pair pair = ChunkerME.split((String)sentence.get(i));
            s.add(pair.a);
            p.add(pair.b);
            ++i;
        }
        return this.tag(s, p);
    }

    public String[] tag(String[] sentence) {
        ArrayList<Object> l = new ArrayList<Object>();
        ArrayList<Object> p = new ArrayList<Object>();
        int i = 0;
        while (i < sentence.length) {
            Pair pair = ChunkerME.split(sentence[i]);
            l.add(pair.a);
            p.add(pair.b);
            ++i;
        }
        List t = this.tag(l, p);
        String[] tags = new String[t.size()];
        int c = 0;
        Iterator i2 = t.iterator();
        while (i2.hasNext()) {
            tags[c] = (String)i2.next();
            ++c;
        }
        return tags;
    }

    public String tag(String sentence) {
        ArrayList<String> toks = new ArrayList<String>();
        StringTokenizer st = new StringTokenizer(sentence);
        while (st.hasMoreTokens()) {
            toks.add(st.nextToken());
        }
        List tags = this.tag(toks);
        StringBuffer sb = new StringBuffer();
        int i = 0;
        while (i < tags.size()) {
            sb.append(toks.get(i) + "/" + tags.get(i) + " ");
            ++i;
        }
        return sb.toString().trim();
    }

    public String tagTest(String sentence) {
        ArrayList<Object> toks = new ArrayList<Object>();
        ArrayList<Object> correct = new ArrayList<Object>();
        StringTokenizer st = new StringTokenizer(sentence);
        while (st.hasMoreTokens()) {
            Pair pair = ChunkerME.split(st.nextToken());
            toks.add(pair.a);
            correct.add(pair.b);
        }
        List tags = this.tag(toks);
        StringBuffer sb = new StringBuffer();
        int i = 0;
        while (i < tags.size()) {
            sb.append(toks.get(i) + " " + correct.get(i) + " " + tags.get(i) + "\n");
            ++i;
        }
        return sb.toString().trim();
    }

    private static void increment(Hashtable hash, String key) {
        int value = 0;
        if (key != null) {
            if (hash.containsKey(key)) {
                value = Integer.parseInt((String)hash.get(key));
            }
            hash.put(key, Integer.toString(++value));
        }
    }

    private static boolean chunkStart(String loc, String prevLoc, String type, String prevType) {
        if (loc.charAt(0) == 'B') {
            return true;
        }
        if (loc.charAt(0) == 'I' && prevLoc.charAt(0) == 'O') {
            return true;
        }
        return loc.charAt(0) != 'O' && !type.equals(prevType);
    }

    private static boolean chunkEnd(String loc, String prevLoc, String type, String prevType) {
        if (loc.charAt(0) == 'B' || loc.charAt(0) == 'O') {
            return true;
        }
        return loc.charAt(0) != 'O' && !type.equals(prevType);
    }

    /*
     * WARNING - void declaration
     */
    public void localEval(MaxentModel chunkModel, Reader r, Evalable e, boolean verbose) {
        this._chunkModel = chunkModel;
        BufferedReader br = new BufferedReader(r);
        float nrSent = 0.0f;
        float nrWords = 0.0f;
        float nrCorrectSent = 0.0f;
        float nrCorrectTags = 0.0f;
        Hashtable nrCorrectChunks = new Hashtable();
        Hashtable nrMarkedChunks = new Hashtable();
        Hashtable nrCorrectMarkedChunks = new Hashtable();
        try {
            String line;
            while ((line = br.readLine()) != null) {
                void var6_13;
                nrSent += 1.0f;
                ArrayList<Object> toks = new ArrayList<Object>();
                ArrayList<Object> pos = new ArrayList<Object>();
                ArrayList<Object> label = new ArrayList<Object>();
                StringTokenizer st = new StringTokenizer((String)var6_13);
                while (st.hasMoreTokens()) {
                    nrWords += 1.0f;
                    Pair pair = ChunkerME.split(st.nextToken());
                    label.add(pair.b);
                    pair = ChunkerME.split((String)pair.a);
                    toks.add(pair.a);
                    pos.add(pair.b);
                }
                List tags = this.tag(toks, pos);
                StringBuffer sb = new StringBuffer();
                String prevCorrect = "O";
                String prevGuessed = "O";
                String prevCorrectType = null;
                String prevGuessedType = null;
                boolean correctChunk = false;
                int i = 0;
                while (i < tags.size()) {
                    String guessed = (String)tags.get(i);
                    String correct = (String)label.get(i);
                    int split = guessed.lastIndexOf("-");
                    String guessedType = guessed.substring(split + 1);
                    split = correct.lastIndexOf("-");
                    String correctType = correct.substring(split + 1);
                    boolean correctEnd = false;
                    boolean guessedEnd = false;
                    if (prevCorrectType != null) {
                        correctEnd = ChunkerME.chunkEnd(correct, prevCorrect, correctType, prevCorrectType);
                    }
                    if (prevGuessedType != null) {
                        guessedEnd = ChunkerME.chunkEnd(guessed, prevGuessed, guessedType, prevGuessedType);
                    }
                    if (correctChunk) {
                        if (correctEnd && guessedEnd && prevCorrectType.equals(prevGuessedType)) {
                            ChunkerME.increment(nrCorrectMarkedChunks, prevGuessedType);
                            correctChunk = false;
                        } else if (correctEnd != guessedEnd || !correctType.equals(guessedType)) {
                            correctChunk = false;
                        }
                    }
                    boolean correctStart = ChunkerME.chunkStart(correct, prevCorrect, correctType, correctType);
                    boolean guessedStart = ChunkerME.chunkStart(guessed, prevGuessed, guessedType, guessedType);
                    if (correctStart) {
                        ChunkerME.increment(nrCorrectChunks, correctType);
                    }
                    if (guessedStart) {
                        ChunkerME.increment(nrMarkedChunks, guessedType);
                        if (correctStart && correctType.equals(guessedType)) {
                            correctChunk = true;
                        }
                    }
                    prevCorrect = correct;
                    prevGuessed = guessed;
                    prevCorrectType = correctType;
                    prevGuessedType = guessedType;
                    if (correct.charAt(0) == 'O') {
                        prevCorrectType = null;
                    }
                    if (guessed.charAt(0) == 'O') {
                        prevGuessedType = null;
                    }
                    if (guessed.equals(correct)) {
                        nrCorrectTags += 1.0f;
                    }
                    ++i;
                }
                if (!correctChunk) continue;
                ChunkerME.increment(nrCorrectMarkedChunks, prevGuessedType);
            }
        }
        catch (IOException E) {
            E.printStackTrace();
        }
        System.out.println("\tphrase\tprecision\trecall\t\tFB1");
        DecimalFormat format = new DecimalFormat("#0.0000");
        FieldPosition field = new FieldPosition(0);
        float totalNrCorrect = 0.0f;
        float totalNrMarked = 0.0f;
        float totalNrCorrectMarked = 0.0f;
        Enumeration en = nrCorrectChunks.keys();
        while (en.hasMoreElements()) {
            String key = (String)en.nextElement();
            float a = 0.0f;
            float b = 0.0f;
            float c = 0.0f;
            if (nrCorrectChunks.containsKey(key)) {
                a = Integer.parseInt((String)nrCorrectChunks.get(key));
            }
            if (nrMarkedChunks.containsKey(key)) {
                b = Integer.parseInt((String)nrMarkedChunks.get(key));
            }
            if (nrCorrectMarkedChunks.containsKey(key)) {
                c = Integer.parseInt((String)nrCorrectMarkedChunks.get(key));
            }
            totalNrCorrect += a;
            totalNrMarked += b;
            totalNrCorrectMarked += c;
            float pre = c / b;
            float rec = c / a;
            float f = 2.0f * pre * rec / (pre + rec);
            StringBuffer pr = new StringBuffer();
            format.format((double)pre, pr, field);
            StringBuffer re = new StringBuffer();
            format.format((double)rec, re, field);
            StringBuffer fb = new StringBuffer();
            format.format((double)f, fb, field);
            System.out.println("\t" + key + "\t" + pr + "\t\t" + re + "\t\t" + fb);
        }
        float pre = totalNrCorrectMarked / totalNrMarked;
        float rec = totalNrCorrectMarked / totalNrCorrect;
        float f = 2.0f * pre * rec / (pre + rec);
        System.out.println("\ttotal\t" + pre + "\t" + rec + "\t" + f);
        System.out.println("processed " + nrWords + " tokens with " + totalNrCorrect + " phrases; found " + totalNrMarked + "; correct " + totalNrCorrectMarked);
        System.out.println("\nnr correct tags  : " + nrCorrectTags);
        System.out.println("accuracy         : " + nrCorrectTags / nrWords);
    }

    public List bestSequence(List words, List pos) {
        int n = words.size();
        int N = 3;
        Sequence s = new Sequence();
        SortedSet[] h = new SortedSet[n + 1];
        int i = 0;
        while (i < h.length) {
            h[i] = new TreeSet();
            ++i;
        }
        h[0].add(new Sequence());
        int i2 = 0;
        while (i2 < n) {
            int sz = Math.min(N, h[i2].size());
            int j = 1;
            while (j <= sz) {
                Sequence top = (Sequence)h[i2].first();
                h[i2].remove(top);
                Object[] params = new Object[]{words, pos, top, new Integer(i2)};
                double[] scores = this._chunkModel.eval(this._contextGen.getContext((Object)params));
                int p = 0;
                while (p < scores.length) {
                    if (!this._useClosedClassTagsFilter || this._closedClassTagsFilter.filter((String)words.get(i2), this._chunkModel.getOutcome(p))) {
                        Sequence newS = top.copy();
                        newS.add(this._chunkModel.getOutcome(p), scores[p]);
                        h[i2 + 1].add(newS);
                    }
                    ++p;
                }
                ++j;
            }
            ++i2;
        }
        return (List)h[n].first();
    }

    public Set requires() {
        HashSet<Class> set = new HashSet<Class>();
        set.add(class$opennlp$common$preprocess$Tokenizer == null ? (class$opennlp$common$preprocess$Tokenizer = ChunkerME.class$("opennlp.common.preprocess.Tokenizer")) : class$opennlp$common$preprocess$Tokenizer);
        return set;
    }

    public static void main(String[] args) throws IOException {
        if (args[0].equals("-test")) {
            System.out.println(new ChunkerME((MaxentModel)new SuffixSensitiveGISModelReader(new File(args[1])).getModel()).tag(args[3]));
            return;
        }
        TrainEval.run((String[])args, (Evalable)new ChunkerME());
    }

    static /* synthetic */ Class class$(String x0) {
        try {
            return Class.forName(x0);
        }
        catch (ClassNotFoundException x1) {
            throw new NoClassDefFoundError(x1.getMessage());
        }
    }

    private static class Sequence
    extends ArrayList
    implements Comparable {
        double score = 1.0;

        Sequence() {
        }

        Sequence(double s) {
            this.score = s;
        }

        public int compareTo(Object o) {
            Sequence s = (Sequence)o;
            if (this.score < s.score) {
                return 1;
            }
            if (this.score == s.score) {
                return 0;
            }
            return -1;
        }

        public Sequence copy() {
            Sequence s = new Sequence(this.score);
            s.addAll(this);
            return s;
        }

        public void add(String t, double d) {
            super.add(t);
            this.score *= d;
        }

        public String toString() {
            return super.toString() + " " + this.score;
        }
    }
}

