/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.ie.regexp;

import edu.stanford.nlp.ie.AbstractSequenceClassifier;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.sequences.DocumentReaderAndWriter;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

/**
 * A sequence classifier that labels tokens by matching them against a list of
 * per-token regular expressions read from a tab-separated mapping.
 *
 * <p>Each non-blank line of the mapping has two to four tab-separated columns:
 * <ol>
 *   <li>one or more whitespace-separated regular expressions, one per token;</li>
 *   <li>the type to assign to a matching token sequence;</li>
 *   <li>(optional) a comma-separated list of types this entry may overwrite;</li>
 *   <li>(optional) a numeric priority (defaults to {@code 0.0}).</li>
 * </ol>
 * Entries are applied in order of descending priority, ties broken by descending
 * number of tokens. Matches are written to
 * {@link CoreAnnotations.AnswerAnnotation} on the tokens of the document passed to
 * {@link #classify(List)}.
 */
public class RegexNERSequenceClassifier
extends AbstractSequenceClassifier<CoreLabel> {
    private static final Redwood.RedwoodChannels log = Redwood.channels(RegexNERSequenceClassifier.class);

    /** Flags used to compile token patterns when case is ignored (numerically 66). */
    private static final int IGNORE_CASE_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;

    private final List<Entry> entries;
    private final Set<String> myLabels;
    private final boolean ignoreCase;
    /** POS filter applied to candidate matches; {@code null} disables the filter. */
    private final Pattern validPosPattern;

    /** POS pattern used by the three-argument constructor. */
    public static final String DEFAULT_VALID_POS = "^(NN|JJ)";

    /**
     * Creates a classifier using {@link #DEFAULT_VALID_POS} as the POS filter.
     *
     * @param mapping location of the mapping, as understood by {@code IOUtils.readerFromString}
     * @param ignoreCase whether token patterns are compiled case-insensitively
     * @param overwriteMyLabels whether the types produced by this classifier are added to
     *        the set returned by {@link #labels()}
     */
    public RegexNERSequenceClassifier(String mapping, boolean ignoreCase, boolean overwriteMyLabels) {
        this(mapping, ignoreCase, overwriteMyLabels, DEFAULT_VALID_POS);
    }

    /**
     * Creates a classifier from a mapping opened with {@code IOUtils.readerFromString}.
     *
     * @param mapping location of the mapping, as understood by {@code IOUtils.readerFromString}
     * @param ignoreCase whether token patterns are compiled case-insensitively
     * @param overwriteMyLabels whether the types produced by this classifier are added to
     *        the set returned by {@link #labels()}
     * @param validPosRegex regex that at least one POS tag of a match must satisfy;
     *        {@code null} or empty disables the POS check
     * @throws RuntimeIOException if the mapping cannot be read
     * @throws IllegalArgumentException if a mapping line is malformed
     * @throws PatternSyntaxException if {@code validPosRegex} is not a valid regex
     */
    public RegexNERSequenceClassifier(String mapping, boolean ignoreCase, boolean overwriteMyLabels, String validPosRegex) {
        super(new Properties());
        this.validPosPattern = compileValidPos(validPosRegex);
        try (BufferedReader rd = IOUtils.readerFromString(mapping)) {
            this.entries = readEntries(rd, ignoreCase);
        }
        catch (IOException e) {
            throw new RuntimeIOException("Couldn't read RegexNER from " + mapping, e);
        }
        this.ignoreCase = ignoreCase;
        this.myLabels = buildLabels(this.flags.backgroundSymbol, this.entries, overwriteMyLabels);
    }

    /**
     * Creates a classifier from an already opened reader. The reader is consumed up to
     * end-of-stream but is not closed by this constructor.
     *
     * @param reader source of the mapping lines
     * @param ignoreCase whether token patterns are compiled case-insensitively
     * @param overwriteMyLabels whether the types produced by this classifier are added to
     *        the set returned by {@link #labels()}
     * @param validPosRegex regex that at least one POS tag of a match must satisfy;
     *        {@code null} or empty disables the POS check
     * @throws RuntimeIOException if reading from {@code reader} fails
     * @throws IllegalArgumentException if a mapping line is malformed
     * @throws PatternSyntaxException if {@code validPosRegex} is not a valid regex
     */
    public RegexNERSequenceClassifier(BufferedReader reader, boolean ignoreCase, boolean overwriteMyLabels, String validPosRegex) {
        super(new Properties());
        this.validPosPattern = compileValidPos(validPosRegex);
        try {
            this.entries = readEntries(reader, ignoreCase);
        }
        catch (IOException e) {
            throw new RuntimeIOException("Couldn't read RegexNER from reader", e);
        }
        this.ignoreCase = ignoreCase;
        this.myLabels = buildLabels(this.flags.backgroundSymbol, this.entries, overwriteMyLabels);
    }

    /** Compiles the POS filter, or returns {@code null} when the regex is null or empty. */
    private static Pattern compileValidPos(String validPosRegex) {
        if (validPosRegex == null || validPosRegex.isEmpty()) {
            return null;
        }
        return Pattern.compile(validPosRegex);
    }

    /**
     * Builds the label set: always the background symbol and {@code null}, plus every
     * entry type when {@code overwriteMyLabels} is set.
     */
    private static Set<String> buildLabels(String backgroundSymbol, List<Entry> entries, boolean overwriteMyLabels) {
        Set<String> labels = Generics.newHashSet();
        labels.add(backgroundSymbol);
        labels.add(null);
        if (overwriteMyLabels) {
            for (Entry entry : entries) {
                labels.add(entry.type);
            }
        }
        return labels;
    }

    /**
     * Returns the label set built at construction time. The set is also used by
     * {@link #classify(List)} as additional existing NER tags a match may cover.
     * The internal set is returned directly, not a copy.
     */
    @Override
    public Set<String> labels() {
        return this.myLabels;
    }

    /**
     * Checks whether at least one token in {@code [start, end)} has a POS tag in which
     * the valid-POS pattern is found. Always true when no pattern is configured.
     *
     * @throws IllegalArgumentException if a token inspected before a match is found has no tag
     */
    private boolean containsValidPos(List<CoreLabel> tokens, int start, int end) {
        if (this.validPosPattern == null) {
            return true;
        }
        for (int i = start; i < end; ++i) {
            String tag = tokens.get(i).tag();
            if (tag == null) {
                throw new IllegalArgumentException("RegexNER was asked to check for valid tags on an untagged sequence. Either tag the sequence, perhaps with the pos annotator, or create RegexNER with an empty validPosPattern, perhaps with the property regexner.validpospattern");
            }
            // find(), not matches(): the pattern only needs to occur somewhere in the tag.
            if (this.validPosPattern.matcher(tag).find()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Applies every entry, in sorted order, to the document and sets
     * {@link CoreAnnotations.AnswerAnnotation} to the entry's type on each token of
     * every match that passes the POS filter.
     *
     * @param document tokens to label; modified in place
     * @return the same {@code document} instance
     */
    @Override
    public List<CoreLabel> classify(List<CoreLabel> document) {
        for (Entry entry : this.entries) {
            int length = entry.regex.size();
            int start = findStartIndex(entry, document, 0, this.myLabels, this.ignoreCase);
            while (start >= 0) {
                if (this.containsValidPos(document, start, start + length)) {
                    for (int i = start; i < start + length; ++i) {
                        document.get(i).set(CoreAnnotations.AnswerAnnotation.class, entry.type);
                    }
                }
                // Resume one token further on, so overlapping candidates are still considered.
                start = findStartIndex(entry, document, start + 1, this.myLabels, this.ignoreCase);
            }
        }
        return document;
    }

    /**
     * Parses the mapping into entries and sorts them by {@link Entry#compareTo(Entry)}.
     * Blank lines are skipped. {@code "O"} is always added to an entry's overwritable types.
     *
     * @throws IOException if reading from {@code mapping} fails
     * @throws IllegalArgumentException if a line has fewer than two or more than four
     *         tab-separated columns, an unparsable priority, or an invalid regex
     */
    private static List<Entry> readEntries(BufferedReader mapping, boolean ignoreCase) throws IOException {
        List<Entry> entries = new ArrayList<>();
        // Flag value 0 is what the single-argument Pattern.compile uses.
        int patternFlags = ignoreCase ? IGNORE_CASE_FLAGS : 0;
        int lineCount = 0;
        String line;
        while ((line = mapping.readLine()) != null) {
            ++lineCount;
            if (line.trim().isEmpty()) {
                continue;
            }
            String[] columns = line.split("\t");
            if (columns.length < 2 || columns.length > 4) {
                throw new IllegalArgumentException("Provided mapping file is in wrong format: " + line);
            }
            String[] regexes = columns[0].trim().split("\\s+");
            String type = columns[1].trim();

            Set<String> overwritableTypes = Generics.newHashSet();
            if (columns.length >= 3) {
                overwritableTypes.addAll(Arrays.asList(columns[2].trim().split(",")));
            }
            overwritableTypes.add("O");

            double priority = 0.0;
            if (columns.length == 4) {
                try {
                    priority = Double.parseDouble(columns[3].trim());
                }
                catch (NumberFormatException e) {
                    throw invalidLine(lineCount, mapping, line, e);
                }
            }

            List<Pattern> tokens = new ArrayList<>(regexes.length);
            try {
                for (String regex : regexes) {
                    tokens.add(Pattern.compile(regex, patternFlags));
                }
            }
            catch (PatternSyntaxException e) {
                throw invalidLine(lineCount, mapping, line, e);
            }
            entries.add(new Entry(tokens, type, overwritableTypes, priority));
        }
        Collections.sort(entries);
        return entries;
    }

    /** Builds the exception reported for a line whose priority or regex cannot be parsed. */
    private static IllegalArgumentException invalidLine(int lineCount, BufferedReader mapping, String line, Exception cause) {
        return new IllegalArgumentException("ERROR: Invalid line " + lineCount + " in regexner file " + mapping + ": \"" + line + "\"!", cause);
    }

    /**
     * Finds the first position at or after {@code searchStart} where every token
     * pattern of {@code entry} matches the corresponding document token.
     *
     * @return the start index of the match, or {@code -1} if there is none
     */
    private static int findStartIndex(Entry entry, List<CoreLabel> document, int searchStart, Set<String> myLabels, boolean ignoreCase) {
        int length = entry.regex.size();
        int lastStart = document.size() - length;
        for (int start = searchStart; start <= lastStart; ++start) {
            boolean allMatch = true;
            for (int i = 0; i < length; ++i) {
                if (!tokenMatches(entry, i, document.get(start + i), myLabels, ignoreCase)) {
                    allMatch = false;
                    break;
                }
            }
            if (allMatch) {
                return start;
            }
        }
        return -1;
    }

    /**
     * Tests one document token against the {@code index}-th token pattern of {@code entry}.
     * Checks run cheapest-first; the regex match is evaluated last.
     */
    private static boolean tokenMatches(Entry entry, int index, CoreLabel token, Set<String> myLabels, boolean ignoreCase) {
        String nerType = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
        String currentType = token.get(CoreAnnotations.AnswerAnnotation.class);

        // An answer already assigned may only be replaced if this entry lists it as overwritable.
        if (currentType != null && !entry.overwritableTypes.contains(currentType)) {
            return false;
        }
        // Literal shortcut: plain alphanumeric patterns are compared as strings first.
        String exact = entry.exact.get(index);
        if (exact != null) {
            boolean sameWord = ignoreCase ? exact.equalsIgnoreCase(token.word()) : exact.equals(token.word());
            if (!sameWord) {
                return false;
            }
        }
        // The existing NER tag must be overwritable by this entry or be one of our own labels.
        if (!entry.overwritableTypes.contains(nerType) && !myLabels.contains(nerType)) {
            return false;
        }
        return entry.regex.get(index).matcher(token.word()).matches();
    }

    /** Delegates to {@link #classify(List)}; {@code doc} and {@code sent} are not used. */
    @Override
    public List<CoreLabel> classifyWithGlobalInformation(List<CoreLabel> tokenSeq, CoreMap doc, CoreMap sent) {
        return this.classify(tokenSeq);
    }

    /** Does nothing: the entries are fixed at construction time. */
    @Override
    public void train(Collection<List<CoreLabel>> docs, DocumentReaderAndWriter<CoreLabel> readerAndWriter) {
    }

    /** Does nothing: serialization is not implemented for this classifier. */
    @Override
    public void serializeClassifier(String serializePath) {
    }

    /** Does nothing: serialization is not implemented for this classifier. */
    @Override
    public void serializeClassifier(ObjectOutputStream oos) {
    }

    /** Does nothing: loading from a stream is not implemented for this classifier. */
    @Override
    public void loadClassifier(ObjectInputStream in, Properties props) throws IOException, ClassCastException, ClassNotFoundException {
    }

    /**
     * One line of the mapping: a sequence of per-token patterns and the type assigned
     * to token sequences that match it.
     */
    private static class Entry
    implements Comparable<Entry> {
        /** Recognises pattern sources that are plain alphanumeric literals. */
        private static final Pattern PLAIN_LITERAL = Pattern.compile("[a-zA-Z0-9]+");

        private final List<Pattern> regex;
        /** Parallel to {@link #regex}: the literal text of a plain pattern, else {@code null}. */
        private final List<String> exact;
        private final String type;
        private final Set<String> overwritableTypes;
        private final double priority;

        public Entry(List<Pattern> regex, String type, Set<String> overwritableTypes, double priority) {
            this.regex = regex;
            this.type = type.intern();
            this.overwritableTypes = overwritableTypes;
            this.priority = priority;
            this.exact = new ArrayList<>(regex.size());
            for (Pattern p : regex) {
                String source = p.toString();
                this.exact.add(PLAIN_LITERAL.matcher(source).matches() ? source : null);
            }
        }

        /**
         * Orders entries by descending priority, then by descending number of token
         * patterns. Not consistent with {@code equals}, which is not overridden.
         */
        @Override
        public int compareTo(Entry other) {
            if (this.priority > other.priority) {
                return -1;
            }
            if (this.priority < other.priority) {
                return 1;
            }
            return Integer.compare(other.regex.size(), this.regex.size());
        }

        @Override
        public String toString() {
            return "Entry{" + this.regex + ' ' + this.type + ' ' + this.overwritableTypes + ' ' + this.priority + '}';
        }
    }
}

