/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.pipeline;

import edu.stanford.nlp.international.arabic.process.ArabicSegmenter;
import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.logging.Redwood;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * An {@link Annotator} that runs an {@link ArabicSegmenter} over the text of an
 * annotation and stores the resulting tokens under
 * {@link CoreAnnotations.TokensAnnotation}.
 *
 * <p>If the annotation already carries a {@link CoreAnnotations.SentencesAnnotation},
 * every sentence is segmented individually; otherwise the annotation as a whole is
 * segmented.
 *
 * <p>When newline tokenization is enabled (see
 * {@link #ArabicSegmenterAnnotator(String, Properties)}), line breaks are emitted as
 * {@code *NL*} tokens instead of being passed to the segmenter.
 */
public class ArabicSegmenterAnnotator
implements Annotator {
    private static final Redwood.RedwoodChannels log = Redwood.channels(ArabicSegmenterAnnotator.class);
    private static final String DEFAULT_SEG_LOC = "/u/nlp/data/arabic-segmenter/arabic-segmenter-atb+bn+arztrain.ser.gz";
    private static final String NEWLINE_REGEX = "\\R";
    private static final Pattern NEWLINE_PATTERN = Pattern.compile(NEWLINE_REGEX);
    /** Word and value given to the tokens that stand in for a line break. */
    private static final String NEWLINE_TOKEN = "*NL*";
    /** Property consulted to decide whether and how newlines become tokens. */
    private static final String NEWLINE_IS_SENTENCE_BREAK_PROPERTY = "ssplit.newlineIsSentenceBreak";

    /** Assigned exactly once, by {@link #loadModel(String, Properties)} during construction. */
    private ArabicSegmenter segmenter;
    private final boolean verbose;
    /** Whether line breaks are turned into {@code *NL*} tokens. */
    private final boolean tokenizeNewline;
    /** Whether a second consecutive line break also gets its own {@code *NL*} token. */
    private final boolean sentenceSplitOnTwoNewlines;

    /** Creates a non-verbose annotator backed by the default model location. */
    public ArabicSegmenterAnnotator() {
        this(DEFAULT_SEG_LOC, false);
    }

    /**
     * Creates an annotator backed by the default model location.
     *
     * @param verbose whether to log progress messages
     */
    public ArabicSegmenterAnnotator(boolean verbose) {
        this(DEFAULT_SEG_LOC, verbose);
    }

    /**
     * Creates an annotator from an explicit model location. Newline tokenization is
     * disabled for annotators built this way.
     *
     * @param segLoc location of the segmenter model
     * @param verbose whether to log progress messages
     */
    public ArabicSegmenterAnnotator(String segLoc, boolean verbose) {
        this.verbose = verbose;
        this.loadModel(segLoc, new Properties());
        this.tokenizeNewline = false;
        this.sentenceSplitOnTwoNewlines = false;
    }

    /**
     * Creates an annotator configured from pipeline properties.
     *
     * <p>Every property whose key starts with {@code name + "."} is forwarded to the
     * segmenter with that prefix stripped, except {@code name.model}, which supplies the
     * model location. {@code name.verbose} additionally controls logging.
     *
     * <p>Newline tokenization is enabled when {@code ssplit.newlineIsSentenceBreak} is
     * anything other than {@code "never"} or when {@code ssplit.eolonly} is true.
     *
     * @param name the annotator name used as the property prefix
     * @param props the pipeline properties
     * @throws RuntimeException if {@code name.model} is not set
     */
    public ArabicSegmenterAnnotator(String name, Properties props) {
        String model = null;
        Properties modelProps = new Properties();
        String desiredKey = name + '.';
        for (String key : props.stringPropertyNames()) {
            if (!key.startsWith(desiredKey)) {
                continue;
            }
            String modelKey = key.substring(desiredKey.length());
            if (modelKey.equals("model")) {
                model = props.getProperty(key);
            } else {
                modelProps.setProperty(modelKey, props.getProperty(key));
            }
        }
        this.verbose = PropertiesUtils.getBool(props, name + ".verbose", false);
        if (model == null) {
            throw new RuntimeException("Expected a property " + name + ".model");
        }
        this.loadModel(model, modelProps);

        // Read the sentence-break mode once; both flags below are derived from it.
        String newlineMode = props.getProperty(NEWLINE_IS_SENTENCE_BREAK_PROPERTY, "never");
        boolean eolOnly = Boolean.parseBoolean(props.getProperty("ssplit.eolonly", "false"));
        this.tokenizeNewline = !newlineMode.equals("never") || eolOnly;
        this.sentenceSplitOnTwoNewlines = newlineMode.equals("two");
    }

    /**
     * Loads the segmenter and stores it in {@link #segmenter}.
     *
     * @param segLoc location of the segmenter model, passed as the {@code model} property
     * @param props extra segmenter properties; an entry keyed {@code model} here would
     *     override {@code segLoc} because these are copied in afterwards
     */
    private void loadModel(String segLoc, Properties props) {
        if (this.verbose) {
            log.info("Loading Segmentation Model ... ");
        }
        Properties modelProps = new Properties();
        modelProps.setProperty("model", segLoc);
        modelProps.putAll(props);
        try {
            this.segmenter = ArabicSegmenter.getSegmenter(modelProps);
        }
        catch (RuntimeException e) {
            throw e;
        }
        catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Segments the given annotation, sentence by sentence if sentences are present and
     * as a single unit otherwise.
     *
     * @param annotation the annotation to add tokens to
     */
    @Override
    public void annotate(Annotation annotation) {
        if (this.verbose) {
            log.info("Adding Segmentation annotation ... ");
        }
        List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
        if (sentences != null) {
            for (CoreMap sentence : sentences) {
                this.doOneSentence(sentence);
            }
        } else {
            this.doOneSentence(annotation);
        }
    }

    /**
     * Builds the {@code *NL*} token that represents a line break.
     *
     * @param piece the line-break text, kept as the token's original text
     * @param stringConsumed character offset at which {@code piece} starts
     * @return the newline token, spanning {@code piece.length()} characters
     */
    private CoreLabel makeNewlineCoreLabel(String piece, int stringConsumed) {
        CoreLabel newline = new CoreLabel();
        newline.setWord(NEWLINE_TOKEN);
        newline.setValue(NEWLINE_TOKEN);
        newline.set(CoreAnnotations.OriginalTextAnnotation.class, piece);
        newline.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, stringConsumed);
        newline.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, stringConsumed + piece.length());
        return newline;
    }

    /**
     * Segments the text of one sentence (or whole document) and stores the tokens on it.
     *
     * @param annotation the map whose {@link CoreAnnotations.TextAnnotation} is segmented
     *     and whose {@link CoreAnnotations.TokensAnnotation} is set
     */
    private void doOneSentence(CoreMap annotation) {
        String text = annotation.get(CoreAnnotations.TextAnnotation.class);
        List<CoreLabel> tokens = this.tokenizeNewline
                ? this.segmentKeepingNewlines(text)
                : this.segmenter.segmentStringToTokenList(text);
        annotation.set(CoreAnnotations.TokensAnnotation.class, tokens);
    }

    /**
     * Segments {@code text} piece by piece, emitting {@code *NL*} tokens for line breaks.
     *
     * <p>Within a run of consecutive line-break pieces only the first yields a token,
     * plus the second when {@link #sentenceSplitOnTwoNewlines} is set; any further ones
     * are dropped but still advance the character offset.
     */
    private List<CoreLabel> segmentKeepingNewlines(String text) {
        List<CoreLabel> tokens = new ArrayList<>();
        int stringConsumed = 0;
        boolean sawNewline = false;
        boolean sawTwoNewlines = false;
        for (String piece : StringUtils.splitLinesKeepNewlines(text)) {
            if (NEWLINE_PATTERN.matcher(piece).matches()) {
                if (!sawNewline) {
                    tokens.add(this.makeNewlineCoreLabel(piece, stringConsumed));
                    sawNewline = true;
                } else if (this.sentenceSplitOnTwoNewlines && !sawTwoNewlines) {
                    tokens.add(this.makeNewlineCoreLabel(piece, stringConsumed));
                    sawTwoNewlines = true;
                }
            } else {
                sawNewline = false;
                sawTwoNewlines = false;
                List<CoreLabel> pieceTokens = this.segmenter.segmentStringToTokenList(piece);
                // The segmenter's offsets are relative to the piece; shift them so they
                // are relative to the full text.
                for (CoreLabel label : pieceTokens) {
                    label.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, label.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class) + stringConsumed);
                    label.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, label.get(CoreAnnotations.CharacterOffsetEndAnnotation.class) + stringConsumed);
                }
                tokens.addAll(pieceTokens);
            }
            stringConsumed += piece.length();
        }
        return tokens;
    }

    /** Returns the empty set. */
    @Override
    public Set<Class<? extends CoreAnnotation>> requires() {
        return Collections.emptySet();
    }

    /** Returns a fresh set of the annotation classes this annotator declares as satisfied. */
    @Override
    public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() {
        return new HashSet<Class<? extends CoreAnnotation>>(Arrays.asList(
                CoreAnnotations.TextAnnotation.class,
                CoreAnnotations.TokensAnnotation.class,
                CoreAnnotations.CharacterOffsetBeginAnnotation.class,
                CoreAnnotations.CharacterOffsetEndAnnotation.class,
                CoreAnnotations.BeforeAnnotation.class,
                CoreAnnotations.AfterAnnotation.class,
                CoreAnnotations.TokenBeginAnnotation.class,
                CoreAnnotations.TokenEndAnnotation.class,
                CoreAnnotations.PositionAnnotation.class,
                CoreAnnotations.IndexAnnotation.class,
                CoreAnnotations.OriginalTextAnnotation.class,
                CoreAnnotations.ValueAnnotation.class));
    }
}

