/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.gui.dictionary.util;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
 * Static utility that splits text into tokens.
 *
 * <p>Letters and digits are accumulated into word tokens; every other character becomes its own
 * single-character token, with three exceptions handled in {@link #getTokens(String, boolean)}:
 * hyphens that follow a known prefix or precede a known suffix, a trailing {@code 's}, and a
 * trailing {@code .} followed by a single digit.
 */
public final class TextTokenizer {
    // Hyphen-terminated prefixes; a '-' directly after one of these stays inside the token.
    // NOTE(review): "gasto-" may be a typo for "gastro-"; left as-is to keep behavior - confirm.
    private static final String[] PREFIXES = new String[]{"e-", "a-", "u-", "x-", "agro-", "ante-", "anti-", "arch-", "be-", "bi-", "bio-", "co-", "counter-", "cross-", "cyber-", "de-", "eco-", "ex-", "extra-", "inter-", "intra-", "macro-", "mega-", "micro-", "mid-", "mini-", "multi-", "neo-", "non-", "over-", "pan-", "para-", "peri-", "post-", "pre-", "pro-", "pseudo-", "quasi-", "re-", "semi-", "sub-", "super-", "tri-", "ultra-", "un-", "uni-", "vice-", "electro-", "gasto-", "homo-", "hetero-", "ortho-", "phospho-"};
    // Hyphen-led suffixes; a '-' directly before one of these stays inside the token.
    // NOTE: getNextCharTerm(..) stops at the first non-alphanumeric character, so the looked-up
    // term never contains '-' and the "-o-torium" entry can never match.
    private static final String[] SUFFIXES = new String[]{"-esque", "-ette", "-fest", "-fold", "-gate", "-itis", "-less", "-most", "-o-torium", "-rama", "-wise"};
    private static final Set<String> PREFIX_SET = new HashSet<>(Arrays.asList(PREFIXES));
    private static final Set<String> SUFFIX_SET = new HashSet<>(Arrays.asList(SUFFIXES));
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    private TextTokenizer() {
    }

    /**
     * @param word text to scan from its first character
     * @return the leading run of letter-or-digit characters of {@code word}; empty if the first
     *         character is not a letter or digit
     */
    private static String getNextCharTerm(String word) {
        StringBuilder sb = new StringBuilder();
        int count = word.length();
        for (int i = 0; i < count; ++i) {
            char c = word.charAt(i);
            if (!Character.isLetterOrDigit(c)) {
                return sb.toString();
            }
            sb.append(c);
        }
        return sb.toString();
    }

    /**
     * @param word characters accumulated so far, without the hyphen
     * @return true if {@code word + "-"} is one of the known prefixes
     */
    private static boolean isPrefix(String word) {
        String prefixQ = word + "-";
        return PREFIX_SET.contains(prefixQ);
    }

    /**
     * @param word       full word being tokenized
     * @param startIndex index of the first character after the hyphen
     * @return true if the alphanumeric run starting at {@code startIndex}, preceded by "-", is one
     *         of the known suffixes
     */
    private static boolean isSuffix(String word, int startIndex) {
        if (word.length() <= startIndex) {
            return false;
        }
        String nextCharTerm = TextTokenizer.getNextCharTerm(word.substring(startIndex));
        if (nextCharTerm.isEmpty()) {
            return false;
        }
        String suffixQ = "-" + nextCharTerm;
        return SUFFIX_SET.contains(suffixQ);
    }

    /**
     * @param word       full word being tokenized
     * @param startIndex index of the first character after the apostrophe
     * @return true if exactly one character remains at {@code startIndex} and it is {@code 's'}
     */
    private static boolean isOwnerApostrophe(CharSequence word, int startIndex) {
        return word.length() == startIndex + 1 && word.charAt(startIndex) == 's';
    }

    /**
     * @param word       full word being tokenized
     * @param startIndex index of the first character after the period
     * @return true if exactly one character remains at {@code startIndex} and it is a digit
     */
    private static boolean isNumberDecimal(CharSequence word, int startIndex) {
        return word.length() == startIndex + 1 && Character.isDigit(word.charAt(startIndex));
    }

    /**
     * Tokenizes a single word without splitting digits from following letters.
     *
     * @param word word to tokenize; must not be null
     * @return tokens of {@code word}, in order
     */
    public static List<String> getTokens(String word) {
        return TextTokenizer.getTokens(word, false);
    }

    /**
     * Tokenizes a single word.
     *
     * @param word           word to tokenize; must not be null
     * @param separateDigits if true, a token is split where a digit is directly followed by a
     *                       non-digit letter (e.g. "5mg" yields "5", "mg"); a letter followed by a
     *                       digit is not split
     * @return tokens of {@code word}, in order; empty for an empty word
     */
    public static List<String> getTokens(String word, boolean separateDigits) {
        List<String> tokens = new ArrayList<>();
        StringBuilder sb = new StringBuilder();
        int count = word.length();
        boolean wasDigit = false;
        for (int i = 0; i < count; ++i) {
            char c = word.charAt(i);
            if (Character.isLetterOrDigit(c)) {
                // Digit -> letter transition: close the numeric token first when requested.
                if (sb.length() != 0 && separateDigits && wasDigit && !Character.isDigit(c)) {
                    tokens.add(sb.toString());
                    sb.setLength(0);
                }
                wasDigit = Character.isDigit(c);
                sb.append(c);
                continue;
            }
            wasDigit = false;
            // A hyphen after a known prefix or before a known suffix stays inside the token.
            if (c == '-' && (TextTokenizer.isPrefix(sb.toString()) || TextTokenizer.isSuffix(word, i + 1))) {
                sb.append(c);
                continue;
            }
            // A trailing 's or .<digit> starts a new token that begins with the ' or the .
            if (c == '\'' && TextTokenizer.isOwnerApostrophe(word, i + 1) || c == '.' && TextTokenizer.isNumberDecimal(word, i + 1)) {
                if (sb.length() != 0) {
                    tokens.add(sb.toString());
                    sb.setLength(0);
                }
                sb.append(c);
                continue;
            }
            // Any other symbol ends the current token and is emitted as its own token.
            if (sb.length() != 0) {
                tokens.add(sb.toString());
                sb.setLength(0);
            }
            tokens.add(String.valueOf(c));
        }
        if (sb.length() != 0) {
            tokens.add(sb.toString());
        }
        return tokens;
    }

    /**
     * Lower-cases and tokenizes text without splitting digits from following letters.
     *
     * @param text text to tokenize; must not be null
     * @return the tokens of {@code text} joined by single spaces
     */
    public static String getTokenizedText(String text) {
        return TextTokenizer.getTokenizedText(text, false);
    }

    /**
     * Lower-cases text, splits it on whitespace, drops one trailing {@code ,}, {@code ;} or
     * {@code .} from the last piece, tokenizes each piece and joins all tokens with single spaces.
     *
     * @param text           text to tokenize; must not be null
     * @param separateDigits passed through to {@link #getTokens(String, boolean)}
     * @return the space-joined tokens; the input itself if it is empty, and {@code ""} if it
     *         contains only whitespace
     */
    public static String getTokenizedText(String text, boolean separateDigits) {
        if (text.isEmpty()) {
            return text;
        }
        // Locale.ROOT keeps lowercasing independent of the JVM default locale; e.g. under a
        // Turkish locale "I" would otherwise become a dotless i and miss the ASCII prefix table.
        String[] splits = WHITESPACE.split(text.toLowerCase(Locale.ROOT));
        if (splits.length == 0) {
            return "";
        }
        String lastSplit = splits[splits.length - 1];
        if (lastSplit.endsWith(",") || lastSplit.endsWith(";") || lastSplit.endsWith(".")) {
            splits[splits.length - 1] = lastSplit.substring(0, lastSplit.length() - 1);
        }
        return Arrays.stream(splits).map(s -> TextTokenizer.getTokens(s, separateDigits)).flatMap(Collection::stream).collect(Collectors.joining(" "));
    }
}

