/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.coreference.ae.features;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.dependency.parser.util.DependencyUtility;
import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.utils.distsem.WordEmbeddings;
import org.apache.ctakes.utils.distsem.WordVector;
import org.apache.ctakes.utils.distsem.WordVectorReader;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.cleartk.ml.Feature;

/**
 * Relation feature extractor that emits one feature, {@code HEAD_SIMILARITY_WORD2VEC},
 * holding the embedding similarity between the nominal head words of two annotations.
 *
 * <p>Embeddings are loaded once per instance from the resource
 * {@code org/apache/ctakes/coreference/distsem/mimic_vectors.txt}.
 */
public class DistSemFeatureExtractor
implements RelationFeaturesExtractor<IdentifiedAnnotation, IdentifiedAnnotation> {
    /** Similarity reported when a head word is missing or absent from the embedding vocabulary. */
    public static final double DEFAULT_SIM = 0.5;

    // NOTE(review): the stream obtained here is never closed by this class; confirm whether
    // WordVectorReader.getEmbeddings closes it after reading.
    private final WordEmbeddings words = WordVectorReader.getEmbeddings(FileLocator.getAsStream("org/apache/ctakes/coreference/distsem/mimic_vectors.txt"));

    /**
     * Builds the head-word similarity feature for a pair of annotations.
     *
     * @param jCas the CAS containing both annotations
     * @param arg1 first annotation of the pair
     * @param arg2 second annotation of the pair
     * @return a one-element list containing the {@code HEAD_SIMILARITY_WORD2VEC} feature; its value is
     *         the embedding similarity of the two lower-cased head words, or {@link #DEFAULT_SIM} when
     *         either head node is missing or either word is not in the vocabulary
     * @throws AnalysisEngineProcessException declared by the extractor interface
     */
    public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
        List<Feature> feats = new ArrayList<Feature>();
        String head1 = this.lowerCasedHead(jCas, arg1);
        String head2 = this.lowerCasedHead(jCas, arg2);

        double sim = DEFAULT_SIM;
        if (head1 != null && head2 != null && this.words.containsKey(head1) && this.words.containsKey(head2)) {
            sim = this.words.getSimilarity(head1, head2);
        }
        feats.add(new Feature("HEAD_SIMILARITY_WORD2VEC", sim));
        return feats;
    }

    /**
     * Returns the lower-cased covered text of the annotation's nominal head node, or {@code null}
     * when no head node is found.
     */
    private String lowerCasedHead(JCas jCas, IdentifiedAnnotation arg) {
        ConllDependencyNode node = DependencyUtility.getNominalHeadNode(jCas, arg);
        // Locale.ROOT keeps the vocabulary lookup independent of the JVM default locale.
        return node != null ? node.getCoveredText().toLowerCase(Locale.ROOT) : null;
    }

    /**
     * Sums the word vectors of all tokens covered by {@code arg} and scales the sum to unit length.
     *
     * <p>Each token is looked up by its exact covered text first and by its lower-cased text second;
     * tokens with no vector are skipped.
     *
     * @param arg annotation whose covered tokens are combined
     * @return the length-normalized sum, the unnormalized all-zero sum if the vectors cancel out
     *         exactly, or {@code null} when no covered token has a vector
     */
    private double[] getArgVector(IdentifiedAnnotation arg) {
        double[] vec = null;
        List<BaseToken> tokens = JCasUtil.selectCovered(BaseToken.class, arg);
        for (BaseToken token : tokens) {
            String text = token.getCoveredText();
            WordVector wv = this.words.getVector(text);
            if (wv == null) {
                wv = this.words.getVector(text.toLowerCase(Locale.ROOT));
            }
            if (wv == null) {
                continue;
            }
            if (vec == null) {
                // Sized from the first vector found; a new double[] is already zero-filled.
                vec = new double[wv.size()];
            }
            for (int i = 0; i < vec.length; ++i) {
                vec[i] += wv.getValue(i);
            }
        }
        if (vec != null) {
            double len = 0.0;
            for (double v : vec) {
                len += v * v;
            }
            len = Math.sqrt(len);
            assert (!Double.isNaN(len));
            // A zero norm would turn every component into NaN (0/0); leave the zero vector as-is.
            if (len > 0.0) {
                for (int i = 0; i < vec.length; ++i) {
                    vec[i] /= len;
                }
            }
        }
        return vec;
    }
}

