Start line:  
End line:  

Snippet Preview

Snippet HTML Code

Stack Overflow Questions
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to you under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 
 package org.apache.ctakes.temporal.ae;
 
 import java.io.File;
 import java.io.IOException;
 import java.net.URL;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;

 import org.cleartk.ml.CleartkAnnotator;
 import org.cleartk.ml.DataWriter;
 import org.cleartk.ml.Feature;
 import org.cleartk.ml.Instance;
 import org.cleartk.ml.feature.extractor.CleartkExtractor;
 import org.cleartk.ml.feature.extractor.CleartkExtractor.Bag;
 import org.cleartk.ml.feature.extractor.CleartkExtractor.Covered;
 import org.cleartk.ml.feature.extractor.CombinedExtractor1;
 import org.cleartk.ml.feature.extractor.CoveredTextExtractor;
 import org.cleartk.ml.feature.extractor.FeatureExtractor1;
 import org.cleartk.ml.feature.extractor.TypePathExtractor;
 import org.cleartk.ml.feature.function.CharacterCategoryPatternFunction;
 import org.cleartk.ml.feature.function.CharacterCategoryPatternFunction.PatternType;
 import org.cleartk.ml.jar.DefaultDataWriterFactory;
 import org.cleartk.ml.jar.DirectoryDataWriterFactory;
 import org.cleartk.ml.jar.GenericJarClassifierFactory;
 import org.cleartk.timeml.util.TimeWordsExtractor;
 
 
 public class ConstituencyBasedTimeAnnotator extends
 
   private static final String NON_MENTION = "NON_TIME_MENTION";
   private static final String MENTION = "TIME_MENTION";
   private static Logger logger = Logger.getLogger(ConstituencyBasedTimeAnnotator.class);
   private static final int	SPAN_LIMIT = 12;
 
   public static final String PARAM_TIMEX_VIEW = "TimexView";
       name = ,
       mandatory = false,
       description = "View to write timexes to (used for ensemble methods)")
   protected String timexView = .;
 
       Class<? extends DataWriter<String>> dataWriterClass,
           File outputDirectorythrows ResourceInitializationException {
     return AnalysisEngineFactory.createEngineDescription(
         ConstituencyBasedTimeAnnotator.class,
         CleartkAnnotator.PARAM_IS_TRAINING,
         true,
         DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
         dataWriterClass,
         DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
         outputDirectory);
   }
 
 	      throws ResourceInitializationException {
 	    return AnalysisEngineFactory.createEngineDescription(
	        ConstituencyBasedTimeAnnotator.class,
	        CleartkAnnotator.PARAM_IS_TRAINING,
	        false,
	        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
	        modelPath);
	  }
  
	      String viewNamethrows ResourceInitializationException {
	    return AnalysisEngineFactory.createEngineDescription(
	        ConstituencyBasedTimeAnnotator.class,
	        CleartkAnnotator.PARAM_IS_TRAINING,
	        false,
	        viewName,
	        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
	        modelPath);
	  }
  

Deprecated:
use String path instead of File. ClearTK will automatically Resolve the String to an InputStream. This will allow resources to be read within from a jar as well as File.
  public static AnalysisEngineDescription createAnnotatorDescription(File modelDirectory)
      throws ResourceInitializationException {
    return AnalysisEngineFactory.createEngineDescription(
        ConstituencyBasedTimeAnnotator.class,
        CleartkAnnotator.PARAM_IS_TRAINING,
        false,
        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
        new File(modelDirectory"model.jar"));
  }

  

Deprecated:
use String path instead of File. ClearTK will automatically Resolve the String to an InputStream. This will allow resources to be read within from a jar as well as File.
  public static AnalysisEngineDescription createEnsembleDescription(File modelDirectory,
      String viewNamethrows ResourceInitializationException {
    return AnalysisEngineFactory.createEngineDescription(
        ConstituencyBasedTimeAnnotator.class,
        CleartkAnnotator.PARAM_IS_TRAINING,
        false,
        viewName,
        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
        new File(modelDirectory"model.jar"));
  }
  protected List<FeatureExtractor1> featureExtractors;
  
  protected FeatureExtractor1 wordTypeExtractor;
  
  private static final String LOOKUP_PATH = "/org/apache/ctakes/temporal/time_word_types.txt";
  
  private Map<StringStringwordTypes;
  
  public void initialize(UimaContext context)
      throws ResourceInitializationException {
    super.initialize(context);
    CombinedExtractor1<BaseTokencharExtractors = new CombinedExtractor1<>(CharacterCategoryPatternFunction.<BaseToken>createExtractor(PatternType.REPEATS_MERGED),
            CharacterCategoryPatternFunction.<BaseToken>createExtractor(PatternType.ONE_PER_CHAR));
    
    this. = Maps.newHashMap();
    URL url = TimeWordsExtractor.class.getResource();
    try {
      for (String line : Resources.readLines(url.)) {
        String[] typeAndWord = line.split("\\s+");
        if (typeAndWord.length != 2) {
          throw new IllegalArgumentException("Expected '<type> <word>', found: " + line);
        }
        this..put(typeAndWord[1], typeAndWord[0]);
      }
    } catch (IOException e) {
      throw new ResourceInitializationException(e);
    }
    
    CombinedExtractor1<BaseTokenallExtractors = new CombinedExtractor1<>(
        new CoveredTextExtractor<BaseToken>(),
//        new TimeWordTypeExtractor(),
        charExtractors,
        new TypePathExtractor<>(BaseToken.class"partOfSpeech"));
    
     = new ArrayList<FeatureExtractor1>();
//    featureExtractors.add(new CleartkExtractor(BaseToken.class, new CoveredTextExtractor(), new Bag(new Covered())));
    .add(new CleartkExtractor(BaseToken.classallExtractorsnew Bag(new Covered())));
//    featureExtractors.add(charExtractors);
     = new CleartkExtractor(BaseToken.classnew TimeWordTypeExtractor<BaseToken>(), new Bag(new Covered()));
//    featureExtractors.add(new CleartkExtractor(BaseToken.class, new CoveredTextExtractor(), new Bag(new Preceding(1))));
 //   featureExtractors.add(new CleartkExtractor(BaseToken.class, new CoveredTextExtractor(), new Bag(new Following(1))));
    // bag of constituent descendent labels
//    featureExtractors.add(new CleartkExtractor(TreebankNode.class, new TypePathExtractor(TreebankNode.class, "nodeType"), new Bag(new Covered())));
    
  }
  
  public void process(JCas jCasSegment segment)
      throws AnalysisEngineProcessException {
    HashSet<TimeMentionmentions = new HashSet<TimeMention>(JCasUtil.selectCovered(TimeMention.classsegment));
    
    //output the gold time expression's length and real words
//    if(this.isTraining()){
//    	for( TimeMention time: mentions){
//    		int numTokens = JCasUtil.selectCovered(BaseToken.class, time).size();
//    		System.out.println(numTokens + ";" +time.getCoveredText());
//    	}
//    }
	  
    for(TopTreebankNode root : JCasUtil.selectCovered(TopTreebankNode.classsegment)){
      recursivelyProcessNode(jCasroot.getChildren(0), mentions, 0.0);
    }
  }
  private double recursivelyProcessNode(JCas jCasTreebankNode nodeSet<TimeMentionmentionsdouble parentScorethrows AnalysisEngineProcessException {
    // accumulate features:
    double score=0.0;
    parentScore = 0.0;
    ArrayList<Feature> features = new ArrayList<Feature>();
    String category = ;
    // node-based features
    if(node.getParent().getParent() == nullfeatures.add(new Feature("IS_ROOT"));
    features.add(new Feature("NODE_LABEL"node.getNodeType()));
    features.add(new Feature("PARENT_LABEL"node.getParent().getNodeType()));
    List<BaseTokencoveredTokens = JCasUtil.selectCovered(BaseToken.classnode);
    
    //check span length, check if a small node contains any time word
    int numTokens = coveredTokens.size();
    
    if(node.getLeaf()){
      features.add(new Feature("IS_LEAF"));
      features.addAll(.extract(jCasnode));
    }else{
      StringBuilder buffer = new StringBuilder();
      for(int i = 0; i < node.getChildren().size(); i++){
        buffer.append(node.getChildren(i).getNodeType());
        buffer.append("_");
        features.add(new Feature("CHILD_BAG"node.getChildren(i).getNodeType()));
      }
//      features.add(new Feature("NUM_TOKENS", JCasUtil.selectCovered(BaseToken.class, node).size()));
      features.add(new Feature("PRODUCTION"buffer.toString()));
//      features.add(new Feature("LeftSibling", getSiblingCategory(node, -1)));
//      features.add(new Feature("RightSibling", getSiblingCategory(node, 1)));
    }
    
    // other feature types:
    for(FeatureExtractor1 extractor : ){
      features.addAll(extractor.extract(jCasnode));
    }
      
    if(this.isTraining()){
      List<TimeMentiongoldMentions = JCasUtil.selectCovered(TimeMention.classnode);
      for(TimeMention mention : goldMentions){
        if(mention.getBegin() == node.getBegin() && mention.getEnd() == node.getEnd()){
          category = ;
          score=1.0;
          mentions.remove(mention);
          if(node.getCoveredText().contains("postoperative")){
            ..println("*** Positive Example: ***");
            ..println("*** Parent: " + node.getParent().getCoveredText());
            printFeatures(nodefeatures);
          }
        }
      }
      if(numTokens < ){
        this..write(new Instance<String>(categoryfeatures));
      }
    }else{
      Map<String,Doubleoutcomes = this..score(features);
      score = outcomes.get();
      category = this..classify(features);
      if(category.equals()){
        // add to cas
        JCas timexCas;
        try {
          timexCas = jCas.getView();
        } catch (CASException e) {
          throw new AnalysisEngineProcessException(e);
        }
        TimeMention mention = new TimeMention(timexCasnode.getBegin(), node.getEnd());
        mention.setConfidence((float)score);
        mention.addToIndexes();
      }else{
        score = 1 - score;
      }
    }
    // now do children if not a leaf & not a mention
    if(node.getLeaf() || .equals(category)) return score;
    
    double highestScore = 0.5;
    TreebankNode highestScoringChild = null;
    
    for(int i = 0; i < node.getChildren().size(); i++){
      TreebankNode child = node.getChildren(i);
      double childScore = recursivelyProcessNode(jCaschildmentions, Math.max(scoreparentScore));
      if(childScore > highestScore){
        highestScoringChild = child;
        highestScore = childScore;
      }
    }
    if(!this.isTraining() && .equals(category)){
      .info(String.format("\nFound mention (%s) with score %f\n\tParent (%s) : %f\n\tBest child (%s) : %f\n"node.getCoveredText(), scorenode.getParent().getCoveredText(), parentScorehighestScoringChild == null ? "(none)" : highestScoringChild.getCoveredText(), highestScore));
    }
    return score;
  }
  
  private static void printFeatures(TreebankNode nodeList<Feature> features) {
    for(Feature feat : features){
      ..printf("%s => %s\n"feat.getName(), feat.getValue());
    }    
  }
New to GrepCode? Check out our FAQ X