/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to you under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 
 package org.apache.ctakes.temporal.duration;
 
 import java.io.File;
 import java.util.List;
 import java.util.Map;
 import java.util.regex.Pattern;
 
 
 /**
  * Extract durations of event mentions (e.g. sign/symptom or disease/disorder).
  *
  * @author dmitriy dligach
  */
 
 public class EventDurationDistribution {
 
   private static Class<? extends EventMentiontargetClass = MedicationMention.class;
   
   public static class Options  {
     @Option(
         name = "--input-dir",
         usage = "specify the path to the directory containing the xmi files",
         required = true)
     public File inputDirectory;
     
     @Option(
         name = "--output-file",
         usage = "specify the path to the output file",
         required = true)
     public String outputFile;
   }
   
 	public static void main(String[] argsthrows Exception {
 		
 	  Options options = new Options();
 	  CmdLineParser parser = new CmdLineParser(options);
 	  parser.parseArgument(args);
 	  
 	  
 		List<FiletrainFiles = Arrays.asList(options.inputDirectory.listFiles());
     CollectionReader collectionReader = getCollectionReader(trainFiles);
 		
     AnalysisEngine temporalDurationExtractor = AnalysisEngineFactory.createEngine(
     		TemporalDurationExtractor.class,
     		"OutputFile",
     		options.outputFile);
    		
		SimplePipeline.runPipeline(collectionReadertemporalDurationExtractor);
	}
  
  public static class TemporalDurationExtractor extends JCasAnnotator_ImplBase {
    
        name = "OutputFile",
        mandatory = true,
        description = "path to the output file that will store the distributions")
    private String outputFilePath;
    private File outputFile;
    
    // regular expression to match temporal durations in time mention annotations
    private final static String regex = "(sec|min|hour|hrs|day|week|wk|month|year|yr|decade)";
    
    // mapping between time units and their normalized forms
    private final static Map<StringStringabbreviationToTimeUnit = ImmutableMap.<StringString>builder()
        .put("sec""second")
        .put("min""minute")
        .put("hour""hour")
        .put("hrs""hour")
        .put("day""day")
        .put("week""week")
        .put("wk""week")
        .put("month""month")
        .put("year""year")
        .put("yr""year")
        .put("decade""decade")
        .build(); 
    
    // max distance between an event and the time mention that defines the event's duration
    private final static int MAXDISTANCE = 2;
    // regex to match different time units (e.g. 'day', 'month')
    
    @Override
    public void initialize(UimaContext contextthrows ResourceInitializationException  {
      super.initialize(context);
       = new File();
      if(.exists()) {
        ..println( + " exists... deleting...");
        .delete();
      }
    }
    
    
    @Override
    public void process(JCas jCasthrows AnalysisEngineProcessException {
      Collection<DocumentIDids = JCasUtil.select(jCasDocumentID.class);
      String fileName = ids.iterator().next().getDocumentID();
      String mentionText = fileName.split("\\.")[0]; // e.g. "smoker.txt"
      // counts of different time units for this sign/symptom
      Multiset<StringdurationDistribution = HashMultiset.create();
      for(EventMention mention : JCasUtil.select(jCas)) {
        if(mention.getCoveredText().equals(mentionText)) {
          if(isNegated(jCasmention) || isMedicationPattern(jCasmention)) {
            continue;
          }
          TimeMention nearestTimeMention = getNearestTimeMention(jCasmention);
          if(nearestTimeMention == null) {
            continue;
          }
          
          // try to parse this timex with Bethard normalizer
          HashSet<StringtimeUnits = Utils.getTimeUnits(nearestTimeMention.getCoveredText());
          if(timeUnits.size() > 0) {
            for(String timeUnit : timeUnits) {
              durationDistribution.add(timeUnit);
            }
          } else {
            // could be an abbreviation e.g. "wks"
            Matcher matcher = .matcher(nearestTimeMention.getCoveredText());
            // need a loop to handle things like 'several days/weeks'
            while(matcher.find()) {
              String matchedTimeUnit = matcher.group(); // e.g. "wks"
              String normalizedTimeUnit = .get(matchedTimeUnit);
              durationDistribution.add(normalizedTimeUnit);
            }            
          }
        }
      }
      if(durationDistribution.size() > 0) { 
        try {
          Files.append(Utils.formatDistribution(mentionTextdurationDistribution", "false) + "\n".);
        } catch (IOException e) {
          ..println("Could not open output file: " + );
        } 
      } else {
        ..println("No duration data for: " + mentionText);
      }
    }
    
    
Return true if sign/symptom is negated. TODO: using rules for now; switch to using a negation module
    private static boolean isNegated(JCas jCasEventMention mention) {
      
      for(BaseToken token : JCasUtil.selectPreceding(jCasBaseToken.classmention, 3)) {
        if(token.getCoveredText().equals("no") || 
           token.getCoveredText().equals("not") || 
           token.getCoveredText().equals("off")) {
          return true;
        }
      }
      
      return false;
    }

    
Return true of this is a medication pattern. E.g. five (5) ml po qid (4 times a day) as needed for heartburn for 2 weeks.
    private static boolean isMedicationPattern(JCas jCasEventMention mention) {
      
      for(BaseToken token : JCasUtil.selectPreceding(jCasBaseToken.classmention, 1)) {
        if(token.getCoveredText().equals("for")) {
          return true;
        }
      }
           
      return false;
    }
    
    
Find nearest time mention on the right that is within allowable distance. Return null if none found.
    private static TimeMention getNearestTimeMention(JCas jCasEventMention mention) {
      List<TimeMentiontimeMentions = JCasUtil.selectFollowing(jCasTimeMention.classmention, 1);
      if(timeMentions.size() < 1) {
        return null;
      }
      
      assert timeMentions.size() == 1;
      
      TimeMention nearestTimeMention = timeMentions.get(0);
      int distance = JCasUtil.selectBetween(jCasBaseToken.classmentionnearestTimeMention).size();
      if(distance > ) {
        return null;
      }
      
      return nearestTimeMention;
    }
    
    @SuppressWarnings("unused")
    private static String getAnnotationContext(Annotation annotationint maxContextWindowSize) {
      
      String text = annotation.getCAS().getDocumentText();
      int begin = Math.max(0, annotation.getBegin() - maxContextWindowSize);
      int end = Math.min(text.length(), annotation.getEnd() + maxContextWindowSize);
      
      return text.substring(beginend).replaceAll("[\r\n]"" ");
    }
    
    @SuppressWarnings("unused")
    private static String formatDistribution(Multiset<StringdurationDistribution) {
      
      List<StringdurationBins = Arrays.asList("second""minute""hour""day""week""month""year""decade");
      List<IntegerdurationValues = new LinkedList<Integer>();
      
      for(String durationBin : durationBins) {
        durationValues.add(durationDistribution.count(durationBin));
      }
      Joiner joiner = Joiner.on(',');
      return joiner.join(durationValues);
    }
  }
  
  private static CollectionReader getCollectionReader(List<Fileitemsthrows Exception {
    String[] paths = new String[items.size()];
    Collections.sort(itemsnew FileSizeComparator());
    for (int i = 0; i < paths.length; ++i) {
      paths[i] = items.get(i).getPath();
    }
    
    return CollectionReaderFactory.createReader(
        XMIReader.class,
        .,
        paths);
  }
  
  public static class FileSizeComparator implements Comparator<File> {
    @Override
    public int compare(File o1File o2) {
      if(o1.length() > o2.length()){
        return 1;
      }else if(o1.length() < o2.length()){
        return -1;
      }else{
        return 0;
      }
    } 
  }
 }