Start line:  
End line:  

Snippet Preview

Snippet HTML Code

Stack Overflow Questions
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to you under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 
 package org.apache.ctakes.temporal.ae;
 
 import java.io.File;
 import java.net.URI;
 import java.util.List;
 
 import  org.cleartk.ml.CleartkAnnotator;
 import  org.cleartk.ml.Feature;
 import  org.cleartk.ml.Instance;
 import  org.cleartk.ml.chunking.BioChunking;
 import  org.cleartk.ml.feature.extractor.CleartkExtractor;
 import  org.cleartk.ml.feature.extractor.CleartkExtractor.Following;
 import  org.cleartk.ml.feature.extractor.CleartkExtractor.Preceding;
 import  org.cleartk.ml.feature.extractor.CombinedExtractor1;
 import  org.cleartk.ml.feature.extractor.CoveredTextExtractor;
 import  org.cleartk.ml.feature.extractor.FeatureExtractor1;
 import  org.cleartk.ml.feature.extractor.TypePathExtractor;
 import  org.cleartk.ml.feature.function.CharacterCategoryPatternFunction;
 import  org.cleartk.ml.feature.function.CharacterCategoryPatternFunction.PatternType;
 import  org.cleartk.ml.jar.DefaultDataWriterFactory;
 import  org.cleartk.ml.jar.DirectoryDataWriterFactory;
 import  org.cleartk.ml.jar.GenericJarClassifierFactory;
 
 
 	// Configuration parameter names (PARAM_*) and the fields that receive their values.
 	// NOTE(review): each "mandatory = ..., description = ...)" run below reads like the attribute
 	// list of a parameter annotation whose opening line is absent from this copy, and "name = ,"
 	// has lost its value; restore both from the upstream source before compiling.
 	public static final String PARAM_FEATURE_SELECTION_THRESHOLD = "WhetherToDoFeatureSelection";
 
 	    mandatory = false,
 	    description = "the Chi-squared threshold at which features should be removed")
 	// Chi-squared feature selection threshold; initialised to 1f.
 	protected Float featureSelectionThreshold = 1f;
 	
 	public static final String PARAM_FEATURE_SELECTION_URI = "FeatureSelectionURI";
 
 			mandatory = false,
 			description = "provides a URI where the feature selection data will be written")
 	// Location of the feature selection data; has no initial value.
 	protected URI featureSelectionURI;
 	
 	public static final String PARAM_SMOTE_NUM_NEIGHBORS = "NumOfNeighborForSMOTE";
 
 	    name = ,
 	    mandatory = false,
 	    description = "the number of neighbors used for minority instances for SMOTE algorithm")
 	// Number of neighbors for SMOTE; initialised to 0f.
 	protected Float smoteNumOfNeighbors = 0f;
 
 	public static final String PARAM_TIMEX_VIEW = "TimexView";
 	// NOTE(review): no field declaration follows this attribute list in this copy; the field it
 	// belonged to appears to be missing.
 	    name = ,
 	    mandatory = false,
 	    description = "View to write timexes to (used for ensemble methods)")
 
 			// Parameter list and body of a factory that builds a training-mode description
 			// (PARAM_IS_TRAINING = true) from the given data writer class and output directory.
 			// NOTE(review): the line carrying the modifiers, return type and method name is absent
 			// from this copy, and ")" is missing before "throws" below.
 			Class<?> dataWriterClass,
 					File outputDirectory,
 					float featureSelect,
					float smoteNeighborNumberthrows ResourceInitializationException {
		// NOTE(review): unlike createEnsembleDescription, no component class is passed as the
		// first argument here; presumably it was lost from this copy — confirm.
		return AnalysisEngineFactory.createEngineDescription(
				CleartkAnnotator.PARAM_IS_TRAINING,
				true,
				DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
				dataWriterClass,
				DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
				outputDirectory,
				// NOTE(review): the two values below have no parameter-name key in front of them,
				// unlike the name/value pairs above; presumably the keys were lost — confirm.
		        featureSelect,
		        smoteNeighborNumber);
	}
		// Body of a factory whose signature is absent from this copy: builds a prediction-mode
		// description (PARAM_IS_TRAINING = false) and passes modelPath as the classifier jar path.
		// NOTE(review): modelPath is not declared anywhere visible, and no component class is
		// passed as the first argument; restore the method header and arguments from upstream.
		return AnalysisEngineFactory.createEngineDescription(
				CleartkAnnotator.PARAM_IS_TRAINING,
				false,
				GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
				modelPath);
	}

  

	/**
	 * @deprecated Use a String path instead of a File. ClearTK will automatically resolve the
	 *             String to an InputStream, which allows resources to be read from within a jar
	 *             as well as from a File.
	 */
		// Body of a factory whose signature is absent from this copy: builds a prediction-mode
		// description (PARAM_IS_TRAINING = false), passing a File built from modelDirectory and
		// "model.jar" as the classifier jar path.
		// NOTE(review): a "," is missing between modelDirectory and "model.jar", and the final
		// argument has no parameter-name key in front of it; confirm both against upstream.
		return AnalysisEngineFactory.createEngineDescription(
				CleartkAnnotator.PARAM_IS_TRAINING,
				false,
				GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
				new File(modelDirectory"model.jar"),
				TimeAnnotator.createFeatureSelectionURI(modelDirectory));
	}
	public static AnalysisEngineDescription createEnsembleDescription(File modelDirectoryString mappedView)
    return AnalysisEngineFactory.createEngineDescription(
        TimeAnnotator.class,
        CleartkAnnotator.PARAM_IS_TRAINING,
        false,
        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
        new File(modelDirectory"model.jar"),
        mappedView,
        TimeAnnotator.createFeatureSelectionURI(modelDirectory));	  
	}
	protected List<FeatureExtractor1> tokenFeatureExtractors;
	protected List<CleartkExtractor> contextFeatureExtractors;
	//  protected List<FeatureExtractor1> parseFeatureExtractors;
	private BioChunking<BaseTokenTimeMentiontimeChunking;
	private static final String FEATURE_SELECTION_NAME = "SelectNeighborFeatures";
	// Factory for the feature selection object, parameterised by a threshold.
	// NOTE(review): in this copy the ">" closing the return type's type argument is missing and
	// the body is empty, although a non-void method needs a return statement; restore the
	// declaration and body from the upstream source.
	public static FeatureSelection<StringcreateFeatureSelection(double threshold) {
	}
	public static URI createFeatureSelectionURI(File outputDirectoryName) {
		return new File(outputDirectoryName + "_Chi2_extractor.dat").toURI();
	}
	/**
	 * Initialises the annotator: calls the superclass initialiser, then sets up the chunking,
	 * the token-level and context feature extractors, and feature selection.
	 * <p>
	 * NOTE(review): this copy has lost the field names after "this.", several "," separators,
	 * the ")" before "throws", the condition guarding the feature-selection branch, and the
	 * bodies of the try/catch. It does not compile as shown; restore it from the upstream
	 * source. The comments below describe only what is still visible.
	 *
	 * @param context the UIMA context, forwarded to {@code super.initialize}
	 * @throws ResourceInitializationException declared by the signature
	 */
	public void initialize(UimaContext contextthrows ResourceInitializationException {
		super.initialize(context);
		// define chunking
		this. = new BioChunking<BaseTokenTimeMention>(BaseToken.classTimeMention.class);
		// Token-level features: covered text, two character-category patterns
		// (REPEATS_MERGED and ONE_PER_CHAR) and the "partOfSpeech" type path.
		// NOTE(review): the argument list ends with "," and is never closed in this copy.
		CombinedExtractor1 allExtractors = new CombinedExtractor1(
				new CoveredTextExtractor(),
		        CharacterCategoryPatternFunction.<BaseToken>createExtractor(PatternType.REPEATS_MERGED),
		        CharacterCategoryPatternFunction.<BaseToken>createExtractor(PatternType.ONE_PER_CHAR),
				new TypePathExtractor(BaseToken.class"partOfSpeech"),
		//    CombinedExtractor1 parseExtractors = new CombinedExtractor(
		//        new ParseSpanFeatureExtractor()
		//        );
		this. = new ArrayList<FeatureExtractor1>();
		this..add(allExtractors);
		// Context features: the same extractors applied to the 3 preceding and 3 following tokens.
		this. = new ArrayList<CleartkExtractor>();
		this..add(new CleartkExtractor(
				BaseToken.class,
				allExtractors,
				new Preceding(3),
				new Following(3)));
		//    this.parseFeatureExtractors = new ArrayList<ParseSpanFeatureExtractor>();
		//    this.parseFeatureExtractors.add(new ParseSpanFeatureExtractor());
		//initialize feature selection
		// NOTE(review): the "if (...) {" that opens this branch is missing from this copy.
			this. = null;
else {
			if (this. != null) {
				// NOTE(review): the statement inside "try" and the handling inside "catch" are
				// both missing from this copy, as is the "}" closing the try block.
				try {
catch (IOException e) {
				}
			}
		}
	}
	/**
	 * Processes one segment sentence by sentence: builds a feature list for every token, then
	 * either writes a training instance or classifies the token; in prediction mode the outcome
	 * labels of each sentence are turned into chunks.
	 * <p>
	 * NOTE(review): many identifiers and separators are missing from this copy of the method
	 * (the names after "this.", "," between arguments, ">" on generic types, the "}" before each
	 * "catch" and the catch bodies), so it does not compile as shown. Restore it from the
	 * upstream source; the comments below describe only what is still visible.
	 *
	 * @param jCas the CAS being processed
	 * @param segment the segment whose covered sentences are processed
	 * @throws AnalysisEngineProcessException declared by the signature; no throw statement is
	 *         visible in this copy
	 */
	public void process(JCas jCasSegment segmentthrows AnalysisEngineProcessException {
		//TRY SMOTE algorithm here to generate more minority class samples
		// The (missing) operand is rounded up and truncated to an int for the SMOTEplus constructor.
	    SMOTEplus smote = new SMOTEplus((int)Math.ceil(this.));
	    
		// classify tokens within each sentence
		for (Sentence sentence : JCasUtil.selectCovered(jCasSentence.classsegment)) {
			List<BaseTokentokens = JCasUtil.selectCovered(jCasBaseToken.classsentence);
			// during training, the list of all outcomes for the tokens
			List<Stringoutcomes;
			if (this.isTraining()) {
				List<TimeMentiontimes = JCasUtil.selectCovered(jCasTimeMention.classsentence);
				outcomes = this..createOutcomes(jCastokenstimes);
			}
			// during prediction, the list of outcomes predicted so far
			else {
				outcomes = new ArrayList<String>();
			}
			// extract features for all tokens
			// tokenIndex starts at -1 because it is incremented at the top of each iteration
			int tokenIndex = -1;
			for (BaseToken token : tokens) {
				++tokenIndex;
				List<Feature> features = new ArrayList<Feature>();
				// features from token attributes
				for (FeatureExtractor1 extractor : this.) {
					features.addAll(extractor.extract(jCastoken));
				}
				// features from surrounding tokens
				for (CleartkExtractor extractor : this.) {
					features.addAll(extractor.extractWithin(jCastokensentence));
				}
				// features from previous classifications
				// i counts down from nPreviousClassifications, so the older outcome is added
				// first; positions before the start of the sentence use "O"
				int nPreviousClassifications = 2;
				for (int i = nPreviousClassificationsi > 0; --i) {
					int index = tokenIndex - i;
					String previousOutcome = index < 0 ? "O" : outcomes.get(index);
					features.add(new Feature("PreviousOutcome_" + ipreviousOutcome));
				}
				// add the segment ID as a feature
				features.add(new Feature("SegmentID"segment.getId()));
				// features from dominating parse tree
				//        for(FeatureExtractor1 extractor : this.parseFeatureExtractors){
				// walk back over the run of consecutive non-"O" outcomes that ends just before
				// this token; startToken ends up as the first token of that run
				BaseToken startToken = token;
				for(int i = tokenIndex-1; i >= 0; --i){
					String outcome = outcomes.get(i);
					if(outcome.equals("O")){
						break;
					}
					startToken = tokens.get(i);
				}
				// features over the span from startToken's begin to this token's end
				// NOTE(review): the extractor being called has lost its name in this copy
				features.addAll(.extract(jCasstartToken.getBegin(), token.getEnd()));
				//        }
				// apply feature selection, if necessary
		        if (this. != null) {
		          features = this..transform(features);
		        }
				// if training, write to data file
		        if (this.isTraining()) {
		        	String outcome = outcomes.get(tokenIndex);
		        	// "O" instance: written unconditionally here (no down-sampling is visible in this copy)
		        	if (outcome.equals("O")) {
		        		this..write(new Instance<String>(outcomefeatures));
		        	}else{//for minority instances:
		        		Instance<StringminorityInst = new Instance<String>(outcomefeatures);
		        		this..write(minorityInst);
		        		smote.addInstance(minorityInst);//add minority instances to SMOTE algorithm
		        	}
		        }else {// if predicting, add prediction to outcomes
		        	outcomes.add(this..classify(features));
		        }
			}
			// during prediction, convert chunk labels to times and add them to the CAS
			if (!this.isTraining()) {
				JCas timexCas;
				// NOTE(review): the argument of getView(), the "}" closing the try block and the
				// handling inside "catch" are missing from this copy
				try {
				  timexCas = jCas.getView();
catch (CASException e) {
				}
				this..createChunks(timexCastokensoutcomes);
			}
		}
		// runs once, after all sentences of the segment have been handled
		if(this.isTraining() && this. >= 1){ //add synthetic instances to datawriter, if smote is selected
	    	Iterable<Instance<String>> syntheticInsts = smote.populateMinorityClass();
	    	for( Instance<StringsytheticInstsyntheticInsts){
	    		this..write(sytheticInst);
	    	}
	    }
	}
New to GrepCode? Check out our FAQ X