// Start line:
// End line:
//
// Snippet Preview
//
// Snippet HTML Code
//
// Stack Overflow Questions
  package eu.clarin.weblicht.wlfxb.io;
  
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.EnumSet;
 import java.util.List;
 import org.junit.Test;
 
 public class TextCorpusStreamedTest {
 
     private static final String INPUT_FILE_FOR_READ = "/data/streamer/tcf-text_toks_sents_pos_lem.xml";
     private static final String INPUT_FILE_FOR_REWRITE = "/data/streamer/tcf-text_tok_pos.xml";
     private static final String OUTPUT_FILE_ADD_LEMMS_SENTS = "/tmp/output-add_lemms_sents.xml";
     private static final String OUTPUT_FILE_ADD_LEMMS_SENTS_METADATA = "/tmp/output-add_lemms_sents_metadata.xml";
 
     @Test
     public void testRead() throws Exception {
         ..println();
         ..println("--- READ TEST START ---");
         ..println("--- READ TEST END ---");
         ..println();
     }
 
     private void testRead(InputStream isEnumSet<TextCorpusLayerTaglayersToReadthrows Exception {
         TextCorpus tc = new TextCorpusStreamed(islayersToRead);
         ..println(tc.getTokensLayer());
         SentencesLayer sentencesLayer = tc.getSentencesLayer();
         Sentence sentence1 = sentencesLayer.getSentence(0);
         Token[] sentence1Tokens = sentencesLayer.getTokens(sentence1);
         ..println(Arrays.toString(sentence1Tokens));
         ..println(tc);
     }
 
     @Test
    
This test rewrites tcf from input file to output file adding lemmas and sentences layers. All the layers in input file, including those not used (not requested for read) are rewritten into the output file.
 
     public void testReadWriteAddingSentsLemms() throws Exception {
         ..println();
         ..println("--- WRITE TEST START ---");
         File ofile = new File();
         OutputStream os = new FileOutputStream(ofile);
         testReadWrite(is, EnumSet.of(.),
                 osfalse);
         ..println("--- WRITE TEST END ---");
         ..println();
     }
 
     @Test
    
This test rewrites tcf from input file to output file adding lemmas and sentences layers. Additionally it adds MetaData items. All the layers in input file, including those not used (not requested for read) are rewritten into the output file. All the metadata in the input file is also rewritten into the output file.
 
     public void testReadWriteAddingSentsLemmsMetadata() throws Exception {
         ..println();
         ..println("--- WRITE TEST START ---");
         File ofile = new File();
         OutputStream os = new FileOutputStream(ofile);
                 os,
                 createTestMetadata().getMetaDataItems());
         ..println("--- WRITE TEST END ---");
         ..println();
     }
 
     private void testReadWrite(InputStream is,
             EnumSet<TextCorpusLayerTaglayersToReadOutputStream os,
             boolean outputAsXmlFragmentthrows WLFormatException {
 
         TextCorpusStreamed tc = new TextCorpusStreamed(islayersToReadosoutputAsXmlFragment);
         ..println();
         ..println("Before write:\n" + tc);
         ..println();
 
         addLemmasSentsAndCloseTheStreams(tc);
 
         ..println();
        ..println("INSPECT XML RESULT IN:\n" + );
    }
    private void testReadWriteAddingMetadata(
            InputStream isEnumSet<TextCorpusLayerTaglayersToReadOutputStream os,
            List<MetaDataItemmetadataItemsToAddthrows WLFormatException {
        TextCorpusStreamed tc = new TextCorpusStreamed(
                islayersToReadosmetadataItemsToAdd);
        ..println();
        ..println("Before write:\n" + tc);
        ..println();
        ..println();
        ..println("INSPECT XML RESULT IN:\n" + );
    }
        // add lemmas:
        LemmasLayer lemmasLayer = tc.createLemmasLayer();
        for (int i = 0; i < tc.getTokensLayer().size(); i++) {
            Token token = tc.getTokensLayer().getToken(i);
            lemmasLayer.addLemma("_" + token.getString() + "_"token);
        }
        ..println("After lemmas write:\n" + tc);
        // add sentences:
        SentencesLayer sentsLayer = tc.createSentencesLayer();
        List<Tokensent1Tokens = new ArrayList<Token>();
        for (int i = 0; i < 5; i++) {
            sent1Tokens.add(tc.getTokensLayer().getToken(i));
        }
        sentsLayer.addSentence(sent1Tokens, 0, 15);
        List<Tokensent2Tokens = new ArrayList<Token>();
        for (int i = 5; i < tc.getTokensLayer().size(); i++) {
            sent2Tokens.add(tc.getTokensLayer().getToken(i));
        }
        sentsLayer.addSentence(sent2Tokens, 16, 44);
        ..println();
        ..println("After sentences write:\n" + tc);
        // IMPORTANT: Close the TextCorpusStreamer streams!!!
        tc.close();
    }
        MetaData metaData = new MetaData();
        metaData.addMetaDataItem("sentences-boundary-detector""Tuebingen Uni");
        metaData.addMetaDataItem("lemmatizer""Tuebingen Uni");
        return metaData;
    }
// New to GrepCode? Check out our FAQ X