Start line:  
End line:  

Snippet Preview

Snippet HTML Code

Stack Overflow Questions
  /*
   * This file is part of the DiffX library.
   *
   * For licensing information please see the file license.txt included in the release.
   * A copy of this licence can also be found at
   *   http://www.opensource.org/licenses/artistic-license-2.0.php
   */
  package com.topologi.diffx.load.text;
  
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 
 /**
  * The tokeniser for characters events.
  *
  * <p>This class is not synchronized.
  *
  * @author Christophe Lauret
  * @version 11 May 2010
  */
 
 public final class TokenizerByText implements TextTokenizer {

  
   /**
    * Defines the whitespace processing.
    */
 
   private final WhiteSpaceProcessing whitespace;

  
/**
 * Creates a new tokenizer.
 *
 * @param whitespace the whitespace processing for this tokenizer.
 *
 * @throws NullPointerException if the white space processing is not specified.
 */
   public TokenizerByText(WhiteSpaceProcessing whitespace) {
     // Fail fast: tokenize() switches on this value, so a null must never be stored.
     if (whitespace == null) {
       throw new NullPointerException("the white space processing must be specified.");
     }
     this.whitespace = whitespace;
   }

  
 
   public List<TextEventtokenize(CharSequence seq) {
     if (seq == nullreturn null;
     if (seq.length() == 0) return Collections.emptyList();
     int x = TokenizerUtils.getLeadingWhiteSpace(seq);
     int y = TokenizerUtils.getTrailingWhiteSpace(seq);
     // no leading or trailing spaces return a singleton in all configurations
     if (x == 0 && y == 0) {
       TextEvent e = new CharactersEvent(seq);
       return Collections.singletonList(e);
     }
     // The text node is only white space (white space = trailing space)
     if (x == seq.length()) {
       switch (this.) {
         case :
           return Collections.singletonList((TextEvent)SpaceEvent.getInstance(seq.toString()));
         case :
           return Collections.singletonList((TextEvent)new IgnorableSpaceEvent(seq.toString()));
         case :
           return Collections.emptyList();
         default:
       }
       TextEvent e = new CharactersEvent(seq);
       return Collections.singletonList(e);
     }
     // some trailing or leading whitespace, behaviour changes depending on whitespace processing
     List<TextEventevents = null;
     switch (this.) {
       case :
         events = new ArrayList<TextEvent>(1 + (x > 0 ? 1 : 0) + (y > 0 ? 1 : 0));
         if (x > 0) {
           events.add(SpaceEvent.getInstance(seq.subSequence(0, x)));
         }
         events.add(new CharactersEvent(seq.subSequence(xseq.length()-y)));
         if (y > 0) {
           events.add(SpaceEvent.getInstance(seq.subSequence(seq.length()-yseq.length())));
         }
         break;
       case :
         events = new ArrayList<TextEvent>(1 + (x > 0 ? 1 : 0) + (y > 0 ? 1 : 0));
         if (x > 0) {
           events.add(new IgnorableSpaceEvent(seq.subSequence(0, x)));
         }
         events.add(new CharactersEvent(seq.subSequence(xseq.length()-y)));
         if (y > 0) {
           events.add(new IgnorableSpaceEvent(seq.subSequence(seq.length()-yseq.length())));
         }
         break;
       case :
         TextEvent e = new CharactersEvent(seq.subSequence(xseq.length()-y));
        events = Collections.singletonList(e);
        break;
      default:
    }
    return events;
  }

  
Always TextGranularity.CHARACTER.
    return .;
  }
New to GrepCode? Check out our FAQ X