<code>StringTokenizer</code> class. The <code>CSVTokenizer</code>
* methods do not distinguish among identifiers, numbers, and quoted strings, nor do they recognize and skip comments.
*
* The set of separators (the characters that separate tokens) may be specified either at creation time or on a
* per-token basis.
*
* An instance of <code>CSVTokenizer</code> behaves in one of two ways, depending on whether it was created with the
* <code>returnSeparators</code> flag having the value <code>true</code> or <code>false</code>:
* <ul>
* <li>If the flag is <code>false</code>, delimiter characters serve to separate tokens. A token is a maximal
* sequence of consecutive characters that are not separators.</li>
* <li>If the flag is <code>true</code>, delimiter characters are themselves considered to be tokens. A token is
* thus either one delimiter character, or a maximal sequence of consecutive characters that are not
* separators.</li>
* </ul>
*
* A <code>CSVTokenizer</code> object internally maintains a current position within the string to be tokenized.
* Some operations advance this current position past the characters processed.
*
* A token is returned by taking a substring of the string that was used to create the <code>CSVTokenizer</code>
* object.
*
* The following is one example of the use of the tokenizer. The code:
* <pre>
*     CSVTokenizer csvt = new CSVTokenizer("this,is,a,test");
*     while (csvt.hasMoreTokens()) {
*         println(csvt.nextToken());
*     }
* </pre>
* prints the following output:
* <pre>
*     this
*     is
*     a
*     test
* </pre>
*
 * @author abupon
 */
public class CSVTokenizer implements Enumeration
{
/**
 * The complete record that should be separated into elements. It is stored trimmed of leading and trailing
 * whitespace.
 */
private String record;

/**
 * The separator string that delimits the tokens of the record.
 */
private String separator;

/**
 * The quoting string. A token that starts with this string is read up to the matching closing quote, so that it
 * may contain the separator.
 */
private String quate;

/**
 * The current parsing position. After a token has been returned it points at the first character after that
 * token (normally the separator), or at the end of the record.
 */
private int currentIndex;

/**
 * A flag indicating that the current parse position is before the start, i.e. that no token has been read yet
 * and therefore no separator has to be skipped before the next token.
 */
private boolean beforeStart;

/**
 * A possible separator constant: the comma.
 */
public static final String SEPARATOR_COMMA = ",";

/**
 * A possible separator constant: the tabulator.
 */
public static final String SEPARATOR_TAB = "\t";

/**
 * A possible separator constant: the space.
 */
public static final String SEPARATOR_SPACE = " ";

/**
 * A possible quote character constant: the double quote.
 */
public static final String DOUBLE_QUATE = "\"";

/**
 * A possible quote character constant: the single quote.
 */
public static final String SINGLE_QUATE = "'";

/**
 * Constructs a csv tokenizer for the specified string. The <code>theSeparator</code> argument is the separator
 * for separating tokens and the <code>theQuate</code> argument is the string that encloses quoted tokens.
 *
 * For example, the record <code>this,is,a,test</code> tokenized with <code>CSVTokenizer.SEPARATOR_COMMA</code>
 * yields the tokens:
 * <pre>
 *     this
 *     is
 *     a
 *     test
 * </pre>
 *
 * The separator string is skipped and only serves as separator between tokens; it is never returned as a token.
 *
 * @param aString      a string to be parsed; it is stored with leading and trailing whitespace trimmed.
 * @param theSeparator the separator (CSVTokenizer.SEPARATOR_COMMA, CSVTokenizer.SEPARATOR_TAB,
 *                     CSVTokenizer.SEPARATOR_SPACE, etc.).
 * @param theQuate     the quate (CSVTokenizer.SINGLE_QUATE, CSVTokenizer.DOUBLE_QUATE, etc.).
 * @throws NullPointerException if any of the arguments is <code>null</code>.
 */
public CSVTokenizer(final String aString, final String theSeparator,
                    final String theQuate)
{
    // The arguments are checked in declaration order, so the first offending one is the one reported.
    rejectNull(aString, "The given string is null");
    rejectNull(theSeparator, "The given separator is null");
    rejectNull(theQuate, "The given quate is null");

    // Parsing starts in front of the first token of the record.
    this.beforeStart = true;
    this.currentIndex = 0;
    this.quate = theQuate;
    this.separator = theSeparator;
    // Note: trim() also strips leading/trailing tabs and spaces, even when one of them is the separator.
    this.record = aString.trim();
}

/**
 * Throws a NullPointerException carrying the given message if the given value is <code>null</code>.
 *
 * @param value   the value to check.
 * @param message the exception message to use.
 */
private static void rejectNull(final Object value, final String message)
{
    if (value == null)
    {
        throw new NullPointerException(message);
    }
}
/**
 * Constructs a csv tokenizer for the specified string, using <code>CSVTokenizer.DOUBLE_QUATE</code> as the
 * quoting string. The <code>theSeparator</code> argument is the separator for separating tokens. The separator
 * string itself will not be treated as a token.
 *
 * @param aString      a string to be parsed.
 * @param theSeparator the separator (CSVTokenizer.SEPARATOR_COMMA, CSVTokenizer.SEPARATOR_TAB,
 *                     CSVTokenizer.SEPARATOR_SPACE, etc.).
 */
public CSVTokenizer(final String aString, final String theSeparator)
{
    // Delegate to the full constructor; only the quoting string is defaulted here.
    this(aString, theSeparator, DOUBLE_QUATE);
}
/**
 * Constructs a csv tokenizer for the specified string. The tokenizer uses the default separator, which is
 * <code>CSVTokenizer.SEPARATOR_COMMA</code>, and the default quoting string, which is
 * <code>CSVTokenizer.DOUBLE_QUATE</code>. The separator string itself will not be treated as a token.
 *
 * @param aString a string to be parsed.
 */
public CSVTokenizer(final String aString)
{
    // Both defaults are spelled out and handed straight to the full constructor.
    this(aString, SEPARATOR_COMMA, DOUBLE_QUATE);
}
/**
 * Tests if there are more tokens available from this tokenizer's string. If this method returns
 * <code>true</code>, then a subsequent call to <code>nextToken</code> with no argument will not fail for lack of
 * tokens.
 *
 * The test succeeds as long as the current position lies in front of the end of the record. A record that ends
 * with a separator therefore still reports one more (empty) token.
 *
 * @return <code>true</code> if and only if there is at least one token in the string after the current position;
 *         <code>false</code> otherwise.
 */
public boolean hasMoreTokens()
{
    final int recordLength = this.record.length();
    return recordLength > this.currentIndex;
}
/**
 * Returns the next token from this tokenizer.
 *
 * Except for the very first token, the separator that ended the previous token is skipped first (by its length;
 * the skipped characters are not compared with the separator). If the text at that position starts with the
 * quoting string, the token extends to the matching closing quote and may contain the separator; a doubled
 * quoting string inside such a token stands for one literal quoting string. Otherwise the token extends to the
 * next occurrence of the separator or, if there is none, to the end of the record.
 *
 * An empty quoting string disables quote handling and an empty separator never matches, so that neither can make
 * the tokenizer loop without advancing. If an exception is thrown, the current position is left unchanged.
 *
 * @return the next token from this tokenizer; quoted tokens are returned without their enclosing quotes.
 * @throws NoSuchElementException   if there are no more tokens in this tokenizer's string.
 * @throws IllegalArgumentException if the record is malformed: a quoted token is not terminated, or fewer
 *                                  characters than a separator is long follow a token.
 */
public String nextToken()
    throws NoSuchElementException, IllegalArgumentException
{
    if (!this.hasMoreTokens())
    {
        throw new NoSuchElementException();
    }

    // Work on a local position and commit it only on success, so that a malformed record does not leave the
    // tokenizer half-advanced.
    int start = this.currentIndex;
    if (!this.beforeStart)
    {
        // The stored position points at the separator that ended the previous token.
        start += this.separator.length();
    }
    if (start > this.record.length())
    {
        // Fewer characters are left than the separator is long (garbage after a quoted token).
        throw new IllegalArgumentException("Illegal format");
    }

    final int quoteLength = this.quate.length();
    final String token;
    final int next;
    if (quoteLength > 0 && this.record.startsWith(this.quate, start))
    {
        final StringBuffer buffer = new StringBuffer();
        int pos = start + quoteLength;
        while (true)
        {
            final int close = this.record.indexOf(this.quate, pos);
            if (close < 0)
            {
                // The opening quote is never closed.
                throw new IllegalArgumentException("Illegal format");
            }
            buffer.append(this.record.substring(pos, close));
            pos = close + quoteLength;
            if (!this.record.startsWith(this.quate, pos))
            {
                // A single quoting string: this was the closing quote.
                break;
            }
            // A doubled quoting string is an escaped, literal quoting string; keep one copy and go on.
            buffer.append(this.quate);
            pos += quoteLength;
        }
        token = buffer.toString();
        next = pos;
    }
    else
    {
        // An empty separator would match everywhere without consuming anything; treat it as "not found".
        final int end = (this.separator.length() == 0) ? -1 : this.record.indexOf(this.separator, start);
        if (end >= 0)
        {
            token = this.record.substring(start, end);
            // Stay on the separator; it is skipped when the following token is requested.
            next = end;
        }
        else
        {
            token = this.record.substring(start);
            next = this.record.length();
        }
    }

    this.beforeStart = false;
    this.currentIndex = next;
    return token;
}
/**
 * Returns the next token in this tokenizer's string after switching to a new separator. First, the separator
 * used by this <code>CSVTokenizer</code> object is changed to the given string (the whole string is the
 * separator, not a set of characters). Then the next token in the string after the current position is returned,
 * exactly as by {@link #nextToken()}. The new separator remains the default after this call.
 *
 * NOTE(review): the separator that ended the previous token is skipped using the length of the separator that is
 * current when the next token is read, i.e. the new one. Switching in mid-record to a separator of a different
 * length therefore looks unsafe; confirm against callers.
 *
 * @param theSeparator the new separator; must not be <code>null</code>.
 * @return the next token, after switching to the new separator.
 * @throws NullPointerException if the given separator is <code>null</code>; the tokenizer is left unchanged.
 * @throws java.util.NoSuchElementException
 *          if there are no more tokens in this tokenizer's string.
 */
public String nextToken(final String theSeparator)
{
    // Same validation as in the constructor; storing null would break every later call on this tokenizer.
    if (theSeparator == null)
    {
        throw new NullPointerException("The given separator is null");
    }
    this.separator = theSeparator;
    return nextToken();
}
/**
 * Returns the same value as the <code>hasMoreTokens</code> method. It exists so that this class can implement
 * the <code>Enumeration</code> interface.
 *
 * @return <code>true</code> if there are more tokens; <code>false</code> otherwise.
 * @see java.util.Enumeration
 * @see #hasMoreTokens()
 */
public boolean hasMoreElements()
{
    final boolean tokensLeft = this.hasMoreTokens();
    return tokensLeft;
}
/**
 * Returns the same value as the <code>nextToken</code> method, except that its declared return value is
 * <code>Object</code> rather than <code>String</code>. It exists so that this class can implement the
 * <code>Enumeration</code> interface.
 *
 * @return the next token in the string.
 * @throws java.util.NoSuchElementException
 *          if there are no more tokens in this tokenizer's string.
 * @see java.util.Enumeration
 * @see #nextToken()
 */
public Object nextElement()
{
    final String token = this.nextToken();
    return token;
}
/**
 * Calculates the number of times that this tokenizer's <code>nextToken</code> method can be called before it
 * generates an exception. The remaining tokens are read once to count them; afterwards the current position is
 * put back, so it is not advanced by this call. The position is restored even if reading fails.
 *
 * @return the number of tokens remaining in the string using the current separator.
 * @throws IllegalArgumentException if the remaining part of the record is malformed, as reported by
 *                                  <code>nextToken</code>.
 * @see #nextToken()
 */
public int countTokens()
{
    final int savedIndex = this.currentIndex;
    final boolean savedBeforeStart = this.beforeStart;
    try
    {
        int count = 0;
        while (this.hasMoreTokens())
        {
            this.nextToken();
            count++;
        }
        return count;
    }
    finally
    {
        // Restore the parse state on every exit path, including a format error raised by nextToken().
        this.currentIndex = savedIndex;
        this.beforeStart = savedBeforeStart;
    }
}
/**
 * Returns the quate, i.e. the quoting string currently used by this tokenizer.
 *
 * @return the quoting string.
 */
public String getQuate()
{
    return quate;
}
/**
 * Sets the quate, i.e. the quoting string used for all tokens read from now on.
 *
 * @param quate The quate to set; must not be <code>null</code>.
 * @throws NullPointerException if the given quate is <code>null</code>; the previous quate is kept.
 */
public void setQuate(final String quate)
{
    // Same validation as in the constructor; a null quate would make every later nextToken() call fail.
    if (quate == null)
    {
        throw new NullPointerException("The given quate is null");
    }
    this.quate = quate;
}
}