// Mega Code Archive
// Makes writing XML much much easier
/*
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Commons" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Turbine", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.Stack;
/**
* Makes writing XML much much easier.
* Improved from
* article
*
* @author Henri Yandell
* @author Peter Cassetta
* @author Last changed by: $Author: gommma $
* @version $Revision: 859 $ $Date: 2008-11-02 12:50:23 +0100 (dom, 02 nov 2008) $
* @since 1.0
*/
public class XmlWriter
{
/**
* CDATA start tag: {@value}
*/
public static final String CDATA_START = "");
}
}
/**
 * Flushes the attributes buffered for the current element to the output.
 * Does nothing while no attribute buffer exists; otherwise the buffer is
 * written out, emptied, and the {@code empty} flag is cleared.
 *
 * @throws IOException if the underlying writer fails
 */
private void writeAttributes() throws IOException
{
    if (this.attrs == null)
    {
        return;
    }
    final String pending = this.attrs.toString();
    this.out.write(pending);
    this.attrs.setLength(0);
    this.empty = false;
}
/**
 * Adds an attribute to the element that is currently being opened.
 * XML special characters in the value are escaped. This is shorthand for
 * {@link #writeAttribute(String, String, boolean)} with {@code literally}
 * set to {@code false}.
 *
 * @param attr name of attribute.
 * @param value value of attribute.
 * @return this writer, so calls can be chained
 * @throws IOException never thrown at present; declared so the API can
 *         start doing I/O here without breaking callers
 * @see #writeAttribute(String, String, boolean)
 */
public XmlWriter writeAttribute(String attr, String value) throws IOException
{
    final boolean literally = false;
    return writeAttribute(attr, value, literally);
}
/**
 * Buffers an attribute for the element that is currently being opened.
 * The attribute is not written immediately; it is appended to the pending
 * attribute buffer as {@code name="value"} preceded by a single space.
 * XML special characters in the value are always escaped.
 *
 * @param attr name of attribute.
 * @param value value of attribute.
 * @param literally when {@code true} the escaped value is additionally run
 *        through the meta-character escaping, so that line breaks survive
 *        a round trip through an XML parser
 * @return this writer, so calls can be chained
 * @throws IllegalStateException if text has already been written for the
 *         current element
 * @throws IOException never thrown at present; the {@code throws} clause
 *         alone keeps the API stable for future changes
 */
public XmlWriter writeAttribute(String attr, String value, boolean literally) throws IOException
{
    if (this.wroteText)
    {
        throw new IllegalStateException("The text for the current element has already been written. Cannot add attributes afterwards.");
    }
    // The buffer is created lazily on the first attribute.
    if (this.attrs == null)
    {
        this.attrs = new StringBuffer();
    }
    this.attrs.append(" ").append(attr).append("=\"");
    final String escaped = escapeXml(value);
    this.attrs.append(literally ? escapeMetaCharacters(escaped) : escaped).append("\"");
    return this;
}
/**
 * Ends the current element.
 * <p>
 * An element that received neither text nor children is finished as an
 * empty tag: its pending attributes are flushed and {@code />} is written.
 * Otherwise a full closing tag {@code </name>} is written, indented to the
 * current nesting depth when pretty printing is on and no text was written
 * for the element. In pretty mode a newline follows the tag.
 *
 * @return this writer, so calls can be chained
 * @throws IOException if no element is currently open, or if the
 *         underlying writer fails
 */
public XmlWriter endElement() throws IOException
{
    if (this.stack.empty())
    {
        throw new IOException("Called endElement too many times. ");
    }
    String name = (String) this.stack.pop();
    if (name != null)
    {
        if (this.empty)
        {
            writeAttributes();
            this.out.write("/>");
        }
        else
        {
            if (this.pretty && !this.wroteText)
            {
                // After the pop, the stack size equals the depth of the element being closed.
                for (int i = 0; i < this.stack.size(); i++)
                {
                    this.out.write(indent);
                }
            }
            // The closing tag must start with "</"; writing an empty string here
            // produced "name>" and therefore malformed XML.
            this.out.write("</");
            this.out.write(name);
            this.out.write(">");
        }
        if (this.pretty)
        {
            this.out.write(newline);
        }
        this.empty = false;
        this.closed = true;
        this.wroteText = false;
    }
    return this;
}
/**
 * Finishes this writer. The underlying writer is flushed but not closed.
 * If elements are still open afterwards, the innermost one is popped from
 * the element stack and named in the exception message.
 *
 * @throws IOException if flushing fails or if there are unclosed elements
 */
public void close() throws IOException
{
    this.out.flush();
    if (this.stack.empty())
    {
        return;
    }
    final Object unclosed = this.stack.pop();
    throw new IOException("Tags are not all closed. Possibly, " + unclosed + " is unclosed. ");
}
/**
 * Writes body text for the current element with XML special characters
 * escaped. Shorthand for {@link #writeText(String, boolean)} with
 * {@code literally} set to {@code false}.
 *
 * @param text The text to be written
 * @return This writer
 * @throws IOException if the underlying writer fails
 * @see #writeText(String, boolean)
 */
public XmlWriter writeText(String text) throws IOException
{
    final boolean literally = false;
    return writeText(text, literally);
}
/**
 * Writes body text for the current element. The opening tag is completed
 * first, the element is marked as non-empty and as containing text, and
 * the text is written with XML special characters escaped.
 *
 * @param text The text to be written
 * @param literally when {@code true} the escaped text is additionally run
 *        through the meta-character escaping, so that line breaks survive
 *        a round trip through an XML parser
 * @return This writer
 * @throws IOException if the underlying writer fails
 */
public XmlWriter writeText(String text, boolean literally) throws IOException
{
    closeOpeningTag();
    this.empty = false;
    this.wroteText = true;
    final String escaped = escapeXml(text);
    this.out.write(literally ? escapeMetaCharacters(escaped) : escaped);
    return this;
}
/**
 * Writes a chunk of character data as a CDATA section.
 * <p>
 * Data that already starts with {@link #CDATA_START} and ends with
 * {@link #CDATA_END} is written unchanged. Any other data is wrapped in
 * the CDATA markers, after every embedded end marker has been split so
 * that the result is a sequence of valid CDATA sections.
 *
 * @param cdata the text to write; must not be {@code null}
 * @return this writer, so calls can be chained
 * @throws IOException if the underlying writer fails
 */
public XmlWriter writeCData(String cdata) throws IOException
{
    closeOpeningTag();
    final boolean hasAlreadyEnclosingCdata = cdata.startsWith(CDATA_START) && cdata.endsWith(CDATA_END);
    this.empty = false;
    this.wroteText = true;
    if (hasAlreadyEnclosingCdata)
    {
        this.out.write(cdata);
        return this;
    }
    // CDATA sections cannot be nested: "]]>" must not occur inside one
    // (see http://www.w3.org/TR/REC-xml/#NT-CDStart). Each occurrence of
    // "]]>" is therefore replaced by "]]]]><![CDATA[>", which ends the
    // current section after "]]" and opens a new one holding the ">".
    // Replacing it with just "]]]]>" would leave a bare "]]>" in the data
    // and terminate the section early.
    final String split = cdata.replaceAll(CDATA_END, "]]]]><![CDATA[>");
    this.out.write(CDATA_START);
    this.out.write(split);
    this.out.write(CDATA_END);
    return this;
}
/**
 * Writes an XML comment. The given text is surrounded with the comment
 * delimiters {@code <!--} and {@code -->}, separated from the text by one
 * space on each side. The text itself is written as is, so the caller
 * must make sure it contains no {@code --} sequence.
 *
 * @param comment the text to put inside the comment
 * @return this writer, so calls can be chained
 * @throws IOException if the underlying writer fails
 */
public XmlWriter writeComment(String comment) throws IOException
{
    // The previous body passed an empty string and dropped the comment text.
    writeChunk("<!-- " + comment + " -->");
    return this;
}
/**
 * Writes a pre-formatted chunk of markup as content of the current element.
 * The opening tag is completed first and the element is marked non-empty.
 * In pretty mode the chunk is followed by a newline and, unless text was
 * already written for the element, preceded by one indent per open element.
 *
 * @param data the markup to write verbatim
 * @throws IOException if the underlying writer fails
 */
private void writeChunk(String data) throws IOException
{
    closeOpeningTag();
    this.empty = false;

    final boolean indentFirst = this.pretty && !this.wroteText;
    if (indentFirst)
    {
        int remaining = this.stack.size();
        while (remaining-- > 0)
        {
            this.out.write(indent);
        }
    }

    this.out.write(data);

    if (this.pretty)
    {
        this.out.write(newline);
    }
}
// Two example methods. They should output XML along the lines of:
// <person name="fred" age="12"><phone>4254343</phone><friends><bob/><jim/></friends></person>
/**
 * Runs both example methods one after the other; each prints the XML it
 * produced to standard error.
 *
 * @param args ignored
 * @throws IOException if one of the examples fails to write
 */
public static void main(String[] args) throws IOException
{
    test1();
    test2();
}
/**
 * Example using the chained call style: builds a small "person" document
 * in memory and prints it to standard error.
 *
 * @throws IOException if writing fails
 */
public static void test1() throws IOException
{
    final Writer sink = new java.io.StringWriter();
    final XmlWriter xml = new XmlWriter(sink);
    xml.writeElement("person")
        .writeAttribute("name", "fred")
        .writeAttribute("age", "12")
        .writeElement("phone")
        .writeText("4254343")
        .endElement()
        .writeElement("friends")
        .writeElement("bob")
        .endElement()
        .writeElement("jim")
        .endElement()
        .endElement()
        .endElement();
    xml.close();
    System.err.println(sink.toString());
}
/**
 * Example using one statement per call: builds a "person" document with
 * comments, empty elements and an element with text, then prints it to
 * standard error.
 *
 * @throws IOException if writing fails
 */
public static void test2() throws IOException
{
    final Writer sink = new java.io.StringWriter();
    final XmlWriter xml = new XmlWriter(sink);

    xml.writeComment("Example of XmlWriter running");

    // <person name="fred" age="12"> with a nested <phone>
    xml.writeElement("person");
    xml.writeAttribute("name", "fred");
    xml.writeAttribute("age", "12");
    xml.writeElement("phone");
    xml.writeText("4254343");
    xml.endElement();

    xml.writeComment("Examples of empty tags");
    // xml.setDefaultNamespace("test");
    xml.writeElement("friends");
    xml.writeEmptyElement("bob");
    xml.writeEmptyElement("jim");
    xml.endElement();

    xml.writeElementWithText("foo", "This is an example.");

    // closes <person>
    xml.endElement();
    xml.close();
    System.err.println(sink.toString());
}
////////////////////////////////////////////////////////////////////////////
// Added for DbUnit
/**
* Escapes some meta characters like \n, \r that should be preserved in the XML
* so that a reader will not filter out those symbols.
* @param str The string to be escaped
* @return The escaped string
* @since 2.3.0
*/
private String escapeMetaCharacters(String str)
{
// 2. Do additional escapes. See http://www.w3.org/TR/2004/REC-xml-20040204/#AVNormalize
str = replace(str, "\n", "
"); // linefeed (LF)
str = replace(str, "\r", "
"); // carriage return (CR)
return str;
}
private String escapeXml(String str)
{
str = replace(str, "&", "&");
str = replace(str, "<", "<");
str = replace(str, ">", ">");
str = replace(str, "\"", """);
str = replace(str, "'", "'");
str = replace(str, "\t", " "); // tab
return str;
}
/**
 * Replaces every occurrence of a literal substring. No regular expression
 * semantics apply, and text inserted by a replacement is not searched
 * again.
 *
 * @param value the string to search; must not be {@code null}
 * @param original the literal text to look for; an empty string matches
 *        nothing and leaves {@code value} unchanged
 * @param replacement the text to insert for each occurrence
 * @return {@code value} itself when nothing was replaced, otherwise a new
 *         string with all occurrences replaced
 */
private String replace(String value, String original, String replacement)
{
    // An empty search string would match at every position without ever
    // advancing, which made the loop below run forever.
    if (original.length() == 0)
    {
        return value;
    }
    int matchIndex = value.indexOf(original);
    if (matchIndex == -1)
    {
        // Common case: nothing to replace, no buffer is allocated.
        return value;
    }
    // Size the buffer from the input being copied, not from the search string.
    StringBuffer buffer = new StringBuffer((int) (value.length() * 1.5));
    int copiedUpTo = 0;
    do
    {
        buffer.append(value.substring(copiedUpTo, matchIndex));
        buffer.append(replacement);
        copiedUpTo = matchIndex + original.length();
        matchIndex = value.indexOf(original, copiedUpTo);
    }
    while (matchIndex != -1);
    buffer.append(value.substring(copiedUpTo));
    return buffer.toString();
}
/**
 * Stores the encoding name used for the XML declaration.
 * <p>
 * When no name is given and the output is an {@link OutputStreamWriter},
 * the name is taken from that writer. A non-null name is upper-cased, and
 * known Java-specific aliases are mapped to their official names
 * (UTF-8, US-ASCII, ISO-8859-1, UTF-16). Any other name is stored in its
 * upper-cased form; {@code null} is stored as {@code null}.
 *
 * @param encoding the requested encoding name, may be {@code null}
 */
private void setEncoding(String encoding)
{
    if (encoding == null && out instanceof OutputStreamWriter)
    {
        encoding = ((OutputStreamWriter) out).getEncoding();
    }
    if (encoding != null)
    {
        // Upper-case with a fixed locale: under e.g. a Turkish default locale
        // "iso-8859-1" would become "\u0130SO-8859-1" and match none of the
        // names below.
        encoding = encoding.toUpperCase(java.util.Locale.ENGLISH);
        // Use official encoding names where we know them, avoiding the
        // Java-only names. I _think_ these are all the main synonyms.
        // (Escaping of characters outside the encoding's range is not
        // implemented in this method.)
        if ("UTF8".equals(encoding))
        {
            encoding = "UTF-8";
        }
        else if ("US-ASCII".equals(encoding)
            || "ASCII".equals(encoding))
        {
            encoding = "US-ASCII";
        }
        else if ("ISO-8859-1".equals(encoding)
            || "8859_1".equals(encoding)
            || "ISO8859_1".equals(encoding))
        {
            encoding = "ISO-8859-1";
        }
        else if ("UNICODE".equals(encoding)
            || "UNICODE-BIG".equals(encoding)
            || "UNICODE-LITTLE".equals(encoding))
        {
            encoding = "UTF-16";
            // TODO: UTF-16BE, UTF-16LE ... no BOM; what
            // release of JDK supports those Unicode names?
        }
    }
    this.encoding = encoding;
}
/**
 * Attaches the writer that receives the XML text. This may only be done
 * while no writer is attached.
 *
 * @param writer XML text is written to this writer. When it is
 *        {@code null}, nothing else happens and the encoding is left as is.
 * @param encoding if non-null, and an XML declaration is written,
 *        this is the name that will be used for the character encoding.
 *
 * @exception IllegalStateException if a writer is already attached
 *            (i.e. the output stream {@link #out} is not null)
 */
public final void setWriter(Writer writer, String encoding)
{
    if (this.out != null)
    {
        throw new IllegalStateException("can't change stream in mid course");
    }
    this.out = writer;
    if (writer == null)
    {
        return;
    }
    setEncoding(encoding);
}
/**
 * Writes the XML declaration, followed by a newline, when an encoding is
 * known. The declaration has the form
 * {@code <?xml version='1.0' encoding='NAME'?>}. Nothing is written while
 * the encoding is {@code null}.
 *
 * @return this writer, so calls can be chained
 * @throws IOException if the underlying writer fails
 */
public XmlWriter writeDeclaration() throws IOException
{
    if (this.encoding != null)
    {
        // The previous body wrote an empty string, so neither the declaration
        // nor the encoding name ever reached the output.
        this.out.write("<?xml version='1.0' encoding='");
        this.out.write(this.encoding);
        this.out.write("'?>");
        this.out.write(this.newline);
    }
    return this;
}
/**
 * Writes a document type declaration, followed by a newline, when at least
 * one identifier is given. Nothing is written when both are {@code null}.
 * <p>
 * With a public identifier the output is
 * {@code <!DOCTYPE dataset PUBLIC "publicId" "systemId">} (the system
 * literal is left out when it is {@code null}); with only a system
 * identifier it is {@code <!DOCTYPE dataset SYSTEM "systemId">}. The
 * identifiers are written as is, without escaping.
 * <p>
 * NOTE(review): the root element name {@code dataset} is assumed from the
 * DbUnit origin of this class - confirm against the upstream source.
 *
 * @param systemId the system identifier, may be {@code null}
 * @param publicId the public identifier, may be {@code null}
 * @return this writer, so calls can be chained
 * @throws IOException if the underlying writer fails
 */
public XmlWriter writeDoctype(String systemId, String publicId) throws IOException
{
    if (systemId != null || publicId != null)
    {
        // The previous body wrote an empty string and ignored both identifiers.
        this.out.write("<!DOCTYPE dataset");
        if (publicId != null)
        {
            // XML requires the public identifier first, then the system literal.
            this.out.write(" PUBLIC \"");
            this.out.write(publicId);
            this.out.write("\"");
            if (systemId != null)
            {
                this.out.write(" \"");
                this.out.write(systemId);
                this.out.write("\"");
            }
        }
        else
        {
            this.out.write(" SYSTEM \"");
            this.out.write(systemId);
            this.out.write("\"");
        }
        this.out.write(">");
        this.out.write(this.newline);
    }
    return this;
}
}