The supported XML encodings are the intersection of XML-supported code sets and those
* supported in JDK 1.1.
*
MIME charset names are used on <var>xmlEncoding</var> parameters to methods such
* as <code>TXDocument#setEncoding</code> and <code>DTD#setEncoding</code>.
*
Java encoding names are used on <var>encoding</var> parameters to
* methods such as <code>TXDocument#printWithFormat</code> and <code>DTD#printExternal</code>.
*
*
* Common Name * |
*
* Use this name in XML files * |
*
* Name Type * |
*
* Xerces converts to this Java Encoder Name * |
*
8 bit Unicode | *
* UTF-8 * |
*
* IANA * |
*
* UTF8 * |
*
ISO Latin 1 | *
* ISO-8859-1 * |
*
* MIME * |
*
* ISO-8859-1 * |
*
ISO Latin 2 | *
* ISO-8859-2 * |
*
* MIME * |
*
* ISO-8859-2 * |
*
ISO Latin 3 | *
* ISO-8859-3 * |
*
* MIME * |
*
* ISO-8859-3 * |
*
ISO Latin 4 | *
* ISO-8859-4 * |
*
* MIME * |
*
* ISO-8859-4 * |
*
ISO Latin Cyrillic | *
* ISO-8859-5 * |
*
* MIME * |
*
* ISO-8859-5 * |
*
ISO Latin Arabic | *
* ISO-8859-6 * |
*
* MIME * |
*
* ISO-8859-6 * |
*
ISO Latin Greek | *
* ISO-8859-7 * |
*
* MIME * |
*
* ISO-8859-7 * |
*
ISO Latin Hebrew | *
* ISO-8859-8 * |
*
* MIME * |
*
* ISO-8859-8 * |
*
ISO Latin 5 | *
* ISO-8859-9 * |
*
* MIME * |
*
* ISO-8859-9 * |
*
EBCDIC: US | *
* ebcdic-cp-us * |
*
* IANA * |
*
* cp037 * |
*
EBCDIC: Canada | *
* ebcdic-cp-ca * |
*
* IANA * |
*
* cp037 * |
*
EBCDIC: Netherlands | *
* ebcdic-cp-nl * |
*
* IANA * |
*
* cp037 * |
*
EBCDIC: Denmark | *
* ebcdic-cp-dk * |
*
* IANA * |
*
* cp277 * |
*
EBCDIC: Norway | *
* ebcdic-cp-no * |
*
* IANA * |
*
* cp277 * |
*
EBCDIC: Finland | *
* ebcdic-cp-fi * |
*
* IANA * |
*
* cp278 * |
*
EBCDIC: Sweden | *
* ebcdic-cp-se * |
*
* IANA * |
*
* cp278 * |
*
EBCDIC: Italy | *
* ebcdic-cp-it * |
*
* IANA * |
*
* cp280 * |
*
EBCDIC: Spain, Latin America | *
* ebcdic-cp-es * |
*
* IANA * |
*
* cp284 * |
*
EBCDIC: Great Britain | *
* ebcdic-cp-gb * |
*
* IANA * |
*
* cp285 * |
*
EBCDIC: France | *
* ebcdic-cp-fr * |
*
* IANA * |
*
* cp297 * |
*
EBCDIC: Arabic | *
* ebcdic-cp-ar1 * |
*
* IANA * |
*
* cp420 * |
*
EBCDIC: Hebrew | *
* ebcdic-cp-he * |
*
* IANA * |
*
* cp424 * |
*
EBCDIC: Switzerland | *
* ebcdic-cp-ch * |
*
* IANA * |
*
* cp500 * |
*
EBCDIC: Roece | *
* ebcdic-cp-roece * |
*
* IANA * |
*
* cp870 * |
*
EBCDIC: Yugoslavia | *
* ebcdic-cp-yu * |
*
* IANA * |
*
* cp870 * |
*
EBCDIC: Iceland | *
* ebcdic-cp-is * |
*
* IANA * |
*
* cp871 * |
*
EBCDIC: Urdu | *
* ebcdic-cp-ar2 * |
*
* IANA * |
*
* cp918 * |
*
Chinese for PRC, mixed 1/2 byte | *
* gb2312 * |
*
* MIME * |
*
* GB2312 * |
*
Extended Unix Code, packed for Japanese | *
* euc-jp * |
*
* MIME * |
*
* eucjis * |
*
Japanese: iso-2022-jp | *
* iso-2022-jp * |
*
* MIME * |
*
* JIS * |
*
Japanese: Shift JIS | *
* Shift_JIS * |
*
* MIME * |
*
* SJIS * |
*
Chinese: Big5 | *
* Big5 * |
*
* MIME * |
*
* Big5 * |
*
Extended Unix Code, packed for Korean | *
* euc-kr * |
*
* MIME * |
*
* iso2022kr * |
*
Cyrillic | *
* koi8-r * |
*
* MIME * |
*
* koi8-r * |
*
UTF-8, US-ASCII, ISO-8859-1,
* ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, ISO-8859-6,
* ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-2022-JP, Shift_JIS,
* EUC-JP, GB2312, Big5, EUC-KR, ISO-2022-KR, KOI8-R,
* EBCDIC-CP-US, EBCDIC-CP-CA, EBCDIC-CP-NL, EBCDIC-CP-DK,
* EBCDIC-CP-NO, EBCDIC-CP-FI, EBCDIC-CP-SE, EBCDIC-CP-IT,
* EBCDIC-CP-ES, EBCDIC-CP-GB, EBCDIC-CP-FR, EBCDIC-CP-AR1,
* EBCDIC-CP-HE, EBCDIC-CP-CH, EBCDIC-CP-ROECE, EBCDIC-CP-YU,
* EBCDIC-CP-IS and EBCDIC-CP-AR2.
* @return Java encoding name, or null if mimeCharsetName
* is unknown.
* @see #reverse
*/
public static String convert(String mimeCharsetName) {
    // A null name cannot match any entry; report it as unknown (null) rather
    // than failing with a NullPointerException on the call below.
    if (mimeCharsetName == null) {
        return null;
    }
    // Upper-case with a fixed locale. The no-argument toUpperCase() uses the
    // default locale, and under Turkish/Azeri locales 'i' becomes U+0130
    // (dotted capital I), so a name such as "iso-8859-1" would no longer match
    // the key it produces under other locales. Locale.ENGLISH is available
    // since JDK 1.1.
    String lookupKey = mimeCharsetName.toUpperCase(java.util.Locale.ENGLISH);
    return (String)s_enchash.get(lookupKey);
}
/**
 * Convert a Java encoding name to MIME charset name.
 * Available values of <var>encoding</var> are "UTF8", "8859_1", "8859_2", "8859_3", "8859_4",
 * "8859_5", "8859_6", "8859_7", "8859_8", "8859_9", "JIS", "SJIS", "EUCJIS",
 * "GB2312", "BIG5", "KSC5601", "ISO2022KR", "KOI8_R", "CP037", "CP277", "CP278",
 * "CP280", "CP284", "CP285", "CP297", "CP420", "CP424", "CP500", "CP870", "CP871" and "CP918".
 * <p>
 * The name is upper-cased with a fixed (English) locale before the lookup, so the
 * result does not depend on the default locale of the running JVM.
 *
 * @param encoding Case insensitive Java encoding name: <code>UTF8, 8859_1, 8859_2, 8859_3,
 *                 8859_4, 8859_5, 8859_6, 8859_7, 8859_8, 8859_9, JIS, SJIS, EUCJIS,
 *                 GB2312, BIG5, KSC5601, ISO2022KR, KOI8_R, CP037, CP277, CP278,
 *                 CP280, CP284, CP285, CP297, CP420, CP424, CP500, CP870, CP871
 *                 and CP918</code>.
 * @return MIME charset name, or <var>null</var> if <var>encoding</var> is unknown
 *         or is itself <var>null</var>.
 * @see #convert
 */
public static String reverse(String encoding) {
    // A null name cannot match any entry; report it as unknown (null) rather
    // than failing with a NullPointerException on the call below.
    if (encoding == null) {
        return null;
    }
    // Fixed locale: the default-locale toUpperCase() maps 'i' to U+0130 under
    // Turkish/Azeri locales (e.g. "sjis", "eucjis"), which would break the lookup.
    String lookupKey = encoding.toUpperCase(java.util.Locale.ENGLISH);
    return (String)s_revhash.get(lookupKey);
}
}