Mega Code Archive

 
Categories / Java Tutorial / Email
 

A convenience class which handles conversions between MIME charset names and Java encoding names

import java.util.*;

/**
 * MIME2Java is a convenience class which handles conversions between MIME charset names
 * and Java encoding names.
 * <p>The supported XML encodings are the intersection of XML-supported code sets and those
 * supported in JDK 1.1.
 * <p>MIME charset names are used on <var>xmlEncoding</var> parameters to methods such
 * as <code>TXDocument#setEncoding</code> and <code>DTD#setEncoding</code>.
 * <p>Java encoding names are used on <var>encoding</var> parameters to
 * methods such as <code>TXDocument#printWithFormat</code> and <code>DTD#printExternal</code>.
 * <p>The last column of the table below shows the value actually returned by
 * {@link #convert(String)} for the name in the second column.
 * <P>
 * <TABLE BORDER="0" WIDTH="100%">
 *  <TR>
 *      <TD WIDTH="33%"><P ALIGN="CENTER"><B>Common Name</B></TD>
 *      <TD WIDTH="15%"><P ALIGN="CENTER"><B>Use this name in XML files</B></TD>
 *      <TD WIDTH="12%"><P ALIGN="CENTER"><B>Name Type</B></TD>
 *      <TD WIDTH="31%"><P ALIGN="CENTER"><B>Xerces converts to this Java Encoder Name</B></TD>
 *  </TR>
 *  <TR><TD>8 bit Unicode</TD><TD ALIGN="CENTER">UTF-8</TD><TD ALIGN="CENTER">IANA</TD><TD ALIGN="CENTER">UTF8</TD></TR>
 *  <TR><TD>ISO Latin 1</TD><TD ALIGN="CENTER">ISO-8859-1</TD><TD ALIGN="CENTER">MIME</TD><TD ALIGN="CENTER">8859_1</TD></TR>
 *  <TR><TD>ISO Latin 2</TD><TD ALIGN="CENTER">ISO-8859-2</TD><TD ALIGN="CENTER">MIME</TD><TD ALIGN="CENTER">8859_2</TD></TR>
 *  <TR><TD>ISO Latin 3</TD><TD ALIGN="CENTER">ISO-8859-3</TD><TD ALIGN="CENTER">MIME</TD><TD ALIGN="CENTER">8859_3</TD></TR>
 *  <TR><TD>ISO Latin 4</TD><TD ALIGN="CENTER">ISO-8859-4</TD><TD ALIGN="CENTER">MIME</TD><TD ALIGN="CENTER">8859_4</TD></TR>
 *  <TR><TD>ISO Latin Cyrillic</TD><TD ALIGN="CENTER">ISO-8859-5</TD><TD ALIGN="CENTER">MIME</TD><TD ALIGN="CENTER">8859_5</TD></TR>
 *  <TR><TD>ISO Latin Arabic</TD><TD ALIGN="CENTER">ISO-8859-6</TD><TD ALIGN="CENTER">MIME</TD><TD ALIGN="CENTER">8859_6</TD></TR>
 *  <TR><TD>ISO Latin Greek</TD><TD ALIGN="CENTER">ISO-8859-7</TD><TD ALIGN="CENTER">MIME</TD><TD ALIGN="CENTER">8859_7</TD></TR>
 *  <TR><TD>ISO Latin Hebrew</TD><TD ALIGN="CENTER">ISO-8859-8</TD><TD ALIGN="CENTER">MIME</TD><TD ALIGN="CENTER">8859_8</TD></TR>
 *  <TR><TD>ISO Latin 5</TD><TD ALIGN="CENTER">ISO-8859-9</TD><TD ALIGN="CENTER">MIME</TD><TD ALIGN="CENTER">8859_9</TD></TR>
 *  <TR><TD>EBCDIC: US</TD><TD ALIGN="CENTER">ebcdic-cp-us</TD><TD ALIGN="CENTER">IANA</TD><TD ALIGN="CENTER">CP037</TD></TR>
 *  <TR><TD>EBCDIC: Canada</TD><TD ALIGN="CENTER">ebcdic-cp-ca</TD><TD ALIGN="CENTER">IANA</TD><TD ALIGN="CENTER">CP037</TD></TR>
 *  <TR><TD>EBCDIC: Netherlands</TD><TD ALIGN="CENTER">ebcdic-cp-nl</TD><TD ALIGN="CENTER">IANA</TD><TD ALIGN="CENTER">CP037</TD></TR>
 *  <TR><TD>EBCDIC: Denmark</TD><TD ALIGN="CENTER">ebcdic-cp-dk</TD><TD ALIGN="CENTER">IANA</TD><TD ALIGN="CENTER">CP277</TD></TR>
 *  <TR><TD>EBCDIC: Norway</TD><TD ALIGN="CENTER">ebcdic-cp-no</TD><TD ALIGN="CENTER">IANA</TD><TD ALIGN="CENTER">CP277</TD></TR>
 *  <TR><TD>EBCDIC: Finland</TD><TD ALIGN="CENTER">ebcdic-cp-fi</TD><TD ALIGN="CENTER">IANA</TD><TD ALIGN="CENTER">CP278</TD></TR>
 *  <TR><TD>EBCDIC: Sweden</TD><TD ALIGN="CENTER">ebcdic-cp-se</TD><TD ALIGN="CENTER">IANA</TD><TD ALIGN="CENTER">CP278</TD></TR>
 *  <TR><TD>EBCDIC: Italy</TD><TD ALIGN="CENTER">ebcdic-cp-it</TD><TD ALIGN="CENTER">IANA</TD><TD ALIGN="CENTER">CP280</TD></TR>
 *  <TR><TD>EBCDIC: Spain, Latin America</TD><TD ALIGN="CENTER">ebcdic-cp-es</TD><TD ALIGN="CENTER">IANA</TD><TD ALIGN="CENTER">CP284</TD></TR>
 *  <TR><TD>EBCDIC: Great Britain</TD><TD ALIGN="CENTER">ebcdic-cp-gb</TD><TD ALIGN="CENTER">IANA</TD><TD ALIGN="CENTER">CP285</TD></TR>
 *  <TR><TD>EBCDIC: France</TD><TD ALIGN="CENTER">ebcdic-cp-fr</TD><TD ALIGN="CENTER">IANA</TD><TD ALIGN="CENTER">CP297</TD></TR>
 *  <TR><TD>EBCDIC: Arabic</TD><TD ALIGN="CENTER">ebcdic-cp-ar1</TD><TD ALIGN="CENTER">IANA</TD><TD ALIGN="CENTER">CP420</TD></TR>
 *  <TR><TD>EBCDIC: Hebrew</TD><TD ALIGN="CENTER">ebcdic-cp-he</TD><TD ALIGN="CENTER">IANA</TD><TD ALIGN="CENTER">CP424</TD></TR>
 *  <TR><TD>EBCDIC: Switzerland</TD><TD ALIGN="CENTER">ebcdic-cp-ch</TD><TD ALIGN="CENTER">IANA</TD><TD ALIGN="CENTER">CP500</TD></TR>
 *  <TR><TD>EBCDIC: Roece</TD><TD ALIGN="CENTER">ebcdic-cp-roece</TD><TD ALIGN="CENTER">IANA</TD><TD ALIGN="CENTER">CP870</TD></TR>
 *  <TR><TD>EBCDIC: Yugoslavia</TD><TD ALIGN="CENTER">ebcdic-cp-yu</TD><TD ALIGN="CENTER">IANA</TD><TD ALIGN="CENTER">CP870</TD></TR>
 *  <TR><TD>EBCDIC: Iceland</TD><TD ALIGN="CENTER">ebcdic-cp-is</TD><TD ALIGN="CENTER">IANA</TD><TD ALIGN="CENTER">CP871</TD></TR>
 *  <TR><TD>EBCDIC: Urdu</TD><TD ALIGN="CENTER">ebcdic-cp-ar2</TD><TD ALIGN="CENTER">IANA</TD><TD ALIGN="CENTER">CP918</TD></TR>
 *  <TR><TD>Chinese for PRC, mixed 1/2 byte</TD><TD ALIGN="CENTER">gb2312</TD><TD ALIGN="CENTER">MIME</TD><TD ALIGN="CENTER">GB2312</TD></TR>
 *  <TR><TD>Extended Unix Code, packed for Japanese</TD><TD ALIGN="CENTER">euc-jp</TD><TD ALIGN="CENTER">MIME</TD><TD ALIGN="CENTER">EUCJIS</TD></TR>
 *  <TR><TD>Japanese: iso-2022-jp</TD><TD ALIGN="CENTER">iso-2022-jp</TD><TD ALIGN="CENTER">MIME</TD><TD ALIGN="CENTER">JIS</TD></TR>
 *  <TR><TD>Japanese: Shift JIS</TD><TD ALIGN="CENTER">Shift_JIS</TD><TD ALIGN="CENTER">MIME</TD><TD ALIGN="CENTER">SJIS</TD></TR>
 *  <TR><TD>Chinese: Big5</TD><TD ALIGN="CENTER">Big5</TD><TD ALIGN="CENTER">MIME</TD><TD ALIGN="CENTER">Big5</TD></TR>
 *  <TR><TD>Extended Unix Code, packed for Korean</TD><TD ALIGN="CENTER">euc-kr</TD><TD ALIGN="CENTER">MIME</TD><TD ALIGN="CENTER">KSC5601</TD></TR>
 *  <TR><TD>Cyrillic</TD><TD ALIGN="CENTER">koi8-r</TD><TD ALIGN="CENTER">MIME</TD><TD ALIGN="CENTER">KOI8_R</TD></TR>
 * </TABLE>
 * <p>In addition, {@link #convert(String)} accepts <code>US-ASCII</code> (mapped to
 * <code>8859_1</code>) and <code>ISO-2022-KR</code> (mapped to <code>ISO2022KR</code>).
 * <p>All lookups are case insensitive. Both lookup tables are filled once during class
 * initialization and are never modified afterwards.
 *
 * @version $Revision: 515 $ $Date: 2008-03-17 22:02:23 +0100 (Mon, 17 Mar 2008) $
 * @author TAMURA Kent &lt;kent@trl.ibm.co.jp&gt;
 */
public class MIME2Java {

    /** Pairs of { upper-cased preferred MIME name, Java encoding name }. */
    private static final String[][] MIME_TO_JAVA = {
        {"UTF-8",           "UTF8"},
        // US-ASCII is served by the Latin-1 converter (the original author flagged this with "?").
        {"US-ASCII",        "8859_1"},
        {"ISO-8859-1",      "8859_1"},
        {"ISO-8859-2",      "8859_2"},
        {"ISO-8859-3",      "8859_3"},
        {"ISO-8859-4",      "8859_4"},
        {"ISO-8859-5",      "8859_5"},
        {"ISO-8859-6",      "8859_6"},
        {"ISO-8859-7",      "8859_7"},
        {"ISO-8859-8",      "8859_8"},
        {"ISO-8859-9",      "8859_9"},
        {"ISO-2022-JP",     "JIS"},
        {"SHIFT_JIS",       "SJIS"},
        {"EUC-JP",          "EUCJIS"},
        {"GB2312",          "GB2312"},
        {"BIG5",            "Big5"},
        {"EUC-KR",          "KSC5601"},
        {"ISO-2022-KR",     "ISO2022KR"},
        {"KOI8-R",          "KOI8_R"},
        {"EBCDIC-CP-US",    "CP037"},
        {"EBCDIC-CP-CA",    "CP037"},
        {"EBCDIC-CP-NL",    "CP037"},
        {"EBCDIC-CP-DK",    "CP277"},
        {"EBCDIC-CP-NO",    "CP277"},
        {"EBCDIC-CP-FI",    "CP278"},
        {"EBCDIC-CP-SE",    "CP278"},
        {"EBCDIC-CP-IT",    "CP280"},
        {"EBCDIC-CP-ES",    "CP284"},
        {"EBCDIC-CP-GB",    "CP285"},
        {"EBCDIC-CP-FR",    "CP297"},
        {"EBCDIC-CP-AR1",   "CP420"},
        {"EBCDIC-CP-HE",    "CP424"},
        {"EBCDIC-CP-CH",    "CP500"},
        {"EBCDIC-CP-ROECE", "CP870"},
        {"EBCDIC-CP-YU",    "CP870"},
        {"EBCDIC-CP-IS",    "CP871"},
        {"EBCDIC-CP-AR2",   "CP918"},
        // Open questions carried over from the original author:
        //   j:CNS11643 -> EUC-TW?
        //   ISO-2022-CN? ISO-2022-CN-EXT?
    };

    /**
     * Pairs of { upper-cased Java encoding name, preferred MIME name }.
     * <p>A Java encoding can stand for several MIME names, but only one can be reported back.
     * The choices below are the ones this class has always returned:
     * 8859_1 -> ISO-8859-1 (not US-ASCII), CP037 -> EBCDIC-CP-NL, CP277 -> EBCDIC-CP-NO,
     * CP278 -> EBCDIC-CP-SE and CP870 -> EBCDIC-CP-YU. They are kept so that existing
     * callers see unchanged results.
     */
    private static final String[][] JAVA_TO_MIME = {
        {"UTF8",      "UTF-8"},
        {"8859_1",    "ISO-8859-1"},
        {"8859_2",    "ISO-8859-2"},
        {"8859_3",    "ISO-8859-3"},
        {"8859_4",    "ISO-8859-4"},
        {"8859_5",    "ISO-8859-5"},
        {"8859_6",    "ISO-8859-6"},
        {"8859_7",    "ISO-8859-7"},
        {"8859_8",    "ISO-8859-8"},
        {"8859_9",    "ISO-8859-9"},
        {"JIS",       "ISO-2022-JP"},
        {"SJIS",      "Shift_JIS"},
        {"EUCJIS",    "EUC-JP"},
        {"GB2312",    "GB2312"},
        {"BIG5",      "Big5"},
        {"KSC5601",   "EUC-KR"},
        {"ISO2022KR", "ISO-2022-KR"},
        {"KOI8_R",    "KOI8-R"},
        {"CP037",     "EBCDIC-CP-NL"},
        {"CP277",     "EBCDIC-CP-NO"},
        {"CP278",     "EBCDIC-CP-SE"},
        {"CP280",     "EBCDIC-CP-IT"},
        {"CP284",     "EBCDIC-CP-ES"},
        {"CP285",     "EBCDIC-CP-GB"},
        {"CP297",     "EBCDIC-CP-FR"},
        {"CP420",     "EBCDIC-CP-AR1"},
        {"CP424",     "EBCDIC-CP-HE"},
        {"CP500",     "EBCDIC-CP-CH"},
        {"CP870",     "EBCDIC-CP-YU"},
        {"CP871",     "EBCDIC-CP-IS"},
        {"CP918",     "EBCDIC-CP-AR2"},
    };

    // The arrays above must be declared before these fields: static initializers run in
    // textual order.
    private static final Hashtable s_enchash = buildTable(MIME_TO_JAVA);
    private static final Hashtable s_revhash = buildTable(JAVA_TO_MIME);

    /** Not instantiable: this class only offers static lookups. */
    private MIME2Java() {
    }

    /** Copies an array of { key, value } pairs into a new lookup table. */
    private static Hashtable buildTable(String[][] pairs) {
        Hashtable table = new Hashtable();
        for (int i = 0; i < pairs.length; i++) {
            table.put(pairs[i][0], pairs[i][1]);
        }
        return table;
    }

    /**
     * Looks up an upper-cased form of <var>name</var> in <var>table</var>.
     * <p>The name is upper-cased with a fixed locale. With the default locale a
     * Turkish or Azeri environment would turn "i" into a dotted capital I and make names
     * such as "iso-8859-1" or "sjis" unresolvable.
     *
     * @return the mapped name, or <var>null</var> if <var>name</var> is null or unknown.
     */
    private static String lookup(Hashtable table, String name) {
        if (name == null) {
            return null;
        }
        return (String) table.get(name.toUpperCase(Locale.ENGLISH));
    }

    /**
     * Convert a MIME charset name, also known as an XML encoding name, to a Java encoding name.
     * @param   mimeCharsetName Case insensitive MIME charset name: <code>UTF-8, US-ASCII, ISO-8859-1,
     *                          ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, ISO-8859-6,
     *                          ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-2022-JP, Shift_JIS,
     *                          EUC-JP, GB2312, Big5, EUC-KR, ISO-2022-KR, KOI8-R,
     *                          EBCDIC-CP-US, EBCDIC-CP-CA, EBCDIC-CP-NL, EBCDIC-CP-DK,
     *                          EBCDIC-CP-NO, EBCDIC-CP-FI, EBCDIC-CP-SE, EBCDIC-CP-IT,
     *                          EBCDIC-CP-ES, EBCDIC-CP-GB, EBCDIC-CP-FR, EBCDIC-CP-AR1,
     *                          EBCDIC-CP-HE, EBCDIC-CP-CH, EBCDIC-CP-ROECE, EBCDIC-CP-YU,
     *                          EBCDIC-CP-IS and EBCDIC-CP-AR2</code>.
     * @return                  Java encoding name, or <var>null</var> if <var>mimeCharsetName</var>
     *                          is <var>null</var> or unknown.
     * @see #reverse
     */
    public static String convert(String mimeCharsetName) {
        return lookup(s_enchash, mimeCharsetName);
    }

    /**
     * Convert a Java encoding name to MIME charset name.
     * <p>When several MIME names share one Java encoding, a single fixed name is returned:
     * "8859_1" gives ISO-8859-1, "CP037" gives EBCDIC-CP-NL, "CP277" gives EBCDIC-CP-NO,
     * "CP278" gives EBCDIC-CP-SE and "CP870" gives EBCDIC-CP-YU.
     * @param   encoding    Case insensitive Java encoding name: <code>UTF8, 8859_1, 8859_2, 8859_3,
     *                      8859_4, 8859_5, 8859_6, 8859_7, 8859_8, 8859_9, JIS, SJIS, EUCJIS,
     *                      GB2312, BIG5, KSC5601, ISO2022KR, KOI8_R, CP037, CP277, CP278,
     *                      CP280, CP284, CP285, CP297, CP420, CP424, CP500, CP870, CP871
     *                      and CP918</code>.
     * @return              MIME charset name, or <var>null</var> if <var>encoding</var> is
     *                      <var>null</var> or unknown.
     * @see #convert
     */
    public static String reverse(String encoding) {
        return lookup(s_revhash, encoding);
    }
}