Mega Code Archive

 
Categories / Java / XML
 

Determining validity of characters outside basic 7-bit range of Unicode, for XML 1.0

// Revised from ctc wstx

/**
 * Simple utility class that encapsulates logic of determining validity
 * of characters outside basic 7-bit range of Unicode, for XML 1.0 and
 * XML 1.1 names.
 * <p>
 * The XML 1.0 checks are table driven: one bit per code point, packed 32 to
 * an {@code int}, for code points {@code 0x0000 - 0x313F}; the few large
 * ranges above that are tested with plain comparisons. The XML 1.1 checks
 * are pure range comparisons.
 * <p>
 * None of the methods accept 7-bit ASCII name characters (letters, digits,
 * {@code '_'}, {@code ':'}, {@code '-'}, {@code '.'}): for those they return
 * {@code false}, so callers presumably handle that range themselves before
 * consulting this class.
 */
public final class XmlChars {
    /* We don't need full 64k bits... (0x80 - 0x312C) / 32. But to
     * simplify things, let's just include first 0x80 entries in there etc
     */
    final static int SIZE = (0x3140 >> 5); // 32 bits per int

    /**
     * Bit set of code points (below 0x3140) that may start an XML 1.0 name:
     * productions [85] BaseChar and [86] Ideographic.
     */
    final static int[] sXml10StartChars = new int[SIZE];
    static {
        // [85] BaseChar (non-ASCII part):
        SETBITS(sXml10StartChars, 0xC0, 0xD6);
        SETBITS(sXml10StartChars, 0xD8, 0xF6);
        SETBITS(sXml10StartChars, 0xF8, 0xFF);
        SETBITS(sXml10StartChars, 0x100, 0x131);
        SETBITS(sXml10StartChars, 0x134, 0x13e);
        SETBITS(sXml10StartChars, 0x141, 0x148);
        SETBITS(sXml10StartChars, 0x14a, 0x17e);
        SETBITS(sXml10StartChars, 0x180, 0x1c3);
        SETBITS(sXml10StartChars, 0x1cd, 0x1f0);
        SETBITS(sXml10StartChars, 0x1f4, 0x1f5);
        SETBITS(sXml10StartChars, 0x1fa, 0x217);
        SETBITS(sXml10StartChars, 0x250, 0x2a8);
        SETBITS(sXml10StartChars, 0x2bb, 0x2c1);
        SETBITS(sXml10StartChars, 0x386);
        SETBITS(sXml10StartChars, 0x388, 0x38a);
        SETBITS(sXml10StartChars, 0x38c);
        SETBITS(sXml10StartChars, 0x38e, 0x3a1);
        SETBITS(sXml10StartChars, 0x3a3, 0x3ce);
        SETBITS(sXml10StartChars, 0x3d0, 0x3d6);
        SETBITS(sXml10StartChars, 0x3da);
        SETBITS(sXml10StartChars, 0x3dc);
        SETBITS(sXml10StartChars, 0x3de);
        SETBITS(sXml10StartChars, 0x3e0);
        SETBITS(sXml10StartChars, 0x3e2, 0x3f3);
        SETBITS(sXml10StartChars, 0x401, 0x40c);
        SETBITS(sXml10StartChars, 0x40e, 0x44f);
        SETBITS(sXml10StartChars, 0x451, 0x45c);
        SETBITS(sXml10StartChars, 0x45e, 0x481);
        SETBITS(sXml10StartChars, 0x490, 0x4c4);
        SETBITS(sXml10StartChars, 0x4c7, 0x4c8);
        SETBITS(sXml10StartChars, 0x4cb, 0x4cc);
        SETBITS(sXml10StartChars, 0x4d0, 0x4eb);
        SETBITS(sXml10StartChars, 0x4ee, 0x4f5);
        SETBITS(sXml10StartChars, 0x4f8, 0x4f9);
        SETBITS(sXml10StartChars, 0x531, 0x556);
        SETBITS(sXml10StartChars, 0x559);
        SETBITS(sXml10StartChars, 0x561, 0x586);
        SETBITS(sXml10StartChars, 0x5d0, 0x5ea);
        SETBITS(sXml10StartChars, 0x5f0, 0x5f2);
        SETBITS(sXml10StartChars, 0x621, 0x63a);
        SETBITS(sXml10StartChars, 0x641, 0x64a);
        SETBITS(sXml10StartChars, 0x671, 0x6b7);
        SETBITS(sXml10StartChars, 0x6ba, 0x6be);
        SETBITS(sXml10StartChars, 0x6c0, 0x6ce);
        SETBITS(sXml10StartChars, 0x6d0, 0x6d3);
        SETBITS(sXml10StartChars, 0x6d5);
        SETBITS(sXml10StartChars, 0x6e5, 0x6e6);
        SETBITS(sXml10StartChars, 0x905, 0x939);
        SETBITS(sXml10StartChars, 0x93d);
        SETBITS(sXml10StartChars, 0x958, 0x961);
        SETBITS(sXml10StartChars, 0x985, 0x98c);
        SETBITS(sXml10StartChars, 0x98f, 0x990);
        SETBITS(sXml10StartChars, 0x993, 0x9a8);
        SETBITS(sXml10StartChars, 0x9aa, 0x9b0);
        SETBITS(sXml10StartChars, 0x9b2);
        SETBITS(sXml10StartChars, 0x9b6, 0x9b9);
        SETBITS(sXml10StartChars, 0x9dc);
        SETBITS(sXml10StartChars, 0x9dd);
        SETBITS(sXml10StartChars, 0x9df, 0x9e1);
        SETBITS(sXml10StartChars, 0x9f0);
        SETBITS(sXml10StartChars, 0x9f1);
        SETBITS(sXml10StartChars, 0xA05, 0xA0A);
        SETBITS(sXml10StartChars, 0xA0F);
        SETBITS(sXml10StartChars, 0xA10);
        SETBITS(sXml10StartChars, 0xA13, 0xA28);
        SETBITS(sXml10StartChars, 0xA2A, 0xA30);
        SETBITS(sXml10StartChars, 0xA32);
        SETBITS(sXml10StartChars, 0xA33);
        SETBITS(sXml10StartChars, 0xA35);
        SETBITS(sXml10StartChars, 0xA36);
        SETBITS(sXml10StartChars, 0xA38);
        SETBITS(sXml10StartChars, 0xA39);
        SETBITS(sXml10StartChars, 0xA59, 0xA5C);
        SETBITS(sXml10StartChars, 0xA5E);
        SETBITS(sXml10StartChars, 0xA72, 0xA74);
        SETBITS(sXml10StartChars, 0xA85, 0xA8B);
        SETBITS(sXml10StartChars, 0xA8D);
        SETBITS(sXml10StartChars, 0xA8F, 0xA91);
        SETBITS(sXml10StartChars, 0xA93, 0xAA8);
        SETBITS(sXml10StartChars, 0xAAA, 0xAB0);
        SETBITS(sXml10StartChars, 0xAB2, 0xAB3);
        SETBITS(sXml10StartChars, 0xAB5, 0xAB9);
        SETBITS(sXml10StartChars, 0xABD);
        SETBITS(sXml10StartChars, 0xAE0);
        SETBITS(sXml10StartChars, 0xB05, 0xB0C);
        SETBITS(sXml10StartChars, 0xB0F);
        SETBITS(sXml10StartChars, 0xB10);
        SETBITS(sXml10StartChars, 0xB13, 0xB28);
        SETBITS(sXml10StartChars, 0xB2A, 0xB30);
        SETBITS(sXml10StartChars, 0xB32);
        SETBITS(sXml10StartChars, 0xB33);
        SETBITS(sXml10StartChars, 0xB36, 0xB39);
        SETBITS(sXml10StartChars, 0xB3D);
        SETBITS(sXml10StartChars, 0xB5C);
        SETBITS(sXml10StartChars, 0xB5D);
        SETBITS(sXml10StartChars, 0xB5F, 0xB61);
        SETBITS(sXml10StartChars, 0xB85, 0xB8A);
        SETBITS(sXml10StartChars, 0xB8E, 0xB90);
        SETBITS(sXml10StartChars, 0xB92, 0xB95);
        SETBITS(sXml10StartChars, 0xB99, 0xB9A);
        SETBITS(sXml10StartChars, 0xB9C);
        SETBITS(sXml10StartChars, 0xB9E);
        SETBITS(sXml10StartChars, 0xB9F);
        SETBITS(sXml10StartChars, 0xBA3);
        SETBITS(sXml10StartChars, 0xBA4);
        SETBITS(sXml10StartChars, 0xBA8, 0xBAA);
        SETBITS(sXml10StartChars, 0xBAE, 0xBB5);
        SETBITS(sXml10StartChars, 0xBB7, 0xBB9);
        SETBITS(sXml10StartChars, 0xC05, 0xC0C);
        SETBITS(sXml10StartChars, 0xC0E, 0xC10);
        SETBITS(sXml10StartChars, 0xC12, 0xC28);
        SETBITS(sXml10StartChars, 0xC2A, 0xC33);
        SETBITS(sXml10StartChars, 0xC35, 0xC39);
        SETBITS(sXml10StartChars, 0xC60);
        SETBITS(sXml10StartChars, 0xC61);
        SETBITS(sXml10StartChars, 0xC85, 0xC8C);
        SETBITS(sXml10StartChars, 0xC8E, 0xC90);
        SETBITS(sXml10StartChars, 0xC92, 0xCA8);
        SETBITS(sXml10StartChars, 0xCAA, 0xCB3);
        SETBITS(sXml10StartChars, 0xCB5, 0xCB9);
        SETBITS(sXml10StartChars, 0xCDE);
        SETBITS(sXml10StartChars, 0xCE0);
        SETBITS(sXml10StartChars, 0xCE1);
        SETBITS(sXml10StartChars, 0xD05, 0xD0C);
        SETBITS(sXml10StartChars, 0xD0E, 0xD10);
        SETBITS(sXml10StartChars, 0xD12, 0xD28);
        SETBITS(sXml10StartChars, 0xD2A, 0xD39);
        SETBITS(sXml10StartChars, 0xD60);
        SETBITS(sXml10StartChars, 0xD61);
        SETBITS(sXml10StartChars, 0xE01, 0xE2E);
        SETBITS(sXml10StartChars, 0xE30);
        SETBITS(sXml10StartChars, 0xE32);
        SETBITS(sXml10StartChars, 0xE33);
        SETBITS(sXml10StartChars, 0xE40, 0xE45);
        SETBITS(sXml10StartChars, 0xE81);
        SETBITS(sXml10StartChars, 0xE82);
        SETBITS(sXml10StartChars, 0xE84);
        SETBITS(sXml10StartChars, 0xE87);
        SETBITS(sXml10StartChars, 0xE88);
        SETBITS(sXml10StartChars, 0xE8A);
        SETBITS(sXml10StartChars, 0xE8D);
        SETBITS(sXml10StartChars, 0xE94, 0xE97);
        SETBITS(sXml10StartChars, 0xE99, 0xE9F);
        SETBITS(sXml10StartChars, 0xEA1, 0xEA3);
        SETBITS(sXml10StartChars, 0xEA5);
        SETBITS(sXml10StartChars, 0xEA7);
        SETBITS(sXml10StartChars, 0xEAA);
        SETBITS(sXml10StartChars, 0xEAB);
        SETBITS(sXml10StartChars, 0xEAD);
        SETBITS(sXml10StartChars, 0xEAE);
        SETBITS(sXml10StartChars, 0xEB0);
        SETBITS(sXml10StartChars, 0xEB2);
        SETBITS(sXml10StartChars, 0xEB3);
        SETBITS(sXml10StartChars, 0xEBD);
        SETBITS(sXml10StartChars, 0xEC0, 0xEC4);
        SETBITS(sXml10StartChars, 0xF40, 0xF47);
        SETBITS(sXml10StartChars, 0xF49, 0xF69);
        SETBITS(sXml10StartChars, 0x10a0, 0x10c5);
        SETBITS(sXml10StartChars, 0x10d0, 0x10f6);
        SETBITS(sXml10StartChars, 0x1100);
        SETBITS(sXml10StartChars, 0x1102, 0x1103);
        SETBITS(sXml10StartChars, 0x1105, 0x1107);
        SETBITS(sXml10StartChars, 0x1109);
        SETBITS(sXml10StartChars, 0x110b, 0x110c);
        SETBITS(sXml10StartChars, 0x110e, 0x1112);
        SETBITS(sXml10StartChars, 0x113c);
        SETBITS(sXml10StartChars, 0x113e);
        SETBITS(sXml10StartChars, 0x1140);
        SETBITS(sXml10StartChars, 0x114c);
        SETBITS(sXml10StartChars, 0x114e);
        SETBITS(sXml10StartChars, 0x1150);
        SETBITS(sXml10StartChars, 0x1154, 0x1155);
        SETBITS(sXml10StartChars, 0x1159);
        SETBITS(sXml10StartChars, 0x115f, 0x1161);
        SETBITS(sXml10StartChars, 0x1163);
        SETBITS(sXml10StartChars, 0x1165);
        SETBITS(sXml10StartChars, 0x1167);
        SETBITS(sXml10StartChars, 0x1169);
        SETBITS(sXml10StartChars, 0x116d, 0x116e);
        SETBITS(sXml10StartChars, 0x1172, 0x1173);
        SETBITS(sXml10StartChars, 0x1175);
        SETBITS(sXml10StartChars, 0x119e);
        SETBITS(sXml10StartChars, 0x11a8);
        SETBITS(sXml10StartChars, 0x11ab);
        SETBITS(sXml10StartChars, 0x11ae, 0x11af);
        SETBITS(sXml10StartChars, 0x11b7, 0x11b8);
        SETBITS(sXml10StartChars, 0x11ba);
        SETBITS(sXml10StartChars, 0x11bc, 0x11c2);
        SETBITS(sXml10StartChars, 0x11eb);
        SETBITS(sXml10StartChars, 0x11f0);
        SETBITS(sXml10StartChars, 0x11f9);
        SETBITS(sXml10StartChars, 0x1e00, 0x1e9b);
        SETBITS(sXml10StartChars, 0x1ea0, 0x1ef9);
        SETBITS(sXml10StartChars, 0x1f00, 0x1f15);
        SETBITS(sXml10StartChars, 0x1f18, 0x1f1d);
        SETBITS(sXml10StartChars, 0x1f20, 0x1f45);
        SETBITS(sXml10StartChars, 0x1f48, 0x1f4d);
        SETBITS(sXml10StartChars, 0x1f50, 0x1f57);
        SETBITS(sXml10StartChars, 0x1f59);
        SETBITS(sXml10StartChars, 0x1f5b);
        SETBITS(sXml10StartChars, 0x1f5d);
        SETBITS(sXml10StartChars, 0x1f5f, 0x1f7d);
        SETBITS(sXml10StartChars, 0x1f80, 0x1fb4);
        SETBITS(sXml10StartChars, 0x1fb6, 0x1fbc);
        SETBITS(sXml10StartChars, 0x1fbe);
        SETBITS(sXml10StartChars, 0x1fc2, 0x1fc4);
        SETBITS(sXml10StartChars, 0x1fc6, 0x1fcc);
        SETBITS(sXml10StartChars, 0x1fd0, 0x1fd3);
        SETBITS(sXml10StartChars, 0x1fd6, 0x1fdb);
        SETBITS(sXml10StartChars, 0x1fe0, 0x1fec);
        SETBITS(sXml10StartChars, 0x1ff2, 0x1ff4);
        SETBITS(sXml10StartChars, 0x1ff6, 0x1ffc);
        SETBITS(sXml10StartChars, 0x2126);
        SETBITS(sXml10StartChars, 0x212a, 0x212b);
        SETBITS(sXml10StartChars, 0x212e);
        SETBITS(sXml10StartChars, 0x2180, 0x2182);
        SETBITS(sXml10StartChars, 0x3041, 0x3094);
        SETBITS(sXml10StartChars, 0x30a1, 0x30fa);
        SETBITS(sXml10StartChars, 0x3105, 0x312c);
        // note: AC00 - D7A3 handled separately

        // [86] Ideographic (but note: > 0x312c handled separately)
        SETBITS(sXml10StartChars, 0x3007);
        SETBITS(sXml10StartChars, 0x3021, 0x3029);
    }

    /**
     * Bit set of code points (below 0x3140) that may appear in an XML 1.0
     * name after the first character: everything in
     * {@link #sXml10StartChars} plus productions [87] CombiningChar,
     * [88] Digit and [89] Extender.
     */
    final static int[] sXml10Chars = new int[SIZE];
    static {
        // Let's start with all valid start chars:
        System.arraycopy(sXml10StartChars, 0, sXml10Chars, 0, SIZE);

        // [87] CombiningChar:
        SETBITS(sXml10Chars, 0x300, 0x345);
        SETBITS(sXml10Chars, 0x360, 0x361);
        SETBITS(sXml10Chars, 0x483, 0x486);
        SETBITS(sXml10Chars, 0x591, 0x5a1);
        SETBITS(sXml10Chars, 0x5a3, 0x5b9);
        SETBITS(sXml10Chars, 0x5bb, 0x5bd);
        SETBITS(sXml10Chars, 0x5bf);
        SETBITS(sXml10Chars, 0x5c1, 0x5c2);
        SETBITS(sXml10Chars, 0x5c4);
        SETBITS(sXml10Chars, 0x64b, 0x652);
        SETBITS(sXml10Chars, 0x670);
        SETBITS(sXml10Chars, 0x6d6, 0x6dc);
        SETBITS(sXml10Chars, 0x6dd, 0x6df);
        SETBITS(sXml10Chars, 0x6e0, 0x6e4);
        SETBITS(sXml10Chars, 0x6e7, 0x6e8);
        SETBITS(sXml10Chars, 0x6ea, 0x6ed);
        SETBITS(sXml10Chars, 0x901, 0x903);
        SETBITS(sXml10Chars, 0x93c);
        SETBITS(sXml10Chars, 0x93e, 0x94c);
        SETBITS(sXml10Chars, 0x94d);
        SETBITS(sXml10Chars, 0x951, 0x954);
        SETBITS(sXml10Chars, 0x962);
        SETBITS(sXml10Chars, 0x963);
        SETBITS(sXml10Chars, 0x981, 0x983);
        SETBITS(sXml10Chars, 0x9bc);
        SETBITS(sXml10Chars, 0x9be);
        SETBITS(sXml10Chars, 0x9bf);
        SETBITS(sXml10Chars, 0x9c0, 0x9c4);
        SETBITS(sXml10Chars, 0x9c7);
        SETBITS(sXml10Chars, 0x9c8);
        SETBITS(sXml10Chars, 0x9cb, 0x9cd);
        SETBITS(sXml10Chars, 0x9d7);
        SETBITS(sXml10Chars, 0x9e2);
        SETBITS(sXml10Chars, 0x9e3);
        SETBITS(sXml10Chars, 0xA02);
        SETBITS(sXml10Chars, 0xA3C);
        SETBITS(sXml10Chars, 0xA3E);
        SETBITS(sXml10Chars, 0xA3F);
        SETBITS(sXml10Chars, 0xA40, 0xA42);
        SETBITS(sXml10Chars, 0xA47);
        SETBITS(sXml10Chars, 0xA48);
        SETBITS(sXml10Chars, 0xA4B, 0xA4D);
        SETBITS(sXml10Chars, 0xA70);
        SETBITS(sXml10Chars, 0xA71);
        SETBITS(sXml10Chars, 0xA81, 0xA83);
        SETBITS(sXml10Chars, 0xABC);
        SETBITS(sXml10Chars, 0xABE, 0xAC5);
        SETBITS(sXml10Chars, 0xAC7, 0xAC9);
        SETBITS(sXml10Chars, 0xACB, 0xACD);
        SETBITS(sXml10Chars, 0xB01, 0xB03);
        SETBITS(sXml10Chars, 0xB3C);
        SETBITS(sXml10Chars, 0xB3E, 0xB43);
        SETBITS(sXml10Chars, 0xB47);
        SETBITS(sXml10Chars, 0xB48);
        SETBITS(sXml10Chars, 0xB4B, 0xB4D);
        SETBITS(sXml10Chars, 0xB56);
        SETBITS(sXml10Chars, 0xB57);
        SETBITS(sXml10Chars, 0xB82);
        SETBITS(sXml10Chars, 0xB83);
        SETBITS(sXml10Chars, 0xBBE, 0xBC2);
        SETBITS(sXml10Chars, 0xBC6, 0xBC8);
        SETBITS(sXml10Chars, 0xBCA, 0xBCD);
        SETBITS(sXml10Chars, 0xBD7);
        SETBITS(sXml10Chars, 0xC01, 0xC03);
        SETBITS(sXml10Chars, 0xC3E, 0xC44);
        SETBITS(sXml10Chars, 0xC46, 0xC48);
        SETBITS(sXml10Chars, 0xC4A, 0xC4D);
        SETBITS(sXml10Chars, 0xC55, 0xC56);
        SETBITS(sXml10Chars, 0xC82, 0xC83);
        SETBITS(sXml10Chars, 0xCBE, 0xCC4);
        SETBITS(sXml10Chars, 0xCC6, 0xCC8);
        SETBITS(sXml10Chars, 0xCCA, 0xCCD);
        SETBITS(sXml10Chars, 0xCD5, 0xCD6);
        SETBITS(sXml10Chars, 0xD02, 0xD03);
        SETBITS(sXml10Chars, 0xD3E, 0xD43);
        SETBITS(sXml10Chars, 0xD46, 0xD48);
        SETBITS(sXml10Chars, 0xD4A, 0xD4D);
        SETBITS(sXml10Chars, 0xD57);
        SETBITS(sXml10Chars, 0xE31);
        SETBITS(sXml10Chars, 0xE34, 0xE3A);
        SETBITS(sXml10Chars, 0xE47, 0xE4E);
        SETBITS(sXml10Chars, 0xEB1);
        SETBITS(sXml10Chars, 0xEB4, 0xEB9);
        SETBITS(sXml10Chars, 0xEBB, 0xEBC);
        SETBITS(sXml10Chars, 0xEC8, 0xECD);
        SETBITS(sXml10Chars, 0xF18, 0xF19);
        SETBITS(sXml10Chars, 0xF35);
        SETBITS(sXml10Chars, 0xF37);
        SETBITS(sXml10Chars, 0xF39);
        SETBITS(sXml10Chars, 0xF3E);
        SETBITS(sXml10Chars, 0xF3F);
        SETBITS(sXml10Chars, 0xF71, 0xF84);
        SETBITS(sXml10Chars, 0xF86, 0xF8B);
        SETBITS(sXml10Chars, 0xF90, 0xF95);
        SETBITS(sXml10Chars, 0xF97);
        SETBITS(sXml10Chars, 0xF99, 0xFAD);
        SETBITS(sXml10Chars, 0xFB1, 0xFB7);
        SETBITS(sXml10Chars, 0xFB9);
        SETBITS(sXml10Chars, 0x20D0, 0x20DC);
        SETBITS(sXml10Chars, 0x20E1);
        SETBITS(sXml10Chars, 0x302A, 0x302F);
        SETBITS(sXml10Chars, 0x3099);
        SETBITS(sXml10Chars, 0x309A);

        // [88] Digit:
        SETBITS(sXml10Chars, 0x660, 0x669);
        SETBITS(sXml10Chars, 0x6f0, 0x6f9);
        SETBITS(sXml10Chars, 0x966, 0x96f);
        SETBITS(sXml10Chars, 0x9e6, 0x9ef);
        SETBITS(sXml10Chars, 0xa66, 0xa6f);
        SETBITS(sXml10Chars, 0xae6, 0xaef);
        SETBITS(sXml10Chars, 0xb66, 0xb6f);
        SETBITS(sXml10Chars, 0xbe7, 0xbef);
        SETBITS(sXml10Chars, 0xc66, 0xc6f);
        SETBITS(sXml10Chars, 0xce6, 0xcef);
        SETBITS(sXml10Chars, 0xd66, 0xd6f);
        SETBITS(sXml10Chars, 0xe50, 0xe59);
        SETBITS(sXml10Chars, 0xed0, 0xed9);
        SETBITS(sXml10Chars, 0xf20, 0xf29);

        // [89] Extender:
        SETBITS(sXml10Chars, 0xb7);
        SETBITS(sXml10Chars, 0x2d0);
        SETBITS(sXml10Chars, 0x2d1);
        SETBITS(sXml10Chars, 0x387);
        SETBITS(sXml10Chars, 0x640);
        SETBITS(sXml10Chars, 0xE46);
        SETBITS(sXml10Chars, 0xEC6);
        SETBITS(sXml10Chars, 0x3005);
        SETBITS(sXml10Chars, 0x3031, 0x3035);
        SETBITS(sXml10Chars, 0x309d, 0x309e);
        SETBITS(sXml10Chars, 0x30fc, 0x30fe);
    }

    /** Static utility holder; not instantiable. */
    private XmlChars() { }

    /**
     * Checks whether a non-ASCII character may start an XML 1.0 name.
     * ASCII characters always yield {@code false} here.
     *
     * @param c UTF-16 code unit to test
     * @return {@code true} if {@code c} is a non-ASCII XML 1.0 letter or
     *     ideograph, or a high surrogate (pairing is not verified)
     */
    public final static boolean is10NameStartChar(char c)
    {
        // First, let's deal with outliers
        if (c > 0x312C) { // Most valid chars are below this..
            if (c < 0xAC00) {
                return (c >= 0x4E00 && c <= 0x9FA5); // valid ideograms
            }
            if (c <= 0xD7A3) { // 0xAC00 - 0xD7A3, valid base chars
                return true;
            }
            /* As to surrogate pairs... let's do the bare minimum;
             * 0xD800 - 0xDBFF (high surrogate) are ok; low surrogates
             * can only follow high one
             */
            return (c <= 0xDBFF && c >= 0xD800);
        }
        // but then we'll just need to use the table...
        int ix = (int) c;
        return (sXml10StartChars[ix >> 5] & (1 << (ix & 31))) != 0;
    }

    /**
     * Checks whether a non-ASCII character may appear in an XML 1.0 name
     * (at any position after the first). ASCII characters always yield
     * {@code false} here.
     *
     * @param c UTF-16 code unit to test
     * @return {@code true} if {@code c} is a non-ASCII XML 1.0 name
     *     character, or any surrogate (pairing is not verified)
     */
    public final static boolean is10NameChar(char c)
    {
        // First, let's deal with outliers
        if (c > 0x312C) { // Most valid chars are below this..
            if (c < 0xAC00) {
                return (c >= 0x4E00 && c <= 0x9FA5); // valid ideograms
            }
            if (c <= 0xD7A3) { // 0xAC00 - 0xD7A3, valid base chars
                return true;
            }
            /* As to surrogate pairs... let's do the bare minimum;
             * 0xD800 - 0xDFFF (high, low surrogate) are ok (need to
             * check pairing in future)
             */
            return (c >= 0xD800 && c <= 0xDFFF);
        }
        // but then we'll just need to use the table...
        int ix = (int) c;
        return (sXml10Chars[ix >> 5] & (1 << (ix & 31))) != 0;
    }

    /**
     * Checks whether a non-ASCII character may start an XML 1.1 name.
     * Characters below 0xC0 always yield {@code false} here.
     *
     * @param c UTF-16 code unit to test
     * @return {@code true} if {@code c} is in one of the non-ASCII
     *     NameStartChar ranges of XML 1.1, or is a high surrogate
     *     (pairing is not verified)
     */
    public final static boolean is11NameStartChar(char c)
    {
        // Others are checked block-by-block:
        if (c <= 0x2FEF) {
            if (c < 0x300) {
                if (c < 0x00C0) { // 8-bit ctrl chars
                    return false;
                }
                // most of the rest are fine...
                return (c != 0xD7 && c != 0xF7);
            }
            if (c >= 0x2C00) {
                // 0x2C00 - 0x2FEF are ok
                return true;
            }
            if (c < 0x370 || c > 0x218F) {
                // 0x300 - 0x36F, 0x2190 - 0x2BFF invalid
                return false;
            }
            if (c < 0x2000) {
                // 0x370 - 0x37D, 0x37F - 0x1FFF are ok
                return (c != 0x37E);
            }
            if (c >= 0x2070) {
                // 0x2070 - 0x218F are ok
                return (c <= 0x218F);
            }
            // And finally, 0x200C - 0x200D
            return (c == 0x200C || c == 0x200D);
        }

        // 0x3000 and above:
        if (c >= 0x3001) {
            /* Hmmh, let's allow high surrogates here, without checking
             * that they are properly followed... crude basic support,
             * I know, but allows valid combinations, just doesn't catch
             * invalid ones
             */
            if (c <= 0xDBFF) { // 0x3001 - 0xD7FF (chars),
                // 0xD800 - 0xDBFF (high surrogate) are ok (unlike DC00-DFFF)
                return true;
            }
            if (c >= 0xF900 && c <= 0xFFFD) {
                /* Check above removes low surrogate (since one can not
                 * START an identifier), and byte-order markers..
                 */
                return (c <= 0xFDCF || c >= 0xFDF0);
            }
        }

        return false;
    }

    /**
     * Checks whether a non-ASCII character may appear in an XML 1.1 name
     * (at any position after the first). Characters below 0xC0 yield
     * {@code false}, except the middle dot (0xB7).
     *
     * @param c UTF-16 code unit to test
     * @return {@code true} if {@code c} is in one of the non-ASCII NameChar
     *     ranges of XML 1.1, or is any surrogate (pairing is not verified)
     */
    public final static boolean is11NameChar(char c)
    {
        // Others are checked block-by-block:
        if (c <= 0x2FEF) {
            if (c < 0x2000) {
                if (c < 0x00C0) {
                    // Below 0xC0 only the middle dot is a (non-ASCII) name char
                    return (c == 0xB7);
                }
                /* 0xC0 - 0x1FFF are ok, except for the multiplication
                 * (0xD7) and division (0xF7) signs and the Greek question
                 * mark (0x37E): same exclusions as for the start chars;
                 * combining marks 0x300 - 0x36F are additionally allowed.
                 */
                return (c != 0xD7 && c != 0xF7 && c != 0x37E);
            }
            if (c >= 0x2C00) {
                // 0x2C00 - 0x2FEF are ok
                return true;
            }
            if (c < 0x200C || c > 0x218F) {
                // 0x2000 - 0x200B, 0x2190 - 0x2BFF invalid
                return false;
            }
            if (c >= 0x2070) {
                // 0x2070 - 0x218F are ok
                return true;
            }
            // And finally, 0x200C - 0x200D, 0x203F - 0x2040 are ok
            return (c == 0x200C || c == 0x200D
                || c == 0x203F || c == 0x2040);
        }

        // 0x3000 and above:
        if (c >= 0x3001) {
            /* Hmmh, let's allow surrogates here, without checking that
             * they have proper ordering. For non-first name chars, both are
             * ok, for valid names. Crude basic support,
             * I know, but allows valid combinations, just doesn't catch
             * invalid ones
             */
            if (c <= 0xDFFF) { // 0x3001 - 0xD7FF (chars),
                // 0xD800 - 0xDFFF (high, low surrogate) are ok:
                return true;
            }
            if (c >= 0xF900 && c <= 0xFFFD) {
                /* Check above removes other invalid chars (below valid
                 * range), and byte-order markers (0xFFFE, 0xFFFF).
                 */
                return (c <= 0xFDCF || c >= 0xFDF0);
            }
        }

        return false;
    }

    /**
     * Marks every code point in the inclusive range {@code start..end} in
     * the given bit table. Callers must pass {@code start <= end}.
     *
     * @param array bit table, 32 code points per element
     * @param start first code point to mark
     * @param end last code point to mark
     */
    private static void SETBITS(int[] array, int start, int end)
    {
        int bit1 = (start & 31);
        int bit2 = (end & 31);
        start >>= 5;
        end >>= 5;

        /* Ok; this is not perfectly optimal, but should be good enough...
         * we'll only do one-by-one at the ends.
         */
        if (start == end) {
            for (; bit1 <= bit2; ++bit1) {
                array[start] |= (1 << bit1);
            }
        } else {
            // Partial first word
            for (int bit = bit1; bit <= 31; ++bit) {
                array[start] |= (1 << bit);
            }
            // Whole words in between
            while (++start < end) {
                array[start] = -1;
            }
            // Partial last word
            for (int bit = 0; bit <= bit2; ++bit) {
                array[end] |= (1 << bit);
            }
        }
    }

    /**
     * Marks a single code point in the given bit table.
     *
     * @param array bit table, 32 code points per element
     * @param point code point to mark
     */
    private static void SETBITS(int[] array, int point) {
        int ix = (point >> 5);
        int bit = (point & 31);

        array[ix] |= (1 << bit);
    }
}