nkfのGitリポジトリのfork
修訂 | 7b023c499dd520e7a50c7cfff9e210c03b073397 (tree) |
---|---|
時間 | 2006-03-25 00:14:32 |
作者 | NARUSE, Yui <naruse@user...> |
Committer | NARUSE, Yui |
* Add codeset ISO-2022-JP-1, ISO-2022-JP-3
* Fix JIS X 0213 conversion
@@ -39,9 +39,9 @@ | ||
39 | 39 | ** E-Mail: furukawa@tcp-ip.or.jp |
40 | 40 | ** まで御連絡をお願いします。 |
41 | 41 | ***********************************************************************/ |
42 | -/* $Id: nkf.c,v 1.93 2006/03/14 15:55:58 naruse Exp $ */ | |
42 | +/* $Id: nkf.c,v 1.94 2006/03/24 06:14:32 naruse Exp $ */ | |
43 | 43 | #define NKF_VERSION "2.0.6" |
44 | -#define NKF_RELEASE_DATE "2006-03-14" | |
44 | +#define NKF_RELEASE_DATE "2006-03-24" | |
45 | 45 | #include "config.h" |
46 | 46 | |
47 | 47 | #define COPY_RIGHT \ |
@@ -191,9 +191,9 @@ | ||
191 | 191 | #define X0201 2 |
192 | 192 | #define ISO8859_1 8 |
193 | 193 | #define NO_X0201 3 |
194 | -#define X0212 0x2844 | |
195 | -#define X0213_1 0x2850 | |
196 | -#define X0213_2 0x2850 | |
194 | +#define X0212 0x2844 | |
195 | +#define X0213_1 0x284F | |
196 | +#define X0213_2 0x2850 | |
197 | 197 | |
198 | 198 | /* Input Assumption */ |
199 | 199 |
@@ -1154,6 +1154,17 @@ options(cp) | ||
1154 | 1154 | codeset[i] = 0; |
1155 | 1155 | if(strcmp(codeset, "ISO-2022-JP") == 0){ |
1156 | 1156 | input_f = JIS_INPUT; |
1157 | + }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){ | |
1158 | + input_f = JIS_INPUT; | |
1159 | +#ifdef X0212_ENABLE | |
1160 | + x0212_f = TRUE; | |
1161 | +#endif | |
1162 | + }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){ | |
1163 | + input_f = JIS_INPUT; | |
1164 | +#ifdef X0212_ENABLE | |
1165 | + x0212_f = TRUE; | |
1166 | +#endif | |
1167 | + x0213_f = TRUE; | |
1157 | 1168 | }else if(strcmp(codeset, "SHIFT_JIS") == 0){ |
1158 | 1169 | input_f = SJIS_INPUT; |
1159 | 1170 | if (x0201_f==NO_X0201) x0201_f=TRUE; |
@@ -1164,10 +1175,10 @@ options(cp) | ||
1164 | 1175 | input_f = SJIS_INPUT; |
1165 | 1176 | x0201_f = FALSE; |
1166 | 1177 | #ifdef SHIFTJIS_CP932 |
1167 | - cp51932_f = TRUE; | |
1178 | + cp51932_f = TRUE; | |
1168 | 1179 | #endif |
1169 | 1180 | #ifdef UTF8_OUTPUT_ENABLE |
1170 | - ms_ucs_map_f = UCS_MAP_CP932; | |
1181 | + ms_ucs_map_f = UCS_MAP_CP932; | |
1171 | 1182 | #endif |
1172 | 1183 | }else if(strcmp(codeset, "EUCJP") == 0 || |
1173 | 1184 | strcmp(codeset, "EUC-JP") == 0){ |
@@ -1176,10 +1187,10 @@ options(cp) | ||
1176 | 1187 | input_f = JIS_INPUT; |
1177 | 1188 | x0201_f = FALSE; |
1178 | 1189 | #ifdef SHIFTJIS_CP932 |
1179 | - cp51932_f = TRUE; | |
1190 | + cp51932_f = TRUE; | |
1180 | 1191 | #endif |
1181 | 1192 | #ifdef UTF8_OUTPUT_ENABLE |
1182 | - ms_ucs_map_f = UCS_MAP_CP932; | |
1193 | + ms_ucs_map_f = UCS_MAP_CP932; | |
1183 | 1194 | #endif |
1184 | 1195 | }else if(strcmp(codeset, "EUC-JP-MS") == 0 || |
1185 | 1196 | strcmp(codeset, "EUCJP-MS") == 0 || |
@@ -1187,27 +1198,27 @@ options(cp) | ||
1187 | 1198 | input_f = JIS_INPUT; |
1188 | 1199 | x0201_f = FALSE; |
1189 | 1200 | #ifdef SHIFTJIS_CP932 |
1190 | - cp51932_f = FALSE; | |
1201 | + cp51932_f = FALSE; | |
1191 | 1202 | #endif |
1192 | 1203 | #ifdef UTF8_OUTPUT_ENABLE |
1193 | - ms_ucs_map_f = UCS_MAP_MS; | |
1204 | + ms_ucs_map_f = UCS_MAP_MS; | |
1194 | 1205 | #endif |
1195 | 1206 | }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 || |
1196 | 1207 | strcmp(codeset, "EUCJP-ASCII") == 0){ |
1197 | 1208 | input_f = JIS_INPUT; |
1198 | 1209 | x0201_f = FALSE; |
1199 | 1210 | #ifdef SHIFTJIS_CP932 |
1200 | - cp51932_f = FALSE; | |
1211 | + cp51932_f = FALSE; | |
1201 | 1212 | #endif |
1202 | 1213 | #ifdef UTF8_OUTPUT_ENABLE |
1203 | - ms_ucs_map_f = UCS_MAP_ASCII; | |
1214 | + ms_ucs_map_f = UCS_MAP_ASCII; | |
1204 | 1215 | #endif |
1205 | 1216 | }else if(strcmp(codeset, "SHIFT_JISX0213") == 0){ |
1206 | 1217 | input_f = SJIS_INPUT; |
1207 | 1218 | x0213_f = TRUE; |
1208 | 1219 | #ifdef SHIFTJIS_CP932 |
1209 | - cp51932_f = FALSE; | |
1210 | - cp932inv_f = FALSE; | |
1220 | + cp51932_f = FALSE; | |
1221 | + cp932inv_f = FALSE; | |
1211 | 1222 | #endif |
1212 | 1223 | if (x0201_f==NO_X0201) x0201_f=TRUE; |
1213 | 1224 | }else if(strcmp(codeset, "EUC-JISX0213") == 0){ |
@@ -1215,8 +1226,8 @@ options(cp) | ||
1215 | 1226 | x0201_f = FALSE; |
1216 | 1227 | x0213_f = TRUE; |
1217 | 1228 | #ifdef SHIFTJIS_CP932 |
1218 | - cp51932_f = FALSE; | |
1219 | - cp932inv_f = FALSE; | |
1229 | + cp51932_f = FALSE; | |
1230 | + cp932inv_f = FALSE; | |
1220 | 1231 | #endif |
1221 | 1232 | #ifdef UTF8_INPUT_ENABLE |
1222 | 1233 | }else if(strcmp(codeset, "UTF-8") == 0 || |
@@ -1251,6 +1262,32 @@ options(cp) | ||
1251 | 1262 | codeset[i] = 0; |
1252 | 1263 | if(strcmp(codeset, "ISO-2022-JP") == 0){ |
1253 | 1264 | output_conv = j_oconv; |
1265 | + }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){ | |
1266 | + output_conv = j_oconv; | |
1267 | +#ifdef X0212_ENABLE | |
1268 | + x0212_f = TRUE; | |
1269 | +#endif | |
1270 | +#ifdef SHIFTJIS_CP932 | |
1271 | + cp51932_f = FALSE; | |
1272 | +#endif | |
1273 | + }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){ | |
1274 | + output_conv = j_oconv; | |
1275 | +#ifdef X0212_ENABLE | |
1276 | + x0212_f = TRUE; | |
1277 | +#endif | |
1278 | + x0213_f = TRUE; | |
1279 | +#ifdef SHIFTJIS_CP932 | |
1280 | + cp51932_f = FALSE; | |
1281 | +#endif | |
1282 | + }else if(strcmp(codeset, "ISO-2022-JP-MS") == 0){ | |
1283 | + output_conv = j_oconv; | |
1284 | + x0201_f = FALSE; | |
1285 | +#ifdef X0212_ENABLE | |
1286 | + x0212_f = TRUE; | |
1287 | +#endif | |
1288 | +#ifdef SHIFTJIS_CP932 | |
1289 | + cp51932_f = FALSE; | |
1290 | +#endif | |
1254 | 1291 | }else if(strcmp(codeset, "SHIFT_JIS") == 0){ |
1255 | 1292 | output_conv = s_oconv; |
1256 | 1293 | }else if(strcmp(codeset, "WINDOWS-31J") == 0 || |
@@ -1264,7 +1301,7 @@ options(cp) | ||
1264 | 1301 | cp932inv_f = TRUE; |
1265 | 1302 | #endif |
1266 | 1303 | #ifdef UTF8_OUTPUT_ENABLE |
1267 | - ms_ucs_map_f = UCS_MAP_CP932; | |
1304 | + ms_ucs_map_f = UCS_MAP_CP932; | |
1268 | 1305 | #endif |
1269 | 1306 | }else if(strcmp(codeset, "EUCJP") == 0 || |
1270 | 1307 | strcmp(codeset, "EUC-JP") == 0){ |
@@ -1276,7 +1313,7 @@ options(cp) | ||
1276 | 1313 | cp51932_f = TRUE; |
1277 | 1314 | #endif |
1278 | 1315 | #ifdef UTF8_OUTPUT_ENABLE |
1279 | - ms_ucs_map_f = UCS_MAP_CP932; | |
1316 | + ms_ucs_map_f = UCS_MAP_CP932; | |
1280 | 1317 | #endif |
1281 | 1318 | }else if(strcmp(codeset, "EUC-JP-MS") == 0 || |
1282 | 1319 | strcmp(codeset, "EUCJP-MS") == 0 || |
@@ -1303,7 +1340,7 @@ options(cp) | ||
1303 | 1340 | cp51932_f = FALSE; |
1304 | 1341 | #endif |
1305 | 1342 | #ifdef UTF8_OUTPUT_ENABLE |
1306 | - ms_ucs_map_f = UCS_MAP_ASCII; | |
1343 | + ms_ucs_map_f = UCS_MAP_ASCII; | |
1307 | 1344 | #endif |
1308 | 1345 | }else if(strcmp(codeset, "SHIFT_JISX0213") == 0){ |
1309 | 1346 | output_conv = s_oconv; |
@@ -2483,7 +2520,8 @@ kanji_convert(f) | ||
2483 | 2520 | /* look like bogus code */ |
2484 | 2521 | NEXT; |
2485 | 2522 | } |
2486 | - } else if (input_mode == X0208) { | |
2523 | + } else if (input_mode == X0208 || input_mode == X0212 || | |
2524 | + input_mode == X0213_1 || input_mode == X0213_2) { | |
2487 | 2525 | /* in case of Kanji shifted */ |
2488 | 2526 | c2 = c1; |
2489 | 2527 | NEXT; |
@@ -2557,6 +2595,14 @@ kanji_convert(f) | ||
2557 | 2595 | shift_mode = FALSE; |
2558 | 2596 | NEXT; |
2559 | 2597 | #endif /* X0212_ENABLE */ |
2598 | + } else if (c1 == (X0213_1&0x7F)){ | |
2599 | + input_mode = X0213_1; | |
2600 | + shift_mode = FALSE; | |
2601 | + NEXT; | |
2602 | + } else if (c1 == (X0213_2&0x7F)){ | |
2603 | + input_mode = X0213_2; | |
2604 | + shift_mode = FALSE; | |
2605 | + NEXT; | |
2560 | 2606 | } else { |
2561 | 2607 | /* could be some special code */ |
2562 | 2608 | (*oconv)(0, ESC); |
@@ -2652,21 +2698,31 @@ kanji_convert(f) | ||
2652 | 2698 | SEND; |
2653 | 2699 | } |
2654 | 2700 | /* send: */ |
2655 | - if (input_mode == X0208) | |
2656 | - (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */ | |
2701 | + switch(input_mode){ | |
2702 | + case ASCII: | |
2703 | + if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */ | |
2704 | + int c0 = (*i_getc)(f); | |
2705 | + if (c0 != EOF){ | |
2706 | + code_status(c0); | |
2707 | + (*iconv)(c2, c1, c0); | |
2708 | + } | |
2709 | + } | |
2710 | + break; | |
2711 | + case X0208: | |
2712 | + case X0213_1: | |
2713 | + (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */ | |
2714 | + break; | |
2657 | 2715 | #ifdef X0212_ENABLE |
2658 | - else if (input_mode == X0212) | |
2659 | - (*oconv)((0x8f << 8) | c2, c1); | |
2716 | + case X0212: | |
2717 | + (*oconv)((0x8f << 8) | c2, c1); | |
2718 | + break; | |
2660 | 2719 | #endif /* X0212_ENABLE */ |
2661 | - else if (input_mode) | |
2662 | - (*oconv)(input_mode, c1); /* other special case */ | |
2663 | - else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */ | |
2664 | - int c0 = (*i_getc)(f); | |
2665 | - if (c0 != EOF){ | |
2666 | - code_status(c0); | |
2667 | - (*iconv)(c2, c1, c0); | |
2668 | - } | |
2669 | - } | |
2720 | + case X0213_2: | |
2721 | + (*oconv)((0x8f << 8) | c2, c1); | |
2722 | + break; | |
2723 | + default: | |
2724 | + (*oconv)(input_mode, c1); /* other special case */ | |
2725 | + } | |
2670 | 2726 | |
2671 | 2727 | c2 = 0; |
2672 | 2728 | continue; |
@@ -2800,8 +2856,6 @@ push_hold_buf(c2) | ||
2800 | 2856 | return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count); |
2801 | 2857 | } |
2802 | 2858 | |
2803 | -const int shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} }; | |
2804 | - | |
2805 | 2859 | int s2e_conv(c2, c1, p2, p1) |
2806 | 2860 | int c2, c1; |
2807 | 2861 | int *p2, *p1; |
@@ -2809,6 +2863,7 @@ int s2e_conv(c2, c1, p2, p1) | ||
2809 | 2863 | #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE) |
2810 | 2864 | int val; |
2811 | 2865 | #endif |
2866 | + STATIC const int shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} }; | |
2812 | 2867 | #ifdef SHIFTJIS_CP932 |
2813 | 2868 | if (cp51932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){ |
2814 | 2869 | extern const unsigned short shiftjis_cp932[3][189]; |
@@ -2820,7 +2875,7 @@ int s2e_conv(c2, c1, p2, p1) | ||
2820 | 2875 | } |
2821 | 2876 | #endif /* SHIFTJIS_CP932 */ |
2822 | 2877 | #ifdef X0212_ENABLE |
2823 | - if (!x0213_f && x0212_f && 0xfa <= c2 && c2 <= 0xfc){ | |
2878 | + if (!x0213_f && 0xfa <= c2 && c2 <= 0xfc){ | |
2824 | 2879 | extern const unsigned short shiftjis_x0212[3][189]; |
2825 | 2880 | val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40]; |
2826 | 2881 | if (val){ |
@@ -2857,9 +2912,7 @@ int s2e_conv(c2, c1, p2, p1) | ||
2857 | 2912 | } |
2858 | 2913 | |
2859 | 2914 | #ifdef X0212_ENABLE |
2860 | - if (x0212_f){ | |
2861 | - c2 = x0212_unshift(c2); | |
2862 | - } | |
2915 | + c2 = x0212_unshift(c2); | |
2863 | 2916 | #endif |
2864 | 2917 | if (p2) *p2 = c2; |
2865 | 2918 | if (p1) *p1 = c1; |
@@ -3649,7 +3702,6 @@ e2s_conv(c2, c1, p2, p1) | ||
3649 | 3702 | int val = 0; |
3650 | 3703 | const unsigned short *ptr; |
3651 | 3704 | extern const unsigned short *const x0212_shiftjis[]; |
3652 | - ndx = c2 & 0x7f; | |
3653 | 3705 | ptr = x0212_shiftjis[ndx - 0x21]; |
3654 | 3706 | if (ptr){ |
3655 | 3707 | val = ptr[(c1 & 0x7f) - 0x21]; |
@@ -3761,16 +3813,20 @@ j_oconv(c2, c1) | ||
3761 | 3813 | if(x0213_f){ |
3762 | 3814 | if(output_mode!=X0213_2){ |
3763 | 3815 | output_mode = X0213_2; |
3816 | + (*o_putc)(ESC); | |
3817 | + (*o_putc)('$'); | |
3818 | + (*o_putc)('('); | |
3819 | + (*o_putc)(X0213_2&0x7F); | |
3764 | 3820 | } |
3765 | 3821 | }else{ |
3766 | 3822 | if(output_mode!=X0212){ |
3767 | 3823 | output_mode = X0212; |
3824 | + (*o_putc)(ESC); | |
3825 | + (*o_putc)('$'); | |
3826 | + (*o_putc)('('); | |
3827 | + (*o_putc)(X0212&0x7F); | |
3768 | 3828 | } |
3769 | 3829 | } |
3770 | - (*o_putc)(ESC); | |
3771 | - (*o_putc)('$'); | |
3772 | - (*o_putc)('('); | |
3773 | - (*o_putc)(output_mode & 0x7F); | |
3774 | 3830 | (*o_putc)(c2 & 0x7f); |
3775 | 3831 | (*o_putc)(c1); |
3776 | 3832 | #endif |
@@ -3797,13 +3853,14 @@ j_oconv(c2, c1) | ||
3797 | 3853 | } |
3798 | 3854 | (*o_putc)(c1); |
3799 | 3855 | } else { |
3856 | + if(c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return; | |
3800 | 3857 | if(x0213_f){ |
3801 | 3858 | if (output_mode!=X0213_1) { |
3802 | 3859 | output_mode = X0213_1; |
3803 | 3860 | (*o_putc)(ESC); |
3804 | 3861 | (*o_putc)('$'); |
3805 | 3862 | (*o_putc)('('); |
3806 | - (*o_putc)(output_mode & 0x7F); | |
3863 | + (*o_putc)(X0213_1&0x7F); | |
3807 | 3864 | } |
3808 | 3865 | }else if (output_mode != X0208) { |
3809 | 3866 | output_mode = X0208; |
@@ -3811,10 +3868,6 @@ j_oconv(c2, c1) | ||
3811 | 3868 | (*o_putc)('$'); |
3812 | 3869 | (*o_putc)(kanji_intro); |
3813 | 3870 | } |
3814 | - if (c1<0x20 || 0x7e<c1) | |
3815 | - return; | |
3816 | - if (c2<0x20 || 0x7e<c2) | |
3817 | - return; | |
3818 | 3871 | (*o_putc)(c2); |
3819 | 3872 | (*o_putc)(c1); |
3820 | 3873 | } |