nkfのGitリポジトリのfork
修訂 | 8b45d952875b47ebcf8677de8d079c36a644dcce (tree) |
---|---|
時間 | 2012-03-16 06:37:07 |
作者 | NARUSE, Yui <naruse@user...> |
Committer | NARUSE, Yui |
Merge NKF_python [nkf-bug#27841]
http://sourceforge.jp/ticket/browse.php?group_id=248&tid=27841
@@ -0,0 +1,202 @@ | ||
1 | +/* | |
2 | +Changes. | |
3 | +2009.6.2 Remove WISH_TRUE, use get_guessed_code() for nkf-2.0.9 | |
4 | + by SATOH Fumiyasu (fumiyas @ osstech co jp) | |
5 | +2008.7.17 Change the type of strlen from long to int, by SATOH Fumiyasu. | |
6 | +2007.2.1 Add guess() function. | |
7 | +2007.1.13 Remove pynkf_parseopts(), by SATOH Fumiyasu. | |
8 | +*/ | |
9 | +/** Python Interface to NKF | |
10 | +*************************************************************************** | |
11 | +** Copyright (c) 2005 Matsumoto, Tadashi <ma2@city.plala.jp> | |
12 | +** All Rights Reserved. | |
13 | +** | |
14 | +** Everyone is permitted to do anything on this program | |
15 | +** including copying, modifying, improving, | |
16 | +** as long as you don't try to pretend that you wrote it. | |
17 | +** i.e., the above copyright notice has to appear in all copies. | |
18 | +** Binary distribution requires original version messages. | |
19 | +** You don't have to ask before copying, redistribution or publishing. | |
20 | +** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE. | |
21 | +***************************************************************************/ | |
22 | + | |
23 | +#include "Python.h" | |
24 | +#include <setjmp.h> | |
25 | + | |
26 | +#undef getc /* remove any existing macro definition so the replacement below takes effect */ | |
27 | +#undef ungetc | |
28 | +#define getc(f) pynkf_getc(f) /* code compiled after this point reads its input through pynkf_getc() */ | |
29 | +#define ungetc(c,f) pynkf_ungetc(c,f) /* ...and pushes bytes back through pynkf_ungetc() */ | |
30 | +/* putchar is redirected the same way; TRUE/FALSE are undefined too, presumably so later code can define its own -- TODO confirm. */ | |
31 | +#undef putchar | |
32 | +#undef TRUE | |
33 | +#undef FALSE | |
34 | +#define putchar(c) pynkf_putchar(c) | |
35 | +/* State shared by the I/O hooks and the conversion entry points. */ | |
36 | +static int pynkf_ibufsize, pynkf_obufsize; /* number of bytes pynkf_getc may deliver / current capacity of the output buffer */ | |
37 | +static unsigned char *pynkf_inbuf, *pynkf_outbuf; /* start of the input data / start of the output buffer */ | |
38 | +static int pynkf_icount,pynkf_ocount; /* input bytes consumed so far / free bytes left in the output buffer */ | |
39 | +static unsigned char *pynkf_iptr, *pynkf_optr; /* next byte to read / next position to write */ | |
40 | +static jmp_buf env; /* armed with setjmp() in pynkf_convert; pynkf_putchar longjmp()s to it from its buffer-growth failure path */ | |
41 | +static int pynkf_guess_flag; /* non-zero: pynkf_putchar discards its output */ | |
42 | + | |
43 | +/* Input hook behind the getc() macro: returns the next byte of the in-memory input, or EOF once pynkf_ibufsize bytes were delivered; f is ignored. */ | |
44 | +static int pynkf_getc(FILE *f) | |
45 | +{ | |
46 | +    int next_byte; | |
47 | +    if (pynkf_ibufsize <= pynkf_icount) { return EOF; } /* input exhausted */ | |
48 | +    next_byte = *pynkf_iptr; /* unsigned char source, so the value is 0..255 */ | |
49 | +    pynkf_iptr++; pynkf_icount++; | |
50 | +    return next_byte; | |
51 | +} | |
52 | + | |
53 | +/* Input hook behind the ungetc() macro: steps back one byte and stores c there; returns c, or EOF when nothing has been read yet. f is ignored. */ | |
54 | +static int pynkf_ungetc(int c, FILE *f) | |
55 | +{ | |
56 | +    if (pynkf_icount <= 0) { return EOF; } /* test before decrementing: a decrement at 0 left the counter at -1 and let pynkf_getc read one byte past the input */ | |
57 | +    pynkf_icount--; | |
58 | +    *(--pynkf_iptr) = c; /* NOTE(review): this writes into the caller-supplied input buffer -- confirm that is acceptable */ | |
59 | +    return c; | |
60 | +} | |
61 | + | |
62 | +/* Output hook behind the putchar() macro: appends c to the growable output buffer. | |
63 | +   While pynkf_guess_flag is set the byte is discarded. When the buffer is full its | |
64 | +   capacity is doubled; if that fails, control jumps back to the setjmp(env) site. */ | |
65 | +static void pynkf_putchar(int c) | |
66 | +{ | |
67 | +    size_t size; | |
68 | +    unsigned char *p; | |
69 | + | |
70 | +    if (pynkf_guess_flag) { return; } | |
71 | +    if (pynkf_ocount > 0) { /* room left: store the byte and use up one free slot */ | |
72 | +        *pynkf_optr++ = c; | |
73 | +        pynkf_ocount--; | |
74 | +        return; | |
75 | +    } | |
76 | + | |
77 | +    size = pynkf_obufsize + pynkf_obufsize; | |
78 | +    p = (unsigned char *)PyMem_Realloc(pynkf_outbuf, size + 1); /* +1 leaves room for the terminator */ | |
79 | +    if (p == NULL){ longjmp(env, 1); } /* check the realloc result, not the old pointer; the old buffer stays valid for the handler to free */ | |
80 | +    pynkf_outbuf = p; | |
81 | +    pynkf_optr = pynkf_outbuf + pynkf_obufsize; /* continue right after the bytes already written */ | |
82 | +    pynkf_ocount = pynkf_obufsize - 1; /* doubling added obufsize slots; one is used just below */ | |
83 | +    pynkf_obufsize = size; | |
84 | +    *pynkf_optr++ = c; | |
85 | +} | |
86 | + | |
87 | +#define PERL_XS 1 | |
88 | +#include "../utf8tbl.c" | |
89 | +#include "../nkf.c" | |
90 | + | |
91 | +/* Convert str with nkf, using the option string opts, and return the result as a new Python | |
92 | +   string. Returns NULL with MemoryError set when the output buffer cannot be allocated or | |
93 | +   grown. optslen is accepted but unused. Works on the file-level pynkf_* state. */ | |
94 | +static PyObject * | |
95 | +pynkf_convert(unsigned char* str, int strlen, char* opts, int optslen) | |
96 | +{ | |
97 | +    PyObject * res; | |
98 | + | |
99 | +    pynkf_ibufsize = strlen + 1; /* pynkf_getc also delivers the byte that follows the data */ | |
100 | +    pynkf_obufsize = pynkf_ibufsize * 1.5 + 256; | |
101 | +    /* Allocate capacity + 1: the terminator written after the conversion lands at | |
102 | +       pynkf_outbuf[pynkf_obufsize] when the output fills the buffer exactly. */ | |
103 | +    pynkf_outbuf = (unsigned char *)PyMem_Malloc(pynkf_obufsize + 1); | |
104 | +    if (pynkf_outbuf == NULL){ | |
105 | +        PyErr_NoMemory(); | |
106 | +        return NULL; | |
107 | +    } | |
108 | +    pynkf_outbuf[0] = '\0'; | |
109 | +    pynkf_ocount = pynkf_obufsize; | |
110 | +    pynkf_optr = pynkf_outbuf; | |
111 | +    pynkf_icount = 0; | |
112 | +    pynkf_inbuf = str; | |
113 | +    pynkf_iptr = pynkf_inbuf; | |
114 | +    pynkf_guess_flag = 0; | |
115 | + | |
116 | +    if (setjmp(env) != 0){ /* pynkf_putchar jumped back here: growing the output buffer failed */ | |
117 | +        PyMem_Free(pynkf_outbuf); | |
118 | +        PyErr_NoMemory(); | |
119 | +        return NULL; | |
120 | +    } | |
121 | +    reinit(); | |
122 | +    options(opts); | |
123 | +    kanji_convert(NULL); | |
124 | + | |
125 | +    *pynkf_optr = 0; | |
126 | +    res = PyString_FromString((const char *)pynkf_outbuf); /* copies up to the first NUL byte */ | |
127 | +    PyMem_Free(pynkf_outbuf); | |
128 | +    return res; | |
129 | +} | |
130 | + | |
131 | +/* Let nkf scan str in guess mode and return the name it reports for the input encoding | |
132 | +   as a new Python string. No output buffer is set up: pynkf_guess_flag makes | |
133 | +   pynkf_putchar drop every byte. */ | |
134 | +static PyObject * | |
135 | +pynkf_convert_guess(unsigned char* str, int strlen) | |
136 | +{ | |
137 | +    const char *guessed_name; | |
138 | + | |
139 | +    /* Point the input hooks at the caller's data. */ | |
140 | +    pynkf_inbuf = str; | |
141 | +    pynkf_iptr = str; | |
142 | +    pynkf_icount = 0; | |
143 | +    pynkf_ibufsize = strlen + 1; | |
144 | + | |
145 | +    pynkf_guess_flag = 1; | |
146 | +    reinit(); | |
147 | +    guess_f = 1; /* set after reinit(), which presumably resets nkf's option flags -- TODO confirm */ | |
148 | + | |
149 | +    kanji_convert(NULL); | |
150 | +    guessed_name = get_guessed_code(); | |
151 | +    return PyString_FromString(guessed_name); | |
152 | +} | |
153 | + | |
154 | +/* nkf.nkf(flag, input): convert input with the nkf options given in flag and return the result. | |
155 | +   The function has internal linkage unless EXTERN_NKF is defined. */ | |
156 | +#ifndef EXTERN_NKF | |
157 | +static | |
158 | +#endif | |
159 | +PyObject *pynkf_nkf(PyObject *self, PyObject *args) | |
160 | +{ | |
161 | +    char *opts; | |
162 | +    int optslen; | |
163 | +    unsigned char *str; | |
164 | +    int strlen; | |
165 | +    /* Argument order is (options, data); each "s#" yields a pointer and an int length. */ | |
166 | +    if (PyArg_ParseTuple(args, "s#s#", &opts, &optslen, &str, &strlen) == 0) { | |
167 | +        return NULL; | |
168 | +    } | |
169 | +    return pynkf_convert(str, strlen, opts, optslen); | |
170 | +} | |
171 | + | |
172 | +/* nkf.guess(input): return the name of the encoding nkf guesses for input. | |
173 | +   The function has internal linkage unless EXTERN_NKF is defined. */ | |
174 | +#ifndef EXTERN_NKF | |
175 | +static | |
176 | +#endif | |
177 | +PyObject *pynkf_guess(PyObject *self, PyObject *args) | |
178 | +{ | |
179 | +    unsigned char *str; | |
180 | +    int strlen; | |
181 | + | |
182 | +    if (PyArg_ParseTuple(args, "s#", &str, &strlen) == 0) { | |
183 | +        return NULL; | |
184 | +    } | |
185 | +    return pynkf_convert_guess(str, strlen); | |
186 | +} | |
187 | + | |
188 | +#ifndef EXTERN_NKF | |
189 | +/* Functions exported by the module; the all-NULL entry terminates the table. */ | |
190 | +static PyMethodDef nkfmethods[] = { | |
191 | +    {"nkf", pynkf_nkf, METH_VARARGS, NULL}, | |
192 | +    {"guess", pynkf_guess, METH_VARARGS, NULL}, | |
193 | +    {NULL, NULL, 0, NULL} | |
194 | +}; | |
195 | + | |
196 | +/* Module initialization function: registers nkfmethods under the module name "nkf". */ | |
197 | +void | |
198 | +initnkf(void) | |
199 | +{ | |
200 | +    (void)Py_InitModule("nkf", nkfmethods); | |
201 | +} | |
202 | +#endif |
@@ -0,0 +1,44 @@ | ||
1 | +Python Interface to NKF2 (2009.06.02) | |
2 | + | |
3 | +1. How to Install | |
4 | + | |
5 | + 1) Download latest nkf-2.0.9+ from http://sourceforge.jp/projects/nkf/ | |
6 | + | |
7 | + 2) $ tar zxvf nkf-2.x.x.tar.gz | |
8 | + | |
9 | + 3) $ cd nkf-2.x.x | |
10 | + | |
11 | + 4) $ tar zxvf NKF_python.tgz | |
12 | + | |
13 | + 5) $ cd NKF.python | |
14 | + | |
15 | + 6) # python setup.py install | |
16 | + | |
17 | +2. Usage | |
18 | + | |
19 | + ## flag takes the same option flags as the nkf command itself | |
20 | + import nkf | |
21 | + output = nkf.nkf(flag, input) | |
22 | + | |
23 | + ## For example, to convert from euc-jp to utf-8 | |
24 | + output = nkf.nkf('-Ew', 'some euc-jp string') | |
25 | + | |
26 | +3. guess() function, | |
27 | + which guesses the encoding of the input string and returns one of the following strings: | |
28 | + "BINARY" | |
29 | + "ASCII" | |
30 | + "Shift_JIS" | |
31 | + "CP932" | |
32 | + "EUC-JP" | |
33 | + "EUCJP-MS" | |
34 | + "CP51932" | |
35 | + "ISO-2022-JP" | |
36 | + "CP50221" | |
37 | + "CP50220" | |
38 | + "UTF-8" | |
39 | + | |
40 | + ## For example | |
41 | + import nkf | |
42 | + input_code = nkf.guess('some string') | |
43 | + | |
44 | +Matsumoto, Tadashi ma2@city.plala.jp |
@@ -0,0 +1,12 @@ | ||
1 | +#!/usr/bin/env python | |
2 | + | |
3 | +from distutils.core import setup, Extension | |
4 | + | |
5 | +setup (name = "nkf", | |
6 | + version="1.0", | |
7 | + description="Python Interface to NKF", | |
8 | + author="Matsumoto Tadashi", | |
9 | + author_email="ma2@city.plala.jp", | |
10 | + ext_modules = [ | |
11 | + Extension("nkf", ["NKF_python.c"], | |
12 | + extra_link_args = ['-s'])]) |
@@ -0,0 +1,210 @@ | ||
1 | +/* | |
2 | +Changes. | |
3 | +2009.6.2 Remove WISH_TRUE, use get_guessed_code() for nkf-2.0.9 | |
4 | + by SATOH Fumiyasu (fumiyas @ osstech co jp) | |
5 | +2008.7.17 Change the type of strlen from long to int, by SATOH Fumiyasu. | |
6 | +2007.2.1 Add guess() function. | |
7 | +2007.1.13 Remove pynkf_parseopts(), by SATOH Fumiyasu. | |
8 | +*/ | |
9 | +/** Python Interface to NKF | |
10 | +*************************************************************************** | |
11 | +** Copyright (c) 2005 Matsumoto, Tadashi <ma2@city.plala.jp> | |
12 | +** All Rights Reserved. | |
13 | +** | |
14 | +** Everyone is permitted to do anything on this program | |
15 | +** including copying, modifying, improving, | |
16 | +** as long as you don't try to pretend that you wrote it. | |
17 | +** i.e., the above copyright notice has to appear in all copies. | |
18 | +** Binary distribution requires original version messages. | |
19 | +** You don't have to ask before copying, redistribution or publishing. | |
20 | +** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE. | |
21 | +***************************************************************************/ | |
22 | + | |
23 | +#include "Python.h" | |
24 | +#include <setjmp.h> | |
25 | + | |
26 | +#undef getc /* remove any existing macro definition so the replacement below takes effect */ | |
27 | +#undef ungetc | |
28 | +#define getc(f) pynkf_getc(f) /* code compiled after this point reads its input through pynkf_getc() */ | |
29 | +#define ungetc(c,f) pynkf_ungetc(c,f) /* ...and pushes bytes back through pynkf_ungetc() */ | |
30 | +/* putchar is redirected the same way; TRUE/FALSE are undefined too, presumably so later code can define its own -- TODO confirm. */ | |
31 | +#undef putchar | |
32 | +#undef TRUE | |
33 | +#undef FALSE | |
34 | +#define putchar(c) pynkf_putchar(c) | |
35 | +/* State shared by the I/O hooks and the conversion entry points. */ | |
36 | +static int pynkf_ibufsize, pynkf_obufsize; /* number of bytes pynkf_getc may deliver / current capacity of the output buffer */ | |
37 | +static unsigned char *pynkf_inbuf, *pynkf_outbuf; /* start of the input data / start of the output buffer */ | |
38 | +static int pynkf_icount,pynkf_ocount; /* input bytes consumed so far / free bytes left in the output buffer */ | |
39 | +static unsigned char *pynkf_iptr, *pynkf_optr; /* next byte to read / next position to write */ | |
40 | +static jmp_buf env; /* armed with setjmp() in pynkf_convert; pynkf_putchar longjmp()s to it from its buffer-growth failure path */ | |
41 | +static int pynkf_guess_flag; /* non-zero: pynkf_putchar discards its output */ | |
42 | + | |
43 | +/* Input hook behind the getc() macro: returns the next byte of the in-memory input, or EOF once pynkf_ibufsize bytes were delivered; f is ignored. */ | |
44 | +static int pynkf_getc(FILE *f) | |
45 | +{ | |
46 | +    int next_byte; | |
47 | +    if (pynkf_ibufsize <= pynkf_icount) { return EOF; } /* input exhausted */ | |
48 | +    next_byte = *pynkf_iptr; /* unsigned char source, so the value is 0..255 */ | |
49 | +    pynkf_iptr++; pynkf_icount++; | |
50 | +    return next_byte; | |
51 | +} | |
52 | + | |
53 | +/* Input hook behind the ungetc() macro: steps back one byte and stores c there; returns c, or EOF when nothing has been read yet. f is ignored. */ | |
54 | +static int pynkf_ungetc(int c, FILE *f) | |
55 | +{ | |
56 | +    if (pynkf_icount <= 0) { return EOF; } /* test before decrementing: a decrement at 0 left the counter at -1 and let pynkf_getc read one byte past the input */ | |
57 | +    pynkf_icount--; | |
58 | +    *(--pynkf_iptr) = c; /* NOTE(review): this writes into the caller-supplied bytes buffer -- confirm that is acceptable */ | |
59 | +    return c; | |
60 | +} | |
61 | + | |
62 | +/* Output hook behind the putchar() macro: appends c to the growable output buffer. | |
63 | +   While pynkf_guess_flag is set the byte is discarded. When the buffer is full its | |
64 | +   capacity is doubled; if that fails, control jumps back to the setjmp(env) site. */ | |
65 | +static void pynkf_putchar(int c) | |
66 | +{ | |
67 | +    size_t size; | |
68 | +    unsigned char *p; | |
69 | + | |
70 | +    if (pynkf_guess_flag) { return; } | |
71 | +    if (pynkf_ocount > 0) { /* room left: store the byte and use up one free slot */ | |
72 | +        *pynkf_optr++ = c; | |
73 | +        pynkf_ocount--; | |
74 | +        return; | |
75 | +    } | |
76 | + | |
77 | +    size = pynkf_obufsize + pynkf_obufsize; | |
78 | +    p = (unsigned char *)PyMem_Realloc(pynkf_outbuf, size + 1); /* +1 leaves room for the terminator */ | |
79 | +    if (p == NULL){ longjmp(env, 1); } /* check the realloc result, not the old pointer; the old buffer stays valid for the handler to free */ | |
80 | +    pynkf_outbuf = p; | |
81 | +    pynkf_optr = pynkf_outbuf + pynkf_obufsize; /* continue right after the bytes already written */ | |
82 | +    pynkf_ocount = pynkf_obufsize - 1; /* doubling added obufsize slots; one is used just below */ | |
83 | +    pynkf_obufsize = size; | |
84 | +    *pynkf_optr++ = c; | |
85 | +} | |
86 | + | |
87 | +#define PERL_XS 1 | |
88 | +#include "../utf8tbl.c" | |
89 | +#include "../nkf.c" | |
90 | + | |
91 | +/* Convert str with nkf, using the option string opts, and return the result as a new bytes | |
92 | +   object. Returns NULL with MemoryError set when the output buffer cannot be allocated or | |
93 | +   grown. optslen is accepted but unused. Works on the file-level pynkf_* state. */ | |
94 | +static PyObject * | |
95 | +pynkf_convert(unsigned char* str, int strlen, char* opts, int optslen) | |
96 | +{ | |
97 | +    PyObject * res; | |
98 | + | |
99 | +    pynkf_ibufsize = strlen + 1; /* pynkf_getc also delivers the byte that follows the data */ | |
100 | +    pynkf_obufsize = pynkf_ibufsize * 1.5 + 256; | |
101 | +    /* Allocate capacity + 1: the terminator written after the conversion lands at | |
102 | +       pynkf_outbuf[pynkf_obufsize] when the output fills the buffer exactly. */ | |
103 | +    pynkf_outbuf = (unsigned char *)PyMem_Malloc(pynkf_obufsize + 1); | |
104 | +    if (pynkf_outbuf == NULL){ | |
105 | +        PyErr_NoMemory(); | |
106 | +        return NULL; | |
107 | +    } | |
108 | +    pynkf_outbuf[0] = '\0'; | |
109 | +    pynkf_ocount = pynkf_obufsize; | |
110 | +    pynkf_optr = pynkf_outbuf; | |
111 | +    pynkf_icount = 0; | |
112 | +    pynkf_inbuf = str; | |
113 | +    pynkf_iptr = pynkf_inbuf; | |
114 | +    pynkf_guess_flag = 0; | |
115 | + | |
116 | +    if (setjmp(env) != 0){ /* pynkf_putchar jumped back here: growing the output buffer failed */ | |
117 | +        PyMem_Free(pynkf_outbuf); | |
118 | +        PyErr_NoMemory(); | |
119 | +        return NULL; | |
120 | +    } | |
121 | +    reinit(); | |
122 | +    options(opts); | |
123 | +    kanji_convert(NULL); | |
124 | + | |
125 | +    *pynkf_optr = 0; | |
126 | +    res = PyBytes_FromString((const char *)pynkf_outbuf); /* copies up to the first NUL byte */ | |
127 | +    PyMem_Free(pynkf_outbuf); | |
128 | +    return res; | |
129 | +} | |
130 | + | |
131 | +/* Let nkf scan str in guess mode and return the name it reports for the input encoding | |
132 | +   as a new str object. No output buffer is set up: pynkf_guess_flag makes | |
133 | +   pynkf_putchar drop every byte. */ | |
134 | +static PyObject * | |
135 | +pynkf_convert_guess(unsigned char* str, int strlen) | |
136 | +{ | |
137 | +    const char *guessed_name; | |
138 | + | |
139 | +    /* Point the input hooks at the caller's data. */ | |
140 | +    pynkf_inbuf = str; | |
141 | +    pynkf_iptr = str; | |
142 | +    pynkf_icount = 0; | |
143 | +    pynkf_ibufsize = strlen + 1; | |
144 | + | |
145 | +    pynkf_guess_flag = 1; | |
146 | +    reinit(); | |
147 | +    guess_f = 1; /* set after reinit(), which presumably resets nkf's option flags -- TODO confirm */ | |
148 | + | |
149 | +    kanji_convert(NULL); | |
150 | +    guessed_name = get_guessed_code(); | |
151 | +    return PyUnicode_FromString(guessed_name); | |
152 | +} | |
153 | + | |
154 | +/* nkf.nkf(flag, input): convert the bytes in input with the nkf options given in flag and return | |
155 | +   the result. The function has internal linkage unless EXTERN_NKF is defined. */ | |
156 | +#ifndef EXTERN_NKF | |
157 | +static | |
158 | +#endif | |
159 | +PyObject *pynkf_nkf(PyObject *self, PyObject *args) | |
160 | +{ | |
161 | +    char *opts; | |
162 | +    int optslen; | |
163 | +    unsigned char *str; | |
164 | +    int strlen; | |
165 | +    /* Order is (options via "s#", data via "y#"). NOTE(review): int lengths with '#' formats rely on PY_SSIZE_T_CLEAN being undefined; newer CPython releases require it -- confirm the targeted versions. */ | |
166 | +    if (PyArg_ParseTuple(args, "s#y#", &opts, &optslen, &str, &strlen) == 0) { | |
167 | +        return NULL; | |
168 | +    } | |
169 | +    return pynkf_convert(str, strlen, opts, optslen); | |
170 | +} | |
171 | + | |
172 | +/* nkf.guess(input): return the name of the encoding nkf guesses for the bytes in input. | |
173 | +   The function has internal linkage unless EXTERN_NKF is defined. */ | |
174 | +#ifndef EXTERN_NKF | |
175 | +static | |
176 | +#endif | |
177 | +PyObject *pynkf_guess(PyObject *self, PyObject *args) | |
178 | +{ | |
179 | +    unsigned char *str; | |
180 | +    int strlen; | |
181 | +    /* NOTE(review): an int length with "y#" relies on PY_SSIZE_T_CLEAN being undefined -- confirm the targeted CPython versions. */ | |
182 | +    if (PyArg_ParseTuple(args, "y#", &str, &strlen) == 0) { | |
183 | +        return NULL; | |
184 | +    } | |
185 | +    return pynkf_convert_guess(str, strlen); | |
186 | +} | |
187 | + | |
188 | +#ifndef EXTERN_NKF | |
189 | +/* Functions exported by the module (empty docstrings); the all-NULL entry terminates the table. */ | |
190 | +static PyMethodDef nkfMethods[] = { | |
191 | +    {"nkf", pynkf_nkf, METH_VARARGS, ""}, | |
192 | +    {"guess", pynkf_guess, METH_VARARGS, ""}, | |
193 | +    {NULL, NULL, 0, NULL} | |
194 | +}; | |
195 | + | |
196 | +static struct PyModuleDef nkfmodule = { | |
197 | +    PyModuleDef_HEAD_INIT, | |
198 | +    "nkf",      /* module name */ | |
199 | +    "",         /* module docstring (empty) */ | |
200 | +    -1,         /* m_size */ | |
201 | +    nkfMethods  /* method table defined above */ | |
202 | +}; | |
203 | + | |
204 | +/* Module initialization function: creates the module object described by nkfmodule. */ | |
205 | +PyMODINIT_FUNC PyInit_nkf(void) | |
206 | +{ | |
207 | +    PyObject *module = PyModule_Create(&nkfmodule); | |
208 | +    return module; | |
209 | +} | |
210 | +#endif |
@@ -0,0 +1,50 @@ | ||
1 | +===================================== | |
2 | +Python Interface to NKF (2009.07.08) | |
3 | +===================================== | |
4 | +Required: Python3.1+, nkf-2.0.9+ | |
5 | + | |
6 | +1. How to Install | |
7 | + | |
8 | + 1) Download latest nkf-2.0.9+ from http://sourceforge.jp/projects/nkf/ | |
9 | + | |
10 | + 2) $ tar zxvf nkf-2.x.x.tar.gz | |
11 | + | |
12 | + 3) $ cd nkf-2.x.x | |
13 | + | |
14 | + 4) $ tar zxvf NKF_python.tgz | |
15 | + | |
16 | + 5) $ cd NKF.python | |
17 | + | |
18 | + 6) # python setup.py install | |
19 | + | |
20 | +2. Usage | |
21 | + | |
22 | + ## flag takes the same option flags as the nkf command itself | |
23 | + ## The type of input/output is bytes | |
24 | + ## The type of flag is str or bytes | |
25 | + import nkf | |
26 | + output = nkf.nkf(flag, input) | |
27 | + | |
28 | + ## For example, to convert from euc-jp to utf-8 | |
29 | + output = nkf.nkf('-Ew', b'some euc-jp bytes') | |
30 | + | |
31 | +3. guess() function, | |
32 | + which guesses the encoding of the input bytes and returns one of the following strings: | |
33 | + "BINARY" | |
34 | + "ASCII" | |
35 | + "Shift_JIS" | |
36 | + "CP932" | |
37 | + "EUC-JP" | |
38 | + "EUCJP-MS" | |
39 | + "CP51932" | |
40 | + "ISO-2022-JP" | |
41 | + "CP50221" | |
42 | + "CP50220" | |
43 | + "UTF-8" | |
44 | + | |
45 | + ## For example | |
46 | + ## The type of return value is str | |
47 | + import nkf | |
48 | + input_code = nkf.guess(b'some bytes') | |
49 | + | |
50 | +Matsumoto, Tadashi ma2@city.plala.jp |
@@ -0,0 +1,12 @@ | ||
1 | +#!/usr/bin/env python | |
2 | + | |
3 | +from distutils.core import setup, Extension | |
4 | + | |
5 | +setup (name = "nkf", | |
6 | + version="1.0", | |
7 | + description="Python Interface to NKF", | |
8 | + author="Matsumoto Tadashi", | |
9 | + author_email="ma2@city.plala.jp", | |
10 | + ext_modules = [ | |
11 | + Extension("nkf", ["NKF_python.c"], | |
12 | + extra_link_args = ['-s'])]) |