修訂 | ad2f87d3004033a07eb3334a71346bfb73faa4cf (tree) |
---|---|
時間 | 2015-03-12 23:00:08 |
作者 | ![]() |
Committer | Takuya Nishimoto |
ti34973 fixed
@@ -307,7 +307,42 @@ jdic = [ | ||
307 | 307 | 'speech':'トーキング', |
308 | 308 | 'accent':'1/5', |
309 | 309 | }, |
310 | - | |
310 | + {'text':'(日)', | |
311 | + 'speech':'(ニチ)', | |
312 | + 'braille':'(ニチ)', | |
313 | + 'accent':"1/2", | |
314 | + 'cost':1345, | |
315 | + 'pos':"名詞,一般,*,*,*,*" | |
316 | + }, | |
317 | + {'text':'(月)', | |
318 | + 'speech':'(ゲツ)', | |
319 | + 'braille':'(ゲツ)', | |
320 | + 'accent':"1/2", | |
321 | + 'cost':1345, | |
322 | + 'pos':"名詞,一般,*,*,*,*" | |
323 | + }, | |
324 | + {'text':'(火)', | |
325 | + 'speech':'(カ)', | |
326 | + 'braille':'(カ)', | |
327 | + 'accent':"1/1", | |
328 | + 'cost':1345, | |
329 | + 'pos':"名詞,一般,*,*,*,*" | |
330 | + }, | |
331 | + {'text':'(水)', | |
332 | + 'speech':'(スイ)', | |
333 | + 'braille':'(スイ)', | |
334 | + 'accent':"1/2", | |
335 | + 'cost':1345, | |
336 | + 'pos':"名詞,一般,*,*,*,*" | |
337 | + }, | |
338 | + {'text':'(木)', | |
339 | + 'speech':'(モク)', | |
340 | + 'braille':'(モク)', | |
341 | + 'accent':"1/2", | |
342 | + 'cost':1345, | |
343 | + 'pos':"名詞,一般,*,*,*,*" | |
344 | + }, | |
345 | + | |
311 | 346 | # ['きゃ', 'キャ', "1/1", 15000, "記号,一般,*,*,*,*"], |
312 | 347 | # ['きゅ', 'キュ', "1/1", 15000, "記号,一般,*,*,*,*"], |
313 | 348 | # ['きょ', 'キョ', "1/1", 15000, "記号,一般,*,*,*,*"], |
@@ -178,9 +178,43 @@ tests = [ | ||
178 | 178 | 'text': '(日)', |
179 | 179 | 'input': '(ニチ)', |
180 | 180 | 'output': '⠶⠇⠗⠶', |
181 | - 'inpos2': [0,1,2], | |
181 | + 'inpos2': [0,1,1,2], | |
182 | 182 | 'inpos1': [0,1,2,3], |
183 | - 'inpos': [0,1,1,1,2], | |
183 | + 'inpos': [0,1,1,2], | |
184 | + 'comment': 'nvdajp ticket34973', | |
185 | + }, | |
186 | + { | |
187 | + 'text': '(月)', | |
188 | + 'input': '(ゲツ)', | |
189 | + 'comment': 'nvdajp ticket34973', | |
190 | + }, | |
191 | + { | |
192 | + 'text': '(火)', | |
193 | + 'input': '(カ)', | |
194 | + 'output': '⠶⠡⠶', | |
195 | + 'inpos2': [0,1,2], | |
196 | + 'inpos1': [0,1,2], | |
197 | + 'inpos': [0,1,2], | |
198 | + 'comment': 'nvdajp ticket34973', | |
199 | + }, | |
200 | + { | |
201 | + 'text': '(水)', | |
202 | + 'input': '(スイ)', | |
203 | + 'comment': 'nvdajp ticket34973', | |
204 | + }, | |
205 | + { | |
206 | + 'text': '(木)', | |
207 | + 'input': '(モク)', | |
208 | + 'comment': 'nvdajp ticket34973', | |
209 | + }, | |
210 | + { | |
211 | + 'text': '(金)', | |
212 | + 'input': '(キン)', | |
213 | + 'comment': 'nvdajp ticket34973', | |
214 | + }, | |
215 | + { | |
216 | + 'text': '(土)', | |
217 | + 'input': '(ド)', | |
184 | 218 | 'comment': 'nvdajp ticket34973', |
185 | 219 | }, |
186 | 220 | { |
@@ -79,6 +79,31 @@ CONNECTED_MORPHS = { | ||
79 | 79 | ['なく', 'ナク', '2/2', None, None, None], |
80 | 80 | ['なる', 'ナル', '1/2', '動詞', '自立', None], |
81 | 81 | ], |
82 | + '(日)': [ | |
83 | + ['(', '(', '*/*', '記号', '括弧開', '*'], | |
84 | + ['日', 'ニチ', '1/2', '名詞', '一般', None], | |
85 | + [')', ')', '*/*', '記号', '括弧閉', '*'], | |
86 | + ], | |
87 | + '(月)': [ | |
88 | + ['(', '(', '*/*', '記号', '括弧開', '*'], | |
89 | + ['月', 'ゲツ', '1/2', '名詞', '一般', None], | |
90 | + [')', ')', '*/*', '記号', '括弧閉', '*'], | |
91 | + ], | |
92 | + '(火)': [ | |
93 | + ['(', '(', '*/*', '記号', '括弧開', '*'], | |
94 | + ['火', 'カ', '1/1', '名詞', '一般', None], | |
95 | + [')', ')', '*/*', '記号', '括弧閉', '*'], | |
96 | + ], | |
97 | + '(水)': [ | |
98 | + ['(', '(', '*/*', '記号', '括弧開', '*'], | |
99 | + ['水', 'スイ', '1/2', '名詞', '一般', None], | |
100 | + [')', ')', '*/*', '記号', '括弧閉', '*'], | |
101 | + ], | |
102 | + '(木)': [ | |
103 | + ['(', '(', '*/*', '記号', '括弧開', '*'], | |
104 | + ['木', 'モク', '1/2', '名詞', '一般', None], | |
105 | + [')', ')', '*/*', '記号', '括弧閉', '*'], | |
106 | + ], | |
82 | 107 | } |
83 | 108 | |
84 | 109 | class MecabMorph(object): |
@@ -166,12 +191,12 @@ def mecab_to_morphs(mf): | ||
166 | 191 | if len(ar) > 7: |
167 | 192 | mo.kihon = ar[7] |
168 | 193 | if len(ar) > 9: |
169 | - mo.kana = ar[8] | |
170 | - mo.yomi = ar[9] | |
194 | + mo.kana = unicode_normalize(ar[8]) # "(ニチ)" -> "(ニチ)" | |
195 | + mo.yomi = unicode_normalize(ar[9]) | |
171 | 196 | mo.accent = ar[10] |
172 | 197 | if len(ar) > 12: |
173 | 198 | # Mecab辞書の拡張フィールドの点訳表記があれば使用する |
174 | - mo.output = ar[12] | |
199 | + mo.output = unicode_normalize(ar[12]) | |
175 | 200 | else: |
176 | 201 | mo.output = ar[9] |
177 | 202 | update_phonetic_symbols(mo) |
@@ -186,11 +211,13 @@ def replace_morphs(li, dic): | ||
186 | 211 | new_morphs = dic[mo.hyouki] |
187 | 212 | for i in new_morphs: |
188 | 213 | m = copy.deepcopy(mo) |
189 | - m.hyouki = m.nhyouki = i[0] # に | |
214 | + m.hyouki = i[0] # に | |
215 | + m.nhyouki = unicode_normalize(i[0]) # に | |
190 | 216 | if i[3]: m.hinshi1 = i[3] |
191 | 217 | if i[4]: m.hinshi2 = i[4] |
192 | 218 | if i[5]: m.hinshi3 = i[5] |
193 | - m.output = m.kana = m.yomi = i[1] # ニ | |
219 | + m.kana = i[0] | |
220 | + m.output = m.yomi = unicode_normalize(i[1]) # ニ | |
194 | 221 | m.accent = i[2] # 0/1 |
195 | 222 | new_li.append(m) |
196 | 223 | else: |