NIIBE Yutaka
gniib****@fsij*****
2010年 7月 2日 (金) 13:06:29 JST
ptab.h のAPIを使わない libanthy の方向でひとつ進めます。 思いのほか依存は無く、下記の変更で無くなります。動作に影響は有りません。 あと、mkdepgraph.c が使っています。品詞の名前で depgraph を作っていて、 仮名漢字変換の際に、実際の単語の「品詞にマッチ」するルールを適用すると いう動きです。DFA の方向にすれば、必然的にここで ptab.h は使わなくなる と思います。 janitor/no-ptab-use branch に入れます。 2010-07-02 NIIBE Yutaka <gniib****@fsij*****> * src-worddic/ext_ent.c (wt_num): Removed. (anthy_init_ext_ent): Remove initialization of wt_num. (anthy_get_nth_dic_ent_wtype_of_ext_ent) (anthy_get_ext_seq_ent_wtype): Use anthy_wtype_num_noun instead of wt_num. * src-main/main.c (anthy_init): Call anthy_init_wordlist. * src-splitter/wordborder.h: Removed variable declarations of anthy_wtype_*. * src-splitter/wordlist.c (anthy_wtype_noun) (anthy_wtype_name_noun, anthy_wtype_prefix) (anthy_wtype_num_prefix, anthy_wtype_num_postfix) (anthy_wtype_name_postfix, anthy_wtype_sv_postfix): Moved from splitter.c, and make them static. (anthy_init_wordlist): New function. Moved calls of anthy_init_wtype_by_name from splitter.c, and change the calls to anthy_type_to_wtype. * src-worddic/wtype.c (anthy_wtype_a_tail_of_v_renyou) (anthy_wtype_num_noun): Moved from splitter.c. (anthy_init_wtypes): Initialize anthy_wtype_a_tail_of_v_renyou and anthy_wtype_num_noun by anthy_type_to_wtype. * anthy/wtype.h (anthy_init_wordlist, anthy_wtype_num_noun) (anthy_wtype_a_tail_of_v_renyou): Added declarations. * src-splitter/splitter.c (anthy_wtype_noun_tail) (anthy_wtype_v_renyou, anthy_wtype_n1, anthy_wtype_n10): Removed unused variables. diff --git a/anthy/wtype.h b/anthy/wtype.h index 165a1c0..a34feab 100644 --- a/anthy/wtype.h +++ b/anthy/wtype.h @@ -300,4 +300,9 @@ void anthy_init_wtypes(void); extern wtype_t anthy_wt_all;/* すべてにマッチする自立語 */ extern wtype_t anthy_wt_none;/* 品詞無しPOS_INVAL */ +extern wtype_t anthy_wtype_num_noun; +extern wtype_t anthy_wtype_a_tail_of_v_renyou; + +/* In src-splitter/wordlist.c */ +int anthy_init_wordlist (void); #endif diff --git a/src-main/main.c b/src-main/main.c index 024cb6e..23ffcf6 100644 --- a/src-main/main.c +++ b/src-main/main.c @@ -86,6 +86,7 @@ anthy_init(void) anthy_log(0, "Failed to init splitter.\n"); return -1; } + anthy_init_wordlist(); anthy_init_contexts(); anthy_init_personality(); anthy_infosort_init(); diff --git a/src-splitter/splitter.c b/src-splitter/splitter.c index a91f11c..f011162 100644 --- a/src-splitter/splitter.c +++ b/src-splitter/splitter.c @@ -43,22 +43,6 @@ static int splitter_debug_flags; -/**/ -wtype_t anthy_wtype_noun; -wtype_t anthy_wtype_name_noun; -wtype_t anthy_wtype_num_noun; -wtype_t anthy_wtype_prefix; -wtype_t anthy_wtype_num_prefix; -wtype_t anthy_wtype_num_postfix; -wtype_t anthy_wtype_name_postfix; -wtype_t anthy_wtype_sv_postfix; -wtype_t anthy_wtype_a_tail_of_v_renyou; -wtype_t anthy_wtype_v_renyou; -wtype_t anthy_wtype_noun_tail;/* いれ「たて」とか */ -wtype_t anthy_wtype_n1; -wtype_t anthy_wtype_n10; - - /** make_word_cacheで作成した文節情報を解放する */ static void @@ -304,20 +288,6 @@ anthy_init_splitter(void) anthy_log(0, "Failed to init dependent word table.\n"); return -1; } - /**/ - anthy_wtype_noun = anthy_init_wtype_by_name("名詞35"); - anthy_wtype_name_noun = anthy_init_wtype_by_name("人名"); - anthy_wtype_num_noun = anthy_init_wtype_by_name("数詞"); - anthy_wtype_a_tail_of_v_renyou = anthy_init_wtype_by_name("形容詞化接尾語"); - anthy_wtype_v_renyou = anthy_init_wtype_by_name("動詞連用形"); - anthy_wtype_noun_tail = anthy_init_wtype_by_name("名詞化接尾語"); - anthy_wtype_prefix = anthy_init_wtype_by_name("名詞接頭辞"); - anthy_wtype_num_prefix = anthy_init_wtype_by_name("数接頭辞"); - anthy_wtype_num_postfix = anthy_init_wtype_by_name("数接尾辞"); - anthy_wtype_name_postfix = anthy_init_wtype_by_name("人名接尾辞"); - anthy_wtype_sv_postfix = anthy_init_wtype_by_name("サ変接尾辞"); - anthy_wtype_n1 = anthy_init_wtype_by_name("数詞1"); - anthy_wtype_n10 = anthy_init_wtype_by_name("数詞10"); return 0; } diff --git a/src-splitter/wordborder.h b/src-splitter/wordborder.h index 1b93727..569976d 100644 --- a/src-splitter/wordborder.h +++ b/src-splitter/wordborder.h @@ -192,19 +192,4 @@ void anthy_mark_borders(struct splitter_context *sc, int from, int to); /* defined at seg_class.c */ void anthy_set_seg_class(struct word_list* wl); -/* 品詞(anthy_init_splitterで初期化される) */ -extern wtype_t anthy_wtype_noun; -extern wtype_t anthy_wtype_name_noun; -extern wtype_t anthy_wtype_num_noun; -extern wtype_t anthy_wtype_prefix; -extern wtype_t anthy_wtype_num_prefix; -extern wtype_t anthy_wtype_num_postfix; -extern wtype_t anthy_wtype_name_postfix; -extern wtype_t anthy_wtype_sv_postfix; -extern wtype_t anthy_wtype_a_tail_of_v_renyou; -extern wtype_t anthy_wtype_v_renyou; -extern wtype_t anthy_wtype_noun_tail;/* いれ「たて」とか */ -extern wtype_t anthy_wtype_n1; -extern wtype_t anthy_wtype_n10; - #endif diff --git a/src-splitter/wordlist.c b/src-splitter/wordlist.c index 58c60bf..2377bd9 100644 --- a/src-splitter/wordlist.c +++ b/src-splitter/wordlist.c @@ -33,6 +33,16 @@ static void *weak_word_array; +static wtype_t anthy_wtype_noun; +static wtype_t anthy_wtype_name_noun; +#if 0 +static wtype_t anthy_wtype_prefix; +#endif +static wtype_t anthy_wtype_num_prefix; +static wtype_t anthy_wtype_num_postfix; +static wtype_t anthy_wtype_name_postfix; +static wtype_t anthy_wtype_sv_postfix; + /* デバッグ用 */ void anthy_print_word_list(struct splitter_context *sc, @@ -322,11 +332,14 @@ make_pre_words(struct splitter_context *sc, make_following_word_list(sc, &new_tmpl); /* 数の場合は接尾辞もくっつける */ make_suc_words(sc, &new_tmpl); - }/* else if (anthy_get_seq_ent_wtype_freq(pre, anthy_wtype_prefix)) { + } +#if 0 + else if (anthy_get_seq_ent_wtype_freq(pre, anthy_wtype_prefix)) { new_tmpl = *tmpl; push_part_front(&new_tmpl, i, pre, anthy_wtype_prefix); make_following_word_list(sc, &new_tmpl); - }*/ + } +#endif } } } @@ -580,3 +593,34 @@ anthy_make_word_list_all(struct splitter_context *sc) anthy_free_allocator(de_ator); } + +int +anthy_init_wordlist (void) +{ + /* {"名詞35",POS_NOUN,COS_NONE,SCOS_T35,CC_NONE,CT_NONE,WF_INDEP} */ + anthy_type_to_wtype ("#T", &anthy_wtype_noun); + + /* {"人名",POS_NOUN,COS_JN,SCOS_NONE,CC_NONE,CT_NONE,WF_INDEP} */ + anthy_type_to_wtype ("#JN", &anthy_wtype_name_noun); +#if 0 + /* {"名詞接頭辞",POS_PRE,COS_NONE,SCOS_NONE,CC_NONE,CT_NONE,WF_INDEP} */ + anthy_type_to_wtype ("#PRE", &anthy_wtype_prefix); +#endif + /* {"数接頭辞",POS_PRE,COS_NN,SCOS_NONE,CC_NONE,CT_NONE,WF_NONE} */ + anthy_type_to_wtype ("#NNPRE", &anthy_wtype_num_prefix); + + /* {"数接尾辞",POS_SUC,COS_NN,SCOS_NONE,CC_NONE,CT_NONE,WF_NONE} */ + /* {"#JS",POS_SUC,COS_NN,SCOS_NONE,CC_NONE,CT_NONE,WF_INDEP} # "助数詞" */ + anthy_type_to_wtype ("#JS", &anthy_wtype_num_postfix); + + /* {"人名接尾辞",POS_SUC,COS_JN,SCOS_NONE,CC_NONE,CT_NONE,WF_INDEP} */ + anthy_type_to_wtype ("JNSUC", &anthy_wtype_name_postfix); + + /* {"サ変接尾辞",POS_SUC,COS_SVSUFFIX,SCOS_NONE,CC_NONE,CT_NONE,WF_INDEP} */ + anthy_type_to_wtype ("#SVSUC", &anthy_wtype_sv_postfix); + + /* {"数詞",POS_NUMBER,COS_NN,SCOS_NONE,CC_NONE,CT_NONE,WF_INDEP} */ + anthy_type_to_wtype ("#NN", &anthy_wtype_num_noun); /* exported for ext_ent.c */ + + return 0; +} diff --git a/src-worddic/ext_ent.c b/src-worddic/ext_ent.c index e3de315..e913f10 100644 --- a/src-worddic/ext_ent.c +++ b/src-worddic/ext_ent.c @@ -36,8 +36,6 @@ static struct seq_ent unkseq_ent;/*未知文字列たとえば英文字列とか*/ static struct seq_ent num_ent;/*数字など*/ static struct seq_ent sep_ent;/*セパレータなど。*/ -/* ext entryのwtype*/ -static wtype_t wt_num; static xchar narrow_wide_tab[]= {WIDE_0, WIDE_1, WIDE_2, WIDE_3, WIDE_4, WIDE_5, @@ -520,7 +518,7 @@ anthy_get_nth_dic_ent_wtype_of_ext_ent(xstr *xs, int nth, (void)nth; type = anthy_get_xstr_type(xs); if (type & (XCT_NUM | XCT_WIDENUM)) { - *wt = wt_num; + *wt = anthy_wtype_num_noun; return 0; } if (type & XCT_KATA) { @@ -543,7 +541,7 @@ int anthy_get_ext_seq_ent_wtype(struct seq_ent *se, wtype_t w) { if (se == &num_ent) { - if (anthy_wtype_include(w, wt_num)) { + if (anthy_wtype_include(w, anthy_wtype_num_noun)) { /* 数字の場合 */ return 10; } @@ -568,6 +566,4 @@ anthy_init_ext_ent(void) num_ent.nr_dic_ents = 0; sep_ent.seq_type = 0; sep_ent.nr_dic_ents = 0; - /**/ - wt_num = anthy_init_wtype_by_name("数詞"); } diff --git a/src-worddic/wtype.c b/src-worddic/wtype.c index df29de3..974dd07 100644 --- a/src-worddic/wtype.c +++ b/src-worddic/wtype.c @@ -27,6 +27,9 @@ wtype_t anthy_wt_none, anthy_wt_all; +wtype_t anthy_wtype_num_noun; +wtype_t anthy_wtype_a_tail_of_v_renyou; + struct wttable { const char *name; int pos; @@ -71,6 +74,13 @@ anthy_init_wtypes(void) anthy_wt_none = anthy_wt_all; anthy_wt_none.pos = POS_INVAL; + + /* {"数詞",POS_NUMBER,COS_NN,SCOS_NONE,CC_NONE,CT_NONE,WF_INDEP} */ + anthy_type_to_wtype ("#NN", &anthy_wtype_num_noun); /* exported for ext_ent.c */ + + /* {"形容詞化接尾語",POS_D2KY,COS_NONE,SCOS_A1,CC_NONE,CT_HEAD,WF_INDEP} */ + /* {"#D2KY",POS_D2KY,COS_SUFFIX,SCOS_A1,CC_A_KU,CT_HEAD,WF_INDEP} # "形容詞化接尾語(しづらい,がたい)" */ + anthy_type_to_wtype ("D2KY", &anthy_wtype_a_tail_of_v_renyou); /* exported for metaword.c */ } /* --