sumom****@users*****
sumom****@users*****
2008年 12月 12日 (金) 16:14:58 JST
Index: julius4/libjulius/src/beam.c diff -u julius4/libjulius/src/beam.c:1.10 julius4/libjulius/src/beam.c:1.11 --- julius4/libjulius/src/beam.c:1.10 Fri Nov 14 13:12:12 2008 +++ julius4/libjulius/src/beam.c Fri Dec 12 16:14:57 2008 @@ -42,7 +42,7 @@ * @author Akinobu LEE * @date Tue Feb 22 17:00:45 2005 * - * $Revision: 1.10 $ + * $Revision: 1.11 $ * */ /* @@ -583,6 +583,7 @@ LOGPROB maxscore; int i; TRELLIS_ATOM **idx; + int num; if (r->lmvar != LM_DFA_WORD) return; @@ -629,13 +630,14 @@ /* more than one candidate is requested */ /* get actual number of candidates to output */ - r->result.sentnum = r->config->output.output_hypo_maxnum; - if (r->result.sentnum > bt->num[last_time]) { - r->result.sentnum = bt->num[last_time]; + num = r->config->output.output_hypo_maxnum; + if (num > bt->num[last_time]) { + num = bt->num[last_time]; } /* prepare result storage */ - r->result.sent = (Sentence *)mymalloc(sizeof(Sentence)* r->result.sentnum); + result_sentence_malloc(r, num); + r->result.sentnum = num; /* sort by score */ idx = (TRELLIS_ATOM **)mymalloc(sizeof(TRELLIS_ATOM *)*bt->num[last_time]); @@ -662,7 +664,6 @@ } else { s->gram_id = 0; } - s->align.filled = FALSE; } /* free work area for sort */ free(idx); @@ -670,7 +671,7 @@ } else { /* only max is needed */ /* prepare result storage */ - r->result.sent = (Sentence *)mymalloc(sizeof(Sentence)); + result_sentence_malloc(r, 1); r->result.sentnum = 1; s = &(r->result.sent[0]); s->word_num = 1; @@ -686,11 +687,11 @@ } else { s->gram_id = 0; } - s->align.filled = FALSE; } /* copy as 1st pass result */ memcpy(&(r->result.pass1), &(r->result.sent[0]), sizeof(Sentence)); + r->result.pass1.align = NULL; //callback_exec(CALLBACK_RESULT, r); //free(r->result.sent); Index: julius4/libjulius/src/jfunc.c diff -u julius4/libjulius/src/jfunc.c:1.4 julius4/libjulius/src/jfunc.c:1.5 --- julius4/libjulius/src/jfunc.c:1.4 Thu Sep 25 14:00:06 2008 +++ julius4/libjulius/src/jfunc.c Fri Dec 12 16:14:57 2008 @@ -19,7 +19,7 @@ * @author 
Akinobu Lee * @date Wed Aug 8 15:04:28 2007 * - * $Revision: 1.4 $ + * $Revision: 1.5 $ * */ /* @@ -1484,8 +1484,10 @@ LOGPROB max_score; PROCESS_AM *am; MFCCCalc *mfcc; + SentenceAlign *align; s = &(r->result.sent[0]); + align = result_align_new(); max_score = LOG_ZERO; @@ -1507,10 +1509,10 @@ return; } outprob_prepare(&(r->am->hmmwrk), mfcc->param->samplenum); - word_align(s->word, s->word_num, mfcc->param, s, r); - printf("%f: %f\n", alpha, s->align.allscore); - if (max_score < s->align.allscore) { - max_score = s->align.allscore; + word_align(s->word, s->word_num, mfcc->param, align, r); + printf("%f: %f\n", alpha, align->allscore); + if (max_score < align->allscore) { + max_score = align->allscore; max_alpha = alpha; } } @@ -1523,6 +1525,8 @@ printf("------------ end VTLN -------------\n"); + result_align_free(align); + } #endif Index: julius4/libjulius/src/recogmain.c diff -u julius4/libjulius/src/recogmain.c:1.9 julius4/libjulius/src/recogmain.c:1.10 --- julius4/libjulius/src/recogmain.c:1.9 Tue Nov 18 17:46:59 2008 +++ julius4/libjulius/src/recogmain.c Fri Dec 12 16:14:57 2008 @@ -12,7 +12,7 @@ * @author Akinobu Lee * @date Wed Aug 8 14:53:53 2007 * - * $Revision: 1.9 $ + * $Revision: 1.10 $ * */ @@ -192,6 +192,67 @@ /** * <EN> + * allocate storage of recognition alignment results. + * + * @return the new pointer + * </EN> + * <JA> + * アラインメント結果の格納場所を確保 + * + * @return 確保された領域へのポインタ + * </JA> + * + * @callgraph + * @callergraph + * + */ +SentenceAlign * +result_align_new() +{ + SentenceAlign *new; + new = (SentenceAlign *)mymalloc(sizeof(SentenceAlign)); + new->w = NULL; + new->ph = NULL; + new->loc = NULL; + new->begin_frame = NULL; + new->end_frame = NULL; + new->avgscore = NULL; + new->is_iwsp = NULL; + new->next = NULL; + return new; +} + +/** + * <EN> + * free storage of recognition alignment results. 
+ * + * @param a [i/o] alignment data to be released + * </EN> + * <JA> + * アラインメント結果の格納場所を解放 + * + * @param a [i/o] 解放されるアラインメントデータ + * </JA> + * + * @callgraph + * @callergraph + * + */ +void +result_align_free(SentenceAlign *a) +{ + if (a->w) free(a->w); + if (a->ph) free(a->ph); + if (a->loc) free(a->loc); + if (a->begin_frame) free(a->begin_frame); + if (a->end_frame) free(a->end_frame); + if (a->avgscore) free(a->avgscore); + if (a->is_iwsp) free(a->is_iwsp); + free(a); +} + +/** + * <EN> * Allocate storage of recognition results. * </EN> * <JA> @@ -210,16 +271,7 @@ { int i; r->result.sent = (Sentence *)mymalloc(sizeof(Sentence) * num); - for(i=0;i<num;i++) { - r->result.sent[i].align.filled = FALSE; - r->result.sent[i].align.w = NULL; - r->result.sent[i].align.ph = NULL; - r->result.sent[i].align.loc = NULL; - r->result.sent[i].align.begin_frame = NULL; - r->result.sent[i].align.end_frame = NULL; - r->result.sent[i].align.avgscore = NULL; - r->result.sent[i].align.is_iwsp = NULL; - } + for(i=0;i<num;i++) r->result.sent[i].align = NULL; r->result.sentnum = 0; } @@ -240,15 +292,15 @@ result_sentence_free(RecogProcess *r) { int i; + SentenceAlign *a, *atmp; if (r->result.sent) { for(i=0;i<r->result.sentnum;i++) { - if (r->result.sent[i].align.w) free(r->result.sent[i].align.w); - if (r->result.sent[i].align.ph) free(r->result.sent[i].align.ph); - if (r->result.sent[i].align.loc) free(r->result.sent[i].align.loc); - if (r->result.sent[i].align.begin_frame) free(r->result.sent[i].align.begin_frame); - if (r->result.sent[i].align.end_frame) free(r->result.sent[i].align.end_frame); - if (r->result.sent[i].align.avgscore) free(r->result.sent[i].align.avgscore); - if (r->result.sent[i].align.is_iwsp) free(r->result.sent[i].align.is_iwsp); + a = r->result.sent[i].align; + while(a) { + atmp = a->next; + result_align_free(a); + a = atmp; + } } free(r->result.sent); r->result.sent = NULL; Index: julius4/libjulius/src/word_align.c diff -u 
julius4/libjulius/src/word_align.c:1.4 julius4/libjulius/src/word_align.c:1.5 --- julius4/libjulius/src/word_align.c:1.4 Sun Nov 16 21:28:04 2008 +++ julius4/libjulius/src/word_align.c Fri Dec 12 16:14:57 2008 @@ -33,7 +33,7 @@ * @author Akinobu Lee * @date Sat Sep 24 16:09:46 2005 * - * $Revision: 1.4 $ + * $Revision: 1.5 $ * */ /* @@ -170,7 +170,7 @@ * @param wnum [in] @a words の長さ * @param param [in] 入力特徴パラメータ列 * @param per_what [in] 単語・音素・状態のどの単位でアラインメントを取るかを指定 - * @param s [out] アラインメント結果を格納するSentence構造体 + * @param align [out] アラインメント結果を格納するSentence構造体 * @param r [i/o] 認識処理インスタンス * </JA> * <EN> @@ -185,7 +185,7 @@ * </EN> */ static void -do_align(WORD_ID *words, short wnum, HTK_Param *param, int per_what, Sentence *s, RecogProcess *r) +do_align(WORD_ID *words, short wnum, HTK_Param *param, int per_what, SentenceAlign *align, RecogProcess *r) { HMM_Logical **phones; /* phoneme sequence */ boolean *has_sp; /* whether phone can follow short pause */ @@ -277,53 +277,51 @@ allscore = viterbi_segment(shmm, param, r->wchmm->hmmwrk, hmminfo->multipath, end_state, end_num, &id_seq, &end_frame, &end_score, &rlen); /* store result to s */ - s->align.num = rlen; - s->align.unittype = per_what; - s->align.begin_frame = (int *)mymalloc(sizeof(int) * rlen); - s->align.end_frame = (int *)mymalloc(sizeof(int) * rlen); - s->align.avgscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * rlen); + align->num = rlen; + align->unittype = per_what; + align->begin_frame = (int *)mymalloc(sizeof(int) * rlen); + align->end_frame = (int *)mymalloc(sizeof(int) * rlen); + align->avgscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * rlen); for(i=0;i<rlen;i++) { - s->align.begin_frame[i] = (i == 0) ? 0 : end_frame[i-1] + 1; - s->align.end_frame[i] = end_frame[i]; - s->align.avgscore[i] = end_score[i]; + align->begin_frame[i] = (i == 0) ? 
0 : end_frame[i-1] + 1; + align->end_frame[i] = end_frame[i]; + align->avgscore[i] = end_score[i]; } switch(per_what) { case PER_WORD: - s->align.w = (WORD_ID *)mymalloc(sizeof(WORD_ID) * rlen); + align->w = (WORD_ID *)mymalloc(sizeof(WORD_ID) * rlen); for(i=0;i<rlen;i++) { - s->align.w[i] = words[id_seq[i]]; + align->w[i] = words[id_seq[i]]; } break; case PER_PHONEME: - s->align.ph = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * rlen); + align->ph = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * rlen); for(i=0;i<rlen;i++) { - s->align.ph[i] = phones[id_seq[i]]; + align->ph[i] = phones[id_seq[i]]; } break; case PER_STATE: - s->align.ph = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * rlen); - s->align.loc = (short *)mymalloc(sizeof(short) * rlen); - if (hmminfo->multipath) s->align.is_iwsp = (boolean *)mymalloc(sizeof(boolean) * rlen); + align->ph = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * rlen); + align->loc = (short *)mymalloc(sizeof(short) * rlen); + if (hmminfo->multipath) align->is_iwsp = (boolean *)mymalloc(sizeof(boolean) * rlen); for(i=0;i<rlen;i++) { - s->align.ph[i] = phones[phloc[id_seq[i]]]; + align->ph[i] = phones[phloc[id_seq[i]]]; if (hmminfo->multipath) { if (enable_iwsp && stloc[id_seq[i]] > end_num) { - s->align.loc[i] = stloc[id_seq[i]] - end_num; - s->align.is_iwsp[i] = TRUE; + align->loc[i] = stloc[id_seq[i]] - end_num; + align->is_iwsp[i] = TRUE; } else { - s->align.loc[i] = stloc[id_seq[i]]; - s->align.is_iwsp[i] = FALSE; + align->loc[i] = stloc[id_seq[i]]; + align->is_iwsp[i] = FALSE; } } else { - s->align.loc[i] = stloc[id_seq[i]]; + align->loc[i] = stloc[id_seq[i]]; } } break; } - s->align.allscore = allscore; - - s->align.filled = TRUE; + align->allscore = allscore; free_hmm(shmm); free(id_seq); @@ -353,7 +351,7 @@ * @param words [in] 単語列 * @param wnum [in] @a words の単語数 * @param param [in] 入力特徴ベクトル列 - * @param s [out] アラインメント結果を格納するSentence構造体 + * @param align [out] アラインメント結果を格納するSentence構造体 * @param r [i/o] 認識処理インスタンス * 
</JA> * <EN> @@ -362,16 +360,16 @@ * @param words [in] word sequence * @param wnum [in] length of @a words * @param param [in] input parameter vectors - * @param s [out] Sentence data area to store the alignment result + * @param align [out] Sentence data area to store the alignment result * @param r [i/o] recognition process instance * </EN> * @callgraph * @callergraph */ void -word_align(WORD_ID *words, short wnum, HTK_Param *param, Sentence *s, RecogProcess *r) +word_align(WORD_ID *words, short wnum, HTK_Param *param, SentenceAlign *align, RecogProcess *r) { - do_align(words, wnum, param, PER_WORD, s, r); + do_align(words, wnum, param, PER_WORD, align, r); } /** @@ -381,7 +379,7 @@ * @param revwords [in] 単語列(逆順) * @param wnum [in] @a revwords の単語数 * @param param [in] 入力特徴ベクトル列 - * @param s [out] アラインメント結果を格納するSentence構造体 + * @param align [out] アラインメント結果を格納するSentence構造体 * @param r [i/o] 認識処理インスタンス * </JA> * <EN> @@ -390,20 +388,20 @@ * @param revwords [in] word sequence in reversed direction * @param wnum [in] length of @a revwords * @param param [in] input parameter vectors - * @param s [out] Sentence data area to store the alignment result + * @param align [out] Sentence data area to store the alignment result * @param r [i/o] recognition process instance * </EN> * @callgraph * @callergraph */ void -word_rev_align(WORD_ID *revwords, short wnum, HTK_Param *param, Sentence *s, RecogProcess *r) +word_rev_align(WORD_ID *revwords, short wnum, HTK_Param *param, SentenceAlign *align, RecogProcess *r) { WORD_ID *words; /* word sequence (true order) */ int w; words = (WORD_ID *)mymalloc(sizeof(WORD_ID) * wnum); for (w=0;w<wnum;w++) words[w] = revwords[wnum-w-1]; - do_align(words, wnum, param, PER_WORD, s, r); + do_align(words, wnum, param, PER_WORD, align, r); free(words); } @@ -414,7 +412,7 @@ * @param words [in] 単語列 * @param num [in] @a words の単語数 * @param param [in] 入力特徴ベクトル列 - * @param s [out] アラインメント結果を格納するSentence構造体 + * @param align [out] 
アラインメント結果を格納するSentence構造体 * @param r [i/o] 認識処理インスタンス * </JA> * <EN> @@ -423,16 +421,16 @@ * @param words [in] word sequence * @param num [in] length of @a words * @param param [in] input parameter vectors - * @param s [out] Sentence data area to store the alignment result + * @param align [out] Sentence data area to store the alignment result * @param r [i/o] recognition process instance * </EN> * @callgraph * @callergraph */ void -phoneme_align(WORD_ID *words, short num, HTK_Param *param, Sentence *s, RecogProcess *r) +phoneme_align(WORD_ID *words, short num, HTK_Param *param, SentenceAlign *align, RecogProcess *r) { - do_align(words, num, param, PER_PHONEME, s, r); + do_align(words, num, param, PER_PHONEME, align, r); } /** @@ -442,7 +440,7 @@ * @param revwords [in] 単語列(逆順) * @param num [in] @a revwords の単語数 * @param param [in] 入力特徴ベクトル列 - * @param s [out] アラインメント結果を格納するSentence構造体 + * @param align [out] アラインメント結果を格納するSentence構造体 * @param r [i/o] 認識処理インスタンス * </JA> * <EN> @@ -451,20 +449,20 @@ * @param revwords [in] word sequence in reversed direction * @param num [in] length of @a revwords * @param param [in] input parameter vectors - * @param s [out] Sentence data area to store the alignment result + * @param align [out] Sentence data area to store the alignment result * @param r [i/o] recognition process instance * </EN> * @callgraph * @callergraph */ void -phoneme_rev_align(WORD_ID *revwords, short num, HTK_Param *param, Sentence *s, RecogProcess *r) +phoneme_rev_align(WORD_ID *revwords, short num, HTK_Param *param, SentenceAlign *align, RecogProcess *r) { WORD_ID *words; /* word sequence (true order) */ int p; words = (WORD_ID *)mymalloc(sizeof(WORD_ID) * num); for (p=0;p<num;p++) words[p] = revwords[num-p-1]; - do_align(words, num, param, PER_PHONEME, s, r); + do_align(words, num, param, PER_PHONEME, align, r); free(words); } @@ -475,7 +473,7 @@ * @param words [in] 単語列 * @param num [in] @a words の単語数 * @param param [in] 入力特徴ベクトル列 - * @param s [out] 
アラインメント結果を格納するSentence構造体 + * @param align [out] アラインメント結果を格納するSentence構造体 * @param r [i/o] 認識処理インスタンス * </JA> * <EN> @@ -484,16 +482,16 @@ * @param words [in] word sequence * @param num [in] length of @a words * @param param [in] input parameter vectors - * @param s [out] Sentence data area to store the alignment result + * @param align [out] Sentence data area to store the alignment result * @param r [i/o] recognition process instance * </EN> * @callgraph * @callergraph */ void -state_align(WORD_ID *words, short num, HTK_Param *param, Sentence *s, RecogProcess *r) +state_align(WORD_ID *words, short num, HTK_Param *param, SentenceAlign *align, RecogProcess *r) { - do_align(words, num, param, PER_STATE, s, r); + do_align(words, num, param, PER_STATE, align, r); } /** @@ -503,7 +501,7 @@ * @param revwords [in] 単語列(逆順) * @param num [in] @a revwords の単語数 * @param param [in] 入力特徴ベクトル列 - * @param s [out] アラインメント結果を格納するSentence構造体 + * @param align [out] アラインメント結果を格納するSentence構造体 * @param r [i/o] 認識処理インスタンス * </JA> * <EN> @@ -512,20 +510,20 @@ * @param revwords [in] word sequence in reversed direction * @param num [in] length of @a revwords * @param param [in] input parameter vectors - * @param s [out] Sentence data area to store the alignment result + * @param align [out] Sentence data area to store the alignment result * @param r [i/o] recognition process instance * </EN> * @callgraph * @callergraph */ void -state_rev_align(WORD_ID *revwords, short num, HTK_Param *param, Sentence *s, RecogProcess *r) +state_rev_align(WORD_ID *revwords, short num, HTK_Param *param, SentenceAlign *align, RecogProcess *r) { WORD_ID *words; /* word sequence (true order) */ int p; words = (WORD_ID *)mymalloc(sizeof(WORD_ID) * num); for (p=0;p<num;p++) words[p] = revwords[num-p-1]; - do_align(words, num, param, PER_STATE, s, r); + do_align(words, num, param, PER_STATE, align, r); free(words); } @@ -550,16 +548,32 @@ { int n; Sentence *s; + SentenceAlign *now, *prev; for(n = 0; n < 
r->result.sentnum; n++) { s = &(r->result.sent[n]); - /* do forced alignment if needed */ - if (r->config->annotate.align_result_word_flag) - word_align(s->word, s->word_num, param, s, r); - if (r->config->annotate.align_result_phoneme_flag) - phoneme_align(s->word, s->word_num, param, s, r); - if (r->config->annotate.align_result_state_flag) - state_align(s->word, s->word_num, param, s, r); + /* do forced alignment if needed */ + if (r->config->annotate.align_result_word_flag) { + now = result_align_new(); + word_align(s->word, s->word_num, param, now, r); + if (s->align == NULL) s->align = now; + else prev->next = now; + prev = now; + } + if (r->config->annotate.align_result_phoneme_flag) { + now = result_align_new(); + phoneme_align(s->word, s->word_num, param, now, r); + if (s->align == NULL) s->align = now; + else prev->next = now; + prev = now; + } + if (r->config->annotate.align_result_state_flag) { + now = result_align_new(); + state_align(s->word, s->word_num, param, now, r); + if (s->align == NULL) s->align = now; + else prev->next = now; + prev = now; + } } }