sumom****@users*****
sumom****@users*****
2013年 12月 18日 (水) 12:55:21 JST
Index: julius4/libsent/src/hmminfo/put_htkdata_info.c diff -u julius4/libsent/src/hmminfo/put_htkdata_info.c:1.8 julius4/libsent/src/hmminfo/put_htkdata_info.c:1.9 --- julius4/libsent/src/hmminfo/put_htkdata_info.c:1.8 Fri Jun 21 02:14:21 2013 +++ julius4/libsent/src/hmminfo/put_htkdata_info.c Wed Dec 18 12:55:21 2013 @@ -12,7 +12,7 @@ * @author Akinobu LEE * @date Tue Feb 15 23:36:00 2005 * - * $Revision: 1.8 $ + * $Revision: 1.9 $ * */ /* @@ -148,6 +148,7 @@ fprintf(fp, "no output state\n"); } else { if (s->name != NULL) fprintf(fp, "[~s \"%s\"]\n", s->name); + fprintf(fp, "id: %d\n", s->id); for (st=0;st<s->nstream;st++) { fprintf(fp, "stream %d:", st + 1); if (s->w != NULL) { Index: julius4/libsent/src/hmminfo/rdhmmdef.c diff -u julius4/libsent/src/hmminfo/rdhmmdef.c:1.8 julius4/libsent/src/hmminfo/rdhmmdef.c:1.9 --- julius4/libsent/src/hmminfo/rdhmmdef.c:1.8 Fri Jun 21 02:14:21 2013 +++ julius4/libsent/src/hmminfo/rdhmmdef.c Wed Dec 18 12:55:21 2013 @@ -30,7 +30,7 @@ * @author Akinobu LEE * @date Wed Feb 16 00:17:18 2005 * - * $Revision: 1.8 $ + * $Revision: 1.9 $ * */ /* @@ -43,10 +43,14 @@ #include <sent/stddefs.h> #include <sent/htk_param.h> #include <sent/htk_hmm.h> +#ifdef HAVE_ZLIB +#include <zlib.h> +#endif #define MAXBUFLEN 4096 ///< Maximum length of a line in the input char *rdhmmdef_token; ///< Current token string (GLOBAL) +static boolean last_line_full = FALSE; static char buf[MAXBUFLEN]; ///< Local work area for token reading static int line; ///< Input Line count @@ -79,17 +83,52 @@ char * read_token(FILE *fp) { + int len; + int bp = 0; + int maxlen = MAXBUFLEN; + static char delims[] = HMMDEF_DELM; + if ((rdhmmdef_token = mystrtok_quote(NULL, HMMDEF_DELM)) != NULL) { - /* return next token */ - return rdhmmdef_token; + /* has token */ + if (mystrtok_movetonext(NULL, HMMDEF_DELM) != NULL || last_line_full == FALSE) { + /* return the current token, if this is not a last token, or + last is newline terminated */ + return rdhmmdef_token; + } else { + /* concatinate the last token with next line */ + len = strlen(rdhmmdef_token); + memmove(buf, rdhmmdef_token, len); + bp = len; + maxlen -= len; + } } - /* read new 1 line */ - if (getl(buf, MAXBUFLEN, fp) == NULL) { - rdhmmdef_token = NULL; - } else { - rdhmmdef_token = mystrtok_quote(buf, HMMDEF_DELM); - line++; + + /* read new 1 line a*/ + while( +#ifdef HAVE_ZLIB + gzgets((gzFile)fp, &(buf[bp]), maxlen) != Z_NULL +#else + fgets(&(buf[bp]), maxlen, fp) != NULL +#endif + ) { + /* chop delimiters at end of line (incl. newline) */ + /* if no delimiter at end of line, last_line_full is TRUE */ + last_line_full = TRUE; + len = strlen(buf)-1; + while (len >= 0 && strchr(delims, buf[len])) { + last_line_full = FALSE; + buf[len--] = '\0'; + } + if (buf[0] != '\0') { + /* start getting next token */ + rdhmmdef_token = mystrtok_quote(buf, HMMDEF_DELM); + /* increment line */ + line++; + return rdhmmdef_token; + } } + /* when reading error, return NULL */ + rdhmmdef_token = NULL; return rdhmmdef_token; } @@ -162,6 +201,52 @@ } #endif +boolean +htk_hmm_check_sid(HTK_HMM_INFO *hmm) +{ + HTK_HMM_State *stmp; + boolean *check; + int i; + boolean ok_p; + + /* check if each state is assigned a valid sid */ + check = (boolean *)mymalloc(sizeof(boolean) * hmm->totalstatenum); + for(i = 0; i < hmm->totalstatenum; i++) check[i] = FALSE; + for (stmp = hmm->ststart; stmp; stmp = stmp->next) { + if (stmp->id == -1) { + jlog("Error: rdhmmdef: no SID on some states\n"); + free(check); + return FALSE; + } + if (stmp->id < 0) { + jlog("Error: rdhmmdef: invalid SID value: %d\n", stmp->id); + free(check); + return FALSE; + } + if (stmp->id >= hmm->totalstatenum) { + jlog("Error: rdhmmdef: SID value exceeds the number of states? (%d > %d)\n", stmp->id, hmm->totalstatenum); + free(check); + return FALSE; + } + if (check[stmp->id] == TRUE) { + jlog("Error: rdhmmdef: duplicate definition to the same SID: %d\n", stmp->id); + free(check); + return FALSE; + } + check[stmp->id] = TRUE; + } + ok_p = TRUE; + for(i = 0; i < hmm->totalstatenum; i++) { + if (check[i] == FALSE) { + jlog("Error: rdhmmdef: missing SID: %d\n", i); + ok_p = FALSE; + } + } + free(check); + + return ok_p; +} + /** * @brief Main top routine to read in HTK %HMM definition file. * @@ -288,25 +373,54 @@ return FALSE; } - /* add ID number for all HTK_HMM_State */ - /* also calculate the maximum number of mixture */ + /* add ID number for all HTK_HMM_State if not assigned */ { HTK_HMM_State *stmp; - int n, max, s, mix; + int n; + boolean has_sid; + + /* caclculate total num and check if has sid */ + has_sid = FALSE; n = 0; - max = 0; for (stmp = hmm->ststart; stmp; stmp = stmp->next) { - for(s=0;s<stmp->nstream;s++) { - mix = stmp->pdf[s]->mix_num; - if (max < mix) max = mix; - } - stmp->id = n++; + n++; if (n >= MAX_STATE_NUM) { jlog("Error: rdhmmdef: too much states in a model > %d\n", MAX_STATE_NUM); return FALSE; } + if (stmp->id != -1) { + has_sid = TRUE; + } } hmm->totalstatenum = n; + if (has_sid) { + jlog("Stat: rdhmmdef: <SID> found in the definition\n"); + /* check if each state is assigned a valid sid */ + if (htk_hmm_check_sid(hmm) == FALSE) { + jlog("Error: rdhmmdef: error in SID\n"); + return FALSE; + } + } else { + /* assign internal sid (will not be saved) */ + jlog("Stat: rdhmmdef: no <SID> embedded\n"); + jlog("Stat: rdhmmdef: assign SID by the order of appearance\n"); + n = hmm->totalstatenum; + for (stmp = hmm->ststart; stmp; stmp = stmp->next) { + stmp->id = --n; + } + } + } + /* calculate the maximum number of mixture */ + { + HTK_HMM_State *stmp; + int max, s, mix; + max = 0; + for (stmp = hmm->ststart; stmp; stmp = stmp->next) { + for(s=0;s<stmp->nstream;s++) { + mix = stmp->pdf[s]->mix_num; + if (max < mix) max = mix; + } + } hmm->maxmixturenum = max; } /* compute total number of HMM models and maximum length */ Index: julius4/libsent/src/hmminfo/rdhmmdef_state.c diff -u julius4/libsent/src/hmminfo/rdhmmdef_state.c:1.7 julius4/libsent/src/hmminfo/rdhmmdef_state.c:1.8 --- julius4/libsent/src/hmminfo/rdhmmdef_state.c:1.7 Fri Jun 21 02:14:21 2013 +++ julius4/libsent/src/hmminfo/rdhmmdef_state.c Wed Dec 18 12:55:21 2013 @@ -12,7 +12,7 @@ * @author Akinobu LEE * @date Wed Feb 16 03:07:44 2005 * - * $Revision: 1.7 $ + * $Revision: 1.8 $ * */ /* @@ -46,7 +46,7 @@ for(i=0;i<new->nstream;i++) { new->pdf[i] = NULL; } - new->id = 0; + new->id = -1; new->next = NULL; return(new); @@ -128,6 +128,13 @@ new = state_new(hmm); + if (currentis("SID")) { + read_token(fp); + NoTokErr("missing SID value"); + new->id = atoi(rdhmmdef_token); + read_token(fp); + } + if (currentis("NUMMIXES")) { if (hmm->tmp_mixnum == NULL) { hmm->tmp_mixnum = (int *)mybmalloc2(sizeof(int) * hmm->opt.stream_info.num, &(hmm->mroot)); @@ -188,7 +195,7 @@ { HTK_HMM_State *tmp; - if (currentis("NUMMIXES")||currentis("SWEIGHTS")||currentis("~w")||currentis("STREAM")||currentis("MIXTURE")||currentis("TMIX")||currentis("MEAN")||currentis("~m")||currentis("RCLASS")) { + if (currentis("SID")||currentis("NUMMIXES")||currentis("SWEIGHTS")||currentis("~w")||currentis("STREAM")||currentis("MIXTURE")||currentis("TMIX")||currentis("MEAN")||currentis("~m")||currentis("RCLASS")) { /* definition: define state data, and return the pointer */ tmp = state_read(fp, hmm); tmp->name = NULL; /* no name */ Index: julius4/libsent/src/hmminfo/read_binhmm.c diff -u julius4/libsent/src/hmminfo/read_binhmm.c:1.10 julius4/libsent/src/hmminfo/read_binhmm.c:1.11 --- julius4/libsent/src/hmminfo/read_binhmm.c:1.10 Fri Jun 21 02:14:21 2013 +++ julius4/libsent/src/hmminfo/read_binhmm.c Wed Dec 18 12:55:21 2013 @@ -22,7 +22,7 @@ * @author Akinobu LEE * @date Wed Feb 16 05:23:59 2005 * - * $Revision: 1.10 $ + * $Revision: 1.11 $ * */ /* @@ -862,13 +862,22 @@ hmm->totalpdfnum = n; } - /* re-number state id */ + /* check state id */ { HTK_HMM_State *stmp; - int n = 0; + int n; + boolean has_sid; + + /* check if each state is assigned a valid sid */ + if (htk_hmm_check_sid(hmm) == FALSE) { + jlog("Error: rdhmmdef: error in SID\n"); + return FALSE; + } +#if 0 for (stmp = hmm->ststart; stmp; stmp = stmp->next) { stmp->id = n++; } +#endif } /* assign ID number for all HTK_HMM_Trans */ { Index: julius4/libsent/src/hmminfo/write_binhmm.c diff -u julius4/libsent/src/hmminfo/write_binhmm.c:1.8 julius4/libsent/src/hmminfo/write_binhmm.c:1.9 --- julius4/libsent/src/hmminfo/write_binhmm.c:1.8 Fri Jun 21 02:14:21 2013 +++ julius4/libsent/src/hmminfo/write_binhmm.c Wed Dec 18 12:55:21 2013 @@ -22,7 +22,7 @@ * @author Akinobu LEE * @date Wed Feb 16 06:03:36 2005 * - * $Revision: 1.8 $ + * $Revision: 1.9 $ * */ /* @@ -31,7 +31,7 @@ * All rights reserved */ -/* $Id: write_binhmm.c,v 1.8 2013/06/20 17:14:21 sumomo Exp $ */ +/* $Id: write_binhmm.c,v 1.9 2013/12/18 03:55:21 sumomo Exp $ */ #include <sent/stddefs.h> #include <sent/htk_param.h> @@ -823,8 +823,9 @@ static int qsort_st_index(HTK_HMM_State **s1, HTK_HMM_State **s2) { - if (*s1 > *s2) return 1; - else if (*s1 < *s2) return -1; + /* keep ID order */ + if ((*s1)->id > (*s2)->id) return 1; + else if ((*s1)->id < (*s2)->id) return -1; else return 0; } @@ -918,7 +919,8 @@ while (left < right) { mid = (left + right) / 2; - if (st_index[mid] < s) { + /* search by id */ + if (st_index[mid]->id < s->id) { left = mid + 1; } else { right = mid;