The ChaSen legacy source.
修訂 | 0ad8b0968f4de665092eee49921b9ae11a1e99f4 (tree) |
---|---|
時間 | 2007-03-25 20:57:27 |
作者 | kazuma-t <kazuma-t> |
Committer | kazuma-t |
read outputs of ChaSen
@@ -27,7 +27,7 @@ | ||
27 | 27 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
28 | 28 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 | 29 | * |
30 | - * $Id: chalib.c,v 1.2 2007/03/14 04:01:23 masayu-a Exp $ | |
30 | + * $Id: chalib.c,v 1.3 2007/03/25 11:57:27 kazuma-t Exp $ | |
31 | 31 | */ |
32 | 32 | |
33 | 33 | #include "chalib.h" |
@@ -461,20 +461,28 @@ seg_tokenize(unsigned char *line, cha_seg_t *seg) | ||
461 | 461 | return seg->len; |
462 | 462 | } |
463 | 463 | |
464 | -static void | |
465 | -chomp(char *buf) | |
464 | +static int | |
465 | +strip(unsigned char *s) | |
466 | 466 | { |
467 | - int len = strlen(buf); | |
468 | - if (buf[len - 1] == '\n') | |
469 | - buf[len - 1] = '\0'; | |
467 | + int len = strlen(s); | |
468 | + | |
469 | + if (s[len - 1] == '\n') | |
470 | + s[len-- - 1] = '\0'; | |
471 | + | |
472 | + while (len > 0 && s[len - 1] == '\t') | |
473 | + s[len-- - 1] = '\0'; | |
474 | + | |
475 | + return len; | |
470 | 476 | } |
471 | 477 | |
478 | + | |
472 | 479 | int |
473 | 480 | chasen_parse_segments(FILE *input, FILE *output) |
474 | 481 | { |
475 | 482 | cha_lat_t lat; |
476 | 483 | unsigned char buf[CHA_INPUT_SIZE]; /* XXX */ |
477 | 484 | cha_seg_t seg; |
485 | + int is_eos = 1; | |
478 | 486 | |
479 | 487 | if (!Cha_undef_info_num) |
480 | 488 | cha_init(); |
@@ -483,18 +491,18 @@ chasen_parse_segments(FILE *input, FILE *output) | ||
483 | 491 | |
484 | 492 | cha_set_output(output); |
485 | 493 | |
486 | - cha_print_reset(); | |
487 | - cha_parse_bos(&lat); | |
488 | - | |
489 | 494 | while (fgets(buf, CHA_INPUT_SIZE, input) != NULL) { |
490 | - chomp(buf); | |
495 | + strip(buf); | |
496 | + if (is_eos) { | |
497 | + cha_print_reset(); | |
498 | + cha_parse_bos(&lat); | |
499 | + is_eos = 0; | |
500 | + } | |
491 | 501 | if (!buf[0] || cha_litmatch(buf, 2, STR_EOS, STR_BOS_EOS)) { |
492 | 502 | /* EOS */ |
493 | 503 | cha_parse_eos(&lat); |
494 | 504 | cha_print_path(&lat, opt_show, opt_form, opt_form_string); |
495 | - /* BOS */ | |
496 | - cha_print_reset(); | |
497 | - cha_parse_bos(&lat); | |
505 | + is_eos = 1; | |
498 | 506 | continue; |
499 | 507 | } |
500 | 508 | if (seg_tokenize(buf, &seg) < 0) { |
@@ -503,8 +511,10 @@ chasen_parse_segments(FILE *input, FILE *output) | ||
503 | 511 | } |
504 | 512 | cha_parse_segment(&lat, &seg); |
505 | 513 | } |
506 | - cha_parse_eos(&lat); | |
507 | - cha_print_path(&lat, opt_show, opt_form, opt_form_string); | |
514 | + if (!is_eos) { | |
515 | + cha_parse_eos(&lat); | |
516 | + cha_print_path(&lat, opt_show, opt_form, opt_form_string); | |
517 | + } | |
508 | 518 | |
509 | 519 | return lat.len; |
510 | 520 | } |