Hiroyuki Komatsu
komat****@users*****
2005年 3月 10日 (木) 13:01:35 JST
Index: prime/lib/composer.rb diff -u prime/lib/composer.rb:1.6 prime/lib/composer.rb:1.7 --- prime/lib/composer.rb:1.6 Wed Mar 9 17:42:52 2005 +++ prime/lib/composer.rb Thu Mar 10 13:01:34 2005 @@ -1,5 +1,5 @@ # composer.rb: Module of composition for PrimeSession -# $Id: composer.rb,v 1.6 2005/03/09 08:42:52 komatsu Exp $ +# $Id: composer.rb,v 1.7 2005/03/10 04:01:34 komatsu Exp $ # # Copyright (C) 2005 Hiroyuki Komatsu <komat****@taiya*****> # All rights reserved. @@ -21,6 +21,22 @@ :PrimeEngineUserdict2, :PrimeEngineUserdict2English, :PrimeEnginePersonalDict ] ) + @composer_prime_engine_english = + PrimeEngines::initialize_engines( [ :PrimeEngineEnglish, + :PrimeEngineUserdict2English, + :PrimeEnginePersonalDict ] ) + @prime_context = PrimeContext.new() + end + + def edit_get_preediting_string( context = nil ) + if context then + set_prime_context( context ) + end + return super() + end + + def set_prime_context ( context ) + @prime_context = context end ## This checks the validation of the preediting string. 
If the preedition @@ -64,7 +80,26 @@ end max_score = words[0].score - threshold = 4000 + + prev_words = @prime_context.previous_words() +# if prev_words.length >= 2 and +# prev_words[-1] == " " and prev_words[-2] =~ /[a-z]$/ then + if prev_words.length >= 1 and prev_words[-1] =~ /[a-z]$/ then + + query_line = prev_words[-1] + " " + rawinput + query = PrimeQuery.new( [ query_line ], nil, :exact ) + words_list = @composer_prime_engine_english.command( :search, query ) + words2 = PrimeWordList::merge(words_list) + if words2.length > 0 then + return false + end + + threshold = 1 + else + threshold = 4000 + end + $stderr.puts( "[ " + prev_words.join(" | ") + " ] :#{threshold}" ) + words.each { | word | if max_score - word.score > threshold then break @@ -165,7 +200,7 @@ end def edit_get_preedition () - return @composer.edit_get_preediting_string() + return @composer.edit_get_preediting_string( get_context() ) end def edit_get_query_string () @@ -199,7 +234,7 @@ base = reading base_reading = reading pos = nil - context = get_context() + context = get_context().previous_word() adjunct = "" rest = "" @@ -207,7 +242,7 @@ base_reading, base, pos, context, adjunct, rest) ## Set next context - context_reset() +# context_reset() context_set_previous_word(reading) ## Reset the conversion. Index: prime/lib/prime-japanese.rb diff -u prime/lib/prime-japanese.rb:1.11 prime/lib/prime-japanese.rb:1.12 --- prime/lib/prime-japanese.rb:1.11 Mon Mar 7 16:51:32 2005 +++ prime/lib/prime-japanese.rb Thu Mar 10 13:01:34 2005 @@ -1,5 +1,5 @@ # prime/prime-japnese.rb: Japanese module for PRIME. -# $Id: prime-japanese.rb,v 1.11 2005/03/07 07:51:32 komatsu Exp $ +# $Id: prime-japanese.rb,v 1.12 2005/03/10 04:01:34 komatsu Exp $ # # Copyright (C) 2004 Hiroyuki Komatsu <komat****@taiya*****> # All rights reserved. @@ -158,7 +158,7 @@ ## FIXME: How far is a range of glue. 
## FIXME: <komat****@taiya*****> (2004-05-13) - glue = Prime::get_prefix(literal + conjugation, literal1) + glue = Prime::get_separator(literal + conjugation, literal1) pron += (conjugation + pron1) literal += (conjugation + glue + literal1) score = (Math::sqrt(score * score1) * cost).to_i Index: prime/lib/prime.rb diff -u prime/lib/prime.rb:1.18 prime/lib/prime.rb:1.19 --- prime/lib/prime.rb:1.18 Mon Mar 7 16:51:32 2005 +++ prime/lib/prime.rb Thu Mar 10 13:01:34 2005 @@ -1,5 +1,5 @@ # prime/prime.rb -# $Id: prime.rb,v 1.18 2005/03/07 07:51:32 komatsu Exp $ +# $Id: prime.rb,v 1.19 2005/03/10 04:01:34 komatsu Exp $ # # Copyright (C) 2002, 2003, 2004 Hiroyuki Komatsu <komat****@taiya*****> # All rights reserved. @@ -81,24 +81,65 @@ end public - ## This returns an avairable prefix string for the literal. - ## ex). get_prefix("This", "is") => " " - def Prime::get_prefix (context, literal) - if PRIME_ENV['style_auto_space'] and context and context != "" then - if (context[-1] > 128 and literal[0] > 128) or - (literal[0] < ?a and literal[0] > ?z and - literal[0] < ?A and literal[0] > ?Z) or - (context =~ /[(「、。『【〈《(〔[{“‘]$/) then - return "" - else - return " " - end - else +# ## This returns an avairable prefix string for the literal. +# ## ex). get_prefix("This", "is") => " " +# def Prime::get_prefix (context, literal) +# if PRIME_ENV['style_auto_space'] and context and context != "" then +# if (context[-1] > 128 and literal[0] > 128) or +# (literal[0] < ?a and literal[0] > ?z and +# literal[0] < ?A and literal[0] > ?Z) or +# (context =~ /[(「、。『【〈《(〔[{“‘]$/) then +# return "" +# else +# return " " +# end +# else +# return "" +# end +# end + + ## This returns a separator string among two words. + ## ex). Prime::get_separator("This", "is") => " " + ## Prime::get_separator("pen", "です") => "" + def Prime::get_separator (word1, word2) + if PRIME_ENV['style_auto_space'] == false then + return "" + end + + if word1.nil? or word1.empty? 
then return "" end + + if (word1[-1] > 128 and word2[0] > 128) or + (word2[0] < ?a and word2[0] > ?z and + word2[0] < ?A and word2[0] > ?Z) or + (word1 =~ /[(「、。『【〈《(〔[{“‘]$/) then + return "" + end + + return " " end end +class PrimeContext + def initialize (word = nil) + @words = [] + if word then + set_previous_word( word ) + end + end + + def set_previous_word ( word ) + @words.push( word ) + end + + def previous_words () + return @words + end + def previous_word () + return @words.last() + end +end class PrimeQuery attr_accessor :input, :pos, :method, :context @@ -107,7 +148,17 @@ @pos = pos # method = {:prefix, :exact, :literal_prefix, :literal_exact,:overall, :context} @method = method - @context = context + + ## FIXME: The type of @context should be PrimeContext. + ## FIXME: (2005-03-09) <Hiroyuki Komatsu> + case context + when PrimeContext then + @context = context.previous_word() + when String then + @context = context + else + @context = nil + end end end @@ -268,7 +319,12 @@ } else mark[word_key] = word - word.prefix = Prime::get_prefix(context, word.literal) + if context.nil? then + prev_word = nil + else + prev_word = context.previous_word() + end + word.prefix = Prime::get_separator( prev_word, word.literal ) merged.push(word) end end @@ -280,7 +336,8 @@ ## and returns the given words. def PrimeWordList::attach_prefix (context, words) words.each { | word | - word.prefix = Prime::get_prefix(context, word.literal) + word.prefix = Prime::get_separator(context.previous_word, + word.literal) } return words end Index: prime/lib/session-english.rb diff -u prime/lib/session-english.rb:1.3 prime/lib/session-english.rb:1.4 --- prime/lib/session-english.rb:1.3 Mon Mar 7 18:26:57 2005 +++ prime/lib/session-english.rb Thu Mar 10 13:01:34 2005 @@ -1,5 +1,5 @@ # session-english.rb: Session library for English. 
-# $Id: session-english.rb,v 1.3 2005/03/07 09:26:57 komatsu Exp $ +# $Id: session-english.rb,v 1.4 2005/03/10 04:01:34 komatsu Exp $ # # Copyright (C) 2005 Hiroyuki Komatsu <komat****@taiya*****> # All rights reserved. @@ -27,41 +27,6 @@ @language = "English" end - def set_conversions (conversions) - @conversions = conversions - end - - def set_selection (index_no) - @conversions.set_conversion_index(index_no) - return @conversions[index_no] - end - def get_selection () - retur****@conve*****_conversion() - end - - def set_context (context) - @context = context - end - def get_context () - return @context - end - - ## - ## Context methods - ## - def context_reset () - set_context("") - end - - def context_set_previous_segment (segment) - context_set_previous_word( segment.get_base() ) - end - - def context_set_previous_word (word) - set_context(word) - end - - ## ## Conversion methods ## @@ -98,7 +63,7 @@ conversion.segments.each { | segment | learn_segment( segment, get_context() ) - context_reset() +# context_reset() context_set_previous_segment(segment) } commited_string = conversion.get_literal() @@ -115,9 +80,10 @@ pos = segment.get_pos() adjunct = segment.get_adjunct() rest = "" + prev_word = context.previous_word() @engines.command(:learn_word, - base_reading, base, pos, context, adjunct, rest) + base_reading, base, pos, prev_word, adjunct, rest) end def _adhoc_wordlist_to_conversionlist (wordlist) @@ -184,7 +150,7 @@ end ## The current context is just the previous word. 
- context = conversion.get_literal() + context = PrimeContext.new( conversion.get_literal() ) query = PrimeQuery.new([""], nil, :context, context) next_words = search(query) @@ -196,7 +162,8 @@ next_words = next_words[0,1] next_word = next_words.first - next_word.prefix = Prime::get_prefix(context, next_word.literal) + next_word.prefix = Prime::get_separator(context.previous_word, + next_word.literal) reading = next_word.to_text_pron() base_reading = "" Index: prime/lib/session-japanese.rb diff -u prime/lib/session-japanese.rb:1.4 prime/lib/session-japanese.rb:1.5 --- prime/lib/session-japanese.rb:1.4 Tue Mar 8 20:24:57 2005 +++ prime/lib/session-japanese.rb Thu Mar 10 13:01:34 2005 @@ -1,5 +1,5 @@ # session-japanese.rb: Session library for Japanese. -# $Id: session-japanese.rb,v 1.4 2005/03/08 11:24:57 komatsu Exp $ +# $Id: session-japanese.rb,v 1.5 2005/03/10 04:01:34 komatsu Exp $ # # Copyright (C) 2005 Hiroyuki Komatsu <komat****@taiya*****> # All rights reserved. @@ -74,40 +74,6 @@ return composer end - def set_conversions (conversions) - @conversions = conversions - end - - def set_selection (index_no) - @conversions.set_conversion_index(index_no) - return @conversions[index_no] - end - def get_selection () - retur****@conve*****_conversion() - end - - def set_context (context) - @context = context - end - def get_context () - return @context - end - - ## - ## Context methods - ## - def context_reset () - set_context("") - end - - def context_set_previous_segment (segment) - context_set_previous_word( segment.get_base() ) - end - - def context_set_previous_word (word) - set_context(word) - end - ## ## Conversion methods ## @@ -168,8 +134,8 @@ conversion.segments.each { | segment | learn_segment( segment, get_context() ) - context_reset() - context_set_previous_segment(segment) +# context_reset() + context_set_previous_segment( segment ) } commited_string = conversion.get_literal() @@ -185,9 +151,10 @@ pos = segment.get_pos() adjunct = segment.get_adjunct() 
rest = "" + prev_word = context.previous_word() @engines.command(:learn_word, - base_reading, base, pos, context, adjunct, rest) + base_reading, base, pos, prev_word, adjunct, rest) end def _adhoc_wordlist_to_conversionlist (wordlist) @@ -350,14 +317,14 @@ end ## The current context is just the previous word. - context = conversion.get_literal() + context = PrimeContext.new( conversion.get_literal() ) ## If the last character of the specified conversion is one of stop_words, ## This method stops its prediction. (EXPERIMENTAL) stop_words = \ [ PRIME_ENV['style_japanese_period'], PRIME_ENV['style_japanese_comma'] ] - if context =~ /(#{stop_words.join('|')})$/ then + if context.previous_word =~ /(#{stop_words.join('|')})$/ then return nil end @@ -371,7 +338,8 @@ next_words = next_words[0,1] next_word = next_words.first - next_word.prefix = Prime::get_prefix(context, next_word.literal) + next_word.prefix = Prime::get_separator(context.previous_word, + next_word.literal) reading = next_word.to_text_pron() base_reading = "" Index: prime/lib/session.rb diff -u prime/lib/session.rb:1.9 prime/lib/session.rb:1.10 --- prime/lib/session.rb:1.9 Mon Mar 7 18:26:57 2005 +++ prime/lib/session.rb Thu Mar 10 13:01:34 2005 @@ -1,5 +1,5 @@ # session.rb: -# $Id: session.rb,v 1.9 2005/03/07 09:26:57 komatsu Exp $ +# $Id: session.rb,v 1.10 2005/03/10 04:01:34 komatsu Exp $ # # Copyright (C) 2005 Hiroyuki Komatsu <komat****@taiya*****> # All rights reserved. @@ -20,7 +20,7 @@ @composer = initialize_composer() @engines = PrimeEngines::initialize_engines(engine_classes) @conversions = PrimeConversionList.new() - @context = nil + @context = PrimeContext.new() end def initialize_engines (engine_classes) @@ -67,8 +67,10 @@ retur****@conve*****_conversion() end - def set_context (context) - @context = context + ## This method sets the word to the context. + ## Because of a historical issue, the name is set_context. 
+ def set_context ( word ) + context_set_previous_word( word ) end def get_context () return @context @@ -78,14 +80,14 @@ ## Context methods ## def context_reset () - set_context("") + @context = PrimeContext.new() end - def context_set_previous_segment (segment) + def context_set_previous_segment ( segment ) context_set_previous_word( segment.get_base() ) end - def context_set_previous_word (word) - set_context(word) + def context_set_previous_word ( word ) + @context.set_previous_word( word ) end end