svnno****@sourc*****
svnno****@sourc*****
2008年 4月 21日 (月) 10:32:32 JST
Revision: 112
          http://svn.sourceforge.jp/cgi-bin/viewcvs.cgi?root=tritonn&view=rev&rev=112
Author:   mir
Date:     2008-04-21 10:32:32 +0900 (Mon, 21 Apr 2008)

Log Message:
-----------
removed dictgen/datagen prototype because of poor performance.

Removed Paths:
-------------
    misc/data/datagen.sh
    misc/data/dictgen.sh

Deleted: misc/data/datagen.sh
===================================================================
--- misc/data/datagen.sh 2008-04-18 07:33:04 UTC (rev 111)
+++ misc/data/datagen.sh 2008-04-21 01:32:32 UTC (rev 112)
@@ -1,106 +0,0 @@
-#!/bin/sh
-
-#set -x
-ROW_SIZE=40
-ROW_NUM=3
-MYSQL="/usr/local/mysql/bin/mysql -uroot --default-character-set=utf8 dict "
-
-
-# 25 tables 0...24
-TABLE_LIST=(Adj Adverb Auxil Conjunction Filler \
-            Interjection Noun Noun_adjv Noun_adverbal Noun_demonst \
-            Noun_nai Noun_name Noun_number Noun_org Noun_others \
-            Noun_place Noun_proper Noun_verbal Others Postp-col \
-            Postp Prefix Suffix Symbol Verb)
-
-# appearance raito for each table
-RAITO=(2 2 1 1 1 \
-       1 5 1 1 1 \
-       1 1 1 1 1 \
-       1 1 1 1 1 \
-       1 1 1 1 5)
-
-TABLE_ROWS=(0 0 0 0 0 \
-            0 0 0 0 0 \
-            0 0 0 0 0 \
-            0 0 0 0 0 \
-            0 0 0 0 0)
-
-load_table_rows() {
-  i=0
-  while test $i -lt 25
-  do
-    SQL="SELECT COUNT(*) FROM \`${TABLE_LIST[$i]}\`"
-    RESULT=`$MYSQL -s -e "$SQL"`
-    TABLE_ROWS[$i]=$RESULT
-    i=`expr $i + 1`
-  done
-}
-
-RAITO_SUM=
-get_raito_sum() {
-  i=0
-  while test $i -lt 25
-  do
-    RAITO_SUM=`expr $RAITO_SUM + ${RAITO[$i]}`
-    i=`expr $i + 1`
-  done
-}
-
-TABLE=
-ID=
-get_table_and_id() {
-  i=0
-  DICE=`expr $RANDOM % $RAITO_SUM`
-  while test $i -lt 25
-  do
-    if [ $DICE -lt ${RAITO[$i]} ]
-    then
-      TABLE=${TABLE_LIST[$i]}
-      # TODO: should calculate more correctly
-      ID=`expr $RANDOM % ${TABLE_ROWS[$i]} + 1`
-      i=25
-    else
-      DICE=`expr $DICE - ${RAITO[$i]}`
-      i=`expr $i + 1`
-    fi
-  done
-}
-
-
-
-WORD=
-SIZE=
-get_word() {
-  SQL="SELECT size, word FROM \`$TABLE\` WHERE id=$ID"
-  RESULT=`$MYSQL -s -e "$SQL" | grep -v "size"`
-  SIZE=`echo $RESULT | awk '{print $1}'`
-  WORD=`echo $RESULT | awk '{print $2}'`
-}
-
-STRING=
-CUR_SIZE=0
-get_string() {
-  while test $CUR_SIZE -lt $ROW_SIZE
-  do
-    get_table_and_id
-    get_word
-    STRING=${STRING}${WORD}
-    CUR_SIZE=`expr $CUR_SIZE + $SIZE`
-    TABLE=""
-    ID=""
-  done
-}
-
-# main logic
-CUR_ROW_NUM=0
-load_table_rows
-get_raito_sum
-while test $CUR_ROW_NUM -lt $ROW_NUM
-  do
-    get_string
-    echo $STRING
-    STRING=""
-    CUR_SIZE=0
-    CUR_ROW_NUM=`expr $CUR_ROW_NUM + 1`
-done

Deleted: misc/data/dictgen.sh
===================================================================
--- misc/data/dictgen.sh 2008-04-18 07:33:04 UTC (rev 111)
+++ misc/data/dictgen.sh 2008-04-21 01:32:32 UTC (rev 112)
@@ -1,35 +0,0 @@
-#!/bin/sh
-
-TMP=dictgen_workdir
-MYSQL="/usr/local/mysql/bin/mysql -uroot"
-
-rm -rf $TMP
-mkdir $TMP
-
-find -name "*\.csv" > $TMP/dict_file.txt
-sed 's/\.\///g' $TMP/dict_file.txt > $TMP/file_name.txt
-
-while read LINE
-do
-  awk -F, '{print $1}' $LINE > $TMP/$LINE
-done < $TMP/file_name.txt
-
-$MYSQL -e "DROP DATABASE IF EXISTS dict"
-$MYSQL -e "CREATE DATABASE dict DEFAULT CHARSET eucjpms"
-
-while read LINE
-do
-  file_name=$LINE
-  tbl_name=`echo $LINE | sed 's/\.csv//g' | sed 's/\.\///g' | sed 's/\./_/g'`
-  DDL="CREATE TABLE \`$tbl_name\` (id INT PRIMARY KEY AUTO_INCREMENT, size INT, word VARCHAR(128)) DEFAULT CHARSET eucjpms"
-  echo $DDL
-  $MYSQL dict -e "$DDL"
-  DML="LOAD DATA INFILE \"`pwd`/$TMP/${file_name}\" INTO TABLE \`$tbl_name\` (word)"
-  echo $DML
-  $MYSQL dict -e "$DML"
-  DML2="UPDATE \`$tbl_name\` SET size=LENGTH(word)"
-  echo $DML2
-  $MYSQL dict -e "$DML2"
-done < $TMP/file_name.txt
-
-rm -rf $TMP