[Tritonn-commit 99] [svn] [112] removed dictgen/datagen prototype because of poor performance.

Back to archive index

svnno****@sourc***** svnno****@sourc*****
2008年 4月 21日 (月) 10:32:32 JST


Revision: 112
          http://svn.sourceforge.jp/cgi-bin/viewcvs.cgi?root=tritonn&view=rev&rev=112
Author:   mir
Date:     2008-04-21 10:32:32 +0900 (Mon, 21 Apr 2008)

Log Message:
-----------
removed dictgen/datagen prototype because of poor performance.

Removed Paths:
-------------
    misc/data/datagen.sh
    misc/data/dictgen.sh

Deleted: misc/data/datagen.sh
===================================================================
--- misc/data/datagen.sh	2008-04-18 07:33:04 UTC (rev 111)
+++ misc/data/datagen.sh	2008-04-21 01:32:32 UTC (rev 112)
@@ -1,106 +0,0 @@
-#!/bin/sh
-
-#set -x
-ROW_SIZE=40
-ROW_NUM=3
-MYSQL="/usr/local/mysql/bin/mysql -uroot --default-character-set=utf8 dict "
-
-
-# 25 tables 0...24
-TABLE_LIST=(Adj Adverb Auxil Conjunction Filler \
-    Interjection Noun Noun_adjv Noun_adverbal Noun_demonst \
-    Noun_nai Noun_name Noun_number Noun_org Noun_others \
-    Noun_place Noun_proper Noun_verbal Others Postp-col \
-    Postp Prefix Suffix Symbol Verb)
-
-# appearance raito for each table
-RAITO=(2 2 1 1 1 \
-    1 5 1 1 1 \
-    1 1 1 1 1 \
-    1 1 1 1 1 \
-    1 1 1 1 5)
-
-TABLE_ROWS=(0 0 0 0 0 \
-    0 0 0 0 0 \
-    0 0 0 0 0 \
-    0 0 0 0 0 \
-    0 0 0 0 0)
-
-load_table_rows() {
-    i=0
-    while test $i -lt 25
-      do
-      SQL="SELECT COUNT(*) FROM \`${TABLE_LIST[$i]}\`"
-      RESULT=`$MYSQL -s -e "$SQL"`
-      TABLE_ROWS[$i]=$RESULT
-      i=`expr $i + 1`
-    done
-}
-
-RAITO_SUM=
-get_raito_sum() {
-    i=0
-    while test $i -lt 25
-      do
-      RAITO_SUM=`expr $RAITO_SUM + ${RAITO[$i]}`
-      i=`expr $i + 1`
-    done
-}
-
-TABLE=
-ID=
-get_table_and_id() {
-    i=0
-    DICE=`expr $RANDOM % $RAITO_SUM`
-    while test $i -lt 25
-      do
-      if [ $DICE -lt ${RAITO[$i]} ]
-	  then
-	  TABLE=${TABLE_LIST[$i]}
-	  # TODO: should calculate more correctly
-	  ID=`expr $RANDOM % ${TABLE_ROWS[$i]} + 1`
-	  i=25
-      else 
-	  DICE=`expr $DICE - ${RAITO[$i]}`
-	  i=`expr $i + 1`
-      fi
-    done
-}
-
-
-
-WORD=
-SIZE=
-get_word() {
-    SQL="SELECT size, word FROM \`$TABLE\` WHERE id=$ID" 
-    RESULT=`$MYSQL -s -e "$SQL" | grep -v "size"`
-    SIZE=`echo $RESULT | awk '{print $1}'`
-    WORD=`echo $RESULT | awk '{print $2}'`
-}
-
-STRING=
-CUR_SIZE=0
-get_string() {
-    while test $CUR_SIZE -lt $ROW_SIZE
-      do
-      get_table_and_id
-      get_word
-      STRING=${STRING}${WORD}
-      CUR_SIZE=`expr $CUR_SIZE + $SIZE`
-      TABLE=""
-      ID=""
-    done
-}
-
-# main logic
-CUR_ROW_NUM=0
-load_table_rows
-get_raito_sum
-while test $CUR_ROW_NUM -lt $ROW_NUM
-  do
-  get_string
-  echo $STRING
-  STRING=""
-  CUR_SIZE=0
-  CUR_ROW_NUM=`expr $CUR_ROW_NUM + 1`
-done

Deleted: misc/data/dictgen.sh
===================================================================
--- misc/data/dictgen.sh	2008-04-18 07:33:04 UTC (rev 111)
+++ misc/data/dictgen.sh	2008-04-21 01:32:32 UTC (rev 112)
@@ -1,35 +0,0 @@
-#!/bin/sh
-
-TMP=dictgen_workdir
-MYSQL="/usr/local/mysql/bin/mysql -uroot"
-
-rm -rf $TMP 
-mkdir $TMP
-
-find -name "*\.csv" > $TMP/dict_file.txt
-sed 's/\.\///g' $TMP/dict_file.txt > $TMP/file_name.txt
-
-while read LINE
-do
-  awk -F, '{print $1}' $LINE > $TMP/$LINE
-done < $TMP/file_name.txt
-
-$MYSQL -e "DROP DATABASE IF EXISTS dict"
-$MYSQL -e "CREATE DATABASE dict DEFAULT CHARSET eucjpms"
-
-while read LINE
-do
-  file_name=$LINE
-  tbl_name=`echo $LINE | sed 's/\.csv//g' | sed 's/\.\///g' | sed 's/\./_/g'`
-  DDL="CREATE TABLE \`$tbl_name\` (id INT PRIMARY KEY AUTO_INCREMENT, size INT, word VARCHAR(128)) DEFAULT CHARSET eucjpms"
-  echo $DDL
-  $MYSQL dict -e "$DDL"
-  DML="LOAD DATA INFILE \"`pwd`/$TMP/${file_name}\" INTO TABLE \`$tbl_name\` (word)"
-  echo $DML
-  $MYSQL dict -e "$DML"
-  DML2="UPDATE \`$tbl_name\` SET size=LENGTH(word)"
-  echo $DML2
-  $MYSQL dict -e "$DML2"
-done < $TMP/file_name.txt
-
-rm -rf $TMP




Tritonn-commit メーリングリストの案内
Back to archive index