tag: cl-igo-0.2.0
@@ -22,9 +22,12 @@ | ||
22 | 22 | final ReadLine rl = new ReadLine(System.in); |
23 | 23 | if(doWakati) |
24 | 24 | for(String s=rl.read(); s != null; s=rl.read()) { |
25 | + tagger.wakati(s); | |
26 | + /* | |
25 | 27 | for(String w : tagger.wakati(s)) |
26 | 28 | System.out.print(w+" "); |
27 | 29 | System.out.println(""); |
30 | + */ | |
28 | 31 | } |
29 | 32 | else |
30 | 33 | for(String s=rl.read(); s != null; s=rl.read()) { |
@@ -94,11 +94,11 @@ | ||
94 | 94 | nodesAry.add(new ArrayList<ViterbiNode>()); |
95 | 95 | |
96 | 96 | for(int i=0; i < len; i++, perResult.clear()) { |
97 | - if(nodesAry.get(i).isEmpty()==false) { | |
97 | + final ArrayList<ViterbiNode> prevs = nodesAry.get(i); | |
98 | + if(prevs.isEmpty()==false) { | |
98 | 99 | wdc.search(text, i, perResult); // 単語辞書から形態素を検索 |
99 | 100 | unk.search(text, i, wdc, perResult); // 未知語辞書から形態素を検索 |
100 | 101 | |
101 | - final ArrayList<ViterbiNode> prevs = nodesAry.get(i); | |
102 | 102 | for(int j=0; j < perResult.size(); j++) { |
103 | 103 | final ViterbiNode vn = perResult.get(j); |
104 | 104 | if(vn.isSpace) |
@@ -16,7 +16,7 @@ | ||
16 | 16 | |
17 | 17 | <target name="compile"> |
18 | 18 | <mkdir dir="${classes.dir}" /> |
19 | - <javac debug="off" encoding="UTF-8" srcdir="${src.dir}" destdir="${classes.dir}" /> | |
19 | + <javac debug="on" encoding="UTF-8" srcdir="${src.dir}" destdir="${classes.dir}" /> | |
20 | 20 | </target> |
21 | 21 | |
22 | 22 | <target name="javadoc"> |
@@ -0,0 +1,96 @@ | ||
1 | +(in-package :igo) | |
2 | + | |
3 | +;;;;;;;;;;; | |
4 | +;;; declaim | |
5 | +(declaim (inline coerce-to-simple-string set-mincost-node) | |
6 | + #.*optimize-fastest*) | |
7 | + | |
8 | +;;;;;;;;;; | |
9 | +;;; struct | |
10 | +(defstruct tagger | |
11 | + (wdc nil :type dic:word-dic) | |
12 | + (unk nil :type unk:unknown) | |
13 | + (mtx nil :type mtx:matrix)) | |
14 | +(defmethod print-object ((o tagger) stream) | |
15 | + (print-unreadable-object (o stream :type t :identity t))) | |
16 | + | |
17 | +;;;;;;;;;;;; | |
18 | +;;; constant | |
19 | +(eval-when (:compile-toplevel :load-toplevel :execute) | |
20 | + (igo::defconst-once-only +BOS-NODES+ (list (vn:make-bos/eos)))) | |
21 | + | |
22 | +;;;;;;;;;;;;;;;;;;;;; | |
23 | +;;; internal function | |
24 | +(defmacro nconcf (lst1 lst2) | |
25 | + `(setf ,lst1 (nconc ,lst1 ,lst2))) | |
26 | + | |
27 | +(defun set-mincost-node (vn prevs mtx wdc &aux (left-id (vn:left-id vn))) | |
28 | + (flet ((calc-cost (prev) | |
29 | + (+ (vn:cost prev) (mtx:link-cost (vn:right-id prev) left-id mtx)))) | |
30 | + (declare (inline calc-cost)) | |
31 | + | |
32 | + (let ((fst (first prevs))) | |
33 | + (setf (vn:prev vn) fst | |
34 | + (vn:cost vn) (calc-cost fst))) | |
35 | + | |
36 | + (dolist (p (cdr prevs)) | |
37 | + (let ((cost (calc-cost p))) | |
38 | + (when (< cost (vn:cost vn)) | |
39 | + (setf (vn:prev vn) p | |
40 | + (vn:cost vn) cost)))) | |
41 | + | |
42 | + (incf (vn:cost vn) (dic:word-cost (vn:word-id vn) wdc)) | |
43 | + vn)) | |
44 | + | |
45 | +(defun parse-impl (tagger cs len) | |
46 | + (declare (fixnum len)) | |
47 | + (let ((nodes (make-sequence 'simple-vector (1+ len) :initial-element nil)) | |
48 | + (wdc (tagger-wdc tagger)) | |
49 | + (unk (tagger-unk tagger)) | |
50 | + (mtx (tagger-mtx tagger))) | |
51 | + (setf (aref nodes 0) +BOS-NODES+) | |
52 | + | |
53 | + (loop FOR i FROM 0 BELOW len | |
54 | + FOR prevs = (aref nodes i) DO | |
55 | + (setf (code-stream:position cs) i) | |
56 | + (when prevs | |
57 | + (dolist (vn (unk:search cs unk wdc (dic:search cs '() wdc))) | |
58 | + (if (vn:space? vn) | |
59 | + (nconcf (aref nodes (vn:end vn)) prevs) | |
60 | + (push (set-mincost-node vn prevs mtx wdc) (aref nodes (vn:end vn))))))) | |
61 | + | |
62 | + (vn:prev (set-mincost-node (vn:make-bos/eos) (aref nodes len) mtx wdc)))) | |
63 | + | |
64 | +(defun coerce-to-simple-string (s) | |
65 | + (declare (string s)) | |
66 | + (the simple-string | |
67 | + (if (simple-string-p s) | |
68 | + s | |
69 | + (copy-seq s)))) | |
70 | + | |
71 | +(defmacro parse-then-map-result ((viterbi-node text tagger) &body body) | |
72 | + (let ((result (gensym))) | |
73 | + `(let ((,text (coerce-to-simple-string ,text)) | |
74 | + (,result '())) | |
75 | + (check-type ,tagger tagger) | |
76 | + (do ((,viterbi-node (parse-impl ,tagger (code-stream:make ,text 0) (length ,text)) | |
77 | + (vn:prev ,viterbi-node))) | |
78 | + ((null (vn:prev ,viterbi-node)) ,result) | |
79 | + (push (progn ,@body) ,result))))) | |
80 | + | |
81 | +;;;;;;;;;;;;;;;;;;;;; | |
82 | +;;; external function | |
83 | +(defun load-tagger (data-dir &optional (feature-parser #'identity)) | |
84 | + (setf *tagger* (make-tagger :wdc (dic:load data-dir feature-parser) | |
85 | + :unk (unk:load data-dir) | |
86 | + :mtx (mtx:load data-dir)))) | |
87 | + | |
88 | +(defun parse (text &optional (tagger *tagger*) &aux (wdc (tagger-wdc tagger))) | |
89 | + (parse-then-map-result (vn text tagger) | |
90 | + (list (subseq text (vn:start vn) (vn:end vn)) | |
91 | + (dic:word-data (vn:word-id vn) wdc) | |
92 | + (vn:start vn)))) | |
93 | + | |
94 | +(defun wakati (text &optional (tagger *tagger*)) | |
95 | + (parse-then-map-result (vn text tagger) | |
96 | + (subseq text (vn:start vn) (vn:end vn)))) | |
\ No newline at end of file |
@@ -0,0 +1,22 @@ | ||
1 | +(defpackage igo | |
2 | + (:use :common-lisp) | |
3 | + (:export *ipadic-feature-parser* | |
4 | + *tagger* | |
5 | + load-tagger | |
6 | + parse | |
7 | + wakati)) | |
8 | +(in-package :igo) | |
9 | + | |
10 | +(defvar *tagger*) | |
11 | + | |
12 | +(eval-when (:compile-toplevel :load-toplevel) | |
13 | + (defvar *optimize-fastest* '(optimize (speed 3) (debug 0) (safety 0) (compilation-speed 0)))) | |
14 | + | |
15 | +(defvar *ipadic-feature-parser* | |
16 | + (lambda (feature) | |
17 | + (declare #.*optimize-fastest* (simple-string feature)) | |
18 | + (flet ((kw (s) (intern s :keyword)) | |
19 | + (kw-if-* (s) (if (string= s "*") (intern s :keyword) s))) | |
20 | + (let ((fs (the list (igo::split "," feature)))) | |
21 | + (nconc (mapcar #'kw (subseq fs 0 6)) | |
22 | + (mapcar #'kw-if-* (subseq fs 6))))))) | |
\ No newline at end of file |
@@ -0,0 +1,13 @@ | ||
1 | +(in-package :igo) | |
2 | + | |
3 | +(delete-package-nickname :igo.varied-byte-stream) | |
4 | +(delete-package-nickname :igo.code-stream) | |
5 | +(delete-package-nickname :igo.trie) | |
6 | +(delete-package-nickname :igo.matrix) | |
7 | +(delete-package-nickname :igo.char-category) | |
8 | +(delete-package-nickname :igo.viterbi-node) | |
9 | +(delete-package-nickname :igo.word-dic) | |
10 | +(delete-package-nickname :igo.unknown) | |
11 | + | |
12 | + | |
13 | + |
@@ -0,0 +1,106 @@ | ||
1 | +(defpackage igo.word-dic | |
2 | + (:use :common-lisp) | |
3 | + (:nicknames :dic) | |
4 | + (:shadow load | |
5 | + search) | |
6 | + (:export load | |
7 | + word-dic | |
8 | + word-data | |
9 | + word-cost | |
10 | + search | |
11 | + search-from-trie-id)) | |
12 | +(in-package :igo.word-dic) | |
13 | + | |
14 | +;;;;;;;;;;; | |
15 | +;;; declaim | |
16 | +(declaim (inline word-data word-cost left-id right-id | |
17 | + high-surrogate-code? surrogate-char-code)) | |
18 | + | |
19 | +;;;;;;;;;; | |
20 | +;;; struct | |
21 | +(defstruct word-dic | |
22 | + (trie nil :type trie:trie) | |
23 | + (costs #() :type (simple-array (signed-byte 16))) | |
24 | + (left-ids #() :type (simple-array (signed-byte 16))) | |
25 | + (right-ids #() :type (simple-array (signed-byte 16))) | |
26 | + (data #() :type (simple-array t)) | |
27 | + (indices #() :type (simple-array (signed-byte 32)))) | |
28 | + | |
29 | +;;;;;;;;;;;;;;;;;;;;; | |
30 | +;;; internal function | |
31 | +(defun read-indices (path) | |
32 | + (vbs:with-input-file (in path) | |
33 | + (vbs:read-sequence in 4 (/ (vbs:file-size in) 4)))) | |
34 | + | |
35 | +(defun high-surrogate-code? (code) (<= #xD800 code #xDBFF)) | |
36 | +(defun surrogate-code-char (high low) | |
37 | + (code-char (+ (ash (+ (- high #xB800) #b1000000) 10) | |
38 | + (- low #xDC00)))) | |
39 | + | |
40 | +(defun read-data (path) | |
41 | + (vbs:with-input-file (in path) | |
42 | + (vbs:read-sequence in 2 (/ (vbs:file-size in) 2) :signed nil))) | |
43 | + | |
44 | +(defun data->string (data) | |
45 | + (let* ((buf (make-array (length data) :element-type 'character :fill-pointer 0)) | |
46 | + (high-sgt nil)) | |
47 | + ;; TODO: invalid surrogate code pair check | |
48 | + (loop FOR code ACROSS data DO | |
49 | + (cond ((high-surrogate-code? code) (setf high-sgt code)) | |
50 | + (high-sgt (vector-push (surrogate-code-char high-sgt code) buf)) | |
51 | + (t (vector-push (code-char code) buf)))) | |
52 | + (copy-seq buf))) | |
53 | + | |
54 | +(defun split-data (data offsets feature-parser) | |
55 | + (declare #.igo::*optimize-fastest* | |
56 | + ((simple-array (signed-byte 32)) offsets) | |
57 | + ((simple-array (unsigned-byte 16)) data) | |
58 | + (function feature-parser)) | |
59 | + (let ((ary (make-array (1- (length offsets))))) | |
60 | + (dotimes (i (length ary) ary) | |
61 | + (setf (aref ary i) | |
62 | + (funcall feature-parser | |
63 | + (data->string (subseq data (aref offsets i) (aref offsets (1+ i))))))))) | |
64 | + | |
65 | +(defun left-id (word-id wdic) (aref (word-dic-left-ids wdic) word-id)) | |
66 | +(defun right-id (word-id wdic) (aref (word-dic-right-ids wdic) word-id)) | |
67 | + | |
68 | +;;;;;;;;;;;;;;;;;;;;; | |
69 | +;;; external function | |
70 | +(defun load (root-dir &optional (feature-parser #'identity)) | |
71 | + (flet ((fullpath (name) (merge-pathnames root-dir name))) | |
72 | + (vbs:with-input-file (in (fullpath "word.inf")) | |
73 | + (let* ((word-count (/ (vbs:file-size in) (+ 4 2 2 2))) | |
74 | + (data (read-data (fullpath "word.dat"))) | |
75 | + (offsets (vbs:read-sequence in 4 word-count))) | |
76 | + (make-word-dic | |
77 | + :trie (trie:load (fullpath "word2id")) | |
78 | + :indices (read-indices (fullpath "word.ary.idx")) | |
79 | + :data (split-data data offsets feature-parser) | |
80 | + | |
81 | + :left-ids (vbs:read-sequence in 2 word-count) | |
82 | + :right-ids (vbs:read-sequence in 2 word-count) | |
83 | + :costs (vbs:read-sequence in 2 word-count)))))) | |
84 | + | |
85 | +(defun search (cs result wdic) | |
86 | + (declare #.igo::*optimize-fastest*) | |
87 | + (let ((start (code-stream:position cs)) | |
88 | + (indices (word-dic-indices wdic)) | |
89 | + (trie (word-dic-trie wdic))) | |
90 | + (trie:each-common-prefix (end id cs trie) | |
91 | + (loop FOR i fixnum FROM (aref indices id) BELOW (aref indices (1+ id)) DO | |
92 | + (push (vn:make i start end (left-id i wdic) (right-id i wdic) nil) | |
93 | + result))) | |
94 | + (setf (code-stream:position cs) start)) | |
95 | + result) | |
96 | + | |
97 | +(defun search-from-trie-id (id start end space? result wdic) | |
98 | + (declare #.igo::*optimize-fastest*) | |
99 | + (let ((indices (word-dic-indices wdic))) | |
100 | + (loop FOR i fixnum FROM (aref indices id) BELOW (aref indices (1+ id)) DO | |
101 | + (push (vn:make i start end (left-id i wdic) (right-id i wdic) space?) | |
102 | + result))) | |
103 | + result) | |
104 | + | |
105 | +(defun word-cost (word-id wdic) (aref (word-dic-costs wdic) word-id)) | |
106 | +(defun word-data (word-id wdic) (aref (word-dic-data wdic) word-id)) | |
\ No newline at end of file |
@@ -0,0 +1,18 @@ | ||
1 | +(in-package :igo) | |
2 | + | |
3 | +(defmacro delete-package-nickname (package) | |
4 | + `(eval-when (:compile-toplevel :load-toplevel :execute) | |
5 | + (rename-package ,package ,package))) | |
6 | + | |
7 | +(defmacro defconst-once-only (name value &optional documentation) | |
8 | + `(unless (boundp ',name) | |
9 | + (defconstant ,name ,value ,documentation))) | |
10 | + | |
11 | +(defun split (delim seq &aux (len (length delim))) | |
12 | + (when (zerop len) | |
13 | + (return-from split (list seq))) | |
14 | + | |
15 | + (loop FOR beg = 0 THEN (+ end len) | |
16 | + FOR end = (search delim seq :start2 beg) | |
17 | + COLLECT (subseq seq beg end) | |
18 | + WHILE end)) | |
\ No newline at end of file |
@@ -0,0 +1,55 @@ | ||
1 | +(defpackage :igo.unknown | |
2 | + (:use :common-lisp :igo.char-category) | |
3 | + (:nicknames :unk) | |
4 | + (:shadow load | |
5 | + search) | |
6 | + (:export load | |
7 | + unknown | |
8 | + search)) | |
9 | +(in-package :igo.unknown) | |
10 | + | |
11 | +;;;;;;;;;; | |
12 | +;;; struct | |
13 | +(defstruct (unknown (:conc-name "")) | |
14 | + (categorys nil :type category-set) | |
15 | + (space-id 0 :type fixnum)) | |
16 | + | |
17 | +;;;;;;;;;;;;;;;;;;;;; | |
18 | +;;; external function | |
19 | +(defun load (root-dir) | |
20 | + (let* ((cts (igo.char-category:load root-dir)) | |
21 | + (unk (make-unknown :categorys cts))) | |
22 | + (setf (space-id unk) | |
23 | + (category-trie-id (category (char-code #\Space) cts))) | |
24 | + unk)) | |
25 | + | |
26 | +(defun search (cs unk wdic result) | |
27 | + (declare #.igo::*optimize-fastest*) | |
28 | + (prog* ((start (code-stream:position cs)) | |
29 | + (code (code-stream:read cs)) | |
30 | + (categorys (categorys unk)) | |
31 | + (ct (category code categorys))) | |
32 | + (when (and result (not (category-invoke? ct))) ; invoke | |
33 | + (go :end)) | |
34 | + | |
35 | + (let* ((trie-id (category-trie-id ct)) | |
36 | + (space? (= trie-id (space-id unk))) | |
37 | + (limit (category-length ct))) | |
38 | + (loop FOR len FROM 1 TO limit DO ; length | |
39 | + (setf result | |
40 | + (dic:search-from-trie-id trie-id start (code-stream:position cs) space? result wdic)) | |
41 | + (when (or (code-stream:end? cs) | |
42 | + (not (compatible? code (code-stream:read cs) categorys))) | |
43 | + (go :end))) | |
44 | + | |
45 | + (when (and (category-group? ct)) ; group | |
46 | + (loop (when (code-stream:end? cs) | |
47 | + (return)) | |
48 | + (unless (compatible? code (code-stream:read cs) categorys) | |
49 | + (code-stream:unread cs) | |
50 | + (return))) | |
51 | + (setf result | |
52 | + (dic:search-from-trie-id trie-id start (code-stream:position cs) space? result wdic)))) | |
53 | + :end | |
54 | + (setf (code-stream:position cs) start)) | |
55 | + result) | |
\ No newline at end of file |
@@ -0,0 +1,15 @@ | ||
1 | +(defpackage igo.type | |
2 | + (:use :common-lisp) | |
3 | + (:export array-index | |
4 | + character-code | |
5 | + utf16-code | |
6 | + negative-fixnum | |
7 | + n-byte)) | |
8 | +(in-package :igo.type) | |
9 | + | |
10 | +(deftype array-index () `(integer 0 ,array-total-size-limit)) | |
11 | +(deftype character-code () `(integer 0 ,char-code-limit)) | |
12 | +(deftype utf16-code () `(integer 0 #xFFFF)) | |
13 | +(deftype negative-fixnum ()`(integer ,most-negative-fixnum -1)) | |
14 | +(deftype n-byte (byte-size signed?) | |
15 | + `(,(if signed? 'signed-byte 'unsigned-byte) ,(* byte-size 8))) | |
\ No newline at end of file |
@@ -0,0 +1,75 @@ | ||
1 | +(defpackage igo.code-stream | |
2 | + (:use :common-lisp :igo.type) | |
3 | + (:nicknames :code-stream) | |
4 | + (:shadow read | |
5 | + position) | |
6 | + (:export read | |
7 | + unread | |
8 | + make | |
9 | + end? | |
10 | + position | |
11 | + +TERMINATE-CODE+)) | |
12 | +(in-package :igo.code-stream) | |
13 | + | |
14 | +;;;;;;;;;;; | |
15 | +;;; declaim | |
16 | +(declaim (inline end? code low-surrogate high-surrogate) | |
17 | + (optimize (speed 3) (debug 0) (safety 0) (compilation-speed 0)) | |
18 | + (ftype (function (code-stream) utf16-code) read)) | |
19 | + | |
20 | +;;;;;;;;;; | |
21 | +;;; struct | |
22 | +(defstruct (code-stream (:constructor make (source start &aux (position start))) | |
23 | + (:conc-name "")) | |
24 | + (source "" :type simple-string) | |
25 | + (position 0 :type array-index) | |
26 | + (surrogate? nil :type boolean)) | |
27 | + | |
28 | +;;;;;;;;;;;; | |
29 | +;;; constant | |
30 | +(eval-when (:compile-toplevel :load-toplevel :execute) | |
31 | + (defconstant +TERMINATE-CODE+ 0)) | |
32 | + | |
33 | +;;;;;;;;;;;;;;;;;;;;; | |
34 | +;;; internal function | |
35 | +(defun code (code-stream) | |
36 | + (char-code (char (source code-stream) (position code-stream)))) | |
37 | + | |
38 | +(defun low-surrogate (code) | |
39 | + (declare (character-code code)) | |
40 | + (+ #xDC00 (ldb (byte 10 0) code))) | |
41 | + | |
42 | +(defun high-surrogate (code) | |
43 | + (declare (character-code code)) | |
44 | + (+ #xB800 (- (ldb (byte 11 10) code) #b1000000))) | |
45 | + | |
46 | +;;;;;;;;;;;;;;;;;;;;; | |
47 | +;;; external function | |
48 | +(defun end? (code-stream) | |
49 | + (= (position code-stream) (length (source code-stream)))) | |
50 | + | |
51 | +(defun read (code-stream) | |
52 | + (declare (code-stream code-stream)) | |
53 | + (with-slots (position surrogate?) code-stream | |
54 | + (cond (surrogate? | |
55 | + (setf surrogate? nil) | |
56 | + (prog1 (low-surrogate (code code-stream)) | |
57 | + (incf position))) | |
58 | + | |
59 | + ((end? code-stream) | |
60 | + +TERMINATE-CODE+) | |
61 | + | |
62 | + (t | |
63 | + (let ((code (code code-stream))) | |
64 | + (if (> code #xFFFF) | |
65 | + (progn (setf surrogate? t) | |
66 | + (high-surrogate code)) | |
67 | + (progn (incf position) | |
68 | + code))))))) | |
69 | + | |
70 | +(defun unread (code-stream) | |
71 | + (declare (code-stream code-stream)) | |
72 | + (with-slots (position surrogate?) code-stream | |
73 | + (if surrogate? | |
74 | + (setf surrogate? nil) | |
75 | + (decf position)))) | |
\ No newline at end of file |
@@ -0,0 +1,28 @@ | ||
1 | +(defpackage igo.viterbi-node | |
2 | + (:use :common-lisp) | |
3 | + (:nicknames :vn) | |
4 | + (:export make make-bos/eos | |
5 | + cost prev word-id left-id right-id start end space?)) | |
6 | +(in-package :igo.viterbi-node) | |
7 | + | |
8 | +;;;;;;;;;;; | |
9 | +;;; declaim | |
10 | +(declaim (inline new-bos/eos)) | |
11 | + | |
12 | +;;;;;;;;;; | |
13 | +;;; struct | |
14 | +(defstruct (viterbi-node (:constructor make (word-id start end left-id right-id space?)) | |
15 | + (:conc-name "") | |
16 | + (:type vector)) | |
17 | + (cost 0 :type fixnum) | |
18 | + (prev nil :type t) | |
19 | + (left-id 0 :type fixnum) | |
20 | + (right-id 0 :type fixnum) | |
21 | + (word-id 0 :type fixnum) | |
22 | + (start 0 :type fixnum) | |
23 | + (end 0 :type fixnum) | |
24 | + (space? nil :type boolean)) | |
25 | + | |
26 | +;;;;;;;;;;;;;;;;;;;;; | |
27 | +;;; external function | |
28 | +(defun make-bos/eos () (make 0 0 0 0 0 nil)) | |
\ No newline at end of file |
@@ -0,0 +1,58 @@ | ||
1 | +(defpackage igo.char-category | |
2 | + (:use :common-lisp) | |
3 | + (:shadow load) | |
4 | + (:export load | |
5 | + category | |
6 | + category-set | |
7 | + compatible? | |
8 | + category-trie-id | |
9 | + category-length | |
10 | + category-invoke? | |
11 | + category-group?)) | |
12 | +(in-package :igo.char-category) | |
13 | + | |
14 | +;;;;;;;;;;; | |
15 | +;;; declaim | |
16 | +(declaim (inline category compatible?)) | |
17 | + | |
18 | +;;;;;;;;;; | |
19 | +;;; struct | |
20 | +(defstruct category | |
21 | + (trie-id 0 :type fixnum) | |
22 | + (length 0 :type fixnum) | |
23 | + (invoke? nil :type boolean) | |
24 | + (group? nil :type boolean)) | |
25 | + | |
26 | +(defstruct (category-set (:conc-name "")) | |
27 | + (categorys #() :type (simple-array category)) | |
28 | + (char->id #() :type (simple-array (signed-byte 32))) | |
29 | + (eql-masks #() :type (simple-array (signed-byte 32)))) | |
30 | + | |
31 | +;;;;;;;;;;;;;;;;;;;;; | |
32 | +;;; internal function | |
33 | +(defun load-categorys (root-dir) | |
34 | + (vbs:with-input-file (in (merge-pathnames "char.category" root-dir)) | |
35 | + (let ((data (vbs:read-sequence in 4 (/ (vbs:file-size in) 4)))) | |
36 | + (coerce | |
37 | + (loop FOR i FROM 0 BELOW (length data) BY 4 COLLECT | |
38 | + (make-category :trie-id (aref data (+ i 0)) | |
39 | + :length (aref data (+ i 1)) | |
40 | + :invoke? (= 1 (aref data (+ i 2))) | |
41 | + :group? (= 1 (aref data (+ i 3))))) | |
42 | + 'vector)))) | |
43 | + | |
44 | +;;;;;;;;;;;;;;;;;;;;; | |
45 | +;;; external-function | |
46 | +(defun load (root-dir) | |
47 | + (vbs:with-input-file (in (merge-pathnames "code2category" root-dir)) | |
48 | + (make-category-set | |
49 | + :categorys (load-categorys root-dir) | |
50 | + :char->id (vbs:read-sequence in 4 (/ (vbs:file-size in) 4 2)) | |
51 | + :eql-masks (vbs:read-sequence in 4 (/ (vbs:file-size in) 4 2))))) | |
52 | + | |
53 | +(defun category (code cset) | |
54 | + (aref (categorys cset) (aref (char->id cset) code))) | |
55 | + | |
56 | +(defun compatible? (code1 code2 cset) | |
57 | + (let ((eqls (eql-masks cset))) | |
58 | + (logtest (aref eqls code1) (aref eqls code2)))) | |
\ No newline at end of file |
@@ -0,0 +1,78 @@ | ||
1 | +(defpackage igo.trie | |
2 | + (:use :common-lisp :igo.type) | |
3 | + (:nicknames :trie) | |
4 | + (:shadow load) | |
5 | + (:export trie | |
6 | + load | |
7 | + each-common-prefix)) | |
8 | +(in-package :igo.trie) | |
9 | + | |
10 | +;;;;;;;;;;; | |
11 | +;;; declaim | |
12 | +(declaim (inline id including-tail?)) | |
13 | + | |
14 | +;;;;;;;;;; | |
15 | +;;; struct | |
16 | +(defstruct (trie (:conc-name "")) | |
17 | + (element-count 0 :type fixnum) | |
18 | + (begs #() :type (simple-array (signed-byte 32))) | |
19 | + (lens #() :type (simple-array (signed-byte 16))) | |
20 | + (base #() :type (simple-array (signed-byte 32))) | |
21 | + (chck #() :type (simple-array (unsigned-byte 16))) | |
22 | + (tail #() :type (simple-array (unsigned-byte 16)))) | |
23 | + | |
24 | +;;;;;;;;;;;;;;;;;;;;; | |
25 | +;;; internal function | |
26 | +(defun id (node) (1- (- (the negative-fixnum node)))) | |
27 | + | |
28 | +(defun including-tail? (cs node trie &aux (id (id node)) (tail (tail trie))) | |
29 | + (loop REPEAT (aref (lens trie) id) | |
30 | + FOR i fixnum FROM (aref (begs trie) id) | |
31 | + ALWAYS (= (aref tail i) (code-stream:read cs)))) | |
32 | + | |
33 | +(defmacro with-gensyms (vars &body body) | |
34 | + `(let ,(mapcar (lambda (v) `(,v (gensym))) vars) | |
35 | + ,@body)) | |
36 | + | |
37 | +;;;;;;;;;;;;;;;;;;;;; | |
38 | +;;; external function | |
39 | +(defun load (path) | |
40 | + (vbs:with-input-file (in path) | |
41 | + (let ((node-size (vbs:read-byte in 4)) | |
42 | + (tind-size (vbs:read-byte in 4)) | |
43 | + (tail-size (vbs:read-byte in 4))) | |
44 | + (make-trie | |
45 | + :element-count tind-size | |
46 | + :begs (vbs:read-sequence in 4 tind-size) | |
47 | + :base (vbs:read-sequence in 4 node-size) | |
48 | + :lens (vbs:read-sequence in 2 tind-size) | |
49 | + :chck (vbs:read-sequence in 2 node-size :signed nil) | |
50 | + :tail (vbs:read-sequence in 2 tail-size :signed nil))))) | |
51 | + | |
52 | +(defmacro each-common-prefix ((pos id cs trie) &body body) | |
53 | + (with-gensyms (base chck node code idx loop-block) | |
54 | + `(let* ((,base (base ,trie)) | |
55 | + (,chck (chck ,trie)) | |
56 | + (,node (aref ,base 0))) | |
57 | + (declare (fixnum ,node)) | |
58 | + (block ,loop-block | |
59 | + (loop FOR ,code = (code-stream:read ,cs) DO | |
60 | + (unless (= ,code code-stream:+TERMINATE-CODE+) | |
61 | + (let ((,idx (+ ,node code-stream:+TERMINATE-CODE+))) | |
62 | + (when (= (aref ,chck ,idx) code-stream:+TERMINATE-CODE+) | |
63 | + (let ((,pos (1- (code-stream:position ,cs))) | |
64 | + (,id (id (aref ,base ,idx)))) | |
65 | + ,@body)))) | |
66 | + | |
67 | + (prog ((,idx (+ ,node ,code))) | |
68 | + (setf ,node (aref ,base ,idx)) | |
69 | + (when (= (aref ,chck ,idx) ,code) | |
70 | + (if (plusp ,node) | |
71 | + (go :continue) | |
72 | + (when (including-tail? ,cs ,node ,trie) | |
73 | + (let ((,pos (code-stream:position ,cs)) | |
74 | + (,id (id ,node))) | |
75 | + ,@body)))) | |
76 | + (return-from ,loop-block) | |
77 | + | |
78 | + :continue)))))) | |
\ No newline at end of file |
@@ -0,0 +1,35 @@ | ||
1 | +(defpackage igo.matrix | |
2 | + (:use :common-lisp) | |
3 | + (:nicknames :mtx) | |
4 | + (:shadow load) | |
5 | + (:export load | |
6 | + link-cost | |
7 | + matrix)) | |
8 | +(in-package :igo.matrix) | |
9 | + | |
10 | +;;;;;;;;;;; | |
11 | +;;; declaim | |
12 | +(declaim (inline link-cost)) | |
13 | + | |
14 | +;;;;;;;; | |
15 | +;;; struct | |
16 | +(defstruct (matrix (:conc-name "")) | |
17 | + (matrix #() :type (simple-array (signed-byte 16))) | |
18 | + (left-size 0 :type (unsigned-byte 16)) | |
19 | + (right-size 0 :type (unsigned-byte 16))) | |
20 | + | |
21 | +;;;;;;;;;;;;;;;;;;;;; | |
22 | +;;; external function | |
23 | +(defun load (data-dir) | |
24 | + (vbs:with-input-file (in (merge-pathnames "matrix.bin" data-dir)) | |
25 | + (let ((left-size (vbs:read-byte in 4)) | |
26 | + (right-size (vbs:read-byte in 4))) | |
27 | + (make-matrix :left-size left-size | |
28 | + :right-size right-size | |
29 | + :matrix (vbs:read-sequence in 2 (* left-size right-size)))))) | |
30 | + | |
31 | +(defun link-cost (left-id right-id matrix) | |
32 | + (declare ((unsigned-byte 16) left-id right-id)) | |
33 | + (the (signed-byte 16) | |
34 | + (aref (matrix matrix) | |
35 | + (+ (* (right-size matrix) right-id) left-id)))) | |
\ No newline at end of file |
@@ -0,0 +1,42 @@ | ||
1 | +(defpackage igo.varied-byte-stream | |
2 | + (:use :common-lisp :igo.type) | |
3 | + (:nicknames :vbs) | |
4 | + (:shadow read-byte | |
5 | + read-sequence) | |
6 | + (:export with-input-file | |
7 | + read-byte | |
8 | + read-sequence | |
9 | + file-size)) | |
10 | +(in-package :igo.varied-byte-stream) | |
11 | + | |
12 | +;;;;;;;;;; | |
13 | +;;; struct | |
14 | +(defstruct varied-byte-stream | |
15 | + (source nil :type file-stream) | |
16 | + (offset 0 :type fixnum)) | |
17 | + | |
18 | +;;;;;;;;;;;;;;;;;;;;; | |
19 | +;;; external function | |
20 | +(defmacro with-input-file ((stream filespec) &body body) | |
21 | + `(with-open-file (,stream ,filespec) | |
22 | + (let ((,stream (make-varied-byte-stream :source ,stream))) | |
23 | + ,@body))) | |
24 | + | |
25 | +(defun file-size (vbs) | |
26 | + (file-length (varied-byte-stream-source vbs))) | |
27 | + | |
28 | +(defun read-byte (varied-byte-stream byte-size &key (signed t)) | |
29 | + (with-slots (source offset) varied-byte-stream | |
30 | + (with-open-file (in source :element-type `(n-byte ,byte-size ,signed)) | |
31 | + (file-position in (/ offset byte-size)) | |
32 | + (prog1 (common-lisp:read-byte in) | |
33 | + (incf offset byte-size))))) | |
34 | + | |
35 | +(defun read-sequence (varied-byte-stream byte-size count &key (signed t)) | |
36 | + (with-slots (source offset) varied-byte-stream | |
37 | + (with-open-file (in source :element-type `(n-byte ,byte-size ,signed)) | |
38 | + (file-position in (/ offset byte-size)) | |
39 | + (let ((buf (make-array count :element-type `(n-byte ,byte-size ,signed)))) | |
40 | + (common-lisp:read-sequence buf in) | |
41 | + (incf offset (* byte-size count)) | |
42 | + buf)))) | |
\ No newline at end of file |
@@ -0,0 +1,21 @@ | ||
1 | +The MIT License | |
2 | + | |
3 | +Copyright (c) 2010 Takeru Ohta <phjgt308@ybb.ne.jp> | |
4 | + | |
5 | +Permission is hereby granted, free of charge, to any person obtaining a copy | |
6 | +of this software and associated documentation files (the "Software"), to deal | |
7 | +in the Software without restriction, including without limitation the rights | |
8 | +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
9 | +copies of the Software, and to permit persons to whom the Software is | |
10 | +furnished to do so, subject to the following conditions: | |
11 | + | |
12 | +The above copyright notice and this permission notice shall be included in | |
13 | +all copies or substantial portions of the Software. | |
14 | + | |
15 | +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
16 | +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
17 | +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
18 | +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
19 | +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
20 | +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
21 | +THE SOFTWARE. |