[ttssh2-commit] [10313] ヴィラーマ処理を追加

Back to archive index
scmno****@osdn***** scmno****@osdn*****
2022年 10月 13日 (木) 23:27:25 JST


Revision: 10313
          https://osdn.net/projects/ttssh2/scm/svn/commits/10313
Author:   zmatsuo
Date:     2022-10-13 23:27:25 +0900 (Thu, 13 Oct 2022)
Log Message:
-----------
ヴィラーマ処理を追加

- ヴィラーマ(virama)文字の次の文字を結合
  - Malayalam (マラヤーラム語)のvirama処理を削除
- 別の結合規則があるかもしれない
  - Combining_Class には virama 以外の定義ある

Modified Paths:
--------------
    trunk/teraterm/teraterm/buffer.c
    trunk/teraterm/teraterm/unicode.cpp
    trunk/teraterm/teraterm/unicode.h

Added Paths:
-----------
    trunk/teraterm/teraterm/unicode_virama.tbl

-------------- next part --------------
Modified: trunk/teraterm/teraterm/buffer.c
===================================================================
--- trunk/teraterm/teraterm/buffer.c	2022-10-13 14:27:14 UTC (rev 10312)
+++ trunk/teraterm/teraterm/buffer.c	2022-10-13 14:27:25 UTC (rev 10313)
@@ -2736,13 +2736,11 @@
 	if (combine_type != 0 || (p->u32_last == 0x200d)) {
 		return p;
 	}
-#if 1
-	// Malayalam
-	//  言語独自の処理?
-	if (p->u32_last == 0x0d4d) {
+
+	// ヴィラーマ処理
+	if (UnicodeIsVirama(p->u32_last) != 0) {
 		return p;
 	}
-#endif
 	return NULL;
 }
 

Modified: trunk/teraterm/teraterm/unicode.cpp
===================================================================
--- trunk/teraterm/teraterm/unicode.cpp	2022-10-13 14:27:14 UTC (rev 10312)
+++ trunk/teraterm/teraterm/unicode.cpp	2022-10-13 14:27:25 UTC (rev 10313)
@@ -229,7 +229,22 @@
 }
 #endif
 
+/**
+ *	ヴィラーマ?
+ *
+ *	@retval	0	ヴィラーマではない
+ *	@retval	1	ヴィラーマである
+ */
+int UnicodeIsVirama(unsigned long u32)
+{
+	const static UnicodeTable_t ViramaList[] = {
+#include "unicode_virama.tbl"
+	};
+	const int index = SearchTableSimple(ViramaList, _countof(ViramaList), u32);
+	return index != -1 ? 1 : 0;
+}
 
+
 #if 0
 int main(int, char *[])
 {

Modified: trunk/teraterm/teraterm/unicode.h
===================================================================
--- trunk/teraterm/teraterm/unicode.h	2022-10-13 14:27:14 UTC (rev 10312)
+++ trunk/teraterm/teraterm/unicode.h	2022-10-13 14:27:25 UTC (rev 10313)
@@ -39,6 +39,7 @@
 unsigned short UnicodeCombining(unsigned short first_code, unsigned short code);
 int UnicodeFromISO8859(int part, unsigned char b, unsigned short *u16);
 int UnicodeToISO8859(int part, unsigned long u32, unsigned char *b);
+int UnicodeIsVirama(unsigned long u32);
 
 #ifdef __cplusplus
 }

Added: trunk/teraterm/teraterm/unicode_virama.tbl
===================================================================
--- trunk/teraterm/teraterm/unicode_virama.tbl	                        (rev 0)
+++ trunk/teraterm/teraterm/unicode_virama.tbl	2022-10-13 14:27:25 UTC (rev 10313)
@@ -0,0 +1,57 @@
+// this file was generated by get_virama_table.pl
+{ 0x00094d, 0x00094d },		// 'DEVANAGARI SIGN VIRAMA'
+{ 0x0009cd, 0x0009cd },		// 'BENGALI SIGN VIRAMA'
+{ 0x000a4d, 0x000a4d },		// 'GURMUKHI SIGN VIRAMA'
+{ 0x000acd, 0x000acd },		// 'GUJARATI SIGN VIRAMA'
+{ 0x000b4d, 0x000b4d },		// 'ORIYA SIGN VIRAMA'
+{ 0x000bcd, 0x000bcd },		// 'TAMIL SIGN VIRAMA'
+{ 0x000c4d, 0x000c4d },		// 'TELUGU SIGN VIRAMA'
+{ 0x000ccd, 0x000ccd },		// 'KANNADA SIGN VIRAMA'
+{ 0x000d3b, 0x000d3c },		// 'MALAYALAM SIGN VERTICAL BAR VIRAMA'
+{ 0x000d4d, 0x000d4d },		// 'MALAYALAM SIGN VIRAMA'
+{ 0x000dca, 0x000dca },		// 'SINHALA SIGN AL-LAKUNA'
+{ 0x000e3a, 0x000e3a },		// 'THAI CHARACTER PHINTHU'
+{ 0x000eba, 0x000eba },		// 'LAO SIGN PALI VIRAMA'
+{ 0x000f84, 0x000f84 },		// 'TIBETAN MARK HALANTA'
+{ 0x001039, 0x00103a },		// 'MYANMAR SIGN VIRAMA'
+{ 0x001714, 0x001715 },		// 'TAGALOG SIGN VIRAMA'
+{ 0x001734, 0x001734 },		// 'HANUNOO SIGN PAMUDPOD'
+{ 0x0017d2, 0x0017d2 },		// 'KHMER SIGN COENG'
+{ 0x001a60, 0x001a60 },		// 'TAI THAM SIGN SAKOT'
+{ 0x001b44, 0x001b44 },		// 'BALINESE ADEG ADEG'
+{ 0x001baa, 0x001bab },		// 'SUNDANESE SIGN PAMAAEH'
+{ 0x001bf2, 0x001bf3 },		// 'BATAK PANGOLAT'
+{ 0x002d7f, 0x002d7f },		// 'TIFINAGH CONSONANT JOINER'
+{ 0x00a806, 0x00a806 },		// 'SYLOTI NAGRI SIGN HASANTA'
+{ 0x00a82c, 0x00a82c },		// 'SYLOTI NAGRI SIGN ALTERNATE HASANTA'
+{ 0x00a8c4, 0x00a8c4 },		// 'SAURASHTRA SIGN VIRAMA'
+{ 0x00a953, 0x00a953 },		// 'REJANG VIRAMA'
+{ 0x00a9c0, 0x00a9c0 },		// 'JAVANESE PANGKON'
+{ 0x00aaf6, 0x00aaf6 },		// 'MEETEI MAYEK VIRAMA'
+{ 0x00abed, 0x00abed },		// 'MEETEI MAYEK APUN IYEK'
+{ 0x010a3f, 0x010a3f },		// 'KHAROSHTHI VIRAMA'
+{ 0x011046, 0x011046 },		// 'BRAHMI VIRAMA'
+{ 0x011070, 0x011070 },		// 'BRAHMI SIGN OLD TAMIL VIRAMA'
+{ 0x01107f, 0x01107f },		// 'BRAHMI NUMBER JOINER'
+{ 0x0110b9, 0x0110b9 },		// 'KAITHI SIGN VIRAMA'
+{ 0x011133, 0x011134 },		// 'CHAKMA VIRAMA'
+{ 0x0111c0, 0x0111c0 },		// 'SHARADA SIGN VIRAMA'
+{ 0x011235, 0x011235 },		// 'KHOJKI SIGN VIRAMA'
+{ 0x0112ea, 0x0112ea },		// 'KHUDAWADI SIGN VIRAMA'
+{ 0x01134d, 0x01134d },		// 'GRANTHA SIGN VIRAMA'
+{ 0x011442, 0x011442 },		// 'NEWA SIGN VIRAMA'
+{ 0x0114c2, 0x0114c2 },		// 'TIRHUTA SIGN VIRAMA'
+{ 0x0115bf, 0x0115bf },		// 'SIDDHAM SIGN VIRAMA'
+{ 0x01163f, 0x01163f },		// 'MODI SIGN VIRAMA'
+{ 0x0116b6, 0x0116b6 },		// 'TAKRI SIGN VIRAMA'
+{ 0x01172b, 0x01172b },		// 'AHOM SIGN KILLER'
+{ 0x011839, 0x011839 },		// 'DOGRA SIGN VIRAMA'
+{ 0x01193d, 0x01193e },		// 'DIVES AKURU SIGN HALANTA'
+{ 0x0119e0, 0x0119e0 },		// 'NANDINAGARI SIGN VIRAMA'
+{ 0x011a34, 0x011a34 },		// 'ZANABAZAR SQUARE SIGN VIRAMA'
+{ 0x011a47, 0x011a47 },		// 'ZANABAZAR SQUARE SUBJOINER'
+{ 0x011a99, 0x011a99 },		// 'SOYOMBO SUBJOINER'
+{ 0x011c3f, 0x011c3f },		// 'BHAIKSUKI SIGN VIRAMA'
+{ 0x011d44, 0x011d45 },		// 'MASARAM GONDI SIGN HALANTA'
+{ 0x011d97, 0x011d97 },		// 'GUNJALA GONDI VIRAMA'
+{ 0x011f41, 0x011f42 },		// 'KAWI SIGN KILLER'


ttssh2-commit メーリングリストの案内
Back to archive index