OSDN > 開發者 > lafekasloan101 > 工作室 > system-coremmmm > 提交

system-corennnn
Fork

提交

DO NOT MERGE libutils/Unicode.cpp: Correct length computation and add checks for utf16->utf8 am: 5fce0542b6
am: d0ad14ca21

Change-Id: Id0901a86ea469d664badfee75db32948c88e72f7

--- a/include/utils/Unicode.h

+++ b/include/utils/Unicode.h

		@@ -90,7 +90,7 @@ ssize_t utf32_to_utf8_length(const char32_t *src, size_t src_len);
90	90	* "dst" becomes \xE3\x81\x82\xE3\x81\x84
91	91	* (note that "dst" is NOT null-terminated, like strncpy)
92	92	*/
93		-void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst);
	93	+void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst, size_t dst_len);
94	94
95	95	/**
96	96	* Returns the unicode value at "index".

		@@ -112,7 +112,7 @@ ssize_t utf16_to_utf8_length(const char16_t *src, size_t src_len);
112	112	* enough to fit the UTF-16 as measured by utf16_to_utf8_length with an added
113	113	* NULL terminator.
114	114	*/
115		-void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst);
	115	+void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst, size_t dst_len);
116	116
117	117	/**
118	118	* Returns the length of "src" when "src" is valid UTF-8 string.

--- a/libutils/String8.cpp

+++ b/libutils/String8.cpp

		@@ -102,20 +102,21 @@ static char* allocFromUTF16(const char16_t* in, size_t len)
102	102	{
103	103	if (len == 0) return getEmptyString();
104	104
105		- const ssize_t bytes = utf16_to_utf8_length(in, len);
106		- if (bytes < 0) {
	105	+ // Allow for closing '\0'
	106	+ const ssize_t resultStrLen = utf16_to_utf8_length(in, len) + 1;
	107	+ if (resultStrLen < 1) {
107	108	return getEmptyString();
108	109	}
109	110
110		- SharedBuffer* buf = SharedBuffer::alloc(bytes+1);
	111	+ SharedBuffer* buf = SharedBuffer::alloc(resultStrLen);
111	112	ALOG_ASSERT(buf, "Unable to allocate shared buffer");
112	113	if (!buf) {
113	114	return getEmptyString();
114	115	}
115	116
116		- char* str = (char*)buf->data();
117		- utf16_to_utf8(in, len, str);
118		- return str;
	117	+ char* resultStr = (char*)buf->data();
	118	+ utf16_to_utf8(in, len, resultStr, resultStrLen);
	119	+ return resultStr;
119	120	}
120	121
121	122	static char* allocFromUTF32(const char32_t* in, size_t len)

		@@ -124,21 +125,21 @@ static char* allocFromUTF32(const char32_t* in, size_t len)
124	125	return getEmptyString();
125	126	}
126	127
127		- const ssize_t bytes = utf32_to_utf8_length(in, len);
128		- if (bytes < 0) {
	128	+ const ssize_t resultStrLen = utf32_to_utf8_length(in, len) + 1;
	129	+ if (resultStrLen < 1) {
129	130	return getEmptyString();
130	131	}
131	132
132		- SharedBuffer* buf = SharedBuffer::alloc(bytes+1);
	133	+ SharedBuffer* buf = SharedBuffer::alloc(resultStrLen);
133	134	ALOG_ASSERT(buf, "Unable to allocate shared buffer");
134	135	if (!buf) {
135	136	return getEmptyString();
136	137	}
137	138
138		- char* str = (char*) buf->data();
139		- utf32_to_utf8(in, len, str);
	139	+ char* resultStr = (char*) buf->data();
	140	+ utf32_to_utf8(in, len, resultStr, resultStrLen);
140	141
141		- return str;
	142	+ return resultStr;
142	143	}
143	144
144	145	// ---------------------------------------------------------------------------

--- a/libutils/Unicode.cpp

+++ b/libutils/Unicode.cpp

		@@ -14,6 +14,7 @@
14	14	* limitations under the License.
15	15	*/
16	16
	17	+#include <log/log.h>
17	18	#include <utils/Unicode.h>
18	19
19	20	#include <stddef.h>

		@@ -188,7 +189,7 @@ ssize_t utf32_to_utf8_length(const char32_t *src, size_t src_len)
188	189	return ret;
189	190	}
190	191
191		-void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst)
	192	+void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst, size_t dst_len)
192	193	{
193	194	if (src == NULL || src_len == 0 || dst == NULL) {
194	195	return;

		@@ -199,9 +200,12 @@ void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst)
199	200	char *cur = dst;
200	201	while (cur_utf32 < end_utf32) {
201	202	size_t len = utf32_codepoint_utf8_length(*cur_utf32);
	203	+ LOG_ALWAYS_FATAL_IF(dst_len < len, "%zu < %zu", dst_len, len);
202	204	utf32_codepoint_to_utf8((uint8_t *)cur, *cur_utf32++, len);
203	205	cur += len;
	206	+ dst_len -= len;
204	207	}
	208	+ LOG_ALWAYS_FATAL_IF(dst_len < 1, "dst_len < 1: %zu < 1", dst_len);
205	209	*cur = '\0';
206	210	}
207	211

		@@ -330,7 +334,7 @@ int strzcmp16_h_n(const char16_t *s1H, size_t n1, const char16_t *s2N, size_t n2
330	334	: 0);
331	335	}
332	336
333		-void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst)
	337	+void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst, size_t dst_len)
334	338	{
335	339	if (src == NULL || src_len == 0 || dst == NULL) {
336	340	return;

		@@ -350,9 +354,12 @@ void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst)
350	354	utf32 = (char32_t) *cur_utf16++;
351	355	}
352	356	const size_t len = utf32_codepoint_utf8_length(utf32);
	357	+ LOG_ALWAYS_FATAL_IF(dst_len < len, "%zu < %zu", dst_len, len);
353	358	utf32_codepoint_to_utf8((uint8_t*)cur, utf32, len);
354	359	cur += len;
	360	+ dst_len -= len;
355	361	}
	362	+ LOG_ALWAYS_FATAL_IF(dst_len < 1, "%zu < 1", dst_len);
356	363	*cur = '\0';
357	364	}
358	365

		@@ -413,10 +420,10 @@ ssize_t utf16_to_utf8_length(const char16_t *src, size_t src_len)
413	420	const char16_t* const end = src + src_len;
414	421	while (src < end) {
415	422	if ((*src & 0xFC00) == 0xD800 && (src + 1) < end
416		- && (*++src & 0xFC00) == 0xDC00) {
	423	+ && (*(src + 1) & 0xFC00) == 0xDC00) {
417	424	// surrogate pairs are always 4 bytes.
418	425	ret += 4;
419		- src++;
	426	+ src += 2;
420	427	} else {
421	428	ret += utf32_codepoint_utf8_length((char32_t) *src++);
422	429	}

--- a/libutils/tests/String8_test.cpp

+++ b/libutils/tests/String8_test.cpp

		@@ -72,4 +73,23 @@ TEST_F(String8Test, OperatorPlusEquals) {
72	73	EXPECT_STREQ(src3, " Verify me.");
73	74	}
74	75
	76	+// http://b/29250543
	77	+TEST_F(String8Test, CorrectInvalidSurrogate) {
	78	+ // d841d8 is an invalid start for a surrogate pair. Make sure this is handled by ignoring the
	79	+ // first character in the pair and handling the rest correctly.
	80	+ char16_t char16_arr[] = { 0xd841, 0xd841, 0xdc41, 0x0000 };
	81	+ String16 string16(char16_arr);
	82	+ String8 string8(string16);
	83	+
	84	+ EXPECT_EQ(4U, string8.length());
	85	+}
	86	+
	87	+TEST_F(String8Test, CheckUtf32Conversion) {
	88	+ // Since bound checks were added, check the conversion can be done without fatal errors.
	89	+ // The utf8 lengths of these chars are 1 + 2 + 3 + 4 = 10.
	90	+ const char32_t string32[] = { 0x0000007f, 0x000007ff, 0x0000911, 0x0010fffe, 0 };
	91	+ String8 string8(string32);
	92	+ EXPECT_EQ(10U, string8.length());
	93	+}
	94	+
75	95	}