OSDN > 開發者 > lafekasloan101 > 工作室 > system-coremmmm > 提交

system-corennnn
Fork

R/O
HTTP
SSH
HTTPS

提交

Commit MetaInfo

修訂	3c28cda5d0120eb7bf7a49b36b96f45c0a588232 (tree)
時間	2016-07-22 09:35:58
作者	Sergio Giro <sgiro@goog...>
Committer	gitbuildkicker

Log Message

libutils/Unicode.cpp: Correct length computation and add checks for utf16->utf8

Inconsistent behaviour between utf16_to_utf8 and utf16_to_utf8_length
is causing a heap overflow.

Correcting the length computation and adding bound checks to the
conversion functions.

Test: ran libutils_tests
Bug: 29250543
Change-Id: I6115e3357141ed245c63c6eb25fc0fd0a9a7a2bb
(cherry picked from commit c4966a363e46d2e1074d1a365e232af0dcedd6a1)

Change Summary

modified: include/utils/Unicode.h (diff)
modified: libutils/String8.cpp (diff)
modified: libutils/Unicode.cpp (diff)
modified: libutils/tests/String8_test.cpp (diff)

差異

--- a/include/utils/Unicode.h

+++ b/include/utils/Unicode.h

		@@ -87,7 +87,7 @@ ssize_t utf32_to_utf8_length(const char32_t *src, size_t src_len);
87	87	* "dst" becomes \xE3\x81\x82\xE3\x81\x84
88	88	* (note that "dst" is NOT null-terminated, like strncpy)
89	89	*/
90		-void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst);
	90	+void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst, size_t dst_len);
91	91
92	92	/**
93	93	* Returns the unicode value at "index".

		@@ -109,7 +109,7 @@ ssize_t utf16_to_utf8_length(const char16_t *src, size_t src_len);
109	109	* enough to fit the UTF-16 as measured by utf16_to_utf8_length with an added
110	110	* NULL terminator.
111	111	*/
112		-void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst);
	112	+void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst, size_t dst_len);
113	113
114	114	/**
115	115	* Returns the length of "src" when "src" is valid UTF-8 string.

--- a/libutils/String8.cpp

+++ b/libutils/String8.cpp

		@@ -102,20 +102,21 @@ static char* allocFromUTF16(const char16_t* in, size_t len)
102	102	{
103	103	if (len == 0) return getEmptyString();
104	104
105		- const ssize_t bytes = utf16_to_utf8_length(in, len);
106		- if (bytes < 0) {
	105	+ // Allow for closing '\0'
	106	+ const ssize_t resultStrLen = utf16_to_utf8_length(in, len) + 1;
	107	+ if (resultStrLen < 1) {
107	108	return getEmptyString();
108	109	}
109	110
110		- SharedBuffer* buf = SharedBuffer::alloc(bytes+1);
	111	+ SharedBuffer* buf = SharedBuffer::alloc(resultStrLen);
111	112	ALOG_ASSERT(buf, "Unable to allocate shared buffer");
112	113	if (!buf) {
113	114	return getEmptyString();
114	115	}
115	116
116		- char* str = (char*)buf->data();
117		- utf16_to_utf8(in, len, str);
118		- return str;
	117	+ char* resultStr = (char*)buf->data();
	118	+ utf16_to_utf8(in, len, resultStr, resultStrLen);
	119	+ return resultStr;
119	120	}
120	121
121	122	static char* allocFromUTF32(const char32_t* in, size_t len)

		@@ -124,21 +125,21 @@ static char* allocFromUTF32(const char32_t* in, size_t len)
124	125	return getEmptyString();
125	126	}
126	127
127		- const ssize_t bytes = utf32_to_utf8_length(in, len);
128		- if (bytes < 0) {
	128	+ const ssize_t resultStrLen = utf32_to_utf8_length(in, len) + 1;
	129	+ if (resultStrLen < 1) {
129	130	return getEmptyString();
130	131	}
131	132
132		- SharedBuffer* buf = SharedBuffer::alloc(bytes+1);
	133	+ SharedBuffer* buf = SharedBuffer::alloc(resultStrLen);
133	134	ALOG_ASSERT(buf, "Unable to allocate shared buffer");
134	135	if (!buf) {
135	136	return getEmptyString();
136	137	}
137	138
138		- char* str = (char*) buf->data();
139		- utf32_to_utf8(in, len, str);
	139	+ char* resultStr = (char*) buf->data();
	140	+ utf32_to_utf8(in, len, resultStr, resultStrLen);
140	141
141		- return str;
	142	+ return resultStr;
142	143	}
143	144
144	145	// ---------------------------------------------------------------------------

--- a/libutils/Unicode.cpp

+++ b/libutils/Unicode.cpp

		@@ -14,6 +14,7 @@
14	14	* limitations under the License.
15	15	*/
16	16
	17	+#include <log/log.h>
17	18	#include <utils/Unicode.h>
18	19
19	20	#include <stddef.h>

		@@ -182,7 +183,7 @@ ssize_t utf32_to_utf8_length(const char32_t *src, size_t src_len)
182	183	return ret;
183	184	}
184	185
185		-void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst)
	186	+void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst, size_t dst_len)
186	187	{
187	188	if (src == NULL || src_len == 0 || dst == NULL) {
188	189	return;

		@@ -193,9 +194,12 @@ void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst)
193	194	char *cur = dst;
194	195	while (cur_utf32 < end_utf32) {
195	196	size_t len = utf32_codepoint_utf8_length(*cur_utf32);
	197	+ LOG_ALWAYS_FATAL_IF(dst_len < len, "%zu < %zu", dst_len, len);
196	198	utf32_codepoint_to_utf8((uint8_t *)cur, *cur_utf32++, len);
197	199	cur += len;
	200	+ dst_len -= len;
198	201	}
	202	+ LOG_ALWAYS_FATAL_IF(dst_len < 1, "dst_len < 1: %zu < 1", dst_len);
199	203	*cur = '\0';
200	204	}
201	205

		@@ -324,7 +328,7 @@ int strzcmp16_h_n(const char16_t *s1H, size_t n1, const char16_t *s2N, size_t n2
324	328	: 0);
325	329	}
326	330
327		-void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst)
	331	+void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst, size_t dst_len)
328	332	{
329	333	if (src == NULL || src_len == 0 || dst == NULL) {
330	334	return;

		@@ -345,9 +349,12 @@ void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst)
345	349	utf32 = (char32_t) *cur_utf16++;
346	350	}
347	351	const size_t len = utf32_codepoint_utf8_length(utf32);
	352	+ LOG_ALWAYS_FATAL_IF(dst_len < len, "%zu < %zu", dst_len, len);
348	353	utf32_codepoint_to_utf8((uint8_t*)cur, utf32, len);
349	354	cur += len;
	355	+ dst_len -= len;
350	356	}
	357	+ LOG_ALWAYS_FATAL_IF(dst_len < 1, "%zu < 1", dst_len);
351	358	*cur = '\0';
352	359	}
353	360

		@@ -408,10 +415,10 @@ ssize_t utf16_to_utf8_length(const char16_t *src, size_t src_len)
408	415	const char16_t* const end = src + src_len;
409	416	while (src < end) {
410	417	if ((*src & 0xFC00) == 0xD800 && (src + 1) < end
411		- && (*++src & 0xFC00) == 0xDC00) {
	418	+ && (*(src + 1) & 0xFC00) == 0xDC00) {
412	419	// surrogate pairs are always 4 bytes.
413	420	ret += 4;
414		- src++;
	421	+ src += 2;
415	422	} else {
416	423	ret += utf32_codepoint_utf8_length((char32_t) *src++);
417	424	}

--- a/libutils/tests/String8_test.cpp

+++ b/libutils/tests/String8_test.cpp

		@@ -17,6 +17,7 @@
17	17	#define LOG_TAG "String8_test"
18	18	#include <utils/Log.h>
19	19	#include <utils/String8.h>
	20	+#include <utils/String16.h>
20	21
21	22	#include <gtest/gtest.h>
22	23

		@@ -72,4 +73,22 @@ TEST_F(String8Test, OperatorPlusEquals) {
72	73	EXPECT_STREQ(src3, " Verify me.");
73	74	}
74	75
	76	+// http://b/29250543
	77	+TEST_F(String8Test, CorrectInvalidSurrogate) {
	78	+ // d841d8 is an invalid start for a surrogate pair. Make sure this is handled by ignoring the
	79	+ // first character in the pair and handling the rest correctly.
	80	+ String16 string16(u"\xd841\xd841\xdc41\x0000");
	81	+ String8 string8(string16);
	82	+
	83	+ EXPECT_EQ(4U, string8.length());
	84	+}
	85	+
	86	+TEST_F(String8Test, CheckUtf32Conversion) {
	87	+ // Since bound checks were added, check the conversion can be done without fatal errors.
	88	+ // The utf8 lengths of these chars are 1 + 2 + 3 + 4 = 10.
	89	+ const char32_t string32[] = U"\x0000007f\x000007ff\x0000911\x0010fffe";
	90	+ String8 string8(string32);
	91	+ EXPECT_EQ(10U, string8.length());
	92	+}
	93	+
75	94	}

system-corennnn Fork

提交

標籤

Frequently used words (click to add to your profile)

Commit MetaInfo

Log Message

Change Summary

差異

system-corennnn
Fork