mingw-catgets
Fork

(Original repository, No fork origin)

R/O
HTTP
SSH
HTTPS

提交

Commit MetaInfo

修訂	bd8c6e043589d7809cd1645087b04afb3f2e59fc (tree)
時間	2007-05-15 04:50:17
作者	Keith Marshall <keithmarshall@user...>
Committer	Keith Marshall

Log Message

Support all escape sequences required by POSIX 1003.1.

Change Summary

modified: ChangeLog (diff)
modified: mcsource.c (diff)

差異

--- a/ChangeLog

+++ b/ChangeLog

		@@ -1,3 +1,14 @@
	1	+2007-05-14 Keith Marshall <keithmarshall@users.sourceforge.net>
	2	+
	3	+ Support all escape sequences required by POSIX 1003.1.
	4	+
	5	+ * mcsource.c (OCTAL_SEQUENCE_DECODE): New manifest constant.
	6	+ (HEXADECIMAL_SEQUENCE_DECODE): Ditto, but not currently used.
	7	+ (mc_source): Interpret "\n", "\r", "\b", "\t", "\v" and "\f"
	8	+ standard escapes, and also "\ddd" generic octal digit sequence
	9	+ escapes, appearing in message text definitions; "\\" is also
	10	+ implicitly handled as required.
	11	+
1	12	2007-05-12 Keith Marshall <keithmarshall@users.sourceforge.net>
2	13
3	14	Add support for `delset' directive.

--- a/mcsource.c

+++ b/mcsource.c

		@@ -204,7 +204,7 @@ char mc_update_workspace( char buf, char *cache, unsigned int count )
204	204
205	205	struct msgdict mc_source( const char input )
206	206	{
207		-# define CODESET_DECLARED codeset_decl_src, codeset_decl_lineno
	207	+# define CODESET_DECLARED codeset_decl_src, codeset_decl_lineno
208	208
209	209	long accumulator;
210	210	int fd, input_fd, count;

		@@ -229,6 +229,20 @@ struct msgdict mc_source( const char input )
229	229	static iconv_t iconv_map[2] = {(iconv_t)(-1), (iconv_t)(-1)};
230	230	char *messages; off_t msgloc, headroom;
231	231
	232	+ /*
	233	+ * This `shift' state index is used to control interpretation
	234	+ * of octal escape sequences in message text; for normal text
	235	+ * processing, it should be set to zero.
	236	+ */
	237	+ unsigned shift = 0;
	238	+ /*
	239	+ * Other shift states supported, (they define the number of bits
	240	+ * by which the accumulator must be shifted to the left, in order
	241	+ * to multiply it by the associated number base), are:--
	242	+ */
	243	+# define OCTAL_SEQUENCE_DECODE 3
	244	+# define HEXADECIMAL_SEQUENCE_DECODE 4
	245	+
232	246	const char *dev_stdin = "/dev/stdin";
233	247	if( (strcmp( input, "-") == 0) || (strcmp( input, dev_stdin ) == 0) )
234	248	{

		@@ -300,7 +314,7 @@ struct msgdict mc_source( const char input )
300	314	if( (status & (NEWLINE | CONTINUED)) == NEWLINE )
301	315	{
302	316	/* When this new line is NOT simply a logical continuation
303		- * of the previous line...
	317	+ * of the previous line ...
304	318	*/
305	319	status &= ~MSGTEXT;
306	320	dfprintf(( stderr, "\n\n%s:%d:new input record", input, linenum ));

		@@ -362,7 +376,7 @@ struct msgdict mc_source( const char input )
362	376	*/
363	377	if( id == keyword )
364	378	{
365		- /* But, we didn't find any keyword...
	379	+ /* But, we didn't find any keyword ...
366	380	*
367	381	* This is a comment line, but it may be the special case of
368	382	* a codeset declaration comment, so we can't simply ignore it;

		@@ -431,14 +445,14 @@ struct msgdict mc_source( const char input )
431	445
432	446	else if( status & NUMERIC )
433	447	{
434		- /* We are parsing a numeric value...
435		- */
	448	+ /* We are parsing a numeric value ...
	449	+ */
436	450	if( isdigit( c ) )
437	451	{
438		- /* ...and the current character is part of the number,
	452	+ /* ... and the current character is part of the number,
439	453	* so add it into the accumulator.
440	454	*/
441		- accumulator = accumulator * 10 + c - L'0';
	455	+ accumulator = accumulator * 10 + c - L'0';
442	456	}
443	457
444	458	else if( isspace( c ) )

		@@ -616,105 +630,197 @@ struct msgdict mc_source( const char input )
616	630	else if( status & MSGTEXT )
617	631	{
618	632	/* We are compiling a message ...
619		- * Continue scanning the current input line,
620		- * until we find the end-of-line marker.
621		- */
622		- if( c != L'\n' )
623		- {
624		- /* We haven't reached end-of-line yet...
625		- * Check for other characters with special significance.
626		- */
627		- if( status & ESCAPE )
628		- {
629		- /* The current input character was escaped...
630		- * Clear the ESCAPE flag, and interpret this case.
631		- */
632		- size_t len = 0;
633		- status &= ~ESCAPE;
634		- dfprintf(( stderr, "%s:%u:", input, linenum ));
635		- switch ( c )
636		- {
637		- case L'r': /* embed a carriage return */
638		- len = mc_add_escape( iconv_map, messages + msgloc, L'\r' );
639		- break;
640		-
641		- case L'n': /* embed a newline */
642		- len = mc_add_escape( iconv_map, messages + msgloc, L'\n' );
643		- break;
644		-
645		- default: /* not a special case; just pass it through */
646		- xcount += skip;
647		- dfprintf(( stderr, "pass through escape code: %0#4.4x", c ));
648		- }
649		- if( len > (size_t)(0) )
	633	+ */
	634	+ if( shift )
	635	+ {
	636	+ /* The current input character is either part of an
	637	+ * escaped octal digit sequence, or it terminates one.
	638	+ */
	639	+ size_t len = 0;
	640	+ switch( c )
	641	+ {
	642	+ case L'0' ... L'7':
	643	+ /*
	644	+ * This is a continuation of the sequence ...
	645	+ */
	646	+ accumulator = (accumulator << shift) + c - L'0';
	647	+ break;
	648	+
	649	+ default:
	650	+ /*
	651	+ * This is the character immediately following
	652	+ * an encoded octal digit sequence ...
	653	+ */
	654	+ if( (accumulator > 0) && ((len =
	655	+ mc_add_escape( iconv_map, messages + msgloc, accumulator ))
	656	+ > (size_t)(0)) )
	657	+ {
	658	+ headroom -= len;
	659	+ msgloc += len;
	660	+ }
	661	+ shift = 0;
	662	+ }
	663	+ }
	664	+ /* Do not use `else' here; the `shift' state may have changed
	665	+ * since the preceding check, in which case, we may also need
	666	+ * to do this ...
	667	+ */
	668	+ if( shift == 0 )
	669	+ {
	670	+ /* Continue scanning the current input line,
	671	+ * until we find the end-of-line marker.
	672	+ */
	673	+ if( c != L'\n' )
	674	+ {
	675	+ /* We haven't reached end-of-line yet ...
	676	+ * Check for other characters with special significance.
	677	+ */
	678	+ if( status & ESCAPE )
650	679	{
651		- headroom -= len;
652		- msgloc += len;
	680	+ /* The current input character was escaped ...
	681	+ * Clear the ESCAPE flag, and interpret this case.
	682	+ */
	683	+ size_t len = 0;
	684	+ status &= ~ESCAPE;
	685	+ dfprintf(( stderr, "%s:%u:", input, linenum ));
	686	+ switch( c )
	687	+ {
	688	+ /* Thus, for the standard escape sequences ...
	689	+ */
	690	+ case L'b':
	691	+ /*
	692	+ * The "\b" escape sequence is to be interpreted as
	693	+ * a literal backspace; encode it ...
	694	+ */
	695	+ len = mc_add_escape( iconv_map, messages + msgloc, L'\b' );
	696	+ break;
	697	+
	698	+ case L'r':
	699	+ /*
	700	+ * Similarly for "\r", which is to be encoded as
	701	+ * a carriage return ...
	702	+ */
	703	+ len = mc_add_escape( iconv_map, messages + msgloc, L'\r' );
	704	+ break;
	705	+
	706	+ case L'n':
	707	+ /*
	708	+ * And for "\n", representing a newline ...
	709	+ */
	710	+ len = mc_add_escape( iconv_map, messages + msgloc, L'\n' );
	711	+ break;
	712	+
	713	+ case L't':
	714	+ /*
	715	+ * ... "\t", representing a horizontal tab ...
	716	+ */
	717	+ len = mc_add_escape( iconv_map, messages + msgloc, L'\t' );
	718	+ break;
	719	+
	720	+ case L'v':
	721	+ /*
	722	+ * ... "\v", representing a vertical tab ...
	723	+ */
	724	+ len = mc_add_escape( iconv_map, messages + msgloc, L'\v' );
	725	+ break;
	726	+
	727	+ case L'f':
	728	+ /*
	729	+ * ... and "\f", representing a form feed.
	730	+ */
	731	+ len = mc_add_escape( iconv_map, messages + msgloc, L'\f' );
	732	+ break;
	733	+
	734	+ case L'0' ... L'7':
	735	+ /*
	736	+ * This is the first in a "\ddd" octal digit sequence;
	737	+ * initialise the accumulator, and activate the appropriate
	738	+ * shift state, to capture the remaining digits.
	739	+ */
	740	+ accumulator = c - L'0';
	741	+ shift = OCTAL_SEQUENCE_DECODE;
	742	+ break;
	743	+
	744	+ default:
	745	+ /*
	746	+ * Anything else is not a special case; we can simply pass it
	747	+ * through as a regular character. Notice that we don't need
	748	+ * to treat "\\" as special; this default action produces the
	749	+ * desired effect.
	750	+ */
	751	+ xcount += skip;
	752	+ dfprintf(( stderr, "pass through escape code: %0#4.4x", c ));
	753	+ }
	754	+ if( len > (size_t)(0) )
	755	+ {
	756	+ headroom -= len;
	757	+ msgloc += len;
	758	+ }
653	759	}
654		- }
655	760
656		- else if( c == L'\\' )
657		- {
658		- /* This is the escape character...
659		- * Set the parser flags, so that any cached message data is flushed,
660		- * and switch to ESCAPE mode, to interpret the next character.
661		- */
662		- status |= FLUSH | ESCAPE;
663		- }
	761	+ else if( c == L'\\' )
	762	+ {
	763	+ /* This is the escape character ...
	764	+ * Set the parser flags, so that cached message data is flushed,
	765	+ * and switch to ESCAPE mode, to interpret the next character.
	766	+ */
	767	+ status |= FLUSH | ESCAPE;
	768	+ }
664	769
665		- else if( c == quote )
666		- {
667		- dfprintf(( stderr, "\n%s:%u:%s quoted context", input, linenum, (status & QUOTED) ? "end" : "begin" ));
668		- status = (status ^ QUOTED) | FLUSH;
669		- }
	770	+ else if( c == quote )
	771	+ {
	772	+ dfprintf(( stderr, "\n%s:%u:%s quoted context", input, linenum, (status & QUOTED) ? "end" : "begin" ));
	773	+ status = (status ^ QUOTED) | FLUSH;
	774	+ }
670	775
671		- else
672		- {
673		- xcount += skip;
674		- dfputc(( c, stderr ));
675		- }
676		- }
677		- if( count < ICONV_MB_LEN_MAX )
678		- {
679		- skip = 0;
680		- status |= FLUSH;
	776	+ else
	777	+ {
	778	+ xcount += skip;
	779	+ dfputc(( c, stderr ));
	780	+ }
	781	+ }
	782	+ if( count < ICONV_MB_LEN_MAX )
	783	+ {
	784	+ skip = 0;
	785	+ status |= FLUSH;
	786	+ }
681	787	}
682		- }
683	788
684		- if( c == L'\n' )
685		- {
686		- /* Mark the end of the current input line,
687		- * and schedule any pending message data from this line
688		- * for flushing to the message collection buffer.
689		- */
690		- status |= NEWLINE | FLUSH;
	789	+ if( c == L'\n' )
	790	+ {
	791	+ /* Mark the end of the current input line,
	792	+ * and schedule any pending message data from this line
	793	+ * for flushing to the message collection buffer.
	794	+ */
	795	+ status |= NEWLINE | FLUSH;
691	796
692		- /* If "QUOTED" context remains active, at the end of this line,
693		- * then we have an implicit continuation, so force it.
694		- */
695		- if( (status & QUOTED) == QUOTED )
696		- status |= CONTINUED;
	797	+ /* If "QUOTED" context remains active, at the end of this line,
	798	+ * then we have an implicit continuation, so force it.
	799	+ */
	800	+ if( (status & QUOTED) == QUOTED )
	801	+ status |= CONTINUED;
697	802
698		- /* Clean up the context of any pending directive processing.
699		- */
700		- switch( status & CATEGORY )
701		- {
702		- case DEFQUOTE:
703		- /*
704		- * If we see end of line with a DEFQUOTE pending,
705		- * then there was no defining character with the "quote" directive,
706		- * so we must disable "quote" character recognition.
707		- */
708		- quote = L'\0';
709		- dfprintf(( stderr, ": none assigned" ));
710		- break;
711		- }
	803	+ /* Clean up the context of any pending directive processing.
	804	+ */
	805	+ switch( status & CATEGORY )
	806	+ {
	807	+ case DEFQUOTE:
	808	+ /*
	809	+ * If we see end of line with a DEFQUOTE pending, then
	810	+ * there was no defining character with the "quote" directive,
	811	+ * so we must disable "quote" character recognition.
	812	+ */
	813	+ quote = L'\0';
	814	+ dfprintf(( stderr, ": none assigned" ));
	815	+ break;
	816	+ }
712	817
713		- if( (status & CONTINUED) == 0 )
714		- {
715		- status &= ~ENCODED;
716		- }
717		- }
	818	+ if( (status & CONTINUED) == 0 )
	819	+ {
	820	+ status &= ~ENCODED;
	821	+ }
	822	+ }
	823	+ }
718	824	}
719	825
720	826	if( status & FLUSH )

		@@ -801,12 +907,12 @@ struct msgdict mc_source( const char input )
801	907	status &= ~MSGTEXT;
802	908	}
803	909	/*
804		- * At the end of the current input file...
	910	+ * At the end of the current input file ...
805	911	* Check that the parser finished in an appropriate termination state.
806	912	*/
807	913	if( status & QUOTED )
808	914	{
809		- /* Abnormal termination...
	915	+ /* Abnormal termination ...
810	916	* EOF was encountered within a quoted literal, before the closing
811	917	* quote was found; diagnose abnormal termination state.
812	918	*/

		@@ -815,7 +921,7 @@ struct msgdict mc_source( const char input )
815	921
816	922	if( (status & NEWLINE) != NEWLINE )
817	923	{
818		- /* Abnormal termination...
	924	+ /* Abnormal termination ...
819	925	* The input file lacks a terminating newline; diagnose abnormal
820	926	* termination state.
821	927	*/

		@@ -824,7 +930,7 @@ struct msgdict mc_source( const char input )
824	930
825	931	if( status & MSGTEXT )
826	932	{
827		- /* Abnormal termination...
	933	+ /* Abnormal termination ...
828	934	* EOF was encountered while parsing a continued message definition;
829	935	* dignose abnormal termination state, and mark incomplete message
830	936	* for deletion.

		@@ -864,4 +970,4 @@ struct msgdict mc_source( const char input )
864	970	return head;
865	971	}
866	972
867		-/* $RCSfile$Revision: 1.5 $: end of file */
	973	+/* $RCSfile$Revision: 1.6 $: end of file */

mingw-catgets Fork

提交

標籤

Frequently used words (click to add to your profile)

Commit MetaInfo

Log Message

Change Summary

差異

mingw-catgets
Fork