• R/O
  • SSH
  • HTTPS

yash: 提交


Commit MetaInfo

修訂4073 (tree)
時間2020-09-20 12:51:34
作者magicant

Log Message

Merge branch charcategory (#39288)

https://github.com/magicant/yash/pull/8

Change Summary

差異

--- yash/trunk/NEWS (revision 4072)
+++ yash/trunk/NEWS (revision 4073)
@@ -13,6 +13,9 @@
1313 * The "command" built-in with the -v or -V option was printing
1414 the pathnames of external commands with a redundant leading slash
1515 when the current working directory is "/" or "//".
16+ * In pattern matching except for pathname expansion, when an
17+ unescaped backslash results from an expansion in the pattern,
18+ it is now treated as an escape character.
1619
1720 ----------------------------------------------------------------------
1821 Yash 2.50
--- yash/trunk/builtins/test.c (revision 4072)
+++ yash/trunk/builtins/test.c (revision 4073)
@@ -1,6 +1,6 @@
11 /* Yash: yet another shell */
22 /* test.c: test builtin */
3-/* (C) 2007-2018 magicant */
3+/* (C) 2007-2020 magicant */
44
55 /* This program is free software: you can redistribute it and/or modify
66 * it under the terms of the GNU General Public License as published by
@@ -77,9 +77,11 @@
7777 #if YASH_ENABLE_DOUBLE_BRACKET
7878 static int eval_dbexp(const dbexp_T *e)
7979 __attribute__((nonnull));
80-static inline wchar_t *expand_double_bracket_operand(const wordunit_T *w)
80+static inline wchar_t *expand_double_bracket_operand_escaped(
81+ const wordunit_T *w)
8182 __attribute__((nonnull,malloc,warn_unused_result));
82-static wchar_t *expand_and_unescape_double_bracket_operand(const wordunit_T *w)
83+static inline wchar_t *expand_double_bracket_operand_unescaped(
84+ const wordunit_T *w)
8385 __attribute__((nonnull,malloc,warn_unused_result));
8486 static bool test_triple_db(
8587 const wchar_t *lhs, const wchar_t *op, const wchar_t *rhs_escaped)
@@ -744,22 +746,22 @@
744746 }
745747
746748 case DBE_UNARY:
747- rhs = expand_and_unescape_double_bracket_operand(e->rhs.word);
749+ rhs = expand_double_bracket_operand_unescaped(e->rhs.word);
748750 if (rhs == NULL)
749751 return Exit_TESTERROR;
750752 result = test_double((void *[]) { e->operator, rhs });
751753 break;
752754 case DBE_BINARY:
753- lhs = expand_and_unescape_double_bracket_operand(e->lhs.word);
755+ lhs = expand_double_bracket_operand_unescaped(e->lhs.word);
754756 if (lhs == NULL)
755757 return Exit_TESTERROR;
756- rhs = expand_double_bracket_operand(e->rhs.word);
758+ rhs = expand_double_bracket_operand_escaped(e->rhs.word);
757759 if (rhs == NULL)
758760 return Exit_TESTERROR;
759761 result = test_triple_db(lhs, e->operator, rhs);
760762 break;
761763 case DBE_STRING:
762- rhs = expand_and_unescape_double_bracket_operand(e->rhs.word);
764+ rhs = expand_double_bracket_operand_unescaped(e->rhs.word);
763765 if (rhs == NULL)
764766 return Exit_TESTERROR;
765767 result = test_single((void *[]) { rhs });
@@ -778,19 +780,16 @@
778780
779781 /* Expands the operand of a primary.
780782 * The result may contain backslash escapes. */
781-wchar_t *expand_double_bracket_operand(const wordunit_T *w)
783+wchar_t *expand_double_bracket_operand_escaped(const wordunit_T *w)
782784 {
783- return expand_single(w, TT_SINGLE, true, false);
785+ return expand_single(w, TT_SINGLE, Q_WORD, ES_QUOTED);
784786 }
785787
786788 /* Expands the operand of a primary.
787789 * The result is literal (does not contain backslash escapes). */
788-wchar_t *expand_and_unescape_double_bracket_operand(const wordunit_T *w)
790+wchar_t *expand_double_bracket_operand_unescaped(const wordunit_T *w)
789791 {
790- wchar_t *e = expand_double_bracket_operand(w);
791- if (e == NULL)
792- return NULL;
793- return unescapefree(e);
792+ return expand_single(w, TT_SINGLE, Q_WORD, ES_NONE);
794793 }
795794
796795 /* Tests the specified three-token (binary) primary in the double-bracket
--- yash/trunk/exec.c (revision 4072)
+++ yash/trunk/exec.c (revision 4073)
@@ -513,14 +513,14 @@
513513 {
514514 assert(c->c_type == CT_CASE);
515515
516- wchar_t *word = expand_single_and_unescape(
517- c->c_casword, TT_SINGLE, true, false);
516+ wchar_t *word = expand_single(c->c_casword, TT_SINGLE, Q_WORD, ES_NONE);
518517 if (word == NULL)
519518 goto fail;
520519
521520 for (const caseitem_T *ci = c->c_casitems; ci != NULL; ci = ci->next) {
522521 for (void **pats = ci->ci_patterns; *pats != NULL; pats++) {
523- wchar_t *pattern = expand_single(*pats, TT_SINGLE, true, false);
522+ wchar_t *pattern =
523+ expand_single(*pats, TT_SINGLE, Q_WORD, ES_QUOTED);
524524 if (pattern == NULL)
525525 goto fail;
526526
@@ -555,8 +555,8 @@
555555 {
556556 assert(c->c_type == CT_FUNCDEF);
557557
558- wchar_t *funcname = expand_single_and_unescape(
559- c->c_funcname, TT_SINGLE, true, false);
558+ wchar_t *funcname =
559+ expand_single(c->c_funcname, TT_SINGLE, Q_WORD, ES_NONE);
560560 if (funcname != NULL) {
561561 if (define_function(funcname, c->c_funcbody))
562562 laststatus = Exit_SUCCESS;
--- yash/trunk/expand.c (revision 4072)
+++ yash/trunk/expand.c (revision 4073)
@@ -43,30 +43,34 @@
4343 #include "yash.h"
4444
4545
46-/* characters that have special meanings in brace expansion, quote removal, and
47- * globbing. When an unquoted expansion includes these characters, they are
48- * backslashed to protect from unexpected side effects in succeeding expansion
49- * steps. */
50-#define CHARS_ESCAPED L"\\\"\'{,}"
51-
5246 /* data passed between expansion functions */
5347 struct expand_four_T {
54- plist_T valuelist, splitlist;
55- xwcsbuf_T valuebuf;
56- xstrbuf_T splitbuf;
48+ plist_T valuelist, cclist;
5749 bool zeroword;
5850 };
51+struct expand_four_inner_T {
52+ struct expand_four_T e;
53+ xwcsbuf_T valuebuf;
54+ xstrbuf_T ccbuf;
55+};
56+/* If expansion yields multiple fields, all the fields are added to `valuelist'
57+ * except that the last field remains in `valuebuf'. Character categories
58+ * (charcategory_T) corresponding to the characters in `valuelist' and
59+ * `valuebuf' are cast to char and added to `cclist' and `ccbuf' accordingly. */
5960 /* When "$@" appears during expansion and there is no positional parameter, the
6061 * `zeroword' flag is set so that the quoted empty word can be removed later. */
6162
62-static plist_T expand_four_and_remove_quotes(
63- const wordunit_T *restrict w,
64- tildetype_T tilde, bool processquotes, bool escapeall);
65-static bool expand_four(const wordunit_T *restrict w,
66- tildetype_T tilde, bool processquotes, bool escapeall, bool rec,
67- struct expand_four_T *restrict e)
68- __attribute__((nonnull(6)));
69-static void fill_splitbuf(struct expand_four_T *e, bool splittable)
63+static plist_T expand_word(const wordunit_T *w,
64+ tildetype_T tilde, quoting_T quoting, escaping_T escaping)
65+ __attribute__((warn_unused_result));
66+static struct expand_four_T expand_four(const wordunit_T *restrict w,
67+ tildetype_T tilde, quoting_T quoting, charcategory_T defaultcc)
68+ __attribute__((warn_unused_result));
69+static bool expand_four_inner(const wordunit_T *restrict w, tildetype_T tilde,
70+ quoting_T quoting, charcategory_T defaultcc,
71+ struct expand_four_inner_T *restrict e)
72+ __attribute__((nonnull(5)));
73+static void fill_ccbuf(struct expand_four_inner_T *e, charcategory_T c)
7074 __attribute__((nonnull));
7175
7276 static wchar_t *expand_tilde(const wchar_t **ss,
@@ -76,7 +80,7 @@
7680 enum indextype_T { IDX_NONE, IDX_ALL, IDX_CONCAT, IDX_NUMBER, };
7781
7882 static bool expand_param(const paramexp_T *restrict p, bool indq,
79- struct expand_four_T *restrict e)
83+ struct expand_four_inner_T *restrict e)
8084 __attribute__((nonnull));
8185 static enum indextype_T parse_indextype(const wchar_t *indexstr)
8286 __attribute__((nonnull,pure));
@@ -99,33 +103,34 @@
99103 static void subst_length_each(void **slist)
100104 __attribute__((nonnull));
101105
102-static void expand_brace_each(void **restrict values, void **restrict splits,
103- plist_T *restrict valuelist, plist_T *restrict splitlist)
106+static void expand_brace_each(
107+ void *const *restrict values, void *const *restrict ccs,
108+ plist_T *restrict valuelist, plist_T *restrict cclist)
104109 __attribute__((nonnull));
105-static void expand_brace(wchar_t *restrict word, char *restrict split,
106- plist_T *restrict valuelist, plist_T *restrict splitlist)
110+static void expand_brace(
111+ wchar_t *restrict word, char *restrict cc,
112+ plist_T *restrict valuelist, plist_T *restrict cclist)
107113 __attribute__((nonnull));
108114 static bool try_expand_brace_sequence(
109- wchar_t *word, char *restrict split, wchar_t *startc,
110- plist_T *restrict valuelist, plist_T *restrict splitlist)
115+ wchar_t *word, char *restrict cc, wchar_t *startc,
116+ plist_T *restrict valuelist, plist_T *restrict cclist)
111117 __attribute__((nonnull));
112118 static bool has_leading_zero(const wchar_t *restrict s, bool *restrict sign)
113119 __attribute__((nonnull));
114120
115-static void fieldsplit_all(void **restrict valuelist, void **restrict splitlist,
116- plist_T *restrict dest)
121+static void fieldsplit_all(
122+ void **restrict valuelist, void **restrict cclist,
123+ plist_T *restrict outvaluelist, plist_T *restrict outcclist)
117124 __attribute__((nonnull));
118-static void fieldsplit(wchar_t *restrict s, char *restrict split,
119- const wchar_t *restrict ifs, plist_T *restrict dest)
125+static void fieldsplit(wchar_t *restrict s, char *restrict cc,
126+ const wchar_t *restrict ifs,
127+ plist_T *restrict outvaluelist, plist_T *restrict outcclist)
120128 __attribute__((nonnull));
121-static size_t skip_ifs(const wchar_t *s, const char *split,
122- bool escaped, const wchar_t *ifs)
129+static bool is_ifs_char(wchar_t c, charcategory_T cc, const wchar_t *ifs)
123130 __attribute__((nonnull,pure));
124-static size_t skip_ifs_whitespaces(const wchar_t *s, const char *split,
125- bool escaped, const wchar_t *ifs)
131+static bool is_ifs_whitespace(wchar_t c, charcategory_T cc, const wchar_t *ifs)
126132 __attribute__((nonnull,pure));
127-static size_t skip_field(const wchar_t *s, const char *split,
128- bool escaped, const wchar_t *ifs)
133+static bool is_non_ifs_char(wchar_t c, charcategory_T cc, const wchar_t *ifs)
129134 __attribute__((nonnull,pure));
130135 static void add_empty_field(plist_T *dest, const wchar_t *p)
131136 __attribute__((nonnull));
@@ -133,12 +138,17 @@
133138 static inline void add_sq(
134139 const wchar_t *restrict *ss, xwcsbuf_T *restrict buf, bool escape)
135140 __attribute__((nonnull));
136-static wchar_t *escaped_wcspbrk(const wchar_t *s, const wchar_t *accept)
137- __attribute__((nonnull));
138-static wchar_t *escaped_remove(const wchar_t *s, const wchar_t *reject)
141+static inline bool should_escape(char c, charcategory_T cc, escaping_T escaping)
142+ __attribute__((const));
143+static wchar_t *quote_removal(
144+ const wchar_t *restrict s, const char *restrict cc, escaping_T escaping)
139145 __attribute__((nonnull,malloc,warn_unused_result));
140-static inline wchar_t *escaped_remove_free(wchar_t *s, const wchar_t *reject)
146+static wchar_t *quote_removal_free(
147+ wchar_t *restrict s, char *restrict cc, escaping_T escaping)
141148 __attribute__((nonnull,malloc,warn_unused_result));
149+static void remove_empty_fields_and_quotes(
150+ struct expand_four_T *e, escaping_T escaping)
151+ __attribute__((nonnull));
142152
143153 static void glob_all(void **restrict patterns, plist_T *restrict list)
144154 __attribute__((nonnull));
@@ -185,65 +195,44 @@
185195 * On error in a non-interactive shell, the shell exits. */
186196 bool expand_multiple(const wordunit_T *w, plist_T *list)
187197 {
188- struct expand_four_T expand;
189- pl_init(&expand.valuelist);
190- pl_init(&expand.splitlist);
191- wb_init(&expand.valuebuf);
192- sb_init(&expand.splitbuf);
193- expand.zeroword = false;
194-
195198 /* four expansions (w -> valuelist) */
196- if (!expand_four(w, TT_SINGLE, true, false, false, &expand)) {
197- plfree(pl_toary(&expand.valuelist), free);
198- plfree(pl_toary(&expand.splitlist), free);
199- wb_destroy(&expand.valuebuf);
200- sb_destroy(&expand.splitbuf);
199+ struct expand_four_T expand =
200+ expand_four(w, TT_SINGLE, Q_WORD, CC_LITERAL);
201+ if (expand.valuelist.contents == NULL) {
201202 maybe_exit_on_error();
202203 return false;
203204 }
204- assert(expand.valuebuf.length == expand.splitbuf.length);
205- pl_add(&expand.valuelist, wb_towcs(&expand.valuebuf));
206- pl_add(&expand.splitlist, sb_tostr(&expand.splitbuf));
207205
208206 /* brace expansion (valuelist -> valuelist2) */
209- plist_T valuelist2, splitlist2;
207+ plist_T valuelist2, cclist2;
210208 if (shopt_braceexpand) {
211209 pl_init(&valuelist2);
212- pl_init(&splitlist2);
213- expand_brace_each(expand.valuelist.contents, expand.splitlist.contents,
214- &valuelist2, &splitlist2);
210+ pl_init(&cclist2);
211+ expand_brace_each(expand.valuelist.contents, expand.cclist.contents,
212+ &valuelist2, &cclist2);
215213 pl_clear(&expand.valuelist, 0);
216- pl_destroy(&expand.splitlist);
214+ pl_clear(&expand.cclist, 0);
217215 } else {
218216 valuelist2 = expand.valuelist;
219- splitlist2 = expand.splitlist;
217+ cclist2 = expand.cclist;
220218 pl_init(&expand.valuelist);
219+ pl_init(&expand.cclist);
221220 }
222221
223222 /* field splitting (valuelist2 -> valuelist) */
224- fieldsplit_all(
225- pl_toary(&valuelist2), pl_toary(&splitlist2), &expand.valuelist);
223+ fieldsplit_all(pl_toary(&valuelist2), pl_toary(&cclist2),
224+ &expand.valuelist, &expand.cclist);
225+ assert(expand.valuelist.length == expand.cclist.length);
226226
227- /* empty field removal */
228- if (expand.valuelist.length == 1) {
229- const wchar_t *field = expand.valuelist.contents[0];
230- if (field[0] == L'\0' ||
231- (expand.zeroword && wcscmp(field, L"\"\"") == 0)) {
232- pl_clear(&expand.valuelist, free);
233- }
234- }
227+ /* empty field removal & quote removal */
228+ remove_empty_fields_and_quotes(
229+ &expand, shopt_glob ? ES_QUOTED_HARD : ES_NONE);
235230
236- /* quote removal */
237- for (size_t i = 0; i < expand.valuelist.length; i++)
238- expand.valuelist.contents[i] =
239- escaped_remove_free(expand.valuelist.contents[i], L"\"\'");
240-
241231 /* globbing (valuelist -> list) */
242232 if (shopt_glob) {
243233 glob_all(pl_toary(&expand.valuelist), list);
244234 } else {
245- for (size_t i = 0; i < expand.valuelist.length; i++)
246- pl_add(list, unescapefree(expand.valuelist.contents[i]));
235+ pl_cat(list, expand.valuelist.contents);
247236 pl_destroy(&expand.valuelist);
248237 }
249238
@@ -250,40 +239,42 @@
250239 return true;
251240 }
252241
242+/* Expands a word to (possibly any number of) fields.
243+ * If successful, the return value is a plist_T containing newly malloced wide
244+ * strings. In most cases, the plist_T contains one string. If the word contains
245+ * "$@", however, it may contain any number of strings.
246+ * On error, the return value is a plist_T with `contents' being NULL. */
247+plist_T expand_word(const wordunit_T *w,
248+ tildetype_T tilde, quoting_T quoting, escaping_T escaping)
249+{
250+ /* four expansions */
251+ struct expand_four_T expand =
252+ expand_four(w, tilde, quoting, CC_LITERAL);
253+
254+ /* empty field removal & quote removal */
255+ if (expand.valuelist.contents != NULL)
256+ remove_empty_fields_and_quotes(&expand, escaping);
257+
258+ return expand.valuelist;
259+}
260+
253261 /* Expands a single word: the four expansions and quote removal.
254- * This function doesn't perform brace expansion, field splitting, globbing and
255- * unescaping.
256- * If `processquotes' is true, single- and double-quotations are recognized as
257- * quotes. Otherwise, they are treated like backslashed characters.
258- * If `escapeall' is true, the expanded words are all backslashed as if the
259- * entire expansion is quoted.
260- * If `processquotes' and `escapeall' are false, only backslashes not preceding
261- * any of $, `, \ are self-backslashed.
262- * If successful, the resulting word is returned as a newly malloced string
263- * that may include backslash escapes.
262+ * This function doesn't perform brace expansion, field splitting, or globbing.
263+ * If successful, the resulting word is returned as a newly malloced string.
264264 * On error, an error message is printed and NULL is returned.
265265 * On error in a non-interactive shell, the shell exits. */
266-wchar_t *expand_single(const wordunit_T *arg,
267- tildetype_T tilde, bool processquotes, bool escapeall)
266+wchar_t *expand_single(const wordunit_T *w,
267+ tildetype_T tilde, quoting_T quoting, escaping_T escaping)
268268 {
269- plist_T list =
270- expand_four_and_remove_quotes(arg, tilde, processquotes, escapeall);
269+ plist_T list = expand_word(w, tilde, quoting, escaping);
271270 if (list.contents == NULL) {
272271 maybe_exit_on_error();
273272 return NULL;
274273 }
275274
276- return concatenate_values(pl_toary(&list), true);
275+ return concatenate_values(pl_toary(&list), escaping != ES_NONE);
277276 }
278277
279-/* Like `expand_single', but the result is unescaped (if successful). */
280-wchar_t *expand_single_and_unescape(const wordunit_T *arg,
281- tildetype_T tilde, bool processquotes, bool escapeall)
282-{
283- wchar_t *result = expand_single(arg, tilde, processquotes, escapeall);
284- return result == NULL ? NULL : unescapefree(result);
285-}
286-
287278 /* Expands a single word: the four expansions, glob, quote removal and unescape.
288279 * This function doesn't perform brace expansion and field splitting.
289280 * If the result of glob is more than one word,
@@ -296,7 +287,7 @@
296287 * On error in a non-interactive shell, the shell exits. */
297288 char *expand_single_with_glob(const wordunit_T *arg, tildetype_T tilde)
298289 {
299- wchar_t *exp = expand_single(arg, tilde, true, false);
290+ wchar_t *exp = expand_single(arg, tilde, Q_WORD, ES_QUOTED_HARD);
300291 char *result;
301292
302293 if (exp == NULL)
@@ -347,78 +338,52 @@
347338 /********** Four Expansions **********/
348339
349340 /* Performs the four expansions in the specified single word.
350- * `w' is the word in which expansions occur.
351- * `tilde' is type of tilde expansion that is performed.
352- * If `processquotes' is true, single- and double-quotations are recognized as
353- * quotes. Otherwise, they are treated like backslashed characters.
354- * If `escapeall' is true, the expanded words are all backslashed as if the
355- * entire expansion is quoted.
356- * If `processquotes' and `escapeall' are false, only backslashes not preceding
357- * any of $, `, \ are self-backslashed.
358- * If successful, the return value is a plist_T containing newly malloced wide
359- * strings. In most cases, the plist_T contains one string. If the word contains
360- * "$@", however, it may contain any number of strings.
361- * Single- or double-quoted characters are unquoted and backslashed.
362- * On error, the return value is a plist_T with `contents' being NULL. */
363-plist_T expand_four_and_remove_quotes(
364- const wordunit_T *restrict w,
365- tildetype_T tilde, bool processquotes, bool escapeall)
341+ * The four expansions are tilde expansion, parameter expansion, command
342+ * substitution, and arithmetic expansion.
343+ * If successful, `valuelist' in the return value is the list of the resultant
344+ * fields, which are newly malloced wide strings, and `cclist' is the list of
345+ * the corresponding charcategory_T strings, which are also newly malloced.
346+ * If unsuccessful, `valuelist' and `cclist' are empty and have NULL `contents'.
347+ */
348+struct expand_four_T expand_four(const wordunit_T *restrict w,
349+ tildetype_T tilde, quoting_T quoting, charcategory_T defaultcc)
366350 {
367- struct expand_four_T expand;
351+ struct expand_four_inner_T e;
352+ pl_init(&e.e.valuelist);
353+ pl_init(&e.e.cclist);
354+ wb_init(&e.valuebuf);
355+ sb_init(&e.ccbuf);
356+ e.e.zeroword = false;
368357
369- pl_init(&expand.valuelist);
370- wb_init(&expand.valuebuf);
371- expand.splitlist.contents = NULL;
372- expand.zeroword = false;
373-
374- if (!expand_four(w, tilde, processquotes, escapeall, false, &expand)) {
375- plfree(pl_toary(&expand.valuelist), free);
376- wb_destroy(&expand.valuebuf);
377- expand.valuelist.contents = NULL;
378- return expand.valuelist;
358+ if (expand_four_inner(w, tilde, quoting, defaultcc, &e)) {
359+ assert(e.e.valuelist.length == e.e.cclist.length);
360+ assert(e.valuebuf.length == e.ccbuf.length);
361+ pl_add(&e.e.valuelist, wb_towcs(&e.valuebuf));
362+ pl_add(&e.e.cclist, sb_tostr(&e.ccbuf));
363+ } else {
364+ plfree(pl_toary(&e.e.valuelist), free);
365+ plfree(pl_toary(&e.e.cclist), free);
366+ wb_destroy(&e.valuebuf);
367+ sb_destroy(&e.ccbuf);
368+ e.e.valuelist.contents = e.e.cclist.contents = NULL;
379369 }
380-
381- /* remove empty word for "$@" if $# == 0 */
382- if (expand.valuelist.length == 0 && expand.zeroword &&
383- wcscmp(expand.valuebuf.contents, L"\"\"") == 0)
384- wb_destroy(&expand.valuebuf);
385- else
386- pl_add(&expand.valuelist, wb_towcs(&expand.valuebuf));
387-
388- /* quote removal */
389- for (size_t i = 0; i < expand.valuelist.length; i++)
390- expand.valuelist.contents[i] =
391- escaped_remove_free(expand.valuelist.contents[i], L"\"\'");
392-
393- return expand.valuelist;
370+ return e.e;
394371 }
395372
396373 /* Performs the four expansions in the specified single word.
397374 * The four expansions are tilde expansion, parameter expansion, command
398375 * substitution, and arithmetic expansion.
399- * `w' is the word in which expansions occur.
400- * `tilde' specifies the type of tilde expansion that is performed.
401- * If `processquotes' is true, single- and double-quotations are recognized as
402- * quotes. Otherwise, they are treated like backslashed characters.
403- * If `escapeall' is true, the expanded words are all backslashed as if the
404- * entire expansion is quoted.
405- * If `processquotes' and `escapeall' are false, only backslashes not preceding
406- * any of $, `, \ are self-backslashed.
407- * `rec' must be true iff this expansion is part of another expansion.
408- * `e->valuebuf' must be initialized before calling this function and is used to
409- * expand the current word. If `w' expands to multiple words, the last word is
410- * put in `e->valuebuf' and the others are inserted to `e->valuelist'.
411- * The splittability strings are put in `e->splitbuf' and `e->splitlist'
412- * accordingly if `e->splitlist' is non-NULL.
413- * Single- and double-quotations remain in the resulting word. In addition,
414- * characters inside those quotations are backslashed.
376+ * The lists and buffers in `e' must have been initialized before calling this
377+ * function. If the expansion yields a single field, the result is appended to
378+ * `e->valuebuf'. If more than one field results, all but the last field are
379+ * appended to `e->valuelist' as newly malloced wide strings and the last field
380+ * remains in `e->valuebuf'. The corresponding charcategory_T strings are added
381+ * to `e->cclist' and `e->ccbuf', having the same count and length as
382+ * `e->valuelist' and `e->valuebuf'.
415383 * The return value is true iff successful. */
416-/* A splittability string is an array of Boolean values that specifies where
417- * the word can be split in field splitting. The word can be split at the nth
418- * character iff the nth value of the splittability string is non-zero. */
419-bool expand_four(const wordunit_T *restrict w,
420- tildetype_T tilde, bool processquotes, bool escapeall, bool rec,
421- struct expand_four_T *restrict e)
384+bool expand_four_inner(const wordunit_T *restrict w, tildetype_T tilde,
385+ quoting_T quoting, charcategory_T defaultcc,
386+ struct expand_four_inner_T *restrict e)
422387 {
423388 bool ok = true;
424389 bool indq = false; /* in a double quote? */
@@ -426,10 +391,6 @@
426391 const wchar_t *ss;
427392 wchar_t *s;
428393
429-#define FILL_SBUF(s) fill_splitbuf(e, !indq && !escapeall && (s));
430-#define FILL_SBUF_SPLITTABLE FILL_SBUF(true)
431-#define FILL_SBUF_UNSPLITTABLE FILL_SBUF(false)
432-
433394 for (; w != NULL; w = w->next, first = false) {
434395 switch (w->wu_type) {
435396 case WT_STRING:
@@ -437,65 +398,72 @@
437398 if (first && tilde != TT_NONE) {
438399 s = expand_tilde(&ss, w->next, tilde);
439400 if (s != NULL) {
440- wb_catfree(&e->valuebuf, escapefree(s, NULL));
441- FILL_SBUF_UNSPLITTABLE;
401+ wb_catfree(&e->valuebuf, s);
402+ fill_ccbuf(e, CC_HARD_EXPANSION | (defaultcc & CC_QUOTED));
442403 }
443404 }
444405 while (*ss != L'\0') {
445406 switch (*ss) {
446407 case L'"':
447- if (!processquotes)
448- goto escape;
408+ if (quoting != Q_WORD)
409+ goto default_;
449410 indq = !indq;
450411 wb_wccat(&e->valuebuf, L'"');
451- FILL_SBUF_UNSPLITTABLE;
412+ sb_ccat(&e->ccbuf, defaultcc | CC_QUOTATION);
452413 break;
453414 case L'\'':
454- if (!processquotes || indq)
455- goto escape;
415+ if (quoting != Q_WORD || indq)
416+ goto default_;
417+
456418 wb_wccat(&e->valuebuf, L'\'');
457- add_sq(&ss, &e->valuebuf, true);
419+ sb_ccat(&e->ccbuf, defaultcc | CC_QUOTATION);
420+
421+ add_sq(&ss, &e->valuebuf, false);
422+ fill_ccbuf(e, defaultcc | CC_QUOTED);
423+
458424 wb_wccat(&e->valuebuf, L'\'');
459- FILL_SBUF_UNSPLITTABLE;
425+ sb_ccat(&e->ccbuf, defaultcc | CC_QUOTATION);
460426 break;
461427 case L'\\':
462- if (!processquotes) {
463- if (!escapeall) {
464- wchar_t c = ss[1];
465- if (c == L'$' || c == L'`' || c == L'\\')
466- ss++;
467- }
468- goto escape;
428+ switch (quoting) {
429+ case Q_WORD:
430+ if (indq && wcschr(CHARS_ESCAPABLE, ss[1]) == NULL)
431+ goto default_;
432+ break;
433+ case Q_INDQ:
434+ if (wcschr(L"$`\\", ss[1]) == NULL)
435+ goto default_;
436+ break;
437+ case Q_LITERAL:
438+ goto default_;
469439 }
470440
471- if (indq && wcschr(CHARS_ESCAPABLE, ss[1]) == NULL) {
472- goto escape;
473- } else {
474- wb_wccat(&e->valuebuf, L'\\');
475- if (*++ss != L'\0')
476- wb_wccat(&e->valuebuf, *ss++);
477- FILL_SBUF_UNSPLITTABLE;
478- continue;
441+ wb_wccat(&e->valuebuf, L'\\');
442+ sb_ccat(&e->ccbuf, defaultcc | CC_QUOTATION);
443+ ss++;
444+ if (*ss != L'\0') {
445+ wb_wccat(&e->valuebuf, *ss);
446+ sb_ccat(&e->ccbuf, defaultcc | CC_QUOTED);
479447 }
448+ break;
480449 case L':':
481- if (!indq && tilde == TT_MULTI) {
482- /* perform tilde expansion after a colon */
483- wb_wccat(&e->valuebuf, L':');
484- ss++;
485- s = expand_tilde(&ss, w->next, tilde);
486- if (s != NULL) {
487- wb_catfree(&e->valuebuf, escapefree(s, NULL));
488- FILL_SBUF_UNSPLITTABLE;
489- }
490- continue;
450+ if (indq || tilde != TT_MULTI)
451+ goto default_;
452+
453+ /* perform tilde expansion after a colon */
454+ wb_wccat(&e->valuebuf, L':');
455+ sb_ccat(&e->ccbuf, defaultcc);
456+ ss++;
457+ s = expand_tilde(&ss, w->next, tilde);
458+ if (s != NULL) {
459+ wb_catfree(&e->valuebuf, s);
460+ fill_ccbuf(e, CC_HARD_EXPANSION);
491461 }
492- /* falls thru! */
462+ continue;
463+default_:
493464 default:
494- if (indq || escapeall)
495-escape:
496- wb_wccat(&e->valuebuf, L'\\');
497465 wb_wccat(&e->valuebuf, *ss);
498- FILL_SBUF(rec);
466+ sb_ccat(&e->ccbuf, defaultcc | (indq * CC_QUOTED));
499467 break;
500468 }
501469 ss++;
@@ -502,7 +470,9 @@
502470 }
503471 break;
504472 case WT_PARAM:
505- if (!expand_param(w->wu_param, indq || escapeall, e))
473+ if (!expand_param(w->wu_param,
474+ indq || quoting == Q_LITERAL || (defaultcc & CC_QUOTED),
475+ e))
506476 ok = false;
507477 break;
508478 case WT_CMDSUB:
@@ -509,14 +479,14 @@
509479 s = exec_command_substitution(&w->wu_cmdsub);
510480 goto cat_s;
511481 case WT_ARITH:
512- s = expand_single_and_unescape(w->wu_arith, TT_NONE, true, false);
482+ s = expand_single(w->wu_arith, TT_NONE, Q_WORD, ES_NONE);
513483 if (s != NULL)
514484 s = evaluate_arithmetic(s);
515485 cat_s:
516486 if (s != NULL) {
517- wb_catfree(&e->valuebuf, escapefree(s,
518- (indq || escapeall) ? NULL : CHARS_ESCAPED));
519- FILL_SBUF_SPLITTABLE;
487+ wb_catfree(&e->valuebuf, s);
488+ fill_ccbuf(e, CC_SOFT_EXPANSION |
489+ (indq * CC_QUOTED) | (defaultcc & CC_QUOTED));
520490 } else {
521491 ok = false;
522492 }
@@ -524,21 +494,14 @@
524494 }
525495 }
526496
527-#undef FILL_SBUF_UNSPLITTABLE
528-#undef FILL_SBUF_SPLITTABLE
529-#undef FILL_SBUF
530-
531497 return ok;
532498 }
533499
534-/* Appends to `e->splitbuf' as many `splittable' as needed to match the length
535- * with `e->valuebuf'. */
536-void fill_splitbuf(struct expand_four_T *e, bool splittable)
500+/* Appends to `e->ccbuf' as many `c's as needed to match the length with
501+ * `e->valuebuf'. */
502+void fill_ccbuf(struct expand_four_inner_T *e, charcategory_T c)
537503 {
538- if (e->splitlist.contents == NULL)
539- return;
540- sb_ccat_repeat(
541- &e->splitbuf, splittable, e->valuebuf.length - e->splitbuf.length);
504+ sb_ccat_repeat(&e->ccbuf, c, e->valuebuf.length - e->ccbuf.length);
542505 }
543506
544507 /* Performs tilde expansion.
@@ -611,7 +574,7 @@
611574 * The result is put in `e'.
612575 * Returns true iff successful. */
613576 bool expand_param(const paramexp_T *restrict p, bool indq,
614- struct expand_four_T *restrict e)
577+ struct expand_four_inner_T *restrict e)
615578 {
616579 /* parse indices first */
617580 ssize_t startindex, endindex;
@@ -619,8 +582,7 @@
619582 if (p->pe_start == NULL) {
620583 startindex = 0, endindex = SSIZE_MAX, indextype = IDX_NONE;
621584 } else {
622- wchar_t *start = expand_single_and_unescape(
623- p->pe_start, TT_NONE, true, false);
585+ wchar_t *start = expand_single(p->pe_start, TT_NONE, Q_WORD, ES_NONE);
624586 if (start == NULL)
625587 return false;
626588 indextype = parse_indextype(start);
@@ -637,8 +599,8 @@
637599 if (p->pe_end == NULL) {
638600 endindex = (startindex == -1) ? SSIZE_MAX : startindex;
639601 } else {
640- wchar_t *end = expand_single_and_unescape(
641- p->pe_end, TT_NONE, true, false);
602+ wchar_t *end = expand_single(
603+ p->pe_end, TT_NONE, Q_WORD, ES_NONE);
642604 if (end == NULL || !evaluate_index(end, &endindex))
643605 return false;
644606 }
@@ -656,8 +618,7 @@
656618 struct get_variable_T v;
657619 bool unset; /* parameter is not set? */
658620 if (p->pe_type & PT_NEST) {
659- plist_T plist =
660- expand_four_and_remove_quotes(p->pe_nest, TT_NONE, true, true);
621+ plist_T plist = expand_word(p->pe_nest, TT_NONE, Q_WORD, ES_NONE);
661622 if (plist.contents == NULL)
662623 return false;
663624 v.type = (plist.length == 1) ? GV_SCALAR : GV_ARRAY;
@@ -665,8 +626,6 @@
665626 v.values = pl_toary(&plist);
666627 v.freevalues = true;
667628 unset = false;
668- for (size_t i = 0; v.values[i] != NULL; i++)
669- v.values[i] = unescapefree(v.values[i]);
670629 } else {
671630 v = get_variable(p->pe_name);
672631 if (v.type == GV_NOTFOUND) {
@@ -773,7 +732,8 @@
773732 if (unset) {
774733 subst:
775734 plfree(values, free);
776- return expand_four(p->pe_subst, TT_SINGLE, true, indq, true, e);
735+ return expand_four_inner(p->pe_subst, TT_SINGLE, Q_WORD,
736+ CC_SOFT_EXPANSION | (indq * CC_QUOTED), e);
777737 }
778738 break;
779739 case PT_ASSIGN:
@@ -795,8 +755,7 @@
795755 p->pe_name);
796756 return false;
797757 }
798- subst = expand_single_and_unescape(
799- p->pe_subst, TT_SINGLE, true, false);
758+ subst = expand_single(p->pe_subst, TT_SINGLE, Q_WORD, ES_NONE);
800759 if (subst == NULL)
801760 return false;
802761 if (v.type != GV_ARRAY) {
@@ -838,7 +797,7 @@
838797 wchar_t *match;
839798 switch (p->pe_type & PT_MASK) {
840799 case PT_MATCH:
841- match = expand_single(p->pe_match, TT_SINGLE, true, false);
800+ match = expand_single(p->pe_match, TT_SINGLE, Q_WORD, ES_QUOTED);
842801 if (match == NULL) {
843802 plfree(values, free);
844803 return false;
@@ -847,8 +806,8 @@
847806 free(match);
848807 break;
849808 case PT_SUBST:
850- match = expand_single(p->pe_match, TT_SINGLE, true, false);
851- subst = expand_single_and_unescape(p->pe_subst, TT_SINGLE, true, false);
809+ match = expand_single(p->pe_match, TT_SINGLE, Q_WORD, ES_QUOTED);
810+ subst = expand_single(p->pe_subst, TT_SINGLE, Q_WORD, ES_NONE);
852811 if (match == NULL || subst == NULL) {
853812 free(match);
854813 free(subst);
@@ -869,39 +828,25 @@
869828 if (p->pe_type & PT_NUMBER)
870829 subst_length_each(values);
871830
872- /* backslash escape */
873- for (size_t i = 0; values[i] != NULL; i++)
874- values[i] = escapefree(values[i], indq ? NULL : CHARS_ESCAPED);
875-
876831 /* add the elements of `values' to `e->valuelist' */
877832 if (values[0] == NULL) {
878833 if (indq)
879- e->zeroword = true;
834+ e->e.zeroword = true;
880835 } else {
836+ charcategory_T cc = CC_SOFT_EXPANSION | (indq * CC_QUOTED);
837+
881838 /* add the first element */
882839 wb_catfree(&e->valuebuf, values[0]);
883- fill_splitbuf(e, !indq);
884- if (values[1] != NULL) {
885- pl_add(&e->valuelist, wb_towcs(&e->valuebuf));
886- if (e->splitlist.contents != NULL)
887- pl_add(&e->splitlist, sb_tostr(&e->splitbuf));
840+ fill_ccbuf(e, cc);
888841
889- /* add the remaining but last */
890- size_t i;
891- for (i = 1; values[i + 1] != NULL; i++) {
892- pl_add(&e->valuelist, values[i]);
893- if (e->splitlist.contents != NULL) {
894- size_t len = wcslen(values[i]);
895- pl_add(&e->splitlist, memset(xmalloc(len), !indq, len));
896- }
897- }
842+ /* add the other elements */
843+ for (size_t i = 1; values[i] != NULL; i++) {
844+ pl_add(&e->e.valuelist, wb_towcs(&e->valuebuf));
845+ pl_add(&e->e.cclist, sb_tostr(&e->ccbuf));
898846
899- /* add the last element */
900847 wb_initwith(&e->valuebuf, values[i]);
901- if (e->splitlist.contents != NULL) {
902- sb_init(&e->splitbuf);
903- fill_splitbuf(e, !indq);
904- }
848+ sb_init(&e->ccbuf);
849+ fill_ccbuf(e, cc);
905850 }
906851 }
907852 free(values);
@@ -995,8 +940,7 @@
995940 void print_subst_as_error(const paramexp_T *p)
996941 {
997942 if (p->pe_subst != NULL) {
998- wchar_t *subst = expand_single_and_unescape(
999- p->pe_subst, TT_SINGLE, true, false);
943+ wchar_t *subst = expand_single(p->pe_subst, TT_SINGLE, Q_WORD, ES_NONE);
1000944 if (subst != NULL) {
1001945 if (p->pe_type & PT_NEST)
1002946 xerror(0, "%ls", subst);
@@ -1133,38 +1077,49 @@
11331077
11341078 /* Performs brace expansion in each element of the specified array.
11351079 * `values' is an array of pointers to `free'able wide strings to be expanded.
1136- * `splits' is an array of pointers to `free'able splittability strings.
1137- * `values' and 'splits' must contain the same number of elements.
1138- * Both the arrays must be NULL-terminated and their elements are freed in this
1139- * function. The arrays themselves are not freed.
1140- * Newly malloced results are added to `valuelist' and `splitlist'. */
1141-void expand_brace_each(void **restrict values, void **restrict splits,
1142- plist_T *restrict valuelist, plist_T *restrict splitlist)
1080+ * `ccs' is an array of pointers to `free'able charcategory_T strings.
1081+ * `values' and `ccs' must contain the same number of elements and be NULL-
1082+ * terminated. Their elements are freed in this function. The arrays themselves
1083+ * are not freed.
1084+ * Newly malloced results are added to `valuelist' and `cclist'. */
1085+void expand_brace_each(
1086+ void *const *restrict values, void *const *restrict ccs,
1087+ plist_T *restrict valuelist, plist_T *restrict cclist)
11431088 {
11441089 while (*values != NULL) {
1145- expand_brace(*values, *splits, valuelist, splitlist);
1146- values++, splits++;
1090+ expand_brace(*values, *ccs, valuelist, cclist);
1091+ values++, ccs++;
11471092 }
11481093 }
11491094
11501095 /* Performs brace expansion in the specified single word.
1151- * `split' is the splittability string corresponding to `word'.
1152- * `word' and `split' are freed in this function.
1153- * `Free'able results are added to `valuelist' and `splitlist'. */
1154-void expand_brace(wchar_t *restrict const word, char *restrict const split,
1155- plist_T *restrict valuelist, plist_T *restrict splitlist)
1096+ * `cc' is the charcategory_T string corresponding to `word'.
1097+ * `word' and `cc' are freed in this function.
1098+ * `Free'able results are added to `valuelist' and `cclist'. */
1099+void expand_brace(
1100+ wchar_t *restrict const word, char *restrict const cc,
1101+ plist_T *restrict valuelist, plist_T *restrict cclist)
11561102 {
1157- wchar_t *c = word;
1103+#define idx(p) ((size_t) ((wchar_t *) (p) - word))
11581104
1105+ size_t ci = 0;
1106+
11591107 start:
1160- c = escaped_wcspbrk(c, L"{");
1161- if (c == NULL || *++c == L'\0') {
1162- /* don't expand if there is no L'{' or L'{' is at the end of string */
1163- pl_add(valuelist, word);
1164- pl_add(splitlist, split);
1108+
1109+ /* find '{' */
1110+ do {
1111+ wchar_t *c = wcschr(&word[ci], L'{');
1112+ if (c == NULL) {
1113+ /* no L'{', no expansion */
1114+ pl_add(valuelist, word);
1115+ pl_add(cclist, cc);
1116+ return;
1117+ }
1118+ ci = idx(c);
1119+ } while (cc[ci++] != CC_LITERAL);
1120+
1121+ if (try_expand_brace_sequence(word, cc, &word[ci], valuelist, cclist)) {
11651122 return;
1166- } else if (try_expand_brace_sequence(word, split, c, valuelist, splitlist)){
1167- return;
11681123 }
11691124
11701125 plist_T splitpoints;
@@ -1173,16 +1128,18 @@
11731128 /* collect pointers to characters where the word is split */
11741129 /* The pointers point to the character just after L'{', L',' or L'}'. */
11751130 pl_init(&splitpoints);
1176- pl_add(&splitpoints, c);
1131+ pl_add(&splitpoints, &word[ci]);
11771132 nest = 0;
1178- while ((c = escaped_wcspbrk(c, L"{,}")) != NULL) {
1179- switch (*c++) {
1133+ for (; word[ci] != L'\0'; ci++) {
1134+ if (cc[ci] != CC_LITERAL)
1135+ continue;
1136+ switch (word[ci]) {
11801137 case L'{':
11811138 nest++;
11821139 break;
11831140 case L',':
11841141 if (nest == 0)
1185- pl_add(&splitpoints, c);
1142+ pl_add(&splitpoints, &word[ci + 1]);
11861143 break;
11871144 case L'}':
11881145 if (nest > 0) {
@@ -1189,9 +1146,10 @@
11891146 nest--;
11901147 break;
11911148 } else if (splitpoints.length == 1) {
1149+ /* no comma between { and } */
11921150 goto restart;
11931151 } else {
1194- pl_add(&splitpoints, c);
1152+ pl_add(&splitpoints, &word[ci + 1]);
11951153 goto done;
11961154 }
11971155 }
@@ -1199,53 +1157,51 @@
11991157 restart:
12001158 /* if there is no L',' or L'}' corresponding to L'{',
12011159 * find the next L'{' and try again */
1202- c = splitpoints.contents[0];
1160+ ci = idx(splitpoints.contents[0]);
12031161 pl_destroy(&splitpoints);
12041162 goto start;
12051163
12061164 done:;
1207-#define idx(p) ((wchar_t *) (p) - word)
1208-#define wtos(p) (split + idx(p))
12091165 size_t lastelemindex = splitpoints.length - 1;
12101166 size_t headlen = idx(splitpoints.contents[0]) - 1;
12111167 size_t taillen = wcslen(splitpoints.contents[lastelemindex]);
12121168 for (size_t i = 0; i < lastelemindex; i++) {
12131169 xwcsbuf_T buf;
1214- xstrbuf_T sbuf;
1170+ xstrbuf_T cbuf;
12151171 wb_init(&buf);
1216- sb_init(&sbuf);
1172+ sb_init(&cbuf);
12171173
12181174 wb_ncat_force(&buf, word, headlen);
1219- sb_ncat_force(&sbuf, split, headlen);
1175+ sb_ncat_force(&cbuf, cc, headlen);
12201176
12211177 size_t len = (wchar_t *) splitpoints.contents[i + 1] -
12221178 (wchar_t *) splitpoints.contents[i ] - 1;
1223- wb_ncat_force(&buf, splitpoints.contents[i], len);
1224- sb_ncat_force(&sbuf, wtos(splitpoints.contents[i]), len);
1179+ ci = idx(splitpoints.contents[i]);
1180+ wb_ncat_force(&buf, &word[ci], len);
1181+ sb_ncat_force(&cbuf, &cc[ci], len);
12251182
1226- wb_ncat_force(&buf, splitpoints.contents[lastelemindex], taillen);
1227- sb_ncat_force(&sbuf, wtos(splitpoints.contents[lastelemindex]), taillen);
1228- assert(buf.length == sbuf.length);
1183+ ci = idx(splitpoints.contents[lastelemindex]);
1184+ wb_ncat_force(&buf, &word[ci], taillen);
1185+ sb_ncat_force(&cbuf, &cc[ci], taillen);
1186+ assert(buf.length == cbuf.length);
12291187
12301188 /* expand the remaining portion recursively */
1231- expand_brace(wb_towcs(&buf), sb_tostr(&sbuf), valuelist, splitlist);
1189+ expand_brace(wb_towcs(&buf), sb_tostr(&cbuf), valuelist, cclist);
12321190 }
12331191 pl_destroy(&splitpoints);
12341192 free(word);
1235- free(split);
1236-#undef idx
1237-#undef wtos
1193+ free(cc);
12381194 }
12391195
12401196 /* Tries numeric brace expansion like "{01..05}".
12411197 * If unsuccessful, this function returns false without any side effects.
1242- * If successful, `word' and `split' are freed and the full expansion results
1243- * are added to `valuelist' and `splitlist'.
1198+ * If successful, `word' and `cc' are freed and the full expansion results are
1199+ * added to `valuelist' and `cclist'.
12441200 * `startc' is a pointer to the character right after L'{' in `word'.
12451201 */
12461202 bool try_expand_brace_sequence(
1247- wchar_t *word, char *restrict split, wchar_t *startc,
1248- plist_T *restrict valuelist, plist_T *restrict splitlist)
1203+ wchar_t *const word, char *restrict const cc, wchar_t *const startc,
1204+ plist_T *restrict valuelist, plist_T *restrict cclist)
12491205 {
12501206 long start, end, delta, value;
12511207 wchar_t *dotp, *dotbracep, *bracep, *c;
@@ -1298,33 +1254,38 @@
12981254 delta = -1;
12991255 }
13001256
1257+ /* validate charcategory_T */
1258+ if (cc[idx(bracep)] != CC_LITERAL)
1259+ return false;
1260+ for (size_t ci = idx(startc); ci < idx(bracep); ci++)
1261+ if (cc[ci] & CC_QUOTED)
1262+ return false;
1263+
13011264 /* expand the sequence */
13021265 value = start;
13031266 len = (startlen > endlen) ? startlen : endlen;
1304- wordlen = wcslen(word);
1267+ wordlen = idx(bracep + 1) + wcslen(bracep + 1); // = wcslen(word);
13051268 do {
13061269 xwcsbuf_T buf;
1307- xstrbuf_T sbuf;
1270+ xstrbuf_T cbuf;
13081271 wb_init(&buf);
1309- sb_init(&sbuf);
1272+ sb_init(&cbuf);
13101273
1311- wb_ncat_force(&buf, word, startc - 1 - word);
1312- sb_ncat_force(&sbuf, split, startc - 1 - word);
1274+ size_t slen = idx(startc - 1);
1275+ wb_ncat_force(&buf, word, slen);
1276+ sb_ncat_force(&cbuf, cc, slen);
13131277
13141278 int plen = wb_wprintf(&buf, sign ? L"%0+*ld" : L"%0*ld", len, value);
13151279 if (plen >= 0)
1316- sb_ccat_repeat(&sbuf, 0, plen);
1280+ sb_ccat_repeat(&cbuf, CC_HARD_EXPANSION, plen);
13171281
1318- wb_ncat_force(&buf,
1319- bracep + 1,
1320- wordlen - (bracep + 1 - word));
1321- sb_ncat_force(&sbuf,
1322- split + (bracep + 1 - word),
1323- wordlen - (bracep + 1 - word));
1324- assert(buf.length == sbuf.length);
1282+ slen = idx(bracep + 1);
1283+ wb_ncat_force(&buf, bracep + 1, wordlen - slen);
1284+ sb_ncat_force(&cbuf, cc + slen, wordlen - slen);
1285+ assert(buf.length == cbuf.length);
13251286
13261287 /* expand the remaining portion recursively */
1327- expand_brace(wb_towcs(&buf), sb_tostr(&sbuf), valuelist, splitlist);
1288+ expand_brace(wb_towcs(&buf), sb_tostr(&cbuf), valuelist, cclist);
13281289
13291290 if (delta >= 0) {
13301291 if (LONG_MAX - delta < value)
@@ -1336,8 +1297,9 @@
13361297 value += delta;
13371298 } while (delta >= 0 ? value <= end : value >= end);
13381299 free(word);
1339- free(split);
1300+ free(cc);
13401301 return true;
1302+#undef idx
13411303 }
13421304
13431305 /* Checks if the specified numeral starts with a L'0'.
@@ -1362,56 +1324,55 @@
13621324
13631325 /* Performs field splitting.
13641326 * `valuelist' is a NULL-terminated array of pointers to wide strings to split.
1365- * `splitlist' is an array of pointers to corresponding splittability strings.
1366- * `valuelist' and `splitlist' are `plfree'ed in this function.
1367- * The results are added to `dest'. */
1368-void fieldsplit_all(void **restrict valuelist, void **restrict splitlist,
1369- plist_T *restrict dest)
1327+ * `cclist' is an array of pointers to corresponding charcategory_T strings.
1328+ * `valuelist' and `cclist' are `plfree'ed in this function.
1329+ * The results are added to `outvaluelist' and `outcclist'. */
1330+void fieldsplit_all(
1331+ void **restrict const valuelist, void **restrict const cclist,
1332+ plist_T *restrict outvaluelist, plist_T *restrict outcclist)
13701333 {
1371- void **restrict s;
1372- void **restrict t;
1373- const wchar_t *ifs;
1374-
1375- ifs = getvar(L VAR_IFS);
1334+ const wchar_t *ifs = getvar(L VAR_IFS);
13761335 if (ifs == NULL)
13771336 ifs = DEFAULT_IFS;
13781337
1379- for (s = valuelist, t = splitlist; *s != NULL; s++, t++)
1380- fieldsplit(*s, *t, ifs, dest);
1338+ for (size_t i = 0; valuelist[i] != NULL; i++)
1339+ fieldsplit(valuelist[i], cclist[i], ifs, outvaluelist, outcclist);
13811340 free(valuelist);
1382- free(splitlist);
1341+ free(cclist);
13831342 }
13841343
13851344 /* Performs field splitting.
13861345 * `s' is the word to split and freed in this function.
1387- * `split' is the splittability string corresponding to `s' and also freed.
1388- * The results are added to `dest' as newly-malloced wide strings.
1389- * `ifs' must not be NULL. */
1390-void fieldsplit(wchar_t *restrict s, char *restrict split,
1391- const wchar_t *restrict ifs, plist_T *restrict dest)
1346+ * `cc' is the charcategory_T string corresponding to `s' and also freed
1347+ * `ifs' must not be NULL.
1348+ * The results are added to `outvaluelist' and `outcclist' as newly-malloced
1349+ * strings. */
1350+void fieldsplit(wchar_t *restrict s, char *restrict cc,
1351+ const wchar_t *restrict ifs,
1352+ plist_T *restrict outvaluelist, plist_T *restrict outcclist)
13921353 {
13931354 plist_T fields;
13941355
13951356 pl_init(&fields);
1396- extract_fields(s, split, true, ifs, &fields);
1357+ extract_fields(s, cc, ifs, &fields);
13971358 assert(fields.length % 2 == 0);
13981359
13991360 for (size_t i = 0; i < fields.length; i += 2) {
14001361 const wchar_t *start = fields.contents[i], *end = fields.contents[i+1];
1401- pl_add(dest, xwcsndup(start, end - start));
1362+ size_t idx = start - s, len = end - start;
1363+ pl_add(outvaluelist, xwcsndup(start, len));
1364+ pl_add(outcclist, memcpy(xmalloc(len), &cc[idx], len));
14021365 }
14031366
14041367 pl_destroy(&fields);
14051368 free(s);
1406- free(split);
1369+ free(cc);
14071370 }
14081371
14091372 /* Extracts fields from a string.
14101373 * `s' is the word to split.
1411- * `split' is the splittability string corresponding to `s'. It must be at least
1412- * as long as `wcslen(s)'.
1413- * If `escaped' is true, backslashes in `s' are treated as escapes. But
1414- * backslashes do not prevent splitting.
1374+ * `cc' is an array of charcategory_T values corresponding to `s'. It must be at
1375+ * least as long as `wcslen(s)'.
14151376 * `ifs' must not be NULL.
14161377 *
14171378 * The results are appended to `dest'. If n fields are found, 2n pointers are
@@ -1421,8 +1382,8 @@
14211382 * on.
14221383 *
14231384 * The word is split at characters that are contained in `ifs' and whose
1424- * corresponding character in the splittability string is non-zero. Refer to
1425- * POSIX for how whitespaces are treated in field splitting.
1385+ * corresponding character in `cc' is CC_SOFT_EXPANSION. Refer to POSIX for how
1386+ * whitespaces are treated in field splitting.
14261387 *
14271388 * If an IFS non-whitespace delimits an empty field, the field is assumed just
14281389 * before the non-whitespace delimiter. The empty last field is removed if
@@ -1440,8 +1401,8 @@
14401401 * "abc--123" -> "abc" "" "123"
14411402 * "abc - - 123" -> "abc" "" "123"
14421403 */
1443-wchar_t *extract_fields(const wchar_t *restrict s, const char *restrict split,
1444- bool escaped, const wchar_t *restrict ifs, plist_T *restrict dest)
1404+wchar_t *extract_fields(const wchar_t *restrict s, const char *restrict cc,
1405+ const wchar_t *restrict ifs, plist_T *restrict dest)
14451406 {
14461407 size_t index = 0;
14471408 size_t ifswhitestartindex;
@@ -1453,11 +1414,13 @@
14531414
14541415 for (;;) {
14551416 ifswhitestartindex = index;
1456- index += skip_ifs_whitespaces(&s[index], &split[index], escaped, ifs);
1417+ while (is_ifs_whitespace(s[index], cc[index], ifs))
1418+ index++;
14571419
14581420 /* extract next field, if any */
14591421 size_t fieldstartindex = index;
1460- index += skip_field(&s[index], &split[index], escaped, ifs);
1422+ while (is_non_ifs_char(s[index], cc[index], ifs))
1423+ index++;
14611424 if (index != fieldstartindex) {
14621425 pl_add(pl_add(dest, &s[fieldstartindex]), &s[index]);
14631426 afterfield = true;
@@ -1470,9 +1433,8 @@
14701433 add_empty_field(dest, &s[index]);
14711434
14721435 /* skip (only one) IFS non-whitespace */
1473- size_t ifsstartindex = index;
1474- index += skip_ifs(&s[index], &split[index], escaped, ifs);
1475- if (index != ifsstartindex) {
1436+ if (is_ifs_char(s[index], cc[index], ifs)) {
1437+ index++;
14761438 afterfield = false;
14771439 continue;
14781440 }
@@ -1491,50 +1453,22 @@
14911453 return (wchar_t *) &s[ifswhitestartindex];
14921454 }
14931455
1494-/* If `*s' is a (possibly escaped if `escaped') IFS character, returns the
1495- * number of characters to skip it. Otherwise returns zero. */
1496-size_t skip_ifs(const wchar_t *s, const char *split,
1497- bool escaped, const wchar_t *ifs)
1456+/* Returns true if `c' is a non-null, IFS character. */
1457+bool is_ifs_char(wchar_t c, charcategory_T cc, const wchar_t *ifs)
14981458 {
1499- size_t i = 0;
1500- if (escaped && s[i] == L'\\')
1501- i++;
1502- if (s[i] == L'\0')
1503- return 0;
1504- if (split[i] && wcschr(ifs, s[i]) != NULL)
1505- return i + 1;
1506- else
1507- return 0;
1459+ return cc == CC_SOFT_EXPANSION && c != L'\0' && wcschr(ifs, c) != NULL;
15081460 }
15091461
1510-/* Returns the length of IFS whitespace sequence starting at `*s'. */
1511-size_t skip_ifs_whitespaces(const wchar_t *s, const char *split,
1512- bool escaped, const wchar_t *ifs)
1462+/* Returns true if `c' is a non-null, IFS-whitespace character. */
1463+bool is_ifs_whitespace(wchar_t c, charcategory_T cc, const wchar_t *ifs)
15131464 {
1514- size_t total = 0;
1515- for (;;) {
1516- size_t current = skip_ifs(&s[total], &split[total], escaped, ifs);
1517- if (current == 0 || !iswspace(s[total + current - 1]))
1518- return total;
1519- total += current;
1520- }
1465+ return is_ifs_char(c, cc, ifs) && iswspace(c);
15211466 }
15221467
1523-/* Returns the length of a field starting at `*s'. */
1524-size_t skip_field(const wchar_t *s, const char *split,
1525- bool escaped, const wchar_t *ifs)
1468+/* Returns true if `c' is a non-null, non-IFS character. */
1469+bool is_non_ifs_char(wchar_t c, charcategory_T cc, const wchar_t *ifs)
15261470 {
1527- size_t index = 0;
1528- for (;;) {
1529- size_t saveindex = index;
1530- if (escaped && s[index] == L'\\')
1531- index++;
1532- if (s[index] == L'\0')
1533- return saveindex;
1534- if (split[index] && wcschr(ifs, s[index]) != NULL)
1535- return saveindex;
1536- index++;
1537- }
1471+ return c != L'\0' && !is_ifs_char(c, cc, ifs);
15381472 }
15391473
15401474 void add_empty_field(plist_T *dest, const wchar_t *p)
@@ -1716,50 +1650,76 @@
17161650 }
17171651 }
17181652
1719-/* Like `wcspbrk', but ignores backslashed characters in `s'. */
1720-wchar_t *escaped_wcspbrk(const wchar_t *s, const wchar_t *accept)
1653+/* Tests if a character should be backslash-escaped. */
1654+bool should_escape(char c, charcategory_T cc, escaping_T escaping)
17211655 {
1722- for (; *s != L'\0'; s++) {
1723- if (*s == L'\\') {
1724- s++;
1725- if (*s == L'\0')
1726- break;
1727- continue;
1728- }
1729- if (wcschr(accept, *s) != NULL)
1730- return (wchar_t *) s;
1656+ switch (escaping) {
1657+ case ES_NONE:
1658+ return false;
1659+ case ES_QUOTED_HARD:
1660+ if (c == L'\\' || (cc & CC_ORIGIN_MASK) == CC_HARD_EXPANSION)
1661+ return true;
1662+ /* falls thru! */
1663+ case ES_QUOTED:
1664+ return cc & CC_QUOTED;
17311665 }
1732- return NULL;
1666+ assert(false);
17331667 }
17341668
1735-/* Removes characters in `reject' from `s'.
1736- * Backslash escapes in `s' are recognized. Escapes and escaped characters are
1737- * kept in the result.
1738- * The result is a newly malloced string. */
1739-wchar_t *escaped_remove(const wchar_t *s, const wchar_t *reject)
1669+ * Removes all quotation marks in the input string `s' and optionally adds
1670+ * backslash escapes to the originally quoted characters as specified by
1671+ * `escaping'. The result is a newly malloced string. */
1672+wchar_t *quote_removal(
1673+ const wchar_t *restrict s, const char *restrict cc, escaping_T escaping)
17401674 {
17411675 xwcsbuf_T result;
17421676 wb_init(&result);
1743- for (;;) {
1744- const wchar_t *rejectchar = escaped_wcspbrk(s, reject);
1745- if (rejectchar == NULL)
1746- break;
1747- wb_ncat_force(&result, s, rejectchar - s);
1748- s = rejectchar + 1;
1677+ for (size_t i = 0; s[i] != L'\0'; i++) {
1678+ if (cc[i] & CC_QUOTATION)
1679+ continue;
1680+ if (should_escape(s[i], cc[i], escaping))
1681+ wb_wccat(&result, L'\\');
1682+ wb_wccat(&result, s[i]);
17491683 }
1750- wb_cat(&result, s);
17511684 return wb_towcs(&result);
17521685 }
17531686
1754-/* Like `escaped_remove', but frees `s' before returning the result. */
1755-wchar_t *escaped_remove_free(wchar_t *s, const wchar_t *reject)
1687+/* Like `quote_removal', but frees the arguments. */
1688+wchar_t *quote_removal_free(
1689+ wchar_t *restrict s, char *restrict cc, escaping_T escaping)
17561690 {
1757- wchar_t *result = escaped_remove(s, reject);
1691+ wchar_t *result = quote_removal(s, cc, escaping);
17581692 free(s);
1693+ free(cc);
17591694 return result;
17601695 }
17611696
1697+/* Performs empty field removal and quote removal.
1698+ * In this function, `e->valuelist' is modified and `e->cclist' is destroyed. */
1699+void remove_empty_fields_and_quotes(
1700+ struct expand_four_T *e, escaping_T escaping)
1701+{
1702+ /* empty field removal */
1703+ if (e->valuelist.length == 1) {
1704+ const wchar_t *field = e->valuelist.contents[0];
1705+ const char *cc = e->cclist.contents[0];
1706+ if (field[0] == L'\0' ||
1707+ (e->zeroword && wcscmp(field, L"\"\"") == 0 &&
1708+ (cc[0] & cc[1] & CC_QUOTATION))) {
1709+ pl_clear(&e->valuelist, free);
1710+ pl_clear(&e->cclist, free);
1711+ }
1712+ }
17621713
1714+ /* quote removal */
1715+ for (size_t i = 0; i < e->valuelist.length; i++)
1716+ e->valuelist.contents[i] = quote_removal_free(
1717+ e->valuelist.contents[i], e->cclist.contents[i], escaping);
1718+
1719+ pl_destroy(&e->cclist);
1720+}
1721+
1722+
17631723 /********** File Name Expansion (Glob) **********/
17641724
17651725 /* Makes a option value from the current shell settings. */
@@ -1838,7 +1798,7 @@
18381798
18391799 if (!parse_string(&info, &word))
18401800 return NULL;
1841- result = expand_single_and_unescape(word, TT_NONE, false, !esc);
1801+ result = expand_single(word, TT_NONE, esc ? Q_INDQ : Q_LITERAL, ES_NONE);
18421802 wordfree(word);
18431803 return result;
18441804 }
--- yash/trunk/expand.h (revision 4072)
+++ yash/trunk/expand.h (revision 4073)
@@ -1,6 +1,6 @@
11 /* Yash: yet another shell */
22 /* expand.h: word expansion */
3-/* (C) 2007-2018 magicant */
3+/* (C) 2007-2020 magicant */
44
55 /* This program is free software: you can redistribute it and/or modify
66 * it under the terms of the GNU General Public License as published by
@@ -30,6 +30,48 @@
3030 /* type of tilde expansion */
3131 typedef enum { TT_NONE, TT_SINGLE, TT_MULTI, } tildetype_T;
3232
33+/* treatment of quotation marks during expansion */
34+typedef enum {
35+ Q_WORD, /* Single quotations, double quotations, and backslashes are
36+ recognized as in the normal word. */
37+ Q_INDQ, /* The string is quoted as if it is inside a pair of double
38+ quotations: Single and double quotations are not recognized.
39+ Backslashes are recognized only before a $, `, or \. */
40+ Q_LITERAL, /* No quotations are recognized. */
41+} quoting_T;
42+
43+/* Category of characters resulting from expansion.
44+ * A charcategory_T value is bitwise or of one of the origin categories
45+ * (CC_LITERAL, CC_HARD_EXPANSION, and CC_SOFT_EXPANSION) and optionally any
46+ * combinations of modifier flags (CC_QUOTED and CC_QUOTATION).
47+ * The category determines if a character is subject to brace expansion, field
48+ * splitting, and globbing (pathname expansion). */
49+typedef enum {
50+ CC_LITERAL, /* from the original word */
51+ CC_HARD_EXPANSION, /* from tilde expansion or numeric brace expansion */
52+ CC_SOFT_EXPANSION, /* from parameter expansion, command substitution or
53+ arithmetic expansion */
54+ CC_ORIGIN_MASK = (1 << 2) - 1,
55+ CC_QUOTED = 1 << 2, /* The character is quoted by backslash, single- or
56+ double-quotes. */
57+ CC_QUOTATION = 1 << 3, /* The character is a quotation mark */
58+} charcategory_T;
59+/* A character can be both CC_QUOTED and CC_QUOTATION at a time. This may happen
60+ * in a nested quotation like "\"". */
61+
62+/* type of characters to be backslash-escaped in the expansion results */
63+typedef enum {
64+ ES_NONE, /* No characters are escaped. */
65+ ES_QUOTED, /* Quoted characters remain escaped. */
66+ ES_QUOTED_HARD, /* Ditto, and characters marked CC_HARD_EXPANSION and
67+ backslashes are also escaped. */
68+} escaping_T;
69+/* ES_QUOTED_HARD is for pathname expansion patterns while ES_QUOTED is for
70+ * other patterns. With ES_QUOTED_HARD, backslashes that are not quotation
71+ * marks are escaped to prevent them from being regarded as escaping
72+ * characters. This does not apply to ES_QUOTED because the pattern is
73+ * supposed to be matched without quote removal. */
74+
3375 struct wordunit_T;
3476 struct plist_T;
3577 extern _Bool expand_line(
@@ -40,20 +82,17 @@
4082 extern _Bool expand_multiple(
4183 const struct wordunit_T *restrict w, struct plist_T *restrict list)
4284 __attribute__((nonnull(2)));
43-extern wchar_t *expand_single(const struct wordunit_T *arg,
44- tildetype_T tilde, _Bool processquotes, _Bool escapeall)
85+extern wchar_t *expand_single(
86+ const struct wordunit_T *w,
87+ tildetype_T tilde, quoting_T quoting, escaping_T escaping)
4588 __attribute__((malloc,warn_unused_result));
46-extern wchar_t *expand_single_and_unescape(const struct wordunit_T *arg,
47- tildetype_T tilde, _Bool processquotes, _Bool escapeall)
48- __attribute__((malloc,warn_unused_result));
4989 extern char *expand_single_with_glob(
5090 const struct wordunit_T *arg, tildetype_T tilde)
5191 __attribute__((malloc,warn_unused_result));
5292
5393 extern wchar_t *extract_fields(
54- const wchar_t *restrict s, const char *restrict split,
55- _Bool escaped, const wchar_t *restrict ifs,
56- struct plist_T *restrict dest)
94+ const wchar_t *restrict s, const char *restrict cc,
95+ const wchar_t *restrict ifs, struct plist_T *restrict dest)
5796 __attribute__((nonnull));
5897
5998 struct xwcsbuf_T;
--- yash/trunk/lineedit/compparse.c (revision 4072)
+++ yash/trunk/lineedit/compparse.c (revision 4073)
@@ -651,7 +651,8 @@
651651 if (pi->ctxt->pwords == NULL
652652 && (pi->ctxt->type & CTXT_VBRACED)) {
653653 xwcsbuf_T buf;
654- wchar_t *prefix = expand_single(first, tilde, true, false);
654+ wchar_t *prefix =
655+ expand_single(first, tilde, Q_WORD, ES_QUOTED_HARD);
655656 assert(prefix != NULL);
656657 pi->ctxt->pattern = wb_towcs(wb_catfree(
657658 wb_initwith(&buf, prefix), pi->ctxt->pattern));
@@ -717,7 +718,7 @@
717718 pi->ctxt->type = ctxttype;
718719 pi->ctxt->pwordc = 0;
719720 pi->ctxt->pwords = NULL;
720- pi->ctxt->pattern = expand_single(first, tilde, true, false);
721+ pi->ctxt->pattern = expand_single(first, tilde, Q_WORD, ES_QUOTED_HARD);
721722 pi->ctxt->srcindex = srcindex;
722723 wordfree(first);
723724 return NULL;
--- yash/trunk/redir.c (revision 4072)
+++ yash/trunk/redir.c (revision 4073)
@@ -1,6 +1,6 @@
11 /* Yash: yet another shell */
22 /* redir.c: manages file descriptors and provides functions for redirections */
3-/* (C) 2007-2018 magicant */
3+/* (C) 2007-2020 magicant */
44
55 /* This program is free software: you can redistribute it and/or modify
66 * it under the terms of the GNU General Public License as published by
@@ -448,8 +448,7 @@
448448 if (is_interactive) {
449449 return expand_single_with_glob(filename, TT_SINGLE);
450450 } else {
451- wchar_t *result = expand_single_and_unescape(
452- filename, TT_SINGLE, true, false);
451+ wchar_t *result = expand_single(filename, TT_SINGLE, Q_WORD, ES_NONE);
453452 if (result == NULL)
454453 return NULL;
455454 char *mbsresult = realloc_wcstombs(result);
@@ -758,8 +757,7 @@
758757 * temporary file. */
759758 int open_heredocument(const wordunit_T *contents)
760759 {
761- wchar_t *wcontents = expand_single_and_unescape(
762- contents, TT_NONE, false, false);
760+ wchar_t *wcontents = expand_single(contents, TT_NONE, Q_INDQ, ES_NONE);
763761 if (wcontents == NULL)
764762 return -1;
765763
--- yash/trunk/variable.c (revision 4072)
+++ yash/trunk/variable.c (revision 4073)
@@ -740,8 +740,8 @@
740740
741741 switch (assign->a_type) {
742742 case A_SCALAR:
743- value = expand_single_and_unescape(
744- assign->a_scalar, TT_MULTI, true, false);
743+ value =
744+ expand_single(assign->a_scalar, TT_MULTI, Q_WORD, ES_NONE);
745745 if (value == NULL)
746746 return false;
747747 if (shopt_xtrace)
@@ -1594,8 +1594,8 @@
15941594 static inline bool set_optarg(const wchar_t *value);
15951595 static bool set_variable_single_char(const wchar_t *varname, wchar_t value)
15961596 __attribute__((nonnull));
1597-static bool read_with_prompt(xwcsbuf_T *buf, xstrbuf_T *split,
1598- const struct reading_option_T *ro)
1597+static bool read_with_prompt(
1598+ xwcsbuf_T *buf, xstrbuf_T *cc, const struct reading_option_T *ro)
15991599 __attribute__((nonnull));
16001600 static struct promptset_T promptset_for_read(
16011601 bool firstline, const struct reading_option_T *ro)
@@ -1605,7 +1605,7 @@
16051605 __attribute__((malloc,warn_unused_result));
16061606 static wchar_t *read_one_line(void)
16071607 __attribute__((malloc,warn_unused_result));
1608-static bool unescape_line(const wchar_t *line, xwcsbuf_T *buf, xstrbuf_T *split)
1608+static bool unescape_line(const wchar_t *line, xwcsbuf_T *buf, xstrbuf_T *cc)
16091609 __attribute__((nonnull));
16101610 static void assign_array(const wchar_t *name, const plist_T *ranges, size_t i)
16111611 __attribute__((nonnull));
@@ -2747,12 +2747,12 @@
27472747 }
27482748
27492749 xwcsbuf_T buf;
2750- xstrbuf_T split;
2750+ xstrbuf_T cc;
27512751
27522752 wb_init(&buf);
2753- sb_init(&split);
2754- if (!read_with_prompt(&buf, &split, &ro)) {
2755- sb_destroy(&split);
2753+ sb_init(&cc);
2754+ if (!read_with_prompt(&buf, &cc, &ro)) {
2755+ sb_destroy(&cc);
27562756 wb_destroy(&buf);
27572757 return Exit_FAILURE;
27582758 }
@@ -2776,7 +2776,7 @@
27762776 if (ifs == NULL)
27772777 ifs = DEFAULT_IFS;
27782778
2779- tail = extract_fields(buf.contents, split.contents, false, ifs, &list);
2779+ tail = extract_fields(buf.contents, cc.contents, ifs, &list);
27802780 assert(list.length % 2 == 0);
27812781 }
27822782
@@ -2812,7 +2812,7 @@
28122812 }
28132813
28142814 pl_destroy(&list);
2815- sb_destroy(&split);
2815+ sb_destroy(&cc);
28162816 wb_destroy(&buf);
28172817 return (!eof && yash_error_message_count == 0)
28182818 ? Exit_SUCCESS : Exit_FAILURE;
@@ -2819,15 +2819,15 @@
28192819 }
28202820
28212821 /* Reads one line from the standard input. The result is appended to `buf' and
2822- * `split'. `buf' will contain no escapes or other special characters. `split'
2823- * is the splittability string for `buf'. The string is splittable at characters
2824- * that were not backslash-escaped.
2822+ * `cc'. `buf' will contain no escapes or other special characters. `cc' is the
2823+ * charcategory_T string for `buf'. It indicates whether `buf' can be split at
2824+ * the corresponding character when passed to `extract_fields'.
28252825 * If `ro->raw' is true, exactly one line is read and backslashes are not
28262826 * treated as escapes. Otherwise, line continuations cause this function to read
28272827 * more and backslash escapes are recognized.
28282828 * Returns false on error while reading. */
2829-bool read_with_prompt(xwcsbuf_T *buf, xstrbuf_T *split,
2830- const struct reading_option_T *ro)
2829+bool read_with_prompt(
2830+ xwcsbuf_T *buf, xstrbuf_T *cc, const struct reading_option_T *ro)
28312831 {
28322832 bool firstline = true;
28332833 bool completed = false;
@@ -2847,10 +2847,10 @@
28472847
28482848 if (ro->raw) {
28492849 wb_cat(buf, line);
2850- sb_ccat_repeat(split, true, wcslen(line));
2850+ sb_ccat_repeat(cc, CC_SOFT_EXPANSION, wcslen(line));
28512851 completed = true;
28522852 } else {
2853- completed = unescape_line(line, buf, split);
2853+ completed = unescape_line(line, buf, cc);
28542854 }
28552855 free(line);
28562856
@@ -2926,11 +2926,11 @@
29262926 }
29272927
29282928 /* Parses a string that may contain backslash escapes.
2929- * Unescaped `line' is appended to `buf' with a corresponding splittability
2930- * string appended to `split'. Characters are splittable iff not escaped.
2929+ * Unescaped `line' is appended to `buf' with a corresponding charcategory_T
2930+ * string appended to `cc'.
29312931 * The result is false iff `line' ends with a line continuation.
29322932 * The line continuation is not appended to `buf'. */
2933-bool unescape_line(const wchar_t *line, xwcsbuf_T *buf, xstrbuf_T *split)
2933+bool unescape_line(const wchar_t *line, xwcsbuf_T *buf, xstrbuf_T *cc)
29342934 {
29352935 for (;;) {
29362936 bool splitchar;
@@ -2953,7 +2953,7 @@
29532953 break;
29542954 }
29552955 wb_wccat(buf, *line);
2956- sb_ccat(split, splitchar);
2956+ sb_ccat(cc, CC_SOFT_EXPANSION | (splitchar ? 0 : CC_QUOTED));
29572957 line++;
29582958 }
29592959 }
Show on old repository browser