Merge branch charcategory (#39288)
@@ -13,6 +13,9 @@ | ||
13 | 13 | * The "command" built-in with the -v or -V option was printing |
14 | 14 | the pathnames of external commands with a redundant leading slash |
15 | 15 | when the current working directory is "/" or "//". |
16 | + * In pattern matching except for pathname expansion, when an | |
17 | + unescaped backslash results from an expansion in the pattern, | |
18 | + it is now treated as an escape character. | |
16 | 19 | |
17 | 20 | ---------------------------------------------------------------------- |
18 | 21 | Yash 2.50 |
@@ -1,6 +1,6 @@ | ||
1 | 1 | /* Yash: yet another shell */ |
2 | 2 | /* test.c: test builtin */ |
3 | -/* (C) 2007-2018 magicant */ | |
3 | +/* (C) 2007-2020 magicant */ | |
4 | 4 | |
5 | 5 | /* This program is free software: you can redistribute it and/or modify |
6 | 6 | * it under the terms of the GNU General Public License as published by |
@@ -77,9 +77,11 @@ | ||
77 | 77 | #if YASH_ENABLE_DOUBLE_BRACKET |
78 | 78 | static int eval_dbexp(const dbexp_T *e) |
79 | 79 | __attribute__((nonnull)); |
80 | -static inline wchar_t *expand_double_bracket_operand(const wordunit_T *w) | |
80 | +static inline wchar_t *expand_double_bracket_operand_escaped( | |
81 | + const wordunit_T *w) | |
81 | 82 | __attribute__((nonnull,malloc,warn_unused_result)); |
82 | -static wchar_t *expand_and_unescape_double_bracket_operand(const wordunit_T *w) | |
83 | +static inline wchar_t *expand_double_bracket_operand_unescaped( | |
84 | + const wordunit_T *w) | |
83 | 85 | __attribute__((nonnull,malloc,warn_unused_result)); |
84 | 86 | static bool test_triple_db( |
85 | 87 | const wchar_t *lhs, const wchar_t *op, const wchar_t *rhs_escaped) |
@@ -744,22 +746,22 @@ | ||
744 | 746 | } |
745 | 747 | |
746 | 748 | case DBE_UNARY: |
747 | - rhs = expand_and_unescape_double_bracket_operand(e->rhs.word); | |
749 | + rhs = expand_double_bracket_operand_unescaped(e->rhs.word); | |
748 | 750 | if (rhs == NULL) |
749 | 751 | return Exit_TESTERROR; |
750 | 752 | result = test_double((void *[]) { e->operator, rhs }); |
751 | 753 | break; |
752 | 754 | case DBE_BINARY: |
753 | - lhs = expand_and_unescape_double_bracket_operand(e->lhs.word); | |
755 | + lhs = expand_double_bracket_operand_unescaped(e->lhs.word); | |
754 | 756 | if (lhs == NULL) |
755 | 757 | return Exit_TESTERROR; |
756 | - rhs = expand_double_bracket_operand(e->rhs.word); | |
758 | + rhs = expand_double_bracket_operand_escaped(e->rhs.word); | |
757 | 759 | if (rhs == NULL) |
758 | 760 | return Exit_TESTERROR; |
759 | 761 | result = test_triple_db(lhs, e->operator, rhs); |
760 | 762 | break; |
761 | 763 | case DBE_STRING: |
762 | - rhs = expand_and_unescape_double_bracket_operand(e->rhs.word); | |
764 | + rhs = expand_double_bracket_operand_unescaped(e->rhs.word); | |
763 | 765 | if (rhs == NULL) |
764 | 766 | return Exit_TESTERROR; |
765 | 767 | result = test_single((void *[]) { rhs }); |
@@ -778,19 +780,16 @@ | ||
778 | 780 | |
779 | 781 | /* Expands the operand of a primary. |
780 | 782 | * The result may contain backslash escapes. */ |
781 | -wchar_t *expand_double_bracket_operand(const wordunit_T *w) | |
783 | +wchar_t *expand_double_bracket_operand_escaped(const wordunit_T *w) | |
782 | 784 | { |
783 | - return expand_single(w, TT_SINGLE, true, false); | |
785 | + return expand_single(w, TT_SINGLE, Q_WORD, ES_QUOTED); | |
784 | 786 | } |
785 | 787 | |
786 | 788 | /* Expands the operand of a primary. |
787 | 789 | * The result is literal (does not contain backslash escapes). */ |
788 | -wchar_t *expand_and_unescape_double_bracket_operand(const wordunit_T *w) | |
790 | +wchar_t *expand_double_bracket_operand_unescaped(const wordunit_T *w) | |
789 | 791 | { |
790 | - wchar_t *e = expand_double_bracket_operand(w); | |
791 | - if (e == NULL) | |
792 | - return NULL; | |
793 | - return unescapefree(e); | |
792 | + return expand_single(w, TT_SINGLE, Q_WORD, ES_NONE); | |
794 | 793 | } |
795 | 794 | |
796 | 795 | /* Tests the specified three-token (binary) primary in the double-bracket |
@@ -513,14 +513,14 @@ | ||
513 | 513 | { |
514 | 514 | assert(c->c_type == CT_CASE); |
515 | 515 | |
516 | - wchar_t *word = expand_single_and_unescape( | |
517 | - c->c_casword, TT_SINGLE, true, false); | |
516 | + wchar_t *word = expand_single(c->c_casword, TT_SINGLE, Q_WORD, ES_NONE); | |
518 | 517 | if (word == NULL) |
519 | 518 | goto fail; |
520 | 519 | |
521 | 520 | for (const caseitem_T *ci = c->c_casitems; ci != NULL; ci = ci->next) { |
522 | 521 | for (void **pats = ci->ci_patterns; *pats != NULL; pats++) { |
523 | - wchar_t *pattern = expand_single(*pats, TT_SINGLE, true, false); | |
522 | + wchar_t *pattern = | |
523 | + expand_single(*pats, TT_SINGLE, Q_WORD, ES_QUOTED); | |
524 | 524 | if (pattern == NULL) |
525 | 525 | goto fail; |
526 | 526 |
@@ -555,8 +555,8 @@ | ||
555 | 555 | { |
556 | 556 | assert(c->c_type == CT_FUNCDEF); |
557 | 557 | |
558 | - wchar_t *funcname = expand_single_and_unescape( | |
559 | - c->c_funcname, TT_SINGLE, true, false); | |
558 | + wchar_t *funcname = | |
559 | + expand_single(c->c_funcname, TT_SINGLE, Q_WORD, ES_NONE); | |
560 | 560 | if (funcname != NULL) { |
561 | 561 | if (define_function(funcname, c->c_funcbody)) |
562 | 562 | laststatus = Exit_SUCCESS; |
@@ -43,30 +43,34 @@ | ||
43 | 43 | #include "yash.h" |
44 | 44 | |
45 | 45 | |
46 | -/* characters that have special meanings in brace expansion, quote removal, and | |
47 | - * globbing. When an unquoted expansion includes these characters, they are | |
48 | - * backslashed to protect from unexpected side effects in succeeding expansion | |
49 | - * steps. */ | |
50 | -#define CHARS_ESCAPED L"\\\"\'{,}" | |
51 | - | |
52 | 46 | /* data passed between expansion functions */ |
53 | 47 | struct expand_four_T { |
54 | - plist_T valuelist, splitlist; | |
55 | - xwcsbuf_T valuebuf; | |
56 | - xstrbuf_T splitbuf; | |
48 | + plist_T valuelist, cclist; | |
57 | 49 | bool zeroword; |
58 | 50 | }; |
51 | +struct expand_four_inner_T { | |
52 | + struct expand_four_T e; | |
53 | + xwcsbuf_T valuebuf; | |
54 | + xstrbuf_T ccbuf; | |
55 | +}; | |
56 | +/* If expansion yields multiple fields, all the fields are added to `valuelist' | |
57 | + * except that the last field remains in `valuebuf'. Character categories | |
58 | + * (charcategory_T) corresponding to the characters in `valuelist' and | |
59 | + * `valuebuf' are cast to char and added to `cclist' and `ccbuf' accordingly. */ | |
59 | 60 | /* When "$@" appears during expansion and there is no positional parameter, the |
60 | 61 | * `zeroword' flag is set so that the quoted empty word can be removed later. */ |
61 | 62 | |
62 | -static plist_T expand_four_and_remove_quotes( | |
63 | - const wordunit_T *restrict w, | |
64 | - tildetype_T tilde, bool processquotes, bool escapeall); | |
65 | -static bool expand_four(const wordunit_T *restrict w, | |
66 | - tildetype_T tilde, bool processquotes, bool escapeall, bool rec, | |
67 | - struct expand_four_T *restrict e) | |
68 | - __attribute__((nonnull(6))); | |
69 | -static void fill_splitbuf(struct expand_four_T *e, bool splittable) | |
63 | +static plist_T expand_word(const wordunit_T *w, | |
64 | + tildetype_T tilde, quoting_T quoting, escaping_T escaping) | |
65 | + __attribute__((warn_unused_result)); | |
66 | +static struct expand_four_T expand_four(const wordunit_T *restrict w, | |
67 | + tildetype_T tilde, quoting_T quoting, charcategory_T defaultcc) | |
68 | + __attribute__((warn_unused_result)); | |
69 | +static bool expand_four_inner(const wordunit_T *restrict w, tildetype_T tilde, | |
70 | + quoting_T quoting, charcategory_T defaultcc, | |
71 | + struct expand_four_inner_T *restrict e) | |
72 | + __attribute__((nonnull(5))); | |
73 | +static void fill_ccbuf(struct expand_four_inner_T *e, charcategory_T c) | |
70 | 74 | __attribute__((nonnull)); |
71 | 75 | |
72 | 76 | static wchar_t *expand_tilde(const wchar_t **ss, |
@@ -76,7 +80,7 @@ | ||
76 | 80 | enum indextype_T { IDX_NONE, IDX_ALL, IDX_CONCAT, IDX_NUMBER, }; |
77 | 81 | |
78 | 82 | static bool expand_param(const paramexp_T *restrict p, bool indq, |
79 | - struct expand_four_T *restrict e) | |
83 | + struct expand_four_inner_T *restrict e) | |
80 | 84 | __attribute__((nonnull)); |
81 | 85 | static enum indextype_T parse_indextype(const wchar_t *indexstr) |
82 | 86 | __attribute__((nonnull,pure)); |
@@ -99,33 +103,34 @@ | ||
99 | 103 | static void subst_length_each(void **slist) |
100 | 104 | __attribute__((nonnull)); |
101 | 105 | |
102 | -static void expand_brace_each(void **restrict values, void **restrict splits, | |
103 | - plist_T *restrict valuelist, plist_T *restrict splitlist) | |
106 | +static void expand_brace_each( | |
107 | + void *const *restrict values, void *const *restrict ccs, | |
108 | + plist_T *restrict valuelist, plist_T *restrict cclist) | |
104 | 109 | __attribute__((nonnull)); |
105 | -static void expand_brace(wchar_t *restrict word, char *restrict split, | |
106 | - plist_T *restrict valuelist, plist_T *restrict splitlist) | |
110 | +static void expand_brace( | |
111 | + wchar_t *restrict word, char *restrict cc, | |
112 | + plist_T *restrict valuelist, plist_T *restrict cclist) | |
107 | 113 | __attribute__((nonnull)); |
108 | 114 | static bool try_expand_brace_sequence( |
109 | - wchar_t *word, char *restrict split, wchar_t *startc, | |
110 | - plist_T *restrict valuelist, plist_T *restrict splitlist) | |
115 | + wchar_t *word, char *restrict cc, wchar_t *startc, | |
116 | + plist_T *restrict valuelist, plist_T *restrict cclist) | |
111 | 117 | __attribute__((nonnull)); |
112 | 118 | static bool has_leading_zero(const wchar_t *restrict s, bool *restrict sign) |
113 | 119 | __attribute__((nonnull)); |
114 | 120 | |
115 | -static void fieldsplit_all(void **restrict valuelist, void **restrict splitlist, | |
116 | - plist_T *restrict dest) | |
121 | +static void fieldsplit_all( | |
122 | + void **restrict valuelist, void **restrict cclist, | |
123 | + plist_T *restrict outvaluelist, plist_T *restrict outcclist) | |
117 | 124 | __attribute__((nonnull)); |
118 | -static void fieldsplit(wchar_t *restrict s, char *restrict split, | |
119 | - const wchar_t *restrict ifs, plist_T *restrict dest) | |
125 | +static void fieldsplit(wchar_t *restrict s, char *restrict cc, | |
126 | + const wchar_t *restrict ifs, | |
127 | + plist_T *restrict outvaluelist, plist_T *restrict outcclist) | |
120 | 128 | __attribute__((nonnull)); |
121 | -static size_t skip_ifs(const wchar_t *s, const char *split, | |
122 | - bool escaped, const wchar_t *ifs) | |
129 | +static bool is_ifs_char(wchar_t c, charcategory_T cc, const wchar_t *ifs) | |
123 | 130 | __attribute__((nonnull,pure)); |
124 | -static size_t skip_ifs_whitespaces(const wchar_t *s, const char *split, | |
125 | - bool escaped, const wchar_t *ifs) | |
131 | +static bool is_ifs_whitespace(wchar_t c, charcategory_T cc, const wchar_t *ifs) | |
126 | 132 | __attribute__((nonnull,pure)); |
127 | -static size_t skip_field(const wchar_t *s, const char *split, | |
128 | - bool escaped, const wchar_t *ifs) | |
133 | +static bool is_non_ifs_char(wchar_t c, charcategory_T cc, const wchar_t *ifs) | |
129 | 134 | __attribute__((nonnull,pure)); |
130 | 135 | static void add_empty_field(plist_T *dest, const wchar_t *p) |
131 | 136 | __attribute__((nonnull)); |
@@ -133,12 +138,17 @@ | ||
133 | 138 | static inline void add_sq( |
134 | 139 | const wchar_t *restrict *ss, xwcsbuf_T *restrict buf, bool escape) |
135 | 140 | __attribute__((nonnull)); |
136 | -static wchar_t *escaped_wcspbrk(const wchar_t *s, const wchar_t *accept) | |
137 | - __attribute__((nonnull)); | |
138 | -static wchar_t *escaped_remove(const wchar_t *s, const wchar_t *reject) | |
141 | +static inline bool should_escape(char c, charcategory_T cc, escaping_T escaping) | |
142 | + __attribute__((const)); | |
143 | +static wchar_t *quote_removal( | |
144 | + const wchar_t *restrict s, const char *restrict cc, escaping_T escaping) | |
139 | 145 | __attribute__((nonnull,malloc,warn_unused_result)); |
140 | -static inline wchar_t *escaped_remove_free(wchar_t *s, const wchar_t *reject) | |
146 | +static wchar_t *quote_removal_free( | |
147 | + wchar_t *restrict s, char *restrict cc, escaping_T escaping) | |
141 | 148 | __attribute__((nonnull,malloc,warn_unused_result)); |
149 | +static void remove_empty_fields_and_quotes( | |
150 | + struct expand_four_T *e, escaping_T escaping) | |
151 | + __attribute__((nonnull)); | |
142 | 152 | |
143 | 153 | static void glob_all(void **restrict patterns, plist_T *restrict list) |
144 | 154 | __attribute__((nonnull)); |
@@ -185,65 +195,44 @@ | ||
185 | 195 | * On error in a non-interactive shell, the shell exits. */ |
186 | 196 | bool expand_multiple(const wordunit_T *w, plist_T *list) |
187 | 197 | { |
188 | - struct expand_four_T expand; | |
189 | - pl_init(&expand.valuelist); | |
190 | - pl_init(&expand.splitlist); | |
191 | - wb_init(&expand.valuebuf); | |
192 | - sb_init(&expand.splitbuf); | |
193 | - expand.zeroword = false; | |
194 | - | |
195 | 198 | /* four expansions (w -> valuelist) */ |
196 | - if (!expand_four(w, TT_SINGLE, true, false, false, &expand)) { | |
197 | - plfree(pl_toary(&expand.valuelist), free); | |
198 | - plfree(pl_toary(&expand.splitlist), free); | |
199 | - wb_destroy(&expand.valuebuf); | |
200 | - sb_destroy(&expand.splitbuf); | |
199 | + struct expand_four_T expand = | |
200 | + expand_four(w, TT_SINGLE, Q_WORD, CC_LITERAL); | |
201 | + if (expand.valuelist.contents == NULL) { | |
201 | 202 | maybe_exit_on_error(); |
202 | 203 | return false; |
203 | 204 | } |
204 | - assert(expand.valuebuf.length == expand.splitbuf.length); | |
205 | - pl_add(&expand.valuelist, wb_towcs(&expand.valuebuf)); | |
206 | - pl_add(&expand.splitlist, sb_tostr(&expand.splitbuf)); | |
207 | 205 | |
208 | 206 | /* brace expansion (valuelist -> valuelist2) */ |
209 | - plist_T valuelist2, splitlist2; | |
207 | + plist_T valuelist2, cclist2; | |
210 | 208 | if (shopt_braceexpand) { |
211 | 209 | pl_init(&valuelist2); |
212 | - pl_init(&splitlist2); | |
213 | - expand_brace_each(expand.valuelist.contents, expand.splitlist.contents, | |
214 | - &valuelist2, &splitlist2); | |
210 | + pl_init(&cclist2); | |
211 | + expand_brace_each(expand.valuelist.contents, expand.cclist.contents, | |
212 | + &valuelist2, &cclist2); | |
215 | 213 | pl_clear(&expand.valuelist, 0); |
216 | - pl_destroy(&expand.splitlist); | |
214 | + pl_clear(&expand.cclist, 0); | |
217 | 215 | } else { |
218 | 216 | valuelist2 = expand.valuelist; |
219 | - splitlist2 = expand.splitlist; | |
217 | + cclist2 = expand.cclist; | |
220 | 218 | pl_init(&expand.valuelist); |
219 | + pl_init(&expand.cclist); | |
221 | 220 | } |
222 | 221 | |
223 | 222 | /* field splitting (valuelist2 -> valuelist) */ |
224 | - fieldsplit_all( | |
225 | - pl_toary(&valuelist2), pl_toary(&splitlist2), &expand.valuelist); | |
223 | + fieldsplit_all(pl_toary(&valuelist2), pl_toary(&cclist2), | |
224 | + &expand.valuelist, &expand.cclist); | |
225 | + assert(expand.valuelist.length == expand.cclist.length); | |
226 | 226 | |
227 | - /* empty field removal */ | |
228 | - if (expand.valuelist.length == 1) { | |
229 | - const wchar_t *field = expand.valuelist.contents[0]; | |
230 | - if (field[0] == L'\0' || | |
231 | - (expand.zeroword && wcscmp(field, L"\"\"") == 0)) { | |
232 | - pl_clear(&expand.valuelist, free); | |
233 | - } | |
234 | - } | |
227 | + /* empty field removal & quote removal */ | |
228 | + remove_empty_fields_and_quotes( | |
229 | + &expand, shopt_glob ? ES_QUOTED_HARD : ES_NONE); | |
235 | 230 | |
236 | - /* quote removal */ | |
237 | - for (size_t i = 0; i < expand.valuelist.length; i++) | |
238 | - expand.valuelist.contents[i] = | |
239 | - escaped_remove_free(expand.valuelist.contents[i], L"\"\'"); | |
240 | - | |
241 | 231 | /* globbing (valuelist -> list) */ |
242 | 232 | if (shopt_glob) { |
243 | 233 | glob_all(pl_toary(&expand.valuelist), list); |
244 | 234 | } else { |
245 | - for (size_t i = 0; i < expand.valuelist.length; i++) | |
246 | - pl_add(list, unescapefree(expand.valuelist.contents[i])); | |
235 | + pl_cat(list, expand.valuelist.contents); | |
247 | 236 | pl_destroy(&expand.valuelist); |
248 | 237 | } |
249 | 238 |
@@ -250,40 +239,42 @@ | ||
250 | 239 | return true; |
251 | 240 | } |
252 | 241 | |
242 | +/* Expands a word to (possibly any number of) fields. | |
243 | + * If successful, the return value is a plist_T containing newly malloced wide | |
244 | + * strings. In most cases, the plist_T contains one string. If the word contains | |
245 | + * "$@", however, it may contain any number of strings. | |
246 | + * On error, the return value is a plist_T with `contents' being NULL. */ | |
247 | +plist_T expand_word(const wordunit_T *w, | |
248 | + tildetype_T tilde, quoting_T quoting, escaping_T escaping) | |
249 | +{ | |
250 | + /* four expansions */ | |
251 | + struct expand_four_T expand = | |
252 | + expand_four(w, tilde, quoting, CC_LITERAL); | |
253 | + | |
254 | + /* empty field removal & quote removal */ | |
255 | + if (expand.valuelist.contents != NULL) | |
256 | + remove_empty_fields_and_quotes(&expand, escaping); | |
257 | + | |
258 | + return expand.valuelist; | |
259 | +} | |
260 | + | |
253 | 261 | /* Expands a single word: the four expansions and quote removal. |
254 | - * This function doesn't perform brace expansion, field splitting, globbing and | |
255 | - * unescaping. | |
256 | - * If `processquotes' is true, single- and double-quotations are recognized as | |
257 | - * quotes. Otherwise, they are treated like backslashed characters. | |
258 | - * If `escapeall' is true, the expanded words are all backslashed as if the | |
259 | - * entire expansion is quoted. | |
260 | - * If `processquotes' and `escapeall' are false, only backslashes not preceding | |
261 | - * any of $, `, \ are self-backslashed. | |
262 | - * If successful, the resulting word is returned as a newly malloced string | |
263 | - * that may include backslash escapes. | |
262 | + * This function doesn't perform brace expansion, field splitting, or globbing. | |
263 | + * If successful, the resulting word is returned as a newly malloced string. | |
264 | 264 | * On error, an error message is printed and NULL is returned. |
265 | 265 | * On error in a non-interactive shell, the shell exits. */ |
266 | -wchar_t *expand_single(const wordunit_T *arg, | |
267 | - tildetype_T tilde, bool processquotes, bool escapeall) | |
266 | +wchar_t *expand_single(const wordunit_T *w, | |
267 | + tildetype_T tilde, quoting_T quoting, escaping_T escaping) | |
268 | 268 | { |
269 | - plist_T list = | |
270 | - expand_four_and_remove_quotes(arg, tilde, processquotes, escapeall); | |
269 | + plist_T list = expand_word(w, tilde, quoting, escaping); | |
271 | 270 | if (list.contents == NULL) { |
272 | 271 | maybe_exit_on_error(); |
273 | 272 | return NULL; |
274 | 273 | } |
275 | 274 | |
276 | - return concatenate_values(pl_toary(&list), true); | |
275 | + return concatenate_values(pl_toary(&list), escaping != ES_NONE); | |
277 | 276 | } |
278 | 277 | |
279 | -/* Like `expand_single', but the result is unescaped (if successful). */ | |
280 | -wchar_t *expand_single_and_unescape(const wordunit_T *arg, | |
281 | - tildetype_T tilde, bool processquotes, bool escapeall) | |
282 | -{ | |
283 | - wchar_t *result = expand_single(arg, tilde, processquotes, escapeall); | |
284 | - return result == NULL ? NULL : unescapefree(result); | |
285 | -} | |
286 | - | |
287 | 278 | /* Expands a single word: the four expansions, glob, quote removal and unescape. |
288 | 279 | * This function doesn't perform brace expansion and field splitting. |
289 | 280 | * If the result of glob is more than one word, |
@@ -296,7 +287,7 @@ | ||
296 | 287 | * On error in a non-interactive shell, the shell exits. */ |
297 | 288 | char *expand_single_with_glob(const wordunit_T *arg, tildetype_T tilde) |
298 | 289 | { |
299 | - wchar_t *exp = expand_single(arg, tilde, true, false); | |
290 | + wchar_t *exp = expand_single(arg, tilde, Q_WORD, ES_QUOTED_HARD); | |
300 | 291 | char *result; |
301 | 292 | |
302 | 293 | if (exp == NULL) |
@@ -347,78 +338,52 @@ | ||
347 | 338 | /********** Four Expansions **********/ |
348 | 339 | |
349 | 340 | /* Performs the four expansions in the specified single word. |
350 | - * `w' is the word in which expansions occur. | |
351 | - * `tilde' is type of tilde expansion that is performed. | |
352 | - * If `processquotes' is true, single- and double-quotations are recognized as | |
353 | - * quotes. Otherwise, they are treated like backslashed characters. | |
354 | - * If `escapeall' is true, the expanded words are all backslashed as if the | |
355 | - * entire expansion is quoted. | |
356 | - * If `processquotes' and `escapeall' are false, only backslashes not preceding | |
357 | - * any of $, `, \ are self-backslashed. | |
358 | - * If successful, the return value is a plist_T containing newly malloced wide | |
359 | - * strings. In most cases, the plist_T contains one string. If the word contains | |
360 | - * "$@", however, it may contain any number of strings. | |
361 | - * Single- or double-quoted characters are unquoted and backslashed. | |
362 | - * On error, the return value is a plist_T with `contents' being NULL. */ | |
363 | -plist_T expand_four_and_remove_quotes( | |
364 | - const wordunit_T *restrict w, | |
365 | - tildetype_T tilde, bool processquotes, bool escapeall) | |
341 | + * The four expansions are tilde expansion, parameter expansion, command | |
342 | + * substitution, and arithmetic expansion. | |
343 | + * If successful, `valuelist' in the return value is the list of the resultant | |
344 | + * fields, which are newly malloced wide strings, and `cclist' is the list of | |
345 | + * the corresponding charcategory_T strings, which are also newly malloced. | |
346 | + * If unsuccessful, `valuelist' and `cclist' are empty and have NULL `contents'. | |
347 | + */ | |
348 | +struct expand_four_T expand_four(const wordunit_T *restrict w, | |
349 | + tildetype_T tilde, quoting_T quoting, charcategory_T defaultcc) | |
366 | 350 | { |
367 | - struct expand_four_T expand; | |
351 | + struct expand_four_inner_T e; | |
352 | + pl_init(&e.e.valuelist); | |
353 | + pl_init(&e.e.cclist); | |
354 | + wb_init(&e.valuebuf); | |
355 | + sb_init(&e.ccbuf); | |
356 | + e.e.zeroword = false; | |
368 | 357 | |
369 | - pl_init(&expand.valuelist); | |
370 | - wb_init(&expand.valuebuf); | |
371 | - expand.splitlist.contents = NULL; | |
372 | - expand.zeroword = false; | |
373 | - | |
374 | - if (!expand_four(w, tilde, processquotes, escapeall, false, &expand)) { | |
375 | - plfree(pl_toary(&expand.valuelist), free); | |
376 | - wb_destroy(&expand.valuebuf); | |
377 | - expand.valuelist.contents = NULL; | |
378 | - return expand.valuelist; | |
358 | + if (expand_four_inner(w, tilde, quoting, defaultcc, &e)) { | |
359 | + assert(e.e.valuelist.length == e.e.cclist.length); | |
360 | + assert(e.valuebuf.length == e.ccbuf.length); | |
361 | + pl_add(&e.e.valuelist, wb_towcs(&e.valuebuf)); | |
362 | + pl_add(&e.e.cclist, sb_tostr(&e.ccbuf)); | |
363 | + } else { | |
364 | + plfree(pl_toary(&e.e.valuelist), free); | |
365 | + plfree(pl_toary(&e.e.cclist), free); | |
366 | + wb_destroy(&e.valuebuf); | |
367 | + sb_destroy(&e.ccbuf); | |
368 | + e.e.valuelist.contents = e.e.cclist.contents = NULL; | |
379 | 369 | } |
380 | - | |
381 | - /* remove empty word for "$@" if $# == 0 */ | |
382 | - if (expand.valuelist.length == 0 && expand.zeroword && | |
383 | - wcscmp(expand.valuebuf.contents, L"\"\"") == 0) | |
384 | - wb_destroy(&expand.valuebuf); | |
385 | - else | |
386 | - pl_add(&expand.valuelist, wb_towcs(&expand.valuebuf)); | |
387 | - | |
388 | - /* quote removal */ | |
389 | - for (size_t i = 0; i < expand.valuelist.length; i++) | |
390 | - expand.valuelist.contents[i] = | |
391 | - escaped_remove_free(expand.valuelist.contents[i], L"\"\'"); | |
392 | - | |
393 | - return expand.valuelist; | |
370 | + return e.e; | |
394 | 371 | } |
395 | 372 | |
396 | 373 | /* Performs the four expansions in the specified single word. |
397 | 374 | * The four expansions are tilde expansion, parameter expansion, command |
398 | 375 | * substitution, and arithmetic expansion. |
399 | - * `w' is the word in which expansions occur. | |
400 | - * `tilde' specifies the type of tilde expansion that is performed. | |
401 | - * If `processquotes' is true, single- and double-quotations are recognized as | |
402 | - * quotes. Otherwise, they are treated like backslashed characters. | |
403 | - * If `escapeall' is true, the expanded words are all backslashed as if the | |
404 | - * entire expansion is quoted. | |
405 | - * If `processquotes' and `escapeall' are false, only backslashes not preceding | |
406 | - * any of $, `, \ are self-backslashed. | |
407 | - * `rec' must be true iff this expansion is part of another expansion. | |
408 | - * `e->valuebuf' must be initialized before calling this function and is used to | |
409 | - * expand the current word. If `w' expands to multiple words, the last word is | |
410 | - * put in `e->valuebuf' and the others are inserted to `e->valuelist'. | |
411 | - * The splittability strings are put in `e->splitbuf' and `e->splitlist' | |
412 | - * accordingly if `e->splitlist' is non-NULL. | |
413 | - * Single- and double-quotations remain in the resulting word. In addition, | |
414 | - * characters inside those quotations are backslashed. | |
376 | + * The lists and buffers in `e' must have been initialized before calling this | |
377 | + * function. If the expansion yields a single field, the result is appended to | |
378 | + * `e->valuebuf'. If more than one field results, all but the last field are | |
379 | + * appended to `e->valuelist' as newly malloced wide strings and the last field | |
380 | + * remains in `e->valuebuf'. The corresponding charcategory_T strings are added | |
381 | + * to `e->cclist' and `e->ccbuf', having the same count and length as | |
382 | + * `e->valuelist' and `e->valuebuf'. | |
415 | 383 | * The return value is true iff successful. */ |
416 | -/* A splittability string is an array of Boolean values that specifies where | |
417 | - * the word can be split in field splitting. The word can be split at the nth | |
418 | - * character iff the nth value of the splittability string is non-zero. */ | |
419 | -bool expand_four(const wordunit_T *restrict w, | |
420 | - tildetype_T tilde, bool processquotes, bool escapeall, bool rec, | |
421 | - struct expand_four_T *restrict e) | |
384 | +bool expand_four_inner(const wordunit_T *restrict w, tildetype_T tilde, | |
385 | + quoting_T quoting, charcategory_T defaultcc, | |
386 | + struct expand_four_inner_T *restrict e) | |
422 | 387 | { |
423 | 388 | bool ok = true; |
424 | 389 | bool indq = false; /* in a double quote? */ |
@@ -426,10 +391,6 @@ | ||
426 | 391 | const wchar_t *ss; |
427 | 392 | wchar_t *s; |
428 | 393 | |
429 | -#define FILL_SBUF(s) fill_splitbuf(e, !indq && !escapeall && (s)); | |
430 | -#define FILL_SBUF_SPLITTABLE FILL_SBUF(true) | |
431 | -#define FILL_SBUF_UNSPLITTABLE FILL_SBUF(false) | |
432 | - | |
433 | 394 | for (; w != NULL; w = w->next, first = false) { |
434 | 395 | switch (w->wu_type) { |
435 | 396 | case WT_STRING: |
@@ -437,65 +398,72 @@ | ||
437 | 398 | if (first && tilde != TT_NONE) { |
438 | 399 | s = expand_tilde(&ss, w->next, tilde); |
439 | 400 | if (s != NULL) { |
440 | - wb_catfree(&e->valuebuf, escapefree(s, NULL)); | |
441 | - FILL_SBUF_UNSPLITTABLE; | |
401 | + wb_catfree(&e->valuebuf, s); | |
402 | + fill_ccbuf(e, CC_HARD_EXPANSION | (defaultcc & CC_QUOTED)); | |
442 | 403 | } |
443 | 404 | } |
444 | 405 | while (*ss != L'\0') { |
445 | 406 | switch (*ss) { |
446 | 407 | case L'"': |
447 | - if (!processquotes) | |
448 | - goto escape; | |
408 | + if (quoting != Q_WORD) | |
409 | + goto default_; | |
449 | 410 | indq = !indq; |
450 | 411 | wb_wccat(&e->valuebuf, L'"'); |
451 | - FILL_SBUF_UNSPLITTABLE; | |
412 | + sb_ccat(&e->ccbuf, defaultcc | CC_QUOTATION); | |
452 | 413 | break; |
453 | 414 | case L'\'': |
454 | - if (!processquotes || indq) | |
455 | - goto escape; | |
415 | + if (quoting != Q_WORD || indq) | |
416 | + goto default_; | |
417 | + | |
456 | 418 | wb_wccat(&e->valuebuf, L'\''); |
457 | - add_sq(&ss, &e->valuebuf, true); | |
419 | + sb_ccat(&e->ccbuf, defaultcc | CC_QUOTATION); | |
420 | + | |
421 | + add_sq(&ss, &e->valuebuf, false); | |
422 | + fill_ccbuf(e, defaultcc | CC_QUOTED); | |
423 | + | |
458 | 424 | wb_wccat(&e->valuebuf, L'\''); |
459 | - FILL_SBUF_UNSPLITTABLE; | |
425 | + sb_ccat(&e->ccbuf, defaultcc | CC_QUOTATION); | |
460 | 426 | break; |
461 | 427 | case L'\\': |
462 | - if (!processquotes) { | |
463 | - if (!escapeall) { | |
464 | - wchar_t c = ss[1]; | |
465 | - if (c == L'$' || c == L'`' || c == L'\\') | |
466 | - ss++; | |
467 | - } | |
468 | - goto escape; | |
428 | + switch (quoting) { | |
429 | + case Q_WORD: | |
430 | + if (indq && wcschr(CHARS_ESCAPABLE, ss[1]) == NULL) | |
431 | + goto default_; | |
432 | + break; | |
433 | + case Q_INDQ: | |
434 | + if (wcschr(L"$`\\", ss[1]) == NULL) | |
435 | + goto default_; | |
436 | + break; | |
437 | + case Q_LITERAL: | |
438 | + goto default_; | |
469 | 439 | } |
470 | 440 | |
471 | - if (indq && wcschr(CHARS_ESCAPABLE, ss[1]) == NULL) { | |
472 | - goto escape; | |
473 | - } else { | |
474 | - wb_wccat(&e->valuebuf, L'\\'); | |
475 | - if (*++ss != L'\0') | |
476 | - wb_wccat(&e->valuebuf, *ss++); | |
477 | - FILL_SBUF_UNSPLITTABLE; | |
478 | - continue; | |
441 | + wb_wccat(&e->valuebuf, L'\\'); | |
442 | + sb_ccat(&e->ccbuf, defaultcc | CC_QUOTATION); | |
443 | + ss++; | |
444 | + if (*ss != L'\0') { | |
445 | + wb_wccat(&e->valuebuf, *ss); | |
446 | + sb_ccat(&e->ccbuf, defaultcc | CC_QUOTED); | |
479 | 447 | } |
448 | + break; | |
480 | 449 | case L':': |
481 | - if (!indq && tilde == TT_MULTI) { | |
482 | - /* perform tilde expansion after a colon */ | |
483 | - wb_wccat(&e->valuebuf, L':'); | |
484 | - ss++; | |
485 | - s = expand_tilde(&ss, w->next, tilde); | |
486 | - if (s != NULL) { | |
487 | - wb_catfree(&e->valuebuf, escapefree(s, NULL)); | |
488 | - FILL_SBUF_UNSPLITTABLE; | |
489 | - } | |
490 | - continue; | |
450 | + if (indq || tilde != TT_MULTI) | |
451 | + goto default_; | |
452 | + | |
453 | + /* perform tilde expansion after a colon */ | |
454 | + wb_wccat(&e->valuebuf, L':'); | |
455 | + sb_ccat(&e->ccbuf, defaultcc); | |
456 | + ss++; | |
457 | + s = expand_tilde(&ss, w->next, tilde); | |
458 | + if (s != NULL) { | |
459 | + wb_catfree(&e->valuebuf, s); | |
460 | + fill_ccbuf(e, CC_HARD_EXPANSION); | |
491 | 461 | } |
492 | - /* falls thru! */ | |
462 | + continue; | |
463 | +default_: | |
493 | 464 | default: |
494 | - if (indq || escapeall) | |
495 | -escape: | |
496 | - wb_wccat(&e->valuebuf, L'\\'); | |
497 | 465 | wb_wccat(&e->valuebuf, *ss); |
498 | - FILL_SBUF(rec); | |
466 | + sb_ccat(&e->ccbuf, defaultcc | (indq * CC_QUOTED)); | |
499 | 467 | break; |
500 | 468 | } |
501 | 469 | ss++; |
@@ -502,7 +470,9 @@ | ||
502 | 470 | } |
503 | 471 | break; |
504 | 472 | case WT_PARAM: |
505 | - if (!expand_param(w->wu_param, indq || escapeall, e)) | |
473 | + if (!expand_param(w->wu_param, | |
474 | + indq || quoting == Q_LITERAL || (defaultcc & CC_QUOTED), | |
475 | + e)) | |
506 | 476 | ok = false; |
507 | 477 | break; |
508 | 478 | case WT_CMDSUB: |
@@ -509,14 +479,14 @@ | ||
509 | 479 | s = exec_command_substitution(&w->wu_cmdsub); |
510 | 480 | goto cat_s; |
511 | 481 | case WT_ARITH: |
512 | - s = expand_single_and_unescape(w->wu_arith, TT_NONE, true, false); | |
482 | + s = expand_single(w->wu_arith, TT_NONE, Q_WORD, ES_NONE); | |
513 | 483 | if (s != NULL) |
514 | 484 | s = evaluate_arithmetic(s); |
515 | 485 | cat_s: |
516 | 486 | if (s != NULL) { |
517 | - wb_catfree(&e->valuebuf, escapefree(s, | |
518 | - (indq || escapeall) ? NULL : CHARS_ESCAPED)); | |
519 | - FILL_SBUF_SPLITTABLE; | |
487 | + wb_catfree(&e->valuebuf, s); | |
488 | + fill_ccbuf(e, CC_SOFT_EXPANSION | | |
489 | + (indq * CC_QUOTED) | (defaultcc & CC_QUOTED)); | |
520 | 490 | } else { |
521 | 491 | ok = false; |
522 | 492 | } |
@@ -524,21 +494,14 @@ | ||
524 | 494 | } |
525 | 495 | } |
526 | 496 | |
527 | -#undef FILL_SBUF_UNSPLITTABLE | |
528 | -#undef FILL_SBUF_SPLITTABLE | |
529 | -#undef FILL_SBUF | |
530 | - | |
531 | 497 | return ok; |
532 | 498 | } |
533 | 499 | |
534 | -/* Appends to `e->splitbuf' as many `splittable' as needed to match the length | |
535 | - * with `e->valuebuf'. */ | |
536 | -void fill_splitbuf(struct expand_four_T *e, bool splittable) | |
500 | +/* Appends to `e->ccbuf' as many `c's as needed to match the length with | |
501 | + * `e->valuebuf'. */ | |
502 | +void fill_ccbuf(struct expand_four_inner_T *e, charcategory_T c) | |
537 | 503 | { |
538 | - if (e->splitlist.contents == NULL) | |
539 | - return; | |
540 | - sb_ccat_repeat( | |
541 | - &e->splitbuf, splittable, e->valuebuf.length - e->splitbuf.length); | |
504 | + sb_ccat_repeat(&e->ccbuf, c, e->valuebuf.length - e->ccbuf.length); | |
542 | 505 | } |
543 | 506 | |
544 | 507 | /* Performs tilde expansion. |
@@ -611,7 +574,7 @@ | ||
611 | 574 | * The result is put in `e'. |
612 | 575 | * Returns true iff successful. */ |
613 | 576 | bool expand_param(const paramexp_T *restrict p, bool indq, |
614 | - struct expand_four_T *restrict e) | |
577 | + struct expand_four_inner_T *restrict e) | |
615 | 578 | { |
616 | 579 | /* parse indices first */ |
617 | 580 | ssize_t startindex, endindex; |
@@ -619,8 +582,7 @@ | ||
619 | 582 | if (p->pe_start == NULL) { |
620 | 583 | startindex = 0, endindex = SSIZE_MAX, indextype = IDX_NONE; |
621 | 584 | } else { |
622 | - wchar_t *start = expand_single_and_unescape( | |
623 | - p->pe_start, TT_NONE, true, false); | |
585 | + wchar_t *start = expand_single(p->pe_start, TT_NONE, Q_WORD, ES_NONE); | |
624 | 586 | if (start == NULL) |
625 | 587 | return false; |
626 | 588 | indextype = parse_indextype(start); |
@@ -637,8 +599,8 @@ | ||
637 | 599 | if (p->pe_end == NULL) { |
638 | 600 | endindex = (startindex == -1) ? SSIZE_MAX : startindex; |
639 | 601 | } else { |
640 | - wchar_t *end = expand_single_and_unescape( | |
641 | - p->pe_end, TT_NONE, true, false); | |
602 | + wchar_t *end = expand_single( | |
603 | + p->pe_end, TT_NONE, Q_WORD, ES_NONE); | |
642 | 604 | if (end == NULL || !evaluate_index(end, &endindex)) |
643 | 605 | return false; |
644 | 606 | } |
@@ -656,8 +618,7 @@ | ||
656 | 618 | struct get_variable_T v; |
657 | 619 | bool unset; /* parameter is not set? */ |
658 | 620 | if (p->pe_type & PT_NEST) { |
659 | - plist_T plist = | |
660 | - expand_four_and_remove_quotes(p->pe_nest, TT_NONE, true, true); | |
621 | + plist_T plist = expand_word(p->pe_nest, TT_NONE, Q_WORD, ES_NONE); | |
661 | 622 | if (plist.contents == NULL) |
662 | 623 | return false; |
663 | 624 | v.type = (plist.length == 1) ? GV_SCALAR : GV_ARRAY; |
@@ -665,8 +626,6 @@ | ||
665 | 626 | v.values = pl_toary(&plist); |
666 | 627 | v.freevalues = true; |
667 | 628 | unset = false; |
668 | - for (size_t i = 0; v.values[i] != NULL; i++) | |
669 | - v.values[i] = unescapefree(v.values[i]); | |
670 | 629 | } else { |
671 | 630 | v = get_variable(p->pe_name); |
672 | 631 | if (v.type == GV_NOTFOUND) { |
@@ -773,7 +732,8 @@ | ||
773 | 732 | if (unset) { |
774 | 733 | subst: |
775 | 734 | plfree(values, free); |
776 | - return expand_four(p->pe_subst, TT_SINGLE, true, indq, true, e); | |
735 | + return expand_four_inner(p->pe_subst, TT_SINGLE, Q_WORD, | |
736 | + CC_SOFT_EXPANSION | (indq * CC_QUOTED), e); | |
777 | 737 | } |
778 | 738 | break; |
779 | 739 | case PT_ASSIGN: |
@@ -795,8 +755,7 @@ | ||
795 | 755 | p->pe_name); |
796 | 756 | return false; |
797 | 757 | } |
798 | - subst = expand_single_and_unescape( | |
799 | - p->pe_subst, TT_SINGLE, true, false); | |
758 | + subst = expand_single(p->pe_subst, TT_SINGLE, Q_WORD, ES_NONE); | |
800 | 759 | if (subst == NULL) |
801 | 760 | return false; |
802 | 761 | if (v.type != GV_ARRAY) { |
@@ -838,7 +797,7 @@ | ||
838 | 797 | wchar_t *match; |
839 | 798 | switch (p->pe_type & PT_MASK) { |
840 | 799 | case PT_MATCH: |
841 | - match = expand_single(p->pe_match, TT_SINGLE, true, false); | |
800 | + match = expand_single(p->pe_match, TT_SINGLE, Q_WORD, ES_QUOTED); | |
842 | 801 | if (match == NULL) { |
843 | 802 | plfree(values, free); |
844 | 803 | return false; |
@@ -847,8 +806,8 @@ | ||
847 | 806 | free(match); |
848 | 807 | break; |
849 | 808 | case PT_SUBST: |
850 | - match = expand_single(p->pe_match, TT_SINGLE, true, false); | |
851 | - subst = expand_single_and_unescape(p->pe_subst, TT_SINGLE, true, false); | |
809 | + match = expand_single(p->pe_match, TT_SINGLE, Q_WORD, ES_QUOTED); | |
810 | + subst = expand_single(p->pe_subst, TT_SINGLE, Q_WORD, ES_NONE); | |
852 | 811 | if (match == NULL || subst == NULL) { |
853 | 812 | free(match); |
854 | 813 | free(subst); |
@@ -869,39 +828,25 @@ | ||
869 | 828 | if (p->pe_type & PT_NUMBER) |
870 | 829 | subst_length_each(values); |
871 | 830 | |
872 | - /* backslash escape */ | |
873 | - for (size_t i = 0; values[i] != NULL; i++) | |
874 | - values[i] = escapefree(values[i], indq ? NULL : CHARS_ESCAPED); | |
875 | - | |
876 | 831 | /* add the elements of `values' to `e->valuelist' */ |
877 | 832 | if (values[0] == NULL) { |
878 | 833 | if (indq) |
879 | - e->zeroword = true; | |
834 | + e->e.zeroword = true; | |
880 | 835 | } else { |
836 | + charcategory_T cc = CC_SOFT_EXPANSION | (indq * CC_QUOTED); | |
837 | + | |
881 | 838 | /* add the first element */ |
882 | 839 | wb_catfree(&e->valuebuf, values[0]); |
883 | - fill_splitbuf(e, !indq); | |
884 | - if (values[1] != NULL) { | |
885 | - pl_add(&e->valuelist, wb_towcs(&e->valuebuf)); | |
886 | - if (e->splitlist.contents != NULL) | |
887 | - pl_add(&e->splitlist, sb_tostr(&e->splitbuf)); | |
840 | + fill_ccbuf(e, cc); | |
888 | 841 | |
889 | - /* add the remaining but last */ | |
890 | - size_t i; | |
891 | - for (i = 1; values[i + 1] != NULL; i++) { | |
892 | - pl_add(&e->valuelist, values[i]); | |
893 | - if (e->splitlist.contents != NULL) { | |
894 | - size_t len = wcslen(values[i]); | |
895 | - pl_add(&e->splitlist, memset(xmalloc(len), !indq, len)); | |
896 | - } | |
897 | - } | |
842 | + /* add the other elements */ | |
843 | + for (size_t i = 1; values[i] != NULL; i++) { | |
844 | + pl_add(&e->e.valuelist, wb_towcs(&e->valuebuf)); | |
845 | + pl_add(&e->e.cclist, sb_tostr(&e->ccbuf)); | |
898 | 846 | |
899 | - /* add the last element */ | |
900 | 847 | wb_initwith(&e->valuebuf, values[i]); |
901 | - if (e->splitlist.contents != NULL) { | |
902 | - sb_init(&e->splitbuf); | |
903 | - fill_splitbuf(e, !indq); | |
904 | - } | |
848 | + sb_init(&e->ccbuf); | |
849 | + fill_ccbuf(e, cc); | |
905 | 850 | } |
906 | 851 | } |
907 | 852 | free(values); |
@@ -995,8 +940,7 @@ | ||
995 | 940 | void print_subst_as_error(const paramexp_T *p) |
996 | 941 | { |
997 | 942 | if (p->pe_subst != NULL) { |
998 | - wchar_t *subst = expand_single_and_unescape( | |
999 | - p->pe_subst, TT_SINGLE, true, false); | |
943 | + wchar_t *subst = expand_single(p->pe_subst, TT_SINGLE, Q_WORD, ES_NONE); | |
1000 | 944 | if (subst != NULL) { |
1001 | 945 | if (p->pe_type & PT_NEST) |
1002 | 946 | xerror(0, "%ls", subst); |
@@ -1133,38 +1077,49 @@ | ||
1133 | 1077 | |
1134 | 1078 | /* Performs brace expansion in each element of the specified array. |
1135 | 1079 | * `values' is an array of pointers to `free'able wide strings to be expanded. |
1136 | - * `splits' is an array of pointers to `free'able splittability strings. | |
1137 | - * `values' and 'splits' must contain the same number of elements. | |
1138 | - * Both the arrays must be NULL-terminated and their elements are freed in this | |
1139 | - * function. The arrays themselves are not freed. | |
1140 | - * Newly malloced results are added to `valuelist' and `splitlist'. */ | |
1141 | -void expand_brace_each(void **restrict values, void **restrict splits, | |
1142 | - plist_T *restrict valuelist, plist_T *restrict splitlist) | |
1080 | + * `ccs' is an array of pointers to `free'able charcategory_T strings. | |
1081 | + * `values' and `ccs' must contain the same number of elements and be NULL- | |
1082 | + * terminated. Their elements are freed in this function. The arrays themselves | |
1083 | + * are not freed. | |
1084 | + * Newly malloced results are added to `valuelist' and `cclist'. */ | |
1085 | +void expand_brace_each( | |
1086 | + void *const *restrict values, void *const *restrict ccs, | |
1087 | + plist_T *restrict valuelist, plist_T *restrict cclist) | |
1143 | 1088 | { |
1144 | 1089 | while (*values != NULL) { |
1145 | - expand_brace(*values, *splits, valuelist, splitlist); | |
1146 | - values++, splits++; | |
1090 | + expand_brace(*values, *ccs, valuelist, cclist); | |
1091 | + values++, ccs++; | |
1147 | 1092 | } |
1148 | 1093 | } |
1149 | 1094 | |
1150 | 1095 | /* Performs brace expansion in the specified single word. |
1151 | - * `split' is the splittability string corresponding to `word'. | |
1152 | - * `word' and `split' are freed in this function. | |
1153 | - * `Free'able results are added to `valuelist' and `splitlist'. */ | |
1154 | -void expand_brace(wchar_t *restrict const word, char *restrict const split, | |
1155 | - plist_T *restrict valuelist, plist_T *restrict splitlist) | |
1096 | + * `cc' is the charcategory_T string corresponding to `word'. | |
1097 | + * `word' and `cc' are freed in this function. | |
1098 | + * `Free'able results are added to `valuelist' and `cclist'. */ | |
1099 | +void expand_brace( | |
1100 | + wchar_t *restrict const word, char *restrict const cc, | |
1101 | + plist_T *restrict valuelist, plist_T *restrict cclist) | |
1156 | 1102 | { |
1157 | - wchar_t *c = word; | |
1103 | +#define idx(p) ((size_t) ((wchar_t *) (p) - word)) | |
1158 | 1104 | |
1105 | + size_t ci = 0; | |
1106 | + | |
1159 | 1107 | start: |
1160 | - c = escaped_wcspbrk(c, L"{"); | |
1161 | - if (c == NULL || *++c == L'\0') { | |
1162 | - /* don't expand if there is no L'{' or L'{' is at the end of string */ | |
1163 | - pl_add(valuelist, word); | |
1164 | - pl_add(splitlist, split); | |
1108 | + | |
1109 | + /* find '{' */ | |
1110 | + do { | |
1111 | + wchar_t *c = wcschr(&word[ci], L'{'); | |
1112 | + if (c == NULL) { | |
1113 | + /* no L'{', no expansion */ | |
1114 | + pl_add(valuelist, word); | |
1115 | + pl_add(cclist, cc); | |
1116 | + return; | |
1117 | + } | |
1118 | + ci = idx(c); | |
1119 | + } while (cc[ci++] != CC_LITERAL); | |
1120 | + | |
1121 | + if (try_expand_brace_sequence(word, cc, &word[ci], valuelist, cclist)) { | |
1165 | 1122 | return; |
1166 | - } else if (try_expand_brace_sequence(word, split, c, valuelist, splitlist)){ | |
1167 | - return; | |
1168 | 1123 | } |
1169 | 1124 | |
1170 | 1125 | plist_T splitpoints; |
@@ -1173,16 +1128,18 @@ | ||
1173 | 1128 | /* collect pointers to characters where the word is split */ |
1174 | 1129 | /* The pointers point to the character just after L'{', L',' or L'}'. */ |
1175 | 1130 | pl_init(&splitpoints); |
1176 | - pl_add(&splitpoints, c); | |
1131 | + pl_add(&splitpoints, &word[ci]); | |
1177 | 1132 | nest = 0; |
1178 | - while ((c = escaped_wcspbrk(c, L"{,}")) != NULL) { | |
1179 | - switch (*c++) { | |
1133 | + for (; word[ci] != L'\0'; ci++) { | |
1134 | + if (cc[ci] != CC_LITERAL) | |
1135 | + continue; | |
1136 | + switch (word[ci]) { | |
1180 | 1137 | case L'{': |
1181 | 1138 | nest++; |
1182 | 1139 | break; |
1183 | 1140 | case L',': |
1184 | 1141 | if (nest == 0) |
1185 | - pl_add(&splitpoints, c); | |
1142 | + pl_add(&splitpoints, &word[ci + 1]); | |
1186 | 1143 | break; |
1187 | 1144 | case L'}': |
1188 | 1145 | if (nest > 0) { |
@@ -1189,9 +1146,10 @@ | ||
1189 | 1146 | nest--; |
1190 | 1147 | break; |
1191 | 1148 | } else if (splitpoints.length == 1) { |
1149 | + /* no comma between { and } */ | |
1192 | 1150 | goto restart; |
1193 | 1151 | } else { |
1194 | - pl_add(&splitpoints, c); | |
1152 | + pl_add(&splitpoints, &word[ci + 1]); | |
1195 | 1153 | goto done; |
1196 | 1154 | } |
1197 | 1155 | } |
@@ -1199,53 +1157,51 @@ | ||
1199 | 1157 | restart: |
1200 | 1158 | /* if there is no L',' or L'}' corresponding to L'{', |
1201 | 1159 | * find the next L'{' and try again */ |
1202 | - c = splitpoints.contents[0]; | |
1160 | + ci = idx(splitpoints.contents[0]); | |
1203 | 1161 | pl_destroy(&splitpoints); |
1204 | 1162 | goto start; |
1205 | 1163 | |
1206 | 1164 | done:; |
1207 | -#define idx(p) ((wchar_t *) (p) - word) | |
1208 | -#define wtos(p) (split + idx(p)) | |
1209 | 1165 | size_t lastelemindex = splitpoints.length - 1; |
1210 | 1166 | size_t headlen = idx(splitpoints.contents[0]) - 1; |
1211 | 1167 | size_t taillen = wcslen(splitpoints.contents[lastelemindex]); |
1212 | 1168 | for (size_t i = 0; i < lastelemindex; i++) { |
1213 | 1169 | xwcsbuf_T buf; |
1214 | - xstrbuf_T sbuf; | |
1170 | + xstrbuf_T cbuf; | |
1215 | 1171 | wb_init(&buf); |
1216 | - sb_init(&sbuf); | |
1172 | + sb_init(&cbuf); | |
1217 | 1173 | |
1218 | 1174 | wb_ncat_force(&buf, word, headlen); |
1219 | - sb_ncat_force(&sbuf, split, headlen); | |
1175 | + sb_ncat_force(&cbuf, cc, headlen); | |
1220 | 1176 | |
1221 | 1177 | size_t len = (wchar_t *) splitpoints.contents[i + 1] - |
1222 | 1178 | (wchar_t *) splitpoints.contents[i ] - 1; |
1223 | - wb_ncat_force(&buf, splitpoints.contents[i], len); | |
1224 | - sb_ncat_force(&sbuf, wtos(splitpoints.contents[i]), len); | |
1179 | + ci = idx(splitpoints.contents[i]); | |
1180 | + wb_ncat_force(&buf, &word[ci], len); | |
1181 | + sb_ncat_force(&cbuf, &cc[ci], len); | |
1225 | 1182 | |
1226 | - wb_ncat_force(&buf, splitpoints.contents[lastelemindex], taillen); | |
1227 | - sb_ncat_force(&sbuf, wtos(splitpoints.contents[lastelemindex]), taillen); | |
1228 | - assert(buf.length == sbuf.length); | |
1183 | + ci = idx(splitpoints.contents[lastelemindex]); | |
1184 | + wb_ncat_force(&buf, &word[ci], taillen); | |
1185 | + sb_ncat_force(&cbuf, &cc[ci], taillen); | |
1186 | + assert(buf.length == cbuf.length); | |
1229 | 1187 | |
1230 | 1188 | /* expand the remaining portion recursively */ |
1231 | - expand_brace(wb_towcs(&buf), sb_tostr(&sbuf), valuelist, splitlist); | |
1189 | + expand_brace(wb_towcs(&buf), sb_tostr(&cbuf), valuelist, cclist); | |
1232 | 1190 | } |
1233 | 1191 | pl_destroy(&splitpoints); |
1234 | 1192 | free(word); |
1235 | - free(split); | |
1236 | -#undef idx | |
1237 | -#undef wtos | |
1193 | + free(cc); | |
1238 | 1194 | } |
1239 | 1195 | |
1240 | 1196 | /* Tries numeric brace expansion like "{01..05}". |
1241 | 1197 | * If unsuccessful, this function returns false without any side effects. |
1242 | - * If successful, `word' and `split' are freed and the full expansion results | |
1243 | - * are added to `valuelist' and `splitlist'. | |
1198 | + * If successful, `word' and `cc' are freed and the full expansion results are | |
1199 | + * added to `valuelist' and `cclist'. | |
1244 | 1200 | * `startc' is a pointer to the character right after L'{' in `word'. |
1245 | 1201 | */ |
1246 | 1202 | bool try_expand_brace_sequence( |
1247 | - wchar_t *word, char *restrict split, wchar_t *startc, | |
1248 | - plist_T *restrict valuelist, plist_T *restrict splitlist) | |
1203 | + wchar_t *const word, char *restrict const cc, wchar_t *const startc, | |
1204 | + plist_T *restrict valuelist, plist_T *restrict cclist) | |
1249 | 1205 | { |
1250 | 1206 | long start, end, delta, value; |
1251 | 1207 | wchar_t *dotp, *dotbracep, *bracep, *c; |
@@ -1298,33 +1254,38 @@ | ||
1298 | 1254 | delta = -1; |
1299 | 1255 | } |
1300 | 1256 | |
1257 | + /* validate charcategory_T */ | |
1258 | + if (cc[idx(bracep)] != CC_LITERAL) | |
1259 | + return false; | |
1260 | + for (size_t ci = idx(startc); ci < idx(bracep); ci++) | |
1261 | + if (cc[ci] & CC_QUOTED) | |
1262 | + return false; | |
1263 | + | |
1301 | 1264 | /* expand the sequence */ |
1302 | 1265 | value = start; |
1303 | 1266 | len = (startlen > endlen) ? startlen : endlen; |
1304 | - wordlen = wcslen(word); | |
1267 | + wordlen = idx(bracep + 1) + wcslen(bracep + 1); // = wcslen(word); | |
1305 | 1268 | do { |
1306 | 1269 | xwcsbuf_T buf; |
1307 | - xstrbuf_T sbuf; | |
1270 | + xstrbuf_T cbuf; | |
1308 | 1271 | wb_init(&buf); |
1309 | - sb_init(&sbuf); | |
1272 | + sb_init(&cbuf); | |
1310 | 1273 | |
1311 | - wb_ncat_force(&buf, word, startc - 1 - word); | |
1312 | - sb_ncat_force(&sbuf, split, startc - 1 - word); | |
1274 | + size_t slen = idx(startc - 1); | |
1275 | + wb_ncat_force(&buf, word, slen); | |
1276 | + sb_ncat_force(&cbuf, cc, slen); | |
1313 | 1277 | |
1314 | 1278 | int plen = wb_wprintf(&buf, sign ? L"%0+*ld" : L"%0*ld", len, value); |
1315 | 1279 | if (plen >= 0) |
1316 | - sb_ccat_repeat(&sbuf, 0, plen); | |
1280 | + sb_ccat_repeat(&cbuf, CC_HARD_EXPANSION, plen); | |
1317 | 1281 | |
1318 | - wb_ncat_force(&buf, | |
1319 | - bracep + 1, | |
1320 | - wordlen - (bracep + 1 - word)); | |
1321 | - sb_ncat_force(&sbuf, | |
1322 | - split + (bracep + 1 - word), | |
1323 | - wordlen - (bracep + 1 - word)); | |
1324 | - assert(buf.length == sbuf.length); | |
1282 | + slen = idx(bracep + 1); | |
1283 | + wb_ncat_force(&buf, bracep + 1, wordlen - slen); | |
1284 | + sb_ncat_force(&cbuf, cc + slen, wordlen - slen); | |
1285 | + assert(buf.length == cbuf.length); | |
1325 | 1286 | |
1326 | 1287 | /* expand the remaining portion recursively */ |
1327 | - expand_brace(wb_towcs(&buf), sb_tostr(&sbuf), valuelist, splitlist); | |
1288 | + expand_brace(wb_towcs(&buf), sb_tostr(&cbuf), valuelist, cclist); | |
1328 | 1289 | |
1329 | 1290 | if (delta >= 0) { |
1330 | 1291 | if (LONG_MAX - delta < value) |
@@ -1336,8 +1297,9 @@ | ||
1336 | 1297 | value += delta; |
1337 | 1298 | } while (delta >= 0 ? value <= end : value >= end); |
1338 | 1299 | free(word); |
1339 | - free(split); | |
1300 | + free(cc); | |
1340 | 1301 | return true; |
1302 | +#undef idx | |
1341 | 1303 | } |
1342 | 1304 | |
1343 | 1305 | /* Checks if the specified numeral starts with a L'0'. |
@@ -1362,56 +1324,55 @@ | ||
1362 | 1324 | |
1363 | 1325 | /* Performs field splitting. |
1364 | 1326 | * `valuelist' is a NULL-terminated array of pointers to wide strings to split. |
1365 | - * `splitlist' is an array of pointers to corresponding splittability strings. | |
1366 | - * `valuelist' and `splitlist' are `plfree'ed in this function. | |
1367 | - * The results are added to `dest'. */ | |
1368 | -void fieldsplit_all(void **restrict valuelist, void **restrict splitlist, | |
1369 | - plist_T *restrict dest) | |
1327 | + * `cclist' is an array of pointers to corresponding charcategory_T strings. | |
1328 | + * `valuelist' and `cclist' are `plfree'ed in this function. | |
1329 | + * The results are added to `outvaluelist' and `outcclist'. */ | |
1330 | +void fieldsplit_all( | |
1331 | + void **restrict const valuelist, void **restrict const cclist, | |
1332 | + plist_T *restrict outvaluelist, plist_T *restrict outcclist) | |
1370 | 1333 | { |
1371 | - void **restrict s; | |
1372 | - void **restrict t; | |
1373 | - const wchar_t *ifs; | |
1374 | - | |
1375 | - ifs = getvar(L VAR_IFS); | |
1334 | + const wchar_t *ifs = getvar(L VAR_IFS); | |
1376 | 1335 | if (ifs == NULL) |
1377 | 1336 | ifs = DEFAULT_IFS; |
1378 | 1337 | |
1379 | - for (s = valuelist, t = splitlist; *s != NULL; s++, t++) | |
1380 | - fieldsplit(*s, *t, ifs, dest); | |
1338 | + for (size_t i = 0; valuelist[i] != NULL; i++) | |
1339 | + fieldsplit(valuelist[i], cclist[i], ifs, outvaluelist, outcclist); | |
1381 | 1340 | free(valuelist); |
1382 | - free(splitlist); | |
1341 | + free(cclist); | |
1383 | 1342 | } |
1384 | 1343 | |
1385 | 1344 | /* Performs field splitting. |
1386 | 1345 | * `s' is the word to split and freed in this function. |
1387 | - * `split' is the splittability string corresponding to `s' and also freed. | |
1388 | - * The results are added to `dest' as newly-malloced wide strings. | |
1389 | - * `ifs' must not be NULL. */ | |
1390 | -void fieldsplit(wchar_t *restrict s, char *restrict split, | |
1391 | - const wchar_t *restrict ifs, plist_T *restrict dest) | |
1346 | + * `cc' is the charcategory_T string corresponding to `s' and also freed. | |
1347 | + * `ifs' must not be NULL. | |
1348 | + * The results are added to `outvaluelist' and `outcclist' as newly-malloced | |
1349 | + * strings. */ | |
1350 | +void fieldsplit(wchar_t *restrict s, char *restrict cc, | |
1351 | + const wchar_t *restrict ifs, | |
1352 | + plist_T *restrict outvaluelist, plist_T *restrict outcclist) | |
1392 | 1353 | { |
1393 | 1354 | plist_T fields; |
1394 | 1355 | |
1395 | 1356 | pl_init(&fields); |
1396 | - extract_fields(s, split, true, ifs, &fields); | |
1357 | + extract_fields(s, cc, ifs, &fields); | |
1397 | 1358 | assert(fields.length % 2 == 0); |
1398 | 1359 | |
1399 | 1360 | for (size_t i = 0; i < fields.length; i += 2) { |
1400 | 1361 | const wchar_t *start = fields.contents[i], *end = fields.contents[i+1]; |
1401 | - pl_add(dest, xwcsndup(start, end - start)); | |
1362 | + size_t idx = start - s, len = end - start; | |
1363 | + pl_add(outvaluelist, xwcsndup(start, len)); | |
1364 | + pl_add(outcclist, memcpy(xmalloc(len), &cc[idx], len)); | |
1402 | 1365 | } |
1403 | 1366 | |
1404 | 1367 | pl_destroy(&fields); |
1405 | 1368 | free(s); |
1406 | - free(split); | |
1369 | + free(cc); | |
1407 | 1370 | } |
1408 | 1371 | |
1409 | 1372 | /* Extracts fields from a string. |
1410 | 1373 | * `s' is the word to split. |
1411 | - * `split' is the splittability string corresponding to `s'. It must be at least | |
1412 | - * as long as `wcslen(s)'. | |
1413 | - * If `escaped' is true, backslashes in `s' are treated as escapes. But | |
1414 | - * backslashes do not prevent splitting. | |
1374 | + * `cc' is an array of charcategory_T values corresponding to `s'. It must be at | |
1375 | + * least as long as `wcslen(s)'. | |
1415 | 1376 | * `ifs' must not be NULL. |
1416 | 1377 | * |
1417 | 1378 | * The results are appended to `dest'. If n fields are found, 2n pointers are |
@@ -1421,8 +1382,8 @@ | ||
1421 | 1382 | * on. |
1422 | 1383 | * |
1423 | 1384 | * The word is split at characters that are contained in `ifs' and whose |
1424 | - * corresponding character in the splittability string is non-zero. Refer to | |
1425 | - * POSIX for how whitespaces are treated in field splitting. | |
1385 | + * corresponding character in `cc' is CC_SOFT_EXPANSION. Refer to POSIX for how | |
1386 | + * whitespaces are treated in field splitting. | |
1426 | 1387 | * |
1427 | 1388 | * If an IFS non-whitespace delimits an empty field, the field is assumed just |
1428 | 1389 | * before the non-whitespace delimiter. The empty last field is removed if |
@@ -1440,8 +1401,8 @@ | ||
1440 | 1401 | * "abc--123" -> "abc" "" "123" |
1441 | 1402 | * "abc - - 123" -> "abc" "" "123" |
1442 | 1403 | */ |
1443 | -wchar_t *extract_fields(const wchar_t *restrict s, const char *restrict split, | |
1444 | - bool escaped, const wchar_t *restrict ifs, plist_T *restrict dest) | |
1404 | +wchar_t *extract_fields(const wchar_t *restrict s, const char *restrict cc, | |
1405 | + const wchar_t *restrict ifs, plist_T *restrict dest) | |
1445 | 1406 | { |
1446 | 1407 | size_t index = 0; |
1447 | 1408 | size_t ifswhitestartindex; |
@@ -1453,11 +1414,13 @@ | ||
1453 | 1414 | |
1454 | 1415 | for (;;) { |
1455 | 1416 | ifswhitestartindex = index; |
1456 | - index += skip_ifs_whitespaces(&s[index], &split[index], escaped, ifs); | |
1417 | + while (is_ifs_whitespace(s[index], cc[index], ifs)) | |
1418 | + index++; | |
1457 | 1419 | |
1458 | 1420 | /* extract next field, if any */ |
1459 | 1421 | size_t fieldstartindex = index; |
1460 | - index += skip_field(&s[index], &split[index], escaped, ifs); | |
1422 | + while (is_non_ifs_char(s[index], cc[index], ifs)) | |
1423 | + index++; | |
1461 | 1424 | if (index != fieldstartindex) { |
1462 | 1425 | pl_add(pl_add(dest, &s[fieldstartindex]), &s[index]); |
1463 | 1426 | afterfield = true; |
@@ -1470,9 +1433,8 @@ | ||
1470 | 1433 | add_empty_field(dest, &s[index]); |
1471 | 1434 | |
1472 | 1435 | /* skip (only one) IFS non-whitespace */ |
1473 | - size_t ifsstartindex = index; | |
1474 | - index += skip_ifs(&s[index], &split[index], escaped, ifs); | |
1475 | - if (index != ifsstartindex) { | |
1436 | + if (is_ifs_char(s[index], cc[index], ifs)) { | |
1437 | + index++; | |
1476 | 1438 | afterfield = false; |
1477 | 1439 | continue; |
1478 | 1440 | } |
@@ -1491,50 +1453,22 @@ | ||
1491 | 1453 | return (wchar_t *) &s[ifswhitestartindex]; |
1492 | 1454 | } |
1493 | 1455 | |
1494 | -/* If `*s' is a (possibly escaped if `escaped') IFS character, returns the | |
1495 | - * number of characters to skip it. Otherwise returns zero. */ | |
1496 | -size_t skip_ifs(const wchar_t *s, const char *split, | |
1497 | - bool escaped, const wchar_t *ifs) | |
1456 | +/* Returns true if `c' is a non-null, IFS character. */ | |
1457 | +bool is_ifs_char(wchar_t c, charcategory_T cc, const wchar_t *ifs) | |
1498 | 1458 | { |
1499 | - size_t i = 0; | |
1500 | - if (escaped && s[i] == L'\\') | |
1501 | - i++; | |
1502 | - if (s[i] == L'\0') | |
1503 | - return 0; | |
1504 | - if (split[i] && wcschr(ifs, s[i]) != NULL) | |
1505 | - return i + 1; | |
1506 | - else | |
1507 | - return 0; | |
1459 | + return cc == CC_SOFT_EXPANSION && c != L'\0' && wcschr(ifs, c) != NULL; | |
1508 | 1460 | } |
1509 | 1461 | |
1510 | -/* Returns the length of IFS whitespace sequence starting at `*s'. */ | |
1511 | -size_t skip_ifs_whitespaces(const wchar_t *s, const char *split, | |
1512 | - bool escaped, const wchar_t *ifs) | |
1462 | +/* Returns true if `c' is a non-null, IFS-whitespace character. */ | |
1463 | +bool is_ifs_whitespace(wchar_t c, charcategory_T cc, const wchar_t *ifs) | |
1513 | 1464 | { |
1514 | - size_t total = 0; | |
1515 | - for (;;) { | |
1516 | - size_t current = skip_ifs(&s[total], &split[total], escaped, ifs); | |
1517 | - if (current == 0 || !iswspace(s[total + current - 1])) | |
1518 | - return total; | |
1519 | - total += current; | |
1520 | - } | |
1465 | + return is_ifs_char(c, cc, ifs) && iswspace(c); | |
1521 | 1466 | } |
1522 | 1467 | |
1523 | -/* Returns the length of a field starting at `*s'. */ | |
1524 | -size_t skip_field(const wchar_t *s, const char *split, | |
1525 | - bool escaped, const wchar_t *ifs) | |
1468 | +/* Returns true if `c' is a non-null, non-IFS character. */ | |
1469 | +bool is_non_ifs_char(wchar_t c, charcategory_T cc, const wchar_t *ifs) | |
1526 | 1470 | { |
1527 | - size_t index = 0; | |
1528 | - for (;;) { | |
1529 | - size_t saveindex = index; | |
1530 | - if (escaped && s[index] == L'\\') | |
1531 | - index++; | |
1532 | - if (s[index] == L'\0') | |
1533 | - return saveindex; | |
1534 | - if (split[index] && wcschr(ifs, s[index]) != NULL) | |
1535 | - return saveindex; | |
1536 | - index++; | |
1537 | - } | |
1471 | + return c != L'\0' && !is_ifs_char(c, cc, ifs); | |
1538 | 1472 | } |
1539 | 1473 | |
1540 | 1474 | void add_empty_field(plist_T *dest, const wchar_t *p) |
@@ -1716,50 +1650,76 @@ | ||
1716 | 1650 | } |
1717 | 1651 | } |
1718 | 1652 | |
1719 | -/* Like `wcspbrk', but ignores backslashed characters in `s'. */ | |
1720 | -wchar_t *escaped_wcspbrk(const wchar_t *s, const wchar_t *accept) | |
1653 | +/* Tests if a character should be backslash-escaped. */ | |
1654 | +bool should_escape(char c, charcategory_T cc, escaping_T escaping) | |
1721 | 1655 | { |
1722 | - for (; *s != L'\0'; s++) { | |
1723 | - if (*s == L'\\') { | |
1724 | - s++; | |
1725 | - if (*s == L'\0') | |
1726 | - break; | |
1727 | - continue; | |
1728 | - } | |
1729 | - if (wcschr(accept, *s) != NULL) | |
1730 | - return (wchar_t *) s; | |
1656 | + switch (escaping) { | |
1657 | + case ES_NONE: | |
1658 | + return false; | |
1659 | + case ES_QUOTED_HARD: | |
1660 | + if (c == L'\\' || (cc & CC_ORIGIN_MASK) == CC_HARD_EXPANSION) | |
1661 | + return true; | |
1662 | + /* falls thru! */ | |
1663 | + case ES_QUOTED: | |
1664 | + return cc & CC_QUOTED; | |
1731 | 1665 | } |
1732 | - return NULL; | |
1666 | + assert(false); | |
1733 | 1667 | } |
1734 | 1668 | |
1735 | -/* Removes characters in `reject' from `s'. | |
1736 | - * Backslash escapes in `s' are recognized. Escapes and escaped characters are | |
1737 | - * kept in the result. | |
1738 | - * The result is a newly malloced string. */ | |
1739 | -wchar_t *escaped_remove(const wchar_t *s, const wchar_t *reject) | |
1669 | +/* Removes all quotation marks in the input string `s' and optionally adds | |
1670 | + * backslash escapes to the originally quoted characters as specified by | |
1671 | + * `escaping'. The result is a newly malloced string. */ | |
1672 | +wchar_t *quote_removal( | |
1673 | + const wchar_t *restrict s, const char *restrict cc, escaping_T escaping) | |
1740 | 1674 | { |
1741 | 1675 | xwcsbuf_T result; |
1742 | 1676 | wb_init(&result); |
1743 | - for (;;) { | |
1744 | - const wchar_t *rejectchar = escaped_wcspbrk(s, reject); | |
1745 | - if (rejectchar == NULL) | |
1746 | - break; | |
1747 | - wb_ncat_force(&result, s, rejectchar - s); | |
1748 | - s = rejectchar + 1; | |
1677 | + for (size_t i = 0; s[i] != L'\0'; i++) { | |
1678 | + if (cc[i] & CC_QUOTATION) | |
1679 | + continue; | |
1680 | + if (should_escape(s[i], cc[i], escaping)) | |
1681 | + wb_wccat(&result, L'\\'); | |
1682 | + wb_wccat(&result, s[i]); | |
1749 | 1683 | } |
1750 | - wb_cat(&result, s); | |
1751 | 1684 | return wb_towcs(&result); |
1752 | 1685 | } |
1753 | 1686 | |
1754 | -/* Like `escaped_remove', but frees `s' before returning the result. */ | |
1755 | -wchar_t *escaped_remove_free(wchar_t *s, const wchar_t *reject) | |
1687 | +/* Like `quote_removal', but frees the arguments. */ | |
1688 | +wchar_t *quote_removal_free( | |
1689 | + wchar_t *restrict s, char *restrict cc, escaping_T escaping) | |
1756 | 1690 | { |
1757 | - wchar_t *result = escaped_remove(s, reject); | |
1691 | + wchar_t *result = quote_removal(s, cc, escaping); | |
1758 | 1692 | free(s); |
1693 | + free(cc); | |
1759 | 1694 | return result; |
1760 | 1695 | } |
1761 | 1696 | |
1697 | +/* Performs empty field removal and quote removal. | |
1698 | + * In this function, `e->valuelist' is modified and `e->cclist' is destroyed. */ | |
1699 | +void remove_empty_fields_and_quotes( | |
1700 | + struct expand_four_T *e, escaping_T escaping) | |
1701 | +{ | |
1702 | + /* empty field removal */ | |
1703 | + if (e->valuelist.length == 1) { | |
1704 | + const wchar_t *field = e->valuelist.contents[0]; | |
1705 | + const char *cc = e->cclist.contents[0]; | |
1706 | + if (field[0] == L'\0' || | |
1707 | + (e->zeroword && wcscmp(field, L"\"\"") == 0 && | |
1708 | + (cc[0] & cc[1] & CC_QUOTATION))) { | |
1709 | + pl_clear(&e->valuelist, free); | |
1710 | + pl_clear(&e->cclist, free); | |
1711 | + } | |
1712 | + } | |
1762 | 1713 | |
1714 | + /* quote removal */ | |
1715 | + for (size_t i = 0; i < e->valuelist.length; i++) | |
1716 | + e->valuelist.contents[i] = quote_removal_free( | |
1717 | + e->valuelist.contents[i], e->cclist.contents[i], escaping); | |
1718 | + | |
1719 | + pl_destroy(&e->cclist); | |
1720 | +} | |
1721 | + | |
1722 | + | |
1763 | 1723 | /********** File Name Expansion (Glob) **********/ |
1764 | 1724 | |
1765 | 1725 | /* Makes a option value from the current shell settings. */ |
@@ -1838,7 +1798,7 @@ | ||
1838 | 1798 | |
1839 | 1799 | if (!parse_string(&info, &word)) |
1840 | 1800 | return NULL; |
1841 | - result = expand_single_and_unescape(word, TT_NONE, false, !esc); | |
1801 | + result = expand_single(word, TT_NONE, esc ? Q_INDQ : Q_LITERAL, ES_NONE); | |
1842 | 1802 | wordfree(word); |
1843 | 1803 | return result; |
1844 | 1804 | } |
@@ -1,6 +1,6 @@ | ||
1 | 1 | /* Yash: yet another shell */ |
2 | 2 | /* expand.h: word expansion */ |
3 | -/* (C) 2007-2018 magicant */ | |
3 | +/* (C) 2007-2020 magicant */ | |
4 | 4 | |
5 | 5 | /* This program is free software: you can redistribute it and/or modify |
6 | 6 | * it under the terms of the GNU General Public License as published by |
@@ -30,6 +30,48 @@ | ||
30 | 30 | /* type of tilde expansion */ |
31 | 31 | typedef enum { TT_NONE, TT_SINGLE, TT_MULTI, } tildetype_T; |
32 | 32 | |
33 | +/* treatment of quotation marks during expansion */ | |
34 | +typedef enum { | |
35 | + Q_WORD, /* Single quotations, double quotations, and backslashes are | |
36 | + recognized as in a normal word. */ | |
37 | + Q_INDQ, /* The string is quoted as if it were inside a pair of double | |
38 | + quotations: Single and double quotations are not recognized. | |
39 | + Backslashes are recognized only before a $, `, or \. */ | |
40 | + Q_LITERAL, /* No quotations are recognized. */ | |
41 | +} quoting_T; | |
42 | + | |
43 | +/* Category of characters resulting from expansion. | |
44 | + * A charcategory_T value is the bitwise OR of one of the origin categories | |
45 | + * (CC_LITERAL, CC_HARD_EXPANSION, and CC_SOFT_EXPANSION) and optionally any | |
46 | + * combination of the modifier flags (CC_QUOTED and CC_QUOTATION). | |
47 | + * The category determines whether a character is subject to brace expansion, field | |
48 | + * splitting, and globbing (pathname expansion). */ | |
49 | +typedef enum { | |
50 | + CC_LITERAL, /* from the original word */ | |
51 | + CC_HARD_EXPANSION, /* from tilde expansion or numeric brace expansion */ | |
52 | + CC_SOFT_EXPANSION, /* from parameter expansion, command substitution or | |
53 | + arithmetic expansion */ | |
54 | + CC_ORIGIN_MASK = (1 << 2) - 1, /* mask covering the two lowest bits, which hold the origin category */ | |
55 | + CC_QUOTED = 1 << 2, /* The character is quoted by a backslash, single quotes, or | |
56 | + double quotes. */ | |
57 | + CC_QUOTATION = 1 << 3, /* The character is a quotation mark */ | |
58 | +} charcategory_T; | |
59 | +/* A character can be both CC_QUOTED and CC_QUOTATION at the same time. This may happen | |
60 | + * in a nested quotation like "\"". */ | |
61 | + | |
62 | +/* type of characters to be backslash-escaped in the expansion results */ | |
63 | +typedef enum { | |
64 | + ES_NONE, /* No characters are escaped. */ | |
65 | + ES_QUOTED, /* Quoted characters remain escaped. */ | |
66 | + ES_QUOTED_HARD, /* Like ES_QUOTED, and characters marked CC_HARD_EXPANSION and | |
67 | + backslashes are also escaped. */ | |
68 | +} escaping_T; | |
69 | +/* ES_QUOTED_HARD is for pathname expansion patterns while ES_QUOTED is for | |
70 | + * other patterns. With ES_QUOTED_HARD, backslashes that are not quotation | |
71 | + * marks are escaped to prevent them from being regarded as escape | |
72 | + * characters. This does not apply to ES_QUOTED because the pattern is | |
73 | + * supposed to be matched without quote removal. */ | |
74 | + | |
33 | 75 | struct wordunit_T; |
34 | 76 | struct plist_T; |
35 | 77 | extern _Bool expand_line( |
@@ -40,20 +82,17 @@ | ||
40 | 82 | extern _Bool expand_multiple( |
41 | 83 | const struct wordunit_T *restrict w, struct plist_T *restrict list) |
42 | 84 | __attribute__((nonnull(2))); |
43 | -extern wchar_t *expand_single(const struct wordunit_T *arg, | |
44 | - tildetype_T tilde, _Bool processquotes, _Bool escapeall) | |
85 | +extern wchar_t *expand_single( | |
86 | + const struct wordunit_T *w, | |
87 | + tildetype_T tilde, quoting_T quoting, escaping_T escaping) | |
45 | 88 | __attribute__((malloc,warn_unused_result)); |
46 | -extern wchar_t *expand_single_and_unescape(const struct wordunit_T *arg, | |
47 | - tildetype_T tilde, _Bool processquotes, _Bool escapeall) | |
48 | - __attribute__((malloc,warn_unused_result)); | |
49 | 89 | extern char *expand_single_with_glob( |
50 | 90 | const struct wordunit_T *arg, tildetype_T tilde) |
51 | 91 | __attribute__((malloc,warn_unused_result)); |
52 | 92 | |
53 | 93 | extern wchar_t *extract_fields( |
54 | - const wchar_t *restrict s, const char *restrict split, | |
55 | - _Bool escaped, const wchar_t *restrict ifs, | |
56 | - struct plist_T *restrict dest) | |
94 | + const wchar_t *restrict s, const char *restrict cc, | |
95 | + const wchar_t *restrict ifs, struct plist_T *restrict dest) | |
57 | 96 | __attribute__((nonnull)); |
58 | 97 | |
59 | 98 | struct xwcsbuf_T; |
@@ -651,7 +651,8 @@ | ||
651 | 651 | if (pi->ctxt->pwords == NULL |
652 | 652 | && (pi->ctxt->type & CTXT_VBRACED)) { |
653 | 653 | xwcsbuf_T buf; |
654 | - wchar_t *prefix = expand_single(first, tilde, true, false); | |
654 | + wchar_t *prefix = | |
655 | + expand_single(first, tilde, Q_WORD, ES_QUOTED_HARD); | |
655 | 656 | assert(prefix != NULL); |
656 | 657 | pi->ctxt->pattern = wb_towcs(wb_catfree( |
657 | 658 | wb_initwith(&buf, prefix), pi->ctxt->pattern)); |
@@ -717,7 +718,7 @@ | ||
717 | 718 | pi->ctxt->type = ctxttype; |
718 | 719 | pi->ctxt->pwordc = 0; |
719 | 720 | pi->ctxt->pwords = NULL; |
720 | - pi->ctxt->pattern = expand_single(first, tilde, true, false); | |
721 | + pi->ctxt->pattern = expand_single(first, tilde, Q_WORD, ES_QUOTED_HARD); | |
721 | 722 | pi->ctxt->srcindex = srcindex; |
722 | 723 | wordfree(first); |
723 | 724 | return NULL; |
@@ -1,6 +1,6 @@ | ||
1 | 1 | /* Yash: yet another shell */ |
2 | 2 | /* redir.c: manages file descriptors and provides functions for redirections */ |
3 | -/* (C) 2007-2018 magicant */ | |
3 | +/* (C) 2007-2020 magicant */ | |
4 | 4 | |
5 | 5 | /* This program is free software: you can redistribute it and/or modify |
6 | 6 | * it under the terms of the GNU General Public License as published by |
@@ -448,8 +448,7 @@ | ||
448 | 448 | if (is_interactive) { |
449 | 449 | return expand_single_with_glob(filename, TT_SINGLE); |
450 | 450 | } else { |
451 | - wchar_t *result = expand_single_and_unescape( | |
452 | - filename, TT_SINGLE, true, false); | |
451 | + wchar_t *result = expand_single(filename, TT_SINGLE, Q_WORD, ES_NONE); | |
453 | 452 | if (result == NULL) |
454 | 453 | return NULL; |
455 | 454 | char *mbsresult = realloc_wcstombs(result); |
@@ -758,8 +757,7 @@ | ||
758 | 757 | * temporary file. */ |
759 | 758 | int open_heredocument(const wordunit_T *contents) |
760 | 759 | { |
761 | - wchar_t *wcontents = expand_single_and_unescape( | |
762 | - contents, TT_NONE, false, false); | |
760 | + wchar_t *wcontents = expand_single(contents, TT_NONE, Q_INDQ, ES_NONE); | |
763 | 761 | if (wcontents == NULL) |
764 | 762 | return -1; |
765 | 763 |
@@ -740,8 +740,8 @@ | ||
740 | 740 | |
741 | 741 | switch (assign->a_type) { |
742 | 742 | case A_SCALAR: |
743 | - value = expand_single_and_unescape( | |
744 | - assign->a_scalar, TT_MULTI, true, false); | |
743 | + value = | |
744 | + expand_single(assign->a_scalar, TT_MULTI, Q_WORD, ES_NONE); | |
745 | 745 | if (value == NULL) |
746 | 746 | return false; |
747 | 747 | if (shopt_xtrace) |
@@ -1594,8 +1594,8 @@ | ||
1594 | 1594 | static inline bool set_optarg(const wchar_t *value); |
1595 | 1595 | static bool set_variable_single_char(const wchar_t *varname, wchar_t value) |
1596 | 1596 | __attribute__((nonnull)); |
1597 | -static bool read_with_prompt(xwcsbuf_T *buf, xstrbuf_T *split, | |
1598 | - const struct reading_option_T *ro) | |
1597 | +static bool read_with_prompt( | |
1598 | + xwcsbuf_T *buf, xstrbuf_T *cc, const struct reading_option_T *ro) | |
1599 | 1599 | __attribute__((nonnull)); |
1600 | 1600 | static struct promptset_T promptset_for_read( |
1601 | 1601 | bool firstline, const struct reading_option_T *ro) |
@@ -1605,7 +1605,7 @@ | ||
1605 | 1605 | __attribute__((malloc,warn_unused_result)); |
1606 | 1606 | static wchar_t *read_one_line(void) |
1607 | 1607 | __attribute__((malloc,warn_unused_result)); |
1608 | -static bool unescape_line(const wchar_t *line, xwcsbuf_T *buf, xstrbuf_T *split) | |
1608 | +static bool unescape_line(const wchar_t *line, xwcsbuf_T *buf, xstrbuf_T *cc) | |
1609 | 1609 | __attribute__((nonnull)); |
1610 | 1610 | static void assign_array(const wchar_t *name, const plist_T *ranges, size_t i) |
1611 | 1611 | __attribute__((nonnull)); |
@@ -2747,12 +2747,12 @@ | ||
2747 | 2747 | } |
2748 | 2748 | |
2749 | 2749 | xwcsbuf_T buf; |
2750 | - xstrbuf_T split; | |
2750 | + xstrbuf_T cc; | |
2751 | 2751 | |
2752 | 2752 | wb_init(&buf); |
2753 | - sb_init(&split); | |
2754 | - if (!read_with_prompt(&buf, &split, &ro)) { | |
2755 | - sb_destroy(&split); | |
2753 | + sb_init(&cc); | |
2754 | + if (!read_with_prompt(&buf, &cc, &ro)) { | |
2755 | + sb_destroy(&cc); | |
2756 | 2756 | wb_destroy(&buf); |
2757 | 2757 | return Exit_FAILURE; |
2758 | 2758 | } |
@@ -2776,7 +2776,7 @@ | ||
2776 | 2776 | if (ifs == NULL) |
2777 | 2777 | ifs = DEFAULT_IFS; |
2778 | 2778 | |
2779 | - tail = extract_fields(buf.contents, split.contents, false, ifs, &list); | |
2779 | + tail = extract_fields(buf.contents, cc.contents, ifs, &list); | |
2780 | 2780 | assert(list.length % 2 == 0); |
2781 | 2781 | } |
2782 | 2782 |
@@ -2812,7 +2812,7 @@ | ||
2812 | 2812 | } |
2813 | 2813 | |
2814 | 2814 | pl_destroy(&list); |
2815 | - sb_destroy(&split); | |
2815 | + sb_destroy(&cc); | |
2816 | 2816 | wb_destroy(&buf); |
2817 | 2817 | return (!eof && yash_error_message_count == 0) |
2818 | 2818 | ? Exit_SUCCESS : Exit_FAILURE; |
@@ -2819,15 +2819,15 @@ | ||
2819 | 2819 | } |
2820 | 2820 | |
2821 | 2821 | /* Reads one line from the standard input. The result is appended to `buf' and |
2822 | - * `split'. `buf' will contain no escapes or other special characters. `split' | |
2823 | - * is the splittability string for `buf'. The string is splittable at characters | |
2824 | - * that were not backslash-escaped. | |
2822 | + * `cc'. `buf' will contain no escapes or other special characters. `cc' is the | |
2823 | + * charcategory_T string for `buf'. It indicates whether `buf' can be split at | |
2824 | + * the corresponding character when passed to `extract_fields'. | |
2825 | 2825 | * If `ro->raw' is true, exactly one line is read and backslashes are not |
2826 | 2826 | * treated as escapes. Otherwise, line continuations cause this function to read |
2827 | 2827 | * more and backslash escapes are recognized. |
2828 | 2828 | * Returns false on error while reading. */ |
2829 | -bool read_with_prompt(xwcsbuf_T *buf, xstrbuf_T *split, | |
2830 | - const struct reading_option_T *ro) | |
2829 | +bool read_with_prompt( | |
2830 | + xwcsbuf_T *buf, xstrbuf_T *cc, const struct reading_option_T *ro) | |
2831 | 2831 | { |
2832 | 2832 | bool firstline = true; |
2833 | 2833 | bool completed = false; |
@@ -2847,10 +2847,10 @@ | ||
2847 | 2847 | |
2848 | 2848 | if (ro->raw) { |
2849 | 2849 | wb_cat(buf, line); |
2850 | - sb_ccat_repeat(split, true, wcslen(line)); | |
2850 | + sb_ccat_repeat(cc, CC_SOFT_EXPANSION, wcslen(line)); | |
2851 | 2851 | completed = true; |
2852 | 2852 | } else { |
2853 | - completed = unescape_line(line, buf, split); | |
2853 | + completed = unescape_line(line, buf, cc); | |
2854 | 2854 | } |
2855 | 2855 | free(line); |
2856 | 2856 |
@@ -2926,11 +2926,11 @@ | ||
2926 | 2926 | } |
2927 | 2927 | |
2928 | 2928 | /* Parses a string that may contain backslash escapes. |
2929 | - * Unescaped `line' is appended to `buf' with a corresponding splittability | |
2930 | - * string appended to `split'. Characters are splittable iff not escaped. | |
2929 | + * Unescaped `line' is appended to `buf' with a corresponding charcategory_T | |
2930 | + * string appended to `cc'. | |
2931 | 2931 | * The result is false iff `line' ends with a line continuation. |
2932 | 2932 | * The line continuation is not appended to `buf'. */ |
2933 | -bool unescape_line(const wchar_t *line, xwcsbuf_T *buf, xstrbuf_T *split) | |
2933 | +bool unescape_line(const wchar_t *line, xwcsbuf_T *buf, xstrbuf_T *cc) | |
2934 | 2934 | { |
2935 | 2935 | for (;;) { |
2936 | 2936 | bool splitchar; |
@@ -2953,7 +2953,7 @@ | ||
2953 | 2953 | break; |
2954 | 2954 | } |
2955 | 2955 | wb_wccat(buf, *line); |
2956 | - sb_ccat(split, splitchar); | |
2956 | + sb_ccat(cc, CC_SOFT_EXPANSION | (splitchar ? 0 : CC_QUOTED)); | |
2957 | 2957 | line++; |
2958 | 2958 | } |
2959 | 2959 | } |