Avoid undefined behavior in arithmetic expansion (#45200)
@@ -10,6 +10,8 @@ | ||
10 | 10 | ---------------------------------------------------------------------- |
11 | 11 | Yash 2.53 (????-??-??) |
12 | 12 | |
13 | + = The shell now deterministically rejects arithmetic expansions | |
14 | + that result in undefined behavior. | |
13 | 15 | * A non-interactive yash now exits on an assignment error in a for |
14 | 16 | loop. |
15 | 17 | * Fixed a bug where command substitutions contained in the regular |
@@ -91,9 +91,6 @@ | ||
91 | 91 | * In C, a null character represents the end of a string. If input to |
92 | 92 | the shell itself contains a null character, characters following |
93 | 93 | the null character will be ignored. |
94 | - * We assume that an overflow in signed integer arithmetic or type | |
95 | - conversion silently yields an implementation-defined integer value | |
96 | - rather than resulting in an error. | |
97 | 94 | * The GCC extension keyword `__attribute__' is used in the source |
98 | 95 | code. When not compiled with GCC, this keyword is removed by the |
99 | 96 | preprocessor, so generally there is no harm. But if your compiler |
@@ -1,6 +1,6 @@ | ||
1 | 1 | /* Yash: yet another shell */ |
2 | 2 | /* arith.c: arithmetic expansion */ |
3 | -/* (C) 2007-2019 magicant */ | |
3 | +/* (C) 2007-2022 magicant */ | |
4 | 4 | |
5 | 5 | /* This program is free software: you can redistribute it and/or modify |
6 | 6 | * it under the terms of the GNU General Public License as published by |
@@ -99,9 +99,17 @@ | ||
99 | 99 | evalinfo_T *info, atokentype_T ttype, |
100 | 100 | value_T *lhs, value_T *rhs, value_T *result) |
101 | 101 | __attribute__((nonnull)); |
102 | -static long do_long_calculation1(atokentype_T ttype, long v1, long v2); | |
103 | -static long do_long_calculation2(atokentype_T ttype, long v1, long v2); | |
104 | -static double do_double_calculation(atokentype_T ttype, double v1, double v2); | |
102 | +static bool do_long_calculation1( | |
103 | + atokentype_T ttype, long v1, long v2, long *result) | |
104 | + __attribute__((nonnull,warn_unused_result)); | |
105 | +static bool do_long_calculation2( | |
106 | + atokentype_T ttype, long v1, long v2, long *result) | |
107 | + __attribute__((nonnull,warn_unused_result)); | |
108 | +static long do_long_comparison(atokentype_T ttype, long v1, long v2) | |
109 | + __attribute__((const,warn_unused_result)); | |
110 | +static bool do_double_calculation( | |
111 | + atokentype_T ttype, double v1, double v2, double *result) | |
112 | + __attribute__((nonnull,warn_unused_result)); | |
105 | 113 | static long do_double_comparison(atokentype_T ttype, double v1, double v2); |
106 | 114 | static void parse_conditional(evalinfo_T *info, value_T *result) |
107 | 115 | __attribute__((nonnull)); |
@@ -129,8 +137,8 @@ | ||
129 | 137 | __attribute__((nonnull)); |
130 | 138 | static void parse_postfix(evalinfo_T *info, value_T *result) |
131 | 139 | __attribute__((nonnull)); |
132 | -static void do_increment_or_decrement(atokentype_T ttype, value_T *value) | |
133 | - __attribute__((nonnull)); | |
140 | +static bool do_increment_or_decrement(atokentype_T ttype, value_T *value) | |
141 | + __attribute__((nonnull,warn_unused_result)); | |
134 | 142 | static void parse_primary(evalinfo_T *info, value_T *result) |
135 | 143 | __attribute__((nonnull)); |
136 | 144 | static void parse_as_number(evalinfo_T *info, value_T *result) |
@@ -144,9 +152,8 @@ | ||
144 | 152 | __attribute__((nonnull)); |
145 | 153 | static void next_token(evalinfo_T *info) |
146 | 154 | __attribute__((nonnull)); |
147 | -static bool fail_if_will_divide_by_zero( | |
148 | - atokentype_T op, const value_T *rhs, evalinfo_T *info, value_T *result) | |
149 | - __attribute__((nonnull)); | |
155 | +static bool long_mul_will_overflow(long v1, long v2) | |
156 | + __attribute__((const,warn_unused_result)); | |
150 | 157 | |
151 | 158 | |
152 | 159 | /* Evaluates the specified string as an arithmetic expression. |
@@ -329,16 +336,22 @@ | ||
329 | 336 | case TT_PLUS: case TT_PLUSEQUAL: |
330 | 337 | case TT_MINUS: case TT_MINUSEQUAL: |
331 | 338 | result->type = coerce_type(info, lhs, rhs); |
332 | - if (fail_if_will_divide_by_zero(ttype, rhs, info, result)) | |
333 | - return false; | |
334 | 339 | switch (result->type) { |
335 | 340 | case VT_LONG: |
336 | - result->v_long = do_long_calculation1( | |
337 | - ttype, lhs->v_long, rhs->v_long); | |
341 | + if (!do_long_calculation1(ttype, lhs->v_long, rhs->v_long, | |
342 | + &result->v_long)) { | |
343 | + info->error = true; | |
344 | + result->type = VT_INVALID; | |
345 | + return false; | |
346 | + } | |
338 | 347 | break; |
339 | 348 | case VT_DOUBLE: |
340 | - result->v_double = do_double_calculation( | |
341 | - ttype, lhs->v_double, rhs->v_double); | |
349 | + if (!do_double_calculation(ttype, lhs->v_double, | |
350 | + rhs->v_double, &result->v_double)) { | |
351 | + info->error = true; | |
352 | + result->type = VT_INVALID; | |
353 | + return false; | |
354 | + } | |
342 | 355 | break; |
343 | 356 | case VT_VAR: |
344 | 357 | assert(false); |
@@ -354,11 +367,17 @@ | ||
354 | 367 | coerce_integer(info, lhs); |
355 | 368 | coerce_integer(info, rhs); |
356 | 369 | if (lhs->type == VT_LONG && rhs->type == VT_LONG) { |
357 | - result->type = VT_LONG; | |
358 | - result->v_long = | |
359 | - do_long_calculation2(ttype, lhs->v_long, rhs->v_long); | |
370 | + if (do_long_calculation2( | |
371 | + ttype, lhs->v_long, rhs->v_long, &result->v_long)) { | |
372 | + result->type = VT_LONG; | |
373 | + } else { | |
374 | + info->error = true; | |
375 | + result->type = VT_INVALID; | |
376 | + return false; | |
377 | + } | |
360 | 378 | } else { |
361 | 379 | result->type = VT_INVALID; |
380 | + return false; | |
362 | 381 | } |
363 | 382 | break; |
364 | 383 | case TT_EQUAL: |
@@ -370,35 +389,102 @@ | ||
370 | 389 | return true; |
371 | 390 | } |
372 | 391 | |
373 | -/* Does unary or binary long calculation according to the specified operator | |
374 | - * token. Division by zero is not allowed. */ | |
375 | -long do_long_calculation1(atokentype_T ttype, long v1, long v2) | |
392 | +/* Applies binary operator `ttype' to the given operands `v1' and `v2'. | |
393 | + * If successful, assigns the result to `*result' and returns true. | |
394 | + * Otherwise, prints an error message and returns false. */ | |
395 | +bool do_long_calculation1(atokentype_T ttype, long v1, long v2, long *result) | |
376 | 396 | { |
377 | 397 | switch (ttype) { |
378 | 398 | case TT_PLUS: case TT_PLUSEQUAL: |
379 | - return v1 + v2; | |
399 | + if (v2 >= 0 ? LONG_MAX - v2 < v1 : v1 < LONG_MIN - v2) | |
400 | + goto overflow; | |
401 | + *result = v1 + v2; | |
402 | + return true; | |
380 | 403 | case TT_MINUS: case TT_MINUSEQUAL: |
381 | - return v1 - v2; | |
404 | + if (v2 < 0 ? LONG_MAX + v2 < v1 : v1 < LONG_MIN + v2) | |
405 | + goto overflow; | |
406 | + *result = v1 - v2; | |
407 | + return true; | |
382 | 408 | case TT_ASTER: case TT_ASTEREQUAL: |
383 | - return v1 * v2; | |
409 | + if (long_mul_will_overflow(v1, v2)) | |
410 | + goto overflow; | |
411 | + *result = v1 * v2; | |
412 | + return true; | |
384 | 413 | case TT_SLASH: case TT_SLASHEQUAL: |
385 | - return v1 / v2; | |
414 | + if (v2 == 0) | |
415 | + goto division_by_zero; | |
416 | + if (v1 == LONG_MIN && v2 == -1) | |
417 | + goto overflow; | |
418 | + *result = v1 / v2; | |
419 | + return true; | |
386 | 420 | case TT_PERCENT: case TT_PERCENTEQUAL: |
387 | - return v1 % v2; | |
421 | + if (v2 == 0) | |
422 | + goto division_by_zero; | |
423 | + if (v1 == LONG_MIN && v2 == -1) | |
424 | + goto overflow; | |
425 | + *result = v1 % v2; | |
426 | + return true; | |
388 | 427 | default: |
389 | 428 | assert(false); |
390 | 429 | } |
430 | + | |
431 | +overflow: | |
432 | + xerror(0, Ngt("arithmetic: overflow")); | |
433 | + return false; | |
434 | +division_by_zero: | |
435 | + xerror(0, Ngt("arithmetic: division by zero")); | |
436 | + return false; | |
391 | 437 | } |
392 | 438 | |
393 | -/* Does unary or binary long calculation according to the specified operator | |
394 | - * token. */ | |
395 | -long do_long_calculation2(atokentype_T ttype, long v1, long v2) | |
439 | +/* Applies binary operator `ttype' to the given operands `v1' and `v2'. | |
440 | + * If successful, assigns the result to `*result' and returns true. | |
441 | + * Otherwise, prints an error message and returns false. */ | |
442 | +bool do_long_calculation2(atokentype_T ttype, long v1, long v2, long *result) | |
396 | 443 | { |
397 | 444 | switch (ttype) { |
398 | 445 | case TT_LESSLESS: case TT_LESSLESSEQUAL: |
399 | - return v1 << v2; | |
446 | + if (v1 < 0) | |
447 | + goto negative_left_shift; | |
448 | + if (v2 < 0 || v2 >= LONG_BIT) | |
449 | + goto invalid_shift_width; | |
450 | + unsigned long u1 = (unsigned long) v1; | |
451 | + if ((u1 << v2 & (unsigned long) LONG_MAX) >> v2 != u1) | |
452 | + goto overflow; | |
453 | + *result = v1 << v2; | |
454 | + return true; | |
400 | 455 | case TT_GREATERGREATER: case TT_GREATERGREATEREQUAL: |
401 | - return v1 >> v2; | |
456 | + if (v2 < 0 || v2 >= LONG_BIT) | |
457 | + goto invalid_shift_width; | |
458 | + *result = v1 >> v2; | |
459 | + return true; | |
460 | + case TT_AMP: case TT_AMPEQUAL: | |
461 | + *result = v1 & v2; | |
462 | + return true; | |
463 | + case TT_HAT: case TT_HATEQUAL: | |
464 | + *result = v1 ^ v2; | |
465 | + return true; | |
466 | + case TT_PIPE: case TT_PIPEEQUAL: | |
467 | + *result = v1 | v2; | |
468 | + return true; | |
469 | + default: | |
470 | + assert(false); | |
471 | + } | |
472 | + | |
473 | +overflow: | |
474 | + xerror(0, Ngt("arithmetic: overflow")); | |
475 | + return false; | |
476 | +negative_left_shift: | |
477 | + xerror(0, Ngt("arithmetic: negative value cannot be shifted to left")); | |
478 | + return false; | |
479 | +invalid_shift_width: | |
480 | + xerror(0, Ngt("arithmetic: invalid shift width")); | |
481 | + return false; | |
482 | +} | |
483 | + | |
484 | +/* Applies binary operator `ttype' to the given operands `v1' and `v2'. */ | |
485 | +long do_long_comparison(atokentype_T ttype, long v1, long v2) | |
486 | +{ | |
487 | + switch (ttype) { | |
402 | 488 | case TT_LESS: |
403 | 489 | return v1 < v2; |
404 | 490 | case TT_LESSEQUAL: |
@@ -411,35 +497,50 @@ | ||
411 | 497 | return v1 == v2; |
412 | 498 | case TT_EXCLEQUAL: |
413 | 499 | return v1 != v2; |
414 | - case TT_AMP: case TT_AMPEQUAL: | |
415 | - return v1 & v2; | |
416 | - case TT_HAT: case TT_HATEQUAL: | |
417 | - return v1 ^ v2; | |
418 | - case TT_PIPE: case TT_PIPEEQUAL: | |
419 | - return v1 | v2; | |
420 | 500 | default: |
421 | 501 | assert(false); |
422 | 502 | } |
423 | 503 | } |
424 | 504 | |
425 | -/* Does unary or binary double calculation according to the specified operator | |
426 | - * token. */ | |
427 | -double do_double_calculation(atokentype_T ttype, double v1, double v2) | |
505 | +/* Applies binary operator `ttype' to the given operands `v1' and `v2'. | |
506 | + * If successful, assigns the result to `*result' and returns true. | |
507 | + * Otherwise, prints an error message and returns false. */ | |
508 | +bool do_double_calculation( | |
509 | + atokentype_T ttype, double v1, double v2, double *result) | |
428 | 510 | { |
429 | 511 | switch (ttype) { |
430 | 512 | case TT_PLUS: case TT_PLUSEQUAL: |
431 | - return v1 + v2; | |
513 | + *result = v1 + v2; | |
514 | + return true; | |
432 | 515 | case TT_MINUS: case TT_MINUSEQUAL: |
433 | - return v1 - v2; | |
516 | + *result = v1 - v2; | |
517 | + return true; | |
434 | 518 | case TT_ASTER: case TT_ASTEREQUAL: |
435 | - return v1 * v2; | |
519 | + *result = v1 * v2; | |
520 | + return true; | |
436 | 521 | case TT_SLASH: case TT_SLASHEQUAL: |
437 | - return v1 / v2; | |
522 | +#if DOUBLE_DIVISION_BY_ZERO_ERROR | |
523 | + if (v2 == 0.0) | |
524 | + goto division_by_zero; | |
525 | +#endif | |
526 | + *result = v1 / v2; | |
527 | + return true; | |
438 | 528 | case TT_PERCENT: case TT_PERCENTEQUAL: |
439 | - return fmod(v1, v2); | |
529 | +#if DOUBLE_DIVISION_BY_ZERO_ERROR | |
530 | + if (v2 == 0.0) | |
531 | + goto division_by_zero; | |
532 | +#endif | |
533 | + *result = fmod(v1, v2); | |
534 | + return true; | |
440 | 535 | default: |
441 | 536 | assert(false); |
442 | 537 | } |
538 | + | |
539 | +#if DOUBLE_DIVISION_BY_ZERO_ERROR | |
540 | +division_by_zero: | |
541 | + xerror(0, Ngt("arithmetic: division by zero")); | |
542 | + return false; | |
543 | +#endif | |
443 | 544 | } |
444 | 545 | |
445 | 546 | /* Does double comparison according to the specified operator token. */ |
@@ -660,7 +761,7 @@ | ||
660 | 761 | parse_relational(info, &rhs); |
661 | 762 | switch (coerce_type(info, result, &rhs)) { |
662 | 763 | case VT_LONG: |
663 | - result->v_long = do_long_calculation2(ttype, | |
764 | + result->v_long = do_long_comparison(ttype, | |
664 | 765 | result->v_long, rhs.v_long); |
665 | 766 | break; |
666 | 767 | case VT_DOUBLE: |
@@ -702,7 +803,7 @@ | ||
702 | 803 | parse_shift(info, &rhs); |
703 | 804 | switch (coerce_type(info, result, &rhs)) { |
704 | 805 | case VT_LONG: |
705 | - result->v_long = do_long_calculation2(ttype, | |
806 | + result->v_long = do_long_comparison(ttype, | |
706 | 807 | result->v_long, rhs.v_long); |
707 | 808 | break; |
708 | 809 | case VT_DOUBLE: |
@@ -814,8 +915,8 @@ | ||
814 | 915 | } else if (result->type == VT_VAR) { |
815 | 916 | word_T saveword = result->v_var; |
816 | 917 | coerce_number(info, result); |
817 | - do_increment_or_decrement(ttype, result); | |
818 | - if (!do_assignment(&saveword, result)) | |
918 | + if (!do_increment_or_decrement(ttype, result) || | |
919 | + !do_assignment(&saveword, result)) | |
819 | 920 | info->error = true, result->type = VT_INVALID; |
820 | 921 | } else if (result->type != VT_INVALID) { |
821 | 922 | /* TRANSLATORS: This error message is shown when the operand of |
@@ -833,7 +934,17 @@ | ||
833 | 934 | coerce_number(info, result); |
834 | 935 | if (ttype == TT_MINUS) { |
835 | 936 | switch (result->type) { |
836 | - case VT_LONG: result->v_long = -result->v_long; break; | |
937 | + case VT_LONG: | |
938 | +#if LONG_MIN < -LONG_MAX | |
939 | + if (result->v_long == LONG_MIN) { | |
940 | + xerror(0, Ngt("arithmetic: overflow")); | |
941 | + info->error = true; | |
942 | + result->type = VT_INVALID; | |
943 | + break; | |
944 | + } | |
945 | +#endif | |
946 | + result->v_long = -result->v_long; | |
947 | + break; | |
837 | 948 | case VT_DOUBLE: result->v_double = -result->v_double; break; |
838 | 949 | case VT_INVALID: break; |
839 | 950 | default: assert(false); |
@@ -891,8 +1002,8 @@ | ||
891 | 1002 | word_T saveword = result->v_var; |
892 | 1003 | coerce_number(info, result); |
893 | 1004 | value_T value = *result; |
894 | - do_increment_or_decrement(info->atoken.type, &value); | |
895 | - if (!do_assignment(&saveword, &value)) { | |
1005 | + if (!do_increment_or_decrement(info->atoken.type, &value) || | |
1006 | + !do_assignment(&saveword, &value)) { | |
896 | 1007 | info->error = true; |
897 | 1008 | result->type = VT_INVALID; |
898 | 1009 | } |
@@ -913,12 +1024,19 @@ | ||
913 | 1024 | |
914 | 1025 | /* Increment or decrement the specified value. |
915 | 1026 | * `ttype' must be either TT_PLUSPLUS or TT_MINUSMINUS and the `value' must be |
916 | - * `coerce_number'ed. */ | |
917 | -void do_increment_or_decrement(atokentype_T ttype, value_T *value) | |
1027 | + * `coerce_number'ed. | |
1028 | + * Returns false on error. */ | |
1029 | +bool do_increment_or_decrement(atokentype_T ttype, value_T *value) | |
918 | 1030 | { |
919 | 1031 | if (ttype == TT_PLUSPLUS) { |
920 | 1032 | switch (value->type) { |
921 | - case VT_LONG: value->v_long++; break; | |
1033 | + case VT_LONG: | |
1034 | + if (value->v_long == LONG_MAX) { | |
1035 | + xerror(0, Ngt("arithmetic: overflow")); | |
1036 | + return false; | |
1037 | + } | |
1038 | + value->v_long++; | |
1039 | + break; | |
922 | 1040 | case VT_DOUBLE: value->v_double++; break; |
923 | 1041 | case VT_INVALID: break; |
924 | 1042 | default: assert(false); |
@@ -925,12 +1043,19 @@ | ||
925 | 1043 | } |
926 | 1044 | } else { |
927 | 1045 | switch (value->type) { |
928 | - case VT_LONG: value->v_long--; break; | |
1046 | + case VT_LONG: | |
1047 | + if (value->v_long == LONG_MIN) { | |
1048 | + xerror(0, Ngt("arithmetic: overflow")); | |
1049 | + return false; | |
1050 | + } | |
1051 | + value->v_long--; | |
1052 | + break; | |
929 | 1053 | case VT_DOUBLE: value->v_double--; break; |
930 | 1054 | case VT_INVALID: break; |
931 | 1055 | default: assert(false); |
932 | 1056 | } |
933 | 1057 | } |
1058 | + return true; | |
934 | 1059 | } |
935 | 1060 | |
936 | 1061 | /* Parses a primary expression. |
@@ -1326,44 +1451,23 @@ | ||
1326 | 1451 | } |
1327 | 1452 | } |
1328 | 1453 | |
1329 | -/* If `op' is a division operator and `rhs' is zero, then prints an error | |
1330 | - * message, sets `info->error' to true, sets `result->type' to VT_INVALID, and | |
1331 | - * returns true. Otherwise, just returns false. | |
1332 | - * `rhs->type' must not be VT_VAR. */ | |
1333 | -bool fail_if_will_divide_by_zero( | |
1334 | - atokentype_T op, const value_T *rhs, evalinfo_T *info, value_T *result) | |
1454 | +/* Tests whether the multiplication of the given two long values will overflow. | |
1455 | + */ | |
1456 | +bool long_mul_will_overflow(long v1, long v2) | |
1335 | 1457 | { |
1336 | - switch (op) { | |
1337 | - case TT_SLASH: | |
1338 | - case TT_SLASHEQUAL: | |
1339 | - case TT_PERCENT: | |
1340 | - case TT_PERCENTEQUAL: | |
1341 | - switch (rhs->type) { | |
1342 | - case VT_LONG: | |
1343 | - if (rhs->v_long == 0) | |
1344 | - goto fail; | |
1345 | - break; | |
1346 | - case VT_DOUBLE: | |
1347 | -#if DOUBLE_DIVISION_BY_ZERO_ERROR | |
1348 | - if (rhs->v_double == 0.0) | |
1349 | - goto fail; | |
1458 | + if (v1 == 0 || v1 == 1 || v2 == 0 || v2 == 1) | |
1459 | + return false; | |
1460 | +#if LONG_MIN < -LONG_MAX | |
1461 | + if (v1 == LONG_MIN || v2 == LONG_MIN) | |
1462 | + return true; | |
1350 | 1463 | #endif |
1351 | - break; | |
1352 | - case VT_VAR: | |
1353 | - assert(false); | |
1354 | - case VT_INVALID: | |
1355 | - break; | |
1356 | - } | |
1357 | - /* falls through */ | |
1358 | - default: | |
1359 | - return false; | |
1360 | - } | |
1361 | - | |
1362 | -fail: | |
1363 | - xerror(0, Ngt("arithmetic: division by zero")); | |
1364 | - info->error = true; | |
1365 | - result->type = VT_INVALID; | |
1366 | - return true; | |
1464 | + unsigned long u1 = labs(v1), u2 = labs(v2); | |
1465 | + unsigned long prod = u1 * u2; | |
1466 | +#if LONG_MIN < -LONG_MAX | |
1467 | + if (prod == (unsigned long) LONG_MIN) | |
1468 | + return ((v1 >= 0) == (v2 >= 0)) || prod / u2 != u1; | |
1469 | +#endif | |
1470 | + return (prod & (unsigned long) LONG_MAX) / u2 != u1; | |
1367 | 1471 | } |
1368 | 1472 | |
1369 | 1473 | /* vim: set ts=8 sts=4 sw=4 noet tw=80: */ |
@@ -466,6 +466,8 @@ | ||
466 | 466 | echo $((foo + 0)) # error |
467 | 467 | ---- |
468 | 468 | |
469 | +It is an expansion error if the result of an expression is not defined in C. | |
470 | + | |
469 | 471 | [[brace]] |
470 | 472 | == Brace expansion |
471 | 473 |
@@ -276,6 +276,8 @@ | ||
276 | 276 | echo $((foo + 0)) # エラー |
277 | 277 | ---- |
278 | 278 | |
279 | +C 言語で結果が定義されていない数式は展開エラーになります。 | |
280 | + | |
279 | 281 | [[brace]] |
280 | 282 | == ブレース展開 |
281 | 283 |