linux内核的atoi测试
v_JULY_v君的问题非常好(请见文章的评论)! 每次都让我思考. 现将linux内核的atoi测试代码贴出来, 为了区别于C标准库的atoi函数, 我把测试的函数名改为matoi:
- #include <stdio.h>
- #include <string.h>
- #include <ctype.h>
-
- /*http://lxr.free-electrons.com/source/lib/kstrtox.h#L4*/
- #define KSTRTOX_OVERFLOW (1U << 31)
- const char *_parse_integer_fixup_radix(const char *s, unsigned int *base);
- unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *res);
-
- /*http://lxr.free-electrons.com/source/arch/powerpc/boot/types.h#L12*/
- typedef int s32;
- typedef unsigned int u32;
- typedef unsigned long long u64;
-
- /*http://lxr.free-electrons.com/source/drivers/media/pci/ngene/ngene-dvb.c#L127*/
- static u32 overflow;
-
- /*http://lxr.free-electrons.com/source/include/linux/kernel.h#L29*/
- #define ULLONG_MAX (~0ULL)
-
- #define unlikely(cond) (cond)
-
- /*http://lxr.free-electrons.com/source/lib/kstrtox.c#L23*/
- const char *_parse_integer_fixup_radix(const char *s, unsigned int *base) /* resolves *base when it is 0 and returns s, advanced past a "0x" prefix when the base is 16 */
- {
- if (*base == 0) { /* base 0 means: pick the radix from the prefix of s */
- if (s[0] == '0') {
- if (_tolower(s[1]) == 'x' && isxdigit(s[2])) /* "0x"/"0X" followed by a hex digit */
- *base = 16;
- else
- *base = 8; /* any other leading '0' selects octal */
- } else
- *base = 10;
- }
- if (*base == 16 && s[0] == '0' && _tolower(s[1]) == 'x') /* also taken when the caller passed base 16 explicitly */
- s += 2;
- return s;
- }
-
- /*http://lxr.free-electrons.com/source/lib/kstrtox.c#L47*/
- /*
- * Convert non-negative integer string representation in explicitly given radix
- * to an integer.
- * Return number of characters consumed maybe or-ed with overflow bit.
- * If overflow occurs, result integer (incorrect) is still returned.
- *
- * Don't you dare use this function.
- */
- unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)
- {
- unsigned long long res; /* value accumulated so far */
- unsigned int rv; /* number of characters consumed */
- int overflow; /* local flag; shadows the file-scope `overflow` variable */
-
- res = 0;
- rv = 0;
- overflow = 0;
- while (*s) {
- unsigned int val;
-
- if ('0' <= *s && *s <= '9')
- val = *s - '0';
- else if ('a' <= _tolower(*s) && _tolower(*s) <= 'f')
- val = _tolower(*s) - 'a' + 10;
- else
- break; /* not a digit in any supported base */
-
- if (val >= base)
- break; /* digit is not valid in this base */
- /*
- * Check for overflow only if we are within range of
- * it in the max base we support (16)
- */
- if (unlikely(res & (~0ull << 60))) { /* true when any of the top 4 bits of res is set */
- if (res > ULLONG_MAX - val/base) /* NOTE(review): val/base is evaluated first and val < base here, so it is 0; the test is res > ULLONG_MAX, which is never true */
- overflow = 1;
- }
- res = res * base + val; /* unsigned arithmetic: wraps silently when the value does not fit */
- rv++;
- s++;
- }
- *p = res; /* stored even when overflow was detected */
- if (overflow)
- rv |= KSTRTOX_OVERFLOW;
- return rv;
- }
-
- /*http://lxr.free-electrons.com/source/lib/vsprintf.c#L44*/
- /**
- * simple_strtoull - convert a string to an unsigned long long
- * @cp: The start of the string
- * @endp: A pointer to the end of the parsed string will be placed here
- * @base: The number base to use
- *
- * This function is obsolete. Please use kstrtoull instead.
- *
- * The radix is first resolved by _parse_integer_fixup_radix(), then the
- * digits are parsed by _parse_integer(). The overflow bit in the count
- * returned by _parse_integer() is masked off and not reported, so the
- * value stored by _parse_integer() is returned unchanged. @endp may be
- * NULL, in which case the end position is not stored.
- */
- unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base)
- {
- unsigned long long result;
- unsigned int rv;
-
- cp = _parse_integer_fixup_radix(cp, &base);
- rv = _parse_integer(cp, base, &result);
- /* FIXME */
- cp += (rv & ~KSTRTOX_OVERFLOW); /* advance by the consumed character count only */
-
- if (endp)
- *endp = (char *)cp;
-
- return result;
- }
-
- /*http://lxr.free-electrons.com/source/lib/vsprintf.c#L83*/
- /**
- * simple_strtoul - convert a string to an unsigned long
- * @cp: The start of the string
- * @endp: A pointer to the end of the parsed string will be placed here
- * @base: The number base to use
- *
- * This function is obsolete. Please use kstrtoul instead.
- *
- * Thin wrapper: returns the result of simple_strtoull() converted to
- * unsigned long.
- */
- unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base)
- {
- return simple_strtoull(cp, endp, base);
- }
-
- /*http://lxr.free-electrons.com/source/drivers/staging/tidspbridge/rmgr/dbdcd.c#L950*/
- /*
- * ======== atoi ========
- * Purpose:
- * This function converts strings in decimal or hex format to integers.
- * Leading white space is skipped. A leading '+' or '-' selects base 10 and
- * is skipped, but the sign is not applied to the result. Otherwise a
- * trailing 'h'/'H' selects base 16; in all other cases base 0 is passed so
- * that the radix is picked from the string's prefix.
- * The unsigned long returned by simple_strtoul() is converted to s32.
- * NOTE(review): `char *pch = psz_buf` drops the const qualifier.
- */
- static s32 matoi(const char *psz_buf)
- {
- char *pch = psz_buf;
- s32 base = 0;
-
- while (isspace(*pch))
- pch++;
-
- if (*pch == '-' || *pch == '+') {
- base = 10;
- pch++; /* the sign character is dropped here and never used again */
- } else if (*pch && tolower(pch[strlen(pch) - 1]) == 'h') { /* *pch is checked first, so the string is non-empty when strlen(pch) - 1 is used as an index */
- base = 16;
- }
-
- return simple_strtoul(pch, NULL, base);
- }
-
- void test(const char* str) { /* prints str followed by the value matoi() returns for it */
- printf("%s : %d\n", str, matoi(str));
- }
-
- int main() { /* runs matoi() on decimal strings at and beyond +/-2^31 and prints each result */
- test("2147483647");
- test("2147483648");
- test("-2147483648");
- test("-2147483649");
- test("10522545459");
- test("-10522545459");
-
- return 0;
- }
修改的地方在第75行, 原来的代码为:
if (res > div_u64(ULLONG_MAX - val, base))
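而div_u64调用的div_u64_rem函数中包含汇编代码, 编译不过(原因尚未可知, 有待进一步研究), 所以我把div_u64调用去掉, 改成了上面代码第75行的写法. 需要注意: 由于运算符优先级, val/base会先被计算, 而此处val < base, 其值恒为0, 因此该判断等价于res > ULLONG_MAX, 永远不成立, 溢出标志实际上不会被设置.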
程序的输出结果(很显然, 对于溢出的情况, 程序没有处理):
2147483647 : 2147483647
2147483648 : -2147483648
10522545459 : 1932610867
-2147483648 : -2147483648
-2147483649 : -2147483647
-10522545459 : 1932610867
Nut/OS的atoi测试
以下是测试代码(在Ubuntu 10.04.1, gcc 4.4.3上编译通过; 为了区别于C标准库的函数, 函数名strtol更改为mstrtol, atoi更改为matoi2):
- #include <stdio.h>
- #include <ctype.h>
- #include <errno.h>
- #include <limits.h>
-
- #define CONST const
-
- long mstrtol(CONST char *nptr, char **endptr, int base) /* strtol-style parser: converts the text at nptr in the given base to a long */
- {
- register CONST char *s;
- register long acc, cutoff;
- register int c;
- register int neg, any, cutlim;
-
- /*
- * Skip white space and pick up leading +/- sign if any.
- * If base is 0, allow 0x for hex and 0 for octal, else
- * assume decimal; if base is already 16, allow 0x.
- */
- s = nptr;
- do {
- c = (unsigned char) *s++;
- } while (isspace(c));
- if (c == '-') {
- neg = 1;
- c = *s++; /* NOTE(review): no (unsigned char) cast here, unlike the read in the loop above */
- } else {
- neg = 0;
- if (c == '+')
- c = *s++; /* NOTE(review): same missing (unsigned char) cast */
- }
- if ((base == 0 || base == 16) && c == '0' && (*s == 'x' || *s == 'X')) {
- c = s[1]; /* c becomes the first character after the "0x" prefix */
- s += 2;
- base = 16;
- }
- if (base == 0)
- base = c == '0' ? 8 : 10;
-
- /*
- * Compute the cutoff value between legal numbers and illegal
- * numbers. That is the largest legal value, divided by the
- * base. An input number that is greater than this value, if
- * followed by a legal input character, is too big. One that
- * is equal to this value may be valid or not; the limit
- * between valid and invalid numbers is then based on the last
- * digit. For instance, if the range for longs is
- * [-2147483648..2147483647] and the input base is 10,
- * cutoff will be set to 214748364 and cutlim to either
- * 7 (neg==0) or 8 (neg==1), meaning that if we have accumulated
- * a value > 214748364, or equal but the next digit is > 7 (or 8),
- * the number is too big, and we will return a range error.
- *
- * Set any if any `digits' consumed; make it negative to indicate
- * overflow.
- */
- cutoff = neg ? LONG_MIN : LONG_MAX;
- cutlim = cutoff % base;
- cutoff /= base;
- if (neg) {
- if (cutlim > 0) { /* presumably covers implementations where % of a negative value yields a positive remainder */
- cutlim -= base;
- cutoff += 1;
- }
- cutlim = -cutlim; /* keep the last-digit limit as a non-negative number */
- }
- for (acc = 0, any = 0;; c = (unsigned char) *s++) {
- if (isdigit(c))
- c -= '0';
- else if (isalpha(c))
- c -= isupper(c) ? 'A' - 10 : 'a' - 10;
- else
- break;
- if (c >= base)
- break;
- if (any < 0)
- continue; /* already out of range: keep consuming digits without accumulating */
- if (neg) {
- if ((acc < cutoff || acc == cutoff) && c > cutlim) { /* NOTE(review): as grouped, c > cutlim is required even when acc is already below cutoff; the article text proposes acc < cutoff || (acc == cutoff && c > cutlim) */
- any = -1;
- acc = LONG_MIN;
- errno = ERANGE;
- } else {
- any = 1;
- acc *= base;
- acc -= c; /* negative values are accumulated downwards */
- }
- } else {
- if ((acc > cutoff || acc == cutoff) && c > cutlim) { /* NOTE(review): same grouping problem as the negative branch; c > cutlim is required even when acc already exceeds cutoff */
- any = -1;
- acc = LONG_MAX;
- errno = ERANGE;
- } else {
- any = 1;
- acc *= base;
- acc += c;
- }
- }
- }
- if (endptr != 0)
- *endptr = (char *) (any ? s - 1 : nptr); /* s is one past the character that stopped the loop; with no digits consumed the start of the input is reported */
- return (acc);
- }
-
- int matoi2(CONST char *str) /* parses str in base 10 with mstrtol() and converts the long result to int */
- {
- return ((int) mstrtol(str, (char **) NULL, 10));
- }
-
- int mgetline(char* buf, size_t n) { /* reads characters from stdin into buf until newline, EOF or n-1 characters; returns the number stored */
- size_t idx = 0;
- int c;
-
- while (--n > 0 && (c = getchar()) != EOF && c != '\n') { /* NOTE(review): n is unsigned, so n == 0 would wrap around in --n */
- buf[idx++] = c;
- }
- buf[idx] = '\0'; /* the newline itself is not stored */
- return idx; /* NOTE(review): never negative, and also 0 at EOF, so the return value cannot signal end of input */
- }
-
- #define MAX_LINE 200
-
- int main() { /* reads lines from stdin and prints matoi2() of each one until the line "quit" is entered */
- char buf[MAX_LINE];
- while (mgetline(buf, MAX_LINE) >= 0) { /* NOTE(review): mgetline() returns a character count, so this test is always true, including at EOF */
- if (strcmp(buf, "quit") == 0) break;
- printf("matoi2=%d\n", matoi2(buf));
- }
- return 0;
- }
程序的测试结果:
10522545459
matoi2=2147483647
-10522545459
matoi2=-2147483648
程序貌似对溢出的处理是正确的, 真的吗? 请注意代码的第79和第89行. 现在我把测试数据换成"10522545454", 与"10522545459"区别在于最后一个字符.
10522545454
matoi2=1932610862
-10522545454
matoi2=-1932610862
bingo! 正中下怀! 对于字串"10522545454", 在读取最后的数字字符'4'时, 整数1052254545已经大于2147483647/10了, 说明已经溢出, 不应该再判断字串的最后一位4是否大于2147483647%10, 所以第79行应该改为(89行修改方法类似):
if (acc < cutoff || (acc == cutoff && c > cutlim)) {
修改过后的代码测试正常:
10522545459
matoi2=2147483647
-10522545459
matoi2=-2147483648
10522545454
matoi2=2147483647
-10522545454
matoi2=-2147483648
quit
关于此bug, 我已经邮件通知En-Nut-Discussion.
以下为邮件回复的截图, Uwe Bonnes说: 可以打个补丁到分支. 不过他把单词reasonable给拼错了.
评论记录:
回复评论: