From 56aba079e21e2547d9a2fa197d71203229be33d0 Mon Sep 17 00:00:00 2001 From: Greg Haerr Date: Fri, 13 Sep 2024 22:20:27 -0600 Subject: [PATCH 1/4] [libc] Rewrite vfprintf.c and tiny_printf.c to use __divmod for speed --- elks/init/main.c | 6 +- elkscmd/file_utils/cat.c | 28 +++++++++ elkscmd/lib/tiny_vfprintf.c | 101 +++++++++++++++++++------------ libc/asm/Makefile | 1 + libc/asm/divmod.S | 47 +++++++++++++++ libc/stdio/vfprintf.c | 116 ++++++++++++++++++++---------------- 6 files changed, 209 insertions(+), 90 deletions(-) create mode 100644 libc/asm/divmod.S diff --git a/elks/init/main.c b/elks/init/main.c index d90463f90..91faa8d24 100644 --- a/elks/init/main.c +++ b/elks/init/main.c @@ -122,8 +122,10 @@ void start_kernel(void) printk(" 4x: '%4x'\n", 0x2ab); printk("04d: '%04d'\n", 0x200); printk(" 4d: '%4d'\n", 0x200); - printk("05d: '%05d'\n", -200); - printk(" 5d: '%5d'\n", -200); + printk("05d: '%05d'\n", -20); + printk(" 5d: '%5d'\n", -20); + printk("+5d: '%5d'\n", -20); + printk("+5d: '%5d'\n", 20); printk(" ld: '%ld'\n", -123456789L); printk(" lx: '%lx'\n", 0x87654321L); printk(" lo: '%lo'\n", 0xFFFFFFFFL); diff --git a/elkscmd/file_utils/cat.c b/elkscmd/file_utils/cat.c index 63e2121ce..7c5a8253a 100644 --- a/elkscmd/file_utils/cat.c +++ b/elkscmd/file_utils/cat.c @@ -7,9 +7,33 @@ #include #include #include +#include static char readbuf[BUFSIZ]; /* use disk block size for stack limit and efficiency*/ +#define TEST 0 +#if TEST +void test(void) +{ + printf("#04X: '%#04X'\n", 0x2ab); + printf("04X: '%04X'\n", 0x2ab); + printf("04x: '%04x'\n", 0x2ab); + printf(" 4x: '%4x'\n", 0x2ab); + printf("04d: '%04d'\n", 0x200); + printf(" 4d: '%4d'\n", 0x200); + printf("05d: '%05d'\n", -20); + printf(" 5d: '%5d'\n", -20); + printf("+5d: '%5d'\n", -20); + printf("+5d: '%5d'\n", 20); + printf(" ld: '%ld'\n", -123456789L); + printf(" lx: '%lx'\n", 0x87654321L); + printf(" lo: '%lo'\n", 0xFFFFFFFFL); + printf(" s: '%s'\n", "thisisatest"); + printf(" 6s: '%6s'\n", 
"thisisatest"); + printf("20s: '%20s'\n", "thisisatest"); +} +#endif + static int copyfd(int fd) { int n; @@ -25,6 +49,10 @@ int main(int argc, char **argv) { int i, fd; +#if TEST + test(); + exit(0); +#endif if (argc <= 1) { if (copyfd(STDIN_FILENO)) { perror("stdin"); diff --git a/elkscmd/lib/tiny_vfprintf.c b/elkscmd/lib/tiny_vfprintf.c index 110203630..bc4a27b0a 100644 --- a/elkscmd/lib/tiny_vfprintf.c +++ b/elkscmd/lib/tiny_vfprintf.c @@ -15,11 +15,10 @@ */ #include -#include #include #include #include -#include +#include static unsigned char bufout[80]; @@ -51,7 +50,7 @@ FILE stderr[1] = static void __fflush(FILE *fp) { - int len; + int len; /* Return if this is a fake FILE from sprintf */ if (fp->fd < 0) @@ -79,21 +78,28 @@ static void __fputc(int ch, FILE *fp) * the number of characters output. */ static int -__fmt(FILE *op, unsigned char *buf, int ljustf, int width, int preci, char pad) +__fmt(FILE *op, unsigned char *buf, int ljustf, int width, int preci, char pad, char sign) { int cnt = 0, len; unsigned char ch; len = strlen((char *)buf); - if ((preci != -1) && (len > preci)) /* limit max data width */ + if (*buf == '-') + sign = *buf++; + else if (sign) + len++; + + + if (preci != -1 && len > preci) /* limit max data width */ len = preci; if (width < len) /* flexible field width or width overflow */ width = len; /* - * at this point: width = total field width len = actual data width + * at this point: width = total field width, len = actual data width + * (including possible sign character) */ cnt = width; width -= len; @@ -102,12 +108,21 @@ __fmt(FILE *op, unsigned char *buf, int ljustf, int width, int preci, char pad) { if (!ljustf && width) /* left padding */ { + if (len && sign && (pad == '0')) + goto showsign; ch = pad; --width; } else if (len) { - ch = *buf++; /* main field */ + if (sign) + { + showsign: + ch = sign; /* sign */ + sign = '\0'; + } + else + ch = *buf++; /* main field */ --len; } else @@ -126,34 +141,34 @@ vfprintf(FILE *op, const 
char *fmt, va_list ap) { int i, cnt = 0, ljustf, lval; int preci, width, radix; + unsigned int c; + unsigned long v; char pad, dpoint; - char *ptmp; - char tmp[64]; + char *p; + char sign; + char buf[64]; - while (*fmt) - { - if (*fmt == '%') - { + while (*fmt) { + if (*fmt == '%') { ljustf = 0; /* left justify flag */ dpoint = 0; /* found decimal point */ + sign = '\0'; /* sign char & status */ lval = 0; pad = ' '; /* justification padding char */ width = -1; /* min field width */ preci = -1; /* max data width */ radix = 10; /* number base */ - ptmp = tmp; /* pointer to area to print */ + p = buf; /* pointer to area to print */ fmtnxt: i = 0; - for(;;) - { + for (;;) { ++fmt; - if(*fmt < '0' || *fmt > '9' ) + if (*fmt < '0' || *fmt > '9') break; - i = (i * 10) + (*fmt - '0'); + i = i * 10 + *fmt - '0'; if (dpoint) preci = i; - else if (!i && (pad == ' ')) - { + else if (!i && pad == ' ') { pad = '0'; goto fmtnxt; } @@ -161,8 +176,7 @@ vfprintf(FILE *op, const char *fmt, va_list ap) width = i; } - switch (*fmt) - { + switch (*fmt) { case '-': /* left justification */ ljustf = 1; goto fmtnxt; @@ -176,43 +190,56 @@ vfprintf(FILE *op, const char *fmt, va_list ap) case 'h': /* short data */ goto fmtnxt; - case 'd': /* Signed decimal */ - ptmp = ltostr((long) ((lval) ? va_arg(ap, long) : va_arg(ap, int)), 10); - goto printit; - case 'o': /* Unsigned octal */ radix = 8; goto usproc; case 'x': /* Unsigned hexadecimal */ radix = 16; - /* fall thru */ + goto usproc; + + case 'd': /* Signed decimal */ + v = lval? va_arg(ap, long) : (long)va_arg(ap, int); + if ((long)v < 0) { + v = -(long)v; + sign = '-'; + } + goto convert; case 'u': /* Unsigned decimal */ usproc: - ptmp = ultostr((unsigned long) ((lval) - ? va_arg(ap, unsigned long) - : va_arg(ap, unsigned int)), radix); + v = lval? 
va_arg(ap, unsigned long) : (unsigned long)va_arg(ap, unsigned int); + convert: + p = buf + sizeof(buf) - 1; + *p = '\0'; + do { + c = radix; + v = __divmod(v, &c); /* remainder returned in c */ + if (c > 9) + *--p = 'A' - 10 + c; + else + *--p = '0' + c; + } while (v != 0); goto printit; case 'c': /* Character */ - ptmp[0] = va_arg(ap, int); - ptmp[1] = '\0'; + p[0] = va_arg(ap, int); + p[1] = '\0'; goto nopad; case 's': /* String */ - ptmp = va_arg(ap, char*); + p = va_arg(ap, char *); nopad: + sign = '\0'; + pad = ' '; printit: - cnt += __fmt(op, (unsigned char *)ptmp, ljustf, width, preci, pad); + cnt += __fmt(op, (unsigned char *)p, ljustf, width, preci, pad, sign); break; default: /* unknown character */ goto charout; } - } - else - { + } else { charout: __fputc(*fmt, op); /* normal char out */ ++cnt; diff --git a/libc/asm/Makefile b/libc/asm/Makefile index 2e593290a..ee4ce6d81 100644 --- a/libc/asm/Makefile +++ b/libc/asm/Makefile @@ -10,6 +10,7 @@ SRCS = \ memset-s.S \ strcpy-s.S \ strlen-s.S \ + divmod.S \ # end of list LEFTOUT = \ diff --git a/libc/asm/divmod.S b/libc/asm/divmod.S new file mode 100644 index 000000000..578ec9028 --- /dev/null +++ b/libc/asm/divmod.S @@ -0,0 +1,47 @@ +// Fast 32-bit combined divide and modulo routine +// +// unsigned long __divmod(unsigned long val, unsigned int *baserem) +// Unsigned divide 32-bits by 16-bits +// Store denominator in *baserem before calling +// Returns 32-bit quotient in DX:AX and remainder in *baserem +// +// Designed for a fast replacement of the following code which calls __udivsi3/__umodsi3: +// unsigned int rem, base; +// rem = val % base; +// val = val / base; +// New code: +// rem = base; +// val = __divmod(val, &rem); +// +// inspired by OpenWatcom ltoa.c __uldiv routine +// 13 Sep 2024 Greg Haerr + +#define NUMLO 2 +#define NUMHI 4 +#define ADDR 6 + + .arch i8086, nojumps + .code16 + .text + + .global __divmod +__divmod: + mov %sp,%bx + mov NUMLO(%bx),%ax + mov NUMHI(%bx),%dx + mov ADDR(%bx),%bx + 
+// divides DX:AX / [BX] +// returns DX:AX with remainder in [BX] + + xor %cx,%cx // temp CX = 0 + cmp (%bx),%dx // is upper 16 bits numerator less than denominator + jb 1f // yes - only one DIV needed + xchg %dx,%ax // AX = upper numerator, DX = lower numerator + xchg %dx,%cx // DX = 0, CX = lower numerator + divw (%bx) // AX = upper numerator / base, DX = remainder + xchg %cx,%ax // AX = lower numerator, CX = high quotient +1: divw (%bx) // AX = lower numerator / base, DX = remainder + mov %dx,(%bx) // store remainder + mov %cx,%dx // DX = high quotient, AX = low quotient + ret diff --git a/libc/stdio/vfprintf.c b/libc/stdio/vfprintf.c index d3f2589e8..96c46db7a 100644 --- a/libc/stdio/vfprintf.c +++ b/libc/stdio/vfprintf.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #ifndef __HAS_NO_FLOATS__ #include @@ -41,8 +41,8 @@ * the number of characters output. */ static int -__fmt(FILE *op, unsigned char *buf, int ljustf, int width, int preci, char pad, - char sign, int buffer_mode) +__fmt(FILE *op, unsigned char *buf, int ljustf, int width, int preci, char pad, char sign, + int buffer_mode) { int cnt = 0, len; unsigned char ch; @@ -54,14 +54,14 @@ __fmt(FILE *op, unsigned char *buf, int ljustf, int width, int preci, char pad, else if (sign) len++; - if ((preci != -1) && (len > preci)) /* limit max data width */ + if (preci != -1 && len > preci) /* limit max data width */ len = preci; if (width < len) /* flexible field width or width overflow */ width = len; /* - * at this point: width = total field width len = actual data width + * at this point: width = total field width, len = actual data width * (including possible sign character) */ cnt = width; @@ -105,21 +105,20 @@ vfprintf(FILE *op, const char *fmt, va_list ap) { int i, cnt = 0, ljustf, lval; int preci, width, radix; + unsigned int c; char pad, dpoint; char sign, hash; - unsigned long l; + unsigned long v; int buffer_mode; - char *ptmp; - char tmp[64]; + char *p; + char buf[64]; /* This 
speeds things up a bit for unbuffered */ - buffer_mode = (op->mode&__MODE_BUF); - op->mode &= (~__MODE_BUF); + buffer_mode = op->mode & __MODE_BUF; + op->mode &= ~__MODE_BUF; - while (*fmt) - { - if (*fmt == '%') - { + while (*fmt) { + if (*fmt == '%') { if( buffer_mode == _IONBF ) fflush(op); ljustf = 0; /* left justify flag */ hash = 0; @@ -129,20 +128,18 @@ vfprintf(FILE *op, const char *fmt, va_list ap) width = -1; /* min field width */ preci = -1; /* max data width */ radix = 10; /* number base */ - ptmp = tmp; /* pointer to area to print */ - lval = (sizeof(int)==sizeof(long)); /* long value flag */ + p = buf; /* pointer to area to print */ + lval = sizeof(int) == sizeof(long); fmtnxt: i = 0; - for(;;) - { + for (;;) { ++fmt; - if(*fmt < '0' || *fmt > '9' ) + if (*fmt < '0' || *fmt > '9') break; - i = (i * 10) + (*fmt - '0'); + i = i * 10 + *fmt - '0'; if (dpoint) preci = i; - else if (!i && (pad == ' ')) - { + else if (!i && pad == ' ') { pad = '0'; goto fmtnxt; } @@ -150,12 +147,7 @@ vfprintf(FILE *op, const char *fmt, va_list ap) width = i; } - switch (*fmt) - { - case '\0': /* early EOS */ - --fmt; - goto charout; - + switch (*fmt) { case '-': /* left justification */ ljustf = 1; goto fmtnxt; @@ -189,11 +181,6 @@ vfprintf(FILE *op, const char *fmt, va_list ap) lval = 0; goto fmtnxt; - case 'd': /* Signed decimal */ - case 'i': - ptmp = ltostr((long) ((lval) ? va_arg(ap, long) : va_arg(ap, int)), 10); - goto printit; - case 'b': /* Unsigned binary */ radix = 2; goto usproc; @@ -203,7 +190,7 @@ vfprintf(FILE *op, const char *fmt, va_list ap) goto usproc; case 'p': /* Pointer */ - lval = (sizeof(char*) == sizeof(long)); + lval = sizeof(char*) == sizeof(long); pad = '0'; width = 4; preci = 8; @@ -212,23 +199,49 @@ vfprintf(FILE *op, const char *fmt, va_list ap) case 'x': /* Unsigned hexadecimal */ case 'X': radix = 16; - /* fall thru */ + goto usproc; + + case 'd': /* Signed decimal */ + case 'i': + v = lval? 
va_arg(ap, long) : (long)va_arg(ap, int); + if ((long)v < 0) { + v = -(long)v; + sign = '-'; + } + goto convert; case 'u': /* Unsigned decimal */ case 'k': /* Pticks */ usproc: - l = lval? va_arg(ap, unsigned long) : (unsigned long)va_arg(ap, unsigned int); + v = lval? va_arg(ap, unsigned long) : (unsigned long)va_arg(ap, unsigned int); if (*fmt == 'k') { if (_weakaddr(ptostr)) { - (_weakfn(ptostr))(l, ptmp); + (_weakfn(ptostr))(v, p); preci = -1; goto printit; } /* if precision timing not linked in, display as unsigned */ } - ptmp = ultostr(l, radix); - if( hash && radix == 8 ) { - width = strlen(ptmp)+1; + + convert: + p = buf + sizeof(buf) - 1; + *p = '\0'; + do { +#if 1 + c = radix; + v = __divmod(v, &c); /* remainder returned in c */ +#else + c = v % radix; + v = v / radix; +#endif + if (c > 9) + *--p = ((*fmt == 'X')? 'A': 'a') - 10 + c; + else + *--p = '0' + c; + } while (v != 0); + + if (hash && radix == 8) { + width = strlen(p)+1; pad = '0'; } goto printit; @@ -238,19 +251,19 @@ vfprintf(FILE *op, const char *fmt, va_list ap) goto fmtnxt; case 'c': /* Character */ - ptmp[0] = va_arg(ap, int); - ptmp[1] = '\0'; + p[0] = va_arg(ap, int); + p[1] = '\0'; goto nopad; case 's': /* String */ - ptmp = va_arg(ap, char*); - if (!ptmp) ptmp = "(null)"; + p = va_arg(ap, char *); + if (!p) p = "(null)"; nopad: sign = '\0'; pad = ' '; printit: - cnt += __fmt(op, (unsigned char *)ptmp, ljustf, width, preci, pad, - sign, buffer_mode); + cnt += __fmt(op, (unsigned char *)p, ljustf, width, preci, pad, sign, + buffer_mode); break; #ifndef __HAS_NO_FLOATS__ @@ -260,19 +273,20 @@ vfprintf(FILE *op, const char *fmt, va_list ap) case 'E': case 'G': if (_weakaddr(dtostr)) { - (_weakfn(dtostr))(va_arg(ap, double), *fmt, preci, ptmp); + (_weakfn(dtostr))(va_arg(ap, double), *fmt, preci, p); preci = -1; goto printit; } - /* FALLTHROUGH if no floating printf available */ + /* fall thru if dtostr not linked in */ #endif + case '\0': /* early EOS */ + --fmt; + /* fall thru */ default: 
/* unknown character */ goto charout; } - } - else - { + } else { charout: putc(*fmt, op); /* normal char out */ ++cnt; From 4aa351cb617f57092a0f17df6a25081586ea7803 Mon Sep 17 00:00:00 2001 From: Greg Haerr Date: Fri, 13 Sep 2024 22:39:14 -0600 Subject: [PATCH 2/4] Fix Watcom build --- libc/stdio/vfprintf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libc/stdio/vfprintf.c b/libc/stdio/vfprintf.c index 96c46db7a..e1f4d0035 100644 --- a/libc/stdio/vfprintf.c +++ b/libc/stdio/vfprintf.c @@ -227,12 +227,12 @@ vfprintf(FILE *op, const char *fmt, va_list ap) p = buf + sizeof(buf) - 1; *p = '\0'; do { -#if 1 - c = radix; - v = __divmod(v, &c); /* remainder returned in c */ -#else +#if __WATCOMC__ c = v % radix; v = v / radix; +#else + c = radix; + v = __divmod(v, &c); /* remainder returned in c */ #endif if (c > 9) *--p = ((*fmt == 'X')? 'A': 'a') - 10 + c; From 26266a697cd9843529a5e56431d52238c6f0a5b0 Mon Sep 17 00:00:00 2001 From: Greg Haerr Date: Fri, 13 Sep 2024 23:14:58 -0600 Subject: [PATCH 3/4] Add medium model support to divmod.S --- elkscmd/lib/tiny_vfprintf.c | 2 +- libc/asm/divmod.S | 10 ++++++---- libc/stdio/vfprintf.c | 6 +++--- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/elkscmd/lib/tiny_vfprintf.c b/elkscmd/lib/tiny_vfprintf.c index bc4a27b0a..30d998811 100644 --- a/elkscmd/lib/tiny_vfprintf.c +++ b/elkscmd/lib/tiny_vfprintf.c @@ -108,7 +108,7 @@ __fmt(FILE *op, unsigned char *buf, int ljustf, int width, int preci, char pad, { if (!ljustf && width) /* left padding */ { - if (len && sign && (pad == '0')) + if (len && sign && pad == '0') goto showsign; ch = pad; --width; diff --git a/libc/asm/divmod.S b/libc/asm/divmod.S index 578ec9028..2695c0e7e 100644 --- a/libc/asm/divmod.S +++ b/libc/asm/divmod.S @@ -16,9 +16,11 @@ // inspired by OpenWatcom ltoa.c __uldiv routine // 13 Sep 2024 Greg Haerr -#define NUMLO 2 -#define NUMHI 4 -#define ADDR 6 +#include + +#define NUMLO 2+FAR_ADJ_ +#define NUMHI 4+FAR_ADJ_ 
+#define ADDR 6+FAR_ADJ_ .arch i8086, nojumps .code16 @@ -44,4 +46,4 @@ __divmod: 1: divw (%bx) // AX = lower numerator / base, DX = remainder mov %dx,(%bx) // store remainder mov %cx,%dx // DX = high quotient, AX = low quotient - ret + RET_(6) diff --git a/libc/stdio/vfprintf.c b/libc/stdio/vfprintf.c index e1f4d0035..bffc57cf8 100644 --- a/libc/stdio/vfprintf.c +++ b/libc/stdio/vfprintf.c @@ -71,7 +71,7 @@ __fmt(FILE *op, unsigned char *buf, int ljustf, int width, int preci, char pad, { if (!ljustf && width) /* left padding */ { - if (len && sign && (pad == '0')) + if (len && sign && pad == '0') goto showsign; ch = pad; --width; @@ -190,7 +190,7 @@ vfprintf(FILE *op, const char *fmt, va_list ap) goto usproc; case 'p': /* Pointer */ - lval = sizeof(char*) == sizeof(long); + lval = sizeof(char *) == sizeof(long); pad = '0'; width = 4; preci = 8; @@ -227,7 +227,7 @@ vfprintf(FILE *op, const char *fmt, va_list ap) p = buf + sizeof(buf) - 1; *p = '\0'; do { -#if __WATCOMC__ +#ifdef __WATCOMC__ c = v % radix; v = v / radix; #else From 5cb1f2cbd6e4cdb1a7380df14ccf724adb21180d Mon Sep 17 00:00:00 2001 From: Greg Haerr Date: Fri, 13 Sep 2024 23:23:15 -0600 Subject: [PATCH 4/4] Add regparmcall support to divmod.S --- libc/asm/divmod.S | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libc/asm/divmod.S b/libc/asm/divmod.S index 2695c0e7e..017d2da09 100644 --- a/libc/asm/divmod.S +++ b/libc/asm/divmod.S @@ -28,10 +28,14 @@ .global __divmod __divmod: +#ifndef __IA16_CALLCVT_REGPARMCALL mov %sp,%bx mov NUMLO(%bx),%ax mov NUMHI(%bx),%dx mov ADDR(%bx),%bx +#else + mov %cx,%bx // DX:AX = val, BX = &rem +#endif // divides DX:AX / [BX] // returns DX:AX with remainder in [BX]