;; factor.asm: Copyright (C) 1999 Brian Raiter ;; Licensed under the terms of the GNU General Public License, either ;; version 2 or (at your option) any later version. ;; ;; To build: ;; nasm -f bin -o factor factor.asm && chmod +x factor ;; ;; Usage: factor [NUMBER]... Print the prime factors of each NUMBER. ;; factor --help Display online help and exit. ;; factor --version Display version information and exit. ;; ;; Valid numbers are integers between 0 and 2^64 - 1 inclusive. If no ;; arguments are specified on the command line, numbers are read from ;; standard input. ;; ;; This is version 1.3 of factor.asm, written and released 2009. ;; Version 1.2 of factor.asm was written and released 2001. ;; Version 1.0 of factor.asm was written and released 1999. BITS 32 ;; Some magic numbers. %define stdout 1 %define stderr 2 %define bias 0x3FFF ; bias of the floating-point exponent ;; The program's data. This structure appears just past the file's ;; image, in zero-initialized memory. %define iobuf_size 80 STRUC data factor: resd 2 ; number being tested for factorhood getchar_rel: resd 1 ; address of getchar() function write_rel: resd 1 ; address of write() function exit_rel: resd 1 ; address of exit() function exitcode: resd 1 ; the program's exit code resd 2 ; (filler for 32-byte alignment) buf: resd 4 ; general floating-point buffer iobuf: resb iobuf_size ; buffer for text input and output ENDSTRUC ;; ;; The ELF executable file structures ;; ;; The following structures provide the minimum necessary data ;; required by the ELF standard in order to dynamically link with ;; libc and obtain pointers to the three standard functions that this ;; program needs to call. Wherever possible, the ELF structures have ;; been made to overlap with each other to reduce overall size. The ;; various magic numbers are identified in comments. ;; org 0x08048000 ;; A basic ELF header, indicating that the program segment header ;; has three entries, and that there is no section header table. db 0x7F, 'ELF' db 1 ; ELFCLASS32 db 1 ; ELFDATA2LSB db 1 ; EV_CURRENT db 0 ; ELFOSABI_NONE dd 0 dd 0 dw 2 ; ET_EXEC dw 3 ; EM_386 dd 1 ; EV_CURRENT dd _start dd phdrs - $$ dd 0 dd 0 dw 0x34 ; sizeof(Elf32_Ehdr) dw 0x20 ; sizeof(Elf32_Phdr) dw 3 dw 0 dw 0 dw 0 ;; The hash table. With only four symbols to hash, this structure is ;; purely a formality. The last two entries in the hash table, a ;; 1 and a 0, overlap with the next structure. hash: dd 1 dd 5 dd 3 dd 0, 2, 4 ;; The program header table contains three entries. The first one ;; indicates that the entire file is to be loaded into read-write-exec ;; memory. The second entry identifies the location of the _DYNAMIC ;; section, and the third entry gives the location of the interpreter ;; pathname. The last dword value of the table, a 1, overlaps with the ;; next structure. phdrs: dd 1 ; PT_LOAD dd 0 dd $$ dd $$ dd file_size dd mem_size dd 7 ; PF_R | PF_W | PF_X dd 0x1000 dd 2 ; PT_DYNAMIC dd dynamic - $$ dd dynamic dd dynamic dd dynamic_size dd dynamic_size dd 6 ; PF_R | PF_W dd 4 dd 3 ; PT_INTERP dd interp - $$ dd interp dd interp dd interp_size dd interp_size dd 4 ; PF_R ;; The _DYNAMIC section. Indicates the presence and location of the ;; dynamic symbol section (and associated string table and hash table) ;; and the relocation section. The final DT_NULL entry in the dynamic ;; section overlaps with the next structure. dynamic: dd 1, libc_name ; DT_NEEDED dd 4, hash ; DT_HASH dd 5, dynstr ; DT_STRTAB dd 6, dynsym ; DT_SYMTAB dd 10, dynstr_size ; DT_STRSZ dd 11, 0x10 ; DT_SYMENT dd 17, reltext ; DT_REL dd 18, reltext_size ; DT_RELSZ dd 19, 0x08 ; DT_RELENT dynamic_size equ $ - dynamic + 8 ;; The dynamic symbol table. Entries are included for the _DYNAMIC ;; section and the three functions imported from libc: getchar(), ;; write(), and exit(). dynsym: dd 0 dd 0 dd 0 dw 0 dw 0 dynamic_sym equ 1 dd dynamic_name dd dynamic dd 0 dw 0x11 ; STB_GLOBAL, STT_OBJECT dw 0xFFF1 ; SHN_ABS exit_sym equ 2 dd exit_name dd 0 dd 0 dw 0x12 ; STB_GLOBAL, STT_FUNC dw 0 getchar_sym equ 3 dd getchar_name dd 0 dd 0 dw 0x12 ; STB_GLOBAL, STT_FUNC dw 0 write_sym equ 4 dd write_name dd 0 dd 0 dw 0x12 ; STB_GLOBAL, STT_FUNC dw 0 ;; The relocation table. The addresses of the three functions imported ;; from libc are stored in the program's data area. Since they will be ;; used with indirect calls, the simple R_386_32 relocation is used. reltext: dd dataorg + write_rel db 1 ; R_386_32 db write_sym dw 0 dd dataorg + getchar_rel db 1 ; R_386_32 db getchar_sym dw 0 dd dataorg + exit_rel db 1 ; R_386_32 db exit_sym dw 0 reltext_size equ $ - reltext ;; The interpreter pathname. The final NUL byte appears in the next ;; section. interp: db '/lib/ld-linux.so.2' interp_size equ $ - interp + 1 ;; The string table for the dynamic symbol table. dynstr: db 0 libc_name equ $ - dynstr db 'libc.so.6', 0 dynamic_name equ $ - dynstr db '_DYNAMIC', 0 exit_name equ $ - dynstr db 'exit', 0 getchar_name equ $ - dynstr db 'getchar', 0 write_name equ $ - dynstr db 'write', 0 dynstr_size equ $ - dynstr ;; ;; The program proper ;; ;; A number that is to be factored can be as high as 2^64 - 1. ;; Therefore, it can have no more than 63 prime factors, and at most ;; one prime factor greater than 2^32 - 1. ;; ;; The program works by testing the number against all possible ;; divisors except multiples of 2, 3, and 5 (the actual numbers 2, 3, ;; and 5 are tested directly before entering the main loop) until: ;; ;; 1. a factor is found that divides the number evenly, or ;; 2. the integer portion of a quotient exceeds the divisor, or ;; 3. the divisor exceeds 2^32 - 1. ;; ;; In the first case, the program outputs the new factor, the division ;; currently being computed is discarded, and the integer quotient ;; becomes the new number to test against for future divisions. In the ;; other two cases, the program now knows that number currently being ;; tested is prime, and is itself output as the final factor. ;; ;; Although a number can have at most a dozen or so factors beyond 2, ;; 3, and 5 (and the vast majority of numbers will only have a few), ;; the program might have to execute over a billion divisions in order ;; to identify them. Therefore it is necessary to optimize for testing ;; unsuccessful candidate factors. ;; ;; The essential idea behind this program is to use the FPU for doing ;; the divisions, even though we are only interested in integer ;; values. This allows the ALU to be used in parallel while the ;; lengthy division instruction is running, ideally having the next ;; division queued up and ready to go just as the previous division is ;; completed. In order to do this, the results of each division need ;; to be evaluated without involving the FPU. Quotients are therefore ;; examined via bit-twiddling, in order to determine if a quotient is ;; an integer and/or if it exceeds the divisor. ;; ;; Only the full-sized 80-bit floating-point numbers has a full 64 ;; bits of mantissa, so the ALU must examine the quotient in this form ;; in order to avoid losing needed precision. (The fact that 64 bits ;; is always sufficient to tell if a quotient is integral when the ;; divisor has 32 or fewer bits is left as an exercise to the reader.) ;; ;; The factorconst subroutine, called by factorize, repeatedly divides ;; the number at the top of the floating-point stack by the integer ;; stored in buf as long as the number continues to divide evenly. For ;; each successful division, the number is also displayed. factorconst: ; num fild qword [ebx] ; fact num .loop: fld st1 ; num fact num fdiv st1 ; quot fact num fld st0 ; quot quot fact num frndint ; iquo quot fact num fcomp st1 ; quot fact num fnstsw ax sahf jnz short .return fstp st2 ; fact quot call ebp jmp short .loop .return: fcompp ret ;; factorize is the main subroutine of the program. It is called with ;; esi pointing to an NUL-terminated string representing the number to ;; factor. factorize finds and displays the number's prime factors. If ;; the number is invalid, an error message is output instead and ;; exitcode is set to 1. factorize: ;; The first step is to translate the string into a number. 10.0 and 0.0 ;; are pushed onto the floating-point stack. xor eax, eax fild word [byte ebx + ten - dataorg] ; 10.0 fldz ; num 10.0 ;; Each character in the string is checked; if it is not in the range ;; of '0' to '9' inclusive, the subroutine aborts. Otherwise, the top ;; of the stack is multiplied by ten and the value of the digit is ;; added to the product. The loop exits when a NUL byte is found. .atoiloop: lodsb or al, al jz short .atoiloopexit fmul st0, st1 ; 10n 10.0 sub al, '0' jc short .badinput cmp al, 10 jnc short .badinput mov [ebx], eax fiadd dword [ebx] ; num 10.0 jmp short .atoiloop ;; If the string is not a valid number, a stock error message is ;; displayed to the user and the subroutine returns directly. .badinput: mov byte [byte ebx + exitcode], 1 push byte errmsgbadnum_size lea eax, [byte ebx + errmsgbadnum - dataorg] push eax push byte stderr jmp short .writeandret ;; The number's exponent is examined, and if the number is 2^64 or ;; greater, it is treated as invalid. .atoiloopexit: fld st0 ; num num junk fstp tword [byte ebx + buf] ; num junk cmp word [byte ebx + buf + 8], bias + 64 jae short .badinput ;; The number, having been validated, is displayed on standard output. ;; If the number is zero, no factoring should be done and the ;; subroutine exits early. ; num junk xor edi, edi call ebp ftst fnstsw ax sahf jz short .earlyout fstp st1 ; num ;; The factorconst subroutine is called three times, with the factor ;; set to two, three, and five, respectively. mov edi, factorconst mov byte [ebx], 2 call edi inc byte [ebx] call edi mov byte [ebx], 5 call edi ;; If the number is now equal to one, the subroutine is finished and ;; exits immediately. fld1 ; 1.0 num fcom st1 fnstsw ax sahf jz short .earlyout ;; factor is initialized to 7, and edi is initialized with a sequence ;; of eight four-bit values that represent the cycle of differences ;; between subsequent integers not divisible by 2, 3, or 5. A division ;; by 7 is begun, and the main loop is entered. ; junk num mov byte [ebx], 7 fild qword [ebx] ; fact junk num fdivr st0, st2 ; quot junk num mov edi, 0x42424626 ;; The main loop of the factorize subroutine. The current factor to ;; evaluate (for which the division operation should just be finishing ;; up) is saved in esi. edi is advanced to the next step value, and ;; factor is incremented. If it overflows, then all possible 32-bit ;; factors have been exhausted. .mainloop: mov esi, [ebx] rol edi, 4 mov eax, edi and eax, byte 0x0F add [ebx], eax jc short .lastfactor ;; The quotient from the division by the current potential factor is ;; stored in the buffer, and division by the factor for the next ;; iteration is begun. ; quot junk num fst st1 ; quot quot num fstp tword [byte ebx + buf] ; quot num fild qword [ebx] ; fact quot num fdivr st0, st2 ; nquo quot num ;; The integer portion of the quotient is isolated and tested against ;; the divisor (i.e., the potential factor). If the quotient is ;; smaller, then the loop has passed the number's square root. mov edx, [byte ebx + buf + 4] mov ecx, bias + 31 sub ecx, [byte ebx + buf + 8] jc short .keepgoing mov eax, edx shr eax, cl cmp eax, esi jnc short .keepgoing ;; Here, the program has determined that the number being tested has ;; no more factors. The number is displayed, along with a newline, ;; the FPU stack is cleared, and the subroutine ends. .lastfactor: ; junk junk num fxch st2 ; num junk junk call ebp fstp st0 ; junk junk .earlyout: push byte 1 lea esi, [byte ebx + ten - dataorg] .writetostdout: push esi push byte stdout .writeandret: call [byte ebx + write_rel] add esp, byte 12 fcompp ret ;; Otherwise, the integer portion of the quotient is shifted out. If ;; any nonzero bits are left, then the quotient is not an integer, and ;; thus our candidate is not a factor. The program then proceeds to ;; the next iteration. .keepgoing: mov eax, [byte ebx + buf] neg ecx js short .shift32 xchg eax, edx xor eax, eax .shift32: shld edx, eax, cl shl eax, cl or eax, edx jnz short .mainloop ;; At this point, a new factor has been found. The number being ;; factored is therefore replaced with the quotient of the previous ;; division, and the result of the division in progress is junked. The ;; new factor is displayed, and the current factor is set back to esi ;; so that it can be tested again. ; junk num junk ffree st2 ; junk num mov [ebx], esi ror edi, 4 fild qword [ebx] ; fact junk num call ebp fdivr st0, st2 ; quot junk num jmp short .mainloop ;; itoa64 is the numeric output subroutine. When the subroutine is ;; called, the number to be displayed should be on the top of the ;; floating-point stack, and there should be no more than four other ;; numbers on the stack. A space is prefixed to the output, unless ;; edi is zero, in which case a colon is suffixed instead. itoa64: ;; 10 is placed on the FPU stack. esi is set to point to the end of ;; the buffer that will hold the decimal representation, with ecx ;; pointing just past the end. ; num + fld st0 ; rnum num + fild word [byte ebx + ten - dataorg] ; 10.0 rnum num + lea ecx, [byte ebx + iobuf + 32] lea esi, [byte ebx + iobuf + 31] ;; At each iteration, the number is reduced modulo 10. This remainder ;; is subtracted from the number (and stored in iobuf as an ASCII ;; digit). The difference is then divided by ten, and if the quotient ;; is zero the loop exits. Otherwise, the quotient replaces the number ;; for the next iteration. .loop: fld st1 ; rnum 10.0 rnum num + fprem ; rem 10.0 rnum num + fist word [ecx] fsubp st2 ; 10.0 10rn num + fcom st1 fnstsw ax fdiv st1, st0 ; 10.0 rnum num + mov al, [ecx] add al, '0' mov [esi], al dec esi sahf jbe short .loop ;; A space is added to the front and a colon is added to the end. The ;; value of edi will determine which one is included in the output. ;; write() is called, and the subroutine ends. mov byte [esi], ' ' mov byte [ecx], ':' sub ecx, esi or edi, edi jnz short .prefix inc esi .prefix: push ecx jmp short factorize.writetostdout ;; Here is the program's entry point. _start: ;; argc and argv[0] are removed from the stack. ebx is initialized to ;; point to the data, and ebp holds the address of the itoa64 ;; subroutine. These registers, along with esi and edi, are guaranteed ;; to be preserved across calls to functions in libc. pop eax pop esi mov ebx, dataorg mov ebp, itoa64 ;; If there are no further arguments, then the program proceeds to the ;; input loop. If argv[1] begins with a dash, then the program ;; branches to the option-parsing routine. Otherwise, the program ;; begins iterating through the command-line arguments. dec eax jle short .inputloop pop esi cmp byte [esi], '-' jz short .handleoption ;; The factorize subroutine is called once for each command-line ;; argument. When there are no more arguments, the program exits. ;; The exitcode will be zero if no errors occurred. .argloop: call factorize pop esi or esi, esi jnz short .argloop .mainexit: push dword [byte ebx + exitcode] call [byte ebx + exit_rel] ;; The input loop routine. esi is pointed to iobuf, and edi, the ;; input byte counter, is reset to zero. .inputloop: lea esi, [byte ebx + iobuf] xor edi, edi ;; The program reads one character at a time. If the character input ;; is a non-graphic character, then the program either breaks out of ;; the loop (if iobuf currently contains anything), ignores the ;; character (if iobuf is empty and the character is a space or ;; control character), or exits (if the character is EOF). Otherwise, ;; the character input is appended to iobuf and the loop continues. .incharloop: call [byte ebx + getchar_rel] cmp al, ' ' jg short .isgraphic or edi, edi jnz short .incharloopexit or eax, eax jns short .incharloop jmp short .mainexit .isgraphic: mov [esi + edi], al inc edi cmp edi, byte iobuf_size - 1 jnz short .incharloop .incharloopexit: ;; A NUL is appended to the string obtained from standard input, the ;; factorize subroutine is called, and the program loops. mov [esi + edi], bl call factorize jmp short .inputloop ;; The program comes here when an option is given on the command ;; line. The introductory dashes are skipped over, and the initial ;; letter of the option is retrieved. .handleoption: xor edx, edx .dashes: lodsb cmp al, '-' jz short .dashes ;; There are three possibilities: v(ersion), h(elp), or an invalid ;; option. Each one requires a different text to be displayed, with ;; the last one going to stderr instead of stdout. lea ecx, [byte edx + versionmsg_size] lea esi, [byte ebx + versionmsg - dataorg] cmp al, 'v' jz short .msgandexit mov cl, helpmsg_size sub esi, ecx cmp al, 'h' jz short .msgandexit mov cl, errmsgbadopt_size sub esi, ecx inc edx ;; esi points to the text to display, ecx holds the text's length, and ;; edx contains an exit code of 0 or 1. The text is passed to write(), ;; with edx deciding whether stdout or stderr is used, and the program ;; exits. .msgandexit: mov [byte ebx + exitcode], edx push ecx push esi inc edx push edx call [byte ebx + write_rel] jmp short .mainexit ;; ;; Message texts. ;; errmsgbadopt: db 'Invalid option; try --help.', 10 errmsgbadopt_size equ $ - errmsgbadopt helpmsg: db 'factor [--help | --version | NUMBER...]', 10 db 'Print the prime factors of each NUMBER.', 10 db 'Read from stdin if no arguments.', 10 helpmsg_size equ $ - helpmsg versionmsg: db 'factor 1.3', 10 db 'Copyright 2009 Brian Raiter. License GPLv2+.', 10 versionmsg_size equ $ - versionmsg errmsgbadnum: db 'Invalid number.' ten: db 10 errmsgbadnum_size equ $ - errmsgbadnum ;; ;; End of the file image. ;; file_size equ $ - $$ ;; The data structure's location is set at 1K from the origin. ebx ;; points to this throughout the program. dataorg equ $$ + 0x0400 mem_size equ dataorg + data_size - $$