Your compiler's secret life
You know compilers go through stages, right? Generally, one of those stages converts the programming language (e.g. C/C++) into the assembly language for the target architecture (e.g. x86 assembly). The assembler then turns that assembly into machine code that can be run on that type of processor. Here's how to invoke gcc so that it skips the assembler step and prints the assembly to the terminal:
gcc -S -o - a.c
Read on for a little dissection and/or go play with it and read the man page for gcc.
Here is the C file I'm using, a.c. It just loops through the command line arguments and prints them.
#include <stdio.h>
int main( int argc, char **argv ){
int i;
printf("argc: %d\n",argc);
for( i=0; i < argc; i++ ){
printf("argv[%d]: %s\n", i, argv[i]);
}
}
So, again, here is the command to get the assembly on the terminal.
gcc -S -o - a.c
Let's break that down into pieces.
- gcc is the gnu c compiler
- -S means to stop before the assembler stage of compilation
- -o specifies the output file: the next argument is the filename to write to. Normally, with -S, the compiler would write its output to a.s. But we give it the '-' character to tell it to dump the output to STDOUT. This lets us catch the output with '| less' or '> file'.
- a.c is the name of the source file that we are compiling.
Linux x86 on a PII
# File header emitted by the compiler for a.c.
.file "a.c"
.version "01.01"
gcc2_compiled.:
# Read-only data: the two printf format strings from a.c.
.section .rodata
# .LC0 labels the format string that prints the argument count.
.LC0:
.string "argc: %d\n"
# .LC1 labels the format string that prints one index/argument pair.
.LC1:
.string "argv[%d]: %s\n"
# main(argc, argv): prints argc, then every argv[i], by calling printf.
# Operand order throughout is source first, destination last.
# Locations used below:
#   argc is read from 8(%ebp), argv from 12(%ebp);
#   the loop counter i lives at -4(%ebp).
.text
.align 4
.globl main
.type main,@function
main:
# Prologue: save the caller's frame pointer and make %ebp the base of this frame.
pushl %ebp
movl %esp,%ebp
# Reserve 24 bytes below %ebp; only the slot at -4(%ebp) is referenced in this listing.
subl $24,%esp
# Call printf(.LC0, argc). The 8 bytes of padding plus two 4-byte pushes
# add up to the 16 bytes released after the call
# (the padding is presumably there for stack alignment).
addl $-8,%esp
movl 8(%ebp),%eax
pushl %eax
pushl $.LC0
call printf
addl $16,%esp
# i = 0
movl $0,-4(%ebp)
.p2align 4,,7
# .L3: loop test. Enter the body while i < argc (signed compare), otherwise exit the loop.
.L3:
movl -4(%ebp),%eax
cmpl 8(%ebp),%eax
jl .L6
jmp .L4
.p2align 4,,7
# .L6: loop body, i.e. printf(.LC1, i, argv[i]).
.L6:
# Pad by 4 bytes so that, with three 4-byte pushes, 16 bytes are released after the call.
addl $-4,%esp
# %edx = i * 4, the byte offset of element i in an array of 4-byte pointers.
movl -4(%ebp),%eax
leal 0(,%eax,4),%edx
# %edx = argv[i], loaded from the address argv + i*4.
movl 12(%ebp),%eax
movl (%eax,%edx),%edx
# Arguments are pushed last-to-first: argv[i], then i, then the format string.
pushl %edx
movl -4(%ebp),%eax
pushl %eax
pushl $.LC1
call printf
addl $16,%esp
# .L5: loop increment. i is incremented directly in memory, then control returns to the test.
.L5:
incl -4(%ebp)
jmp .L3
.p2align 4,,7
# .L4/.L2: function exit. Nothing is loaded into %eax here; main in a.c has no return statement.
.L4:
.L2:
# Epilogue: tear down the frame (restore %esp and %ebp) and return to the caller.
leave
ret
# .Lfe1 marks the end of main so that .size can record the function's length.
.Lfe1:
.size main,.Lfe1-main
.ident "GCC: (GNU) 2.95.4 20011002 (Debian prerelease)"
Mac OS X PPC on a G4
; String constants for a.c: the two printf format strings, each explicitly NUL-terminated.
; \12 is the octal escape for the newline character.
.data
.cstring
.align 2
; LC0: format string that prints the argument count ("argc: %d\n").
LC0:
.ascii "argc: %d\12\0"
.align 2
; LC1: format string that prints one index/argument pair ("argv[%d]: %s\n").
; The trailing \12 matches the newline at the end of the format string in a.c.
LC1:
.ascii "argv[%d]: %s\12\0"
; _main(argc, argv): prints argc, then every argv[i], through the printf stub.
; Frame slots used below (offsets from r30, which is a copy of the stack pointer r1):
;   64(r30) = loop counter i, 120(r30) = argc, 124(r30) = argv.
.text
.align 2
.globl _main
_main:
; Prologue: copy the link register (return address) into r0, save r30/r31 below
; the caller's stack pointer, and store the return address at 8(r1).
mflr r0
stmw r30,-8(r1)
stw r0,8(r1)
; Allocate a 96-byte frame: the old r1 is stored at the new stack top and r1 is updated.
stwu r1,-96(r1)
mr r30,r1
; Branch-and-link to the very next instruction so the link register holds the
; address of L1$pb; r31 then serves as the base for position-independent addressing.
bcl 20,31,L1$pb
L1$pb:
mflr r31
; Spill the incoming arguments (r3 = argc, r4 = argv) to the stack.
stw r3,120(r30)
stw r4,124(r30)
; printf(LC0, argc): r3 = address of LC0, built as an offset from L1$pb; r4 = argc.
addis r3,r31,ha16(LC0-L1$pb)
la r3,lo16(LC0-L1$pb)(r3)
lwz r4,120(r30)
bl L_printf$stub
; i = 0
li r0,0
stw r0,64(r30)
; L6: loop test. Enter the body while i < argc, otherwise exit the loop.
L6:
lwz r0,64(r30)
lwz r9,120(r30)
cmpw cr0,r0,r9
blt cr0,L9
b L7
; L9: loop body, i.e. printf(LC1, i, argv[i]).
L9:
; r9 = argv + (i << 2), the address of argv[i] in an array of 4-byte pointers.
lwz r0,64(r30)
slwi r9,r0,2
lwz r0,124(r30)
add r9,r9,r0
; r3 = address of LC1, r4 = i, r5 = argv[i].
addis r3,r31,ha16(LC1-L1$pb)
la r3,lo16(LC1-L1$pb)(r3)
lwz r4,64(r30)
lwz r5,0(r9)
bl L_printf$stub
; i = i + 1, done as load / add / store, then back to the loop test.
lwz r9,64(r30)
addi r0,r9,1
stw r0,64(r30)
b L6
; L7: function exit. r3 receives whatever r0 holds, which on this path is the
; value of i loaded at L6; main in a.c has no return statement.
L7:
mr r3,r0
; Epilogue: reload the caller's stack pointer from the frame's first word,
; restore the link register and r30/r31, and return.
lwz r1,0(r1)
lwz r0,8(r1)
mtlr r0
lmw r30,-8(r1)
blr
; Position-independent stub through which _main calls printf.
; It loads the word stored at L_printf$lazy_ptr and jumps to it.
.data
.picsymbol_stub
L_printf$stub:
.indirect_symbol _printf
; Preserve the caller's return address in r0 while the link register is
; borrowed to discover the current code address.
mflr r0
bcl 20,31,L0$_printf
L0$_printf:
; r11 = address of L0$_printf, then add the high part of the offset to the lazy pointer.
mflr r11
addis r11,r11,ha16(L_printf$lazy_ptr-L0$_printf)
; Put the caller's return address back in the link register before jumping away.
mtlr r0
; r12 = contents of the lazy pointer (the jump target); move it to the count register.
lwz r12,lo16(L_printf$lazy_ptr-L0$_printf)(r11)
mtctr r12
; r11 = full address of L_printf$lazy_ptr
; (presumably consumed by the binding helper; confirm against the dyld documentation).
addi r11,r11,lo16(L_printf$lazy_ptr-L0$_printf)
; Jump to the target without touching the link register, so the target returns to _main's caller site.
bctr
; Lazy pointer for printf: initially holds the address of dyld_stub_binding_helper
; (presumably overwritten with printf's real address once it is resolved; TODO confirm).
.data
.lazy_symbol_pointer
L_printf$lazy_ptr:
.indirect_symbol _printf
.long dyld_stub_binding_helper
This is a great starting place for learning some of the differences in assembly language between architectures. You could take the same "Hello world" program and compile it using gcc on a number of readily accessible architectures (x86, ppc).