在windows上,常用的函数调用方式有:
Pascal方式,WINAPI(_stdcall)方式 和C方式(_cdecl)
_cdecl调用规则:
1,参数从右到左入堆栈
2,在函数返回后,调用者要负责清除堆栈
因为每个调用点之后都要生成一段清理堆栈的代码,所以这种调用常会生成较大的可执行文件。
_stdcall又称为WINAPI调用方式,规则:
1,参数从右向左入堆栈
2,被调用的函数在返回前自行清理堆栈
所以这种调用会生成比cdecl小的代码
Pascal调用方式,主要用在WIN16函数库中,现在基本不用
规则:
1,参数从左向右入堆栈
2,被调用函数在返回前自行清理堆栈
此外,在Windows内核中还常见的有快速调用方式(_fastcall)
在C++编译的代码中有thiscall调用方式(_thiscall)
在32位Windows中,不管哪种调用方式,整数和指针类型的返回值都写在eax中(64位整数用edx:eax,浮点数用浮点寄存器st(0)),调用者从中获取返回值
_cdecl方式步骤
1,保存ebp
2,保存esp到ebp
3,在堆栈中腾出一个区域来保存局部变量
4,保存ebx,esi,edi到堆栈中,函数执行完后再恢复
5,把局部变量区域全部填充为0CCCCCCCCh,其中每个字节0CCh实际上是int 3指令的机器码,这是一个断点软中断
6,做函数里应该做的事情
7,恢复ebx,esi,edi,esp,ebp,最后返回
2: int func(int a,int b)
3: {
00401010 push ebp
00401011 mov ebp,esp
00401013 sub esp,44h
00401016 push ebx
00401017 push esi
00401018 push edi
00401019 lea edi,[ebp-44h]
0040101C mov ecx,11h
00401021 mov eax,0CCCCCCCCh
00401026 rep stos dword ptr [edi]
4: int c = a + b;
00401028 mov eax,dword ptr [ebp+8]
0040102B add eax,dword ptr [ebp+0Ch]
0040102E mov dword ptr [ebp-4],eax
5: return c;
00401031 mov eax,dword ptr [ebp-4]
6: }
00401034 pop edi
00401035 pop esi
00401036 pop ebx
00401037 mov esp,ebp
00401039 pop ebp
0040103A ret
for循环的汇编代码分析:
6: int i;
7: for(i = 0 ;i < 50 ; i ++)
0040B501 mov dword ptr [ebp-8],0
0040B508 jmp func+33h (0040b513)
0040B50A mov ecx,dword ptr [ebp-8]
0040B50D add ecx,1
0040B510 mov dword ptr [ebp-8],ecx
0040B513 cmp dword ptr [ebp-8],32h
0040B517 jge func+44h (0040b524)
8: c = c + i;
0040B519 mov edx,dword ptr [ebp-4]
0040B51C add edx,dword ptr [ebp-8]
0040B51F mov dword ptr [ebp-4],edx
0040B522 jmp func+2Ah (0040b50a)
9:
10: return c;
0040B524 mov eax,dword ptr [ebp-4]
11: }
从上面的汇编代码可以分析出,for循环就是cmp指令+条件跳转指令(如jge)+jmp指令
根据cmp的比较结果决定跳转到相应的位置继续执行代码
do...while循环分析
5:
6: int i = 0;
0040B501 mov dword ptr [ebp-8],0
7:
8: do {
9: c = c +i;
0040B508 mov ecx,dword ptr [ebp-4]
0040B50B add ecx,dword ptr [ebp-8]
0040B50E mov dword ptr [ebp-4],ecx
10: }while(c < 50);
0040B511 cmp dword ptr [ebp-4],32h
0040B515 jl func+28h (0040b508)
11:
12: return c;
0040B517 mov eax,dword ptr [ebp-4]
13: }
0040B51A pop edi
0040B51B pop esi
0040B51C pop ebx
0040B51D mov esp,ebp
0040B51F pop ebp
0040B520 ret
从上面代码可以看出
do...while循环本质上和for差不多,同样是cmp指令加条件跳转,只是先执行循环体,再比较并决定是否跳回循环开头
while循环:
6: int i = 0;
0040B501 mov dword ptr [ebp-8],0
7:
8: while(i < 50)
0040B508 cmp dword ptr [ebp-8],32h
0040B50C jge func+39h (0040b519)
9: {
10: c = c +i;
0040B50E mov ecx,dword ptr [ebp-4]
0040B511 add ecx,dword ptr [ebp-8]
0040B514 mov dword ptr [ebp-4],ecx
11: };
0040B517 jmp func+28h (0040b508)
12:
13: return c;
0040B519 mov eax,dword ptr [ebp-4]
14: }
0040B51C pop edi
0040B51D pop esi
0040B51E pop ebx
0040B51F mov esp,ebp
0040B521 pop ebp
0040B522 ret
if...else if...else语句分析
5:
6: int i = 0;
0040B501 mov dword ptr [ebp-8],0
7:
8: if(c>0 && c < 10)
0040B508 cmp dword ptr [ebp-4],0
0040B50C jle func+43h (0040b523)
0040B50E cmp dword ptr [ebp-4],0Ah
0040B512 jge func+43h (0040b523)
9: {
10: printf("c > 0");
0040B514 push offset string "c > 0" (0041ff5c)
0040B519 call printf (0040b780)
0040B51E add esp,4
11: }
12: else if(c>10 && c<00)
0040B521 jmp func+6Bh (0040b54b)
0040B523 cmp dword ptr [ebp-4],0Ah
0040B527 jle func+5Eh (0040b53e)
0040B529 cmp dword ptr [ebp-4],0
0040B52D jge func+5Eh (0040b53e)
13: {
14: printf("c>10 && c<100");
0040B52F push offset string "c>10 && c<100" (0041ff4c)
0040B534 call printf (0040b780)
0040B539 add esp,4
15: }
16: else
0040B53C jmp func+6Bh (0040b54b)
17: {
18: printf("c>10 && c < 100");
0040B53E push offset string "c>10 && c < 100" (0041ff3c)
0040B543 call printf (0040b780)
0040B548 add esp,4
19: }
20:
21: return c;
0040B54B mov eax,dword ptr [ebp-4]
22: }
0040B54E pop edi
0040B54F pop esi
0040B550 pop ebx
0040B551 add esp,48h
0040B554 cmp ebp,esp
0040B556 call __chkesp (0040b4a0)
0040B55B mov esp,ebp
0040B55D pop ebp
0040B55E ret
switch...case 代码分析
4: int c = a + b;
0040B4F8 mov eax,dword ptr [ebp+8]
0040B4FB add eax,dword ptr [ebp+0Ch]
0040B4FE mov dword ptr [ebp-4],eax
5:
6: switch(c)
7: {
0040B501 mov ecx,dword ptr [ebp-4]
0040B504 mov dword ptr [ebp-8],ecx
0040B507 cmp dword ptr [ebp-8],0
0040B50B je func+35h (0040b515)
0040B50D cmp dword ptr [ebp-8],1
0040B511 je func+42h (0040b522)
0040B513 jmp func+51h (0040b531)
8: case 0:
9: printf("c>0");
0040B515 push offset string "c>0" (0041ff4c)
0040B51A call printf (0040b780)
0040B51F add esp,4
10: case 1:
11: printf("c>10 && c<100");
0040B522 push offset string "c>10 && c<100" (0041ff3c)
0040B527 call printf (0040b780)
0040B52C add esp,4
12: break;
0040B52F jmp func+5Eh (0040b53e)
13: default:
14: printf("c>10 && c<100");
0040B531 push offset string "c>10 && c<100" (0041ff3c)
0040B536 call printf (0040b780)
0040B53B add esp,4
15: }
16:
17: return c;
0040B53E mov eax,dword ptr [ebp-4]
18: }
0040B541 pop edi
0040B542 pop esi
0040B543 pop ebx
0040B544 add esp,48h
0040B547 cmp ebp,esp
0040B549 call __chkesp (0040b4a0)
0040B54E mov esp,ebp
0040B550 pop ebp
0040B551 ret
结构体分析
1:
2: typedef struct {
3: int a;
4: int b;
5: int c;
6: }mystruct;
7:
8: int func(int a,int b)
9: {
0040B800 push ebp
0040B801 mov ebp,esp
0040B803 sub esp,1D8h
0040B809 push ebx
0040B80A push esi
0040B80B push edi
0040B80C lea edi,[ebp-1D8h]
0040B812 mov ecx,76h
0040B817 mov eax,0CCCCCCCCh
0040B81C rep stos dword ptr [edi]
10:
11: unsigned char *buf[100];
12: mystruct *strs = (mystruct *)buf;
0040B81E lea eax,[ebp-190h]
0040B824 mov dword ptr [ebp-194h],eax
13: int i;
14: for(i=0; i<5; i++)
0040B82A mov dword ptr [ebp-198h],0
0040B834 jmp func+45h (0040b845)
0040B836 mov ecx,dword ptr [ebp-198h]
0040B83C add ecx,1
0040B83F mov dword ptr [ebp-198h],ecx
0040B845 cmp dword ptr [ebp-198h],5
0040B84C jge func+94h (0040b894)
15: {
16: strs[i].a=0;
0040B84E mov edx,dword ptr [ebp-198h]
0040B854 imul edx,edx,0Ch
0040B857 mov eax,dword ptr [ebp-194h]
0040B85D mov dword ptr [eax+edx],0
17: strs[i].b=1;
0040B864 mov ecx,dword ptr [ebp-198h]
0040B86A imul ecx,ecx,0Ch
0040B86D mov edx,dword ptr [ebp-194h]
0040B873 mov dword ptr [edx+ecx+4],1
18: strs[i].c=2;
0040B87B mov eax,dword ptr [ebp-198h]
0040B881 imul eax,eax,0Ch
0040B884 mov ecx,dword ptr [ebp-194h]
0040B88A mov dword ptr [ecx+eax+8],2
19: }
0040B892 jmp func+36h (0040b836)
20:
21: return 0;
0040B894 xor eax,eax
22: }
0040B896 pop edi
0040B897 pop esi
0040B898 pop ebx
0040B899 mov esp,ebp
0040B89B pop ebp
0040B89C ret
从上面不难看出,结构体赋值是先计算出缓冲区的基址,然后把基址存放到一个变量中
然后用下标乘以结构体的大小(0Ch)得到每个结构体的偏移量,再加上成员在结构体内的固定偏移(0,4,8)对各个成员赋值
枚举,联合,结构结合分析:
1: typedef enum {
2: ENUM_1 = 1,
3: ENUM_2 = 2,
4: ENUM_3,
5: ENUM_4
6: }myenum;
7:
8: typedef struct {
9: int a;
10: int b;
11: int c;
12: }mystruct;
13:
14: typedef union {
15: mystruct s;
16: myenum e[3];
17: }myunion;
18:
19: int func(int a,int b)
20: {
00401020 push ebp
00401021 mov ebp,esp
00401023 sub esp,0ACh
00401029 push ebx
0040102A push esi
0040102B push edi
0040102C lea edi,[ebp-0ACh]
00401032 mov ecx,2Bh
00401037 mov eax,0CCCCCCCCh
0040103C rep stos dword ptr [edi]
21: unsigned char buf[100] = {0};
0040103E mov byte ptr [ebp-64h],0
00401042 mov ecx,18h
00401047 xor eax,eax
00401049 lea edi,[ebp-63h]
0040104C rep stos dword ptr [edi]
0040104E stos word ptr [edi]
00401050 stos byte ptr [edi]
22: myunion *uns = (myunion *)buf;
00401051 lea eax,[ebp-64h]
00401054 mov dword ptr [ebp-68h],eax
23:
24: int i;
25:
26: for(i = 0; i < 5; i++)
00401057 mov dword ptr [ebp-6Ch],0
0040105E jmp func+49h (00401069)
00401060 mov ecx,dword ptr [ebp-6Ch]
00401063 add ecx,1
00401066 mov dword ptr [ebp-6Ch],ecx
00401069 cmp dword ptr [ebp-6Ch],5
0040106D jge func+83h (004010a3)
27: {
28: uns[i].s.a=0;
0040106F mov edx,dword ptr [ebp-6Ch]
00401072 imul edx,edx,0Ch
00401075 mov eax,dword ptr [ebp-68h]
00401078 mov dword ptr [eax+edx],0
29: uns[i].s.b = 1;
0040107F mov ecx,dword ptr [ebp-6Ch]
00401082 imul ecx,ecx,0Ch
00401085 mov edx,dword ptr [ebp-68h]
00401088 mov dword ptr [edx+ecx+4],1
30: uns[i].e[2] = ENUM_4;
00401090 mov eax,dword ptr [ebp-6Ch]
00401093 imul eax,eax,0Ch
00401096 mov ecx,dword ptr [ebp-68h]
00401099 mov dword ptr [ecx+eax+8],4
31: }
004010A1 jmp func+40h (00401060)
32:
33: return 0;
004010A3 xor eax,eax
34: }
004010A5 pop edi
004010A6 pop esi
004010A7 pop ebx
004010A8 mov esp,ebp
004010AA pop ebp
004010AB ret
我们发现这段代码和上面的汇编后代码基本一样,因此我们知道,汇编中对共用体和枚举类型没有特别的处理
并不会引入新的代码,因为共用体和枚举都是方便给程序员用的,本质没什么改变
其实上面这些控制语句,对反汇编来说很容易分析,逆向工程中最令人蛋疼的是算法
一个3*3矩阵算法的逆向分析
main函数
12: int main()
13: {
0040B640 push ebp
0040B641 mov ebp,esp
0040B643 sub esp,0ACh
0040B649 push ebx
0040B64A push esi
0040B64B push edi
0040B64C lea edi,[ebp-0ACh]
0040B652 mov ecx,2Bh
0040B657 mov eax,0CCCCCCCCh
0040B65C rep stos dword ptr [edi]
14: int a[3][3] = { {1,2,3},{2,3,4},{3,4,5}};
0040B65E mov dword ptr [ebp-24h],1
0040B665 mov dword ptr [ebp-20h],2
0040B66C mov dword ptr [ebp-1Ch],3
0040B673 mov dword ptr [ebp-18h],2
0040B67A mov dword ptr [ebp-14h],3
0040B681 mov dword ptr [ebp-10h],4
0040B688 mov dword ptr [ebp-0Ch],3
0040B68F mov dword ptr [ebp-8],4
0040B696 mov dword ptr [ebp-4],5
15: int b[3][3] = { {2,3,4},{2,4,1},{6,2,1}};
0040B69D mov dword ptr [ebp-48h],2
0040B6A4 mov dword ptr [ebp-44h],3
0040B6AB mov dword ptr [ebp-40h],4
0040B6B2 mov dword ptr [ebp-3Ch],2
0040B6B9 mov dword ptr [ebp-38h],4
0040B6C0 mov dword ptr [ebp-34h],1
0040B6C7 mov dword ptr [ebp-30h],6
0040B6CE mov dword ptr [ebp-2Ch],2
0040B6D5 mov dword ptr [ebp-28h],1
16: int c[3][3];
17:
18: func(a,b,c);
0040B6DC lea eax,[ebp-6Ch]
0040B6DF push eax
0040B6E0 lea ecx,[ebp-48h]
0040B6E3 push ecx
0040B6E4 lea edx,[ebp-24h]
0040B6E7 push edx
0040B6E8 call @ILT+5(_func) (0040100a)
0040B6ED add esp,0Ch
19:
20: return 0;
0040B6F0 xor eax,eax
21: }
0040B6F2 pop edi
0040B6F3 pop esi
0040B6F4 pop ebx
0040B6F5 add esp,0ACh
0040B6FB cmp ebp,esp
0040B6FD call __chkesp (00401130)
0040B702 mov esp,ebp
0040B704 pop ebp
0040B705 ret
算法函数:
1: int func(int a[3][3],int b[3][3],int c[3][3])
2: {
0040B580 push ebp
0040B581 mov ebp,esp
0040B583 sub esp,48h
0040B586 push ebx
0040B587 push esi
0040B588 push edi
0040B589 lea edi,[ebp-48h]
0040B58C mov ecx,12h
0040B591 mov eax,0CCCCCCCCh
0040B596 rep stos dword ptr [edi]
3: int i,j;
4: for(i = 0 ; i < 3; i++)
0040B598 mov dword ptr [ebp-4],0
0040B59F jmp func+2Ah (0040b5aa)
0040B5A1 mov eax,dword ptr [ebp-4]
0040B5A4 add eax,1
0040B5A7 mov dword ptr [ebp-4],eax
0040B5AA cmp dword ptr [ebp-4],3
0040B5AE jge func+0AAh (0040b62a)
5: {
6: for(j = 0 ; j < 3; j ++)
0040B5B0 mov dword ptr [ebp-8],0
0040B5B7 jmp func+42h (0040b5c2)
0040B5B9 mov ecx,dword ptr [ebp-8]
0040B5BC add ecx,1
0040B5BF mov dword ptr [ebp-8],ecx
0040B5C2 cmp dword ptr [ebp-8],3
0040B5C6 jge func+0A5h (0040b625)
7: c[i][j] = a[i][0]*b[0][j]+a[i][1]*b[1][j]+a[i][2]*b[2][j];
0040B5C8 mov edx,dword ptr [ebp-4]
0040B5CB imul edx,edx,0Ch
0040B5CE mov eax,dword ptr [ebp+8]
0040B5D1 mov ecx,dword ptr [ebp-8]
0040B5D4 mov esi,dword ptr [ebp+0Ch]
0040B5D7 mov edx,dword ptr [eax+edx]
0040B5DA imul edx,dword ptr [esi+ecx*4]
0040B5DE mov eax,dword ptr [ebp-4]
0040B5E1 imul eax,eax,0Ch
0040B5E4 mov ecx,dword ptr [ebp+8]
0040B5E7 mov esi,dword ptr [ebp-8]
0040B5EA mov edi,dword ptr [ebp+0Ch]
0040B5ED mov eax,dword ptr [ecx+eax+4]
0040B5F1 imul eax,dword ptr [edi+esi*4+0Ch]
0040B5F6 add edx,eax
0040B5F8 mov ecx,dword ptr [ebp-4]
0040B5FB imul ecx,ecx,0Ch
0040B5FE mov eax,dword ptr [ebp+8]
0040B601 mov esi,dword ptr [ebp-8]
0040B604 mov edi,dword ptr [ebp+0Ch]
0040B607 mov ecx,dword ptr [eax+ecx+8]
0040B60B imul ecx,dword ptr [edi+esi*4+18h]
0040B610 add edx,ecx
0040B612 mov eax,dword ptr [ebp-4]
0040B615 imul eax,eax,0Ch
0040B618 mov ecx,dword ptr [ebp+10h]
0040B61B add ecx,eax
0040B61D mov eax,dword ptr [ebp-8]
0040B620 mov dword ptr [ecx+eax*4],edx
0040B623 jmp func+39h (0040b5b9)
8: }
0040B625 jmp func+21h (0040b5a1)
9: return 0;
0040B62A xor eax,eax
10: }
0040B62C pop edi
0040B62D pop esi
0040B62E pop ebx
0040B62F mov esp,ebp
0040B631 pop ebp
0040B632 ret
从上面的代码我们可以看出,Debug模式下编译生成的汇编代码对二维数组的操作方式如下:
mov eax, <数组元素下标>
imul eax,eax, <结构体的大小>
mov ecx, <结构体开始地址>
mov eax,dword ptr [ecx+eax]
访问内部变量的时候,还要加上该变量在结构体内的偏移量(数字),例如:
mov eax,dword ptr [ecx+eax+0CH]