函数链调用
在编程中有时会把多个函数串在一起依次调用,以达到特定的目的,在这里我们把这样的调用方式形象地称为函数链调用。函数链中的函数有些是独立的,有些则只用在函数组合中,不会单独调用。对象的连续配置和组合是函数链调用比较常用的场合;去除语法糖之后, LINQ 也是函数链的一种运用。下面通过即时编译中的一个例子,来看看函数链的用法。
几种热门的语言 C# 、 java 、 js 都采用了即时编译的方式,即时编译需要有相应的汇编类库供调用,以便把 VM 代码转换成本地机器代码。
Apple的汇编类库是比较常用的,被firefox,webkit用于js的即时编译,下面的代码是Apple汇编类库的一般调用方式:
// Typical usage of the assembler class library: every emitted instruction is
// one ordinary member call on the `masm` object, written in program order.
masm.push(ecx);
masm.move(ebp, edx);
masm.add32( 0x12 ,edx);
masm.push(edx);
// A memory operand has to be wrapped explicitly in MacroAssembler::Address.
masm.load32(MacroAssembler::Address(edx),edx);
masm.push(edx);
masm.load32(MacroAssembler::Address(edx),edx);
masm.add32(edx,r);
// The call target is likewise given as a memory operand.
masm.call(MacroAssembler::Address(r));
masm.pop(ecx);
下面再看看chrome中v8的调用习惯:
#define __ masm()-> __ mov(ebx, Operand(esp, kSavedRegistersAreaSize)); __ Set(ecx, Immediate( 0 )); __ lea(edx, Operand(esp, kSavedRegistersAreaSize + 1 * kPointerSize)); __ sub(edx, Operand(ebp)); __ neg(edx); Label pop_loop; __ bind( & pop_loop); __ pop(Operand(edx, 0 )); #undef __
与前面的调用方式差别不大,只是通过宏代换使得汇编调用看得更直观,并遵循了宏定义用过即取消定义的习惯。
从上面的代码可以看出,普通的函数调用方式,大部分的汇编码调用还是比较整洁,涉及到内存调用的部分显得有些不太直观,对于 mov [ebx + 2 * ecx + 0x1000] , eax 这样的语句写起来会有些复杂。下面我们试着看看有没有更直观的方式来表现。
在这里我们可以看到,函数与真实汇编之间存在一定程度的失配:汇编语言本身是描述性的,具有较强的组合能力,而用单个函数去模拟这样的能力,往往有点力不从心,这样失配的结果就是功能的简化和简洁性的减弱。利用多个函数一起协同的能力,函数链可以用于解决这样的失配问题,使得调用代码书写得像汇编一样简洁。
下面的代码是一些准备工作,定义了汇编要用到的一些结构,如寄存器、地址、操作还有卷标(标号)。寄存器和卷标的代码都非常简单,操作和地址的代码复杂一些,主要是定义了一些操作符的重载,这些函数体现了函数链中函数的特点:要么返回自身,要么返回新对象,以备后续调用。另外还有一些宏定义,这些宏都比较简单。为简单起见,在这里程序并不作实际的本地代码转换工作,只保证书写的代码能编译通过。具体的代码如下:
View Code
// Building blocks of a small "function chain" assembler DSL: operand types,
// address arithmetic, the TOp instruction builder and the mnemonic/register
// macros. Nothing is encoded to machine code here; the types only record the
// operands so that assembly-looking source compiles.
//
// All operands are stored by value inside the object that refers to them, so
// the DSL works with the temporaries produced by the register macros under
// any standard C++11 (or later) compiler and allocates nothing on the heap.

// Common base of every operand kind, so TOp can point at any of them.
struct TNode { };

struct TOp;

// A jump label. It carries no state in this demo.
struct TLabel
{
    // `L1 <instruction>`: the labelled instruction is passed through as is.
    inline TOp & operator () (TOp & r) { return r; }
    // Same for an instruction that is still a temporary (what the mnemonic
    // macros produce). The result is valid until the end of the full
    // expression, which is all a chained statement needs.
    inline TOp & operator () (TOp && r) { return static_cast<TOp &>(r); }
};

// Immediate integer operand.
struct TInt : TNode
{
    int val;
    TInt( int v) : val(v) {}
};

// Register operand, identified by its number.
struct TReg : TNode
{
    int reg;
    TReg( int r) : reg(r) {}
    TReg() : reg( 0 ) {}
    inline bool operator != ( const TReg & l) const { return reg != l.reg; }
    inline bool operator == ( const TReg & l) const { return reg == l.reg; }
    inline bool operator >  ( const TReg & l) const { return reg >  l.reg; }
    inline bool operator <  ( const TReg & l) const { return reg <  l.reg; }
};

// Memory operand of the form [base + scale * index + direct].
// `base` and `index` are NULL when absent; after setBase()/setIndex() they
// point at copies owned by this object, so they never dangle and copying a
// TAdr re-targets them to the copy's own storage.
struct TAdr : TNode
{
    int typ;
    TReg * base;
    int scale;
    TReg * index;
    int direct;

    TAdr() : typ( 0 ), base(NULL), scale( 0 ), index(NULL), direct( 0 ) {}

    TAdr( const TAdr & o)
        : TNode(o), typ(o.typ), base(NULL), scale(o.scale), index(NULL), direct(o.direct)
    {
        if (o.base)  setBase(*o.base);
        if (o.index) setIndex(*o.index);
    }

    TAdr & operator = ( const TAdr & o)
    {
        if (this != &o)
        {
            typ = o.typ;
            scale = o.scale;
            direct = o.direct;
            if (o.base)  setBase(*o.base);   else base = NULL;
            if (o.index) setIndex(*o.index); else index = NULL;
        }
        return *this;
    }

    // Store a private copy of the register and point base/index at it.
    void setBase( const TReg & r)  { baseReg_ = r;  base = &baseReg_; }
    void setIndex( const TReg & r) { indexReg_ = r; index = &indexReg_; }

private:
    TReg baseReg_;
    TReg indexReg_;
};

// Heap helpers kept for existing callers. The address operators below no
// longer use them: they used to allocate a TAdr per expression and never
// release it.
struct TAlloc
{
    static TAdr* allocAdr() { return new TAdr; }
    static TReg* allocReg() { return new TReg(); }
    static void free(TAdr* p) { delete p; }
    static void free(TReg* p) { delete p; }
};

// reg + reg  ->  [base + index]
inline TAdr operator + ( const TReg & l, const TReg & r)
{
    TAdr adr;
    adr.setBase(l);
    adr.setIndex(r);
    return adr;
}

// n * reg  ->  [n * index]
inline TAdr operator * ( int l, const TReg & r)
{
    TAdr adr;
    adr.scale = l;
    adr.setIndex(r);
    return adr;
}

// reg + n  ->  [base + n]
inline TAdr operator + ( const TReg & r, int l)
{
    TAdr adr;
    adr.setBase(r);
    adr.direct = l;
    return adr;
}

// adr + n on a named address: adjusts it in place, as before, but the
// displacement is now accumulated instead of overwritten (ebx + 4 + 8 is 12).
inline TAdr & operator + (TAdr & adr, int l)
{
    adr.direct += l;
    return adr;
}

// adr + n on a temporary address, the normal case inside an expression.
inline TAdr operator + (TAdr && adr, int l)
{
    adr.direct += l;
    return adr;
}

// reg + adr  ->  [base + scale * index + direct]; only the index, scale and
// displacement of `r` are taken over, exactly as in the original operator.
inline TAdr operator + ( const TReg & l, const TAdr & r)
{
    TAdr adr;
    adr.setBase(l);
    if (r.index) adr.setIndex(*r.index);
    adr.scale = r.scale;
    adr.direct = r.direct;
    return adr;
}

// One instruction under construction. Operands are appended by chained calls:
// the first call fills `left`, every later call sets `right`.
// left/right point at copies stored inside this object, so they stay valid
// for as long as the TOp itself lives.
struct TOp
{
    int op;
    TNode * left;
    TNode * right;

    TOp( int _op) : op(_op), left(NULL), right(NULL) {}
    TOp( const TOp & o) : op(o.op), left(NULL), right(NULL) { copyOperands(o); }
    TOp & operator = ( const TOp & o)
    {
        if (this != &o) { op = o.op; copyOperands(o); }
        return *this;
    }

    // Plain operands: op(reg), op(adr), op(imm), op(42).
    inline TOp & operator () ( const TReg & r) { return attach(slot().set(r)); }
    inline TOp & operator () ( const TAdr & r) { return attach(slot().set(r)); }
    inline TOp & operator () ( const TInt & r) { return attach(slot().set(r)); }
    inline TOp & operator () ( int r)          { return attach(slot().set(TInt(r))); }

    // Bracketed operands: op[adr], op[reg], op[42].
    // NOTE(review): [reg] and [imm] are recorded exactly like a bare register
    // or immediate, so a memory reference cannot be told apart from a direct
    // operand afterwards; wrap them in a TAdr if a real encoder is added.
    inline TOp & operator [] ( const TAdr & r) { return attach(slot().set(r)); }
    inline TOp & operator [] ( const TReg & r) { return attach(slot().set(r)); }
    inline TOp & operator [] ( int r)          { return attach(slot().set(TInt(r))); }

    // `jmp + label`: the label is accepted but not recorded in this demo.
    inline TOp & operator + (TLabel /*target*/) { return * this ; }

private:
    // Storage for one operand of any kind.
    struct Slot
    {
        TReg reg;
        TAdr adr;
        TInt imm;
        Slot() : imm( 0 ) {}
        TNode * set( const TReg & r) { reg = r; return &reg; }
        TNode * set( const TAdr & a) { adr = a; return &adr; }
        TNode * set( const TInt & i) { imm = i; return &imm; }
        // If `p` points at a member of `from`, return the matching member of
        // this slot; otherwise NULL.
        TNode * mirror( const Slot & from, const TNode * p)
        {
            if (p == &from.reg) return &reg;
            if (p == &from.adr) return &adr;
            if (p == &from.imm) return &imm;
            return NULL;
        }
    };

    Slot slots_[2];   // [0] backs `left`, [1] backs `right`

    // Slot that the next operand goes into.
    Slot & slot() { return slots_[left ? 1 : 0]; }

    // Same rule as the original chain: fill `left` first, then `right`.
    TOp & attach(TNode * node)
    {
        if (left) right = node; else left = node;
        return * this ;
    }

    // Copy the operands of `o` and make left/right refer to our own copies;
    // pointers that did not refer to o's storage are copied unchanged.
    void copyOperands( const TOp & o)
    {
        slots_[0] = o.slots_[0];
        slots_[1] = o.slots_[1];
        TNode * l = slots_[0].mirror(o.slots_[0], o.left);
        TNode * r = slots_[1].mirror(o.slots_[1], o.right);
        left  = l ? l : o.left;
        right = r ? r : o.right;
    }
};

// Numeric codes of the supported instructions.
struct TOpcode
{
    static const unsigned char mov = 1 ;
    static const unsigned char add = 2 ;
    static const unsigned char sub = 3 ;
    static const unsigned char mul = 4 ;
    static const unsigned char div = 5 ;
    static const unsigned char jmp = 6 ;
    static const unsigned char push = 7 ;
    static const unsigned char pop = 8 ;
    static const unsigned char call = 9 ;
    static const unsigned char ret = 10 ;
};

// One macro per mnemonic; each yields a fresh TOp to chain operands onto.
// ncode_jmp ends in `+` so that the label that follows becomes `TOp + TLabel`.
#define ncode_mov (TOp(TOpcode::mov))
#define ncode_add (TOp(TOpcode::add))
#define ncode_sub (TOp(TOpcode::sub))
#define ncode_mul (TOp(TOpcode::mul))
#define ncode_div (TOp(TOpcode::div))
#define ncode_push (TOp(TOpcode::push))
#define ncode_pop (TOp(TOpcode::pop))
#define ncode_jmp (TOp(TOpcode::jmp)) +
#define ncode_call (TOp(TOpcode::call))
#define ncode_ret (TOp(TOpcode::ret))

// _(mov a,b): `mov a` arrives as x and is pasted onto ncode_, the comma is
// dropped, and `b` follows as the next link of the chain. Single-operand forms
// such as _(push ebp) pass no variadic argument, which the usual compilers
// accept by default and C++20 allows.
#define _(x,...) ncode_##x __VA_ARGS__

// Register names expand to temporaries carrying the register number.
#define eax (TReg(0))
#define ecx (TReg(1))
#define edx (TReg(2))
#define ebx (TReg(3))
#define esp (TReg(4))
#define ebp (TReg(5))
#define esi (TReg(6))
#define edi (TReg(7))
通过上面的准备,现在可以书写汇编调用代码了:
int _tmain( int argc, _TCHAR* argv[]) { TLabel L1,L2; _(push ebp); _(mov ebp,esp); _(push esi); _(push edi); _(mov ebx, eax); _(mov eax,[ebx + 2 * ecx]); _(mov [ebx + 2 * ecx + 0x1000 ],eax); L1 _(mov eax,[eax]); L2 _(mov eax,[ 0x1234 ]); _(call eax); _(jmp L1); _(jmp L2); _(pop edi); _(pop esi); _(mov esp,ebp); _(pop ebp); _(ret ); exit( 1 ); }
是不是看起来很像内嵌(内联)汇编代码?但只是形似而已:这里是函数调用,而内嵌汇编码是执行码。现在看起来是否更直观了?YY一下。
现在再看看宏展开后的实际代码,是不是都是一些函数链调用 ?
int wmain( int argc, _TCHAR* argv[]) { TLabel L1,L2; (TOp(TOpcode::push)) (TReg( 5 )) ; (TOp(TOpcode::mov)) (TReg( 5 )) (TReg( 4 )); (TOp(TOpcode::push)) (TReg( 6 )) ; (TOp(TOpcode::push)) (TReg( 7 )) ; (TOp(TOpcode::mov)) (TReg( 3 )) (TReg( 0 )); (TOp(TOpcode::mov)) (TReg( 0 )) [(TReg( 3 )) + 2 * (TReg( 1 ))]; (TOp(TOpcode::mov)) [(TReg( 3 )) + 2 * (TReg( 1 )) + 0x1000 ] (TReg( 0 )); L1 (TOp(TOpcode::mov)) (TReg( 0 )) [(TReg( 0 ))]; L2 (TOp(TOpcode::mov)) (TReg( 0 )) [ 0x1234 ]; (TOp(TOpcode::call)) (TReg( 0 )) ; (TOp(TOpcode::jmp)) + L1 ; (TOp(TOpcode::jmp)) + L2 ; (TOp(TOpcode::pop)) (TReg( 7 )) ; (TOp(TOpcode::pop)) (TReg( 6 )) ; (TOp(TOpcode::mov)) (TReg( 4 )) (TReg( 5 )); (TOp(TOpcode::pop)) (TReg( 5 )) ; (TOp(TOpcode::ret)) ; exit( 1 ); }
有头晕的感觉吧?正好应了一点,简单的背后是复杂。
下面是完整的示例代码:
View Code
// Complete example: a small "function chain" assembler DSL plus a demo entry
// point. Nothing is encoded to machine code; the types only record operands
// so that assembly-looking source compiles.
//
// All operands are stored by value inside the object that refers to them, so
// the DSL works with the temporaries produced by the register macros under
// any standard C++11 (or later) compiler and allocates nothing on the heap.
#include <stdio.h>
#include <stdlib.h>
#if defined(_WIN32)
#include <tchar.h>
#else
// <tchar.h> exists only on Windows; provide the two names the entry point
// needs so the example also builds elsewhere.
typedef char _TCHAR;
#define _tmain main
#endif

// Common base of every operand kind, so TOp can point at any of them.
struct TNode { };

struct TOp;

// A jump label. It carries no state in this demo.
struct TLabel
{
    // `L1 <instruction>`: the labelled instruction is passed through as is.
    inline TOp & operator () (TOp & r) { return r; }
    // Same for an instruction that is still a temporary (what the mnemonic
    // macros produce). The result is valid until the end of the full
    // expression, which is all a chained statement needs.
    inline TOp & operator () (TOp && r) { return static_cast<TOp &>(r); }
};

// Immediate integer operand.
struct TInt : TNode
{
    int val;
    TInt( int v) : val(v) {}
};

// Register operand, identified by its number.
struct TReg : TNode
{
    int reg;
    TReg( int r) : reg(r) {}
    TReg() : reg( 0 ) {}
    inline bool operator != ( const TReg & l) const { return reg != l.reg; }
    inline bool operator == ( const TReg & l) const { return reg == l.reg; }
    inline bool operator >  ( const TReg & l) const { return reg >  l.reg; }
    inline bool operator <  ( const TReg & l) const { return reg <  l.reg; }
};

// Memory operand of the form [base + scale * index + direct].
// `base` and `index` are NULL when absent; after setBase()/setIndex() they
// point at copies owned by this object, so they never dangle and copying a
// TAdr re-targets them to the copy's own storage.
struct TAdr : TNode
{
    int typ;
    TReg * base;
    int scale;
    TReg * index;
    int direct;

    TAdr() : typ( 0 ), base(NULL), scale( 0 ), index(NULL), direct( 0 ) {}

    TAdr( const TAdr & o)
        : TNode(o), typ(o.typ), base(NULL), scale(o.scale), index(NULL), direct(o.direct)
    {
        if (o.base)  setBase(*o.base);
        if (o.index) setIndex(*o.index);
    }

    TAdr & operator = ( const TAdr & o)
    {
        if (this != &o)
        {
            typ = o.typ;
            scale = o.scale;
            direct = o.direct;
            if (o.base)  setBase(*o.base);   else base = NULL;
            if (o.index) setIndex(*o.index); else index = NULL;
        }
        return *this;
    }

    // Store a private copy of the register and point base/index at it.
    void setBase( const TReg & r)  { baseReg_ = r;  base = &baseReg_; }
    void setIndex( const TReg & r) { indexReg_ = r; index = &indexReg_; }

private:
    TReg baseReg_;
    TReg indexReg_;
};

// Heap helpers kept for existing callers. The address operators below no
// longer use them: they used to allocate a TAdr per expression and never
// release it.
struct TAlloc
{
    static TAdr* allocAdr() { return new TAdr; }
    static TReg* allocReg() { return new TReg(); }
    static void free(TAdr* p) { delete p; }
    static void free(TReg* p) { delete p; }
};

// reg + reg  ->  [base + index]
inline TAdr operator + ( const TReg & l, const TReg & r)
{
    TAdr adr;
    adr.setBase(l);
    adr.setIndex(r);
    return adr;
}

// n * reg  ->  [n * index]
inline TAdr operator * ( int l, const TReg & r)
{
    TAdr adr;
    adr.scale = l;
    adr.setIndex(r);
    return adr;
}

// reg + n  ->  [base + n]
inline TAdr operator + ( const TReg & r, int l)
{
    TAdr adr;
    adr.setBase(r);
    adr.direct = l;
    return adr;
}

// adr + n on a named address: adjusts it in place, as before, but the
// displacement is now accumulated instead of overwritten (ebx + 4 + 8 is 12).
inline TAdr & operator + (TAdr & adr, int l)
{
    adr.direct += l;
    return adr;
}

// adr + n on a temporary address, the normal case inside an expression.
inline TAdr operator + (TAdr && adr, int l)
{
    adr.direct += l;
    return adr;
}

// reg + adr  ->  [base + scale * index + direct]; only the index, scale and
// displacement of `r` are taken over, exactly as in the original operator.
inline TAdr operator + ( const TReg & l, const TAdr & r)
{
    TAdr adr;
    adr.setBase(l);
    if (r.index) adr.setIndex(*r.index);
    adr.scale = r.scale;
    adr.direct = r.direct;
    return adr;
}

// One instruction under construction. Operands are appended by chained calls:
// the first call fills `left`, every later call sets `right`.
// left/right point at copies stored inside this object, so they stay valid
// for as long as the TOp itself lives.
struct TOp
{
    int op;
    TNode * left;
    TNode * right;

    TOp( int _op) : op(_op), left(NULL), right(NULL) {}
    TOp( const TOp & o) : op(o.op), left(NULL), right(NULL) { copyOperands(o); }
    TOp & operator = ( const TOp & o)
    {
        if (this != &o) { op = o.op; copyOperands(o); }
        return *this;
    }

    // Plain operands: op(reg), op(adr), op(imm), op(42).
    inline TOp & operator () ( const TReg & r) { return attach(slot().set(r)); }
    inline TOp & operator () ( const TAdr & r) { return attach(slot().set(r)); }
    inline TOp & operator () ( const TInt & r) { return attach(slot().set(r)); }
    inline TOp & operator () ( int r)          { return attach(slot().set(TInt(r))); }

    // Bracketed operands: op[adr], op[reg], op[42].
    // NOTE(review): [reg] and [imm] are recorded exactly like a bare register
    // or immediate, so a memory reference cannot be told apart from a direct
    // operand afterwards; wrap them in a TAdr if a real encoder is added.
    inline TOp & operator [] ( const TAdr & r) { return attach(slot().set(r)); }
    inline TOp & operator [] ( const TReg & r) { return attach(slot().set(r)); }
    inline TOp & operator [] ( int r)          { return attach(slot().set(TInt(r))); }

    // `jmp + label`: the label is accepted but not recorded in this demo.
    inline TOp & operator + (TLabel /*target*/) { return * this ; }

private:
    // Storage for one operand of any kind.
    struct Slot
    {
        TReg reg;
        TAdr adr;
        TInt imm;
        Slot() : imm( 0 ) {}
        TNode * set( const TReg & r) { reg = r; return &reg; }
        TNode * set( const TAdr & a) { adr = a; return &adr; }
        TNode * set( const TInt & i) { imm = i; return &imm; }
        // If `p` points at a member of `from`, return the matching member of
        // this slot; otherwise NULL.
        TNode * mirror( const Slot & from, const TNode * p)
        {
            if (p == &from.reg) return &reg;
            if (p == &from.adr) return &adr;
            if (p == &from.imm) return &imm;
            return NULL;
        }
    };

    Slot slots_[2];   // [0] backs `left`, [1] backs `right`

    // Slot that the next operand goes into.
    Slot & slot() { return slots_[left ? 1 : 0]; }

    // Same rule as the original chain: fill `left` first, then `right`.
    TOp & attach(TNode * node)
    {
        if (left) right = node; else left = node;
        return * this ;
    }

    // Copy the operands of `o` and make left/right refer to our own copies;
    // pointers that did not refer to o's storage are copied unchanged.
    void copyOperands( const TOp & o)
    {
        slots_[0] = o.slots_[0];
        slots_[1] = o.slots_[1];
        TNode * l = slots_[0].mirror(o.slots_[0], o.left);
        TNode * r = slots_[1].mirror(o.slots_[1], o.right);
        left  = l ? l : o.left;
        right = r ? r : o.right;
    }
};

// Numeric codes of the supported instructions.
struct TOpcode
{
    static const unsigned char mov = 1 ;
    static const unsigned char add = 2 ;
    static const unsigned char sub = 3 ;
    static const unsigned char mul = 4 ;
    static const unsigned char div = 5 ;
    static const unsigned char jmp = 6 ;
    static const unsigned char push = 7 ;
    static const unsigned char pop = 8 ;
    static const unsigned char call = 9 ;
    static const unsigned char ret = 10 ;
};

// One macro per mnemonic; each yields a fresh TOp to chain operands onto.
// ncode_jmp ends in `+` so that the label that follows becomes `TOp + TLabel`.
#define ncode_mov (TOp(TOpcode::mov))
#define ncode_add (TOp(TOpcode::add))
#define ncode_sub (TOp(TOpcode::sub))
#define ncode_mul (TOp(TOpcode::mul))
#define ncode_div (TOp(TOpcode::div))
#define ncode_push (TOp(TOpcode::push))
#define ncode_pop (TOp(TOpcode::pop))
#define ncode_jmp (TOp(TOpcode::jmp)) +
#define ncode_call (TOp(TOpcode::call))
#define ncode_ret (TOp(TOpcode::ret))

// _(mov a,b): `mov a` arrives as x and is pasted onto ncode_, the comma is
// dropped, and `b` follows as the next link of the chain. Single-operand forms
// such as _(push ebp) pass no variadic argument, which the usual compilers
// accept by default and C++20 allows.
#define _(x,...) ncode_##x __VA_ARGS__

// Register names expand to temporaries carrying the register number.
#define eax (TReg(0))
#define ecx (TReg(1))
#define edx (TReg(2))
#define ebx (TReg(3))
#define esp (TReg(4))
#define ebp (TReg(5))
#define esi (TReg(6))
#define edi (TReg(7))

// Demo entry point: writes a short instruction sequence with the DSL macros.
// Each statement only builds (and immediately discards) a TOp.
int _tmain( int argc, _TCHAR* argv[])
{
    TLabel L1,L2;

    // Save ebp, set up the frame register, save esi and edi.
    _(push ebp);
    _(mov ebp,esp);
    _(push esi);
    _(push edi);

    // Register and memory operand forms.
    _(mov ebx, eax);
    _(mov eax,[ebx + 2 * ecx]);
    _(mov [ebx + 2 * ecx + 0x1000 ],eax);

    // Labelled instructions: the label is written in front of the statement.
    L1 _(mov eax,[eax]);
    L2 _(mov eax,[ 0x1234 ]);

    _(call eax);
    _(jmp L1);
    _(jmp L2);

    // Undo the pushes in reverse order and return.
    _(pop edi);
    _(pop esi);
    _(mov esp,ebp);
    _(pop ebp);
    _(ret );

    // The demo finished normally, so report success (the original called
    // exit(1), which signals failure to the caller).
    return 0;
}
-----复杂,并不会因奥卡姆剃刀而减少。
分类: 语言 , 语言编译 , 杂谈
标签: 即时编译
作者: Leo_wl
出处: http://www.cnblogs.com/Leo_wl/
本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文链接,否则保留追究法律责任的权利。
版权信息