Create Presentation
Download Presentation

Download Presentation

Winter 2012-2013 Compiler Principles IR Local Optimizations – part 2

Winter 2012-2013 Compiler Principles IR Local Optimizations – part 2

100 Views

Download Presentation
Download Presentation
## Winter 2012-2013 Compiler Principles IR Local Optimizations – part 2

- - - - - - - - - - - - - - - - - - - - - - - - - - - E N D - - - - - - - - - - - - - - - - - - - - - - - - - - -

**Winter 2012-2013Compiler PrinciplesIR Local Optimizations**– part 2 Mayer Goldberg and Roman Manevich Ben-Gurion University**Today**• Review Sethi-Ullman code generation alg. • Define Basic blocks/control-flow graphs • Define analyses and related optimizations • Common sub-expression elimination • Copy propagation • Dead code elimination • Formalize local analyses**Naive cgen for expressions**Maintain a counter for temporaries in c Initially: c = 0 cgen(e1op e2) = { Let A = cgen(e1)c = c + 1 Let B = cgen(e2)c = c + 1 Emit( _tc = A op B; ) Return _tc}**Example**cgen( (a*b)-d)**Example**c = 0 cgen( (a*b)-d)**Example**c = 0 cgen( (a*b)-d) = { Let A = cgen(a*b)c = c + 1 Let B = cgen(d)c = c + 1 Emit( _tc = A - B; ) Return _tc}**Example**c = 0 cgen( (a*b)-d) = { Let A = { Let A = cgen(a)c = c + 1 Let B = cgen(b)c = c + 1 Emit( _tc = A * B; ) Return tc } c = c + 1 Let B = cgen(d)c = c + 1 Emit( _tc = A - B; ) Return _tc}**Example**c = 0 cgen( (a*b)-d) = { Let A = { Let A = { Emit(_tc = a;), return _tc }c = c + 1 Let B = { Emit(_tc = b;), return _tc }c = c + 1 Emit( _tc = A * B; ) Return _tc } c = c + 1 Let B = { Emit(_tc = d;), return _tc }c = c + 1 Emit( _tc = A - B; ) Return _tc} Code here A=_t0**Example**c = 0 cgen( (a*b)-d) = { Let A = { Let A = { Emit(_tc = a;), return _tc }c = c + 1 Let B = { Emit(_tc = b;), return _tc }c = c + 1 Emit( _tc = A * B; ) Return _tc } c = c + 1 Let B = { Emit(_tc = d;), return _tc }c = c + 1 Emit( _tc = A - B; ) Return _tc} Code_t0=a; here A=_t0**Example**c = 0 cgen( (a*b)-d) = { Let A = { Let A = { Emit(_tc = a;), return _tc }c = c + 1 Let B = { Emit(_tc = b;), return _tc }c = c + 1 Emit( _tc = A * B; ) Return _tc } c = c + 1 Let B = { Emit(_tc = d;), return _tc }c = c + 1 Emit( _tc = A - B; ) Return _tc} Code_t0=a;_t1=b; here A=_t0**Example**c = 0 cgen( (a*b)-d) = { Let A = { Let A = { Emit(_tc = a;), return _tc }c = c + 1 Let B = { Emit(_tc = b;), return _tc }c = c + 1 Emit( _tc = A * B; ) Return _tc } c = c + 1 Let B = { Emit(_tc 
= d;), return _tc }c = c + 1 Emit( _tc = A - B; ) Return _tc} Code_t0=a;_t1=b;_t2=_t0*_t1 here A=_t0**Example**c = 0 cgen( (a*b)-d) = { Let A = { Let A = { Emit(_tc = a;), return _tc }c = c + 1 Let B = { Emit(_tc = b;), return _tc }c = c + 1 Emit( _tc = A * B; ) Return _tc } c = c + 1 Let B = { Emit(_tc = d;), return _tc }c = c + 1 Emit( _tc = A - B; ) Return _tc} here A=_t2 Code_t0=a;_t1=b;_t2=_t0*_t1 here A=_t0**Example**c = 0 cgen( (a*b)-d) = { Let A = { Let A = { Emit(_tc = a;), return _tc }c = c + 1 Let B = { Emit(_tc = b;), return _tc }c = c + 1 Emit( _tc = A * B; ) Return _tc } c = c + 1 Let B = { Emit(_tc = d;), return _tc }c = c + 1 Emit( _tc = A - B; ) Return _tc} here A=_t2 Code_t0=a;_t1=b;_t2=_t0*_t1_t3=d; here A=_t0**Example**c = 0 cgen( (a*b)-d) = { Let A = { Let A = { Emit(_tc = a;), return _tc }c = c + 1 Let B = { Emit(_tc = b;), return _tc }c = c + 1 Emit( _tc = A * B; ) Return _tc } c = c + 1 Let B = { Emit(_tc = d;), return _tc }c = c + 1 Emit( _tc = A - B; ) Return _tc} here A=_t2 Code_t0=a;_t1=b;_t2=_t0*_t1_t3=d;_t4=_t2-_t3 here A=_t0**Naive cgen for expressions**Maintain a counter for temporaries in c Initially: c = 0 cgen(e1op e2) = { Let A = cgen(e1)c = c + 1 Let B = cgen(e2)c = c + 1 Emit( _tc = A op B; ) Return _tc} Observation: temporaries in cgen(e1) can be reused in cgen(e2)**Improved cgen for expressions**Maintain temporaries stack by counter c Initially: c = 0 cgen(e1op e2) = { Let _tc = cgen(e1)c = c + 1 Let _tc = cgen(e2)c = c - 1 Emit( _tc = _tcop _tc+1; ) Return tc}**Example**c = 0 cgen( (a*b)-d) = { Let _tc = { Let _tc = { Emit(_tc = a;), return _tc }c = c + 1 Let _tc = { Emit(_tc = b;), return _tc }c = c - 1 Emit( _tc = _tc* _tc+1; ) Return _tc } c = c + 1 Let _tc = { Emit(_tc = d;), return _tc }c = c - 1 Emit( _tc = _tc- _tc+1; ) Return _tc} Code c=0**Example**c = 0 cgen( (a*b)-d) = { Let _tc = { Let _tc = { Emit(_tc = a;), return _tc }c = c + 1 Let _tc = { Emit(_tc = b;), return _tc }c = c - 1 Emit( _tc = _tc* _tc+1; ) Return 
_tc } c = c + 1 Let _tc = { Emit(_tc = d;), return _tc }c = c - 1 Emit( _tc = _tc- _tc+1; ) Return _tc} Code_t0=a; c=1**Example**c = 0 cgen( (a*b)-d) = { Let _tc = { Let _tc = { Emit(_tc = a;), return _tc }c = c + 1 Let _tc = { Emit(_tc = b;), return _tc }c = c - 1 Emit( _tc = _tc* _tc+1; ) Return _tc } c = c + 1 Let _tc = { Emit(_tc = d;), return _tc }c = c - 1 Emit( _tc = _tc- _tc+1; ) Return _tc} Code_t0=a; _t1=b; c=1**Example**c = 0 cgen( (a*b)-d) = { Let _tc = { Let _tc = { Emit(_tc = a;), return _tc }c = c + 1 Let _tc = { Emit(_tc = b;), return _tc }c = c - 1 Emit( _tc = _tc* _tc+1; ) Return _tc } c = c + 1 Let _tc = { Emit(_tc = d;), return _tc }c = c - 1 Emit( _tc = _tc- _tc+1; ) Return _tc} Code_t0=a; _t1=b; c=0**Example**c = 0 cgen( (a*b)-d) = { Let _tc = { Let _tc = { Emit(_tc = a;), return _tc }c = c + 1 Let _tc = { Emit(_tc = b;), return _tc }c = c - 1 Emit( _tc = _tc* _tc+1; ) Return _tc } c = c + 1 Let _tc = { Emit(_tc = d;), return _tc }c = c - 1 Emit( _tc = _tc- _tc+1; ) Return _tc} Code_t0=a; _t1=b;_t0=_t0*_t1 c=0**Example**c = 0 cgen( (a*b)-d) = { Let _tc = { Let _tc = { Emit(_tc = a;), return _tc }c = c + 1 Let _tc = { Emit(_tc = b;), return _tc }c = c - 1 Emit( _tc = _tc* _tc+1; ) Return _tc } c = c + 1 Let _tc = { Emit(_tc = d;), return _tc }c = c - 1 Emit( _tc = _tc- _tc+1; ) Return _tc} Code_t0=a; _t1=b;_t0=_t0*_t1; c=1**Example**c = 0 cgen( (a*b)-d) = { Let _tc = { Let _tc = { Emit(_tc = a;), return _tc }c = c + 1 Let _tc = { Emit(_tc = b;), return _tc }c = c - 1 Emit( _tc = _tc* _tc+1; ) Return _tc } c = c + 1 Let _tc = { Emit(_tc = d;), return _tc }c = c - 1 Emit( _tc = _tc- _tc+1; ) Return _tc} Code_t0=a; _t1=b;_t0=_t0*_t1;_t1=d; c=1**Example**c = 0 cgen( (a*b)-d) = { Let _tc = { Let _tc = { Emit(_tc = a;), return _tc }c = c + 1 Let _tc = { Emit(_tc = b;), return _tc }c = c - 1 Emit( _tc = _tc* _tc+1; ) Return _tc } c = c + 1 Let _tc = { Emit(_tc = d;), return _tc }c = c - 1 Emit( _tc = _tc- _tc+1; ) Return _tc} Code_t0=a; 
_t1=b;_t0=_t0*_t1;_t1=d; c=0**Example**c = 0 cgen( (a*b)-d) = { Let _tc = { Let _tc = { Emit(_tc = a;), return _tc }c = c + 1 Let _tc = { Emit(_tc = b;), return _tc }c = c - 1 Emit( _tc = _tc* _tc+1; ) Return _tc } c = c + 1 Let _tc = { Emit(_tc = d;), return _tc }c = c - 1 Emit( _tc = _tc- _tc+1; ) Return _tc} Code_t0=a; _t1=b;_t0=_t0*_t1;_t1=d;_t0=_t0-_t1; c=0**Weighted register allocation for trees**• Sethi-Ullman’s algorithm generates code for side-effect-free expressions that uses a minimal number of registers • Phase 0: check side-effect-free condition • Phase 1: Assign weights (weight = number of registers needed) • Leaf weight known (usually 0 or 1) • Internal node weight • If w(left) > w(right) then w = w(left) • If w(right) > w(left) then w = w(right) • If w(right) = w(left) then w = w(left) + 1 • Phase 2: translate heavier child first • Can be done by rewriting the expression such that heavier expressions appear first and then using improved cgen**Example**_t0 = cgen( a+(b+(c*d)) ) • + and * are commutative operators**Assigning weights**_t0 = cgen( a+(b+(c*d)) ) • + and * are commutative operators + w=2 a + w=1 w=2 b w=1 w=2 * c d w=1 w=1**Rewriting the expression**_t0 = cgen( a+(b+(c*d)) ) • + and * are commutative operators + w=2 a + w=1 w=2 b w=1 w=2 * c d w=1 w=1 Rewriting expression yields: _t0 = cgen( ((c*d)+b)+a )**Optimization points**source code Frontend Code generator target code IR User: profile program, change algorithm Compiler: apply IR optimizations Compiler: register allocation, instruction selection, peephole transformations today**Overview of IR optimization**• Formalisms and Terminology • Control-flow graphs • Basic blocks • Local optimizations • Speeding up small pieces of a function • Global optimizations • Speeding up functions as a whole • The dataflow framework • Defining and implementing a wide class of optimizations**Program Analysis**• In order to optimize a program, the compiler has to be able to reason about the properties of that program • An analysis is called sound if 
it never asserts an incorrect fact about a program • All the analyses we will discuss in this class are sound • (Why?)**Soundness**int x; int y; if (y < 5) x = 137; else x = 42; Print(x); “At this point in the program, x holds some integer value”**Soundness**int x; int y; if (y < 5) x = 137; else x = 42; Print(x); “At this point in the program, x is either 137 or 42”**Soundness**int x; int y; if (y < 5) x = 137; else x = 42; Print(x); “At this point in the program, x is 137”**Soundness**int x; int y; if (y < 5) x = 137; else x = 42; Print(x); “At this point in the program, x is either 137, 42, or 271”**Semantics-preserving optimizations**• An optimization is semantics-preserving if it does not alter the semantics of the original program • Examples: • Eliminating unnecessary temporary variables • Computing values that are known statically at compile-time instead of runtime • Evaluating constant expressions outside of a loop instead of inside • Non-examples: • Replacing bubble sort with quicksort (why?) 
• The optimizations we will consider in this class are all semantics-preserving**A formalism for IR optimization**• Every phase of the compiler uses some new abstraction: • Scanning uses regular expressions • Parsing uses CFGs • Semantic analysis uses proof systems and symbol tables • IR generation uses ASTs • In optimization, we need a formalism that captures the structure of a program in a way amenable to optimization**Visualizing IR**• main: • _tmp0 = Call _ReadInteger; • a = _tmp0; • _tmp1 = Call _ReadInteger; • b = _tmp1; • _L0: • _tmp2 = 0; • _tmp3 = b == _tmp2; • _tmp4 = 0; • _tmp5 = _tmp3 == _tmp4; • IfZ _tmp5 Goto _L1; • c = a; • a = b; • _tmp6 = c % a; • b = _tmp6; • Goto _L0; • _L1: • Push a; • Call _PrintInt;**Visualizing IR**• main: • _tmp0 = Call _ReadInteger; • a = _tmp0; • _tmp1 = Call _ReadInteger; • b = _tmp1; • _L0: • _tmp2 = 0; • _tmp3 = b == _tmp2; • _tmp4 = 0; • _tmp5 = _tmp3 == _tmp4; • IfZ _tmp5 Goto _L1; • c = a; • a = b; • _tmp6 = c % a; • b = _tmp6; • Goto _L0; • _L1: • Push a; • Call _PrintInt;**Visualizing IR**start • main: • _tmp0 = Call _ReadInteger; • a = _tmp0; • _tmp1 = Call _ReadInteger; • b = _tmp1; • _L0: • _tmp2 = 0; • _tmp3 = b == _tmp2; • _tmp4 = 0; • _tmp5 = _tmp3 == _tmp4; • IfZ _tmp5 Goto _L1; • c = a; • a = b; • _tmp6 = c % a; • b = _tmp6; • Goto _L0; • _L1: • Push a; • Call _PrintInt; _tmp0 = Call _ReadInteger; a = _tmp0; _tmp1 = Call _ReadInteger; b = _tmp1; _tmp2 = 0; _tmp3 = b == _tmp2; _tmp4 = 0; _tmp5 = _tmp3 == _tmp4; IfZ _tmp5 Goto _L1; c = a; a = b; _tmp6 = c % a; b = _tmp6; Goto _L0; Push a; Call _PrintInt; end**Basic blocks**• A basic block is a sequence of IR instructions where • There is exactly one spot where control enters the sequence, which must be at the start of the sequence • There is exactly one spot where control leaves the sequence, which must be at the end of the sequence • Informally, a sequence of instructions that always execute as a group**Control-Flow Graphs**A control-flow graph (CFG) is a 
graph of the basic blocks in a function. The term CFG is overloaded – from here on out, we'll mean “control-flow graph” and not “context-free grammar”. Each edge from one basic block to another indicates that control can flow from the end of the first block to the start of the second block. There is a dedicated node for the start and end of a function.**Types of optimizations**• An optimization is local if it works on just a single basic block • An optimization is global if it works on an entire control-flow graph • An optimization is interprocedural if it works across the control-flow graphs of multiple functions • We won't talk about this in this course**Basic blocks exercise**• int main() { • int x; • int y; • int z; • y = 137; • if (x == 0) • z = y; • else • x = y; • } • START: • _t0 = 137; • y = _t0; • IfZ x Goto _L0; • _t1 = y; • z = _t1; • Goto END; • _L0: • _t2 = y; • x = _t2; • END: Divide the code into basic blocks**Control-flow graph exercise**• int main() { • int x; • int y; • int z; • y = 137; • if (x == 0) • z = y; • else • x = y; • } • START: • _t0 = 137; • y = _t0; • IfZ x Goto _L0; • _t1 = y; • z = _t1; • Goto END; • _L0: • _t2 = y; • x = _t2; • END: Draw the control-flow graph**Control-flow graph exercise**• int main() { • int x; • int y; • int z; • y = 137; • if (x == 0) • z = y; • else • x = y; • } start _t0 = 137; y = _t0; IfZ x Goto _L0; _t1 = y; z = _t1; _t2 = y; x = _t2; End**Local optimizations**• int main() { • int x; • int y; • int z; • y = 137; • if (x == 0) • z = y; • else • x = y; • } start _t0 = 137; y = _t0; IfZ x Goto _L0; _t1 = y; z = _t1; _t2 = y; x = _t2; End**Local optimizations**• int main() { • int x; • int y; • int z; • y = 137; • if (x == 0) • z = y; • else • x = y; • } start _t0 = 137; y = _t0; IfZ x Goto _L0; _t1 = y; z = _t1; _t2 = y; x = _t2; End**Local optimizations**• int main() { • int x; • int y; • int z; • y = 137; • if (x == 0) • z = y; • else • x = y; • } start y = 137; IfZ x Goto _L0; _t1 = y; z = _t1; _t2 = y; x = 
_t2; End