![]() |
|
Message-ID: <z6t4ztqt6uhufqoezsmn627lwly45jlh6qwvp4xth2tcj36v4o@txpjfna7inio> Date: Thu, 20 Mar 2025 12:07:21 +0100 From: Ignacy Gawędzki <ignacy.gawedzki@...en-communications.fr> To: musl@...ts.openwall.com Subject: [PATCH 1/1] tools: Rework adding of CFI annotations. Rework awk scripts used to add CFI annotations to i386 and x86_64 assembly, in order to properly maintain CFA offset across in-function jumps. Add arm and aarch64 versions of these scripts. Signed-off-by: Ignacy Gawędzki <ignacy.gawedzki@...en-communications.fr> --- tools/add-cfi.aarch64.awk | 287 +++++++++++++++++++++++++++++ tools/add-cfi.arm.awk | 367 ++++++++++++++++++++++++++++++++++++++ tools/add-cfi.common.awk | 36 +++- tools/add-cfi.i386.awk | 331 +++++++++++++++++++++++----------- tools/add-cfi.x86_64.awk | 292 ++++++++++++++++++++---------- 5 files changed, 1100 insertions(+), 213 deletions(-) create mode 100644 tools/add-cfi.aarch64.awk create mode 100644 tools/add-cfi.arm.awk diff --git a/tools/add-cfi.aarch64.awk b/tools/add-cfi.aarch64.awk new file mode 100644 index 00000000..cc27e517 --- /dev/null +++ b/tools/add-cfi.aarch64.awk @@ -0,0 +1,287 @@ +# Insert GAS CFI directives ("control frame information") into AArch64 asm input. +# +# CFI directives tell the assembler how to generate "stack frame" debug info. +# This information can tell a debugger (like gdb) how to find the current stack +# frame at any point in the program code, and how to find the values which +# various registers had at higher points in the call stack. +# With this information, the debugger can show a backtrace, and you can move up +# and down the call stack and examine the values of local variables. + +BEGIN { + # Don't put CFI data in the .eh_frame ELF section (which we don't keep). + print ".cfi_sections .debug_frame" + + # Only emit CFI directives inside a function. + in_function = "" + + # Emit .loc directives with line numbers from original source. 
+ printf ".file 1 \"%s\"\n", ARGV[1] + line_number = 0 + + re_label = "([0-9]+|[a-zA-Z_][a-zA-Z0-9_]*)" + + # Build an associative array of canonical register names. + for (i = 0; i < 30; ++i) + regname["x" i] = regname["w" i] = "x" i + regname["x30"] = regname["w30"] = regname["lr"] = "x30" + regname["xzr"] = regname["wzr"] = "xzr" + regname["sp"] = regname["wsp"] = "sp" +} + +{ + ++line_number + + # Clean the input up before doing anything else. + # Delete comments. + gsub(/^#.*|\/\/.*|\/\*.*\*\//, "") + + # Canonicalize whitespace. + gsub(/[ \t]+/, " ") # Mawk doesn't understand \s. + gsub(/ *, */, ",") + gsub(/ *: */, ": ") + gsub(/ $/, "") + gsub(/^ /, "") +} + +# Check for assembler directives which we care about. +/^\.(section|data|text)/ { + # A .cfi_startproc/.cfi_endproc pair should be within the same section + # otherwise, clang will choke when generating ELF output. + if (in_function) { + print ".cfi_endproc" + in_function = "" + } +} + +# Record each function name. +/^\.type [a-zA-Z0-9_]+( STT_FUNCTION|,[#@%"]function)/ { + functions[substr($2, 1, length($2) - 10)] = 1 +} + +# Not interested in assembler directives beyond this, just pass them through. +/^\./ { + print + next +} + +# Helper to adjust CFA offset. +function adjust_sp_offset(delta) { + if (in_function) { + printf ".cfi_adjust_cfa_offset %d\n", delta + cfa_offset[in_function] += delta + } +} + +# Helper to invalidate unsaved register. +function trashed(reg) { + if (in_function && !(reg in saved) && !(reg in dirty)) + printf ".cfi_undefined %s\n", reg + dirty[reg] = 1 +} + +# Helper to process jumps to labels by saving the current CFA offset. +function jump_to_label(label) { + if (in_function) { + if (match(label, /^[0-9]+f$/)) # "forward" label + cfa_offset[substr(label, 1, RLENGTH - 1)] = cfa_offset[in_function] + else if (match(label, /^[a-zA-Z_][a-zA-Z0-9_]*$/)) + cfa_offset[label] = cfa_offset[in_function] + } +} + +# Helper to set relative offset of registers pushed on the stack. 
+function push_regs(regs, numregs, i) { + adjust_sp_offset(numregs * 4) + for (i = 1; i <= numregs; ++i) { + reg = regname[regs[i]] + if (!(reg in saved) && !(reg in dirty)) { + printf ".cfi_rel_offset %s,%i\n", reg, ((i - 1) * 4) + saved[reg] = 1 + } + } +} + +# Helper to invalidate unsaved registers popped from the stack. +function pop_regs(regs, numregs, i) { + adjust_sp_offset(numregs * -4) + for (i = 1; i <= numregs; ++i) { + reg = regname[regs[i]] + trashed(reg) + } +} + +# Helper to save a single register saved in SP-relative locations. +function save_reg(reg, offset) { + reg = regname[reg] + if (!(reg in saved) && !(reg in dirty)) { + printf ".cfi_rel_offset %s,%d\n", reg, offset + saved[reg] = 1 + } +} + +# Process labels. +$0 ~ "^" re_label ":" { + # Parse each leading label. + while (match($0, "^" re_label ":")) { + + # Extract label name. + label = substr($1, 1, RLENGTH - 1) + + # Remove label from current line. + sub("^" re_label ": ?", "") + + if (label in functions) { + if (in_function) { + print ".cfi_endproc" + for (l in called) + delete called[l] + } + + in_function = label + print ".cfi_startproc" + + for (reg in saved) + delete saved[reg] + for (reg in dirty) + delete dirty[reg] + } + + printf "%s:\n", label + + # If this label has been jumped to, define the CFA offset to its + # value at the location of the jump. + if (!(label in functions) && in_function && label in cfa_offset) { + if (cfa_offset[in_function] != cfa_offset[label]) { + printf ".cfi_def_cfa_offset %d\n", cfa_offset[label] + cfa_offset[in_function] = cfa_offset[label] + } + delete cfa_offset[label] + } + + # If this label has been called, possibly invalidate LR. + if (label in called && !(label in functions)) { + trashed("lr") + delete called[label] + } + } + # An instruction may follow on the same line, so continue processing. +} + +# Skip empty line. +/^$/ { next } + +# Issue source line number. 
+{ + printf ".loc 1 %d\n", line_number + print +} + +# Process jumps to label (using B*). +/^b[^xrl]/ { + jump_to_label($2) +} + +# Process jumps to label (using [CT]BN?Z). +/^[ct]bn?z / { + if (match($2, /,.+$/)) + jump_to_label(substr($2, RSTART + 1, RLENGTH - 1)) +} + +# Issue relative offsets of registers stored in SP-relative locations. +/^st(n?p|r[bh]?|l[lu]?r|tr|ur) .+,\[(sp|x30)[,\]]/ { + if (in_function) { + if (match($2, /(,#?[+-]?(0x[0-9a-fA-F]+|[0-9]+))?\]$/)) { + # Offset with no write-back. + if (RLENGTH == 1) + offset = 0 + else + offset = parse_const(substr($2, RSTART + 2, RLENGTH - 3)) + split($2, operands, ",") + if (match($1, /^stn?p$/)) { + if (match(operands[1], /^x/)) { + save_reg(operands[1], offset) + save_reg(operands[2], offset + 8) + } + } else if (match(operands[1], /^x/)) + save_reg(operands[1], offset) + } else if (match($2, /,#?[+-]?(0x[0-9a-fA-F]+|[0-9]+)\]!$/)) { + # Pre-index with write-back. + offset = parse_const(substr($2, RSTART + 2, RLENGTH - 4)) + adjust_sp_offset(-offset) + split($2, operands, ",") + if ($1 == "stp") { + if (match(operands[1], /^x/)) { + save_reg(operands[1], 0) + save_reg(operands[2], 8) + } + } else if (match(operands[1], /^x/)) + save_reg(operands[1], 0) + } else if (match($2, /,#?[+-]?(0x[0-9a-fA-F]+|[0-9]+)$/)) { + # Post-index + offset = parse_const(substr($2, RSTART + 2, RLENGTH - 2)) + split($2, operands, ",") + if ($1 == "stp") { + if (match(operands[1], /^x/)) { + save_reg(operands[1], 0) + save_reg(operands[2], 8) + } + } else if (match(operands[1], /^x/)) + save_reg(operands[1], 0) + adjust_sp_offset(-offset) + } + } +} + +# Adjust CFA offset when decreasing SP. +/subs?(\.[nw])? sp,sp,/ { + if (in_function && match($2, /,#[+-]?(0x[0-9a-fA-F]+|[0-9]+)$/)) + adjust_sp_offset(parse_const(substr($2, RSTART + 2, RLENGTH - 2))) +} + +# Adjust CFA offset when increasing SP. +/adds?(\.[nw])? 
sp,sp,/ { + if (in_function && match($2, /,#[+-]?(0x[0-9a-fA-F]+|[0-9]+)$/)) + adjust_sp_offset(-parse_const(substr($2, RSTART + 2, RLENGTH - 2))) +} + +# Process calls to labels. +/bl[a-z]* / { + if (match($2, /^[0-9]+f$/)) # "forward" label + called[substr($2, 1, RLENGTH - 1)] = 1 + else if (match($2, /^[a-zA-Z_][0-9a-zA-Z_]*$/)) + called[$2] = 1 +} + +# Invalidate unsaved registers being written to. +/^(adcs?|adds?|adrp?|ands?|asrv?|bfc|bfi|bfm|bfxil|bics?|cin[cv]|cl[sz]|cneg|crc32[a-z]+|csel|csetm?|csin[cv]|csneg|eo[nr]|extr|ldap(r[bh]?|ur(s?[bhw]?))|ldar[bh]?|ldax[pr][bh]?|ldlar[bh]?|ldr((aa)?|s?[bhw])|ldtrs?[bhw]?|ldurs?[bhw]?|ldxr[bh]?|ls[lr]v?|madd|mneg|mov[knz]?|mrs|msub|mul|mvn|negs?|ngcs?|orn|orr|pac[a-z0-9]+|rbit|rev(16|32)?|rorv?|sbcs?|sbfiz|sbfm|sbfx|sdiv|smaddl|smnegl|smsubl|smul[hl]|subs?|sxt[bhw]|sysl|ubfiz|ubfm|ubfx|udiv|umaddl|umnegl|umsubl|umul[hl]|uxt[bhw]) ([xw]([0-9]|[12][0-9]|30)|sp),/ { + split($2, args, ",") + reg = args[1] + if (reg != "sp") + trashed(regname[reg]) +} + +# Invalidate unsaved registers being written to by atomic operations in memory. +/^ld(add|clr|eor|set|[su](max|min))/ { + split($2, args, ",") + trashed(regname[args[2]]) +} + +# Invalidate unsaved registers being written to by pair loading. +/^ld[nx]p(sw)? / { + split($2, args, ",") + trashed(regname[args[1]]) + trashed(regname[args[2]]) +} + +# Invalidate unsaved registers being written to by long instructions. +/^(smlals?|smlal(bb|bt|tb|tt)|smlaldx?|smlsldx?|smull|umaal|umlal|umulls?) / { + split($2, args, ",") + trashed(regname[args[1]]) + trashed(regname[args[2]]) +} + +END { + # Issue end of function if still inside one. + if (in_function) + print ".cfi_endproc" +} diff --git a/tools/add-cfi.arm.awk b/tools/add-cfi.arm.awk new file mode 100644 index 00000000..7aa0cf8c --- /dev/null +++ b/tools/add-cfi.arm.awk @@ -0,0 +1,367 @@ +# Insert GAS CFI directives ("control frame information") into ARM asm input. 
+# +# CFI directives tell the assembler how to generate "stack frame" debug info. +# This information can tell a debugger (like gdb) how to find the current stack +# frame at any point in the program code, and how to find the values which +# various registers had at higher points in the call stack. +# With this information, the debugger can show a backtrace, and you can move up +# and down the call stack and examine the values of local variables. + +BEGIN { + # Don't put CFI data in the .eh_frame ELF section (which we don't keep). + print ".cfi_sections .debug_frame" + + # Only emit CFI directives inside a function. + in_function = "" + + # Emit .loc directives with line numbers from original source. + printf ".file 1 \"%s\"\n", ARGV[1] + line_number = 0 + + re_label = "([0-9]+|[a-zA-Z_][a-zA-Z0-9_]*)" + + # Build an associative array of canonical register names. + for (i = 0; i < 10; ++i) { + regname["r" i] = "r" i + regnum["r" i] = i + } + regname["r10"] = regname["sl"] = "r10" + regnum["r10"] = regnum["sl"] = 10 + regname["r11"] = regname["fp"] = "r11" + regnum["r11"] = regnum["fp"] = 11 + regname["r12"] = regname["ip"] = "r12" + regnum["r12"] = regnum["ip"] = 12 + regname["r13"] = regname["sp"] = "r13" + regnum["r13"] = regnum["sp"] = 13 + regname["r14"] = regname["lr"] = "r14" + regnum["r14"] = regnum["lr"] = 14 + regname["r15"] = regname["pc"] = "r15" + regnum["r15"] = regnum["pc"] = 15 +} + +{ + ++line_number + + # Clean the input up before doing anything else. + # Delete comments. + gsub(/(^#|@|\/\/).*|\/\*.*\*\//, "") + + # Canonicalize whitespace. + gsub(/[ \t]+/, " ") # Mawk doesn't understand \s. + gsub(/ *, */, ",") + gsub(/ *: */, ": ") + gsub(/ $/, "") + gsub(/^ /, "") +} + +# Check for assembler directives which we care about. +/^\.(section|data|text)/ { + # A .cfi_startproc/.cfi_endproc pair should be within the same section + # otherwise, clang will choke when generating ELF output. 
+ if (in_function) { + print ".cfi_endproc" + in_function = "" + } +} + +# Record each function name. +/^\.type [a-zA-Z0-9_]+( STT_FUNCTION|,[#@%"]function)/ { + functions[substr($2, 1, length($2) - 10)] = 1 +} + +# Not interested in assembler directives beyond this, just pass them through. +/^\./ { + print + next +} + +# Helper to adjust CFA offset. +function adjust_sp_offset(delta) { + if (in_function) { + printf ".cfi_adjust_cfa_offset %d\n", delta + cfa_offset[in_function] += delta + } +} + +# Helper to invalidate unsaved register. +function trashed(reg) { + if (in_function && !(reg in saved) && !(reg in dirty)) + printf ".cfi_undefined %s\n", reg + dirty[reg] = 1 +} + +# Helper to process jumps to labels by saving the current CFA offset. +function jump_to_label(label) { + if (in_function) { + if (match(label, /^[0-9]+f$/)) # "forward" label + cfa_offset[substr(label, 1, RLENGTH - 1)] = cfa_offset[in_function] + else if (match(label, /^[a-zA-Z_][a-zA-Z0-9_]*$/)) + cfa_offset[label] = cfa_offset[in_function] + } +} + +# Helper to save a single register saved in SP-relative locations. +function save_reg(reg, offset) { + reg = regname[reg] + if (!(reg in saved) && !(reg in dirty)) { + printf ".cfi_rel_offset %s,%d\n", reg, offset + saved[reg] = 1 + } +} + +# Helper to save registers relative to SP. +function save_regs(regs, numregs, i) { + for (i = 1; i <= numregs; ++i) + save_reg(regname[regs[i]], (i - 1) * 4) +} + +# Helper to set relative offset of registers pushed on the stack. +function push_regs(regs, numregs, i) { + adjust_sp_offset(numregs * 4) + for (i = 1; i <= numregs; ++i) + save_reg(regname[regs[i]], (i - 1) * 4) +} + +# Helper to invalidate unsaved registers popped from the stack. +function pop_regs(regs, numregs, i) { + adjust_sp_offset(numregs * -4) + for (i = 1; i <= numregs; ++i) { + reg = regname[regs[i]] + trashed(reg) + } +} + +# Helper to parse register lists. 
+function split_reglist(arg, regs, num, toks, tmp, dash, i, j) { + while (match(arg, /^{[^}]+}/)) { + num = split(substr(arg, RSTART + 1, RLENGTH - 2), toks, ",") + for (i = 1; i <= num; ++i) + if (match(toks[i], /^r([0-9]|1[0-5])-r([0-9]|1[0-5])$/)) { + dash = index(toks[i], "-") + first = 0 + substr(toks[i], 2, dash - 2) + last = 0 + substr(toks[i], dash + 2) + for (j = first; j <= last; ++j) + tmp[j] + } else + tmp[regnum[toks[i]]] + arg = substr(arg, RSTART + RLENGTH) + if (!match(arg, /^[\t ]*[+|][\t ]*/)) + break + arg = substr(arg, RLENGTH + 1) + } + num = 0 + for (i = 0; i < 16; ++i) { + if (!(i in tmp)) + continue + regs[++num] = regname["r" i] + } + return num +} + +# Process labels. +$0 ~ "^" re_label ":" { + # Parse each leading label. + while (match($0, "^" re_label ":")) { + + # Extract label name. + label = substr($1, 1, RLENGTH - 1) + + # Remove label from current line. + sub("^" re_label ": ?", "") + + if (label in functions) { + if (in_function) { + print ".cfi_endproc" + for (l in called) + delete called[l] + } + + in_function = label + print ".cfi_startproc" + + for (reg in saved) + delete saved[reg] + for (reg in dirty) + delete dirty[reg] + } + + printf "%s:\n", label + + # If this label has been jumped to, define the CFA offset to its + # value at the location of the jump. + if (!(label in functions) && in_function && label in cfa_offset) { + if (cfa_offset[in_function] != cfa_offset[label]) { + printf ".cfi_def_cfa_offset %d\n", cfa_offset[label] + cfa_offset[in_function] = cfa_offset[label] + } + delete cfa_offset[label] + } + + # If this label has been called, possibly invalidate LR. + if (label in called && !(label in functions)) { + trashed("lr") + delete called[label] + } + } + # An instruction may follow on the same line, so continue processing. +} + +# Skip empty line. +/^$/ { next } + +# Issue source line number. +{ + printf ".loc 1 %d\n", line_number + print +} + +# Process jumps to label (using B*). 
+/^b(eq|ne|cs|hs|cc|lo|mi|pl|vs|vc|hi|ls|ge|lt|gt|le|al)?(\.[nw])? / { + jump_to_label($2) +} + +# Process jumps to label (using CBNZ?). +/^cbnz? / { + if (match($2, /,.*$/)) + jump_to_label(substr($2, RSTART + 1, RLENGTH - 1)) +} + +# Adjust CFA offset and issue relative offsets of pushed registers using PUSH. +/^push / { + if (in_function) { + numregs = split_reglist($2, regs) + push_regs(regs, numregs); + } +} + +# Adjust CFA offset and issue relative offsets of pushed registers using STMFD. +/^stm(fd|db)(al)?(\.[nw])? (sp|r13)!,/ { + if (in_function) { + numregs = split_reglist(substr($2, index($2, ",") + 1), regs) + push_regs(regs, numregs); + } +} + +/^stm(ia|ea)?(al)?(\.[nw])? (sp|r13),/ { + if (in_function) { + numregs = split_reglist(substr($2, index($2, ",") + 1), regs) + save_regs(regs, numregs); + } +} + +# Adjust CFA offset and invalidate unsaved registers popped using POP. +/^pop / { + if (in_function) { + numregs = split_reglist($2, regs) + pop_regs(regs, numregs) + } +} + +# Adjust CFA offset and invalidate unsaved registers popped using LDMFD. +/^ldm(fd|ia)(al)?(\.[nw])? (sp|r13)!,/ { + if (in_function) { + numregs = split_reglist(substr($2, index($2, ",") + 1), regs) + pop_regs(regs, numregs) + } +} + +# Issue relative offsets of registers stored in SP-relative locations. +/^str[a-z.]* .*,\[(sp|r13)[,\]]/ { + if (in_function && !match($1, /^str(ex)?[bh]/)) { + if (match($2, /(,#[+-]?(0x[0-9a-fA-F]+|[0-9]+))?\]$/)) { + # Offset with no write-back. + if (RLENGTH == 1) + offset = 0 + else + offset = parse_const(substr($2, RSTART + 2, RLENGTH - 3)) + split($2, operands, ",") + if (match($1, /^str(ex)?d/)) { + save_reg(operands[1], offset) + save_reg(operands[2], offset + 4) + } else + save_reg(operands[1], offset) + } else if (match($2, /,#[+-]?(0x[0-9a-fA-F]+|[0-9]+)\]!$/)) { + # Pre-index with write-back. 
+ offset = parse_const(substr($2, RSTART + 2, RLENGTH - 4)) + adjust_sp_offset(-offset) + split($2, operands, ",") + if (match($1, /^str(ex)?d/)) { + save_reg(operands[1], 0) + save_reg(operands[2], 4) + } else + save_reg(operands[1], 0) + } else if (match($2, /,#[+-]?(0x[0-9a-fA-F]+|[0-9]+)$/)) { + # Post-index + offset = parse_const(substr($2, RSTART + 2, RLENGTH - 2)) + split($2, operands, ",") + if (match($1, /^str(ex)?d/)) { + save_reg(operands[1], 0) + save_reg(operands[2], 4) + } else + save_reg(operands[1], 0) + adjust_sp_offset(-offset) + } + } +} + +# Adjust CFA offset when decreasing SP. +/subs?(al)?(\.[nw])? (sp|r13),(sp|r13),/ { + if (in_function && match($2, /,#[+-]?(0x[0-9a-fA-F]+|[0-9]+)$/)) + adjust_sp_offset(parse_const(substr($2, RSTART + 2, RLENGTH - 2))) +} + +# Adjust CFA offset when increasing SP. +/adds?(al)?(\.[nw])? (sp|r13),(sp|r13),/ { + if (in_function && match($2, /,#[+-]?(0x[0-9a-fA-F]+|[0-9]+)$/)) + adjust_sp_offset(-parse_const(substr($2, RSTART + 2, RLENGTH - 2))) +} + +# Process calls to labels. +/^blx?(eq|ne|cs|hs|cc|lo|mi|pl|vs|vc|hi|ls|ge|lt|gt|le|al)?(\.[nw])? / { + if (match($2, /^[0-9]+f$/)) # "forward" label + called[substr($2, 1, RLENGTH - 1)] = 1 + else if (match($2, /^[a-zA-Z_][0-9a-zA-Z_]*$/)) + called[$2] = 1 +} + +# Invalidate unsaved registers being written to. +/^((adc|add|and|asr|adr|bic|eor|lsl|lsr|mla|mov|mul|mvn|orn|orr|ror|rrx|rsb|rsc|sbc|sub)s?|bfc|bfi|clz|cpy|ldr[a-z]*|mls|movt|mrs|neg|pkh(bt|tb)|qadd(8|16)?|qasx|qdadd|qdsub|qsax|qsub(8|16)?|rbit|rev(16)?|revsh|sadd(16|8)|sasx|sbfx|sdiv|sel|shadd(16|8)|shasx|shsax|shsub(16|8)|smla(bb|bt|tb|tt)|smladx?|smlaw[tb]|smlsdx?|smmlar?|smlsr?|smmulr?|smuadx?|smul(bb|bt|tb|tt)|smulw[bt]|smusdx?|ssat(16)?|ssax|ssub(16|8)|swpb?|sxtab(16)?|sxtah|sxtb(16)?|sxth|sxtb(16)?|sxth|uadd(16|8)|uasx|ubfx|udiv|uhadd(16|8)|uhasx|uhsax|uhsub(16|8)|uqadd(16|8)|uqasx|uqsax|uqsub(16|8)|usada?8|usat(16)?|usax|usub(16|8)|uxtab(16)?|uxtah|uxtb(16)?|uxth)(eq|ne|cs|cc|mi|pl|vs|vc|hi|ls|ge|lt|gt|le|al)? 
(r([0-9]|1[0-5])|ip|sp|lr|pc),/ { + split($2, args, ",") + reg = args[1] + if (reg != "sp") + trashed(regname[reg]) +} + +# Invalidate unsaved registers being written to by long instructions. +/^(smlals?|smlal(bb|bt|tb|tt)|smlaldx?|smlsldx?|smull|umaal|umlal|umulls?)/ { + split($2, args, ",") + trashed(regname[args[1]]) + trashed(regname[args[2]]) +} + +# Invalidate unsaved register being modified by write-back on store multiple. +/^stm[a-z.]* [^,]+!,/ { + first_arg = substr($2, 1, index($2, ",") - 1) + if (!match(first_arg, /^(sp|r13)/)) + trashed(regname[substr(first_arg, 1, length(first_arg) - 1)]) +} + +# Invalidate unsaved registers being modified by load multiple. +/^ldm[a-z.]* [^,]+,{.*}$/ { + comma = index($2, ",") + first_arg = substr($2, 1, comma - 1) + other_args = substr($2, comma + 1) + if (!match(first_arg, /^(sp|r13)/)) { + if (match(first_arg, /!$/)) + trashed(regname[substr(first_arg, 1, RSTART - 1)]) + numregs = split_reglist(other_args, regs) + for (i = 1; i <= numregs; ++i) + trashed(regname[regs[i]]) + } +} + +END { + # Issue end of function if still inside one. 
+ if (in_function) + print ".cfi_endproc" +} diff --git a/tools/add-cfi.common.awk b/tools/add-cfi.common.awk index 04482d43..fe3aec03 100644 --- a/tools/add-cfi.common.awk +++ b/tools/add-cfi.common.awk @@ -1,26 +1,46 @@ -function hex2int(str, i) { +function hex2int(str, i) { str = tolower(str) for (i = 1; i <= 16; i++) { char = substr("0123456789abcdef", i, 1) - lookup[char] = i-1 + lookup[char] = i - 1 } result = 0 for (i = 1; i <= length(str); i++) { - result = result * 16 - char = substr(str, i, 1) - result = result + lookup[char] + result *= 16 + char = substr(str, i, 1) + result += lookup[char] + } + return result +} + +function oct2int(str, i) { + str = tolower(str) + + for (i = 1; i <= 8; ++i) { + char = substr("01234567", i, 1) + lookup[char] = i - 1 + } + + result = 0 + for (i = 1; i <= length(str); ++i) { + result *= 8 + char = substr(str, i, 1) + result += lookup[char] } return result } function parse_const(str) { - sign = sub(/^-/, "", str) - hex = sub(/^0x/, "", str) + neg = sub(/^-/, "", str) + oct = match(str, /^0[0-7]/) + hex = sub(/^0x/, "", str) if (hex) n = hex2int(str) + else if (oct) + n = oct2int(str) else n = str+0 - return sign ? -n : n + return neg? -n: n } diff --git a/tools/add-cfi.i386.awk b/tools/add-cfi.i386.awk index d05037de..d4b59e3f 100644 --- a/tools/add-cfi.i386.awk +++ b/tools/add-cfi.i386.awk @@ -1,123 +1,179 @@ -# Insert GAS CFI directives ("control frame information") into x86-32 asm input +# Insert GAS CFI directives ("control frame information") into x86-32 asm input. # -# CFI directives tell the assembler how to generate "stack frame" debug info +# CFI directives tell the assembler how to generate "stack frame" debug info. # This information can tell a debugger (like gdb) how to find the current stack # frame at any point in the program code, and how to find the values which -# various registers had at higher points in the call stack +# various registers had at higher points in the call stack. 
# With this information, the debugger can show a backtrace, and you can move up -# and down the call stack and examine the values of local variables +# and down the call stack and examine the values of local variables. BEGIN { - # don't put CFI data in the .eh_frame ELF section (which we don't keep) + # Don't put CFI data in the .eh_frame ELF section (which we don't keep). print ".cfi_sections .debug_frame" - # only emit CFI directives inside a function - in_function = 0 + # Only emit CFI directives inside a function. + in_function = "" - # emit .loc directives with line numbers from original source + # Emit .loc directives with line numbers from original source. printf ".file 1 \"%s\"\n", ARGV[1] line_number = 0 - # used to detect "call label; label:" trick - called = "" + re_label = "([0-9]+|[a-zA-Z_][a-zA-Z0-9_]*)" + + for (i = 1; i <= 4; ++i) { + letter = substr("abcd", i, 1) + regname[letter "l"] = regname[letter "h"] = regname[letter "x"] = \ + regname["e" letter "x"] = "e" letter "x" + } + + regname["si"] = regname["esi"] = "esi" + regname["di"] = regname["edi"] = "edi" + regname["bp"] = regname["ebp"] = "ebp" + regname["sp"] = regname["esp"] = "esp" } +# For instructions with 2 operands, get 1st operand (assuming it is constant). function get_const1() { - # for instructions with 2 operands, get 1st operand (assuming it is constant) - match($0, /-?(0x[0-9a-fA-F]+|[0-9]+),/) - return parse_const(substr($0, RSTART, RLENGTH-1)) + match($2, /^\$[+-]?(0x[0-9a-fA-F]+|[0-9]+),/) + return parse_const(substr($2, 2, RLENGTH - 2)) } -function canonicalize_reg(register) { - if (match(register, /^e/)) - return register - else if (match(register, /[hl]$/)) # AH, AL, BH, BL, etc - return "e" substr(register, 1, 1) "x" - else # AX, BX, CX, etc - return "e" register -} +# Only use if you already know there is 1 and only 1 register. 
function get_reg() { - # only use if you already know there is 1 and only 1 register - match($0, /%e?([abcd][hlx]|si|di|bp)/) - return canonicalize_reg(substr($0, RSTART+1, RLENGTH-1)) + return regname[substr($2, 2, length($2) - 1)] } + +# For instructions with 2 operands, get 1st operand (assuming it is register). function get_reg1() { - # for instructions with 2 operands, get 1st operand (assuming it is register) - match($0, /%e?([abcd][hlx]|si|di|bp),/) - return canonicalize_reg(substr($0, RSTART+1, RLENGTH-2)) + match($2, /^%e?([abcd][hlx]|si|di|bp),/) + return regname[substr($2, 2, RLENGTH - 2)] } + +# For instructions with 2 operands, get 2nd operand (assuming it is register). function get_reg2() { - # for instructions with 2 operands, get 2nd operand (assuming it is register) - match($0, /,%e?([abcd][hlx]|si|di|bp)/) - return canonicalize_reg(substr($0, RSTART+2, RLENGTH-2)) + match($2, /,%e?([abcd][hlx]|si|di|bp)$/) + return regname[substr($2, RSTART + 2, RLENGTH - 2)] } +# Helper to adjust CFA offset. function adjust_sp_offset(delta) { - if (in_function) + if (in_function) { printf ".cfi_adjust_cfa_offset %d\n", delta + cfa_offset[in_function] += delta + } +} + +function save_reg(reg, offset) { + if (!(reg in saved) && !(reg in dirty)) { + printf ".cfi_rel_offset %s,%d\n", reg, offset + saved[reg] = 1 + } +} + +# Helper to process jumps to labels by saving the current CFA offset. +function jump_to_label(label) { + if (in_function) { + if (match(label, /^[0-9]+f$/)) # "forward" label + cfa_offset[substr(label, 1, RLENGTH - 1)] = cfa_offset[in_function] + else if (match(label, /^[a-zA-Z_][a-zA-Z0-9_]*$/)) + cfa_offset[label] = cfa_offset[in_function] + } } { - line_number = line_number + 1 + ++line_number - # clean the input up before doing anything else - # delete comments - gsub(/(#|\/\/).*/, "") + # Clean the input up before doing anything else. + # Delete comments. 
+ gsub(/#.*|\/\*.*\*\//, "") - # canonicalize whitespace - gsub(/[ \t]+/, " ") # mawk doesn't understand \s + # Canonicalize whitespace. + gsub(/[ \t]+/, " ") # Mawk doesn't understand \s. gsub(/ *, */, ",") gsub(/ *: */, ": ") + gsub(/%cs: */, "%cs:") + gsub(/%ds: */, "%ds:") + gsub(/%ss: */, "%ss:") + gsub(/%es: */, "%es:") + gsub(/%fs: */, "%fs:") + gsub(/%gs: */, "%gs:") gsub(/ $/, "") gsub(/^ /, "") } -# check for assembler directives which we care about +# Check for assembler directives which we care about. /^\.(section|data|text)/ { - # a .cfi_startproc/.cfi_endproc pair should be within the same section - # otherwise, clang will choke when generating ELF output + # A .cfi_startproc/.cfi_endproc pair should be within the same section. + # Otherwise, clang will choke when generating ELF output. if (in_function) { print ".cfi_endproc" - in_function = 0 + in_function = "" } } -/^\.type [a-zA-Z0-9_]+,@function/ { - functions[substr($2, 1, length($2)-10)] = 1 + +# Record each function name. +/^\.type [a-zA-Z0-9_]+( STT_FUNCTION|,[#@%"]function)/ { + functions[substr($2, 1, length($2) - 10)] = 1 } -# not interested in assembler directives beyond this, just pass them through + +# Not interested in assembler directives beyond this, just pass them through. /^\./ { print next } -/^[a-zA-Z0-9_]+:/ { - label = substr($1, 1, length($1)-1) # drop trailing : +$0 ~ "^" re_label ":" { + # Parse each leading label. + while (match($0, "^" re_label ":")) { - if (called == label) { - # note adjustment of stack pointer from "call label; label:" - adjust_sp_offset(4) - } + # Extract label name. + label = substr($1, 1, RLENGTH - 1) - if (functions[label]) { - if (in_function) - print ".cfi_endproc" + # Remove label from current line. 
+ sub("^" re_label ": ?", "") - in_function = 1 - print ".cfi_startproc" + if (label in functions) { + if (in_function) { + print ".cfi_endproc" + for (l in called) + delete called[l] + } - for (register in saved) - delete saved[register] - for (register in dirty) - delete dirty[register] - } + in_function = label + print ".cfi_startproc" + + for (reg in saved) + delete saved[reg] + for (reg in dirty) + delete dirty[reg] + } + + printf "%s:\n", label + + # If this label has been jumped to, define the CFA offset to its + # value at the location of the jump. + if (!(label in functions) && in_function && label in cfa_offset) { + if (cfa_offset[in_function] != cfa_offset[label]) { + printf ".cfi_def_cfa_offset %d\n", cfa_offset[label] + cfa_offset[in_function] = cfa_offset[label] + } + delete cfa_offset[label] + } - # an instruction may follow on the same line, so continue processing + # If this label has been called, adjust CFA offset. + if (label in called && !(label in functions)) { + adjust_sp_offset(4); + delete called[label] + } + } + # An instruction may follow on the same line, so continue processing. } +# Skip empty line. /^$/ { next } +# Issue source line number. { - called = "" printf ".loc 1 %d\n", line_number print } @@ -126,82 +182,145 @@ function adjust_sp_offset(delta) { # We do NOT attempt to understand foolish and ridiculous tricks like stashing # the stack pointer and then using %esp as a scratch register, or bitshifting # it or taking its square root or anything stupid like that. -# %esp should only be adjusted by pushing/popping or adding/subtracting constants +# %esp should only be adjusted by pushing/popping or adding/subtracting +# constants. # -/pushl?/ { - if (match($0, / %(ax|bx|cx|dx|di|si|bp|sp)/)) +/^push[wl]? / { + if ($1 == "pushw" || match($2, /^%([abcd]x|di|si|bp|sp)$/)) adjust_sp_offset(2) else adjust_sp_offset(4) } -/popl?/ { - if (match($0, / %(ax|bx|cx|dx|di|si|bp|sp)/)) + +/^pop[wl]? 
/ { + if ($1 == "popw" || match($2, /^%([abcd]x|di|si|bp|sp)$/)) adjust_sp_offset(-2) else adjust_sp_offset(-4) } -/addl? \$-?(0x[0-9a-fA-F]+|[0-9]+),%esp/ { adjust_sp_offset(-get_const1()) } -/subl? \$-?(0x[0-9a-fA-F]+|[0-9]+),%esp/ { adjust_sp_offset(get_const1()) } -/call/ { - if (match($0, /call [0-9]+f/)) # "forward" label - called = substr($0, RSTART+5, RLENGTH-6) - else if (match($0, /call [0-9a-zA-Z_]+/)) - called = substr($0, RSTART+5, RLENGTH-5) +/^pushal?$/ { + adjust_sp_offset(32) + if (in_function) { + save_reg("eax", 28) + save_reg("ecx", 24) + save_reg("edx", 20) + save_reg("ebx", 16) + save_reg("esp", 12) + save_reg("ebp", 8) + save_reg("esi", 4) + save_reg("edi", 0) + } +} + +/^pushaw$/ { + adjust_sp_offset(16) +} + +/^popal?$/ { + adjust_sp_offset(-32) +} + +/^popaw$/ { + adjust_sp_offset(-16) +} + +/^pushfl?$/ { + adjust_sp_offset(4) +} + +/^pushfw$/ { + adjust_sp_offset(2) +} + +/^popfl?$/ { + adjust_sp_offset(-4) +} + +/^popfw$/ { + adjust_sp_offset(-2) +} + +/^addl? \$[+-]?(0x[0-9a-fA-F]+|[0-9]+),%esp/ { + adjust_sp_offset(-get_const1()) +} + +/^subl? \$[+-]?(0x[0-9a-fA-F]+|[0-9]+),%esp/ { + adjust_sp_offset(get_const1()) +} + +/^call / { + if (match($2, /^[0-9]+f$/)) # "forward" label + called[substr($2, 1, RLENGTH - 1)] = 1 + else if (match($2, /^[a-zA-Z_][0-9a-zA-Z_]*$/)) + called[$2] = 1 +} + +/^j/ { + jump_to_label($2) } # TRACKING REGISTER VALUES FROM THE PREVIOUS STACK FRAME # -/pushl? %e(ax|bx|cx|dx|si|di|bp)/ { # don't match "push (%reg)" - # if a register is being pushed, and its value has not changed since the +/^pushl? %e([abcd]x|si|di|bp)$/ { + # Don't match "push (%reg)" + # If a register is being pushed, and its value has not changed since the # beginning of this function, the pushed value can be used when printing - # local variables at the next level up the stack - # emit '.cfi_rel_offset' for that + # local variables at the next level up the stack. + # Emit '.cfi_rel_offset' for that. 
- if (in_function) { - register = get_reg() - if (!saved[register] && !dirty[register]) { - printf ".cfi_rel_offset %s,0\n", register - saved[register] = 1 - } - } + if (in_function) + save_reg(get_reg(), 0) } -/movl? %e(ax|bx|cx|dx|si|di|bp),-?(0x[0-9a-fA-F]+|[0-9]+)?\(%esp\)/ { +/^movl? %e(ax|bx|cx|dx|si|di|bp),[+-]?(0x[0-9a-fA-F]+|[0-9]+)?\(%esp\)$/ { if (in_function) { - register = get_reg() - if (match($0, /-?(0x[0-9a-fA-F]+|[0-9]+)\(%esp\)/)) { - offset = parse_const(substr($0, RSTART, RLENGTH-6)) + if (match($2, /,[+-]?(0x[0-9a-fA-F]+|[0-9]+)\(%esp\)$/)) { + offset = parse_const(substr($2, RSTART + 1, RLENGTH - 7)) } else { offset = 0 } - if (!saved[register] && !dirty[register]) { - printf ".cfi_rel_offset %s,%d\n", register, offset - saved[register] = 1 - } + save_reg(get_reg1(), offset) } } # IF REGISTER VALUES ARE UNCEREMONIOUSLY TRASHED # ...then we want to know about it. # -function trashed(register) { - if (in_function && !saved[register] && !dirty[register]) { - printf ".cfi_undefined %s\n", register +function trashed(reg) { + if (in_function && !(reg in saved) && !(reg in dirty)) { + printf ".cfi_undefined %s\n", reg + dirty[reg] = 1 } - dirty[register] = 1 } -# this does NOT exhaustively check for all possible instructions which could -# overwrite a register value inherited from the caller (just the common ones) -/mov.*,%e?([abcd][hlx]|si|di|bp)$/ { trashed(get_reg2()) } -/(add|addl|sub|subl|and|or|xor|lea|sal|sar|shl|shr).*,%e?([abcd][hlx]|si|di|bp)$/ { +# This does NOT exhaustively check for all possible instructions which could +# overwrite a register value inherited from the caller (just the common ones). 
+/^mov.*,%e?([abcd][hlx]|si|di|bp)$/ { trashed(get_reg2()) } -/^i?mul [^,]*$/ { trashed("eax"); trashed("edx") } -/^i?mul.*,%e?([abcd][hlx]|si|di|bp)$/ { trashed(get_reg2()) } -/^i?div/ { trashed("eax"); trashed("edx") } -/(dec|inc|not|neg|pop) %e?([abcd][hlx]|si|di|bp)/ { trashed(get_reg()) } -/cpuid/ { trashed("eax"); trashed("ebx"); trashed("ecx"); trashed("edx") } +/^(add|sub|and|x?or|lea|s[ah][lr])[bwl]? [^,]+,%e?([abcd][hlx]|si|di|bp)$/ { + trashed(get_reg2()) +} +/^i?mul[bwl]? [^,]+$/ { + trashed("eax") + trashed("edx") +} +/^i?mul[bwl]? [^,]+,%e?([abcd][hlx]|si|di|bp)$/ { + trashed(get_reg2()) +} +/^i?div[bwl]? / { + trashed("eax") + trashed("edx") +} +/^(dec|inc|not|neg|pop)[bwl]? %e?([abcd][hlx]|si|di|bp)$/ { + trashed(get_reg()) +} +/^cpuid/ { + trashed("eax") + trashed("ebx") + trashed("ecx") + trashed("edx") +} END { if (in_function) diff --git a/tools/add-cfi.x86_64.awk b/tools/add-cfi.x86_64.awk index 7e1513d6..f484b7ab 100644 --- a/tools/add-cfi.x86_64.awk +++ b/tools/add-cfi.x86_64.awk @@ -1,169 +1,246 @@ -# Insert GAS CFI directives ("control frame information") into x86-64 asm input +# Insert GAS CFI directives ("control frame information") into x86-64 asm input. BEGIN { - # don't put CFI data in the .eh_frame ELF section (which we don't keep) + # Don't put CFI data in the .eh_frame ELF section (which we don't keep). print ".cfi_sections .debug_frame" - # only emit CFI directives inside a function - in_function = 0 + # Only emit CFI directives inside a function. + in_function = "" - # emit .loc directives with line numbers from original source + # Emit .loc directives with line numbers from original source. 
printf ".file 1 \"%s\"\n", ARGV[1] line_number = 0 - # used to detect "call label; label:" trick - called = "" + re_label = "([0-9]+|[a-zA-Z_][a-zA-Z0-9_]*)" + + for (i = 1; i <= 4; ++i) { + letter = substr("abcd", i, 1) + regname[letter "l"] = regname[letter "h"] = regname[letter "x"] = \ + regname["e" letter "x"] = regname["r" letter "x"] = "r" letter "x" + } + + regname["si"] = regname["esi"] = regname["rsi"] = "rsi" + regname["di"] = regname["edi"] = regname["rdi"] = "rdi" + regname["bp"] = regname["ebp"] = regname["rbp"] = "rbp" + regname["sp"] = regname["esp"] = regname["rsp"] = "rsp" + + for (i = 8; i <= 15; ++i) + regname["r" i] = "r" i } +# For instructions with 2 operands, get 1st operand (assuming it is constant). function get_const1() { - # for instructions with 2 operands, get 1st operand (assuming it is constant) - match($0, /-?(0x[0-9a-fA-F]+|[0-9]+),/) - return parse_const(substr($0, RSTART, RLENGTH-1)) + match($2, /^\$[+-]?(0x[0-9a-fA-F]+|[0-9]+),/) + return parse_const(substr($2, 2, RLENGTH - 2)) } -function canonicalize_reg(register) { - if (match(register, /^r/)) - return register - else if (match(register, /^e/)) - return "r" substr(register, 2, length(register)-1) - else if (match(register, /[hl]$/)) # AH, AL, BH, BL, etc - return "r" substr(register, 1, 1) "x" - else # AX, BX, CX, etc - return "r" register -} +# Only use if you already know there is 1 and only 1 register. function get_reg() { - # only use if you already know there is 1 and only 1 register - match($0, /%[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15)/) - return canonicalize_reg(substr($0, RSTART+1, RLENGTH-1)) + return regname[substr($2, 2, length($2) - 1)] } + +# For instructions with 2 operands, get 1st operand (assuming it is register). 
function get_reg1() { - # for instructions with 2 operands, get 1st operand (assuming it is register) - match($0, /%[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15),/) - return canonicalize_reg(substr($0, RSTART+1, RLENGTH-2)) + match($2, /^%[er]?([abcd][xlh]|si|di|bp|[89]|1[0-5]),/) + return regname[substr($2, 2, RLENGTH - 2)] } + +# For instructions with 2 operands, get 2nd operand (assuming it is register). function get_reg2() { - # for instructions with 2 operands, get 2nd operand (assuming it is register) - match($0, /,%[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15)/) - return canonicalize_reg(substr($0, RSTART+2, RLENGTH-2)) + match($2, /,%[er]?([abcd][xlh]|si|di|bp|[89]|1[0-5])$/) + return regname[substr($2, RSTART + 2, RLENGTH - 2)] } +# Helper to adjust CFA offset. function adjust_sp_offset(delta) { - if (in_function) + if (in_function) { printf ".cfi_adjust_cfa_offset %d\n", delta + cfa_offset[in_function] += delta + } +} + +# Helper to process jumps to labels by saving the current CFA offset. +function jump_to_label(label) { + if (in_function) { + if (match(label, /^[0-9]+f$/)) # "forward" label + cfa_offset[substr(label, 1, RLENGTH - 1)] = cfa_offset[in_function] + else if (match(label, /^[a-zA-Z_][a-zA-Z0-9_]*$/)) + cfa_offset[label] = cfa_offset[in_function] + } } { - line_number = line_number + 1 + ++line_number - # clean the input up before doing anything else - # delete comments - gsub(/(#|\/\/).*/, "") + # Clean the input up before doing anything else. + # Delete comments. + gsub(/#.*|\/\*.*\*\//, "") - # canonicalize whitespace - gsub(/[ \t]+/, " ") # mawk doesn't understand \s + # Canonicalize whitespace. + gsub(/[ \t]+/, " ") # Mawk doesn't understand \s. gsub(/ *, */, ",") gsub(/ *: */, ": ") gsub(/ $/, "") gsub(/^ /, "") } -# check for assembler directives which we care about +# Check for assembler directives which we care about. 
/^\.(section|data|text)/ { - # a .cfi_startproc/.cfi_endproc pair should be within the same section - # otherwise, clang will choke when generating ELF output + # A .cfi_startproc/.cfi_endproc pair should be within the same section. + # Otherwise, clang will choke when generating ELF output. if (in_function) { print ".cfi_endproc" - in_function = 0 + in_function = "" } } -/^\.type [a-zA-Z0-9_]+,@function/ { - functions[substr($2, 1, length($2)-10)] = 1 + +# Record each function name. +/^\.type [a-zA-Z0-9_]+( STT_FUNCTION|,[#@%"]function)/ { + functions[substr($2, 1, length($2) - 10)] = 1 } -# not interested in assembler directives beyond this, just pass them through +# Not interested in assembler directives beyond this, just pass them through. /^\./ { print next } -/^[a-zA-Z0-9_]+:/ { - label = substr($1, 1, length($1)-1) # drop trailing : +$0 ~ "^" re_label ":" { + # Parse each leading label. + while (match($0, "^" re_label ":")) { - if (called == label) { - # note adjustment of stack pointer from "call label; label:" - adjust_sp_offset(8) - } + # Extract label name. + label = substr($1, 1, RLENGTH - 1) - if (functions[label]) { - if (in_function) - print ".cfi_endproc" + # Remove label from current line. + sub("^" re_label ": ?", "") - in_function = 1 - print ".cfi_startproc" + if (label in functions) { + if (in_function) { + print ".cfi_endproc" + for (l in called) + delete called[l] + } - for (register in saved) - delete saved[register] - for (register in dirty) - delete dirty[register] - } + in_function = label + print ".cfi_startproc" - # an instruction may follow on the same line, so continue processing + for (reg in saved) + delete saved[reg] + for (reg in dirty) + delete dirty[reg] + } + + printf "%s:\n", label + + # If this label has been jumped to, define the CFA offset to its + # value at the location of the jump. 
+ if (!(label in functions) && in_function && label in cfa_offset) { + if (cfa_offset[in_function] != cfa_offset[label]) { + printf ".cfi_def_cfa_offset %d\n", cfa_offset[label] + cfa_offset[in_function] = cfa_offset[label] + } + delete cfa_offset[label] + } + + # If this label has been called, adjust CFA offset. + if (label in called && !(label in functions)) { + adjust_sp_offset(8); + delete called[label] + } + } + # An instruction may follow on the same line, so continue processing. } +# Skip empty line. /^$/ { next } +# Issue source line number. { - called = "" printf ".loc 1 %d\n", line_number print } # KEEPING UP WITH THE STACK POINTER -# %rsp should only be adjusted by pushing/popping or adding/subtracting constants +# %rsp should only be adjusted by pushing/popping or adding/subtracting +# constants. # -/pushl?/ { +/^push[wq]? / { + if ($1 == "pushw" || match($2, /^%([abcd]x|di|si|bp|sp)$/)) + adjust_sp_offset(2) + else + adjust_sp_offset(8) +} + +/^pop[wq]? / { + if ($1 == "popw" || match($2, /^%([abcd]x|di|si|bp|sp)$/)) + adjust_sp_offset(-2) + else + adjust_sp_offset(-8) +} + +/^pushfq?$/ { adjust_sp_offset(8) } -/popl?/ { + +/^pushfw$/ { + adjust_sp_offset(2) +} + +/^popfq?$/ { adjust_sp_offset(-8) } -/addl? \$-?(0x[0-9a-fA-F]+|[0-9]+),%rsp/ { adjust_sp_offset(-get_const1()) } -/subl? \$-?(0x[0-9a-fA-F]+|[0-9]+),%rsp/ { adjust_sp_offset(get_const1()) } -/call/ { - if (match($0, /call [0-9]+f/)) # "forward" label - called = substr($0, RSTART+5, RLENGTH-6) - else if (match($0, /call [0-9a-zA-Z_]+/)) - called = substr($0, RSTART+5, RLENGTH-5) +/^popfw$/ { + adjust_sp_offset(-2) +} + +/^addq? \$[+-]?(0x[0-9a-fA-F]+|[0-9]+),%rsp$/ { + adjust_sp_offset(-get_const1()) +} +/^subq? 
\$[+-]?(0x[0-9a-fA-F]+|[0-9]+),%rsp$/ { + adjust_sp_offset(get_const1()) +} + +/^call / { + if (match($2, /^[0-9]+f$/)) # "forward" label + called[substr($2, 1, RLENGTH - 1)] = 1 + else if (match($2, /^[a-zA-Z_][0-9a-zA-Z_]*$/)) + called[$2] = 1 +} + +/^j/ { + jump_to_label($2) } # TRACKING REGISTER VALUES FROM THE PREVIOUS STACK FRAME # -/pushl? %r(ax|bx|cx|dx|si|di|bp|8|9|10|11|12|13|14|15)/ { # don't match "push (%reg)" - # if a register is being pushed, and its value has not changed since the +/^pushq? %r([abcd]x|si|di|bp|[89]|1[0-5])$/ { + # Don't match "push (%reg)". + # If a register is being pushed, and its value has not changed since the # beginning of this function, the pushed value can be used when printing - # local variables at the next level up the stack - # emit '.cfi_rel_offset' for that + # local variables at the next level up the stack. + # Emit '.cfi_rel_offset' for that. if (in_function) { - register = get_reg() - if (!saved[register] && !dirty[register]) { - printf ".cfi_rel_offset %s,0\n", register - saved[register] = 1 + reg = get_reg() + if (!(reg in saved) && !(reg in dirty)) { + printf ".cfi_rel_offset %s,0\n", reg + saved[reg] = 1 } } } -/movl? %r(ax|bx|cx|dx|si|di|bp|8|9|10|11|12|13|14|15),-?(0x[0-9a-fA-F]+|[0-9]+)?\(%rsp\)/ { +/^movq? 
%r([abcd]x|si|di|bp|[89]|1[0-5]),[+-]?(0x[0-9a-fA-F]+|[0-9]+)?\(%rsp\)$/ { if (in_function) { - register = get_reg() - if (match($0, /-?(0x[0-9a-fA-F]+|[0-9]+)\(%rsp\)/)) { - offset = parse_const(substr($0, RSTART, RLENGTH-6)) + if (match($2, /,[+-]?(0x[0-9a-fA-F]+|[0-9]+)\(%rsp\)$/)) { + offset = parse_const(substr($2, RSTART + 1, RLENGTH - 7)) } else { offset = 0 } - if (!saved[register] && !dirty[register]) { - printf ".cfi_rel_offset %s,%d\n", register, offset - saved[register] = 1 + reg = get_reg1() + if (!(reg in saved) && !(reg in dirty)) { + printf ".cfi_rel_offset %s,%d\n", reg, offset + saved[reg] = 1 } } } @@ -171,24 +248,41 @@ function adjust_sp_offset(delta) { # IF REGISTER VALUES ARE UNCEREMONIOUSLY TRASHED # ...then we want to know about it. # -function trashed(register) { - if (in_function && !saved[register] && !dirty[register]) { - printf ".cfi_undefined %s\n", register +function trashed(reg) { + if (in_function && !(reg in saved) && !(reg in dirty)) { + printf ".cfi_undefined %s\n", reg } - dirty[register] = 1 + dirty[reg] = 1 +} +# This does NOT exhaustively check for all possible instructions which could +# overwrite a register value inherited from the caller (just the common ones). +/^mov.*,%[er]?([abcd][xlh]|si|di|bp|[89]|1[0-5])$/ { + trashed(get_reg2()) } -# this does NOT exhaustively check for all possible instructions which could -# overwrite a register value inherited from the caller (just the common ones) -/mov.*,%[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15)$/ { trashed(get_reg2()) } -/(add|addl|sub|subl|and|or|xor|lea|sal|sar|shl|shr).*,%[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15)$/ { +/^(add|sub|and|x?or|lea|s[ah][lr])[bwlq]? [^,]+,%[er]?([abcd][xlh]|si|di|bp|[89]|1[0-5])$/ { trashed(get_reg2()) } -/^i?mul [^,]*$/ { trashed("rax"); trashed("rdx") } -/^i?mul.*,%[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15)$/ { trashed(get_reg2()) } -/^i?div/ { trashed("rax"); trashed("rdx") } +/^i?mul[bwlq]? 
[^,]+$/ { + trashed("rax") + trashed("rdx") +} +/^i?mul[bwlq]? [^,]+,%[er]?([abcd][xlh]|si|di|bp|[89]|1[0-5])$/ { + trashed(get_reg2()) +} +/^i?div[bwlq]? / { + trashed("rax") + trashed("rdx") +} -/(dec|inc|not|neg|pop) %[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15)/ { trashed(get_reg()) } -/cpuid/ { trashed("rax"); trashed("rbx"); trashed("rcx"); trashed("rdx") } +/^(dec|inc|not|neg|pop)[bwlq]? %[er]?([abcd][xlh]|si|di|bp|[89]|1[0-5])$/ { + trashed(get_reg()) +} +/^cpuid$/ { + trashed("rax") + trashed("rbx") + trashed("rcx") + trashed("rdx") +} END { if (in_function) -- 2.45.2
Powered by blists - more mailing lists
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.