Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20240729081606.GW3766212@port70.net>
Date: Mon, 29 Jul 2024 10:16:06 +0200
From: Szabolcs Nagy <nsz@...t70.net>
To: Howard Su <howard0su@...il.com>
Cc: Rich Felker <dalias@...c.org>, Thorsten Glaser <tg@...bsd.de>,
	musl@...ts.openwall.com
Subject: Re: [Suggestion] Add cfi directives to assembly code via
 chatgpt

* Howard Su <howard0su@...il.com> [2024-07-29 08:19:42 +0800]:
> Per suggestion, I added an awk script to do so. Please help review it. Also, I
> noticed that stack unwinding is still not functional.

you mean you built musl with --enable-debug and a backtrace in
gdb is missing libc symbols? you should try to figure out what
is missing there.. (did you check the annotated asm? did you
previously manage to get unwinding to work with manual annotation?)

note that unwinding within the process won't work at runtime
(at least across asm code): arm has a different unwind abi that
does not use dwarf information from eh_frame, and that would
require different annotation.


> 
> On Tue, Jul 23, 2024 at 5:39 AM Szabolcs Nagy <nsz@...t70.net> wrote:
> 
> > * Howard Su <howard0su@...il.com> [2024-07-22 14:39:35 +0800]:
> > > fair enough. Let's put aside that LLM writing the CFI directives. I do
> > have
> > > knowledge about it to hand write it.
> > >
> > > Do you think it is a good idea to put effort into adding CFI to those
> > > assembly code, especially the thread_cp part, which blocks my debug.
> >
> > note that there is now support in gas to synthesize the CFI for
> > manually written x86 asm code with --scfi=experimental
> > https://sourceware.org/binutils/docs/as.html
> >
> > the aarch64 support just got committed recently
> >
> > https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=29085f7243e415db0e1617173eb80c441671ba0e
> >
> > there is no support for arm yet.
> >
> > meanwhile musl has tools/add-cfi*.awk that you could extend to
> > support arm, this is simpler than adding the gas support as it
> > only has to work for musl code.
> >
> >
> 
> -- 
> -Howard

> From fbe4007ac7e6728b6226ca52ccbc5b77571bcece Mon Sep 17 00:00:00 2001
> From: Howard Su <howard0su@...il.com>
> Date: Mon, 29 Jul 2024 08:17:31 +0800
> Subject: [PATCH] Add awk script to add CFI directives to arm
> 
> ---
>  crt/arm/crti.s        |   2 -
>  tools/add-cfi.arm.awk | 220 ++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 220 insertions(+), 2 deletions(-)
>  create mode 100644 tools/add-cfi.arm.awk
> 
> diff --git a/crt/arm/crti.s b/crt/arm/crti.s
> index 18dc1e41..acab83c9 100644
> --- a/crt/arm/crti.s
> +++ b/crt/arm/crti.s
> @@ -2,12 +2,10 @@
>  
>  .section .init
>  .global _init
> -.type _init,%function
>  _init:
>  	push {r0,lr}
>  
>  .section .fini
>  .global _fini
> -.type _fini,%function
>  _fini:
>  	push {r0,lr}
> diff --git a/tools/add-cfi.arm.awk b/tools/add-cfi.arm.awk
> new file mode 100644
> index 00000000..93b94768
> --- /dev/null
> +++ b/tools/add-cfi.arm.awk
> @@ -0,0 +1,220 @@
> +# Insert GAS CFI directives ("call frame information") into arm asm input
> +#
> +# CFI directives tell the assembler how to generate "stack frame" debug info
> +# This information can tell a debugger (like gdb) how to find the current stack
> +#   frame at any point in the program code, and how to find the values which
> +#   various registers had at higher points in the call stack
> +# With this information, the debugger can show a backtrace, and you can move up
> +#   and down the call stack and examine the values of local variables
> +
> +BEGIN {
> +  # parser state: no input line read yet, and not inside a function
> +  line_number = 0
> +  in_function = 0
> +
> +  # route CFI data to .debug_frame, not the .eh_frame section (which we don't keep)
> +  print ".cfi_sections .debug_frame"
> +
> +  # declare the input as file 1 so the per-line .loc directives can refer to it
> +  printf ".file 1 \"%s\"\n", ARGV[1]
> +}
> +
> +function get_const1() {
> +  # 1st constant operand ("#imm,", comma dropped); NOTE(review): parse_const is not defined in this script
> +  match($0, /#-?(0x[0-9a-fA-F]+|[0-9]+),/)
> +  return parse_const(substr($0, RSTART, RLENGTH-1))
> +}
> +
> +function get_reg() {
> +  # return the whole name of the 1st register on the line (use when there is exactly 1)
> +  match($0, /(r(1[0-5]|[0-9])|ip|sp|lr|pc)/)
> +  return substr($0, RSTART, RLENGTH)
> +}
> +
> +function get_reg1() {
> +  # return the 1st comma-terminated register operand (comma dropped), "" if there is none
> +  match($0, /(r(1[0-5]|[0-9])|ip|sp|lr|pc),/)
> +  return substr($0, RSTART, RLENGTH-1)
> +}
> +
> +function adjust_sp_offset(delta) {
> +  # report a change of delta bytes in the CFA offset; silent outside a function
> +  if (in_function) printf ".cfi_adjust_cfa_offset %d\n", delta
> +}
> +
> +{
> +  line_number++
> +
> +  # normalize the line first so the rules below can rely on a fixed layout
> +  # drop "//" comments; NOTE(review): "@" comments (GNU as ARM) are not stripped
> +  sub(/(\/\/).*/, "")
> +
> +  # collapse runs of blanks ([ \t] because mawk doesn't understand \s), then:
> +  gsub(/[ \t]+/, " ")
> +  gsub(/ *, */, ",")   # no blanks around commas
> +  gsub(/ *: */, ": ")  # exactly one blank after a colon
> +  sub(/ $/, "")        # no trailing blank
> +  sub(/^ /, "")        # no leading blank
> +}
> +
> +# check for assembler directives which we care about
> +/^\.(section|data|text)/ {
> +  # a .cfi_startproc/.cfi_endproc pair must not straddle a section switch
> +  # (clang chokes on that when generating ELF output), so close any open one
> +  if (in_function)
> +    print ".cfi_endproc"
> +  in_function = 0
> +  # no "next" here: the directive itself still has to reach the rules below
> +}
> +
> +/^\.type [a-zA-Z0-9_]+,%function/ {
> +  functions[substr($2, 1, length($2)-10)] = 1 # drop the 10 characters of ",%function" to keep the name
> +}
> +
> +# not interested in assembler directives beyond this, just pass them through
> +/^\./ {
> +  print # emit the directive (as canonicalized above) without .loc or CFI output
> +  next  # and skip every rule below for this line
> +}
> +
> +/^[a-zA-Z0-9_]+:/ {
> +  # the first field is the label followed by its colon; keep only the name
> +  label = substr($1, 1, length($1)-1)
> +
> +  # only labels recorded by the ".type name,%function" rule open a CFI
> +  # region; any other label leaves the current state untouched
> +  if (functions[label]) {
> +    # a function that runs straight into the next one is closed first
> +    if (in_function)
> +      print ".cfi_endproc"
> +    print ".cfi_startproc"
> +    in_function = 1
> +    # forget the previous function's register bookkeeping
> +    split("", saved)
> +    split("", dirty)
> +  }
> +  # no "next": an instruction may share the line with the label
> +}
> +
> +/^$/ { next } # nothing is left of the line after cleanup, so emit nothing for it
> +
> +# KEEPING UP WITH THE STACK POINTER
> +# sp should only be adjusted by pushing/popping or adding/subtracting constants
> +#
> +/pop \{[^\}]+\}/ {
> +    # keep what is between the braces of the register list in $2
> +    # NOTE(review): a range such as {r4-r6} does not match and counts as 0
> +    match($2, /\{((r(1[0-5]|[0-9])|ip|sp|lr|pc|,)+)\}/)
> +    registers = substr($2, RSTART + 1, RLENGTH-2)
> +
> +    # split() returns the number of list entries; each one is a 4-byte word
> +    count = split(registers, reg_array, ",")
> +
> +    # a pop releases stack, so the CFA offset shrinks by that many bytes
> +    adjust_sp_offset(-4 * count)
> +}
> +
> +/ldmfd sp!,\{[^\}]+\}/ {
> +    # $2 is "sp!,{...}"; keep what is between the braces
> +    # NOTE(review): a range such as {r4-r6} does not match and counts as 0
> +    match($2, /\{((r(1[0-5]|[0-9])|ip|sp|lr|pc|,)+)\}/)
> +    registers = substr($2, RSTART + 1, RLENGTH-2)
> +
> +    # split() returns the number of list entries; each one is a 4-byte word
> +    count = split(registers, reg_array, ",")
> +
> +    # handled like pop: the CFA offset shrinks by that many bytes
> +    adjust_sp_offset(-4 * count)
> +}
> +
> +/push \{[^\}]+\}/ {
> +    # keep what is between the braces of the register list in $2
> +    # NOTE(review): a range such as {r4-r6} does not match and counts as 0
> +    match($2, /\{((r(1[0-5]|[0-9])|ip|sp|lr|pc|,)+)\}/)
> +    registers = substr($2, RSTART + 1, RLENGTH-2)
> +
> +    # split() returns the number of list entries; each one is a 4-byte word
> +    count = split(registers, reg_array, ",")
> +
> +    # a push grows the frame; the byte count is also left in the global
> +    # "offset", which the register-tracking rule for push reads afterwards
> +    offset = 4 * count; adjust_sp_offset(offset)
> +}
> +
> +/stmfd sp!,\{[^\}]+\}/ {
> +    # $2 is "sp!,{...}"; keep what is between the braces
> +    # NOTE(review): a range such as {r4-r6} does not match and counts as 0
> +    match($2, /\{((r(1[0-5]|[0-9])|ip|sp|lr|pc|,)+)\}/)
> +    registers = substr($2, RSTART + 1, RLENGTH-2)
> +
> +    # split() returns the number of list entries; each one is a 4-byte word
> +    count = split(registers, reg_array, ",")
> +
> +    # handled like push; the byte count is also left in the global
> +    # "offset", which the register-tracking rule for stmfd reads afterwards
> +    offset = 4 * count; adjust_sp_offset(offset)
> +}
> +
> +# TRACKING REGISTER VALUES FROM THE PREVIOUS STACK FRAME
> +#
> +/stmfd sp!,\{[^\}]+\}/ {
> +  # the lowest-numbered register is stored at the lowest address, so with the
> +  # list written in ascending order the i-th register sits at sp + 4*(i-1)
> +  if (in_function) {
> +    match($2, /\{((r(1[0-5]|[0-9])|ip|sp|lr|pc|,)+)\}/)
> +    registers = substr($2, RSTART + 1, RLENGTH-2)
> +    # index loop: "for (i in a)" order is unspecified; a skipped reg keeps its slot
> +    nregs = split(registers, reg_array, ",")
> +    for (i = 1; i <= nregs; i++) {
> +        register = reg_array[i]
> +        if (!saved[register] && !dirty[register]) {
> +            printf ".cfi_rel_offset %s, %d\n", register, 4 * (i - 1)
> +            saved[register] = 1
> +        }
> +    }
> +  }
> +}
> +
> +/push \{[^\}]+\}/ {
> +  # the lowest-numbered register is stored at the lowest address, so with the
> +  # list written in ascending order the i-th register sits at sp + 4*(i-1)
> +  if (in_function) {
> +    match($2, /\{((r(1[0-5]|[0-9])|ip|sp|lr|pc|,)+)\}/)
> +    registers = substr($2, RSTART + 1, RLENGTH-2)
> +    # index loop: "for (i in a)" order is unspecified; a skipped reg keeps its slot
> +    nregs = split(registers, reg_array, ",")
> +    for (i = 1; i <= nregs; i++) {
> +        register = reg_array[i]
> +        if (!saved[register] && !dirty[register]) {
> +            printf ".cfi_rel_offset %s, %d\n", register, 4 * (i - 1)
> +            saved[register] = 1
> +        }
> +    }
> +  }
> +}
> +
> +# IF REGISTER VALUES ARE UNCEREMONIOUSLY TRASHED
> +# ...then we want to know about it.
> +#
> +function trashed(register) {
> +  # report only the first overwrite of a register that has no saved copy
> +  if (in_function && !(saved[register] || dirty[register]))
> +    printf ".cfi_undefined %s\n", register
> +  dirty[register] = 1 # recorded whether or not we are inside a function
> +}
> +# not exhaustive: only these common mnemonics are recognised as overwriting a
> +# caller's register. NOTE(review): "ldr" is listed twice -- other mnemonic meant?
> +/(ldr|adds|subs|ldr|ldrex|mov) (r(1[0-5]|[0-9])|ip|sp|lr|pc),/ {
> +    trashed(get_reg1())
> +}
> +
> +{
> +  # NOTE(review): runs after the CFI rules, so their directives precede the instruction
> +  printf ".loc 1 %d\n", line_number # tie the statement to its original source line
> +  print
> +}
> +
> +END {
> +  # close the CFI region of a function that is still open at end of input
> +  if (in_function) print ".cfi_endproc"
> +}
> -- 
> 2.45.2
> 

Powered by blists - more mailing lists

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.