binman: Support reading the offset of an ELF-file symbol

Binman needs to be able to update the contents of an ELF file after it has
been build. To support this, add a function to locate the position of a
symbol's contents within the file.

Fix the comments on bss_data.c and Symbol while we are here.

Signed-off-by: Simon Glass <sjg@chromium.org>
diff --git a/tools/binman/elf.py b/tools/binman/elf.py
index 03b49d7..4aca4f8 100644
--- a/tools/binman/elf.py
+++ b/tools/binman/elf.py
@@ -24,7 +24,14 @@
 except:  # pragma: no cover
     ELF_TOOLS = False
 
-Symbol = namedtuple('Symbol', ['section', 'address', 'size', 'weak'])
+# Information about an EFL symbol:
+# section (str): Name of the section containing this symbol
+# address (int): Address of the symbol (its value)
+# size (int): Size of the symbol in bytes
+# weak (bool): True if the symbol is weak
+# offset (int or None): Offset of the symbol's data in the ELF file, or None if
+#   not known
+Symbol = namedtuple('Symbol', ['section', 'address', 'size', 'weak', 'offset'])
 
 # Information about an ELF file:
 #    data: Extracted program contents of ELF file (this would be loaded by an
@@ -71,8 +78,48 @@
         section, size =  parts[:2]
         if len(parts) > 2:
             name = parts[2] if parts[2] != '.hidden' else parts[3]
-            syms[name] = Symbol(section, int(value, 16), int(size,16),
-                                flags[1] == 'w')
+            syms[name] = Symbol(section, int(value, 16), int(size, 16),
+                                flags[1] == 'w', None)
+
+    # Sort dict by address
+    return OrderedDict(sorted(syms.items(), key=lambda x: x[1].address))
+
+def GetSymbolFileOffset(fname, patterns):
+    """Get the symbols from an ELF file
+
+    Args:
+        fname: Filename of the ELF file to read
+        patterns: List of regex patterns to search for, each a string
+
+    Returns:
+        None, if the file does not exist, or Dict:
+          key: Name of symbol
+          value: Hex value of symbol
+    """
+    def _GetFileOffset(elf, addr):
+        for seg in elf.iter_segments():
+            seg_end = seg['p_vaddr'] + seg['p_filesz']
+            if seg.header['p_type'] == 'PT_LOAD':
+                if addr >= seg['p_vaddr'] and addr < seg_end:
+                    return addr - seg['p_vaddr'] + seg['p_offset']
+
+    if not ELF_TOOLS:
+        raise ValueError('Python elftools package is not available')
+
+    syms = {}
+    with open(fname, 'rb') as fd:
+        elf = ELFFile(fd)
+
+        re_syms = re.compile('|'.join(patterns))
+        for section in elf.iter_sections():
+            if isinstance(section, SymbolTableSection):
+                for symbol in section.iter_symbols():
+                    if not re_syms or re_syms.search(symbol.name):
+                        addr = symbol.entry['st_value']
+                        syms[symbol.name] = Symbol(
+                            section.name, addr, symbol.entry['st_size'],
+                            symbol.entry['st_info']['bind'] == 'STB_WEAK',
+                            _GetFileOffset(elf, addr))
 
     # Sort dict by address
     return OrderedDict(sorted(syms.items(), key=lambda x: x[1].address))
diff --git a/tools/binman/elf_test.py b/tools/binman/elf_test.py
index bcccd78..ac69a95 100644
--- a/tools/binman/elf_test.py
+++ b/tools/binman/elf_test.py
@@ -6,6 +6,7 @@
 
 import os
 import shutil
+import struct
 import sys
 import tempfile
 import unittest
@@ -221,6 +222,42 @@
                          elf.DecodeElf(data, load + 2))
         shutil.rmtree(outdir)
 
+    def testEmbedData(self):
+        """Test for the GetSymbolFileOffset() function"""
+        if not elf.ELF_TOOLS:
+            self.skipTest('Python elftools not available')
+
+        fname = self.ElfTestFile('embed_data')
+        offset = elf.GetSymbolFileOffset(fname, ['embed_start', 'embed_end'])
+        start = offset['embed_start'].offset
+        end = offset['embed_end'].offset
+        data = tools.ReadFile(fname)
+        embed_data = data[start:end]
+        expect = struct.pack('<III', 0x1234, 0x5678, 0)
+        self.assertEqual(expect, embed_data)
+
+    def testEmbedFail(self):
+        """Test calling GetSymbolFileOffset() without elftools"""
+        try:
+            old_val = elf.ELF_TOOLS
+            elf.ELF_TOOLS = False
+            fname = self.ElfTestFile('embed_data')
+            with self.assertRaises(ValueError) as e:
+                elf.GetSymbolFileOffset(fname, ['embed_start', 'embed_end'])
+            self.assertIn('Python elftools package is not available',
+                      str(e.exception))
+        finally:
+            elf.ELF_TOOLS = old_val
+
+    def testEmbedDataNoSym(self):
+        """Test for GetSymbolFileOffset() getting no symbols"""
+        if not elf.ELF_TOOLS:
+            self.skipTest('Python elftools not available')
+
+        fname = self.ElfTestFile('embed_data')
+        offset = elf.GetSymbolFileOffset(fname, ['missing_sym'])
+        self.assertEqual({}, offset)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tools/binman/test/Makefile b/tools/binman/test/Makefile
index 0b19b7d..6e6cc6d 100644
--- a/tools/binman/test/Makefile
+++ b/tools/binman/test/Makefile
@@ -31,7 +31,7 @@
 
 TARGETS = u_boot_ucode_ptr u_boot_no_ucode_ptr bss_data \
 	u_boot_binman_syms u_boot_binman_syms.bin u_boot_binman_syms_bad \
-	u_boot_binman_syms_size u_boot_binman_syms_x86
+	u_boot_binman_syms_size u_boot_binman_syms_x86 embed_data
 
 all: $(TARGETS)
 
@@ -44,6 +44,9 @@
 bss_data: CFLAGS += $(SRC)bss_data.lds
 bss_data: bss_data.c
 
+embed_data: CFLAGS += $(SRC)embed_data.lds
+embed_data: embed_data.c
+
 u_boot_binman_syms.bin: u_boot_binman_syms
 	$(OBJCOPY) -O binary $< -R .note.gnu.build-id $@
 
diff --git a/tools/binman/test/bss_data.c b/tools/binman/test/bss_data.c
index 79537c3..4f9b64c 100644
--- a/tools/binman/test/bss_data.c
+++ b/tools/binman/test/bss_data.c
@@ -2,7 +2,7 @@
 /*
  * Copyright (c) 2016 Google, Inc
  *
- * Simple program to create a _dt_ucode_base_size symbol which can be read
+ * Simple program to create a bss_data region so the symbol can be read
  * by binutils. This is used by binman tests.
  */
 
diff --git a/tools/binman/test/embed_data.c b/tools/binman/test/embed_data.c
new file mode 100644
index 0000000..47d8c38
--- /dev/null
+++ b/tools/binman/test/embed_data.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2021 Google LLC
+ *
+ * Simple program including some embedded data that can be accessed by binman.
+ * This is used by binman tests.
+ */
+
+int first[10] = {1};
+int embed[3] __attribute__((section(".embed"))) = {0x1234, 0x5678};
+int second[10] = {1};
+
+int main(void)
+{
+	return 0;
+}
diff --git a/tools/binman/test/embed_data.lds b/tools/binman/test/embed_data.lds
new file mode 100644
index 0000000..908bf66
--- /dev/null
+++ b/tools/binman/test/embed_data.lds
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright 2021 Google LLC
+ */
+
+OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(_start)
+
+SECTIONS
+{
+	_start = .;
+	__data_start = .;
+	.data :
+	{
+		. = ALIGN(32);
+		embed_start = .;
+		*(.embed*)
+		embed_end = .;
+		. = ALIGN(32);
+		*(.data*)
+	}
+}