From: Eric Pouech eric.pouech@gmail.com
This improves correctness of functions like SymFromAddr() when searching local variables. Getting rid for dwarf2_read_range().
Signed-off-by: Eric Pouech eric.pouech@gmail.com --- dlls/dbghelp/dbghelp_private.h | 117 ++++++++++++++++++--------------- dlls/dbghelp/dwarf.c | 115 ++++++++++---------------------- dlls/dbghelp/msc.c | 12 ++-- dlls/dbghelp/stabs.c | 7 +- dlls/dbghelp/symbol.c | 27 ++++---- dlls/dbghelp/type.c | 12 +++- 6 files changed, 136 insertions(+), 154 deletions(-)
diff --git a/dlls/dbghelp/dbghelp_private.h b/dlls/dbghelp/dbghelp_private.h index b6a7df10061..a5ea41ae014 100644 --- a/dlls/dbghelp/dbghelp_private.h +++ b/dlls/dbghelp/dbghelp_private.h @@ -116,6 +116,25 @@ extern unsigned dbghelp_options DECLSPEC_HIDDEN; extern BOOL dbghelp_opt_native DECLSPEC_HIDDEN; extern SYSTEM_INFO sysinfo DECLSPEC_HIDDEN;
+/* FIXME: this could be optimized later on by using relative offsets and smaller integral sizes */ +struct addr_range +{ + DWORD64 low; /* absolute address of first byte of the range */ + DWORD64 high; /* absolute address of first byte after the range */ +}; + +/* tests whether ar2 is inside ar1 */ +static inline BOOL addr_range_inside(const struct addr_range* ar1, const struct addr_range* ar2) +{ + return ar1->low <= ar2->low && ar2->high <= ar1->high; +} + +/* tests whether ar1 and ar2 are disjoint */ +static inline BOOL addr_range_disjoint(const struct addr_range* ar1, const struct addr_range* ar2) +{ + return ar1->high <= ar2->low || ar2->high <= ar1->low; +} + enum location_kind {loc_error, /* reg is the error code */ loc_unavailable, /* location is not available */ loc_absolute, /* offset is the location */ @@ -160,10 +179,10 @@ static inline BOOL symt_check_tag(const struct symt* s, enum SymTagEnum tag) struct symt_block { struct symt symt; - ULONG_PTR address; - ULONG_PTR size; struct symt* container; /* block, or func */ struct vector vchildren; /* sub-blocks & local variables */ + unsigned num_ranges; + struct addr_range ranges[]; };
struct symt_module /* in fact any of .exe, .dll... */ @@ -216,36 +235,49 @@ struct symt_data } u; };
-/* We must take into account that most debug formats (dwarf and pdb) report for - * code (esp. inlined functions) inside functions the following way: - * - block - * + is represented by a contiguous area of memory, - * or at least have lo/hi addresses to encompass it's contents - * + but most importantly, block A's lo/hi range is always embedded within - * its parent (block or function) - * - inline site: - * + is most of the times represented by a set of ranges (instead of a - * contiguous block) - * + native dbghelp only exports the start address, not its size - * + the set of ranges isn't always embedded in enclosing block (if any) - * + the set of ranges is always embedded in top function - * - (top) function - * + is described as a contiguous block of memory +/* Understanding functions internal storage: + * - functions, inline sites and blocks can be described as spreading across + * several chunks of memory (hence describing potentially a non contiguous + * memory space). + * - this is described internally as an array of address ranges + * (struct addr_range) + * + * - there's a hierarchical (aka lexical) relationship: + * + function's parent is a compiland or the module itself + * + inline site's parent is either a function or another inline site + * + block's parent is either a function, an inline site or another block. * - * On top of the items above (taken as assumptions), we also assume that: - * - a range in inline site A, is disjoint from all the other ranges in - * inline site A - * - a range in inline site A, is either disjoint or embedded into any of - * the ranges of inline sites parent of A + * - internal storage rely on the following assumptions: + * + in an array of address ranges, one address range doesn't overlap over + * one of its siblings + * + each address range of a block is inside a single range of its lexical + * parent (and outside of the others since they don't overlap) + * + each address range of an inline site is inside a single range its + * lexical parent + * + a function (as of today) is only represented by a single address range + * (A). * - * Therefore, we also store all inline sites inside a function: - * - available as a linked list to simplify the walk among them - * - this linked list shall preserve the weak order of the lexical-parent - * relationship (eg for any inline site A, which has inline site B - * as lexical parent, then A is present before B in the linked list) - * - hence (from the assumptions above), when looking up which inline site - * contains a given address, the first range containing that address found - * while walking the list of inline sites is the right one. + * - all inline sites of a function are stored in a linked list: + * + this linked list shall preserve the weak order of the lexical-parent + * relationship (eg for any inline site A, which has inline site B + * as lexical parent, A must appear before B in the linked list) + * + * - lookup: + * + when looking up which inline site contains a given address, the first + * range containing that address found while walking the list of inline + * sites is the right one. + * + when lookup up which inner-most block contains an address, descend the + * blocks tree with branching on the block (if any) which contains the given + * address in one of its ranges + * + * Notes: + * (A): shall evolve but storage is native is awkward: from PGO testing, the + * top function is stored with its first range of address; all the others + * are stored as blocks, children of compiland, but which lexical parent + * is the top function. This breaks the natural assumption that + * children <> lexical parent is symetrical. + * (B): see dwarf.c for some gory discrepancies between native & builtin + * DbgHelp. */
struct symt_function @@ -261,25 +293,6 @@ struct symt_function struct symt_inlinesite* next_inlinesite;/* linked list of inline sites in this function */ };
-/* FIXME: this could be optimized later on by using relative offsets and smaller integral sizes */ -struct addr_range -{ - DWORD64 low; /* absolute address of first byte of the range */ - DWORD64 high; /* absolute address of first byte after the range */ -}; - -/* tests whether ar2 is inside ar1 */ -static inline BOOL addr_range_inside(const struct addr_range* ar1, const struct addr_range* ar2) -{ - return ar1->low <= ar2->low && ar2->high <= ar1->high; -} - -/* tests whether ar1 and ar2 are disjoint */ -static inline BOOL addr_range_disjoint(const struct addr_range* ar1, const struct addr_range* ar2) -{ - return ar1->high <= ar2->low || ar2->high <= ar1->low; -} - /* a symt_inlinesite* can be casted to a symt_function* to access all function bits */ struct symt_inlinesite { @@ -857,10 +870,10 @@ extern struct symt_data* struct symt_function* func, struct symt_block* block, struct symt* type, const char* name, VARIANT* v) DECLSPEC_HIDDEN; extern struct symt_block* - symt_open_func_block(struct module* module, + symt_open_func_block(struct module* module, struct symt_function* func, - struct symt_block* block, - unsigned pc, unsigned len) DECLSPEC_HIDDEN; + struct symt_block* block, + unsigned num_ranges) DECLSPEC_HIDDEN; extern struct symt_block* symt_close_func_block(struct module* module, const struct symt_function* func, diff --git a/dlls/dbghelp/dwarf.c b/dlls/dbghelp/dwarf.c index 03e476487cc..8d365ba88f1 100644 --- a/dlls/dbghelp/dwarf.c +++ b/dlls/dbghelp/dwarf.c @@ -1206,80 +1206,6 @@ static const char* dwarf2_get_cpp_name(dwarf2_debug_info_t* di, const char* name return last; }
-/****************************************************************** - * dwarf2_read_range - * - * read a range for a given debug_info (either using AT_range attribute, in which - * case we don't return all the details, or using AT_low_pc & AT_high_pc attributes) - * in all cases, range is relative to beginning of compilation unit - */ -static BOOL dwarf2_read_range(dwarf2_parse_context_t* ctx, const dwarf2_debug_info_t* di, - ULONG_PTR* plow, ULONG_PTR* phigh) -{ - struct attribute range; - - if (dwarf2_find_attribute(di, DW_AT_ranges, &range)) - { - dwarf2_traverse_context_t traverse; - ULONG_PTR low, high; - const ULONG_PTR UMAX = ~(ULONG_PTR)0u; - - traverse.data = ctx->module_ctx->sections[section_ranges].address + range.u.uvalue; - traverse.end_data = ctx->module_ctx->sections[section_ranges].address + - ctx->module_ctx->sections[section_ranges].size; - - *plow = UMAX; - *phigh = 0; - while (traverse.data + 2 * ctx->head.word_size < traverse.end_data) - { - low = dwarf2_parse_addr_head(&traverse, &ctx->head); - high = dwarf2_parse_addr_head(&traverse, &ctx->head); - if (low == 0 && high == 0) break; - if (low == (ctx->head.word_size == 8 ? (~(DWORD64)0u) : (DWORD64)(~0u))) - FIXME("unsupported yet (base address selection)\n"); - /* range values are relative to start of compilation unit */ - low += ctx->compiland->address - ctx->module_ctx->load_offset; - high += ctx->compiland->address - ctx->module_ctx->load_offset; - if (low < *plow) *plow = low; - if (high > *phigh) *phigh = high; - } - if (*plow == UMAX || *phigh == 0) {WARN("no entry found\n"); return FALSE;} - if (*plow == *phigh) {WARN("entry found, but low=high %Ix %Ix\n", low, high); return FALSE;} - - return TRUE; - } - else - { - struct attribute low_pc; - struct attribute high_pc; - - if (!dwarf2_find_attribute(di, DW_AT_low_pc, &low_pc) || - !dwarf2_find_attribute(di, DW_AT_high_pc, &high_pc)) - return FALSE; - *plow = low_pc.u.uvalue; - *phigh = high_pc.u.uvalue; - if (ctx->head.version >= 4) - switch (high_pc.form) - { - case DW_FORM_addr: - break; - case DW_FORM_data1: - case DW_FORM_data2: - case DW_FORM_data4: - case DW_FORM_data8: - case DW_FORM_sdata: - case DW_FORM_udata: - /* From dwarf4 on, when FORM's class is constant, high_pc is an offset from low_pc */ - *phigh += *plow; - break; - default: - FIXME("Unsupported class for high_pc\n"); - break; - } - return TRUE; - } -} - static unsigned dwarf2_get_num_ranges(const dwarf2_debug_info_t* di) { struct attribute range; @@ -2260,22 +2186,51 @@ static void dwarf2_parse_inlined_subroutine(dwarf2_subprogram_t* subpgm, static void dwarf2_parse_subprogram_block(dwarf2_subprogram_t* subpgm, dwarf2_debug_info_t* di) { - ULONG_PTR low_pc, high_pc; + unsigned int num_ranges; struct vector* children; dwarf2_debug_info_t*child; unsigned int i;
TRACE("%s\n", dwarf2_debug_di(di));
- if (!dwarf2_read_range(subpgm->ctx, di, &low_pc, &high_pc)) + num_ranges = dwarf2_get_num_ranges(di); + if (!num_ranges) { - WARN("no range\n"); + WARN("no ranges\n"); return; }
- subpgm->current_block = symt_open_func_block(subpgm->ctx->module_ctx->module, subpgm->current_func, subpgm->current_block, - subpgm->ctx->module_ctx->load_offset + low_pc - subpgm->current_func->address, - high_pc - low_pc); + /* Dwarf tends to keep the structure of the C/C++ program, and emits DW_TAG_lexical_block + * for every block in source program. + * With inlining and other optimizations, code for a block no longer lies in a contiguous + * chunk of memory. It's hence described with (potentially) multiple chunks of memory. + * Then each variable of each block is attached to its block. (A) + * + * PDB on the other hand no longer emits block information, and merge variable information + * at function level (actually function and each inline site). + * For example, if several variables of same name exist in different blocks of a function, + * only one entry for that name will exist. The information stored in (A) will point + * to the correct instance as defined by C/C++ scoping rules. + * + * (A) in all cases, there is information telling for each address of the function if a + * variable is accessible, and if so, how to get its value. + * + * DbgHelp only exposes a contiguous chunk of memory for a block. + * + * => Store internaly all the ranges of addresses in a block, but only expose the size + * of the first chunk of memory. + * Otherwise, it would break the rule: blocks' chunks don't overlap. + * Note: This could mislead some programs using the blocks' address and size information. + * That's very unlikely to happen (most will use the APIs from DbgHelp, which will + * hide this information to the caller). + */ + subpgm->current_block = symt_open_func_block(subpgm->ctx->module_ctx->module, subpgm->current_func, + subpgm->current_block, num_ranges); + if (!dwarf2_fill_ranges(di, subpgm->current_block->ranges, num_ranges)) + { + FIXME("Unexpected situation\n"); + subpgm->current_block->num_ranges = 0; + }
children = dwarf2_get_di_children(di); if (children) for (i = 0; i < vector_length(children); i++) diff --git a/dlls/dbghelp/msc.c b/dlls/dbghelp/msc.c index 94f0c6af9ee..872b0e70b58 100644 --- a/dlls/dbghelp/msc.c +++ b/dlls/dbghelp/msc.c @@ -2459,14 +2459,14 @@ static BOOL codeview_snarf(const struct msc_debug_info* msc_dbg, break;
case S_BLOCK32_ST: - block = symt_open_func_block(msc_dbg->module, curr_func, block, - codeview_get_address(msc_dbg, sym->block_v1.segment, sym->block_v1.offset) - curr_func->address, - sym->block_v1.length); + block = symt_open_func_block(msc_dbg->module, curr_func, block, 1); + block->ranges[0].low = codeview_get_address(msc_dbg, sym->block_v1.segment, sym->block_v1.offset); + block->ranges[0].high = block->ranges[0].low + sym->block_v1.length; break; case S_BLOCK32: - block = symt_open_func_block(msc_dbg->module, curr_func, block, - codeview_get_address(msc_dbg, sym->block_v3.segment, sym->block_v3.offset) - curr_func->address, - sym->block_v3.length); + block = symt_open_func_block(msc_dbg->module, curr_func, block, 1); + block->ranges[0].low = codeview_get_address(msc_dbg, sym->block_v3.segment, sym->block_v3.offset); + block->ranges[0].high = block->ranges[0].low + sym->block_v3.length; break;
case S_END: diff --git a/dlls/dbghelp/stabs.c b/dlls/dbghelp/stabs.c index e1945b41d66..5969780fe35 100644 --- a/dlls/dbghelp/stabs.c +++ b/dlls/dbghelp/stabs.c @@ -1374,15 +1374,16 @@ BOOL stabs_parse(struct module* module, ULONG_PTR load_offset, case N_LBRAC: if (curr_func) { - block = symt_open_func_block(module, curr_func, block, - n_value, 0); + block = symt_open_func_block(module, curr_func, block, 1); + block->ranges[0].low = curr_func->address + n_value; + block->ranges[0].high = 0; /* will be set by N_RBRAC */ pending_flush(&pending_block, module, curr_func, block); } break; case N_RBRAC: if (curr_func) { - block->size = curr_func->address + n_value - block->address; + block->ranges[0].high = curr_func->address + n_value; block = symt_close_func_block(module, curr_func, block); } break; diff --git a/dlls/dbghelp/symbol.c b/dlls/dbghelp/symbol.c index 9d248630393..6e6c35e9b98 100644 --- a/dlls/dbghelp/symbol.c +++ b/dlls/dbghelp/symbol.c @@ -523,21 +523,21 @@ struct symt_data* symt_add_func_constant(struct module* module, return locsym; }
-struct symt_block* symt_open_func_block(struct module* module, +struct symt_block* symt_open_func_block(struct module* module, struct symt_function* func, - struct symt_block* parent_block, - unsigned pc, unsigned len) + struct symt_block* parent_block, + unsigned num_ranges) { struct symt_block* block; struct symt** p;
assert(symt_check_tag(&func->symt, SymTagFunction) || symt_check_tag(&func->symt, SymTagInlineSite)); - + assert(num_ranges > 0); assert(!parent_block || parent_block->symt.tag == SymTagBlock); - block = pool_alloc(&module->pool, sizeof(*block)); + + block = pool_alloc(&module->pool, sizeof(*block) + num_ranges * sizeof(block->ranges[0])); block->symt.tag = SymTagBlock; - block->address = func->address + pc; - block->size = len; + block->num_ranges = num_ranges; block->container = parent_block ? &parent_block->symt : &func->symt; vector_init(&block->vchildren, sizeof(struct symt*), 4); if (parent_block) @@ -1131,10 +1131,15 @@ static BOOL symt_enum_locals_helper(struct module_pair* pair, case SymTagBlock: { struct symt_block* block = (struct symt_block*)lsym; - if (pc < block->address || block->address + block->size <= pc) - continue; - if (!symt_enum_locals_helper(pair, match, se, func, &block->vchildren)) - return FALSE; + unsigned j; + for (j = 0; j < block->num_ranges; j++) + { + if (pc >= block->ranges[j].low && pc < block->ranges[j].high) + { + if (!symt_enum_locals_helper(pair, match, se, func, &block->vchildren)) + return FALSE; + } + } } break; case SymTagData: diff --git a/dlls/dbghelp/type.c b/dlls/dbghelp/type.c index b39cd86248a..ad2b304eebe 100644 --- a/dlls/dbghelp/type.c +++ b/dlls/dbghelp/type.c @@ -150,7 +150,7 @@ BOOL symt_get_address(const struct symt* type, ULONG64* addr) } break; case SymTagBlock: - *addr = ((const struct symt_block*)type)->address; + *addr = ((const struct symt_block*)type)->ranges[0].low; break; case SymTagFunction: *addr = ((const struct symt_function*)type)->address; @@ -819,7 +819,15 @@ BOOL symt_get_info(struct module* module, const struct symt* type, X(DWORD64) = ((const struct symt_function*)type)->size; break; case SymTagBlock: - X(DWORD64) = ((const struct symt_block*)type)->size; + /* When there are several ranges available, we can only return one contiguous chunk of memory. + * We return only the first range. It will lead to not report the other ranges + * being part of the block, but it will not return gaps (between the ranges) as being + * part of the block. + * So favor a correct (yet incomplete) value than an incorrect one. + */ + if (((const struct symt_block*)type)->num_ranges > 1) + WARN("Only returning first range from multiple ranges\n"); + X(DWORD64) = ((const struct symt_block*)type)->ranges[0].high - ((const struct symt_block*)type)->ranges[0].low; break; case SymTagPointerType: X(DWORD64) = ((const struct symt_pointer*)type)->size;