diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs index 9a9e8287e787f..c3470932516f8 100644 --- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs +++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs @@ -152,7 +152,20 @@ fn build_pointer_or_reference_di_node<'ll, 'tcx>( cx.size_and_align_of(Ty::new_mut_ptr(cx.tcx, pointee_type)) ); - let pointee_type_di_node = type_di_node(cx, pointee_type); + let pointee_type_di_node = match pointee_type.kind() { + // `&[T]` will look like `{ data_ptr: *const T, length: usize }` + ty::Slice(element_type) => type_di_node(cx, *element_type), + // `&str` will look like `{ data_ptr: *const u8, length: usize }` + ty::Str => type_di_node(cx, cx.tcx.types.u8), + + // `&dyn K` will look like `{ pointer: _, vtable: _}` + // any Adt `Foo` containing an unsized type (eg `&[_]` or `&dyn _`) + // will look like `{ data_ptr: *const Foo, length: usize }` + // and thin pointers `&Foo` will just look like `*const Foo`. + // + // in all those cases, we just use the pointee_type + _ => type_di_node(cx, pointee_type), + }; return_if_di_node_created_in_meantime!(cx, unique_type_id); @@ -389,26 +402,11 @@ fn build_dyn_type_di_node<'ll, 'tcx>( } /// Create debuginfo for `[T]` and `str`. These are unsized. -/// -/// NOTE: We currently emit just emit the debuginfo for the element type here -/// (i.e. `T` for slices and `u8` for `str`), so that we end up with -/// `*const T` for the `data_ptr` field of the corresponding wide-pointer -/// debuginfo of `&[T]`. -/// -/// It would be preferable and more accurate if we emitted a DIArray of T -/// without an upper bound instead. That is, LLVM already supports emitting -/// debuginfo of arrays of unknown size. But GDB currently seems to end up -/// in an infinite loop when confronted with such a type. 
-/// -/// As a side effect of the current encoding every instance of a type like -/// `struct Foo { unsized_field: [u8] }` will look like -/// `struct Foo { unsized_field: u8 }` in debuginfo. If the length of the -/// slice is zero, then accessing `unsized_field` in the debugger would -/// result in an out-of-bounds access. fn build_slice_type_di_node<'ll, 'tcx>( cx: &CodegenCx<'ll, 'tcx>, slice_type: Ty<'tcx>, unique_type_id: UniqueTypeId<'tcx>, + span: Span, ) -> DINodeCreationResult<'ll> { let element_type = match slice_type.kind() { ty::Slice(element_type) => *element_type, @@ -423,7 +421,20 @@ fn build_slice_type_di_node<'ll, 'tcx>( let element_type_di_node = type_di_node(cx, element_type); return_if_di_node_created_in_meantime!(cx, unique_type_id); - DINodeCreationResult { di_node: element_type_di_node, already_stored_in_typemap: false } + let (size, align) = cx.spanned_size_and_align_of(slice_type, span); + let subrange = unsafe { llvm::LLVMDIBuilderGetOrCreateSubrange(DIB(cx), 0, -1) }; + let subscripts = &[subrange]; + let di_node = unsafe { + llvm::LLVMDIBuilderCreateArrayType( + DIB(cx), + size.bits(), + align.bits() as u32, + element_type_di_node, + subscripts.as_ptr(), + subscripts.len() as c_uint, + ) + }; + DINodeCreationResult { di_node, already_stored_in_typemap: false } } /// Get the debuginfo node for the given type. @@ -454,7 +465,7 @@ pub(crate) fn spanned_type_di_node<'ll, 'tcx>( } ty::Tuple(elements) if elements.is_empty() => build_basic_type_di_node(cx, t), ty::Array(..) => build_fixed_size_array_di_node(cx, unique_type_id, t, span), - ty::Slice(_) | ty::Str => build_slice_type_di_node(cx, t, unique_type_id), + ty::Slice(_) | ty::Str => build_slice_type_di_node(cx, t, unique_type_id, span), ty::Dynamic(..) => build_dyn_type_di_node(cx, t, unique_type_id), ty::Foreign(..) 
=> build_foreign_type_di_node(cx, t, unique_type_id), ty::RawPtr(pointee_type, _) | ty::Ref(_, pointee_type, _) => { diff --git a/src/etc/gdb_lookup.py b/src/etc/gdb_lookup.py index c70944790d2b5..ae9696fa2ca92 100644 --- a/src/etc/gdb_lookup.py +++ b/src/etc/gdb_lookup.py @@ -103,6 +103,7 @@ def __call__(self, valobj): printer.add(RustType.StdString, StdStringProvider) printer.add(RustType.StdOsString, StdOsStringProvider) printer.add(RustType.StdStr, StdStrProvider) +printer.add(RustType.StdBoxStr, StdBoxStrProvider) printer.add(RustType.StdSlice, StdSliceProvider) printer.add(RustType.StdVec, StdVecProvider) printer.add(RustType.StdVecDeque, StdVecDequeProvider) diff --git a/src/etc/gdb_providers.py b/src/etc/gdb_providers.py index bd27998b37706..7d50de0d3050d 100644 --- a/src/etc/gdb_providers.py +++ b/src/etc/gdb_providers.py @@ -142,6 +142,20 @@ def display_hint(): return "array" +class StdBoxStrProvider(printer_base): + def __init__(self, valobj): + self._valobj = valobj + self._length = int(valobj["length"]) + self._data_ptr = valobj["data_ptr"] + + def to_string(self): + return self._data_ptr.lazy_string(encoding="utf-8", length=self._length) + + @staticmethod + def display_hint(): + return "string" + + class StdVecProvider(printer_base): def __init__(self, valobj): self._valobj = valobj @@ -203,6 +217,12 @@ def __init__(self, valobj, is_atomic=False): self._is_atomic = is_atomic self._ptr = unwrap_unique_or_non_null(valobj["ptr"]) self._value = self._ptr["data" if is_atomic else "value"] + # FIXME(shua): the debuginfo template type should be 'str' not 'u8' + if self._ptr.type.target().name == "alloc::rc::RcInner": + length = self._valobj["ptr"]["pointer"]["length"] + u8_ptr_ty = gdb.Type.pointer(gdb.lookup_type("u8")) + ptr = self._value.address.reinterpret_cast(u8_ptr_ty) + self._value = ptr.lazy_string(encoding="utf-8", length=length) self._strong = unwrap_scalar_wrappers(self._ptr["strong"]) self._weak = unwrap_scalar_wrappers(self._ptr["weak"]) - 1 
diff --git a/src/etc/rust_types.py b/src/etc/rust_types.py index ca462654e44e6..1cc526a25604a 100644 --- a/src/etc/rust_types.py +++ b/src/etc/rust_types.py @@ -37,12 +37,14 @@ class RustType(Enum): StdNonZeroNumber = 29 StdPath = 30 StdPathBuf = 31 + StdBoxStr = 32 STD_STRING_REGEX = re.compile(r"^(alloc::([a-z_]+::)+)String$") STD_STR_REGEX = re.compile(r"^&(mut )?str$") STD_SLICE_REGEX = re.compile(r"^&(mut )?\[.+\]$") STD_OS_STRING_REGEX = re.compile(r"^(std::ffi::([a-z_]+::)+)OsString$") +STD_BOX_STR_REGEX = re.compile(r"^(alloc::([a-z_]+::)+)Box$") STD_VEC_REGEX = re.compile(r"^(alloc::([a-z_]+::)+)Vec<.+>$") STD_VEC_DEQUE_REGEX = re.compile(r"^(alloc::([a-z_]+::)+)VecDeque<.+>$") STD_BTREE_SET_REGEX = re.compile(r"^(alloc::([a-z_]+::)+)BTreeSet<.+>$") @@ -67,6 +69,7 @@ class RustType(Enum): RustType.StdString: STD_STRING_REGEX, RustType.StdOsString: STD_OS_STRING_REGEX, RustType.StdStr: STD_STR_REGEX, + RustType.StdBoxStr: STD_BOX_STR_REGEX, RustType.StdSlice: STD_SLICE_REGEX, RustType.StdVec: STD_VEC_REGEX, RustType.StdVecDeque: STD_VEC_DEQUE_REGEX, diff --git a/tests/codegen-llvm/debuginfo-unsize-field.rs b/tests/codegen-llvm/debuginfo-unsize-field.rs new file mode 100644 index 0000000000000..f82e8c4f6ae5e --- /dev/null +++ b/tests/codegen-llvm/debuginfo-unsize-field.rs @@ -0,0 +1,58 @@ +//@ compile-flags:-g -Copt-level=0 -C panic=abort + +// Check that debug information for structs with embedded str and [u8] slices is distinct from +// structs with embedded u8 + +#![crate_type = "lib"] + +// CHECK: ![[U8:[0-9]+]] = !DIBasicType(name: "u8", + +pub struct Foo { + a: u32, + b: str, +} +// CHECK: !DICompositeType(tag: DW_TAG_structure_type, name: "&{{[^"]+}}::Foo", {{.*}}elements: ![[FOO_REF_ELEMS:[0-9]+]] +// CHECK: ![[FOO_REF_ELEMS]] = !{![[FOO_REF_PTR:[0-9]+]], ![[FOO_REF_LEN:[0-9]+]]} +// CHECK: ![[FOO_REF_PTR]] = !DIDerivedType(tag: DW_TAG_member, name: "data_ptr", {{.*}}baseType: ![[FOO_PTR:[0-9]+]] +// CHECK: ![[FOO_PTR]] = !DIDerivedType(tag: 
DW_TAG_pointer_type, baseType: ![[FOO:[0-9]+]] +// CHECK: ![[FOO]] = !DICompositeType(tag: DW_TAG_structure_type, name: "Foo", {{.*}}elements: ![[FOO_ELEMS:[0-9]+]] +// CHECK: ![[FOO_ELEMS]] = !{![[FOO_A:[0-9]+]], ![[FOO_B:[0-9]+]]} +// CHECK: ![[FOO_A]] = !DIDerivedType(tag: DW_TAG_member, name: "a" +// CHECK: ![[FOO_B]] = !DIDerivedType(tag: DW_TAG_member, name: "b", {{.*}}baseType: ![[U8_SLICE:[0-9]+]] +// +// CHECK: ![[U8_SLICE]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[U8]], {{.*}}elements: ![[U8_SLICE_ELEMS:[0-9]+]] +// CHECK: ![[U8_SLICE_ELEMS]] = !{![[U8_SLICE_RANGE:[0-9]+]]} +// this is special to embedded slices, there is no upper bound on the number of elements, +// that info is stored in the length metadata for a reference to the parent struct +// CHECK: ![[U8_SLICE_RANGE]] = !DISubrange(count: -1, lowerBound: 0) +// +// CHECK: ![[FOO_REF_LEN]] = !DIDerivedType(tag: DW_TAG_member, name: "length", {{.*}}baseType: ![[USIZE:[0-9]+]] +// CHECK: ![[USIZE]] = !DIBasicType(name: "usize" +pub struct Bar { + a: u32, + b: [u8], +} +// CHECK: !DICompositeType(tag: DW_TAG_structure_type, name: "&{{[^"]+}}::Bar", {{.*}}elements: ![[BAR_REF_ELEMS:[0-9]+]] +// CHECK: ![[BAR_REF_ELEMS]] = !{![[BAR_REF_PTR:[0-9]+]], ![[BAR_REF_LEN:[0-9]+]]} +// CHECK: ![[BAR_REF_PTR]] = !DIDerivedType(tag: DW_TAG_member, name: "data_ptr", {{.*}}baseType: ![[BAR_PTR:[0-9]+]] +// CHECK: ![[BAR_PTR]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: ![[BAR:[0-9]+]] +// CHECK: ![[BAR]] = !DICompositeType(tag: DW_TAG_structure_type, name: "Bar", {{.*}}elements: ![[BAR_ELEMS:[0-9]+]] +// CHECK: ![[BAR_ELEMS]] = !{![[BAR_A:[0-9]+]], ![[BAR_B:[0-9]+]]} +// CHECK: ![[BAR_A]] = !DIDerivedType(tag: DW_TAG_member, name: "a" +// CHECK: ![[BAR_B]] = !DIDerivedType(tag: DW_TAG_member, name: "b", {{.*}}baseType: ![[U8_SLICE]] +// CHECK: ![[BAR_REF_LEN]] = !DIDerivedType(tag: DW_TAG_member, name: "length", {{.*}}baseType: ![[USIZE:[0-9]+]] +pub struct Baz { + a: u32, + b: u8, +} +// 
CHECK: !DIDerivedType(tag: DW_TAG_pointer_type, name: "&{{[^"]+}}::Baz", {{.*}}baseType: ![[BAZ:[0-9]+]] +// CHECK: ![[BAZ]] = !DICompositeType(tag: DW_TAG_structure_type, name: "Baz", {{.*}}elements: ![[BAZ_ELEMS:[0-9]+]] +// CHECK: ![[BAZ_ELEMS]] = !{![[BAZ_A:[0-9]+]], ![[BAZ_B:[0-9]+]]} +// CHECK: ![[BAZ_A]] = !DIDerivedType(tag: DW_TAG_member, name: "a" +// CHECK: ![[BAZ_B]] = !DIDerivedType(tag: DW_TAG_member, name: "b", {{.*}}baseType: ![[U8]] + +#[no_mangle] +pub fn test<'a>(a: &'a Foo, b: &'a Bar, c: &'a Baz) -> &'a u8 { + // just use this somehow so the debuginfo isn't removed + &a.b.as_bytes()[0] +} diff --git a/tests/debuginfo/strings-and-strs.rs b/tests/debuginfo/strings-and-strs.rs index 165cfcd968a67..178b92df3630a 100644 --- a/tests/debuginfo/strings-and-strs.rs +++ b/tests/debuginfo/strings-and-strs.rs @@ -23,6 +23,12 @@ //@ gdb-command:print str_in_rc //@ gdb-check:$5 = alloc::rc::Rc<&str, alloc::alloc::Global> {ptr: core::ptr::non_null::NonNull> {pointer: 0x[...]}, phantom: core::marker::PhantomData>, alloc: alloc::alloc::Global} +//@ gdb-command:print box_str +//@ gdb-check:$6 = alloc::boxed::Box [87, 111, 114, 108, 100] + +//@ gdb-command:print rc_str +//@ gdb-check:$7 = alloc::rc::Rc {ptr: core::ptr::non_null::NonNull> {pointer: alloc::rc::RcInner {strong: core::cell::Cell {value: core::cell::UnsafeCell {value: 1}}, weak: core::cell::Cell {value: core::cell::UnsafeCell {value: 1}}, value: 0x[...]}}, phantom: core::marker::PhantomData>, alloc: alloc::alloc::Global} + // === LLDB TESTS ================================================================================== //@ lldb-command:run //@ lldb-command:v plain_string @@ -40,6 +46,12 @@ //@ lldb-command:v str_in_rc //@ lldb-check:(alloc::rc::Rc<&str, alloc::alloc::Global>) str_in_rc = strong=1, weak=0 { value = "Hello" { [0] = 'H' [1] = 'e' [2] = 'l' [3] = 'l' [4] = 'o' } } +//@ lldb-command:v box_str +//@ lldb-check:(alloc::boxed::Box) box_str = { __0 = { pointer = { pointer = { data_ptr = 
0x[...] "World" length = 5 } } _marker = } __1 = } + +//@ lldb-command:v rc_str +//@ lldb-check:(alloc::rc::Rc<str, alloc::alloc::Global>) rc_str = strong=1, weak=0 { value = "World" } + #![allow(unused_variables)] pub struct Foo<'a> { @@ -53,6 +65,8 @@ fn main() { let str_in_tuple = ("Hello", "World"); let str_in_rc = std::rc::Rc::new("Hello"); + let box_str: Box<str> = "World".into(); + let rc_str: std::rc::Rc<str> = "World".into(); zzz(); // #break }