diff --git a/compiler/rustc_ast_lowering/src/lib.rs b/compiler/rustc_ast_lowering/src/lib.rs index 1ce4478c09e8b..fbe293c1a851d 100644 --- a/compiler/rustc_ast_lowering/src/lib.rs +++ b/compiler/rustc_ast_lowering/src/lib.rs @@ -45,7 +45,6 @@ use rustc_attr_parsing::{AttributeParser, OmitDoc, Recovery, ShouldEmit}; use rustc_data_structures::fingerprint::Fingerprint; use rustc_data_structures::fx::FxIndexSet; use rustc_data_structures::sorted_map::SortedMap; -use rustc_data_structures::stable_hasher::{StableHash, StableHasher}; use rustc_data_structures::steal::Steal; use rustc_data_structures::tagged_ptr::TaggedRef; use rustc_errors::{DiagArgFromDisplay, DiagCtxtHandle}; @@ -506,25 +505,15 @@ fn index_crate<'a, 'b>( /// Compute the hash for the HIR of the full crate. /// This hash will then be part of the crate_hash which is stored in the metadata. -fn compute_hir_hash( - tcx: TyCtxt<'_>, - owners: &IndexSlice>, -) -> Fingerprint { - let mut hir_body_nodes: Vec<_> = owners +fn compute_hir_hash(owners: &IndexSlice>) -> Fingerprint { + owners .iter_enumerated() - .filter_map(|(def_id, info)| { + .filter_map(|(_, info)| { let info = info.as_owner()?; - let def_path_hash = tcx.hir_def_path_hash(def_id); - Some((def_path_hash, info)) + Some(info.fingerprint()) }) - .collect(); - hir_body_nodes.sort_unstable_by_key(|bn| bn.0); - - tcx.with_stable_hashing_context(|mut hcx| { - let mut stable_hasher = StableHasher::new(); - hir_body_nodes.stable_hash(&mut hcx, &mut stable_hasher); - stable_hasher.finish() - }) + .reduce(Fingerprint::combine_commutative) + .expect("HIR hash requested without any content") } pub fn lower_to_hir(tcx: TyCtxt<'_>, (): ()) -> mid_hir::Crate<'_> { @@ -561,8 +550,7 @@ pub fn lower_to_hir(tcx: TyCtxt<'_>, (): ()) -> mid_hir::Crate<'_> { } // Don't hash unless necessary, because it's expensive. 
- let opt_hir_hash = - if tcx.needs_hir_hash() { Some(compute_hir_hash(tcx, &owners)) } else { None }; + let opt_hir_hash = if tcx.needs_hir_hash() { Some(compute_hir_hash(&owners)) } else { None }; let delayed_resolver = Steal::new((resolver, krate)); mid_hir::Crate::new(owners, delayed_ids, delayed_resolver, opt_hir_hash) diff --git a/compiler/rustc_driver_impl/src/lib.rs b/compiler/rustc_driver_impl/src/lib.rs index 8d80e742a1b27..8db78cf4d5133 100644 --- a/compiler/rustc_driver_impl/src/lib.rs +++ b/compiler/rustc_driver_impl/src/lib.rs @@ -324,10 +324,6 @@ pub fn run_compiler(at_args: &[String], callbacks: &mut (dyn Callbacks + Send)) tcx.ensure_ok().analysis(()); - if let Some(metrics_dir) = &sess.opts.unstable_opts.metrics_dir { - dump_feature_usage_metrics(tcx, metrics_dir); - } - if callbacks.after_analysis(compiler, tcx) == Compilation::Stop { return early_exit(); } @@ -340,6 +336,10 @@ pub fn run_compiler(at_args: &[String], callbacks: &mut (dyn Callbacks + Send)) let linker = Linker::codegen_and_build_linker(tcx, &*compiler.codegen_backend); + if let Some(metrics_dir) = &sess.opts.unstable_opts.metrics_dir { + dump_feature_usage_metrics(tcx, metrics_dir); + } + tcx.report_unused_features(); Some(linker) diff --git a/compiler/rustc_hir/src/hir.rs b/compiler/rustc_hir/src/hir.rs index 2f18b09cf1ae8..2f36ff6ac59a5 100644 --- a/compiler/rustc_hir/src/hir.rs +++ b/compiler/rustc_hir/src/hir.rs @@ -1645,6 +1645,19 @@ impl<'tcx> OwnerInfo<'tcx> { pub fn node(&self) -> OwnerNode<'tcx> { self.nodes.node() } + + // A fingerprint that identifies the contents of the OwnerInfo. + // It only depends on `nodes` and `attrs` because `parenting` and `trait_map` are + // deterministically calculated from `nodes` and `attrs`. 
+ #[inline] + pub fn fingerprint(&self) -> Fingerprint { + let body = self + .nodes + .opt_hash_including_bodies + .expect("HIR hash requested without needs_hir_hash"); + let attrs = self.attrs.opt_hash.expect("HIR hash requested without needs_hir_hash"); + body.combine(attrs) + } } #[derive(Copy, Clone, Debug, StableHash)] diff --git a/compiler/rustc_interface/src/passes.rs b/compiler/rustc_interface/src/passes.rs index cb41974af41b0..b227bd61d432c 100644 --- a/compiler/rustc_interface/src/passes.rs +++ b/compiler/rustc_interface/src/passes.rs @@ -948,8 +948,13 @@ pub fn create_and_enter_global_ctxt FnOnce(TyCtxt<'tcx>) -> T>( let definitions = FreezeLock::new(Definitions::new(stable_crate_id)); let stable_crate_ids = FreezeLock::new(StableCrateIdMap::default()); - let untracked = - Untracked { cstore, source_span: AppendOnlyIndexVec::new(), definitions, stable_crate_ids }; + let untracked = Untracked { + cstore, + source_span: AppendOnlyIndexVec::new(), + definitions, + stable_crate_ids, + local_crate_hash: OnceLock::new(), + }; // We're constructing the HIR here; we don't care what we will // read, since we haven't even constructed the *input* to diff --git a/compiler/rustc_metadata/src/lib.rs b/compiler/rustc_metadata/src/lib.rs index 1dff5740ab3bc..f76e975620d99 100644 --- a/compiler/rustc_metadata/src/lib.rs +++ b/compiler/rustc_metadata/src/lib.rs @@ -1,5 +1,6 @@ // tidy-alphabetical-start #![allow(internal_features)] +#![feature(core_intrinsics)] #![feature(error_iter)] #![feature(file_buffered)] #![feature(gen_blocks)] diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index 729a0dda7cf3b..44b7abe9f2def 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -2,11 +2,14 @@ use std::borrow::Borrow; use std::collections::hash_map::Entry; use std::fs::File; use std::io::{Read, Seek, Write}; +use std::ops::Deref; use std::path::{Path, PathBuf}; use 
std::sync::Arc; use rustc_data_structures::fx::{FxIndexMap, FxIndexSet}; use rustc_data_structures::memmap::{Mmap, MmapMut}; +use rustc_data_structures::owned_slice::slice_owned; +use rustc_data_structures::stable_hasher::{StableHash, StableHasher}; use rustc_data_structures::sync::{par_for_each_in, par_join}; use rustc_data_structures::temp_dir::MaybeTempDir; use rustc_data_structures::thousands::usize_with_underscores; @@ -25,7 +28,7 @@ use rustc_middle::ty::AssocContainer; use rustc_middle::ty::codec::TyEncoder; use rustc_middle::ty::fast_reject::{self, TreatParams}; use rustc_middle::{bug, span_bug}; -use rustc_serialize::{Decodable, Decoder, Encodable, Encoder, opaque}; +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; use rustc_session::config::mitigation_coverage::DeniedPartialMitigation; use rustc_session::config::{CrateType, OptLevel, TargetModifier}; use rustc_span::hygiene::HygieneEncodeContext; @@ -40,7 +43,7 @@ use crate::errors::{FailCreateFileEncoder, FailWriteFile}; use crate::rmeta::*; pub(super) struct EncodeContext<'a, 'tcx> { - opaque: opaque::FileEncoder, + opaque: opaque::FileEncoder<'a>, tcx: TyCtxt<'tcx>, feat: &'tcx rustc_feature::Features, tables: TableBuilders, @@ -718,13 +721,16 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { let denied_partial_mitigations = stat!("denied-partial-mitigations", || self .encode_enabled_denied_partial_mitigations()); + let hash = Svh::new(self.opaque.hash()); + tcx.untracked().local_crate_hash.set(hash).expect("local_crate_hash set twice"); + let root = stat!("final", || { let attrs = tcx.hir_krate_attrs(); self.lazy(CrateRoot { header: CrateHeader { name: tcx.crate_name(LOCAL_CRATE), triple: tcx.sess.opts.target_triple.clone(), - hash: tcx.crate_hash(LOCAL_CRATE), + hash, is_proc_macro_crate: proc_macro_data.is_some(), is_stub: false, }, @@ -2428,22 +2434,6 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path, ref_path: Option<&Path>) { // there's no need to do dep-graph tracking for any of it. 
tcx.dep_graph.assert_ignored(); - // Generate the metadata stub manually, as that is a small file compared to full metadata. - if let Some(ref_path) = ref_path { - let _prof_timer = tcx.prof.verbose_generic_activity("generate_crate_metadata_stub"); - - with_encode_metadata_header(tcx, ref_path, |ecx| { - let header: LazyValue = ecx.lazy(CrateHeader { - name: tcx.crate_name(LOCAL_CRATE), - triple: tcx.sess.opts.target_triple.clone(), - hash: tcx.crate_hash(LOCAL_CRATE), - is_proc_macro_crate: false, - is_stub: true, - }); - header.position.get() - }) - } - let _prof_timer = tcx.prof.verbose_generic_activity("generate_crate_metadata"); let dep_node = tcx.metadata_dep_node(); @@ -2462,6 +2452,31 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path, ref_path: Option<&Path>) { Ok(_) => {} Err(err) => tcx.dcx().emit_fatal(FailCreateFileEncoder { err }), }; + + // Read the SVH from the old metadata header. + let file = std::fs::File::open(&source_file).unwrap(); + let mmap = unsafe { Mmap::map(file) }.unwrap(); + let owned = slice_owned(mmap, Deref::deref); + let blob = MetadataBlob::new(owned); + let header = blob.expect("file already created").get_header(); + tcx.untracked().local_crate_hash.set(header.hash).expect("local_crate_hash set twice"); + + // Generate the metadata stub manually, as that is a small file compared to full metadata. 
+ if let Some(ref_path) = ref_path { + let _prof_timer = tcx.prof.verbose_generic_activity("generate_crate_metadata_stub"); + + with_encode_metadata_header(tcx, ref_path, |ecx| { + let header: LazyValue = ecx.lazy(CrateHeader { + name: tcx.crate_name(LOCAL_CRATE), + triple: tcx.sess.opts.target_triple.clone(), + hash: tcx.crate_hash(LOCAL_CRATE), + is_proc_macro_crate: false, + is_stub: true, + }); + header.position.get() + }) + } + return; }; @@ -2503,6 +2518,22 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path, ref_path: Option<&Path>) { }, None, ); + + // Generate the metadata stub manually, as that is a small file compared to full metadata. + if let Some(ref_path) = ref_path { + let _prof_timer = tcx.prof.verbose_generic_activity("generate_crate_metadata_stub"); + + with_encode_metadata_header(tcx, ref_path, |ecx| { + let header: LazyValue = ecx.lazy(CrateHeader { + name: tcx.crate_name(LOCAL_CRATE), + triple: tcx.sess.opts.target_triple.clone(), + hash: tcx.crate_hash(LOCAL_CRATE), + is_proc_macro_crate: false, + is_stub: true, + }); + header.position.get() + }) + } } fn with_encode_metadata_header( @@ -2510,7 +2541,13 @@ fn with_encode_metadata_header( path: &Path, f: impl FnOnce(&mut EncodeContext<'_, '_>) -> usize, ) { - let mut encoder = opaque::FileEncoder::new(path) + let mut stable_hasher = StableHasher::new(); + // Add dep_tracking_hash to ensure the SVH changes when any tracked flag changes. 
+ tcx.with_stable_hashing_context(|mut hcx| { + tcx.sess.opts.dep_tracking_hash(true).stable_hash(&mut hcx, &mut stable_hasher); + }); + + let mut encoder = opaque::FileEncoder::new(path, &mut stable_hasher) .unwrap_or_else(|err| tcx.dcx().emit_fatal(FailCreateFileEncoder { err })); encoder.emit_raw_bytes(METADATA_HEADER); diff --git a/compiler/rustc_metadata/src/rmeta/mod.rs b/compiler/rustc_metadata/src/rmeta/mod.rs index c7b2eaa15ebfb..a8b7dd08661ec 100644 --- a/compiler/rustc_metadata/src/rmeta/mod.rs +++ b/compiler/rustc_metadata/src/rmeta/mod.rs @@ -35,7 +35,6 @@ use rustc_middle::mir::ConstValue; use rustc_middle::ty::fast_reject::SimplifiedType; use rustc_middle::ty::{self, Ty, TyCtxt}; use rustc_middle::util::Providers; -use rustc_serialize::opaque::FileEncoder; use rustc_session::config::mitigation_coverage::DeniedPartialMitigation; use rustc_session::config::{SymbolManglingVersion, TargetModifier}; use rustc_session::cstore::{CrateDepKind, ForeignModule, LinkagePreference, NativeLib}; @@ -50,6 +49,7 @@ use crate::eii::EiiMapEncodedKeyValue; mod decoder; mod def_path_hash_map; mod encoder; +mod opaque; mod parameterized; mod table; @@ -364,7 +364,7 @@ macro_rules! 
define_tables { } impl TableBuilders { - fn encode(&self, buf: &mut FileEncoder) -> LazyTables { + fn encode(&self, buf: &mut opaque::FileEncoder<'_>) -> LazyTables { LazyTables { $($name1: self.$name1.encode(buf),)+ $($name2: self.$name2.encode(buf),)+ diff --git a/compiler/rustc_metadata/src/rmeta/opaque.rs b/compiler/rustc_metadata/src/rmeta/opaque.rs new file mode 100644 index 0000000000000..854826c2cc736 --- /dev/null +++ b/compiler/rustc_metadata/src/rmeta/opaque.rs @@ -0,0 +1,270 @@ +use std::fs::File; +use std::hash::Hasher; +use std::io::{self, Write}; +use std::path::{Path, PathBuf}; + +use rustc_data_structures::fingerprint::Fingerprint; +use rustc_data_structures::stable_hasher::StableHasher; +use rustc_serialize::int_overflow::DebugStrictAdd; +use rustc_serialize::{Encodable, Encoder, leb128}; + +// ----------------------------------------------------------------------------- +// Encoder +// ----------------------------------------------------------------------------- + +pub(super) type FileEncodeResult = Result; + +pub(super) const MAGIC_END_BYTES: &[u8] = b"rust-end-file"; + +/// The size of the buffer in `FileEncoder`. +const BUF_SIZE: usize = 64 * 1024; + +/// `FileEncoder` encodes data to file via fixed-size buffer. +/// +/// There used to be a `MemEncoder` type that encoded all the data into a +/// `Vec`. `FileEncoder` is better because its memory use is determined by the +/// size of the buffer, rather than the full length of the encoded data, and +/// because it doesn't need to reallocate memory along the way. +pub(super) struct FileEncoder<'a> { + // The input buffer. For adequate performance, we need to be able to write + // directly to the unwritten region of the buffer, without calling copy_from_slice. + // Note that our buffer is always initialized so that we can do that direct access + // without unsafe code. Users of this type write many more than BUF_SIZE bytes, so the + // initialization is approximately free. 
+ buf: Box<[u8; BUF_SIZE]>, + buffered: usize, + flushed: usize, + file: File, + // This is used to implement delayed error handling, as described in the + // comment on `trait Encoder`. + res: Result<(), io::Error>, + path: PathBuf, + stable_hasher: &'a mut StableHasher, + #[cfg(debug_assertions)] + finished: bool, +} + +impl<'a> FileEncoder<'a> { + pub(super) fn new>( + path: P, + stable_hasher: &'a mut StableHasher, + ) -> io::Result { + // File::create opens the file for writing only. When -Zmeta-stats is enabled, the metadata + // encoder rewinds the file to inspect what was written. So we need to always open the file + // for reading and writing. + let file = + File::options().read(true).write(true).create(true).truncate(true).open(&path)?; + + Ok(FileEncoder { + buf: vec![0u8; BUF_SIZE].into_boxed_slice().try_into().unwrap(), + path: path.as_ref().into(), + buffered: 0, + flushed: 0, + file, + res: Ok(()), + stable_hasher, + #[cfg(debug_assertions)] + finished: false, + }) + } + + #[inline] + pub(super) fn position(&self) -> usize { + // Tracking position this way instead of having a `self.position` field + // means that we only need to update `self.buffered` on a write call, + // as opposed to updating `self.position` and `self.buffered`. + self.flushed.debug_strict_add(self.buffered) + } + + #[cold] + #[inline(never)] + pub(super) fn flush(&mut self) { + #[cfg(debug_assertions)] + { + self.finished = false; + } + if self.res.is_ok() { + self.res = self.file.write_all(&self.buf[..self.buffered]); + } + self.flushed += self.buffered; + self.stable_hasher.write(&self.buf[..self.buffered]); + self.buffered = 0; + } + + #[inline] + pub(super) fn file(&self) -> &File { + &self.file + } + + #[inline] + pub(super) fn path(&self) -> &Path { + &self.path + } + + #[inline] + fn buffer_empty(&mut self) -> &mut [u8] { + // SAFETY: self.buffered is inbounds as an invariant of the type + unsafe { self.buf.get_unchecked_mut(self.buffered..) 
} + } + + #[cold] + #[inline(never)] + fn write_all_cold_path(&mut self, buf: &[u8]) { + self.flush(); + if let Some(dest) = self.buf.get_mut(..buf.len()) { + dest.copy_from_slice(buf); + self.buffered += buf.len(); + } else { + if self.res.is_ok() { + self.stable_hasher.write(buf); + self.res = self.file.write_all(buf); + } + self.flushed += buf.len(); + } + } + + #[inline] + fn write_all(&mut self, buf: &[u8]) { + #[cfg(debug_assertions)] + { + self.finished = false; + } + if let Some(dest) = self.buffer_empty().get_mut(..buf.len()) { + dest.copy_from_slice(buf); + self.buffered = self.buffered.debug_strict_add(buf.len()); + } else { + self.write_all_cold_path(buf); + } + } + + /// Write up to `N` bytes to this encoder. + /// + /// This function can be used to avoid the overhead of calling memcpy for writes that + /// have runtime-variable length, but are small and have a small fixed upper bound. + /// + /// This can be used to do in-place encoding as is done for leb128 (without this function + /// we would need to write to a temporary buffer then memcpy into the encoder), and it can + /// also be used to implement the varint scheme we use for rmeta and dep graph encoding, + /// where we only want to encode the first few bytes of an integer. Copying in the whole + /// integer then only advancing the encoder state for the few bytes we care about is more + /// efficient than calling [`FileEncoder::write_all`], because variable-size copies are + /// always lowered to `memcpy`, which has overhead and contains a lot of logic we can bypass + /// with this function. Note that common architectures support fixed-size writes up to 8 bytes + /// with one instruction, so while this does in some sense do wasted work, we come out ahead. 
+ #[inline] + pub(super) fn write_with( + &mut self, + visitor: impl FnOnce(&mut [u8; N]) -> usize, + ) { + #[cfg(debug_assertions)] + { + self.finished = false; + } + let flush_threshold = const { BUF_SIZE.checked_sub(N).unwrap() }; + if std::intrinsics::unlikely(self.buffered > flush_threshold) { + self.flush(); + } + // SAFETY: We checked above that N < self.buffer_empty().len(), + // and if it isn't, flush ensures that our empty buffer is now BUF_SIZE. + // We produce a post-mono error if N > BUF_SIZE. + let buf = unsafe { self.buffer_empty().first_chunk_mut::().unwrap_unchecked() }; + let written = visitor(buf); + // We have to ensure that an errant visitor cannot cause self.buffered to exceed BUF_SIZE. + if written > N { + Self::panic_invalid_write::(written); + } + self.buffered = self.buffered.debug_strict_add(written); + } + + #[cold] + #[inline(never)] + fn panic_invalid_write(written: usize) { + panic!("FileEncoder::write_with::<{N}> cannot be used to write {written} bytes"); + } + + /// Helper for calls where [`FileEncoder::write_with`] always writes the whole array. + #[inline] + pub(super) fn write_array(&mut self, buf: [u8; N]) { + self.write_with(|dest| { + *dest = buf; + N + }) + } + + pub(super) fn finish(&mut self) -> FileEncodeResult { + self.write_all(MAGIC_END_BYTES); + self.flush(); + #[cfg(debug_assertions)] + { + self.finished = true; + } + match std::mem::replace(&mut self.res, Ok(())) { + Ok(()) => Ok(self.position()), + Err(e) => Err((self.path.clone(), e)), + } + } + + pub(super) fn hash(&mut self) -> Fingerprint { + self.flush(); + self.stable_hasher.clone().finish() + } +} + +#[cfg(debug_assertions)] +impl<'a> Drop for FileEncoder<'a> { + fn drop(&mut self) { + if !std::thread::panicking() { + assert!(self.finished); + } + } +} + +macro_rules! 
write_leb128 { + ($this_fn:ident, $int_ty:ty, $write_leb_fn:ident) => { + #[inline] + fn $this_fn(&mut self, v: $int_ty) { + self.write_with(|buf| leb128::$write_leb_fn(buf, v)) + } + }; +} + +impl Encoder for FileEncoder<'_> { + write_leb128!(emit_usize, usize, write_usize_leb128); + write_leb128!(emit_u128, u128, write_u128_leb128); + write_leb128!(emit_u64, u64, write_u64_leb128); + write_leb128!(emit_u32, u32, write_u32_leb128); + + #[inline] + fn emit_u16(&mut self, v: u16) { + self.write_array(v.to_le_bytes()); + } + + #[inline] + fn emit_u8(&mut self, v: u8) { + self.write_array([v]); + } + + write_leb128!(emit_isize, isize, write_isize_leb128); + write_leb128!(emit_i128, i128, write_i128_leb128); + write_leb128!(emit_i64, i64, write_i64_leb128); + write_leb128!(emit_i32, i32, write_i32_leb128); + + #[inline] + fn emit_i16(&mut self, v: i16) { + self.write_array(v.to_le_bytes()); + } + + #[inline] + fn emit_raw_bytes(&mut self, s: &[u8]) { + self.write_all(s); + } +} + +// Specialize encoding byte slices. This specialization also applies to encoding `Vec`s, etc., +// since the default implementations call `encode` on their slices internally. 
+impl Encodable> for [u8] { + fn encode(&self, e: &mut FileEncoder<'_>) { + Encoder::emit_usize(e, self.len()); + e.emit_raw_bytes(self); + } +} diff --git a/compiler/rustc_metadata/src/rmeta/table.rs b/compiler/rustc_metadata/src/rmeta/table.rs index 26c5908563777..2172979cc77be 100644 --- a/compiler/rustc_metadata/src/rmeta/table.rs +++ b/compiler/rustc_metadata/src/rmeta/table.rs @@ -486,7 +486,7 @@ impl> TableBui } } - pub(crate) fn encode(&self, buf: &mut FileEncoder) -> LazyTable { + pub(crate) fn encode(&self, buf: &mut opaque::FileEncoder<'_>) -> LazyTable { let pos = buf.position(); let width = self.width; diff --git a/compiler/rustc_middle/src/hir/map.rs b/compiler/rustc_middle/src/hir/map.rs index bda9b3a47849e..917f2fcb64cfd 100644 --- a/compiler/rustc_middle/src/hir/map.rs +++ b/compiler/rustc_middle/src/hir/map.rs @@ -9,16 +9,15 @@ use rustc_data_structures::stable_hasher::{StableHash, StableHasher}; use rustc_data_structures::svh::Svh; use rustc_data_structures::sync::{DynSend, DynSync, par_for_each_in, spawn, try_par_for_each_in}; use rustc_hir::def::{DefKind, Res}; -use rustc_hir::def_id::{DefId, LOCAL_CRATE, LocalDefId, LocalModDefId}; +use rustc_hir::def_id::{DefId, LocalDefId, LocalModDefId}; use rustc_hir::definitions::{DefKey, DefPath, DefPathHash}; use rustc_hir::intravisit::Visitor; use rustc_hir::*; use rustc_hir_pretty as pprust_hir; -use rustc_span::def_id::StableCrateId; -use rustc_span::{ErrorGuaranteed, Ident, Span, Symbol, kw, with_metavar_spans}; +use rustc_span::{ErrorGuaranteed, Ident, Span, Symbol, kw}; +use crate::hir::def_id::LOCAL_CRATE; use crate::hir::{ModuleItems, nested_filter}; -use crate::middle::debugger_visualizer::DebuggerVisualizerFile; use crate::query::LocalCrate; use crate::ty::TyCtxt; @@ -1122,92 +1121,62 @@ impl<'tcx> pprust_hir::PpAnn for TyCtxt<'tcx> { } pub(super) fn crate_hash(tcx: TyCtxt<'_>, _: LocalCrate) -> Svh { - let krate = tcx.hir_crate(()); - let hir_body_hash = krate.opt_hir_hash.expect("HIR hash 
missing while computing crate hash"); - - let upstream_crates = upstream_crates(tcx); - - let resolutions = tcx.resolutions(()); - - // We hash the final, remapped names of all local source files so we - // don't have to include the path prefix remapping commandline args. - // If we included the full mapping in the SVH, we could only have - // reproducible builds by compiling from the same directory. So we just - // hash the result of the mapping instead of the mapping itself. - let mut source_file_names: Vec<_> = tcx - .sess - .source_map() - .files() - .iter() - .filter(|source_file| source_file.cnum == LOCAL_CRATE) - .map(|source_file| source_file.stable_id) - .collect(); - - source_file_names.sort_unstable(); - - // We have to take care of debugger visualizers explicitly. The HIR (and - // thus `hir_body_hash`) contains the #[debugger_visualizer] attributes but - // these attributes only store the file path to the visualizer file, not - // their content. Yet that content is exported into crate metadata, so any - // changes to it need to be reflected in the crate hash. - let debugger_visualizers: Vec<_> = tcx + if tcx.needs_metadata() { + *tcx.untracked() + .local_crate_hash + .get() + .expect("crate_hash(LOCAL_CRATE) called before metadata encoding") + } else { + let krate = tcx.hir_crate(()); + let hir_body_hash = + krate.opt_hir_hash.expect("HIR hash missing while computing crate hash"); + + let upstream_crates = tcx.crates(()); //upstream_crates(tcx); + + // let resolutions = tcx.resolutions(()); + + // We hash the final, remapped names of all local source files so we + // don't have to include the path prefix remapping commandline args. + // If we included the full mapping in the SVH, we could only have + // reproducible builds by compiling from the same directory. So we just + // hash the result of the mapping instead of the mapping itself. 
+ /*let mut source_file_names: Vec<_> = tcx + .sess + .source_map() + .files() + .iter() + .filter(|source_file| source_file.cnum == LOCAL_CRATE) + .map(|source_file| source_file.stable_id) + .collect(); + + source_file_names.sort_unstable();*/ + + // We have to take care of debugger visualizers explicitly. The HIR (and + // thus `hir_body_hash`) contains the #[debugger_visualizer] attributes but + // these attributes only store the file path to the visualizer file, not + // their content. Yet that content is exported into crate metadata, so any + // changes to it need to be reflected in the crate hash. + /*let debugger_visualizers: Vec<_> = tcx .debugger_visualizers(LOCAL_CRATE) .iter() // We ignore the path to the visualizer file since it's not going to be // encoded in crate metadata and we already hash the full contents of // the file. .map(DebuggerVisualizerFile::path_erased) - .collect(); - - let crate_hash: Fingerprint = tcx.with_stable_hashing_context(|mut hcx| { - let mut stable_hasher = StableHasher::new(); - hir_body_hash.stable_hash(&mut hcx, &mut stable_hasher); - upstream_crates.stable_hash(&mut hcx, &mut stable_hasher); - source_file_names.stable_hash(&mut hcx, &mut stable_hasher); - debugger_visualizers.stable_hash(&mut hcx, &mut stable_hasher); - if tcx.sess.opts.incremental.is_some() { - let definitions = tcx.untracked().definitions.freeze(); - let mut owner_spans: Vec<_> = tcx - .hir_crate_items(()) - .definitions() - .map(|def_id| { - let def_path_hash = definitions.def_path_hash(def_id); - let span = tcx.source_span(def_id); - debug_assert_eq!(span.parent(), None); - (def_path_hash, span) - }) - .collect(); - owner_spans.sort_unstable_by_key(|bn| bn.0); - owner_spans.stable_hash(&mut hcx, &mut stable_hasher); - } - tcx.sess.opts.dep_tracking_hash(true).stable_hash(&mut hcx, &mut stable_hasher); - tcx.stable_crate_id(LOCAL_CRATE).stable_hash(&mut hcx, &mut stable_hasher); - // Hash visibility information since it does not appear in HIR. 
- // FIXME: Figure out how to remove `visibilities_for_hashing` by hashing visibilities on - // the fly in the resolver, storing only their accumulated hash in `ResolverGlobalCtxt`, - // and combining it with other hashes here. - resolutions.visibilities_for_hashing.stable_hash(&mut hcx, &mut stable_hasher); - with_metavar_spans(|mspans| { - mspans.freeze_and_get_read_spans().stable_hash(&mut hcx, &mut stable_hasher); - }); - stable_hasher.finish() - }); + .collect();*/ - Svh::new(crate_hash) -} + let crate_hash: Fingerprint = tcx.with_stable_hashing_context(|mut hcx| { + let mut stable_hasher = StableHasher::new(); + hir_body_hash.stable_hash(&mut hcx, &mut stable_hasher); + upstream_crates.stable_hash(&mut hcx, &mut stable_hasher); + tcx.sess.opts.dep_tracking_hash(true).stable_hash(&mut hcx, &mut stable_hasher); + tcx.stable_crate_id(LOCAL_CRATE).stable_hash(&mut hcx, &mut stable_hasher); -fn upstream_crates(tcx: TyCtxt<'_>) -> Vec<(StableCrateId, Svh)> { - let mut upstream_crates: Vec<_> = tcx - .crates(()) - .iter() - .map(|&cnum| { - let stable_crate_id = tcx.stable_crate_id(cnum); - let hash = tcx.crate_hash(cnum); - (stable_crate_id, hash) - }) - .collect(); - upstream_crates.sort_unstable_by_key(|&(stable_crate_id, _)| stable_crate_id); - upstream_crates + stable_hasher.finish() + }); + + Svh::new(crate_hash) + } } pub(super) fn hir_module_items(tcx: TyCtxt<'_>, module_id: LocalModDefId) -> ModuleItems { diff --git a/compiler/rustc_session/src/cstore.rs b/compiler/rustc_session/src/cstore.rs index 39fe9c80923ec..bf64db91ce346 100644 --- a/compiler/rustc_session/src/cstore.rs +++ b/compiler/rustc_session/src/cstore.rs @@ -4,8 +4,10 @@ use std::any::Any; use std::path::PathBuf; +use std::sync::OnceLock; use rustc_abi::ExternAbi; +use rustc_data_structures::svh::Svh; use rustc_data_structures::sync::{self, AppendOnlyIndexVec, FreezeLock}; use rustc_hir::attrs::{CfgEntry, NativeLibKind, PeImportNameType}; use rustc_hir::def_id::{ @@ -223,4 +225,6 @@ pub struct 
Untracked { pub definitions: FreezeLock, /// The interned [StableCrateId]s. pub stable_crate_ids: FreezeLock, + /// The hash of the local crate as computed in metadata encoding. + pub local_crate_hash: OnceLock, } diff --git a/tests/run-make/proc-macro-dep-source-changes-crate-hash/foo.rs b/tests/run-make/proc-macro-dep-source-changes-crate-hash/foo.rs new file mode 100644 index 0000000000000..d645ab8680949 --- /dev/null +++ b/tests/run-make/proc-macro-dep-source-changes-crate-hash/foo.rs @@ -0,0 +1,16 @@ +// Consumer crate. Byte-identical across all invocations of the test; +// only the tokens spliced in by `#[derive(ChangingDerive)]` change between +// builds, driven by which version of the proc-macro is on disk. + +#![crate_type = "rlib"] + +extern crate changing_macro; + +use changing_macro::ChangingDerive; + +#[derive(ChangingDerive)] +pub struct Foo; + +pub fn answer() -> u32 { + ANSWER +} diff --git a/tests/run-make/proc-macro-dep-source-changes-crate-hash/rmake.rs b/tests/run-make/proc-macro-dep-source-changes-crate-hash/rmake.rs new file mode 100644 index 0000000000000..24eceb9b8667f --- /dev/null +++ b/tests/run-make/proc-macro-dep-source-changes-crate-hash/rmake.rs @@ -0,0 +1,75 @@ +// Regression test for #94878 / PR #154724. +// +// Verifies that when the *source* of a proc-macro dependency changes (so the +// tokens it emits in the consumer crate change), the consumer crate's +// crate_hash / SVH changes. This is the run-make equivalent of an incremental +// `#[rustc_clean]` test: we can't assert on the crate_hash query directly via +// #[rustc_clean] (it's a crate-level query and not in the per-item whitelist +// in compiler/rustc_incremental/src/persist/clean.rs), so instead we extract +// the SVH from `-Zls=root` and compare it across builds. +// +// This test is specifically meant to fail if the HIR hash is removed from the +// crate_hash without something equivalent (e.g. 
metadata-derived hashing) +// taking its place: in that case, the same consumer source compiled against +// two different proc-macro sources would produce the same SVH. + +//@ ignore-cross-compile +//@ needs-crate-type: proc-macro +//@ ignore-musl (FIXME: can't find `-lunwind`) + +use run_make_support::rustc; + +fn extract_hash(ls_output: &str) -> &str { + // -Zls=root prints a line of the form: + // hash stable_crate_id + for line in ls_output.lines() { + if let Some(rest) = line.strip_prefix("hash ") { + return rest.split_whitespace().next().expect("malformed -Zls=root output"); + } + } + panic!("could not find `hash` line in -Zls=root output:\n{ls_output}"); +} + +fn build_consumer_and_get_hash() -> String { + // Don't reuse the previous build's artifact. The first invocation won't + // have one, so ignore NotFound. + let _ = std::fs::remove_file("libfoo.rlib"); + + rustc().input("foo.rs").run(); + + let ls_out = rustc().arg("-Zls=root").input("libfoo.rlib").run().stdout_utf8(); + extract_hash(&ls_out).to_owned() +} + +fn build_macro(src: &str) { + rustc().input(src).crate_name("changing_macro").crate_type("proc-macro").run(); +} + +fn main() { + // 1. Build proc-macro v1, then build the consumer and record its SVH. + build_macro("v1.rs"); + let hash_v1 = build_consumer_and_get_hash(); + + // 2. Rebuild proc-macro from a different source. The consumer source + // (foo.rs) is byte-identical, but the tokens spliced in by the + // derive will differ. + build_macro("v2.rs"); + let hash_v2 = build_consumer_and_get_hash(); + + assert_ne!( + hash_v1, hash_v2, + "consumer crate's SVH did not change when the proc-macro dependency's \ + source changed (both builds produced {hash_v1}). crate_hash must \ + reflect the actual metadata produced after macro expansion.", + ); + + // 3. Sanity: going back to v1 reproduces the original hash, so the + // difference above is genuinely caused by the macro source change + // and not by non-determinism in metadata encoding. 
+ build_macro("v1.rs"); + let hash_v1_again = build_consumer_and_get_hash(); + assert_eq!( + hash_v1, hash_v1_again, + "SVH is unstable across identical builds; the assertion above would be meaningless", + ); +} diff --git a/tests/run-make/proc-macro-dep-source-changes-crate-hash/v1.rs b/tests/run-make/proc-macro-dep-source-changes-crate-hash/v1.rs new file mode 100644 index 0000000000000..056e3f142212c --- /dev/null +++ b/tests/run-make/proc-macro-dep-source-changes-crate-hash/v1.rs @@ -0,0 +1,12 @@ +// First version of the proc-macro. Emits `pub const ANSWER: u32 = 1;`. + +#![crate_type = "proc-macro"] + +extern crate proc_macro; + +use proc_macro::TokenStream; + +#[proc_macro_derive(ChangingDerive)] +pub fn changing_derive(_input: TokenStream) -> TokenStream { + "pub const ANSWER: u32 = 1;".parse().unwrap() +} diff --git a/tests/run-make/proc-macro-dep-source-changes-crate-hash/v2.rs b/tests/run-make/proc-macro-dep-source-changes-crate-hash/v2.rs new file mode 100644 index 0000000000000..c739934013967 --- /dev/null +++ b/tests/run-make/proc-macro-dep-source-changes-crate-hash/v2.rs @@ -0,0 +1,13 @@ +// Second version of the proc-macro. Source has changed: it now emits +// `pub const ANSWER: u32 = 2;`. Crate name and exported macro name match v1. + +#![crate_type = "proc-macro"] + +extern crate proc_macro; + +use proc_macro::TokenStream; + +#[proc_macro_derive(ChangingDerive)] +pub fn changing_derive(_input: TokenStream) -> TokenStream { + "pub const ANSWER: u32 = 2;".parse().unwrap() +}