@@ -20,7 +20,7 @@ use tracing::debug;
20
20
21
21
use crate :: dep_graph:: { DepNode , WorkProduct , WorkProductId } ;
22
22
use crate :: middle:: codegen_fn_attrs:: CodegenFnAttrFlags ;
23
- use crate :: ty:: { GenericArgs , Instance , InstanceKind , SymbolName , TyCtxt } ;
23
+ use crate :: ty:: { self , GenericArgs , Instance , InstanceKind , SymbolName , Ty , TyCtxt } ;
24
24
25
25
/// Describes how a monomorphization will be instantiated in object files.
26
26
#[ derive( PartialEq ) ]
@@ -54,6 +54,39 @@ pub enum MonoItem<'tcx> {
54
54
GlobalAsm ( ItemId ) ,
55
55
}
56
56
57
+ fn opt_incr_drop_glue_mode < ' tcx > ( tcx : TyCtxt < ' tcx > , ty : Ty < ' tcx > ) -> InstantiationMode {
58
+ // Non-ADTs can't have a Drop impl. This case is mostly hit by closures whose captures require
59
+ // dropping.
60
+ let ty:: Adt ( adt_def, _) = ty. kind ( ) else {
61
+ return InstantiationMode :: LocalCopy ;
62
+ } ;
63
+
64
+ // Types that don't have a direct Drop impl, but have fields that require dropping.
65
+ let Some ( dtor) = adt_def. destructor ( tcx) else {
66
+ // We use LocalCopy for drops of enums only; this code is inherited from
67
+ // https://github.com/rust-lang/rust/pull/67332 and the theory is that we get to optimize
68
+ // out code like drop_in_place(Option::None) before crate-local ThinLTO, which improves
69
+ // compile time. At the time of writing, simply removing this entire check does seem to
70
+ // regress incr-opt compile times. But it sure seems like a more sophisticated check could
71
+ // do better here.
72
+ if adt_def. is_enum ( ) {
73
+ return InstantiationMode :: LocalCopy ;
74
+ } else {
75
+ return InstantiationMode :: GloballyShared { may_conflict : true } ;
76
+ }
77
+ } ;
78
+
79
+ // We've gotten to a drop_in_place for a type that directly implements Drop.
80
+ // The drop glue is a wrapper for the Drop::drop impl, and we are an optimized build, so in an
81
+ // effort to coordinate with the mode that the actual impl will get, we make the glue also
82
+ // LocalCopy.
83
+ if tcx. cross_crate_inlinable ( dtor. did ) {
84
+ InstantiationMode :: LocalCopy
85
+ } else {
86
+ InstantiationMode :: GloballyShared { may_conflict : true }
87
+ }
88
+ }
89
+
57
90
impl < ' tcx > MonoItem < ' tcx > {
58
91
/// Returns `true` if the mono item is user-defined (i.e. not compiler-generated, like shims).
59
92
pub fn is_user_defined ( & self ) -> bool {
@@ -123,16 +156,36 @@ impl<'tcx> MonoItem<'tcx> {
123
156
return InstantiationMode :: GloballyShared { may_conflict : false } ;
124
157
}
125
158
126
- // FIXME: The logic for which functions are permitted to get LocalCopy is actually spread
127
- // across 4 functions:
128
- // * cross_crate_inlinable(def_id)
129
- // * InstanceKind::requires_inline
130
- // * InstanceKind::generate_cgu_internal_copy
131
- // * MonoItem::instantiation_mode
132
- // Since reachable_non_generics calls InstanceKind::generates_cgu_internal_copy to decide
133
- // which symbols this crate exports, we are obligated to only generate LocalCopy when
134
- // generates_cgu_internal_copy returns true.
135
- if !instance. def . generates_cgu_internal_copy ( tcx) {
159
+ // This is technically a heuristic even though it's in the "not a heuristic" part of
160
+ // instantiation mode selection.
161
+ // It is surely possible to untangle this; the root problem is that the way we instantiate
162
+ // InstanceKind other than Item is very complicated.
163
+ //
164
+ // The fallback case is to give everything else GloballyShared at OptLevel::No and
165
+ // LocalCopy at all other opt levels. This is a good default, except for one specific build
166
+ // configuration: Optimized incremental builds.
167
+ // In the current compiler architecture there is a fundamental tension between
168
+ // optimizations (which want big CGUs with as many things LocalCopy as possible) and
169
+ // incrementality (which wants small CGUs with as many things GloballyShared as possible).
170
+ // The heuristics implemented here do better than a completely naive approach in the
171
+ // compiler benchmark suite, but there is no reason to believe they are optimal.
172
+ if let InstanceKind :: DropGlue ( _, Some ( ty) ) = instance. def {
173
+ if tcx. sess . opts . optimize == OptLevel :: No {
174
+ return InstantiationMode :: GloballyShared { may_conflict : false } ;
175
+ }
176
+ if tcx. sess . opts . incremental . is_none ( ) {
177
+ return InstantiationMode :: LocalCopy ;
178
+ }
179
+ return opt_incr_drop_glue_mode ( tcx, ty) ;
180
+ }
181
+
182
+ // We need to ensure that we do not decide the InstantiationMode of an exported symbol is
183
+ // LocalCopy. Since exported symbols are computed based on the output of
184
+ // cross_crate_inlinable, we are beholden to our previous decisions.
185
+ //
186
+ // Note that just like above, this check for requires_inline is technically a heuristic
187
+ // even though it's in the "not a heuristic" part of instantiation mode selection.
188
+ if !tcx. cross_crate_inlinable ( instance. def_id ( ) ) && !instance. def . requires_inline ( tcx) {
136
189
return InstantiationMode :: GloballyShared { may_conflict : false } ;
137
190
}
138
191
0 commit comments