Skip to content

Commit

Permalink
Option to print summaries
Browse files Browse the repository at this point in the history
  • Loading branch information
hermanventer committed Dec 10, 2024
1 parent 270643b commit 5334663
Show file tree
Hide file tree
Showing 5 changed files with 144 additions and 6 deletions.
18 changes: 15 additions & 3 deletions checker/src/call_graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ use serde::{Deserialize, Serialize};
use mirai_annotations::*;
use rustc_hir::def_id::DefId;
use rustc_middle::ty::TyCtxt;
use rustc_span::Span;

// An unique identifier for a Rust type string.
type TypeId = u32;
Expand Down Expand Up @@ -116,6 +117,8 @@ pub struct CallGraphConfig {
included_crates: Vec<Box<str>>,
/// Datalog output configuration
datalog_config: Option<DatalogConfig>,
/// If true, collect all call sites.
pub include_calls_in_summaries: bool,
}

impl CallGraphConfig {
Expand All @@ -132,6 +135,7 @@ impl CallGraphConfig {
reductions,
included_crates,
datalog_config,
include_calls_in_summaries: false,
}
}

Expand Down Expand Up @@ -314,7 +318,7 @@ impl CallGraphEdge {
#[derive(Clone)]
pub struct CallGraph<'tcx> {
/// Configuration for the call graph
config: CallGraphConfig,
pub config: CallGraphConfig,
/// A context to use for getting the information about DefIds that is needed
/// to generate fully qualified names for them.
tcx: TyCtxt<'tcx>,
Expand Down Expand Up @@ -368,7 +372,7 @@ impl<'tcx> CallGraph<'tcx> {
self.config.dot_output_path.is_some() || self.config.datalog_config.is_some()
}

/// Produce an updated call graph structure that preserves all of the
/// Produce an updated call graph structure that preserves all the
/// fields except `graph`, which is replaced.
fn update(&self, graph: Graph<CallGraphNode, CallGraphEdge>) -> CallGraph<'tcx> {
CallGraph {
Expand Down Expand Up @@ -433,7 +437,7 @@ impl<'tcx> CallGraph<'tcx> {
callee: DefId,
external_callee: bool,
) {
if self.config.call_sites_output_path.is_some() && !self.non_local_defs.contains(&caller) {
if self.config.include_calls_in_summaries || (self.config.call_sites_output_path.is_some() && !self.non_local_defs.contains(&caller)) {
self.call_sites.insert(loc, (caller, callee));
if external_callee {
self.non_local_defs.insert(callee);
Expand Down Expand Up @@ -1082,6 +1086,14 @@ impl<'tcx> CallGraph<'tcx> {
call_graph.to_call_sites(Path::new(call_path.as_ref()));
}
}

pub fn get_calls_for_def_ids(&self) -> HashMap<DefId, Vec<(Span, DefId)>> {
let mut calls = HashMap::<DefId, Vec<(Span, DefId)>>::new();
for (span, (caller, callee)) in self.call_sites.iter() {
calls.entry(*caller).or_default().push((*span, *callee));
}
calls
}
}

/// Supported Datalog output formats
Expand Down
4 changes: 4 additions & 0 deletions checker/src/callbacks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,11 @@ impl MiraiCallbacks {
type_cache: Rc::new(RefCell::new(TypeCache::new())),
call_graph: CallGraph::new(call_graph_config, tcx),
};
if crate_visitor.options.print_summaries {
crate_visitor.call_graph.config.include_calls_in_summaries = true;
}
crate_visitor.analyze_some_bodies();
crate_visitor.call_graph.output();
crate_visitor.print_summaries();
}
}
12 changes: 11 additions & 1 deletion checker/src/crate_visitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ impl<'compilation> CrateVisitor<'compilation, '_> {
DefId::local(DefIndex::from_u32(0))
};

// Analyze all functions that are white listed or public
// Analyze all functions that are whitelisted or public
let building_standard_summaries = std::env::var("MIRAI_START_FRESH").is_ok();
for local_def_id in self.tcx.hir().body_owners() {
let def_id = local_def_id.to_def_id();
Expand Down Expand Up @@ -185,6 +185,7 @@ impl<'compilation> CrateVisitor<'compilation, '_> {
let kind = self.tcx.def_kind(def_id);
if matches!(kind, rustc_hir::def::DefKind::Static { .. })
|| utils::is_foreign_contract(self.tcx, def_id)
|| self.options.print_summaries
{
self.summary_cache
.set_summary_for(def_id, self.tcx, summary);
Expand Down Expand Up @@ -290,4 +291,13 @@ impl<'compilation> CrateVisitor<'compilation, '_> {
}
}
}

pub fn print_summaries(&mut self) {
if !self.options.print_summaries {
return;
}
let calls_for_def_ids = self.call_graph.get_calls_for_def_ids();
let summaries_for_llm = self.summary_cache.get_summaries_for_llm(self.tcx, calls_for_def_ids);
print!("{}", summaries_for_llm.to_json());
}
}
2 changes: 1 addition & 1 deletion checker/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ impl Options {
matches.value_source("print_summaries"),
Some(ValueSource::DefaultValue)
) {
self.print_summaries = false;
self.print_summaries = true;
}
args[rustc_args_start..].to_vec()
}
Expand Down
114 changes: 113 additions & 1 deletion checker/src/summaries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.

use std::path::PathBuf;
use std::cmp::Ordering;
use std::collections::{HashMap, HashSet};
use std::env::current_dir;
use std::fmt::{Debug, Formatter, Result};
use std::hash::{Hash, Hasher};
use std::ops::Deref;
Expand All @@ -18,6 +20,7 @@ use sled::{Config, Db};
use mirai_annotations::*;
use rustc_hir::def_id::DefId;
use rustc_middle::ty::{Ty, TyCtxt};
use rustc_span::Span;

use crate::abstract_value::AbstractValue;
use crate::abstract_value::AbstractValueTrait;
Expand Down Expand Up @@ -275,7 +278,7 @@ fn add_provenance(preconditions: &[Precondition], tcx: TyCtxt<'_>) -> Vec<Precon

/// Returns a list of (path, value) pairs where each path is rooted by an argument(or the result)
/// or where the path root is a heap block reachable from an argument (or the result).
/// Since paths are created by writes, these are side-effects.
/// Since paths are created by writes, these are side effects.
/// Since these values are reachable from arguments or the result, they are visible to the caller
/// and must be included in the summary.
#[logfn_inputs(TRACE)]
Expand Down Expand Up @@ -494,6 +497,57 @@ impl<'tcx> SummaryCache<'tcx> {
store_path
}

pub fn get_summaries_for_llm(&self, tcx: TyCtxt, call_site_per_def_id: HashMap<DefId, Vec<(Span, DefId)>>) -> SummariesForLLM {
let source_map = tcx.sess.source_map();
let mut entries = Vec::new();
for (key, value) in self.def_id_cache.iter() {
let fully_qualified_name = self.key_cache.get(key).unwrap().to_string();
let file_name;
let source;
if tcx.is_mir_available(*key) {
let mir = if tcx.is_const_fn(*key) {
tcx.mir_for_ctfe(*key)
} else {
let instance = rustc_middle::ty::InstanceKind::Item(*key);
tcx.instance_mir(instance)
};
file_name = source_map.span_to_filename(mir.span).into_local_path();
source = source_map.span_to_snippet(mir.span).ok().unwrap_or_default();
} else {
let span = tcx.def_span(*key);
file_name = source_map.span_to_filename(span).into_local_path();
source = source_map.span_to_snippet(span).ok().unwrap_or_default();
}
let mut path = None;
if let Some(mut p) = file_name {
if p.is_absolute() {
p = p.strip_prefix(current_dir().unwrap_or_default()).unwrap_or(&p).to_path_buf();
if p.is_absolute() {
let sysroot = utils::find_sysroot();
let rel = p.strip_prefix(sysroot).unwrap_or(&p);
p = PathBuf::from("/").join(rel).to_path_buf();
}
}
path = Some(p.to_string_lossy().to_string());
}
let mut calls = vec![];
if let Some(call_vec) = call_site_per_def_id.get(key) {
for (span, def_id) in call_vec.iter() {
let call_snippet = source_map.span_to_snippet(*span).ok().unwrap_or_default();
let callee_name = self.key_cache.get(def_id).unwrap().to_string();
calls.push((call_snippet, callee_name));
}
};
entries.push((
path.unwrap_or_default(),
fully_qualified_name,
source,
LLMSummary::from_summary(&value, calls),
));
}
SummariesForLLM { entries }
}

/// Returns (and caches) a string that uniquely identifies a definition to serve as a key to
/// the summary cache, which is a key value store. The string will always be the same as
/// long as the definition does not change its name or location, so it can be used to
Expand Down Expand Up @@ -645,6 +699,11 @@ impl<'tcx> SummaryCache<'tcx> {
summary: Summary,
) {
if let Some(func_id) = func_ref.function_id {
if let Some(def_id) = func_ref.def_id {
if func_args.is_none() && type_args.is_none() {
self.def_id_cache.insert(def_id, summary.clone());
}
}
if func_args.is_some() || type_args.is_some() {
let typed_cache_key =
CallSiteKey::new(func_args.clone(), type_args.clone(), func_id);
Expand Down Expand Up @@ -676,3 +735,56 @@ impl<'tcx> SummaryCache<'tcx> {
self.def_id_cache.insert(def_id, summary)
}
}

#[derive(Serialize)]
pub struct SummariesForLLM {
// (source path, fully qualified function name, function source, summary)
entries: Vec<(String, String, String, LLMSummary)>,
}

impl SummariesForLLM {
pub fn to_json(&self) -> String {
serde_json::to_string_pretty(&self).unwrap()
}
}

#[derive(Serialize)]
pub struct LLMSummary {
// Conditions that should hold prior to the call.
// Callers should substitute parameter values with argument values and simplify the results
// under the current path condition. Any values that do not simplify to true will require the
// caller to either generate an error message or to add a precondition to its own summary that
// will be sufficient to ensure that all the preconditions in this summary are met.
// The string value bundled with the condition is the message that details what would go
// wrong at runtime if the precondition is not satisfied by the caller.
//pub preconditions: Vec<Precondition>,

// Modifications the function makes to mutable state external to the function.
// Every path will be rooted in a static or in a mutable parameter.
// No two paths in this collection will lead to the same place in memory.
// Callers should substitute parameter values with argument values and simplify the results
// under the current path condition. They should then update their current state to reflect the
// side effects of the call.
//pub side_effects: Vec<(Rc<Path>, Rc<AbstractValue>)>,

// A condition that should hold after a call that completes normally.
// Callers should substitute parameter values with argument values and simplify the results
// under the current path condition.
// The resulting value should be conjoined to the current path condition.
//pub post_condition: Option<Rc<AbstractValue>>,

// The set of function calls made by this function. The first element is the source snippet of
// the call and the second is the fully qualified name of the function being called.
calls: Vec<(String, String)>,
}

impl LLMSummary {
pub fn from_summary(_summary: &Summary, calls: Vec<(String, String)>) -> LLMSummary {
LLMSummary {
// preconditions: vec![],
// side_effects: vec![],
// post_condition: vec![],
calls,
}
}
}

0 comments on commit 5334663

Please sign in to comment.