Skip to content

Split nautilus Python dependencies into separate feature #3191

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 3 additions & 8 deletions libafl/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -187,14 +187,9 @@ llmp_small_maps = [
] # reduces initial map size for llmp

## Grammar mutator. Requires nightly.
nautilus = [
"std",
"serde_json/std",
"dep:pyo3",
"rand_trait",
"regex-syntax",
"regex",
]
nautilus = ["std", "serde_json/std", "rand_trait", "regex-syntax", "regex"]

nautilus_py = ["nautilus", "dep:pyo3"]

## Use the best SIMD implementation by our benchmark
simd = ["libafl_bolts/simd"]
Expand Down
2 changes: 2 additions & 0 deletions libafl/src/common/nautilus/grammartec/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use libafl_bolts::{
nonzero,
rands::{Rand, RomuDuoJrRand},
};
#[cfg(feature = "nautilus_py")]
use pyo3::prelude::PyObject;

use super::{
Expand Down Expand Up @@ -83,6 +84,7 @@ impl Context {
rid
}

#[cfg(feature = "nautilus_py")]
pub fn add_script(&mut self, nt: &str, nts: &[String], script: PyObject) -> RuleId {
let rid = self.rules.len().into();
let rule = Rule::from_script(self, nt, nts, script);
Expand Down
2 changes: 1 addition & 1 deletion libafl/src/common/nautilus/grammartec/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ pub mod chunkstore;
pub mod context;
pub mod mutator;
pub mod newtypes;
#[cfg(feature = "nautilus")]
#[cfg(feature = "nautilus_py")]
pub mod python_grammar_loader;
pub mod recursion_info;
pub mod rule;
Expand Down
13 changes: 12 additions & 1 deletion libafl/src/common/nautilus/grammartec/rule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use alloc::{string::String, vec::Vec};
use std::sync::OnceLock;

use libafl_bolts::rands::Rand;
#[cfg(feature = "nautilus_py")]
use pyo3::prelude::{PyObject, Python};
use regex_syntax::hir::Hir;
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -91,6 +92,7 @@ impl RuleIdOrCustom {
/// A single grammar rule, in one of the supported flavours.
///
/// The `Script` variant only exists when the `nautilus_py` feature is
/// enabled, because its payload ([`ScriptRule`]) stores a pyo3 `PyObject`.
/// Every `match` over this enum therefore has to gate its `Script` arm behind
/// the same `cfg`.
#[derive(Clone, Debug)]
pub enum Rule {
/// Rule described by a [`PlainRule`].
Plain(PlainRule),
/// Rule described by a Python object (see [`ScriptRule`]); requires `nautilus_py`.
#[cfg(feature = "nautilus_py")]
Script(ScriptRule),
/// Rule described by a [`RegExpRule`].
RegExp(RegExpRule),
}
Expand All @@ -108,13 +110,15 @@ impl RegExpRule {
}
}

/// Grammar rule whose behaviour is provided by a Python object.
///
/// Only compiled with the `nautilus_py` feature, since it stores a pyo3
/// `PyObject`. `Clone` is not derived here; it is implemented by hand
/// elsewhere in this file inside `Python::with_gil`.
#[cfg(feature = "nautilus_py")]
#[derive(Debug)]
pub struct ScriptRule {
/// Value returned by `Rule::nonterm()` for this rule
/// (presumably the nonterminal this rule expands; confirm against `from_script`).
pub nonterm: NTermId,
/// Slice returned by `Rule::nonterms()` for this rule
/// (presumably the nonterminals passed to the script; confirm against `from_script`).
pub nonterms: Vec<NTermId>,
/// The Python object backing this rule.
pub script: PyObject,
}

#[cfg(feature = "nautilus_py")]
impl ScriptRule {
#[must_use]
pub fn debug_show(&self, ctx: &Context) -> String {
Expand Down Expand Up @@ -148,6 +152,7 @@ impl PlainRule {
}
}

#[cfg(feature = "nautilus_py")]
impl Clone for ScriptRule {
fn clone(&self) -> Self {
Python::with_gil(|py| ScriptRule {
Expand All @@ -159,6 +164,7 @@ impl Clone for ScriptRule {
}

impl Rule {
#[cfg(feature = "nautilus_py")]
pub fn from_script(
ctx: &mut Context,
nonterm: &str,
Expand Down Expand Up @@ -189,6 +195,7 @@ impl Rule {
pub fn debug_show(&self, ctx: &Context) -> String {
match self {
Self::Plain(r) => r.debug_show(ctx),
#[cfg(feature = "nautilus_py")]
Self::Script(r) => r.debug_show(ctx),
Self::RegExp(r) => r.debug_show(ctx),
}
Expand Down Expand Up @@ -281,6 +288,7 @@ impl Rule {
#[must_use]
pub fn nonterms(&self) -> &[NTermId] {
match self {
#[cfg(feature = "nautilus_py")]
Rule::Script(r) => &r.nonterms,
Rule::Plain(r) => &r.nonterms,
Rule::RegExp(_) => &[],
Expand All @@ -295,6 +303,7 @@ impl Rule {
#[must_use]
pub fn nonterm(&self) -> NTermId {
match self {
#[cfg(feature = "nautilus_py")]
Rule::Script(r) => r.nonterm,
Rule::Plain(r) => r.nonterm,
Rule::RegExp(r) => r.nonterm,
Expand Down Expand Up @@ -340,7 +349,9 @@ impl Rule {
//get a rule that can be used with the remaining length
let rid = ctx.get_random_rule_for_nt(rand, *nt, cur_child_max_len);
let rule_or_custom = match ctx.get_rule(rid) {
Rule::Plain(_) | Rule::Script(_) => RuleIdOrCustom::Rule(rid),
Rule::Plain(_) => RuleIdOrCustom::Rule(rid),
#[cfg(feature = "nautilus_py")]
Rule::Script(_) => RuleIdOrCustom::Rule(rid),
Rule::RegExp(RegExpRule { hir, .. }) => {
RuleIdOrCustom::Custom(rid, regex_mutator::generate(rand, hir))
}
Expand Down
35 changes: 26 additions & 9 deletions libafl/src/common/nautilus/grammartec/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,30 @@ use std::io::{Cursor, Write, stdout};

use hashbrown::HashSet;
use libafl_bolts::rands::Rand;
#[cfg(feature = "nautilus_py")]
use pyo3::{
PyTypeInfo,
prelude::{PyObject, PyResult, Python},
types::{PyAnyMethods, PyBytes, PyBytesMethods, PyString, PyStringMethods, PyTuple},
};
use serde::{Deserialize, Serialize};

#[cfg(feature = "nautilus_py")]
use super::rule::ScriptRule;
use super::{
super::regex_mutator,
context::Context,
newtypes::{NTermId, NodeId, RuleId},
recursion_info::RecursionInfo,
rule::{PlainRule, RegExpRule, Rule, RuleChild, RuleIdOrCustom, ScriptRule},
rule::{PlainRule, RegExpRule, Rule, RuleChild, RuleIdOrCustom},
};

/// One pending work item on the unparser's stack.
///
/// The unparser pops these one at a time and dispatches on the variant.
/// The two Python-related variants are only compiled with the `nautilus_py`
/// feature, matching the gating of the methods that handle them.
enum UnparseStep<'dat> {
/// Raw bytes borrowed for `'dat`; handed to `write` when popped.
Term(&'dat [u8]),
/// A nonterminal; handed to `nonterm` when popped, which continues with the next rule.
Nonterm(NTermId),
/// A script invocation; handed to `unwrap_script` when popped.
/// The `usize` is the number of trailing buffers that `script` splits off
/// from `self.buffers`; the `PyObject` is the Python object passed along.
#[cfg(feature = "nautilus_py")]
Script(usize, PyObject),
/// Request to push a fresh, empty output buffer (see `push_buffer`).
#[cfg(feature = "nautilus_py")]
PushBuffer(),
}

Expand Down Expand Up @@ -55,7 +60,9 @@ impl<'data, 'tree: 'data, 'ctx: 'data, W: Write, T: TreeLike> Unparser<'data, 't
match self.stack.pop() {
Some(UnparseStep::Term(data)) => self.write(data),
Some(UnparseStep::Nonterm(nt)) => self.nonterm(nt),
#[cfg(feature = "nautilus_py")]
Some(UnparseStep::Script(num, expr)) => self.unwrap_script(num, &expr),
#[cfg(feature = "nautilus_py")]
Some(UnparseStep::PushBuffer()) => self.push_buffer(),
None => return false,
}
Expand All @@ -73,13 +80,17 @@ impl<'data, 'tree: 'data, 'ctx: 'data, W: Write, T: TreeLike> Unparser<'data, 't
/// Handles a popped `UnparseStep::Nonterm` by delegating to `next_rule`.
fn nonterm(&mut self, nt: NTermId) {
self.next_rule(nt);
}

/// Runs `script` for a popped `UnparseStep::Script` while holding the GIL.
///
/// # Panics
///
/// If `script` returns a Python error, the error is printed through
/// `print_and_set_sys_last_vars` and this method then panics.
#[cfg(feature = "nautilus_py")]
fn unwrap_script(&mut self, num: usize, expr: &PyObject) {
    Python::with_gil(|py| {
        let outcome = self.script(py, num, expr);
        // Report the Python error first; the mapped error type is `()`.
        let reported = outcome.map_err(|err| err.print_and_set_sys_last_vars(py));
        reported.unwrap();
    });
}

#[cfg(feature = "nautilus_py")]
fn script(&mut self, py: Python, num: usize, expr: &PyObject) -> PyResult<()> {
let bufs = self.buffers.split_off(self.buffers.len() - num);
let bufs = bufs.into_iter().map(Cursor::into_inner).collect::<Vec<_>>();
Expand All @@ -100,6 +111,7 @@ impl<'data, 'tree: 'data, 'ctx: 'data, W: Write, T: TreeLike> Unparser<'data, 't
Ok(())
}

/// Handles a popped `UnparseStep::PushBuffer` by appending a new, empty
/// in-memory buffer to `self.buffers`.
#[cfg(feature = "nautilus_py")]
fn push_buffer(&mut self) {
    let fresh = Cursor::new(Vec::new());
    self.buffers.push(fresh);
}
Expand All @@ -111,6 +123,7 @@ impl<'data, 'tree: 'data, 'ctx: 'data, W: Write, T: TreeLike> Unparser<'data, 't
self.i += 1;
match rule {
Rule::Plain(r) => self.next_plain(r),
#[cfg(feature = "nautilus_py")]
Rule::Script(r) => self.next_script(r),
Rule::RegExp(_) => self.next_regexp(self.tree.get_custom_rule_data(nid)),
}
Expand All @@ -126,6 +139,7 @@ impl<'data, 'tree: 'data, 'ctx: 'data, W: Write, T: TreeLike> Unparser<'data, 't
}
}

#[cfg(feature = "nautilus_py")]
fn next_script(&mut self, r: &ScriptRule) {
Python::with_gil(|py| {
self.stack.push(UnparseStep::Script(
Expand Down Expand Up @@ -345,15 +359,18 @@ impl Tree {
max_len: usize,
ctx: &Context,
) {
let mut plain_or_script_rule = || {
self.truncate();
self.rules.push(RuleIdOrCustom::Rule(ruleid));
self.sizes.push(0);
self.paren.push(NodeId::from(0));
ctx.get_rule(ruleid).generate(rand, self, ctx, max_len);
self.sizes[0] = self.rules.len();
};
match ctx.get_rule(ruleid) {
Rule::Plain(..) | Rule::Script(..) => {
self.truncate();
self.rules.push(RuleIdOrCustom::Rule(ruleid));
self.sizes.push(0);
self.paren.push(NodeId::from(0));
ctx.get_rule(ruleid).generate(rand, self, ctx, max_len);
self.sizes[0] = self.rules.len();
}
Rule::Plain(..) => plain_or_script_rule(),
#[cfg(feature = "nautilus_py")]
Rule::Script(..) => plain_or_script_rule(),
Rule::RegExp(RegExpRule { hir, .. }) => {
let rid = RuleIdOrCustom::Custom(ruleid, regex_mutator::generate(rand, hir));
self.truncate();
Expand Down
25 changes: 18 additions & 7 deletions libafl/src/generators/nautilus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@ use std::{fs, io::BufReader, path::Path};
use libafl_bolts::rands::Rand;

pub use crate::common::nautilus::grammartec::newtypes::NTermId;
#[cfg(feature = "nautilus_py")]
use crate::nautilus::grammartec::python_grammar_loader;
use crate::{
Error, common::nautilus::grammartec::context::Context, generators::Generator,
inputs::nautilus::NautilusInput, nautilus::grammartec::python_grammar_loader, state::HasRand,
inputs::nautilus::NautilusInput, state::HasRand,
};

/// The nautilus context for a generator
Expand Down Expand Up @@ -87,12 +89,21 @@ impl NautilusContext {
pub fn from_file<P: AsRef<Path>>(tree_depth: usize, grammar_file: P) -> Result<Self, Error> {
let grammar_file = grammar_file.as_ref();
if grammar_file.extension().unwrap_or_default() == "py" {
log::debug!("Creating NautilusContext from python grammar");
let mut ctx = python_grammar_loader::load_python_grammar(
fs::read_to_string(grammar_file)?.as_str(),
);
ctx.initialize(tree_depth);
return Ok(Self { ctx });
#[cfg(feature = "nautilus_py")]
{
log::debug!("Creating NautilusContext from python grammar");
let mut ctx = python_grammar_loader::load_python_grammar(
fs::read_to_string(grammar_file)?.as_str(),
);
ctx.initialize(tree_depth);
return Ok(Self { ctx });
}
#[cfg(not(feature = "nautilus_py"))]
{
return Err(Error::illegal_argument(format!(
"Feature `nautilus_py` is required to load grammar from {grammar_file:?}"
)));
}
}
log::debug!("Creating NautilusContext from json grammar");
let file = fs::File::open(grammar_file)?;
Expand Down
Loading