-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing
5 changed files
with
177 additions
and
26 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
pub mod ast; | ||
mod parser; | ||
mod repl; | ||
mod scanner; | ||
|
||
pub use parser::{ParseError, Parser}; | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
/// Empty placeholder struct for the `repl` module; it declares no fields. | ||
#[derive(Debug, Clone)] | ||
pub struct Repl {} | ||
Check failure on line 2 in src/frontend/repl.rs GitHub Actions / run cargo tests
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,42 +1,69 @@ | ||
#![forbid(unsafe_code)] | ||
use std::env; | ||
use std::path::PathBuf; | ||
|
||
use anyhow::Context; | ||
use clap::Parser as _; | ||
use scrapelect::{frontend::Parser, interpreter::Interpreter}; | ||
use url::Url; | ||
|
||
#[tokio::main] | ||
async fn main() -> anyhow::Result<()> { | ||
let mut args = env::args(); | ||
// skip name | ||
let name = args.next().context("usage: scrapelect <filename> <url>")?; | ||
// Top-level command-line interface, parsed with clap's derive API. | ||
// Accepts either a subcommand (`mode`) or bare run arguments (`run`); | ||
// `args_conflicts_with_subcommands` is set so the two are not combined. | ||
// (Plain `//` comments are used on purpose: clap turns `///` doc comments | ||
// into help text, which would change the program's output.) | ||
#[derive(Debug, clap::Parser)] | ||
#[command(version, args_conflicts_with_subcommands = true)] | ||
struct Interface { | ||
// Optional subcommand selecting how the program runs. | ||
#[command(subcommand)] | ||
mode: Option<Mode>, | ||
// There is some magic going on where `Option<RunArgs>` makes it required | ||
// when the subcommand is not provided. | ||
#[command(flatten)] | ||
run: Option<RunArgs>, | ||
} | ||
|
||
// Arguments needed to run a program against a web page. Used both flattened | ||
// into `Interface` (bare invocation) and as the payload of `Mode::Run`. | ||
// The `///` comments on the fields below are picked up by clap as help text. | ||
#[derive(Debug, clap::Args)] | ||
struct RunArgs { | ||
/// The `.scrp` file describing how to convert the web page into structured data | ||
file: PathBuf, | ||
/// The URL of the web page to start scraping at. | ||
url: Url, | ||
} | ||
|
||
let filename = args | ||
.next() | ||
.with_context(|| format!("usage: {name} <filename> <url>"))?; | ||
// Subcommands accepted by the command-line interface. | ||
// (Plain `//` comments are used because clap would turn `///` into help text.) | ||
#[derive(Debug, clap::Subcommand)] | ||
enum Mode { | ||
// Takes the same `RunArgs` as the bare (no-subcommand) invocation. | ||
Run(RunArgs), | ||
// No arguments; `main` currently reaches `todo!()` for this variant. | ||
Repl, | ||
} | ||
|
||
let url = args | ||
.next() | ||
.with_context(|| format!("usage: {name} <filename = {filename}> <url>"))?; | ||
// Entry point: parses the command line into `Interface`, then dispatches on | ||
// the pair (subcommand, flattened run arguments). | ||
// NOTE(review): this span is a rendered diff, so lines that look like the | ||
// previous revision of `main` (those using `filename` / `url.parse()`) appear | ||
// interleaved with the new ones — confirm against the actual file. | ||
#[tokio::main] | ||
async fn main() -> anyhow::Result<()> { | ||
let args = Interface::parse(); | ||
|
||
let pgm = std::fs::read_to_string(&filename) | ||
.with_context(|| format!("error reading file {filename}"))?; | ||
match (args.mode, args.run) { | ||
// Run path (explicit `Mode::Run` or bare arguments): read the program file, | ||
// parse it, interpret it starting at the URL, and print the results as pretty JSON. | ||
(Some(Mode::Run(run_args)), None) | (None, Some(run_args)) => { | ||
let pgm = std::fs::read_to_string(&run_args.file) | ||
.with_context(|| format!("error reading file {}", run_args.file.display()))?; | ||
|
||
let parser = Parser::new(&pgm); | ||
let parser = Parser::new(&pgm); | ||
|
||
let ast = parser | ||
.parse() | ||
.with_context(|| format!("parse error in {filename}:"))?; | ||
let ast = parser | ||
.parse() | ||
.with_context(|| format!("parse error in {}:", run_args.file.display()))?; | ||
|
||
let interpreter = Interpreter::new(&ast); | ||
let interpreter = Interpreter::new(&ast); | ||
|
||
let results = interpreter | ||
.interpret( | ||
url.parse() | ||
.with_context(|| format!("Couldn't parse `{url}` into a URL"))?, | ||
) | ||
.await?; | ||
let results = interpreter.interpret(run_args.url).await?; | ||
|
||
println!("{}", serde_json::to_string_pretty(&results)?); | ||
println!("{}", serde_json::to_string_pretty(&results)?); | ||
} | ||
// TODO: investigate if the (None, None) branch is reachable (I think it isn't) | ||
(Some(Mode::Repl), None) | (None, None) => { | ||
todo!() | ||
} | ||
(Some(_), Some(_)) => { | ||
unreachable!( | ||
"This should be impossible to reach with clap's `args_conflicts_with_subcommands`. | ||
If you see this error message, please file a GitHub issue with the arguments | ||
you provided to `scrapelect`." | ||
) | ||
} | ||
} | ||
|
||
Ok(()) | ||
} |