Skip to content

Commit

Permalink
Bug/MontFerret#80 element not found (MontFerret#99)
Browse files Browse the repository at this point in the history
* Some work

* Some refactoring

* Work on stabilizing queries

* Removed unit test for debugging

* Fixed linter errors

* Added logging when NodeID is 0

* Added --time param to CLI
  • Loading branch information
ziflex authored Oct 11, 2018
1 parent ad21fa6 commit 570c1b4
Show file tree
Hide file tree
Showing 13 changed files with 531 additions and 330 deletions.
15 changes: 15 additions & 0 deletions cli/exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,13 @@ func Exec(query string, opts Options) {
}
}()

var timer *Timer

if opts.ShowTime {
timer = NewTimer()
timer.Start()
}

out, err := prog.Run(
ctx,
runtime.WithBrowser(opts.Cdp),
Expand All @@ -60,6 +67,10 @@ func Exec(query string, opts Options) {
runtime.WithUserAgent(opts.UserAgent),
)

if opts.ShowTime {
timer.Stop()
}

if err != nil {
fmt.Println("Failed to execute the query")
fmt.Println(err)
Expand All @@ -68,4 +79,8 @@ func Exec(query string, opts Options) {
}

fmt.Println(string(out))

if opts.ShowTime {
fmt.Println(timer.Print())
}
}
1 change: 1 addition & 0 deletions cli/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ type Options struct {
Params map[string]interface{}
Proxy string
UserAgent string
ShowTime bool
}
18 changes: 13 additions & 5 deletions cli/repl.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,11 @@ func Repl(version string, opts Options) {
var commands []string
var multiline bool

timer := NewTimer()
var timer *Timer

if opts.ShowTime {
timer = NewTimer()
}

l := NewLogger()

Expand Down Expand Up @@ -90,7 +94,9 @@ func Repl(version string, opts Options) {
continue
}

timer.Start()
if opts.ShowTime {
timer.Start()
}

out, err := program.Run(
ctx,
Expand All @@ -102,15 +108,17 @@ func Repl(version string, opts Options) {
runtime.WithUserAgent(opts.UserAgent),
)

timer.Stop()
fmt.Println(timer.Print())

if err != nil {
fmt.Println("Failed to execute the query")
fmt.Println(err)
continue
}

fmt.Println(string(out))

if opts.ShowTime {
timer.Stop()
fmt.Println(timer.Print())
}
}
}
8 changes: 8 additions & 0 deletions examples/inner_text_all.fql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Example: for every chart entry on the SoundCloud top-charts page, return the
// INNER_TEXT_ALL result for its '.chartTrack__details' descendants.
// NOTE(review): the second DOCUMENT argument (true) presumably requests a
// browser-driven (dynamic) page load — confirm against the DOCUMENT docs.
LET doc = DOCUMENT('https://soundcloud.com/charts/top', true)

// Wait for an element matching '.chartTrack__details' before querying the page.
// NOTE(review): 5000 is presumably a timeout in milliseconds — confirm.
WAIT_ELEMENT(doc, '.chartTrack__details', 5000)

// Select every element matching '.chartTrack'.
LET tracks = ELEMENTS(doc, '.chartTrack')

// Emit one result per selected track element.
FOR track IN tracks
RETURN INNER_TEXT_ALL(track, '.chartTrack__details')
2 changes: 1 addition & 1 deletion examples/input.fql
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ INPUT(google, 'input[name="q"]', "ferret", 25)
CLICK(google, 'input[name="btnK"]')

WAIT_NAVIGATION(google)
WAIT_ELEMENT(google, '.g')
WAIT_ELEMENT(google, '.g', 5000)

FOR result IN ELEMENTS(google, '.g')
// filter out extra elements like videos and 'People also ask'
Expand Down
17 changes: 8 additions & 9 deletions examples/pagination.fql
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,14 @@ LET result = (

LET items = (
FOR el IN ELEMENTS(amazon, resultItemSelector)

LET priceTxtMain = INNER_TEXT(el, priceSelector)
LET priceTxt = priceTxtMain != "" ? priceTxtMain : INNER_TEXT(el, altPriceSelector)

RETURN {
title: INNER_TEXT(el, 'h2'),
vendor: INNER_TEXT(el, vendorSelector),
price: TO_FLOAT(SUBSTITUTE(priceTxt, "$", ""))
}
LET priceTxtMain = INNER_TEXT(el, priceSelector)
LET priceTxt = priceTxtMain != "" ? priceTxtMain : INNER_TEXT(el, altPriceSelector)

RETURN {
title: INNER_TEXT(el, 'h2'),
vendor: INNER_TEXT(el, vendorSelector),
price: TO_FLOAT(SUBSTITUTE(priceTxt, "$", ""))
}
)

RETURN items
Expand Down
7 changes: 7 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@ var (
"set custom user agent. '*' triggers UA generation",
)

showTime = flag.Bool(
"time",
false,
"show how much time was taken to execute a query",
)

version = flag.Bool(
"version",
false,
Expand Down Expand Up @@ -151,6 +157,7 @@ func main() {
Params: p,
Proxy: *proxyAddress,
UserAgent: *userAgent,
ShowTime: *showTime,
}

stat, _ := os.Stdin.Stat()
Expand Down
18 changes: 14 additions & 4 deletions pkg/compiler/compiler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2104,12 +2104,22 @@ func TestParam(t *testing.T) {
// c := compiler.New()
//
// out, err := c.MustCompile(`
//LET doc = DOCUMENT("http://getbootstrap.com/docs/4.1/components/collapse/", true)
//LET google = DOCUMENT("https://www.google.com/", true)
//
//CLICK(doc, "#headingTwo > h5 > button")
//WAIT_CLASS(doc, "#collapseTwo", "bar")
//INPUT(google, 'input[name="q"]', "ferret", 25)
//CLICK(google, 'input[name="btnK"]')
//
//RETURN TRUE
//WAIT_NAVIGATION(google)
//WAIT_ELEMENT(google, '.g', 5000)
//
//FOR result IN ELEMENTS(google, '.g')
// // filter out extra elements like videos and 'People also ask'
// FILTER TRIM(result.attributes.class) == 'g'
// RETURN {
// title: INNER_TEXT(result, 'h3'),
// description: INNER_TEXT(result, '.st'),
// url: INNER_TEXT(result, 'cite')
// }
// `).Run(context.Background())
//
// So(err, ShouldBeNil)
Expand Down
80 changes: 52 additions & 28 deletions pkg/html/dynamic/document.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import (
"github.com/MontFerret/ferret/pkg/runtime/logging"
"github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/mafredri/cdp"
"github.com/mafredri/cdp/protocol/dom"
"github.com/mafredri/cdp/protocol/input"
"github.com/mafredri/cdp/protocol/page"
"github.com/mafredri/cdp/rpcc"
Expand Down Expand Up @@ -80,25 +79,40 @@ func LoadHTMLDocument(
}
}

root, innerHTML, err := getRootElement(client)
node, err := getRootElement(ctx, client)

if err != nil {
return nil, err
return nil, errors.Wrap(err, "failed to get root element")
}

broker, err := createEventBroker(client)

if err != nil {
return nil, err
return nil, errors.Wrap(err, "failed to create event events")
}

logger := logging.FromContext(ctx)

rootElement, err := LoadElement(
ctx,
logger,
client,
broker,
node.Root.NodeID,
node.Root.BackendNodeID,
)

if err != nil {
return nil, errors.Wrap(err, "failed to load root element")
}

return NewHTMLDocument(
logging.FromContext(ctx),
logger,
conn,
client,
broker,
root,
innerHTML,
values.NewString(url),
rootElement,
), nil
}

Expand All @@ -107,20 +121,16 @@ func NewHTMLDocument(
conn *rpcc.Conn,
client *cdp.Client,
broker *events.EventBroker,
root dom.Node,
innerHTML values.String,
url values.String,
rootElement *HTMLElement,
) *HTMLDocument {
doc := new(HTMLDocument)
doc.logger = logger
doc.conn = conn
doc.client = client
doc.events = broker
doc.element = NewHTMLElement(doc.logger, client, broker, root.NodeID, root, innerHTML)
doc.url = ""

if root.BaseURL != nil {
doc.url = values.NewString(*root.BaseURL)
}
doc.url = url
doc.element = rootElement

broker.AddEventListener("load", doc.handlePageLoad)
broker.AddEventListener("error", doc.handleError)
Expand Down Expand Up @@ -201,7 +211,7 @@ func (doc *HTMLDocument) Close() error {
Timestamp().
Str("url", doc.url.String()).
Err(err).
Msg("failed to stop event broker")
Msg("failed to stop event events")
}

err = doc.events.Close()
Expand All @@ -211,7 +221,7 @@ func (doc *HTMLDocument) Close() error {
Timestamp().
Str("url", doc.url.String()).
Err(err).
Msg("failed to close event broker")
Msg("failed to close event events")
}

err = doc.element.Close()
Expand Down Expand Up @@ -645,7 +655,10 @@ func (doc *HTMLDocument) handlePageLoad(_ interface{}) {
doc.Lock()
defer doc.Unlock()

updated, innerHTML, err := getRootElement(doc.client)
ctx, cancel := contextWithTimeout()
defer cancel()

node, err := getRootElement(ctx, doc.client)

if err != nil {
doc.logger.Error().
Expand All @@ -656,22 +669,33 @@ func (doc *HTMLDocument) handlePageLoad(_ interface{}) {
return
}

// close the prev element
doc.element.Close()

// create a new root element wrapper
doc.element = NewHTMLElement(
updated, err := LoadElement(
ctx,
doc.logger,
doc.client,
doc.events,
updated.NodeID,
updated,
innerHTML,
node.Root.NodeID,
node.Root.BackendNodeID,
)

if err != nil {
doc.logger.Error().
Timestamp().
Err(err).
Msg("failed to load root node after page load")

return
}

// close the prev element
doc.element.Close()

// create a new root element wrapper
doc.element = updated
doc.url = ""

if updated.BaseURL != nil {
doc.url = values.NewString(*updated.BaseURL)
if node.Root.BaseURL != nil {
doc.url = values.NewString(*node.Root.BaseURL)
}
}

Expand Down
Loading

0 comments on commit 570c1b4

Please sign in to comment.