From 570c1b4548ee6fcc9ced3f0ba224385bdc1c3d0f Mon Sep 17 00:00:00 2001 From: Tim Voronov Date: Thu, 11 Oct 2018 12:39:03 -0400 Subject: [PATCH] Bug/#80 element not found (#99) * Some work * Some refactoring * Work on stabilizing queries * Removed unit test for debugging * Fixed linter errors * Added logging when NodeID is 0 * Added --time param to CLI --- cli/exec.go | 15 + cli/options.go | 1 + cli/repl.go | 18 +- examples/inner_text_all.fql | 8 + examples/input.fql | 2 +- examples/pagination.fql | 17 +- main.go | 7 + pkg/compiler/compiler_test.go | 18 +- pkg/html/dynamic/document.go | 80 +++-- pkg/html/dynamic/element.go | 501 ++++++++++++++++++---------- pkg/html/dynamic/eval/eval.go | 81 +++-- pkg/html/dynamic/events/dispatch.go | 39 +-- pkg/html/dynamic/helpers.go | 74 ++-- 13 files changed, 531 insertions(+), 330 deletions(-) create mode 100644 examples/inner_text_all.fql diff --git a/cli/exec.go b/cli/exec.go index 855ba9a2..d3619383 100644 --- a/cli/exec.go +++ b/cli/exec.go @@ -50,6 +50,13 @@ func Exec(query string, opts Options) { } }() + var timer *Timer + + if opts.ShowTime { + timer = NewTimer() + timer.Start() + } + out, err := prog.Run( ctx, runtime.WithBrowser(opts.Cdp), @@ -60,6 +67,10 @@ func Exec(query string, opts Options) { runtime.WithUserAgent(opts.UserAgent), ) + if opts.ShowTime { + timer.Stop() + } + if err != nil { fmt.Println("Failed to execute the query") fmt.Println(err) @@ -68,4 +79,8 @@ func Exec(query string, opts Options) { } fmt.Println(string(out)) + + if opts.ShowTime { + fmt.Println(timer.Print()) + } } diff --git a/cli/options.go b/cli/options.go index 791a05de..03667559 100644 --- a/cli/options.go +++ b/cli/options.go @@ -5,4 +5,5 @@ type Options struct { Params map[string]interface{} Proxy string UserAgent string + ShowTime bool } diff --git a/cli/repl.go b/cli/repl.go index 29f9e5f1..5abc51d8 100644 --- a/cli/repl.go +++ b/cli/repl.go @@ -34,7 +34,11 @@ func Repl(version string, opts Options) { var commands []string var
multiline bool - timer := NewTimer() + var timer *Timer + + if opts.ShowTime { + timer = NewTimer() + } l := NewLogger() @@ -90,7 +94,9 @@ func Repl(version string, opts Options) { continue } - timer.Start() + if opts.ShowTime { + timer.Start() + } out, err := program.Run( ctx, @@ -102,9 +108,6 @@ func Repl(version string, opts Options) { runtime.WithUserAgent(opts.UserAgent), ) - timer.Stop() - fmt.Println(timer.Print()) - if err != nil { fmt.Println("Failed to execute the query") fmt.Println(err) @@ -112,5 +115,10 @@ func Repl(version string, opts Options) { } fmt.Println(string(out)) + + if opts.ShowTime { + timer.Stop() + fmt.Println(timer.Print()) + } } } diff --git a/examples/inner_text_all.fql b/examples/inner_text_all.fql new file mode 100644 index 00000000..f9fe01d0 --- /dev/null +++ b/examples/inner_text_all.fql @@ -0,0 +1,8 @@ +LET doc = DOCUMENT('https://soundcloud.com/charts/top', true) + +WAIT_ELEMENT(doc, '.chartTrack__details', 5000) + +LET tracks = ELEMENTS(doc, '.chartTrack') + +FOR track IN tracks + RETURN INNER_TEXT_ALL(track, '.chartTrack__details') diff --git a/examples/input.fql b/examples/input.fql index cf946fef..678f86ab 100644 --- a/examples/input.fql +++ b/examples/input.fql @@ -4,7 +4,7 @@ INPUT(google, 'input[name="q"]', "ferret", 25) CLICK(google, 'input[name="btnK"]') WAIT_NAVIGATION(google) -WAIT_ELEMENT(google, '.g') +WAIT_ELEMENT(google, '.g', 5000) FOR result IN ELEMENTS(google, '.g') // filter out extra elements like videos and 'People also ask' diff --git a/examples/pagination.fql b/examples/pagination.fql index e57498cd..cb8f10f3 100644 --- a/examples/pagination.fql +++ b/examples/pagination.fql @@ -19,15 +19,14 @@ LET result = ( LET items = ( FOR el IN ELEMENTS(amazon, resultItemSelector) - - LET priceTxtMain = INNER_TEXT(el, priceSelector) - LET priceTxt = priceTxtMain != "" ? 
priceTxtMain : INNER_TEXT(el, altPriceSelector) - - RETURN { - title: INNER_TEXT(el, 'h2'), - vendor: INNER_TEXT(el, vendorSelector), - price: TO_FLOAT(SUBSTITUTE(priceTxt, "$", "")) - } + LET priceTxtMain = INNER_TEXT(el, priceSelector) + LET priceTxt = priceTxtMain != "" ? priceTxtMain : INNER_TEXT(el, altPriceSelector) + + RETURN { + title: INNER_TEXT(el, 'h2'), + vendor: INNER_TEXT(el, vendorSelector), + price: TO_FLOAT(SUBSTITUTE(priceTxt, "$", "")) + } ) RETURN items diff --git a/main.go b/main.go index ed596d3c..2963d3f7 100644 --- a/main.go +++ b/main.go @@ -77,6 +77,12 @@ var ( "set custom user agent. '*' triggers UA generation", ) + showTime = flag.Bool( + "time", + false, + "show how much time was taken to execute a query", + ) + version = flag.Bool( "version", false, @@ -151,6 +157,7 @@ func main() { Params: p, Proxy: *proxyAddress, UserAgent: *userAgent, + ShowTime: *showTime, } stat, _ := os.Stdin.Stat() diff --git a/pkg/compiler/compiler_test.go b/pkg/compiler/compiler_test.go index 8a6a638c..315f195f 100644 --- a/pkg/compiler/compiler_test.go +++ b/pkg/compiler/compiler_test.go @@ -2104,12 +2104,22 @@ func TestParam(t *testing.T) { // c := compiler.New() // // out, err := c.MustCompile(` -//LET doc = DOCUMENT("http://getbootstrap.com/docs/4.1/components/collapse/", true) +//LET google = DOCUMENT("https://www.google.com/", true) // -//CLICK(doc, "#headingTwo > h5 > button") -//WAIT_CLASS(doc, "#collapseTwo", "bar") +//INPUT(google, 'input[name="q"]', "ferret", 25) +//CLICK(google, 'input[name="btnK"]') // -//RETURN TRUE +//WAIT_NAVIGATION(google) +//WAIT_ELEMENT(google, '.g', 5000) +// +//FOR result IN ELEMENTS(google, '.g') +// // filter out extra elements like videos and 'People also ask' +// FILTER TRIM(result.attributes.class) == 'g' +// RETURN { +// title: INNER_TEXT(result, 'h3'), +// description: INNER_TEXT(result, '.st'), +// url: INNER_TEXT(result, 'cite') +// } // `).Run(context.Background()) // // So(err, ShouldBeNil) diff --git 
a/pkg/html/dynamic/document.go b/pkg/html/dynamic/document.go index 9ee9783e..baa43681 100644 --- a/pkg/html/dynamic/document.go +++ b/pkg/html/dynamic/document.go @@ -13,7 +13,6 @@ import ( "github.com/MontFerret/ferret/pkg/runtime/logging" "github.com/MontFerret/ferret/pkg/runtime/values" "github.com/mafredri/cdp" - "github.com/mafredri/cdp/protocol/dom" "github.com/mafredri/cdp/protocol/input" "github.com/mafredri/cdp/protocol/page" "github.com/mafredri/cdp/rpcc" @@ -80,25 +79,40 @@ func LoadHTMLDocument( } } - root, innerHTML, err := getRootElement(client) + node, err := getRootElement(ctx, client) if err != nil { - return nil, err + return nil, errors.Wrap(err, "failed to get root element") } broker, err := createEventBroker(client) if err != nil { - return nil, err + return nil, errors.Wrap(err, "failed to create event events") + } + + logger := logging.FromContext(ctx) + + rootElement, err := LoadElement( + ctx, + logger, + client, + broker, + node.Root.NodeID, + node.Root.BackendNodeID, + ) + + if err != nil { + return nil, errors.Wrap(err, "failed to load root element") } return NewHTMLDocument( - logging.FromContext(ctx), + logger, conn, client, broker, - root, - innerHTML, + values.NewString(url), + rootElement, ), nil } @@ -107,20 +121,16 @@ func NewHTMLDocument( conn *rpcc.Conn, client *cdp.Client, broker *events.EventBroker, - root dom.Node, - innerHTML values.String, + url values.String, + rootElement *HTMLElement, ) *HTMLDocument { doc := new(HTMLDocument) doc.logger = logger doc.conn = conn doc.client = client doc.events = broker - doc.element = NewHTMLElement(doc.logger, client, broker, root.NodeID, root, innerHTML) - doc.url = "" - - if root.BaseURL != nil { - doc.url = values.NewString(*root.BaseURL) - } + doc.url = url + doc.element = rootElement broker.AddEventListener("load", doc.handlePageLoad) broker.AddEventListener("error", doc.handleError) @@ -201,7 +211,7 @@ func (doc *HTMLDocument) Close() error { Timestamp(). 
Str("url", doc.url.String()). Err(err). - Msg("failed to stop event broker") + Msg("failed to stop event events") } err = doc.events.Close() @@ -211,7 +221,7 @@ func (doc *HTMLDocument) Close() error { Timestamp(). Str("url", doc.url.String()). Err(err). - Msg("failed to close event broker") + Msg("failed to close event events") } err = doc.element.Close() @@ -645,7 +655,10 @@ func (doc *HTMLDocument) handlePageLoad(_ interface{}) { doc.Lock() defer doc.Unlock() - updated, innerHTML, err := getRootElement(doc.client) + ctx, cancel := contextWithTimeout() + defer cancel() + + node, err := getRootElement(ctx, doc.client) if err != nil { doc.logger.Error(). @@ -656,22 +669,33 @@ func (doc *HTMLDocument) handlePageLoad(_ interface{}) { return } - // close the prev element - doc.element.Close() - - // create a new root element wrapper - doc.element = NewHTMLElement( + updated, err := LoadElement( + ctx, doc.logger, doc.client, doc.events, - updated.NodeID, - updated, - innerHTML, + node.Root.NodeID, + node.Root.BackendNodeID, ) + + if err != nil { + doc.logger.Error(). + Timestamp(). + Err(err). 
+ Msg("failed to load root node after page load") + + return + } + + // close the prev element + doc.element.Close() + + // create a new root element wrapper + doc.element = updated doc.url = "" - if updated.BaseURL != nil { - doc.url = values.NewString(*updated.BaseURL) + if node.Root.BaseURL != nil { + doc.url = values.NewString(*node.Root.BaseURL) } } diff --git a/pkg/html/dynamic/element.go b/pkg/html/dynamic/element.go index efa85de1..278385a6 100644 --- a/pkg/html/dynamic/element.go +++ b/pkg/html/dynamic/element.go @@ -3,6 +3,7 @@ package dynamic import ( "context" "encoding/json" + "fmt" "hash/fnv" "strconv" "strings" @@ -17,59 +18,107 @@ import ( "github.com/mafredri/cdp" "github.com/mafredri/cdp/protocol/dom" "github.com/mafredri/cdp/protocol/input" + "github.com/mafredri/cdp/protocol/runtime" "github.com/rs/zerolog" ) const DefaultTimeout = time.Second * 30 -type HTMLElement struct { - sync.Mutex - logger *zerolog.Logger - client *cdp.Client - broker *events.EventBroker - connected values.Boolean - id dom.NodeID - nodeType values.Int - nodeName values.String - innerHTML values.String - innerText *common.LazyValue - value core.Value - rawAttrs []string - attributes *common.LazyValue - children []dom.NodeID - loadedChildren *common.LazyValue -} +var emptyNodeID = dom.NodeID(0) +var emptyBackendID = dom.BackendNodeID(0) +var emptyObjectID = "" + +type ( + HTMLElementIdentity struct { + nodeID dom.NodeID + backendID dom.BackendNodeID + objectID runtime.RemoteObjectID + } + + HTMLElement struct { + sync.Mutex + logger *zerolog.Logger + client *cdp.Client + events *events.EventBroker + connected values.Boolean + id *HTMLElementIdentity + nodeType values.Int + nodeName values.String + innerHTML values.String + innerText *common.LazyValue + value core.Value + rawAttrs []string + attributes *common.LazyValue + children []*HTMLElementIdentity + loadedChildren *common.LazyValue + } +) func LoadElement( + ctx context.Context, logger *zerolog.Logger, client 
*cdp.Client, broker *events.EventBroker, - id dom.NodeID, + nodeID dom.NodeID, + backendID dom.BackendNodeID, ) (*HTMLElement, error) { if client == nil { return nil, core.Error(core.ErrMissedArgument, "client") } - ctx, cancelFn := context.WithTimeout(context.Background(), DefaultTimeout) + // getting a remote object that represents the current DOM Node + var args *dom.ResolveNodeArgs + + if backendID > 0 { + args = dom.NewResolveNodeArgs().SetBackendNodeID(backendID) + } else { + args = dom.NewResolveNodeArgs().SetNodeID(nodeID) + } + + obj, err := client.DOM.ResolveNode(ctx, args) - defer cancelFn() + if err != nil { + return nil, err + } + + if obj.Object.ObjectID == nil { + return nil, core.Error(core.ErrNotFound, fmt.Sprintf("element %d", nodeID)) + } + + objectID := *obj.Object.ObjectID node, err := client.DOM.DescribeNode( ctx, dom. NewDescribeNodeArgs(). - SetNodeID(id). + SetObjectID(objectID). SetDepth(1), ) if err != nil { - return nil, core.Error(err, strconv.Itoa(int(id))) + return nil, core.Error(err, strconv.Itoa(int(nodeID))) + } + + id := new(HTMLElementIdentity) + id.nodeID = nodeID + id.objectID = objectID + + if backendID > 0 { + id.backendID = backendID + } else { + id.backendID = node.Node.BackendNodeID } - innerHTML, err := loadInnerHTML(client, id) + innerHTML, err := loadInnerHTML(ctx, client, id) if err != nil { - return nil, core.Error(err, strconv.Itoa(int(id))) + return nil, core.Error(err, strconv.Itoa(int(nodeID))) + } + + var val string + + if node.Node.Value != nil { + val = *node.Node.Value } return NewHTMLElement( @@ -77,8 +126,12 @@ func LoadElement( client, broker, id, - node.Node, + node.Node.NodeType, + node.Node.NodeName, + node.Node.Attributes, + val, innerHTML, + createChildrenArray(node.Node.Children), ), nil } @@ -86,30 +139,30 @@ func NewHTMLElement( logger *zerolog.Logger, client *cdp.Client, broker *events.EventBroker, - id dom.NodeID, - node dom.Node, + id *HTMLElementIdentity, + nodeType int, + nodeName string, + 
attributes []string, + value string, innerHTML values.String, + children []*HTMLElementIdentity, ) *HTMLElement { el := new(HTMLElement) el.logger = logger el.client = client - el.broker = broker + el.events = broker el.connected = values.True el.id = id - el.nodeType = values.NewInt(node.NodeType) - el.nodeName = values.NewString(node.NodeName) + el.nodeType = values.NewInt(nodeType) + el.nodeName = values.NewString(nodeName) el.innerHTML = innerHTML el.innerText = common.NewLazyValue(el.loadInnerText) - el.rawAttrs = node.Attributes[:] + el.rawAttrs = attributes el.attributes = common.NewLazyValue(el.loadAttrs) el.value = values.EmptyString el.loadedChildren = common.NewLazyValue(el.loadChildren) - - if node.Value != nil { - el.value = values.NewString(*node.Value) - } - - el.children = createChildrenArray(node.Children) + el.value = values.NewString(value) + el.children = children broker.AddEventListener("reload", el.handlePageReload) broker.AddEventListener("attr:modified", el.handleAttrModified) @@ -131,12 +184,12 @@ func (el *HTMLElement) Close() error { } el.connected = false - el.broker.RemoveEventListener("reload", el.handlePageReload) - el.broker.RemoveEventListener("attr:modified", el.handleAttrModified) - el.broker.RemoveEventListener("attr:removed", el.handleAttrRemoved) - el.broker.RemoveEventListener("children:count", el.handleChildrenCountChanged) - el.broker.RemoveEventListener("children:inserted", el.handleChildInserted) - el.broker.RemoveEventListener("children:deleted", el.handleChildDeleted) + el.events.RemoveEventListener("reload", el.handlePageReload) + el.events.RemoveEventListener("attr:modified", el.handleAttrModified) + el.events.RemoveEventListener("attr:removed", el.handleAttrRemoved) + el.events.RemoveEventListener("children:count", el.handleChildrenCountChanged) + el.events.RemoveEventListener("children:inserted", el.handleChildInserted) + el.events.RemoveEventListener("children:deleted", el.handleChildDeleted) return nil } @@ -164,8 
+217,8 @@ func (el *HTMLElement) Compare(other core.Value) int { case core.HTMLDocumentType: other := other.(*HTMLElement) - id := int(el.id) - otherID := int(other.id) + id := int(el.id.backendID) + otherID := int(other.id.backendID) if id == otherID { return 0 @@ -210,14 +263,10 @@ func (el *HTMLElement) Value() core.Value { ctx, cancel := contextWithTimeout() defer cancel() - val, err := eval.Property(ctx, el.client, el.id, "value") + val, err := eval.Property(ctx, el.client, el.id.objectID, "value") if err != nil { - el.logger.Error(). - Timestamp(). - Err(err). - Int("id", int(el.id)). - Msg("failed to get node value") + el.logError(err).Msg("failed to get node value") return el.value } @@ -295,29 +344,33 @@ func (el *HTMLElement) QuerySelector(selector values.String) core.Value { return values.None } - ctx := context.Background() + ctx, cancel := contextWithTimeout() + defer cancel() - selectorArgs := dom.NewQuerySelectorArgs(el.id, selector.String()) + // TODO: Can we use RemoteObjectID or BackendID instead of NodeId? + selectorArgs := dom.NewQuerySelectorArgs(el.id.nodeID, selector.String()) found, err := el.client.DOM.QuerySelector(ctx, selectorArgs) if err != nil { - el.logger.Error(). - Timestamp(). - Err(err). - Int("id", int(el.id)). + el.logError(err). Str("selector", selector.String()). Msg("failed to retrieve a node by selector") return values.None } - res, err := LoadElement(el.logger, el.client, el.broker, found.NodeID) + if found.NodeID == emptyNodeID { + el.logError(err). + Str("selector", selector.String()). + Msg("failed to find a node by selector. returned 0 NodeID") + + return values.None + } + + res, err := LoadElement(ctx, el.logger, el.client, el.events, found.NodeID, emptyBackendID) if err != nil { - el.logger.Error(). - Timestamp(). - Err(err). - Int("id", int(el.id)). + el.logError(err). Str("selector", selector.String()). 
Msg("failed to load a child node by selector") @@ -332,16 +385,15 @@ func (el *HTMLElement) QuerySelectorAll(selector values.String) core.Value { return values.NewArray(0) } - ctx := context.Background() + ctx, cancel := contextWithTimeout() + defer cancel() - selectorArgs := dom.NewQuerySelectorAllArgs(el.id, selector.String()) + // TODO: Can we use RemoteObjectID or BackendID instead of NodeId? + selectorArgs := dom.NewQuerySelectorAllArgs(el.id.nodeID, selector.String()) res, err := el.client.DOM.QuerySelectorAll(ctx, selectorArgs) if err != nil { - el.logger.Error(). - Timestamp(). - Err(err). - Int("id", int(el.id)). + el.logError(err). Str("selector", selector.String()). Msg("failed to retrieve nodes by selector") @@ -351,13 +403,18 @@ func (el *HTMLElement) QuerySelectorAll(selector values.String) core.Value { arr := values.NewArray(len(res.NodeIDs)) for _, id := range res.NodeIDs { - childEl, err := LoadElement(el.logger, el.client, el.broker, id) + if id == emptyNodeID { + el.logError(err). + Str("selector", selector.String()). + Msg("failed to find a node by selector. returned 0 NodeID") + + continue + } + + childEl, err := LoadElement(ctx, el.logger, el.client, el.events, id, emptyBackendID) if err != nil { - el.logger.Error(). - Timestamp(). - Err(err). - Int("id", int(el.id)). + el.logError(err). Str("selector", selector.String()). Msg("failed to load nodes by selector") @@ -424,49 +481,75 @@ func (el *HTMLElement) InnerTextBySelector(selector values.String) values.String return values.EmptyString } - ctx := context.Background() + ctx, cancel := contextWithTimeout() + defer cancel() - selectorArgs := dom.NewQuerySelectorArgs(el.id, selector.String()) - found, err := el.client.DOM.QuerySelector(ctx, selectorArgs) + // TODO: Can we use RemoteObjectID or BackendID instead of NodeId? + found, err := el.client.DOM.QuerySelector(ctx, dom.NewQuerySelectorArgs(el.id.nodeID, selector.String())) if err != nil { - el.logger.Error(). - Timestamp(). - Err(err). 
- Int("id", int(el.id)). + el.logError(err). Str("selector", selector.String()). - Msg("failed to retrieve nodes by selector") + Msg("failed to retrieve a node by selector") return values.EmptyString } - text, err := loadInnerText(el.client, found.NodeID) + if found.NodeID == emptyNodeID { + el.logError(err). + Str("selector", selector.String()). + Msg("failed to find a node by selector. returned 0 NodeID") + + return values.EmptyString + } + + childNodeID := found.NodeID + + obj, err := el.client.DOM.ResolveNode(ctx, dom.NewResolveNodeArgs().SetNodeID(childNodeID)) if err != nil { - el.logger.Error(). - Timestamp(). - Err(err). - Int("id", int(el.id)). + el.logError(err). + Int("childNodeID", int(childNodeID)). + Str("selector", selector.String()). + Msg("failed to resolve remote object for child element") + + return values.EmptyString + } + + if obj.Object.ObjectID == nil { + el.logError(err). + Int("childNodeID", int(childNodeID)). + Str("selector", selector.String()). + Msg("failed to resolve remote object for child element") + + return values.EmptyString + } + + objID := *obj.Object.ObjectID + + text, err := eval.Property(ctx, el.client, objID, "innerText") + + if err != nil { + el.logError(err). + Str("childObjectID", string(objID)). Str("selector", selector.String()). Msg("failed to load inner text for found child element") return values.EmptyString } - return text + return values.NewString(text.String()) } func (el *HTMLElement) InnerTextBySelectorAll(selector values.String) *values.Array { - ctx := context.Background() + ctx, cancel := contextWithTimeout() + defer cancel() - selectorArgs := dom.NewQuerySelectorAllArgs(el.id, selector.String()) - res, err := el.client.DOM.QuerySelectorAll(ctx, selectorArgs) + // TODO: Can we use RemoteObjectID or BackendID instead of NodeId? + res, err := el.client.DOM.QuerySelectorAll(ctx, dom.NewQuerySelectorAllArgs(el.id.nodeID, selector.String())) if err != nil { - el.logger.Error(). - Timestamp(). - Err(err). 
- Int("id", int(el.id)). + el.logError(err). Str("selector", selector.String()). Msg("failed to retrieve nodes by selector") @@ -475,19 +558,42 @@ func (el *HTMLElement) InnerTextBySelectorAll(selector values.String) *values.Ar arr := values.NewArray(len(res.NodeIDs)) - for _, id := range res.NodeIDs { - text, err := loadInnerText(el.client, id) + for idx, id := range res.NodeIDs { + if id == emptyNodeID { + el.logError(err). + Str("selector", selector.String()). + Msg("failed to find a node by selector. returned 0 NodeID") + + continue + } + + obj, err := el.client.DOM.ResolveNode(ctx, dom.NewResolveNodeArgs().SetNodeID(id)) if err != nil { - el.logger.Error(). - Timestamp(). - Err(err). - Int("id", int(el.id)). + el.logError(err). + Int("index", idx). + Int("childNodeID", int(id)). + Str("selector", selector.String()). + Msg("failed to resolve remote object for child element") + + continue + } + + if obj.Object.ObjectID == nil { + continue + } + + objID := *obj.Object.ObjectID + + text, err := eval.Property(ctx, el.client, objID, "innerText") + + if err != nil { + el.logError(err). + Str("childObjectID", string(objID)). Str("selector", selector.String()). Msg("failed to load inner text for found child element") - // return what we have - return arr + continue } arr.Push(text) @@ -508,29 +614,26 @@ func (el *HTMLElement) InnerHTMLBySelector(selector values.String) values.String return values.EmptyString } - ctx := context.Background() + ctx, cancel := contextWithTimeout() + defer cancel() - selectorArgs := dom.NewQuerySelectorArgs(el.id, selector.String()) - found, err := el.client.DOM.QuerySelector(ctx, selectorArgs) + // TODO: Can we use RemoteObjectID or BackendID instead of NodeId? + found, err := el.client.DOM.QuerySelector(ctx, dom.NewQuerySelectorArgs(el.id.nodeID, selector.String())) if err != nil { - el.logger.Error(). - Timestamp(). - Err(err). - Int("id", int(el.id)). + el.logError(err). Str("selector", selector.String()). 
Msg("failed to retrieve nodes by selector") return values.EmptyString } - text, err := loadInnerHTML(el.client, found.NodeID) + text, err := loadInnerHTML(ctx, el.client, &HTMLElementIdentity{ + nodeID: found.NodeID, + }) if err != nil { - el.logger.Error(). - Timestamp(). - Err(err). - Int("id", int(el.id)). + el.logError(err). Str("selector", selector.String()). Msg("failed to load inner HTML for found child element") @@ -541,16 +644,15 @@ func (el *HTMLElement) InnerHTMLBySelector(selector values.String) values.String } func (el *HTMLElement) InnerHTMLBySelectorAll(selector values.String) *values.Array { - ctx := context.Background() + ctx, cancel := contextWithTimeout() + defer cancel() - selectorArgs := dom.NewQuerySelectorAllArgs(el.id, selector.String()) + // TODO: Can we use RemoteObjectID or BackendID instead of NodeId? + selectorArgs := dom.NewQuerySelectorAllArgs(el.id.nodeID, selector.String()) res, err := el.client.DOM.QuerySelectorAll(ctx, selectorArgs) if err != nil { - el.logger.Error(). - Timestamp(). - Err(err). - Int("id", int(el.id)). + el.logError(err). Str("selector", selector.String()). Msg("failed to retrieve nodes by selector") @@ -560,13 +662,12 @@ func (el *HTMLElement) InnerHTMLBySelectorAll(selector values.String) *values.Ar arr := values.NewArray(len(res.NodeIDs)) for _, id := range res.NodeIDs { - text, err := loadInnerHTML(el.client, id) + text, err := loadInnerHTML(ctx, el.client, &HTMLElementIdentity{ + nodeID: id, + }) if err != nil { - el.logger.Error(). - Timestamp(). - Err(err). - Int("id", int(el.id)). + el.logError(err). Str("selector", selector.String()). 
Msg("failed to load inner HTML for found child element") @@ -585,14 +686,16 @@ func (el *HTMLElement) Click() (values.Boolean, error) { defer cancel() - return events.DispatchEvent(ctx, el.client, el.id, "click") + return events.DispatchEvent(ctx, el.client, el.id.objectID, "click") } func (el *HTMLElement) Input(value core.Value, delay values.Int) error { ctx, cancel := contextWithTimeout() defer cancel() - if err := el.client.DOM.Focus(ctx, dom.NewFocusArgs().SetNodeID(el.id)); err != nil { + if err := el.client.DOM.Focus(ctx, dom.NewFocusArgs().SetObjectID(el.id.objectID)); err != nil { + el.logError(err).Msg("failed to focus") + return err } @@ -605,9 +708,13 @@ func (el *HTMLElement) Input(value core.Value, delay values.Int) error { for _, ch := range valStr { for _, ev := range []string{"keyDown", "keyUp"} { ke := input.NewDispatchKeyEventArgs(ev).SetText(string(ch)) + if err := el.client.Input.DispatchKeyEvent(ctx, ke); err != nil { + el.logError(err).Str("value", value.String()).Msg("failed to input a value") + return err } + time.Sleep(delayMs * time.Millisecond) } } @@ -623,25 +730,36 @@ func (el *HTMLElement) IsConnected() values.Boolean { } func (el *HTMLElement) loadInnerText() (core.Value, error) { + if el.IsConnected() { + ctx, cancel := contextWithTimeout() + defer cancel() + + text, err := eval.Property(ctx, el.client, el.id.objectID, "innerText") + + if err == nil { + return text, nil + } + + el.logError(err).Msg("failed to read 'innerText' property of remote object") + + // and just parse innerHTML + } + h := el.InnerHTML() if h == values.EmptyString { return h, nil } - parser, err := parseInnerText(h.String()) + parsed, err := parseInnerText(h.String()) if err != nil { - el.logger.Error(). - Timestamp(). - Err(err). - Int("id", int(el.id)). 
- Msg("failed to parse inner html") + el.logError(err).Msg("failed to parse inner html") return values.EmptyString, err } - return parser, nil + return parsed, nil } func (el *HTMLElement) loadAttrs() (core.Value, error) { @@ -653,16 +771,28 @@ func (el *HTMLElement) loadChildren() (core.Value, error) { return values.NewArray(0), nil } - loaded, err := loadNodes(el.logger, el.client, el.broker, el.children) + ctx, cancel := contextWithTimeout() + defer cancel() - if err != nil { - el.logger.Error(). - Timestamp(). - Err(err). - Int("id", int(el.id)). - Msg("failed to load child nodes") + loaded := values.NewArray(len(el.children)) + + for _, childID := range el.children { + child, err := LoadElement( + ctx, + el.logger, + el.client, + el.events, + childID.nodeID, + childID.backendID, + ) + + if err != nil { + el.logError(err).Msg("failed to load child nodes") + + continue + } - return values.None, err + loaded.Push(child) } return loaded, nil @@ -681,17 +811,13 @@ func (el *HTMLElement) handleAttrModified(message interface{}) { } // it's not for this element - if reply.NodeID != el.id { + if reply.NodeID != el.id.nodeID { return } el.attributes.Write(func(v core.Value, err error) { if err != nil { - el.logger.Error(). - Timestamp(). - Err(err). - Int("id", int(el.id)). - Msg("failed to update node") + el.logError(err).Msg("failed to update node") return } @@ -715,7 +841,7 @@ func (el *HTMLElement) handleAttrRemoved(message interface{}) { } // it's not for this element - if reply.NodeID != el.id { + if reply.NodeID != el.id.nodeID { return } @@ -727,11 +853,7 @@ func (el *HTMLElement) handleAttrRemoved(message interface{}) { el.attributes.Write(func(v core.Value, err error) { if err != nil { - el.logger.Error(). - Timestamp(). - Err(err). - Int("id", int(el.id)). 
- Msg("failed to update node") + el.logError(err).Msg("failed to update node") return } @@ -753,18 +875,20 @@ func (el *HTMLElement) handleChildrenCountChanged(message interface{}) { return } - if reply.NodeID != el.id { + if reply.NodeID != el.id.nodeID { return } - node, err := el.client.DOM.DescribeNode(context.Background(), dom.NewDescribeNodeArgs()) + ctx, cancel := contextWithTimeout() + defer cancel() + + node, err := el.client.DOM.DescribeNode( + ctx, + dom.NewDescribeNodeArgs().SetObjectID(el.id.objectID), + ) if err != nil { - el.logger.Error(). - Timestamp(). - Err(err). - Int("id", int(el.id)). - Msg("failed to update node") + el.logError(err).Msg("failed to update node") return } @@ -782,7 +906,7 @@ func (el *HTMLElement) handleChildInserted(message interface{}) { return } - if reply.ParentNodeID != el.id { + if reply.ParentNodeID != el.id.nodeID { return } @@ -794,7 +918,7 @@ func (el *HTMLElement) handleChildInserted(message interface{}) { defer el.Unlock() for idx, id := range el.children { - if id == prevID { + if id.nodeID == prevID { targetIDx = idx break } @@ -804,42 +928,43 @@ func (el *HTMLElement) handleChildInserted(message interface{}) { return } + nextIdentity := &HTMLElementIdentity{ + nodeID: reply.Node.NodeID, + backendID: reply.Node.BackendNodeID, + } + arr := el.children - el.children = append(arr[:targetIDx], append([]dom.NodeID{nextID}, arr[targetIDx:]...)...) + el.children = append(arr[:targetIDx], append([]*HTMLElementIdentity{nextIdentity}, arr[targetIDx:]...)...) if !el.loadedChildren.Ready() { return } el.loadedChildren.Write(func(v core.Value, err error) { + ctx, cancel := contextWithTimeout() + defer cancel() + loadedArr := v.(*values.Array) - loadedEl, err := LoadElement(el.logger, el.client, el.broker, nextID) + loadedEl, err := LoadElement(ctx, el.logger, el.client, el.events, nextID, emptyBackendID) if err != nil { - el.logger.Error(). - Timestamp(). - Err(err). - Int("id", int(el.id)). 
- Msg("failed to load an inserted node") + el.logError(err).Msg("failed to load an inserted node") return } loadedArr.Insert(values.NewInt(targetIDx), loadedEl) - newInnerHTML, err := loadInnerHTML(el.client, el.id) + newInnerHTML, err := loadInnerHTML(ctx, el.client, el.id) if err != nil { - el.logger.Error(). - Timestamp(). - Err(err). - Int("id", int(el.id)). - Msg("failed to update node") + el.logError(err).Msg("failed to update node") return } el.innerHTML = newInnerHTML + el.innerText.Reset() }) } @@ -850,7 +975,7 @@ func (el *HTMLElement) handleChildDeleted(message interface{}) { return } - if reply.ParentNodeID != el.id { + if reply.ParentNodeID != el.id.nodeID { return } @@ -861,7 +986,7 @@ func (el *HTMLElement) handleChildDeleted(message interface{}) { defer el.Unlock() for idx, id := range el.children { - if id == targetID { + if id.nodeID == targetID { targetIDx = idx break } @@ -883,27 +1008,41 @@ func (el *HTMLElement) handleChildDeleted(message interface{}) { el.logger.Error(). Timestamp(). Err(err). - Int("id", int(el.id)). + Int("nodeID", int(el.id.nodeID)). Msg("failed to update node") return } + ctx, cancel := contextWithTimeout() + defer cancel() + loadedArr := v.(*values.Array) loadedArr.RemoveAt(values.NewInt(targetIDx)) - newInnerHTML, err := loadInnerHTML(el.client, el.id) + newInnerHTML, err := loadInnerHTML(ctx, el.client, el.id) if err != nil { el.logger.Error(). Timestamp(). Err(err). - Int("id", int(el.id)). + Int("nodeID", int(el.id.nodeID)). Msg("failed to update node") return } el.innerHTML = newInnerHTML + el.innerText.Reset() }) } + +func (el *HTMLElement) logError(err error) *zerolog.Event { + return el.logger. + Error(). + Timestamp(). + Int("nodeID", int(el.id.nodeID)). + Int("backendID", int(el.id.backendID)). + Str("objectID", string(el.id.objectID)). 
+ Err(err) +} diff --git a/pkg/html/dynamic/eval/eval.go b/pkg/html/dynamic/eval/eval.go index 1c4d09db..b22bff85 100644 --- a/pkg/html/dynamic/eval/eval.go +++ b/pkg/html/dynamic/eval/eval.go @@ -2,11 +2,11 @@ package eval import ( "context" + "encoding/json" "fmt" "github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/values" "github.com/mafredri/cdp" - "github.com/mafredri/cdp/protocol/dom" "github.com/mafredri/cdp/protocol/runtime" ) @@ -49,29 +49,12 @@ func Eval(client *cdp.Client, exp string, ret bool, async bool) (core.Value, err func Property( ctx context.Context, client *cdp.Client, - id dom.NodeID, + objectID runtime.RemoteObjectID, propName string, ) (core.Value, error) { - // get a ref to remote object representing the node - obj, err := client.DOM.ResolveNode( - ctx, - dom.NewResolveNodeArgs(). - SetNodeID(id), - ) - - if err != nil { - return values.None, err - } - - if obj.Object.ObjectID == nil { - return values.None, core.Error(core.ErrNotFound, fmt.Sprintf("element %d", id)) - } - - defer client.Runtime.ReleaseObject(ctx, runtime.NewReleaseObjectArgs(*obj.Object.ObjectID)) - res, err := client.Runtime.GetProperties( ctx, - runtime.NewGetPropertiesArgs(*obj.Object.ObjectID), + runtime.NewGetPropertiesArgs(objectID), ) if err != nil { @@ -109,6 +92,64 @@ func Property( return values.None, nil } +func Method( + ctx context.Context, + client *cdp.Client, + objectID runtime.RemoteObjectID, + methodName string, + args []runtime.CallArgument, +) (*runtime.RemoteObject, error) { + found, err := client.Runtime.CallFunctionOn( + ctx, + runtime.NewCallFunctionOnArgs(methodName). + SetObjectID(objectID). 
+ SetArguments(args), + ) + + if err != nil { + return nil, err + } + + if found.ExceptionDetails != nil { + return nil, found.ExceptionDetails + } + + if found.Result.ObjectID == nil { + return nil, nil + } + + return &found.Result, nil +} + +func MethodQuerySelector( + ctx context.Context, + client *cdp.Client, + objectID runtime.RemoteObjectID, + selector string, +) (runtime.RemoteObjectID, error) { + bytes, err := json.Marshal(selector) + + if err != nil { + return "", err + } + + obj, err := Method(ctx, client, objectID, "querySelector", []runtime.CallArgument{ + { + Value: json.RawMessage(bytes), + }, + }) + + if err != nil { + return "", err + } + + if obj.ObjectID == nil { + return "", nil + } + + return *obj.ObjectID, nil +} + func Unmarshal(obj *runtime.RemoteObject) (core.Value, error) { if obj == nil { return values.None, nil diff --git a/pkg/html/dynamic/events/dispatch.go b/pkg/html/dynamic/events/dispatch.go index 19f3e6b5..fa3c1ed9 100644 --- a/pkg/html/dynamic/events/dispatch.go +++ b/pkg/html/dynamic/events/dispatch.go @@ -6,31 +6,15 @@ import ( "github.com/MontFerret/ferret/pkg/html/dynamic/eval" "github.com/MontFerret/ferret/pkg/runtime/values" "github.com/mafredri/cdp" - "github.com/mafredri/cdp/protocol/dom" "github.com/mafredri/cdp/protocol/runtime" ) func DispatchEvent( ctx context.Context, client *cdp.Client, - id dom.NodeID, + objectID runtime.RemoteObjectID, eventName string, ) (values.Boolean, error) { - // get a ref to remote object representing the node - obj, err := client.DOM.ResolveNode( - ctx, - dom.NewResolveNodeArgs(). 
- SetNodeID(id), - ) - - if err != nil { - return values.False, err - } - - if obj.Object.ObjectID == nil { - return values.False, nil - } - evt, err := client.Runtime.Evaluate(ctx, runtime.NewEvaluateArgs(eval.PrepareEval(fmt.Sprintf(` return new window.MouseEvent('%s', { bubbles: true }) `, eventName)))) @@ -52,24 +36,21 @@ func DispatchEvent( // release the event object defer client.Runtime.ReleaseObject(ctx, runtime.NewReleaseObjectArgs(*evtID)) - res, err := client.Runtime.CallFunctionOn( + _, err = eval.Method( ctx, - runtime.NewCallFunctionOnArgs("dispatchEvent"). - SetObjectID(*obj.Object.ObjectID). - SetArguments([]runtime.CallArgument{ - { - ObjectID: evt.Result.ObjectID, - }, - }), + client, + objectID, + "dispatchEvent", + []runtime.CallArgument{ + { + ObjectID: evt.Result.ObjectID, + }, + }, ) if err != nil { return values.False, err } - if res.ExceptionDetails != nil { - return values.False, res.ExceptionDetails - } - return values.True, nil } diff --git a/pkg/html/dynamic/helpers.go b/pkg/html/dynamic/helpers.go index 6e578387..5ffcd58f 100644 --- a/pkg/html/dynamic/helpers.go +++ b/pkg/html/dynamic/helpers.go @@ -10,15 +10,10 @@ import ( "github.com/mafredri/cdp" "github.com/mafredri/cdp/protocol/dom" "github.com/mafredri/cdp/protocol/page" - "github.com/rs/zerolog" "golang.org/x/sync/errgroup" "strings" ) -func pointerInt(input int) *int { - return &input -} - type batchFunc = func() error func runBatch(funcs ...batchFunc) error { @@ -31,24 +26,14 @@ func runBatch(funcs ...batchFunc) error { return eg.Wait() } -func getRootElement(client *cdp.Client) (dom.Node, values.String, error) { - args := dom.NewGetDocumentArgs() - args.Depth = pointerInt(1) // lets load the entire document - ctx := context.Background() - - d, err := client.DOM.GetDocument(ctx, args) - - if err != nil { - return dom.Node{}, values.EmptyString, err - } - - innerHTML, err := client.DOM.GetOuterHTML(ctx, dom.NewGetOuterHTMLArgs().SetNodeID(d.Root.NodeID)) +func 
getRootElement(ctx context.Context, client *cdp.Client) (*dom.GetDocumentReply, error) { + d, err := client.DOM.GetDocument(ctx, dom.NewGetDocumentArgs().SetDepth(1)) if err != nil { - return dom.Node{}, values.EmptyString, err + return nil, err } - return d.Root, values.NewString(innerHTML.OuterHTML), nil + return d, nil } func parseAttrs(attrs []string) *values.Object { @@ -79,28 +64,24 @@ func parseAttrs(attrs []string) *values.Object { return res } -func loadInnerHTML(client *cdp.Client, id dom.NodeID) (values.String, error) { - res, err := client.DOM.GetOuterHTML(context.Background(), dom.NewGetOuterHTMLArgs().SetNodeID(id)) +func loadInnerHTML(ctx context.Context, client *cdp.Client, id *HTMLElementIdentity) (values.String, error) { + var args *dom.GetOuterHTMLArgs - if err != nil { - return "", err + if id.objectID != "" { + args = dom.NewGetOuterHTMLArgs().SetObjectID(id.objectID) + } else if id.backendID > 0 { + args = dom.NewGetOuterHTMLArgs().SetBackendNodeID(id.backendID) + } else { + args = dom.NewGetOuterHTMLArgs().SetNodeID(id.nodeID) } - return values.NewString(res.OuterHTML), err -} - -func loadInnerText(client *cdp.Client, id dom.NodeID) (values.String, error) { - h, err := loadInnerHTML(client, id) + res, err := client.DOM.GetOuterHTML(ctx, args) if err != nil { - return values.EmptyString, err - } - - if h == values.EmptyString { - return h, nil + return "", err } - return parseInnerText(h.String()) + return values.NewString(res.OuterHTML), err } func parseInnerText(innerHTML string) (values.String, error) { @@ -115,30 +96,17 @@ func parseInnerText(innerHTML string) (values.String, error) { return values.NewString(parsed.Text()), nil } -func createChildrenArray(nodes []dom.Node) []dom.NodeID { - children := make([]dom.NodeID, len(nodes)) +func createChildrenArray(nodes []dom.Node) []*HTMLElementIdentity { + children := make([]*HTMLElementIdentity, len(nodes)) for idx, child := range nodes { - children[idx] = child.NodeID - } - - return children 
-} - -func loadNodes(logger *zerolog.Logger, client *cdp.Client, broker *events.EventBroker, nodes []dom.NodeID) (*values.Array, error) { - arr := values.NewArray(len(nodes)) - - for _, id := range nodes { - child, err := LoadElement(logger, client, broker, id) - - if err != nil { - return nil, err + children[idx] = &HTMLElementIdentity{ + nodeID: child.NodeID, + backendID: child.BackendNodeID, } - - arr.Push(child) } - return arr, nil + return children } func contextWithTimeout() (context.Context, context.CancelFunc) {