Skip to content

Commit

Permalink
Improvements for running in interactive mode in browser (webrecorder#6)
Browse files Browse the repository at this point in the history
improvements for interactive use in browser

* twitter: remove 'threads' state, better state labels
- support unpause on second run() call

* autoscroll improvements:
- better messages, use timing based on waitUnit
- smoother scrolling, increments of 200px
- look for 'show more' button and click if in viewport (add isInViewport() utility func)
- wait for window height change after clicking show more
- track 'segments' state when window height changes
- increase total wait time for window height increase with more segments

* instagram: preload the first post's standalone view without opening a separate window
  • Loading branch information
ikreymer authored Apr 10, 2021
1 parent f666ca3 commit df4c0fb
Show file tree
Hide file tree
Showing 12 changed files with 200 additions and 113 deletions.
2 changes: 1 addition & 1 deletion dist/behaviors.js

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
},
"scripts": {
"build": "webpack --mode production",
"build-dev": "webpack --mode development"
"build-dev": "webpack --mode development",
"lint": "eslint ./src webpack.config.js"
},
"description": "Browsertrix Behaviors"
}
55 changes: 51 additions & 4 deletions src/autoscroll.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,18 @@
import { sleep, Behavior } from "./lib/utils";
import { sleep, Behavior, waitUnit, xpathNode, isInViewport, waitUntil } from "./lib/utils";


// ===========================================================================
export class AutoScroll extends Behavior
{
constructor() {
super();
this.showMoreQuery = "//*[contains(text(), 'show more') or contains(text(), 'Show more')]";

this.state = {
segments: 1
};
}

static get name() {
return "Autoscroll";
}
Expand All @@ -19,12 +28,50 @@ export class AutoScroll extends Behavior
self.document.documentElement.offsetHeight
);

const scrollOpts = { top: 250, left: 0, behavior: "auto" };
const scrollOpts = { top: 200, left: 0, behavior: "auto" };
const interval = waitUnit;

//scrollOpts.top = Math.min(self.document.body.clientHeight * 0.01, 500);

let showMoreElem = null;
let origHeight = self.document.body.clientHeight;

while (canScrollMore()) {
if (self.document.body.clientHeight > origHeight) {
this.state.segments++;
}

origHeight = self.document.body.clientHeight;

if (!showMoreElem) {
showMoreElem = xpathNode(this.showMoreQuery);
}

if (showMoreElem && isInViewport(showMoreElem)) {
yield this.getState("Clicking 'Show More', awaiting more content");
showMoreElem.click();

await sleep(waitUnit);

await Promise.race([
waitUntil(() => self.document.body.clientHeight > origHeight, 500),
sleep(30000)
]);

showMoreElem = null;
}

self.scrollBy(scrollOpts);
yield {"msg": "Scrolling by " + scrollOpts.top};
await sleep(500);

yield this.getState(`Scrolling down by ${scrollOpts.top} pixels every ${interval / 1000.0} seconds`);

await sleep(interval);

// check whether we can scroll further, allowing more time for content to appear the longer we have already scrolled
await Promise.race([
waitUntil(() => canScrollMore(), interval),
sleep(this.state.segments * 5000)
]);
}
}
}
12 changes: 10 additions & 2 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@ import { awaitLoad, sleep, behavior_log, _setLogFunc, _setBehaviorManager, insta

import siteBehaviors from "./site";

_setBehaviorManager(BehaviorManager);


// ===========================================================================
export class BehaviorManager
Expand All @@ -15,6 +13,7 @@ export class BehaviorManager
this.behaviors = [];
this.mainBehavior = null;
this.inited = false;
this.started = false;
behavior_log("Loaded behaviors for: " + self.location.href);
}

Expand Down Expand Up @@ -87,6 +86,11 @@ export class BehaviorManager
}

async run(opts) {
if (this.started) {
this.unpause();
return;
}

this.init(opts);

await awaitLoad();
Expand All @@ -95,6 +99,8 @@ export class BehaviorManager
this.mainBehavior.start();
}

this.started = true;

let allBehaviors = Promise.allSettled(this.behaviors.map(x => x.done()));

if (this.timeout) {
Expand Down Expand Up @@ -127,4 +133,6 @@ export class BehaviorManager
}
}

_setBehaviorManager(BehaviorManager);

installBehaviors(self);
20 changes: 18 additions & 2 deletions src/lib/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,11 @@ export function awaitLoad() {

export function behavior_log(data, type = "debug") {
if (_logFunc) {
_logFunc({data, type});
try {
_logFunc({data, type});
} catch (e) {
_logFunc(JSON.stringify({data, type}));
}
}
}

Expand Down Expand Up @@ -112,13 +116,25 @@ export function xpathString(path, root) {
}


// ===========================================================================
/**
 * Report whether an element's bounding box lies entirely inside the viewport.
 *
 * The box comes from `elem.getBoundingClientRect()`. The viewport size is
 * `window.innerHeight` / `window.innerWidth`, falling back to the
 * `clientHeight` / `clientWidth` of `document.documentElement` when the
 * window value is falsy.
 *
 * @param {Element} elem - element to test; must provide getBoundingClientRect().
 * @returns {boolean} true only if the top and left edges are >= 0 and the
 *   bottom and right edges do not extend past the viewport height and width.
 */
export function isInViewport(elem) {
  const rect = elem.getBoundingClientRect();

  // Top or left edge is off-screen (negated >= so a NaN edge also fails).
  if (!(rect.top >= 0 && rect.left >= 0)) {
    return false;
  }

  const viewportHeight = window.innerHeight || document.documentElement.clientHeight;
  if (!(rect.bottom <= viewportHeight)) {
    return false;
  }

  const viewportWidth = window.innerWidth || document.documentElement.clientWidth;
  return rect.right <= viewportWidth;
}

// ===========================================================================
export class Behavior
{
constructor() {
this._running = null;
this.paused = null;
this._unpause = null;
this.state = {};
}

start() {
Expand Down Expand Up @@ -161,7 +177,7 @@ export class Behavior
}

getState(msg, incrValue) {
if (incrValue && this.state[incrValue] != undefined) {
if (incrValue && this.state[incrValue] !== undefined) {
this.state[incrValue]++;
}

Expand Down
92 changes: 52 additions & 40 deletions src/site/instagram.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { Behavior, behavior_log, installBehaviors } from "../lib/utils";
import { sleep, xpathNode, xpathString, RestoreState, waitUnit } from "../lib/utils";
import { sleep, xpathNode, xpathString, RestoreState, waitUnit, waitUntil } from "../lib/utils";


// ===========================================================================
Expand Down Expand Up @@ -37,14 +37,16 @@ export class InstagramPostsBehavior extends Behavior

this.scrollOpts = {block: "start", inline: "nearest", behavior: "smooth"};

this.maxCommentsTime = 10000;

// extra window for first post, if allowed
this.postOnlyWindow = null;

this.state = {
"posts": 0,
"slides": 0,
"comments": 0,
"rows": 0,
posts: 0,
slides: 0,
rows: 0,
comments: 0,
};
}

Expand Down Expand Up @@ -101,7 +103,7 @@ export class InstagramPostsBehavior extends Behavior
}
}

async* viewStandalonePost() {
async* viewStandalonePost(origLoc) {
let root = xpathNode(this.rootPath);

if (!root || !root.firstElementChild) {
Expand All @@ -110,45 +112,42 @@ export class InstagramPostsBehavior extends Behavior

const firstPostHref = xpathString(this.childMatchSelect, root.firstElementChild);

yield this.getState("Opening new window for first post: " + firstPostHref);

try {
this.postOnlyWindow = window.open(firstPostHref, "_blank", "resizable");

installBehaviors(this.postOnlyWindow);

this.postOnlyWindow.__bx_behaviors.run({autofetch: true});

await sleep(waitUnit * 10);
yield this.getState("Loading single post view for first post: " + firstPostHref);

} catch (e) {
behavior_log(e);
}
const separateWindow = false;

// yield this.getState("Closing window for first post");
if (separateWindow) {
try {
this.postOnlyWindow = window.open(firstPostHref, "_blank", "resizable");

// other.close();
installBehaviors(this.postOnlyWindow);

// const origLoc = window.location.href;
this.postOnlyWindow.__bx_behaviors.run({autofetch: true});

// window.history.replaceState({}, "", firstPostHref);
// window.dispatchEvent(new PopStateEvent("popstate", { state: {} }));
await sleep(waitUnit * 10);

} catch (e) {
behavior_log(e);
}
} else {

// yield this.getState("Loading post page via first post: " + firstPostHref);
window.history.replaceState({}, "", firstPostHref);
window.dispatchEvent(new PopStateEvent("popstate", { state: {} }));

// let root2 = null;
// let root3 = null;
let root2 = null;
let root3 = null;

// await sleep(waitUnit * 10);
await sleep(waitUnit * 10);

// await waitUntil(() => (root2 = xpathNode(this.rootPath)) !== root && root2, waitUnit * 5);
await waitUntil(() => (root2 = xpathNode(this.rootPath)) !== root && root2, waitUnit * 5);

// window.history.replaceState({}, "", origLoc);
// window.dispatchEvent(new PopStateEvent("popstate", { state: {} }));
window.history.replaceState({}, "", origLoc);
window.dispatchEvent(new PopStateEvent("popstate", { state: {} }));

// await sleep(waitUnit * 10);
await sleep(waitUnit * 10);

// await waitUntil(() => (root3 = xpathNode(this.rootPath)) !== root2 && root3, waitUnit * 5);
await waitUntil(() => (root3 = xpathNode(this.rootPath)) !== root2 && root3, waitUnit * 5);
}
}

async *iterSubposts() {
Expand Down Expand Up @@ -221,12 +220,12 @@ export class InstagramPostsBehavior extends Behavior

yield* this.iterSubposts();

if (await Promise.race([
yield this.getState("Loaded Comments", "comments");

await Promise.race([
this.iterComments(),
sleep(20000)
])) {
yield this.getState("Loaded Comments", "comments");
}
sleep(this.maxCommentsTime)
]);

next = xpathNode(this.nextPost);

Expand All @@ -238,8 +237,21 @@ export class InstagramPostsBehavior extends Behavior
await sleep(waitUnit * 5);
}

async* [Symbol.asyncIterator]() {
yield* this.viewStandalonePost();
async* [Symbol.asyncIterator]() {
const origLoc = window.location.href;

for await (const row of this.iterRow()) {
await sleep(waitUnit * 2.5);

const first = xpathNode(this.firstPostInRow, row);

first.click();
await sleep(waitUnit * 10);

break;
}

yield* this.viewStandalonePost(origLoc);

for await (const row of this.iterRow()) {
row.scrollIntoView(this.scrollOpts);
Expand Down
10 changes: 5 additions & 5 deletions src/site/twitter.js
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ export class TwitterTimelineBehavior extends Behavior
this.seenMediaTweets = new Set();

this.state = {
tweets: 0,
images: 0,
videos: 0,
imagePopups: 0,
threads: 0,
tweets: 0
//threads: 0,
};
}

Expand Down Expand Up @@ -196,7 +196,7 @@ export class TwitterTimelineBehavior extends Behavior
if (imagePopup) {
const imageState = new HistoryState(() => imagePopup.click());

yield this.getState("Loading Image: " + window.location.href, "imagePopups");
yield this.getState("Loading Image: " + window.location.href, "images");

await sleep(waitUnit * 5);

Expand All @@ -213,7 +213,7 @@ export class TwitterTimelineBehavior extends Behavior
}
prevLocation = window.location.href;

yield this.getState("Loading Image: " + window.location.href, "imagePopups");
yield this.getState("Loading Image: " + window.location.href, "images");
await sleep(waitUnit * 5);
}

Expand Down
15 changes: 8 additions & 7 deletions test/expected-autoscroll.log
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
{"msg":"Scrolling by 250"}
{"msg":"Scrolling by 250"}
{"msg":"Scrolling by 250"}
{"msg":"Scrolling by 250"}
{"msg":"Scrolling by 250"}
{"msg":"Scrolling by 250"}
{"msg":"done!"}
{"state":{"segments":1},"msg":"Scrolling down by 200 pixels every 0.2 seconds"}
{"state":{"segments":1},"msg":"Scrolling down by 200 pixels every 0.2 seconds"}
{"state":{"segments":1},"msg":"Scrolling down by 200 pixels every 0.2 seconds"}
{"state":{"segments":1},"msg":"Scrolling down by 200 pixels every 0.2 seconds"}
{"state":{"segments":1},"msg":"Scrolling down by 200 pixels every 0.2 seconds"}
{"state":{"segments":1},"msg":"Scrolling down by 200 pixels every 0.2 seconds"}
{"state":{"segments":1},"msg":"Scrolling down by 200 pixels every 0.2 seconds"}
{"state":{"segments":1},"msg":"done!"}
21 changes: 11 additions & 10 deletions test/expected-instagram.log
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
{"state":{"posts":0,"slides":0,"comments":0,"rows":0},"msg":"Opening new window for first post: /p/CMtilvmsXzm/"}
{"state":{"posts":0,"slides":0,"comments":0,"rows":1},"msg":"Loading Row"}
{"state":{"posts":1,"slides":0,"comments":0,"rows":1},"msg":"Loading Post: https://www.instagram.com/p/CMtilvmsXzm/"}
{"state":{"posts":1,"slides":1,"comments":0,"rows":1},"msg":"Loading Slide 2 for https://www.instagram.com/p/CMtilvmsXzm/"}
{"state":{"posts":1,"slides":2,"comments":0,"rows":1},"msg":"Loading Slide 3 for https://www.instagram.com/p/CMtilvmsXzm/"}
{"state":{"posts":1,"slides":2,"comments":3,"rows":1},"msg":"Loaded Comments"}
{"state":{"posts":2,"slides":2,"comments":3,"rows":1},"msg":"Loading Post: https://www.instagram.com/p/CMqnzk-Mx7e/"}
{"state":{"posts":2,"slides":2,"comments":5,"rows":1},"msg":"Loaded Comments"}
{"state":{"posts":3,"slides":2,"comments":5,"rows":1},"msg":"Loading Post: https://www.instagram.com/p/CMqnVXZsfXR/"}
{"state":{"posts":3,"slides":2,"comments":5,"rows":1},"msg":"done!"}
{"state":{"posts":0,"slides":0,"rows":0,"comments":0},"msg":"Loading single post view for first post: /p/CMtilvmsXzm/"}
{"state":{"posts":0,"slides":0,"rows":1,"comments":0},"msg":"Loading Row"}
{"state":{"posts":1,"slides":0,"rows":1,"comments":0},"msg":"Loading Post: https://www.instagram.com/p/CMtilvmsXzm/"}
{"state":{"posts":1,"slides":1,"rows":1,"comments":0},"msg":"Loading Slide 2 for https://www.instagram.com/p/CMtilvmsXzm/"}
{"state":{"posts":1,"slides":2,"rows":1,"comments":0},"msg":"Loading Slide 3 for https://www.instagram.com/p/CMtilvmsXzm/"}
{"state":{"posts":1,"slides":2,"rows":1,"comments":1},"msg":"Loaded Comments"}
{"state":{"posts":2,"slides":2,"rows":1,"comments":3},"msg":"Loading Post: https://www.instagram.com/p/CMqnzk-Mx7e/"}
{"state":{"posts":2,"slides":2,"rows":1,"comments":4},"msg":"Loaded Comments"}
{"state":{"posts":3,"slides":2,"rows":1,"comments":5},"msg":"Loading Post: https://www.instagram.com/p/CMqnVXZsfXR/"}
{"state":{"posts":3,"slides":2,"rows":1,"comments":6},"msg":"Loaded Comments"}
{"state":{"posts":3,"slides":2,"rows":1,"comments":6},"msg":"done!"}
Loading

0 comments on commit df4c0fb

Please sign in to comment.