Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: handle cookie based login #269

Draft
wants to merge 16 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 161 additions & 11 deletions maxun-core/src/interpret.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,23 @@ import { arrayToObject } from './utils/utils';
import Concurrency from './utils/concurrency';
import Preprocessor from './preprocessor';
import log, { Level } from './utils/logger';
import { RegexableString } from './types/workflow';

/**
 * A single browser cookie as the interpreter stores it between runs and
 * hands back to the browser context when re-applying a session.
 */
interface Cookie {
  /** Cookie name. */
  name: string;
  /** Cookie value. */
  value: string;
  /** Domain the cookie belongs to. */
  domain: string;
  /** Path the cookie is scoped to. */
  path: string;
  /**
   * Expiry timestamp. `-1` is treated as "never expires" by the expiry
   * check; other values are compared against the current Unix time.
   */
  expires: number;
  /** Whether the cookie is flagged HTTP-only. */
  httpOnly: boolean;
  /** Whether the cookie is flagged secure. */
  secure: boolean;
  /** SameSite policy attached to the cookie. */
  sameSite: "Strict" | "Lax" | "None";
}

/**
 * The cookie bundle passed into the interpreter and returned from a run.
 */
interface CookieData {
  /** Timestamp of the most recent cookie capture (written with `Date.now()`). */
  lastUpdated: number;
  /** The cookies themselves. */
  cookies: Cookie[];
}

/**
* Extending the Window interface for custom scraping functions.
Expand Down Expand Up @@ -68,8 +85,16 @@ export default class Interpreter extends EventEmitter {

private cumulativeResults: Record<string, any>[] = [];

constructor(workflow: WorkflowFile, options?: Partial<InterpreterOptions>) {
/**
 * Cookies known to this interpreter. Starts out empty; the constructor
 * overwrites it with the caller-supplied bundle when one is given.
 */
private cookies: CookieData = { cookies: [], lastUpdated: 0 };

/**
 * Raised after an action whose URL condition looks like a login URL has
 * been carried out; lowered again once the page cookies have been captured.
 */
private loginSuccessful = false;

constructor(workflow: WorkflowFile, options?: Partial<InterpreterOptions>, cookies?: CookieData) {
super();
this.cookies = cookies || { cookies: [], lastUpdated: 0 };
this.workflow = workflow.workflow;
this.initializedWorkflow = null;
this.options = {
Expand Down Expand Up @@ -109,6 +134,45 @@ export default class Interpreter extends EventEmitter {
})
}

/**
 * Decides whether a stored cookie is still usable.
 *
 * @param cookie The cookie to inspect.
 * @returns `true` when the cookie has no expiry (`expires === -1`) or its
 * expiry lies strictly after the current time; `false` otherwise.
 */
private checkCookieExpiry(cookie: Cookie): boolean {
  const { expires } = cookie;

  // -1 is accepted unconditionally (presumably the session-cookie marker).
  if (expires === -1) return true;

  const nowSeconds = Math.floor(Date.now() / 1000);

  // Anything above 1e10 is taken to be a millisecond timestamp and is
  // scaled down to seconds before the comparison.
  let expirySeconds = expires;
  if (expires > 1e10) {
    expirySeconds = Math.floor(expires / 1000);
  }

  return nowSeconds < expirySeconds;
}

/**
 * Drops every stored cookie that fails the expiry check and logs how many
 * were removed. Does nothing when no cookie list is present.
 */
private filterValidCookies(): void {
  const stored = this.cookies?.cookies;
  if (!stored) return;

  const countBefore = stored.length;
  const stillValid = stored.filter((candidate) => this.checkCookieExpiry(candidate));
  this.cookies.cookies = stillValid;

  const removedCount = countBefore - stillValid.length;
  if (removedCount > 0) {
    this.log(`Filtered out ${removedCount} expired cookies`, Level.LOG);
  }
}

/**
 * Adds the stored cookies to the browser context that owns `page`.
 *
 * @param page Page whose context receives the cookies.
 * @returns `true` when the cookies were added; `false` when there were none
 * to add or the context rejected them (the failure is logged, not thrown).
 */
private async applyStoredCookies(page: Page): Promise<boolean> {
  const stored = this.cookies?.cookies;
  if (!stored || stored.length === 0) return false;

  let applied = true;
  try {
    await page.context().addCookies(stored);
  } catch (error) {
    this.log(`Failed to apply cookies: ${error}`, Level.ERROR);
    applied = false;
  }
  return applied;
}

private async applyAdBlocker(page: Page): Promise<void> {
if (this.blocker) {
await this.blocker.enableBlockingInPage(page);
Expand All @@ -121,6 +185,42 @@ export default class Interpreter extends EventEmitter {
}
}

/**
 * Heuristic check for login pages: reports whether the URL, compared
 * case-insensitively, contains one of a fixed set of login-related keywords
 * anywhere in the string.
 *
 * @param url URL (or URL-like string) to test.
 * @returns `true` when any keyword occurs in `url`.
 */
private isLoginUrl(url: string): boolean {
  const haystack = url.toLowerCase();
  for (const marker of ['login', 'signin', 'sign-in', 'auth']) {
    if (haystack.includes(marker)) {
      return true;
    }
  }
  return false;
}

/**
 * Turns a workflow URL condition into a plain string.
 *
 * A literal string is returned unchanged. For a `{ $regex }` condition the
 * pattern source is converted on a best-effort basis: a leading `^` and a
 * trailing `$` anchor are stripped and backslash-escaped regex
 * metacharacters (e.g. `\.`, `\?`, `\/`) are unescaped, so an anchored,
 * escaped literal URL becomes a URL that can be navigated to. Patterns that
 * use real regex operators (wildcards, groups, ...) are not expanded and
 * come back with those operators still in place.
 *
 * @param url The URL condition, or `undefined`.
 * @returns The string form of the condition, or `''` when `url` is missing
 * or has an unrecognised shape.
 */
private getUrlString(url: RegexableString | undefined): string {
  if (!url) return '';

  if (typeof url === 'string') return url;

  if ('$regex' in url) {
    return url['$regex']
      .replace(/^\^/, '')
      .replace(/\$$/, '')
      // Unescape every escaped metacharacter, not just `\?`; leaving `\.`
      // or `\/` in place yields a string that is not a valid URL.
      .replace(/\\([\\^$.*+?()[\]{}|\/-])/g, '$1');
  }

  return '';
}

/**
 * Scans the workflow from its last entry towards its first and returns the
 * index of the first entry found whose URL condition is set, is not
 * `about:blank`, and does not look like a login URL.
 *
 * @param workflow Workflow to scan.
 * @returns Index of the matching entry, or `-1` when there is none.
 */
private findFirstPostLoginAction(workflow: Workflow): number {
  let index = workflow.length;
  while (index-- > 0) {
    const { url } = workflow[index].where;

    // Entries without a meaningful URL condition cannot be navigation targets.
    if (!url || url === "about:blank") continue;

    if (this.isLoginUrl(this.getUrlString(url))) continue;

    return index;
  }
  return -1;
}


// private getSelectors(workflow: Workflow, actionId: number): string[] {
// const selectors: string[] = [];

Expand Down Expand Up @@ -217,14 +317,33 @@ export default class Interpreter extends EventEmitter {

const action = workflowCopy[workflowCopy.length - 1];

// console.log("Next action:", action)

let url: any = page.url();

if (action && action.where.url !== url && action.where.url !== "about:blank") {
url = action.where.url;
}

if (this.loginSuccessful) {
const pageCookies = await page.context().cookies([page.url()]);

this.cookies.cookies = pageCookies.map(cookie => ({
name: cookie.name,
path: cookie.path || '/',
value: cookie.value,
domain: cookie.domain,
secure: cookie.secure || false,
expires: cookie.expires || Math.floor(Date.now() / 1000) + 86400,
httpOnly: cookie.httpOnly || false,
sameSite: cookie.sameSite || 'Lax'
}));

Object.assign(this.cookies, { lastUpdated: Date.now() });

// this.filterValidCookies();
this.loginSuccessful = false;
this.log('Stored authentication cookies after successful login', Level.LOG);
}

return {
url,
cookies: (await page.context().cookies([page.url()]))
Expand Down Expand Up @@ -654,6 +773,35 @@ export default class Interpreter extends EventEmitter {
let actionId = -1
let repeatCount = 0;

// this.filterValidCookies();

if (this.cookies?.cookies?.length > 0) {
const cookiesApplied = await this.applyStoredCookies(p);
if (cookiesApplied) {
console.log("Cookies applied successfully.");
const postLoginActionId = this.findFirstPostLoginAction(workflowCopy);
if (postLoginActionId !== -1) {
const targetUrl = this.getUrlString(workflowCopy[postLoginActionId].where.url);
if (targetUrl) {
try {
await p.goto(targetUrl);

await p.waitForLoadState('networkidle');

if (!this.isLoginUrl(targetUrl)) {
workflowCopy.splice(postLoginActionId + 1);
this.log('Successfully skipped login using stored cookies', Level.LOG);
} else {
this.log('Cookie authentication failed, proceeding with manual login', Level.LOG);
}
} catch (error) {
this.log(`Failed to navigate with stored cookies: ${error}`, Level.ERROR);
}
}
}
}
}

/**
* Enables the interpreter functionality for popup windows.
* User-requested concurrency should be entirely managed by the concurrency manager,
Expand All @@ -679,11 +827,9 @@ export default class Interpreter extends EventEmitter {
}

let pageState = {};
let getStateTest = "Hello";
try {
pageState = await this.getState(p, workflowCopy, selectors);
selectors = [];
console.log("Empty selectors:", selectors)
} catch (e: any) {
this.log('The browser has been closed.');
return;
Expand All @@ -707,28 +853,30 @@ export default class Interpreter extends EventEmitter {

const action = workflowCopy[actionId];

console.log("MATCHED ACTION:", action);
console.log("MATCHED ACTION ID:", actionId);
this.log(`Matched ${JSON.stringify(action?.where)}`, Level.LOG);

if (action) { // action is matched
if (action) {
if (this.options.debugChannel?.activeId) {
this.options.debugChannel.activeId(actionId);
}

repeatCount = action === lastAction ? repeatCount + 1 : 0;

console.log("REPEAT COUNT", repeatCount);
if (this.options.maxRepeats && repeatCount > this.options.maxRepeats) {
return;
}
lastAction = action;

try {
console.log("Carrying out:", action.what);
await this.carryOutSteps(p, action.what);
usedActions.push(action.id ?? 'undefined');

const url = this.getUrlString(action.where.url);

if (this.isLoginUrl(url)) {
this.loginSuccessful = true;
}

workflowCopy.splice(actionId, 1);
console.log(`Action with ID ${action.id} removed from the workflow copy.`);

Expand Down Expand Up @@ -764,7 +912,7 @@ export default class Interpreter extends EventEmitter {
* @param {ParamType} params Workflow specific, set of parameters
* for the `{$param: nameofparam}` fields.
*/
public async run(page: Page, params?: ParamType): Promise<void> {
public async run(page: Page, params?: ParamType): Promise<CookieData> {
this.log('Starting the workflow.', Level.LOG);
const context = page.context();

Expand Down Expand Up @@ -798,6 +946,8 @@ export default class Interpreter extends EventEmitter {
await this.concurrency.waitForCompletion();

this.stopper = null;

return this.cookies;
}

public async stop(): Promise<void> {
Expand Down
10 changes: 5 additions & 5 deletions maxun-core/src/preprocessor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -168,11 +168,11 @@ export default class Preprocessor {
);
}

workflowCopy = initSpecialRecurse(
workflowCopy,
'$regex',
(regex) => new RegExp(regex),
);
// workflowCopy = initSpecialRecurse(
// workflowCopy,
// '$regex',
// (regex) => new RegExp(regex),
// );
Comment on lines +171 to +175
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Codebase verification

Commenting out regex initialization could break URL pattern matching

The verification reveals that regex functionality is actively used in the codebase:

  • $regex is part of the workflow type definition (RegexableString)
  • URL patterns with $regex are generated in Generator.ts
  • Regex pattern matching is used in interpret.ts for URL validation with .test()

Commenting out the regex initialization in preprocessor.ts means that `{ $regex: "..." }` conditions are never converted to RegExp objects. Any code that later calls .test() on such a condition will then throw a TypeError at runtime, because the value is still a plain object rather than a RegExp.

🔗 Analysis chain

Verify the impact of removing regex initialization.

The commented-out code block handles regex parameter initialization. This change could affect workflow processing if regex functionality is still expected elsewhere in the codebase.


Let me gather more context about the regex initialization and its usage.


Let me check one more aspect to understand how URLs with regex patterns are processed in the codebase.

🏁 Scripts executed

The following scripts were executed for the analysis:

Script:

#!/bin/bash
# Description: Check for regex usage in workflows and related code

# Search for regex patterns in workflow files
rg -A 3 '\$regex' 

# Search for RegExp usage in workflow processing
ast-grep --pattern 'new RegExp($_)'

Length of output: 2501


Script:

#!/bin/bash
# Check the implementation of initSpecialRecurse and its usage
ast-grep --pattern 'function initSpecialRecurse($_) {
  $$$
}'

# Look for where regex patterns are actually used in workflow processing
rg -A 5 'new RegExp' maxun-core/

Length of output: 376


Script:

#!/bin/bash
# Check how URLs with regex patterns are processed
rg -A 10 "if \('\$regex' in url\)" maxun-core/

# Look for the actual usage of RegExp in URL matching
rg -A 5 "\.test\(|\.match\(" maxun-core/

Length of output: 1016


return <Workflow>workflowCopy;
}
Expand Down
2 changes: 1 addition & 1 deletion maxun-core/src/types/workflow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ export type Meta = typeof meta[number];

export type SelectorArray = string[];

type RegexableString = string | { '$regex': string };
/**
 * A string-valued condition, given either as a literal string or as an
 * object carrying a regular-expression source under `$regex`.
 */
export type RegexableString = { '$regex': string } | string;

type BaseConditions = {
'url': RegexableString,
Expand Down
2 changes: 1 addition & 1 deletion server/src/api/record.ts
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,7 @@ async function executeRun(id: string) {

const workflow = AddGeneratedFlags(recording.recording);
const interpretationInfo = await browser.interpreter.InterpretRecording(
workflow, currentPage, (newPage: Page) => currentPage = newPage, plainRun.interpreterSettings
workflow, currentPage, (newPage: Page) => currentPage = newPage, plainRun.interpreterSettings, plainRun.robotMetaId
);

const binaryOutputService = new BinaryOutputService('maxun-run-screenshots');
Expand Down
29 changes: 29 additions & 0 deletions server/src/models/Robot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,22 @@ interface RobotWorkflow {
workflow: WhereWhatPair[];
}

/**
 * A cookie as persisted on a robot record. The identifying fields and the
 * expiry are mandatory; the flag-style attributes may be omitted.
 */
interface StoredCookie {
  /** Cookie name. */
  name: string;
  /** Cookie value. */
  value: string;
  /** Domain the cookie belongs to. */
  domain: string;
  /** Path the cookie is scoped to. */
  path: string;
  /** Expiry timestamp. */
  expires: number;
  /** Whether the cookie is flagged HTTP-only, if recorded. */
  httpOnly?: boolean;
  /** Whether the cookie is flagged secure, if recorded. */
  secure?: boolean;
  /** SameSite policy, if recorded. */
  sameSite?: "Strict" | "Lax" | "None";
}

/**
 * Shape of the JSON value kept in a robot's `cookie_storage` column.
 */
interface CookieStorage {
  /** Timestamp of the most recent update to the stored cookies. */
  lastUpdated: number;
  /** The persisted cookies. */
  cookies: StoredCookie[];
}

interface RobotAttributes {
id: string;
userId?: number;
Expand All @@ -26,6 +42,8 @@ interface RobotAttributes {
google_access_token?: string | null;
google_refresh_token?: string | null;
schedule?: ScheduleConfig | null;
isLogin: boolean;
cookie_storage?: CookieStorage | null;
}

interface ScheduleConfig {
Expand Down Expand Up @@ -54,6 +72,8 @@ class Robot extends Model<RobotAttributes, RobotCreationAttributes> implements R
public google_access_token!: string | null;
public google_refresh_token!: string | null;
public schedule!: ScheduleConfig | null;
public isLogin!: boolean;
public cookie_storage!: CookieStorage | null;
}

Robot.init(
Expand Down Expand Up @@ -99,6 +119,15 @@ Robot.init(
type: DataTypes.JSONB,
allowNull: true,
},
isLogin: {
type: DataTypes.BOOLEAN,
allowNull: false,
defaultValue: false,
},
cookie_storage: {
type: DataTypes.JSONB,
allowNull: true,
}
},
{
sequelize,
Expand Down
Loading