Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PIDS pattern inference code #4

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@ node_modules
.vscode
target
.idea
.DS_Store

/add-in/dist
661 changes: 309 additions & 352 deletions add-in/package-lock.json

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions add-in/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,11 @@
"dependencies": {
"@fluentui/font-icons-mdl2": "^8.1.24",
"@fluentui/react": "^8.55.3",
"bigdecimal": "^0.6.1",
"core-js": "^3.21.1",
"cytoscape-klay": "^3.1.4",
"es6-promise": "^4.2.8",
"js-lexer": "^0.1.2",
"react": "^16.14.0",
"react-dom": "^16.14.0",
"regenerator-runtime": "^0.13.9"
Expand Down
20 changes: 20 additions & 0 deletions add-in/src/api/pids/pattern/extractPattern.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import { PatternMiner } from "./patternMiner";
import { transposeMatrix } from "../utils/matrix";

/**
 * Mine a pattern for every column of a row-major formula matrix.
 *
 * Each column is turned into a one-column matrix, handed to a `PatternMiner`
 * (with `sampleSize` raised to 2000), and the mined rows are transposed so
 * they can be appended column-wise. The accumulated result is transposed
 * back before returning, so the pieces mined from each input column sit
 * side by side in every output row.
 *
 * @param formMatrix formula matrix stored in row-major order
 * @returns the mined pattern pieces of all columns, one row per input row
 */
export function extractPattern(formMatrix) {
  const miner = new PatternMiner();
  miner.sampleSize = 2000;

  const minedColumns = [];
  for (const column of transposeMatrix(formMatrix)) {
    // Re-shape the single column into an n x 1 matrix before mining it.
    const columnMatrix = transposeMatrix([column]);
    const mined = miner.mine(columnMatrix);
    minedColumns.push(...transposeMatrix(mined));
  }
  return transposeMatrix(minedColumns);
}
192 changes: 192 additions & 0 deletions add-in/src/api/pids/pattern/pattern.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
import { Token } from "../tokenize/token";

/**
 * Base class of every node in a pattern tree.
 *
 * Concrete patterns report how many characters they can match through
 * `numChar()`, always as a `[min, max]` pair.
 */
export abstract class Pattern {
  name: string;

  getName() {
    return this.name;
  }

  /**
   * @return all leaf patterns reachable from this node (a leaf returns itself)
   */
  flatten(): Pattern[] {
    return [this];
  }

  /**
   * Recursively visit the pattern elements starting from the root.
   *
   * This is an instance property, so a subclass that customises traversal
   * must assign a property as well; a prototype method would be shadowed.
   *
   * @param visitor object whose `on(pattern)` is invoked for this node
   */
  visit = (visitor) => {
    visitor.on(this);
  };

  /**
   * Default equality is identity; subclasses refine it structurally.
   *
   * @param other value to compare with
   */
  equals(other): boolean {
    return this === other;
  }

  /**
   * @return the `[min, max]` number of characters this pattern covers
   */
  abstract numChar(): [number, number];
}

/** Concatenate the leaves of every child into one flat list. */
function collectLeaves(children): Pattern[] {
  const leaves: Pattern[] = [];
  for (const child of children) {
    leaves.push(...child.flatten());
  }
  return leaves;
}

/** Report a composite node to the visitor, then walk its children between enter/exit. */
function visitComposite(node: Pattern, children, visitor): void {
  visitor.on(node);
  visitor.enter(node);
  children.forEach((child) => child.visit(visitor));
  visitor.exit(node);
}

/** Element-wise comparison of two child lists, using each child's `equals` when it has one. */
function sameChildren(left, right): boolean {
  if (left === right) {
    return true;
  }
  if (!Array.isArray(left) || !Array.isArray(right) || left.length !== right.length) {
    return false;
  }
  return left.every((child, idx) =>
    typeof child?.equals === "function" ? child.equals(right[idx]) : child === right[idx]
  );
}

/** Leaf pattern wrapping one concrete token. */
export class PToken extends Pattern {
  token: Token;

  constructor(token) {
    super();
    this.token = token;
  }

  /**
   * A token has a fixed width, so min and max coincide.
   * NOTE(review): assumes `Token.numChar` is a numeric property - confirm against token.ts.
   */
  override numChar(): [number, number] {
    const width = this.token.numChar;
    return [width, width];
  }

  canEqual = (other) => other instanceof PToken;

  /**
   * Two token patterns are equal when they wrap the same token object.
   * NOTE(review): tokens are compared by identity; switch to a structural
   * comparison if Token provides one.
   */
  override equals(other): boolean {
    return other instanceof PToken && other.canEqual(this) && this.token === other.token;
  }

  override toString = () => this.token.toString();
}

/** Ordered sequence of sub-patterns. */
export class PSeq extends Pattern {
  content = [];

  constructor(contents) {
    super();
    this.content = contents;
  }

  override flatten = (): Pattern[] => collectLeaves(this.content);

  override visit = (visitor) => {
    visitConnectedComposite(this, this.content, visitor);
  };

  /** Widths of a sequence add up: sum of the minima and sum of the maxima. */
  override numChar = (): [number, number] => {
    let min = 0;
    let max = 0;
    for (const child of this.content) {
      const [childMin, childMax] = child.numChar();
      min += childMin;
      max += childMax;
    }
    return [min, max];
  };

  canEqual = (other) => other instanceof PSeq;

  /** Sequences are equal when their children are pairwise equal, in order. */
  override equals = (other): boolean =>
    other instanceof PSeq && other.canEqual(this) && sameChildren(this.content, other.content);

  override toString = () => `<S>(${this.content.map((x) => x.toString()).join(",")})`;
}

/** Thin alias kept next to the composites so both share one traversal routine. */
function visitConnectedComposite(node: Pattern, children, visitor): void {
  visitComposite(node, children, visitor);
}

/** Set of alternative sub-patterns. */
export class PUnion extends Pattern {
  content = [];

  constructor(content) {
    super();
    this.content = content;
  }

  override flatten = (): Pattern[] => collectLeaves(this.content);

  override visit = (visitor) => {
    visitConnectedComposite(this, this.content, visitor);
  };

  /**
   * A union covers whatever any alternative covers: smallest minimum and
   * largest maximum. An empty union reports `[0, 0]`.
   */
  override numChar = (): [number, number] => {
    if (this.content.length === 0) {
      return [0, 0];
    }
    let min = Infinity;
    let max = -Infinity;
    for (const child of this.content) {
      const [childMin, childMax] = child.numChar();
      min = Math.min(min, childMin);
      max = Math.max(max, childMax);
    }
    return [min, max];
  };

  canEqual = (other) => other instanceof PUnion;

  /** Unions are equal when their alternatives are pairwise equal, in order. */
  override equals = (other): boolean =>
    other instanceof PUnion && other.canEqual(this) && sameChildren(this.content, other.content);

  override toString = () => `<U>(\n${this.content.map((x) => x.toString()).join(",\n")}\n)`;
}

/** Pattern that matches nothing and has zero width. */
export class PEmpty extends Pattern {
  override numChar = (): [number, number] => [0, 0];

  /** Empty patterns carry no state, so any two of them are equal. */
  override equals(other): boolean {
    return other instanceof PEmpty;
  }

  override toString = () => "<empty>";
}

/** Common base of the wildcard patterns, parameterised by a length range. */
abstract class PAny extends Pattern {
  minLength;
  maxLength;

  constructor(minLength, maxLength) {
    super();
    this.minLength = minLength;
    this.maxLength = maxLength;
  }

  override numChar = (): [number, number] => [this.minLength, this.maxLength];

  /** Only wildcards of the exact same concrete class may compare equal. */
  canEqual(other) {
    return other instanceof PAny && other.constructor === this.constructor;
  }

  /** Equal when both are the same wildcard class with the same length range. */
  override equals(other): boolean {
    return (
      other instanceof PAny &&
      other.canEqual(this) &&
      this.minLength === other.minLength &&
      this.maxLength === other.maxLength
    );
  }
}

/** Wildcard for integers; `hasHex` records the hex flag passed at construction. */
export class PIntAny extends PAny {
  hasHex = false;

  constructor(minl = 1, maxl = -1, hasHex = false) {
    super(minl, maxl);
    this.hasHex = hasHex;
  }

  /** Same as the wildcard comparison, additionally requiring matching `hasHex`. */
  override equals(other): boolean {
    return other instanceof PIntAny && super.equals(other) && this.hasHex === other.hasHex;
  }

  override toString = () => `<intany ${this.minLength}:${this.maxLength}, ${this.hasHex}>`;
}

/** Wildcard for letters. */
export class PLetterAny extends PAny {
  constructor(minl = 1, maxl = -1) {
    super(minl, maxl);
  }

  override toString = () => `<letterany ${this.minLength}:${this.maxLength}>`;
}

/**
 * Mix of letter and digits
 */
export class PLabelAny extends PAny {
  constructor(minl = 1, maxl = -1) {
    super(minl, maxl);
  }

  override toString = () => `<labelany ${this.minLength}:${this.maxLength}>`;
}

/** Wildcard for words. */
export class PWordAny extends PAny {
  constructor(minl = 1, maxl = -1) {
    super(minl, maxl);
  }

  override toString = () => `<wordany ${this.minLength}:${this.maxLength}>`;
}
46 changes: 46 additions & 0 deletions add-in/src/api/pids/pattern/patternMiner.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import { Tokenizer } from "../tokenize/tokenizer";
import { Pattern, PUnion, PSeq, PToken } from "./pattern";
import { CommonSymbolRule } from "../rule/commonSymbolRule";
import { CommonWordRule } from "../rule/commonWordRule";
import { SameItemRule } from "../rule/sameItemRule";
import { repack, combineConstantCols } from "./postProcessing";

/**
 * Derives a pattern matrix from a list of lines.
 *
 * The lines are tokenized, wrapped into a union of token sequences, and the
 * rewrite rules in `rules` are applied until a pass leaves the tree untouched.
 */
export class PatternMiner {
  // Configurable by callers; not read by any method of this class.
  sampleSize = 500;

  // Rules are tried in this order on every refinement pass.
  rules = [new CommonSymbolRule(), new CommonWordRule(), new SameItemRule()];

  /**
   * Mine a pattern from `lines`.
   *
   * @param lines inputs handed one by one to `Tokenizer.tokenize`
   * @returns the repacked pattern rows after `combineConstantCols`
   */
  mine(lines) {
    const tokenized = lines.map((line) => Tokenizer.tokenize(line));
    const sequences = tokenized.map((lineTokens) => new PSeq(lineTokens.map((tok) => new PToken(tok))));

    let current: Pattern = new PUnion(sequences);
    for (;;) {
      const [rewritten, firedRule] = this.refine(current);
      current = rewritten;
      // A pass in which no rule reported a modification ends the refinement.
      if (firedRule == null) {
        break;
      }
    }

    return combineConstantCols(repack(current, lines.length));
  }

  /**
   * Run one refinement pass over `root`.
   *
   * Each rule is reset and applied in turn; the pass stops at the first rule
   * whose `modified` flag is set afterwards.
   *
   * @param root pattern to rewrite
   * @returns `[pattern, rule]` for the rule that modified the pattern, or
   *          `[pattern, null]` when none did
   */
  refine(root) {
    let current = root;
    for (const rule of this.rules) {
      rule.reset();
      current = rule.rewrite(current);
      if (rule.modified) {
        return [current, rule];
      }
    }
    return [current, null];
  }
}
82 changes: 82 additions & 0 deletions add-in/src/api/pids/pattern/postProcessing.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import { PUnion, PSeq, PToken } from "./pattern";
import { transposeMatrix } from "../utils/matrix";

/** Concatenate the string form of every element in `seq.content`, without a separator. */
const getPSeqString = (seq) => seq.content.reduce((text, token) => text + token.toString(), "");

/**
 * Expand a pattern into one array per entry.
 *
 * A pattern that is neither a `PSeq` nor a `PUnion` is first wrapped in a
 * single-element sequence. It has to be passed as an array, because
 * `PSeq.content` is iterated as a list of children.
 *
 * @param pattern root pattern to unpack
 * @param entries number of rows to produce; row `i` is built with index `i`
 * @returns `entries` arrays, one per index
 */
export function repack(pattern, entries) {
  const root = pattern instanceof PSeq || pattern instanceof PUnion ? pattern : new PSeq([pattern]);
  const rows = [];
  for (let idx = 0; idx < entries; idx += 1) {
    rows.push(getPatternArray(root, idx));
  }
  return rows;
}

/**
 * Build the row of strings that `pattern` contributes for entry `idx`.
 *
 * - A leaf (anything that is not a `PSeq` or `PUnion`) yields its string form.
 * - A sequence without nested sequences/unions collapses into one string.
 * - A sequence with nested sequences/unions concatenates the rows of its children.
 * - A union selects its `idx`-th member: recursively when it has nested
 *   sequences/unions, otherwise as that member's string form.
 *
 * @param pattern pattern node to unpack
 * @param idx index of the entry, used to pick a member inside unions
 * @returns the strings for this entry
 */
function getPatternArray(pattern, idx) {
  const isSeq = pattern instanceof PSeq;
  const isUnion = pattern instanceof PUnion;
  if (!isSeq && !isUnion) {
    // Leaves have no `content`; render them directly instead of iterating.
    return [pattern instanceof PToken ? pattern.token.toString() : pattern.toString()];
  }

  const hasSubpattern = pattern.content.some(
    (subpattern) => subpattern instanceof PSeq || subpattern instanceof PUnion
  );

  if (isSeq) {
    if (!hasSubpattern) {
      return [getPSeqString(pattern)];
    }
    const line = [];
    pattern.content.forEach((subpattern) => {
      line.push(...getPatternArray(subpattern, idx));
    });
    return line;
  }

  const member = pattern.content[idx];
  if (hasSubpattern) {
    return getPatternArray(member, idx);
  }
  // Emit a string like every other branch; a missing member stays undefined
  // so the row keeps its column position.
  return [member?.toString()];
}

/**
 * Merge runs of adjacent constant columns of a row-major matrix.
 *
 * A column is constant when every row holds the same value as the first row.
 * Within each row, consecutive constant columns are concatenated into a
 * single cell; non-constant cells are copied through unchanged. Runs whose
 * concatenation is the empty string are dropped.
 *
 * @param mat row-major matrix; its width is taken from the first row
 * @returns a new matrix with one row per input row (empty for empty input)
 */
export function combineConstantCols(mat: string[][]): string[][] {
  // Without a first row there is no width to read and nothing to merge.
  if (mat.length === 0) {
    return [];
  }

  const firstRow = mat[0];
  const width = firstRow.length;

  const constantCols = new Set<number>();
  for (let x = 0; x < width; x += 1) {
    if (mat.every((row) => row[x] === firstRow[x])) {
      constantCols.add(x);
    }
  }

  return mat.map((row) => {
    const merged: string[] = [];
    let run = "";
    for (let x = 0; x < width; x += 1) {
      if (constantCols.has(x)) {
        run += row[x];
        continue;
      }
      // A varying column ends the current run of constant columns.
      if (run.length !== 0) {
        merged.push(run);
        run = "";
      }
      merged.push(row[x]);
    }
    if (run.length !== 0) {
      merged.push(run);
    }
    return merged;
  });
}
30 changes: 30 additions & 0 deletions add-in/src/api/pids/rule/adacentTokenRule.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import { RewriteRule } from "./rewriteRule";
import { PToken, Pattern } from "../pattern/pattern";
import { TWord } from "../tokenize/token";

/* If two tokens are adjacent to each other, combine them. */
export class AdjacentTokenRule extends RewriteRule {
  /** This rule applies unconditionally. */
  condition(_) {
    return true;
  }

  /**
   * Merge every run of consecutive `PToken` elements into one `PToken`
   * wrapping a `TWord` built from their concatenated text.
   *
   * Only array inputs are rewritten; anything else is returned untouched.
   *
   * @param ptn pattern (or list of patterns) to rewrite
   * @returns a new list with token runs merged, or `ptn` itself when it is not an array
   */
  update(ptn: Pattern) {
    if (ptn instanceof Array) {
      const result = [];
      let runningToken = "";
      const flushRun = () => {
        if (runningToken.length !== 0) {
          result.push(new PToken(new TWord(runningToken)));
          runningToken = "";
        }
      };
      ptn.forEach((subpattern) => {
        if (subpattern instanceof PToken) {
          runningToken += subpattern.token.toString();
        } else {
          flushRun();
          result.push(subpattern);
        }
      });
      // A run that reaches the end of the list must be emitted too,
      // otherwise trailing tokens would be lost.
      flushRun();
      return result;
    }
    return ptn;
  }
}
Loading