;
+};
+const EXTRAS_DEFAULT = {}; // Empty extras object passed to replaceTextWithExtras by rules that carry no extra data
+
+type ReplacementFn = (extras: Extras, ...matches: string[]) => string; // Replacement callback: receives the extras object first, then the string match arguments (full match followed by capture groups in the rules of this file); returns the replacement text
+type Replacement = ReplacementFn | string; // A rule's replacement is either a callback or a plain replacement string (e.g. '$1')
+type ProcessFn = (textToProcess: string, replacement: Replacement, shouldKeepRawInput: boolean) => string; // Custom processing function a rule can supply as an alternative to a regex; returns the processed text
+
+type CommonRule = { // Fields shared by every rule, whether regex-based or process-based
+ name: string; // Rule identifier (e.g. 'emoji', 'codeFence')
+ replacement: Replacement; // Replacement callback or replacement string for this rule
+ rawInputReplacement?: Replacement; // Optional alternative replacement; presumably used when shouldKeepRawInput is set — confirm in the code that applies rules
+ pre?: (input: string) => string; // Optional string transform; presumably run before the rule is applied — TODO confirm
+ post?: (input: string) => string; // Optional string transform; presumably run after the rule is applied — TODO confirm
+};
+
+type RuleWithRegex = CommonRule & { // A rule that is driven by a regular expression
+ regex: RegExp; // Pattern associated with the rule
+};
+
+type RuleWithProcess = CommonRule & { // A rule that supplies its own processing function
+ process: ProcessFn; // Receives the text, the rule's replacement and the raw-input flag; returns the processed text
+};
+
+type Rule = RuleWithRegex | RuleWithProcess; // Any rule: regex-driven or process-driven
+
+type ReplaceOptions = { // Options bag; presumably the second argument of replace() (see the this.replace(...) calls in the quote rule) — confirm
+ extras?: Extras; // Extra data; presumably forwarded to replacement callbacks as their first argument — confirm
+ filterRules?: string[]; // Rule names (e.g. ['heading1']); presumably restricts which rules run — confirm
+ disabledRules?: string[]; // Rule names; presumably rules to skip — confirm
+ shouldEscapeText?: boolean; // NOTE(review): presumably controls escaping of the input text — confirm
+ shouldKeepRawInput?: boolean; // Raw-input mode flag; the same flag is handed to ProcessFn implementations
+};
const MARKDOWN_LINK_REGEX = new RegExp(`\\[([^\\][]*(?:\\[[^\\][]*][^\\][]*)*)]\\(${UrlPatterns.MARKDOWN_URL_REGEX}\\)(?![^<]*(<\\/pre>|<\\/code>))`, 'gi'); // Matches [label](url); the label may contain one level of nested [...]; rejected when a closing </pre> or </code> follows with no '<' in between
const MARKDOWN_IMAGE_REGEX = new RegExp(`\\!(?:\\[([^\\][]*(?:\\[[^\\][]*][^\\][]*)*)])?\\(${UrlPatterns.MARKDOWN_URL_REGEX}\\)(?![^<]*(<\\/pre>|<\\/code>))`, 'gi'); // Matches ![alt](url) where the [alt] part is optional; same </pre> / </code> exclusion as the link regex
+const MARKDOWN_VIDEO_REGEX = new RegExp( // Matches ![name](url.ext) where ext is one of Constants.CONST.VIDEO_EXTENSIONS; the [name] part is optional; group 1 is the name, group 2 the URL including its extension
+ `\\!(?:\\[([^\\][]*(?:\\[[^\\][]*][^\\][]*)*)])?\\(((${UrlPatterns.MARKDOWN_URL_REGEX})\\.(?:${Constants.CONST.VIDEO_EXTENSIONS.join('|')}))\\)(?![^<]*(<\\/pre>|<\\/code>))`,
+ 'gi', // global and case-insensitive
+);
+
const SLACK_SPAN_NEW_LINE_TAG = ''; // NOTE(review): presumably the markup Slack emits for a line break — confirm the literal's intended value
export default class ExpensiMark {
+ static Log = new Logger({
+ serverLoggingCallback: () => undefined,
+ // eslint-disable-next-line no-console
+ clientLoggingCallback: (message) => console.warn(message),
+ isDebug: true,
+ });
+
+ /**
+ * Set the logger to use for logging inside of the ExpensiMark class
+ * @param logger - The logger object to use
+ */
+ static setLogger(logger: Logger) {
+ ExpensiMark.Log = logger;
+ }
+
+ /** Rules to apply to the text */
+ rules: Rule[];
+
+ /**
+ * The list of regex replacements to do on a HTML comment for converting it to markdown.
+ * Order of rules is important
+ */
+ htmlToMarkdownRules: RuleWithRegex[];
+
+ /**
+ * The list of rules to convert the HTML to text.
+ * Order of rules is important
+ */
+ htmlToTextRules: RuleWithRegex[];
+
+ /**
+ * The list of rules that we have to exclude in shouldKeepWhitespaceRules list.
+ */
+ whitespaceRulesToDisable = ['newline', 'replacepre', 'replacebr', 'replaceh1br'];
+
+ /**
+ * Predicate used to filter rules; returns true for a rule that should be kept (presumably used to build shouldKeepWhitespaceRules — confirm).
+ */
+ filterRules: (rule: Rule) => boolean;
+
+ /**
+ * The list of rules that are applied when the shouldKeepWhitespace flag is true.
+ */
+ shouldKeepWhitespaceRules: Rule[];
+
+ /**
+ * maxQuoteDepth is the maximum depth of nested quotes that we want to support.
+ */
+ maxQuoteDepth: number;
+
+ /**
+ * currentQuoteDepth is the current depth of nested quotes that we are processing.
+ */
+ currentQuoteDepth: number;
+
constructor() {
/**
* The list of regex replacements to do on a comment. Check the link regex is first so links are processed
* before other delimiters
- *
- * @type {Object[]}
*/
this.rules = [
// Apply the emoji rule first to avoid applying any other formatting rules inside of it
{
name: 'emoji',
regex: Constants.CONST.REG_EXP.EMOJI_RULE,
- replacement: (match) => `${match}`,
+ replacement: (_extras, match) => `${match}`,
},
/**
@@ -33,7 +129,7 @@ export default class ExpensiMark {
name: 'codeFence',
// ` is a backtick symbol we are matching on three of them before then after a new line character
- regex: /(```(?:\r\n|\n)?)((?:\s*?(?!(?:\r\n|\n)?```(?!`))[\S])+\s*?)((?=(?:\r\n|\n)?)```)/g,
+ regex: /(```(?:\r\n|\n))((?:\s*?(?!(?:\r\n|\n)?```(?!`))[\S])+\s*?(?:\r\n|\n))(```)/g,
// We're using a function here to perform an additional replace on the content
// inside the backticks because Android is not able to use tags and does
@@ -41,14 +137,13 @@ export default class ExpensiMark {
// with the new lines here since they need to be converted into
. And we don't
// want to do this anywhere else since that would break HTML.
// will create styling issues so use
- replacement: (match, __, textWithinFences) => {
+ replacement: (_extras, _match, _g1, textWithinFences) => {
const group = textWithinFences.replace(/(?:(?![\n\r])\s)/g, ' ');
return `${group}
`;
},
- rawInputReplacement: (match, __, textWithinFences) => {
- const withinFences = match.replace(/(?:```)([\s\S]*?)(?:```)/g, '$1');
- const group = textWithinFences.replace(/(?:(?![\n\r])\s)/g, ' ');
- return `${group}
`;
+ rawInputReplacement: (_extras, _match, _g1, textWithinFences) => {
+ const group = textWithinFences.replace(/(?:(?![\n\r])\s)/g, ' ').replace(/|<\/emoji>/g, '');
+ return `${group}
`;
},
},
@@ -62,16 +157,8 @@ export default class ExpensiMark {
// Use the url escaped version of a backtick (`) symbol. Mobile platforms do not support lookbehinds,
// so capture the first and third group and place them in the replacement.
// but we should not replace backtick symbols if they include tags between them.
- regex: /(\B|_|)`(?:(?!(?:(?!`).)*?))(.*?\S.*?)`(\B|_|)(?!`|[^<]*<\/pre>)/g,
- replacement: (match, g1, g2, g3) => {
- const regex = /^[`]+$/i;
-
- // if content of the inline code block is only backtick symbols, we should not replace them with tag
- if (regex.test(g2)) {
- return match;
- }
- return `${g1}${g2}
${g3}`;
- },
+ regex: /(\B|_|)`(.*?(?![`])\S.*?)`(\B|_|)(?!`|[^<]*<\/pre>)/gm,
+ replacement: '$1$2
$3',
},
/**
@@ -83,9 +170,9 @@ export default class ExpensiMark {
name: 'email',
process: (textToProcess, replacement, shouldKeepRawInput) => {
const regex = new RegExp(`(?!\\[\\s*\\])\\[([^[\\]]*)]\\((mailto:)?${Constants.CONST.REG_EXP.MARKDOWN_EMAIL}\\)`, 'gim');
- return this.modifyTextForEmailLinks(regex, textToProcess, replacement, shouldKeepRawInput);
+ return this.modifyTextForEmailLinks(regex, textToProcess, replacement as ReplacementFn, shouldKeepRawInput);
},
- replacement: (match, g1, g2) => {
+ replacement: (_extras, match, g1, g2) => {
if (g1.match(Constants.CONST.REG_EXP.EMOJIS) || !g1.trim()) {
return match;
}
@@ -94,7 +181,7 @@ export default class ExpensiMark {
const formattedLabel = label === href ? g2 : label;
return `${formattedLabel}`;
},
- rawInputReplacement: (match, g1, g2, g3) => {
+ rawInputReplacement: (_extras, match, g1, g2, g3) => {
if (g1.match(Constants.CONST.REG_EXP.EMOJIS) || !g1.trim()) {
return match;
}
@@ -109,13 +196,37 @@ export default class ExpensiMark {
name: 'heading1',
process: (textToProcess, replacement, shouldKeepRawInput = false) => {
const regexp = shouldKeepRawInput ? /^# ( *(?! )(?:(?!|\n|\r\n).)+)/gm : /^# +(?! )((?:(?!|\n|\r\n).)+)/gm;
- return textToProcess.replace(regexp, replacement);
+ return this.replaceTextWithExtras(textToProcess, regexp, EXTRAS_DEFAULT, replacement);
},
replacement: '$1
',
},
/**
- * Converts markdown style images to img tags e.g. ![Expensify](https://www.expensify.com/attachment.png)
+ * Converts markdown style video to video tags e.g. ![Expensify](https://www.expensify.com/attachment.mp4)
+ * We need to convert before image rules since they will not try to create an image tag from an existing video URL
+ * Extras arg could contain the attribute cache for the video tag which is cached during the html-to-markdown conversion
+ */
+ {
+ name: 'video',
+ regex: MARKDOWN_VIDEO_REGEX,
+ /**
+ * @param extras - The extras object
+ * @param videoName - The first capture group - video name
+ * @param videoSource - The second capture group - video URL
+ * @return Returns the HTML video tag
+ */
+ replacement: (extras, _match, videoName, videoSource) => {
+ const extraAttrs = extras && extras.videoAttributeCache && extras.videoAttributeCache[videoSource];
+ return ``;
+ },
+ rawInputReplacement: (extras, _match, videoName, videoSource) => {
+ const extraAttrs = extras && extras.videoAttributeCache && extras.videoAttributeCache[videoSource];
+ return ``;
+ },
+ },
+
+ /**
+ * Converts markdown style images to image tags e.g. ![Expensify](https://www.expensify.com/attachment.png)
* We need to convert before linking rules since they will not try to create a link from an existing img
* tag.
* Additional sanitization is done to the alt attribute to prevent parsing it further to html by later
@@ -124,8 +235,8 @@ export default class ExpensiMark {
{
name: 'image',
regex: MARKDOWN_IMAGE_REGEX,
- replacement: (match, g1, g2) => ``,
- rawInputReplacement: (match, g1, g2) =>
+ replacement: (_extras, _match, g1, g2) => ``,
+ rawInputReplacement: (_extras, _match, g1, g2) =>
``,
},
@@ -136,14 +247,14 @@ export default class ExpensiMark {
*/
{
name: 'link',
- process: (textToProcess, replacement) => this.modifyTextForUrlLinks(MARKDOWN_LINK_REGEX, textToProcess, replacement),
- replacement: (match, g1, g2) => {
+ process: (textToProcess, replacement) => this.modifyTextForUrlLinks(MARKDOWN_LINK_REGEX, textToProcess, replacement as ReplacementFn),
+ replacement: (_extras, match, g1, g2) => {
if (g1.match(Constants.CONST.REG_EXP.EMOJIS) || !g1.trim()) {
return match;
}
return `${g1.trim()}`;
},
- rawInputReplacement: (match, g1, g2) => {
+ rawInputReplacement: (_extras, match, g1, g2) => {
if (g1.match(Constants.CONST.REG_EXP.EMOJIS) || !g1.trim()) {
return match;
}
@@ -161,7 +272,7 @@ export default class ExpensiMark {
{
name: 'hereMentions',
regex: /([a-zA-Z0-9.!$%&+/=?^`{|}_-]?)(@here)([.!$%&+/=?^`{|}_-]?)(?=\b)(?!([\w'#%+-]*@(?:[a-z\d-]+\.)+[a-z]{2,}(?:\s|$|@here))|((?:(?!|[^<]*(<\/pre>|<\/code>))/gm,
- replacement: (match, g1, g2, g3) => {
+ replacement: (_extras, match, g1, g2, g3) => {
if (!Str.isValidMention(match)) {
return match;
}
@@ -178,7 +289,7 @@ export default class ExpensiMark {
{
name: 'reportMentions',
- regex: /(?|<\/code>))/gimu,
replacement: '$1',
},
@@ -197,15 +308,21 @@ export default class ExpensiMark {
`(@here|[a-zA-Z0-9.!$%&+=?^\`{|}-]?)(@${Constants.CONST.REG_EXP.EMAIL_PART}|@${Constants.CONST.REG_EXP.PHONE_PART})(?!((?:(?!|[^<]*(<\\/pre>|<\\/code>))`,
'gim',
),
- replacement: (match, g1, g2) => {
- if (!Str.isValidMention(match)) {
+ replacement: (_extras, match, g1, g2) => {
+ const phoneNumberRegex = new RegExp(`^${Constants.CONST.REG_EXP.PHONE_PART}$`);
+ const mention = g2.slice(1);
+ const mentionWithoutSMSDomain = Str.removeSMSDomain(mention);
+ if (!Str.isValidMention(match) || (phoneNumberRegex.test(mentionWithoutSMSDomain) && !Str.isValidPhoneNumber(mentionWithoutSMSDomain))) {
return match;
}
const phoneRegex = new RegExp(`^@${Constants.CONST.REG_EXP.PHONE_PART}$`);
return `${g1}${g2}${phoneRegex.test(g2) ? `@${Constants.CONST.SMS.DOMAIN}` : ''}`;
},
- rawInputReplacement: (match, g1, g2) => {
- if (!Str.isValidMention(match)) {
+ rawInputReplacement: (_extras, match, g1, g2) => {
+ const phoneNumberRegex = new RegExp(`^${Constants.CONST.REG_EXP.PHONE_PART}$`);
+ const mention = g2.slice(1);
+ const mentionWithoutSMSDomain = Str.removeSMSDomain(mention);
+ if (!Str.isValidMention(match) || (phoneNumberRegex.test(mentionWithoutSMSDomain) && !Str.isValidPhoneNumber(mentionWithoutSMSDomain))) {
return match;
}
return `${g1}${g2}`;
@@ -227,14 +344,14 @@ export default class ExpensiMark {
process: (textToProcess, replacement) => {
const regex = new RegExp(`(?![^<]*>|[^<>]*<\\/(?!h1>))([_*~]*?)${UrlPatterns.MARKDOWN_URL_REGEX}\\1(?!((?:(?!|[^<]*(<\\/pre>|<\\/code>|.+\\/>))`, 'gi');
- return this.modifyTextForUrlLinks(regex, textToProcess, replacement);
+ return this.modifyTextForUrlLinks(regex, textToProcess, replacement as ReplacementFn);
},
- replacement: (match, g1, g2) => {
+ replacement: (_extras, _match, g1, g2) => {
const href = Str.sanitizeURL(g2);
return `${g1}${g2}${g1}`;
},
- rawInputReplacement: (_match, g1, g2) => {
+ rawInputReplacement: (_extras, _match, g1, g2) => {
const href = Str.sanitizeURL(g2);
return `${g1}${g2}${g1}`;
},
@@ -248,22 +365,40 @@ export default class ExpensiMark {
// inline code blocks. A single prepending space should be stripped if it exists
process: (textToProcess, replacement, shouldKeepRawInput = false) => {
const regex = /^(?:>)+ +(?! )(?![^<]*(?:<\/pre>|<\/code>))([^\v\n\r]+)/gm;
- const replaceFunction = (g1) => replacement(g1, shouldKeepRawInput);
if (shouldKeepRawInput) {
- return textToProcess.replace(regex, replaceFunction);
+ const rawInputRegex = /^(?:>)+ +(?! )(?![^<]*(?:<\/pre>|<\/code>))([^\v\n\r]*)/gm;
+ return this.replaceTextWithExtras(textToProcess, rawInputRegex, EXTRAS_DEFAULT, replacement);
}
- return this.modifyTextForQuote(regex, textToProcess, replacement);
+ return this.modifyTextForQuote(regex, textToProcess, replacement as ReplacementFn);
},
- replacement: (g1, shouldKeepRawInput = false) => {
+ replacement: (_extras, g1) => {
+ // We want to enable 2 options of nested heading inside the blockquote: "># heading" and "> # heading".
+ // To do this we need to parse body of the quote without first space
+ const handleMatch = (match: string) => match;
+ const textToReplace = g1.replace(/^>( )?/gm, handleMatch);
+ const filterRules = ['heading1'];
+
+ // if we don't reach the max quote depth we allow the recursive call to process possible quote
+ if (this.currentQuoteDepth < this.maxQuoteDepth - 1) {
+ filterRules.push('quote');
+ this.currentQuoteDepth++;
+ }
+
+ const replacedText = this.replace(textToReplace, {
+ filterRules,
+ shouldEscapeText: false,
+ shouldKeepRawInput: false,
+ });
+ this.currentQuoteDepth = 0;
+ return `${replacedText}
`;
+ },
+ rawInputReplacement: (_extras, g1) => {
// We want to enable 2 options of nested heading inside the blockquote: "># heading" and "> # heading".
// To do this we need to parse body of the quote without first space
let isStartingWithSpace = false;
- const handleMatch = (match, g2) => {
- if (shouldKeepRawInput) {
- isStartingWithSpace = !!g2;
- return '';
- }
- return match;
+ const handleMatch = (_match: string, g2: string) => {
+ isStartingWithSpace = !!g2;
+ return '';
};
const textToReplace = g1.replace(/^>( )?/gm, handleMatch);
const filterRules = ['heading1'];
@@ -277,30 +412,31 @@ export default class ExpensiMark {
const replacedText = this.replace(textToReplace, {
filterRules,
shouldEscapeText: false,
- shouldKeepRawInput,
+ shouldKeepRawInput: true,
});
this.currentQuoteDepth = 0;
return `${isStartingWithSpace ? ' ' : ''}${replacedText}
`;
},
},
+ /**
+ * Use \b in this case because it will match on words, letters,
+ * and _: https://www.rexegg.com/regex-boundaries.html#wordboundary
+ * Use [\s\S]* instead of .* to match newline
+ */
{
- /**
- * Use \b in this case because it will match on words, letters,
- * and _: https://www.rexegg.com/regex-boundaries.html#wordboundary
- * The !_blank is to prevent the `target="_blank">` section of the
- * link replacement from being captured Additionally, something like
- * `\b\_([^<>]*?)\_\b` doesn't work because it won't replace
- * `_https://www.test.com_`
- * Use [\s\S]* instead of .* to match newline
- */
name: 'italic',
- regex: /(?]*)(\b_+|\b)(?!_blank")_((?![\s_])[\s\S]*?[^\s_](?)(?![^<]*(<\/pre>|<\/code>|<\/a>|<\/mention-user>|_blank))/g,
+ regex: /(<(pre|code|a|mention-user)[^>]*>(.*?)<\/\2>)|((\b_+|\b)_((?![\s_])[\s\S]*?[^\s_](?)(?![^<]*(<\/pre>|<\/code>|<\/a>|<\/mention-user>)))/g,
+ replacement: (_extras, match, html, tag, content, text, extraLeadingUnderscores, textWithinUnderscores) => {
+ // Skip any <pre>, <code>, <a>, <mention-user> tag contents
+ if (html) {
+ return html;
+ }
- // We want to add extraLeadingUnderscores back before the tag unless textWithinUnderscores starts with valid email
- replacement: (match, extraLeadingUnderscores, textWithinUnderscores) => {
+ // If any tags are included inside underscores, ignore it. ie. _abc pre tag
abc_
if (textWithinUnderscores.includes('
') || this.containsNonPairTag(textWithinUnderscores)) {
return match;
}
+
if (String(textWithinUnderscores).match(`^${Constants.CONST.REG_EXP.MARKDOWN_EMAIL}`)) {
return `${extraLeadingUnderscores}${textWithinUnderscores}`;
}
@@ -326,12 +462,12 @@ export default class ExpensiMark {
// for * and ~: https://www.rexegg.com/regex-boundaries.html#notb
name: 'bold',
regex: /(?]*)\B\*(?![^<]*(?:<\/pre>|<\/code>|<\/a>))((?![\s*])[\s\S]*?[^\s*](?)(?![^<]*(<\/pre>|<\/code>|<\/a>))/g,
- replacement: (match, g1) => (g1.includes('
') || this.containsNonPairTag(g1) ? match : `${g1}`),
+ replacement: (_extras, match, g1) => (g1.includes('
') || this.containsNonPairTag(g1) ? match : `${g1}`),
},
{
name: 'strikethrough',
regex: /(?]*)\B~((?![\s~])[\s\S]*?[^\s~](?)(?![^<]*(<\/pre>|<\/code>|<\/a>))/g,
- replacement: (match, g1) => (g1.includes('
') || this.containsNonPairTag(g1) ? match : `${g1}`),
+ replacement: (_extras, match, g1) => (g1.includes('
') || this.containsNonPairTag(g1) ? match : `${g1}`),
},
{
name: 'newline',
@@ -355,7 +491,6 @@ export default class ExpensiMark {
/**
* The list of regex replacements to do on a HTML comment for converting it to markdown.
* Order of rules is important
- * @type {Object[]}
*/
this.htmlToMarkdownRules = [
// Used to Exclude tags
@@ -422,16 +557,32 @@ export default class ExpensiMark {
{
name: 'quote',
regex: /<(blockquote|q)(?:"[^"]*"|'[^']*'|[^'">])*>([\s\S]*?)<\/\1>(?![^<]*(<\/pre>|<\/code>))/gi,
- replacement: (match, g1, g2) => {
+ replacement: (_extras, _match, _g1, g2) => {
// We remove the line break before heading inside quote to avoid adding extra line
- let resultString = g2
+ let resultString: string[] | string = g2
.replace(/\n?(# )/g, '$1')
.replace(/(|<\/h1>)+/g, '\n')
.trim()
.split('\n');
- const prependGreaterSign = (m) => `> ${m}`;
- resultString = _.map(resultString, prependGreaterSign).join('\n');
+ // Wrap each string in the array with <blockquote> and </blockquote>
+ resultString = resultString.map((line) => {
+ return `${line}
`;
+ });
+
+ resultString = resultString
+ .map((text) => {
+ let modifiedText = text;
+ let depth;
+ do {
+ depth = (modifiedText.match(//gi) || []).length;
+ modifiedText = modifiedText.replace(//gi, '');
+ modifiedText = modifiedText.replace(/<\/blockquote>/gi, '');
+ } while (//i.test(modifiedText));
+ return `${'>'.repeat(depth)} ${modifiedText}`;
+ })
+ .join('\n');
+
// We want to keep tag here and let method replaceBlockElementWithNewLine to handle the line break later
return `${resultString}
`;
},
@@ -444,12 +595,12 @@ export default class ExpensiMark {
{
name: 'codeFence',
regex: /<(pre)(?:"[^"]*"|'[^']*'|[^'">])*>([\s\S]*?)(\n?)<\/\1>(?![^<]*(<\/pre>|<\/code>))/gi,
- replacement: (match, g1, g2) => `\`\`\`\n${g2}\n\`\`\``,
+ replacement: (_extras, _match, _g1, g2) => `\`\`\`\n${g2}\n\`\`\``,
},
{
name: 'anchor',
regex: /<(a)[^><]*href\s*=\s*(['"])(.*?)\2(?:".*?"|'.*?'|[^'"><])*>([\s\S]*?)<\/\1>(?![^<]*(<\/pre>|<\/code>))/gi,
- replacement: (match, g1, g2, g3, g4) => {
+ replacement: (_extras, _match, _g1, _g2, g3, g4) => {
const email = g3.startsWith('mailto:') ? g3.slice(7) : '';
if (email === g4) {
return email;
@@ -457,10 +608,11 @@ export default class ExpensiMark {
return `[${g4}](${email || g3})`;
},
},
+
{
name: 'image',
regex: /<]*src\s*=\s*(['"])(.*?)\1(?:[^><]*alt\s*=\s*(['"])(.*?)\3)?[^><]*>*(?![^<][\s\S]*?(<\/pre>|<\/code>))/gi,
- replacement: (match, g1, g2, g3, g4) => {
+ replacement: (_extras, _match, _g1, g2, _g3, g4) => {
if (g4) {
return `![${g4}](${g2})`;
}
@@ -468,13 +620,38 @@ export default class ExpensiMark {
return `!(${g2})`;
},
},
+
+ {
+ name: 'video',
+ regex: /