From d441e5d2b0aa26173c7eb44b15e1148d672dd05a Mon Sep 17 00:00:00 2001 From: Nirmit Date: Wed, 1 Nov 2017 15:13:05 -0400 Subject: [PATCH] Fix url parsing (#149) * Support to show ICS invite as attachments ICS invites are currently not displayed correctly in Nylas. This commit adds support to extract calendar files and store them as attachments. The attachment is called `Event.ics`. * FIX: Only scroll vertically Fixes #135 * Fix the URL parser to only parse URLs Mike raised a valid issue where the parser was picking up the first word of each paragraph as a match. This should resolve the issue. 1. Modified it to handle URLs without a scheme. (www.google.com) 2. Added ability to handle `mailto` and `tel` 3. Added support for punycode URLs. Fixes #147 * Added the comments that were removed. --- packages/client-app/src/regexp-utils.coffee | 76 ++++++++++----------- 1 file changed, 37 insertions(+), 39 deletions(-) diff --git a/packages/client-app/src/regexp-utils.coffee b/packages/client-app/src/regexp-utils.coffee index 8eb62809f..48005c1f6 100644 --- a/packages/client-app/src/regexp-utils.coffee +++ b/packages/client-app/src/regexp-utils.coffee @@ -39,78 +39,76 @@ RegExpUtils = # Test cases: https://regex101.com/r/pD7iS5/3 urlRegex: ({matchEntireString} = {}) -> - commonTlds = ['com', 'org', 'edu', 'gov', 'uk', 'net', 'ca', 'de', 'jp', 'fr', 'au', 'us', 'ru', 'ch', 'it', 'nl', 'se', 'no', 'es', 'mil', 'ly', 'co', 'in', 'it', 'co\.uk', 'info', 'biz', 'ai'] + commonTlds = ['com', 'org', 'edu', 'gov', 'uk', 'net', 'ca', 'de', 'jp', 'fr', 'au', 'us', 'ru', 'ch', 'it', 'nl', 'se', 'no', 'es', 'mil', 'ly', 'biz', 'ai', 'info', 'it', 'to', 'io', 'co', 'eu', 'aero', 'jobs', 'mobi', 'at', 'be', 'br', 'cn' ] parts = [ '(' - # one of: + # one of '(' - # This OR block matches any TLD if the URL includes a scheme, and only - # the top ten TLDs if the scheme is omitted. - # YES - https://nylas.ai - # YES - https://10.2.3.1 - # YES - nylas.com - # NO - nylas.ai + # scheme, ala https:// + '([A-Za-z]{3,9}:(?:\\/\\/))?' + + # username:password (optional) + '(?:\\w+:\\w+@)?' + + # one of: '(' - # scheme, ala https:// (mandatory) - '([A-Za-z]{3,9}:(?:\\/\\/))' - # username:password (optional) - '(?:[\\-;:&=\\+\\$,\\w]+@)?' + # domain with common tld + '(?:(?:[-\\w\\d{1-3}]+\\.)+(?:' + commonTlds.join('|') + '))' - # one of: - '(' - # domain with any tld - '(?:(?:[-\\w\\d{1-3}]+\\.)+(?:' + commonTlds.join('|') + '|[a-z]{2,4}))' + # or + '|' - '|' + # ip address + '(' + '(\\b25[0-5]\\b|\\b[2][0-4][0-9]\\b|\\b[0-1]?[0-9]?[0-9]\\b)(\\.(\\b25[0-5]\\b|\\b[2][0-4][0-9]\\b|\\b[0-1]?[0-9]?[0-9]\\b)){3}' - # ip address - '((\\b25[0-5]\\b|\\b[2][0-4][0-9]\\b|\\b[0-1]?[0-9]?[0-9]\\b)(\.(\\b25[0-5]\\b|\\b[2][0-4][0-9]\\b|\\b[0-1]?[0-9]?[0-9]\\b)){3})' ')' + ')' - # port if specified - '(?::[\\d]{1,5})?' + # port if specified + '(?::[\\d]{1,5})?' - # URL Path - '(?:(?:(?:\\/(?:[-\\w~!$+|.,=]|%[a-f\\d]{2})+)+|\\/)+|\\?|#)?' + # URL Path + '(?:(?:(?:\\/(?:[-\\w~!$+|.,=:]|%[a-f\\d]{2})+)+|\\/)+|\\?|#)?' - # query strings - '(?:(?:\\?(?:[-\\w~!\\$\\+|\.,*:]|%[a-f\\d{2}])+=?(?:[-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)(?:\\&(?:[-\\w~!\$\+|\.,*:]|%[a-f\\d{2}])+=?(?:[-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)*)*' - '|' + # query strings + '(?:(?:\\?(?:[-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?(?:[-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)(?:&(?:[-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?(?:[-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)*)*' - # Anchor - '(?:#(?:[-\\w~!$ |\\/\.,*:;=]|%[a-f\\d]{2})*)?' + # Anchor links + '(?:#(?:[-\\w~!$ |\\/.,*:;=]|%[a-f\\d]{2})*)?' - '|' - - # mailtos - 'mailto:\\/*(?:\\w+\\.|[\\-;:&=\\+\\$.,\\w]+@)[A-Za-z0-9\\.\\-]+' + # or + '|' - ')' + # mailto links + 'mailto:\\/*(?:\\w+\\.|[\\-;:&=\\+\\$.,\\w]+@)[A-Za-z0-9\\.\\-]+' - # :port (optional) - '(?::\d*)?' + '|' - ')' + # telephone links + 'tel:' + ')' # optionally followed by: '(' # URL components # (last character must not be puncation, hence two groups) '(?:[\\+~%\\/\\.\\w\\-_@]*[\\+~%\\/\\w\\-_]+)?' - - # optionally followed by: a query string and/or a #location + + # optionally followed by: a query string and/or a #location # (last character must not be puncation, hence two groups) '(?:(\\?[\\-\\+=&;%@\\.\\w_\\#]*[\\#\\-\\+=&;%@\\w_\\/]+)?#?(?:[\'\\$\\&\\(\\)\\*\\+,;=\\.\\!\\/\\\\\\w%-]*[\\/\\\\\\w]+)?)?' ')?' ')' ] + if matchEntireString parts.unshift('^') return new RegExp(parts.join(''), 'gi') - + # Test cases: https://regex101.com/r/jD5zC7/2 # Returns the following capturing groups: # 1. start of the opening a tag to href="