Skip to content
This repository has been archived by the owner on Jun 26, 2020. It is now read-only.

Commit

Permalink
Fix url parsing (#149)
Browse files Browse the repository at this point in the history
* Support showing ICS invites as attachments

ICS invites are currently not displayed correctly in Nylas. This commit adds support for extracting calendar files and storing them as attachments. The attachment is called `Event.ics`.

* FIX:  Only scroll vertically

Fixes #135

* Fix the URL parser to only parse URLs

Mike raised a valid issue where the parser was picking up the first word of each paragraph as a match.  This should resolve the issue.

1. Modified it to handle URLs without a scheme. (www.google.com)
2. Added ability to handle `mailto` and `tel`
3. Added support for punycode URLs.

Fixes #147

* Added the comments that were removed.
  • Loading branch information
nirmit authored and mikeseese committed Nov 1, 2017
1 parent ab07fcc commit d441e5d
Showing 1 changed file with 37 additions and 39 deletions.
76 changes: 37 additions & 39 deletions packages/client-app/src/regexp-utils.coffee
Original file line number Diff line number Diff line change
Expand Up @@ -39,78 +39,76 @@ RegExpUtils =

# Test cases: https://regex101.com/r/pD7iS5/3
# Builds a RegExp by concatenating the string fragments collected in `parts`.
#
# Options (destructured from a single optional object argument):
#   matchEntireString - when truthy, a ^ anchor is prepended to the pattern.
#     No $ anchor is appended anywhere in this block.
#
# Returns a new RegExp constructed with the flags gi.
#
# NOTE(review): this listing appears to be a merged diff view in which removed
# and added lines are interleaved (duplicate commonTlds assignments, repeated
# port / path / query / anchor fragments). Verify paren balance and fragment
# order against the real file before relying on the pattern as shown.
urlRegex: ({matchEntireString} = {}) ->
# NOTE(review): commonTlds is assigned twice in a row; only the second list is
# in effect when `parts` is built. Both lists contain the entry it twice.
# NOTE(review): in the first list, co\.uk uses a single backslash inside a
# string literal, so the dot presumably reaches the regex unescaped - confirm.
commonTlds = ['com', 'org', 'edu', 'gov', 'uk', 'net', 'ca', 'de', 'jp', 'fr', 'au', 'us', 'ru', 'ch', 'it', 'nl', 'se', 'no', 'es', 'mil', 'ly', 'co', 'in', 'it', 'co\.uk', 'info', 'biz', 'ai']
commonTlds = ['com', 'org', 'edu', 'gov', 'uk', 'net', 'ca', 'de', 'jp', 'fr', 'au', 'us', 'ru', 'ch', 'it', 'nl', 'se', 'no', 'es', 'mil', 'ly', 'biz', 'ai', 'info', 'it', 'to', 'io', 'co', 'eu', 'aero', 'jobs', 'mobi', 'at', 'be', 'br', 'cn' ]

# Each entry below is a regex source fragment; the entries are joined with an
# empty separator, so grouping parens opened in one entry are closed in another.
parts = [
'('
# one of:
# one of
'('
# This OR block matches any TLD if the URL includes a scheme, and only
# the top ten TLDs if the scheme is omitted.
# YES - https://nylas.ai
# YES - https://10.2.3.1
# YES - nylas.com
# NO - nylas.ai
# scheme, ala https://
'([A-Za-z]{3,9}:(?:\\/\\/))?'

# username:password (optional)
'(?:\\w+:\\w+@)?'

# one of:
'('
# scheme, ala https:// (mandatory)
'([A-Za-z]{3,9}:(?:\\/\\/))'

# username:password (optional)
'(?:[\\-;:&=\\+\\$,\\w]+@)?'
# domain with common tld
# NOTE(review): {1-3} sits inside the character class, so it adds the literal
# characters { 1 - 3 } to the class instead of acting as a quantifier.
'(?:(?:[-\\w\\d{1-3}]+\\.)+(?:' + commonTlds.join('|') + '))'

# one of:
'('
# domain with any tld
# (the common TLD list, or any run of 2 to 4 letters)
'(?:(?:[-\\w\\d{1-3}]+\\.)+(?:' + commonTlds.join('|') + '|[a-z]{2,4}))'
# or
'|'

'|'
# ip address
'('
# four dot-separated octets, each limited to 0-255
'(\\b25[0-5]\\b|\\b[2][0-4][0-9]\\b|\\b[0-1]?[0-9]?[0-9]\\b)(\\.(\\b25[0-5]\\b|\\b[2][0-4][0-9]\\b|\\b[0-1]?[0-9]?[0-9]\\b)){3}'

# ip address
# NOTE(review): this variant separates octets with a single-backslash \. in the
# string literal, so the separator presumably matches any character, unlike
# the double-backslash form in the fragment above - confirm.
'((\\b25[0-5]\\b|\\b[2][0-4][0-9]\\b|\\b[0-1]?[0-9]?[0-9]\\b)(\.(\\b25[0-5]\\b|\\b[2][0-4][0-9]\\b|\\b[0-1]?[0-9]?[0-9]\\b)){3})'
')'
')'

# port if specified
'(?::[\\d]{1,5})?'
# port if specified
'(?::[\\d]{1,5})?'

# URL Path
'(?:(?:(?:\\/(?:[-\\w~!$+|.,=]|%[a-f\\d]{2})+)+|\\/)+|\\?|#)?'
# URL Path
# (same as the fragment above, with a colon added to the allowed characters)
'(?:(?:(?:\\/(?:[-\\w~!$+|.,=:]|%[a-f\\d]{2})+)+|\\/)+|\\?|#)?'

# query strings
# NOTE(review): in %[a-f\\d{2}] the {2} is inside the character class, so this
# matches a percent sign plus ONE class character, not a two-digit hex escape.
'(?:(?:\\?(?:[-\\w~!\\$\\+|\.,*:]|%[a-f\\d{2}])+=?(?:[-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)(?:\\&(?:[-\\w~!\$\+|\.,*:]|%[a-f\\d{2}])+=?(?:[-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)*)*'
'|'
# query strings
'(?:(?:\\?(?:[-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?(?:[-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)(?:&(?:[-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?(?:[-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)*)*'

# Anchor
'(?:#(?:[-\\w~!$ |\\/\.,*:;=]|%[a-f\\d]{2})*)?'
# Anchor links
'(?:#(?:[-\\w~!$ |\\/.,*:;=]|%[a-f\\d]{2})*)?'

'|'

# mailtos
'mailto:\\/*(?:\\w+\\.|[\\-;:&=\\+\\$.,\\w]+@)[A-Za-z0-9\\.\\-]+'
# or
'|'

')'
# mailto links
'mailto:\\/*(?:\\w+\\.|[\\-;:&=\\+\\$.,\\w]+@)[A-Za-z0-9\\.\\-]+'

# :port (optional)
# NOTE(review): \d is written with a single backslash in the string literal, so
# the regex presumably receives a plain letter d here rather than the digit
# class - confirm.
'(?::\d*)?'
'|'

')'
# telephone links
# (only the tel: prefix is matched by this fragment)
'tel:'
')'

# optionally followed by:
'('
# URL components
# (last character must not be punctuation, hence two groups)
'(?:[\\+~%\\/\\.\\w\\-_@]*[\\+~%\\/\\w\\-_]+)?'

# optionally followed by: a query string and/or a #location
# optionally followed by: a query string and/or a #location
# (last character must not be punctuation, hence two groups)
'(?:(\\?[\\-\\+=&;%@\\.\\w_\\#]*[\\#\\-\\+=&;%@\\w_\\/]+)?#?(?:[\'\\$\\&\\(\\)\\*\\+,;=\\.\\!\\/\\\\\\w%-]*[\\/\\\\\\w]+)?)?'
')?'
')'
]

# Anchor the pattern at the start of the input when the caller asks for it.
if matchEntireString
parts.unshift('^')

# g = global, i = case-insensitive; a fresh RegExp object is built on each call.
return new RegExp(parts.join(''), 'gi')

# Test cases: https://regex101.com/r/jD5zC7/2
# Returns the following capturing groups:
# 1. start of the opening a tag to href="
Expand Down

0 comments on commit d441e5d

Please sign in to comment.