Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Iro support #1

Open
wants to merge 8 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,16 @@ This is a test file meant to cover all good and bad syntax forms of note. Depend

Any bad cases are marked with a comment `# bad`

## Iro ##
The `.iro` version is meant for use in: https://eeyo.io/iro/

This tool can generate definitions for other editors but is somewhat inefficient and has some limitations:

* Doesn't support pop-to-push backreferences (needed for arbitrarily long multistring quotes).
* The documentation makes Unicode support sound iffy, but no problems have come up so far.
* Cannot generate things like `meta.*` bodies for segments in Textmate/Sublime.
* Forces you to highlight things that don't matter, like whitespace, for some reason.

## Planned definitions ##

* Textmate
Expand Down
149 changes: 149 additions & 0 deletions build.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import re
import argparse

import jinja2

def main():
    """Command-line entry point: render each ``.j2`` template to its output file(s).

    For every template named on the command line, one output file is written
    per selected mode (``--unicode`` and/or ``--ascii``). The output name is
    the template name with the trailing ``.j2`` removed; the ASCII variant
    gets ``_ascii`` appended to the first dot-separated component, e.g.
    ``syntax.iro.j2`` -> ``syntax.iro`` and ``syntax_ascii.iro``.

    Exits through ``parser.error`` (status 2) when no mode is selected, since
    otherwise the script would silently do nothing.
    """
    parser = argparse.ArgumentParser(
        description="Build syntax definitions .j2 files.")
    parser.add_argument(
        "--ascii", "-a", action="store_true",
        help="Build ASCII-only version.")
    parser.add_argument(
        "--unicode", "-u", action="store_true",
        help="Build Unicode-compatible version.")
    # type=int makes argparse reject a non-numeric value up front with a
    # readable message instead of failing later during rendering.
    parser.add_argument(
        "--multi", "-m", type=int, default=3,
        help=(
            "If a syntax doesn't support end->start backreferencing, it"
            " should generate up to this number of quotes. (Default: 3)"))
    parser.add_argument(
        "--no-cleanup", "-C", action="store_true",
        help="Don't clean up ~~DEL~~ strings.")
    parser.add_argument(
        "files", nargs="+",
        help=".j2 templates to build.")
    args = parser.parse_args()

    # Mode 0 is the Unicode build, mode 1 the ASCII build; the value doubles
    # as the index into filename_templates below.
    modes = []
    if args.unicode:
        modes.append(0)
    if args.ascii:
        modes.append(1)
    if not modes:
        parser.error("nothing to build: pass --unicode and/or --ascii")

    filename_templates = ["{}", "{}_ascii"]

    for fn in args.files:
        if not fn.endswith(".j2"):
            print(f"Not a known template format: {fn}")
            continue

        path, base = os.path.split(fn)
        out_file, *out_ext = base.split(".")
        prefix = os.path.join(path, out_file)
        # Everything between the first component and the trailing "j2".
        suffix = ".".join(out_ext[:-1])

        for mode in modes:
            out_filename = filename_templates[mode].format(prefix)
            # Only add the extension when there is one, so "foo.j2" yields
            # "foo" rather than "foo." with a dangling dot.
            if suffix:
                out_filename = f"{out_filename}.{suffix}"
            result = build_j2(fn, mode == 1, args)

            if not args.no_cleanup:
                result = cleanup(result)

            with open(out_filename, "w", encoding="utf8") as f:
                f.write(result)
            print(f"Wrote out {out_filename}")


# Matches a whole line containing the ~~DEL~~ marker, plus the whitespace
# running up to the start of the following line (or the end of the text).
DEL = re.compile(r'^.*~~DEL~~.*$(?:\s+?^|\Z)', re.M)
# Matches a whole line containing the ~~DEL NEXT~~ marker together with the
# line that follows it (or just the marker line when it is the last one).
DEL_NEXT = re.compile(r'^.*~~DEL NEXT~~.*$(?:\s*?^.*$(?:\s+?^|\Z)|\Z)', re.M)


def cleanup(text):
    """Return *text* with the deletion-marker lines stripped out.

    The ``~~DEL NEXT~~`` pass runs first, then the ``~~DEL~~`` pass is
    applied to its result.
    """
    without_del_next = DEL_NEXT.sub("", text)
    return DEL.sub("", without_del_next)


class Quotes:
    """Tables of quote characters handed to the templates as ``quotes``.

    ``match`` is keyed by a quote character; each value is a string of
    related quote characters (presumably the characters allowed to close a
    string opened by the key -- confirm against the templates).

    The remaining attributes are plain strings of characters grouped as
    single/double and left/right, plus their concatenations.

    The entries commented "Half, full" pair the ASCII (half-width) character
    with its full-width form: U+FF02 FULLWIDTH QUOTATION MARK and U+FF07
    FULLWIDTH APOSTROPHE. The CJK "Full, half" entry pairs U+300C/U+300D
    with the half-width corner brackets U+FF62/U+FF63. These look almost
    identical to their counterparts in many fonts, so take care that editors
    or copy/paste do not normalise them away: doing so yields duplicate dict
    keys and an unterminated string in ``mirrored``.
    """

    match = {
        # Double
        ## Half, full
        '"': '"', '＂': '＂',
        ## Rounded, heavy, sans-serif heavy
        '“': '“”', '❝': '❝❞', '🙶': '🙶🙷',
        '”': '”„', '❞': '❞❠', '🙷': '🙷🙸',
        '„': '“”', '❠': '❝❞', '🙸': '🙶🙷',
        ## Reversed
        '‟': '‟”',
        # Single
        ## Half, full
        "'": "'", "＇": "＇",
        ## Rounded, heavy
        "‘": "‘’", "❛": "❛❜",
        "’": "’‘‚", "❜": "❜❛❟",
        "‚": "’‘", "❟": "❜❛",
        ## Reversed
        "‛": "‛’",
        # CJK
        ## Full, half
        "「": "」", "｢": "｣",
        ## Full
        '『': '』',
        "《": "》",
        "〈": "〉",
        # Chevrons
        '«': '»',
        '»': '«»',
        ## Regular, heavy
        "‹": "›", "❮": "❯",
        "›": "‹", "❯": "❮",
    }

    # The two ASCII quote characters.
    ascii = "\"'"
    # ASCII quotes plus their full-width forms (U+FF02, U+FF07).
    mirrored = ascii + "＂＇"

    # Note that rarely single quotes are the primary quote.
    # Thus, we can't really be more semantic about this.
    # Which has annoying implications for 「 because it's always primary.
    lsingles = "'＇‘’❛❜‛「〈‚❟‹›❮❯"
    rsingles = "'＇‘’❛❜‛’」〉‚❟›‹❯❮"
    singles = lsingles + rsingles

    ldoubles = '"＂“”❝❞🙶🙷‟『《„❠🙸«»'
    rdoubles = '"＂“”❝❞🙶🙷‟”』》„❠🙸»«'
    doubles = ldoubles + rdoubles

    lquotes = lsingles + ldoubles
    rquotes = rsingles + rdoubles
    quotes = singles + doubles


# Shared Jinja2 environment; templates are looked up relative to the
# current working directory.
J2_ENV = jinja2.Environment(loader=jinja2.FileSystemLoader("."))


def build_j2(fn, ascii, args):
    """Render the template *fn* and return the resulting text.

    The template receives three variables: ``quotes`` (the ``Quotes``
    class), ``ascii`` (the flag passed in here) and ``multi``
    (``args.multi`` converted to ``int``).
    """
    tpl = J2_ENV.get_template(fn)
    context = {
        "quotes": Quotes,
        "ascii": ascii,
        "multi": int(args.multi),
    }
    return tpl.render(**context)


def escape_unicode_in_regex(value, surrogate=True):
    r"""Return *value* with non-printable-ASCII characters as ``\uXXXX`` escapes.

    Characters in the range U+0020..U+007E are passed through unchanged.
    Everything else is replaced by a lowercase, zero-padded hexadecimal
    ``\u`` escape.

    Args:
        value: The string to escape.
        surrogate: When true (the default), characters above U+FFFF are
            written as a UTF-16 surrogate pair (two ``\uXXXX`` escapes).
            When false they are written as a single ``\u`` escape carrying
            the full code point, which then has more than four hex digits.

    Returns:
        The escaped string.
    """
    def escape(c):
        o = ord(c)
        if surrogate and o > 0xffff:
            # UTF-16-LE encodes an astral character as exactly two 16-bit
            # code units: the high surrogate followed by the low surrogate.
            u = c.encode("utf-16-le")
            s1 = int.from_bytes(u[:2], "little")
            s2 = int.from_bytes(u[2:], "little")
            return f"\\u{s1:04x}\\u{s2:04x}"
        if o < 0x20 or o > 0x7e:
            # 04x (not 4x) so short code points are padded with zeros
            # rather than spaces, e.g. "\n" -> \u000a.
            return f"\\u{o:04x}"
        return c

    return "".join(escape(c) for c in value)
# Expose the escaper to templates as the "safe" filter. NOTE: Jinja2 ships a
# built-in filter with this name, so this assignment replaces it for every
# template rendered through J2_ENV.
J2_ENV.filters["safe"] = escape_unicode_in_regex


# Run the command-line entry point only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    main()
Loading