Skip to content

Commit

Permalink
Add major improvements to the importer
Browse files Browse the repository at this point in the history
* Add items previously elided due to name conflicts (e.g. UpButton2)
* Add group and subgroup properties to `SingleEmoji` (see #7)
* Automate code generation (supporting both web and headless node)
  • Loading branch information
mqudsi committed Mar 26, 2019
1 parent 5ed63b8 commit 046a8d9
Show file tree
Hide file tree
Showing 10 changed files with 27,974 additions and 16,877 deletions.
4 changes: 1 addition & 3 deletions importers/emoji-importer.html
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,7 @@
let code = generator.generate();

let results = document.getElementById("results");
for (const emoji of code.emoji) {
results.append(document.createTextNode(emoji));
}
results.append(document.createTextNode(code.emoji));
})();
</script>
</body>
Expand Down
100 changes: 78 additions & 22 deletions importers/importer.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,15 @@ const firstRegex = /\b1st/;
const secondRegex = /\b2nd/;
const thirdRegex = /\b3rd/;

// Text emitted at the top of each generated C# file: opens the
// `NeoSmart.Unicode` namespace and carries the machine-generated notice.
// The trailing empty entry yields the final newline after the notice.
const intro = [
    "namespace NeoSmart.Unicode",
    "{",
    "// This file is machine-generated from the official Unicode Consortium UTR51 publication",
    "// See the `importers` folder for the generators.",
    "",
].join("\n");

// Text emitted at the bottom of each generated C# file: a newline followed by
// the brace that closes the namespace opened by `intro`.
const extro = "\n}";

// Implementation of Lazy derived from the code at
// https://dev.to/nestedsoftware/lazy-evaluation-in-javascript-with-generators-map-filter-and-reduce--36h5
class Lazy {
Expand Down Expand Up @@ -168,17 +177,30 @@ function makeStringArray(keywords) {
.join(", ");
}

function makeSortedSet(name, emoji) {
result = `public static readonly SortedSet<SingleEmoji> ${name} = new SortedSet<SingleEmoji>() {
function makeSortedSet(name, emoji, summary = "") {
result = `using System.Collections.Generic;
${intro}
public static partial class Emoji
{
/// <summary>
/// ${summary}
/// </summary>
#if NET20 || NET30 || NET35
public static readonly List<SingleEmoji> ${name} = new List<SingleEmoji>() {
#else
public static readonly SortedSet<SingleEmoji> ${name} = new SortedSet<SingleEmoji>() {
#endif
`;

for (const e of emoji) {
result += `\t/* ${e.symbol} */ ${CamelCase(e.name)},
result += ` /* ${e.symbol} */ ${CamelCase(e.name)},
`;
}
result += `};
result += ` };
}`;

`;
result += extro;

return result;
}
Expand All @@ -196,16 +218,16 @@ function isUngenderedEmoji(emoji) {
}

function emojiToCSharp(emoji) {
return `/* ${emoji.symbol} */
public static readonly SingleEmoji ${CamelCase(emoji.name)} = new SingleEmoji(
sequence: new UnicodeSequence("${emoji.sequence}"),
name: "${emoji.name}",
group: "${emoji.group}",
subgroup: "${emoji.subgroup}",
searchTerms: new [] { ${makeStringArray(emoji.name)} },
sortOrder: ${emoji.index},
);
return `
/* ${emoji.symbol} */
public static readonly SingleEmoji ${CamelCase(emoji.name)} = new SingleEmoji(
sequence: new UnicodeSequence("${emoji.sequence}"),
name: "${emoji.name}",
group: "${emoji.group}",
subgroup: "${emoji.subgroup}",
searchTerms: new [] { ${makeStringArray(emoji.name)} },
sortOrder: ${emoji.index}
);
`;
}

Expand Down Expand Up @@ -242,8 +264,10 @@ function *parse(data) {
const groupRegex = /\bgroup: \s*(\S.+?)\s*$/;
const subgroupRegex = /subgroup: \s*(\S.+?)\s*$/;

let deduplicator = new Set();
let group = "";
let subgroup = "";
let sortIndex = 0;
for (let i = 0; i < lines.length; ++i) {
const line = lines[i];
if (line.startsWith("#") || !line.includes("fully-qualified")) {
Expand All @@ -257,19 +281,33 @@ function *parse(data) {

let results = line.match(parser);

yield {
const emoji = {
"sequence": results[1],
"symbol": results[2],
"name": results[3],
"index": i++,
"index": sortIndex++,
"group": group,
"subgroup": subgroup,
};

if (deduplicator.has(emoji.name)) {
continue;
}

let oldName = emoji.name;
let version = 2;
while (deduplicator.has(CamelCase(emoji.name))) {
emoji.name = oldName + version++;
}
deduplicator.add(emoji.name);
deduplicator.add(CamelCase(emoji.name));

yield emoji;
}
}

function parseEmoji(data) {
return new Lazy(parse(data), () => true);
return new Lazy(parse(data));
}

const manWomanRegex = /^(man|woman)/i;
Expand All @@ -295,15 +333,22 @@ class CodeGenerator {
let emoji = Array.from(parseEmoji(this.data));

let csharp = {
emoji: [],
emoji: "",
lists: {},
};

// Dump actual emoji objects.
// All other operations print only references to these.
let code = [];
code.push(intro);
code.push(" public static partial class Emoji\n");
code.push(" {");
for (const e of emoji) {
csharp.emoji.push(emojiToCSharp(e));
code.push(emojiToCSharp(e));
}
code.push(" }");
code.push(extro);
csharp.emoji = code.join("");

// Dump all emoji list
csharp.lists.all = makeSortedSet("All", emoji);
Expand All @@ -315,11 +360,22 @@ class CodeGenerator {
// Narrow it down to emoji supported by Segoe UI Emoji
let supportedEmoji = basicUngenderedEmoji
.filter(isBasicEmoji)
.filter(e => fontSupportsEmoji(this.font, e));
.filter(e => fontSupportsEmoji(this.font, e))

// Dump list of ungendered emoji
csharp.lists.basic = makeSortedSet("Basic", supportedEmoji);
csharp.lists.basic = makeSortedSet("Basic", supportedEmoji,
"A (sorted) enumeration of all emoji without skin variations and no duplicate " +
"gendered vs gender-neutral emoji, ideal for displaying. " +
"Emoji without supported glyphs in Segoe UI Emoji are also omitted from this list.");

return csharp;
}
}

// When no `module` property exists on the top-level `this` (e.g. when this file
// is loaded as a plain browser script), create a stand-in object so that the
// CommonJS-style export assignment below does not fail.
if (this.module == null) {
    this.module = {};
}

// Public surface of this file for `require()` consumers.
module.exports = { CodeGenerator };
18 changes: 18 additions & 0 deletions importers/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
const fs = require("fs").promises;
const path = require("path");
const fontkit = require("fontkit");
const importer = require("./importer.js");

// Directory that receives the generated C# sources. Like the input file names
// below, it is resolved relative to the current working directory.
const codeRoot = "../unicode";

// Headless entry point: reads the Unicode emoji test data and the emoji font,
// runs the code generator, and writes one C# source file per generated unit.
(async function() {
    // The two inputs do not depend on each other, so read them concurrently.
    const [text, fontData] = await Promise.all([
        fs.readFile("emoji-test.txt", { encoding: "utf-8" }),
        fs.readFile("seguiemj.ttf"),
    ]);
    const font = fontkit.create(fontData);

    const generator = new importer.CodeGenerator(font, text);
    const code = generator.generate();

    // Destination file name paired with the generated source it receives.
    const outputs = [
        ["Emoji-Emojis.cs", code.emoji],
        ["Emoji-All.cs", code.lists.all],
        ["Emoji-Basic.cs", code.lists.basic],
    ];

    // The output files are independent of each other, so write them concurrently.
    await Promise.all(outputs.map(([fileName, contents]) =>
        fs.writeFile(path.join(codeRoot, fileName), contents, { encoding: "utf-8" })));
})().catch((err) => {
    // Do not leave the promise floating: report the failure and make the
    // process exit with a non-zero status so callers can detect it.
    console.error(err);
    process.exitCode = 1;
});
9 changes: 9 additions & 0 deletions importers/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"name": "importers",
"version": "1.0.0",
"main": "index.js",
"license": "MIT",
"dependencies": {
"fontkit": "^1.8.0"
}
}
Loading

0 comments on commit 046a8d9

Please sign in to comment.