Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(Try to) fix Utf8 #54

Merged
merged 5 commits into from
Apr 2, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add strict_utf8
  • Loading branch information
bynect committed Apr 1, 2021
commit 80be725904d3318e9674d0d8848fd06b9e73cae8
9 changes: 4 additions & 5 deletions src/open_and_save.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ void savefile(void) {
for (unsigned int i = 0; i < num_lines; i++) {
for (unsigned int j = 0; j < lines[i].length; j++) {
unsigned char b[4];
fwrite(b, sizeof(unsigned char), utf8ToMultibyte(lines[i].data[j], b, 0), fpw);
int len = utf8ToMultibyte(lines[i].data[j], b, 0);
fwrite(b, sizeof(unsigned char), len, fpw);
}
if (num_lines > 1) {
if (config.line_break_type == 0)
Expand Down Expand Up @@ -91,10 +92,8 @@ void read_lines(void) {
else if (passed_spaces == 0)
lines[i].ident++;

unsigned char uc = *(unsigned char *)&c;

utf8ReadFile(uc, j, i, fp);

unsigned char uc = c;
utf8ReadFile(uc, &lines[i].data[j], fp);
lines[i].length++;
}

Expand Down
2 changes: 1 addition & 1 deletion src/ted.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ void setcolor(int c) {
unsigned int last_cursor_x = 0;

struct CFG config = {
4, 0, 0, 1, 1, 1, 1,
1, 4, 0, 0, 1, 1, 1, 1,
&default_syntax, 0, NULL,
{0, 0, 1},
};
Expand Down
3 changes: 2 additions & 1 deletion src/ted.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ void change_position(unsigned int x, unsigned int y);
void processMouseEvent(MEVENT ev);

// utf8.c
void utf8ReadFile(unsigned char uc, unsigned int lc, unsigned int i, FILE *fp);
void utf8ReadFile(unsigned char uc, uchar32_t *out, FILE *fp_);
int utf8ToMultibyte(uchar32_t c, unsigned char *out, bool validate);
bool validate_utf8(unsigned char *ucs);

Expand Down Expand Up @@ -132,6 +132,7 @@ struct BUFFER {
};

struct CFG {
bool strict_utf8; // high/low surrogates will be replaced
unsigned int tablen;
int lines;
unsigned char line_break_type; // 0: LF 1: CRLF 2: CR
Expand Down
30 changes: 15 additions & 15 deletions src/utf8.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,27 +15,27 @@ Utf8 sequences range
// Closed-interval range tests used to classify UTF-8 lead and continuation bytes.
// IN_RANGE is nonzero when min <= x <= max; OUT_RANGE is its logical negation.
// The whole expansion is parenthesized so the macros compose safely with
// `!`, `&&` and `||` at the call site (e.g. `!IN_RANGE(...)` or
// `OUT_RANGE(...) && cond`). Note that x is evaluated twice, so do not pass
// an argument with side effects.
#define IN_RANGE(x, min, max) (((x) >= (min)) && ((x) <= (max)))
#define OUT_RANGE(x, min, max) (((x) < (min)) || ((x) > (max)))

void utf8ReadFile(unsigned char uc, unsigned int lc, unsigned int i, FILE *fp_) {
void utf8ReadFile(unsigned char uc, uchar32_t *out, FILE *fp_) {
if (uc <= 0x7F) {
lines[i].data[lc] = uc;
*out = uc;

} else if (IN_RANGE(uc, 0xC2, 0xDF)) {
int uc2 = fgetc(fp_);
if (OUT_RANGE(uc2, 0x80, 0xBF)) { ungetc(uc2, fp_); goto invalid; }

lines[i].data[lc] = uc;
lines[i].data[lc] += (unsigned int)uc2 << 8;
*out = uc;
*out += (unsigned int)uc2 << 8;

} else if (IN_RANGE(uc, 0xE0, 0xEF)) {
} else if (IN_RANGE(uc, 0xE0, 0xEF) && (!config.strict_utf8 || uc != 0xED)) {
int uc2 = fgetc(fp_);
if (OUT_RANGE(uc2, 0xA0, 0xBF)) { ungetc(uc2, fp_); goto invalid; }

int uc3 = fgetc(fp_);
if (OUT_RANGE(uc3, 0x80, 0xBF)) { ungetc(uc3, fp_); ungetc(uc2, fp_); goto invalid; }

lines[i].data[lc] = uc;
lines[i].data[lc] += (unsigned int)uc2 << 8;
lines[i].data[lc] += (unsigned int)uc3 << 16;
*out = uc;
*out += (unsigned int)uc2 << 8;
*out += (unsigned int)uc3 << 16;

} else if (IN_RANGE(uc, 0xF0, 0xF4)) {
int uc2 = fgetc(fp_);
Expand All @@ -47,15 +47,15 @@ void utf8ReadFile(unsigned char uc, unsigned int lc, unsigned int i, FILE *fp_)
int uc4 = fgetc(fp_);
if (OUT_RANGE(uc4, 0x80, 0xBF)) { ungetc(uc4, fp_); ungetc(uc3, fp_); ungetc(uc2, fp_); goto invalid; }

lines[i].data[lc] = uc;
lines[i].data[lc] += (unsigned int)uc2 << 8;
lines[i].data[lc] += (unsigned int)uc3 << 16;
lines[i].data[lc] += (unsigned int)uc4 << 24;
*out = uc;
*out += (unsigned int)uc2 << 8;
*out += (unsigned int)uc3 << 16;
*out += (unsigned int)uc4 << 24;
} else goto invalid;
return;

invalid:
lines[i].data[lc] = uc; //treat as a single char
*out = uc; //treat as a single char
}

int utf8ToMultibyte(uchar32_t c, unsigned char *out, bool validate) {
Expand All @@ -73,7 +73,7 @@ int utf8ToMultibyte(uchar32_t c, unsigned char *out, bool validate) {
return 2;
}
if (IN_RANGE(out[0], 0xE0, 0xEF)) {
if (OUT_RANGE(out[1], 0xA0, 0xBF) || OUT_RANGE(out[2], 0x80, 0xBF))
if (OUT_RANGE(out[1], 0xA0, 0xBF) || OUT_RANGE(out[2], 0x80, 0xBF) || (config.strict_utf8 && out[0] == 0xED))
goto invalid;
return 3;
}
Expand All @@ -96,7 +96,7 @@ bool validate_utf8(unsigned char *ucs) {
return 1;
}
if (IN_RANGE(ucs[0], 0xE0, 0xEF)) {
if (OUT_RANGE(ucs[1], 0xA0, 0xBF) || OUT_RANGE(ucs[2], 0x80, 0xBF))
if (OUT_RANGE(ucs[1], 0xA0, 0xBF) || OUT_RANGE(ucs[2], 0x80, 0xBF) || (config.strict_utf8 && ucs[0] == 0xED))
return 0;
return 1;
}
Expand Down