forked from tats/w3m
-
Notifications
You must be signed in to change notification settings - Fork 0
/
entity.c
82 lines (78 loc) · 2.27 KB
/
entity.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
/* $Id: entity.c,v 1.7 2003/09/24 18:48:59 ukai Exp $ */
/*
 * Build configuration.
 *
 * With DUMMY defined, only Str.h is included; NBSP and UseAltEntity are
 * supplied here as macros, and USE_M17N is undefined so that the
 * USE_M17N-guarded branches of conv_entity() are compiled out.
 *
 * Without DUMMY, fm.h is included (presumably the source of NBSP and
 * UseAltEntity in that configuration -- confirm), and the UCS / UTF-8
 * headers are added when both USE_M17N and USE_UNICODE are defined.
 */
#ifdef DUMMY
#include "Str.h"
#define NBSP " "
#define UseAltEntity 1
#undef USE_M17N
#else /* DUMMY */
#include "fm.h"
#ifdef USE_M17N
#ifdef USE_UNICODE
#include "ucs.h"
#include "utf8.h"
#endif
#endif
#endif /* DUMMY */
/*
 * Prototype for conv_entity(), which is defined further down in this file.
 * NOTE(review): presumably kept here so that DUMMY builds, which do not
 * include fm.h, still see a declaration -- confirm.
 */
extern char * conv_entity(unsigned int c);
/*
 * Replacement strings for the 96 code points 0xa0 - 0xff, eight entries
 * per row.  conv_entity() indexes this table with (c - 0xa0), so entry 0
 * corresponds to 0xa0 and entry 95 to 0xff; the order of the entries must
 * not change.
 *
 * Entries 0 (0xa0) and 13 (0xad) are not reached from conv_entity(): it
 * returns for those two values before it consults the table.
 */
/* *INDENT-OFF* */
static char *alt_latin1[ 96 ] = {
NBSP, "!", "-c-", "-L-", "CUR", "=Y=", "|", "S:",	/* 0xa0 - 0xa7 */
"\"", "(C)", "-a", "<<", "NOT", "-", "(R)", "-",	/* 0xa8 - 0xaf */
"DEG", "+-", "^2", "^3", "'", "u", "P:", ".",	/* 0xb0 - 0xb7 */
",", "^1", "-o", ">>", "1/4", "1/2", "3/4", "?",	/* 0xb8 - 0xbf */
"A`", "A'", "A^", "A~", "A:", "AA", "AE", "C,",	/* 0xc0 - 0xc7 */
"E`", "E'", "E^", "E:", "I`", "I'", "I^", "I:",	/* 0xc8 - 0xcf */
"D-", "N~", "O`", "O'", "O^", "O~", "O:", "x",	/* 0xd0 - 0xd7 */
"O/", "U`", "U'", "U^", "U:", "Y'", "TH", "ss",	/* 0xd8 - 0xdf */
"a`", "a'", "a^", "a~", "a:", "aa", "ae", "c,",	/* 0xe0 - 0xe7 */
"e`", "e'", "e^", "e:", "i`", "i'", "i^", "i:",	/* 0xe8 - 0xef */
"d-", "n~", "o`", "o'", "o^", "o~", "o:", "-:",	/* 0xf0 - 0xf7 */
"o/", "u`", "u'", "u^", "u:", "y'", "th", "y:"	/* 0xf8 - 0xff */
};
/* *INDENT-ON* */
/*
 * conv_entity - turn a character code into a string to display for it.
 *
 * c: the character code to convert.
 *
 * Returns, in order of precedence:
 *   - " " for C0 controls (below 0x20), DEL and C1 controls (0x7f - 0x9f);
 *   - a one-byte string built with Strnew_charp_n() for 0x20 - 0x7e;
 *   - NBSP for 0xa0 and the empty string for 0xad (soft hyphen);
 *   - for the rest of Latin-1 (up to 0xff): the alt_latin1 entry when
 *     UseAltEntity is set, otherwise the byte itself (converted from
 *     ISO 8859-1 to InnerCharset when USE_M17N is defined);
 *   - with USE_M17N and USE_UNICODE, for codes up to WC_C_UCS4_END: the
 *     UTF-8 encoding converted to InnerCharset, provided that converting
 *     it to the display charset does not yield "?";
 *   - otherwise an ASCII approximation for a few quotes and dashes, and
 *     "?" for everything else.
 *
 * Several results are string literals or alt_latin1 entries, so the
 * returned string should be treated as read-only.
 */
char *
conv_entity(unsigned int c)
{
    char octet = c & 0xff;

    if (c < 0x100) {
        /* C0 controls, then DEL and the C1 controls, all become a blank. */
        if (c < 0x20 || (c >= 0x7f && c < 0xa0))
            return " ";
        /* Plain ASCII is passed through as a one-byte string. */
        if (c < 0x7f)
            return Strnew_charp_n(&octet, 1)->ptr;

        switch (c) {
        case 0xa0:
            return NBSP;
        case 0xad:              /* SOFT HYPHEN */
            return "";
        default:
            break;
        }

        /* Remaining Latin1 (ISO 8859-1) */
        if (UseAltEntity)
            return alt_latin1[c - 0xa0];
#ifdef USE_M17N
        return wc_conv_n(&octet, 1, WC_CES_ISO_8859_1, InnerCharset)->ptr;
#else
        return Strnew_charp_n(&octet, 1)->ptr;
#endif
    }

#if defined(USE_M17N) && defined(USE_UNICODE)
    if (c <= WC_C_UCS4_END) {   /* Unicode */
        wc_uchar encoded[7];
        char *probe;

        wc_ucs_to_utf8(c, encoded);
        /*
         * The character has to be displayed eventually, so first try the
         * conversion to the display charset; a result of "?" means the
         * conversion is not usable and the ASCII fallbacks below apply.
         */
        probe = wc_conv((char *)encoded, WC_CES_UTF_8,
                        DisplayCharset ? DisplayCharset : WC_CES_US_ASCII)->ptr;
        if (strcmp(probe, "?") != 0)
            return wc_conv((char *)encoded, WC_CES_UTF_8, InnerCharset)->ptr;
    }
#endif

    /* ASCII approximations for common typographic punctuation. */
    switch (c) {
    case 0x201c:
    case 0x201d:
    case 0x201f:
    case 0x2033:
        /* double quotation marks and double prime */
        return "\"";
    case 0x2018:
    case 0x2019:
    case 0x201b:
    case 0x2032:
        /* single quotation marks and prime */
        return "'";
    case 0x2010:
    case 0x2011:
    case 0x2012:
    case 0x2013:
        /* hyphens, figure dash, en dash */
        return "-";
    case 0x2014:
        /* em dash */
        return "--";
    default:
        return "?";
    }
}