; UnHTM.ahk
; UnHTM by SKAN
; Please do not expect UnHTM() to unformat a whole HTML file. If you have already parsed out a string, and need to unformat it to plain text, then UnHTM() would be handy.
; Example:
; HTM = <a href="/intl/en/ads/">Advertising Programs</a>
; MsgBox, % UnHTM( HTM )
UnHTM( HTM ) { ; Remove HTML formatting / Convert to ordinary text by SKAN 19-Nov-2009
 ; Purpose : Strips every "<...>" tag from HTM, decodes HTML character references
 ;           and trims leading/trailing white space.
 ; HTM     : An already-extracted HTML fragment (not a whole document).
 ; Returns : The plain-text version of HTM.
 ;
 ; Decoding rules
 ;   * Named entities are looked up in the HT table below. An exact-case match is
 ;     tried first (so "&Dagger;" and "&dagger;" differ); otherwise the lookup is
 ;     case-insensitive, so e.g. "&Aacute;" yields the lower-case table entry.
 ;   * Numeric references are decoded with Chr(): decimal "&#65;" and hex "&#x41;".
 ;   * Anything that cannot be decoded (unknown name, code Chr() cannot represent)
 ;     is left in the text unchanged.
 ;   * The text is scanned once from left to right, so "&amp;lt;" becomes "&lt;"
 ;     and is not decoded a second time.
 Static HT ; Forum Topic: www.autohotkey.com/forum/topic51342.html
 ; HT layout: "&name" immediately followed by its single replacement character,
 ; entries sorted by name. NBSP and the soft hyphen are produced with Chr() because
 ; they are invisible and easily lost when the script is copied around.
 If ( HT = "" ) ; Build the table on first call only
  HT := "&aacuteá&acircâ&acute´&aeligæ&agraveà&amp&&aringå&atildeã&au"
  . "mlä&bdquo„&brvbar¦&bull•&ccedilç&cedil¸&cent¢&circˆ&copy©&curren¤&dagger†&Dagger‡&deg"
  . "°&divide÷&eacuteé&ecircê&egraveè&ethð&eumlë&euro€&fnofƒ&frac12½&frac14¼&frac34¾&gt>&h"
  . "ellip…&iacuteí&icircî&iexcl¡&igraveì&iquest¿&iumlï&laquo«&ldquo“&lsaquo‹&lsquo‘&lt<&m"
  . "acr¯&mdash—&microµ&middot·&nbsp" . Chr(160) . "&ndash–&not¬&ntildeñ&oacuteó&ocircô&oeligœ&ograveò&or"
  . "dfª&ordmº&oslashø&otildeõ&oumlö&para¶&permil‰&plusmn±&pound£&quot""&raquo»&rdquo”&reg"
  . "®&rsaquo›&rsquo’&sbquo‚&scaronš&sect§&shy" . Chr(173) . "&sup1¹&sup2²&sup3³&szligß&thornþ&tilde˜&tim"
  . "es×&trade™&uacuteú&ucircû&ugraveù&uml¨&uumlü&yacuteý&yen¥&yumlÿ"
 TXT := RegExReplace( HTM, "<[^>]+>" ) ; Remove all tags between "<" and ">"
 OUT := "", P := 1 ; OUT = decoded text so far, P = next scan position in TXT
 Loop {
  ; Next well-formed reference: &#123`;  &#x1F`;  or  &name`;
  F := RegExMatch( TXT, "&(#[0-9]+|#[xX][0-9A-Fa-f]+|[A-Za-z][A-Za-z0-9]*)`;", M, P )
  If ( !F )
   Break
  OUT .= SubStr( TXT, P, F-P ) ; Copy the literal text preceding the reference
  C := "" ; Replacement character; stays empty when undecodable
  If ( SubStr( M1, 1, 1 ) = "#" ) {
   N := SubStr( M1, 2 )
   If ( InStr( N, "x" ) = 1 ) ; Hex form: turn "x41"/"X41" into "0x41"
    N := "0x" SubStr( N, 2 )
   C := Chr( N )
  } Else {
   E := "&" M1
   K := InStr( HT, E, 1 ) ; Exact-case lookup first
   If ( !K )
    K := InStr( HT, E ) ; Fall back to case-insensitive lookup
   If ( K ) {
    V := SubStr( HT, K+StrLen(E), 1 )
    ; If the next table character is a letter or digit, E only matched the
    ; beginning of a longer name (e.g. "&sup" inside "&sup1"): not a real hit.
    If ( !InStr( "abcdefghijklmnopqrstuvwxyz0123456789", V ) )
     C := V
   }
  }
  OUT .= StrLen( C ) ? C : M ; Keep the original text when nothing was decoded
  P := F + StrLen( M )
 }
 OUT .= SubStr( TXT, P ) ; Remainder after the last reference
 Return RegExReplace( OUT, "(^\s*|\s*$)" ) ; Remove leading/trailing white spaces
}
/*
; The array of special character entities was created with the following code
Loop % 256-33 {
Transform, F, HTML, % Chr( A := A_Index+33 )
If Strlen(F) > 1 && !Instr( F, "#" )
list .= "&" SubStr(F,2, StrLen(F)-2) Chr(A )
}
StringLower, List, List
Sort, List, D& U
Clipboard := List
MsgBox, 0, % StrLen( List ), % Clipboard
*/