diff options
-rw-r--r-- | !NetSurf/Resources/Aliases | 302 | ||||
-rw-r--r-- | !NetSurf/Resources/CSS,f79 | 2 | ||||
-rw-r--r-- | Makefile | 18 | ||||
-rw-r--r-- | Makefile.config | 8 | ||||
-rw-r--r-- | debug/fontd.c | 17 | ||||
-rw-r--r-- | gtk/gtk_gui.c | 14 | ||||
l--------- | gtk/res/Aliases | 1 | ||||
-rw-r--r-- | render/directory.c | 27 | ||||
-rw-r--r-- | render/html.c | 471 | ||||
-rw-r--r-- | render/html.h | 14 |
10 files changed, 869 insertions, 5 deletions
diff --git a/!NetSurf/Resources/Aliases b/!NetSurf/Resources/Aliases new file mode 100644 index 000000000..db61ff13e --- /dev/null +++ b/!NetSurf/Resources/Aliases @@ -0,0 +1,302 @@ +# > Unicode:Files.Aliases +# Mapping of character set encoding names to their canonical form +# +# Lines starting with a '#' are comments, blank lines are ignored. +# +# Based on http://www.iana.org/assignments/character-sets and +# http://www.iana.org/assignments/ianacharset-mib +# +# Canonical Form MIBenum Aliases... +# +US-ASCII 3 iso-ir-6 ANSI_X3.4-1986 ISO_646.irv:1991 ASCII ISO646-US ANSI_X3.4-1968 us IBM367 cp367 csASCII +ISO-10646-UTF-1 27 csISO10646UTF1 +ISO_646.basic:1983 28 ref csISO646basic1983 +INVARIANT 29 csINVARIANT +ISO_646.irv:1983 30 iso-ir-2 irv csISO2IntlRefVersion +BS_4730 20 iso-ir-4 ISO646-GB gb uk csISO4UnitedKingdom +NATS-SEFI 31 iso-ir-8-1 csNATSSEFI +NATS-SEFI-ADD 32 iso-ir-8-2 csNATSSEFIADD +NATS-DANO 33 iso-ir-9-1 csNATSDANO +NATS-DANO-ADD 34 iso-ir-9-2 csNATSDANOADD +SEN_850200_B 35 iso-ir-10 FI ISO646-FI ISO646-SE se csISO10Swedish +SEN_850200_C 21 iso-ir-11 ISO646-SE2 se2 csISO11SwedishForNames +KS_C_5601-1987 36 iso-ir-149 KS_C_5601-1989 KSC_5601 korean csKSC56011987 +ISO-2022-KR 37 csISO2022KR +EUC-KR 38 csEUCKR EUCKR +ISO-2022-JP 39 csISO2022JP +ISO-2022-JP-2 40 csISO2022JP2 +ISO-2022-CN 104 +ISO-2022-CN-EXT 105 +JIS_C6220-1969-jp 41 JIS_C6220-1969 iso-ir-13 katakana x0201-7 csISO13JISC6220jp +JIS_C6220-1969-ro 42 iso-ir-14 jp ISO646-JP csISO14JISC6220ro +IT 22 iso-ir-15 ISO646-IT csISO15Italian +PT 43 iso-ir-16 ISO646-PT csISO16Portuguese +ES 23 iso-ir-17 ISO646-ES csISO17Spanish +greek7-old 44 iso-ir-18 csISO18Greek7Old +latin-greek 45 iso-ir-19 csISO19LatinGreek +DIN_66003 24 iso-ir-21 de ISO646-DE csISO21German +NF_Z_62-010_(1973) 46 iso-ir-25 ISO646-FR1 csISO25French +Latin-greek-1 47 iso-ir-27 csISO27LatinGreek1 +ISO_5427 48 iso-ir-37 csISO5427Cyrillic +JIS_C6226-1978 49 iso-ir-42 csISO42JISC62261978 +BS_viewdata 50 iso-ir-47 csISO47BSViewdata 
+INIS 51 iso-ir-49 csISO49INIS +INIS-8 52 iso-ir-50 csISO50INIS8 +INIS-cyrillic 53 iso-ir-51 csISO51INISCyrillic +ISO_5427:1981 54 iso-ir-54 ISO5427Cyrillic1981 +ISO_5428:1980 55 iso-ir-55 csISO5428Greek +GB_1988-80 56 iso-ir-57 cn ISO646-CN csISO57GB1988 +GB_2312-80 57 iso-ir-58 chinese csISO58GB231280 +NS_4551-1 25 iso-ir-60 ISO646-NO no csISO60DanishNorwegian csISO60Norwegian1 +NS_4551-2 58 ISO646-NO2 iso-ir-61 no2 csISO61Norwegian2 +NF_Z_62-010 26 iso-ir-69 ISO646-FR fr csISO69French +videotex-suppl 59 iso-ir-70 csISO70VideotexSupp1 +PT2 60 iso-ir-84 ISO646-PT2 csISO84Portuguese2 +ES2 61 iso-ir-85 ISO646-ES2 csISO85Spanish2 +MSZ_7795.3 62 iso-ir-86 ISO646-HU hu csISO86Hungarian +JIS_C6226-1983 63 iso-ir-87 x0208 JIS_X0208-1983 csISO87JISX0208 +greek7 64 iso-ir-88 csISO88Greek7 +ASMO_449 65 ISO_9036 arabic7 iso-ir-89 csISO89ASMO449 +iso-ir-90 66 csISO90 +JIS_C6229-1984-a 67 iso-ir-91 jp-ocr-a csISO91JISC62291984a +JIS_C6229-1984-b 68 iso-ir-92 ISO646-JP-OCR-B jp-ocr-b csISO92JISC62991984b +JIS_C6229-1984-b-add 69 iso-ir-93 jp-ocr-b-add csISO93JIS62291984badd +JIS_C6229-1984-hand 70 iso-ir-94 jp-ocr-hand csISO94JIS62291984hand +JIS_C6229-1984-hand-add 71 iso-ir-95 jp-ocr-hand-add csISO95JIS62291984handadd +JIS_C6229-1984-kana 72 iso-ir-96 csISO96JISC62291984kana +ISO_2033-1983 73 iso-ir-98 e13b csISO2033 +ANSI_X3.110-1983 74 iso-ir-99 CSA_T500-1983 NAPLPS csISO99NAPLPS +ISO-8859-1 4 iso-ir-100 ISO_8859-1 ISO_8859-1:1987 latin1 l1 IBM819 CP819 csISOLatin1 8859_1 ISO8859-1 +ISO-8859-2 5 iso-ir-101 ISO_8859-2 ISO_8859-2:1987 latin2 l2 csISOLatin2 8859_2 ISO8859-2 +T.61-7bit 75 iso-ir-102 csISO102T617bit +T.61-8bit 76 T.61 iso-ir-103 csISO103T618bit +ISO-8859-3 6 iso-ir-109 ISO_8859-3 ISO_8859-3:1988 latin3 l3 csISOLatin3 8859_3 ISO8859-3 +ISO-8859-4 7 iso-ir-110 ISO_8859-4 ISO_8859-4:1988 latin4 l4 csISOLatin4 8859_4 ISO8859-4 +ECMA-cyrillic 77 iso-ir-111 KOI8-E csISO111ECMACyrillic +CSA_Z243.4-1985-1 78 iso-ir-121 ISO646-CA csa7-1 ca csISO121Canadian1 
+CSA_Z243.4-1985-2 79 iso-ir-122 ISO646-CA2 csa7-2 csISO122Canadian2 +CSA_Z243.4-1985-gr 80 iso-ir-123 csISO123CSAZ24341985gr +ISO-8859-6 9 iso-ir-127 ISO_8859-6 ISO_8859-6:1987 ECMA-114 ASMO-708 arabic csISOLatinArabic +ISO-8859-6-E 81 csISO88596E ISO_8859-6-E +ISO-8859-6-I 82 csISO88596I ISO_8859-6-I +ISO-8859-7 10 iso-ir-126 ISO_8859-7 ISO_8859-7:1987 ELOT_928 ECMA-118 greek greek8 csISOLatinGreek 8859_7 ISO8859-7 +T.101-G2 83 iso-ir-128 csISO128T101G2 +ISO-8859-8 11 iso-ir-138 ISO_8859-8 ISO_8859-8:1988 hebrew csISOLatinHebrew 8859_8 ISO8859-8 +ISO-8859-8-E 84 csISO88598E ISO_8859-8-E +ISO-8859-8-I 85 csISO88598I ISO_8859-8-I +CSN_369103 86 iso-ir-139 csISO139CSN369103 +JUS_I.B1.002 87 iso-ir-141 ISO646-YU js yu csISO141JUSIB1002 +ISO_6937-2-add 14 iso-ir-142 csISOTextComm +IEC_P27-1 88 iso-ir-143 csISO143IECP271 +ISO-8859-5 8 iso-ir-144 ISO_8859-5 ISO_8859-5:1988 cyrillic csISOLatinCyrillic 8859_5 ISO8859-5 +JUS_I.B1.003-serb 89 iso-ir-146 serbian csISO146Serbian +JUS_I.B1.003-mac 90 macedonian iso-ir-147 csISO147Macedonian +ISO-8859-9 12 iso-ir-148 ISO_8859-9 ISO_8859-9:1989 latin5 l5 csISOLatin5 8859_9 ISO8859-9 +greek-ccitt 91 iso-ir-150 csISO150 csISO150GreekCCITT +NC_NC00-10:81 92 cuba iso-ir-151 ISO646-CU csISO151Cuba +ISO_6937-2-25 93 iso-ir-152 csISO6937Add +GOST_19768-74 94 ST_SEV_358-88 iso-ir-153 csISO153GOST1976874 +ISO_8859-supp 95 iso-ir-154 latin1-2-5 csISO8859Supp +ISO_10367-box 96 iso-ir-155 csISO10367Box +ISO-8859-10 13 iso-ir-157 l6 ISO_8859-10:1992 csISOLatin6 latin6 8859_10 ISO8859-10 +latin-lap 97 lap iso-ir-158 csISO158Lap +JIS_X0212-1990 98 x0212 iso-ir-159 csISO159JISX02121990 +DS_2089 99 DS2089 ISO646-DK dk csISO646Danish +us-dk 100 csUSDK +dk-us 101 csDKUS +JIS_X0201 15 X0201 csHalfWidthKatakana +KSC5636 102 ISO646-KR csKSC5636 +ISO-10646-UCS-2 1000 csUnicode UCS-2 UCS2 +ISO-10646-UCS-4 1001 csUCS4 UCS-4 UCS4 +DEC-MCS 2008 dec csDECMCS +hp-roman8 2004 roman8 r8 csHPRoman8 +macintosh 2027 mac csMacintosh MACROMAN MAC-ROMAN X-MAC-ROMAN 
+IBM037 2028 cp037 ebcdic-cp-us ebcdic-cp-ca ebcdic-cp-wt ebcdic-cp-nl csIBM037 +IBM038 2029 EBCDIC-INT cp038 csIBM038 +IBM273 2030 CP273 csIBM273 +IBM274 2031 EBCDIC-BE CP274 csIBM274 +IBM275 2032 EBCDIC-BR cp275 csIBM275 +IBM277 2033 EBCDIC-CP-DK EBCDIC-CP-NO csIBM277 +IBM278 2034 CP278 ebcdic-cp-fi ebcdic-cp-se csIBM278 +IBM280 2035 CP280 ebcdic-cp-it csIBM280 +IBM281 2036 EBCDIC-JP-E cp281 csIBM281 +IBM284 2037 CP284 ebcdic-cp-es csIBM284 +IBM285 2038 CP285 ebcdic-cp-gb csIBM285 +IBM290 2039 cp290 EBCDIC-JP-kana csIBM290 +IBM297 2040 cp297 ebcdic-cp-fr csIBM297 +IBM420 2041 cp420 ebcdic-cp-ar1 csIBM420 +IBM423 2042 cp423 ebcdic-cp-gr csIBM423 +IBM424 2043 cp424 ebcdic-cp-he csIBM424 +IBM437 2011 cp437 437 csPC8CodePage437 +IBM500 2044 CP500 ebcdic-cp-be ebcdic-cp-ch csIBM500 +IBM775 2087 cp775 csPC775Baltic +IBM850 2009 cp850 850 csPC850Multilingual +IBM851 2045 cp851 851 csIBM851 +IBM852 2010 cp852 852 csPCp852 +IBM855 2046 cp855 855 csIBM855 +IBM857 2047 cp857 857 csIBM857 +IBM860 2048 cp860 860 csIBM860 +IBM861 2049 cp861 861 cp-is csIBM861 +IBM862 2013 cp862 862 csPC862LatinHebrew +IBM863 2050 cp863 863 csIBM863 +IBM864 2051 cp864 csIBM864 +IBM865 2052 cp865 865 csIBM865 +IBM866 2086 cp866 866 csIBM866 +IBM868 2053 CP868 cp-ar csIBM868 +IBM869 2054 cp869 869 cp-gr csIBM869 +IBM870 2055 CP870 ebcdic-cp-roece ebcdic-cp-yu csIBM870 +IBM871 2056 CP871 ebcdic-cp-is csIBM871 +IBM880 2057 cp880 EBCDIC-Cyrillic csIBM880 +IBM891 2058 cp891 csIBM891 +IBM903 2059 cp903 csIBM903 +IBM904 2060 cp904 904 csIBBM904 +IBM905 2061 CP905 ebcdic-cp-tr csIBM905 +IBM918 2062 CP918 ebcdic-cp-ar2 csIBM918 +IBM1026 2063 CP1026 csIBM1026 +EBCDIC-AT-DE 2064 csIBMEBCDICATDE +EBCDIC-AT-DE-A 2065 csEBCDICATDEA +EBCDIC-CA-FR 2066 csEBCDICCAFR +EBCDIC-DK-NO 2067 csEBCDICDKNO +EBCDIC-DK-NO-A 2068 csEBCDICDKNOA +EBCDIC-FI-SE 2069 csEBCDICFISE +EBCDIC-FI-SE-A 2070 csEBCDICFISEA +EBCDIC-FR 2071 csEBCDICFR +EBCDIC-IT 2072 csEBCDICIT +EBCDIC-PT 2073 csEBCDICPT +EBCDIC-ES 2074 csEBCDICES 
+EBCDIC-ES-A 2075 csEBCDICESA +EBCDIC-ES-S 2076 csEBCDICESS +EBCDIC-UK 2077 csEBCDICUK +EBCDIC-US 2078 csEBCDICUS +UNKNOWN-8BIT 2079 csUnknown8BiT +MNEMONIC 2080 csMnemonic +MNEM 2081 csMnem +VISCII 2082 csVISCII +VIQR 2083 csVIQR +KOI8-R 2084 csKOI8R +KOI8-U 2088 +IBM00858 2089 CCSID00858 CP00858 PC-Multilingual-850+euro +IBM00924 2090 CCSID00924 CP00924 ebcdic-Latin9--euro +IBM01140 2091 CCSID01140 CP01140 ebcdic-us-37+euro +IBM01141 2092 CCSID01141 CP01141 ebcdic-de-273+euro +IBM01142 2093 CCSID01142 CP01142 ebcdic-dk-277+euro ebcdic-no-277+euro +IBM01143 2094 CCSID01143 CP01143 ebcdic-fi-278+euro ebcdic-se-278+euro +IBM01144 2095 CCSID01144 CP01144 ebcdic-it-280+euro +IBM01145 2096 CCSID01145 CP01145 ebcdic-es-284+euro +IBM01146 2097 CCSID01146 CP01146 ebcdic-gb-285+euro +IBM01147 2098 CCSID01147 CP01147 ebcdic-fr-297+euro +IBM01148 2099 CCSID01148 CP01148 ebcdic-international-500+euro +IBM01149 2100 CCSID01149 CP01149 ebcdic-is-871+euro +Big5-HKSCS 2101 +IBM1047 2102 IBM-1047 +PTCP154 2103 csPTCP154 PT154 CP154 Cyrillic-Asian +Amiga-1251 2104 Ami1251 Amiga1251 Ami-1251 +KOI7-switched 2105 +UNICODE-1-1 1010 csUnicode11 +SCSU 1011 +UTF-7 1012 +UTF-16BE 1013 +UTF-16LE 1014 +UTF-16 1015 +CESU-8 1016 csCESU-8 +UTF-32 1017 +UTF-32BE 1018 +UTF-32LE 1019 +BOCU-1 1020 csBOCU-1 +UNICODE-1-1-UTF-7 103 csUnicode11UTF7 +UTF-8 106 UNICODE-1-1-UTF-8 UNICODE-2-0-UTF-8 utf8 +ISO-8859-13 109 8859_13 ISO8859-13 +ISO-8859-14 110 iso-ir-199 ISO_8859-14:1998 ISO_8859-14 latin8 iso-celtic l8 8859_14 ISO8859-14 +ISO-8859-15 111 ISO_8859-15 Latin-9 8859_15 ISO8859-15 +ISO-8859-16 112 iso-ir-226 ISO_8859-16:2001 ISO_8859-16 latin10 l10 +GBK 113 CP936 MS936 windows-936 +GB18030 114 +OSD_EBCDIC_DF04_15 115 +OSD_EBCDIC_DF03_IRV 116 +OSD_EBCDIC_DF04_1 117 +JIS_Encoding 16 csJISEncoding +Shift_JIS 17 MS_Kanji csShiftJIS X-SJIS Shift-JIS +EUC-JP 18 csEUCPkdFmtJapanese Extended_UNIX_Code_Packed_Format_for_Japanese EUCJP +Extended_UNIX_Code_Fixed_Width_for_Japanese 19 csEUCFixWidJapanese 
+ISO-10646-UCS-Basic 1002 csUnicodeASCII +ISO-10646-Unicode-Latin1 1003 csUnicodeLatin1 ISO-10646 +ISO-Unicode-IBM-1261 1005 csUnicodeIBM1261 +ISO-Unicode-IBM-1268 1006 csUnicodeIBM1268 +ISO-Unicode-IBM-1276 1007 csUnicodeIBM1276 +ISO-Unicode-IBM-1264 1008 csUnicodeIBM1264 +ISO-Unicode-IBM-1265 1009 csUnicodeIBM1265 +ISO-8859-1-Windows-3.0-Latin-1 2000 csWindows30Latin1 +ISO-8859-1-Windows-3.1-Latin-1 2001 csWindows31Latin1 +ISO-8859-2-Windows-Latin-2 2002 csWindows31Latin2 +ISO-8859-9-Windows-Latin-5 2003 csWindows31Latin5 +Adobe-Standard-Encoding 2005 csAdobeStandardEncoding +Ventura-US 2006 csVenturaUS +Ventura-International 2007 csVenturaInternational +PC8-Danish-Norwegian 2012 csPC8DanishNorwegian +PC8-Turkish 2014 csPC8Turkish +IBM-Symbols 2015 csIBMSymbols +IBM-Thai 2016 csIBMThai +HP-Legal 2017 csHPLegal +HP-Pi-font 2018 csHPPiFont +HP-Math8 2019 csHPMath8 +Adobe-Symbol-Encoding 2020 csHPPSMath +HP-DeskTop 2021 csHPDesktop +Ventura-Math 2022 csVenturaMath +Microsoft-Publishing 2023 csMicrosoftPublishing +Windows-31J 2024 csWindows31J +GB2312 2025 csGB2312 EUC-CN EUCCN CN-GB +Big5 2026 csBig5 BIG-FIVE BIG-5 CN-BIG5 BIG_FIVE +windows-1250 2250 CP1250 MS-EE +windows-1251 2251 CP1251 MS-CYRL +windows-1252 2252 CP1252 MS-ANSI +windows-1253 2253 CP1253 MS-GREEK +windows-1254 2254 CP1254 MS-TURK +windows-1255 2255 +windows-1256 2256 CP1256 MS-ARAB +windows-1257 2257 CP1257 WINBALTRIM +windows-1258 2258 +TIS-620 2259 +HZ-GB-2312 2085 + +# Additional encodings not defined by IANA + +# Arbitrary allocations +#CP737 3001 +#CP853 3002 +#CP856 3003 +CP874 3004 WINDOWS-874 +#CP922 3005 +#CP1046 3006 +#CP1124 3007 +#CP1125 3008 WINDOWS-1125 +#CP1129 3009 +#CP1133 3010 IBM-CP1133 +#CP1161 3011 IBM-1161 IBM1161 CSIBM1161 +#CP1162 3012 IBM-1162 IBM1162 CSIBM1162 +#CP1163 3013 IBM-1163 IBM1163 CSIBM1163 +#GEORGIAN-ACADEMY 3014 +#GEORGIAN-PS 3015 +#KOI8-RU 3016 +#KOI8-T 3017 +#MACARABIC 3018 X-MAC-ARABIC MAC-ARABIC +#MACCROATIAN 3019 X-MAC-CROATIAN MAC-CROATIAN +#MACGREEK 3020 
X-MAC-GREEK MAC-GREEK +#MACHEBREW 3021 X-MAC-HEBREW MAC-HEBREW +#MACICELAND 3022 X-MAC-ICELAND MAC-ICELAND +#MACROMANIA 3023 X-MAC-ROMANIA MAC-ROMANIA +#MACTHAI 3024 X-MAC-THAI MAC-THAI +#MACTURKISH 3025 X-MAC-TURKISH MAC-TURKISH +#MULELAO-1 3026 + +# From Unicode Lib +ISO-IR-182 4000 +ISO-IR-197 4002 +ISO-2022-JP-1 4008 +MACCYRILLIC 4009 X-MAC-CYRILLIC MAC-CYRILLIC +MACUKRAINE 4010 X-MAC-UKRAINIAN MAC-UKRAINIAN +MACCENTRALEUROPE 4011 X-MAC-CENTRALEURROMAN MAC-CENTRALEURROMAN +JOHAB 4012 +ISO-8859-11 4014 iso-ir-166 ISO_8859-11 ISO8859-11 8859_11 +X-CURRENT 4999 X-SYSTEM +X-ACORN-LATIN1 5001 +X-ACORN-FUZZY 5002 diff --git a/!NetSurf/Resources/CSS,f79 b/!NetSurf/Resources/CSS,f79 index 640c8450a..c4ba88846 100644 --- a/!NetSurf/Resources/CSS,f79 +++ b/!NetSurf/Resources/CSS,f79 @@ -174,3 +174,5 @@ fieldset { display: block; border: thin solid #888; margin: 1.12em 0; } [align=left] { text-align: left; } [align=center] { text-align: center; } [align=right] { text-align: right; } + +script, style { display: none; } @@ -245,6 +245,12 @@ ifeq ($(TARGET),riscos) $(eval $(call feature_enabled,SPRITE,-DWITH_SPRITE,,RISC OS sprite rendering)) $(eval $(call feature_enabled,ARTWORKS,-DWITH_ARTWORKS,,ArtWorks rendering)) $(eval $(call feature_enabled,PLUGINS,-DWITH_PLUGIN,,Plugin protocol support)) + ifeq ($(HOST),riscos) + $(eval $(call feature_enabled,HUBBUB,-DWITH_HUBBUB,-lhubbub -lparserutils,Hubbub HTML parser)) + else + NETSURF_FEATURE_HUBBUB_CFLAGS := -DWITH_HUBBUB + $(eval $(call pkg_config_find_and_add,HUBBUB,libhubbub,Hubbub HTML parser)) + endif endif # ---------------------------------------------------------------------------- @@ -267,10 +273,12 @@ ifeq ($(TARGET),gtk) # define additional CFLAGS and LDFLAGS requirements for pkg-configed libs here NETSURF_FEATURE_RSVG_CFLAGS := -DWITH_RSVG NETSURF_FEATURE_ROSPRITE_CFLAGS := -DWITH_NSSPRITE + NETSURF_FEATURE_HUBBUB_CFLAGS := -DWITH_HUBBUB # add a line similar to below for each optional pkg-configed lib here $(eval 
$(call pkg_config_find_and_add,RSVG,librsvg-2.0,SVG rendering)) $(eval $(call pkg_config_find_and_add,ROSPRITE,librosprite,RISC OS sprite rendering)) + $(eval $(call pkg_config_find_and_add,HUBBUB,libhubbub,Hubbub HTML parser)) GTKCFLAGS := -std=c99 -Dgtk -Dnsgtk \ -DGTK_DISABLE_DEPRECATED \ @@ -399,10 +407,14 @@ ifeq ($(TARGET),debug) -D_XOPEN_SOURCE=600 \ -D_POSIX_C_SOURCE=200112L \ -D_NETBSD_SOURCE \ - $(WARNFLAGS) -I. -I../../libsprite/trunk/ -g $(OPT0FLAGS) \ - $(shell $(PKG_CONFIG) --cflags librosprite) \ + $(WARNFLAGS) -I. -g $(OPT0FLAGS) \ $(shell xml2-config --cflags) - LDFLAGS += $(shell $(PKG_CONFIG) --libs librosprite) + LDFLAGS += $(shell $(PKG_CONFIG) --libs libxml-2.0 libcurl openssl) + + $(eval $(call pkg_config_find_and_add,RSVG,librsvg-2.0,SVG rendering)) + $(eval $(call pkg_config_find_and_add,ROSPRITE,librosprite,RISC OS sprite rendering)) + $(eval $(call pkg_config_find_and_add,HUBBUB,libhubbub,Hubbub HTML parser)) + $(eval $(call pkg_config_find_and_add,HUBBUB,libparserutils,Hubbub HTML parser)) endif # ---------------------------------------------------------------------------- diff --git a/Makefile.config b/Makefile.config index dad9c0273..362db38f9 100644 --- a/Makefile.config +++ b/Makefile.config @@ -53,6 +53,10 @@ NETSURF_USE_LIBICONV_PLUG := YES # ---------------------------------------------------------------------------- ifeq ($(TARGET),riscos) + # Enable using Hubbub to parse HTML rather than libxml2 + # Valid options: YES, NO + NETSURF_USE_HUBBUB := YES + # Use James Bursa's libsvgtiny for rendering SVG images # Valid options: YES, NO NETSURF_USE_NSSVG := YES @@ -87,6 +91,10 @@ ifeq ($(TARGET),gtk) # Where to install the netsurf binary NETSURF_GTK_BIN := /usr/local/bin/ + # Enable using Hubbub to parse HTML rather than libxml2 + # Valid options: YES, NO, AUTO + NETSURF_USE_HUBBUB := AUTO + # Use librsvg in conjunction with Cairo to render SVG images # Valid options: YES, NO, AUTO NETSURF_USE_RSVG := AUTO diff --git a/debug/fontd.c 
b/debug/fontd.c index 50fabc59f..fa64b67c4 100644 --- a/debug/fontd.c +++ b/debug/fontd.c @@ -21,6 +21,22 @@ #include "render/font.h" +static bool nsfont_width(const struct css_style *style, + const char *string, size_t length, int *width); +static bool nsfont_position_in_string(const struct css_style *style, + const char *string, size_t length, + int x, size_t *char_offset, int *actual_x); +static bool nsfont_split(const struct css_style *style, + const char *string, size_t length, + int x, size_t *char_offset, int *actual_x); + +const struct font_functions nsfont = { + nsfont_width, + nsfont_position_in_string, + nsfont_split +}; + + bool nsfont_width(const struct css_style *style, const char *string, size_t length, int *width) @@ -63,3 +79,4 @@ bool nsfont_split(const struct css_style *style, *actual_x = *char_offset * 10; return true; } + diff --git a/gtk/gtk_gui.c b/gtk/gtk_gui.c index 73e3068be..3967cd1cb 100644 --- a/gtk/gtk_gui.c +++ b/gtk/gtk_gui.c @@ -31,6 +31,9 @@ #include <gdk/gdkkeysyms.h> #include <gtk/gtk.h> #include <glade/glade.h> +#ifdef WITH_HUBBUB +#include <hubbub/hubbub.h> +#endif #include "content/content.h" #include "content/fetch.h" #include "content/fetchers/fetch_curl.h" @@ -165,6 +168,13 @@ static void check_homedir(void) } } + +static void *myrealloc(void *ptr, size_t len, void *pw) +{ + return realloc(ptr, len); +} + + void gui_init(int argc, char** argv) { char buf[PATH_MAX]; @@ -182,6 +192,10 @@ void gui_init(int argc, char** argv) LOG(("Using '%s' as Resources directory", buf)); res_dir_location = strdup(buf); + find_resource(buf, "Aliases", "./gtk/res/Aliases"); + LOG(("Using '%s' as Aliases file", buf)); + hubbub_initialise(buf, myrealloc, NULL); + glade_init(); gladeWindows = glade_xml_new(glade_file_location, NULL, NULL); if (gladeWindows == NULL) diff --git a/gtk/res/Aliases b/gtk/res/Aliases new file mode 120000 index 000000000..a95a734da --- /dev/null +++ b/gtk/res/Aliases @@ -0,0 +1 @@ +../../!NetSurf/Resources/Aliases
\ No newline at end of file diff --git a/render/directory.c b/render/directory.c index 0f3dda03b..754449df5 100644 --- a/render/directory.c +++ b/render/directory.c @@ -27,6 +27,9 @@ #include <stdlib.h> #include <sys/stat.h> #include <time.h> +#ifdef WITH_HUBBUB +#include <hubbub/parser.h> +#endif #include <libxml/HTMLparser.h> #include "content/content.h" #include "render/directory.h" @@ -45,7 +48,12 @@ bool directory_create(struct content *c, const char *params[]) { /* html_create() must have broadcast MSG_ERROR already, so we * don't need to. */ return false; +#ifndef WITH_HUBBUB htmlParseChunk(c->data.html.parser, header, sizeof(header) - 1, 0); +#else + hubbub_parser_parse_chunk(c->data.html.parser, + (uint8_t *) header, sizeof(header) - 1); +#endif return true; } @@ -92,7 +100,11 @@ bool directory_convert(struct content *c, int width, int height) { "<body>\n<h1>\nIndex of %s</h1>\n<hr><pre>", nice_path, nice_path); free(nice_path); +#ifndef WITH_HUBBUB htmlParseChunk(c->data.html.parser, buffer, strlen(buffer), 0); +#else + hubbub_parser_parse_chunk(c->data.html.parser, buffer, strlen(buffer)); +#endif res = url_parent(c->url, &up); if (res == URL_FUNC_OK) { @@ -100,8 +112,13 @@ bool directory_convert(struct content *c, int width, int height) { if ((res == URL_FUNC_OK) && !compare) { snprintf(buffer, sizeof(buffer), "<a href=\"..\">[..]</a>\n"); +#ifndef WITH_HUBBUB htmlParseChunk(c->data.html.parser, buffer, strlen(buffer), 0); +#else + hubbub_parser_parse_chunk(c->data.html.parser, + buffer, strlen(buffer)); +#endif } free(up); } @@ -118,11 +135,21 @@ bool directory_convert(struct content *c, int width, int height) { snprintf(buffer, sizeof(buffer), "<a href=\"%s/%s\">%s</a>\n", c->url, entry->d_name, entry->d_name); +#ifndef WITH_HUBBUB htmlParseChunk(c->data.html.parser, buffer, strlen(buffer), 0); +#else + hubbub_parser_parse_chunk(c->data.html.parser, + buffer, strlen(buffer)); +#endif } closedir(parent); +#ifndef WITH_HUBBUB 
htmlParseChunk(c->data.html.parser, footer, sizeof(footer) - 1, 0); +#else + hubbub_parser_parse_chunk(c->data.html.parser, + (uint8_t *) footer, sizeof(footer) - 1); +#endif c->type = CONTENT_HTML; return html_convert(c, width, height); } diff --git a/render/html.c b/render/html.c index cc581a771..7d4a55a5d 100644 --- a/render/html.c +++ b/render/html.c @@ -20,12 +20,21 @@ * Content for text/html (implementation). */ +#define _GNU_SOURCE /* for strndup() */ + #include <assert.h> #include <ctype.h> #include <stdint.h> #include <string.h> #include <strings.h> #include <stdlib.h> +#ifdef WITH_HUBBUB +#include <hubbub/hubbub.h> +#include <hubbub/parser.h> +#include <hubbub/tree.h> +#endif +#include <libxml/tree.h> +#include <libxml/parser.h> #include <libxml/parserInternals.h> #include "utils/config.h" #include "content/content.h" @@ -87,6 +96,380 @@ static const char empty_document[] = "</html>"; +#ifdef WITH_HUBBUB + + +#define NUM_NAMESPACES 7 +const char const *ns_prefixes[NUM_NAMESPACES] = + { NULL, NULL, "math", "svg", "xlink", "xml", "xmlns" }; + +const char const *ns_urls[NUM_NAMESPACES] = { + NULL, + "http://www.w3.org/1999/xhtml", + "http://www.w3.org/1998/Math/MathML", + "http://www.w3.org/2000/svg", + "http://www.w3.org/1999/xlink", + "http://www.w3.org/XML/1998/namespace", + "http://www.w3.org/2000/xmlns/" +}; + +xmlNs *ns_ns[NUM_NAMESPACES]; + +static int create_comment(void *ctx, const hubbub_string *data, void **result); +static int create_doctype(void *ctx, const hubbub_doctype *doctype, + void **result); +static int create_element(void *ctx, const hubbub_tag *tag, void **result); +static int create_text(void *ctx, const hubbub_string *data, void **result); +static int ref_node(void *ctx, void *node); +static int unref_node(void *ctx, void *node); +static int append_child(void *ctx, void *parent, void *child, void **result); +static int insert_before(void *ctx, void *parent, void *child, void *ref_child, + void **result); +static int remove_child(void 
*ctx, void *parent, void *child, void **result); +static int clone_node(void *ctx, void *node, bool deep, void **result); +static int reparent_children(void *ctx, void *node, void *new_parent); +static int get_parent(void *ctx, void *node, bool element_only, void **result); +static int has_children(void *ctx, void *node, bool *result); +static int form_associate(void *ctx, void *form, void *node); +static int add_attributes(void *ctx, void *node, + const hubbub_attribute *attributes, uint32_t n_attributes); +static int set_quirks_mode(void *ctx, hubbub_quirks_mode mode); +static int change_encoding(void *ctx, const char *mibenum); + +static hubbub_tree_handler tree_handler = { + create_comment, + create_doctype, + create_element, + create_text, + ref_node, + unref_node, + append_child, + insert_before, + remove_child, + clone_node, + reparent_children, + get_parent, + has_children, + form_associate, + add_attributes, + set_quirks_mode, + change_encoding, + NULL +}; + + + +/*** Tree construction functions ***/ + +int create_comment(void *ctx, const hubbub_string *data, void **result) +{ + xmlNode *node = xmlNewComment(NULL); + + node->content = xmlStrndup(data->ptr, data->len); + node->_private = (void *)1; + *result = node; + + return 0; +} + +int create_doctype(void *ctx, const hubbub_doctype *doctype, void **result) +{ + /* Make a node that doesn't really exist, then don't append it + * later. 
*/ + xmlNode *node = xmlNewComment(NULL); + + node->_private = (void *)1; + *result = node; + + return 0; +} + +int create_element(void *ctx, const hubbub_tag *tag, void **result) +{ + struct content *c = ctx; + struct content_html_data *html = &c->data.html; + + char *name = strndup((const char *) tag->name.ptr, + tag->name.len); + + xmlNode *node = xmlNewNode(NULL, BAD_CAST name); + node->_private = (void *)1; + *result = node; + + if (html->firstelem == true) { + for (size_t i = 1; i < NUM_NAMESPACES; i++) { + ns_ns[i] = xmlNewNs(node, + BAD_CAST ns_urls[i], + BAD_CAST ns_prefixes[i]); + } + html->firstelem = false; + } + + xmlSetNs(node, ns_ns[tag->ns]); + + free(name); + + for (size_t i = 0; i < tag->n_attributes; i++) { + hubbub_attribute *attr = &tag->attributes[i]; + + char *name = strndup((const char *) attr->name.ptr, + attr->name.len); + char *value = strndup((const char *) attr->value.ptr, + attr->value.len); + + if (attr->ns == HUBBUB_NS_NULL) { + xmlNewProp(node, BAD_CAST name, BAD_CAST value); + } else { + xmlNewNsProp(node, ns_ns[attr->ns], BAD_CAST name, + BAD_CAST value); + } + + free(name); + free(value); + } + + return 0; +} + +int create_text(void *ctx, const hubbub_string *data, void **result) +{ + xmlNode *node = xmlNewTextLen(BAD_CAST data->ptr, data->len); + node->_private = (void *)1; + *result = node; + + return 0; +} + +int ref_node(void *ctx, void *node) +{ + xmlNode *n = node; + n->_private = (void *)((uintptr_t)n->_private + 1); + + return 0; +} + +int unref_node(void *ctx, void *node) +{ + xmlNode *n = node; + n->_private = (void *)((uintptr_t)n->_private - 1); + + if (n->_private == (void *)0 && n->parent == NULL) { + xmlFreeNode(n); + } + + return 0; +} + +int append_child(void *ctx, void *parent, void *child, void **result) +{ + xmlNode *nparent = parent; + xmlNode *nchild = child; + + if (nchild->type == XML_TEXT_NODE && + nparent->last != NULL && + nparent->last->type == XML_TEXT_NODE) { + xmlNode *clone; + clone_node(ctx, 
nchild, false, (void **) &clone); + *result = xmlAddChild(parent, clone); + /* node referenced by clone_node */ + } else { + *result = xmlAddChild(parent, child); + ref_node(ctx, *result); + } + + return 0; +} + +/* insert 'child' before 'ref_child', under 'parent' */ +int insert_before(void *ctx, void *parent, void *child, void *ref_child, + void **result) +{ + *result = xmlAddPrevSibling(ref_child, child); + ref_node(ctx, *result); + + return 0; +} + +int remove_child(void *ctx, void *parent, void *child, void **result) +{ + xmlUnlinkNode(child); + *result = child; + + ref_node(ctx, *result); + + return 0; +} + +int clone_node(void *ctx, void *node, bool deep, void **result) +{ + xmlNode *n = xmlCopyNode(node, deep ? 1 : 2); + n->_private = (void *)1; + *result = n; + + return 0; +} + +/* Take all of the child nodes of "node" and append them to "new_parent" */ +int reparent_children(void *ctx, void *node, void *new_parent) +{ + xmlNode *n = (xmlNode *) node; + xmlNode *p = (xmlNode *) new_parent; + + for (xmlNode *child = n->children; child != NULL; ) { + xmlNode *next = child->next; + + xmlUnlinkNode(child); + + if (xmlAddChild(p, child) == NULL) + return 1; + + child = next; + } + + return 0; +} + +int get_parent(void *ctx, void *node, bool element_only, void **result) +{ + *result = ((xmlNode *)node)->parent; + + if (*result != NULL && element_only && + ((xmlNode *) *result)->type != XML_ELEMENT_NODE) + *result = NULL; + + if (*result != NULL) + ref_node(ctx, *result); + + return 0; +} + +int has_children(void *ctx, void *node, bool *result) +{ + *result = ((xmlNode *)node)->children ? 
true : false; + + return 0; +} + +int form_associate(void *ctx, void *form, void *node) +{ + return 0; +} + +int add_attributes(void *ctx, void *node, + const hubbub_attribute *attributes, uint32_t n_attributes) +{ + for (size_t i = 0; i < n_attributes; i++) { + const hubbub_attribute *attr = &attributes[i]; + + char *name = strndup((const char *) attr->name.ptr, + attr->name.len); + char *value = strndup((const char *) attr->value.ptr, + attr->value.len); + + if (attr->ns == HUBBUB_NS_NULL) { + xmlNewProp(node, BAD_CAST name, BAD_CAST value); + } else { + xmlNewNsProp(node, ns_ns[attr->ns], BAD_CAST name, + BAD_CAST value); + } + + free(name); + free(value); + } + + return 0; +} + +int set_quirks_mode(void *ctx, hubbub_quirks_mode mode) +{ + return 0; +} + +int change_encoding(void *ctx, const char *name) +{ + struct content *c = ctx; + struct content_html_data *html = &c->data.html; + + /* If we have an encoding here, it means we are *certain* */ + if (html->encoding) { + return 0; + } + + /* Find the confidence otherwise (can only be from a BOM) */ + uint32_t source; + const char *charset = hubbub_parser_read_charset(html->parser, &source); + + if (source == HUBBUB_CHARSET_CONFIDENT) { + html->encoding_source = ENCODING_SOURCE_DETECTED; + html->encoding = (char *) charset; + return 0; + } + + /* So here we have something of confidence tentative... */ + /* http://www.whatwg.org/specs/web-apps/current-work/#change */ + + /* 2. "If the new encoding is identical or equivalent to the encoding + * that is already being used to interpret the input stream, then set + * the confidence to confident and abort these steps." */ + + /* Whatever happens, the encoding should be set here; either for + * reprocessing with a different charset, or for confirming that the + * charset is in fact correct */ + html->encoding = (char *) name; + html->encoding_source = ENCODING_SOURCE_META; + + /* Equal encodings will have the same string pointers */ + return (charset == name) ? 
0 : 1; +} + + +/** + * Talloc'd-up allocation hook for Hubbub. + */ +static void *html_hubbub_realloc(void *ptr, size_t len, void *pw) +{ + return talloc_realloc_size(pw, ptr, len); +} + + + +/** + * Create, set up, and whatnot, a Hubbub parser instance, along with the + * relevant libxml2 bits. + */ +static int html_create_parser(struct content *c) +{ + struct content_html_data *html = &c->data.html; + hubbub_parser_optparams param; + + html->parser = hubbub_parser_create(html->encoding, + html_hubbub_realloc, + c); + if (!html->parser) + return 1; + + html->document = xmlNewDoc(BAD_CAST "1.0"); + if (!html->document) + return 1; + + html->tree_handler = tree_handler; + html->tree_handler.ctx = c; + param.tree_handler = &html->tree_handler; + hubbub_parser_setopt(html->parser, HUBBUB_PARSER_TREE_HANDLER, &param); + + param.document_node = html->document; + hubbub_parser_setopt(html->parser, HUBBUB_PARSER_DOCUMENT_NODE, &param); + + return 0; +} + + + +#endif + + + + /** * Create a CONTENT_HTML. 
* @@ -101,6 +484,10 @@ bool html_create(struct content *c, const char *params[]) union content_msg_data msg_data; html->parser = 0; +#ifdef WITH_HUBBUB + html->document = 0; + html->firstelem = true; +#endif html->encoding_handler = 0; html->encoding = 0; html->getenc = true; @@ -135,16 +522,26 @@ bool html_create(struct content *c, const char *params[]) } } +#ifndef WITH_HUBBUB html->parser = htmlCreatePushParserCtxt(0, 0, "", 0, 0, XML_CHAR_ENCODING_NONE); if (!html->parser) goto no_memory; +#else + + /* Set up the parser, libxml2 document, and that */ + if (html_create_parser(c) != 0) + goto no_memory; +#endif + +#ifndef WITH_HUBBUB if (html->encoding) { /* an encoding was specified in the Content-Type header */ if (!html_set_parser_encoding(c, html->encoding)) return false; } +#endif return true; @@ -165,6 +562,7 @@ bool html_process_data(struct content *c, char *data, unsigned int size) { unsigned long x; +#ifndef WITH_HUBBUB if (c->data.html.getenc) { /* No encoding was specified in the Content-Type header. * Attempt to detect if the encoding is not 8-bit. 
If the @@ -190,13 +588,36 @@ bool html_process_data(struct content *c, char *data, unsigned int size) if (size == 0) return true; } +#endif + +#ifdef WITH_HUBBUB + hubbub_error err; +#endif for (x = 0; x + CHUNK <= size; x += CHUNK) { +#ifdef WITH_HUBBUB + err = hubbub_parser_parse_chunk( + c->data.html.parser, data + x, CHUNK); + if (err == HUBBUB_ENCODINGCHANGE) { + goto encoding_change; + } +#else htmlParseChunk(c->data.html.parser, data + x, CHUNK, 0); +#endif gui_multitask(); } + +#ifdef WITH_HUBBUB + err = hubbub_parser_parse_chunk( + c->data.html.parser, data + x, (size - x)); + if (err == HUBBUB_ENCODINGCHANGE) { + goto encoding_change; + } +#else htmlParseChunk(c->data.html.parser, data + x, (int) (size - x), 0); +#endif +#ifndef WITH_HUBBUB if (!c->data.html.encoding && c->data.html.parser->input->encoding) { /* The encoding was not in headers or detected, * and the parser found a <meta http-equiv="content-type" @@ -259,8 +680,36 @@ bool html_process_data(struct content *c, char *data, unsigned int size) if (!html_process_data(c, c->source_data, c->source_size)) return false; } +#endif return true; + +#ifdef WITH_HUBBUB + +encoding_change: + + /* Free up hubbub, libxml2 etc */ + hubbub_parser_destroy(c->data.html.parser); + if (c->data.html.document) { + xmlFreeDoc(c->data.html.document); + } + + /* Set up the parser, libxml2 document, and that */ + if (html_create_parser(c) != 0) { + union content_msg_data msg_data; + + msg_data.error = messages_get("NoMemory"); + content_broadcast(c, CONTENT_MSG_ERROR, msg_data); + return false; + } + + /* Recurse to reprocess all that data. This is safe because + * the encoding is now specified at parser-start which means + * it cannot be changed again. 
*/ + return html_process_data(c, c->source_data, c->source_size); + +#endif + } @@ -274,6 +723,7 @@ bool html_process_data(struct content *c, char *data, unsigned int size) bool html_set_parser_encoding(struct content *c, const char *encoding) { +#ifndef WITH_HUBBUB struct content_html_data *html = &c->data.html; xmlError *error; char error_message[500]; @@ -322,6 +772,7 @@ bool html_set_parser_encoding(struct content *c, const char *encoding) /* Ensure noone else attempts to reset the encoding */ html->getenc = false; +#endif return true; } @@ -412,14 +863,28 @@ bool html_convert(struct content *c, int width, int height) /* finish parsing */ if (c->source_size == 0) +#ifndef WITH_HUBBUB htmlParseChunk(c->data.html.parser, empty_document, sizeof empty_document, 0); +#else + hubbub_parser_parse_chunk(c->data.html.parser, + (uint8_t *) empty_document, + sizeof empty_document); +#endif + +#ifndef WITH_HUBBUB htmlParseChunk(c->data.html.parser, "", 0, 1); document = c->data.html.parser->myDoc; /*xmlDebugDumpDocument(stderr, c->data.html.parser->myDoc);*/ htmlFreeParserCtxt(c->data.html.parser); c->data.html.parser = 0; - +#else + hubbub_parser_completed(c->data.html.parser); + hubbub_parser_destroy(c->data.html.parser); + c->data.html.parser = 0; + document = c->data.html.document; + /*xmlDebugDumpDocument(stderr, document);*/ +#endif if (!document) { LOG(("Parsing failed")); msg_data.error = messages_get("ParsingFail"); @@ -1733,7 +2198,11 @@ void html_destroy(struct content *c) } if (c->data.html.parser) +#ifndef WITH_HUBBUB htmlFreeParserCtxt(c->data.html.parser); +#else + hubbub_parser_destroy(c->data.html.parser); +#endif /* Free base target */ if (c->data.html.base_target) { diff --git a/render/html.h b/render/html.h index 5851b83b8..29691ea26 100644 --- a/render/html.h +++ b/render/html.h @@ -26,6 +26,10 @@ #define _NETSURF_RENDER_HTML_H_ #include <stdbool.h> +#ifdef WITH_HUBBUB +#include <hubbub/parser.h> +#include <hubbub/tree.h> +#endif #include 
<libxml/HTMLparser.h> #include "content/content_type.h" #include "css/css.h" @@ -114,11 +118,19 @@ struct content_html_iframe { /** Data specific to CONTENT_HTML. */ struct content_html_data { +#ifndef WITH_HUBBUB htmlParserCtxt *parser; /**< HTML parser context. */ +#else + hubbub_parser *parser; /**< HTML parser context. */ + hubbub_tree_handler tree_handler; + xmlDoc *document; + bool firstelem; +#endif + /** HTML parser encoding handler. */ xmlCharEncodingHandler *encoding_handler; - char *encoding; /**< Encoding of source, 0 if unknown. */ + char *encoding; /**< Encoding of source, 0 if unknown. */ enum { ENCODING_SOURCE_HEADER, ENCODING_SOURCE_DETECTED, ENCODING_SOURCE_META } encoding_source; /**< Source of encoding information. */ |