summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--!NetSurf/Resources/Aliases302
-rw-r--r--!NetSurf/Resources/CSS,f792
-rw-r--r--Makefile18
-rw-r--r--Makefile.config8
-rw-r--r--debug/fontd.c17
-rw-r--r--gtk/gtk_gui.c14
l---------gtk/res/Aliases1
-rw-r--r--render/directory.c27
-rw-r--r--render/html.c471
-rw-r--r--render/html.h14
10 files changed, 869 insertions, 5 deletions
diff --git a/!NetSurf/Resources/Aliases b/!NetSurf/Resources/Aliases
new file mode 100644
index 000000000..db61ff13e
--- /dev/null
+++ b/!NetSurf/Resources/Aliases
@@ -0,0 +1,302 @@
+# > Unicode:Files.Aliases
+# Mapping of character set encoding names to their canonical form
+#
+# Lines starting with a '#' are comments, blank lines are ignored.
+#
+# Based on http://www.iana.org/assignments/character-sets and
+# http://www.iana.org/assignments/ianacharset-mib
+#
+# Canonical Form MIBenum Aliases...
+#
+US-ASCII 3 iso-ir-6 ANSI_X3.4-1986 ISO_646.irv:1991 ASCII ISO646-US ANSI_X3.4-1968 us IBM367 cp367 csASCII
+ISO-10646-UTF-1 27 csISO10646UTF1
+ISO_646.basic:1983 28 ref csISO646basic1983
+INVARIANT 29 csINVARIANT
+ISO_646.irv:1983 30 iso-ir-2 irv csISO2IntlRefVersion
+BS_4730 20 iso-ir-4 ISO646-GB gb uk csISO4UnitedKingdom
+NATS-SEFI 31 iso-ir-8-1 csNATSSEFI
+NATS-SEFI-ADD 32 iso-ir-8-2 csNATSSEFIADD
+NATS-DANO 33 iso-ir-9-1 csNATSDANO
+NATS-DANO-ADD 34 iso-ir-9-2 csNATSDANOADD
+SEN_850200_B 35 iso-ir-10 FI ISO646-FI ISO646-SE se csISO10Swedish
+SEN_850200_C 21 iso-ir-11 ISO646-SE2 se2 csISO11SwedishForNames
+KS_C_5601-1987 36 iso-ir-149 KS_C_5601-1989 KSC_5601 korean csKSC56011987
+ISO-2022-KR 37 csISO2022KR
+EUC-KR 38 csEUCKR EUCKR
+ISO-2022-JP 39 csISO2022JP
+ISO-2022-JP-2 40 csISO2022JP2
+ISO-2022-CN 104
+ISO-2022-CN-EXT 105
+JIS_C6220-1969-jp 41 JIS_C6220-1969 iso-ir-13 katakana x0201-7 csISO13JISC6220jp
+JIS_C6220-1969-ro 42 iso-ir-14 jp ISO646-JP csISO14JISC6220ro
+IT 22 iso-ir-15 ISO646-IT csISO15Italian
+PT 43 iso-ir-16 ISO646-PT csISO16Portuguese
+ES 23 iso-ir-17 ISO646-ES csISO17Spanish
+greek7-old 44 iso-ir-18 csISO18Greek7Old
+latin-greek 45 iso-ir-19 csISO19LatinGreek
+DIN_66003 24 iso-ir-21 de ISO646-DE csISO21German
+NF_Z_62-010_(1973) 46 iso-ir-25 ISO646-FR1 csISO25French
+Latin-greek-1 47 iso-ir-27 csISO27LatinGreek1
+ISO_5427 48 iso-ir-37 csISO5427Cyrillic
+JIS_C6226-1978 49 iso-ir-42 csISO42JISC62261978
+BS_viewdata 50 iso-ir-47 csISO47BSViewdata
+INIS 51 iso-ir-49 csISO49INIS
+INIS-8 52 iso-ir-50 csISO50INIS8
+INIS-cyrillic 53 iso-ir-51 csISO51INISCyrillic
+ISO_5427:1981 54 iso-ir-54 ISO5427Cyrillic1981
+ISO_5428:1980 55 iso-ir-55 csISO5428Greek
+GB_1988-80 56 iso-ir-57 cn ISO646-CN csISO57GB1988
+GB_2312-80 57 iso-ir-58 chinese csISO58GB231280
+NS_4551-1 25 iso-ir-60 ISO646-NO no csISO60DanishNorwegian csISO60Norwegian1
+NS_4551-2 58 ISO646-NO2 iso-ir-61 no2 csISO61Norwegian2
+NF_Z_62-010 26 iso-ir-69 ISO646-FR fr csISO69French
+videotex-suppl 59 iso-ir-70 csISO70VideotexSupp1
+PT2 60 iso-ir-84 ISO646-PT2 csISO84Portuguese2
+ES2 61 iso-ir-85 ISO646-ES2 csISO85Spanish2
+MSZ_7795.3 62 iso-ir-86 ISO646-HU hu csISO86Hungarian
+JIS_C6226-1983 63 iso-ir-87 x0208 JIS_X0208-1983 csISO87JISX0208
+greek7 64 iso-ir-88 csISO88Greek7
+ASMO_449 65 ISO_9036 arabic7 iso-ir-89 csISO89ASMO449
+iso-ir-90 66 csISO90
+JIS_C6229-1984-a 67 iso-ir-91 jp-ocr-a csISO91JISC62291984a
+JIS_C6229-1984-b 68 iso-ir-92 ISO646-JP-OCR-B jp-ocr-b csISO92JISC62991984b
+JIS_C6229-1984-b-add 69 iso-ir-93 jp-ocr-b-add csISO93JIS62291984badd
+JIS_C6229-1984-hand 70 iso-ir-94 jp-ocr-hand csISO94JIS62291984hand
+JIS_C6229-1984-hand-add 71 iso-ir-95 jp-ocr-hand-add csISO95JIS62291984handadd
+JIS_C6229-1984-kana 72 iso-ir-96 csISO96JISC62291984kana
+ISO_2033-1983 73 iso-ir-98 e13b csISO2033
+ANSI_X3.110-1983 74 iso-ir-99 CSA_T500-1983 NAPLPS csISO99NAPLPS
+ISO-8859-1 4 iso-ir-100 ISO_8859-1 ISO_8859-1:1987 latin1 l1 IBM819 CP819 csISOLatin1 8859_1 ISO8859-1
+ISO-8859-2 5 iso-ir-101 ISO_8859-2 ISO_8859-2:1987 latin2 l2 csISOLatin2 8859_2 ISO8859-2
+T.61-7bit 75 iso-ir-102 csISO102T617bit
+T.61-8bit 76 T.61 iso-ir-103 csISO103T618bit
+ISO-8859-3 6 iso-ir-109 ISO_8859-3 ISO_8859-3:1988 latin3 l3 csISOLatin3 8859_3 ISO8859-3
+ISO-8859-4 7 iso-ir-110 ISO_8859-4 ISO_8859-4:1988 latin4 l4 csISOLatin4 8859_4 ISO8859-4
+ECMA-cyrillic 77 iso-ir-111 KOI8-E csISO111ECMACyrillic
+CSA_Z243.4-1985-1 78 iso-ir-121 ISO646-CA csa7-1 ca csISO121Canadian1
+CSA_Z243.4-1985-2 79 iso-ir-122 ISO646-CA2 csa7-2 csISO122Canadian2
+CSA_Z243.4-1985-gr 80 iso-ir-123 csISO123CSAZ24341985gr
+ISO-8859-6 9 iso-ir-127 ISO_8859-6 ISO_8859-6:1987 ECMA-114 ASMO-708 arabic csISOLatinArabic
+ISO-8859-6-E 81 csISO88596E ISO_8859-6-E
+ISO-8859-6-I 82 csISO88596I ISO_8859-6-I
+ISO-8859-7 10 iso-ir-126 ISO_8859-7 ISO_8859-7:1987 ELOT_928 ECMA-118 greek greek8 csISOLatinGreek 8859_7 ISO8859-7
+T.101-G2 83 iso-ir-128 csISO128T101G2
+ISO-8859-8 11 iso-ir-138 ISO_8859-8 ISO_8859-8:1988 hebrew csISOLatinHebrew 8859_8 ISO8859-8
+ISO-8859-8-E 84 csISO88598E ISO_8859-8-E
+ISO-8859-8-I 85 csISO88598I ISO_8859-8-I
+CSN_369103 86 iso-ir-139 csISO139CSN369103
+JUS_I.B1.002 87 iso-ir-141 ISO646-YU js yu csISO141JUSIB1002
+ISO_6937-2-add 14 iso-ir-142 csISOTextComm
+IEC_P27-1 88 iso-ir-143 csISO143IECP271
+ISO-8859-5 8 iso-ir-144 ISO_8859-5 ISO_8859-5:1988 cyrillic csISOLatinCyrillic 8859_5 ISO8859-5
+JUS_I.B1.003-serb 89 iso-ir-146 serbian csISO146Serbian
+JUS_I.B1.003-mac 90 macedonian iso-ir-147 csISO147Macedonian
+ISO-8859-9 12 iso-ir-148 ISO_8859-9 ISO_8859-9:1989 latin5 l5 csISOLatin5 8859_9 ISO8859-9
+greek-ccitt 91 iso-ir-150 csISO150 csISO150GreekCCITT
+NC_NC00-10:81 92 cuba iso-ir-151 ISO646-CU csISO151Cuba
+ISO_6937-2-25 93 iso-ir-152 csISO6937Add
+GOST_19768-74 94 ST_SEV_358-88 iso-ir-153 csISO153GOST1976874
+ISO_8859-supp 95 iso-ir-154 latin1-2-5 csISO8859Supp
+ISO_10367-box 96 iso-ir-155 csISO10367Box
+ISO-8859-10 13 iso-ir-157 l6 ISO_8859-10:1992 csISOLatin6 latin6 8859_10 ISO8859-10
+latin-lap 97 lap iso-ir-158 csISO158Lap
+JIS_X0212-1990 98 x0212 iso-ir-159 csISO159JISX02121990
+DS_2089 99 DS2089 ISO646-DK dk csISO646Danish
+us-dk 100 csUSDK
+dk-us 101 csDKUS
+JIS_X0201 15 X0201 csHalfWidthKatakana
+KSC5636 102 ISO646-KR csKSC5636
+ISO-10646-UCS-2 1000 csUnicode UCS-2 UCS2
+ISO-10646-UCS-4 1001 csUCS4 UCS-4 UCS4
+DEC-MCS 2008 dec csDECMCS
+hp-roman8 2004 roman8 r8 csHPRoman8
+macintosh 2027 mac csMacintosh MACROMAN MAC-ROMAN X-MAC-ROMAN
+IBM037 2028 cp037 ebcdic-cp-us ebcdic-cp-ca ebcdic-cp-wt ebcdic-cp-nl csIBM037
+IBM038 2029 EBCDIC-INT cp038 csIBM038
+IBM273 2030 CP273 csIBM273
+IBM274 2031 EBCDIC-BE CP274 csIBM274
+IBM275 2032 EBCDIC-BR cp275 csIBM275
+IBM277 2033 EBCDIC-CP-DK EBCDIC-CP-NO csIBM277
+IBM278 2034 CP278 ebcdic-cp-fi ebcdic-cp-se csIBM278
+IBM280 2035 CP280 ebcdic-cp-it csIBM280
+IBM281 2036 EBCDIC-JP-E cp281 csIBM281
+IBM284 2037 CP284 ebcdic-cp-es csIBM284
+IBM285 2038 CP285 ebcdic-cp-gb csIBM285
+IBM290 2039 cp290 EBCDIC-JP-kana csIBM290
+IBM297 2040 cp297 ebcdic-cp-fr csIBM297
+IBM420 2041 cp420 ebcdic-cp-ar1 csIBM420
+IBM423 2042 cp423 ebcdic-cp-gr csIBM423
+IBM424 2043 cp424 ebcdic-cp-he csIBM424
+IBM437 2011 cp437 437 csPC8CodePage437
+IBM500 2044 CP500 ebcdic-cp-be ebcdic-cp-ch csIBM500
+IBM775 2087 cp775 csPC775Baltic
+IBM850 2009 cp850 850 csPC850Multilingual
+IBM851 2045 cp851 851 csIBM851
+IBM852 2010 cp852 852 csPCp852
+IBM855 2046 cp855 855 csIBM855
+IBM857 2047 cp857 857 csIBM857
+IBM860 2048 cp860 860 csIBM860
+IBM861 2049 cp861 861 cp-is csIBM861
+IBM862 2013 cp862 862 csPC862LatinHebrew
+IBM863 2050 cp863 863 csIBM863
+IBM864 2051 cp864 csIBM864
+IBM865 2052 cp865 865 csIBM865
+IBM866 2086 cp866 866 csIBM866
+IBM868 2053 CP868 cp-ar csIBM868
+IBM869 2054 cp869 869 cp-gr csIBM869
+IBM870 2055 CP870 ebcdic-cp-roece ebcdic-cp-yu csIBM870
+IBM871 2056 CP871 ebcdic-cp-is csIBM871
+IBM880 2057 cp880 EBCDIC-Cyrillic csIBM880
+IBM891 2058 cp891 csIBM891
+IBM903 2059 cp903 csIBM903
+IBM904 2060 cp904 904 csIBBM904
+IBM905 2061 CP905 ebcdic-cp-tr csIBM905
+IBM918 2062 CP918 ebcdic-cp-ar2 csIBM918
+IBM1026 2063 CP1026 csIBM1026
+EBCDIC-AT-DE 2064 csIBMEBCDICATDE
+EBCDIC-AT-DE-A 2065 csEBCDICATDEA
+EBCDIC-CA-FR 2066 csEBCDICCAFR
+EBCDIC-DK-NO 2067 csEBCDICDKNO
+EBCDIC-DK-NO-A 2068 csEBCDICDKNOA
+EBCDIC-FI-SE 2069 csEBCDICFISE
+EBCDIC-FI-SE-A 2070 csEBCDICFISEA
+EBCDIC-FR 2071 csEBCDICFR
+EBCDIC-IT 2072 csEBCDICIT
+EBCDIC-PT 2073 csEBCDICPT
+EBCDIC-ES 2074 csEBCDICES
+EBCDIC-ES-A 2075 csEBCDICESA
+EBCDIC-ES-S 2076 csEBCDICESS
+EBCDIC-UK 2077 csEBCDICUK
+EBCDIC-US 2078 csEBCDICUS
+UNKNOWN-8BIT 2079 csUnknown8BiT
+MNEMONIC 2080 csMnemonic
+MNEM 2081 csMnem
+VISCII 2082 csVISCII
+VIQR 2083 csVIQR
+KOI8-R 2084 csKOI8R
+KOI8-U 2088
+IBM00858 2089 CCSID00858 CP00858 PC-Multilingual-850+euro
+IBM00924 2090 CCSID00924 CP00924 ebcdic-Latin9--euro
+IBM01140 2091 CCSID01140 CP01140 ebcdic-us-37+euro
+IBM01141 2092 CCSID01141 CP01141 ebcdic-de-273+euro
+IBM01142 2093 CCSID01142 CP01142 ebcdic-dk-277+euro ebcdic-no-277+euro
+IBM01143 2094 CCSID01143 CP01143 ebcdic-fi-278+euro ebcdic-se-278+euro
+IBM01144 2095 CCSID01144 CP01144 ebcdic-it-280+euro
+IBM01145 2096 CCSID01145 CP01145 ebcdic-es-284+euro
+IBM01146 2097 CCSID01146 CP01146 ebcdic-gb-285+euro
+IBM01147 2098 CCSID01147 CP01147 ebcdic-fr-297+euro
+IBM01148 2099 CCSID01148 CP01148 ebcdic-international-500+euro
+IBM01149 2100 CCSID01149 CP01149 ebcdic-is-871+euro
+Big5-HKSCS 2101
+IBM1047 2102 IBM-1047
+PTCP154 2103 csPTCP154 PT154 CP154 Cyrillic-Asian
+Amiga-1251 2104 Ami1251 Amiga1251 Ami-1251
+KOI7-switched 2105
+UNICODE-1-1 1010 csUnicode11
+SCSU 1011
+UTF-7 1012
+UTF-16BE 1013
+UTF-16LE 1014
+UTF-16 1015
+CESU-8 1016 csCESU-8
+UTF-32 1017
+UTF-32BE 1018
+UTF-32LE 1019
+BOCU-1 1020 csBOCU-1
+UNICODE-1-1-UTF-7 103 csUnicode11UTF7
+UTF-8 106 UNICODE-1-1-UTF-8 UNICODE-2-0-UTF-8 utf8
+ISO-8859-13 109 8859_13 ISO8859-13
+ISO-8859-14 110 iso-ir-199 ISO_8859-14:1998 ISO_8859-14 latin8 iso-celtic l8 8859_14 ISO8859-14
+ISO-8859-15 111 ISO_8859-15 Latin-9 8859_15 ISO8859-15
+ISO-8859-16 112 iso-ir-226 ISO_8859-16:2001 ISO_8859-16 latin10 l10
+GBK 113 CP936 MS936 windows-936
+GB18030 114
+OSD_EBCDIC_DF04_15 115
+OSD_EBCDIC_DF03_IRV 116
+OSD_EBCDIC_DF04_1 117
+JIS_Encoding 16 csJISEncoding
+Shift_JIS 17 MS_Kanji csShiftJIS X-SJIS Shift-JIS
+EUC-JP 18 csEUCPkdFmtJapanese Extended_UNIX_Code_Packed_Format_for_Japanese EUCJP
+Extended_UNIX_Code_Fixed_Width_for_Japanese 19 csEUCFixWidJapanese
+ISO-10646-UCS-Basic 1002 csUnicodeASCII
+ISO-10646-Unicode-Latin1 1003 csUnicodeLatin1 ISO-10646
+ISO-Unicode-IBM-1261 1005 csUnicodeIBM1261
+ISO-Unicode-IBM-1268 1006 csUnicodeIBM1268
+ISO-Unicode-IBM-1276 1007 csUnicodeIBM1276
+ISO-Unicode-IBM-1264 1008 csUnicodeIBM1264
+ISO-Unicode-IBM-1265 1009 csUnicodeIBM1265
+ISO-8859-1-Windows-3.0-Latin-1 2000 csWindows30Latin1
+ISO-8859-1-Windows-3.1-Latin-1 2001 csWindows31Latin1
+ISO-8859-2-Windows-Latin-2 2002 csWindows31Latin2
+ISO-8859-9-Windows-Latin-5 2003 csWindows31Latin5
+Adobe-Standard-Encoding 2005 csAdobeStandardEncoding
+Ventura-US 2006 csVenturaUS
+Ventura-International 2007 csVenturaInternational
+PC8-Danish-Norwegian 2012 csPC8DanishNorwegian
+PC8-Turkish 2014 csPC8Turkish
+IBM-Symbols 2015 csIBMSymbols
+IBM-Thai 2016 csIBMThai
+HP-Legal 2017 csHPLegal
+HP-Pi-font 2018 csHPPiFont
+HP-Math8 2019 csHPMath8
+Adobe-Symbol-Encoding 2020 csHPPSMath
+HP-DeskTop 2021 csHPDesktop
+Ventura-Math 2022 csVenturaMath
+Microsoft-Publishing 2023 csMicrosoftPublishing
+Windows-31J 2024 csWindows31J
+GB2312 2025 csGB2312 EUC-CN EUCCN CN-GB
+Big5 2026 csBig5 BIG-FIVE BIG-5 CN-BIG5 BIG_FIVE
+windows-1250 2250 CP1250 MS-EE
+windows-1251 2251 CP1251 MS-CYRL
+windows-1252 2252 CP1252 MS-ANSI
+windows-1253 2253 CP1253 MS-GREEK
+windows-1254 2254 CP1254 MS-TURK
+windows-1255 2255
+windows-1256 2256 CP1256 MS-ARAB
+windows-1257 2257 CP1257 WINBALTRIM
+windows-1258 2258
+TIS-620 2259
+HZ-GB-2312 2085
+
+# Additional encodings not defined by IANA
+
+# Arbitrary allocations
+#CP737 3001
+#CP853 3002
+#CP856 3003
+CP874 3004 WINDOWS-874
+#CP922 3005
+#CP1046 3006
+#CP1124 3007
+#CP1125 3008 WINDOWS-1125
+#CP1129 3009
+#CP1133 3010 IBM-CP1133
+#CP1161 3011 IBM-1161 IBM1161 CSIBM1161
+#CP1162 3012 IBM-1162 IBM1162 CSIBM1162
+#CP1163 3013 IBM-1163 IBM1163 CSIBM1163
+#GEORGIAN-ACADEMY 3014
+#GEORGIAN-PS 3015
+#KOI8-RU 3016
+#KOI8-T 3017
+#MACARABIC 3018 X-MAC-ARABIC MAC-ARABIC
+#MACCROATIAN 3019 X-MAC-CROATIAN MAC-CROATIAN
+#MACGREEK 3020 X-MAC-GREEK MAC-GREEK
+#MACHEBREW 3021 X-MAC-HEBREW MAC-HEBREW
+#MACICELAND 3022 X-MAC-ICELAND MAC-ICELAND
+#MACROMANIA 3023 X-MAC-ROMANIA MAC-ROMANIA
+#MACTHAI 3024 X-MAC-THAI MAC-THAI
+#MACTURKISH 3025 X-MAC-TURKISH MAC-TURKISH
+#MULELAO-1 3026
+
+# From Unicode Lib
+ISO-IR-182 4000
+ISO-IR-197 4002
+ISO-2022-JP-1 4008
+MACCYRILLIC 4009 X-MAC-CYRILLIC MAC-CYRILLIC
+MACUKRAINE 4010 X-MAC-UKRAINIAN MAC-UKRAINIAN
+MACCENTRALEUROPE 4011 X-MAC-CENTRALEURROMAN MAC-CENTRALEURROMAN
+JOHAB 4012
+ISO-8859-11 4014 iso-ir-166 ISO_8859-11 ISO8859-11 8859_11
+X-CURRENT 4999 X-SYSTEM
+X-ACORN-LATIN1 5001
+X-ACORN-FUZZY 5002
diff --git a/!NetSurf/Resources/CSS,f79 b/!NetSurf/Resources/CSS,f79
index 640c8450a..c4ba88846 100644
--- a/!NetSurf/Resources/CSS,f79
+++ b/!NetSurf/Resources/CSS,f79
@@ -174,3 +174,5 @@ fieldset { display: block; border: thin solid #888; margin: 1.12em 0; }
[align=left] { text-align: left; }
[align=center] { text-align: center; }
[align=right] { text-align: right; }
+
+script, style { display: none; }
diff --git a/Makefile b/Makefile
index 439f49287..a207877bf 100644
--- a/Makefile
+++ b/Makefile
@@ -245,6 +245,12 @@ ifeq ($(TARGET),riscos)
$(eval $(call feature_enabled,SPRITE,-DWITH_SPRITE,,RISC OS sprite rendering))
$(eval $(call feature_enabled,ARTWORKS,-DWITH_ARTWORKS,,ArtWorks rendering))
$(eval $(call feature_enabled,PLUGINS,-DWITH_PLUGIN,,Plugin protocol support))
+ ifeq ($(HOST),riscos)
+ $(eval $(call feature_enabled,HUBBUB,-DWITH_HUBBUB,-lhubbub -lparserutils,Hubbub HTML parser))
+ else
+ NETSURF_FEATURE_HUBBUB_CFLAGS := -DWITH_HUBBUB
+ $(eval $(call pkg_config_find_and_add,HUBBUB,libhubbub,Hubbub HTML parser))
+ endif
endif
# ----------------------------------------------------------------------------
@@ -267,10 +273,12 @@ ifeq ($(TARGET),gtk)
# define additional CFLAGS and LDFLAGS requirements for pkg-configed libs here
NETSURF_FEATURE_RSVG_CFLAGS := -DWITH_RSVG
NETSURF_FEATURE_ROSPRITE_CFLAGS := -DWITH_NSSPRITE
+ NETSURF_FEATURE_HUBBUB_CFLAGS := -DWITH_HUBBUB
# add a line similar to below for each optional pkg-configed lib here
$(eval $(call pkg_config_find_and_add,RSVG,librsvg-2.0,SVG rendering))
$(eval $(call pkg_config_find_and_add,ROSPRITE,librosprite,RISC OS sprite rendering))
+ $(eval $(call pkg_config_find_and_add,HUBBUB,libhubbub,Hubbub HTML parser))
GTKCFLAGS := -std=c99 -Dgtk -Dnsgtk \
-DGTK_DISABLE_DEPRECATED \
@@ -399,10 +407,14 @@ ifeq ($(TARGET),debug)
-D_XOPEN_SOURCE=600 \
-D_POSIX_C_SOURCE=200112L \
-D_NETBSD_SOURCE \
- $(WARNFLAGS) -I. -I../../libsprite/trunk/ -g $(OPT0FLAGS) \
- $(shell $(PKG_CONFIG) --cflags librosprite) \
+ $(WARNFLAGS) -I. -g $(OPT0FLAGS) \
$(shell xml2-config --cflags)
- LDFLAGS += $(shell $(PKG_CONFIG) --libs librosprite)
+ LDFLAGS += $(shell $(PKG_CONFIG) --libs libxml-2.0 libcurl openssl)
+
+ $(eval $(call pkg_config_find_and_add,RSVG,librsvg-2.0,SVG rendering))
+ $(eval $(call pkg_config_find_and_add,ROSPRITE,librosprite,RISC OS sprite rendering))
+ $(eval $(call pkg_config_find_and_add,HUBBUB,libhubbub,Hubbub HTML parser))
+ $(eval $(call pkg_config_find_and_add,HUBBUB,libparserutils,Hubbub HTML parser))
endif
# ----------------------------------------------------------------------------
diff --git a/Makefile.config b/Makefile.config
index dad9c0273..362db38f9 100644
--- a/Makefile.config
+++ b/Makefile.config
@@ -53,6 +53,10 @@ NETSURF_USE_LIBICONV_PLUG := YES
# ----------------------------------------------------------------------------
ifeq ($(TARGET),riscos)
+ # Enable using Hubbub to parse HTML rather than libxml2
+ # Valid options: YES, NO
+ NETSURF_USE_HUBBUB := YES
+
# Use James Bursa's libsvgtiny for rendering SVG images
# Valid options: YES, NO
NETSURF_USE_NSSVG := YES
@@ -87,6 +91,10 @@ ifeq ($(TARGET),gtk)
# Where to install the netsurf binary
NETSURF_GTK_BIN := /usr/local/bin/
+ # Enable using Hubbub to parse HTML rather than libxml2
+ # Valid options: YES, NO, AUTO
+ NETSURF_USE_HUBBUB := AUTO
+
# Use librsvg in conjunction with Cairo to render SVG images
# Valid options: YES, NO, AUTO
NETSURF_USE_RSVG := AUTO
diff --git a/debug/fontd.c b/debug/fontd.c
index 50fabc59f..fa64b67c4 100644
--- a/debug/fontd.c
+++ b/debug/fontd.c
@@ -21,6 +21,22 @@
#include "render/font.h"
+/* Forward declarations of the three font callbacks, so that the nsfont
+ * table below can reference them before their definitions. */
+static bool nsfont_width(const struct css_style *style,
+ const char *string, size_t length, int *width);
+static bool nsfont_position_in_string(const struct css_style *style,
+ const char *string, size_t length,
+ int x, size_t *char_offset, int *actual_x);
+static bool nsfont_split(const struct css_style *style,
+ const char *string, size_t length,
+ int x, size_t *char_offset, int *actual_x);
+
+/* Font function table for this build, filled positionally with the
+ * callbacks declared above.
+ * NOTE(review): the initialiser order must match the member order of
+ * struct font_functions (presumably declared in render/font.h) - confirm. */
+const struct font_functions nsfont = {
+ nsfont_width,
+ nsfont_position_in_string,
+ nsfont_split
+};
+
+
bool nsfont_width(const struct css_style *style,
const char *string, size_t length,
int *width)
@@ -63,3 +79,4 @@ bool nsfont_split(const struct css_style *style,
*actual_x = *char_offset * 10;
return true;
}
+
diff --git a/gtk/gtk_gui.c b/gtk/gtk_gui.c
index 73e3068be..3967cd1cb 100644
--- a/gtk/gtk_gui.c
+++ b/gtk/gtk_gui.c
@@ -31,6 +31,9 @@
#include <gdk/gdkkeysyms.h>
#include <gtk/gtk.h>
#include <glade/glade.h>
+#ifdef WITH_HUBBUB
+#include <hubbub/hubbub.h>
+#endif
#include "content/content.h"
#include "content/fetch.h"
#include "content/fetchers/fetch_curl.h"
@@ -165,6 +168,13 @@ static void check_homedir(void)
}
}
+
+/**
+ * Allocation callback handed to hubbub_initialise(); defers to realloc().
+ *
+ * \param ptr  Block to resize, passed straight through to realloc()
+ * \param len  Requested size, in bytes
+ * \param pw   Client data given at registration time; not used here
+ * \return Whatever realloc() returns
+ */
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	void *resized = realloc(ptr, len);
+
+	return resized;
+}
+
+
void gui_init(int argc, char** argv)
{
char buf[PATH_MAX];
@@ -182,6 +192,10 @@ void gui_init(int argc, char** argv)
LOG(("Using '%s' as Resources directory", buf));
res_dir_location = strdup(buf);
+#ifdef WITH_HUBBUB
+	/* Hubbub is an optional feature and <hubbub/hubbub.h> is only
+	 * included when WITH_HUBBUB is defined, so the library must only
+	 * be initialised under the same guard.  It is given the path of
+	 * the charset Aliases file located via find_resource(). */
+	find_resource(buf, "Aliases", "./gtk/res/Aliases");
+	LOG(("Using '%s' as Aliases file", buf));
+	hubbub_initialise(buf, myrealloc, NULL);
+#endif
+
glade_init();
gladeWindows = glade_xml_new(glade_file_location, NULL, NULL);
if (gladeWindows == NULL)
diff --git a/gtk/res/Aliases b/gtk/res/Aliases
new file mode 120000
index 000000000..a95a734da
--- /dev/null
+++ b/gtk/res/Aliases
@@ -0,0 +1 @@
+../../!NetSurf/Resources/Aliases \ No newline at end of file
diff --git a/render/directory.c b/render/directory.c
index 0f3dda03b..754449df5 100644
--- a/render/directory.c
+++ b/render/directory.c
@@ -27,6 +27,9 @@
#include <stdlib.h>
#include <sys/stat.h>
#include <time.h>
+#ifdef WITH_HUBBUB
+#include <hubbub/parser.h>
+#endif
#include <libxml/HTMLparser.h>
#include "content/content.h"
#include "render/directory.h"
@@ -45,7 +48,12 @@ bool directory_create(struct content *c, const char *params[]) {
/* html_create() must have broadcast MSG_ERROR already, so we
* don't need to. */
return false;
+#ifndef WITH_HUBBUB
htmlParseChunk(c->data.html.parser, header, sizeof(header) - 1, 0);
+#else
+ hubbub_parser_parse_chunk(c->data.html.parser,
+ (uint8_t *) header, sizeof(header) - 1);
+#endif
return true;
}
@@ -92,7 +100,11 @@ bool directory_convert(struct content *c, int width, int height) {
"<body>\n<h1>\nIndex of %s</h1>\n<hr><pre>",
nice_path, nice_path);
free(nice_path);
+#ifndef WITH_HUBBUB
htmlParseChunk(c->data.html.parser, buffer, strlen(buffer), 0);
+#else
+ hubbub_parser_parse_chunk(c->data.html.parser, buffer, strlen(buffer));
+#endif
res = url_parent(c->url, &up);
if (res == URL_FUNC_OK) {
@@ -100,8 +112,13 @@ bool directory_convert(struct content *c, int width, int height) {
if ((res == URL_FUNC_OK) && !compare) {
snprintf(buffer, sizeof(buffer),
"<a href=\"..\">[..]</a>\n");
+#ifndef WITH_HUBBUB
htmlParseChunk(c->data.html.parser, buffer,
strlen(buffer), 0);
+#else
+ hubbub_parser_parse_chunk(c->data.html.parser,
+ buffer, strlen(buffer));
+#endif
}
free(up);
}
@@ -118,11 +135,21 @@ bool directory_convert(struct content *c, int width, int height) {
snprintf(buffer, sizeof(buffer), "<a href=\"%s/%s\">%s</a>\n",
c->url, entry->d_name, entry->d_name);
+#ifndef WITH_HUBBUB
htmlParseChunk(c->data.html.parser, buffer, strlen(buffer), 0);
+#else
+ hubbub_parser_parse_chunk(c->data.html.parser,
+ buffer, strlen(buffer));
+#endif
}
closedir(parent);
+#ifndef WITH_HUBBUB
htmlParseChunk(c->data.html.parser, footer, sizeof(footer) - 1, 0);
+#else
+ hubbub_parser_parse_chunk(c->data.html.parser,
+ (uint8_t *) footer, sizeof(footer) - 1);
+#endif
c->type = CONTENT_HTML;
return html_convert(c, width, height);
}
diff --git a/render/html.c b/render/html.c
index cc581a771..7d4a55a5d 100644
--- a/render/html.c
+++ b/render/html.c
@@ -20,12 +20,21 @@
* Content for text/html (implementation).
*/
+#define _GNU_SOURCE /* for strndup() */
+
#include <assert.h>
#include <ctype.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <stdlib.h>
+#ifdef WITH_HUBBUB
+#include <hubbub/hubbub.h>
+#include <hubbub/parser.h>
+#include <hubbub/tree.h>
+#endif
+#include <libxml/tree.h>
+#include <libxml/parser.h>
#include <libxml/parserInternals.h>
#include "utils/config.h"
#include "content/content.h"
@@ -87,6 +96,380 @@ static const char empty_document[] =
"</html>";
+#ifdef WITH_HUBBUB
+
+
+/* Number of slots in the namespace tables below.  The tables are indexed
+ * by the namespace value Hubbub reports for a tag or attribute (see the
+ * ns_ns[tag->ns] / ns_ns[attr->ns] lookups).  Slot 0 is never filled in:
+ * the registration loop in create_element() starts at index 1. */
+#define NUM_NAMESPACES 7
+
+/* Namespace prefixes; a NULL prefix is passed to xmlNewNs() unchanged.
+ * Both the strings and the table itself are read-only. */
+const char * const ns_prefixes[NUM_NAMESPACES] =
+	{ NULL, NULL, "math", "svg", "xlink", "xml", "xmlns" };
+
+/* Namespace URIs, parallel to ns_prefixes. */
+const char * const ns_urls[NUM_NAMESPACES] = {
+	NULL,
+	"http://www.w3.org/1999/xhtml",
+	"http://www.w3.org/1998/Math/MathML",
+	"http://www.w3.org/2000/svg",
+	"http://www.w3.org/1999/xlink",
+	"http://www.w3.org/XML/1998/namespace",
+	"http://www.w3.org/2000/xmlns/"
+};
+
+/* libxml2 namespace objects matching the tables above; populated by
+ * create_element() when it sees the first element of a document. */
+xmlNs *ns_ns[NUM_NAMESPACES];
+
+/* Tree-construction callbacks handed to Hubbub; each builds or manipulates
+ * libxml2 nodes.  They are declared here so the tree_handler table below
+ * can reference them ahead of their definitions. */
+static int create_comment(void *ctx, const hubbub_string *data, void **result);
+static int create_doctype(void *ctx, const hubbub_doctype *doctype,
+ void **result);
+static int create_element(void *ctx, const hubbub_tag *tag, void **result);
+static int create_text(void *ctx, const hubbub_string *data, void **result);
+static int ref_node(void *ctx, void *node);
+static int unref_node(void *ctx, void *node);
+static int append_child(void *ctx, void *parent, void *child, void **result);
+static int insert_before(void *ctx, void *parent, void *child, void *ref_child,
+ void **result);
+static int remove_child(void *ctx, void *parent, void *child, void **result);
+static int clone_node(void *ctx, void *node, bool deep, void **result);
+static int reparent_children(void *ctx, void *node, void *new_parent);
+static int get_parent(void *ctx, void *node, bool element_only, void **result);
+static int has_children(void *ctx, void *node, bool *result);
+static int form_associate(void *ctx, void *form, void *node);
+static int add_attributes(void *ctx, void *node,
+ const hubbub_attribute *attributes, uint32_t n_attributes);
+static int set_quirks_mode(void *ctx, hubbub_quirks_mode mode);
+static int change_encoding(void *ctx, const char *mibenum);
+
+/* Template callback table.  html_create_parser() copies it into the
+ * content's own tree_handler and then sets that copy's ctx member, so this
+ * shared template itself is never given a context. */
+static hubbub_tree_handler tree_handler = {
+ create_comment,
+ create_doctype,
+ create_element,
+ create_text,
+ ref_node,
+ unref_node,
+ append_child,
+ insert_before,
+ remove_child,
+ clone_node,
+ reparent_children,
+ get_parent,
+ has_children,
+ form_associate,
+ add_attributes,
+ set_quirks_mode,
+ change_encoding,
+ /* presumably the ctx slot, filled in per content - confirm against
+ * the hubbub_tree_handler declaration */
+ NULL
+};
+
+
+
+/*** Tree construction functions ***/
+
+/**
+ * Tree-handler callback: create a detached libxml2 comment node.
+ *
+ * The node's reference count is kept in its _private field and starts
+ * at 1.
+ *
+ * \param ctx     Callback context (unused)
+ * \param data    Comment text; copied with xmlStrndup()
+ * \param result  Updated to the new node on success
+ * \return 0 on success, 1 if the node could not be allocated
+ */
+int create_comment(void *ctx, const hubbub_string *data, void **result)
+{
+	xmlNode *node = xmlNewComment(NULL);
+
+	/* Don't dereference a failed allocation */
+	if (node == NULL)
+		return 1;
+
+	node->content = xmlStrndup(data->ptr, data->len);
+	node->_private = (void *)1;
+	*result = node;
+
+	return 0;
+}
+
+/**
+ * Tree-handler callback: create a placeholder node for a doctype.
+ *
+ * No real doctype node is built; an empty comment node stands in for it
+ * so that the parser has something to hold a reference to.
+ *
+ * \param ctx      Callback context (unused)
+ * \param doctype  Doctype details (unused)
+ * \param result   Updated to the placeholder node on success
+ * \return 0 on success, 1 if the node could not be allocated
+ */
+int create_doctype(void *ctx, const hubbub_doctype *doctype, void **result)
+{
+	/* Make a node that doesn't really exist, then don't append it
+	 * later. */
+	xmlNode *node = xmlNewComment(NULL);
+
+	/* Don't dereference a failed allocation */
+	if (node == NULL)
+		return 1;
+
+	node->_private = (void *)1;
+	*result = node;
+
+	return 0;
+}
+
+/**
+ * Tree-handler callback: create a detached libxml2 element for a tag.
+ *
+ * The first element created for a document also has the namespaces from
+ * ns_urls/ns_prefixes declared on it, and the resulting xmlNs objects are
+ * cached in ns_ns[] for use by later elements and attributes.
+ *
+ * \param ctx     The struct content being parsed
+ * \param tag     Tag name, namespace and attributes
+ * \param result  Updated to the new node on success
+ * \return 0 on success, 1 on allocation failure
+ */
+int create_element(void *ctx, const hubbub_tag *tag, void **result)
+{
+	struct content *c = ctx;
+	struct content_html_data *html = &c->data.html;
+	bool declared_ns = false;
+	xmlNode *node;
+	char *name;
+
+	/* Hubbub strings are length-delimited; libxml2 wants NUL-terminated */
+	name = strndup((const char *) tag->name.ptr, tag->name.len);
+	if (name == NULL)
+		return 1;
+
+	/* xmlNewNode() takes its own copy of the name */
+	node = xmlNewNode(NULL, BAD_CAST name);
+	free(name);
+	if (node == NULL)
+		return 1;
+
+	node->_private = (void *)1;
+
+	if (html->firstelem == true) {
+		for (size_t i = 1; i < NUM_NAMESPACES; i++) {
+			ns_ns[i] = xmlNewNs(node,
+					BAD_CAST ns_urls[i],
+					BAD_CAST ns_prefixes[i]);
+		}
+		html->firstelem = false;
+		declared_ns = true;
+	}
+
+	xmlSetNs(node, ns_ns[tag->ns]);
+
+	/* Share the attribute-copying logic with the add_attributes callback */
+	if (add_attributes(ctx, node, tag->attributes,
+			tag->n_attributes) != 0) {
+		/* The namespace declarations are freed along with the node,
+		 * so make the next element declare them afresh */
+		if (declared_ns)
+			html->firstelem = true;
+		xmlFreeNode(node);
+		return 1;
+	}
+
+	*result = node;
+
+	return 0;
+}
+
+/**
+ * Tree-handler callback: create a detached libxml2 text node.
+ *
+ * \param ctx     Callback context (unused)
+ * \param data    Text content, data->len bytes long
+ * \param result  Updated to the new node on success
+ * \return 0 on success, 1 if the node could not be allocated
+ */
+int create_text(void *ctx, const hubbub_string *data, void **result)
+{
+	xmlNode *node = xmlNewTextLen(BAD_CAST data->ptr, data->len);
+
+	/* Don't dereference a failed allocation */
+	if (node == NULL)
+		return 1;
+
+	node->_private = (void *)1;
+	*result = node;
+
+	return 0;
+}
+
+/**
+ * Tree-handler callback: take a reference on a node.
+ *
+ * The count lives in the node's _private pointer, treated as an integer.
+ *
+ * \param ctx   Callback context (unused)
+ * \param node  Node to reference
+ * \return 0 always
+ */
+int ref_node(void *ctx, void *node)
+{
+	xmlNode *target = node;
+	uintptr_t refs = (uintptr_t) target->_private;
+
+	target->_private = (void *) (refs + 1);
+
+	return 0;
+}
+
+/**
+ * Tree-handler callback: drop a reference on a node.
+ *
+ * The node is freed only once its count reaches zero and it is not
+ * attached to a parent.
+ *
+ * \param ctx   Callback context (unused)
+ * \param node  Node to release
+ * \return 0 always
+ */
+int unref_node(void *ctx, void *node)
+{
+	xmlNode *target = node;
+	uintptr_t refs = (uintptr_t) target->_private - 1;
+
+	target->_private = (void *) refs;
+
+	if (refs == 0 && target->parent == NULL)
+		xmlFreeNode(target);
+
+	return 0;
+}
+
+/* Tree-handler callback: append 'child' as the last child of 'parent' and
+ * report the resulting node through 'result'.  Always returns 0.
+ *
+ * When both 'child' and the parent's current last child are text nodes, a
+ * shallow clone of 'child' is appended instead of 'child' itself.
+ * NOTE(review): presumably this is because xmlAddChild() merges adjacent
+ * text nodes and frees the node passed in, which must not happen to a node
+ * the parser still references - confirm against the libxml2 docs. */
+int append_child(void *ctx, void *parent, void *child, void **result)
+{
+ xmlNode *nparent = parent;
+ xmlNode *nchild = child;
+
+ if (nchild->type == XML_TEXT_NODE &&
+ nparent->last != NULL &&
+ nparent->last->type == XML_TEXT_NODE) {
+ xmlNode *clone;
+ clone_node(ctx, nchild, false, (void **) &clone);
+ *result = xmlAddChild(parent, clone);
+ /* node referenced by clone_node */
+ /* NOTE(review): no ref_node() call on this path; if xmlAddChild()
+ * returns the pre-existing text node rather than the clone, the
+ * returned node has not gained a reference here - confirm. */
+ } else {
+ *result = xmlAddChild(parent, child);
+ /* NOTE(review): *result is not checked for NULL before ref_node()
+ * dereferences it. */
+ ref_node(ctx, *result);
+ }
+
+ return 0;
+}
+
+/**
+ * Tree-handler callback: insert 'child' before 'ref_child', under 'parent'.
+ *
+ * \param ctx        Callback context, passed on to ref_node()
+ * \param parent     Parent node (unused; implied by ref_child)
+ * \param child      Node to insert
+ * \param ref_child  Node that 'child' is inserted in front of
+ * \param result     Updated to the inserted node, which gains a reference
+ * \return 0 on success, 1 if libxml2 could not insert the node
+ */
+int insert_before(void *ctx, void *parent, void *child, void *ref_child,
+		void **result)
+{
+	xmlNode *inserted = xmlAddPrevSibling(ref_child, child);
+
+	/* ref_node() dereferences its argument, so stop on failure */
+	if (inserted == NULL)
+		return 1;
+
+	*result = inserted;
+	ref_node(ctx, inserted);
+
+	return 0;
+}
+
+/**
+ * Tree-handler callback: detach 'child' from the tree.
+ *
+ * \param ctx     Callback context, passed on to ref_node()
+ * \param parent  Parent node (unused)
+ * \param child   Node to unlink
+ * \param result  Updated to the unlinked node, which gains a reference
+ * \return 0 always
+ */
+int remove_child(void *ctx, void *parent, void *child, void **result)
+{
+	xmlNode *detached = child;
+
+	xmlUnlinkNode(detached);
+	ref_node(ctx, detached);
+	*result = detached;
+
+	return 0;
+}
+
+/**
+ * Tree-handler callback: copy a node.
+ *
+ * \param ctx     Callback context (unused)
+ * \param node    Node to copy
+ * \param deep    Whether to copy the node's descendants too
+ * \param result  Updated to the copy, whose reference count starts at 1
+ * \return 0 on success, 1 if the copy could not be allocated
+ */
+int clone_node(void *ctx, void *node, bool deep, void **result)
+{
+	/* xmlCopyNode() mode, per the libxml2 docs: 1 copies recursively,
+	 * 2 copies properties and namespaces only */
+	xmlNode *n = xmlCopyNode(node, deep ? 1 : 2);
+
+	/* Don't dereference a failed copy */
+	if (n == NULL)
+		return 1;
+
+	n->_private = (void *)1;
+	*result = n;
+
+	return 0;
+}
+
+/**
+ * Tree-handler callback: move every child of 'node' to the end of
+ * 'new_parent', preserving order.
+ *
+ * \param ctx         Callback context (unused)
+ * \param node        Node whose children are taken
+ * \param new_parent  Node the children are appended to
+ * \return 0 on success, 1 as soon as a child cannot be appended
+ */
+int reparent_children(void *ctx, void *node, void *new_parent)
+{
+	xmlNode *from = node;
+	xmlNode *to = new_parent;
+	xmlNode *cur = from->children;
+
+	while (cur != NULL) {
+		/* Remember the sibling before unlinking severs the chain */
+		xmlNode *following = cur->next;
+
+		xmlUnlinkNode(cur);
+
+		if (xmlAddChild(to, cur) == NULL)
+			return 1;
+
+		cur = following;
+	}
+
+	return 0;
+}
+
+/**
+ * Tree-handler callback: look up a node's parent.
+ *
+ * \param ctx           Callback context, passed on to ref_node()
+ * \param node          Node whose parent is wanted
+ * \param element_only  If true, a parent that is not an element node is
+ *                      reported as NULL
+ * \param result        Updated to the parent (which gains a reference),
+ *                      or to NULL
+ * \return 0 always
+ */
+int get_parent(void *ctx, void *node, bool element_only, void **result)
+{
+	xmlNode *up = ((xmlNode *) node)->parent;
+
+	if (up != NULL && element_only && up->type != XML_ELEMENT_NODE)
+		up = NULL;
+
+	*result = up;
+
+	if (up != NULL)
+		ref_node(ctx, up);
+
+	return 0;
+}
+
+/**
+ * Tree-handler callback: report whether a node has any children.
+ *
+ * \param ctx     Callback context (unused)
+ * \param node    Node to inspect
+ * \param result  Updated to true if the node has at least one child
+ * \return 0 always
+ */
+int has_children(void *ctx, void *node, bool *result)
+{
+	const xmlNode *n = node;
+
+	*result = (n->children != NULL);
+
+	return 0;
+}
+
+/* Tree-handler callback for associating 'node' with 'form'.  Currently a
+ * stub: nothing is recorded and success (0) is always reported. */
+int form_associate(void *ctx, void *form, void *node)
+{
+ return 0;
+}
+
+/**
+ * Tree-handler callback: add attributes to an existing node.
+ *
+ * Attributes in the null namespace are added plainly; all others are
+ * added with the matching cached namespace from ns_ns[].
+ *
+ * \param ctx           Callback context (unused)
+ * \param node          Node to add the attributes to
+ * \param attributes    Array of attributes
+ * \param n_attributes  Number of entries in the array
+ * \return 0 on success, 1 if a name or value could not be duplicated
+ *         (attributes added before the failure stay on the node)
+ */
+int add_attributes(void *ctx, void *node,
+		const hubbub_attribute *attributes, uint32_t n_attributes)
+{
+	for (size_t i = 0; i < n_attributes; i++) {
+		const hubbub_attribute *attr = &attributes[i];
+
+		/* Hubbub strings are length-delimited; libxml2 wants
+		 * NUL-terminated copies */
+		char *name = strndup((const char *) attr->name.ptr,
+				attr->name.len);
+		char *value = strndup((const char *) attr->value.ptr,
+				attr->value.len);
+
+		if (name == NULL || value == NULL) {
+			free(name);
+			free(value);
+			return 1;
+		}
+
+		if (attr->ns == HUBBUB_NS_NULL) {
+			xmlNewProp(node, BAD_CAST name, BAD_CAST value);
+		} else {
+			xmlNewNsProp(node, ns_ns[attr->ns], BAD_CAST name,
+					BAD_CAST value);
+		}
+
+		free(name);
+		free(value);
+	}
+
+	return 0;
+}
+
+/* Tree-handler callback notifying the document's quirks mode.  Currently a
+ * stub: 'mode' is ignored and success (0) is always reported. */
+int set_quirks_mode(void *ctx, hubbub_quirks_mode mode)
+{
+ return 0;
+}
+
+/**
+ * Tree-handler callback: the parser found an encoding declaration in
+ * the document.
+ *
+ * Follows http://www.whatwg.org/specs/web-apps/current-work/#change
+ *
+ * \param ctx   The struct content being parsed
+ * \param name  Name of the encoding the document asks for
+ * \return 0 to carry on with the current encoding, 1 if the document
+ *         must be reprocessed using 'name'
+ */
+int change_encoding(void *ctx, const char *name)
+{
+	struct content_html_data *html = &((struct content *) ctx)->data.html;
+	const char *current;
+	uint32_t confidence;
+
+	/* An encoding that is already recorded is one we are certain of */
+	if (html->encoding != NULL)
+		return 0;
+
+	/* Otherwise ask the parser how sure it is of the charset in use
+	 * (certainty at this point can only have come from a BOM) */
+	current = hubbub_parser_read_charset(html->parser, &confidence);
+
+	if (confidence != HUBBUB_CHARSET_CONFIDENT) {
+		/* Tentative: adopt the declared encoding in either case,
+		 * whether that confirms the current charset or forces the
+		 * data to be reprocessed with a different one */
+		html->encoding_source = ENCODING_SOURCE_META;
+		html->encoding = (char *) name;
+
+		/* Equal encodings will have the same string pointers */
+		if (current == name)
+			return 0;
+
+		return 1;
+	}
+
+	html->encoding = (char *) current;
+	html->encoding_source = ENCODING_SOURCE_DETECTED;
+
+	return 0;
+}
+
+
+/**
+ * Allocation hook given to Hubbub; routes its requests through talloc.
+ *
+ * \param ptr  Block to resize
+ * \param len  Requested size, in bytes
+ * \param pw   Client data, used as the talloc context for the block
+ * \return Result of talloc_realloc_size()
+ */
+static void *html_hubbub_realloc(void *ptr, size_t len, void *pw)
+{
+	void *block = talloc_realloc_size(pw, ptr, len);
+
+	return block;
+}
+
+
+
+/**
+ * Create and set up a Hubbub parser instance for a content, along with
+ * the libxml2 document it builds into.
+ *
+ * On failure nothing is left allocated: c->data.html.parser and
+ * c->data.html.document are both NULL.
+ *
+ * \param c  Content to create the parser for; its data.html.encoding is
+ *           passed to the parser as the initial encoding
+ * \return 0 on success, 1 on failure
+ */
+static int html_create_parser(struct content *c)
+{
+	struct content_html_data *html = &c->data.html;
+	hubbub_parser_optparams param;
+
+	html->parser = hubbub_parser_create(html->encoding,
+			html_hubbub_realloc,
+			c);
+	if (!html->parser) {
+		/* Callers either never had a document or have already
+		 * freed it; don't leave a stale pointer behind */
+		html->document = NULL;
+		return 1;
+	}
+
+	html->document = xmlNewDoc(BAD_CAST "1.0");
+	if (!html->document) {
+		/* Don't leak the parser that was just created */
+		hubbub_parser_destroy(html->parser);
+		html->parser = NULL;
+		return 1;
+	}
+
+	/* A fresh document has no namespaces declared yet, so the next
+	 * element created must declare them (and refresh the ns_ns[]
+	 * cache, whose entries died with any previous document) */
+	html->firstelem = true;
+
+	/* Each content gets its own copy of the handler table so that the
+	 * callback context can point at that content */
+	html->tree_handler = tree_handler;
+	html->tree_handler.ctx = c;
+	param.tree_handler = &html->tree_handler;
+	hubbub_parser_setopt(html->parser, HUBBUB_PARSER_TREE_HANDLER, &param);
+
+	param.document_node = html->document;
+	hubbub_parser_setopt(html->parser, HUBBUB_PARSER_DOCUMENT_NODE, &param);
+
+	return 0;
+}
+
+
+
+#endif
+
+
+
+
/**
* Create a CONTENT_HTML.
*
@@ -101,6 +484,10 @@ bool html_create(struct content *c, const char *params[])
union content_msg_data msg_data;
html->parser = 0;
+#ifdef WITH_HUBBUB
+ html->document = 0;
+ html->firstelem = true;
+#endif
html->encoding_handler = 0;
html->encoding = 0;
html->getenc = true;
@@ -135,16 +522,26 @@ bool html_create(struct content *c, const char *params[])
}
}
+#ifndef WITH_HUBBUB
html->parser = htmlCreatePushParserCtxt(0, 0, "", 0, 0,
XML_CHAR_ENCODING_NONE);
if (!html->parser)
goto no_memory;
+#else
+
+ /* Set up the parser, libxml2 document, and that */
+ if (html_create_parser(c) != 0)
+ goto no_memory;
+#endif
+
+#ifndef WITH_HUBBUB
if (html->encoding) {
/* an encoding was specified in the Content-Type header */
if (!html_set_parser_encoding(c, html->encoding))
return false;
}
+#endif
return true;
@@ -165,6 +562,7 @@ bool html_process_data(struct content *c, char *data, unsigned int size)
{
unsigned long x;
+#ifndef WITH_HUBBUB
if (c->data.html.getenc) {
/* No encoding was specified in the Content-Type header.
* Attempt to detect if the encoding is not 8-bit. If the
@@ -190,13 +588,36 @@ bool html_process_data(struct content *c, char *data, unsigned int size)
if (size == 0)
return true;
}
+#endif
+
+#ifdef WITH_HUBBUB
+ hubbub_error err;
+#endif
for (x = 0; x + CHUNK <= size; x += CHUNK) {
+#ifdef WITH_HUBBUB
+ err = hubbub_parser_parse_chunk(
+ c->data.html.parser, data + x, CHUNK);
+ if (err == HUBBUB_ENCODINGCHANGE) {
+ goto encoding_change;
+ }
+#else
htmlParseChunk(c->data.html.parser, data + x, CHUNK, 0);
+#endif
gui_multitask();
}
+
+#ifdef WITH_HUBBUB
+ err = hubbub_parser_parse_chunk(
+ c->data.html.parser, data + x, (size - x));
+ if (err == HUBBUB_ENCODINGCHANGE) {
+ goto encoding_change;
+ }
+#else
htmlParseChunk(c->data.html.parser, data + x, (int) (size - x), 0);
+#endif
+#ifndef WITH_HUBBUB
if (!c->data.html.encoding && c->data.html.parser->input->encoding) {
/* The encoding was not in headers or detected,
* and the parser found a <meta http-equiv="content-type"
@@ -259,8 +680,36 @@ bool html_process_data(struct content *c, char *data, unsigned int size)
if (!html_process_data(c, c->source_data, c->source_size))
return false;
}
+#endif
return true;
+
+#ifdef WITH_HUBBUB
+
+encoding_change:
+
+ /* Free the hubbub parser and the libxml2 document, if any */
+ hubbub_parser_destroy(c->data.html.parser);
+ if (c->data.html.document) {
+ xmlFreeDoc(c->data.html.document);
+ }
+
+ /* Set up the parser and the libxml2 document again */
+ if (html_create_parser(c) != 0) {
+ union content_msg_data msg_data;
+
+ msg_data.error = messages_get("NoMemory");
+ content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
+ return false;
+ }
+
+ /* Recurse to reprocess all that data. This is safe because
+ * the encoding is now specified at parser-start which means
+ * it cannot be changed again. */
+ return html_process_data(c, c->source_data, c->source_size);
+
+#endif
+
}
@@ -274,6 +723,7 @@ bool html_process_data(struct content *c, char *data, unsigned int size)
bool html_set_parser_encoding(struct content *c, const char *encoding)
{
+#ifndef WITH_HUBBUB
struct content_html_data *html = &c->data.html;
xmlError *error;
char error_message[500];
@@ -322,6 +772,7 @@ bool html_set_parser_encoding(struct content *c, const char *encoding)
/* Ensure noone else attempts to reset the encoding */
html->getenc = false;
+#endif
return true;
}
@@ -412,14 +863,28 @@ bool html_convert(struct content *c, int width, int height)
/* finish parsing */
if (c->source_size == 0)
+#ifndef WITH_HUBBUB
htmlParseChunk(c->data.html.parser, empty_document,
sizeof empty_document, 0);
+#else
+ hubbub_parser_parse_chunk(c->data.html.parser,
+ (uint8_t *) empty_document,
+ sizeof empty_document);
+#endif
+
+#ifndef WITH_HUBBUB
htmlParseChunk(c->data.html.parser, "", 0, 1);
document = c->data.html.parser->myDoc;
/*xmlDebugDumpDocument(stderr, c->data.html.parser->myDoc);*/
htmlFreeParserCtxt(c->data.html.parser);
c->data.html.parser = 0;
-
+#else
+ hubbub_parser_completed(c->data.html.parser);
+ hubbub_parser_destroy(c->data.html.parser);
+ c->data.html.parser = 0;
+ document = c->data.html.document;
+ /*xmlDebugDumpDocument(stderr, document);*/
+#endif
if (!document) {
LOG(("Parsing failed"));
msg_data.error = messages_get("ParsingFail");
@@ -1733,7 +2198,11 @@ void html_destroy(struct content *c)
}
if (c->data.html.parser)
+#ifndef WITH_HUBBUB
htmlFreeParserCtxt(c->data.html.parser);
+#else
+ hubbub_parser_destroy(c->data.html.parser);
+#endif
/* Free base target */
if (c->data.html.base_target) {
diff --git a/render/html.h b/render/html.h
index 5851b83b8..29691ea26 100644
--- a/render/html.h
+++ b/render/html.h
@@ -26,6 +26,10 @@
#define _NETSURF_RENDER_HTML_H_
#include <stdbool.h>
+#ifdef WITH_HUBBUB
+#include <hubbub/parser.h>
+#include <hubbub/tree.h>
+#endif
#include <libxml/HTMLparser.h>
#include "content/content_type.h"
#include "css/css.h"
@@ -114,11 +118,19 @@ struct content_html_iframe {
/** Data specific to CONTENT_HTML. */
struct content_html_data {
+#ifndef WITH_HUBBUB
htmlParserCtxt *parser; /**< HTML parser context. */
+#else
+ hubbub_parser *parser; /**< HTML parser context. */
+ hubbub_tree_handler tree_handler;
+ xmlDoc *document;
+ bool firstelem;
+#endif
+
/** HTML parser encoding handler. */
xmlCharEncodingHandler *encoding_handler;
- char *encoding; /**< Encoding of source, 0 if unknown. */
+ char *encoding; /**< Encoding of source, 0 if unknown. */
enum { ENCODING_SOURCE_HEADER, ENCODING_SOURCE_DETECTED,
ENCODING_SOURCE_META } encoding_source;
/**< Source of encoding information. */