diff options
author | John Mark Bell <jmb@netsurf-browser.org> | 2007-06-23 22:40:25 +0000 |
---|---|---|
committer | John Mark Bell <jmb@netsurf-browser.org> | 2007-06-23 22:40:25 +0000 |
commit | 7b30a5520cfb56e651f0eb4da85a3e07747da7dc (patch) | |
tree | 5d6281c071c089e1e7a8ae6f8044cecaf6a7db16 /include/hubbub/parser.h | |
download | libhubbub-7b30a5520cfb56e651f0eb4da85a3e07747da7dc.tar.gz libhubbub-7b30a5520cfb56e651f0eb4da85a3e07747da7dc.tar.bz2 |
Import hubbub -- an HTML parsing library.
Plenty of work still to do (like tree generation ;)
svn path=/trunk/hubbub/; revision=3359
Diffstat (limited to 'include/hubbub/parser.h')
-rw-r--r-- | include/hubbub/parser.h | 84 |
1 files changed, 84 insertions, 0 deletions
diff --git a/include/hubbub/parser.h b/include/hubbub/parser.h
new file mode 100644
index 0000000..cdf8664
--- /dev/null
+++ b/include/hubbub/parser.h
@@ -0,0 +1,84 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#ifndef hubbub_parser_h_
+#define hubbub_parser_h_
+
+#include <inttypes.h>	/* uint8_t; NOTE(review): size_t is not guaranteed by this header -- presumably supplied via the includes below, confirm */
+
+#include <hubbub/errors.h>
+#include <hubbub/functypes.h>
+#include <hubbub/types.h>
+
+typedef struct hubbub_parser hubbub_parser;	/* opaque handle: the struct body is not defined in this header */
+
+/**
+ * Hubbub parser option types (the `type` argument of hubbub_parser_setopt())
+ */
+typedef enum hubbub_parser_opttype {
+	HUBBUB_PARSER_TOKEN_HANDLER,	/* see token_handler in hubbub_parser_optparams */
+	HUBBUB_PARSER_BUFFER_HANDLER,	/* see buffer_handler in hubbub_parser_optparams */
+	HUBBUB_PARSER_ERROR_HANDLER,	/* see error_handler in hubbub_parser_optparams */
+	HUBBUB_PARSER_CONTENT_MODEL,	/* see content_model in hubbub_parser_optparams */
+} hubbub_parser_opttype;	/* NOTE(review): presumably each option reads the like-named union member -- confirm */
+
+/**
+ * Hubbub parser option parameters (the `params` argument of hubbub_parser_setopt()); one member per option type
+ */
+typedef union hubbub_parser_optparams {
+	struct {
+		hubbub_token_handler handler;	/* handler value; its type is declared outside this header */
+		void *pw;	/* untyped pointer paired with handler; NOTE(review): presumably passed back to it -- confirm */
+	} token_handler;
+
+	struct {
+		hubbub_buffer_handler handler;	/* handler value; its type is declared outside this header */
+		void *pw;	/* untyped pointer paired with handler; presumably same role as token_handler.pw */
+	} buffer_handler;
+
+	struct {
+		hubbub_error_handler handler;	/* handler value; its type is declared outside this header */
+		void *pw;	/* untyped pointer paired with handler; presumably same role as token_handler.pw */
+	} error_handler;
+
+	struct {
+		hubbub_content_model model;	/* content model value; its type is declared outside this header */
+	} content_model;
+} hubbub_parser_optparams;
+
+/* Create a hubbub parser; the returned handle is the first argument of every other function below */
+hubbub_parser *hubbub_parser_create(const char *enc, const char *int_enc,	/* NOTE(review): presumably charset names -- confirm */
+		hubbub_alloc alloc, void *pw);	/* NOTE(review): pw is presumably the context for alloc -- confirm */
+/* Destroy a hubbub parser */
+void hubbub_parser_destroy(hubbub_parser *parser);
+
+/* Configure a hubbub parser: type names the option, params carries its parameters */
+hubbub_error hubbub_parser_setopt(hubbub_parser *parser,
+		hubbub_parser_opttype type,
+		hubbub_parser_optparams *params);
+
+/* Pass a chunk of data to a hubbub parser for parsing */
+/* This data is encoded in the input charset */
+hubbub_error hubbub_parser_parse_chunk(hubbub_parser *parser,
+		uint8_t *data, size_t len);	/* NOTE(review): len is presumably the byte length of data -- confirm */
+/* Pass a chunk of extraneous data to a hubbub parser for parsing */
+/* This data is UTF-8 encoded */
+hubbub_error hubbub_parser_parse_extraneous_chunk(hubbub_parser *parser,
+		uint8_t *data, size_t len);	/* NOTE(review): len is presumably the byte length of data -- confirm */
+/* Inform the parser that the last chunk of data has been parsed */
+hubbub_error hubbub_parser_completed(hubbub_parser *parser);
+
+/* Read the document charset */
+const char *hubbub_parser_read_charset(hubbub_parser *parser,
+		hubbub_charset_source *source);	/* NOTE(review): source looks like an out-parameter -- confirm */
+
+/* Claim ownership of the document buffer */
+hubbub_error hubbub_parser_claim_buffer(hubbub_parser *parser,
+		uint8_t **buffer, size_t *len);	/* NOTE(review): buffer and len look like out-parameters -- confirm */
+
+#endif	/* hubbub_parser_h_ */
+
|