#include #include #include #include "utils/utils.h" #include "input/inputstream.h" #include "tokeniser/tokeniser.h" #include "testutils.h" static const uint8_t *pbuffer; static void buffer_handler(const uint8_t *buffer, size_t len, void *pw); static void token_handler(const hubbub_token *token, void *pw); static void *myrealloc(void *ptr, size_t len, void *pw) { UNUSED(pw); return realloc(ptr, len); } int main(int argc, char **argv) { hubbub_inputstream *stream; hubbub_tokeniser *tok; hubbub_tokeniser_optparams params; FILE *fp; size_t len, origlen; #define CHUNK_SIZE (4096) uint8_t buf[CHUNK_SIZE]; if (argc != 3) { printf("Usage: %s \n", argv[0]); return 1; } /* Initialise library */ assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK); stream = hubbub_inputstream_create("UTF-8", "UTF-8", myrealloc, NULL); assert(stream != NULL); tok = hubbub_tokeniser_create(stream, myrealloc, NULL); assert(tok != NULL); params.buffer_handler.handler = buffer_handler; params.buffer_handler.pw = NULL; assert(hubbub_tokeniser_setopt(tok, HUBBUB_TOKENISER_BUFFER_HANDLER, ¶ms) == HUBBUB_OK); params.token_handler.handler = token_handler; params.token_handler.pw = NULL; assert(hubbub_tokeniser_setopt(tok, HUBBUB_TOKENISER_TOKEN_HANDLER, ¶ms) == HUBBUB_OK); fp = fopen(argv[2], "rb"); if (fp == NULL) { printf("Failed opening %s\n", argv[2]); return 1; } fseek(fp, 0, SEEK_END); origlen = len = ftell(fp); fseek(fp, 0, SEEK_SET); while (len >= CHUNK_SIZE) { fread(buf, 1, CHUNK_SIZE, fp); assert(hubbub_inputstream_append(stream, buf, CHUNK_SIZE) == HUBBUB_OK); len -= CHUNK_SIZE; assert(hubbub_tokeniser_run(tok) == HUBBUB_OK); } if (len > 0) { fread(buf, 1, len, fp); assert(hubbub_inputstream_append(stream, buf, len) == HUBBUB_OK); len = 0; assert(hubbub_inputstream_append(stream, NULL, 0) == HUBBUB_OK); assert(hubbub_tokeniser_run(tok) == HUBBUB_OK); } fclose(fp); hubbub_tokeniser_destroy(tok); hubbub_inputstream_destroy(stream); assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK); printf("PASS\n"); return 0; } void buffer_handler(const uint8_t *buffer, size_t len, void *pw) { UNUSED(len); UNUSED(pw); pbuffer = buffer; } void token_handler(const hubbub_token *token, void *pw) { static const char *token_names[] = { "DOCTYPE", "START TAG", "END TAG", "COMMENT", "CHARACTERS", "EOF" }; size_t i; UNUSED(pw); printf("%s: ", token_names[token->type]); switch (token->type) { case HUBBUB_TOKEN_DOCTYPE: printf("'%.*s' (%svalid)\n", (int) token->data.doctype.name.len, pbuffer + token->data.doctype.name.data_off, token->data.doctype.correct ? "" : "in"); break; case HUBBUB_TOKEN_START_TAG: printf("'%.*s' %s\n", (int) token->data.tag.name.len, pbuffer + token->data.tag.name.data_off, (token->data.tag.n_attributes > 0) ? "attributes:" : ""); for (i = 0; i < token->data.tag.n_attributes; i++) { printf("\t'%.*s' = '%.*s'\n", (int) token->data.tag.attributes[i].name.len, pbuffer + token->data.tag.attributes[i].name.data_off, (int) token->data.tag.attributes[i].value.len, pbuffer + token->data.tag.attributes[i].value.data_off); } break; case HUBBUB_TOKEN_END_TAG: printf("'%.*s' %s\n", (int) token->data.tag.name.len, pbuffer + token->data.tag.name.data_off, (token->data.tag.n_attributes > 0) ? "attributes:" : ""); for (i = 0; i < token->data.tag.n_attributes; i++) { printf("\t'%.*s' = '%.*s'\n", (int) token->data.tag.attributes[i].name.len, pbuffer + token->data.tag.attributes[i].name.data_off, (int) token->data.tag.attributes[i].value.len, pbuffer + token->data.tag.attributes[i].value.data_off); } break; case HUBBUB_TOKEN_COMMENT: printf("'%.*s'\n", (int) token->data.comment.len, pbuffer + token->data.comment.data_off); break; case HUBBUB_TOKEN_CHARACTER: printf("'%.*s'\n", (int) token->data.character.len, pbuffer + token->data.character.data_off); break; case HUBBUB_TOKEN_EOF: printf("\n"); break; } }