diff options
| Field | Value | Date |
|---|---|---|
| author | Vincent Sanders <vince@kyllikki.org> | 2018-01-20 18:46:41 +0000 |
| committer | Vincent Sanders <vince@kyllikki.org> | 2018-01-20 18:46:41 +0000 |
| commit | 9f0e9af2eeb08abcaa4991ae4e87440dcba2ada1 (patch) | |
| tree | 6b05ac4bedd3cc13f773e2b2adf03f7522c997c1 | |
| parent | 7967f13f57f08d2b8f38b8c52567d847933b79d8 (diff) | |
| download | libnspdf-9f0e9af2eeb08abcaa4991ae4e87440dcba2ada1.tar.gz libnspdf-9f0e9af2eeb08abcaa4991ae4e87440dcba2ada1.tar.bz2 | |
correctly parse content streams for page contents
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | include/nspdf/errors.h | 1 |
| -rw-r--r-- | src/cos_object.c | 113 |
| -rw-r--r-- | src/cos_parse.c | 142 |
| -rw-r--r-- | src/cos_parse.h | 3 |
| -rw-r--r-- | src/page.c | 48 |
| -rw-r--r-- | src/pdf_doc.c | 19 |
6 files changed, 206 insertions, 120 deletions
diff --git a/include/nspdf/errors.h b/include/nspdf/errors.h index f2142ff..6591dbb 100644 --- a/include/nspdf/errors.h +++ b/include/nspdf/errors.h @@ -24,6 +24,7 @@ typedef enum { NSPDFERROR_TYPE, /**< wrong type error */ NSPDFERROR_NOTFOUND, /**< key not found */ NSPDFERROR_FORMAT, /**< objects do not cornform to expected format */ + NSPDFERROR_INCOMPLETE, /**< operation was not completed */ } nspdferror; #endif diff --git a/src/cos_object.c b/src/cos_object.c index c7ec4e6..7a02ebd 100644 --- a/src/cos_object.c +++ b/src/cos_object.c @@ -386,36 +386,111 @@ cos_get_object(struct nspdf_doc *doc, return res; } +/* + * exported interface documented in cos_object.h + * + * slightly different behaviour to other getters: + * - This getter can be passed an object pointer to a synthetic parsed content + * stream object in which case it returns that objects content operation + * list. + * + * - Alternatively it can be passed a single indirect object reference to a + * content stream which will be processed into a filtered stream and then + * converted into a parsed content stream which replaces the passed + * object. The underlying filtered streams will then be freed. + * + * - An array of indirect object references to content streams all of which + * will be converted as if a single stream of tokens and the result handled + * as per the single reference case. 
+ */ nspdferror cos_get_content(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_content **content_out) { nspdferror res; - struct cos_object *content_obj; + struct cos_object **references; + unsigned int reference_count; + struct cos_stream **streams; + unsigned int index; + struct cos_object *content_obj; /* parsed content object */ + struct cos_object tmpobj; + + /* already parsed the content stream */ + if (cobj->type == COS_TYPE_CONTENT) { + *content_out = cobj->u.content; + } else if (cobj->type == COS_TYPE_REFERENCE) { + /* single reference */ + reference_count = 1; + references = calloc(reference_count, sizeof(struct cos_object *)); + if (references == NULL) { + return NSPDFERROR_NOMEM; + } - res = nspdf__xref_get_referenced(doc, &cobj); - if (res == NSPDFERROR_OK) { - if (cobj->type == COS_TYPE_STREAM) { - res = cos_parse_content_stream(doc, cobj->u.stream, &content_obj); - if (res == NSPDFERROR_OK) { - /* replace stream object with parsed content operations */ - struct cos_object tmpobj; - tmpobj = *cobj; - *cobj = *content_obj; - *content_obj = tmpobj; - cos_free_object(content_obj); - - *content_out = cobj->u.content; + *references = cobj; + } else if (cobj->type == COS_TYPE_ARRAY) { + /* array of references */ + reference_count = cobj->u.array->length; + references = malloc(reference_count * sizeof(struct cos_object *)); + if (references == NULL) { + return NSPDFERROR_NOMEM; + } + memcpy(references, cobj->u.array->values, reference_count * sizeof(struct cos_object *)); + /* check all objects in array are references */ + for (index = 0; index < reference_count ; index++) { + if ((*(references + index))->type != COS_TYPE_REFERENCE) { + free(references); + return NSPDFERROR_TYPE; } - } else if (cobj->type == COS_TYPE_CONTENT) { - *content_out = cobj->u.content; - } else { - res = NSPDFERROR_TYPE; } + } else { + return NSPDFERROR_TYPE; } - return res; + + /* obtain array of streams */ + streams = malloc(reference_count * sizeof(struct cos_stream 
*)); + if (streams == NULL) { + free(references); + return NSPDFERROR_TYPE; + } + + for (index = 0; index < reference_count ; index++) { + struct cos_object *stream_obj; + + stream_obj = *(references + index); + res = nspdf__xref_get_referenced(doc, &stream_obj); + if (res != NSPDFERROR_OK) { + free(references); + free(streams); + return res; + } + if (stream_obj->type != COS_TYPE_STREAM) { + free(references); + free(streams); + return NSPDFERROR_TYPE; + } + *(streams + index) = stream_obj->u.stream;; + } + + res = cos_parse_content_streams(doc, streams, reference_count, &content_obj); + if (res != NSPDFERROR_OK) { + free(references); + free(streams); + return res; + } + + /* replace passed object with parsed content operations object */ + tmpobj = *cobj; + *cobj = *content_obj; + *content_obj = tmpobj; + cos_free_object(content_obj); + + /** \todo call nspdf__xref_free_referenced(doc, *(references + index)); to free up storage associated with already parsed streams */ + + *content_out = cobj->u.content; + + return NSPDFERROR_OK; } /* diff --git a/src/cos_parse.c b/src/cos_parse.c index 5ccd171..46282ca 100644 --- a/src/cos_parse.c +++ b/src/cos_parse.c @@ -1115,6 +1115,10 @@ parse_operator(struct cos_stream *stream, offset = *offset_out; + if (offset >= stream->length) { + return NSPDFERROR_SYNTAX; + } + /* first char */ c = stream_byte(stream, offset); if ((bclass[c] & (BC_WSPC | BC_CMNT) ) != 0) { @@ -1125,13 +1129,15 @@ parse_operator(struct cos_stream *stream, offset++; /* possible second char */ c = stream_byte(stream, offset); - if ((bclass[c] & (BC_WSPC | BC_CMNT) ) == 0) { + if ((offset < stream->length) && + ((bclass[c] & (BC_WSPC | BC_CMNT) ) == 0)) { lookup = (lookup << 8) | c; offset++; /* possible third char */ c = stream_byte(stream, offset); - if ((bclass[c] & (BC_WSPC | BC_CMNT) ) == 0) { + if ((offset < stream->length) && + ((bclass[c] & (BC_WSPC | BC_CMNT) ) == 0)) { lookup = (lookup << 8) | c; offset++; @@ -1241,55 +1247,60 @@ 
parse_operator(struct cos_stream *stream, #define MAX_OPERAND_COUNT 32 -static nspdferror +static inline nspdferror parse_content_operation(struct nspdf_doc *doc, struct cos_stream *stream, strmoff_t *offset_out, + struct cos_object **operands, + unsigned int *operand_idx, struct content_operation *operation_out) { strmoff_t offset; nspdferror res; enum content_operator operator; - struct cos_object *operands[MAX_OPERAND_COUNT]; - unsigned int operand_idx = 0; offset = *offset_out; res = parse_operator(stream, &offset, &operator); while (res == NSPDFERROR_SYNTAX) { /* was not an operator so check for what else it could have been */ - if (operand_idx >= MAX_OPERAND_COUNT) { + if (*operand_idx >= MAX_OPERAND_COUNT) { /** \todo free any stacked operands */ printf("too many operands\n"); return NSPDFERROR_SYNTAX; } + if (offset >= stream->length) { + *offset_out = offset; + return NSPDFERROR_INCOMPLETE; + } + switch (stream_byte(stream, offset)) { case '-': case '+': case '.': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - res = cos_parse_number(stream, &offset, &operands[operand_idx]); + res = cos_parse_number(stream, &offset, &operands[*operand_idx]); break; case 't': case 'f': - res = cos_parse_boolean(stream, &offset, &operands[operand_idx]); + res = cos_parse_boolean(stream, &offset, &operands[*operand_idx]); break; case 'n': - res = cos_parse_null(stream, &offset, &operands[operand_idx]); + res = cos_parse_null(stream, &offset, &operands[*operand_idx]); break; case '(': - res = cos_parse_string(stream, &offset, &operands[operand_idx]); + res = cos_parse_string(stream, &offset, &operands[*operand_idx]); break; case '/': - res = cos_parse_name(stream, &offset, &operands[operand_idx]); + res = cos_parse_name(stream, &offset, &operands[*operand_idx]); break; case '[': - res = cos_parse_list(doc, stream, &offset, &operands[operand_idx]); + res = cos_parse_list(doc, stream, &offset, &operands[*operand_idx]); break; 
case '<': @@ -1297,16 +1308,23 @@ parse_content_operation(struct nspdf_doc *doc, res = cos_parse_dictionary(doc, stream, &offset, - &operands[operand_idx]); + &operands[*operand_idx]); } else { res = cos_parse_hex_string(stream, &offset, - &operands[operand_idx]); + &operands[*operand_idx]); } break; default: - printf("unknown operand type\n"); + printf("unknown operand with %d operands %d to %d of %d\n>>>%.*s<<<\n", + *operand_idx, + (*offset_out), + offset, + stream->length, + (offset + 1) - (*offset_out), + stream->data + (*offset_out)); + res = NSPDFERROR_SYNTAX; /* syntax error */ } @@ -1319,26 +1337,44 @@ parse_content_operation(struct nspdf_doc *doc, } /* move to next operand */ - operand_idx++; + (*operand_idx)++; res = parse_operator(stream, &offset, &operator); } operation_out->operator = operator; - //printf("returning operator %d with %d operands\n", operator, operand_idx); + + /* + printf("returning operator %d with %d operands %d to %d of %d\n>>>%.*s<<<\n", + operator, + *operand_idx, + (*offset_out), + offset, + stream->length, + offset - (*offset_out), + stream->data + (*offset_out)); + */ + + *operand_idx = 0; *offset_out = offset; + return NSPDFERROR_OK; } nspdferror -cos_parse_content_stream(struct nspdf_doc *doc, - struct cos_stream *stream, - struct cos_object **content_out) +cos_parse_content_streams(struct nspdf_doc *doc, + struct cos_stream **streams, + unsigned int stream_count, + struct cos_object **content_out) { nspdferror res; struct cos_object *cosobj; strmoff_t offset; + struct cos_stream *stream; + unsigned int stream_index; + struct cos_object *operands[MAX_OPERAND_COUNT]; + unsigned int operand_idx = 0; //printf("%.*s", (int)stream->length, stream->data); @@ -1354,42 +1390,50 @@ cos_parse_content_stream(struct nspdf_doc *doc, goto cos_parse_content_stream_error; } - offset = 0; + for (stream_index = 0; stream_index < stream_count; stream_index++) { + stream = *(streams + stream_index); + offset = 0; - /* skip any leading whitespace 
*/ - res = nspdf__stream_skip_ws(stream, &offset); - if (res != NSPDFERROR_OK) { - goto cos_parse_content_stream_error; - } + /* skip any leading whitespace */ + res = nspdf__stream_skip_ws(stream, &offset); + if (res != NSPDFERROR_OK) { + goto cos_parse_content_stream_error; + } + + while (offset < stream->length) { + + /* ensure there is space in the operations array */ + if (cosobj->u.content->alloc < (cosobj->u.content->length + 1)) { + struct content_operation *newops; + newops = realloc(cosobj->u.content->operations, + sizeof(struct content_operation) * + (cosobj->u.content->alloc + 32)); + if (newops == NULL) { + res = NSPDFERROR_NOMEM; + goto cos_parse_content_stream_error; + } + cosobj->u.content->operations = newops; + cosobj->u.content->alloc += 32; + } - while (offset < stream->length) { - struct content_operation cop; - - /* ensure there is space in the operations array */ - if (cosobj->u.content->alloc < (cosobj->u.content->length + 1)) { - struct content_operation *newops; - newops = realloc(cosobj->u.content->operations, - sizeof(struct content_operation) * - (cosobj->u.content->alloc + 32)); - if (newops == NULL) { - res = NSPDFERROR_NOMEM; + /* parse an operation out */ + res = parse_content_operation( + doc, + stream, + &offset, + operands, + &operand_idx, + cosobj->u.content->operations + cosobj->u.content->length); + if (res== NSPDFERROR_OK) { + cosobj->u.content->length++; + } else if (res == NSPDFERROR_INCOMPLETE) { + //printf("Incomplete\n"); + } else if (res != NSPDFERROR_OK) { goto cos_parse_content_stream_error; } - cosobj->u.content->operations = newops; - cosobj->u.content->alloc += 32; - } - res = parse_content_operation( - doc, - stream, - &offset, - cosobj->u.content->operations + cosobj->u.content->length); - if (res != NSPDFERROR_OK) { - goto cos_parse_content_stream_error; } - cosobj->u.content->length++; } - *content_out = cosobj; return NSPDFERROR_OK; diff --git a/src/cos_parse.h b/src/cos_parse.h index a9cb9c9..a6a65ca 100644 
--- a/src/cos_parse.h +++ b/src/cos_parse.h @@ -30,6 +30,7 @@ nspdferror cos_parse_object(struct nspdf_doc *doc, struct cos_stream *stream, st /** * Parse content stream into content operations object */ -nspdferror cos_parse_content_stream(struct nspdf_doc *doc, struct cos_stream *stream, struct cos_object **content_out); +nspdferror cos_parse_content_streams(struct nspdf_doc *doc, struct cos_stream **streams, unsigned int stream_count, struct cos_object **content_out); + #endif @@ -146,62 +146,20 @@ nspdf_page_count(struct nspdf_doc *doc, unsigned int *pages_out) return NSPDFERROR_OK; } -static nspdferror -nspdf__render_content_stream(struct nspdf_doc *doc, - struct page_table_entry *page_entry, - struct cos_object *content_entry) -{ - nspdferror res; - struct cos_content *content_operations; - - res = cos_get_content(doc, content_entry, &content_operations); - if (res == NSPDFERROR_OK) { - printf("%p\n", content_operations); - } - - return res; -} /* exported interface documented in nspdf/page.h */ nspdferror nspdf_page_render(struct nspdf_doc *doc, unsigned int page_number) { struct page_table_entry *page_entry; - struct cos_object *content_array; + struct cos_content *page_content; /* page operations array */ nspdferror res; page_entry = doc->page_table + page_number; - /* contents may be an array of stream objects or just a single one */ - res = cos_get_array(doc, page_entry->contents, &content_array); + res = cos_get_content(doc, page_entry->contents, &page_content); if (res == NSPDFERROR_OK) { - unsigned int content_stream_count; - unsigned int content_stream_index; - - res = cos_get_array_size(doc, content_array, &content_stream_count); - if (res != NSPDFERROR_OK) { - return res; - } - for (content_stream_index = 0; - content_stream_index < content_stream_count; - content_stream_index++) { - struct cos_object *content_entry; - res = cos_get_array_value(doc, - content_array, - content_stream_index, - &content_entry); - if (res != NSPDFERROR_OK) { - break; - 
} - - res = nspdf__render_content_stream(doc, page_entry, content_entry); - if (res != NSPDFERROR_OK) { - break; - } - } - } else if (res == NSPDFERROR_TYPE) { - res = nspdf__render_content_stream(doc, page_entry, page_entry->contents); + printf("%p\n", page_content); } - return res; } diff --git a/src/pdf_doc.c b/src/pdf_doc.c index d7c7a0e..3e55e16 100644 --- a/src/pdf_doc.c +++ b/src/pdf_doc.c @@ -23,19 +23,26 @@ nspdferror nspdf__stream_skip_ws(struct cos_stream *stream, strmoff_t *offset) { uint8_t c; - /* TODO sort out keeping offset in range */ + + if ((*offset) >= stream->length) { + return NSPDFERROR_OK; + } + c = stream_byte(stream, *offset); - while ((bclass[c] & (BC_WSPC | BC_CMNT) ) != 0) { + while (((*offset) < stream->length) && + ((bclass[c] & (BC_WSPC | BC_CMNT) ) != 0)) { (*offset)++; /* skip comments */ - if ((bclass[c] & BC_CMNT) != 0) { + if (((*offset) < stream->length) && + ((bclass[c] & BC_CMNT) != 0)) { c = stream_byte(stream, *offset); - while ((bclass[c] & BC_EOLM ) == 0) { + while ((*offset < stream->length) && + ((bclass[c] & BC_EOLM ) == 0)) { (*offset)++; - c = stream_byte(stream, *offset); + c = stream_byte(stream, (*offset)); } } - c = stream_byte(stream, *offset); + c = stream_byte(stream, (*offset)); } return NSPDFERROR_OK; } |