diff options
author | Vincent Sanders <vince@kyllikki.org> | 2017-12-24 10:06:04 +0000 |
---|---|---|
committer | Vincent Sanders <vince@kyllikki.org> | 2017-12-24 10:06:04 +0000 |
commit | 0c29558f8caf51da6c4258fbd1ebfa341af5aab9 (patch) | |
tree | e9831771710d81ee4e374d9449c856f0700ed191 /src | |
parent | 897a8900bf77db2d804b0f78bc4b41371e05347f (diff) | |
download | libnspdf-0c29558f8caf51da6c4258fbd1ebfa341af5aab9.tar.gz libnspdf-0c29558f8caf51da6c4258fbd1ebfa341af5aab9.tar.bz2 |
clean up error handling and start decoding of indirect objects
Diffstat (limited to 'src')
-rw-r--r-- | src/cos_decode.c | 158 | ||||
-rw-r--r-- | src/cos_object.c | 40 | ||||
-rw-r--r-- | src/cos_object.h | 12 | ||||
-rw-r--r-- | src/pdf_doc.c | 57 | ||||
-rw-r--r-- | src/pdf_doc.h | 8 | ||||
-rw-r--r-- | src/xref.c | 10 |
6 files changed, 191 insertions, 94 deletions
diff --git a/src/cos_decode.c b/src/cos_decode.c index 3936e05..730c771 100644 --- a/src/cos_decode.c +++ b/src/cos_decode.c @@ -10,9 +10,13 @@ #include "cos_object.h" #include "pdf_doc.h" +/** increments in which cos string allocations are extended */ #define COS_STRING_ALLOC 32 -nspdferror +/** Maximum length of cos name */ +#define NAME_MAX_LENGTH 127 + +static nspdferror cos_string_append(struct cos_string *s, uint8_t c) { //printf("appending 0x%x to %p len %d alloc %d\n", c, s->data, s->length, s->alloc); @@ -29,7 +33,7 @@ cos_string_append(struct cos_string *s, uint8_t c) return NSPDFERROR_OK; } -uint8_t xtoi(uint8_t x) +static uint8_t xtoi(uint8_t x) { if (x >= '0' && x <= '9') { x = x - '0'; @@ -41,7 +45,8 @@ uint8_t xtoi(uint8_t x) return x; } -int cos_decode_number(struct pdf_doc *doc, +static nspdferror +cos_decode_number(struct pdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out) { @@ -60,7 +65,8 @@ int cos_decode_number(struct pdf_doc *doc, uint64_t tens; if (len == 0) { - return -2; /* parse error no decimals in input */ + /* parse error no decimals in input */ + return NSPDFERROR_SYNTAX; } /* sum value from each place */ for (tens = 1; len > 0; tens = tens * 10, len--) { @@ -71,7 +77,7 @@ int cos_decode_number(struct pdf_doc *doc, cosobj = calloc(1, sizeof(struct cos_object)); if (cosobj == NULL) { - return -1; /* memory error */ + return NSPDFERROR_NOMEM; } cosobj->type = COS_TYPE_INT; @@ -81,20 +87,20 @@ int cos_decode_number(struct pdf_doc *doc, *offset_out = offset; - return 0; + return NSPDFERROR_OK; } num[len] = c - '0'; offset++; } - return -1; /* number too long */ + return NSPDFERROR_RANGE; /* number too long */ } /** - * literal string processing + * decode literal string * */ -nspdferror +static nspdferror cos_decode_string(struct pdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out) @@ -220,8 +226,10 @@ cos_decode_string(struct pdf_doc *doc, return NSPDFERROR_OK; } - -nspdferror +/** + * decode hex encoded 
string + */ +static nspdferror cos_decode_hex_string(struct pdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out) @@ -281,8 +289,11 @@ cos_decode_hex_string(struct pdf_doc *doc, return NSPDFERROR_SYNTAX; } - -int cos_decode_dictionary(struct pdf_doc *doc, +/** + * decode a dictionary object + */ +static nspdferror +cos_decode_dictionary(struct pdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out) { @@ -302,7 +313,7 @@ int cos_decode_dictionary(struct pdf_doc *doc, offset += 2; doc_skip_ws(doc, &offset); - printf("found a dictionary\n"); + //printf("found a dictionary\n"); cosobj = calloc(1, sizeof(struct cos_object)); if (cosobj == NULL) { @@ -314,7 +325,7 @@ int cos_decode_dictionary(struct pdf_doc *doc, (DOC_BYTE(doc, offset + 1) != '>')) { res = cos_decode_object(doc, &offset, &key); - if (res != 0) { + if (res != NSPDFERROR_OK) { /* todo free up any dictionary entries already created */ printf("key object decode failed\n"); return res; @@ -322,12 +333,12 @@ int cos_decode_dictionary(struct pdf_doc *doc, if (key->type != COS_TYPE_NAME) { /* key value pairs without a name */ printf("key was %d not a name %d\n", key->type, COS_TYPE_NAME); - return -1; /* syntax error */ + return NSPDFERROR_SYNTAX; } - printf("key: %s\n", key->u.n); + //printf("key: %s\n", key->u.n); res = cos_decode_object(doc, &offset, &value); - if (res != 0) { + if (res != NSPDFERROR_OK) { printf("Unable to decode value object in dictionary\n"); /* todo free up any dictionary entries already created */ return res; @@ -337,7 +348,7 @@ int cos_decode_dictionary(struct pdf_doc *doc, entry = calloc(1, sizeof(struct cos_dictionary_entry)); if (entry == NULL) { /* todo free up any dictionary entries already created */ - return -1; /* memory error */ + return NSPDFERROR_NOMEM; } entry->key = key; @@ -353,11 +364,13 @@ int cos_decode_dictionary(struct pdf_doc *doc, *cosobj_out = cosobj; *offset_out = offset; - return 0; + return NSPDFERROR_OK; } - -nspdferror +/** + * decode 
a list + */ +static nspdferror cos_decode_list(struct pdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out) @@ -419,19 +432,19 @@ cos_decode_list(struct pdf_doc *doc, *cosobj_out = cosobj; *offset_out = offset; - return 0; + return NSPDFERROR_OK; } -#define NAME_MAX_LENGTH 127 /** * decode a name object * * \todo deal with # symbols on pdf versions 1.2 and later */ -int cos_decode_name(struct pdf_doc *doc, - uint64_t *offset_out, - struct cos_object **cosobj_out) +static nspdferror +cos_decode_name(struct pdf_doc *doc, + uint64_t *offset_out, + struct cos_object **cosobj_out) { uint64_t offset; struct cos_object *cosobj; @@ -445,7 +458,7 @@ int cos_decode_name(struct pdf_doc *doc, if (c != '/') { return -1; /* names must be prefixed with a / */ } - printf("found a name\n"); + //printf("found a name\n"); c = DOC_BYTE(doc, offset); while ((idx <= NAME_MAX_LENGTH) && @@ -468,7 +481,7 @@ int cos_decode_name(struct pdf_doc *doc, cosobj = calloc(1, sizeof(struct cos_object)); if (cosobj == NULL) { - return -1; /* memory error */ + return NSPDFERROR_NOMEM; /* memory error */ } cosobj->type = COS_TYPE_NAME; @@ -478,13 +491,16 @@ int cos_decode_name(struct pdf_doc *doc, *offset_out = offset; - return 0; + return NSPDFERROR_OK; } - -int cos_decode_boolean(struct pdf_doc *doc, - uint64_t *offset_out, - struct cos_object **cosobj_out) +/** + * decode a cos boolean object + */ +static int +cos_decode_boolean(struct pdf_doc *doc, + uint64_t *offset_out, + struct cos_object **cosobj_out) { uint64_t offset; struct cos_object *cosobj; @@ -541,7 +557,7 @@ int cos_decode_boolean(struct pdf_doc *doc, cosobj = calloc(1, sizeof(struct cos_object)); if (cosobj == NULL) { - return -1; /* memory error */ + return NSPDFERROR_NOMEM; /* memory error */ } cosobj->type = COS_TYPE_BOOL; @@ -551,13 +567,16 @@ int cos_decode_boolean(struct pdf_doc *doc, *offset_out = offset; - return 0; - + return NSPDFERROR_OK; } -int cos_decode_null(struct pdf_doc *doc, - uint64_t *offset_out, - 
struct cos_object **cosobj_out) +/** + * decode the null object. + */ +static nspdferror +cos_decode_null(struct pdf_doc *doc, + uint64_t *offset_out, + struct cos_object **cosobj_out) { uint64_t offset; struct cos_object *cosobj; @@ -569,14 +588,17 @@ int cos_decode_null(struct pdf_doc *doc, if ((c != 'n') && (c != 'N')) { return -1; /* syntax error */ } + c = DOC_BYTE(doc, offset++); if ((c != 'u') && (c != 'U')) { return -1; /* syntax error */ } + c = DOC_BYTE(doc, offset++); if ((c != 'l') && (c != 'L')) { return -1; /* syntax error */ } + c = DOC_BYTE(doc, offset++); if ((c != 'l') && (c != 'L')) { return -1; /* syntax error */ @@ -586,15 +608,16 @@ int cos_decode_null(struct pdf_doc *doc, cosobj = calloc(1, sizeof(struct cos_object)); if (cosobj == NULL) { - return -1; /* memory error */ + return NSPDFERROR_NOMEM; } cosobj->type = COS_TYPE_NULL; *offset_out = offset; - return 0; + return NSPDFERROR_OK; } + /** * attempt to decode the stream into a reference * @@ -607,9 +630,10 @@ int cos_decode_null(struct pdf_doc *doc, * \param cosobj_out the object to return into, on input contains the first * integer */ -int cos_attempt_decode_reference(struct pdf_doc *doc, - uint64_t *offset_out, - struct cos_object **cosobj_out) +static nspdferror +cos_attempt_decode_reference(struct pdf_doc *doc, + uint64_t *offset_out, + struct cos_object **cosobj_out) { uint64_t offset; struct cos_object *cosobj; /* possible generation object */ @@ -621,13 +645,14 @@ int cos_attempt_decode_reference(struct pdf_doc *doc, res = cos_decode_number(doc, &offset, &cosobj); if (res != 0) { - return 0; /* no error if object could not be decoded */ + /* no error if next token could not be decoded as a number */ + return NSPDFERROR_OK; } if (cosobj->type != COS_TYPE_INT) { /* next object was not an integer so not a reference */ cos_free_object(cosobj); - return 0; + return NSPDFERROR_OK; } if (cosobj->u.i < 0) { @@ -635,8 +660,7 @@ int cos_attempt_decode_reference(struct pdf_doc *doc, * 
non-negative */ cos_free_object(cosobj); - return 0; - + return NSPDFERROR_OK; } /* two int in a row, look for the R */ @@ -644,18 +668,18 @@ int cos_attempt_decode_reference(struct pdf_doc *doc, if (c != 'R') { /* no R so not a reference */ cos_free_object(cosobj); - return 0; + return NSPDFERROR_OK; } /* found reference */ - printf("found reference\n"); + //printf("found reference\n"); doc_skip_ws(doc, &offset); nref = calloc(1, sizeof(struct cos_reference)); if (nref == NULL) { - /* todo free objects */ - return -1; /* memory error */ + /** \todo free objects */ + return NSPDFERROR_NOMEM; /* memory error */ } nref->id = (*cosobj_out)->u.i; @@ -670,10 +694,11 @@ int cos_attempt_decode_reference(struct pdf_doc *doc, *offset_out = offset; - return 0; + return NSPDFERROR_OK; } -/** + +/* * Decode input stream into an object * * lex and parse a byte stream to generate COS objects @@ -704,7 +729,8 @@ int cos_attempt_decode_reference(struct pdf_doc *doc, * TOK_STRING | * list | * dictionary | - * object_reference; + * object_reference | + * indirect_object; * * list: * '[' listargs ']'; @@ -717,13 +743,20 @@ int cos_attempt_decode_reference(struct pdf_doc *doc, * * object_reference: * TOK_UINT TOK_UINT 'R'; + * + * indirect_object: + * TOK_UINT TOK_UINT 'obj' cos_object 'endobj' + * | + * TOK_UINT TOK_UINT 'obj' dictionary 'stream' streamdata 'endstream' 'endobj' + * ; */ -int cos_decode_object(struct pdf_doc *doc, - uint64_t *offset_out, - struct cos_object **cosobj_out) +nspdferror +cos_decode_object(struct pdf_doc *doc, + uint64_t *offset_out, + struct cos_object **cosobj_out) { uint64_t offset; - int res; + nspdferror res; struct cos_object *cosobj; offset = *offset_out; @@ -786,11 +819,10 @@ int cos_decode_object(struct pdf_doc *doc, break; default: - res = -1; /* syntax error */ + res = NSPDFERROR_SYNTAX; /* syntax error */ } - - if (res == 0) { + if (res == NSPDFERROR_OK) { *cosobj_out = cosobj; *offset_out = offset; } diff --git a/src/cos_object.c 
b/src/cos_object.c index 96c669e..f4cd4fd 100644 --- a/src/cos_object.c +++ b/src/cos_object.c @@ -7,6 +7,7 @@ #include "nspdferror.h" #include "cos_object.h" +#include "pdf_doc.h" nspdferror cos_free_object(struct cos_object *cos_obj) @@ -115,25 +116,38 @@ cos_dictionary_extract_value(struct cos_object *dict, return NSPDFERROR_NOTFOUND; } -nspdferror cos_get_int(struct cos_object *cobj, int64_t *value_out) +nspdferror +cos_get_int(struct pdf_doc *doc, + struct cos_object *cobj, + int64_t *value_out) { - if (cobj->type != COS_TYPE_INT) { - return NSPDFERROR_TYPE; + nspdferror res; + + res = xref_get_referenced(doc, &cobj); + if (res == NSPDFERROR_OK) { + if (cobj->type != COS_TYPE_INT) { + res = NSPDFERROR_TYPE; + } else { + *value_out = cobj->u.i; + } } - *value_out = cobj->u.i; - return NSPDFERROR_OK; + return res; } nspdferror -cos_get_dictionary(struct cos_object *cobj, +cos_get_dictionary(struct pdf_doc *doc, + struct cos_object *cobj, struct cos_object **value_out) { - if (cobj->type == COS_TYPE_REFERENCE) { - - } - if (cobj->type != COS_TYPE_DICTIONARY) { - return NSPDFERROR_TYPE; + nspdferror res; + + res = xref_get_referenced(doc, &cobj); + if (res == NSPDFERROR_OK) { + if (cobj->type != COS_TYPE_DICTIONARY) { + res = NSPDFERROR_TYPE; + } else { + *value_out = cobj; + } } - *value_out = cobj; - return NSPDFERROR_OK; + return res; } diff --git a/src/cos_object.h b/src/cos_object.h index 65b3ed5..8d1449d 100644 --- a/src/cos_object.h +++ b/src/cos_object.h @@ -83,7 +83,12 @@ struct cos_object { } u; }; -int cos_decode_object(struct pdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out); +/** + * Decode input stream into an object + * + * lex and parse a byte stream to generate a COS object. 
+ */ +nspdferror cos_decode_object(struct pdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out); nspdferror cos_free_object(struct cos_object *cos_obj); @@ -91,8 +96,9 @@ nspdferror cos_dictionary_get_value(struct cos_object *dict, const char *key, st nspdferror cos_dictionary_extract_value(struct cos_object *dict, const char *key, struct cos_object **value_out); -nspdferror cos_get_int(struct cos_object *cobj, int64_t *value_out); -nspdferror cos_get_dictionary(struct cos_object *cobj, struct cos_object **value_out); +nspdferror cos_get_int(struct pdf_doc *doc, struct cos_object *cobj, int64_t *value_out); + +nspdferror cos_get_dictionary(struct pdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out); diff --git a/src/pdf_doc.c b/src/pdf_doc.c index 9b92bd0..dd31b72 100644 --- a/src/pdf_doc.c +++ b/src/pdf_doc.c @@ -1,4 +1,3 @@ - #include <stdint.h> #include <stddef.h> #include <stdbool.h> @@ -11,7 +10,7 @@ /** * move offset to next non whitespace byte */ -int doc_skip_ws(struct pdf_doc *doc, uint64_t *offset) +nspdferror doc_skip_ws(struct pdf_doc *doc, uint64_t *offset) { uint8_t c; /* TODO sort out keeping offset in range */ @@ -28,13 +27,13 @@ int doc_skip_ws(struct pdf_doc *doc, uint64_t *offset) } c = DOC_BYTE(doc, *offset); } - return 0; + return NSPDFERROR_OK; } /** * move offset to next non eol byte */ -int doc_skip_eol(struct pdf_doc *doc, uint64_t *offset) +nspdferror doc_skip_eol(struct pdf_doc *doc, uint64_t *offset) { uint8_t c; /* TODO sort out keeping offset in range */ @@ -43,5 +42,53 @@ int doc_skip_eol(struct pdf_doc *doc, uint64_t *offset) (*offset)++; c = DOC_BYTE(doc, *offset); } - return 0; + return NSPDFERROR_OK; +} + +static struct cos_object cos_null_obj = { + .type = COS_TYPE_NULL, +}; + +nspdferror +xref_get_referenced(struct pdf_doc *doc, struct cos_object **cobj_out) +{ + nspdferror res; + struct cos_object *cobj; + struct cos_object *indirect; + uint64_t offset; + struct xref_table_entry *entry; + + cobj = 
*cobj_out; + + if (cobj->type != COS_TYPE_REFERENCE) { + /* not passed a reference object so just return what was passed */ + return NSPDFERROR_OK; + } + + entry = doc->xref_table + cobj->u.reference->id; + + /* check if referenced object is in range and exists. return null object if + * not + */ + if ((cobj->u.reference->id >= doc->xref_size) || + (cobj->u.reference->id == 0) || + (entry->ref.id == 0)) { + *cobj_out = &cos_null_obj; + return NSPDFERROR_OK; + } + + if (entry->object == NULL) { + /* indirect object has never been decoded */ + offset = entry->offset; + res = cos_decode_object(doc, &offset, &indirect); + if (res != NSPDFERROR_OK) { + return res; + } + + entry->object = indirect; + } + + cobj = entry->object; + + return NSPDFERROR_OK; } diff --git a/src/pdf_doc.h b/src/pdf_doc.h index 696c121..b37e3b2 100644 --- a/src/pdf_doc.h +++ b/src/pdf_doc.h @@ -7,7 +7,7 @@ struct xref_table_entry { uint64_t offset; /* indirect object if already decoded */ - struct cos_object *o; + struct cos_object *object; }; @@ -38,5 +38,7 @@ struct pdf_doc { /* byte data acessory, allows for more complex buffer handling in future */ #define DOC_BYTE(doc, offset) (doc->start[(offset)]) -int doc_skip_ws(struct pdf_doc *doc, uint64_t *offset); -int doc_skip_eol(struct pdf_doc *doc, uint64_t *offset); +nspdferror doc_skip_ws(struct pdf_doc *doc, uint64_t *offset); +nspdferror doc_skip_eol(struct pdf_doc *doc, uint64_t *offset); + +nspdferror xref_get_referenced(struct pdf_doc *doc, struct cos_object **cobj_out); diff --git a/src/xref.c b/src/xref.c --- a/src/xref.c +++ b/src/xref.c @@ -225,10 +225,6 @@ int check_header(struct pdf_doc *doc) } - - - - nspdferror
decode_xref_trailer(struct pdf_doc *doc, uint64_t xref_offset) /* check for prev ID key in trailer and recurse call if present */ res = cos_dictionary_get_value(trailer, "Prev", &cobj_prev); if (res == NSPDFERROR_OK) { - res = cos_get_int(cobj_prev, &prev); + res = cos_get_int(doc, cobj_prev, &prev); if (res != NSPDFERROR_OK) { printf("trailer Prev not int\n"); goto decode_xref_trailer_failed; @@ -522,7 +518,7 @@ nspdferror decode_catalog(struct pdf_doc *doc) nspdferror res; struct cos_object *catalog; - res = cos_get_dictionary(doc->root, &catalog); + res = cos_get_dictionary(doc, doc->root, &catalog); return res; } |