summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Vincent Sanders <vince@kyllikki.org> 2017-07-13 01:03:23 +0100
committer Vincent Sanders <vince@kyllikki.org> 2017-07-13 01:03:23 +0100
commit 7220b02603cfd33775e56da19fe9f5fb1da08aa0 (patch)
tree 0afacbc0e2fe797f4efeeb7ea15526d8f26f1c77 /src
parent 4c3a26f186e50b6b8821122b9cb4def1eb0fffeb (diff)
download libnspdf-7220b02603cfd33775e56da19fe9f5fb1da08aa0.tar.gz
libnspdf-7220b02603cfd33775e56da19fe9f5fb1da08aa0.tar.bz2
got references parsing
Diffstat (limited to 'src')
-rw-r--r-- src/xref.c 228
1 files changed, 192 insertions, 36 deletions
diff --git a/src/xref.c b/src/xref.c
index 6af8132..e1f3a9d 100644
--- a/src/xref.c
+++ b/src/xref.c
@@ -3,6 +3,7 @@
#include <inttypes.h>
#include <stdlib.h>
#include <stdbool.h>
+#include <string.h>
#define SLEN(x) (sizeof((x)) - 1)
@@ -14,7 +15,7 @@
#define BC_DELM (1<<4) /* character is a delimiter */
/**
- * byte classification
+ * pdf byte classification
*/
uint8_t bclass[] = {
BC_WSPC, BC_NONE, BC_NONE, BC_NONE, /* 00 - 03 */
@@ -126,7 +127,7 @@ struct cos_dictionary_entry {
/** reference to an indirect object, identified by id and generation */
struct cos_reference {
/** id of indirect object */
uint64_t id;
-
+
/** generation of indirect object */
uint64_t generation;
};
@@ -136,19 +137,19 @@ struct cos_object {
union {
/** boolean */
bool b;
-
+
/** integer */
int64_t i;
-
+
/** real */
double r;
-
+
/** name */
char *n;
-
+
/** string */
char *s;
-
+
/** stream data */
uint8_t *stream;
@@ -157,7 +158,7 @@ struct cos_object {
/** reference */
struct cos_reference *reference;
-
+
} u;
};
@@ -169,10 +170,10 @@ struct cos_indirect_object {
/* reference identifier */
struct cos_reference ref;
-
+
/** offset of object */
uint64_t offset;
-
+
/* direct object */
struct cos_object *o;
};
@@ -367,7 +368,7 @@ int cos_indirect_object_add(struct pdf_doc *doc,
nobj->offset = obj_offset;
doc->cos_list = nobj;
-
+
printf("xref %"PRIu64" %"PRIu64" %"PRIu64"\n",
obj_number, obj_offset, obj_generation);
return 0;
@@ -390,10 +391,10 @@ int cos_free_object(struct cos_object *cos_obj)
dentry = cos_obj->u.dictionary;
while (dentry != NULL) {
struct cos_dictionary_entry *odentry;
-
+
cos_free_object(dentry->key);
cos_free_object(dentry->value);
-
+
odentry = dentry;
dentry = dentry->next;
free(odentry);
@@ -414,7 +415,48 @@ int cos_decode_number(struct pdf_doc *doc,
uint64_t *offset_out,
struct cos_object **cosobj_out)
{
- return -1;
+ struct cos_object *cosobj;
+ uint8_t c; /* current byte from source data */
+ int len; /* number of decimal places in number */
+ uint8_t num[21]; /* temporary buffer for decimal values */
+ uint64_t offset; /* current offset of source data */
+
+ offset = *offset_out;
+
+ for (len = 0; len < sizeof(num); len++) {
+ c = DOC_BYTE(doc, offset);
+ if ((bclass[c] & BC_DCML) != BC_DCML) {
+ int64_t result = 0; /* parsed result */
+ uint64_t tens;
+
+ if (len == 0) {
+ return -2; /* parse error no decimals in input */
+ }
+ /* sum value from each place */
+ for (tens = 1; len > 0; tens = tens * 10, len--) {
+ result += (num[len - 1] * tens);
+ }
+
+ doc_skip_ws(doc, &offset);
+
+ cosobj = calloc(1, sizeof(struct cos_object));
+ if (cosobj == NULL) {
+ return -1; /* memory error */
+ }
+
+ cosobj->type = COS_TYPE_INT;
+ cosobj->u.i = result;
+
+ *cosobj_out = cosobj;
+
+ *offset_out = offset;
+
+ return 0;
+ }
+ num[len] = c - '0';
+ offset++;
+ }
+ return -1; /* number too long */
}
int cos_decode_string(struct pdf_doc *doc,
@@ -459,19 +501,22 @@ int cos_decode_dictionary(struct pdf_doc *doc,
return -1; /* memory error */
}
cosobj->type = COS_TYPE_DICTIONARY;
-
+
while ((DOC_BYTE(doc, offset) != '>') &&
(DOC_BYTE(doc, offset + 1) != '>')) {
-
+
res = cos_decode_object(doc, &offset, &key);
if (res != 0) {
/* todo free up any dictionary entries already created */
+ printf("key object decode failed\n");
return res;
}
if (key->type != COS_TYPE_NAME) {
/* key value pairs without a name */
+ printf("key was %d not a name %d\n", key->type, COS_TYPE_NAME);
return -1; /* syntax error */
}
+ printf("key: %s\n", key->u.n);
res = cos_decode_object(doc, &offset, &value);
if (res != 0) {
@@ -491,7 +536,7 @@ int cos_decode_dictionary(struct pdf_doc *doc,
entry->next = cosobj->u.dictionary;
cosobj->u.dictionary = entry;
-
+
}
offset += 2; /* skip closing >> */
doc_skip_ws(doc, &offset);
@@ -511,6 +556,11 @@ int cos_decode_list(struct pdf_doc *doc,
#define NAME_MAX_LENGTH 127
+/**
+ * decode a name object
+ *
+ * \todo decode #xx hexadecimal escapes in names (PDF 1.2 and later)
+ */
int cos_decode_name(struct pdf_doc *doc,
uint64_t *offset_out,
struct cos_object **cosobj_out)
@@ -519,13 +569,31 @@ int cos_decode_name(struct pdf_doc *doc,
struct cos_object *cosobj;
uint8_t c;
char name[NAME_MAX_LENGTH + 1];
+ int idx = 0;
offset = *offset_out;
c = DOC_BYTE(doc, offset++);
+ if (c != '/') {
+ return -1; /* names must be prefixed with a / */
+ }
+ printf("found a name\n");
+ c = DOC_BYTE(doc, offset++);
+ while ((idx <= NAME_MAX_LENGTH) &&
+ ((bclass[c] & (BC_WSPC | BC_DELM)) == 0)) {
+ //printf("%c", c);
+ name[idx++] = c;
+ c = DOC_BYTE(doc, offset++);
+ }
+ //printf("\nidx: %d\n", idx);
+ if (idx > NAME_MAX_LENGTH) {
+ /* name length exceeded implementation limit */
+ return -1;
+ }
+ name[idx] = 0;
-
+ //printf("name: %s\n", name);
doc_skip_ws(doc, &offset);
@@ -534,8 +602,8 @@ int cos_decode_name(struct pdf_doc *doc,
return -1; /* memory error */
}
- cosobj->type = COS_TYPE_BOOL;
- cosobj->u.b = value;
+ cosobj->type = COS_TYPE_NAME;
+ cosobj->u.n = strdup(name);
*cosobj_out = cosobj;
@@ -553,7 +621,7 @@ int cos_decode_boolean(struct pdf_doc *doc,
struct cos_object *cosobj;
uint8_t c;
bool value;
-
+
offset = *offset_out;
c = DOC_BYTE(doc, offset++);
@@ -573,7 +641,7 @@ int cos_decode_boolean(struct pdf_doc *doc,
return -1; /* syntax error */
}
value = true;
-
+
} else if ((c == 'f') || (c == 'F')) {
/* false branch */
@@ -613,7 +681,7 @@ int cos_decode_boolean(struct pdf_doc *doc,
*cosobj_out = cosobj;
*offset_out = offset;
-
+
return 0;
}
@@ -625,7 +693,7 @@ int cos_decode_null(struct pdf_doc *doc,
uint64_t offset;
struct cos_object *cosobj;
uint8_t c;
-
+
offset = *offset_out;
c = DOC_BYTE(doc, offset++);
@@ -644,7 +712,7 @@ int cos_decode_null(struct pdf_doc *doc,
if ((c != 'l') && (c != 'L')) {
return -1; /* syntax error */
}
-
+
doc_skip_ws(doc, &offset);
cosobj = calloc(1, sizeof(struct cos_object));
@@ -658,6 +726,83 @@ int cos_decode_null(struct pdf_doc *doc,
return 0;
}
+/**
+ * attempt to decode the stream into a reference
+ *
+ * The stream has already had a positive integer decoded from it. If another
+ * non-negative integer follows, and an R character after that, it is a
+ * reference; otherwise bail. Not finding a reference is not an error: zero is
+ * returned and both the offset and the object passed in are left untouched.
+ *
+ * When a reference is found the integer object passed in is freed and
+ * replaced by a reference object, and the offset is moved past the R with
+ * doc_skip_ws() applied afterwards.
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of current cursor in stream
+ * \param cosobj_out the object to return into, on input contains the first
+ *                   integer
+ * \return 0 if a reference was decoded or if no reference was present,
+ *         -1 on memory error (offset and object passed in are unchanged)
+ */
+int cos_attempt_decode_reference(struct pdf_doc *doc,
+                                 uint64_t *offset_out,
+                                 struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj; /* possible generation object */
+    uint8_t c;
+    int res;
+    struct cos_reference *nref; /* new reference */
+
+    /* work on a local cursor so the caller's offset only moves on success */
+    offset = *offset_out;
+
+    res = cos_decode_object(doc, &offset, &cosobj);
+    if (res != 0) {
+        return 0; /* no error if object could not be decoded */
+    }
+
+    if (cosobj->type != COS_TYPE_INT) {
+        /* next object was not an integer so not a reference */
+        cos_free_object(cosobj);
+        return 0;
+    }
+
+    if (cosobj->u.i < 0) {
+        /* integer was negative so not a reference (generations must be
+         * non-negative)
+         */
+        cos_free_object(cosobj);
+        return 0;
+    }
+
+    /* two int in a row, look for the R */
+    c = DOC_BYTE(doc, offset++);
+    if (c != 'R') {
+        /* no R so not a reference */
+        cos_free_object(cosobj);
+        return 0;
+    }
+
+    /* found reference */
+
+    printf("found reference\n");
+    doc_skip_ws(doc, &offset);
+
+    nref = calloc(1, sizeof(struct cos_reference));
+    if (nref == NULL) {
+        /* release the generation object decoded above so it is not leaked;
+         * the object passed in by the caller is left alone
+         */
+        cos_free_object(cosobj);
+        return -1; /* memory error */
+    }
+
+    /* first integer is the object id, second is the generation */
+    nref->id = (*cosobj_out)->u.i;
+    nref->generation = cosobj->u.i;
+
+    cos_free_object(*cosobj_out);
+
+    /* reuse the generation object as the returned reference object */
+    cosobj->type = COS_TYPE_REFERENCE;
+    cosobj->u.reference = nref;
+
+    *cosobj_out = cosobj;
+
+    *offset_out = offset;
+
+    return 0;
+}
/**
* Decode input stream into an object
@@ -678,7 +823,7 @@ int cos_decode_null(struct pdf_doc *doc,
* [ a list
* t|T boolean true
* f|F boolean false
- * n|N null
+ * n|N null
*
* Grammar is:
* cos_object:
@@ -711,12 +856,12 @@ int cos_decode_object(struct pdf_doc *doc,
uint64_t offset;
int res;
struct cos_object *cosobj;
-
+
offset = *offset_out;
/* object could be any type use first char to try and select */
switch (DOC_BYTE(doc, offset)) {
-
+
case '-':
case '+':
case '.':
@@ -731,7 +876,12 @@ int cos_decode_object(struct pdf_doc *doc,
case '8':
case '9':
res = cos_decode_number(doc, &offset, &cosobj);
- /* if type is uint try to check for reference */
+ /* if type is positive integer try to check for reference */
+ if ((res == 0) &&
+ (cosobj->type == COS_TYPE_INT) &&
+ (cosobj->u.i > 0)) {
+ res = cos_attempt_decode_reference(doc, &offset, &cosobj);
+ }
break;
case '<':
@@ -769,7 +919,13 @@ int cos_decode_object(struct pdf_doc *doc,
default:
res = -1; /* syntax error */
}
-
+
+
+ if (res == 0) {
+ *cosobj_out = cosobj;
+ *offset_out = offset;
+ }
+
return res;
}
@@ -792,7 +948,7 @@ int decode_trailer(struct pdf_doc *doc, uint64_t offset)
}
offset += 7;
doc_skip_ws(doc, &offset);
-
+
res = cos_decode_object(doc, &offset, &trailer);
if (res != 0) {
return res;
@@ -802,7 +958,7 @@ int decode_trailer(struct pdf_doc *doc, uint64_t offset)
cos_free_object(trailer);
return -1;
}
-
+
return 0;
}
@@ -812,9 +968,9 @@ int decode_xref(struct pdf_doc *doc, uint64_t *offset_out)
uint64_t objnum; /* current object number */
uint64_t lastobjnum;
uint64_t offset;
-
+
offset = *offset_out;
-
+
/* xref object header */
if ((DOC_BYTE(doc, offset ) != 'x') &&
(DOC_BYTE(doc, offset + 1) != 'r') &&
@@ -866,7 +1022,7 @@ int decode_xref(struct pdf_doc *doc, uint64_t *offset_out)
objnum++;
}
// printf("at objnum %"PRIu64"\n", objnum);
-
+
/* first object number in table */
res = doc_read_uint(doc, &offset, &objnum);
}
@@ -909,6 +1065,6 @@ int main(int argc, char **argv)
printf("failed to decode trailer\n");
return res;
}
-
+
return 0;
}