summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVincent Sanders <vince@kyllikki.org>2017-12-28 17:18:10 +0000
committerVincent Sanders <vince@kyllikki.org>2017-12-28 17:18:10 +0000
commit5422dd50a49fe1a282271f22cd324f815e592e07 (patch)
tree99c0760ab8318b7e9d8e8190886affad0b8e4e70
parent7da4a1d7b029ab640a9ae2b95e745d29c998a7b0 (diff)
downloadlibnspdf-5422dd50a49fe1a282271f22cd324f815e592e07.tar.gz
libnspdf-5422dd50a49fe1a282271f22cd324f815e592e07.tar.bz2
decode page tree
-rw-r--r--src/cos_object.c148
-rw-r--r--src/cos_object.h15
-rw-r--r--src/pdf_doc.h9
-rw-r--r--src/xref.c117
4 files changed, 284 insertions, 5 deletions
diff --git a/src/cos_object.c b/src/cos_object.c
index a5bd738..5bfd423 100644
--- a/src/cos_object.c
+++ b/src/cos_object.c
@@ -188,6 +188,59 @@ cos_get_dictionary_dictionary(struct pdf_doc *doc,
}
+/**
+ * look up a key in a dictionary, falling back to its ancestors
+ *
+ * If the key is absent from the dictionary, the dictionary held under its
+ * "Parent" key is searched instead, and so on up the chain.
+ *
+ * \param doc The document the dictionary belongs to.
+ * \param dict The dictionary to start searching from.
+ * \param key The key to search for.
+ * \param value_out Updated with the value found, only on success.
+ * \return NSPDFERROR_OK and value_out updated,
+ *         NSPDFERROR_FORMAT if the Parent chain is deeper than the limit
+ *         below (treated as a cyclic chain),
+ *         otherwise the error from the lookup that failed.
+ */
+static nspdferror
+cos_heritable_dictionary_value(struct pdf_doc *doc,
+                               struct cos_object *dict,
+                               const char *key,
+                               struct cos_object **value_out)
+{
+    /*
+     * Limit on ancestors visited so a cyclic Parent chain in a malformed
+     * file cannot loop forever. The value is an arbitrary generous bound.
+     */
+    enum { HERITABLE_MAX_DEPTH = 64 };
+
+    nspdferror res;
+    struct cos_object *found;
+    struct cos_object *parent;
+    unsigned int depth;
+
+    res = cos_get_dictionary_value(doc, dict, key, &found);
+    for (depth = 0; res == NSPDFERROR_NOTFOUND; depth++) {
+        if (depth == HERITABLE_MAX_DEPTH) {
+            return NSPDFERROR_FORMAT;
+        }
+
+        /*
+         * Any failure to obtain the parent ends the search with that
+         * error; presumably NSPDFERROR_NOTFOUND once the top of the chain
+         * is reached and there is no Parent entry.
+         */
+        res = cos_get_dictionary_dictionary(doc, dict, "Parent", &parent);
+        if (res != NSPDFERROR_OK) {
+            return res;
+        }
+        dict = parent;
+
+        res = cos_get_dictionary_value(doc, dict, key, &found);
+    }
+
+    if (res == NSPDFERROR_OK) {
+        *value_out = found;
+    }
+    return res;
+}
+
+/**
+ * get a heritable dictionary value for a key from a dictionary
+ *
+ * The key is searched for in the dictionary and then in its ancestors; the
+ * value found is passed to cos_get_dictionary().
+ *
+ * \param doc The document the dictionary belongs to.
+ * \param dict The dictionary to start searching from.
+ * \param key The key to search for.
+ * \param value_out Updated with the dictionary object on success.
+ * \return NSPDFERROR_OK and value_out updated, otherwise the error from the
+ *         lookup or from cos_get_dictionary().
+ */
nspdferror
+cos_heritable_dictionary_dictionary(struct pdf_doc *doc,
+                                    struct cos_object *dict,
+                                    const char *key,
+                                    struct cos_object **value_out)
+{
+    nspdferror res;
+    struct cos_object *dict_value;
+
+    res = cos_heritable_dictionary_value(doc, dict, key, &dict_value);
+    if (res != NSPDFERROR_OK) {
+        return res;
+    }
+    return cos_get_dictionary(doc, dict_value, value_out);
+}
+
+/**
+ * get an array value for a key from a dictionary
+ *
+ * \param doc The document the dictionary belongs to.
+ * \param dict The dictionary to search.
+ * \param key The key to search for.
+ * \param value_out Updated with the array object on success.
+ * \return NSPDFERROR_OK and value_out updated, otherwise the error from
+ *         cos_get_dictionary_value() or cos_get_array().
+ */
+nspdferror
+cos_get_dictionary_array(struct pdf_doc *doc,
+                         struct cos_object *dict,
+                         const char *key,
+                         struct cos_object **value_out)
+{
+    nspdferror res;
+    struct cos_object *dict_value;
+
+    res = cos_get_dictionary_value(doc, dict, key, &dict_value);
+    if (res != NSPDFERROR_OK) {
+        return res;
+    }
+    return cos_get_array(doc, dict_value, value_out);
+}
+
+/**
+ * get a heritable array value for a key from a dictionary
+ *
+ * The key is searched for in the dictionary and then in its ancestors; the
+ * value found is passed to cos_get_array().
+ *
+ * \param doc The document the dictionary belongs to.
+ * \param dict The dictionary to start searching from.
+ * \param key The key to search for.
+ * \param value_out Updated with the array object on success.
+ * \return NSPDFERROR_OK and value_out updated, otherwise the error from the
+ *         lookup or from cos_get_array().
+ */
+nspdferror
+cos_heritable_dictionary_array(struct pdf_doc *doc,
+                               struct cos_object *dict,
+                               const char *key,
+                               struct cos_object **value_out)
+{
+    nspdferror res;
+    struct cos_object *dict_value;
+
+    res = cos_heritable_dictionary_value(doc, dict, key, &dict_value);
+    if (res != NSPDFERROR_OK) {
+        return res;
+    }
+    return cos_get_array(doc, dict_value, value_out);
+}
+
+nspdferror
cos_get_int(struct pdf_doc *doc,
struct cos_object *cobj,
int64_t *value_out)
@@ -242,3 +295,98 @@ cos_get_dictionary(struct pdf_doc *doc,
}
return res;
}
+
+/**
+ * get the array object for a cos object
+ *
+ * \param doc The document the object belongs to.
+ * \param cobj The object; passed through xref_get_referenced() first.
+ * \param value_out Updated with the array object on success.
+ * \return NSPDFERROR_OK and value_out updated,
+ *         NSPDFERROR_TYPE if the object obtained is not an array,
+ *         otherwise the error from xref_get_referenced().
+ */
+nspdferror
+cos_get_array(struct pdf_doc *doc,
+              struct cos_object *cobj,
+              struct cos_object **value_out)
+{
+    nspdferror res = xref_get_referenced(doc, &cobj);
+
+    if (res != NSPDFERROR_OK) {
+        return res;
+    }
+    if (cobj->type != COS_TYPE_ARRAY) {
+        return NSPDFERROR_TYPE;
+    }
+
+    *value_out = cobj;
+    return NSPDFERROR_OK;
+}
+
+/*
+ * get the value at an index of an array
+ *
+ * The array entries are held as a linked list which is walked from the head
+ * until the requested index is reached.
+ *
+ * returns NSPDFERROR_OK with value_out updated,
+ *         NSPDFERROR_TYPE if the object is not an array,
+ *         NSPDFERROR_RANGE if the array has no entry at that index,
+ *         otherwise the error from xref_get_referenced().
+ */
+nspdferror
+cos_get_array_value(struct pdf_doc *doc,
+                    struct cos_object *array,
+                    unsigned int index,
+                    struct cos_object **value_out)
+{
+    struct cos_array_entry *cursor;
+    unsigned int position = 0;
+    nspdferror res;
+
+    res = xref_get_referenced(doc, &array);
+    if (res != NSPDFERROR_OK) {
+        return res;
+    }
+    if (array->type != COS_TYPE_ARRAY) {
+        return NSPDFERROR_TYPE;
+    }
+
+    for (cursor = array->u.array; cursor != NULL; cursor = cursor->next) {
+        if (position == index) {
+            *value_out = cursor->value;
+            return NSPDFERROR_OK;
+        }
+        position++;
+    }
+
+    /* ran off the end of the list before reaching the index */
+    return NSPDFERROR_RANGE;
+}
+
+/**
+ * get the dictionary at an index of an array
+ *
+ * \param doc The document the array belongs to.
+ * \param array The array to index.
+ * \param index The index of the entry wanted.
+ * \param value_out Updated with the dictionary object on success.
+ * \return NSPDFERROR_OK and value_out updated, otherwise the error from
+ *         cos_get_array_value() or cos_get_dictionary().
+ */
+nspdferror
+cos_get_array_dictionary(struct pdf_doc *doc,
+                         struct cos_object *array,
+                         unsigned int index,
+                         struct cos_object **value_out)
+{
+    struct cos_object *element;
+    nspdferror res = cos_get_array_value(doc, array, index, &element);
+
+    if (res == NSPDFERROR_OK) {
+        res = cos_get_dictionary(doc, element, value_out);
+    }
+    return res;
+}
+
+/**
+ * get the number of entries in an array
+ *
+ * The entries are counted by walking the array's linked list.
+ *
+ * \param doc The document the array belongs to.
+ * \param cobj The array object; passed through xref_get_referenced() first.
+ * \param size_out Updated with the number of entries on success.
+ * \return NSPDFERROR_OK and size_out updated,
+ *         NSPDFERROR_TYPE if the object obtained is not an array,
+ *         otherwise the error from xref_get_referenced().
+ */
+nspdferror
+cos_get_array_size(struct pdf_doc *doc,
+                   struct cos_object *cobj,
+                   unsigned int *size_out)
+{
+    struct cos_array_entry *cursor;
+    unsigned int entries = 0;
+    nspdferror res;
+
+    res = xref_get_referenced(doc, &cobj);
+    if (res != NSPDFERROR_OK) {
+        return res;
+    }
+    if (cobj->type != COS_TYPE_ARRAY) {
+        return NSPDFERROR_TYPE;
+    }
+
+    for (cursor = cobj->u.array; cursor != NULL; cursor = cursor->next) {
+        entries++;
+    }
+
+    *size_out = entries;
+    return NSPDFERROR_OK;
+}
diff --git a/src/cos_object.h b/src/cos_object.h
index 2ded7ec..48241c6 100644
--- a/src/cos_object.h
+++ b/src/cos_object.h
@@ -127,6 +127,13 @@ nspdferror cos_get_dictionary_name(struct pdf_doc *doc, struct cos_object *dict,
nspdferror cos_get_dictionary_dictionary(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
+/**
+ * Variant of cos_get_dictionary_dictionary() for keys that are heritable.
+ *
+ * The value found for the key is passed to cos_get_dictionary().
+ */
+nspdferror cos_heritable_dictionary_dictionary(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
+
+/**
+ * Get an array value for a key from a dictionary.
+ *
+ * The key is looked up with cos_get_dictionary_value() and the value found is
+ * passed to cos_get_array(); an error from either is returned.
+ */
+nspdferror cos_get_dictionary_array(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
+
+/**
+ * Variant of cos_get_dictionary_array() for keys that are heritable.
+ *
+ * The value found for the key is passed to cos_get_array().
+ */
+nspdferror cos_heritable_dictionary_array(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
+
+
nspdferror cos_get_int(struct pdf_doc *doc, struct cos_object *cobj, int64_t *value_out);
@@ -134,3 +141,11 @@ nspdferror cos_get_name(struct pdf_doc *doc, struct cos_object *cobj, const char
nspdferror cos_get_dictionary(struct pdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out);
+
+/**
+ * Get the array object for a cos object.
+ *
+ * Returns NSPDFERROR_TYPE if the object is not an array.
+ */
+nspdferror cos_get_array(struct pdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out);
+
+/**
+ * Get the number of entries in an array.
+ *
+ * Returns NSPDFERROR_TYPE if the object is not an array.
+ */
+nspdferror cos_get_array_size(struct pdf_doc *doc, struct cos_object *cobj, unsigned int *size_out);
+
+/**
+ * Get the value at an index of an array.
+ *
+ * Returns NSPDFERROR_TYPE if the object is not an array and NSPDFERROR_RANGE
+ * if the array has no entry at the index.
+ */
+nspdferror cos_get_array_value(struct pdf_doc *doc, struct cos_object *array, unsigned int index, struct cos_object **value_out);
+
+/**
+ * Get the dictionary at an index of an array.
+ *
+ * The entry is obtained with cos_get_array_value() and passed to
+ * cos_get_dictionary(); an error from either is returned.
+ */
+nspdferror cos_get_array_dictionary(struct pdf_doc *doc, struct cos_object *array, unsigned int index, struct cos_object **value_out);
diff --git a/src/pdf_doc.h b/src/pdf_doc.h
index b37e3b2..986556f 100644
--- a/src/pdf_doc.h
+++ b/src/pdf_doc.h
@@ -10,6 +10,12 @@ struct xref_table_entry {
struct cos_object *object;
};
+/** page entry: the objects decode_page_tree() records for one Page node */
+struct page_table_entry {
+ struct cos_object *resources; /**< dictionary found under the "Resources" key */
+ struct cos_object *mediabox; /**< array found under the "MediaBox" key */
+ struct cos_object *contents; /**< value found under the "Contents" key; the key may be absent */
+};
/** pdf document */
struct pdf_doc {
@@ -33,6 +39,9 @@ struct pdf_doc {
struct cos_object *info;
struct cos_object *id;
+ /* page reference table */
+ uint64_t page_table_size;
+ struct page_table_entry *page_table;
};
/* byte data acessory, allows for more complex buffer handling in future */
diff --git a/src/xref.c b/src/xref.c
index 8239f45..452aa19 100644
--- a/src/xref.c
+++ b/src/xref.c
@@ -499,12 +499,123 @@ nspdferror decode_trailers(struct pdf_doc *doc)
return decode_xref_trailer(doc, startxref);
}
+/**
+ * recursively decodes a page tree
+ *
+ * A node whose Type is "Pages" has every dictionary in its Kids array decoded
+ * in turn. A node whose Type is "Page" fills in the next free page table
+ * entry. The page table is allocated from the Count of the first Pages node
+ * visited while doc->page_table is still NULL.
+ *
+ * \param doc The document being decoded.
+ * \param page_tree_node The page tree node to decode.
+ * \param page_index Index of the next page table entry to fill; incremented
+ *                   for every Page node decoded.
+ * \return NSPDFERROR_OK on success,
+ *         NSPDFERROR_NOMEM if the page table cannot be allocated,
+ *         NSPDFERROR_FORMAT if the node Type is neither Pages nor Page, if
+ *         Count is negative, or if a Page node is found with no free page
+ *         table entry left for it,
+ *         otherwise the error from the lookup that failed.
+ */
+nspdferror
+decode_page_tree(struct pdf_doc *doc,
+                 struct cos_object *page_tree_node,
+                 unsigned int *page_index)
+{
+    nspdferror res;
+    const char *type;
+
+    /* Type is either Pages (interior node) or Page (leaf) */
+    res = cos_get_dictionary_name(doc, page_tree_node, "Type", &type);
+    if (res != NSPDFERROR_OK) {
+        return res;
+    }
+
+    if (strcmp(type, "Pages") == 0) {
+        struct cos_object *kids;
+        unsigned int kids_size;
+        unsigned int kids_index;
+
+        if (doc->page_table == NULL) {
+            /* allocate top level page table */
+            int64_t count;
+
+            res = cos_get_dictionary_int(doc, page_tree_node, "Count", &count);
+            if (res != NSPDFERROR_OK) {
+                return res;
+            }
+
+            /* Count is read from the file so must not be trusted as a size */
+            if (count < 0) {
+                return NSPDFERROR_FORMAT;
+            }
+            if ((uint64_t)count > (SIZE_MAX / sizeof(*doc->page_table))) {
+                /* would truncate or overflow when converted to size_t */
+                return NSPDFERROR_NOMEM;
+            }
+
+            doc->page_table = calloc((size_t)count, sizeof(*doc->page_table));
+            if (doc->page_table == NULL) {
+                return NSPDFERROR_NOMEM;
+            }
+            doc->page_table_size = (uint64_t)count;
+        }
+
+        res = cos_get_dictionary_array(doc, page_tree_node, "Kids", &kids);
+        if (res != NSPDFERROR_OK) {
+            return res;
+        }
+
+        res = cos_get_array_size(doc, kids, &kids_size);
+        if (res != NSPDFERROR_OK) {
+            return res;
+        }
+
+        for (kids_index = 0; kids_index < kids_size; kids_index++) {
+            struct cos_object *kid;
+
+            res = cos_get_array_dictionary(doc, kids, kids_index, &kid);
+            if (res != NSPDFERROR_OK) {
+                return res;
+            }
+
+            /*
+             * NOTE(review): the recursion depth is not limited. A cycle of
+             * Pages nodes that never reaches a Page leaf would recurse until
+             * the stack is exhausted; cycles that do reach a leaf are stopped
+             * by the page table bounds check below.
+             */
+            res = decode_page_tree(doc, kid, page_index);
+            if (res != NSPDFERROR_OK) {
+                return res;
+            }
+        }
+
+    } else if (strcmp(type, "Page") == 0) {
+        struct page_table_entry *page;
+
+        /*
+         * Never write outside the table: there is no table if a Page node is
+         * met before any Pages node, and the table is full if the tree holds
+         * more Page nodes than the Count it was sized from.
+         */
+        if ((doc->page_table == NULL) ||
+            (*page_index >= doc->page_table_size)) {
+            return NSPDFERROR_FORMAT;
+        }
+
+        page = doc->page_table + (*page_index);
+
+        /* required heritable resources */
+        res = cos_heritable_dictionary_dictionary(doc,
+                                                  page_tree_node,
+                                                  "Resources",
+                                                  &(page->resources));
+        if (res != NSPDFERROR_OK) {
+            return res;
+        }
+
+        /* required heritable mediabox */
+        res = cos_heritable_dictionary_array(doc,
+                                             page_tree_node,
+                                             "MediaBox",
+                                             &(page->mediabox));
+        if (res != NSPDFERROR_OK) {
+            return res;
+        }
+
+        /* optional page contents */
+        res = cos_get_dictionary_value(doc,
+                                       page_tree_node,
+                                       "Contents",
+                                       &(page->contents));
+        if ((res != NSPDFERROR_OK) &&
+            (res != NSPDFERROR_NOTFOUND)) {
+            return res;
+        }
+
+        /* %u matches the unsigned index and %p requires void pointers */
+        printf("page index:%u page:%p resources:%p mediabox:%p contents:%p\n",
+               *page_index,
+               (void *)page,
+               (void *)page->resources,
+               (void *)page->mediabox,
+               (void *)page->contents);
+
+        (*page_index)++;
+        res = NSPDFERROR_OK;
+    } else {
+        res = NSPDFERROR_FORMAT;
+    }
+    return res;
+}
+
nspdferror decode_catalog(struct pdf_doc *doc)
{
nspdferror res;
struct cos_object *catalog;
const char *type;
struct cos_object *pages;
+ unsigned int page_index = 0;
res = cos_get_dictionary(doc, doc->root, &catalog);
if (res != NSPDFERROR_OK) {
@@ -526,14 +637,10 @@ nspdferror decode_catalog(struct pdf_doc *doc)
return res;
}
- // Type = Pages
- res = cos_get_dictionary_name(doc, pages, "Type", &type);
+ res = decode_page_tree(doc, pages, &page_index);
if (res != NSPDFERROR_OK) {
return res;
}
- if (strcmp(type, "Pages") != 0) {
- return NSPDFERROR_FORMAT;
- }
return res;
}