summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVincent Sanders <vince@kyllikki.org>2018-01-08 00:17:22 +0000
committerVincent Sanders <vince@kyllikki.org>2018-01-08 00:17:22 +0000
commit119b565de393fdd797e1a3c4f629c936092e9091 (patch)
treef725bc086c48122cec172430b3ffa1732177a807
parente8dbf1fa8049169e6918cce20e98e309a793cffe (diff)
downloadlibnspdf-119b565de393fdd797e1a3c4f629c936092e9091.tar.gz
libnspdf-119b565de393fdd797e1a3c4f629c936092e9091.tar.bz2
parse stream objects, no filters handled yet
-rw-r--r--src/cos_object.c3
-rw-r--r--src/cos_object.h24
-rw-r--r--src/cos_parse.c130
-rw-r--r--src/page.c5
4 files changed, 141 insertions, 21 deletions
diff --git a/src/cos_object.c b/src/cos_object.c
index 80e4431..335e14b 100644
--- a/src/cos_object.c
+++ b/src/cos_object.c
@@ -357,8 +357,9 @@ cos_get_stream(struct nspdf_doc *doc,
struct cos_stream **stream_out)
{
nspdferror res;
-
+ //printf("%p %d\n", cobj, cobj->type);
res = nspdf__xref_get_referenced(doc, &cobj);
+ //printf("%p %d res:%d\n", cobj, cobj->type, res);
if (res == NSPDFERROR_OK) {
if (cobj->type != COS_TYPE_STREAM) {
res = NSPDFERROR_TYPE;
diff --git a/src/cos_object.h b/src/cos_object.h
index 077be3b..8d0f910 100644
--- a/src/cos_object.h
+++ b/src/cos_object.h
@@ -1,18 +1,18 @@
struct nspdf_doc;
enum cos_type {
- COS_TYPE_NULL,
+ COS_TYPE_NULL, /* 0 */
COS_TYPE_BOOL,
COS_TYPE_INT,
COS_TYPE_REAL,
COS_TYPE_NAME,
COS_TYPE_STRING,
- COS_TYPE_ARRAY,
+ COS_TYPE_ARRAY, /* 6 */
COS_TYPE_DICTIONARY,
COS_TYPE_NAMETREE,
COS_TYPE_NUMBERTREE,
COS_TYPE_STREAM,
- COS_TYPE_REFERENCE,
+ COS_TYPE_REFERENCE, /* 11 */
};
struct cos_object;
@@ -37,22 +37,20 @@ struct cos_array_entry {
};
struct cos_string {
- uint8_t *data;
- size_t length;
- size_t alloc;
+ uint8_t *data; /**< string data */
+ size_t length; /**< string length */
+ size_t alloc; /**< memory allocation for string */
};
struct cos_reference {
- /** id of indirect object */
- uint64_t id;
-
- /* generation of indirect object */
- uint64_t generation;
+ uint64_t id; /**< id of indirect object */
+ uint64_t generation; /**< generation of indirect object */
};
struct cos_stream {
- uint8_t *data;
- size_t length;
+ const uint8_t *data; /**< decoded stream data */
+ int64_t length; /**< decoded stream length */
+ size_t alloc; /**< memory allocated for stream */
};
diff --git a/src/cos_parse.c b/src/cos_parse.c
index ca3d802..8fe181c 100644
--- a/src/cos_parse.c
+++ b/src/cos_parse.c
@@ -310,7 +310,7 @@ cos_decode_dictionary(struct nspdf_doc *doc,
if ((DOC_BYTE(doc, offset) != '<') ||
(DOC_BYTE(doc, offset + 1) != '<')) {
- return -1; /* syntax error */
+ return NSPDFERROR_SYNTAX; /* syntax error */
}
offset += 2;
doc_skip_ws(doc, &offset);
@@ -319,7 +319,7 @@ cos_decode_dictionary(struct nspdf_doc *doc,
cosobj = calloc(1, sizeof(struct cos_object));
if (cosobj == NULL) {
- return -1; /* memory error */
+ return NSPDFERROR_NOMEM;
}
cosobj->type = COS_TYPE_DICTIONARY;
@@ -337,7 +337,6 @@ cos_decode_dictionary(struct nspdf_doc *doc,
printf("key was %d not a name %d\n", key->type, COS_TYPE_NAME);
return NSPDFERROR_SYNTAX;
}
- //printf("key: %s\n", key->u.n);
res = cos_parse_object(doc, &offset, &value);
if (res != NSPDFERROR_OK) {
@@ -352,6 +351,7 @@ cos_decode_dictionary(struct nspdf_doc *doc,
/* todo free up any dictionary entries already created */
return NSPDFERROR_NOMEM;
}
+ //printf("key:%s value(type):%d\n", key->u.n, value->type);
entry->key = key;
entry->value = value;
@@ -622,6 +622,110 @@ cos_decode_null(struct nspdf_doc *doc,
return NSPDFERROR_OK;
}
+/**
+ * parse a stream object
+ *
+ * On entry *cosobj_out is the object parsed for the indirect object and
+ * *offset_out is the document offset immediately after it.
+ *
+ * \return NSPDFERROR_OK and updated outputs, NSPDFERROR_NOTFOUND and untouched
+ *         outputs when there is no stream here, otherwise an error code.
+ */
+static nspdferror
+cos_parse_stream(struct nspdf_doc *doc,
+                 uint64_t *offset_out,
+                 struct cos_object **cosobj_out)
+{
+    struct cos_object *cosobj;
+    nspdferror res;
+    struct cos_object *stream_dict;
+    uint64_t offset;
+    uint64_t data_offset; /* offset of first byte of stream data */
+    struct cos_object *stream_filter;
+    struct cos_stream *stream;
+    int64_t stream_length;
+    unsigned int idx;
+
+    offset = *offset_out;
+    stream_dict = *cosobj_out;
+
+    if (stream_dict->type != COS_TYPE_DICTIONARY) {
+        /* cannot be a stream if indirect object is not a dict */
+        return NSPDFERROR_NOTFOUND;
+    }
+
+    /* every byte of the keyword must match for this to be a stream */
+    for (idx = 0; idx < 6; idx++) {
+        if (DOC_BYTE(doc, offset + idx) != "stream"[idx]) {
+            return NSPDFERROR_NOTFOUND; /* no stream marker */
+        }
+    }
+    offset += 6;
+
+    /* Only the single end of line after the keyword is consumed. Skipping
+     * general whitespace could swallow the first bytes of the data.
+     */
+    if (DOC_BYTE(doc, offset) == '\r') {
+        offset++;
+    }
+    if (DOC_BYTE(doc, offset) == '\n') {
+        offset++;
+    }
+    data_offset = offset;
+
+    res = cos_get_dictionary_int(doc, stream_dict, "Length", &stream_length);
+    if (res != NSPDFERROR_OK) {
+        /* the caller takes NOTFOUND to mean there was no stream at all */
+        return (res == NSPDFERROR_NOTFOUND) ? NSPDFERROR_SYNTAX : res;
+    }
+    if (stream_length < 0) {
+        return NSPDFERROR_SYNTAX;
+    }
+    offset += stream_length; /** \todo bound length by the document size */
+
+    /* possible whitespace after stream data */
+    res = doc_skip_ws(doc, &offset);
+    if (res != NSPDFERROR_OK) {
+        return res;
+    }
+
+    for (idx = 0; idx < 9; idx++) {
+        if (DOC_BYTE(doc, offset + idx) != "endstream"[idx]) {
+            return NSPDFERROR_SYNTAX; /* no endstream marker */
+        }
+    }
+    offset += 9;
+
+    res = doc_skip_ws(doc, &offset);
+    if (res != NSPDFERROR_OK) {
+        return res;
+    }
+
+    /* optional filter */
+    res = cos_get_dictionary_value(doc, stream_dict, "Filter", &stream_filter);
+    if ((res == NSPDFERROR_OK) && (stream_filter->type == COS_TYPE_NAME)) {
+        /** \todo filter stream, Filter may also be an array of names */
+        printf("applying filter %s\n", stream_filter->u.n);
+    }
+
+    /* allocate only once parsing has succeeded so no error path leaks */
+    stream = calloc(1, sizeof(struct cos_stream));
+    cosobj = calloc(1, sizeof(struct cos_object));
+    if ((stream == NULL) || (cosobj == NULL)) {
+        free(stream);
+        free(cosobj);
+        return NSPDFERROR_NOMEM;
+    }
+    stream->data = doc->start + data_offset;
+    stream->length = stream_length;
+    stream->alloc = 0; /* stream is pointing at non malloced data */
+    cosobj->type = COS_TYPE_STREAM;
+    cosobj->u.stream = stream;
+
+    /* NOTE(review): stream_dict is neither freed nor kept here - confirm */
+    *cosobj_out = cosobj;
+    *offset_out = offset;
+    return NSPDFERROR_OK;
+}
/**
* attempt to decode input data into a reference, indirect or stream object
@@ -719,7 +823,21 @@ cos_attempt_decode_reference(struct nspdf_doc *doc,
cos_free_object(generation);
return res;
}
- //printf("parsed object type %d\nendobj\n",indirect->type);
+
+ /* attempt to parse input as a stream */
+ res = cos_parse_stream(doc, &offset, &indirect);
+ if ((res != NSPDFERROR_OK) &&
+ (res != NSPDFERROR_NOTFOUND)) {
+ cos_free_object(indirect);
+ cos_free_object(generation);
+ return res;
+ }
+
+ /*printf("parsed indirect object num:%d gen:%d type %d\n",
+ (*cosobj_out)->u.i,
+ generation->u.i,
+ indirect->type);
+ */
if ((DOC_BYTE(doc, offset ) != 'e') &&
(DOC_BYTE(doc, offset + 1) != 'n') &&
@@ -732,7 +850,7 @@ cos_attempt_decode_reference(struct nspdf_doc *doc,
return NSPDFERROR_SYNTAX;
}
offset += 6;
- //printf("skipping\n");
+ //printf("endobj\n");
res = doc_skip_ws(doc, &offset);
if (res != NSPDFERROR_OK) {
@@ -746,6 +864,8 @@ cos_attempt_decode_reference(struct nspdf_doc *doc,
*cosobj_out = indirect;
*offset_out = offset;
+
+ //printf("returning object\n");
}
cos_free_object(generation);
diff --git a/src/page.c b/src/page.c
index 5d2a117..a6a9d52 100644
--- a/src/page.c
+++ b/src/page.c
@@ -120,12 +120,13 @@ nspdf__decode_page_tree(struct nspdf_doc *doc,
}
/*
- printf("page index:%d page:%p resources:%p mediabox:%p contents:%p\n",
+ printf("page index:%d page:%p resources:%p mediabox:%p contents:%p contents type:%d\n",
*page_index,
page,
page->resources,
page->mediabox,
- page->contents);
+ page->contents,
+ page->contents->type);
*/
(*page_index)++;