summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Vincent Sanders <vince@kyllikki.org> 2018-01-20 18:46:41 +0000
committer Vincent Sanders <vince@kyllikki.org> 2018-01-20 18:46:41 +0000
commit 9f0e9af2eeb08abcaa4991ae4e87440dcba2ada1 (patch)
tree 6b05ac4bedd3cc13f773e2b2adf03f7522c997c1
parent 7967f13f57f08d2b8f38b8c52567d847933b79d8 (diff)
download libnspdf-9f0e9af2eeb08abcaa4991ae4e87440dcba2ada1.tar.gz
libnspdf-9f0e9af2eeb08abcaa4991ae4e87440dcba2ada1.tar.bz2
correctly parse content streams for page contents
-rw-r--r-- include/nspdf/errors.h 1
-rw-r--r-- src/cos_object.c 113
-rw-r--r-- src/cos_parse.c 142
-rw-r--r-- src/cos_parse.h 3
-rw-r--r-- src/page.c 48
-rw-r--r-- src/pdf_doc.c 19
6 files changed, 206 insertions, 120 deletions
diff --git a/include/nspdf/errors.h b/include/nspdf/errors.h
index f2142ff..6591dbb 100644
--- a/include/nspdf/errors.h
+++ b/include/nspdf/errors.h
@@ -24,6 +24,7 @@ typedef enum {
NSPDFERROR_TYPE, /**< wrong type error */
NSPDFERROR_NOTFOUND, /**< key not found */
NSPDFERROR_FORMAT, /**< objects do not cornform to expected format */
+ NSPDFERROR_INCOMPLETE, /**< operation was not completed */
} nspdferror;
#endif
diff --git a/src/cos_object.c b/src/cos_object.c
index c7ec4e6..7a02ebd 100644
--- a/src/cos_object.c
+++ b/src/cos_object.c
@@ -386,36 +386,111 @@ cos_get_object(struct nspdf_doc *doc,
return res;
}
+/*
+ * exported interface documented in cos_object.h
+ *
+ * slightly different behaviour to other getters:
+ * - This getter can be passed an object pointer to a synthetic parsed content
+ * stream object, in which case it returns that object's content operation
+ * list.
+ *
+ * - Alternatively it can be passed a single indirect object reference to a
+ * content stream which will be processed into a filtered stream and then
+ * converted into a parsed content stream which replaces the passed
+ * object. Freeing the underlying filtered streams is still to be done.
+ *
+ * - Finally it can be passed an array of indirect object references to
+ * content streams, all of which are parsed as if they formed a single
+ * stream of tokens; the result is handled as per the single reference case.
+ */
nspdferror
cos_get_content(struct nspdf_doc *doc,
struct cos_object *cobj,
struct cos_content **content_out)
{
nspdferror res;
- struct cos_object *content_obj;
+ struct cos_object **references;
+ unsigned int reference_count;
+ struct cos_stream **streams;
+ unsigned int index;
+ struct cos_object *content_obj; /* parsed content object */
+ struct cos_object tmpobj;
+
+ /* already parsed the content stream */
+ if (cobj->type == COS_TYPE_CONTENT) {
+ *content_out = cobj->u.content;
+ } else if (cobj->type == COS_TYPE_REFERENCE) {
+ /* single reference */
+ reference_count = 1;
+ references = calloc(reference_count, sizeof(struct cos_object *));
+ if (references == NULL) {
+ return NSPDFERROR_NOMEM;
+ }
- res = nspdf__xref_get_referenced(doc, &cobj);
- if (res == NSPDFERROR_OK) {
- if (cobj->type == COS_TYPE_STREAM) {
- res = cos_parse_content_stream(doc, cobj->u.stream, &content_obj);
- if (res == NSPDFERROR_OK) {
- /* replace stream object with parsed content operations */
- struct cos_object tmpobj;
- tmpobj = *cobj;
- *cobj = *content_obj;
- *content_obj = tmpobj;
- cos_free_object(content_obj);
-
- *content_out = cobj->u.content;
+ *references = cobj;
+ } else if (cobj->type == COS_TYPE_ARRAY) {
+ /* array of references */
+ reference_count = cobj->u.array->length;
+ references = malloc(reference_count * sizeof(struct cos_object *));
+ if (references == NULL) {
+ return NSPDFERROR_NOMEM;
+ }
+ memcpy(references, cobj->u.array->values, reference_count * sizeof(struct cos_object *));
+ /* check all objects in array are references */
+ for (index = 0; index < reference_count ; index++) {
+ if ((*(references + index))->type != COS_TYPE_REFERENCE) {
+ free(references);
+ return NSPDFERROR_TYPE;
}
- } else if (cobj->type == COS_TYPE_CONTENT) {
- *content_out = cobj->u.content;
- } else {
- res = NSPDFERROR_TYPE;
}
+ } else {
+ return NSPDFERROR_TYPE;
}
- return res;
+
+ /* obtain array of streams */
+ streams = malloc(reference_count * sizeof(struct cos_stream *));
+ if (streams == NULL) {
+ free(references);
+ return NSPDFERROR_TYPE;
+ }
+
+ for (index = 0; index < reference_count ; index++) {
+ struct cos_object *stream_obj;
+
+ stream_obj = *(references + index);
+ res = nspdf__xref_get_referenced(doc, &stream_obj);
+ if (res != NSPDFERROR_OK) {
+ free(references);
+ free(streams);
+ return res;
+ }
+ if (stream_obj->type != COS_TYPE_STREAM) {
+ free(references);
+ free(streams);
+ return NSPDFERROR_TYPE;
+ }
+ *(streams + index) = stream_obj->u.stream;;
+ }
+
+ res = cos_parse_content_streams(doc, streams, reference_count, &content_obj);
+ if (res != NSPDFERROR_OK) {
+ free(references);
+ free(streams);
+ return res;
+ }
+
+ /* replace passed object with parsed content operations object */
+ tmpobj = *cobj;
+ *cobj = *content_obj;
+ *content_obj = tmpobj;
+ cos_free_object(content_obj);
+
+ /** \todo call nspdf__xref_free_referenced(doc, *(references + index)); to free up storage associated with already parsed streams */
+
+ *content_out = cobj->u.content;
+
+ return NSPDFERROR_OK;
}
/*
diff --git a/src/cos_parse.c b/src/cos_parse.c
index 5ccd171..46282ca 100644
--- a/src/cos_parse.c
+++ b/src/cos_parse.c
@@ -1115,6 +1115,10 @@ parse_operator(struct cos_stream *stream,
offset = *offset_out;
+ if (offset >= stream->length) {
+ return NSPDFERROR_SYNTAX;
+ }
+
/* first char */
c = stream_byte(stream, offset);
if ((bclass[c] & (BC_WSPC | BC_CMNT) ) != 0) {
@@ -1125,13 +1129,15 @@ parse_operator(struct cos_stream *stream,
offset++;
/* possible second char */
c = stream_byte(stream, offset);
- if ((bclass[c] & (BC_WSPC | BC_CMNT) ) == 0) {
+ if ((offset < stream->length) &&
+ ((bclass[c] & (BC_WSPC | BC_CMNT) ) == 0)) {
lookup = (lookup << 8) | c;
offset++;
/* possible third char */
c = stream_byte(stream, offset);
- if ((bclass[c] & (BC_WSPC | BC_CMNT) ) == 0) {
+ if ((offset < stream->length) &&
+ ((bclass[c] & (BC_WSPC | BC_CMNT) ) == 0)) {
lookup = (lookup << 8) | c;
offset++;
@@ -1241,55 +1247,60 @@ parse_operator(struct cos_stream *stream,
#define MAX_OPERAND_COUNT 32
-static nspdferror
+static inline nspdferror
parse_content_operation(struct nspdf_doc *doc,
struct cos_stream *stream,
strmoff_t *offset_out,
+ struct cos_object **operands,
+ unsigned int *operand_idx,
struct content_operation *operation_out)
{
strmoff_t offset;
nspdferror res;
enum content_operator operator;
- struct cos_object *operands[MAX_OPERAND_COUNT];
- unsigned int operand_idx = 0;
offset = *offset_out;
res = parse_operator(stream, &offset, &operator);
while (res == NSPDFERROR_SYNTAX) {
/* was not an operator so check for what else it could have been */
- if (operand_idx >= MAX_OPERAND_COUNT) {
+ if (*operand_idx >= MAX_OPERAND_COUNT) {
/** \todo free any stacked operands */
printf("too many operands\n");
return NSPDFERROR_SYNTAX;
}
+ if (offset >= stream->length) {
+ *offset_out = offset;
+ return NSPDFERROR_INCOMPLETE;
+ }
+
switch (stream_byte(stream, offset)) {
case '-': case '+': case '.': case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7': case '8': case '9':
- res = cos_parse_number(stream, &offset, &operands[operand_idx]);
+ res = cos_parse_number(stream, &offset, &operands[*operand_idx]);
break;
case 't':
case 'f':
- res = cos_parse_boolean(stream, &offset, &operands[operand_idx]);
+ res = cos_parse_boolean(stream, &offset, &operands[*operand_idx]);
break;
case 'n':
- res = cos_parse_null(stream, &offset, &operands[operand_idx]);
+ res = cos_parse_null(stream, &offset, &operands[*operand_idx]);
break;
case '(':
- res = cos_parse_string(stream, &offset, &operands[operand_idx]);
+ res = cos_parse_string(stream, &offset, &operands[*operand_idx]);
break;
case '/':
- res = cos_parse_name(stream, &offset, &operands[operand_idx]);
+ res = cos_parse_name(stream, &offset, &operands[*operand_idx]);
break;
case '[':
- res = cos_parse_list(doc, stream, &offset, &operands[operand_idx]);
+ res = cos_parse_list(doc, stream, &offset, &operands[*operand_idx]);
break;
case '<':
@@ -1297,16 +1308,23 @@ parse_content_operation(struct nspdf_doc *doc,
res = cos_parse_dictionary(doc,
stream,
&offset,
- &operands[operand_idx]);
+ &operands[*operand_idx]);
} else {
res = cos_parse_hex_string(stream,
&offset,
- &operands[operand_idx]);
+ &operands[*operand_idx]);
}
break;
default:
- printf("unknown operand type\n");
+ printf("unknown operand with %d operands %d to %d of %d\n>>>%.*s<<<\n",
+ *operand_idx,
+ (*offset_out),
+ offset,
+ stream->length,
+ (offset + 1) - (*offset_out),
+ stream->data + (*offset_out));
+
res = NSPDFERROR_SYNTAX; /* syntax error */
}
@@ -1319,26 +1337,44 @@ parse_content_operation(struct nspdf_doc *doc,
}
/* move to next operand */
- operand_idx++;
+ (*operand_idx)++;
res = parse_operator(stream, &offset, &operator);
}
operation_out->operator = operator;
- //printf("returning operator %d with %d operands\n", operator, operand_idx);
+
+ /*
+ printf("returning operator %d with %d operands %d to %d of %d\n>>>%.*s<<<\n",
+ operator,
+ *operand_idx,
+ (*offset_out),
+ offset,
+ stream->length,
+ offset - (*offset_out),
+ stream->data + (*offset_out));
+ */
+
+ *operand_idx = 0;
*offset_out = offset;
+
return NSPDFERROR_OK;
}
nspdferror
-cos_parse_content_stream(struct nspdf_doc *doc,
- struct cos_stream *stream,
- struct cos_object **content_out)
+cos_parse_content_streams(struct nspdf_doc *doc,
+ struct cos_stream **streams,
+ unsigned int stream_count,
+ struct cos_object **content_out)
{
nspdferror res;
struct cos_object *cosobj;
strmoff_t offset;
+ struct cos_stream *stream;
+ unsigned int stream_index;
+ struct cos_object *operands[MAX_OPERAND_COUNT];
+ unsigned int operand_idx = 0;
//printf("%.*s", (int)stream->length, stream->data);
@@ -1354,42 +1390,50 @@ cos_parse_content_stream(struct nspdf_doc *doc,
goto cos_parse_content_stream_error;
}
- offset = 0;
+ for (stream_index = 0; stream_index < stream_count; stream_index++) {
+ stream = *(streams + stream_index);
+ offset = 0;
- /* skip any leading whitespace */
- res = nspdf__stream_skip_ws(stream, &offset);
- if (res != NSPDFERROR_OK) {
- goto cos_parse_content_stream_error;
- }
+ /* skip any leading whitespace */
+ res = nspdf__stream_skip_ws(stream, &offset);
+ if (res != NSPDFERROR_OK) {
+ goto cos_parse_content_stream_error;
+ }
+
+ while (offset < stream->length) {
+
+ /* ensure there is space in the operations array */
+ if (cosobj->u.content->alloc < (cosobj->u.content->length + 1)) {
+ struct content_operation *newops;
+ newops = realloc(cosobj->u.content->operations,
+ sizeof(struct content_operation) *
+ (cosobj->u.content->alloc + 32));
+ if (newops == NULL) {
+ res = NSPDFERROR_NOMEM;
+ goto cos_parse_content_stream_error;
+ }
+ cosobj->u.content->operations = newops;
+ cosobj->u.content->alloc += 32;
+ }
- while (offset < stream->length) {
- struct content_operation cop;
-
- /* ensure there is space in the operations array */
- if (cosobj->u.content->alloc < (cosobj->u.content->length + 1)) {
- struct content_operation *newops;
- newops = realloc(cosobj->u.content->operations,
- sizeof(struct content_operation) *
- (cosobj->u.content->alloc + 32));
- if (newops == NULL) {
- res = NSPDFERROR_NOMEM;
+ /* parse an operation out */
+ res = parse_content_operation(
+ doc,
+ stream,
+ &offset,
+ operands,
+ &operand_idx,
+ cosobj->u.content->operations + cosobj->u.content->length);
+ if (res== NSPDFERROR_OK) {
+ cosobj->u.content->length++;
+ } else if (res == NSPDFERROR_INCOMPLETE) {
+ //printf("Incomplete\n");
+ } else if (res != NSPDFERROR_OK) {
goto cos_parse_content_stream_error;
}
- cosobj->u.content->operations = newops;
- cosobj->u.content->alloc += 32;
- }
- res = parse_content_operation(
- doc,
- stream,
- &offset,
- cosobj->u.content->operations + cosobj->u.content->length);
- if (res != NSPDFERROR_OK) {
- goto cos_parse_content_stream_error;
}
- cosobj->u.content->length++;
}
-
*content_out = cosobj;
return NSPDFERROR_OK;
diff --git a/src/cos_parse.h b/src/cos_parse.h
index a9cb9c9..a6a65ca 100644
--- a/src/cos_parse.h
+++ b/src/cos_parse.h
@@ -30,6 +30,7 @@ nspdferror cos_parse_object(struct nspdf_doc *doc, struct cos_stream *stream, st
/**
* Parse content stream into content operations object
*/
-nspdferror cos_parse_content_stream(struct nspdf_doc *doc, struct cos_stream *stream, struct cos_object **content_out);
+nspdferror cos_parse_content_streams(struct nspdf_doc *doc, struct cos_stream **streams, unsigned int stream_count, struct cos_object **content_out);
+
#endif
diff --git a/src/page.c b/src/page.c
index 3844122..5299c7c 100644
--- a/src/page.c
+++ b/src/page.c
@@ -146,62 +146,20 @@ nspdf_page_count(struct nspdf_doc *doc, unsigned int *pages_out)
return NSPDFERROR_OK;
}
-static nspdferror
-nspdf__render_content_stream(struct nspdf_doc *doc,
- struct page_table_entry *page_entry,
- struct cos_object *content_entry)
-{
- nspdferror res;
- struct cos_content *content_operations;
-
- res = cos_get_content(doc, content_entry, &content_operations);
- if (res == NSPDFERROR_OK) {
- printf("%p\n", content_operations);
- }
-
- return res;
-}
/* exported interface documented in nspdf/page.h */
nspdferror
nspdf_page_render(struct nspdf_doc *doc, unsigned int page_number)
{
struct page_table_entry *page_entry;
- struct cos_object *content_array;
+ struct cos_content *page_content; /* page operations array */
nspdferror res;
page_entry = doc->page_table + page_number;
- /* contents may be an array of stream objects or just a single one */
- res = cos_get_array(doc, page_entry->contents, &content_array);
+ res = cos_get_content(doc, page_entry->contents, &page_content);
if (res == NSPDFERROR_OK) {
- unsigned int content_stream_count;
- unsigned int content_stream_index;
-
- res = cos_get_array_size(doc, content_array, &content_stream_count);
- if (res != NSPDFERROR_OK) {
- return res;
- }
- for (content_stream_index = 0;
- content_stream_index < content_stream_count;
- content_stream_index++) {
- struct cos_object *content_entry;
- res = cos_get_array_value(doc,
- content_array,
- content_stream_index,
- &content_entry);
- if (res != NSPDFERROR_OK) {
- break;
- }
-
- res = nspdf__render_content_stream(doc, page_entry, content_entry);
- if (res != NSPDFERROR_OK) {
- break;
- }
- }
- } else if (res == NSPDFERROR_TYPE) {
- res = nspdf__render_content_stream(doc, page_entry, page_entry->contents);
+ printf("%p\n", page_content);
}
-
return res;
}
diff --git a/src/pdf_doc.c b/src/pdf_doc.c
index d7c7a0e..3e55e16 100644
--- a/src/pdf_doc.c
+++ b/src/pdf_doc.c
@@ -23,19 +23,26 @@ nspdferror
nspdf__stream_skip_ws(struct cos_stream *stream, strmoff_t *offset)
{
uint8_t c;
- /* TODO sort out keeping offset in range */
+
+ if ((*offset) >= stream->length) {
+ return NSPDFERROR_OK;
+ }
+
c = stream_byte(stream, *offset);
- while ((bclass[c] & (BC_WSPC | BC_CMNT) ) != 0) {
+ while (((*offset) < stream->length) &&
+ ((bclass[c] & (BC_WSPC | BC_CMNT) ) != 0)) {
(*offset)++;
/* skip comments */
- if ((bclass[c] & BC_CMNT) != 0) {
+ if (((*offset) < stream->length) &&
+ ((bclass[c] & BC_CMNT) != 0)) {
c = stream_byte(stream, *offset);
- while ((bclass[c] & BC_EOLM ) == 0) {
+ while ((*offset < stream->length) &&
+ ((bclass[c] & BC_EOLM ) == 0)) {
(*offset)++;
- c = stream_byte(stream, *offset);
+ c = stream_byte(stream, (*offset));
}
}
- c = stream_byte(stream, *offset);
+ c = stream_byte(stream, (*offset));
}
return NSPDFERROR_OK;
}