summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVincent Sanders <vince@kyllikki.org>2018-01-18 00:21:10 +0000
committerVincent Sanders <vince@kyllikki.org>2018-01-18 00:21:10 +0000
commitb1e0e4414ecd3161c0f947daceb8643b5889e51c (patch)
treeab63f66b3651856f3528bb51e9acf522d0854521
parentc27eb52f96f8070c4be77a387e603508fc4092ce (diff)
downloadlibnspdf-b1e0e4414ecd3161c0f947daceb8643b5889e51c.tar.gz
libnspdf-b1e0e4414ecd3161c0f947daceb8643b5889e51c.tar.bz2
start to alter parsing to read from cos_stream object
-rw-r--r--include/nspdf/document.h2
-rw-r--r--src/cos_object.c49
-rw-r--r--src/cos_object.h51
-rw-r--r--src/cos_parse.c79
-rw-r--r--src/cos_parse.h8
-rw-r--r--src/document.c13
-rw-r--r--src/page.c38
-rw-r--r--src/pdf_doc.c14
-rw-r--r--src/pdf_doc.h24
-rw-r--r--src/xref.c8
10 files changed, 212 insertions, 74 deletions
diff --git a/include/nspdf/document.h b/include/nspdf/document.h
index d7cbb0f..3c222cf 100644
--- a/include/nspdf/document.h
+++ b/include/nspdf/document.h
@@ -40,7 +40,7 @@ nspdferror nspdf_document_destroy(struct nspdf_doc *doc);
* ready to render pages. The passed buffer ownership is transferred and must
* not be altered until the document is destroyed.
*/
-nspdferror nspdf_document_parse(struct nspdf_doc *doc, const uint8_t *buffer, uint64_t buffer_length);
+nspdferror nspdf_document_parse(struct nspdf_doc *doc, const uint8_t *buffer, unsigned int buffer_length);
#endif /* NSPDF_DOCUMENT_H_ */
diff --git a/src/cos_object.c b/src/cos_object.c
index 3dc5efa..4398822 100644
--- a/src/cos_object.c
+++ b/src/cos_object.c
@@ -17,6 +17,7 @@
#include <nspdf/errors.h>
#include "cos_object.h"
+#include "cos_parse.h"
#include "pdf_doc.h"
@@ -369,6 +370,54 @@ cos_get_stream(struct nspdf_doc *doc,
/*
+ * get object from object reference
+ *
+ * The local cobj pointer is handed to nspdf__xref_get_referenced() by
+ * address so that call can update it with the object it resolves.
+ *
+ * doc: document passed through to the cross reference lookup.
+ * cobj: object to resolve.
+ * value_out: receives the resolved object; written only when the lookup
+ * returns NSPDFERROR_OK, otherwise left untouched.
+ *
+ * Returns the result of nspdf__xref_get_referenced() unchanged.
+ */
+nspdferror
+cos_get_object(struct nspdf_doc *doc,
+ struct cos_object *cobj,
+ struct cos_object **value_out)
+{
+ nspdferror res;
+ res = nspdf__xref_get_referenced(doc, &cobj);
+ if (res == NSPDFERROR_OK) {
+ *value_out = cobj;
+ }
+ return res;
+}
+
+/**
+ * get parsed content operations from an object
+ *
+ * cobj is first resolved with nspdf__xref_get_referenced(). What happens
+ * next depends on the type of the resolved object:
+ *
+ * - COS_TYPE_STREAM: the stream is handed to cos_parse_content_stream().
+ * On success the resolved object and the newly parsed object are
+ * exchanged by value, so the resolved object itself now carries the
+ * parsed result, and the temporary object (now holding the old stream)
+ * is passed to cos_free_object().
+ * - COS_TYPE_CONTENT: the existing u.content pointer is returned.
+ * - any other type: NSPDFERROR_TYPE is returned.
+ *
+ * content_out is written only when NSPDFERROR_OK is returned.
+ *
+ * NOTE(review): cos_parse_content_stream() as added in this change only
+ * sets the object type on a calloc()ed object, so u.content looks to be
+ * still zeroed after the swap and content_out would receive a null
+ * pointer - confirm.
+ */
+nspdferror
+cos_get_content(struct nspdf_doc *doc,
+ struct cos_object *cobj,
+ struct cos_content **content_out)
+{
+ nspdferror res;
+ struct cos_object *content_obj;
+
+ res = nspdf__xref_get_referenced(doc, &cobj);
+ if (res == NSPDFERROR_OK) {
+ if (cobj->type == COS_TYPE_STREAM) {
+ res = cos_parse_content_stream(doc, cobj->u.stream, &content_obj);
+ if (res == NSPDFERROR_OK) {
+ /* replace stream object with parsed content operations
+ * by swapping the two structures by value; cobj keeps its
+ * address and content_obj ends up holding the old stream */
+ struct cos_object tmpobj;
+ tmpobj = *cobj;
+ *cobj = *content_obj;
+ *content_obj = tmpobj;
+ cos_free_object(content_obj);
+
+ *content_out = cobj->u.content;
+ }
+ } else if (cobj->type == COS_TYPE_CONTENT) {
+ *content_out = cobj->u.content;
+ } else {
+ res = NSPDFERROR_TYPE;
+ }
+ }
+ return res;
+}
+
+/*
* get a value for a key from a dictionary
*/
nspdferror
diff --git a/src/cos_object.h b/src/cos_object.h
index 2e763e2..9b98694 100644
--- a/src/cos_object.h
+++ b/src/cos_object.h
@@ -16,6 +16,7 @@
#define NSPDF__COS_OBJECT_H_
struct nspdf_doc;
+struct content_operation;
enum cos_type {
COS_TYPE_NULL, /* 0 */
@@ -30,6 +31,7 @@ enum cos_type {
COS_TYPE_NUMBERTREE,
COS_TYPE_STREAM,
COS_TYPE_REFERENCE, /* 11 */
+ COS_TYPE_CONTENT, /* 12 - parsed content stream */
};
struct cos_object;
@@ -59,10 +61,13 @@ struct cos_array {
struct cos_object **values;
};
+/**
+ * COS string data
+ */
struct cos_string {
- uint8_t *data; /**< string data */
- size_t length; /**< string length */
+ unsigned int length; /**< string length */
size_t alloc; /**< memory allocation for string */
+ uint8_t *data; /**< string data */
};
struct cos_reference {
@@ -71,12 +76,22 @@ struct cos_reference {
};
struct cos_stream {
- const uint8_t *data; /**< decoded stream data */
- int64_t length; /**< decoded stream length */
+ unsigned int length; /**< decoded stream length */
size_t alloc; /**< memory allocated for stream */
+ const uint8_t *data; /**< decoded stream data */
};
+/**
+ * Synthetic parsed content object.
+ *
+ * Reached through the content member of the cos_object union; the
+ * COS_TYPE_CONTENT enumerator is described as a parsed content stream.
+ * struct content_operation is only forward declared in this header.
+ */
+struct cos_content {
+ unsigned int length; /**< number of content operations */
+ unsigned int alloc; /**< number of allocated operations */
+ struct content_operation *operations; /**< the operations; presumably storage for alloc entries - TODO confirm */
+};
+
struct cos_object {
int type;
union {
@@ -107,6 +122,8 @@ struct cos_object {
/** reference */
struct cos_reference *reference;
+ /** parsed content stream */
+ struct cos_content *content;
} u;
};
@@ -264,5 +281,31 @@ nspdferror cos_get_array(struct nspdf_doc *doc, struct cos_object *cobj, struct
*/
nspdferror cos_get_stream(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_stream **stream_out);
+/**
+ * get a direct cos object.
+ *
+ * Obtain a direct object: if the passed object is a reference, it is
+ * dereferenced through the cross reference table.
+ *
+ * \param doc The document the cos object belongs to.
+ * \param cobj A cos object.
+ * \param object_out The result object.
+ * \return NSPDFERROR_OK and \p object_out updated, otherwise an error code
+ * and \p object_out is left unmodified.
+ */
+nspdferror cos_get_object(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_object **object_out);
+
+/**
+ * get a parsed content object
+ *
+ * Get the parsed content from a cos object, if the object is an object
+ * reference it will be dereferenced first.
+ * The parsed content object is *not* a normal COS object rather it is the
+ * internal result of parsing a PDF content stream.
+ * This object type is used to replace the stream object in the cross reference
+ * table after its initial parse to avoid the need to keep and repeatedly
+ * parse the filtered stream data.
+ *
+ */
+nspdferror cos_get_content(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_content **content_out);
#endif
diff --git a/src/cos_parse.c b/src/cos_parse.c
index 21ba0d7..c196019 100644
--- a/src/cos_parse.c
+++ b/src/cos_parse.c
@@ -57,11 +57,15 @@ static uint8_t xtoi(uint8_t x)
return x;
}
+/**
+ * parse a number
+ */
static nspdferror
-cos_decode_number(struct nspdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
+cos_parse_number(struct cos_stream *stream,
+ uint64_t *offset_out,
+ struct cos_object **cosobj_out)
{
+ nspdferror res;
struct cos_object *cosobj;
uint8_t c; /* current byte from source data */
unsigned int len; /* number of decimal places in number */
@@ -71,7 +75,7 @@ cos_decode_number(struct nspdf_doc *doc,
offset = *offset_out;
for (len = 0; len < sizeof(num); len++) {
- c = DOC_BYTE(doc, offset);
+ c = stream_byte(stream, offset);
if ((bclass[c] & BC_DCML) != BC_DCML) {
int64_t result = 0; /* parsed result */
uint64_t tens;
@@ -85,7 +89,10 @@ cos_decode_number(struct nspdf_doc *doc,
result += (num[len - 1] * tens);
}
- doc_skip_ws(doc, &offset);
+ res = nspdf__stream_skip_ws(stream, &offset);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
cosobj = calloc(1, sizeof(struct cos_object));
if (cosobj == NULL) {
@@ -230,7 +237,7 @@ cos_decode_string(struct nspdf_doc *doc,
cos_string_append(cstring, c);
}
- doc_skip_ws(doc, &offset);
+ nspdf__stream_skip_ws(doc->stream, &offset);
*cosobj_out = cosobj;
*offset_out = offset;
@@ -279,7 +286,7 @@ cos_decode_hex_string(struct nspdf_doc *doc,
cos_string_append(cstring, value);
}
offset++;
- doc_skip_ws(doc, &offset);
+ nspdf__stream_skip_ws(doc->stream, &offset);
*cosobj_out = cosobj;
*offset_out = offset;
@@ -323,7 +330,7 @@ cos_decode_dictionary(struct nspdf_doc *doc,
return NSPDFERROR_SYNTAX; /* syntax error */
}
offset += 2;
- doc_skip_ws(doc, &offset);
+ nspdf__stream_skip_ws(doc->stream, &offset);
//printf("found a dictionary\n");
@@ -371,7 +378,7 @@ cos_decode_dictionary(struct nspdf_doc *doc,
}
offset += 2; /* skip closing >> */
- doc_skip_ws(doc, &offset);
+ nspdf__stream_skip_ws(doc->stream, &offset);
*cosobj_out = cosobj;
*offset_out = offset;
@@ -403,7 +410,7 @@ cos_parse_list(struct nspdf_doc *doc,
offset++;
/* advance offset to next token */
- res = doc_skip_ws(doc, &offset);
+ res = nspdf__stream_skip_ws(doc->stream, &offset);
if (res != NSPDFERROR_OK) {
return res;
}
@@ -449,7 +456,7 @@ cos_parse_list(struct nspdf_doc *doc,
}
offset++; /* skip closing ] */
- doc_skip_ws(doc, &offset);
+ nspdf__stream_skip_ws(doc->stream, &offset);
*cosobj_out = cosobj;
*offset_out = offset;
@@ -499,7 +506,7 @@ cos_decode_name(struct nspdf_doc *doc,
//printf("name: %s\n", name);
- doc_skip_ws(doc, &offset);
+ nspdf__stream_skip_ws(doc->stream, &offset);
cosobj = calloc(1, sizeof(struct cos_object));
if (cosobj == NULL) {
@@ -575,7 +582,7 @@ cos_decode_boolean(struct nspdf_doc *doc,
return -1; /* syntax error */
}
- doc_skip_ws(doc, &offset);
+ nspdf__stream_skip_ws(doc->stream, &offset);
cosobj = calloc(1, sizeof(struct cos_object));
if (cosobj == NULL) {
@@ -626,7 +633,7 @@ cos_decode_null(struct nspdf_doc *doc,
return -1; /* syntax error */
}
- doc_skip_ws(doc, &offset);
+ nspdf__stream_skip_ws(doc->stream, &offset);
cosobj = calloc(1, sizeof(struct cos_object));
if (cosobj == NULL) {
@@ -656,6 +663,7 @@ cos_parse_stream(struct nspdf_doc *doc,
uint64_t offset;
struct cos_object *stream_filter;
struct cos_stream *stream;
+ int64_t stream_length;
offset = *offset_out;
stream_dict = *cosobj_out;
@@ -678,7 +686,7 @@ cos_parse_stream(struct nspdf_doc *doc,
//printf("detected stream\n");
/* parsed object was a dictionary and there is a stream marker */
- res = doc_skip_ws(doc, &offset);
+ res = nspdf__stream_skip_ws(doc->stream, &offset);
if (res != NSPDFERROR_OK) {
return res;
}
@@ -688,10 +696,15 @@ cos_parse_stream(struct nspdf_doc *doc,
return NSPDFERROR_NOMEM;
}
- res = cos_get_dictionary_int(doc, stream_dict, "Length", &stream->length);
+ res = cos_get_dictionary_int(doc, stream_dict, "Length", &stream_length);
if (res != NSPDFERROR_OK) {
return res;
}
+ if (stream_length < 0) {
+ return NSPDFERROR_RANGE;
+ }
+ stream->length = stream_length;
+
//printf("stream length %d\n", stream_length);
stream->data = doc->start + offset;
stream->alloc = 0; /* stream is pointing at non malloced data */
@@ -699,7 +712,7 @@ cos_parse_stream(struct nspdf_doc *doc,
offset += stream->length;
/* possible whitespace after stream data */
- res = doc_skip_ws(doc, &offset);
+ res = nspdf__stream_skip_ws(doc->stream, &offset);
if (res != NSPDFERROR_OK) {
return res;
}
@@ -719,7 +732,7 @@ cos_parse_stream(struct nspdf_doc *doc,
offset += 9;
//printf("detected endstream\n");
- res = doc_skip_ws(doc, &offset);
+ res = nspdf__stream_skip_ws(doc->stream, &offset);
if (res != NSPDFERROR_OK) {
return res;
}
@@ -785,7 +798,7 @@ cos_attempt_decode_reference(struct nspdf_doc *doc,
offset = *offset_out;
- res = cos_decode_number(doc, &offset, &generation);
+ res = cos_parse_number(doc->stream, &offset, &generation);
if (res != NSPDFERROR_OK) {
/* no error if next token could not be decoded as a number */
return NSPDFERROR_OK;
@@ -813,7 +826,7 @@ cos_attempt_decode_reference(struct nspdf_doc *doc,
//printf("found object reference\n");
offset ++;
- doc_skip_ws(doc, &offset);
+ nspdf__stream_skip_ws(doc->stream, &offset);
nref = calloc(1, sizeof(struct cos_reference));
if (nref == NULL) {
@@ -839,7 +852,7 @@ cos_attempt_decode_reference(struct nspdf_doc *doc,
//printf("indirect\n");
offset += 3;
- res = doc_skip_ws(doc, &offset);
+ res = nspdf__stream_skip_ws(doc->stream, &offset);
if (res != NSPDFERROR_OK) {
cos_free_object(generation);
return res;
@@ -880,7 +893,7 @@ cos_attempt_decode_reference(struct nspdf_doc *doc,
offset += 6;
//printf("endobj\n");
- res = doc_skip_ws(doc, &offset);
+ res = nspdf__stream_skip_ws(doc->stream, &offset);
if (res != NSPDFERROR_OK) {
cos_free_object(indirect);
cos_free_object(generation);
@@ -980,7 +993,7 @@ cos_parse_object(struct nspdf_doc *doc,
case '7':
case '8':
case '9':
- res = cos_decode_number(doc, &offset, &cosobj);
+ res = cos_parse_number(doc->stream, &offset, &cosobj);
/* if type is positive integer try to check for reference */
if ((res == 0) &&
(cosobj->type == COS_TYPE_INT) &&
@@ -1032,3 +1045,23 @@ cos_parse_object(struct nspdf_doc *doc,
return res;
}
+/**
+ * Parse content stream into content operations object
+ *
+ * As written here no operations are parsed yet: the stream bytes are
+ * written to stdout with printf() and a zero-filled cos_object whose
+ * type is COS_TYPE_CONTENT is allocated and handed back.
+ *
+ * doc: not used by this body.
+ * stream: stream whose data and length are printed.
+ * content_out: receives the new object when NSPDFERROR_OK is returned.
+ *
+ * Returns NSPDFERROR_OK, or NSPDFERROR_NOMEM when calloc() fails (the
+ * stream has already been printed by then).
+ */
+nspdferror
+cos_parse_content_stream(struct nspdf_doc *doc,
+ struct cos_stream *stream,
+ struct cos_object **content_out)
+{
+ struct cos_object *cosobj;
+
+ printf("%.*s", (int)stream->length, stream->data); /* length is narrowed to int for the %.*s precision */
+
+ cosobj = calloc(1, sizeof(struct cos_object));
+ if (cosobj == NULL) {
+ return NSPDFERROR_NOMEM;
+ }
+ cosobj->type = COS_TYPE_CONTENT; /* the only member assigned; the rest stays as calloc() left it */
+
+ *content_out = cosobj;
+
+ return NSPDFERROR_OK;
+}
diff --git a/src/cos_parse.h b/src/cos_parse.h
index 0bca79f..8f48108 100644
--- a/src/cos_parse.h
+++ b/src/cos_parse.h
@@ -17,12 +17,18 @@
struct nspdf_doc;
struct cos_object;
+struct cos_stream;
/**
- * Decode input stream into an object
+ * Parse input stream into an object
*
* lex and parse a byte stream to generate a COS object.
*/
nspdferror cos_parse_object(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out);
+/**
+ * Parse content stream into content operations object
+ */
+nspdferror cos_parse_content_stream(struct nspdf_doc *doc, struct cos_stream *stream, struct cos_object **content_out);
+
#endif
diff --git a/src/document.c b/src/document.c
index bbe948d..b7a36d2 100644
--- a/src/document.c
+++ b/src/document.c
@@ -92,7 +92,7 @@ decode_startxref(struct nspdf_doc *doc,
}
offset += 9;
- res = doc_skip_ws(doc, &offset);
+ res = nspdf__stream_skip_ws(doc->stream, &offset);
if (res != NSPDFERROR_OK) {
return res;
}
@@ -168,7 +168,7 @@ decode_trailer(struct nspdf_doc *doc,
return -1;
}
offset += 7;
- doc_skip_ws(doc, &offset);
+ nspdf__stream_skip_ws(doc->stream, &offset);
res = cos_parse_object(doc, &offset, &trailer);
if (res != 0) {
@@ -422,13 +422,20 @@ static nspdferror check_header(struct nspdf_doc *doc)
nspdferror
nspdf_document_parse(struct nspdf_doc *doc,
const uint8_t *buffer,
- uint64_t buffer_length)
+ unsigned int buffer_length)
{
nspdferror res;
doc->start = buffer;
doc->length = buffer_length;
+ doc->stream = calloc(1, sizeof(struct cos_stream));
+ if (doc->stream == NULL) {
+ return NSPDFERROR_NOMEM;
+ }
+ doc->stream->data = buffer;
+ doc->stream->length = buffer_length;
+
res = check_header(doc);
if (res != 0) {
printf("header check failed\n");
diff --git a/src/page.c b/src/page.c
index acc97d7..7b6bee8 100644
--- a/src/page.c
+++ b/src/page.c
@@ -148,10 +148,18 @@ nspdf_page_count(struct nspdf_doc *doc, unsigned int *pages_out)
static nspdferror
nspdf__render_content_stream(struct nspdf_doc *doc,
- struct cos_stream *content_stream)
+ struct page_table_entry *page_entry,
+ struct cos_object *content_entry)
{
- printf("%.*s", (int)content_stream->length, content_stream->data);
- return NSPDFERROR_OK;
+ nspdferror res;
+ struct cos_content *content_operations;
+
+ res = cos_get_content(doc, content_entry, &content_operations);
+ if (res == NSPDFERROR_OK) {
+ printf("%p", content_operations);
+ }
+
+ return res;
}
/* exported interface documented in nspdf/page.h */
@@ -160,7 +168,6 @@ nspdf_page_render(struct nspdf_doc *doc, unsigned int page_number)
{
struct page_table_entry *page_entry;
struct cos_object *content_array;
- struct cos_stream *content_stream;
nspdferror res;
page_entry = doc->page_table + page_number;
@@ -184,31 +191,16 @@ nspdf_page_render(struct nspdf_doc *doc, unsigned int page_number)
content_stream_index,
&content_entry);
if (res != NSPDFERROR_OK) {
- return res;
- }
-
- res = cos_get_stream(doc, content_entry, &content_stream);
- if (res != NSPDFERROR_OK) {
- return res;
+ break;
}
- res = nspdf__render_content_stream(doc, content_stream);
+ res = nspdf__render_content_stream(doc, page_entry, content_entry);
if (res != NSPDFERROR_OK) {
- return res;
+ break;
}
}
} else if (res == NSPDFERROR_TYPE) {
- res = cos_get_stream(doc, page_entry->contents, &content_stream);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- res = nspdf__render_content_stream(doc, content_stream);
- if (res != NSPDFERROR_OK) {
- return res;
- }
- } else {
- return res;
+ res = nspdf__render_content_stream(doc, page_entry, page_entry->contents);
}
return res;
diff --git a/src/pdf_doc.c b/src/pdf_doc.c
index 997a3d7..955f737 100644
--- a/src/pdf_doc.c
+++ b/src/pdf_doc.c
@@ -19,29 +19,27 @@
#include "cos_object.h"
#include "pdf_doc.h"
-/**
- * move offset to next non whitespace byte
- */
-nspdferror doc_skip_ws(struct nspdf_doc *doc, uint64_t *offset)
+nspdferror nspdf__stream_skip_ws(struct cos_stream *stream, uint64_t *offset)
{
uint8_t c;
/* TODO sort out keeping offset in range */
- c = DOC_BYTE(doc, *offset);
+ c = stream_byte(stream, *offset);
while ((bclass[c] & (BC_WSPC | BC_CMNT) ) != 0) {
(*offset)++;
/* skip comments */
if ((bclass[c] & BC_CMNT) != 0) {
- c = DOC_BYTE(doc, *offset);
+ c = stream_byte(stream, *offset);
while ((bclass[c] & BC_EOLM ) == 0) {
(*offset)++;
- c = DOC_BYTE(doc, *offset);
+ c = stream_byte(stream, *offset);
}
}
- c = DOC_BYTE(doc, *offset);
+ c = stream_byte(stream, *offset);
}
return NSPDFERROR_OK;
}
+
/**
* move offset to next non eol byte
*/
diff --git a/src/pdf_doc.h b/src/pdf_doc.h
index e362ea6..27a730a 100644
--- a/src/pdf_doc.h
+++ b/src/pdf_doc.h
@@ -18,11 +18,18 @@
struct xref_table_entry;
struct page_table_entry;
-/** pdf document */
+/**
+ * pdf document
+ */
struct nspdf_doc {
const uint8_t *start; /* start of pdf document in input stream */
- uint64_t length;
+ unsigned int length;
+
+ /**
+ * input data stream
+ */
+ struct cos_stream *stream;
int major;
int minor;
@@ -46,8 +53,14 @@ struct nspdf_doc {
/* byte data accessor, allows for more complex buffer handling in future */
#define DOC_BYTE(doc, offset) (doc->start[(offset)])
+static inline uint8_t
+stream_byte(struct cos_stream *stream, unsigned int offset)
+{ /* Return the byte at position offset within stream->data.
+ * No comparison with stream->length is made here.
+ * NOTE(review): callers visible in this change pass uint64_t offsets,
+ * which are converted to unsigned int by this signature. */
+ return *(stream->data + offset);
+}
+
/* helpers in pdf_doc.c */
-nspdferror doc_skip_ws(struct nspdf_doc *doc, uint64_t *offset);
+nspdferror nspdf__stream_skip_ws(struct cos_stream *stream, uint64_t *offset);
nspdferror doc_skip_eol(struct nspdf_doc *doc, uint64_t *offset);
nspdferror doc_read_uint(struct nspdf_doc *doc, uint64_t *offset_out, uint64_t *result_out);
@@ -68,9 +81,6 @@ nspdferror nspdf__xref_allocate(struct nspdf_doc *doc, int64_t size);
nspdferror nspdf__decode_page_tree(struct nspdf_doc *doc, struct cos_object *page_tree_node, unsigned int *page_index);
/* cos stream filters */
-nspdferror
-nspdf__cos_stream_filter(struct nspdf_doc *doc,
- const char *filter_name,
- struct cos_stream **stream_out);
+nspdferror nspdf__cos_stream_filter(struct nspdf_doc *doc, const char *filter_name, struct cos_stream **stream_out);
#endif
diff --git a/src/xref.c b/src/xref.c
index 298c750..2fb9301 100644
--- a/src/xref.c
+++ b/src/xref.c
@@ -68,7 +68,7 @@ nspdferror nspdf__xref_parse(struct nspdf_doc *doc, uint64_t *offset_out)
}
offset += 4;
- res = doc_skip_ws(doc, &offset);
+ res = nspdf__stream_skip_ws(doc->stream, &offset);
if (res != NSPDFERROR_OK) {
return res;
}
@@ -79,7 +79,7 @@ nspdferror nspdf__xref_parse(struct nspdf_doc *doc, uint64_t *offset_out)
res = doc_read_uint(doc, &offset, &objnumber);
while (res == NSPDFERROR_OK) {
uint64_t lastobj;
- res = doc_skip_ws(doc, &offset);
+ res = nspdf__stream_skip_ws(doc->stream, &offset);
if (res != NSPDFERROR_OK) {
return res;
}
@@ -89,7 +89,7 @@ nspdferror nspdf__xref_parse(struct nspdf_doc *doc, uint64_t *offset_out)
return res;
}
- res = doc_skip_ws(doc, &offset);
+ res = nspdf__stream_skip_ws(doc->stream, &offset);
if (res != NSPDFERROR_OK) {
return res;
}
@@ -169,7 +169,7 @@ nspdf__xref_get_referenced(struct nspdf_doc *doc, struct cos_object **cobj_out)
}
if (entry->object == NULL) {
- /* indirect object has never been decoded */
+ /* indirect object has never been parsed */
offset = entry->offset;
res = cos_parse_object(doc, &offset, &indirect);
if (res != NSPDFERROR_OK) {