diff options
author | Vincent Sanders <vince@kyllikki.org> | 2017-12-30 00:38:07 +0000 |
---|---|---|
committer | Vincent Sanders <vince@kyllikki.org> | 2017-12-30 00:38:07 +0000 |
commit | 31b1f792826f51e9271475d124c3a1df4aa5116b (patch) | |
tree | 3e0c1083064ba66398dee4fa0aa1c464be3b5325 /src | |
parent | 5422dd50a49fe1a282271f22cd324f815e592e07 (diff) | |
download | libnspdf-31b1f792826f51e9271475d124c3a1df4aa5116b.tar.gz libnspdf-31b1f792826f51e9271475d124c3a1df4aa5116b.tar.bz2 |
make an actual library
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile | 15 | ||||
-rw-r--r-- | src/cos_object.c | 31 | ||||
-rw-r--r-- | src/cos_object.h | 37 | ||||
-rw-r--r-- | src/cos_parse.c (renamed from src/cos_decode.c) | 34 | ||||
-rw-r--r-- | src/cos_parse.h | 10 | ||||
-rw-r--r-- | src/document.c (renamed from src/xref.c) | 188 | ||||
-rw-r--r-- | src/nspdferror.h | 10 | ||||
-rw-r--r-- | src/pdf_doc.c | 12 | ||||
-rw-r--r-- | src/pdf_doc.h | 12 |
9 files changed, 162 insertions, 187 deletions
diff --git a/src/Makefile b/src/Makefile index af806f3..ed0b4ba 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,14 +1,3 @@ -# +DIR_SOURCES := document.c byte_class.c cos_parse.c cos_object.c pdf_doc.c -CFLAGS+=-g -Wall -Wextra - -OBJS=xref.o byte_class.o cos_decode.o cos_object.o pdf_doc.o - -.PHONY:all clean - -all:xref - -xref:$(OBJS) - -clean: - ${RM} xref $(OBJS) +include $(NSBUILD)/Makefile.subdir diff --git a/src/cos_object.c b/src/cos_object.c index 5bfd423..2fa3a93 100644 --- a/src/cos_object.c +++ b/src/cos_object.c @@ -14,7 +14,8 @@ #include <stdio.h> #include <string.h> -#include "nspdferror.h" +#include <nspdf/errors.h> + #include "cos_object.h" #include "pdf_doc.h" @@ -110,7 +111,7 @@ cos_extract_dictionary_value(struct cos_object *dict, * get a value for a key from a dictionary */ nspdferror -cos_get_dictionary_value(struct pdf_doc *doc, +cos_get_dictionary_value(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out) @@ -140,7 +141,7 @@ cos_get_dictionary_value(struct pdf_doc *doc, } nspdferror -cos_get_dictionary_int(struct pdf_doc *doc, +cos_get_dictionary_int(struct nspdf_doc *doc, struct cos_object *dict, const char *key, int64_t *value_out) @@ -156,7 +157,7 @@ cos_get_dictionary_int(struct pdf_doc *doc, } nspdferror -cos_get_dictionary_name(struct pdf_doc *doc, +cos_get_dictionary_name(struct nspdf_doc *doc, struct cos_object *dict, const char *key, const char **value_out) @@ -172,7 +173,7 @@ cos_get_dictionary_name(struct pdf_doc *doc, } nspdferror -cos_get_dictionary_dictionary(struct pdf_doc *doc, +cos_get_dictionary_dictionary(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out) @@ -188,7 +189,7 @@ cos_get_dictionary_dictionary(struct pdf_doc *doc, } nspdferror -cos_heritable_dictionary_dictionary(struct pdf_doc *doc, +cos_heritable_dictionary_dictionary(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out) @@ -206,7 +207,7 @@ cos_heritable_dictionary_dictionary(struct pdf_doc *doc, } nspdferror -cos_get_dictionary_array(struct pdf_doc *doc, +cos_get_dictionary_array(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out) @@ -222,7 +223,7 @@ cos_get_dictionary_array(struct pdf_doc *doc, } nspdferror -cos_heritable_dictionary_array(struct pdf_doc *doc, +cos_heritable_dictionary_array(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out) @@ -241,7 +242,7 @@ cos_heritable_dictionary_array(struct pdf_doc *doc, } nspdferror -cos_get_int(struct pdf_doc *doc, +cos_get_int(struct nspdf_doc *doc, struct cos_object *cobj, int64_t *value_out) { @@ -259,7 +260,7 @@ cos_get_int(struct pdf_doc *doc, } nspdferror -cos_get_name(struct pdf_doc *doc, +cos_get_name(struct nspdf_doc *doc, struct cos_object *cobj, const char **value_out) { @@ -279,7 +280,7 @@ cos_get_name(struct pdf_doc *doc, nspdferror -cos_get_dictionary(struct pdf_doc *doc, +cos_get_dictionary(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out) { @@ -297,7 +298,7 @@ cos_get_dictionary(struct pdf_doc *doc, } nspdferror -cos_get_array(struct pdf_doc *doc, +cos_get_array(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out) { @@ -318,7 +319,7 @@ cos_get_array(struct pdf_doc *doc, * get a value for a key from a dictionary */ nspdferror -cos_get_array_value(struct pdf_doc *doc, +cos_get_array_value(struct nspdf_doc *doc, struct cos_object *array, unsigned int index, struct cos_object **value_out) @@ -350,7 +351,7 @@ cos_get_array_value(struct pdf_doc *doc, } nspdferror -cos_get_array_dictionary(struct pdf_doc *doc, +cos_get_array_dictionary(struct nspdf_doc *doc, struct cos_object *array, unsigned int index, struct cos_object **value_out) @@ -366,7 +367,7 @@ cos_get_array_dictionary(struct pdf_doc *doc, } nspdferror -cos_get_array_size(struct pdf_doc *doc, +cos_get_array_size(struct nspdf_doc *doc, struct cos_object *cobj, unsigned int *size_out) { diff --git a/src/cos_object.h b/src/cos_object.h index 48241c6..a40c691 100644 --- a/src/cos_object.h +++ b/src/cos_object.h @@ -1,4 +1,4 @@ -struct pdf_doc; +struct nspdf_doc; enum cos_type { COS_TYPE_NULL, @@ -83,13 +83,6 @@ struct cos_object { } u; }; -/** - * Decode input stream into an object - * - * lex and parse a byte stream to generate a COS object. - */ -nspdferror cos_decode_object(struct pdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out); - nspdferror cos_free_object(struct cos_object *cos_obj); /** @@ -117,35 +110,35 @@ nspdferror cos_extract_dictionary_value(struct cos_object *dict, const char *key * NSPDFERROR_TYPE if the object passed in \p dict is not a dictionary. * NSPDFERROR_NOTFOUND if the key is not present in the dictionary. */ -nspdferror cos_get_dictionary_value(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out); +nspdferror cos_get_dictionary_value(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out); -nspdferror cos_get_dictionary_int(struct pdf_doc *doc, struct cos_object *dict, const char *key, int64_t *value_out); +nspdferror cos_get_dictionary_int(struct nspdf_doc *doc, struct cos_object *dict, const char *key, int64_t *value_out); -nspdferror cos_get_dictionary_name(struct pdf_doc *doc, struct cos_object *dict, const char *key, const char **value_out); +nspdferror cos_get_dictionary_name(struct nspdf_doc *doc, struct cos_object *dict, const char *key, const char **value_out); -nspdferror cos_get_dictionary_dictionary(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out); +nspdferror cos_get_dictionary_dictionary(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out); -nspdferror cos_heritable_dictionary_dictionary(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out); +nspdferror cos_heritable_dictionary_dictionary(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out); -nspdferror cos_get_dictionary_array(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out); +nspdferror cos_get_dictionary_array(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out); -nspdferror cos_heritable_dictionary_array(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out); +nspdferror cos_heritable_dictionary_array(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out); -nspdferror cos_get_int(struct pdf_doc *doc, struct cos_object *cobj, int64_t *value_out); +nspdferror cos_get_int(struct nspdf_doc *doc, struct cos_object *cobj, int64_t *value_out); -nspdferror cos_get_name(struct pdf_doc *doc, struct cos_object *cobj, const char **value_out); +nspdferror cos_get_name(struct nspdf_doc *doc, struct cos_object *cobj, const char **value_out); -nspdferror cos_get_dictionary(struct pdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out); +nspdferror cos_get_dictionary(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out); -nspdferror cos_get_array(struct pdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out); +nspdferror cos_get_array(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out); -nspdferror cos_get_array_size(struct pdf_doc *doc, struct cos_object *cobj, unsigned int *size_out); +nspdferror cos_get_array_size(struct nspdf_doc *doc, struct cos_object *cobj, unsigned int *size_out); -nspdferror cos_get_array_value(struct pdf_doc *doc, struct cos_object *array, unsigned int index, struct cos_object **value_out); +nspdferror cos_get_array_value(struct nspdf_doc *doc, struct cos_object *array, unsigned int index, struct cos_object **value_out); -nspdferror cos_get_array_dictionary(struct pdf_doc *doc, struct cos_object *arrau, unsigned int index, struct cos_object **value_out); +nspdferror cos_get_array_dictionary(struct nspdf_doc *doc, struct cos_object *arrau, unsigned int index, struct cos_object **value_out); diff --git a/src/cos_decode.c b/src/cos_parse.c index 8873060..ca3d802 100644 --- a/src/cos_decode.c +++ b/src/cos_parse.c @@ -5,8 +5,10 @@ #include <stdio.h> #include <string.h> +#include <nspdf/errors.h> + +#include "cos_parse.h" #include "byte_class.h" -#include "nspdferror.h" #include "cos_object.h" #include "pdf_doc.h" @@ -46,7 +48,7 @@ static uint8_t xtoi(uint8_t x) } static nspdferror -cos_decode_number(struct pdf_doc *doc, +cos_decode_number(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out) { @@ -101,7 +103,7 @@ cos_decode_number(struct pdf_doc *doc, * */ static nspdferror -cos_decode_string(struct pdf_doc *doc, +cos_decode_string(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out) { @@ -230,7 +232,7 @@ cos_decode_string(struct pdf_doc *doc, * decode hex encoded string */ static nspdferror -cos_decode_hex_string(struct pdf_doc *doc, +cos_decode_hex_string(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out) { @@ -293,7 +295,7 @@ cos_decode_hex_string(struct pdf_doc *doc, * decode a dictionary object */ static nspdferror -cos_decode_dictionary(struct pdf_doc *doc, +cos_decode_dictionary(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out) { @@ -324,7 +326,7 @@ cos_decode_dictionary(struct pdf_doc *doc, while ((DOC_BYTE(doc, offset) != '>') && (DOC_BYTE(doc, offset + 1) != '>')) { - res = cos_decode_object(doc, &offset, &key); + res = cos_parse_object(doc, &offset, &key); if (res != NSPDFERROR_OK) { /* todo free up any dictionary entries already created */ printf("key object decode failed\n"); @@ -337,7 +339,7 @@ cos_decode_dictionary(struct pdf_doc *doc, } //printf("key: %s\n", key->u.n); - res = cos_decode_object(doc, &offset, &value); + res = cos_parse_object(doc, &offset, &value); if (res != NSPDFERROR_OK) { printf("Unable to decode value object in dictionary\n"); /* todo free up any dictionary entries already created */ @@ -371,7 +373,7 @@ cos_decode_dictionary(struct pdf_doc *doc, * decode a list */ static nspdferror -cos_decode_list(struct pdf_doc *doc, +cos_decode_list(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out) { @@ -406,7 +408,7 @@ cos_decode_list(struct pdf_doc *doc, while (DOC_BYTE(doc, offset) != ']') { - res = cos_decode_object(doc, &offset, &value); + res = cos_parse_object(doc, &offset, &value); if (res != NSPDFERROR_OK) { cos_free_object(cosobj); printf("Unable to decode value object in list\n"); @@ -442,7 +444,7 @@ cos_decode_list(struct pdf_doc *doc, * \todo deal with # symbols on pdf versions 1.2 and later */ static nspdferror -cos_decode_name(struct pdf_doc *doc, +cos_decode_name(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out) { @@ -498,7 +500,7 @@ cos_decode_name(struct pdf_doc *doc, * decode a cos boolean object */ static int -cos_decode_boolean(struct pdf_doc *doc, +cos_decode_boolean(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out) { @@ -574,7 +576,7 @@ cos_decode_boolean(struct pdf_doc *doc, * decode the null object. */ static nspdferror -cos_decode_null(struct pdf_doc *doc, +cos_decode_null(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out) { @@ -640,7 +642,7 @@ cos_decode_null(struct pdf_doc *doc, * integer */ static nspdferror -cos_attempt_decode_reference(struct pdf_doc *doc, +cos_attempt_decode_reference(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out) { @@ -712,7 +714,7 @@ cos_attempt_decode_reference(struct pdf_doc *doc, } //printf("decoding\n"); - res = cos_decode_object(doc, &offset, &indirect); + res = cos_parse_object(doc, &offset, &indirect); if (res != NSPDFERROR_OK) { cos_free_object(generation); return res; @@ -752,7 +754,7 @@ cos_attempt_decode_reference(struct pdf_doc *doc, /* - * Decode input stream into an object + * Parse input stream into an object * * lex and parse a byte stream to generate COS objects * @@ -804,7 +806,7 @@ cos_attempt_decode_reference(struct pdf_doc *doc, * ; */ nspdferror -cos_decode_object(struct pdf_doc *doc, +cos_parse_object(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out) { diff --git a/src/cos_parse.h b/src/cos_parse.h new file mode 100644 index 0000000..adfb835 --- /dev/null +++ b/src/cos_parse.h @@ -0,0 +1,10 @@ +struct nspdf_doc; +struct cos_object; + +/** + * Decode input stream into an object + * + * lex and parse a byte stream to generate a COS object. + */ +nspdferror cos_parse_object(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out); + diff --git a/src/xref.c b/src/document.c index 452aa19..9be0ab5 100644 --- a/src/xref.c +++ b/src/document.c @@ -1,3 +1,12 @@ +/* + * Copyright 2018 Vincent Sanders <vince@netsurf-browser.org> + * + * This file is part of libnspdf. + * + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + */ + #include <stdio.h> #include <stdint.h> #include <inttypes.h> @@ -5,59 +14,27 @@ #include <stdbool.h> #include <string.h> -#include "nspdferror.h" +#include <nspdf/document.h> + +#include "cos_parse.h" #include "byte_class.h" #include "cos_object.h" #include "pdf_doc.h" #define SLEN(x) (sizeof((x)) - 1) - -int -read_whole_pdf(struct pdf_doc *doc, const char *fname) -{ - FILE *f; - off_t len; - uint8_t *buf; - size_t rd; - - f = fopen(fname, "r"); - if (f == NULL) { - perror("pdf open"); - return 1; - } - - fseek(f, 0, SEEK_END); - len = ftello(f); - - buf = malloc(len); - fseek(f, 0, SEEK_SET); - - rd = fread(buf, len, 1, f); - if (rd != 1) { - perror("pdf read"); - free(buf); - return 1; - } - - fclose(f); - - doc->start = doc->buffer = buf; - doc->length = doc->buffer_length = len; - - return 0; -} - - #define STARTXREF_TOK "startxref" -/* Number of bytes to search back from file end to find xref start token, convention says 1024 bytes */ -#define STARTXREF_SEARCH_SIZE 1024 - +/* Number of bytes to search back from file end to find xref start token, + * convention says 1024 bytes + */ +#define STARTXREF_SEARCH_SIZE 1024 static nspdferror -doc_read_uint(struct pdf_doc *doc, uint64_t *offset_out, uint64_t *result_out) +doc_read_uint(struct nspdf_doc *doc, + uint64_t *offset_out, + uint64_t *result_out) { uint8_t c; /* current byte from source data */ unsigned int len; /* number of decimal places in number */ @@ -90,10 +67,11 @@ doc_read_uint(struct pdf_doc *doc, uint64_t *offset_out, uint64_t *result_out) return -1; /* number too long */ } + /** * finds the startxref marker at the end of input */ -nspdferror find_startxref(struct pdf_doc *doc, uint64_t *offset_out) +static nspdferror find_startxref(struct nspdf_doc *doc, uint64_t *offset_out) { uint64_t offset; /* offset of characters being considered for startxref */ uint64_t earliest; /* earliest offset to serch for startxref */ @@ -123,10 +101,14 @@ nspdferror find_startxref(struct pdf_doc *doc, uint64_t *offset_out) return NSPDFERROR_SYNTAX; } + /** * decodes a startxref field */ -nspdferror decode_startxref(struct pdf_doc *doc, uint64_t *offset_out, uint64_t *start_xref_out) +static nspdferror +decode_startxref(struct nspdf_doc *doc, + uint64_t *offset_out, + uint64_t *start_xref_out) { uint64_t offset; /* offset of characters being considered for startxref */ uint64_t start_xref; @@ -181,7 +163,7 @@ nspdferror decode_startxref(struct pdf_doc *doc, uint64_t *offset_out, uint64_t /** * finds the next trailer */ -nspdferror find_trailer(struct pdf_doc *doc, uint64_t *offset_out) +static nspdferror find_trailer(struct nspdf_doc *doc, uint64_t *offset_out) { uint64_t offset; /* offset of characters being considered for trailer */ @@ -200,33 +182,9 @@ nspdferror find_trailer(struct pdf_doc *doc, uint64_t *offset_out) return NSPDFERROR_SYNTAX; } -/** - * find the PDF comment marker to identify the start of the document - */ -int check_header(struct pdf_doc *doc) -{ - uint64_t offset; /* offset of characters being considered for startxref */ - - for (offset = 0; offset < 1024; offset++) { - if ((DOC_BYTE(doc, offset) == '%') && - (DOC_BYTE(doc, offset + 1) == 'P') && - (DOC_BYTE(doc, offset + 2) == 'D') && - (DOC_BYTE(doc, offset + 3) == 'F') && - (DOC_BYTE(doc, offset + 4) == '-') && - (DOC_BYTE(doc, offset + 5) == '1') && - (DOC_BYTE(doc, offset + 6) == '.')) { - doc->start = doc->buffer + offset; - doc->length -= offset; - /* read number for minor */ - return 0; - } - } - return -1; -} - -nspdferror -decode_trailer(struct pdf_doc *doc, +static nspdferror +decode_trailer(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **trailer_out) { @@ -249,7 +207,7 @@ decode_trailer(struct pdf_doc *doc, offset += 7; doc_skip_ws(doc, &offset); - res = cos_decode_object(doc, &offset, &trailer); + res = cos_parse_object(doc, &offset, &trailer); if (res != 0) { return res; } @@ -265,8 +223,9 @@ decode_trailer(struct pdf_doc *doc, return NSPDFERROR_OK; } -nspdferror -decode_xref(struct pdf_doc *doc, uint64_t *offset_out) + +static nspdferror +decode_xref(struct nspdf_doc *doc, uint64_t *offset_out) { uint64_t offset; nspdferror res; @@ -359,7 +318,8 @@ decode_xref(struct pdf_doc *doc, uint64_t *offset_out) /** * recursively parse trailers and xref tables */ -nspdferror decode_xref_trailer(struct pdf_doc *doc, uint64_t xref_offset) +static nspdferror +decode_xref_trailer(struct nspdf_doc *doc, uint64_t xref_offset) { nspdferror res; uint64_t offset; /* the current data offset */ @@ -455,6 +415,7 @@ decode_xref_trailer_failed: return res; } + /** * decode non-linear pdf trailer data * @@ -477,7 +438,7 @@ decode_xref_trailer_failed: * find the subsequent trailer. * */ -nspdferror decode_trailers(struct pdf_doc *doc) +static nspdferror decode_trailers(struct nspdf_doc *doc) { nspdferror res; uint64_t offset; /* the current data offset */ @@ -499,11 +460,12 @@ nspdferror decode_trailers(struct pdf_doc *doc) return decode_xref_trailer(doc, startxref); } + /** * recursively decodes a page tree */ -nspdferror -decode_page_tree(struct pdf_doc *doc, +static nspdferror +decode_page_tree(struct nspdf_doc *doc, struct cos_object *page_tree_node, unsigned int *page_index) { @@ -594,12 +556,14 @@ decode_page_tree(struct pdf_doc *doc, return res; } + /* printf("page index:%d page:%p resources:%p mediabox:%p contents:%p\n", *page_index, page, page->resources, page->mediabox, page->contents); + */ (*page_index)++; res = NSPDFERROR_OK; @@ -609,7 +573,8 @@ decode_page_tree(struct pdf_doc *doc, return res; } -nspdferror decode_catalog(struct pdf_doc *doc) + +static nspdferror decode_catalog(struct nspdf_doc *doc) { nspdferror res; struct cos_object *catalog; @@ -645,38 +610,63 @@ nspdferror decode_catalog(struct pdf_doc *doc) return res; } -nspdferror new_pdf_doc(struct pdf_doc **doc_out) +/* exported interface documented in nspdf/document.h */ +nspdferror nspdf_document_create(struct nspdf_doc **doc_out) { - struct pdf_doc *doc; - doc = calloc(1, sizeof(struct pdf_doc)); + struct nspdf_doc *doc; + doc = calloc(1, sizeof(struct nspdf_doc)); if (doc == NULL) { return NSPDFERROR_NOMEM; } + *doc_out = doc; + return NSPDFERROR_OK; } -int main(int argc, char **argv) +/* exported interface documented in nspdf/document.h */ +nspdferror nspdf_document_destroy(struct nspdf_doc *doc) { - struct pdf_doc *doc; - int res; + free(doc); - if (argc < 2) { - fprintf(stderr, "Usage %s <filename>\n", argv[0]); - return 1; - } + return NSPDFERROR_OK; +} - res = new_pdf_doc(&doc); - if (res != NSPDFERROR_OK) { - printf("failed to read file\n"); - return res; - } - res = read_whole_pdf(doc, argv[1]); - if (res != 0) { - printf("failed to read file\n"); - return res; +/** + * find the PDF comment marker to identify the start of the document + */ +static nspdferror check_header(struct nspdf_doc *doc) +{ + uint64_t offset; /* offset of characters being considered for header */ + for (offset = 0; offset < 1024; offset++) { + if ((DOC_BYTE(doc, offset) == '%') && + (DOC_BYTE(doc, offset + 1) == 'P') && + (DOC_BYTE(doc, offset + 2) == 'D') && + (DOC_BYTE(doc, offset + 3) == 'F') && + (DOC_BYTE(doc, offset + 4) == '-') && + (DOC_BYTE(doc, offset + 5) == '1') && + (DOC_BYTE(doc, offset + 6) == '.')) { + doc->start += offset; + doc->length -= offset; + + /* \todo read number for minor */ + return NSPDFERROR_OK; + } } + return NSPDFERROR_NOTFOUND; +} + +/* exported interface documented in nspdf/document.h */ +nspdferror +nspdf_document_parse(struct nspdf_doc *doc, + const uint8_t *buffer, + uint64_t buffer_length) +{ + nspdferror res; + + doc->start = buffer; + doc->length = buffer_length; res = check_header(doc); if (res != 0) { @@ -696,5 +686,5 @@ int main(int argc, char **argv) return res; } - return 0; + return res; } diff --git a/src/nspdferror.h b/src/nspdferror.h deleted file mode 100644 index 3e26813..0000000 --- a/src/nspdferror.h +++ /dev/null @@ -1,10 +0,0 @@ -typedef enum { - NSPDFERROR_OK, - NSPDFERROR_NOMEM, - NSPDFERROR_SYNTAX, /**< syntax error in parse */ - NSPDFERROR_SIZE, /**< not enough input data */ - NSPDFERROR_RANGE, /**< value outside type range */ - NSPDFERROR_TYPE, /**< wrong type error */ - NSPDFERROR_NOTFOUND, /**< key not found */ - NSPDFERROR_FORMAT, /**< objects do not cornform to expected format */ -} nspdferror; diff --git a/src/pdf_doc.c b/src/pdf_doc.c index 4a5cad1..281025c 100644 --- a/src/pdf_doc.c +++ b/src/pdf_doc.c @@ -12,7 +12,9 @@ #include <stdbool.h> #include <stdio.h> -#include "nspdferror.h" +#include <nspdf/errors.h> + +#include "cos_parse.h" #include "byte_class.h" #include "cos_object.h" #include "pdf_doc.h" @@ -20,7 +22,7 @@ /** * move offset to next non whitespace byte */ -nspdferror doc_skip_ws(struct pdf_doc *doc, uint64_t *offset) +nspdferror doc_skip_ws(struct nspdf_doc *doc, uint64_t *offset) { uint8_t c; /* TODO sort out keeping offset in range */ @@ -43,7 +45,7 @@ nspdferror doc_skip_ws(struct pdf_doc *doc, uint64_t *offset) /** * move offset to next non eol byte */ -nspdferror doc_skip_eol(struct pdf_doc *doc, uint64_t *offset) +nspdferror doc_skip_eol(struct nspdf_doc *doc, uint64_t *offset) { uint8_t c; /* TODO sort out keeping offset in range */ @@ -60,7 +62,7 @@ static struct cos_object cos_null_obj = { }; nspdferror -xref_get_referenced(struct pdf_doc *doc, struct cos_object **cobj_out) +xref_get_referenced(struct nspdf_doc *doc, struct cos_object **cobj_out) { nspdferror res; struct cos_object *cobj; @@ -90,7 +92,7 @@ xref_get_referenced(struct pdf_doc *doc, struct cos_object **cobj_out) if (entry->object == NULL) { /* indirect object has never been decoded */ offset = entry->offset; - res = cos_decode_object(doc, &offset, &indirect); + res = cos_parse_object(doc, &offset, &indirect); if (res != NSPDFERROR_OK) { printf("failed to decode indirect object\n"); return res; diff --git a/src/pdf_doc.h b/src/pdf_doc.h index 986556f..e9bdc14 100644 --- a/src/pdf_doc.h +++ b/src/pdf_doc.h @@ -18,11 +18,9 @@ struct page_table_entry { }; /** pdf document */ -struct pdf_doc { - uint8_t *buffer; - uint64_t buffer_length; +struct nspdf_doc { - uint8_t *start; /* start of pdf document in input stream */ + const uint8_t *start; /* start of pdf document in input stream */ uint64_t length; int major; @@ -47,7 +45,7 @@ struct pdf_doc { /* byte data acessory, allows for more complex buffer handling in future */ #define DOC_BYTE(doc, offset) (doc->start[(offset)]) -nspdferror doc_skip_ws(struct pdf_doc *doc, uint64_t *offset); -nspdferror doc_skip_eol(struct pdf_doc *doc, uint64_t *offset); +nspdferror doc_skip_ws(struct nspdf_doc *doc, uint64_t *offset); +nspdferror doc_skip_eol(struct nspdf_doc *doc, uint64_t *offset); -nspdferror xref_get_referenced(struct pdf_doc *doc, struct cos_object **cobj_out); +nspdferror xref_get_referenced(struct nspdf_doc *doc, struct cos_object **cobj_out); |