netsurf: branch master updated. release/3.7-58-gd884e0e
by NetSurf Browser Project
Gitweb links:
...log http://git.netsurf-browser.org/netsurf.git/shortlog/d884e0e53303c22e747c8...
...commit http://git.netsurf-browser.org/netsurf.git/commit/d884e0e53303c22e747c8d7...
...tree http://git.netsurf-browser.org/netsurf.git/tree/d884e0e53303c22e747c8d769...
The branch, master has been updated
via d884e0e53303c22e747c8d769189e21785b7f753 (commit)
from 470dce645bc9cfca1d438f62a8ffe7a6db80a278 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commitdiff http://git.netsurf-browser.org/netsurf.git/commit/?id=d884e0e53303c22e747...
commit d884e0e53303c22e747c8d769189e21785b7f753
Author: Chris Young <chris(a)unsatisfactorysoftware.co.uk>
Commit: Chris Young <chris(a)unsatisfactorysoftware.co.uk>
Minor opt
diff --git a/frontends/amiga/gui.c b/frontends/amiga/gui.c
index 8e822f3..e337ede 100644
--- a/frontends/amiga/gui.c
+++ b/frontends/amiga/gui.c
@@ -4967,22 +4967,20 @@ void ami_get_hscroll_pos(struct gui_window_2 *gwin, ULONG *xs)
if(gwin->objects[GID_HSCROLL])
{
GetAttr(SCROLLER_Top, (Object *)gwin->objects[GID_HSCROLL], xs);
+ *xs /= gwin->gw->scale;
} else {
*xs = 0;
}
-
- *xs /= gwin->gw->scale;
}
void ami_get_vscroll_pos(struct gui_window_2 *gwin, ULONG *ys)
{
if(gwin->objects[GID_VSCROLL]) {
GetAttr(SCROLLER_Top, gwin->objects[GID_VSCROLL], ys);
+ *ys /= gwin->gw->scale;
} else {
*ys = 0;
}
-
- *ys /= gwin->gw->scale;
}
static bool gui_window_get_scroll(struct gui_window *g, int *restrict sx, int *restrict sy)
-----------------------------------------------------------------------
Summary of changes:
frontends/amiga/gui.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/frontends/amiga/gui.c b/frontends/amiga/gui.c
index 8e822f3..e337ede 100644
--- a/frontends/amiga/gui.c
+++ b/frontends/amiga/gui.c
@@ -4967,22 +4967,20 @@ void ami_get_hscroll_pos(struct gui_window_2 *gwin, ULONG *xs)
if(gwin->objects[GID_HSCROLL])
{
GetAttr(SCROLLER_Top, (Object *)gwin->objects[GID_HSCROLL], xs);
+ *xs /= gwin->gw->scale;
} else {
*xs = 0;
}
-
- *xs /= gwin->gw->scale;
}
void ami_get_vscroll_pos(struct gui_window_2 *gwin, ULONG *ys)
{
if(gwin->objects[GID_VSCROLL]) {
GetAttr(SCROLLER_Top, gwin->objects[GID_VSCROLL], ys);
+ *ys /= gwin->gw->scale;
} else {
*ys = 0;
}
-
- *ys /= gwin->gw->scale;
}
static bool gui_window_get_scroll(struct gui_window *g, int *restrict sx, int *restrict sy)
--
NetSurf Browser
5 years, 9 months
netsurf-wiki: branch master updated. 37d05386779a1be5200548ab53e7f6517d6a90af
by NetSurf Browser Project
Gitweb links:
...log http://git.netsurf-browser.org/netsurf-wiki.git/shortlog/37d05386779a1be5...
...commit http://git.netsurf-browser.org/netsurf-wiki.git/commit/37d05386779a1be520...
...tree http://git.netsurf-browser.org/netsurf-wiki.git/tree/37d05386779a1be52005...
The branch, master has been updated
via 37d05386779a1be5200548ab53e7f6517d6a90af (commit)
from cff988a4f7fd0a24954d9cc7f1661d4f0e5091f7 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commitdiff http://git.netsurf-browser.org/netsurf-wiki.git/commit/?id=37d05386779a1b...
commit 37d05386779a1be5200548ab53e7f6517d6a90af
Author: Chris Young <chris(a)unsatisfactorysoftware.co.uk>
Commit: Chris Young <chris(a)unsatisfactorysoftware.co.uk>
update
diff --git a/documentation/buildingforamigaos.mdwn b/documentation/buildingforamigaos.mdwn
index 23fcb26..134b334 100644
--- a/documentation/buildingforamigaos.mdwn
+++ b/documentation/buildingforamigaos.mdwn
@@ -28,11 +28,15 @@ Libraries
---------
Make/install buildsystem first!
+Build in this approximate order:
+buildsystem, libnslog, libnspsl, libnsgif, libnsbmp, libwapcaplet, libparserutils, libhubbub, libdom, libcss, libsvgtiny, libnsutils, libutf8proc
Clone and build the libraries with:
-`make HOST=m68k-unknown-amigaos`
-`sudo make HOST=m68k-unknown-amigaos install`
+`make HOST=m68k-unknown-amigaos PREFIX=/opt/netsurf/m68k-unknown-amigaos/env`
+`sudo make HOST=m68k-unknown-amigaos PREFIX=/opt/netsurf/m68k-unknown-amigaos/env install`
+
+Finally, build nsgenbind for the host, not for OS3. It will install in /opt/netsurf/bin; add that directory to the path so make will find it later, or create a symlink to it at /usr/bin/nsgenbind.
NetSurf
-------
-----------------------------------------------------------------------
Summary of changes:
documentation/buildingforamigaos.mdwn | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/documentation/buildingforamigaos.mdwn b/documentation/buildingforamigaos.mdwn
index 23fcb26..134b334 100644
--- a/documentation/buildingforamigaos.mdwn
+++ b/documentation/buildingforamigaos.mdwn
@@ -28,11 +28,15 @@ Libraries
---------
Make/install buildsystem first!
+Build in this approximate order:
+buildsystem, libnslog, libnspsl, libnsgif, libnsbmp, libwapcaplet, libparserutils, libhubbub, libdom, libcss, libsvgtiny, libnsutils, libutf8proc
Clone and build the libraries with:
-`make HOST=m68k-unknown-amigaos`
-`sudo make HOST=m68k-unknown-amigaos install`
+`make HOST=m68k-unknown-amigaos PREFIX=/opt/netsurf/m68k-unknown-amigaos/env`
+`sudo make HOST=m68k-unknown-amigaos PREFIX=/opt/netsurf/m68k-unknown-amigaos/env install`
+
+Finally, build nsgenbind for the host, not for OS3. It will install in /opt/netsurf/bin; add that directory to the path so make will find it later, or create a symlink to it at /usr/bin/nsgenbind.
NetSurf
-------
--
NetSurf Developer Wiki Backing Store
5 years, 9 months
libnspdf: branch master updated. d9bf14b9a1a5fa5770808c18d8039da0fb34021c
by NetSurf Browser Project
Gitweb links:
...log http://git.netsurf-browser.org/libnspdf.git/shortlog/d9bf14b9a1a5fa577080...
...commit http://git.netsurf-browser.org/libnspdf.git/commit/d9bf14b9a1a5fa5770808c...
...tree http://git.netsurf-browser.org/libnspdf.git/tree/d9bf14b9a1a5fa5770808c18...
The branch, master has been updated
via d9bf14b9a1a5fa5770808c18d8039da0fb34021c (commit)
via 24f990ef17253074f3d667a8d4c88efd2fb1d4f6 (commit)
from d2d566cf50835d728f1c65ebeccf914b43d81867 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=d9bf14b9a1a5fa5770...
commit d9bf14b9a1a5fa5770808c18d8039da0fb34021c
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
fix line width scaling
diff --git a/src/cos_stream_filter.c b/src/cos_stream_filter.c
index 0c08442..c12f641 100644
--- a/src/cos_stream_filter.c
+++ b/src/cos_stream_filter.c
@@ -105,7 +105,7 @@ nspdf__cos_stream_filter(struct nspdf_doc *doc,
nspdferror res;
//printf("applying filter %s\n", filter_name);
-
+ /** \todo implement all the other mandatory stream filters */
if (strcmp(filter_name, "FlateDecode") == 0) {
res = cos_stream_inflate(doc, stream_out);
} else {
diff --git a/src/document.c b/src/document.c
index bd3d314..9e8ed22 100644
--- a/src/document.c
+++ b/src/document.c
@@ -22,6 +22,22 @@
#include "xref.h"
#include "pdf_doc.h"
+/*
+ * And you may find yourself
+ * Writing another PDF parser
+ * And you may find yourself
+ * In a middle of a debug session
+ * And you may find yourself
+ * Behind an British English keyboard
+ * And you may find yourself in a gdb
+ * With a long stacktrace
+ * And you may ask yourself, well
+ * How did I get here?
+ *
+ * Andrew Shadura 2018
+ */
+
+
#define SLEN(x) (sizeof((x)) - 1)
/* byte data acessory, allows for more complex buffer handling in future */
diff --git a/src/graphics_state.h b/src/graphics_state.h
index e5cc2bf..40060dd 100644
--- a/src/graphics_state.h
+++ b/src/graphics_state.h
@@ -52,7 +52,17 @@ struct graphics_state_param {
struct {
struct graphics_state_color colour;
} other;
- /* text state */
+ /** text state */
+ struct {
+ float charspacing;
+ float wordspacing;
+ float hscale;
+ float leading;
+ float fontsize;
+ unsigned int rendermode;
+ float rise;
+ /* knockout */
+ } text;
float line_width;
unsigned int line_cap;
unsigned int line_join;
diff --git a/src/page.c b/src/page.c
index 2a3a836..7a0a33b 100644
--- a/src/page.c
+++ b/src/page.c
@@ -13,6 +13,8 @@
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
+#include <math.h>
+
#include <nspdf/page.h>
#include "graphics_state.h"
@@ -409,14 +411,31 @@ render_operation_f(struct graphics_state *gs, struct nspdf_render_ctx* render_ct
return NSPDFERROR_OK;
}
+static inline nspdferror
+scale_stroke_width(float *ctm, float unscaled, float *scaled)
+{
+ float avscale;
+ avscale = (fabs(ctm[0]) + fabs(ctm[3])) / 2.0; /* average scale of x and y axis */
+
+ *scaled = unscaled * avscale;
+ if (*scaled < 0.1) {
+ /* printf("sx:%f sy:%f av:%f un:%f sc:%f\n",
+ ctm[0], ctm[3], avscale, unscaled, *scaled);*/
+ *scaled = 0.1;
+ }
+ return NSPDFERROR_OK;
+}
static inline nspdferror
render_operation_B(struct graphics_state *gs, struct nspdf_render_ctx* render_ctx)
{
struct nspdf_style style;
style.stroke_type = NSPDF_OP_TYPE_SOLID;
- style.stroke_width = gs->param_stack[gs->param_stack_idx].line_width;
- gsc_to_device(&gs->param_stack[gs->param_stack_idx].stroke.colour, &style.stroke_colour);
+ scale_stroke_width(gs->param_stack[gs->param_stack_idx].ctm,
+ gs->param_stack[gs->param_stack_idx].line_width,
+ &style.stroke_width);
+ gsc_to_device(&gs->param_stack[gs->param_stack_idx].stroke.colour,
+ &style.stroke_colour);
style.fill_type = NSPDF_OP_TYPE_SOLID;
gsc_to_device(&gs->param_stack[gs->param_stack_idx].other.colour, &style.fill_colour);
@@ -441,8 +460,11 @@ render_operation_S(struct graphics_state *gs, struct nspdf_render_ctx* render_ct
style.fill_colour = 0x01000000;
style.stroke_type = NSPDF_OP_TYPE_SOLID;
- style.stroke_width = gs->param_stack[gs->param_stack_idx].line_width;
- gsc_to_device(&gs->param_stack[gs->param_stack_idx].stroke.colour, &style.stroke_colour);
+ scale_stroke_width(gs->param_stack[gs->param_stack_idx].ctm,
+ gs->param_stack[gs->param_stack_idx].line_width,
+ &style.stroke_width);
+ gsc_to_device(&gs->param_stack[gs->param_stack_idx].stroke.colour,
+ &style.stroke_colour);
render_ctx->path(&style,
gs->path,
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=24f990ef17253074f3...
commit 24f990ef17253074f3d667a8d4c88efd2fb1d4f6
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
simplify path render a bit
diff --git a/src/page.c b/src/page.c
index 3b2371b..2a3a836 100644
--- a/src/page.c
+++ b/src/page.c
@@ -409,28 +409,6 @@ render_operation_f(struct graphics_state *gs, struct nspdf_render_ctx* render_ct
return NSPDFERROR_OK;
}
-static inline nspdferror
-render_operation_b(struct graphics_state *gs, struct nspdf_render_ctx* render_ctx)
-{
- struct nspdf_style style;
- style.stroke_type = NSPDF_OP_TYPE_SOLID;
- style.stroke_width = gs->param_stack[gs->param_stack_idx].line_width;
- gsc_to_device(&gs->param_stack[gs->param_stack_idx].stroke.colour, &style.stroke_colour);
-
- style.fill_type = NSPDF_OP_TYPE_SOLID;
- gsc_to_device(&gs->param_stack[gs->param_stack_idx].other.colour, &style.fill_colour);
-
- gs->path[gs->path_idx++] = NSPDF_PATH_CLOSE;
-
- render_ctx->path(&style,
- gs->path,
- gs->path_idx,
- gs->param_stack[gs->param_stack_idx].ctm,
- render_ctx->ctx);
- gs->path_idx = 0;
-
- return NSPDFERROR_OK;
-}
static inline nspdferror
render_operation_B(struct graphics_state *gs, struct nspdf_render_ctx* render_ctx)
@@ -823,7 +801,8 @@ nspdf_page_render(struct nspdf_doc *doc,
case CONTENT_OP_b:
case CONTENT_OP_b_:
- res = render_operation_b(&gs, render_ctx);
+ render_operation_h(&gs);
+ res = render_operation_B(&gs, render_ctx);
break;
case CONTENT_OP_s:
-----------------------------------------------------------------------
Summary of changes:
src/cos_stream_filter.c | 2 +-
src/document.c | 16 ++++++++++++++++
src/graphics_state.h | 12 +++++++++++-
src/page.c | 45 +++++++++++++++++++++++----------------------
4 files changed, 51 insertions(+), 24 deletions(-)
diff --git a/src/cos_stream_filter.c b/src/cos_stream_filter.c
index 0c08442..c12f641 100644
--- a/src/cos_stream_filter.c
+++ b/src/cos_stream_filter.c
@@ -105,7 +105,7 @@ nspdf__cos_stream_filter(struct nspdf_doc *doc,
nspdferror res;
//printf("applying filter %s\n", filter_name);
-
+ /** \todo implement all the other mandatory stream filters */
if (strcmp(filter_name, "FlateDecode") == 0) {
res = cos_stream_inflate(doc, stream_out);
} else {
diff --git a/src/document.c b/src/document.c
index bd3d314..9e8ed22 100644
--- a/src/document.c
+++ b/src/document.c
@@ -22,6 +22,22 @@
#include "xref.h"
#include "pdf_doc.h"
+/*
+ * And you may find yourself
+ * Writing another PDF parser
+ * And you may find yourself
+ * In a middle of a debug session
+ * And you may find yourself
+ * Behind an British English keyboard
+ * And you may find yourself in a gdb
+ * With a long stacktrace
+ * And you may ask yourself, well
+ * How did I get here?
+ *
+ * Andrew Shadura 2018
+ */
+
+
#define SLEN(x) (sizeof((x)) - 1)
/* byte data acessory, allows for more complex buffer handling in future */
diff --git a/src/graphics_state.h b/src/graphics_state.h
index e5cc2bf..40060dd 100644
--- a/src/graphics_state.h
+++ b/src/graphics_state.h
@@ -52,7 +52,17 @@ struct graphics_state_param {
struct {
struct graphics_state_color colour;
} other;
- /* text state */
+ /** text state */
+ struct {
+ float charspacing;
+ float wordspacing;
+ float hscale;
+ float leading;
+ float fontsize;
+ unsigned int rendermode;
+ float rise;
+ /* knockout */
+ } text;
float line_width;
unsigned int line_cap;
unsigned int line_join;
diff --git a/src/page.c b/src/page.c
index 3b2371b..7a0a33b 100644
--- a/src/page.c
+++ b/src/page.c
@@ -13,6 +13,8 @@
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
+#include <math.h>
+
#include <nspdf/page.h>
#include "graphics_state.h"
@@ -410,25 +412,17 @@ render_operation_f(struct graphics_state *gs, struct nspdf_render_ctx* render_ct
}
static inline nspdferror
-render_operation_b(struct graphics_state *gs, struct nspdf_render_ctx* render_ctx)
+scale_stroke_width(float *ctm, float unscaled, float *scaled)
{
- struct nspdf_style style;
- style.stroke_type = NSPDF_OP_TYPE_SOLID;
- style.stroke_width = gs->param_stack[gs->param_stack_idx].line_width;
- gsc_to_device(&gs->param_stack[gs->param_stack_idx].stroke.colour, &style.stroke_colour);
-
- style.fill_type = NSPDF_OP_TYPE_SOLID;
- gsc_to_device(&gs->param_stack[gs->param_stack_idx].other.colour, &style.fill_colour);
-
- gs->path[gs->path_idx++] = NSPDF_PATH_CLOSE;
-
- render_ctx->path(&style,
- gs->path,
- gs->path_idx,
- gs->param_stack[gs->param_stack_idx].ctm,
- render_ctx->ctx);
- gs->path_idx = 0;
+ float avscale;
+ avscale = (fabs(ctm[0]) + fabs(ctm[3])) / 2.0; /* average scale of x and y axis */
+ *scaled = unscaled * avscale;
+ if (*scaled < 0.1) {
+ /* printf("sx:%f sy:%f av:%f un:%f sc:%f\n",
+ ctm[0], ctm[3], avscale, unscaled, *scaled);*/
+ *scaled = 0.1;
+ }
return NSPDFERROR_OK;
}
@@ -437,8 +431,11 @@ render_operation_B(struct graphics_state *gs, struct nspdf_render_ctx* render_ct
{
struct nspdf_style style;
style.stroke_type = NSPDF_OP_TYPE_SOLID;
- style.stroke_width = gs->param_stack[gs->param_stack_idx].line_width;
- gsc_to_device(&gs->param_stack[gs->param_stack_idx].stroke.colour, &style.stroke_colour);
+ scale_stroke_width(gs->param_stack[gs->param_stack_idx].ctm,
+ gs->param_stack[gs->param_stack_idx].line_width,
+ &style.stroke_width);
+ gsc_to_device(&gs->param_stack[gs->param_stack_idx].stroke.colour,
+ &style.stroke_colour);
style.fill_type = NSPDF_OP_TYPE_SOLID;
gsc_to_device(&gs->param_stack[gs->param_stack_idx].other.colour, &style.fill_colour);
@@ -463,8 +460,11 @@ render_operation_S(struct graphics_state *gs, struct nspdf_render_ctx* render_ct
style.fill_colour = 0x01000000;
style.stroke_type = NSPDF_OP_TYPE_SOLID;
- style.stroke_width = gs->param_stack[gs->param_stack_idx].line_width;
- gsc_to_device(&gs->param_stack[gs->param_stack_idx].stroke.colour, &style.stroke_colour);
+ scale_stroke_width(gs->param_stack[gs->param_stack_idx].ctm,
+ gs->param_stack[gs->param_stack_idx].line_width,
+ &style.stroke_width);
+ gsc_to_device(&gs->param_stack[gs->param_stack_idx].stroke.colour,
+ &style.stroke_colour);
render_ctx->path(&style,
gs->path,
@@ -823,7 +823,8 @@ nspdf_page_render(struct nspdf_doc *doc,
case CONTENT_OP_b:
case CONTENT_OP_b_:
- res = render_operation_b(&gs, render_ctx);
+ render_operation_h(&gs);
+ res = render_operation_B(&gs, render_ctx);
break;
case CONTENT_OP_s:
--
PDF Manipulation Library
5 years, 10 months
libnspdf: branch master updated. d2d566cf50835d728f1c65ebeccf914b43d81867
by NetSurf Browser Project
Gitweb links:
...log http://git.netsurf-browser.org/libnspdf.git/shortlog/d2d566cf50835d728f1c...
...commit http://git.netsurf-browser.org/libnspdf.git/commit/d2d566cf50835d728f1c65...
...tree http://git.netsurf-browser.org/libnspdf.git/tree/d2d566cf50835d728f1c65eb...
The branch, master has been updated
via d2d566cf50835d728f1c65ebeccf914b43d81867 (commit)
from d8835327fcc498795ac66bed330e741cef12ce29 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=d2d566cf50835d728f...
commit d2d566cf50835d728f1c65ebeccf914b43d81867
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
extend path operations in render
diff --git a/src/page.c b/src/page.c
index 984228b..3b2371b 100644
--- a/src/page.c
+++ b/src/page.c
@@ -330,6 +330,13 @@ render_operation_h(struct graphics_state *gs)
}
static inline nspdferror
+render_operation_n(struct graphics_state *gs)
+{
+ gs->path_idx = 0;
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
gsc_to_device(struct graphics_state_color * gsc, uint32_t *c_out)
{
uint32_t c;
@@ -402,6 +409,50 @@ render_operation_f(struct graphics_state *gs, struct nspdf_render_ctx* render_ct
return NSPDFERROR_OK;
}
+static inline nspdferror
+render_operation_b(struct graphics_state *gs, struct nspdf_render_ctx* render_ctx)
+{
+ struct nspdf_style style;
+ style.stroke_type = NSPDF_OP_TYPE_SOLID;
+ style.stroke_width = gs->param_stack[gs->param_stack_idx].line_width;
+ gsc_to_device(&gs->param_stack[gs->param_stack_idx].stroke.colour, &style.stroke_colour);
+
+ style.fill_type = NSPDF_OP_TYPE_SOLID;
+ gsc_to_device(&gs->param_stack[gs->param_stack_idx].other.colour, &style.fill_colour);
+
+ gs->path[gs->path_idx++] = NSPDF_PATH_CLOSE;
+
+ render_ctx->path(&style,
+ gs->path,
+ gs->path_idx,
+ gs->param_stack[gs->param_stack_idx].ctm,
+ render_ctx->ctx);
+ gs->path_idx = 0;
+
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
+render_operation_B(struct graphics_state *gs, struct nspdf_render_ctx* render_ctx)
+{
+ struct nspdf_style style;
+ style.stroke_type = NSPDF_OP_TYPE_SOLID;
+ style.stroke_width = gs->param_stack[gs->param_stack_idx].line_width;
+ gsc_to_device(&gs->param_stack[gs->param_stack_idx].stroke.colour, &style.stroke_colour);
+
+ style.fill_type = NSPDF_OP_TYPE_SOLID;
+ gsc_to_device(&gs->param_stack[gs->param_stack_idx].other.colour, &style.fill_colour);
+
+ render_ctx->path(&style,
+ gs->path,
+ gs->path_idx,
+ gs->param_stack[gs->param_stack_idx].ctm,
+ render_ctx->ctx);
+ gs->path_idx = 0;
+
+ return NSPDFERROR_OK;
+}
+
static inline nspdferror
render_operation_S(struct graphics_state *gs, struct nspdf_render_ctx* render_ctx)
@@ -762,11 +813,17 @@ nspdf_page_render(struct nspdf_doc *doc,
case CONTENT_OP_f:
case CONTENT_OP_f_:
+ res = render_operation_f(&gs, render_ctx);
+ break;
+
case CONTENT_OP_B:
case CONTENT_OP_B_:
+ res = render_operation_B(&gs, render_ctx);
+ break;
+
case CONTENT_OP_b:
case CONTENT_OP_b_:
- res = render_operation_f(&gs, render_ctx);
+ res = render_operation_b(&gs, render_ctx);
break;
case CONTENT_OP_s:
@@ -778,6 +835,10 @@ nspdf_page_render(struct nspdf_doc *doc,
res = render_operation_S(&gs, render_ctx);
break;
+ case CONTENT_OP_n: /* end path */
+ res = render_operation_n(&gs);
+ break;
+
/* graphics state operations */
case CONTENT_OP_w: /* line width */
res = render_operation_w(operation, &gs);
-----------------------------------------------------------------------
Summary of changes:
src/page.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 62 insertions(+), 1 deletion(-)
diff --git a/src/page.c b/src/page.c
index 984228b..3b2371b 100644
--- a/src/page.c
+++ b/src/page.c
@@ -330,6 +330,13 @@ render_operation_h(struct graphics_state *gs)
}
static inline nspdferror
+render_operation_n(struct graphics_state *gs)
+{
+ gs->path_idx = 0;
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
gsc_to_device(struct graphics_state_color * gsc, uint32_t *c_out)
{
uint32_t c;
@@ -402,6 +409,50 @@ render_operation_f(struct graphics_state *gs, struct nspdf_render_ctx* render_ct
return NSPDFERROR_OK;
}
+static inline nspdferror
+render_operation_b(struct graphics_state *gs, struct nspdf_render_ctx* render_ctx)
+{
+ struct nspdf_style style;
+ style.stroke_type = NSPDF_OP_TYPE_SOLID;
+ style.stroke_width = gs->param_stack[gs->param_stack_idx].line_width;
+ gsc_to_device(&gs->param_stack[gs->param_stack_idx].stroke.colour, &style.stroke_colour);
+
+ style.fill_type = NSPDF_OP_TYPE_SOLID;
+ gsc_to_device(&gs->param_stack[gs->param_stack_idx].other.colour, &style.fill_colour);
+
+ gs->path[gs->path_idx++] = NSPDF_PATH_CLOSE;
+
+ render_ctx->path(&style,
+ gs->path,
+ gs->path_idx,
+ gs->param_stack[gs->param_stack_idx].ctm,
+ render_ctx->ctx);
+ gs->path_idx = 0;
+
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
+render_operation_B(struct graphics_state *gs, struct nspdf_render_ctx* render_ctx)
+{
+ struct nspdf_style style;
+ style.stroke_type = NSPDF_OP_TYPE_SOLID;
+ style.stroke_width = gs->param_stack[gs->param_stack_idx].line_width;
+ gsc_to_device(&gs->param_stack[gs->param_stack_idx].stroke.colour, &style.stroke_colour);
+
+ style.fill_type = NSPDF_OP_TYPE_SOLID;
+ gsc_to_device(&gs->param_stack[gs->param_stack_idx].other.colour, &style.fill_colour);
+
+ render_ctx->path(&style,
+ gs->path,
+ gs->path_idx,
+ gs->param_stack[gs->param_stack_idx].ctm,
+ render_ctx->ctx);
+ gs->path_idx = 0;
+
+ return NSPDFERROR_OK;
+}
+
static inline nspdferror
render_operation_S(struct graphics_state *gs, struct nspdf_render_ctx* render_ctx)
@@ -762,11 +813,17 @@ nspdf_page_render(struct nspdf_doc *doc,
case CONTENT_OP_f:
case CONTENT_OP_f_:
+ res = render_operation_f(&gs, render_ctx);
+ break;
+
case CONTENT_OP_B:
case CONTENT_OP_B_:
+ res = render_operation_B(&gs, render_ctx);
+ break;
+
case CONTENT_OP_b:
case CONTENT_OP_b_:
- res = render_operation_f(&gs, render_ctx);
+ res = render_operation_b(&gs, render_ctx);
break;
case CONTENT_OP_s:
@@ -778,6 +835,10 @@ nspdf_page_render(struct nspdf_doc *doc,
res = render_operation_S(&gs, render_ctx);
break;
+ case CONTENT_OP_n: /* end path */
+ res = render_operation_n(&gs);
+ break;
+
/* graphics state operations */
case CONTENT_OP_w: /* line width */
res = render_operation_w(operation, &gs);
--
PDF Manipulation Library
5 years, 10 months
libnspdf: branch master updated. d8835327fcc498795ac66bed330e741cef12ce29
by NetSurf Browser Project
Gitweb links:
...log http://git.netsurf-browser.org/libnspdf.git/shortlog/d8835327fcc498795ac6...
...commit http://git.netsurf-browser.org/libnspdf.git/commit/d8835327fcc498795ac66b...
...tree http://git.netsurf-browser.org/libnspdf.git/tree/d8835327fcc498795ac66bed...
The branch, master has been updated
via d8835327fcc498795ac66bed330e741cef12ce29 (commit)
via f022fe68dcb0703e01e6530263e3177ffec1c083 (commit)
via 14ff03cff7400945afc3cd13d2f82e4450a3241e (commit)
from c4dd67804afc84fde84402649e5b32f2b00680c3 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=d8835327fcc498795a...
commit d8835327fcc498795ac66bed330e741cef12ce29
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
implement colour setting operations in render
diff --git a/src/graphics_state.h b/src/graphics_state.h
index ec158e0..e5cc2bf 100644
--- a/src/graphics_state.h
+++ b/src/graphics_state.h
@@ -29,16 +29,29 @@ struct graphics_state_color {
enum graphics_state_colorspace space;
union {
float gray; /* default is 0 - black */
- float rgb[3];
- float cmyk[3];
- };
+ struct {
+ float r;
+ float g;
+ float b;
+ } rgb;
+ struct {
+ float c;
+ float m;
+ float y;
+ float k;
+ } cmyk;
+ } u;
};
struct graphics_state_param {
float ctm[6]; /* current transform matrix */
/* clipping path */
- struct graphics_state_color stroke_colour;
- struct graphics_state_color other_colour;
+ struct {
+ struct graphics_state_color colour;
+ } stroke;
+ struct {
+ struct graphics_state_color colour;
+ } other;
/* text state */
float line_width;
unsigned int line_cap;
diff --git a/src/page.c b/src/page.c
index eb80f0f..984228b 100644
--- a/src/page.c
+++ b/src/page.c
@@ -330,13 +330,67 @@ render_operation_h(struct graphics_state *gs)
}
static inline nspdferror
+gsc_to_device(struct graphics_state_color * gsc, uint32_t *c_out)
+{
+ uint32_t c;
+ unsigned int v;
+
+ switch (gsc->space) {
+ case GSDeviceGray:
+ v = gsc->u.gray * 255.0;
+ v = v & 0xff;
+ c = v | (v << 8) | (v << 16);
+ break;
+
+ case GSDeviceRGB:
+ v = gsc->u.rgb.r * 255.0;
+ c = v & 0xff;
+ v = gsc->u.rgb.g * 255.0;
+ v = v & 0xff;
+ c |= v << 8;
+ v = gsc->u.rgb.b * 255.0;
+ v = v & 0xff;
+ c |= v << 16;
+ break;
+
+ case GSDeviceCMYK:
+ /* no color profile, this will look shocking */
+ v = (1.0 - ((gsc->u.cmyk.c * (1.0 - gsc->u.cmyk.k)) + gsc->u.cmyk.k)) * 255.0;
+ c = v & 0xff;
+ v = (1.0 - ((gsc->u.cmyk.m * (1.0 - gsc->u.cmyk.k)) + gsc->u.cmyk.k)) * 255.0;
+ v = v & 0xff;
+ c |= v << 8;
+ v = (1.0 - ((gsc->u.cmyk.y * (1.0 - gsc->u.cmyk.k)) + gsc->u.cmyk.k)) * 255.0;
+ v = v & 0xff;
+ c |= v << 16;
+ /* if (c != 0) printf("setting %f %f %f %f %x\n",
+ gsc->u.cmyk.c,
+ gsc->u.cmyk.m,
+ gsc->u.cmyk.y,
+ gsc->u.cmyk.k,
+ c);
+ */
+ break;
+
+ default:
+ c = 0;
+ break;
+ }
+
+ *c_out = c;
+
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
render_operation_f(struct graphics_state *gs, struct nspdf_render_ctx* render_ctx)
{
struct nspdf_style style;
style.stroke_type = NSPDF_OP_TYPE_NONE;
style.stroke_colour = 0x01000000;
+
style.fill_type = NSPDF_OP_TYPE_SOLID;
- style.fill_colour = 0;
+ gsc_to_device(&gs->param_stack[gs->param_stack_idx].other.colour, &style.fill_colour);
render_ctx->path(&style,
gs->path,
@@ -344,6 +398,7 @@ render_operation_f(struct graphics_state *gs, struct nspdf_render_ctx* render_ct
gs->param_stack[gs->param_stack_idx].ctm,
render_ctx->ctx);
gs->path_idx = 0;
+
return NSPDFERROR_OK;
}
@@ -353,11 +408,13 @@ render_operation_S(struct graphics_state *gs, struct nspdf_render_ctx* render_ct
{
struct nspdf_style style;
- style.stroke_type = NSPDF_OP_TYPE_SOLID;
- style.stroke_colour = 0;
- style.stroke_width = gs->param_stack[gs->param_stack_idx].line_width;
style.fill_type = NSPDF_OP_TYPE_NONE;
style.fill_colour = 0x01000000;
+
+ style.stroke_type = NSPDF_OP_TYPE_SOLID;
+ style.stroke_width = gs->param_stack[gs->param_stack_idx].line_width;
+ gsc_to_device(&gs->param_stack[gs->param_stack_idx].stroke.colour, &style.stroke_colour);
+
render_ctx->path(&style,
gs->path,
gs->path_idx,
@@ -435,6 +492,186 @@ render_operation_cm(struct content_operation *operation, struct graphics_state *
gs->param_stack[gs->param_stack_idx].ctm);
}
+
+static inline nspdferror
+set_gsc_grey(struct graphics_state_color *gsc, float gray)
+{
+ /* bounds check */
+ if (gray < 0.0) {
+ gray = 0.0;
+ } else if (gray > 1.0) {
+ gray = 1.0;
+ }
+
+ gsc->space = GSDeviceGray;
+ gsc->u.gray = gray;
+
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
+render_operation_G(struct content_operation *operation,
+ struct graphics_state *gs)
+{
+ return set_gsc_grey(&gs->param_stack[gs->param_stack_idx].stroke.colour,
+ operation->u.number[0]);
+}
+
+static inline nspdferror
+render_operation_g(struct content_operation *operation,
+ struct graphics_state *gs)
+{
+ return set_gsc_grey(&gs->param_stack[gs->param_stack_idx].other.colour,
+ operation->u.number[0]);
+}
+
+static inline nspdferror
+set_gsc_rgb(struct graphics_state_color *gsc, float r, float g, float b)
+{
+ /* bounds check */
+ if (r < 0.0) {
+ r = 0.0;
+ } else if (r > 1.0) {
+ r = 1.0;
+ }
+ if (g < 0.0) {
+ g = 0.0;
+ } else if (g > 1.0) {
+ g = 1.0;
+ }
+ if (b < 0.0) {
+ b = 0.0;
+ } else if (b > 1.0) {
+ b = 1.0;
+ }
+
+ gsc->space = GSDeviceRGB;
+ gsc->u.rgb.r = r;
+ gsc->u.rgb.g = g;
+ gsc->u.rgb.b = b;
+
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
+render_operation_RG(struct content_operation *operation,
+ struct graphics_state *gs)
+{
+ return set_gsc_rgb(&gs->param_stack[gs->param_stack_idx].stroke.colour,
+ operation->u.number[0],
+ operation->u.number[1],
+ operation->u.number[2]);
+}
+
+static inline nspdferror
+render_operation_rg(struct content_operation *operation,
+ struct graphics_state *gs)
+{
+ return set_gsc_rgb(&gs->param_stack[gs->param_stack_idx].other.colour,
+ operation->u.number[0],
+ operation->u.number[1],
+ operation->u.number[2]);
+}
+
+static inline nspdferror
+set_gsc_cmyk(struct graphics_state_color *gsc, float c, float m, float y, float k)
+{
+ /* bounds check */
+ if (c < 0.0) {
+ c = 0.0;
+ } else if (c > 1.0) {
+ c = 1.0;
+ }
+ if (y < 0.0) {
+ y = 0.0;
+ } else if (y > 1.0) {
+ y = 1.0;
+ }
+ if (m < 0.0) {
+ m = 0.0;
+ } else if (m > 1.0) {
+ m = 1.0;
+ }
+ if (k < 0.0) {
+ k = 0.0;
+ } else if (k > 1.0) {
+ k = 1.0;
+ }
+
+ gsc->space = GSDeviceCMYK;
+ gsc->u.cmyk.c = c;
+ gsc->u.cmyk.m = m;
+ gsc->u.cmyk.y = y;
+ gsc->u.cmyk.k = k;
+
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
+render_operation_K(struct content_operation *operation,
+ struct graphics_state *gs)
+{
+ return set_gsc_cmyk(&gs->param_stack[gs->param_stack_idx].stroke.colour,
+ operation->u.number[0],
+ operation->u.number[1],
+ operation->u.number[2],
+ operation->u.number[3]);
+}
+
+static inline nspdferror
+render_operation_k(struct content_operation *operation,
+ struct graphics_state *gs)
+{
+ return set_gsc_cmyk(&gs->param_stack[gs->param_stack_idx].other.colour,
+ operation->u.number[0],
+ operation->u.number[1],
+ operation->u.number[2],
+ operation->u.number[3]);
+}
+
+static inline nspdferror
+set_gsc_cs(struct graphics_state_color *gsc, const char *spacename)
+{
+ if (strcmp(spacename, "DeviceGray") == 0) {
+ gsc->space = GSDeviceGray;
+ gsc->u.gray = 0.0;
+ } else if (strcmp(spacename, "DeviceRGB") == 0) {
+ gsc->space = GSDeviceRGB;
+ gsc->u.rgb.r = 0.0;
+ gsc->u.rgb.g = 0.0;
+ gsc->u.rgb.b = 0.0;
+ } else if (strcmp(spacename, "DeviceCMYK") == 0) {
+ gsc->space = GSDeviceCMYK;
+ gsc->u.cmyk.c = 0.0;
+ gsc->u.cmyk.m = 0.0;
+ gsc->u.cmyk.y = 0.0;
+ gsc->u.cmyk.k = 1.0;
+ } else {
+ /** \todo colourspace from name defined in the ColorSpace subdictionary of the current resource dictionary */
+ gsc->space = GSDeviceGray;
+ gsc->u.gray = 0.0;
+
+ }
+ //printf("cs %s %d\n", spacename, gsc->space);
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
+render_operation_CS(struct content_operation *operation,
+ struct graphics_state *gs)
+{
+ return set_gsc_cs(&gs->param_stack[gs->param_stack_idx].stroke.colour,
+ operation->u.name);
+}
+
+static inline nspdferror
+render_operation_cs(struct content_operation *operation,
+ struct graphics_state *gs)
+{
+ return set_gsc_cs(&gs->param_stack[gs->param_stack_idx].other.colour,
+ operation->u.name);
+}
+
/**
* Initialise the parameter stack
*
@@ -574,6 +811,44 @@ nspdf_page_render(struct nspdf_doc *doc,
res = render_operation_cm(operation, &gs);
break;
+ /* colour operators */
+ case CONTENT_OP_G: /* gray stroking colour */
+ res = render_operation_G(operation, &gs);
+ break;
+
+ case CONTENT_OP_g: /* gray non-stroking colour */
+ res = render_operation_g(operation, &gs);
+ break;
+
+ case CONTENT_OP_RG: /* rgb stroking colour */
+ res = render_operation_RG(operation, &gs);
+ break;
+
+ case CONTENT_OP_rg: /* rgb non-stroking colour */
+ res = render_operation_rg(operation, &gs);
+ break;
+
+ case CONTENT_OP_K: /* CMYK stroking colour */
+ res = render_operation_K(operation, &gs);
+ break;
+
+ case CONTENT_OP_k: /* CMYK non-stroking colour */
+ res = render_operation_k(operation, &gs);
+ break;
+
+ case CONTENT_OP_CS: /* change stroking colourspace */
+ res = render_operation_CS(operation, &gs);
+ break;
+
+ case CONTENT_OP_cs: /* change non-stroking colourspace */
+ res = render_operation_cs(operation, &gs);
+ break;
+
+ //case CONTENT_OP_SC:
+ //case CONTENT_OP_sc:
+ //case CONTENT_OP_SCN:
+ //case CONTENT_OP_scn:
+
default:
printf("operator %s\n",
nspdf__cos_content_operator_name(operation->operator));
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=f022fe68dcb0703e01...
commit f022fe68dcb0703e01e6530263e3177ffec1c083
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
extend graphics state operation implementation
diff --git a/src/graphics_state.h b/src/graphics_state.h
new file mode 100644
index 0000000..ec158e0
--- /dev/null
+++ b/src/graphics_state.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2018 Vincent Sanders <vince(a)netsurf-browser.org>
+ *
+ * This file is part of libnspdf.
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
+/**
+ * \file
+ * NetSurf PDF library graphics state
+ */
+
+#ifndef NSPDF__GRAPHICS_STATE_H_
+#define NSPDF__GRAPHICS_STATE_H_
+
+/**
+ * colourspaces
+ * \todo extend this with full list from section 4.5.2
+ */
+enum graphics_state_colorspace {
+ GSDeviceGray = 0, /* Default */
+ GSDeviceRGB,
+ GSDeviceCMYK,
+};
+
+struct graphics_state_color {
+ enum graphics_state_colorspace space;
+ union {
+ float gray; /* default is 0 - black */
+ float rgb[3];
+ float cmyk[3];
+ };
+};
+
+struct graphics_state_param {
+ float ctm[6]; /* current transform matrix */
+ /* clipping path */
+ struct graphics_state_color stroke_colour;
+ struct graphics_state_color other_colour;
+ /* text state */
+ float line_width;
+ unsigned int line_cap;
+ unsigned int line_join;
+ float miter_limit;
+ /* dash pattern */
+ /* rendering intent RelativeColorimetric */
+ bool stroke_adjustment;
+ /* blend mode: Normal */
+ /* soft mask */
+ /* alpha constant */
+ /* alpha source */
+
+ /* device dependent */
+ bool overprint;
+ float overprint_mode;
+ /* black generation */
+ /* undercolor removal */
+ /* transfer */
+ /* halftone */
+ float flatness;
+ float smoothness;
+};
+
+struct graphics_state {
+ float *path; /* current path */
+ unsigned int path_idx; /* current index into path */
+ unsigned int path_alloc; /* current number of path elements allocated */
+
+ struct graphics_state_param *param_stack; /* parameter stack */
+ unsigned int param_stack_idx;
+ unsigned int param_stack_alloc;
+};
+
+#endif
diff --git a/src/page.c b/src/page.c
index 08d993c..eb80f0f 100644
--- a/src/page.c
+++ b/src/page.c
@@ -15,6 +15,7 @@
#include <stdio.h>
#include <nspdf/page.h>
+#include "graphics_state.h"
#include "cos_content.h"
#include "cos_object.h"
#include "pdf_doc.h"
@@ -32,6 +33,65 @@ struct page_table_entry {
};
/**
+ * multiply pdf matrices
+ *
+ * pdf specifies its 3 x 3 transform matrix as six values and three constants
+ * | t[0] t[1] 0 |
+ * Mt = | t[2] t[3] 0 |
+ * | t[4] t[5] 1 |
+ *
+ * this multiplies two such matrices together
+ * Mo = Ma * Mb
+ *
+ * Basic matrix expansion is
+ * | a b c | | A B C | | aA+bP+cU aB+bQ+cV aC+bR+cW |
+ * | p q r | * | P Q R | = | pA+qP+rU pB+qQ+rV pC+qR+rW |
+ * | u v w | | U V W | | uA+vP+wU uB+vQ+wV uC+vR+wW |
+ *
+ * With the a and b arrays substituted
+ * | o[0] o[1] 0 |
+ * | o[2] o[3] 0 | =
+ * | o[4] o[5] 1 |
+ *
+ * | a[0] a[1] 0 | | b[0] b[1] 0 |
+ * | a[2] a[3] 0 | * | b[2] b[3] 0 | =
+ * | a[4] a[5] 1 | | b[4] b[5] 1 |
+ *
+ * | a[0]*b[0]+a[1]*b[2] a[0]*b[1]+a[1]*b[3] 0 |
+ * | a[2]*b[0]+a[3]*b[2] a[2]*b[1]+a[3]*b[3] 0 |
+ * | a[4]*b[0]+a[5]*b[2]+b[4] a[4]*b[1]+a[5]*b[3]+b[5] 1 |
+ *
+ * \param a The array of six values for matrix a
+ * \param b The array of six values for matrix b
+ * \param o An array to receive the six values resulting from Ma * Mb; may be the same array as a or b
+ * \return NSPDFERROR_OK on success
+ */
+static nspdferror
+pdf_matrix_multiply(float *a, float *b, float *o)
+{
+ float out[6]; /* result matrix array */
+
+ out[0] = a[0]*b[0] + a[1]*b[2];
+ out[1] = a[0]*b[1] + a[1]*b[3];
+ out[2] = a[2]*b[0] + a[3]*b[2];
+ out[3] = a[2]*b[1] + a[3]*b[3];
+ out[4] = a[4]*b[0] + a[5]*b[2] + b[4];
+ out[5] = a[4]*b[1] + a[5]*b[3] + b[5];
+
+ /* calculate and then assign output to allow input and output arrays to
+ * overlap
+ */
+ o[0] = out[0];
+ o[1] = out[1];
+ o[2] = out[2];
+ o[3] = out[3];
+ o[4] = out[4];
+ o[5] = out[5];
+
+ return NSPDFERROR_OK;
+}
+
+/**
* recursively decodes a page tree
*/
nspdferror
@@ -210,53 +270,6 @@ nspdf_page_count(struct nspdf_doc *doc, unsigned int *pages_out)
return NSPDFERROR_OK;
}
-/**
- * colourspaces
- * \todo extend this with full list from section 4.5.2
- */
-enum graphics_state_colorspace {
- GSDeviceGray = 0, /* Default */
- GSDeviceRGB,
- GSDeviceCMYK,
-};
-
-struct graphics_state_color {
- enum graphics_state_colorspace space;
- union {
- float gray; /* default is 0 - black */
- float rgb[3];
- float cmyk[3];
- };
-};
-
-struct graphics_state_param {
- float ctm[6]; /* current transform matrix */
- /* clipping path */
- struct graphics_state_color stroke_colour;
- struct graphics_state_color other_colour;
- /* text state */
- float line_width;
- unsigned int line_cap;
- unsigned int line_join;
- float miter_limit;
- /* dash pattern */
- /* rendering intent RelativeColorimetric */
- bool stroke_adjustment;
- /* blend mode: Normal */
- /* soft mask */
- /* alpha constant */
- /* alpha source */
-};
-
-struct graphics_state {
- float *path; /* current path */
- unsigned int path_idx; /* current index into path */
- unsigned int path_alloc; /* current number of path elements allocated */
-
- struct graphics_state_param *param_stack; /* parameter stack */
- unsigned int param_stack_idx;
- unsigned int param_stack_alloc;
-};
static inline nspdferror
render_operation_m(struct content_operation *operation, struct graphics_state *gs)
@@ -362,6 +375,34 @@ render_operation_w(struct content_operation *operation, struct graphics_state *g
}
static inline nspdferror
+render_operation_i(struct content_operation *operation, struct graphics_state *gs)
+{
+ gs->param_stack[gs->param_stack_idx].flatness = operation->u.number[0];
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
+render_operation_M(struct content_operation *operation, struct graphics_state *gs)
+{
+ gs->param_stack[gs->param_stack_idx].miter_limit = operation->u.number[0];
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
+render_operation_j(struct content_operation *operation, struct graphics_state *gs)
+{
+ gs->param_stack[gs->param_stack_idx].line_join = operation->u.i[0];
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
+render_operation_J(struct content_operation *operation, struct graphics_state *gs)
+{
+ gs->param_stack[gs->param_stack_idx].line_cap = operation->u.i[0];
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
render_operation_q(struct graphics_state *gs)
{
gs->param_stack[gs->param_stack_idx + 1] = gs->param_stack[gs->param_stack_idx];
@@ -379,64 +420,6 @@ render_operation_Q(struct graphics_state *gs)
}
-/**
- * multiply pdf matricies
- *
- * pdf specifies its 3 x 3 transform matrix as six values and three constants
- * | t[0] t[1] 0 |
- * Mt = | t[2] t[3] 0 |
- * | t[4] t[5] 1 |
- *
- * this multiples two such matricies together
- * Mo = Ma * Mb
- *
- * Basic matrix expansion is
- * | a b c | | A B C | | aA+bP+cU aB+bQ+cV aC+bR+cW |
- * | p q r | * | P Q R | = | pA+qP+rU pB+qQ+rV pC+qR+rW |
- * | u v w | | U V W | | uA+vP+wU uB+vQ+wV uC+vR+wW |
- *
- * With the a and b arrays substituted
- * | o[0] o[1] 0 |
- * | o[2] o[3] 0 | =
- * | o[4] o[5] 1 |
- *
- * | a[0] a[1] 0 | | b[0] b[1] 0 |
- * | a[2] a[3] 0 | * | b[2] b[3] 0 | =
- * | a[4] a[5] 1 | | b[4] b[5] 1 |
- *
- * | a[0]*b[0]+a[1]*b[2] a[0]*b[1]+a[1]*b[3] 0 |
- * | a[2]*b[0]+a[3]*b[2] a[2]*b[1]+a[3]*b[3] 0 |
- * | a[4]*b[0]+a[5]*b[2]+b[4] a[4]*b[1]+a[5]*b[3]+b[5] 1 |
- *
- * \param a The array of six values for matrix a
- * \param b The array of six values for matrix b
- * \param o An array to receive six values resulting from Ma * Mb may be same array as a or b
- * \return NSPDFERROR_OK on success
- */
-static nspdferror
-pdf_matrix_multiply(float *a, float *b, float *o)
-{
- float out[6]; /* result matrix array */
-
- out[0] = a[0]*b[0] + a[1]*b[2];
- out[1] = a[0]*b[1] + a[1]*b[3];
- out[2] = a[2]*b[0] + a[3]*b[2];
- out[3] = a[2]*b[1] + a[3]*b[3];
- out[4] = a[4]*b[0] + a[5]*b[2] + b[4];
- out[5] = a[4]*b[1] + a[5]*b[3] + b[5];
-
- /* calculate and then assign output to allow input and output arrays to
- * overlap
- */
- o[0] = out[0];
- o[1] = out[1];
- o[2] = out[2];
- o[3] = out[3];
- o[4] = out[4];
- o[5] = out[5];
-
- return NSPDFERROR_OK;
-}
/**
* pre-multiply matrix
@@ -519,6 +502,7 @@ nspdf_page_render(struct nspdf_doc *doc,
idx < page_content->length;
idx++, operation++) {
switch(operation->operator) {
+ /* path operations */
case CONTENT_OP_m: /* move */
res = render_operation_m(operation, &gs);
break;
@@ -557,9 +541,25 @@ nspdf_page_render(struct nspdf_doc *doc,
res = render_operation_S(&gs, render_ctx);
break;
- case CONTENT_OP_w:
+ /* graphics state operations */
+ case CONTENT_OP_w: /* line width */
res = render_operation_w(operation, &gs);
- //printf("line width:%f\n", gs.param_stack[gs.param_stack_idx].line_width);
+ break;
+
+ case CONTENT_OP_i: /* flatness */
+ res = render_operation_i(operation, &gs);
+ break;
+
+ case CONTENT_OP_j: /* line join style */
+ res = render_operation_j(operation, &gs);
+ break;
+
+ case CONTENT_OP_J: /* line cap style */
+ res = render_operation_J(operation, &gs);
+ break;
+
+ case CONTENT_OP_M: /* miter limit */
+ res = render_operation_M(operation, &gs);
break;
case CONTENT_OP_q: /* push parameter stack */
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=14ff03cff7400945af...
commit 14ff03cff7400945afc3cd13d2f82e4450a3241e
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
split out pdf matrix multiplication
diff --git a/src/page.c b/src/page.c
index c5ea8b8..08d993c 100644
--- a/src/page.c
+++ b/src/page.c
@@ -378,45 +378,78 @@ render_operation_Q(struct graphics_state *gs)
return NSPDFERROR_OK;
}
+
+/**
+ * multiply pdf matrices
+ *
+ * pdf specifies its 3 x 3 transform matrix as six values and three constants
+ * | t[0] t[1] 0 |
+ * Mt = | t[2] t[3] 0 |
+ * | t[4] t[5] 1 |
+ *
+ * this multiplies two such matrices together
+ * Mo = Ma * Mb
+ *
+ * Basic matrix expansion is
+ * | a b c | | A B C | | aA+bP+cU aB+bQ+cV aC+bR+cW |
+ * | p q r | * | P Q R | = | pA+qP+rU pB+qQ+rV pC+qR+rW |
+ * | u v w | | U V W | | uA+vP+wU uB+vQ+wV uC+vR+wW |
+ *
+ * With the a and b arrays substituted
+ * | o[0] o[1] 0 |
+ * | o[2] o[3] 0 | =
+ * | o[4] o[5] 1 |
+ *
+ * | a[0] a[1] 0 | | b[0] b[1] 0 |
+ * | a[2] a[3] 0 | * | b[2] b[3] 0 | =
+ * | a[4] a[5] 1 | | b[4] b[5] 1 |
+ *
+ * | a[0]*b[0]+a[1]*b[2] a[0]*b[1]+a[1]*b[3] 0 |
+ * | a[2]*b[0]+a[3]*b[2] a[2]*b[1]+a[3]*b[3] 0 |
+ * | a[4]*b[0]+a[5]*b[2]+b[4] a[4]*b[1]+a[5]*b[3]+b[5] 1 |
+ *
+ * \param a The array of six values for matrix a
+ * \param b The array of six values for matrix b
+ * \param o An array to receive the six values resulting from Ma * Mb; may be the same array as a or b
+ * \return NSPDFERROR_OK on success
+ */
+static nspdferror
+pdf_matrix_multiply(float *a, float *b, float *o)
+{
+ float out[6]; /* result matrix array */
+
+ out[0] = a[0]*b[0] + a[1]*b[2];
+ out[1] = a[0]*b[1] + a[1]*b[3];
+ out[2] = a[2]*b[0] + a[3]*b[2];
+ out[3] = a[2]*b[1] + a[3]*b[3];
+ out[4] = a[4]*b[0] + a[5]*b[2] + b[4];
+ out[5] = a[4]*b[1] + a[5]*b[3] + b[5];
+
+ /* calculate and then assign output to allow input and output arrays to
+ * overlap
+ */
+ o[0] = out[0];
+ o[1] = out[1];
+ o[2] = out[2];
+ o[3] = out[3];
+ o[4] = out[4];
+ o[5] = out[5];
+
+ return NSPDFERROR_OK;
+}
+
/**
* pre-multiply matrix
*/
static inline nspdferror
render_operation_cm(struct content_operation *operation, struct graphics_state *gs)
{
- float M[6]; /* result matrix */
- /* M' = Mt * M */
- /* M' = Mo * Mc where Mo is operation and Mc is graphics state ctm */
- /* | a b c | | A B C | | aA+bP+cU aB+bQ+cV aC+bR+cW |
- * | p q r | * | P Q R | = | pA+qP+rU pB+qQ+rV pC+qR+rW |
- * | u v w | | U V W | | uA+vP+wU uB+vQ+wV uC+vR+wW |
- *
- * | o[0] o[1] 0 | | c[0] c[1] 0 | | o[0]*c[0]+o[1]*c[2] o[0]*c[1]+o[1]*c[3] 0 |
- * | o[2] o[3] 0 | * | c[2] c[3] 0 | = | o[2]*c[0]+o[3]*c[2] o[2]*c[1]+o[3]*c[3] 0 |
- * | o[4] o[5] 1 | | c[4] c[5] 1 | | o[4]*c[0]+o[5]*c[2]+c[4] o[4]*c[1]+o[5]*c[3]+c[5] 1 |
+ /* Mres = Mop * Mctm
+ * where Mop is operation and Mctm is graphics state ctm
*/
- M[0] = operation->u.number[0] * gs->param_stack[gs->param_stack_idx].ctm[0] +
- operation->u.number[1] * gs->param_stack[gs->param_stack_idx].ctm[2];
- M[1] = operation->u.number[0] * gs->param_stack[gs->param_stack_idx].ctm[1] +
- operation->u.number[1] * gs->param_stack[gs->param_stack_idx].ctm[3];
- M[2] = operation->u.number[2] * gs->param_stack[gs->param_stack_idx].ctm[0] +
- operation->u.number[3] * gs->param_stack[gs->param_stack_idx].ctm[2];
- M[3] = operation->u.number[2] * gs->param_stack[gs->param_stack_idx].ctm[1] +
- operation->u.number[3] * gs->param_stack[gs->param_stack_idx].ctm[3];
- M[4] = operation->u.number[4] * gs->param_stack[gs->param_stack_idx].ctm[0] +
- operation->u.number[5] * gs->param_stack[gs->param_stack_idx].ctm[2] +
- gs->param_stack[gs->param_stack_idx].ctm[4];
- M[5] = operation->u.number[4] * gs->param_stack[gs->param_stack_idx].ctm[1] +
- operation->u.number[5] * gs->param_stack[gs->param_stack_idx].ctm[3] +
- gs->param_stack[gs->param_stack_idx].ctm[5];
-
- gs->param_stack[gs->param_stack_idx].ctm[0] = M[0];
- gs->param_stack[gs->param_stack_idx].ctm[1] = M[1];
- gs->param_stack[gs->param_stack_idx].ctm[2] = M[2];
- gs->param_stack[gs->param_stack_idx].ctm[3] = M[3];
- gs->param_stack[gs->param_stack_idx].ctm[4] = M[4];
- gs->param_stack[gs->param_stack_idx].ctm[5] = M[5];
- return NSPDFERROR_OK;
+ return pdf_matrix_multiply(operation->u.number,
+ gs->param_stack[gs->param_stack_idx].ctm,
+ gs->param_stack[gs->param_stack_idx].ctm);
}
/**
-----------------------------------------------------------------------
Summary of changes:
src/graphics_state.h | 89 ++++++++++
src/page.c | 476 +++++++++++++++++++++++++++++++++++++++++---------
2 files changed, 481 insertions(+), 84 deletions(-)
create mode 100644 src/graphics_state.h
diff --git a/src/graphics_state.h b/src/graphics_state.h
new file mode 100644
index 0000000..e5cc2bf
--- /dev/null
+++ b/src/graphics_state.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2018 Vincent Sanders <vince(a)netsurf-browser.org>
+ *
+ * This file is part of libnspdf.
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
+/**
+ * \file
+ * NetSurf PDF library graphics state
+ */
+
+#ifndef NSPDF__GRAPHICS_STATE_H_
+#define NSPDF__GRAPHICS_STATE_H_
+
+/**
+ * colourspaces
+ * \todo extend this with full list from section 4.5.2
+ */
+enum graphics_state_colorspace {
+ GSDeviceGray = 0, /* Default */
+ GSDeviceRGB,
+ GSDeviceCMYK,
+};
+
+struct graphics_state_color {
+ enum graphics_state_colorspace space;
+ union {
+ float gray; /* default is 0 - black */
+ struct {
+ float r;
+ float g;
+ float b;
+ } rgb;
+ struct {
+ float c;
+ float m;
+ float y;
+ float k;
+ } cmyk;
+ } u;
+};
+
+struct graphics_state_param {
+ float ctm[6]; /* current transform matrix */
+ /* clipping path */
+ struct {
+ struct graphics_state_color colour;
+ } stroke;
+ struct {
+ struct graphics_state_color colour;
+ } other;
+ /* text state */
+ float line_width;
+ unsigned int line_cap;
+ unsigned int line_join;
+ float miter_limit;
+ /* dash pattern */
+ /* rendering intent RelativeColorimetric */
+ bool stroke_adjustment;
+ /* blend mode: Normal */
+ /* soft mask */
+ /* alpha constant */
+ /* alpha source */
+
+ /* device dependent */
+ bool overprint;
+ float overprint_mode;
+ /* black generation */
+ /* undercolor removal */
+ /* transfer */
+ /* halftone */
+ float flatness;
+ float smoothness;
+};
+
+struct graphics_state {
+ float *path; /* current path */
+ unsigned int path_idx; /* current index into path */
+ unsigned int path_alloc; /* current number of path elements allocated */
+
+ struct graphics_state_param *param_stack; /* parameter stack */
+ unsigned int param_stack_idx;
+ unsigned int param_stack_alloc;
+};
+
+#endif
diff --git a/src/page.c b/src/page.c
index c5ea8b8..984228b 100644
--- a/src/page.c
+++ b/src/page.c
@@ -15,6 +15,7 @@
#include <stdio.h>
#include <nspdf/page.h>
+#include "graphics_state.h"
#include "cos_content.h"
#include "cos_object.h"
#include "pdf_doc.h"
@@ -32,6 +33,65 @@ struct page_table_entry {
};
/**
+ * multiply pdf matrices
+ *
+ * pdf specifies its 3 x 3 transform matrix as six values and three constants
+ * | t[0] t[1] 0 |
+ * Mt = | t[2] t[3] 0 |
+ * | t[4] t[5] 1 |
+ *
+ * this multiplies two such matrices together
+ * Mo = Ma * Mb
+ *
+ * Basic matrix expansion is
+ * | a b c | | A B C | | aA+bP+cU aB+bQ+cV aC+bR+cW |
+ * | p q r | * | P Q R | = | pA+qP+rU pB+qQ+rV pC+qR+rW |
+ * | u v w | | U V W | | uA+vP+wU uB+vQ+wV uC+vR+wW |
+ *
+ * With the a and b arrays substituted
+ * | o[0] o[1] 0 |
+ * | o[2] o[3] 0 | =
+ * | o[4] o[5] 1 |
+ *
+ * | a[0] a[1] 0 | | b[0] b[1] 0 |
+ * | a[2] a[3] 0 | * | b[2] b[3] 0 | =
+ * | a[4] a[5] 1 | | b[4] b[5] 1 |
+ *
+ * | a[0]*b[0]+a[1]*b[2] a[0]*b[1]+a[1]*b[3] 0 |
+ * | a[2]*b[0]+a[3]*b[2] a[2]*b[1]+a[3]*b[3] 0 |
+ * | a[4]*b[0]+a[5]*b[2]+b[4] a[4]*b[1]+a[5]*b[3]+b[5] 1 |
+ *
+ * \param a The array of six values for matrix a
+ * \param b The array of six values for matrix b
+ * \param o An array to receive the six values resulting from Ma * Mb; may be the same array as a or b
+ * \return NSPDFERROR_OK on success
+ */
+static nspdferror
+pdf_matrix_multiply(float *a, float *b, float *o)
+{
+ float out[6]; /* result matrix array */
+
+ out[0] = a[0]*b[0] + a[1]*b[2];
+ out[1] = a[0]*b[1] + a[1]*b[3];
+ out[2] = a[2]*b[0] + a[3]*b[2];
+ out[3] = a[2]*b[1] + a[3]*b[3];
+ out[4] = a[4]*b[0] + a[5]*b[2] + b[4];
+ out[5] = a[4]*b[1] + a[5]*b[3] + b[5];
+
+ /* calculate and then assign output to allow input and output arrays to
+ * overlap
+ */
+ o[0] = out[0];
+ o[1] = out[1];
+ o[2] = out[2];
+ o[3] = out[3];
+ o[4] = out[4];
+ o[5] = out[5];
+
+ return NSPDFERROR_OK;
+}
+
+/**
* recursively decodes a page tree
*/
nspdferror
@@ -210,53 +270,6 @@ nspdf_page_count(struct nspdf_doc *doc, unsigned int *pages_out)
return NSPDFERROR_OK;
}
-/**
- * colourspaces
- * \todo extend this with full list from section 4.5.2
- */
-enum graphics_state_colorspace {
- GSDeviceGray = 0, /* Default */
- GSDeviceRGB,
- GSDeviceCMYK,
-};
-
-struct graphics_state_color {
- enum graphics_state_colorspace space;
- union {
- float gray; /* default is 0 - black */
- float rgb[3];
- float cmyk[3];
- };
-};
-
-struct graphics_state_param {
- float ctm[6]; /* current transform matrix */
- /* clipping path */
- struct graphics_state_color stroke_colour;
- struct graphics_state_color other_colour;
- /* text state */
- float line_width;
- unsigned int line_cap;
- unsigned int line_join;
- float miter_limit;
- /* dash pattern */
- /* rendering intent RelativeColorimetric */
- bool stroke_adjustment;
- /* blend mode: Normal */
- /* soft mask */
- /* alpha constant */
- /* alpha source */
-};
-
-struct graphics_state {
- float *path; /* current path */
- unsigned int path_idx; /* current index into path */
- unsigned int path_alloc; /* current number of path elements allocated */
-
- struct graphics_state_param *param_stack; /* parameter stack */
- unsigned int param_stack_idx;
- unsigned int param_stack_alloc;
-};
static inline nspdferror
render_operation_m(struct content_operation *operation, struct graphics_state *gs)
@@ -317,13 +330,67 @@ render_operation_h(struct graphics_state *gs)
}
static inline nspdferror
+gsc_to_device(struct graphics_state_color * gsc, uint32_t *c_out)
+{
+ uint32_t c;
+ unsigned int v;
+
+ switch (gsc->space) {
+ case GSDeviceGray:
+ v = gsc->u.gray * 255.0;
+ v = v & 0xff;
+ c = v | (v << 8) | (v << 16);
+ break;
+
+ case GSDeviceRGB:
+ v = gsc->u.rgb.r * 255.0;
+ c = v & 0xff;
+ v = gsc->u.rgb.g * 255.0;
+ v = v & 0xff;
+ c |= v << 8;
+ v = gsc->u.rgb.b * 255.0;
+ v = v & 0xff;
+ c |= v << 16;
+ break;
+
+ case GSDeviceCMYK:
+ /* no color profile, this will look shocking */
+ v = (1.0 - ((gsc->u.cmyk.c * (1.0 - gsc->u.cmyk.k)) + gsc->u.cmyk.k)) * 255.0;
+ c = v & 0xff;
+ v = (1.0 - ((gsc->u.cmyk.m * (1.0 - gsc->u.cmyk.k)) + gsc->u.cmyk.k)) * 255.0;
+ v = v & 0xff;
+ c |= v << 8;
+ v = (1.0 - ((gsc->u.cmyk.y * (1.0 - gsc->u.cmyk.k)) + gsc->u.cmyk.k)) * 255.0;
+ v = v & 0xff;
+ c |= v << 16;
+ /* if (c != 0) printf("setting %f %f %f %f %x\n",
+ gsc->u.cmyk.c,
+ gsc->u.cmyk.m,
+ gsc->u.cmyk.y,
+ gsc->u.cmyk.k,
+ c);
+ */
+ break;
+
+ default:
+ c = 0;
+ break;
+ }
+
+ *c_out = c;
+
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
render_operation_f(struct graphics_state *gs, struct nspdf_render_ctx* render_ctx)
{
struct nspdf_style style;
style.stroke_type = NSPDF_OP_TYPE_NONE;
style.stroke_colour = 0x01000000;
+
style.fill_type = NSPDF_OP_TYPE_SOLID;
- style.fill_colour = 0;
+ gsc_to_device(&gs->param_stack[gs->param_stack_idx].other.colour, &style.fill_colour);
render_ctx->path(&style,
gs->path,
@@ -331,6 +398,7 @@ render_operation_f(struct graphics_state *gs, struct nspdf_render_ctx* render_ct
gs->param_stack[gs->param_stack_idx].ctm,
render_ctx->ctx);
gs->path_idx = 0;
+
return NSPDFERROR_OK;
}
@@ -340,11 +408,13 @@ render_operation_S(struct graphics_state *gs, struct nspdf_render_ctx* render_ct
{
struct nspdf_style style;
- style.stroke_type = NSPDF_OP_TYPE_SOLID;
- style.stroke_colour = 0;
- style.stroke_width = gs->param_stack[gs->param_stack_idx].line_width;
style.fill_type = NSPDF_OP_TYPE_NONE;
style.fill_colour = 0x01000000;
+
+ style.stroke_type = NSPDF_OP_TYPE_SOLID;
+ style.stroke_width = gs->param_stack[gs->param_stack_idx].line_width;
+ gsc_to_device(&gs->param_stack[gs->param_stack_idx].stroke.colour, &style.stroke_colour);
+
render_ctx->path(&style,
gs->path,
gs->path_idx,
@@ -362,6 +432,34 @@ render_operation_w(struct content_operation *operation, struct graphics_state *g
}
static inline nspdferror
+render_operation_i(struct content_operation *operation, struct graphics_state *gs)
+{
+ gs->param_stack[gs->param_stack_idx].flatness = operation->u.number[0];
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
+render_operation_M(struct content_operation *operation, struct graphics_state *gs)
+{
+ gs->param_stack[gs->param_stack_idx].miter_limit = operation->u.number[0];
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
+render_operation_j(struct content_operation *operation, struct graphics_state *gs)
+{
+ gs->param_stack[gs->param_stack_idx].line_join = operation->u.i[0];
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
+render_operation_J(struct content_operation *operation, struct graphics_state *gs)
+{
+ gs->param_stack[gs->param_stack_idx].line_cap = operation->u.i[0];
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
render_operation_q(struct graphics_state *gs)
{
gs->param_stack[gs->param_stack_idx + 1] = gs->param_stack[gs->param_stack_idx];
@@ -378,47 +476,202 @@ render_operation_Q(struct graphics_state *gs)
return NSPDFERROR_OK;
}
+
+
/**
* pre-multiply matrix
*/
static inline nspdferror
render_operation_cm(struct content_operation *operation, struct graphics_state *gs)
{
- float M[6]; /* result matrix */
- /* M' = Mt * M */
- /* M' = Mo * Mc where Mo is operation and Mc is graphics state ctm */
- /* | a b c | | A B C | | aA+bP+cU aB+bQ+cV aC+bR+cW |
- * | p q r | * | P Q R | = | pA+qP+rU pB+qQ+rV pC+qR+rW |
- * | u v w | | U V W | | uA+vP+wU uB+vQ+wV uC+vR+wW |
- *
- * | o[0] o[1] 0 | | c[0] c[1] 0 | | o[0]*c[0]+o[1]*c[2] o[0]*c[1]+o[1]*c[3] 0 |
- * | o[2] o[3] 0 | * | c[2] c[3] 0 | = | o[2]*c[0]+o[3]*c[2] o[2]*c[1]+o[3]*c[3] 0 |
- * | o[4] o[5] 1 | | c[4] c[5] 1 | | o[4]*c[0]+o[5]*c[2]+c[4] o[4]*c[1]+o[5]*c[3]+c[5] 1 |
+ /* Mres = Mop * Mctm
+ * where Mop is operation and Mctm is graphics state ctm
*/
- M[0] = operation->u.number[0] * gs->param_stack[gs->param_stack_idx].ctm[0] +
- operation->u.number[1] * gs->param_stack[gs->param_stack_idx].ctm[2];
- M[1] = operation->u.number[0] * gs->param_stack[gs->param_stack_idx].ctm[1] +
- operation->u.number[1] * gs->param_stack[gs->param_stack_idx].ctm[3];
- M[2] = operation->u.number[2] * gs->param_stack[gs->param_stack_idx].ctm[0] +
- operation->u.number[3] * gs->param_stack[gs->param_stack_idx].ctm[2];
- M[3] = operation->u.number[2] * gs->param_stack[gs->param_stack_idx].ctm[1] +
- operation->u.number[3] * gs->param_stack[gs->param_stack_idx].ctm[3];
- M[4] = operation->u.number[4] * gs->param_stack[gs->param_stack_idx].ctm[0] +
- operation->u.number[5] * gs->param_stack[gs->param_stack_idx].ctm[2] +
- gs->param_stack[gs->param_stack_idx].ctm[4];
- M[5] = operation->u.number[4] * gs->param_stack[gs->param_stack_idx].ctm[1] +
- operation->u.number[5] * gs->param_stack[gs->param_stack_idx].ctm[3] +
- gs->param_stack[gs->param_stack_idx].ctm[5];
-
- gs->param_stack[gs->param_stack_idx].ctm[0] = M[0];
- gs->param_stack[gs->param_stack_idx].ctm[1] = M[1];
- gs->param_stack[gs->param_stack_idx].ctm[2] = M[2];
- gs->param_stack[gs->param_stack_idx].ctm[3] = M[3];
- gs->param_stack[gs->param_stack_idx].ctm[4] = M[4];
- gs->param_stack[gs->param_stack_idx].ctm[5] = M[5];
+ return pdf_matrix_multiply(operation->u.number,
+ gs->param_stack[gs->param_stack_idx].ctm,
+ gs->param_stack[gs->param_stack_idx].ctm);
+}
+
+
+static inline nspdferror
+set_gsc_grey(struct graphics_state_color *gsc, float gray)
+{
+ /* bounds check */
+ if (gray < 0.0) {
+ gray = 0.0;
+ } else if (gray > 1.0) {
+ gray = 1.0;
+ }
+
+ gsc->space = GSDeviceGray;
+ gsc->u.gray = gray;
+
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
+render_operation_G(struct content_operation *operation,
+ struct graphics_state *gs)
+{
+ return set_gsc_grey(&gs->param_stack[gs->param_stack_idx].stroke.colour,
+ operation->u.number[0]);
+}
+
+static inline nspdferror
+render_operation_g(struct content_operation *operation,
+ struct graphics_state *gs)
+{
+ return set_gsc_grey(&gs->param_stack[gs->param_stack_idx].other.colour,
+ operation->u.number[0]);
+}
+
+static inline nspdferror
+set_gsc_rgb(struct graphics_state_color *gsc, float r, float g, float b)
+{
+ /* bounds check */
+ if (r < 0.0) {
+ r = 0.0;
+ } else if (r > 1.0) {
+ r = 1.0;
+ }
+ if (g < 0.0) {
+ g = 0.0;
+ } else if (g > 1.0) {
+ g = 1.0;
+ }
+ if (b < 0.0) {
+ b = 0.0;
+ } else if (b > 1.0) {
+ b = 1.0;
+ }
+
+ gsc->space = GSDeviceRGB;
+ gsc->u.rgb.r = r;
+ gsc->u.rgb.g = g;
+ gsc->u.rgb.b = b;
+
return NSPDFERROR_OK;
}
+static inline nspdferror
+render_operation_RG(struct content_operation *operation,
+ struct graphics_state *gs)
+{
+ return set_gsc_rgb(&gs->param_stack[gs->param_stack_idx].stroke.colour,
+ operation->u.number[0],
+ operation->u.number[1],
+ operation->u.number[2]);
+}
+
+static inline nspdferror
+render_operation_rg(struct content_operation *operation,
+ struct graphics_state *gs)
+{
+ return set_gsc_rgb(&gs->param_stack[gs->param_stack_idx].other.colour,
+ operation->u.number[0],
+ operation->u.number[1],
+ operation->u.number[2]);
+}
+
+static inline nspdferror
+set_gsc_cmyk(struct graphics_state_color *gsc, float c, float m, float y, float k)
+{
+ /* bounds check */
+ if (c < 0.0) {
+ c = 0.0;
+ } else if (c > 1.0) {
+ c = 1.0;
+ }
+ if (y < 0.0) {
+ y = 0.0;
+ } else if (y > 1.0) {
+ y = 1.0;
+ }
+ if (m < 0.0) {
+ m = 0.0;
+ } else if (m > 1.0) {
+ m = 1.0;
+ }
+ if (k < 0.0) {
+ k = 0.0;
+ } else if (k > 1.0) {
+ k = 1.0;
+ }
+
+ gsc->space = GSDeviceCMYK;
+ gsc->u.cmyk.c = c;
+ gsc->u.cmyk.m = m;
+ gsc->u.cmyk.y = y;
+ gsc->u.cmyk.k = k;
+
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
+render_operation_K(struct content_operation *operation,
+ struct graphics_state *gs)
+{
+ return set_gsc_cmyk(&gs->param_stack[gs->param_stack_idx].stroke.colour,
+ operation->u.number[0],
+ operation->u.number[1],
+ operation->u.number[2],
+ operation->u.number[3]);
+}
+
+static inline nspdferror
+render_operation_k(struct content_operation *operation,
+ struct graphics_state *gs)
+{
+ return set_gsc_cmyk(&gs->param_stack[gs->param_stack_idx].other.colour,
+ operation->u.number[0],
+ operation->u.number[1],
+ operation->u.number[2],
+ operation->u.number[3]);
+}
+
+static inline nspdferror
+set_gsc_cs(struct graphics_state_color *gsc, const char *spacename)
+{
+ if (strcmp(spacename, "DeviceGray") == 0) {
+ gsc->space = GSDeviceGray;
+ gsc->u.gray = 0.0;
+ } else if (strcmp(spacename, "DeviceRGB") == 0) {
+ gsc->space = GSDeviceRGB;
+ gsc->u.rgb.r = 0.0;
+ gsc->u.rgb.g = 0.0;
+ gsc->u.rgb.b = 0.0;
+ } else if (strcmp(spacename, "DeviceCMYK") == 0) {
+ gsc->space = GSDeviceCMYK;
+ gsc->u.cmyk.c = 0.0;
+ gsc->u.cmyk.m = 0.0;
+ gsc->u.cmyk.y = 0.0;
+ gsc->u.cmyk.k = 1.0;
+ } else {
+ /** \todo colourspace from name defined in the ColorSpace subdictionary of the current resource dictionary */
+ gsc->space = GSDeviceGray;
+ gsc->u.gray = 0.0;
+
+ }
+ //printf("cs %s %d\n", spacename, gsc->space);
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
+render_operation_CS(struct content_operation *operation,
+ struct graphics_state *gs)
+{
+ return set_gsc_cs(&gs->param_stack[gs->param_stack_idx].stroke.colour,
+ operation->u.name);
+}
+
+static inline nspdferror
+render_operation_cs(struct content_operation *operation,
+ struct graphics_state *gs)
+{
+ return set_gsc_cs(&gs->param_stack[gs->param_stack_idx].other.colour,
+ operation->u.name);
+}
+
/**
* Initialise the parameter stack
*
@@ -486,6 +739,7 @@ nspdf_page_render(struct nspdf_doc *doc,
idx < page_content->length;
idx++, operation++) {
switch(operation->operator) {
+ /* path operations */
case CONTENT_OP_m: /* move */
res = render_operation_m(operation, &gs);
break;
@@ -524,9 +778,25 @@ nspdf_page_render(struct nspdf_doc *doc,
res = render_operation_S(&gs, render_ctx);
break;
- case CONTENT_OP_w:
+ /* graphics state operations */
+ case CONTENT_OP_w: /* line width */
res = render_operation_w(operation, &gs);
- //printf("line width:%f\n", gs.param_stack[gs.param_stack_idx].line_width);
+ break;
+
+ case CONTENT_OP_i: /* flatness */
+ res = render_operation_i(operation, &gs);
+ break;
+
+ case CONTENT_OP_j: /* line join style */
+ res = render_operation_j(operation, &gs);
+ break;
+
+ case CONTENT_OP_J: /* line cap style */
+ res = render_operation_J(operation, &gs);
+ break;
+
+ case CONTENT_OP_M: /* miter limit */
+ res = render_operation_M(operation, &gs);
break;
case CONTENT_OP_q: /* push parameter stack */
@@ -541,6 +811,44 @@ nspdf_page_render(struct nspdf_doc *doc,
res = render_operation_cm(operation, &gs);
break;
+ /* colour operators */
+ case CONTENT_OP_G: /* gray stroking colour */
+ res = render_operation_G(operation, &gs);
+ break;
+
+ case CONTENT_OP_g: /* gray non-stroking colour */
+ res = render_operation_g(operation, &gs);
+ break;
+
+ case CONTENT_OP_RG: /* rgb stroking colour */
+ res = render_operation_RG(operation, &gs);
+ break;
+
+ case CONTENT_OP_rg: /* rgb non-stroking colour */
+ res = render_operation_rg(operation, &gs);
+ break;
+
+ case CONTENT_OP_K: /* CMYK stroking colour */
+ res = render_operation_K(operation, &gs);
+ break;
+
+ case CONTENT_OP_k: /* CMYK non-stroking colour */
+ res = render_operation_k(operation, &gs);
+ break;
+
+ case CONTENT_OP_CS: /* change stroking colourspace */
+ res = render_operation_CS(operation, &gs);
+ break;
+
+ case CONTENT_OP_cs: /* change non-stroking colourspace */
+ res = render_operation_cs(operation, &gs);
+ break;
+
+ //case CONTENT_OP_SC:
+ //case CONTENT_OP_sc:
+ //case CONTENT_OP_SCN:
+ //case CONTENT_OP_scn:
+
default:
printf("operator %s\n",
nspdf__cos_content_operator_name(operation->operator));
--
PDF Manipulation Library
5 years, 10 months
toolchains: branch chris/gcc6-os3 updated. 6313763917905a44ba3b7a64988168f5b598729d
by NetSurf Browser Project
Gitweb links:
...log http://git.netsurf-browser.org/toolchains.git/shortlog/6313763917905a44ba...
...commit http://git.netsurf-browser.org/toolchains.git/commit/6313763917905a44ba3b...
...tree http://git.netsurf-browser.org/toolchains.git/tree/6313763917905a44ba3b7a...
The branch, chris/gcc6-os3 has been updated
via 6313763917905a44ba3b7a64988168f5b598729d (commit)
from 7208e56be4888cd8a28d7117fc7dd4b168f49889 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commitdiff http://git.netsurf-browser.org/toolchains.git/commit/?id=6313763917905a44...
commit 6313763917905a44ba3b7a64988168f5b598729d
Author: Chris Young <chris(a)unsatisfactorysoftware.co.uk>
Commit: Chris Young <chris(a)unsatisfactorysoftware.co.uk>
ifndef
diff --git a/sdk/recipes/patches/openssl/e_os.h.p b/sdk/recipes/patches/openssl/e_os.h.p
index 26c68f6..cbcec55 100644
--- a/sdk/recipes/patches/openssl/e_os.h.p
+++ b/sdk/recipes/patches/openssl/e_os.h.p
@@ -4,7 +4,7 @@
# endif
# include <netinet/in.h>
# include <arpa/inet.h>
-+# if defined(OPENSSL_SYS_AMIGAOS3)
++# if !defined(OPENSSL_SYS_AMIGAOS3)
# include <netinet/tcp.h>
+# endif
# endif
-----------------------------------------------------------------------
Summary of changes:
sdk/recipes/patches/openssl/e_os.h.p | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sdk/recipes/patches/openssl/e_os.h.p b/sdk/recipes/patches/openssl/e_os.h.p
index 26c68f6..cbcec55 100644
--- a/sdk/recipes/patches/openssl/e_os.h.p
+++ b/sdk/recipes/patches/openssl/e_os.h.p
@@ -4,7 +4,7 @@
# endif
# include <netinet/in.h>
# include <arpa/inet.h>
-+# if defined(OPENSSL_SYS_AMIGAOS3)
++# if !defined(OPENSSL_SYS_AMIGAOS3)
# include <netinet/tcp.h>
+# endif
# endif
--
Cross-compilation toolchains and environments
5 years, 10 months
toolchains: branch chris/gcc6-os3 updated. 7208e56be4888cd8a28d7117fc7dd4b168f49889
by NetSurf Browser Project
Gitweb links:
...log http://git.netsurf-browser.org/toolchains.git/shortlog/7208e56be4888cd8a2...
...commit http://git.netsurf-browser.org/toolchains.git/commit/7208e56be4888cd8a28d...
...tree http://git.netsurf-browser.org/toolchains.git/tree/7208e56be4888cd8a28d71...
The branch, chris/gcc6-os3 has been updated
via 7208e56be4888cd8a28d7117fc7dd4b168f49889 (commit)
via 5fc7b9195fed4530550f0ab67c933d060da10e79 (commit)
via 179dc6c30c2af02130f6dd79e8cedcffe242b2a6 (commit)
from 976bc05e52f73ea8ab4a8272004d8295037aca0e (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commitdiff http://git.netsurf-browser.org/toolchains.git/commit/?id=7208e56be4888cd8...
commit 7208e56be4888cd8a28d7117fc7dd4b168f49889
Author: Chris Young <chris(a)unsatisfactorysoftware.co.uk>
Commit: Chris Young <chris(a)unsatisfactorysoftware.co.uk>
Remove netinet/tcp.h requirement
diff --git a/sdk/recipes/patches/openssl/e_os.h.p b/sdk/recipes/patches/openssl/e_os.h.p
new file mode 100644
index 0000000..26c68f6
--- /dev/null
+++ b/sdk/recipes/patches/openssl/e_os.h.p
@@ -0,0 +1,12 @@
+--- e_os.h 2017-05-25 13:46:20.000000000 +0100
++++ e_os.h 2018-02-05 18:38:51.167349254 +0000
+@@ -423,7 +423,9 @@ struct servent *PASCAL getservbyname(con
+ # endif
+ # include <netinet/in.h>
+ # include <arpa/inet.h>
++# if defined(OPENSSL_SYS_AMIGAOS3)
+ # include <netinet/tcp.h>
++# endif
+ # endif
+
+ # ifdef OPENSSL_SYS_AIX
commitdiff http://git.netsurf-browser.org/toolchains.git/commit/?id=5fc7b9195fed4530...
commit 5fc7b9195fed4530550f0ab67c933d060da10e79
Author: Chris Young <chris(a)unsatisfactorysoftware.co.uk>
Commit: Chris Young <chris(a)unsatisfactorysoftware.co.uk>
Add a missing typedef for locale_t
(clib2 maintainer is aware of this issue)
diff --git a/m68k-unknown-amigaos/recipes/patches/clib2/clib2.include.wchar.h.p b/m68k-unknown-amigaos/recipes/patches/clib2/clib2.include.wchar.h.p
new file mode 100644
index 0000000..968c3d0
--- /dev/null
+++ b/m68k-unknown-amigaos/recipes/patches/clib2/clib2.include.wchar.h.p
@@ -0,0 +1,10 @@
+--- include/wchar.h 2018-02-05 18:23:54.079257195 +0000
++++ include/wchar.h 2018-02-05 18:27:58.781471125 +0000
+@@ -175,6 +175,7 @@ extern size_t wcsftime(wchar_t *s, size_
+ /****************************************************************************/
+
+ #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
++typedef void *locale_t;
+
+ extern long long wcstoll(const wchar_t *str, wchar_t **ptr, int base);
+ extern unsigned long long wcstoull(const wchar_t *str, wchar_t **ptr, int base);
commitdiff http://git.netsurf-browser.org/toolchains.git/commit/?id=179dc6c30c2af021...
commit 179dc6c30c2af02130f6dd79e8cedcffe242b2a6
Author: Chris Young <chris(a)unsatisfactorysoftware.co.uk>
Commit: Chris Young <chris(a)unsatisfactorysoftware.co.uk>
gcc update to try to fix issue with % op in 020 code
diff --git a/m68k-unknown-amigaos/Makefile b/m68k-unknown-amigaos/Makefile
index 6d26987..cc5aee9 100644
--- a/m68k-unknown-amigaos/Makefile
+++ b/m68k-unknown-amigaos/Makefile
@@ -3,7 +3,7 @@
# sources
-UPSTREAM_GCC_VERSION := 6.4.1b-20180203
+UPSTREAM_GCC_VERSION := 6.4.1b-20180205
UPSTREAM_GCC_TARBALL := v$(UPSTREAM_GCC_VERSION).tar.gz
UPSTREAM_GCC_URI := https://github.com/chris-y/gcc/archive/$(UPSTREAM_GCC_TARBALL)
-----------------------------------------------------------------------
Summary of changes:
m68k-unknown-amigaos/Makefile | 2 +-
.../recipes/patches/clib2/clib2.include.wchar.h.p | 10 ++++++++++
sdk/recipes/patches/openssl/e_os.h.p | 12 ++++++++++++
3 files changed, 23 insertions(+), 1 deletion(-)
create mode 100644 m68k-unknown-amigaos/recipes/patches/clib2/clib2.include.wchar.h.p
create mode 100644 sdk/recipes/patches/openssl/e_os.h.p
diff --git a/m68k-unknown-amigaos/Makefile b/m68k-unknown-amigaos/Makefile
index 6d26987..cc5aee9 100644
--- a/m68k-unknown-amigaos/Makefile
+++ b/m68k-unknown-amigaos/Makefile
@@ -3,7 +3,7 @@
# sources
-UPSTREAM_GCC_VERSION := 6.4.1b-20180203
+UPSTREAM_GCC_VERSION := 6.4.1b-20180205
UPSTREAM_GCC_TARBALL := v$(UPSTREAM_GCC_VERSION).tar.gz
UPSTREAM_GCC_URI := https://github.com/chris-y/gcc/archive/$(UPSTREAM_GCC_TARBALL)
diff --git a/m68k-unknown-amigaos/recipes/patches/clib2/clib2.include.wchar.h.p b/m68k-unknown-amigaos/recipes/patches/clib2/clib2.include.wchar.h.p
new file mode 100644
index 0000000..968c3d0
--- /dev/null
+++ b/m68k-unknown-amigaos/recipes/patches/clib2/clib2.include.wchar.h.p
@@ -0,0 +1,10 @@
+--- include/wchar.h 2018-02-05 18:23:54.079257195 +0000
++++ include/wchar.h 2018-02-05 18:27:58.781471125 +0000
+@@ -175,6 +175,7 @@ extern size_t wcsftime(wchar_t *s, size_
+ /****************************************************************************/
+
+ #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
++typedef void *locale_t;
+
+ extern long long wcstoll(const wchar_t *str, wchar_t **ptr, int base);
+ extern unsigned long long wcstoull(const wchar_t *str, wchar_t **ptr, int base);
diff --git a/sdk/recipes/patches/openssl/e_os.h.p b/sdk/recipes/patches/openssl/e_os.h.p
new file mode 100644
index 0000000..26c68f6
--- /dev/null
+++ b/sdk/recipes/patches/openssl/e_os.h.p
@@ -0,0 +1,12 @@
+--- e_os.h 2017-05-25 13:46:20.000000000 +0100
++++ e_os.h 2018-02-05 18:38:51.167349254 +0000
+@@ -423,7 +423,9 @@ struct servent *PASCAL getservbyname(con
+ # endif
+ # include <netinet/in.h>
+ # include <arpa/inet.h>
++# if defined(OPENSSL_SYS_AMIGAOS3)
+ # include <netinet/tcp.h>
++# endif
+ # endif
+
+ # ifdef OPENSSL_SYS_AIX
--
Cross-compilation toolchains and environments
5 years, 10 months
netsurf: branch vince/pdf updated. release/3.7-42-g8bd19b1
by NetSurf Browser Project
Gitweb links:
...log http://git.netsurf-browser.org/netsurf.git/shortlog/8bd19b1cf342085998364...
...commit http://git.netsurf-browser.org/netsurf.git/commit/8bd19b1cf342085998364bc...
...tree http://git.netsurf-browser.org/netsurf.git/tree/8bd19b1cf342085998364bc40...
The branch, vince/pdf has been updated
via 8bd19b1cf342085998364bc403a3af5febfdde0c (commit)
from 4953bd0fe1914860ede6c80c0d6c7fcb5492f613 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commitdiff http://git.netsurf-browser.org/netsurf.git/commit/?id=8bd19b1cf3420859983...
commit 8bd19b1cf342085998364bc403a3af5febfdde0c
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
allow moving forward and back clicking in window
diff --git a/content/handlers/pdf/pdf.c b/content/handlers/pdf/pdf.c
index d8c4d61..89548cd 100644
--- a/content/handlers/pdf/pdf.c
+++ b/content/handlers/pdf/pdf.c
@@ -33,8 +33,10 @@
#include "utils/messages.h"
#include "utils/utils.h"
+#include "utils/log.h"
#include "netsurf/plotters.h"
#include "netsurf/content.h"
+#include "netsurf/browser_window.h"
#include "content/llcache.h"
#include "content/content_protected.h"
@@ -46,6 +48,7 @@ typedef struct pdf_content {
struct nspdf_doc *doc;
unsigned int current_page;
+ unsigned int page_count;
} pdf_content;
static nserror nspdf2nserr(nspdferror nspdferr)
@@ -139,13 +142,19 @@ static bool pdf_convert(struct content *c)
return false;
}
+ pdfres = nspdf_page_count(pdfc->doc, &pdfc->page_count);
+ if (pdfres != NSPDFERROR_OK) {
+ content_broadcast_errorcode(c, NSERROR_INVALID);
+ return false;
+ }
+
pdfres = nspdf_get_title(pdfc->doc, &title);
if (pdfres == NSPDFERROR_OK) {
content__set_title(c, lwc_string_data(title));
}
/** \todo extract documents starting page number */
- pdfc->current_page = 0;
+ pdfc->current_page = 16;
pdfres = nspdf_get_page_dimensions(pdfc->doc,
pdfc->current_page,
@@ -172,7 +181,7 @@ pdf_path(const struct nspdf_style *style,
const struct redraw_context *ctx = ctxin;
ctx->plot->path(ctx,
- style,
+ (const struct plot_style_s *)style,
path,
path_length,
style->stroke_width,
@@ -191,7 +200,8 @@ pdf_redraw(struct content *c,
nspdferror pdfres;
struct nspdf_render_ctx render_ctx;
- printf("data x:%d y:%d w:%d h:%d\nclip %d %d %d %d\n",
+ NSLOG(netsurf, DEBUG,
+ "data x:%d y:%d w:%d h:%d\nclip %d %d %d %d\n",
data->x, data->y, data->width, data->height,
clip->x0, clip->y0, clip->x1, clip->y1);
@@ -206,7 +216,6 @@ pdf_redraw(struct content *c,
pdfres = nspdf_page_render(pdfc->doc, pdfc->current_page, &render_ctx);
-
return true;
}
@@ -224,32 +233,56 @@ static content_type pdf_content_type(void)
}
static void
+pdf_change_page(struct pdf_content *pdfc,
+ struct browser_window *bw,
+ unsigned int page_number)
+{
+ float page_width;
+ float page_height;
+ nspdferror pdfres;
+
+ /* ensure page stays in bounds */
+ if (page_number >= pdfc->page_count) {
+ return;
+ }
+
+ pdfc->current_page = page_number;
+
+ pdfres = nspdf_get_page_dimensions(pdfc->doc,
+ pdfc->current_page,
+ &page_width,
+ &page_height);
+ if (pdfres == NSPDFERROR_OK) {
+ pdfc->base.width = page_width;
+ pdfc->base.height = page_height;
+ NSLOG(netsurf, DEBUG,
+ "page %d w:%f h:%f\n",
+ pdfc->current_page,
+ page_width,
+ page_height);
+ }
+
+ browser_window_update(bw, false);
+}
+
+static void
pdf_mouse_action(struct content *c,
struct browser_window *bw,
browser_mouse_state mouse,
int x, int y)
{
struct pdf_content *pdfc = (struct pdf_content *)c;
- nspdferror pdfres;
- printf("ici\n");
+
if (mouse & BROWSER_MOUSE_CLICK_1) {
- float page_width;
- float page_height;
-
- pdfc->current_page++;
-
- pdfres = nspdf_get_page_dimensions(pdfc->doc,
- pdfc->current_page,
- &page_width,
- &page_height);
- if (pdfres == NSPDFERROR_OK) {
- pdfc->base.width = page_width;
- pdfc->base.height = page_height;
- printf("page $d w:%f h:%f\n",pdfc->current_page, page_width, page_height);
+ int bwwidth;
+ int bwheight;
+ browser_window_get_extents(bw, false, &bwwidth, &bwheight);
+
+ if (x < (bwwidth / 2)) {
+ pdf_change_page(pdfc, bw, pdfc->current_page - 1);
+ } else {
+ pdf_change_page(pdfc, bw, pdfc->current_page + 1);
}
-
- browser_window_update(bw, false);
-
}
}
-----------------------------------------------------------------------
Summary of changes:
content/handlers/pdf/pdf.c | 77 +++++++++++++++++++++++++++++++-------------
1 file changed, 55 insertions(+), 22 deletions(-)
diff --git a/content/handlers/pdf/pdf.c b/content/handlers/pdf/pdf.c
index d8c4d61..89548cd 100644
--- a/content/handlers/pdf/pdf.c
+++ b/content/handlers/pdf/pdf.c
@@ -33,8 +33,10 @@
#include "utils/messages.h"
#include "utils/utils.h"
+#include "utils/log.h"
#include "netsurf/plotters.h"
#include "netsurf/content.h"
+#include "netsurf/browser_window.h"
#include "content/llcache.h"
#include "content/content_protected.h"
@@ -46,6 +48,7 @@ typedef struct pdf_content {
struct nspdf_doc *doc;
unsigned int current_page;
+ unsigned int page_count;
} pdf_content;
static nserror nspdf2nserr(nspdferror nspdferr)
@@ -139,13 +142,19 @@ static bool pdf_convert(struct content *c)
return false;
}
+ pdfres = nspdf_page_count(pdfc->doc, &pdfc->page_count);
+ if (pdfres != NSPDFERROR_OK) {
+ content_broadcast_errorcode(c, NSERROR_INVALID);
+ return false;
+ }
+
pdfres = nspdf_get_title(pdfc->doc, &title);
if (pdfres == NSPDFERROR_OK) {
content__set_title(c, lwc_string_data(title));
}
/** \todo extract documents starting page number */
- pdfc->current_page = 0;
+ pdfc->current_page = 16;
pdfres = nspdf_get_page_dimensions(pdfc->doc,
pdfc->current_page,
@@ -172,7 +181,7 @@ pdf_path(const struct nspdf_style *style,
const struct redraw_context *ctx = ctxin;
ctx->plot->path(ctx,
- style,
+ (const struct plot_style_s *)style,
path,
path_length,
style->stroke_width,
@@ -191,7 +200,8 @@ pdf_redraw(struct content *c,
nspdferror pdfres;
struct nspdf_render_ctx render_ctx;
- printf("data x:%d y:%d w:%d h:%d\nclip %d %d %d %d\n",
+ NSLOG(netsurf, DEBUG,
+ "data x:%d y:%d w:%d h:%d\nclip %d %d %d %d\n",
data->x, data->y, data->width, data->height,
clip->x0, clip->y0, clip->x1, clip->y1);
@@ -206,7 +216,6 @@ pdf_redraw(struct content *c,
pdfres = nspdf_page_render(pdfc->doc, pdfc->current_page, &render_ctx);
-
return true;
}
@@ -224,32 +233,56 @@ static content_type pdf_content_type(void)
}
static void
+pdf_change_page(struct pdf_content *pdfc,
+ struct browser_window *bw,
+ unsigned int page_number)
+{
+ float page_width;
+ float page_height;
+ nspdferror pdfres;
+
+ /* ensure page stays in bounds */
+ if (page_number >= pdfc->page_count) {
+ return;
+ }
+
+ pdfc->current_page = page_number;
+
+ pdfres = nspdf_get_page_dimensions(pdfc->doc,
+ pdfc->current_page,
+ &page_width,
+ &page_height);
+ if (pdfres == NSPDFERROR_OK) {
+ pdfc->base.width = page_width;
+ pdfc->base.height = page_height;
+ NSLOG(netsurf, DEBUG,
+ "page %d w:%f h:%f\n",
+ pdfc->current_page,
+ page_width,
+ page_height);
+ }
+
+ browser_window_update(bw, false);
+}
+
+static void
pdf_mouse_action(struct content *c,
struct browser_window *bw,
browser_mouse_state mouse,
int x, int y)
{
struct pdf_content *pdfc = (struct pdf_content *)c;
- nspdferror pdfres;
- printf("ici\n");
+
if (mouse & BROWSER_MOUSE_CLICK_1) {
- float page_width;
- float page_height;
-
- pdfc->current_page++;
-
- pdfres = nspdf_get_page_dimensions(pdfc->doc,
- pdfc->current_page,
- &page_width,
- &page_height);
- if (pdfres == NSPDFERROR_OK) {
- pdfc->base.width = page_width;
- pdfc->base.height = page_height;
- printf("page $d w:%f h:%f\n",pdfc->current_page, page_width, page_height);
+ int bwwidth;
+ int bwheight;
+ browser_window_get_extents(bw, false, &bwwidth, &bwheight);
+
+ if (x < (bwwidth / 2)) {
+ pdf_change_page(pdfc, bw, pdfc->current_page - 1);
+ } else {
+ pdf_change_page(pdfc, bw, pdfc->current_page + 1);
}
-
- browser_window_update(bw, false);
-
}
}
--
NetSurf Browser
5 years, 10 months
libnspdf: branch master updated. c4dd67804afc84fde84402649e5b32f2b00680c3
by NetSurf Browser Project
Gitweb links:
...log http://git.netsurf-browser.org/libnspdf.git/shortlog/c4dd67804afc84fde844...
...commit http://git.netsurf-browser.org/libnspdf.git/commit/c4dd67804afc84fde84402...
...tree http://git.netsurf-browser.org/libnspdf.git/tree/c4dd67804afc84fde8440264...
The branch, master has been updated
via c4dd67804afc84fde84402649e5b32f2b00680c3 (commit)
from 4a18ec928fa6e6e2231fcef76e91eb1a4d57f588 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=c4dd67804afc84fde8...
commit c4dd67804afc84fde84402649e5b32f2b00680c3
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
fix emitting beziers
diff --git a/src/page.c b/src/page.c
index 5156430..c5ea8b8 100644
--- a/src/page.c
+++ b/src/page.c
@@ -277,6 +277,19 @@ render_operation_l(struct content_operation *operation, struct graphics_state *g
}
static inline nspdferror
+render_operation_c(struct content_operation *operation, struct graphics_state *gs)
+{
+ gs->path[gs->path_idx++] = NSPDF_PATH_BEZIER;
+ gs->path[gs->path_idx++] = operation->u.number[0];
+ gs->path[gs->path_idx++] = operation->u.number[1];
+ gs->path[gs->path_idx++] = operation->u.number[2];
+ gs->path[gs->path_idx++] = operation->u.number[3];
+ gs->path[gs->path_idx++] = operation->u.number[4];
+ gs->path[gs->path_idx++] = operation->u.number[5];
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
render_operation_re(struct content_operation *operation, struct graphics_state *gs)
{
gs->path[gs->path_idx++] = NSPDF_PATH_MOVE;
@@ -485,6 +498,10 @@ nspdf_page_render(struct nspdf_doc *doc,
res = render_operation_re(operation, &gs);
break;
+ case CONTENT_OP_c: /* curve */
+ res = render_operation_c(operation, &gs);
+ break;
+
case CONTENT_OP_h: /* close path */
res = render_operation_h(&gs);
break;
-----------------------------------------------------------------------
Summary of changes:
src/page.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/src/page.c b/src/page.c
index 5156430..c5ea8b8 100644
--- a/src/page.c
+++ b/src/page.c
@@ -277,6 +277,19 @@ render_operation_l(struct content_operation *operation, struct graphics_state *g
}
static inline nspdferror
+render_operation_c(struct content_operation *operation, struct graphics_state *gs)
+{
+ gs->path[gs->path_idx++] = NSPDF_PATH_BEZIER;
+ gs->path[gs->path_idx++] = operation->u.number[0];
+ gs->path[gs->path_idx++] = operation->u.number[1];
+ gs->path[gs->path_idx++] = operation->u.number[2];
+ gs->path[gs->path_idx++] = operation->u.number[3];
+ gs->path[gs->path_idx++] = operation->u.number[4];
+ gs->path[gs->path_idx++] = operation->u.number[5];
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
render_operation_re(struct content_operation *operation, struct graphics_state *gs)
{
gs->path[gs->path_idx++] = NSPDF_PATH_MOVE;
@@ -485,6 +498,10 @@ nspdf_page_render(struct nspdf_doc *doc,
res = render_operation_re(operation, &gs);
break;
+ case CONTENT_OP_c: /* curve */
+ res = render_operation_c(operation, &gs);
+ break;
+
case CONTENT_OP_h: /* close path */
res = render_operation_h(&gs);
break;
--
PDF Manipulation Library
5 years, 10 months
libnspdf: branch master created. 4a18ec928fa6e6e2231fcef76e91eb1a4d57f588
by NetSurf Browser Project
Gitweb links:
...log http://git.netsurf-browser.org/libnspdf.git/shortlog/4a18ec928fa6e6e2231f...
...commit http://git.netsurf-browser.org/libnspdf.git/commit/4a18ec928fa6e6e2231fce...
...tree http://git.netsurf-browser.org/libnspdf.git/tree/4a18ec928fa6e6e2231fcef7...
The branch, master has been created
at 4a18ec928fa6e6e2231fcef76e91eb1a4d57f588 (commit)
- Log -----------------------------------------------------------------
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=4a18ec928fa6e6e223...
commit 4a18ec928fa6e6e2231fcef76e91eb1a4d57f588
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
implement CTM matrix transform
diff --git a/src/page.c b/src/page.c
index 0d12fc2..5156430 100644
--- a/src/page.c
+++ b/src/page.c
@@ -348,6 +348,64 @@ render_operation_w(struct content_operation *operation, struct graphics_state *g
return NSPDFERROR_OK;
}
+static inline nspdferror
+render_operation_q(struct graphics_state *gs)
+{
+ gs->param_stack[gs->param_stack_idx + 1] = gs->param_stack[gs->param_stack_idx];
+ gs->param_stack_idx++;
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
+render_operation_Q(struct graphics_state *gs)
+{
+ if (gs->param_stack_idx > 0) {
+ gs->param_stack_idx--;
+ }
+ return NSPDFERROR_OK;
+}
+
+/**
+ * pre-multiply matrix
+ */
+static inline nspdferror
+render_operation_cm(struct content_operation *operation, struct graphics_state *gs)
+{
+ float M[6]; /* result matrix */
+ /* M' = Mt * M */
+ /* M' = Mo * Mc where Mo is operation and Mc is graphics state ctm */
+ /* | a b c | | A B C | | aA+bP+cU aB+bQ+cV aC+bR+cW |
+ * | p q r | * | P Q R | = | pA+qP+rU pB+qQ+rV pC+qR+rW |
+ * | u v w | | U V W | | uA+vP+wU uB+vQ+wV uC+vR+wW |
+ *
+ * | o[0] o[1] 0 | | c[0] c[1] 0 | | o[0]*c[0]+o[1]*c[2] o[0]*c[1]+o[1]*c[3] 0 |
+ * | o[2] o[3] 0 | * | c[2] c[3] 0 | = | o[2]*c[0]+o[3]*c[2] o[2]*c[1]+o[3]*c[3] 0 |
+ * | o[4] o[5] 1 | | c[4] c[5] 1 | | o[4]*c[0]+o[5]*c[2]+c[4] o[4]*c[1]+o[5]*c[3]+c[5] 1 |
+ */
+ M[0] = operation->u.number[0] * gs->param_stack[gs->param_stack_idx].ctm[0] +
+ operation->u.number[1] * gs->param_stack[gs->param_stack_idx].ctm[2];
+ M[1] = operation->u.number[0] * gs->param_stack[gs->param_stack_idx].ctm[1] +
+ operation->u.number[1] * gs->param_stack[gs->param_stack_idx].ctm[3];
+ M[2] = operation->u.number[2] * gs->param_stack[gs->param_stack_idx].ctm[0] +
+ operation->u.number[3] * gs->param_stack[gs->param_stack_idx].ctm[2];
+ M[3] = operation->u.number[2] * gs->param_stack[gs->param_stack_idx].ctm[1] +
+ operation->u.number[3] * gs->param_stack[gs->param_stack_idx].ctm[3];
+ M[4] = operation->u.number[4] * gs->param_stack[gs->param_stack_idx].ctm[0] +
+ operation->u.number[5] * gs->param_stack[gs->param_stack_idx].ctm[2] +
+ gs->param_stack[gs->param_stack_idx].ctm[4];
+ M[5] = operation->u.number[4] * gs->param_stack[gs->param_stack_idx].ctm[1] +
+ operation->u.number[5] * gs->param_stack[gs->param_stack_idx].ctm[3] +
+ gs->param_stack[gs->param_stack_idx].ctm[5];
+
+ gs->param_stack[gs->param_stack_idx].ctm[0] = M[0];
+ gs->param_stack[gs->param_stack_idx].ctm[1] = M[1];
+ gs->param_stack[gs->param_stack_idx].ctm[2] = M[2];
+ gs->param_stack[gs->param_stack_idx].ctm[3] = M[3];
+ gs->param_stack[gs->param_stack_idx].ctm[4] = M[4];
+ gs->param_stack[gs->param_stack_idx].ctm[5] = M[5];
+ return NSPDFERROR_OK;
+}
+
/**
* Initialise the parameter stack
*
@@ -454,6 +512,18 @@ nspdf_page_render(struct nspdf_doc *doc,
//printf("line width:%f\n", gs.param_stack[gs.param_stack_idx].line_width);
break;
+ case CONTENT_OP_q: /* push parameter stack */
+ res = render_operation_q(&gs);
+ break;
+
+ case CONTENT_OP_Q: /* pop parameter stack */
+ res = render_operation_Q(&gs);
+ break;
+
+ case CONTENT_OP_cm: /* change matrix */
+ res = render_operation_cm(operation, &gs);
+ break;
+
default:
printf("operator %s\n",
nspdf__cos_content_operator_name(operation->operator));
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=eeb93199c64db0c47d...
commit eeb93199c64db0c47dc73bb1b5a3c7d16d8172e2
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
make value extraction the same interface as getting
diff --git a/src/cos_object.c b/src/cos_object.c
index 12adcc9..ad7a17f 100644
--- a/src/cos_object.c
+++ b/src/cos_object.c
@@ -178,31 +178,42 @@ nspdferror cos_free_object(struct cos_object *cos_obj)
* this finds and returns a value for a given key removing it from a dictionary
*/
nspdferror
-cos_extract_dictionary_value(struct cos_object *dict,
+cos_extract_dictionary_value(struct nspdf_doc *doc,
+ struct cos_object *dict,
const char *key,
struct cos_object **value_out)
{
- struct cos_dictionary_entry *entry;
- struct cos_dictionary_entry **prev;
+ nspdferror res;
- if (dict->type != COS_TYPE_DICTIONARY) {
- return NSPDFERROR_TYPE;
- }
+ res = nspdf__xref_get_referenced(doc, &dict);
+ if (res == NSPDFERROR_OK) {
+ if (dict->type != COS_TYPE_DICTIONARY) {
+ res = NSPDFERROR_TYPE;
- prev = &dict->u.dictionary;
- entry = *prev;
- while (entry != NULL) {
- if (strcmp(entry->key->u.name, key) == 0) {
- *value_out = entry->value;
- *prev = entry->next;
- cos_free_object(entry->key);
- free(entry);
- return NSPDFERROR_OK;
+ } else {
+ struct cos_dictionary_entry **prev;
+ struct cos_dictionary_entry *entry;
+
+ res = NSPDFERROR_NOTFOUND;
+
+ prev = &dict->u.dictionary;
+ entry = *prev;
+ while (entry != NULL) {
+ if (strcmp(entry->key->u.name, key) == 0) {
+ *value_out = entry->value;
+ *prev = entry->next;
+ cos_free_object(entry->key);
+ free(entry);
+ res = NSPDFERROR_OK;
+ break;
+ }
+ prev = &entry->next;
+ entry = *prev;
+ }
}
- prev = &entry->next;
- entry = *prev;
}
- return NSPDFERROR_NOTFOUND;
+ return res;
+
}
diff --git a/src/cos_object.h b/src/cos_object.h
index a1a70ee..1214a65 100644
--- a/src/cos_object.h
+++ b/src/cos_object.h
@@ -149,6 +149,11 @@ nspdferror cos_free_object(struct cos_object *cos_obj);
* the entry from the dictionary. Once extracted the caller owns the returned
* object and must free it.
*
+ * Get the value for a key from a dictionary. If the dictionary is an object
+ * reference it will be dereferenced first which will parse any previously
+ * unreferenced indirect objects.
+ *
+ * \param doc The document the cos object belongs to or NULL to suppress dereferencing.
* \param dict The dictionary
* \param key The key to lookup
* \param value_out The value object associated with the key
@@ -156,7 +161,7 @@ nspdferror cos_free_object(struct cos_object *cos_obj);
* NSPDFERROR_TYPE if the object passed in \p dict is not a dictionary.
* NSPDFERROR_NOTFOUND if the key is not present in the dictionary.
*/
-nspdferror cos_extract_dictionary_value(struct cos_object *dict, const char *key, struct cos_object **value_out);
+nspdferror cos_extract_dictionary_value(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
/**
diff --git a/src/document.c b/src/document.c
index dcf8395..bd3d314 100644
--- a/src/document.c
+++ b/src/document.c
@@ -240,7 +240,7 @@ decode_xref_trailer(struct nspdf_doc *doc, unsigned int xref_offset)
goto decode_xref_trailer_failed;
}
- res = cos_extract_dictionary_value(trailer, "Root", &doc->root);
+ res = cos_extract_dictionary_value(NULL, trailer, "Root", &doc->root);
if (res != NSPDFERROR_OK) {
printf("no Root!\n");
goto decode_xref_trailer_failed;
@@ -251,17 +251,17 @@ decode_xref_trailer(struct nspdf_doc *doc, unsigned int xref_offset)
goto decode_xref_trailer_failed;
}
- res = cos_extract_dictionary_value(trailer, "Encrypt", &doc->encrypt);
+ res = cos_extract_dictionary_value(NULL, trailer, "Encrypt", &doc->encrypt);
if ((res != NSPDFERROR_OK) && (res != NSPDFERROR_NOTFOUND)) {
goto decode_xref_trailer_failed;
}
- res = cos_extract_dictionary_value(trailer, "Info", &doc->info);
+ res = cos_extract_dictionary_value(NULL, trailer, "Info", &doc->info);
if ((res != NSPDFERROR_OK) && (res != NSPDFERROR_NOTFOUND)) {
goto decode_xref_trailer_failed;
}
- res = cos_extract_dictionary_value(trailer, "ID", &doc->id);
+ res = cos_extract_dictionary_value(NULL, trailer, "ID", &doc->id);
if ((res != NSPDFERROR_OK) && (res != NSPDFERROR_NOTFOUND)) {
goto decode_xref_trailer_failed;
}
@@ -353,7 +353,7 @@ static nspdferror decode_catalog(struct nspdf_doc *doc)
return res;
}
- // Type = Catalog
+ /* Type = Catalog */
res = cos_get_dictionary_name(doc, catalog, "Type", &type);
if (res != NSPDFERROR_OK) {
return res;
@@ -362,7 +362,13 @@ static nspdferror decode_catalog(struct nspdf_doc *doc)
return NSPDFERROR_FORMAT;
}
- // Pages
+ /* Pages */
+
+ /** \todo this should get the dictionary Pages object and check it is an
+ * indirect reference (spec says it *must* be) then below has the reference
+ * to free in xref table
+ */
+
res = cos_get_dictionary_dictionary(doc, catalog, "Pages", &pages);
if (res != NSPDFERROR_OK) {
return res;
@@ -373,6 +379,10 @@ static nspdferror decode_catalog(struct nspdf_doc *doc)
return res;
}
+ /** \todo need to free referenced object when page resources tree is copied
+ * instead of being owned by the reference. Right now we leak like a sieve
+ */
+
return res;
}
diff --git a/src/page.c b/src/page.c
index d55a92f..0d12fc2 100644
--- a/src/page.c
+++ b/src/page.c
@@ -175,7 +175,7 @@ nspdf__decode_page_tree(struct nspdf_doc *doc,
}
/* optional page contents */
- res = cos_get_dictionary_value(doc,
+ res = cos_extract_dictionary_value(doc,
page_tree_node,
"Contents",
&(page->contents));
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=e6af9e997df381f053...
commit e6af9e997df381f053f3f8c85a678ad07677c791
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
correctly extract the page boundaries from the page tree
diff --git a/include/nspdf/page.h b/include/nspdf/page.h
index 9402a9e..39e6bf6 100644
--- a/include/nspdf/page.h
+++ b/include/nspdf/page.h
@@ -72,6 +72,8 @@ struct nspdf_render_ctx {
nspdferror (*path)(const struct nspdf_style *style, const float *p, unsigned int n, const float transform[6], const void *ctx);
};
+nspdferror nspdf_get_page_dimensions(struct nspdf_doc *doc, unsigned int page_number, float *width, float *height);
+
nspdferror nspdf_page_count(struct nspdf_doc *doc, unsigned int *pages_out);
nspdferror nspdf_page_render(struct nspdf_doc *doc, unsigned int page_num, struct nspdf_render_ctx* render_ctx);
diff --git a/src/cos_object.c b/src/cos_object.c
index 52731ca..12adcc9 100644
--- a/src/cos_object.c
+++ b/src/cos_object.c
@@ -30,6 +30,10 @@ static nspdferror cos_dump_object(const char *fmt, struct cos_object *cos_obj)
printf(" type = COS_TYPE_NAMETREE\n");
break;
+ case COS_TYPE_NUMBERTREE:
+ printf(" type = COS_TYPE_NUMBERTREE\n");
+ break;
+
case COS_TYPE_REFERENCE:
printf(" type = COS_TYPE_REFERENCE\n"
" u.reference->id = %lu\n"
@@ -116,7 +120,6 @@ static nspdferror cos_dump_object(const char *fmt, struct cos_object *cos_obj)
return NSPDFERROR_OK;
-
}
nspdferror cos_free_object(struct cos_object *cos_obj)
@@ -318,6 +321,8 @@ cos_heritable_dictionary_dictionary(struct nspdf_doc *doc,
return cos_get_dictionary(doc, dict_value, value_out);
}
+
+/* get an inheritable array object from a dictionary */
nspdferror
cos_get_dictionary_array(struct nspdf_doc *doc,
struct cos_object *dict,
@@ -503,6 +508,41 @@ cos_get_object(struct nspdf_doc *doc,
return res;
}
+
+nspdferror
+cos_get_rectangle(struct nspdf_doc *doc,
+ struct cos_object *cobj,
+ struct cos_rectangle *rect_out)
+{
+ nspdferror res;
+ struct cos_rectangle rect;
+
+ res = nspdf__xref_get_referenced(doc, &cobj);
+ if (res == NSPDFERROR_OK) {
+ if ((cobj->type != COS_TYPE_ARRAY) ||
+ (cobj->u.array->length != 4)) {
+ res = NSPDFERROR_TYPE;
+ } else {
+ res = cos_get_number(doc, cobj->u.array->values[0], &rect.llx);
+ if (res == NSPDFERROR_OK) {
+ res = cos_get_number(doc, cobj->u.array->values[1], &rect.lly);
+ if (res == NSPDFERROR_OK) {
+ res = cos_get_number(doc, cobj->u.array->values[2], &rect.urx);
+ if (res == NSPDFERROR_OK) {
+ res = cos_get_number(doc, cobj->u.array->values[3], &rect.ury);
+ if (res == NSPDFERROR_OK) {
+ *rect_out = rect;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return res;
+}
+
+
/*
* exported interface documented in cos_object.h
*
diff --git a/src/cos_object.h b/src/cos_object.h
index 632126e..a1a70ee 100644
--- a/src/cos_object.h
+++ b/src/cos_object.h
@@ -20,9 +20,13 @@
struct nspdf_doc;
struct content_operation;
struct cos_content;
+struct cos_object;
+/**
+ * The type of cos object in an entry.
+ */
enum cos_type {
- COS_TYPE_NULL, /* 0 */
+ COS_TYPE_NULL, /* 0 - NULL object */
COS_TYPE_BOOL,
COS_TYPE_INT,
COS_TYPE_REAL,
@@ -37,8 +41,10 @@ enum cos_type {
COS_TYPE_CONTENT, /* 12 - parsed content stream */
};
-struct cos_object;
+/**
+ * list of COS dictionary entries.
+ */
struct cos_dictionary_entry {
/** next key/value in dictionary */
struct cos_dictionary_entry *next;
@@ -50,6 +56,7 @@ struct cos_dictionary_entry {
struct cos_object *value;
};
+
/**
* array of COS objects
*/
@@ -64,6 +71,7 @@ struct cos_array {
struct cos_object **values;
};
+
/**
* COS string data
*/
@@ -73,12 +81,29 @@ struct cos_string {
uint8_t *data; /**< string data */
};
+
+/**
+ * reference to COS object
+ */
struct cos_reference {
uint64_t id; /**< id of indirect object */
uint64_t generation; /**< generation of indirect object */
};
+/**
+ * COS rectangle
+ */
+struct cos_rectangle {
+ float llx; /**< lower left x */
+ float lly; /**< lower left y */
+ float urx; /**< upper right x */
+ float ury; /**< upper right y */
+};
+
+/**
+ * Carousel object
+ */
struct cos_object {
enum cos_type type;
union {
@@ -118,10 +143,11 @@ nspdferror cos_free_object(struct cos_object *cos_obj);
/**
- * extract a value for a key from a dictionary
+ * extract a value object for a key from a dictionary
*
- * This retrieves the value of a given key in a dictionary and removes it from
- * the dictionary.
+ * This retrieves the value object of a given key in a dictionary and removes
+ * the entry from the dictionary. Once extracted the caller owns the returned
+ * object and must free it.
*
* \param dict The dictionary
* \param key The key to lookup
@@ -134,8 +160,13 @@ nspdferror cos_extract_dictionary_value(struct cos_object *dict, const char *key
/**
- * get a value for a key from a dictionary
+ * get a value object for a key from a dictionary
+ *
+ * Get the value for a key from a dictionary. If the dictionary is an object
+ * reference it will be dereferenced first which will parse any
+ * previously unreferenced indirect objects.
*
+ * \param doc The document the cos object belongs to or NULL to suppress dereferencing.
* \param dict The dictionary
* \param key The key to lookup
* \param value_out The value object associated with the key
@@ -146,7 +177,23 @@ nspdferror cos_extract_dictionary_value(struct cos_object *dict, const char *key
nspdferror cos_get_dictionary_value(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
-nspdferror cos_get_dictionary_int(struct nspdf_doc *doc, struct cos_object *dict, const char *key, int64_t *value_out);
+/**
+ * get an integer value for a key from a dictionary
+ *
+ * Get the integer value for a key from a dictionary. If the dictionary is an
+ * object reference it will be dereferenced first which will parse any
+ * previously unreferenced indirect objects.
+ *
+ * \param doc The document the cos object belongs to or NULL to suppress dereferencing.
+ * \param dict The dictionary
+ * \param key The key to lookup
+ * \param int_out The integer value associated with the key.
+ * \return NSPDFERROR_OK and \p int_out updated on success.
+ * NSPDFERROR_TYPE if the object passed in \p dict is not a dictionary
+ * or the value of the key is not an integer.
+ * NSPDFERROR_NOTFOUND if the key is not present in the dictionary.
+ */
+nspdferror cos_get_dictionary_int(struct nspdf_doc *doc, struct cos_object *dict, const char *key, int64_t *int_out);
nspdferror cos_get_dictionary_name(struct nspdf_doc *doc, struct cos_object *dict, const char *key, const char **value_out);
@@ -190,6 +237,7 @@ nspdferror cos_get_array_dictionary(struct nspdf_doc *doc, struct cos_object *ar
*/
nspdferror cos_get_int(struct nspdf_doc *doc, struct cos_object *cobj, int64_t *value_out);
+
/**
* get the float value of a cos object.
*
@@ -205,6 +253,7 @@ nspdferror cos_get_int(struct nspdf_doc *doc, struct cos_object *cobj, int64_t *
*/
nspdferror cos_get_number(struct nspdf_doc *doc, struct cos_object *cobj, float *value_out);
+
/**
* get the name value of a cos object.
*
@@ -268,6 +317,7 @@ nspdferror cos_get_dictionary(struct nspdf_doc *doc, struct cos_object *cobj, st
*/
nspdferror cos_get_array(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out);
+
/**
* get the stream value of a cos object.
*
@@ -283,6 +333,7 @@ nspdferror cos_get_array(struct nspdf_doc *doc, struct cos_object *cobj, struct
*/
nspdferror cos_get_stream(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_stream **stream_out);
+
/**
* get a direct cos object.
*
@@ -296,6 +347,7 @@ nspdferror cos_get_stream(struct nspdf_doc *doc, struct cos_object *cobj, struct
*/
nspdferror cos_get_object(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_object **object_out);
+
/**
* get a parsed content object
*
@@ -310,4 +362,17 @@ nspdferror cos_get_object(struct nspdf_doc *doc, struct cos_object *cobj, struct
*/
nspdferror cos_get_content(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_content **content_out);
+
+/**
+ * Get a rectangle
+ *
+ * Generates a synthetic rectangle object from an array of four numbers
+ *
+ * \param doc The document the cos object belongs to.
+ * \param cobj A cos object.
+ * \param rect_out The result rectangle.
+ * \return NSPDFERROR_OK and \p rect_out updated on success.
+ */
+nspdferror cos_get_rectangle(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_rectangle *rect_out);
+
#endif
diff --git a/src/page.c b/src/page.c
index e4bbae4..d55a92f 100644
--- a/src/page.c
+++ b/src/page.c
@@ -22,8 +22,13 @@
/** page entry */
struct page_table_entry {
struct cos_object *resources;
- struct cos_object *mediabox;
struct cos_object *contents;
+ struct cos_rectangle mediabox; /* extent of media - required */
+ struct cos_rectangle cropbox; /* default is mediabox */
+ struct cos_rectangle bleedbox; /* default is crop box */
+ struct cos_rectangle trimbox; /* default is crop box */
+ struct cos_rectangle artbox; /* default is crop box */
+
};
/**
@@ -90,6 +95,7 @@ nspdf__decode_page_tree(struct nspdf_doc *doc,
} else if (strcmp(type, "Page") == 0) {
struct page_table_entry *page;
+ struct cos_object *rect_array;
page = doc->page_table + (*page_index);
@@ -106,11 +112,68 @@ nspdf__decode_page_tree(struct nspdf_doc *doc,
res = cos_heritable_dictionary_array(doc,
page_tree_node,
"MediaBox",
- &(page->mediabox));
+ &rect_array);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+
+ res = cos_get_rectangle(doc, rect_array, &page->mediabox);
if (res != NSPDFERROR_OK) {
return res;
}
+ /* optional heritable crop box */
+ res = cos_heritable_dictionary_array(doc,
+ page_tree_node,
+ "CropBox",
+ &rect_array);
+ if (res == NSPDFERROR_OK) {
+ res = cos_get_rectangle(doc, rect_array, &page->cropbox);
+ }
+ if (res != NSPDFERROR_OK) {
+ /* default is mediabox */
+ page->cropbox = page->mediabox;
+ }
+
+ /* optional bleed box */
+ res = cos_get_dictionary_array(doc,
+ page_tree_node,
+ "BleedBox",
+ &rect_array);
+ if (res == NSPDFERROR_OK) {
+ res = cos_get_rectangle(doc, rect_array, &page->bleedbox);
+ }
+ if (res != NSPDFERROR_OK) {
+ /* default is cropbox */
+ page->bleedbox = page->cropbox;
+ }
+
+ /* optional trim box */
+ res = cos_get_dictionary_array(doc,
+ page_tree_node,
+ "TrimBox",
+ &rect_array);
+ if (res == NSPDFERROR_OK) {
+ res = cos_get_rectangle(doc, rect_array, &page->trimbox);
+ }
+ if (res != NSPDFERROR_OK) {
+ /* default is cropbox */
+ page->trimbox = page->cropbox;
+ }
+
+ /* optional art box */
+ res = cos_get_dictionary_array(doc,
+ page_tree_node,
+ "ArtBox",
+ &rect_array);
+ if (res == NSPDFERROR_OK) {
+ res = cos_get_rectangle(doc, rect_array, &page->artbox);
+ }
+ if (res != NSPDFERROR_OK) {
+ /* default is cropbox */
+ page->artbox = page->cropbox;
+ }
+
/* optional page contents */
res = cos_get_dictionary_value(doc,
page_tree_node,
@@ -323,7 +386,6 @@ nspdf_page_render(struct nspdf_doc *doc,
nspdferror res;
struct content_operation *operation;
unsigned int idx;
-
struct graphics_state gs;
page_entry = doc->page_table + page_number;
@@ -406,3 +468,17 @@ nspdf_page_render(struct nspdf_doc *doc,
return res;
}
+
+
+nspdferror
+nspdf_get_page_dimensions(struct nspdf_doc *doc,
+ unsigned int page_number,
+ float *width,
+ float *height)
+{
+ struct page_table_entry *page_entry;
+ page_entry = doc->page_table + page_number;
+ *width = page_entry->cropbox.urx - page_entry->cropbox.llx;
+ *height = page_entry->cropbox.ury - page_entry->cropbox.lly;
+ return NSPDFERROR_OK;
+}
diff --git a/test/parsepdf.c b/test/parsepdf.c
index f569418..ae910b7 100644
--- a/test/parsepdf.c
+++ b/test/parsepdf.c
@@ -71,16 +71,24 @@ static nspdferror render_pages(struct nspdf_doc *doc, unsigned int page_count)
struct nspdf_render_ctx render_ctx;
unsigned int page_render_list[4] = { 0, 1, 0, 1};
unsigned int page_index;
+ float page_width;
+ float page_height;
- render_ctx.device_space[0] = 1;
- render_ctx.device_space[1] = 0;
- render_ctx.device_space[2] = 0;
- render_ctx.device_space[3] = -1; /* y scale */
- render_ctx.device_space[4] = 0; /* x offset */
- render_ctx.device_space[5] = 800; /* y offset */
- render_ctx.path = pdf_path;
+ render_ctx.device_space[0] = 1;
+ render_ctx.device_space[1] = 0;
+ render_ctx.device_space[2] = 0;
+ render_ctx.device_space[3] = -1; /* y scale */
+ render_ctx.device_space[4] = 0; /* x offset */
+ render_ctx.device_space[5] = 800; /* y offset */
+ render_ctx.path = pdf_path;
for (page_index = 0; page_index < page_count; page_index++) {
+ res = nspdf_get_page_dimensions(doc,
+ page_index,
+ &page_width,
+ &page_height);
+ printf("page w:%f h:%f\n", page_width, page_height);
+
res = nspdf_page_render(doc, page_index, &render_ctx);
if (res != NSPDFERROR_OK) {
break;
@@ -88,11 +96,18 @@ static nspdferror render_pages(struct nspdf_doc *doc, unsigned int page_count)
}
for (page_index = 0; page_index < 4; page_index++) {
+ res = nspdf_get_page_dimensions(doc,
+ page_index,
+ &page_width,
+ &page_height);
+ printf("page w:%f h:%f\n", page_width, page_height);
+
res = nspdf_page_render(doc, page_render_list[page_index], &render_ctx);
if (res != NSPDFERROR_OK) {
break;
}
}
+
return res;
}
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=1ca90f384d4f2311cb...
commit 1ca90f384d4f2311cbf0abd44d8e4e5b7a4abb37
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
start actual page rendering
diff --git a/include/nspdf/page.h b/include/nspdf/page.h
index f8c280e..9402a9e 100644
--- a/include/nspdf/page.h
+++ b/include/nspdf/page.h
@@ -35,7 +35,7 @@ enum nspdf_style_operation {
*/
typedef struct nspdf_style {
enum nspdf_style_operation stroke_type; /**< Stroke plot type */
- int stroke_width; /**< Width of stroke, in pixels */
+ float stroke_width; /**< Width of stroke, in pixels */
uint32_t stroke_colour; /**< Colour of stroke XBGR */
enum nspdf_style_operation fill_type; /**< Fill plot type */
diff --git a/src/cos_content.c b/src/cos_content.c
index cf5d2e0..0e14f85 100644
--- a/src/cos_content.c
+++ b/src/cos_content.c
@@ -20,7 +20,8 @@
#include "cos_content.h"
#include "pdf_doc.h"
-static const char*operator_name(enum content_operator operator)
+const char*
+nspdf__cos_content_operator_name(enum content_operator operator)
{
switch(operator) {
case CONTENT_OP_b: return "b";
@@ -135,14 +136,14 @@ copy_numbers(unsigned int wanted,
}
if ((*operand_idx) > index) {
printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), wanted, *operand_idx);
+ nspdf__cos_content_operator_name(operation_out->operator), wanted, *operand_idx);
while (index < (*operand_idx)) {
cos_free_object(*(operands + index));
index++;
}
} else if ((*operand_idx) < index) {
printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), wanted, *operand_idx);
+ nspdf__cos_content_operator_name(operation_out->operator), wanted, *operand_idx);
}
*operand_idx = 0; /* all operands freed */
@@ -174,14 +175,14 @@ copy_integers(unsigned int wanted,
}
if ((*operand_idx) > index) {
printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), wanted, *operand_idx);
+ nspdf__cos_content_operator_name(operation_out->operator), wanted, *operand_idx);
while (index < (*operand_idx)) {
cos_free_object(*(operands + index));
index++;
}
} else if ((*operand_idx) < index) {
printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), wanted, *operand_idx);
+ nspdf__cos_content_operator_name(operation_out->operator), wanted, *operand_idx);
}
*operand_idx = 0; /* all operands freed */
@@ -200,7 +201,7 @@ copy_string(struct cos_object **operands,
if ((*operand_idx) == 0) {
printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), 1, *operand_idx);
+ nspdf__cos_content_operator_name(operation_out->operator), 1, *operand_idx);
operation_out->u.string.length = 0;
return NSPDFERROR_OK;
}
@@ -232,7 +233,7 @@ copy_string(struct cos_object **operands,
if ((*operand_idx) > 1) {
printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), 1, *operand_idx);
+ nspdf__cos_content_operator_name(operation_out->operator), 1, *operand_idx);
}
/* free all operands */
@@ -254,7 +255,7 @@ copy_array(struct cos_object **operands,
if ((*operand_idx) == 0) {
printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), 1, *operand_idx);
+ nspdf__cos_content_operator_name(operation_out->operator), 1, *operand_idx);
operation_out->u.array.length = 0;
return NSPDFERROR_OK;
}
@@ -273,7 +274,7 @@ copy_array(struct cos_object **operands,
if ((*operand_idx) > 1) {
printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), 1, *operand_idx);
+ nspdf__cos_content_operator_name(operation_out->operator), 1, *operand_idx);
}
/* free all operands */
@@ -296,7 +297,7 @@ copy_name(struct cos_object **operands,
if ((*operand_idx) == 0) {
printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), 1, *operand_idx);
+ nspdf__cos_content_operator_name(operation_out->operator), 1, *operand_idx);
operation_out->u.name = NULL;
return NSPDFERROR_OK;
}
@@ -313,7 +314,7 @@ copy_name(struct cos_object **operands,
if ((*operand_idx) > 1) {
printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), 1, *operand_idx);
+ nspdf__cos_content_operator_name(operation_out->operator), 1, *operand_idx);
}
/* free all operands */
@@ -335,7 +336,7 @@ copy_name_number(struct cos_object **operands,
if ((*operand_idx) == 0) {
printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), 2, *operand_idx);
+ nspdf__cos_content_operator_name(operation_out->operator), 2, *operand_idx);
operation_out->u.namenumber.name = NULL;
return NSPDFERROR_OK;
}
@@ -361,13 +362,13 @@ copy_name_number(struct cos_object **operands,
}
} else {
printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), 2, *operand_idx);
+ nspdf__cos_content_operator_name(operation_out->operator), 2, *operand_idx);
}
}
if ((*operand_idx) > 2) {
printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), 2, *operand_idx);
+ nspdf__cos_content_operator_name(operation_out->operator), 2, *operand_idx);
}
/* free all operands */
@@ -390,7 +391,7 @@ copy_array_int(struct cos_object **operands,
if ((*operand_idx) == 0) {
printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), 2, *operand_idx);
+ nspdf__cos_content_operator_name(operation_out->operator), 2, *operand_idx);
operation_out->u.namenumber.name = NULL;
return NSPDFERROR_OK;
}
@@ -418,13 +419,13 @@ copy_array_int(struct cos_object **operands,
}
} else {
printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), 2, *operand_idx);
+ nspdf__cos_content_operator_name(operation_out->operator), 2, *operand_idx);
}
}
if ((*operand_idx) > 2) {
printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), 2, *operand_idx);
+ nspdf__cos_content_operator_name(operation_out->operator), 2, *operand_idx);
}
/* free all operands */
diff --git a/src/cos_content.h b/src/cos_content.h
index e0d2dfb..09700c2 100644
--- a/src/cos_content.h
+++ b/src/cos_content.h
@@ -193,9 +193,9 @@ enum content_operator {
#define content_string_intrnl_lngth ((sizeof(float) * content_number_size) - sizeof(uint8_t *))
-struct content_operation
-{
+struct content_operation {
enum content_operator operator;
+
union {
float number[content_number_size];
@@ -231,6 +231,19 @@ struct content_operation
};
/**
+ * Synthetic parsed content object.
+ */
+struct cos_content {
+ unsigned int length; /**< number of content operations */
+ unsigned int alloc; /**< number of allocated operations */
+ struct content_operation *operations;
+};
+
+
+const char* nspdf__cos_content_operator_name(enum content_operator operator);
+
+
+/**
* convert an operator and operand list into an operation
*/
nspdferror nspdf__cos_content_convert(enum content_operator operator, struct cos_object **operands, unsigned int *operand_idx, struct content_operation *operation_out);
diff --git a/src/cos_object.c b/src/cos_object.c
index 5d7da19..52731ca 100644
--- a/src/cos_object.c
+++ b/src/cos_object.c
@@ -17,6 +17,7 @@
#include <nspdf/errors.h>
#include "xref.h"
+#include "cos_content.h"
#include "cos_object.h"
#include "cos_parse.h"
#include "pdf_doc.h"
diff --git a/src/cos_object.h b/src/cos_object.h
index 79a1c8d..632126e 100644
--- a/src/cos_object.h
+++ b/src/cos_object.h
@@ -19,6 +19,7 @@
struct nspdf_doc;
struct content_operation;
+struct cos_content;
enum cos_type {
COS_TYPE_NULL, /* 0 */
@@ -78,16 +79,6 @@ struct cos_reference {
};
-/**
- * Synthetic parsed content object.
- *
- */
-struct cos_content {
- unsigned int length; /**< number of content operations */
- unsigned int alloc; /**< number of allocated operations */
- struct content_operation *operations;
-};
-
struct cos_object {
enum cos_type type;
union {
diff --git a/src/page.c b/src/page.c
index 5b91416..e4bbae4 100644
--- a/src/page.c
+++ b/src/page.c
@@ -15,6 +15,7 @@
#include <stdio.h>
#include <nspdf/page.h>
+#include "cos_content.h"
#include "cos_object.h"
#include "pdf_doc.h"
@@ -146,6 +147,170 @@ nspdf_page_count(struct nspdf_doc *doc, unsigned int *pages_out)
return NSPDFERROR_OK;
}
+/**
+ * colourspaces
+ * \todo extend this with full list from section 4.5.2
+ */
+enum graphics_state_colorspace {
+ GSDeviceGray = 0, /* Default */
+ GSDeviceRGB,
+ GSDeviceCMYK,
+};
+
+struct graphics_state_color {
+ enum graphics_state_colorspace space;
+ union {
+ float gray; /* default is 0 - black */
+ float rgb[3];
+ float cmyk[3];
+ };
+};
+
+struct graphics_state_param {
+ float ctm[6]; /* current transform matrix */
+ /* clipping path */
+ struct graphics_state_color stroke_colour;
+ struct graphics_state_color other_colour;
+ /* text state */
+ float line_width;
+ unsigned int line_cap;
+ unsigned int line_join;
+ float miter_limit;
+ /* dash pattern */
+ /* rendering intent RelativeColorimetric */
+ bool stroke_adjustment;
+ /* blend mode: Normal */
+ /* soft mask */
+ /* alpha constant */
+ /* alpha source */
+};
+
+struct graphics_state {
+ float *path; /* current path */
+ unsigned int path_idx; /* current index into path */
+ unsigned int path_alloc; /* current number of path elements allocated */
+
+ struct graphics_state_param *param_stack; /* parameter stack */
+ unsigned int param_stack_idx;
+ unsigned int param_stack_alloc;
+};
+
+static inline nspdferror
+render_operation_m(struct content_operation *operation, struct graphics_state *gs)
+{
+ gs->path[gs->path_idx++] = NSPDF_PATH_MOVE;
+ gs->path[gs->path_idx++] = operation->u.number[0];
+ gs->path[gs->path_idx++] = operation->u.number[1];
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
+render_operation_l(struct content_operation *operation, struct graphics_state *gs)
+{
+ gs->path[gs->path_idx++] = NSPDF_PATH_LINE;
+ gs->path[gs->path_idx++] = operation->u.number[0];
+ gs->path[gs->path_idx++] = operation->u.number[1];
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
+render_operation_re(struct content_operation *operation, struct graphics_state *gs)
+{
+ gs->path[gs->path_idx++] = NSPDF_PATH_MOVE;
+ gs->path[gs->path_idx++] = operation->u.number[0]; /* x */
+ gs->path[gs->path_idx++] = operation->u.number[1]; /* y */
+ gs->path[gs->path_idx++] = NSPDF_PATH_LINE;
+ gs->path[gs->path_idx++] = operation->u.number[0] + operation->u.number[2];
+ gs->path[gs->path_idx++] = operation->u.number[1];
+ gs->path[gs->path_idx++] = NSPDF_PATH_LINE;
+ gs->path[gs->path_idx++] = operation->u.number[0] + operation->u.number[2];
+ gs->path[gs->path_idx++] = operation->u.number[1] + operation->u.number[3];
+ gs->path[gs->path_idx++] = NSPDF_PATH_LINE;
+ gs->path[gs->path_idx++] = operation->u.number[0];
+ gs->path[gs->path_idx++] = operation->u.number[1] + operation->u.number[3];
+ gs->path[gs->path_idx++] = NSPDF_PATH_CLOSE;
+
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
+render_operation_h(struct graphics_state *gs)
+{
+ gs->path[gs->path_idx++] = NSPDF_PATH_CLOSE;
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
+render_operation_f(struct graphics_state *gs, struct nspdf_render_ctx* render_ctx)
+{
+ struct nspdf_style style;
+ style.stroke_type = NSPDF_OP_TYPE_NONE;
+ style.stroke_colour = 0x01000000;
+ style.fill_type = NSPDF_OP_TYPE_SOLID;
+ style.fill_colour = 0;
+
+ render_ctx->path(&style,
+ gs->path,
+ gs->path_idx,
+ gs->param_stack[gs->param_stack_idx].ctm,
+ render_ctx->ctx);
+ gs->path_idx = 0;
+ return NSPDFERROR_OK;
+}
+
+
+static inline nspdferror
+render_operation_S(struct graphics_state *gs, struct nspdf_render_ctx* render_ctx)
+{
+ struct nspdf_style style;
+
+ style.stroke_type = NSPDF_OP_TYPE_SOLID;
+ style.stroke_colour = 0;
+ style.stroke_width = gs->param_stack[gs->param_stack_idx].line_width;
+ style.fill_type = NSPDF_OP_TYPE_NONE;
+ style.fill_colour = 0x01000000;
+ render_ctx->path(&style,
+ gs->path,
+ gs->path_idx,
+ gs->param_stack[gs->param_stack_idx].ctm,
+ render_ctx->ctx);
+ gs->path_idx = 0;
+ return NSPDFERROR_OK;
+}
+
+static inline nspdferror
+render_operation_w(struct content_operation *operation, struct graphics_state *gs)
+{
+ gs->param_stack[gs->param_stack_idx].line_width = operation->u.number[0];
+ return NSPDFERROR_OK;
+}
+
+/**
+ * Initialise the parameter stack
+ *
+ * allocates the initial parameter stack and initialises the defaults
+ */
+static nspdferror
+init_param_stack(struct graphics_state *gs, struct nspdf_render_ctx* render_ctx)
+{
+ gs->param_stack_alloc = 16; /* start with 16 deep parameter stack */
+ gs->param_stack_idx = 0;
+ gs->param_stack = calloc(gs->param_stack_alloc,
+ sizeof(struct graphics_state_param));
+ if (gs->param_stack == NULL) {
+ return NSPDFERROR_NOMEM;
+ }
+
+ gs->param_stack[0].ctm[0] = render_ctx->device_space[0];
+ gs->param_stack[0].ctm[1] = render_ctx->device_space[1];
+ gs->param_stack[0].ctm[2] = render_ctx->device_space[2];
+ gs->param_stack[0].ctm[3] = render_ctx->device_space[3];
+ gs->param_stack[0].ctm[4] = render_ctx->device_space[4];
+ gs->param_stack[0].ctm[5] = render_ctx->device_space[5];
+ gs->param_stack[0].line_width = 1.0;
+
+ return NSPDFERROR_OK;
+}
/* exported interface documented in nspdf/page.h */
nspdferror
@@ -156,6 +321,10 @@ nspdf_page_render(struct nspdf_doc *doc,
struct page_table_entry *page_entry;
struct cos_content *page_content; /* page operations array */
nspdferror res;
+ struct content_operation *operation;
+ unsigned int idx;
+
+ struct graphics_state gs;
page_entry = doc->page_table + page_number;
@@ -166,5 +335,74 @@ nspdf_page_render(struct nspdf_doc *doc,
printf("page %d content:%p\n", page_number, page_content);
+ gs.path_idx = 0;
+ gs.path_alloc = 8192;
+ gs.path = malloc(gs.path_alloc * sizeof(float));
+ if (gs.path == NULL) {
+ return NSPDFERROR_NOMEM;
+ }
+
+ res = init_param_stack(&gs, render_ctx);
+ if (res != NSPDFERROR_OK) {
+ free(gs.path);
+ return res;
+ }
+
+ /* iterate over operations */
+ for (idx = 0, operation = page_content->operations;
+ idx < page_content->length;
+ idx++, operation++) {
+ switch(operation->operator) {
+ case CONTENT_OP_m: /* move */
+ res = render_operation_m(operation, &gs);
+ break;
+
+ case CONTENT_OP_l: /* line */
+ res = render_operation_l(operation, &gs);
+ break;
+
+ case CONTENT_OP_re: /* rectangle */
+ res = render_operation_re(operation, &gs);
+ break;
+
+ case CONTENT_OP_h: /* close path */
+ res = render_operation_h(&gs);
+ break;
+
+ case CONTENT_OP_f:
+ case CONTENT_OP_f_:
+ case CONTENT_OP_B:
+ case CONTENT_OP_B_:
+ case CONTENT_OP_b:
+ case CONTENT_OP_b_:
+ res = render_operation_f(&gs, render_ctx);
+ break;
+
+ case CONTENT_OP_s:
+ render_operation_h(&gs);
+ res = render_operation_S(&gs, render_ctx);
+ break;
+
+ case CONTENT_OP_S:
+ res = render_operation_S(&gs, render_ctx);
+ break;
+
+ case CONTENT_OP_w:
+ res = render_operation_w(operation, &gs);
+ //printf("line width:%f\n", gs.param_stack[gs.param_stack_idx].line_width);
+ break;
+
+ default:
+ printf("operator %s\n",
+ nspdf__cos_content_operator_name(operation->operator));
+ break;
+
+ }
+
+ }
+
+ free(gs.param_stack);
+ free(gs.path);
+
return res;
}
diff --git a/test/parsepdf.c b/test/parsepdf.c
index 054e96d..f569418 100644
--- a/test/parsepdf.c
+++ b/test/parsepdf.c
@@ -55,6 +55,16 @@ read_whole_pdf(const char *fname, uint8_t **buffer, uint64_t *buffer_length)
return NSPDFERROR_OK;
}
+static nspdferror
+pdf_path(const struct nspdf_style *style,
+ const float *path,
+ unsigned int path_length,
+ const float transform[6],
+ const void *ctxin)
+{
+ return NSPDFERROR_OK;
+}
+
static nspdferror render_pages(struct nspdf_doc *doc, unsigned int page_count)
{
nspdferror res;
@@ -62,6 +72,14 @@ static nspdferror render_pages(struct nspdf_doc *doc, unsigned int page_count)
unsigned int page_render_list[4] = { 0, 1, 0, 1};
unsigned int page_index;
+ render_ctx.device_space[0] = 1;
+ render_ctx.device_space[1] = 0;
+ render_ctx.device_space[2] = 0;
+ render_ctx.device_space[3] = -1; /* y scale */
+ render_ctx.device_space[4] = 0; /* x offset */
+ render_ctx.device_space[5] = 800; /* y offset */
+ render_ctx.path = pdf_path;
+
for (page_index = 0; page_index < page_count; page_index++) {
res = nspdf_page_render(doc, page_index, &render_ctx);
if (res != NSPDFERROR_OK) {
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=6b16f2446bb381be3d...
commit 6b16f2446bb381be3de54d9edfe9088a8363df70
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
extend page interface with render context
diff --git a/Makefile b/Makefile
index f6c261f..04f8218 100644
--- a/Makefile
+++ b/Makefile
@@ -57,5 +57,6 @@ I := /$(INCLUDEDIR)/nspdf
INSTALL_ITEMS := $(INSTALL_ITEMS) $(I):include/nspdf/document.h
INSTALL_ITEMS := $(INSTALL_ITEMS) $(I):include/nspdf/meta.h
INSTALL_ITEMS := $(INSTALL_ITEMS) $(I):include/nspdf/errors.h
+INSTALL_ITEMS := $(INSTALL_ITEMS) $(I):include/nspdf/page.h
INSTALL_ITEMS := $(INSTALL_ITEMS) /$(LIBDIR)/pkgconfig:lib$(COMPONENT).pc.in
INSTALL_ITEMS := $(INSTALL_ITEMS) /$(LIBDIR):$(OUTPUT)
diff --git a/include/nspdf/page.h b/include/nspdf/page.h
index 8c1d7fc..f8c280e 100644
--- a/include/nspdf/page.h
+++ b/include/nspdf/page.h
@@ -19,8 +19,61 @@
struct nspdf_doc;
+/**
+ * Type of plot operation
+ */
+enum nspdf_style_operation {
+ NSPDF_OP_TYPE_NONE = 0, /**< No operation */
+ NSPDF_OP_TYPE_SOLID, /**< Solid colour */
+ NSPDF_OP_TYPE_DOT, /**< Dotted plot */
+ NSPDF_OP_TYPE_DASH, /**< Dashed plot */
+};
+
+
+/**
+ * Plot style for stroke/fill plotters
+ */
+typedef struct nspdf_style {
+ enum nspdf_style_operation stroke_type; /**< Stroke plot type */
+ int stroke_width; /**< Width of stroke, in pixels */
+ uint32_t stroke_colour; /**< Colour of stroke XBGR */
+
+ enum nspdf_style_operation fill_type; /**< Fill plot type */
+ uint32_t fill_colour; /**< Colour of fill XBGR */
+} nspdf_style;
+
+
+enum nspdf_path_command {
+ NSPDF_PATH_MOVE = 0,
+ NSPDF_PATH_CLOSE,
+ NSPDF_PATH_LINE,
+ NSPDF_PATH_BEZIER,
+};
+
+struct nspdf_render_ctx {
+ const void *ctx; /**< context passed to drawing functions */
+
+ float device_space[6]; /* user space to device space transformation matrix */
+
+ /**
+ * Plots a path.
+ *
+ * Path plot consisting of lines and cubic Bezier curves. Line and fill
+ * colour is controlled by the style. All elements of the path are in
+ * device space.
+ *
+ * \param style Style controlling the path plot.
+ * \param p elements of path
+ * \param n number of elements in path
+ * \param transform A transform to apply to the path.
+ * \param ctx The drawing context.
+ * \return NSPDFERROR_OK on success else error code.
+ */
+ nspdferror (*path)(const struct nspdf_style *style, const float *p, unsigned int n, const float transform[6], const void *ctx);
+};
+
nspdferror nspdf_page_count(struct nspdf_doc *doc, unsigned int *pages_out);
-nspdferror nspdf_page_render(struct nspdf_doc *doc, unsigned int page_num);
+nspdferror nspdf_page_render(struct nspdf_doc *doc, unsigned int page_num, struct nspdf_render_ctx* render_ctx);
#endif /* NSPDF_META_H_ */
diff --git a/src/cos_parse.c b/src/cos_parse.c
index dea67f9..df706a4 100644
--- a/src/cos_parse.c
+++ b/src/cos_parse.c
@@ -1424,7 +1424,7 @@ cos_parse_content_streams(struct nspdf_doc *doc,
if (cosobj->u.content == NULL) {
res = NSPDFERROR_NOMEM;
goto cos_parse_content_stream_error;
- }
+ }
for (stream_index = 0; stream_index < stream_count; stream_index++) {
stream = *(streams + stream_index);
diff --git a/src/page.c b/src/page.c
index 5299c7c..5b91416 100644
--- a/src/page.c
+++ b/src/page.c
@@ -149,7 +149,9 @@ nspdf_page_count(struct nspdf_doc *doc, unsigned int *pages_out)
/* exported interface documented in nspdf/page.h */
nspdferror
-nspdf_page_render(struct nspdf_doc *doc, unsigned int page_number)
+nspdf_page_render(struct nspdf_doc *doc,
+ unsigned int page_number,
+ struct nspdf_render_ctx* render_ctx)
{
struct page_table_entry *page_entry;
struct cos_content *page_content; /* page operations array */
@@ -158,8 +160,11 @@ nspdf_page_render(struct nspdf_doc *doc, unsigned int page_number)
page_entry = doc->page_table + page_number;
res = cos_get_content(doc, page_entry->contents, &page_content);
- if (res == NSPDFERROR_OK) {
- printf("%p\n", page_content);
+ if (res != NSPDFERROR_OK) {
+ return res;
}
+
+ printf("page %d content:%p\n", page_number, page_content);
+
return res;
}
diff --git a/test/parsepdf.c b/test/parsepdf.c
index 7a64f4b..054e96d 100644
--- a/test/parsepdf.c
+++ b/test/parsepdf.c
@@ -55,6 +55,28 @@ read_whole_pdf(const char *fname, uint8_t **buffer, uint64_t *buffer_length)
return NSPDFERROR_OK;
}
+/**
+ * Render pages of a document to exercise the page render path.
+ *
+ * Every page is rendered once in order, then pages 0 and 1 are rendered
+ * again (presumably to exercise rendering of content that has already been
+ * converted - confirm). Pages the document does not have are skipped.
+ *
+ * \param doc The document to render pages from.
+ * \param page_count The number of pages in the document.
+ * \return NSPDFERROR_OK on success else the code of the first failed render.
+ */
+static nspdferror render_pages(struct nspdf_doc *doc, unsigned int page_count)
+{
+    nspdferror res = NSPDFERROR_OK;
+    /* no plotters are supplied yet, but never hand over indeterminate data */
+    struct nspdf_render_ctx render_ctx = { .ctx = NULL };
+    static const unsigned int page_render_list[] = { 0, 1, 0, 1 };
+    const unsigned int list_length =
+        sizeof(page_render_list) / sizeof(page_render_list[0]);
+    unsigned int page_index;
+
+    for (page_index = 0; page_index < page_count; page_index++) {
+        res = nspdf_page_render(doc, page_index, &render_ctx);
+        if (res != NSPDFERROR_OK) {
+            /* return now so the repeat pass cannot overwrite the failure */
+            return res;
+        }
+    }
+
+    for (page_index = 0; page_index < list_length; page_index++) {
+        unsigned int page_num = page_render_list[page_index];
+
+        if (page_num >= page_count) {
+            /* document is too short to have this page */
+            continue;
+        }
+
+        res = nspdf_page_render(doc, page_num, &render_ctx);
+        if (res != NSPDFERROR_OK) {
+            return res;
+        }
+    }
+
+    return res;
+}
int main(int argc, char **argv)
{
@@ -94,17 +116,15 @@ int main(int argc, char **argv)
}
res = nspdf_page_count(doc, &page_count);
- if (res != NSPDFERROR_OK) {
- printf("page count failed (%d)\n", res);
- return res;
+ if (res == NSPDFERROR_OK) {
+ printf("Pages:%d\n", page_count);
}
- printf("Pages:%d\n", page_count);
- res = nspdf_page_render(doc, 0);
- if (res != NSPDFERROR_OK) {
- printf("page render failed (%d)\n", res);
- return res;
- }
+ res = render_pages(doc, page_count);
+ if (res != NSPDFERROR_OK) {
+ printf("page render failed (%d)\n", res);
+ return res;
+ }
res = nspdf_document_destroy(doc);
if (res != NSPDFERROR_OK) {
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=887175cabfbf3cea83...
commit 887175cabfbf3cea833583834937eadf9654e07a
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
fix bug when content conversion has already been performed
When a content stream had already been converted, instead of skipping the
entire conversion and returning the previously converted content, half
the conversion was performed a second time, generating utter garbage
and crashes.
Additionally add an object dump debug helper
diff --git a/src/cos_object.c b/src/cos_object.c
index 0c97190..5d7da19 100644
--- a/src/cos_object.c
+++ b/src/cos_object.c
@@ -21,6 +21,102 @@
#include "cos_parse.h"
#include "pdf_doc.h"
+/**
+ * Debug helper that prints a summary of a COS object to stdout.
+ *
+ * Only the object type and the values held directly by the object are shown;
+ * strings, dictionaries, arrays and streams are identified by type only.
+ *
+ * \param fmt Heading printed verbatim on the line before the summary.
+ * \param cos_obj The object to summarise, may be NULL.
+ * \return NSPDFERROR_OK
+ */
+static nspdferror cos_dump_object(const char *fmt, struct cos_object *cos_obj)
+{
+    printf("%s\n", fmt);
+
+    if (cos_obj == NULL) {
+        printf(" object = NULL\n");
+        return NSPDFERROR_OK;
+    }
+
+    switch (cos_obj->type) {
+    case COS_TYPE_NAMETREE:
+        printf(" type = COS_TYPE_NAMETREE\n");
+        break;
+
+    case COS_TYPE_REFERENCE:
+        /* cast so the conversion matches whatever integer type is used */
+        printf(" type = COS_TYPE_REFERENCE\n"
+               " u.reference->id = %llu\n"
+               " u.reference->generation = %llu\n",
+               (unsigned long long)cos_obj->u.reference->id,
+               (unsigned long long)cos_obj->u.reference->generation);
+        break;
+
+    case COS_TYPE_NULL:
+        printf(" type = COS_TYPE_NULL\n");
+        break;
+
+    case COS_TYPE_CONTENT:
+        /* %p requires a pointer to void */
+        printf(" type = COS_TYPE_CONTENT\n"
+               " u.content->length = %d\n"
+               " u.content->alloc = %d\n"
+               " u.content->operations = %p\n",
+               cos_obj->u.content->length,
+               cos_obj->u.content->alloc,
+               (void *)cos_obj->u.content->operations);
+        break;
+
+    case COS_TYPE_BOOL:
+        printf(" type = COS_TYPE_BOOL\n u.b = %s\n",
+               cos_obj->u.b ? "true" : "false");
+        break;
+
+    case COS_TYPE_INT:
+        /* cast as the integer is not necessarily a long on every target */
+        printf(" type = COS_TYPE_INT\n u.i = %lld\n",
+               (long long)cos_obj->u.i);
+        break;
+
+    case COS_TYPE_REAL:
+        printf(" type = COS_TYPE_REAL\n u.real = %f\n", cos_obj->u.real);
+        break;
+
+    case COS_TYPE_NAME:
+        printf(" type = COS_TYPE_NAME\n u.name = %s\n", cos_obj->u.name);
+        break;
+
+    case COS_TYPE_STRING:
+        printf(" type = COS_TYPE_STRING\n");
+        break;
+
+    case COS_TYPE_DICTIONARY:
+        printf(" type = COS_TYPE_DICTIONARY\n");
+        break;
+
+    case COS_TYPE_ARRAY:
+        printf(" type = COS_TYPE_ARRAY\n");
+        break;
+
+    case COS_TYPE_STREAM:
+        printf(" type = COS_TYPE_STREAM\n");
+        break;
+
+    default:
+        /* a corrupt object is exactly what a dump should make visible */
+        printf(" type = unknown (%d)\n", (int)cos_obj->type);
+        break;
+    }
+
+    return NSPDFERROR_OK;
+}
nspdferror cos_free_object(struct cos_object *cos_obj)
{
@@ -173,9 +269,9 @@ cos_get_dictionary_name(struct nspdf_doc *doc,
nspdferror
cos_get_dictionary_string(struct nspdf_doc *doc,
- struct cos_object *dict,
- const char *key,
- struct cos_string **string_out)
+ struct cos_object *dict,
+ const char *key,
+ struct cos_string **string_out)
{
nspdferror res;
struct cos_object *dict_value;
@@ -189,9 +285,9 @@ cos_get_dictionary_string(struct nspdf_doc *doc,
nspdferror
cos_get_dictionary_dictionary(struct nspdf_doc *doc,
- struct cos_object *dict,
- const char *key,
- struct cos_object **value_out)
+ struct cos_object *dict,
+ const char *key,
+ struct cos_object **value_out)
{
nspdferror res;
struct cos_object *dict_value;
@@ -205,9 +301,9 @@ cos_get_dictionary_dictionary(struct nspdf_doc *doc,
nspdferror
cos_heritable_dictionary_dictionary(struct nspdf_doc *doc,
- struct cos_object *dict,
- const char *key,
- struct cos_object **value_out)
+ struct cos_object *dict,
+ const char *key,
+ struct cos_object **value_out)
{
nspdferror res;
struct cos_object *dict_value;
@@ -223,9 +319,9 @@ cos_heritable_dictionary_dictionary(struct nspdf_doc *doc,
nspdferror
cos_get_dictionary_array(struct nspdf_doc *doc,
- struct cos_object *dict,
- const char *key,
- struct cos_object **value_out)
+ struct cos_object *dict,
+ const char *key,
+ struct cos_object **value_out)
{
nspdferror res;
struct cos_object *dict_value;
@@ -239,9 +335,9 @@ cos_get_dictionary_array(struct nspdf_doc *doc,
nspdferror
cos_heritable_dictionary_array(struct nspdf_doc *doc,
- struct cos_object *dict,
- const char *key,
- struct cos_object **value_out)
+ struct cos_object *dict,
+ const char *key,
+ struct cos_object **value_out)
{
nspdferror res;
struct cos_object *dict_value;
@@ -296,8 +392,8 @@ cos_get_number(struct nspdf_doc *doc,
nspdferror
cos_get_name(struct nspdf_doc *doc,
- struct cos_object *cobj,
- const char **value_out)
+ struct cos_object *cobj,
+ const char **value_out)
{
nspdferror res;
@@ -334,8 +430,8 @@ cos_get_dictionary(struct nspdf_doc *doc,
nspdferror
cos_get_array(struct nspdf_doc *doc,
- struct cos_object *cobj,
- struct cos_object **value_out)
+ struct cos_object *cobj,
+ struct cos_object **value_out)
{
nspdferror res;
@@ -410,7 +506,7 @@ cos_get_object(struct nspdf_doc *doc,
* exported interface documented in cos_object.h
*
* slightly different behaviour to other getters:
-
+ *
* - This getter can be passed an object pointer to a synthetic parsed content
* stream object in which case it returns that objects content operation
* list.
@@ -437,10 +533,14 @@ cos_get_content(struct nspdf_doc *doc,
struct cos_object *content_obj; /* parsed content object */
struct cos_object tmpobj;
- /* already parsed the content stream */
+ //cos_dump_object("get content of", cobj);
+
if (cobj->type == COS_TYPE_CONTENT) {
- *content_out = cobj->u.content;
- } else if (cobj->type == COS_TYPE_REFERENCE) {
+ /* already parsed the content stream */
+ goto cos_get_content_done;
+ }
+
+ if (cobj->type == COS_TYPE_REFERENCE) {
/* single reference */
reference_count = 1;
references = calloc(reference_count, sizeof(struct cos_object *));
@@ -504,10 +604,15 @@ cos_get_content(struct nspdf_doc *doc,
tmpobj = *cobj;
*cobj = *content_obj;
*content_obj = tmpobj;
+
+ //cos_dump_object("content object", cobj);
+ //cos_dump_object("free object", content_obj);
+
cos_free_object(content_obj);
/** \todo call nspdf__xref_free_referenced(doc, *(references + index)); to free up storage associated with already parsed streams */
+cos_get_content_done:
*content_out = cobj->u.content;
return NSPDFERROR_OK;
@@ -541,9 +646,9 @@ cos_get_array_value(struct nspdf_doc *doc,
nspdferror
cos_get_array_dictionary(struct nspdf_doc *doc,
- struct cos_object *array,
- unsigned int index,
- struct cos_object **value_out)
+ struct cos_object *array,
+ unsigned int index,
+ struct cos_object **value_out)
{
nspdferror res;
struct cos_object *array_value;
diff --git a/src/cos_object.h b/src/cos_object.h
index 56c2179..79a1c8d 100644
--- a/src/cos_object.h
+++ b/src/cos_object.h
@@ -89,7 +89,7 @@ struct cos_content {
};
struct cos_object {
- int type;
+ enum cos_type type;
union {
/** boolean */
bool b;
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=81543d2b980bf89c17...
commit 81543d2b980bf89c1797bf8f65e195f3a1f70414
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
extend content conversion to include more parameter types
diff --git a/src/cos_content.c b/src/cos_content.c
index 6978606..cf5d2e0 100644
--- a/src/cos_content.c
+++ b/src/cos_content.c
@@ -380,6 +380,63 @@ copy_name_number(struct cos_object **operands,
return NSPDFERROR_OK;
}
+
+/**
+ * move an array operand and an integer operand from list into operation
+ *
+ * The values of the array are taken over from the operand object rather than
+ * copied. All operands passed are freed, not just the two wanted ones.
+ *
+ * \param operands The array of operands from the parse
+ * \param operand_idx The number of operands from the parse, set to zero once
+ *                    the operands have been freed
+ * \param operation_out The operation to place the array and integer in
+ * \return NSPDFERROR_OK, operand problems are reported on stdout only
+ */
+static nspdferror
+copy_array_int(struct cos_object **operands,
+               unsigned int *operand_idx,
+               struct content_operation *operation_out)
+{
+    unsigned int index = 0;
+
+    /* start from an empty result so every path leaves arrayint fully set */
+    operation_out->u.arrayint.length = 0;
+    operation_out->u.arrayint.values = NULL;
+    operation_out->u.arrayint.i = 0;
+
+    if ((*operand_idx) == 0) {
+        printf("operator %s that takes %d operands passed %d\n",
+               operator_name(operation_out->operator), 2, *operand_idx);
+        return NSPDFERROR_OK;
+    }
+
+    /* process wanted operands */
+    if ((*operands)->type != COS_TYPE_ARRAY) {
+        printf("operand was not an array\n");
+    } else {
+        operation_out->u.arrayint.length = (*operands)->u.array->length;
+        /* steal the values from the array object */
+        operation_out->u.arrayint.values = (*operands)->u.array->values;
+        (*operands)->u.array->alloc = 0;
+        (*operands)->u.array->length = 0;
+
+        /* get the int */
+        if ((*operand_idx) > 1) {
+            nspdferror res;
+            res = cos_get_int(NULL,
+                              *(operands + 1),
+                              &operation_out->u.arrayint.i);
+            if (res != NSPDFERROR_OK) {
+                printf("operand 1 could not be set in operation (code %d)\n", res);
+            }
+        } else {
+            printf("operator %s that takes %d operands passed %d\n",
+                   operator_name(operation_out->operator), 2, *operand_idx);
+        }
+    }
+
+    if ((*operand_idx) > 2) {
+        printf("operator %s that takes %d operands passed %d\n",
+               operator_name(operation_out->operator), 2, *operand_idx);
+    }
+
+    /* free all operands */
+    while (index < (*operand_idx)) {
+        cos_free_object(*(operands + index));
+        index++;
+    }
+    *operand_idx = 0;
+
+    return NSPDFERROR_OK;
+}
+
/* exported interface documented in cos_content.h */
nspdferror
nspdf__cos_content_convert(enum content_operator operator,
@@ -483,31 +540,35 @@ nspdf__cos_content_convert(enum content_operator operator,
break;
case CONTENT_OP_gs:
+ case CONTENT_OP_Do:
+ case CONTENT_OP_ri:
+ case CONTENT_OP_CS:
+ case CONTENT_OP_cs:
+ case CONTENT_OP_sh:
+ case CONTENT_OP_MP:
+ case CONTENT_OP_BMC:
/* name */
res = copy_name(operands, operand_idx, operation_out);
break;
case CONTENT_OP_j:
case CONTENT_OP_J:
+ case CONTENT_OP_Tr:
/* one integer */
res = copy_integers(1, operands, operand_idx, operation_out);
break;
- case CONTENT_OP_BDC:
- case CONTENT_OP_BMC:
- case CONTENT_OP_CS:
- case CONTENT_OP_cs:
case CONTENT_OP_d:
- case CONTENT_OP_Do:
+ /* array and int */
+ res = copy_array_int(operands, operand_idx, operation_out);
+ break;
+
+ case CONTENT_OP_BDC:
case CONTENT_OP_DP:
- case CONTENT_OP_MP:
- case CONTENT_OP_ri:
case CONTENT_OP_SC:
case CONTENT_OP_sc:
case CONTENT_OP_SCN:
case CONTENT_OP_scn:
- case CONTENT_OP_sh:
- case CONTENT_OP_Tr:
case CONTENT_OP___:
res = copy_numbers(0, operands, operand_idx, operation_out);
break;
diff --git a/src/cos_content.h b/src/cos_content.h
index 9921e2b..e0d2dfb 100644
--- a/src/cos_content.h
+++ b/src/cos_content.h
@@ -220,6 +220,13 @@ struct content_operation
char *name;
float number;
} namenumber;
+
+ struct {
+ unsigned int length;
+ struct cos_object **values;
+ int64_t i;
+ } arrayint;
+
} u;
};
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=f5b45e89d18ac96888...
commit f5b45e89d18ac968885ad2c31ddf9d9e1a22fd22
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
split out content conversion from parse
diff --git a/src/Makefile b/src/Makefile
index 35576c2..23c20fd 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,3 +1,3 @@
-DIR_SOURCES := document.c byte_class.c cos_parse.c cos_object.c pdf_doc.c meta.c page.c xref.c cos_stream_filter.c
+DIR_SOURCES := document.c byte_class.c cos_parse.c cos_object.c pdf_doc.c meta.c page.c xref.c cos_stream_filter.c cos_content.c
include $(NSBUILD)/Makefile.subdir
diff --git a/src/content.h b/src/content.h
deleted file mode 100644
index af91b13..0000000
--- a/src/content.h
+++ /dev/null
@@ -1,226 +0,0 @@
-/*
- * Copyright 2018 Vincent Sanders <vince(a)netsurf-browser.org>
- *
- * This file is part of libnspdf.
- *
- * Licensed under the MIT License,
- * http://www.opensource.org/licenses/mit-license.php
- */
-
-/**
- * \file
- * NetSurf PDF library parsed content stream
- */
-
-#ifndef NSPDF__CONTENT_H_
-#define NSPDF__CONTENT_H_
-
-/**
- * content operator
- *
- * parameters types are listed as:
- * tag -
- * prp - properties
- * num - floating point value
- */
-enum content_operator {
- /**
- * close, fill and stroke path with nonzero winding rule.
- * b()
- */
- CONTENT_OP_b,
-
- /**
- * fill and stroke path using nonzero winding rule.
- * B()
- */
- CONTENT_OP_B,
-
- /**
- * close, fill and stroke path with even/odd rule
- * b*()
- */
- CONTENT_OP_b_,
-
- /**
- * fill and stroke path with even/odd rule
- * B*()
- */
- CONTENT_OP_B_,
-
- /**
- * tag prp BDC
- * begin marked content sequence with property list
- */
- CONTENT_OP_BDC,
-
- /**
- * BI()
- * begin inline image
- */
- CONTENT_OP_BI,
-
- /**
- * tag BMC
- * begin marked content sequence
- */
- CONTENT_OP_BMC,
-
- /**
- * begin text
- * BT()
- */
- CONTENT_OP_BT,
-
- /**
- * begin compatability
- * BX()
- */
- CONTENT_OP_BX,
-
- /**
- * append curved segment to path
- * c(num x1, num y1, num x2, num y2, num x3, num y3)
- */
- CONTENT_OP_c,
- CONTENT_OP_cm, /* a b c d e f cm - concatinate matrix to current trasnsform matrix */
- CONTENT_OP_CS, /* name CS - set colour space for stroking operations */
- CONTENT_OP_cs, /* name cs - set colourspace for non stroke operations */
- CONTENT_OP_d, /* array phase d - set line dash pattern */
- CONTENT_OP_d0, /* wx wy d0 - set glyph width in type 3 font */
- CONTENT_OP_d1, /* wx wy llx lly urx ury d1 - set glyph width and bounding box in type 3 font */
- CONTENT_OP_Do, /* name Do - invoke named xobject */
- CONTENT_OP_DP, /* tag prp DP - define marked content point with property list */
- CONTENT_OP_EI, /* EI - end of inline image */
- CONTENT_OP_EMC, /* EMC - end marked content sequence */
- CONTENT_OP_ET, /* ET - end text object */
- CONTENT_OP_EX, /* EX - end compatability section */
- CONTENT_OP_f, /* f - fill path using nonzero winding rule */
- CONTENT_OP_F, /* F - fill path using nonzero winding rule */
- CONTENT_OP_f_, /* f* - fill path with even/odd rule */
-
- /**
- * set gray level for stroking operations
- * G(num gray)
- */
- CONTENT_OP_G,
-
- /**
- * set gray level for nonstroking operations
- * g(num gray)
- */
- CONTENT_OP_g,
- CONTENT_OP_gs, /* dictName gs - set parameters from graphics state directory */
-
- /**
- * close subpath
- * h()
- */
- CONTENT_OP_h,
-
- /**
- * set flatness tolerance
- * i(num flatness)
- */
- CONTENT_OP_i,
- CONTENT_OP_ID, /* ID - begin inline image data */
-
- /**
- * set line join style (0, 1 or 2)
- * j(int linejoin)
- */
- CONTENT_OP_j,
- CONTENT_OP_J, /* linecap J - sel line cap style (int 0, 1 or 2) */
- CONTENT_OP_K, /* c m y k K - set cmyk colour for stroking operations */
- CONTENT_OP_k, /* c m y k k - set cmyk colour for nonstroking operations */
- CONTENT_OP_l, /* x y l - append straight line segment to path */
- CONTENT_OP_m, /* x y m - begin new subpath */
-
- /**
- * set mitre limit
- * M(num mitrelimit)
- */
- CONTENT_OP_M,
- CONTENT_OP_MP, /* tag MP - define marked content point */
- CONTENT_OP_n, /* n - end path without filling or stroking*/
- CONTENT_OP_q, /* q - save graphics state */
- CONTENT_OP_Q, /* Q - restore graphics state */
- CONTENT_OP_re, /* x y w h re - append rectangle to path */
- CONTENT_OP_RG, /* r g b RG - stroke colour in DeviceRGB colourspace */
- CONTENT_OP_rg, /* r g b rg - nonstroke colour in DeviceRGB colourspace */
- CONTENT_OP_ri, /* intent ri - set color rendering intent */
- CONTENT_OP_s, /* s - close and stroke path */
- CONTENT_OP_S, /* S - stroke path */
- CONTENT_OP_SC, /* c1 c... SC - set colour for stroking operation. 1 3 or 4 params */
- CONTENT_OP_sc, /* c1 c... sc - same as SC for nonstroking operations */
- CONTENT_OP_SCN, /* c1 c... name SCN - same as SC but extra colour spaces. max 32 params */
- CONTENT_OP_scn, /* c1 c... name scn - same as SCN for nonstroking operations */
- CONTENT_OP_sh, /* name sh - paint area defined by shading pattern */
- CONTENT_OP_T_, /* T* - move to start of next text line */
- CONTENT_OP_Tc, /* charspace Tc - set character spacing */
- CONTENT_OP_Td, /* tx ty Td - move text position */
- CONTENT_OP_TD, /* tx ty TD - move text position and set leading */
- CONTENT_OP_Tf, /* font size Tf - select text font and size */
- CONTENT_OP_Tj, /* string Tj - show text */
- CONTENT_OP_TJ, /* array TJ - show text strings allowing individual positioning */
- CONTENT_OP_TL, /* leading TL - set text leading for T* ' " operators */
- CONTENT_OP_Tm, /* a b c d e f Tm - set the text matrix */
- CONTENT_OP_Tr, /* render Tr - set rendering mode (int) */
- CONTENT_OP_Ts, /* rise Ts - set text rise */
- CONTENT_OP_Tw, /* wordspace Tw - set word spacing */
- CONTENT_OP_Tz, /* scale Tz - set horizontal scaling */
- CONTENT_OP_v, /* x2 y2 x3 y3 v - append curved segment path */
-
- /**
- * set line width
- * w(num linewidth)
- */
- CONTENT_OP_w,
- CONTENT_OP_W, /* W - set clipping path using nonzero winding rule */
- CONTENT_OP_W_, /* W* - set clipping path using odd even rule */
- CONTENT_OP_y, /* x1 y1 x3 y3 y - append curved segment to path */
- CONTENT_OP__, /* string ' - move to next line and show text */
- CONTENT_OP___, /* aw ac string " - set word and char spacing, move to next line and show text */
-};
-
-/* six numbers is adequate for almost all operations */
-#define content_number_size (6)
-
-/* compute how long the embedded string can be without inflating the
- * structure. size of the pointer is used instead of unsigned int as that is
- * what will control the structure padding.
- */
-#define content_string_intrnl_lngth ((sizeof(float) * content_number_size) - sizeof(uint8_t *))
-
-
-struct content_operation
-{
- enum content_operator operator;
- union {
- float number[content_number_size];
-
- char *name;
-
- int64_t i[3];
-
- struct {
- unsigned int length;
- union {
- char cdata[content_string_intrnl_lngth];
- uint8_t *pdata;
- } u;
- } string;
-
- struct {
- unsigned int length;
- struct cos_object **values;
- } array;
-
- struct {
- char *name;
- float number;
- } namenumber;
- } u;
-};
-
-#endif
diff --git a/src/cos_content.c b/src/cos_content.c
new file mode 100644
index 0000000..6978606
--- /dev/null
+++ b/src/cos_content.c
@@ -0,0 +1,517 @@
+/*
+ * Copyright 2018 Vincent Sanders <vince(a)netsurf-browser.org>
+ *
+ * This file is part of libnspdf.
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <nspdf/errors.h>
+
+#include "cos_object.h"
+#include "cos_content.h"
+#include "pdf_doc.h"
+
+/**
+ * get the content stream spelling of an operator
+ *
+ * Used to name the operator in diagnostic messages.
+ *
+ * \param operator The operator to name
+ * \return The operator keyword or "????" if the value is not a known
+ *         operator.
+ */
+static const char *operator_name(enum content_operator operator)
+{
+    switch (operator) {
+    case CONTENT_OP_b: return "b";
+    case CONTENT_OP_B: return "B";
+    case CONTENT_OP_b_: return "b*";
+    case CONTENT_OP_B_: return "B*";
+    case CONTENT_OP_BDC: return "BDC";
+    case CONTENT_OP_BI: return "BI";
+    case CONTENT_OP_BMC: return "BMC";
+    case CONTENT_OP_BT: return "BT";
+    case CONTENT_OP_BX: return "BX";
+    case CONTENT_OP_c: return "c";
+    case CONTENT_OP_cm: return "cm";
+    case CONTENT_OP_CS: return "CS";
+    case CONTENT_OP_cs: return "cs";
+    case CONTENT_OP_d: return "d";
+    case CONTENT_OP_d0: return "d0";
+    case CONTENT_OP_d1: return "d1";
+    case CONTENT_OP_Do: return "Do";
+    case CONTENT_OP_DP: return "DP";
+    case CONTENT_OP_EI: return "EI";
+    case CONTENT_OP_EMC: return "EMC";
+    case CONTENT_OP_ET: return "ET";
+    case CONTENT_OP_EX: return "EX";
+    case CONTENT_OP_f: return "f";
+    case CONTENT_OP_F: return "F";
+    case CONTENT_OP_f_: return "f*";
+    case CONTENT_OP_G: return "G";
+    case CONTENT_OP_g: return "g";
+    case CONTENT_OP_gs: return "gs";
+    case CONTENT_OP_h: return "h";
+    case CONTENT_OP_i: return "i";
+    case CONTENT_OP_ID: return "ID";
+    case CONTENT_OP_j: return "j";
+    case CONTENT_OP_J: return "J";
+    case CONTENT_OP_K: return "K";
+    case CONTENT_OP_k: return "k";
+    case CONTENT_OP_l: return "l";
+    case CONTENT_OP_m: return "m";
+    case CONTENT_OP_M: return "M";
+    case CONTENT_OP_MP: return "MP";
+    case CONTENT_OP_n: return "n";
+    case CONTENT_OP_q: return "q";
+    case CONTENT_OP_Q: return "Q";
+    case CONTENT_OP_re: return "re";
+    case CONTENT_OP_RG: return "RG";
+    case CONTENT_OP_rg: return "rg";
+    case CONTENT_OP_ri: return "ri";
+    case CONTENT_OP_s: return "s";
+    case CONTENT_OP_S: return "S";
+    case CONTENT_OP_SC: return "SC";
+    case CONTENT_OP_sc: return "sc";
+    case CONTENT_OP_SCN: return "SCN";
+    case CONTENT_OP_scn: return "scn";
+    case CONTENT_OP_sh: return "sh";
+    case CONTENT_OP_T_: return "T*";
+    case CONTENT_OP_Tc: return "Tc";
+    case CONTENT_OP_Td: return "Td";
+    case CONTENT_OP_TD: return "TD";
+    case CONTENT_OP_Tf: return "Tf";
+    case CONTENT_OP_Tj: return "Tj";
+    case CONTENT_OP_TJ: return "TJ";
+    case CONTENT_OP_TL: return "TL";
+    case CONTENT_OP_Tm: return "Tm";
+    case CONTENT_OP_Tr: return "Tr";
+    case CONTENT_OP_Ts: return "Ts";
+    case CONTENT_OP_Tw: return "Tw";
+    case CONTENT_OP_Tz: return "Tz";
+    case CONTENT_OP_v: return "v";
+    case CONTENT_OP_w: return "w";
+    case CONTENT_OP_W: return "W";
+    /* trailing underscore stands for '*' as with b*, B*, f* and T* */
+    case CONTENT_OP_W_: return "W*";
+    case CONTENT_OP_y: return "y";
+    case CONTENT_OP__: return "\'";
+    case CONTENT_OP___: return "\"";
+    }
+    return "????";
+}
+
+
+/**
+ * move number operands from list into operation
+ *
+ * This ensures all operands are correctly handled not just the wanted ones:
+ * every operand passed is freed and, when fewer than wanted are passed, the
+ * numbers that could not be filled in are set to zero.
+ *
+ * \param wanted The number of wanted operands to place in the operation
+ * \param operands The array of operands from the parse
+ * \param operand_idx The number of operands from the parse, set to zero on
+ *                    return as all operands have been freed
+ * \param operation_out The operation to place numbers in
+ * \return NSPDFERROR_OK, operand problems are reported on stdout only
+ */
+static nspdferror
+copy_numbers(unsigned int wanted,
+             struct cos_object **operands,
+             unsigned int *operand_idx,
+             struct content_operation *operation_out)
+{
+    nspdferror res;
+    unsigned int index = 0;
+
+    while ((index < (*operand_idx)) &&
+           (index < wanted)) {
+        /* process wanted operands */
+        res = cos_get_number(NULL,
+                             *(operands + index),
+                             &operation_out->u.number[index]);
+        if (res != NSPDFERROR_OK) {
+            printf("operand %d could not be set in operation (code %d)\n",
+                   index, res);
+        }
+        cos_free_object(*(operands + index));
+        index++;
+    }
+
+    /* index is now the smaller of the operand count and wanted */
+    if ((*operand_idx) > wanted) {
+        /* too many operands, discard the surplus */
+        printf("operator %s that takes %d operands passed %d\n",
+               operator_name(operation_out->operator), wanted, *operand_idx);
+        while (index < (*operand_idx)) {
+            cos_free_object(*(operands + index));
+            index++;
+        }
+    } else if ((*operand_idx) < wanted) {
+        /* too few operands, give the missing numbers a defined value */
+        printf("operator %s that takes %d operands passed %d\n",
+               operator_name(operation_out->operator), wanted, *operand_idx);
+        while (index < wanted) {
+            operation_out->u.number[index] = 0.0f;
+            index++;
+        }
+    }
+
+    *operand_idx = 0; /* all operands freed */
+
+    return NSPDFERROR_OK;
+}
+
+/**
+ * move integer operands from list into operation
+ *
+ * Every operand passed is freed, not just the wanted ones. When fewer than
+ * wanted are passed the integers that could not be filled in are set to zero.
+ *
+ * \param wanted The number of wanted operands to place in the operation
+ * \param operands The array of operands from the parse
+ * \param operand_idx The number of operands from the parse, set to zero on
+ *                    return as all operands have been freed
+ * \param operation_out The operation to place integers in
+ * \return NSPDFERROR_OK, operand problems are reported on stdout only
+ */
+static nspdferror
+copy_integers(unsigned int wanted,
+              struct cos_object **operands,
+              unsigned int *operand_idx,
+              struct content_operation *operation_out)
+{
+    nspdferror res;
+    unsigned int index = 0;
+
+    while ((index < (*operand_idx)) &&
+           (index < wanted)) {
+        /* process wanted operands */
+        res = cos_get_int(NULL,
+                          *(operands + index),
+                          &operation_out->u.i[index]);
+        if (res != NSPDFERROR_OK) {
+            printf("operand %d could not be set in operation (code %d)\n",
+                   index, res);
+        }
+        cos_free_object(*(operands + index));
+        index++;
+    }
+
+    /* index is now the smaller of the operand count and wanted */
+    if ((*operand_idx) > wanted) {
+        /* too many operands, discard the surplus */
+        printf("operator %s that takes %d operands passed %d\n",
+               operator_name(operation_out->operator), wanted, *operand_idx);
+        while (index < (*operand_idx)) {
+            cos_free_object(*(operands + index));
+            index++;
+        }
+    } else if ((*operand_idx) < wanted) {
+        /* too few operands, give the missing integers a defined value */
+        printf("operator %s that takes %d operands passed %d\n",
+               operator_name(operation_out->operator), wanted, *operand_idx);
+        while (index < wanted) {
+            operation_out->u.i[index] = 0;
+            index++;
+        }
+    }
+
+    *operand_idx = 0; /* all operands freed */
+
+    return NSPDFERROR_OK;
+}
+
+/**
+ * move a string operand from list into operation
+ *
+ * A string that fits in the operation's internal buffer is copied there. A
+ * longer one has its data pointer taken over by the operation and the operand
+ * object's alloc and length are zeroed. All operands passed are freed.
+ *
+ * \param operands The array of operands from the parse
+ * \param operand_idx The number of operands from the parse, set to zero once
+ *                    the operands have been freed
+ * \param operation_out The operation to place the string in
+ * \return NSPDFERROR_OK, operand problems are reported on stdout only
+ */
+static nspdferror
+copy_string(struct cos_object **operands,
+            unsigned int *operand_idx,
+            struct content_operation *operation_out)
+{
+    const unsigned int count = *operand_idx;
+    struct cos_string *str;
+    nspdferror res;
+    unsigned int op;
+
+    if (count == 0) {
+        printf("operator %s that takes %d operands passed %d\n",
+               operator_name(operation_out->operator), 1, count);
+        operation_out->u.string.length = 0;
+        return NSPDFERROR_OK;
+    }
+
+    /* only the first operand is wanted */
+    res = cos_get_string(NULL, operands[0], &str);
+    if (res == NSPDFERROR_OK) {
+        operation_out->u.string.length = str->length;
+        if (str->length <= content_string_intrnl_lngth) {
+            /* short enough to live inside the operation itself */
+            memcpy(operation_out->u.string.u.cdata,
+                   str->data,
+                   str->length);
+        } else {
+            /* take over the data, the object must no longer own it */
+            operation_out->u.string.u.pdata = str->data;
+            str->alloc = 0;
+            str->length = 0;
+        }
+    } else {
+        printf("string could not be set in operation (code %d)\n", res);
+        operation_out->u.string.length = 0;
+    }
+
+    if (count > 1) {
+        printf("operator %s that takes %d operands passed %d\n",
+               operator_name(operation_out->operator), 1, count);
+    }
+
+    /* release every operand, wanted or not */
+    for (op = 0; op < count; op++) {
+        cos_free_object(operands[op]);
+    }
+    *operand_idx = 0;
+
+    return NSPDFERROR_OK;
+}
+
+/**
+ * move an array operand from list into operation
+ *
+ * The values of the array are taken over from the operand object rather than
+ * copied. All operands passed are freed, not just the wanted one.
+ *
+ * \param operands The array of operands from the parse
+ * \param operand_idx The number of operands from the parse, set to zero once
+ *                    the operands have been freed
+ * \param operation_out The operation to place the array in
+ * \return NSPDFERROR_OK, operand problems are reported on stdout only
+ */
+static nspdferror
+copy_array(struct cos_object **operands,
+           unsigned int *operand_idx,
+           struct content_operation *operation_out)
+{
+    unsigned int index = 0;
+
+    /* start from an empty array so values is never left indeterminate */
+    operation_out->u.array.length = 0;
+    operation_out->u.array.values = NULL;
+
+    if ((*operand_idx) == 0) {
+        printf("operator %s that takes %d operands passed %d\n",
+               operator_name(operation_out->operator), 1, *operand_idx);
+        return NSPDFERROR_OK;
+    }
+
+    /* process wanted operands */
+    if ((*operands)->type != COS_TYPE_ARRAY) {
+        printf("operand was not an array\n");
+    } else {
+        operation_out->u.array.length = (*operands)->u.array->length;
+        /* steal the values from the array object */
+        operation_out->u.array.values = (*operands)->u.array->values;
+        (*operands)->u.array->alloc = 0;
+        (*operands)->u.array->length = 0;
+    }
+
+    if ((*operand_idx) > 1) {
+        printf("operator %s that takes %d operands passed %d\n",
+               operator_name(operation_out->operator), 1, *operand_idx);
+    }
+
+    /* free all operands */
+    while (index < (*operand_idx)) {
+        cos_free_object(*(operands + index));
+        index++;
+    }
+    *operand_idx = 0;
+
+    return NSPDFERROR_OK;
+}
+
+
+/**
+ * move a name operand from list into operation
+ *
+ * The name is taken over from the operand object, whose own name pointer is
+ * cleared, rather than copied. All operands passed are freed.
+ *
+ * \param operands The array of operands from the parse
+ * \param operand_idx The number of operands from the parse, set to zero once
+ *                    the operands have been freed
+ * \param operation_out The operation to place the name in
+ * \return NSPDFERROR_OK, operand problems are reported on stdout only
+ */
+static nspdferror
+copy_name(struct cos_object **operands,
+          unsigned int *operand_idx,
+          struct content_operation *operation_out)
+{
+    const unsigned int count = *operand_idx;
+    struct cos_object *first;
+    unsigned int op;
+
+    if (count == 0) {
+        printf("operator %s that takes %d operands passed %d\n",
+               operator_name(operation_out->operator), 1, count);
+        operation_out->u.name = NULL;
+        return NSPDFERROR_OK;
+    }
+
+    /* only the first operand is wanted */
+    first = operands[0];
+    if (first->type == COS_TYPE_NAME) {
+        /* take over the name, the object must no longer own it */
+        operation_out->u.name = first->u.name;
+        first->u.name = NULL;
+    } else {
+        printf("operand was not a name\n");
+        operation_out->u.name = NULL;
+    }
+
+    if (count > 1) {
+        printf("operator %s that takes %d operands passed %d\n",
+               operator_name(operation_out->operator), 1, count);
+    }
+
+    /* release every operand, wanted or not */
+    for (op = 0; op < count; op++) {
+        cos_free_object(operands[op]);
+    }
+    *operand_idx = 0;
+
+    return NSPDFERROR_OK;
+}
+
+/**
+ * move a name operand and a number operand from list into operation
+ *
+ * The name is taken over from the operand object rather than copied. All
+ * operands passed are freed, not just the two wanted ones.
+ *
+ * \param operands The array of operands from the parse
+ * \param operand_idx The number of operands from the parse, set to zero once
+ *                    the operands have been freed
+ * \param operation_out The operation to place the name and number in
+ * \return NSPDFERROR_OK, operand problems are reported on stdout only
+ */
+static nspdferror
+copy_name_number(struct cos_object **operands,
+                 unsigned int *operand_idx,
+                 struct content_operation *operation_out)
+{
+    unsigned int index = 0;
+
+    /* start from an empty result so number is never left indeterminate */
+    operation_out->u.namenumber.name = NULL;
+    operation_out->u.namenumber.number = 0;
+
+    if ((*operand_idx) == 0) {
+        printf("operator %s that takes %d operands passed %d\n",
+               operator_name(operation_out->operator), 2, *operand_idx);
+        return NSPDFERROR_OK;
+    }
+
+    /* process wanted operands */
+    if ((*operands)->type != COS_TYPE_NAME) {
+        printf("operand was not a name\n");
+    } else {
+        /* steal the name from the name object */
+        operation_out->u.namenumber.name = (*operands)->u.name;
+        (*operands)->u.name = NULL;
+
+        /* get the number */
+        if ((*operand_idx) > 1) {
+            nspdferror res;
+            res = cos_get_number(NULL,
+                                 *(operands + 1),
+                                 &operation_out->u.namenumber.number);
+            if (res != NSPDFERROR_OK) {
+                printf("operand 1 could not be set in operation (code %d)\n", res);
+            }
+        } else {
+            printf("operator %s that takes %d operands passed %d\n",
+                   operator_name(operation_out->operator), 2, *operand_idx);
+        }
+    }
+
+    if ((*operand_idx) > 2) {
+        printf("operator %s that takes %d operands passed %d\n",
+               operator_name(operation_out->operator), 2, *operand_idx);
+    }
+
+    /* free all operands */
+    while (index < (*operand_idx)) {
+        cos_free_object(*(operands + index));
+        index++;
+    }
+    *operand_idx = 0;
+
+    return NSPDFERROR_OK;
+}
+
+/*
+ * exported interface documented in cos_content.h
+ *
+ * Each group of operators is handed to the helper matching its operand
+ * types. Each case returns directly and the switch has no default label, so
+ * a compiler that checks enumeration switches can still report an operator
+ * missing from it, while a value outside the enumeration gets a defined
+ * result instead of an uninitialised return value.
+ */
+nspdferror
+nspdf__cos_content_convert(enum content_operator operator,
+                           struct cos_object **operands,
+                           unsigned int *operand_idx,
+                           struct content_operation *operation_out)
+{
+    operation_out->operator = operator;
+
+    switch (operator) {
+    case CONTENT_OP_b:
+    case CONTENT_OP_B:
+    case CONTENT_OP_b_:
+    case CONTENT_OP_B_:
+    case CONTENT_OP_BI:
+    case CONTENT_OP_BT:
+    case CONTENT_OP_BX:
+    case CONTENT_OP_EI:
+    case CONTENT_OP_EMC:
+    case CONTENT_OP_ET:
+    case CONTENT_OP_EX:
+    case CONTENT_OP_f:
+    case CONTENT_OP_F:
+    case CONTENT_OP_f_:
+    case CONTENT_OP_h:
+    case CONTENT_OP_ID:
+    case CONTENT_OP_n:
+    case CONTENT_OP_q:
+    case CONTENT_OP_Q:
+    case CONTENT_OP_s:
+    case CONTENT_OP_S:
+    case CONTENT_OP_T_:
+    case CONTENT_OP_W:
+    case CONTENT_OP_W_:
+        /* no operands */
+        return copy_numbers(0, operands, operand_idx, operation_out);
+
+    case CONTENT_OP_G:
+    case CONTENT_OP_g:
+    case CONTENT_OP_i:
+    case CONTENT_OP_M:
+    case CONTENT_OP_Tc:
+    case CONTENT_OP_TL:
+    case CONTENT_OP_Ts:
+    case CONTENT_OP_Tw:
+    case CONTENT_OP_Tz:
+    case CONTENT_OP_w:
+        /* one number */
+        return copy_numbers(1, operands, operand_idx, operation_out);
+
+    case CONTENT_OP_d0:
+    case CONTENT_OP_l:
+    case CONTENT_OP_m:
+    case CONTENT_OP_Td:
+    case CONTENT_OP_TD:
+        /* two numbers */
+        return copy_numbers(2, operands, operand_idx, operation_out);
+
+    case CONTENT_OP_RG:
+    case CONTENT_OP_rg:
+        /* three numbers */
+        return copy_numbers(3, operands, operand_idx, operation_out);
+
+    case CONTENT_OP_K:
+    case CONTENT_OP_k:
+    case CONTENT_OP_re:
+    case CONTENT_OP_v:
+    case CONTENT_OP_y:
+        /* four numbers */
+        return copy_numbers(4, operands, operand_idx, operation_out);
+
+    case CONTENT_OP_c:
+    case CONTENT_OP_cm:
+    case CONTENT_OP_d1:
+    case CONTENT_OP_Tm:
+        /* six numbers */
+        return copy_numbers(6, operands, operand_idx, operation_out);
+
+    case CONTENT_OP_Tj:
+    case CONTENT_OP__:
+        /* single string */
+        return copy_string(operands, operand_idx, operation_out);
+
+    case CONTENT_OP_TJ:
+        /* single array */
+        return copy_array(operands, operand_idx, operation_out);
+
+    case CONTENT_OP_Tf:
+        /* name and number */
+        return copy_name_number(operands, operand_idx, operation_out);
+
+    case CONTENT_OP_gs:
+        /* name */
+        return copy_name(operands, operand_idx, operation_out);
+
+    case CONTENT_OP_j:
+    case CONTENT_OP_J:
+        /* one integer */
+        return copy_integers(1, operands, operand_idx, operation_out);
+
+    case CONTENT_OP_BDC:
+    case CONTENT_OP_BMC:
+    case CONTENT_OP_CS:
+    case CONTENT_OP_cs:
+    case CONTENT_OP_d:
+    case CONTENT_OP_Do:
+    case CONTENT_OP_DP:
+    case CONTENT_OP_MP:
+    case CONTENT_OP_ri:
+    case CONTENT_OP_SC:
+    case CONTENT_OP_sc:
+    case CONTENT_OP_SCN:
+    case CONTENT_OP_scn:
+    case CONTENT_OP_sh:
+    case CONTENT_OP_Tr:
+    case CONTENT_OP___:
+        /* operands of these operators are currently discarded */
+        return copy_numbers(0, operands, operand_idx, operation_out);
+    }
+
+    /* only reached for a value outside the enumeration; free the operands */
+    return copy_numbers(0, operands, operand_idx, operation_out);
+}
diff --git a/src/cos_content.h b/src/cos_content.h
new file mode 100644
index 0000000..9921e2b
--- /dev/null
+++ b/src/cos_content.h
@@ -0,0 +1,232 @@
+/*
+ * Copyright 2018 Vincent Sanders <vince(a)netsurf-browser.org>
+ *
+ * This file is part of libnspdf.
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
+/**
+ * \file
+ * NetSurf PDF library parsed content stream
+ */
+
+#ifndef NSPDF__COS_CONTENT_H_
+#define NSPDF__COS_CONTENT_H_
+
+/**
+ * content operator
+ *
+ * parameters types are listed as:
+ * tag -
+ * prp - properties
+ * num - floating point value
+ */
+enum content_operator {
+ /**
+ * close, fill and stroke path with nonzero winding rule.
+ * b()
+ */
+ CONTENT_OP_b,
+
+ /**
+ * fill and stroke path using nonzero winding rule.
+ * B()
+ */
+ CONTENT_OP_B,
+
+ /**
+ * close, fill and stroke path with even/odd rule
+ * b*()
+ */
+ CONTENT_OP_b_,
+
+ /**
+ * fill and stroke path with even/odd rule
+ * B*()
+ */
+ CONTENT_OP_B_,
+
+ /**
+ * tag prp BDC
+ * begin marked content sequence with property list
+ */
+ CONTENT_OP_BDC,
+
+ /**
+ * BI()
+ * begin inline image
+ */
+ CONTENT_OP_BI,
+
+ /**
+ * tag BMC
+ * begin marked content sequence
+ */
+ CONTENT_OP_BMC,
+
+ /**
+ * begin text
+ * BT()
+ */
+ CONTENT_OP_BT,
+
+ /**
+ * begin compatibility
+ * BX()
+ */
+ CONTENT_OP_BX,
+
+ /**
+ * append curved segment to path
+ * c(num x1, num y1, num x2, num y2, num x3, num y3)
+ */
+ CONTENT_OP_c,
+ CONTENT_OP_cm, /* a b c d e f cm - concatenate matrix to current transform matrix */
+ CONTENT_OP_CS, /* name CS - set colour space for stroking operations */
+ CONTENT_OP_cs, /* name cs - set colourspace for non stroke operations */
+ CONTENT_OP_d, /* array phase d - set line dash pattern */
+ CONTENT_OP_d0, /* wx wy d0 - set glyph width in type 3 font */
+ CONTENT_OP_d1, /* wx wy llx lly urx ury d1 - set glyph width and bounding box in type 3 font */
+ CONTENT_OP_Do, /* name Do - invoke named xobject */
+ CONTENT_OP_DP, /* tag prp DP - define marked content point with property list */
+ CONTENT_OP_EI, /* EI - end of inline image */
+ CONTENT_OP_EMC, /* EMC - end marked content sequence */
+ CONTENT_OP_ET, /* ET - end text object */
+ CONTENT_OP_EX, /* EX - end compatibility section */
+ CONTENT_OP_f, /* f - fill path using nonzero winding rule */
+ CONTENT_OP_F, /* F - fill path using nonzero winding rule */
+ CONTENT_OP_f_, /* f* - fill path with even/odd rule */
+
+ /**
+ * set gray level for stroking operations
+ * G(num gray)
+ */
+ CONTENT_OP_G,
+
+ /**
+ * set gray level for nonstroking operations
+ * g(num gray)
+ */
+ CONTENT_OP_g,
+ CONTENT_OP_gs, /* dictName gs - set parameters from graphics state dictionary */
+
+ /**
+ * close subpath
+ * h()
+ */
+ CONTENT_OP_h,
+
+ /**
+ * set flatness tolerance
+ * i(num flatness)
+ */
+ CONTENT_OP_i,
+ CONTENT_OP_ID, /* ID - begin inline image data */
+
+ /**
+ * set line join style (0, 1 or 2)
+ * j(int linejoin)
+ */
+ CONTENT_OP_j,
+ CONTENT_OP_J, /* linecap J - set line cap style (int 0, 1 or 2) */
+ CONTENT_OP_K, /* c m y k K - set cmyk colour for stroking operations */
+ CONTENT_OP_k, /* c m y k k - set cmyk colour for nonstroking operations */
+ CONTENT_OP_l, /* x y l - append straight line segment to path */
+ CONTENT_OP_m, /* x y m - begin new subpath */
+
+ /**
+ * set mitre limit
+ * M(num mitrelimit)
+ */
+ CONTENT_OP_M,
+ CONTENT_OP_MP, /* tag MP - define marked content point */
+ CONTENT_OP_n, /* n - end path without filling or stroking*/
+ CONTENT_OP_q, /* q - save graphics state */
+ CONTENT_OP_Q, /* Q - restore graphics state */
+ CONTENT_OP_re, /* x y w h re - append rectangle to path */
+ CONTENT_OP_RG, /* r g b RG - stroke colour in DeviceRGB colourspace */
+ CONTENT_OP_rg, /* r g b rg - nonstroke colour in DeviceRGB colourspace */
+ CONTENT_OP_ri, /* intent ri - set color rendering intent */
+ CONTENT_OP_s, /* s - close and stroke path */
+ CONTENT_OP_S, /* S - stroke path */
+ CONTENT_OP_SC, /* c1 c... SC - set colour for stroking operation. 1 3 or 4 params */
+ CONTENT_OP_sc, /* c1 c... sc - same as SC for nonstroking operations */
+ CONTENT_OP_SCN, /* c1 c... name SCN - same as SC but extra colour spaces. max 32 params */
+ CONTENT_OP_scn, /* c1 c... name scn - same as SCN for nonstroking operations */
+ CONTENT_OP_sh, /* name sh - paint area defined by shading pattern */
+ CONTENT_OP_T_, /* T* - move to start of next text line */
+ CONTENT_OP_Tc, /* charspace Tc - set character spacing */
+ CONTENT_OP_Td, /* tx ty Td - move text position */
+ CONTENT_OP_TD, /* tx ty TD - move text position and set leading */
+ CONTENT_OP_Tf, /* font size Tf - select text font and size */
+ CONTENT_OP_Tj, /* string Tj - show text */
+ CONTENT_OP_TJ, /* array TJ - show text strings allowing individual positioning */
+ CONTENT_OP_TL, /* leading TL - set text leading for T* ' " operators */
+ CONTENT_OP_Tm, /* a b c d e f Tm - set the text matrix */
+ CONTENT_OP_Tr, /* render Tr - set rendering mode (int) */
+ CONTENT_OP_Ts, /* rise Ts - set text rise */
+ CONTENT_OP_Tw, /* wordspace Tw - set word spacing */
+ CONTENT_OP_Tz, /* scale Tz - set horizontal scaling */
+ CONTENT_OP_v, /* x2 y2 x3 y3 v - append curved segment to path */
+
+ /**
+ * set line width
+ * w(num linewidth)
+ */
+ CONTENT_OP_w,
+ CONTENT_OP_W, /* W - set clipping path using nonzero winding rule */
+ CONTENT_OP_W_, /* W* - set clipping path using odd even rule */
+ CONTENT_OP_y, /* x1 y1 x3 y3 y - append curved segment to path */
+ CONTENT_OP__, /* string ' - move to next line and show text */
+ CONTENT_OP___, /* aw ac string " - set word and char spacing, move to next line and show text */
+};
+
+/* six numbers is adequate for almost all operations */
+#define content_number_size (6)
+
+/* compute how long the embedded string can be without inflating the
+ * structure. size of the pointer is used instead of unsigned int as that is
+ * what will control the structure padding.
+ */
+#define content_string_intrnl_lngth ((sizeof(float) * content_number_size) - sizeof(uint8_t *))
+
+
+struct content_operation
+{
+ enum content_operator operator;
+ union {
+ float number[content_number_size];
+
+ char *name;
+
+ int64_t i[3];
+
+ struct {
+ unsigned int length;
+ union {
+ char cdata[content_string_intrnl_lngth];
+ uint8_t *pdata;
+ } u;
+ } string;
+
+ struct {
+ unsigned int length;
+ struct cos_object **values;
+ } array;
+
+ struct {
+ char *name;
+ float number;
+ } namenumber;
+ } u;
+};
+
+/**
+ * convert an operator and operand list into an operation
+ */
+nspdferror nspdf__cos_content_convert(enum content_operator operator, struct cos_object **operands, unsigned int *operand_idx, struct content_operation *operation_out);
+
+
+#endif
diff --git a/src/cos_parse.c b/src/cos_parse.c
index a1587d5..dea67f9 100644
--- a/src/cos_parse.c
+++ b/src/cos_parse.c
@@ -19,7 +19,7 @@
#include "cos_parse.h"
#include "byte_class.h"
#include "cos_object.h"
-#include "content.h"
+#include "cos_content.h"
#include "pdf_doc.h"
/** increments in which cos string allocations are extended */
@@ -1246,367 +1246,9 @@ parse_operator(struct cos_stream *stream,
}
-static const char*operator_name(enum content_operator operator)
-{
- switch(operator) {
- case CONTENT_OP_b: return "b";
- case CONTENT_OP_B: return "B";
- case CONTENT_OP_b_: return "b*";
- case CONTENT_OP_B_: return "B*";
- case CONTENT_OP_BDC: return "BDC";
- case CONTENT_OP_BI: return "BI";
- case CONTENT_OP_BMC: return "BMC";
- case CONTENT_OP_BT: return "BT";
- case CONTENT_OP_BX: return "BX";
- case CONTENT_OP_c: return "c";
- case CONTENT_OP_cm: return "cm";
- case CONTENT_OP_CS: return "CS";
- case CONTENT_OP_cs: return "cs";
- case CONTENT_OP_d: return "d";
- case CONTENT_OP_d0: return "d0";
- case CONTENT_OP_d1: return "d1";
- case CONTENT_OP_Do: return "Do";
- case CONTENT_OP_DP: return "DP";
- case CONTENT_OP_EI: return "EI";
- case CONTENT_OP_EMC: return "EMC";
- case CONTENT_OP_ET: return "ET";
- case CONTENT_OP_EX: return "EX";
- case CONTENT_OP_f: return "f";
- case CONTENT_OP_F: return "F";
- case CONTENT_OP_f_: return "f*";
- case CONTENT_OP_G: return "G";
- case CONTENT_OP_g: return "g";
- case CONTENT_OP_gs: return "gs";
- case CONTENT_OP_h: return "h";
- case CONTENT_OP_i: return "i";
- case CONTENT_OP_ID: return "ID";
- case CONTENT_OP_j: return "j";
- case CONTENT_OP_J: return "J";
- case CONTENT_OP_K: return "K";
- case CONTENT_OP_k: return "k";
- case CONTENT_OP_l: return "l";
- case CONTENT_OP_m: return "m";
- case CONTENT_OP_M: return "M";
- case CONTENT_OP_MP: return "MP";
- case CONTENT_OP_n: return "n";
- case CONTENT_OP_q: return "q";
- case CONTENT_OP_Q: return "Q";
- case CONTENT_OP_re: return "re";
- case CONTENT_OP_RG: return "RG";
- case CONTENT_OP_rg: return "rg";
- case CONTENT_OP_ri: return "ri";
- case CONTENT_OP_s: return "s";
- case CONTENT_OP_S: return "S";
- case CONTENT_OP_SC: return "SC";
- case CONTENT_OP_sc: return "sc";
- case CONTENT_OP_SCN: return "SCN";
- case CONTENT_OP_scn: return "scn";
- case CONTENT_OP_sh: return "sh";
- case CONTENT_OP_T_: return "T*";
- case CONTENT_OP_Tc: return "Tc";
- case CONTENT_OP_Td: return "Td";
- case CONTENT_OP_TD: return "TD";
- case CONTENT_OP_Tf: return "Tf";
- case CONTENT_OP_Tj: return "Tj";
- case CONTENT_OP_TJ: return "TJ";
- case CONTENT_OP_TL: return "TL";
- case CONTENT_OP_Tm: return "Tm";
- case CONTENT_OP_Tr: return "Tr";
- case CONTENT_OP_Ts: return "Ts";
- case CONTENT_OP_Tw: return "Tw";
- case CONTENT_OP_Tz: return "Tz";
- case CONTENT_OP_v: return "v";
- case CONTENT_OP_w: return "w";
- case CONTENT_OP_W: return "W";
- case CONTENT_OP_W_: return "W_";
- case CONTENT_OP_y: return "y";
- case CONTENT_OP__: return "\'";
- case CONTENT_OP___: return "\"";
- }
- return "????";
-}
-
/**
- * move number operands from list into operation
- *
- * This ensures all operands are correctly handled not just the wanted ones
- *
- * \param wanted The number of wanted operands to place in the operation
- * \param operands The array of operands from the parse
- * \param operand_idx The number of operands from the parse
- * \param operation_out The operation to place numbers in
- */
-static nspdferror
-copy_numbers(unsigned int wanted,
- struct cos_object **operands,
- unsigned int *operand_idx,
- struct content_operation *operation_out)
-{
- nspdferror res;
- unsigned int index = 0;
-
- while ((index < (*operand_idx)) &&
- (index < wanted)) {
- /* process wanted operands */
- res = cos_get_number(NULL,
- *(operands + index),
- &operation_out->u.number[index]);
- if (res != NSPDFERROR_OK) {
- printf("operand %d could not be set in operation (code %d)\n",
- index, res);
- }
- cos_free_object(*(operands + index));
- index++;
- }
- if ((*operand_idx) > index) {
- printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), wanted, *operand_idx);
- while (index < (*operand_idx)) {
- cos_free_object(*(operands + index));
- index++;
- }
- } else if ((*operand_idx) < index) {
- printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), wanted, *operand_idx);
- }
-
- *operand_idx = 0; /* all operands freed */
-
- return NSPDFERROR_OK;
-}
-
-static nspdferror
-copy_integers(unsigned int wanted,
- struct cos_object **operands,
- unsigned int *operand_idx,
- struct content_operation *operation_out)
-{
- nspdferror res;
- unsigned int index = 0;
-
- while ((index < (*operand_idx)) &&
- (index < wanted)) {
- /* process wanted operands */
- res = cos_get_int(NULL,
- *(operands + index),
- &operation_out->u.i[index]);
- if (res != NSPDFERROR_OK) {
- printf("operand %d could not be set in operation (code %d)\n",
- index, res);
- }
- cos_free_object(*(operands + index));
- index++;
- }
- if ((*operand_idx) > index) {
- printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), wanted, *operand_idx);
- while (index < (*operand_idx)) {
- cos_free_object(*(operands + index));
- index++;
- }
- } else if ((*operand_idx) < index) {
- printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), wanted, *operand_idx);
- }
-
- *operand_idx = 0; /* all operands freed */
-
- return NSPDFERROR_OK;
-}
-
-static nspdferror
-copy_string(struct cos_object **operands,
- unsigned int *operand_idx,
- struct content_operation *operation_out)
-{
- nspdferror res;
- unsigned int index = 0;
- struct cos_string *string;
-
- if ((*operand_idx) == 0) {
- printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), 1, *operand_idx);
- operation_out->u.string.length = 0;
- return NSPDFERROR_OK;
- }
-
- /* process wanted operands */
- res = cos_get_string(NULL, *operands, &string);
- if (res != NSPDFERROR_OK) {
- printf("string could not be set in operation (code %d)\n", res);
- operation_out->u.string.length = 0;
- } else {
- operation_out->u.string.length = string->length;
- if (string->length > content_string_intrnl_lngth) {
- /* steal the string from the object */
- operation_out->u.string.u.pdata = string->data;
- string->alloc = 0;
- string->length = 0;
- /*printf("external string \"%.*s\"\n",
- operation_out->u.string.length,
- operation_out->u.string.u.pdata);*/
- } else {
- memcpy(operation_out->u.string.u.cdata,
- string->data,
- string->length);
- /*printf("internal string \"%.*s\"\n",
- operation_out->u.string.length,
- operation_out->u.string.u.cdata);*/
- }
- }
-
- if ((*operand_idx) > 1) {
- printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), 1, *operand_idx);
- }
-
- /* free all operands */
- while (index < (*operand_idx)) {
- cos_free_object(*(operands + index));
- index++;
- }
- *operand_idx = 0;
-
- return NSPDFERROR_OK;
-}
-
-static nspdferror
-copy_array(struct cos_object **operands,
- unsigned int *operand_idx,
- struct content_operation *operation_out)
-{
- unsigned int index = 0;
-
- if ((*operand_idx) == 0) {
- printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), 1, *operand_idx);
- operation_out->u.array.length = 0;
- return NSPDFERROR_OK;
- }
-
- /* process wanted operands */
- if ((*operands)->type != COS_TYPE_ARRAY) {
- printf("operand was not an array\n");
- operation_out->u.array.length = 0;
- } else {
- operation_out->u.array.length = (*operands)->u.array->length;
- /* steal the values from the array object */
- operation_out->u.array.values = (*operands)->u.array->values;
- (*operands)->u.array->alloc = 0;
- (*operands)->u.array->length = 0;
- }
-
- if ((*operand_idx) > 1) {
- printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), 1, *operand_idx);
- }
-
- /* free all operands */
- while (index < (*operand_idx)) {
- cos_free_object(*(operands + index));
- index++;
- }
- *operand_idx = 0;
-
- return NSPDFERROR_OK;
-}
-
-
-static nspdferror
-copy_name(struct cos_object **operands,
- unsigned int *operand_idx,
- struct content_operation *operation_out)
-{
- unsigned int index = 0;
-
- if ((*operand_idx) == 0) {
- printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), 1, *operand_idx);
- operation_out->u.name = NULL;
- return NSPDFERROR_OK;
- }
-
- /* process wanted operands */
- if ((*operands)->type != COS_TYPE_NAME) {
- printf("operand was not a name\n");
- operation_out->u.name = NULL;
- } else {
- /* steal the name from the name object */
- operation_out->u.name = (*operands)->u.name;
- (*operands)->u.name = NULL;
- }
-
- if ((*operand_idx) > 1) {
- printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), 1, *operand_idx);
- }
-
- /* free all operands */
- while (index < (*operand_idx)) {
- cos_free_object(*(operands + index));
- index++;
- }
- *operand_idx = 0;
-
- return NSPDFERROR_OK;
-}
-
-static nspdferror
-copy_name_number(struct cos_object **operands,
- unsigned int *operand_idx,
- struct content_operation *operation_out)
-{
- unsigned int index = 0;
-
- if ((*operand_idx) == 0) {
- printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), 2, *operand_idx);
- operation_out->u.namenumber.name = NULL;
- return NSPDFERROR_OK;
- }
-
- /* process wanted operands */
- if ((*operands)->type != COS_TYPE_NAME) {
- printf("operand was not a name\n");
- operation_out->u.namenumber.name = NULL;
- } else {
- /* steal the name from the name object */
- operation_out->u.namenumber.name = (*operands)->u.name;
- (*operands)->u.name = NULL;
-
- operation_out->u.namenumber.number = 0;
- /* get the number */
- if ((*operand_idx) > 1) {
- nspdferror res;
- res = cos_get_number(NULL,
- *(operands + 1),
- &operation_out->u.namenumber.number);
- if (res != NSPDFERROR_OK) {
- printf("operand 1 could not be set in operation (code %d)\n", res);
- }
- } else {
- printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), 2, *operand_idx);
- }
- }
-
- if ((*operand_idx) > 2) {
- printf("operator %s that takes %d operands passed %d\n",
- operator_name(operation_out->operator), 2, *operand_idx);
- }
-
- /* free all operands */
- while (index < (*operand_idx)) {
- cos_free_object(*(operands + index));
- index++;
- }
- *operand_idx = 0;
-
- return NSPDFERROR_OK;
-}
-
-/** largest number of operands any operator requires
+ * largest number of operands any operator requires
*
* This would be 6 except scn in Nchannel colourspace may have 32
*/
@@ -1707,7 +1349,6 @@ parse_content_operation(struct nspdf_doc *doc,
res = parse_operator(stream, &offset, &operator);
}
-
/*
printf("returning operator %d with %d operands %d to %d of %d\n>>>%.*s<<<\n",
operator,
@@ -1719,130 +1360,10 @@ parse_content_operation(struct nspdf_doc *doc,
stream->data + (*offset_out));
*/
- operation_out->operator = operator;
-
- switch (operator) {
- case CONTENT_OP_b:
- case CONTENT_OP_B:
- case CONTENT_OP_b_:
- case CONTENT_OP_B_:
- case CONTENT_OP_BI:
- case CONTENT_OP_BT:
- case CONTENT_OP_BX:
- case CONTENT_OP_EI:
- case CONTENT_OP_EMC:
- case CONTENT_OP_ET:
- case CONTENT_OP_EX:
- case CONTENT_OP_f:
- case CONTENT_OP_F:
- case CONTENT_OP_f_:
- case CONTENT_OP_h:
- case CONTENT_OP_ID:
- case CONTENT_OP_n:
- case CONTENT_OP_q:
- case CONTENT_OP_Q:
- case CONTENT_OP_s:
- case CONTENT_OP_S:
- case CONTENT_OP_T_:
- case CONTENT_OP_W:
- case CONTENT_OP_W_:
- /* no operands */
- res = copy_numbers(0, operands, operand_idx, operation_out);
- break;
-
- case CONTENT_OP_G:
- case CONTENT_OP_g:
- case CONTENT_OP_i:
- case CONTENT_OP_M:
- case CONTENT_OP_Tc:
- case CONTENT_OP_TL:
- case CONTENT_OP_Ts:
- case CONTENT_OP_Tw:
- case CONTENT_OP_Tz:
- case CONTENT_OP_w:
- /* one number */
- res = copy_numbers(1, operands, operand_idx, operation_out);
- break;
-
- case CONTENT_OP_d0:
- case CONTENT_OP_l:
- case CONTENT_OP_m:
- case CONTENT_OP_Td:
- case CONTENT_OP_TD:
- /* two numbers */
- res = copy_numbers(2, operands, operand_idx, operation_out);
- break;
-
- case CONTENT_OP_RG:
- case CONTENT_OP_rg:
- /* three numbers */
- res = copy_numbers(3, operands, operand_idx, operation_out);
- break;
-
- case CONTENT_OP_K:
- case CONTENT_OP_k:
- case CONTENT_OP_re:
- case CONTENT_OP_v:
- case CONTENT_OP_y:
- /* four numbers */
- res = copy_numbers(4, operands, operand_idx, operation_out);
- break;
-
- case CONTENT_OP_c:
- case CONTENT_OP_cm:
- case CONTENT_OP_d1:
- case CONTENT_OP_Tm:
- /* six numbers */
- res = copy_numbers(6, operands, operand_idx, operation_out);
- break;
-
- case CONTENT_OP_Tj:
- case CONTENT_OP__:
- /* single string */
- res = copy_string(operands, operand_idx, operation_out);
- break;
-
- case CONTENT_OP_TJ:
- /* single array */
- res = copy_array(operands, operand_idx, operation_out);
- break;
-
- case CONTENT_OP_Tf:
- /* name and number */
- res = copy_name_number(operands, operand_idx, operation_out);
- break;
-
- case CONTENT_OP_gs:
- /* name */
- res = copy_name(operands, operand_idx, operation_out);
- break;
-
- case CONTENT_OP_j:
- case CONTENT_OP_J:
- /* one integer */
- res = copy_integers(1, operands, operand_idx, operation_out);
- break;
-
- case CONTENT_OP_BDC:
- case CONTENT_OP_BMC:
- case CONTENT_OP_CS:
- case CONTENT_OP_cs:
- case CONTENT_OP_d:
- case CONTENT_OP_Do:
- case CONTENT_OP_DP:
- case CONTENT_OP_MP:
- case CONTENT_OP_ri:
- case CONTENT_OP_SC:
- case CONTENT_OP_sc:
- case CONTENT_OP_SCN:
- case CONTENT_OP_scn:
- case CONTENT_OP_sh:
- case CONTENT_OP_Tr:
- case CONTENT_OP___:
- res = copy_numbers(0, operands, operand_idx, operation_out);
- break;
- }
-
+ res = nspdf__cos_content_convert(operator,
+ operands,
+ operand_idx,
+ operation_out);
if (res == NSPDFERROR_OK) {
*operand_idx = 0;
@@ -1870,15 +1391,24 @@ cos_parse_content_streams(struct nspdf_doc *doc,
//#define SHOW_STRUCT_SIZE
#ifdef SHOW_STRUCT_SIZE
struct content_operation foo;
- printf("content_operation length:%d\nfloat:%d\nunsigned int:%d\n"
- "union %d\n"
- " n:%d string:%d string.u:%d string.u.cdata:%d array:%d\n",
- sizeof(struct content_operation),
+ printf("float:%lu unsigned int:%lu\n"
+ "struct content_operation:%lu\n"
+ " operator:%lu\n"
+ " union:%lu\n"
+ " number:%lu\n"
+ " string:%lu\n"
+ " string.length:%lu\n"
+ " string.u:%lu\n"
+ " string.u.cdata:%lu\n"
+ " array:%lu\n",
sizeof(float),
sizeof(unsigned int),
+ sizeof(struct content_operation),
+ sizeof(foo.operator),
sizeof(foo.u),
- sizeof(foo.u.n),
+ sizeof(foo.u.number),
sizeof(foo.u.string),
+ sizeof(foo.u.string.length),
sizeof(foo.u.string.u),
sizeof(foo.u.string.u.cdata),
sizeof(foo.u.array));
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=06b733c6296bb7bf87...
commit 06b733c6296bb7bf8724a599c48d42655a190f74
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
add parameter parsing for content objects
diff --git a/src/content.h b/src/content.h
index 11f4c3d..af91b13 100644
--- a/src/content.h
+++ b/src/content.h
@@ -15,88 +15,212 @@
#ifndef NSPDF__CONTENT_H_
#define NSPDF__CONTENT_H_
+/**
+ * content operator
+ *
+ * parameters types are listed as:
+ * tag -
+ * prp - properties
+ * num - floating point value
+ */
enum content_operator {
- CONTENT_OP_b, /* b - close, fill and stroke path with nonzero winding
- * rule. */
- CONTENT_OP_B, /* B - fill and stroke path using nonzero winding rule */
- CONTENT_OP_b_, /* b* - close, fill and stroke path with even/odd rule */
- CONTENT_OP_B_, /* B* - fill and stroke path with even/odd rule */
- CONTENT_OP_BDC, /* BDC - begin marked content sequence with property list */
- CONTENT_OP_BI, /* BI - begin inline image*/
- CONTENT_OP_BMC, /* BMC - begin marked content sequence */
- CONTENT_OP_BT, /* BT - begin text */
- CONTENT_OP_BX, /* BX - begin compatability */
- CONTENT_OP_c, /* c - append curved segment to path */
- CONTENT_OP_cm, /* cm - concatinate matrix to current trasnsform matrix */
- CONTENT_OP_CS, /* CS - set colour space for stroking operations */
- CONTENT_OP_cs, /* cs - set colourspace for non stroke operations */
- CONTENT_OP_d, /* d - set line dash pattern */
- CONTENT_OP_d0, /* d0 - set glyph width in type 3 font */
- CONTENT_OP_d1, /* d1 - set glyph width and bounding box in type 3 font */
- CONTENT_OP_Do, /* Do - invoke named xobject */
- CONTENT_OP_DP, /* DP - define marked content point with property list */
- CONTENT_OP_EI, /* EI - end of inline image */
- CONTENT_OP_EMC, /* EMC - end marked content sequence */
- CONTENT_OP_ET, /* ET - end text object */
- CONTENT_OP_EX, /* EX - end compatability section */
- CONTENT_OP_f, /* f - fill path using nonzero winding rule */
- CONTENT_OP_F, /* F - fill path using nonzero winding rule */
- CONTENT_OP_f_, /* f* - fill path with even/odd rule */
- CONTENT_OP_G, /* G - set gray level for stroking operations */
- CONTENT_OP_g, /* g - set gray level for nonstroking operations */
- CONTENT_OP_gs, /* gs - set parameters from graphics state directory */
- CONTENT_OP_h, /* h - close subpath */
- CONTENT_OP_i, /* i - set flatness tolerance */
- CONTENT_OP_ID, /* ID - begin inline image data */
- CONTENT_OP_j, /* j - set join style */
- CONTENT_OP_J, /* J - */
- CONTENT_OP_K, /* K - */
- CONTENT_OP_k, /* k - */
- CONTENT_OP_l, /* l - */
- CONTENT_OP_m, /* m - */
- CONTENT_OP_M, /* M - */
- CONTENT_OP_MP, /* MP - */
- CONTENT_OP_n, /* n - */
- CONTENT_OP_q, /* q - */
- CONTENT_OP_Q, /* Q - */
- CONTENT_OP_re, /* re - */
- CONTENT_OP_RG, /* RG - */
- CONTENT_OP_rg, /* rg - */
- CONTENT_OP_ri, /* ri - */
- CONTENT_OP_s, /* s - */
- CONTENT_OP_S, /* S - */
- CONTENT_OP_SC, /* SC - */
- CONTENT_OP_sc, /* sc - */
- CONTENT_OP_SCN, /* SCN - */
- CONTENT_OP_scn, /* scn - */
- CONTENT_OP_sh, /* sh - */
- CONTENT_OP_T_, /* T* - */
- CONTENT_OP_Tc, /* Tc - */
- CONTENT_OP_Td, /* Td - */
- CONTENT_OP_TD, /* TD - */
- CONTENT_OP_Tf, /* Tf - */
- CONTENT_OP_Tj, /* Tj - */
- CONTENT_OP_TJ, /* TJ - */
- CONTENT_OP_TL, /* TL - */
- CONTENT_OP_Tm, /* Tm - */
- CONTENT_OP_Tr, /* Tr - */
- CONTENT_OP_Ts, /* Ts - */
- CONTENT_OP_Tw, /* Tw - */
- CONTENT_OP_Tz, /* Tz - */
- CONTENT_OP_v, /* v - */
- CONTENT_OP_w, /* w - */
- CONTENT_OP_W, /* W - */
- CONTENT_OP_W_, /* W* - */
- CONTENT_OP_y, /* y - append curved segment to path */
- CONTENT_OP__, /* ' - move to next line and show text */
- CONTENT_OP___, /* " - set word and char spacing, move to next line and
- * show text */
+ /**
+ * close, fill and stroke path with nonzero winding rule.
+ * b()
+ */
+ CONTENT_OP_b,
+
+ /**
+ * fill and stroke path using nonzero winding rule.
+ * B()
+ */
+ CONTENT_OP_B,
+
+ /**
+ * close, fill and stroke path with even/odd rule
+ * b*()
+ */
+ CONTENT_OP_b_,
+
+ /**
+ * fill and stroke path with even/odd rule
+ * B*()
+ */
+ CONTENT_OP_B_,
+
+ /**
+ * tag prp BDC
+ * begin marked content sequence with property list
+ */
+ CONTENT_OP_BDC,
+
+ /**
+ * BI()
+ * begin inline image
+ */
+ CONTENT_OP_BI,
+
+ /**
+ * tag BMC
+ * begin marked content sequence
+ */
+ CONTENT_OP_BMC,
+
+ /**
+ * begin text
+ * BT()
+ */
+ CONTENT_OP_BT,
+
+ /**
+ * begin compatibility
+ * BX()
+ */
+ CONTENT_OP_BX,
+
+ /**
+ * append curved segment to path
+ * c(num x1, num y1, num x2, num y2, num x3, num y3)
+ */
+ CONTENT_OP_c,
+ CONTENT_OP_cm, /* a b c d e f cm - concatenate matrix to current transform matrix */
+ CONTENT_OP_CS, /* name CS - set colour space for stroking operations */
+ CONTENT_OP_cs, /* name cs - set colourspace for non stroke operations */
+ CONTENT_OP_d, /* array phase d - set line dash pattern */
+ CONTENT_OP_d0, /* wx wy d0 - set glyph width in type 3 font */
+ CONTENT_OP_d1, /* wx wy llx lly urx ury d1 - set glyph width and bounding box in type 3 font */
+ CONTENT_OP_Do, /* name Do - invoke named xobject */
+ CONTENT_OP_DP, /* tag prp DP - define marked content point with property list */
+ CONTENT_OP_EI, /* EI - end of inline image */
+ CONTENT_OP_EMC, /* EMC - end marked content sequence */
+ CONTENT_OP_ET, /* ET - end text object */
+ CONTENT_OP_EX, /* EX - end compatibility section */
+ CONTENT_OP_f, /* f - fill path using nonzero winding rule */
+ CONTENT_OP_F, /* F - fill path using nonzero winding rule */
+ CONTENT_OP_f_, /* f* - fill path with even/odd rule */
+
+ /**
+ * set gray level for stroking operations
+ * G(num gray)
+ */
+ CONTENT_OP_G,
+
+ /**
+ * set gray level for nonstroking operations
+ * g(num gray)
+ */
+ CONTENT_OP_g,
+ CONTENT_OP_gs, /* dictName gs - set parameters from graphics state dictionary */
+
+ /**
+ * close subpath
+ * h()
+ */
+ CONTENT_OP_h,
+
+ /**
+ * set flatness tolerance
+ * i(num flatness)
+ */
+ CONTENT_OP_i,
+ CONTENT_OP_ID, /* ID - begin inline image data */
+
+ /**
+ * set line join style (0, 1 or 2)
+ * j(int linejoin)
+ */
+ CONTENT_OP_j,
+ CONTENT_OP_J, /* linecap J - set line cap style (int 0, 1 or 2) */
+ CONTENT_OP_K, /* c m y k K - set cmyk colour for stroking operations */
+ CONTENT_OP_k, /* c m y k k - set cmyk colour for nonstroking operations */
+ CONTENT_OP_l, /* x y l - append straight line segment to path */
+ CONTENT_OP_m, /* x y m - begin new subpath */
+
+ /**
+ * set mitre limit
+ * M(num mitrelimit)
+ */
+ CONTENT_OP_M,
+ CONTENT_OP_MP, /* tag MP - define marked content point */
+ CONTENT_OP_n, /* n - end path without filling or stroking*/
+ CONTENT_OP_q, /* q - save graphics state */
+ CONTENT_OP_Q, /* Q - restore graphics state */
+ CONTENT_OP_re, /* x y w h re - append rectangle to path */
+ CONTENT_OP_RG, /* r g b RG - stroke colour in DeviceRGB colourspace */
+ CONTENT_OP_rg, /* r g b rg - nonstroke colour in DeviceRGB colourspace */
+ CONTENT_OP_ri, /* intent ri - set color rendering intent */
+ CONTENT_OP_s, /* s - close and stroke path */
+ CONTENT_OP_S, /* S - stroke path */
+ CONTENT_OP_SC, /* c1 c... SC - set colour for stroking operation. 1 3 or 4 params */
+ CONTENT_OP_sc, /* c1 c... sc - same as SC for nonstroking operations */
+ CONTENT_OP_SCN, /* c1 c... name SCN - same as SC but extra colour spaces. max 32 params */
+ CONTENT_OP_scn, /* c1 c... name scn - same as SCN for nonstroking operations */
+ CONTENT_OP_sh, /* name sh - paint area defined by shading pattern */
+ CONTENT_OP_T_, /* T* - move to start of next text line */
+ CONTENT_OP_Tc, /* charspace Tc - set character spacing */
+ CONTENT_OP_Td, /* tx ty Td - move text position */
+ CONTENT_OP_TD, /* tx ty TD - move text position and set leading */
+ CONTENT_OP_Tf, /* font size Tf - select text font and size */
+ CONTENT_OP_Tj, /* string Tj - show text */
+ CONTENT_OP_TJ, /* array TJ - show text strings allowing individual positioning */
+ CONTENT_OP_TL, /* leading TL - set text leading for T* ' " operators */
+ CONTENT_OP_Tm, /* a b c d e f Tm - set the text matrix */
+ CONTENT_OP_Tr, /* render Tr - set rendering mode (int) */
+ CONTENT_OP_Ts, /* rise Ts - set text rise */
+ CONTENT_OP_Tw, /* wordspace Tw - set word spacing */
+ CONTENT_OP_Tz, /* scale Tz - set horizontal scaling */
+ CONTENT_OP_v, /* x2 y2 x3 y3 v - append curved segment to path */
+
+ /**
+ * set line width
+ * w(num linewidth)
+ */
+ CONTENT_OP_w,
+ CONTENT_OP_W, /* W - set clipping path using nonzero winding rule */
+ CONTENT_OP_W_, /* W* - set clipping path using odd even rule */
+ CONTENT_OP_y, /* x1 y1 x3 y3 y - append curved segment to path */
+ CONTENT_OP__, /* string ' - move to next line and show text */
+ CONTENT_OP___, /* aw ac string " - set word and char spacing, move to next line and show text */
};
+/* six numbers is adequate for almost all operations */
+#define content_number_size (6)
+
+/* compute how long the embedded string can be without inflating the
+ * structure. size of the pointer is used instead of unsigned int as that is
+ * what will control the structure padding.
+ */
+#define content_string_intrnl_lngth ((sizeof(float) * content_number_size) - sizeof(uint8_t *))
+
+
struct content_operation
{
enum content_operator operator;
-
+ union {
+ float number[content_number_size];
+
+ char *name;
+
+ int64_t i[3];
+
+ struct {
+ unsigned int length;
+ union {
+ char cdata[content_string_intrnl_lngth];
+ uint8_t *pdata;
+ } u;
+ } string;
+
+ struct {
+ unsigned int length;
+ struct cos_object **values;
+ } array;
+
+ struct {
+ char *name;
+ float number;
+ } namenumber;
+ } u;
};
#endif
diff --git a/src/cos_parse.c b/src/cos_parse.c
index 1881e6a..a1587d5 100644
--- a/src/cos_parse.c
+++ b/src/cos_parse.c
@@ -1245,6 +1245,371 @@ parse_operator(struct cos_stream *stream,
return NSPDFERROR_OK;
}
+
+/**
+ * get the content stream token text for an operator
+ *
+ * Used in the diagnostics printed when an operator is passed the wrong
+ * number of operands. Enumeration names ending in an underscore stand for
+ * the operators spelt with a trailing '*' and the final two entries are the
+ * single and double quote operators.
+ *
+ * \param operator The operator to name
+ * \return static string with the operator token or "????" if the value is
+ *         not a known operator
+ */
+static const char *operator_name(enum content_operator operator)
+{
+    switch (operator) {
+    case CONTENT_OP_b: return "b";
+    case CONTENT_OP_B: return "B";
+    case CONTENT_OP_b_: return "b*";
+    case CONTENT_OP_B_: return "B*";
+    case CONTENT_OP_BDC: return "BDC";
+    case CONTENT_OP_BI: return "BI";
+    case CONTENT_OP_BMC: return "BMC";
+    case CONTENT_OP_BT: return "BT";
+    case CONTENT_OP_BX: return "BX";
+    case CONTENT_OP_c: return "c";
+    case CONTENT_OP_cm: return "cm";
+    case CONTENT_OP_CS: return "CS";
+    case CONTENT_OP_cs: return "cs";
+    case CONTENT_OP_d: return "d";
+    case CONTENT_OP_d0: return "d0";
+    case CONTENT_OP_d1: return "d1";
+    case CONTENT_OP_Do: return "Do";
+    case CONTENT_OP_DP: return "DP";
+    case CONTENT_OP_EI: return "EI";
+    case CONTENT_OP_EMC: return "EMC";
+    case CONTENT_OP_ET: return "ET";
+    case CONTENT_OP_EX: return "EX";
+    case CONTENT_OP_f: return "f";
+    case CONTENT_OP_F: return "F";
+    case CONTENT_OP_f_: return "f*";
+    case CONTENT_OP_G: return "G";
+    case CONTENT_OP_g: return "g";
+    case CONTENT_OP_gs: return "gs";
+    case CONTENT_OP_h: return "h";
+    case CONTENT_OP_i: return "i";
+    case CONTENT_OP_ID: return "ID";
+    case CONTENT_OP_j: return "j";
+    case CONTENT_OP_J: return "J";
+    case CONTENT_OP_K: return "K";
+    case CONTENT_OP_k: return "k";
+    case CONTENT_OP_l: return "l";
+    case CONTENT_OP_m: return "m";
+    case CONTENT_OP_M: return "M";
+    case CONTENT_OP_MP: return "MP";
+    case CONTENT_OP_n: return "n";
+    case CONTENT_OP_q: return "q";
+    case CONTENT_OP_Q: return "Q";
+    case CONTENT_OP_re: return "re";
+    case CONTENT_OP_RG: return "RG";
+    case CONTENT_OP_rg: return "rg";
+    case CONTENT_OP_ri: return "ri";
+    case CONTENT_OP_s: return "s";
+    case CONTENT_OP_S: return "S";
+    case CONTENT_OP_SC: return "SC";
+    case CONTENT_OP_sc: return "sc";
+    case CONTENT_OP_SCN: return "SCN";
+    case CONTENT_OP_scn: return "scn";
+    case CONTENT_OP_sh: return "sh";
+    case CONTENT_OP_T_: return "T*";
+    case CONTENT_OP_Tc: return "Tc";
+    case CONTENT_OP_Td: return "Td";
+    case CONTENT_OP_TD: return "TD";
+    case CONTENT_OP_Tf: return "Tf";
+    case CONTENT_OP_Tj: return "Tj";
+    case CONTENT_OP_TJ: return "TJ";
+    case CONTENT_OP_TL: return "TL";
+    case CONTENT_OP_Tm: return "Tm";
+    case CONTENT_OP_Tr: return "Tr";
+    case CONTENT_OP_Ts: return "Ts";
+    case CONTENT_OP_Tw: return "Tw";
+    case CONTENT_OP_Tz: return "Tz";
+    case CONTENT_OP_v: return "v";
+    case CONTENT_OP_w: return "w";
+    case CONTENT_OP_W: return "W";
+    /* the odd even clipping operator is spelt W* like the other _ entries */
+    case CONTENT_OP_W_: return "W*";
+    case CONTENT_OP_y: return "y";
+    case CONTENT_OP__: return "\'";
+    case CONTENT_OP___: return "\"";
+    }
+    return "????";
+}
+
+
+/**
+ * move number operands from list into operation
+ *
+ * This ensures all operands are correctly handled not just the wanted ones:
+ * up to \p wanted operands are converted with cos_get_number(), every operand
+ * passed in is freed and a message is printed if the operand count differs
+ * from \p wanted. An operand that cannot be converted is reported and its
+ * slot in the operation is left unset.
+ *
+ * \param wanted The number of wanted operands to place in the operation
+ * \param operands The array of operands from the parse
+ * \param operand_idx The number of operands from the parse, set to zero on
+ *                    return as all of them have been freed
+ * \param operation_out The operation to place numbers in
+ * \return NSPDFERROR_OK in all cases, problems are only reported
+ */
+static nspdferror
+copy_numbers(unsigned int wanted,
+             struct cos_object **operands,
+             unsigned int *operand_idx,
+             struct content_operation *operation_out)
+{
+    nspdferror res;
+    unsigned int index = 0;
+
+    while ((index < (*operand_idx)) &&
+           (index < wanted)) {
+        /* process wanted operands */
+        res = cos_get_number(NULL,
+                             *(operands + index),
+                             &operation_out->u.number[index]);
+        if (res != NSPDFERROR_OK) {
+            printf("operand %d could not be set in operation (code %d)\n",
+                   index, res);
+        }
+        cos_free_object(*(operands + index));
+        index++;
+    }
+    if ((*operand_idx) > index) {
+        /* surplus operands are reported and discarded */
+        printf("operator %s that takes %d operands passed %d\n",
+               operator_name(operation_out->operator), wanted, *operand_idx);
+        while (index < (*operand_idx)) {
+            cos_free_object(*(operands + index));
+            index++;
+        }
+    } else if ((*operand_idx) < wanted) {
+        /* too few operands. The count must be compared with wanted as
+         * index never exceeds the operand count at this point.
+         */
+        printf("operator %s that takes %d operands passed %d\n",
+               operator_name(operation_out->operator), wanted, *operand_idx);
+    }
+
+    *operand_idx = 0; /* all operands freed */
+
+    return NSPDFERROR_OK;
+}
+
+/**
+ * move integer operands from list into operation
+ *
+ * Up to \p wanted operands are converted with cos_get_int() and stored in
+ * the operation integer array. Every operand passed in is freed and a
+ * message is printed if the operand count differs from \p wanted.
+ *
+ * \param wanted The number of wanted operands to place in the operation
+ * \param operands The array of operands from the parse
+ * \param operand_idx The number of operands from the parse, set to zero on
+ *                    return as all of them have been freed
+ * \param operation_out The operation to place integers in
+ * \return NSPDFERROR_OK in all cases, problems are only reported
+ */
+static nspdferror
+copy_integers(unsigned int wanted,
+              struct cos_object **operands,
+              unsigned int *operand_idx,
+              struct content_operation *operation_out)
+{
+    nspdferror res;
+    unsigned int index = 0;
+
+    while ((index < (*operand_idx)) &&
+           (index < wanted)) {
+        /* process wanted operands */
+        res = cos_get_int(NULL,
+                          *(operands + index),
+                          &operation_out->u.i[index]);
+        if (res != NSPDFERROR_OK) {
+            printf("operand %d could not be set in operation (code %d)\n",
+                   index, res);
+        }
+        cos_free_object(*(operands + index));
+        index++;
+    }
+    if ((*operand_idx) > index) {
+        /* surplus operands are reported and discarded */
+        printf("operator %s that takes %d operands passed %d\n",
+               operator_name(operation_out->operator), wanted, *operand_idx);
+        while (index < (*operand_idx)) {
+            cos_free_object(*(operands + index));
+            index++;
+        }
+    } else if ((*operand_idx) < wanted) {
+        /* too few operands. The count must be compared with wanted as
+         * index never exceeds the operand count at this point.
+         */
+        printf("operator %s that takes %d operands passed %d\n",
+               operator_name(operation_out->operator), wanted, *operand_idx);
+    }
+
+    *operand_idx = 0; /* all operands freed */
+
+    return NSPDFERROR_OK;
+}
+
+/**
+ * move a single string operand from list into operation
+ *
+ * The first operand is read with cos_get_string(). Data no longer than
+ * content_string_intrnl_lngth is copied into the operation, longer data is
+ * taken over from the string object instead. Every operand passed in is
+ * freed.
+ *
+ * \param operands The array of operands from the parse
+ * \param operand_idx The number of operands from the parse, zero on return
+ * \param operation_out The operation to place the string in
+ * \return NSPDFERROR_OK in all cases, problems are only reported
+ */
+static nspdferror
+copy_string(struct cos_object **operands,
+            unsigned int *operand_idx,
+            struct content_operation *operation_out)
+{
+    nspdferror res;
+    struct cos_string *str;
+    unsigned int n;
+
+    /* nothing to read the string from */
+    if ((*operand_idx) == 0) {
+        printf("operator %s that takes %d operands passed %d\n",
+               operator_name(operation_out->operator), 1, *operand_idx);
+        operation_out->u.string.length = 0;
+        return NSPDFERROR_OK;
+    }
+
+    res = cos_get_string(NULL, operands[0], &str);
+    if (res == NSPDFERROR_OK) {
+        operation_out->u.string.length = str->length;
+        if (str->length <= content_string_intrnl_lngth) {
+            /* short enough to be held inside the operation */
+            memcpy(operation_out->u.string.u.cdata,
+                   str->data,
+                   str->length);
+        } else {
+            /* take the data buffer over from the string object */
+            operation_out->u.string.u.pdata = str->data;
+            str->alloc = 0;
+            str->length = 0;
+        }
+    } else {
+        printf("string could not be set in operation (code %d)\n", res);
+        operation_out->u.string.length = 0;
+    }
+
+    if ((*operand_idx) > 1) {
+        printf("operator %s that takes %d operands passed %d\n",
+               operator_name(operation_out->operator), 1, *operand_idx);
+    }
+
+    /* release every operand, wanted or not */
+    for (n = 0; n < (*operand_idx); n++) {
+        cos_free_object(operands[n]);
+    }
+    *operand_idx = 0;
+
+    return NSPDFERROR_OK;
+}
+
+/**
+ * move a single array operand from list into operation
+ *
+ * When the first operand is an array its values are taken over by the
+ * operation, otherwise the operation array is given zero length. Every
+ * operand passed in is freed.
+ *
+ * \param operands The array of operands from the parse
+ * \param operand_idx The number of operands from the parse, zero on return
+ * \param operation_out The operation to place the array in
+ * \return NSPDFERROR_OK in all cases, problems are only reported
+ */
+static nspdferror
+copy_array(struct cos_object **operands,
+           unsigned int *operand_idx,
+           struct content_operation *operation_out)
+{
+    struct cos_object *first;
+    unsigned int n;
+
+    /* nothing to read the array from */
+    if ((*operand_idx) == 0) {
+        printf("operator %s that takes %d operands passed %d\n",
+               operator_name(operation_out->operator), 1, *operand_idx);
+        operation_out->u.array.length = 0;
+        return NSPDFERROR_OK;
+    }
+
+    first = operands[0];
+    if (first->type == COS_TYPE_ARRAY) {
+        /* take the values over, leaving the array object marked empty */
+        operation_out->u.array.length = first->u.array->length;
+        operation_out->u.array.values = first->u.array->values;
+        first->u.array->alloc = 0;
+        first->u.array->length = 0;
+    } else {
+        printf("operand was not an array\n");
+        operation_out->u.array.length = 0;
+    }
+
+    if ((*operand_idx) > 1) {
+        printf("operator %s that takes %d operands passed %d\n",
+               operator_name(operation_out->operator), 1, *operand_idx);
+    }
+
+    /* release every operand, wanted or not */
+    for (n = 0; n < (*operand_idx); n++) {
+        cos_free_object(operands[n]);
+    }
+    *operand_idx = 0;
+
+    return NSPDFERROR_OK;
+}
+
+
+/**
+ * move a single name operand from list into operation
+ *
+ * When the first operand is a name its string is taken over by the
+ * operation, otherwise the operation name is set to NULL. Every operand
+ * passed in is freed.
+ *
+ * \param operands The array of operands from the parse
+ * \param operand_idx The number of operands from the parse, zero on return
+ * \param operation_out The operation to place the name in
+ * \return NSPDFERROR_OK in all cases, problems are only reported
+ */
+static nspdferror
+copy_name(struct cos_object **operands,
+          unsigned int *operand_idx,
+          struct content_operation *operation_out)
+{
+    struct cos_object *first;
+    unsigned int n;
+
+    /* nothing to read the name from */
+    if ((*operand_idx) == 0) {
+        printf("operator %s that takes %d operands passed %d\n",
+               operator_name(operation_out->operator), 1, *operand_idx);
+        operation_out->u.name = NULL;
+        return NSPDFERROR_OK;
+    }
+
+    first = operands[0];
+    if (first->type == COS_TYPE_NAME) {
+        /* take the name over so freeing the operand does not release it */
+        operation_out->u.name = first->u.name;
+        first->u.name = NULL;
+    } else {
+        printf("operand was not a name\n");
+        operation_out->u.name = NULL;
+    }
+
+    if ((*operand_idx) > 1) {
+        printf("operator %s that takes %d operands passed %d\n",
+               operator_name(operation_out->operator), 1, *operand_idx);
+    }
+
+    /* release every operand, wanted or not */
+    for (n = 0; n < (*operand_idx); n++) {
+        cos_free_object(operands[n]);
+    }
+    *operand_idx = 0;
+
+    return NSPDFERROR_OK;
+}
+
+/**
+ * move a name operand followed by a number operand from list into operation
+ *
+ * The name of the first operand is taken over by the operation and the
+ * second operand is converted with cos_get_number(). The operation is given
+ * a NULL name and a zero number first so both fields hold defined values
+ * when an operand is missing or of the wrong type. Every operand passed in
+ * is freed.
+ *
+ * \param operands The array of operands from the parse
+ * \param operand_idx The number of operands from the parse, zero on return
+ * \param operation_out The operation to place the name and number in
+ * \return NSPDFERROR_OK in all cases, problems are only reported
+ */
+static nspdferror
+copy_name_number(struct cos_object **operands,
+                 unsigned int *operand_idx,
+                 struct content_operation *operation_out)
+{
+    unsigned int index = 0;
+
+    /* defaults for every path that cannot obtain a value */
+    operation_out->u.namenumber.name = NULL;
+    operation_out->u.namenumber.number = 0;
+
+    if ((*operand_idx) == 0) {
+        printf("operator %s that takes %d operands passed %d\n",
+               operator_name(operation_out->operator), 2, *operand_idx);
+        return NSPDFERROR_OK;
+    }
+
+    /* process wanted operands */
+    if ((*operands)->type != COS_TYPE_NAME) {
+        printf("operand was not a name\n");
+    } else {
+        /* steal the name from the name object */
+        operation_out->u.namenumber.name = (*operands)->u.name;
+        (*operands)->u.name = NULL;
+
+        /* get the number */
+        if ((*operand_idx) > 1) {
+            nspdferror res;
+            res = cos_get_number(NULL,
+                                 *(operands + 1),
+                                 &operation_out->u.namenumber.number);
+            if (res != NSPDFERROR_OK) {
+                printf("operand 1 could not be set in operation (code %d)\n", res);
+            }
+        } else {
+            printf("operator %s that takes %d operands passed %d\n",
+                   operator_name(operation_out->operator), 2, *operand_idx);
+        }
+    }
+
+    if ((*operand_idx) > 2) {
+        printf("operator %s that takes %d operands passed %d\n",
+               operator_name(operation_out->operator), 2, *operand_idx);
+    }
+
+    /* free all operands */
+    while (index < (*operand_idx)) {
+        cos_free_object(*(operands + index));
+        index++;
+    }
+    *operand_idx = 0;
+
+    return NSPDFERROR_OK;
+}
+
+/** largest number of operands any operator requires
+ *
+ * This would be 6 except scn in Nchannel colourspace may have 32
+ */
#define MAX_OPERAND_COUNT 32
static inline nspdferror
@@ -1342,7 +1707,6 @@ parse_content_operation(struct nspdf_doc *doc,
res = parse_operator(stream, &offset, &operator);
}
- operation_out->operator = operator;
/*
printf("returning operator %d with %d operands %d to %d of %d\n>>>%.*s<<<\n",
@@ -1355,11 +1719,138 @@ parse_content_operation(struct nspdf_doc *doc,
stream->data + (*offset_out));
*/
- *operand_idx = 0;
+ operation_out->operator = operator;
- *offset_out = offset;
+ switch (operator) {
+ case CONTENT_OP_b:
+ case CONTENT_OP_B:
+ case CONTENT_OP_b_:
+ case CONTENT_OP_B_:
+ case CONTENT_OP_BI:
+ case CONTENT_OP_BT:
+ case CONTENT_OP_BX:
+ case CONTENT_OP_EI:
+ case CONTENT_OP_EMC:
+ case CONTENT_OP_ET:
+ case CONTENT_OP_EX:
+ case CONTENT_OP_f:
+ case CONTENT_OP_F:
+ case CONTENT_OP_f_:
+ case CONTENT_OP_h:
+ case CONTENT_OP_ID:
+ case CONTENT_OP_n:
+ case CONTENT_OP_q:
+ case CONTENT_OP_Q:
+ case CONTENT_OP_s:
+ case CONTENT_OP_S:
+ case CONTENT_OP_T_:
+ case CONTENT_OP_W:
+ case CONTENT_OP_W_:
+ /* no operands */
+ res = copy_numbers(0, operands, operand_idx, operation_out);
+ break;
- return NSPDFERROR_OK;
+ case CONTENT_OP_G:
+ case CONTENT_OP_g:
+ case CONTENT_OP_i:
+ case CONTENT_OP_M:
+ case CONTENT_OP_Tc:
+ case CONTENT_OP_TL:
+ case CONTENT_OP_Ts:
+ case CONTENT_OP_Tw:
+ case CONTENT_OP_Tz:
+ case CONTENT_OP_w:
+ /* one number */
+ res = copy_numbers(1, operands, operand_idx, operation_out);
+ break;
+
+ case CONTENT_OP_d0:
+ case CONTENT_OP_l:
+ case CONTENT_OP_m:
+ case CONTENT_OP_Td:
+ case CONTENT_OP_TD:
+ /* two numbers */
+ res = copy_numbers(2, operands, operand_idx, operation_out);
+ break;
+
+ case CONTENT_OP_RG:
+ case CONTENT_OP_rg:
+ /* three numbers */
+ res = copy_numbers(3, operands, operand_idx, operation_out);
+ break;
+
+ case CONTENT_OP_K:
+ case CONTENT_OP_k:
+ case CONTENT_OP_re:
+ case CONTENT_OP_v:
+ case CONTENT_OP_y:
+ /* four numbers */
+ res = copy_numbers(4, operands, operand_idx, operation_out);
+ break;
+
+ case CONTENT_OP_c:
+ case CONTENT_OP_cm:
+ case CONTENT_OP_d1:
+ case CONTENT_OP_Tm:
+ /* six numbers */
+ res = copy_numbers(6, operands, operand_idx, operation_out);
+ break;
+
+ case CONTENT_OP_Tj:
+ case CONTENT_OP__:
+ /* single string */
+ res = copy_string(operands, operand_idx, operation_out);
+ break;
+
+ case CONTENT_OP_TJ:
+ /* single array */
+ res = copy_array(operands, operand_idx, operation_out);
+ break;
+
+ case CONTENT_OP_Tf:
+ /* name and number */
+ res = copy_name_number(operands, operand_idx, operation_out);
+ break;
+
+ case CONTENT_OP_gs:
+ /* name */
+ res = copy_name(operands, operand_idx, operation_out);
+ break;
+
+ case CONTENT_OP_j:
+ case CONTENT_OP_J:
+ /* one integer */
+ res = copy_integers(1, operands, operand_idx, operation_out);
+ break;
+
+ case CONTENT_OP_BDC:
+ case CONTENT_OP_BMC:
+ case CONTENT_OP_CS:
+ case CONTENT_OP_cs:
+ case CONTENT_OP_d:
+ case CONTENT_OP_Do:
+ case CONTENT_OP_DP:
+ case CONTENT_OP_MP:
+ case CONTENT_OP_ri:
+ case CONTENT_OP_SC:
+ case CONTENT_OP_sc:
+ case CONTENT_OP_SCN:
+ case CONTENT_OP_scn:
+ case CONTENT_OP_sh:
+ case CONTENT_OP_Tr:
+ case CONTENT_OP___:
+ res = copy_numbers(0, operands, operand_idx, operation_out);
+ break;
+ }
+
+ if (res == NSPDFERROR_OK) {
+
+ *operand_idx = 0;
+
+ *offset_out = offset;
+ }
+
+ return res;
}
nspdferror
@@ -1376,7 +1867,22 @@ cos_parse_content_streams(struct nspdf_doc *doc,
struct cos_object *operands[MAX_OPERAND_COUNT];
unsigned int operand_idx = 0;
- //printf("%.*s", (int)stream->length, stream->data);
+ //#define SHOW_STRUCT_SIZE
+ #ifdef SHOW_STRUCT_SIZE
+ struct content_operation foo;
+ printf("content_operation length:%d\nfloat:%d\nunsigned int:%d\n"
+ "union %d\n"
+ " n:%d string:%d string.u:%d string.u.cdata:%d array:%d\n",
+ sizeof(struct content_operation),
+ sizeof(float),
+ sizeof(unsigned int),
+ sizeof(foo.u),
+ sizeof(foo.u.n),
+ sizeof(foo.u.string),
+ sizeof(foo.u.string.u),
+ sizeof(foo.u.string.u.cdata),
+ sizeof(foo.u.array));
+ #endif
cosobj = calloc(1, sizeof(struct cos_object));
if (cosobj == NULL) {
@@ -1394,6 +1900,8 @@ cos_parse_content_streams(struct nspdf_doc *doc,
stream = *(streams + stream_index);
offset = 0;
+ //printf("%.*s", (int)stream->length, stream->data);
+
/* skip any leading whitespace */
res = nspdf__stream_skip_ws(stream, &offset);
if (res != NSPDFERROR_OK) {
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=04517ee5560353cab1...
commit 04517ee5560353cab1ecf5d24a1cb6301d3a8b05
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
rename cos object name entry
diff --git a/src/cos_object.c b/src/cos_object.c
index 7a02ebd..0c97190 100644
--- a/src/cos_object.c
+++ b/src/cos_object.c
@@ -29,7 +29,7 @@ nspdferror cos_free_object(struct cos_object *cos_obj)
switch (cos_obj->type) {
case COS_TYPE_NAME:
- free(cos_obj->u.n);
+ free(cos_obj->u.name);
break;
case COS_TYPE_STRING:
@@ -92,7 +92,7 @@ cos_extract_dictionary_value(struct cos_object *dict,
prev = &dict->u.dictionary;
entry = *prev;
while (entry != NULL) {
- if (strcmp(entry->key->u.n, key) == 0) {
+ if (strcmp(entry->key->u.name, key) == 0) {
*value_out = entry->value;
*prev = entry->next;
cos_free_object(entry->key);
@@ -127,7 +127,7 @@ cos_get_dictionary_value(struct nspdf_doc *doc,
entry = dict->u.dictionary;
while (entry != NULL) {
- if (strcmp(entry->key->u.n, key) == 0) {
+ if (strcmp(entry->key->u.name, key) == 0) {
*value_out = entry->value;
res = NSPDFERROR_OK;
break;
@@ -275,6 +275,26 @@ cos_get_int(struct nspdf_doc *doc,
}
nspdferror
+cos_get_number(struct nspdf_doc *doc,
+               struct cos_object *cobj,
+               float *value_out)
+{
+    nspdferror res = nspdf__xref_get_referenced(doc, &cobj);
+
+    /* could not resolve the object, value_out is left untouched */
+    if (res != NSPDFERROR_OK) {
+        return res;
+    }
+
+    /* integers are widened to float, reals are passed through */
+    switch (cobj->type) {
+    case COS_TYPE_INT:
+        *value_out = (float)cobj->u.i;
+        break;
+
+    case COS_TYPE_REAL:
+        *value_out = cobj->u.real;
+        break;
+
+    default:
+        res = NSPDFERROR_TYPE;
+        break;
+    }
+
+    return res;
+}
+
+nspdferror
cos_get_name(struct nspdf_doc *doc,
struct cos_object *cobj,
const char **value_out)
@@ -286,7 +306,7 @@ cos_get_name(struct nspdf_doc *doc,
if (cobj->type != COS_TYPE_NAME) {
res = NSPDFERROR_TYPE;
} else {
- *value_out = cobj->u.n;
+ *value_out = cobj->u.name;
}
}
return res;
diff --git a/src/cos_object.h b/src/cos_object.h
index c5b85fa..56c2179 100644
--- a/src/cos_object.h
+++ b/src/cos_object.h
@@ -101,7 +101,7 @@ struct cos_object {
float real;
/** name */
- char *n;
+ char *name;
/** string */
struct cos_string *s;
@@ -200,6 +200,21 @@ nspdferror cos_get_array_dictionary(struct nspdf_doc *doc, struct cos_object *ar
nspdferror cos_get_int(struct nspdf_doc *doc, struct cos_object *cobj, int64_t *value_out);
/**
+ * get the float value of a cos object.
+ *
+ * Get the value from a cos object, if the object is an object reference it
+ * will be dereferenced first. The dereferencing will parse any previously
+ * unreferenced indirect objects as required.
+ *
+ * \param doc The document the cos object belongs to.
+ * \param cobj A cos object of integer or real type.
+ * \param value_out The result value.
+ * \return NSPDFERROR_OK and \p value_out updated,
+ * NSPDFERROR_TYPE if the \p cobj is neither an integer nor a real
+ */
+nspdferror cos_get_number(struct nspdf_doc *doc, struct cos_object *cobj, float *value_out);
+
+/**
* get the name value of a cos object.
*
* Get the value from a cos object, if the object is an object reference it
diff --git a/src/cos_parse.c b/src/cos_parse.c
index 46282ca..1881e6a 100644
--- a/src/cos_parse.c
+++ b/src/cos_parse.c
@@ -569,7 +569,7 @@ cos_parse_name(struct cos_stream *stream,
}
cosobj->type = COS_TYPE_NAME;
- cosobj->u.n = strdup(name);
+ cosobj->u.name = strdup(name);
*cosobj_out = cosobj;
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=3fe413e5838eaf9d8b...
commit 3fe413e5838eaf9d8bc30a9a49f0d7707e84db35
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
add dereference error
diff --git a/include/nspdf/errors.h b/include/nspdf/errors.h
index 6591dbb..a557b9d 100644
--- a/include/nspdf/errors.h
+++ b/include/nspdf/errors.h
@@ -25,6 +25,7 @@ typedef enum {
NSPDFERROR_NOTFOUND, /**< key not found */
NSPDFERROR_FORMAT, /**< objects do not cornform to expected format */
NSPDFERROR_INCOMPLETE, /**< operation was not completed */
+ NSPDFERROR_REFERENCE, /**< unable to dereference object. */
} nspdferror;
#endif
diff --git a/src/xref.c b/src/xref.c
index 7780bf2..6218fac 100644
--- a/src/xref.c
+++ b/src/xref.c
@@ -160,6 +160,11 @@ nspdf__xref_get_referenced(struct nspdf_doc *doc, struct cos_object **cobj_out)
return NSPDFERROR_OK;
}
+ if (doc == NULL) {
+ /* a reference with no document to dereference against */
+ return NSPDFERROR_REFERENCE;
+ }
+
entry = doc->xref_table + cobj->u.reference->id;
/* check if referenced object is in range and exists. return null object if
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=9f0e9af2eeb08abcaa...
commit 9f0e9af2eeb08abcaa4991ae4e87440dcba2ada1
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
correctly parse content streams for pages contents
diff --git a/include/nspdf/errors.h b/include/nspdf/errors.h
index f2142ff..6591dbb 100644
--- a/include/nspdf/errors.h
+++ b/include/nspdf/errors.h
@@ -24,6 +24,7 @@ typedef enum {
NSPDFERROR_TYPE, /**< wrong type error */
NSPDFERROR_NOTFOUND, /**< key not found */
NSPDFERROR_FORMAT, /**< objects do not cornform to expected format */
+ NSPDFERROR_INCOMPLETE, /**< operation was not completed */
} nspdferror;
#endif
diff --git a/src/cos_object.c b/src/cos_object.c
index c7ec4e6..7a02ebd 100644
--- a/src/cos_object.c
+++ b/src/cos_object.c
@@ -386,36 +386,111 @@ cos_get_object(struct nspdf_doc *doc,
return res;
}
+/*
+ * exported interface documented in cos_object.h
+ *
+ * slightly different behaviour to other getters:
+ * - This getter can be passed an object pointer to a synthetic parsed content
+ * stream object in which case it returns that object's content operation
+ * list.
+ *
+ * - Alternatively it can be passed a single indirect object reference to a
+ * content stream which will be processed into a filtered stream and then
+ * converted into a parsed content stream which replaces the passed
+ * object. Freeing the underlying filtered streams is still to be done.
+ *
+ * - An array of indirect object references to content streams all of which
+ * will be converted as if a single stream of tokens and the result handled
+ * as per the single reference case.
+ *
+ * Returns NSPDFERROR_OK with content_out set, NSPDFERROR_NOMEM if a temporary
+ * array cannot be allocated, NSPDFERROR_TYPE if the object, an array entry or
+ * a dereferenced entry is of the wrong type, otherwise the error from
+ * dereferencing or parsing.
+ */
nspdferror
cos_get_content(struct nspdf_doc *doc,
struct cos_object *cobj,
struct cos_content **content_out)
{
nspdferror res;
- struct cos_object *content_obj;
+ struct cos_object **references;
+ unsigned int reference_count;
+ struct cos_stream **streams;
+ unsigned int index;
+ struct cos_object *content_obj; /* parsed content object */
+ struct cos_object tmpobj;
+
+ /* already parsed the content stream */
+ if (cobj->type == COS_TYPE_CONTENT) {
+ *content_out = cobj->u.content;
+ /* nothing to convert. Returning here also stops the code below from
+ * using reference_count and references which are not set on this path
+ */
+ return NSPDFERROR_OK;
+ } else if (cobj->type == COS_TYPE_REFERENCE) {
+ /* single reference */
+ reference_count = 1;
+ references = calloc(reference_count, sizeof(struct cos_object *));
+ if (references == NULL) {
+ return NSPDFERROR_NOMEM;
+ }
- res = nspdf__xref_get_referenced(doc, &cobj);
- if (res == NSPDFERROR_OK) {
- if (cobj->type == COS_TYPE_STREAM) {
- res = cos_parse_content_stream(doc, cobj->u.stream, &content_obj);
- if (res == NSPDFERROR_OK) {
- /* replace stream object with parsed content operations */
- struct cos_object tmpobj;
- tmpobj = *cobj;
- *cobj = *content_obj;
- *content_obj = tmpobj;
- cos_free_object(content_obj);
-
- *content_out = cobj->u.content;
+ *references = cobj;
+ } else if (cobj->type == COS_TYPE_ARRAY) {
+ /* array of references */
+ reference_count = cobj->u.array->length;
+ references = malloc(reference_count * sizeof(struct cos_object *));
+ if (references == NULL) {
+ return NSPDFERROR_NOMEM;
+ }
+ memcpy(references, cobj->u.array->values, reference_count * sizeof(struct cos_object *));
+ /* check all objects in array are references */
+ for (index = 0; index < reference_count ; index++) {
+ if ((*(references + index))->type != COS_TYPE_REFERENCE) {
+ free(references);
+ return NSPDFERROR_TYPE;
}
- } else if (cobj->type == COS_TYPE_CONTENT) {
- *content_out = cobj->u.content;
- } else {
- res = NSPDFERROR_TYPE;
}
+ } else {
+ return NSPDFERROR_TYPE;
}
- return res;
+
+ /* obtain array of streams */
+ streams = malloc(reference_count * sizeof(struct cos_stream *));
+ if (streams == NULL) {
+ free(references);
+ /* allocation failure, reported like the references array above */
+ return NSPDFERROR_NOMEM;
+ }
+
+ for (index = 0; index < reference_count ; index++) {
+ struct cos_object *stream_obj;
+
+ stream_obj = *(references + index);
+ res = nspdf__xref_get_referenced(doc, &stream_obj);
+ if (res != NSPDFERROR_OK) {
+ free(references);
+ free(streams);
+ return res;
+ }
+ if (stream_obj->type != COS_TYPE_STREAM) {
+ free(references);
+ free(streams);
+ return NSPDFERROR_TYPE;
+ }
+ *(streams + index) = stream_obj->u.stream;
+ }
+
+ res = cos_parse_content_streams(doc, streams, reference_count, &content_obj);
+ if (res != NSPDFERROR_OK) {
+ free(references);
+ free(streams);
+ return res;
+ }
+
+ /* replace passed object with parsed content operations object */
+ tmpobj = *cobj;
+ *cobj = *content_obj;
+ *content_obj = tmpobj;
+ cos_free_object(content_obj);
+
+ /** \todo call nspdf__xref_free_referenced(doc, *(references + index)); to free up storage associated with already parsed streams */
+
+ /* the temporary arrays are finished with, only the arrays themselves
+ * are released here and not the objects or streams they point at
+ */
+ free(references);
+ free(streams);
+
+ *content_out = cobj->u.content;
+
+ return NSPDFERROR_OK;
}
/*
diff --git a/src/cos_parse.c b/src/cos_parse.c
index 5ccd171..46282ca 100644
--- a/src/cos_parse.c
+++ b/src/cos_parse.c
@@ -1115,6 +1115,10 @@ parse_operator(struct cos_stream *stream,
offset = *offset_out;
+ if (offset >= stream->length) {
+ return NSPDFERROR_SYNTAX;
+ }
+
/* first char */
c = stream_byte(stream, offset);
if ((bclass[c] & (BC_WSPC | BC_CMNT) ) != 0) {
@@ -1125,13 +1129,15 @@ parse_operator(struct cos_stream *stream,
offset++;
/* possible second char */
c = stream_byte(stream, offset);
- if ((bclass[c] & (BC_WSPC | BC_CMNT) ) == 0) {
+ if ((offset < stream->length) &&
+ ((bclass[c] & (BC_WSPC | BC_CMNT) ) == 0)) {
lookup = (lookup << 8) | c;
offset++;
/* possible third char */
c = stream_byte(stream, offset);
- if ((bclass[c] & (BC_WSPC | BC_CMNT) ) == 0) {
+ if ((offset < stream->length) &&
+ ((bclass[c] & (BC_WSPC | BC_CMNT) ) == 0)) {
lookup = (lookup << 8) | c;
offset++;
@@ -1241,55 +1247,60 @@ parse_operator(struct cos_stream *stream,
#define MAX_OPERAND_COUNT 32
-static nspdferror
+static inline nspdferror
parse_content_operation(struct nspdf_doc *doc,
struct cos_stream *stream,
strmoff_t *offset_out,
+ struct cos_object **operands,
+ unsigned int *operand_idx,
struct content_operation *operation_out)
{
strmoff_t offset;
nspdferror res;
enum content_operator operator;
- struct cos_object *operands[MAX_OPERAND_COUNT];
- unsigned int operand_idx = 0;
offset = *offset_out;
res = parse_operator(stream, &offset, &operator);
while (res == NSPDFERROR_SYNTAX) {
/* was not an operator so check for what else it could have been */
- if (operand_idx >= MAX_OPERAND_COUNT) {
+ if (*operand_idx >= MAX_OPERAND_COUNT) {
/** \todo free any stacked operands */
printf("too many operands\n");
return NSPDFERROR_SYNTAX;
}
+ if (offset >= stream->length) {
+ *offset_out = offset;
+ return NSPDFERROR_INCOMPLETE;
+ }
+
switch (stream_byte(stream, offset)) {
case '-': case '+': case '.': case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7': case '8': case '9':
- res = cos_parse_number(stream, &offset, &operands[operand_idx]);
+ res = cos_parse_number(stream, &offset, &operands[*operand_idx]);
break;
case 't':
case 'f':
- res = cos_parse_boolean(stream, &offset, &operands[operand_idx]);
+ res = cos_parse_boolean(stream, &offset, &operands[*operand_idx]);
break;
case 'n':
- res = cos_parse_null(stream, &offset, &operands[operand_idx]);
+ res = cos_parse_null(stream, &offset, &operands[*operand_idx]);
break;
case '(':
- res = cos_parse_string(stream, &offset, &operands[operand_idx]);
+ res = cos_parse_string(stream, &offset, &operands[*operand_idx]);
break;
case '/':
- res = cos_parse_name(stream, &offset, &operands[operand_idx]);
+ res = cos_parse_name(stream, &offset, &operands[*operand_idx]);
break;
case '[':
- res = cos_parse_list(doc, stream, &offset, &operands[operand_idx]);
+ res = cos_parse_list(doc, stream, &offset, &operands[*operand_idx]);
break;
case '<':
@@ -1297,16 +1308,23 @@ parse_content_operation(struct nspdf_doc *doc,
res = cos_parse_dictionary(doc,
stream,
&offset,
- &operands[operand_idx]);
+ &operands[*operand_idx]);
} else {
res = cos_parse_hex_string(stream,
&offset,
- &operands[operand_idx]);
+ &operands[*operand_idx]);
}
break;
default:
- printf("unknown operand type\n");
+ printf("unknown operand with %d operands %d to %d of %d\n>>>%.*s<<<\n",
+ *operand_idx,
+ (*offset_out),
+ offset,
+ stream->length,
+ (offset + 1) - (*offset_out),
+ stream->data + (*offset_out));
+
res = NSPDFERROR_SYNTAX; /* syntax error */
}
@@ -1319,26 +1337,44 @@ parse_content_operation(struct nspdf_doc *doc,
}
/* move to next operand */
- operand_idx++;
+ (*operand_idx)++;
res = parse_operator(stream, &offset, &operator);
}
operation_out->operator = operator;
- //printf("returning operator %d with %d operands\n", operator, operand_idx);
+
+ /*
+ printf("returning operator %d with %d operands %d to %d of %d\n>>>%.*s<<<\n",
+ operator,
+ *operand_idx,
+ (*offset_out),
+ offset,
+ stream->length,
+ offset - (*offset_out),
+ stream->data + (*offset_out));
+ */
+
+ *operand_idx = 0;
*offset_out = offset;
+
return NSPDFERROR_OK;
}
nspdferror
-cos_parse_content_stream(struct nspdf_doc *doc,
- struct cos_stream *stream,
- struct cos_object **content_out)
+cos_parse_content_streams(struct nspdf_doc *doc,
+ struct cos_stream **streams,
+ unsigned int stream_count,
+ struct cos_object **content_out)
{
nspdferror res;
struct cos_object *cosobj;
strmoff_t offset;
+ struct cos_stream *stream;
+ unsigned int stream_index;
+ struct cos_object *operands[MAX_OPERAND_COUNT];
+ unsigned int operand_idx = 0;
//printf("%.*s", (int)stream->length, stream->data);
@@ -1354,42 +1390,50 @@ cos_parse_content_stream(struct nspdf_doc *doc,
goto cos_parse_content_stream_error;
}
- offset = 0;
+ for (stream_index = 0; stream_index < stream_count; stream_index++) {
+ stream = *(streams + stream_index);
+ offset = 0;
- /* skip any leading whitespace */
- res = nspdf__stream_skip_ws(stream, &offset);
- if (res != NSPDFERROR_OK) {
- goto cos_parse_content_stream_error;
- }
+ /* skip any leading whitespace */
+ res = nspdf__stream_skip_ws(stream, &offset);
+ if (res != NSPDFERROR_OK) {
+ goto cos_parse_content_stream_error;
+ }
+
+ while (offset < stream->length) {
+
+ /* ensure there is space in the operations array */
+ if (cosobj->u.content->alloc < (cosobj->u.content->length + 1)) {
+ struct content_operation *newops;
+ newops = realloc(cosobj->u.content->operations,
+ sizeof(struct content_operation) *
+ (cosobj->u.content->alloc + 32));
+ if (newops == NULL) {
+ res = NSPDFERROR_NOMEM;
+ goto cos_parse_content_stream_error;
+ }
+ cosobj->u.content->operations = newops;
+ cosobj->u.content->alloc += 32;
+ }
- while (offset < stream->length) {
- struct content_operation cop;
-
- /* ensure there is space in the operations array */
- if (cosobj->u.content->alloc < (cosobj->u.content->length + 1)) {
- struct content_operation *newops;
- newops = realloc(cosobj->u.content->operations,
- sizeof(struct content_operation) *
- (cosobj->u.content->alloc + 32));
- if (newops == NULL) {
- res = NSPDFERROR_NOMEM;
+ /* parse an operation out */
+ res = parse_content_operation(
+ doc,
+ stream,
+ &offset,
+ operands,
+ &operand_idx,
+ cosobj->u.content->operations + cosobj->u.content->length);
+ if (res== NSPDFERROR_OK) {
+ cosobj->u.content->length++;
+ } else if (res == NSPDFERROR_INCOMPLETE) {
+ //printf("Incomplete\n");
+ } else if (res != NSPDFERROR_OK) {
goto cos_parse_content_stream_error;
}
- cosobj->u.content->operations = newops;
- cosobj->u.content->alloc += 32;
- }
- res = parse_content_operation(
- doc,
- stream,
- &offset,
- cosobj->u.content->operations + cosobj->u.content->length);
- if (res != NSPDFERROR_OK) {
- goto cos_parse_content_stream_error;
}
- cosobj->u.content->length++;
}
-
*content_out = cosobj;
return NSPDFERROR_OK;
diff --git a/src/cos_parse.h b/src/cos_parse.h
index a9cb9c9..a6a65ca 100644
--- a/src/cos_parse.h
+++ b/src/cos_parse.h
@@ -30,6 +30,7 @@ nspdferror cos_parse_object(struct nspdf_doc *doc, struct cos_stream *stream, st
/**
* Parse content stream into content operations object
*/
-nspdferror cos_parse_content_stream(struct nspdf_doc *doc, struct cos_stream *stream, struct cos_object **content_out);
+nspdferror cos_parse_content_streams(struct nspdf_doc *doc, struct cos_stream **streams, unsigned int stream_count, struct cos_object **content_out);
+
#endif
diff --git a/src/page.c b/src/page.c
index 3844122..5299c7c 100644
--- a/src/page.c
+++ b/src/page.c
@@ -146,62 +146,20 @@ nspdf_page_count(struct nspdf_doc *doc, unsigned int *pages_out)
return NSPDFERROR_OK;
}
-static nspdferror
-nspdf__render_content_stream(struct nspdf_doc *doc,
- struct page_table_entry *page_entry,
- struct cos_object *content_entry)
-{
- nspdferror res;
- struct cos_content *content_operations;
-
- res = cos_get_content(doc, content_entry, &content_operations);
- if (res == NSPDFERROR_OK) {
- printf("%p\n", content_operations);
- }
-
- return res;
-}
/* exported interface documented in nspdf/page.h */
nspdferror
nspdf_page_render(struct nspdf_doc *doc, unsigned int page_number)
{
struct page_table_entry *page_entry;
- struct cos_object *content_array;
+ struct cos_content *page_content; /* page operations array */
nspdferror res;
page_entry = doc->page_table + page_number;
- /* contents may be an array of stream objects or just a single one */
- res = cos_get_array(doc, page_entry->contents, &content_array);
+ res = cos_get_content(doc, page_entry->contents, &page_content); /* cos_get_content() accepts a single stream reference or an array of them */
if (res == NSPDFERROR_OK) {
- unsigned int content_stream_count;
- unsigned int content_stream_index;
-
- res = cos_get_array_size(doc, content_array, &content_stream_count);
- if (res != NSPDFERROR_OK) {
- return res;
- }
- for (content_stream_index = 0;
- content_stream_index < content_stream_count;
- content_stream_index++) {
- struct cos_object *content_entry;
- res = cos_get_array_value(doc,
- content_array,
- content_stream_index,
- &content_entry);
- if (res != NSPDFERROR_OK) {
- break;
- }
-
- res = nspdf__render_content_stream(doc, page_entry, content_entry);
- if (res != NSPDFERROR_OK) {
- break;
- }
- }
- } else if (res == NSPDFERROR_TYPE) {
- res = nspdf__render_content_stream(doc, page_entry, page_entry->contents);
+ printf("%p\n", page_content); /* only the address of the operation list is printed, the operations are not acted on here */
}
-
return res;
}
diff --git a/src/pdf_doc.c b/src/pdf_doc.c
index d7c7a0e..3e55e16 100644
--- a/src/pdf_doc.c
+++ b/src/pdf_doc.c
@@ -23,19 +23,26 @@ nspdferror
nspdf__stream_skip_ws(struct cos_stream *stream, strmoff_t *offset)
{
uint8_t c;
- /* TODO sort out keeping offset in range */
+
+ if ((*offset) >= stream->length) {
+ return NSPDFERROR_OK;
+ }
+
c = stream_byte(stream, *offset);
- while ((bclass[c] & (BC_WSPC | BC_CMNT) ) != 0) {
+ while (((*offset) < stream->length) &&
+ ((bclass[c] & (BC_WSPC | BC_CMNT) ) != 0)) {
(*offset)++;
/* skip comments */
- if ((bclass[c] & BC_CMNT) != 0) {
+ if (((*offset) < stream->length) &&
+ ((bclass[c] & BC_CMNT) != 0)) {
c = stream_byte(stream, *offset);
- while ((bclass[c] & BC_EOLM ) == 0) {
+ while ((*offset < stream->length) &&
+ ((bclass[c] & BC_EOLM ) == 0)) {
(*offset)++;
- c = stream_byte(stream, *offset);
+ c = stream_byte(stream, (*offset));
}
}
- c = stream_byte(stream, *offset);
+ c = stream_byte(stream, (*offset));
}
return NSPDFERROR_OK;
}
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=7967f13f57f08d2b8f38b8c52567d847933b79d8
commit 7967f13f57f08d2b8f38b8c52567d847933b79d8
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
complete parse of all content stream operators
diff --git a/src/cos_parse.c b/src/cos_parse.c
index 5c8c702..5ccd171 100644
--- a/src/cos_parse.c
+++ b/src/cos_parse.c
@@ -74,11 +74,22 @@ cos_parse_number(struct cos_stream *stream,
strmoff_t offset; /* current offset of source data */
unsigned int point;
bool real = false;
+ bool neg = false;
offset = *offset_out;
+ c = stream_byte(stream, offset);
+ if (c == '-') {
+ neg = true;
+ offset++;
+ } else if (c == '+') {
+ neg = false;
+ offset++;
+ }
+
for (len = 0; len < sizeof(num); len++) {
c = stream_byte(stream, offset);
+
if (c == '.') {
real = true;
point = len;
@@ -118,11 +129,19 @@ cos_parse_number(struct cos_stream *stream,
div = div * 10;
}
cosobj->type = COS_TYPE_REAL;
- cosobj->u.real = (float)result / div;
- printf("real %d %f\n", result, cosobj->u.real);
+ if (neg) {
+ cosobj->u.real = -((float)result / div);
+ } else {
+ cosobj->u.real = (float)result / div;
+ }
+ //printf("real %d %f\n", result, cosobj->u.real);
} else {
cosobj->type = COS_TYPE_INT;
- cosobj->u.i = result;
+ if (neg) {
+ cosobj->u.i = -result;
+ } else {
+ cosobj->u.i = result;
+ }
}
*cosobj_out = cosobj;
@@ -1092,269 +1111,132 @@ parse_operator(struct cos_stream *stream,
strmoff_t offset;
enum content_operator operator;
uint8_t c;
+ unsigned int lookup;
offset = *offset_out;
- switch (stream_byte(stream, offset++)) {
- case 'b':
- //CONTENT_OP_b
- //CONTENT_OP_b_
- break;
+ /* first char */
+ c = stream_byte(stream, offset);
+ if ((bclass[c] & (BC_WSPC | BC_CMNT) ) != 0) {
+ /* must have at least one non-whitespace character */
+ return NSPDFERROR_SYNTAX;
+ }
+ lookup = c;
+ offset++;
+ /* possible second char */
+ c = stream_byte(stream, offset);
+ if ((bclass[c] & (BC_WSPC | BC_CMNT) ) == 0) {
+ lookup = (lookup << 8) | c;
+ offset++;
- case 'B':
- operator = CONTENT_OP_B;
+ /* possible third char */
c = stream_byte(stream, offset);
if ((bclass[c] & (BC_WSPC | BC_CMNT) ) == 0) {
- switch (c) {
- case '*':
- operator = CONTENT_OP_B_;
- offset++;
- break;
-
- case 'I':
- operator = CONTENT_OP_BI;
- offset++;
- break;
-
- case 'T':
- operator = CONTENT_OP_BT;
- offset++;
- break;
-
- case 'X':
- operator = CONTENT_OP_BX;
- offset++;
- break;
-
- case 'M':
- if (stream_byte(stream, offset + 1) == 'C') {
- operator = CONTENT_OP_BMC;
- offset+=2;
- }
- break;
-
- case 'D':
- if (stream_byte(stream, offset + 1) == 'C') {
- operator = CONTENT_OP_BDC;
- offset+=2;
- }
- break;
-
- default:
- goto parse_operator_nomatch;
- }
- c = stream_byte(stream, offset);
- }
- break;
-
- case 'c':
- //CONTENT_OP_c
- //CONTENT_OP_cm
- //CONTENT_OP_cs
- break;
-
- case 'C':
- //CONTENT_OP_CS
- break;
-
- case 'd':
- //CONTENT_OP_d
- //CONTENT_OP_d0
- //CONTENT_OP_d1
- break;
-
- case 'D':
- //CONTENT_OP_Do
- //CONTENT_OP_DP
- break;
-
- case 'E':
- //CONTENT_OP_EI
- //CONTENT_OP_EMC
- //CONTENT_OP_ET
- //CONTENT_OP_EX
- break;
-
- case 'f':
- //CONTENT_OP_f
- //CONTENT_OP_f_
- break;
-
- case 'F':
- //CONTENT_OP_F
- break;
-
- case 'G':
- //CONTENT_OP_G
- break;
-
- case 'g':
- operator = CONTENT_OP_g;
- c = stream_byte(stream, offset);
- if (((bclass[c] & (BC_WSPC | BC_CMNT) ) == 0) && (c == 's')) {
- operator = CONTENT_OP_gs;
+ lookup = (lookup << 8) | c;
offset++;
- }
- c = stream_byte(stream, offset);
- break;
-
- case 'h':
- //CONTENT_OP_h
- break;
-
- case 'i':
- //CONTENT_OP_i
- break;
-
- case 'I':
- //CONTENT_OP_ID
- break;
-
- case 'j':
- //CONTENT_OP_j
- break;
-
- case 'J':
- //CONTENT_OP_J
- break;
-
- case 'K':
- operator = CONTENT_OP_K;
- c = stream_byte(stream, offset);
- break;
-
- case 'k':
- operator = CONTENT_OP_k;
- c = stream_byte(stream, offset);
- break;
-
- case 'l':
- operator = CONTENT_OP_l;
- c = stream_byte(stream, offset);
- break;
-
- case 'm':
- break;
-
- case 'M':
- break;
- case 'n':
- break;
-
- case 'q':
- break;
-
- case 'Q':
- break;
-
- case 'r':
- break;
-
- case 'R':
- break;
-
- case 's':
- break;
-
- case 'S':
- break;
-
- case 'T':
- switch (stream_byte(stream, offset++)) {
- case '*':
- operator = CONTENT_OP_T_;
- break;
-
- case 'c':
- operator = CONTENT_OP_Tc;
- break;
-
- case 'd':
- operator = CONTENT_OP_Td;
- break;
-
- case 'D':
- operator = CONTENT_OP_TD;
- break;
-
- case 'f':
- operator = CONTENT_OP_Tf;
- break;
-
- case 'j':
- operator = CONTENT_OP_Tj;
- break;
-
- case 'J':
- operator = CONTENT_OP_TJ;
- break;
-
- case 'L':
- operator = CONTENT_OP_TL;
- break;
-
- case 'm':
- operator = CONTENT_OP_Tm;
- break;
-
- case 'r':
- operator = CONTENT_OP_Tr;
- break;
-
- case 's':
- operator = CONTENT_OP_Ts;
- break;
-
- case 'w':
- operator = CONTENT_OP_Tw;
- break;
-
- case 'z':
- operator = CONTENT_OP_Tz;
- break;
-
- default:
- goto parse_operator_nomatch;
+ /* fourth char must be whitespace */
+ c = stream_byte(stream, offset);
+ if ((bclass[c] & (BC_WSPC | BC_CMNT) ) == 0) {
+ return NSPDFERROR_SYNTAX;
+ }
}
+ }
- c = stream_byte(stream, offset);
- break;
-
- case 'v':
- break;
-
- case 'w':
- break;
-
- case 'W':
- break;
-
- case 'Y':
- break;
+ res = nspdf__stream_skip_ws(stream, &offset);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
- case '\'':
- break;
-
- case '"':
- break;
+ switch (lookup) {
+ case '"': operator = CONTENT_OP___;
+ case '\'': operator = CONTENT_OP__; break;
+ case 'B': operator = CONTENT_OP_B; break;
+ case 'F': operator = CONTENT_OP_F; break;
+ case 'G': operator = CONTENT_OP_G; break;
+ case 'J': operator = CONTENT_OP_J; break;
+ case 'K': operator = CONTENT_OP_K; break;
+ case 'M': operator = CONTENT_OP_M; break;
+ case 'Q': operator = CONTENT_OP_Q; break;
+ case 'S': operator = CONTENT_OP_S; break;
+ case 'W': operator = CONTENT_OP_W; break;
+ case 'b': operator = CONTENT_OP_b; break;
+ case 'c': operator = CONTENT_OP_c; break;
+ case 'd': operator = CONTENT_OP_d; break;
+ case 'f': operator = CONTENT_OP_f; break;
+ case 'g': operator = CONTENT_OP_g; break;
+ case 'h': operator = CONTENT_OP_h; break;
+ case 'i': operator = CONTENT_OP_i; break;
+ case 'j': operator = CONTENT_OP_j; break;
+ case 'k': operator = CONTENT_OP_k; break;
+ case 'l': operator = CONTENT_OP_l; break;
+ case 'm': operator = CONTENT_OP_m; break;
+ case 'n': operator = CONTENT_OP_n; break;
+ case 'q': operator = CONTENT_OP_q; break;
+ case 's': operator = CONTENT_OP_s; break;
+ case 'v': operator = CONTENT_OP_v; break;
+ case 'w': operator = CONTENT_OP_w; break;
+ case 'y': operator = CONTENT_OP_y; break;
+
+ case (('B' << 8) | '*'): operator = CONTENT_OP_B_; break;
+ case (('T' << 8) | '*'): operator = CONTENT_OP_T_; break;
+ case (('W' << 8) | '*'): operator = CONTENT_OP_W_; break;
+
+ case (('B' << 8) | 'I'): operator = CONTENT_OP_BI; break;
+ case (('B' << 8) | 'T'): operator = CONTENT_OP_BT; break;
+ case (('B' << 8) | 'X'): operator = CONTENT_OP_BX; break;
+ case (('C' << 8) | 'S'): operator = CONTENT_OP_CS; break;
+ case (('D' << 8) | 'P'): operator = CONTENT_OP_DP; break;
+ case (('E' << 8) | 'I'): operator = CONTENT_OP_EI; break;
+ case (('E' << 8) | 'T'): operator = CONTENT_OP_ET; break;
+ case (('E' << 8) | 'X'): operator = CONTENT_OP_EX; break;
+ case (('I' << 8) | 'D'): operator = CONTENT_OP_ID; break;
+ case (('M' << 8) | 'P'): operator = CONTENT_OP_MP; break;
+ case (('R' << 8) | 'G'): operator = CONTENT_OP_RG; break;
+ case (('S' << 8) | 'S'): operator = CONTENT_OP_SC; break;
+ case (('T' << 8) | 'D'): operator = CONTENT_OP_TD; break;
+ case (('T' << 8) | 'J'): operator = CONTENT_OP_TJ; break;
+ case (('T' << 8) | 'L'): operator = CONTENT_OP_TL; break;
+
+ case (('D' << 8) | 'o'): operator = CONTENT_OP_Do; break;
+ case (('T' << 8) | 'c'): operator = CONTENT_OP_Tc; break;
+ case (('T' << 8) | 'd'): operator = CONTENT_OP_Td; break;
+ case (('T' << 8) | 'f'): operator = CONTENT_OP_Tf; break;
+ case (('T' << 8) | 'j'): operator = CONTENT_OP_Tj; break;
+ case (('T' << 8) | 'm'): operator = CONTENT_OP_Tm; break;
+ case (('T' << 8) | 'r'): operator = CONTENT_OP_Tr; break;
+ case (('T' << 8) | 's'): operator = CONTENT_OP_Ts; break;
+ case (('T' << 8) | 'w'): operator = CONTENT_OP_Tw; break;
+ case (('T' << 8) | 'z'): operator = CONTENT_OP_Tz; break;
+
+ case (('b' << 8) | '*'): operator = CONTENT_OP_b_; break;
+ case (('f' << 8) | '*'): operator = CONTENT_OP_f_; break;
+ case (('d' << 8) | '0'): operator = CONTENT_OP_d0; break;
+ case (('d' << 8) | '1'): operator = CONTENT_OP_d1; break;
+
+ case (('c' << 8) | 'm'): operator = CONTENT_OP_cm; break;
+ case (('c' << 8) | 's'): operator = CONTENT_OP_cs; break;
+ case (('g' << 8) | 's'): operator = CONTENT_OP_gs; break;
+ case (('r' << 8) | 'e'): operator = CONTENT_OP_re; break;
+ case (('r' << 8) | 'g'): operator = CONTENT_OP_rg; break;
+ case (('r' << 8) | 'i'): operator = CONTENT_OP_ri; break;
+ case (('s' << 8) | 'c'): operator = CONTENT_OP_sc; break;
+ case (('s' << 8) | 'h'): operator = CONTENT_OP_sh; break;
+
+ case (('B' << 16) | (('D' << 8) | 'C')): operator = CONTENT_OP_BDC; break;
+ case (('B' << 16) | (('M' << 8) | 'C')): operator = CONTENT_OP_BMC; break;
+ case (('E' << 16) | (('M' << 8) | 'C')): operator = CONTENT_OP_EMC; break;
+ case (('S' << 16) | (('C' << 8) | 'N')): operator = CONTENT_OP_SCN; break;
+ case (('s' << 16) | (('c' << 8) | 'n')): operator = CONTENT_OP_scn; break;
default:
- goto parse_operator_nomatch;
+ return NSPDFERROR_SYNTAX;
}
- /* matched prefix must be followed by a space */
- if ((bclass[c] & (BC_WSPC | BC_CMNT) ) != 0) {
- res = nspdf__stream_skip_ws(stream, &offset);
- if (res == NSPDFERROR_OK) {
- *operator_out = operator;
- *offset_out = offset;
- }
- return res;
- }
+ *operator_out = operator;
+ *offset_out = offset;
-parse_operator_nomatch:
- return NSPDFERROR_SYNTAX;
+ return NSPDFERROR_OK;
}
#define MAX_OPERAND_COUNT 32
@@ -1443,7 +1325,7 @@ parse_content_operation(struct nspdf_doc *doc,
}
operation_out->operator = operator;
- printf("returning operator %d with %d operands\n", operator, operand_idx);
+ //printf("returning operator %d with %d operands\n", operator, operand_idx);
*offset_out = offset;
return NSPDFERROR_OK;
@@ -1458,7 +1340,7 @@ cos_parse_content_stream(struct nspdf_doc *doc,
struct cos_object *cosobj;
strmoff_t offset;
- printf("%.*s", (int)stream->length, stream->data);
+ //printf("%.*s", (int)stream->length, stream->data);
cosobj = calloc(1, sizeof(struct cos_object));
if (cosobj == NULL) {
@@ -1468,13 +1350,18 @@ cos_parse_content_stream(struct nspdf_doc *doc,
cosobj->u.content = calloc(1, sizeof (struct cos_content));
if (cosobj->u.content == NULL) {
+ res = NSPDFERROR_NOMEM;
goto cos_parse_content_stream_error;
- cos_free_object(cosobj);
- return NSPDFERROR_NOMEM;
- }
+ }
offset = 0;
+ /* skip any leading whitespace */
+ res = nspdf__stream_skip_ws(stream, &offset);
+ if (res != NSPDFERROR_OK) {
+ goto cos_parse_content_stream_error;
+ }
+
while (offset < stream->length) {
struct content_operation cop;
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=e9d3ec85ad043523a47c0eef2a1662e79184e3b3
commit e9d3ec85ad043523a47c0eef2a1662e79184e3b3
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
convert to using stream offset type for stream offsets
diff --git a/src/cos_object.c b/src/cos_object.c
index 4398822..c7ec4e6 100644
--- a/src/cos_object.c
+++ b/src/cos_object.c
@@ -16,6 +16,7 @@
#include <nspdf/errors.h>
+#include "xref.h"
#include "cos_object.h"
#include "cos_parse.h"
#include "pdf_doc.h"
diff --git a/src/cos_object.h b/src/cos_object.h
index 9b98694..c5b85fa 100644
--- a/src/cos_object.h
+++ b/src/cos_object.h
@@ -15,6 +15,8 @@
#ifndef NSPDF__COS_OBJECT_H_
#define NSPDF__COS_OBJECT_H_
+#include "cos_stream.h"
+
struct nspdf_doc;
struct content_operation;
@@ -75,12 +77,6 @@ struct cos_reference {
uint64_t generation; /**< generation of indirect object */
};
-struct cos_stream {
- unsigned int length; /**< decoded stream length */
- size_t alloc; /**< memory allocated for stream */
- const uint8_t *data; /**< decoded stream data */
-};
-
/**
* Synthetic parsed content object.
@@ -102,7 +98,7 @@ struct cos_object {
int64_t i;
/** real */
- double r;
+ float real;
/** name */
char *n;
@@ -113,10 +109,10 @@ struct cos_object {
/** stream data */
struct cos_stream *stream;
- /* dictionary */
+ /** dictionary */
struct cos_dictionary_entry *dictionary;
- /* array */
+ /** array */
struct cos_array *array;
/** reference */
diff --git a/src/cos_parse.c b/src/cos_parse.c
index d0e50f5..5c8c702 100644
--- a/src/cos_parse.c
+++ b/src/cos_parse.c
@@ -63,7 +63,7 @@ static uint8_t xtoi(uint8_t x)
*/
static nspdferror
cos_parse_number(struct cos_stream *stream,
- uint64_t *offset_out,
+ strmoff_t *offset_out,
struct cos_object **cosobj_out)
{
nspdferror res;
@@ -71,12 +71,21 @@ cos_parse_number(struct cos_stream *stream,
uint8_t c; /* current byte from source data */
unsigned int len; /* number of decimal places in number */
uint8_t num[21]; /* temporary buffer for decimal values */
- uint64_t offset; /* current offset of source data */
+ strmoff_t offset; /* current offset of source data */
+ unsigned int point;
+ bool real = false;
offset = *offset_out;
for (len = 0; len < sizeof(num); len++) {
c = stream_byte(stream, offset);
+ if (c == '.') {
+ real = true;
+ point = len;
+ offset++;
+ c = stream_byte(stream, offset);
+ }
+
if ((bclass[c] & BC_DCML) != BC_DCML) {
int64_t result = 0; /* parsed result */
uint64_t tens;
@@ -85,6 +94,9 @@ cos_parse_number(struct cos_stream *stream,
/* parse error no decimals in input */
return NSPDFERROR_SYNTAX;
}
+
+ point = len - point;
+
/* sum value from each place */
for (tens = 1; len > 0; tens = tens * 10, len--) {
result += (num[len - 1] * tens);
@@ -100,8 +112,18 @@ cos_parse_number(struct cos_stream *stream,
return NSPDFERROR_NOMEM;
}
- cosobj->type = COS_TYPE_INT;
- cosobj->u.i = result;
+ if (real) {
+ unsigned int div = 1;
+ for (; point > 0;point--) {
+ div = div * 10;
+ }
+ cosobj->type = COS_TYPE_REAL;
+ cosobj->u.real = (float)result / div;
+ printf("real %d %f\n", result, cosobj->u.real);
+ } else {
+ cosobj->type = COS_TYPE_INT;
+ cosobj->u.i = result;
+ }
*cosobj_out = cosobj;
@@ -122,10 +144,10 @@ cos_parse_number(struct cos_stream *stream,
*/
static nspdferror
cos_parse_string(struct cos_stream *stream,
- uint64_t *offset_out,
+ strmoff_t *offset_out,
struct cos_object **cosobj_out)
{
- uint64_t offset;
+ strmoff_t offset;
struct cos_object *cosobj;
uint8_t c;
unsigned int pdepth = 1; /* depth of open parens */
@@ -251,10 +273,10 @@ cos_parse_string(struct cos_stream *stream,
*/
static nspdferror
cos_parse_hex_string(struct cos_stream *stream,
- uint64_t *offset_out,
+ strmoff_t *offset_out,
struct cos_object **cosobj_out)
{
- uint64_t offset;
+ strmoff_t offset;
struct cos_object *cosobj;
uint8_t c;
uint8_t value = 0;
@@ -315,15 +337,15 @@ cos_parse_hex_string(struct cos_stream *stream,
static nspdferror
cos_parse_dictionary(struct nspdf_doc *doc,
struct cos_stream *stream,
- uint64_t *offset_out,
+ strmoff_t *offset_out,
struct cos_object **cosobj_out)
{
- uint64_t offset;
+ nspdferror res;
+ strmoff_t offset;
struct cos_object *cosobj;
struct cos_dictionary_entry *entry;
struct cos_object *key;
struct cos_object *value;
- int res;
offset = *offset_out;
@@ -404,10 +426,10 @@ cos_parse_dictionary_error:
static nspdferror
cos_parse_list(struct nspdf_doc *doc,
struct cos_stream *stream,
- uint64_t *offset_out,
+ strmoff_t *offset_out,
struct cos_object **cosobj_out)
{
- uint64_t offset;
+ strmoff_t offset;
struct cos_object *cosobj;
struct cos_array *array;
struct cos_object *value;
@@ -485,10 +507,10 @@ cos_parse_list(struct nspdf_doc *doc,
*/
static nspdferror
cos_parse_name(struct cos_stream *stream,
- uint64_t *offset_out,
+ strmoff_t *offset_out,
struct cos_object **cosobj_out)
{
- uint64_t offset;
+ strmoff_t offset;
struct cos_object *cosobj;
uint8_t c;
char name[NAME_MAX_LENGTH + 1];
@@ -543,10 +565,10 @@ cos_parse_name(struct cos_stream *stream,
*/
static nspdferror
cos_parse_boolean(struct cos_stream *stream,
- uint64_t *offset_out,
+ strmoff_t *offset_out,
struct cos_object **cosobj_out)
{
- uint64_t offset;
+ strmoff_t offset;
struct cos_object *cosobj;
uint8_t c;
bool value;
@@ -625,10 +647,10 @@ cos_parse_boolean(struct cos_stream *stream,
*/
static nspdferror
cos_parse_null(struct cos_stream *stream,
- uint64_t *offset_out,
+ strmoff_t *offset_out,
struct cos_object **cosobj_out)
{
- uint64_t offset;
+ strmoff_t offset;
struct cos_object *cosobj;
uint8_t c;
@@ -676,13 +698,13 @@ cos_parse_null(struct cos_stream *stream,
static nspdferror
cos_parse_stream(struct nspdf_doc *doc,
struct cos_stream *stream_in,
- uint64_t *offset_out,
+ strmoff_t *offset_out,
struct cos_object **cosobj_out)
{
struct cos_object *cosobj;
nspdferror res;
struct cos_object *stream_dict;
- uint64_t offset;
+ strmoff_t offset;
struct cos_object *stream_filter;
struct cos_stream *stream;
int64_t stream_length;
@@ -811,11 +833,11 @@ cos_parse_stream(struct nspdf_doc *doc,
static nspdferror
cos_attempt_parse_reference(struct nspdf_doc *doc,
struct cos_stream *stream,
- uint64_t *offset_out,
+ strmoff_t *offset_out,
struct cos_object **cosobj_out)
{
nspdferror res;
- uint64_t offset;
+ strmoff_t offset;
uint8_t c;
struct cos_object *generation; /* generation object */
@@ -992,10 +1014,10 @@ cos_attempt_parse_reference(struct nspdf_doc *doc,
nspdferror
cos_parse_object(struct nspdf_doc *doc,
struct cos_stream *stream,
- uint64_t *offset_out,
+ strmoff_t *offset_out,
struct cos_object **cosobj_out)
{
- uint64_t offset;
+ strmoff_t offset;
nspdferror res;
struct cos_object *cosobj;
@@ -1008,34 +1030,24 @@ cos_parse_object(struct nspdf_doc *doc,
/* object could be any type use first char to try and select */
switch (stream_byte(stream, offset)) {
- case '-':
- case '+':
- case '.':
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
+ case '-': case '+': case '.': case '0': case '1': case '2': case '3':
+ case '4': case '5': case '6': case '7': case '8': case '9':
res = cos_parse_number(stream, &offset, &cosobj);
/* if type is positive integer try to check for reference */
- if ((res == 0) &&
+ if ((res == NSPDFERROR_OK) &&
(cosobj->type == COS_TYPE_INT) &&
(cosobj->u.i > 0)) {
res = cos_attempt_parse_reference(doc, stream, &offset, &cosobj);
}
break;
- case '<':
- if (stream_byte(stream, offset + 1) == '<') {
- res = cos_parse_dictionary(doc, stream, &offset, &cosobj);
- } else {
- res = cos_parse_hex_string(stream, &offset, &cosobj);
- }
+ case 't':
+ case 'f':
+ res = cos_parse_boolean(stream, &offset, &cosobj);
+ break;
+
+ case 'n':
+ res = cos_parse_null(stream, &offset, &cosobj);
break;
case '(':
@@ -1046,46 +1058,392 @@ cos_parse_object(struct nspdf_doc *doc,
res = cos_parse_name(stream, &offset, &cosobj);
break;
+ case '<':
+ if (stream_byte(stream, offset + 1) == '<') {
+ res = cos_parse_dictionary(doc, stream, &offset, &cosobj);
+ } else {
+ res = cos_parse_hex_string(stream, &offset, &cosobj);
+ }
+ break;
+
case '[':
res = cos_parse_list(doc, stream, &offset, &cosobj);
break;
- case 't':
- case 'T':
+ default:
+ res = NSPDFERROR_SYNTAX; /* syntax error */
+ }
+
+ if (res == NSPDFERROR_OK) {
+ *cosobj_out = cosobj;
+ *offset_out = offset;
+ }
+
+ return res;
+}
+
+
+static nspdferror
+parse_operator(struct cos_stream *stream,
+ strmoff_t *offset_out,
+ enum content_operator *operator_out)
+{
+ nspdferror res;
+ strmoff_t offset;
+ enum content_operator operator;
+ uint8_t c;
+
+ offset = *offset_out;
+
+ switch (stream_byte(stream, offset++)) {
+ case 'b':
+ //CONTENT_OP_b
+ //CONTENT_OP_b_
+ break;
+
+ case 'B':
+ operator = CONTENT_OP_B;
+ c = stream_byte(stream, offset);
+ if ((bclass[c] & (BC_WSPC | BC_CMNT) ) == 0) {
+ switch (c) {
+ case '*':
+ operator = CONTENT_OP_B_;
+ offset++;
+ break;
+
+ case 'I':
+ operator = CONTENT_OP_BI;
+ offset++;
+ break;
+
+ case 'T':
+ operator = CONTENT_OP_BT;
+ offset++;
+ break;
+
+ case 'X':
+ operator = CONTENT_OP_BX;
+ offset++;
+ break;
+
+ case 'M':
+ if (stream_byte(stream, offset + 1) == 'C') {
+ operator = CONTENT_OP_BMC;
+ offset+=2;
+ }
+ break;
+
+ case 'D':
+ if (stream_byte(stream, offset + 1) == 'C') {
+ operator = CONTENT_OP_BDC;
+ offset+=2;
+ }
+ break;
+
+ default:
+ goto parse_operator_nomatch;
+ }
+ c = stream_byte(stream, offset);
+ }
+ break;
+
+ case 'c':
+ //CONTENT_OP_c
+ //CONTENT_OP_cm
+ //CONTENT_OP_cs
+ break;
+
+ case 'C':
+ //CONTENT_OP_CS
+ break;
+
+ case 'd':
+ //CONTENT_OP_d
+ //CONTENT_OP_d0
+ //CONTENT_OP_d1
+ break;
+
+ case 'D':
+ //CONTENT_OP_Do
+ //CONTENT_OP_DP
+ break;
+
+ case 'E':
+ //CONTENT_OP_EI
+ //CONTENT_OP_EMC
+ //CONTENT_OP_ET
+ //CONTENT_OP_EX
+ break;
+
case 'f':
+ //CONTENT_OP_f
+ //CONTENT_OP_f_
+ break;
+
case 'F':
- res = cos_parse_boolean(stream, &offset, &cosobj);
+ //CONTENT_OP_F
+ break;
+
+ case 'G':
+ //CONTENT_OP_G
+ break;
+
+ case 'g':
+ operator = CONTENT_OP_g;
+ c = stream_byte(stream, offset);
+ if (((bclass[c] & (BC_WSPC | BC_CMNT) ) == 0) && (c == 's')) {
+ operator = CONTENT_OP_gs;
+ offset++;
+ }
+ c = stream_byte(stream, offset);
+ break;
+
+ case 'h':
+ //CONTENT_OP_h
+ break;
+
+ case 'i':
+ //CONTENT_OP_i
+ break;
+
+ case 'I':
+ //CONTENT_OP_ID
+ break;
+
+ case 'j':
+ //CONTENT_OP_j
+ break;
+
+ case 'J':
+ //CONTENT_OP_J
+ break;
+
+ case 'K':
+ operator = CONTENT_OP_K;
+ c = stream_byte(stream, offset);
+ break;
+
+ case 'k':
+ operator = CONTENT_OP_k;
+ c = stream_byte(stream, offset);
+ break;
+
+ case 'l':
+ operator = CONTENT_OP_l;
+ c = stream_byte(stream, offset);
+ break;
+
+ case 'm':
+ break;
+
+ case 'M':
break;
case 'n':
- case 'N':
- res = cos_parse_null(stream, &offset, &cosobj);
+ break;
+
+ case 'q':
+ break;
+
+ case 'Q':
+ break;
+
+ case 'r':
+ break;
+
+ case 'R':
+ break;
+
+ case 's':
+ break;
+
+ case 'S':
+ break;
+
+ case 'T':
+ switch (stream_byte(stream, offset++)) {
+ case '*':
+ operator = CONTENT_OP_T_;
+ break;
+
+ case 'c':
+ operator = CONTENT_OP_Tc;
+ break;
+
+ case 'd':
+ operator = CONTENT_OP_Td;
+ break;
+
+ case 'D':
+ operator = CONTENT_OP_TD;
+ break;
+
+ case 'f':
+ operator = CONTENT_OP_Tf;
+ break;
+
+ case 'j':
+ operator = CONTENT_OP_Tj;
+ break;
+
+ case 'J':
+ operator = CONTENT_OP_TJ;
+ break;
+
+ case 'L':
+ operator = CONTENT_OP_TL;
+ break;
+
+ case 'm':
+ operator = CONTENT_OP_Tm;
+ break;
+
+ case 'r':
+ operator = CONTENT_OP_Tr;
+ break;
+
+ case 's':
+ operator = CONTENT_OP_Ts;
+ break;
+
+ case 'w':
+ operator = CONTENT_OP_Tw;
+ break;
+
+ case 'z':
+ operator = CONTENT_OP_Tz;
+ break;
+
+ default:
+ goto parse_operator_nomatch;
+ }
+
+ c = stream_byte(stream, offset);
+ break;
+
+ case 'v':
+ break;
+
+ case 'w':
+ break;
+
+ case 'W':
+ break;
+
+ case 'Y':
+ break;
+
+ case '\'':
+ break;
+
+ case '"':
break;
default:
- res = NSPDFERROR_SYNTAX; /* syntax error */
+ goto parse_operator_nomatch;
}
- if (res == NSPDFERROR_OK) {
- *cosobj_out = cosobj;
- *offset_out = offset;
+ /* matched prefix must be followed by a space */
+ if ((bclass[c] & (BC_WSPC | BC_CMNT) ) != 0) {
+ res = nspdf__stream_skip_ws(stream, &offset);
+ if (res == NSPDFERROR_OK) {
+ *operator_out = operator;
+ *offset_out = offset;
+ }
+ return res;
}
- return res;
+parse_operator_nomatch:
+ return NSPDFERROR_SYNTAX;
}
+#define MAX_OPERAND_COUNT 32
static nspdferror
parse_content_operation(struct nspdf_doc *doc,
struct cos_stream *stream,
- unsigned int *offset_out,
+ strmoff_t *offset_out,
struct content_operation *operation_out)
{
- unsigned int offset;
+ strmoff_t offset;
+ nspdferror res;
+ enum content_operator operator;
+ struct cos_object *operands[MAX_OPERAND_COUNT];
+ unsigned int operand_idx = 0;
offset = *offset_out;
- offset+=stream->length;
+ res = parse_operator(stream, &offset, &operator);
+ while (res == NSPDFERROR_SYNTAX) {
+ /* was not an operator so check for what else it could have been */
+ if (operand_idx >= MAX_OPERAND_COUNT) {
+ /** \todo free any stacked operands */
+ printf("too many operands\n");
+ return NSPDFERROR_SYNTAX;
+ }
+
+ switch (stream_byte(stream, offset)) {
+
+ case '-': case '+': case '.': case '0': case '1': case '2': case '3':
+ case '4': case '5': case '6': case '7': case '8': case '9':
+ res = cos_parse_number(stream, &offset, &operands[operand_idx]);
+ break;
+
+ case 't':
+ case 'f':
+ res = cos_parse_boolean(stream, &offset, &operands[operand_idx]);
+ break;
+
+ case 'n':
+ res = cos_parse_null(stream, &offset, &operands[operand_idx]);
+ break;
+
+ case '(':
+ res = cos_parse_string(stream, &offset, &operands[operand_idx]);
+ break;
+
+ case '/':
+ res = cos_parse_name(stream, &offset, &operands[operand_idx]);
+ break;
+
+ case '[':
+ res = cos_parse_list(doc, stream, &offset, &operands[operand_idx]);
+ break;
+
+ case '<':
+ if (stream_byte(stream, offset + 1) == '<') {
+ res = cos_parse_dictionary(doc,
+ stream,
+ &offset,
+ &operands[operand_idx]);
+ } else {
+ res = cos_parse_hex_string(stream,
+ &offset,
+ &operands[operand_idx]);
+ }
+ break;
+
+ default:
+ printf("unknown operand type\n");
+ res = NSPDFERROR_SYNTAX; /* syntax error */
+ }
+
+ if (res != NSPDFERROR_OK) {
+ /* parse error */
+ /** \todo free any stacked operands */
+ printf("operand parse failed at %c\n",
+ stream_byte(stream, offset));
+ return res;
+ }
+
+ /* move to next operand */
+ operand_idx++;
+
+ res = parse_operator(stream, &offset, &operator);
+ }
+
+ operation_out->operator = operator;
+ printf("returning operator %d with %d operands\n", operator, operand_idx);
*offset_out = offset;
return NSPDFERROR_OK;
@@ -1098,9 +1456,9 @@ cos_parse_content_stream(struct nspdf_doc *doc,
{
nspdferror res;
struct cos_object *cosobj;
- unsigned int offset;
+ strmoff_t offset;
- //printf("%.*s", (int)stream->length, stream->data);
+ printf("%.*s", (int)stream->length, stream->data);
cosobj = calloc(1, sizeof(struct cos_object));
if (cosobj == NULL) {
diff --git a/src/cos_parse.h b/src/cos_parse.h
index e7f1ce0..a9cb9c9 100644
--- a/src/cos_parse.h
+++ b/src/cos_parse.h
@@ -15,16 +15,17 @@
#ifndef NSPDF__COS_PARSE_H_
#define NSPDF__COS_PARSE_H_
+#include "cos_stream.h"
+
struct nspdf_doc;
struct cos_object;
-struct cos_stream;
/**
* Parse input stream into an object
*
* lex and parse a byte stream to generate a COS object.
*/
-nspdferror cos_parse_object(struct nspdf_doc *doc, struct cos_stream *stream, uint64_t *offset_out, struct cos_object **cosobj_out);
+nspdferror cos_parse_object(struct nspdf_doc *doc, struct cos_stream *stream, strmoff_t *offset_out, struct cos_object **cosobj_out);
/**
* Parse content stream into content operations object
diff --git a/src/cos_stream.h b/src/cos_stream.h
new file mode 100644
index 0000000..0a4992c
--- /dev/null
+++ b/src/cos_stream.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2018 Vincent Sanders <vince(a)netsurf-browser.org>
+ *
+ * This file is part of libnspdf.
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
+/**
+ * \file
+ * NetSurf PDF library COS stream
+ */
+
+#ifndef NSPDF__COS_STREAM_H_
+#define NSPDF__COS_STREAM_H_
+
+/* stream offset type */
+typedef unsigned int strmoff_t;
+
+/**
+ * stream of data.
+ */
+struct cos_stream {
+ strmoff_t length; /**< decoded stream length */
+ size_t alloc; /**< memory allocated for stream */
+ const uint8_t *data; /**< decoded stream data */
+};
+
+static inline uint8_t
+stream_byte(struct cos_stream *stream, strmoff_t offset)
+{
+ return *(stream->data + offset);
+}
+
+#endif
diff --git a/src/document.c b/src/document.c
index 36d4c63..dcf8395 100644
--- a/src/document.c
+++ b/src/document.c
@@ -19,10 +19,14 @@
#include "cos_parse.h"
#include "byte_class.h"
#include "cos_object.h"
+#include "xref.h"
#include "pdf_doc.h"
#define SLEN(x) (sizeof((x)) - 1)
+/* byte data acessory, allows for more complex buffer handling in future */
+#define DOC_BYTE(doc, offset) (doc->start[(offset)])
+
#define STARTXREF_TOK "startxref"
/* Number of bytes to search back from file end to find xref start token,
@@ -34,10 +38,11 @@
/**
* finds the startxref marker at the end of input
*/
-static nspdferror find_startxref(struct nspdf_doc *doc, uint64_t *offset_out)
+static nspdferror
+find_startxref(struct nspdf_doc *doc, strmoff_t *offset_out)
{
- uint64_t offset; /* offset of characters being considered for startxref */
- uint64_t earliest; /* earliest offset to serch for startxref */
+ strmoff_t offset; /* offset of characters being considered for startxref */
+ unsigned int earliest; /* earliest offset to serch for startxref */
offset = doc->length - SLEN(STARTXREF_TOK);
@@ -70,10 +75,10 @@ static nspdferror find_startxref(struct nspdf_doc *doc, uint64_t *offset_out)
*/
static nspdferror
decode_startxref(struct nspdf_doc *doc,
- uint64_t *offset_out,
- uint64_t *start_xref_out)
+ strmoff_t *offset_out,
+ unsigned int *start_xref_out)
{
- uint64_t offset; /* offset of characters being considered for startxref */
+ strmoff_t offset; /* offset of characters being considered for startxref */
uint64_t start_xref;
nspdferror res;
@@ -97,12 +102,12 @@ decode_startxref(struct nspdf_doc *doc,
return res;
}
- res = doc_read_uint(doc, &offset, &start_xref);
+ res = nspdf__stream_read_uint(doc->stream, &offset, &start_xref);
if (res != NSPDFERROR_OK) {
return res;
}
- res = doc_skip_eol(doc, &offset);
+ res = nspdf__stream_skip_eol(doc->stream, &offset);
if (res != NSPDFERROR_OK) {
return res;
}
@@ -126,9 +131,9 @@ decode_startxref(struct nspdf_doc *doc,
/**
* finds the next trailer
*/
-static nspdferror find_trailer(struct nspdf_doc *doc, uint64_t *offset_out)
+static nspdferror find_trailer(struct nspdf_doc *doc, strmoff_t *offset_out)
{
- uint64_t offset; /* offset of characters being considered for trailer */
+ strmoff_t offset; /* offset of characters being considered for trailer */
for (offset = *offset_out;offset < doc->length; offset++) {
if ((DOC_BYTE(doc, offset ) == 't') &&
@@ -148,12 +153,12 @@ static nspdferror find_trailer(struct nspdf_doc *doc, uint64_t *offset_out)
static nspdferror
decode_trailer(struct nspdf_doc *doc,
- uint64_t *offset_out,
+ strmoff_t *offset_out,
struct cos_object **trailer_out)
{
struct cos_object *trailer;
int res;
- uint64_t offset;
+ strmoff_t offset;
offset = *offset_out;
@@ -193,11 +198,11 @@ decode_trailer(struct nspdf_doc *doc,
* recursively parse trailers and xref tables
*/
static nspdferror
-decode_xref_trailer(struct nspdf_doc *doc, uint64_t xref_offset)
+decode_xref_trailer(struct nspdf_doc *doc, unsigned int xref_offset)
{
nspdferror res;
- uint64_t offset; /* the current data offset */
- uint64_t startxref; /* the value of the startxref field */
+ strmoff_t offset; /* the current data offset */
+ unsigned int startxref; /* the value of the startxref field */
struct cos_object *trailer; /* the current trailer */
int64_t prev;
@@ -275,7 +280,7 @@ decode_xref_trailer(struct nspdf_doc *doc, uint64_t xref_offset)
offset = xref_offset;
/** @todo deal with XrefStm (number) in trailer */
- res = nspdf__xref_parse(doc, &offset);
+ res = nspdf__xref_parse(doc, doc->stream, &offset);
if (res != NSPDFERROR_OK) {
printf("failed to decode xref table\n");
goto decode_xref_trailer_failed;
@@ -313,8 +318,8 @@ decode_xref_trailer_failed:
static nspdferror decode_trailers(struct nspdf_doc *doc)
{
nspdferror res;
- uint64_t offset; /* the current data offset */
- uint64_t startxref; /* the value of the first startxref field */
+ strmoff_t offset; /* the current data offset */
+ unsigned int startxref; /* the value of the first startxref field */
res = find_startxref(doc, &offset);
if (res != NSPDFERROR_OK) {
diff --git a/src/pdf_doc.c b/src/pdf_doc.c
index 955f737..d7c7a0e 100644
--- a/src/pdf_doc.c
+++ b/src/pdf_doc.c
@@ -19,7 +19,8 @@
#include "cos_object.h"
#include "pdf_doc.h"
-nspdferror nspdf__stream_skip_ws(struct cos_stream *stream, uint64_t *offset)
+nspdferror
+nspdf__stream_skip_ws(struct cos_stream *stream, strmoff_t *offset)
{
uint8_t c;
/* TODO sort out keeping offset in range */
@@ -43,35 +44,36 @@ nspdferror nspdf__stream_skip_ws(struct cos_stream *stream, uint64_t *offset)
/**
* move offset to next non eol byte
*/
-nspdferror doc_skip_eol(struct nspdf_doc *doc, uint64_t *offset)
+nspdferror
+nspdf__stream_skip_eol(struct cos_stream *stream, strmoff_t *offset)
{
uint8_t c;
- /* TODO sort out keeping offset in range */
- c = DOC_BYTE(doc, *offset);
+ /** \todo sort out keeping offset in range */
+ c = stream_byte(stream, *offset);
while ((bclass[c] & BC_EOLM) != 0) {
(*offset)++;
- c = DOC_BYTE(doc, *offset);
+ c = stream_byte(stream, *offset);
}
return NSPDFERROR_OK;
}
nspdferror
-doc_read_uint(struct nspdf_doc *doc,
- uint64_t *offset_out,
- uint64_t *result_out)
+nspdf__stream_read_uint(struct cos_stream *stream,
+ strmoff_t *offset_out,
+ uint64_t *result_out)
{
uint8_t c; /* current byte from source data */
+ strmoff_t offset; /* current offset of source data */
unsigned int len; /* number of decimal places in number */
uint8_t num[21]; /* temporary buffer for decimal values */
- uint64_t offset; /* current offset of source data */
uint64_t result=0; /* parsed result */
uint64_t tens;
offset = *offset_out;
for (len = 0; len < sizeof(num); len++) {
- c = DOC_BYTE(doc, offset);
+ c = stream_byte(stream, offset);
if ((bclass[c] & BC_DCML) != BC_DCML) {
if (len == 0) {
return -2; /* parse error no decimals in input */
@@ -89,5 +91,5 @@ doc_read_uint(struct nspdf_doc *doc,
num[len] = c - '0';
offset++;
}
- return -1; /* number too long */
+ return NSPDFERROR_RANGE; /* number too long */
}
diff --git a/src/pdf_doc.h b/src/pdf_doc.h
index 27a730a..4853170 100644
--- a/src/pdf_doc.h
+++ b/src/pdf_doc.h
@@ -15,6 +15,8 @@
#ifndef NSPDF__PDF_DOC_H_
#define NSPDF__PDF_DOC_H_
+#include "cos_stream.h"
+
struct xref_table_entry;
struct page_table_entry;
@@ -50,33 +52,11 @@ struct nspdf_doc {
struct page_table_entry *page_table;
};
-/* byte data acessory, allows for more complex buffer handling in future */
-#define DOC_BYTE(doc, offset) (doc->start[(offset)])
-
-static inline uint8_t
-stream_byte(struct cos_stream *stream, unsigned int offset)
-{
- return *(stream->data + offset);
-}
-
/* helpers in pdf_doc.c */
-nspdferror nspdf__stream_skip_ws(struct cos_stream *stream, uint64_t *offset);
-nspdferror doc_skip_eol(struct nspdf_doc *doc, uint64_t *offset);
-nspdferror doc_read_uint(struct nspdf_doc *doc, uint64_t *offset_out, uint64_t *result_out);
-
-/* cross reference table handlers */
-/**
- * parse xref from file
- */
-nspdferror nspdf__xref_parse(struct nspdf_doc *doc, uint64_t *offset_out);
-
-
-/**
- * get an object dereferencing through xref table if necessary
- */
-nspdferror nspdf__xref_get_referenced(struct nspdf_doc *doc, struct cos_object **cobj_out);
+nspdferror nspdf__stream_skip_ws(struct cos_stream *stream, strmoff_t *offset);
+nspdferror nspdf__stream_skip_eol(struct cos_stream *stream, strmoff_t *offset);
+nspdferror nspdf__stream_read_uint(struct cos_stream *stream, strmoff_t *offset_out, uint64_t *result_out);
-nspdferror nspdf__xref_allocate(struct nspdf_doc *doc, int64_t size);
nspdferror nspdf__decode_page_tree(struct nspdf_doc *doc, struct cos_object *page_tree_node, unsigned int *page_index);
diff --git a/src/xref.c b/src/xref.c
index cdd4088..7780bf2 100644
--- a/src/xref.c
+++ b/src/xref.c
@@ -16,6 +16,7 @@
#include "cos_parse.h"
#include "cos_object.h"
#include "pdf_doc.h"
+#include "xref.h"
/** indirect object */
@@ -24,7 +25,7 @@ struct xref_table_entry {
struct cos_reference ref;
/** offset of object */
- uint64_t offset;
+ strmoff_t offset;
/* indirect object if already decoded */
struct cos_object *object;
@@ -50,9 +51,12 @@ nspdferror nspdf__xref_allocate(struct nspdf_doc *doc, int64_t size)
return NSPDFERROR_OK;
}
-nspdferror nspdf__xref_parse(struct nspdf_doc *doc, uint64_t *offset_out)
+nspdferror
+nspdf__xref_parse(struct nspdf_doc *doc,
+ struct cos_stream *stream,
+ strmoff_t *offset_out)
{
- uint64_t offset;
+ strmoff_t offset;
nspdferror res;
uint64_t objnumber; /* current object number */
uint64_t objcount;
@@ -60,15 +64,15 @@ nspdferror nspdf__xref_parse(struct nspdf_doc *doc, uint64_t *offset_out)
offset = *offset_out;
/* xref object header */
- if ((DOC_BYTE(doc, offset ) != 'x') &&
- (DOC_BYTE(doc, offset + 1) != 'r') &&
- (DOC_BYTE(doc, offset + 2) != 'e') &&
- (DOC_BYTE(doc, offset + 3) != 'f')) {
+ if ((stream_byte(stream, offset ) != 'x') ||
+ (stream_byte(stream, offset + 1) != 'r') ||
+ (stream_byte(stream, offset + 2) != 'e') ||
+ (stream_byte(stream, offset + 3) != 'f')) {
return NSPDFERROR_SYNTAX;
}
offset += 4;
- res = nspdf__stream_skip_ws(doc->stream, &offset);
+ res = nspdf__stream_skip_ws(stream, &offset);
if (res != NSPDFERROR_OK) {
return res;
}
@@ -76,20 +80,20 @@ nspdferror nspdf__xref_parse(struct nspdf_doc *doc, uint64_t *offset_out)
/* subsections
* <first object number> <number of references in subsection>
*/
- res = doc_read_uint(doc, &offset, &objnumber);
+ res = nspdf__stream_read_uint(stream, &offset, &objnumber);
while (res == NSPDFERROR_OK) {
uint64_t lastobj;
- res = nspdf__stream_skip_ws(doc->stream, &offset);
+ res = nspdf__stream_skip_ws(stream, &offset);
if (res != NSPDFERROR_OK) {
return res;
}
- res = doc_read_uint(doc, &offset, &objcount);
+ res = nspdf__stream_read_uint(stream, &offset, &objcount);
if (res != NSPDFERROR_OK) {
return res;
}
- res = nspdf__stream_skip_ws(doc->stream, &offset);
+ res = nspdf__stream_skip_ws(stream, &offset);
if (res != NSPDFERROR_OK) {
return res;
}
@@ -103,19 +107,19 @@ nspdferror nspdf__xref_parse(struct nspdf_doc *doc, uint64_t *offset_out)
uint64_t objgeneration;
/* object index */
- res = doc_read_uint(doc, &offset, &objindex);
+ res = nspdf__stream_read_uint(stream, &offset, &objindex);
if (res != NSPDFERROR_OK) {
return res;
}
offset++; /* skip space */
- res = doc_read_uint(doc, &offset, &objgeneration);
+ res = nspdf__stream_read_uint(stream, &offset, &objgeneration);
if (res != NSPDFERROR_OK) {
return res;
}
offset++; /* skip space */
- if ((DOC_BYTE(doc, offset++) == 'n')) {
+ if ((stream_byte(stream, offset++) == 'n')) {
if (objnumber < doc->xref_table_size) {
struct xref_table_entry *indobj;
indobj = doc->xref_table + objnumber;
@@ -133,7 +137,7 @@ nspdferror nspdf__xref_parse(struct nspdf_doc *doc, uint64_t *offset_out)
offset += 2; /* skip EOL */
}
- res = doc_read_uint(doc, &offset, &objnumber);
+ res = nspdf__stream_read_uint(stream, &offset, &objnumber);
}
return NSPDFERROR_OK;
@@ -146,7 +150,7 @@ nspdf__xref_get_referenced(struct nspdf_doc *doc, struct cos_object **cobj_out)
nspdferror res;
struct cos_object *cobj;
struct cos_object *indirect;
- uint64_t offset;
+ strmoff_t offset;
struct xref_table_entry *entry;
cobj = *cobj_out;
diff --git a/src/xref.h b/src/xref.h
new file mode 100644
index 0000000..e53f2b2
--- /dev/null
+++ b/src/xref.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2018 Vincent Sanders <vince(a)netsurf-browser.org>
+ *
+ * This file is part of libnspdf.
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
+/**
+ * \file
+ * NetSurf PDF library cross reference table handling
+ */
+
+#ifndef NSPDF__XREF_H_
+#define NSPDF__XREF_H_
+
+#include "cos_stream.h"
+
+struct nspdf_doc;
+struct cos_object;
+
+/**
+ * parse xref from file
+ */
+nspdferror nspdf__xref_parse(struct nspdf_doc *doc, struct cos_stream *stream, strmoff_t *offset_out);
+
+
+/**
+ * get an object dereferencing through xref table if necessary
+ */
+nspdferror nspdf__xref_get_referenced(struct nspdf_doc *doc, struct cos_object **cobj_out);
+
+/**
+ * allocate storage for cross reference table
+ */
+nspdferror nspdf__xref_allocate(struct nspdf_doc *doc, int64_t size);
+
+#endif
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=a686f1b43b9474376d...
commit a686f1b43b9474376d6bf52352d2b82b4e618769
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
update cos object parsing to take a stream to parse from
diff --git a/src/content.h b/src/content.h
new file mode 100644
index 0000000..11f4c3d
--- /dev/null
+++ b/src/content.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2018 Vincent Sanders <vince(a)netsurf-browser.org>
+ *
+ * This file is part of libnspdf.
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
+/**
+ * \file
+ * NetSurf PDF library parsed content stream
+ */
+
+#ifndef NSPDF__CONTENT_H_
+#define NSPDF__CONTENT_H_
+
+enum content_operator {
+ CONTENT_OP_b, /* b - close, fill and stroke path with nonzero winding
+ * rule. */
+ CONTENT_OP_B, /* B - fill and stroke path using nonzero winding rule */
+ CONTENT_OP_b_, /* b* - close, fill and stroke path with even/odd rule */
+ CONTENT_OP_B_, /* B* - fill and stroke path with even/odd rule */
+ CONTENT_OP_BDC, /* BDC - begin marked content sequence with property list */
+ CONTENT_OP_BI, /* BI - begin inline image */
+ CONTENT_OP_BMC, /* BMC - begin marked content sequence */
+ CONTENT_OP_BT, /* BT - begin text object */
+ CONTENT_OP_BX, /* BX - begin compatibility section */
+ CONTENT_OP_c, /* c - append curved segment to path */
+ CONTENT_OP_cm, /* cm - concatenate matrix to current transformation matrix */
+ CONTENT_OP_CS, /* CS - set colour space for stroking operations */
+ CONTENT_OP_cs, /* cs - set colourspace for non stroke operations */
+ CONTENT_OP_d, /* d - set line dash pattern */
+ CONTENT_OP_d0, /* d0 - set glyph width in type 3 font */
+ CONTENT_OP_d1, /* d1 - set glyph width and bounding box in type 3 font */
+ CONTENT_OP_Do, /* Do - invoke named xobject */
+ CONTENT_OP_DP, /* DP - define marked content point with property list */
+ CONTENT_OP_EI, /* EI - end of inline image */
+ CONTENT_OP_EMC, /* EMC - end marked content sequence */
+ CONTENT_OP_ET, /* ET - end text object */
+ CONTENT_OP_EX, /* EX - end compatibility section */
+ CONTENT_OP_f, /* f - fill path using nonzero winding rule */
+ CONTENT_OP_F, /* F - fill path using nonzero winding rule (obsolete alias of f) */
+ CONTENT_OP_f_, /* f* - fill path with even/odd rule */
+ CONTENT_OP_G, /* G - set gray level for stroking operations */
+ CONTENT_OP_g, /* g - set gray level for nonstroking operations */
+ CONTENT_OP_gs, /* gs - set parameters from graphics state parameter dictionary */
+ CONTENT_OP_h, /* h - close subpath */
+ CONTENT_OP_i, /* i - set flatness tolerance */
+ CONTENT_OP_ID, /* ID - begin inline image data */
+ CONTENT_OP_j, /* j - set join style */
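+ CONTENT_OP_J, /* J - set line cap style */
+ CONTENT_OP_K, /* K - set CMYK colour for stroking operations */
+ CONTENT_OP_k, /* k - set CMYK colour for nonstroking operations */
+ CONTENT_OP_l, /* l - append straight line segment to path */
+ CONTENT_OP_m, /* m - begin new subpath */
+ CONTENT_OP_M, /* M - set miter limit */
+ CONTENT_OP_MP, /* MP - define marked content point */
+ CONTENT_OP_n, /* n - end path without filling or stroking */
+ CONTENT_OP_q, /* q - save graphics state */
+ CONTENT_OP_Q, /* Q - restore graphics state */
+ CONTENT_OP_re, /* re - append rectangle to path */
+ CONTENT_OP_RG, /* RG - set RGB colour for stroking operations */
+ CONTENT_OP_rg, /* rg - set RGB colour for nonstroking operations */
+ CONTENT_OP_ri, /* ri - set colour rendering intent */
+ CONTENT_OP_s, /* s - close and stroke path */
+ CONTENT_OP_S, /* S - stroke path */
+ CONTENT_OP_SC, /* SC - set colour for stroking operations */
+ CONTENT_OP_sc, /* sc - set colour for nonstroking operations */
+ CONTENT_OP_SCN, /* SCN - set colour for stroking operations (ICCBased and special colour spaces) */
+ CONTENT_OP_scn, /* scn - set colour for nonstroking operations (ICCBased and special colour spaces) */
+ CONTENT_OP_sh, /* sh - paint area defined by shading pattern */
+ CONTENT_OP_T_, /* T* - move to start of next text line */
+ CONTENT_OP_Tc, /* Tc - set character spacing */
+ CONTENT_OP_Td, /* Td - move text position */
+ CONTENT_OP_TD, /* TD - move text position and set leading */
+ CONTENT_OP_Tf, /* Tf - set text font and size */
+ CONTENT_OP_Tj, /* Tj - show text */
+ CONTENT_OP_TJ, /* TJ - show text, allowing individual glyph positioning */
+ CONTENT_OP_TL, /* TL - set text leading */
+ CONTENT_OP_Tm, /* Tm - set text matrix and text line matrix */
+ CONTENT_OP_Tr, /* Tr - set text rendering mode */
+ CONTENT_OP_Ts, /* Ts - set text rise */
+ CONTENT_OP_Tw, /* Tw - set word spacing */
+ CONTENT_OP_Tz, /* Tz - set horizontal text scaling */
+ CONTENT_OP_v, /* v - append curved segment to path (initial point replicated) */
+ CONTENT_OP_w, /* w - set line width */
+ CONTENT_OP_W, /* W - set clipping path using nonzero winding rule */
+ CONTENT_OP_W_, /* W* - set clipping path using even/odd rule */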
+ CONTENT_OP_y, /* y - append curved segment to path */
+ CONTENT_OP__, /* ' - move to next line and show text */
+ CONTENT_OP___, /* " - set word and char spacing, move to next line and
+ * show text */
+};
+
+struct content_operation
+{
+ enum content_operator operator;
+
+};
+
+#endif
diff --git a/src/cos_parse.c b/src/cos_parse.c
index c196019..d0e50f5 100644
--- a/src/cos_parse.c
+++ b/src/cos_parse.c
@@ -19,6 +19,7 @@
#include "cos_parse.h"
#include "byte_class.h"
#include "cos_object.h"
+#include "content.h"
#include "pdf_doc.h"
/** increments in which cos string allocations are extended */
@@ -62,8 +63,8 @@ static uint8_t xtoi(uint8_t x)
*/
static nspdferror
cos_parse_number(struct cos_stream *stream,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
+ uint64_t *offset_out,
+ struct cos_object **cosobj_out)
{
nspdferror res;
struct cos_object *cosobj;
@@ -116,11 +117,11 @@ cos_parse_number(struct cos_stream *stream,
/**
- * decode literal string
+ * parse literal string
*
*/
static nspdferror
-cos_decode_string(struct nspdf_doc *doc,
+cos_parse_string(struct cos_stream *stream,
uint64_t *offset_out,
struct cos_object **cosobj_out)
{
@@ -132,7 +133,7 @@ cos_decode_string(struct nspdf_doc *doc,
offset = *offset_out;
- c = DOC_BYTE(doc, offset++);
+ c = stream_byte(stream, offset++);
if (c != '(') {
return NSPDFERROR_SYNTAX;
}
@@ -150,7 +151,7 @@ cos_decode_string(struct nspdf_doc *doc,
cosobj->u.s = cstring;
while (pdepth > 0) {
- c = DOC_BYTE(doc, offset++);
+ c = stream_byte(stream, offset++);
if (c == ')') {
pdepth--;
@@ -163,15 +164,15 @@ cos_decode_string(struct nspdf_doc *doc,
/* unescaped end of line characters are translated to a single
* newline
*/
- c = DOC_BYTE(doc, offset);
+ c = stream_byte(stream, offset);
while ((bclass[c] & BC_EOLM) != 0) {
offset++;
- c = DOC_BYTE(doc, offset);
+ c = stream_byte(stream, offset);
}
c = '\n';
} else if (c == '\\') {
/* escaped chars */
- c = DOC_BYTE(doc, offset++);
+ c = stream_byte(stream, offset++);
switch (c) {
case 'n':
c = '\n';
@@ -209,19 +210,19 @@ cos_decode_string(struct nspdf_doc *doc,
if ((bclass[c] & BC_EOLM) != 0) {
/* escaped end of line, swallow it */
- c = DOC_BYTE(doc, offset++);
+ c = stream_byte(stream, offset++);
while ((bclass[c] & BC_EOLM) != 0) {
- c = DOC_BYTE(doc, offset++);
+ c = stream_byte(stream, offset++);
}
} else if ((bclass[c] & BC_OCTL) != 0) {
/* octal value */
uint8_t val;
val = (c - '0');
- c = DOC_BYTE(doc, offset);
+ c = stream_byte(stream, offset);
if ((bclass[c] & BC_OCTL) != 0) {
offset++;
val = (val << 3) | (c - '0');
- c = DOC_BYTE(doc, offset);
+ c = stream_byte(stream, offset);
if ((bclass[c] & BC_OCTL) != 0) {
offset++;
val = (val << 3) | (c - '0');
@@ -237,7 +238,7 @@ cos_decode_string(struct nspdf_doc *doc,
cos_string_append(cstring, c);
}
- nspdf__stream_skip_ws(doc->stream, &offset);
+ nspdf__stream_skip_ws(stream, &offset);
*cosobj_out = cosobj;
*offset_out = offset;
@@ -249,9 +250,9 @@ cos_decode_string(struct nspdf_doc *doc,
* decode hex encoded string
*/
static nspdferror
-cos_decode_hex_string(struct nspdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
+cos_parse_hex_string(struct cos_stream *stream,
+ uint64_t *offset_out,
+ struct cos_object **cosobj_out)
{
uint64_t offset;
struct cos_object *cosobj;
@@ -262,7 +263,7 @@ cos_decode_hex_string(struct nspdf_doc *doc,
offset = *offset_out;
- c = DOC_BYTE(doc, offset++);
+ c = stream_byte(stream, offset++);
if (c != '<') {
return NSPDFERROR_SYNTAX;
}
@@ -279,14 +280,14 @@ cos_decode_hex_string(struct nspdf_doc *doc,
cosobj->type = COS_TYPE_STRING;
cosobj->u.s = cstring;
- for (; offset < doc->length; offset++) {
- c = DOC_BYTE(doc, offset);
+ for (; offset < stream->length; offset++) {
+ c = stream_byte(stream, offset);
if (c == '>') {
if (first == false) {
cos_string_append(cstring, value);
}
offset++;
- nspdf__stream_skip_ws(doc->stream, &offset);
+ nspdf__stream_skip_ws(stream, &offset);
*cosobj_out = cosobj;
*offset_out = offset;
@@ -309,12 +310,13 @@ cos_decode_hex_string(struct nspdf_doc *doc,
}
/**
- * decode a dictionary object
+ * parse a COS dictionary
*/
static nspdferror
-cos_decode_dictionary(struct nspdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
+cos_parse_dictionary(struct nspdf_doc *doc,
+ struct cos_stream *stream,
+ uint64_t *offset_out,
+ struct cos_object **cosobj_out)
{
uint64_t offset;
struct cos_object *cosobj;
@@ -325,12 +327,12 @@ cos_decode_dictionary(struct nspdf_doc *doc,
offset = *offset_out;
- if ((DOC_BYTE(doc, offset) != '<') ||
- (DOC_BYTE(doc, offset + 1) != '<')) {
+ if ((stream_byte(stream, offset ) != '<') ||
+ (stream_byte(stream, offset + 1) != '<')) {
return NSPDFERROR_SYNTAX; /* syntax error */
}
offset += 2;
- nspdf__stream_skip_ws(doc->stream, &offset);
+ nspdf__stream_skip_ws(stream, &offset);
//printf("found a dictionary\n");
@@ -340,33 +342,37 @@ cos_decode_dictionary(struct nspdf_doc *doc,
}
cosobj->type = COS_TYPE_DICTIONARY;
- while ((DOC_BYTE(doc, offset) != '>') &&
- (DOC_BYTE(doc, offset + 1) != '>')) {
+ while ((stream_byte(stream, offset ) != '>') &&
+ (stream_byte(stream, offset + 1) != '>')) {
- res = cos_parse_object(doc, &offset, &key);
+ res = cos_parse_object(doc, stream, &offset, &key);
if (res != NSPDFERROR_OK) {
- /* todo free up any dictionary entries already created */
printf("key object decode failed\n");
- return res;
+ goto cos_parse_dictionary_error;
}
if (key->type != COS_TYPE_NAME) {
/* key value pairs without a name */
printf("key was %d not a name %d\n", key->type, COS_TYPE_NAME);
- return NSPDFERROR_SYNTAX;
+
+ cos_free_object(key);
+ res = NSPDFERROR_SYNTAX;
+ goto cos_parse_dictionary_error;
}
- res = cos_parse_object(doc, &offset, &value);
+ res = cos_parse_object(doc, stream, &offset, &value);
if (res != NSPDFERROR_OK) {
printf("Unable to decode value object in dictionary\n");
- /* todo free up any dictionary entries already created */
- return res;
+ cos_free_object(key);
+ goto cos_parse_dictionary_error;
}
/* add dictionary entry */
entry = calloc(1, sizeof(struct cos_dictionary_entry));
if (entry == NULL) {
- /* todo free up any dictionary entries already created */
- return NSPDFERROR_NOMEM;
+ cos_free_object(key);
+ cos_free_object(value);
+ res = NSPDFERROR_NOMEM;
+ goto cos_parse_dictionary_error;
}
//printf("key:%s value(type):%d\n", key->u.n, value->type);
@@ -378,19 +384,26 @@ cos_decode_dictionary(struct nspdf_doc *doc,
}
offset += 2; /* skip closing >> */
- nspdf__stream_skip_ws(doc->stream, &offset);
+ nspdf__stream_skip_ws(stream, &offset);
*cosobj_out = cosobj;
*offset_out = offset;
return NSPDFERROR_OK;
+
+cos_parse_dictionary_error:
+ cos_free_object(cosobj);
+
+ return res;
}
+
/**
- * parse a list
+ * parse a COS list
*/
static nspdferror
cos_parse_list(struct nspdf_doc *doc,
+ struct cos_stream *stream,
uint64_t *offset_out,
struct cos_object **cosobj_out)
{
@@ -403,14 +416,13 @@ cos_parse_list(struct nspdf_doc *doc,
offset = *offset_out;
/* sanity check first token is list open */
- if (DOC_BYTE(doc, offset) != '[') {
- printf("not a [\n");
+ if (stream_byte(stream, offset++) != '[') {
+ printf("list does not start with a [\n");
return NSPDFERROR_SYNTAX;
}
- offset++;
/* advance offset to next token */
- res = nspdf__stream_skip_ws(doc->stream, &offset);
+ res = nspdf__stream_skip_ws(stream, &offset);
if (res != NSPDFERROR_OK) {
return res;
}
@@ -430,15 +442,16 @@ cos_parse_list(struct nspdf_doc *doc,
}
cosobj->u.array = array;
- while (DOC_BYTE(doc, offset) != ']') {
+ while (stream_byte(stream, offset) != ']') {
- res = cos_parse_object(doc, &offset, &value);
+ res = cos_parse_object(doc, stream, &offset, &value);
if (res != NSPDFERROR_OK) {
cos_free_object(cosobj);
printf("Unable to decode value object in list\n");
return res;
}
+ /* ensure there is enough space allocated for object pointers */
if (array->alloc < (array->length + 1)) {
struct cos_object **nvalues;
nvalues = realloc(array->values,
@@ -456,7 +469,7 @@ cos_parse_list(struct nspdf_doc *doc,
}
offset++; /* skip closing ] */
- nspdf__stream_skip_ws(doc->stream, &offset);
+ nspdf__stream_skip_ws(stream, &offset);
*cosobj_out = cosobj;
*offset_out = offset;
@@ -471,9 +484,9 @@ cos_parse_list(struct nspdf_doc *doc,
* \todo deal with # symbols on pdf versions 1.2 and later
*/
static nspdferror
-cos_decode_name(struct nspdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
+cos_parse_name(struct cos_stream *stream,
+ uint64_t *offset_out,
+ struct cos_object **cosobj_out)
{
uint64_t offset;
struct cos_object *cosobj;
@@ -483,30 +496,31 @@ cos_decode_name(struct nspdf_doc *doc,
offset = *offset_out;
- c = DOC_BYTE(doc, offset++);
+ c = stream_byte(stream, offset++);
if (c != '/') {
- return -1; /* names must be prefixed with a / */
+ return NSPDFERROR_SYNTAX;/* names must be prefixed with a / */
}
//printf("found a name\n");
- c = DOC_BYTE(doc, offset);
+ c = stream_byte(stream, offset);
while ((idx <= NAME_MAX_LENGTH) &&
((bclass[c] & (BC_WSPC | BC_DELM)) == 0)) {
offset++;
//printf("%c", c);
name[idx++] = c;
- c = DOC_BYTE(doc, offset);
+ c = stream_byte(stream, offset);
}
+
//printf("\nidx: %d\n", idx);
if (idx > NAME_MAX_LENGTH) {
/* name length exceeded implementation limit */
- return -1;
+ return NSPDFERROR_RANGE;
}
name[idx] = 0;
//printf("name: %s\n", name);
- nspdf__stream_skip_ws(doc->stream, &offset);
+ nspdf__stream_skip_ws(stream, &offset);
cosobj = calloc(1, sizeof(struct cos_object));
if (cosobj == NULL) {
@@ -523,13 +537,14 @@ cos_decode_name(struct nspdf_doc *doc,
return NSPDFERROR_OK;
}
+
/**
- * decode a cos boolean object
+ * parse a COS boolean object
*/
-static int
-cos_decode_boolean(struct nspdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
+static nspdferror
+cos_parse_boolean(struct cos_stream *stream,
+ uint64_t *offset_out,
+ struct cos_object **cosobj_out)
{
uint64_t offset;
struct cos_object *cosobj;
@@ -538,51 +553,56 @@ cos_decode_boolean(struct nspdf_doc *doc,
offset = *offset_out;
- c = DOC_BYTE(doc, offset++);
+ c = stream_byte(stream, offset++);
if ((c == 't') || (c == 'T')) {
/* true branch */
- c = DOC_BYTE(doc, offset++);
+ c = stream_byte(stream, offset++);
if ((c != 'r') && (c != 'R')) {
- return -1; /* syntax error */
+ return NSPDFERROR_SYNTAX;
}
- c = DOC_BYTE(doc, offset++);
+
+ c = stream_byte(stream, offset++);
if ((c != 'u') && (c != 'U')) {
- return -1; /* syntax error */
+ return NSPDFERROR_SYNTAX;
}
- c = DOC_BYTE(doc, offset++);
+
+ c = stream_byte(stream, offset++);
if ((c != 'e') && (c != 'E')) {
- return -1; /* syntax error */
+ return NSPDFERROR_SYNTAX;
}
value = true;
} else if ((c == 'f') || (c == 'F')) {
/* false branch */
- c = DOC_BYTE(doc, offset++);
+ c = stream_byte(stream, offset++);
if ((c != 'a') && (c != 'A')) {
- return -1; /* syntax error */
+ return NSPDFERROR_SYNTAX;
}
- c = DOC_BYTE(doc, offset++);
+
+ c = stream_byte(stream, offset++);
if ((c != 'l') && (c != 'L')) {
- return -1; /* syntax error */
+ return NSPDFERROR_SYNTAX;
}
- c = DOC_BYTE(doc, offset++);
+
+ c = stream_byte(stream, offset++);
if ((c != 's') && (c != 'S')) {
- return -1; /* syntax error */
+ return NSPDFERROR_SYNTAX;
}
- c = DOC_BYTE(doc, offset++);
+
+ c = stream_byte(stream, offset++);
if ((c != 'e') && (c != 'E')) {
- return -1; /* syntax error */
+ return NSPDFERROR_SYNTAX;
}
value = false;
} else {
- return -1; /* syntax error */
+ return NSPDFERROR_SYNTAX;
}
- nspdf__stream_skip_ws(doc->stream, &offset);
+ nspdf__stream_skip_ws(stream, &offset);
cosobj = calloc(1, sizeof(struct cos_object));
if (cosobj == NULL) {
@@ -599,13 +619,14 @@ cos_decode_boolean(struct nspdf_doc *doc,
return NSPDFERROR_OK;
}
+
/**
- * decode the null object.
+ * parse a COS null object.
*/
static nspdferror
-cos_decode_null(struct nspdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
+cos_parse_null(struct cos_stream *stream,
+ uint64_t *offset_out,
+ struct cos_object **cosobj_out)
{
uint64_t offset;
struct cos_object *cosobj;
@@ -613,27 +634,27 @@ cos_decode_null(struct nspdf_doc *doc,
offset = *offset_out;
- c = DOC_BYTE(doc, offset++);
+ c = stream_byte(stream, offset++);
if ((c != 'n') && (c != 'N')) {
- return -1; /* syntax error */
+ return NSPDFERROR_SYNTAX;
}
- c = DOC_BYTE(doc, offset++);
+ c = stream_byte(stream, offset++);
if ((c != 'u') && (c != 'U')) {
- return -1; /* syntax error */
+ return NSPDFERROR_SYNTAX;
}
- c = DOC_BYTE(doc, offset++);
+ c = stream_byte(stream, offset++);
if ((c != 'l') && (c != 'L')) {
- return -1; /* syntax error */
+ return NSPDFERROR_SYNTAX;
}
- c = DOC_BYTE(doc, offset++);
+ c = stream_byte(stream, offset++);
if ((c != 'l') && (c != 'L')) {
- return -1; /* syntax error */
+ return NSPDFERROR_SYNTAX;
}
- nspdf__stream_skip_ws(doc->stream, &offset);
+ nspdf__stream_skip_ws(stream, &offset);
cosobj = calloc(1, sizeof(struct cos_object));
if (cosobj == NULL) {
@@ -654,6 +675,7 @@ cos_decode_null(struct nspdf_doc *doc,
*/
static nspdferror
cos_parse_stream(struct nspdf_doc *doc,
+ struct cos_stream *stream_in,
uint64_t *offset_out,
struct cos_object **cosobj_out)
{
@@ -673,12 +695,12 @@ cos_parse_stream(struct nspdf_doc *doc,
return NSPDFERROR_NOTFOUND;
}
- if ((DOC_BYTE(doc, offset ) != 's') &&
- (DOC_BYTE(doc, offset + 1) != 't') &&
- (DOC_BYTE(doc, offset + 2) != 'r') &&
- (DOC_BYTE(doc, offset + 1) != 'e') &&
- (DOC_BYTE(doc, offset + 2) != 'a') &&
- (DOC_BYTE(doc, offset + 3) != 'm')) {
+ if ((stream_byte(stream_in, offset ) != 's') ||
+ (stream_byte(stream_in, offset + 1) != 't') ||
+ (stream_byte(stream_in, offset + 2) != 'r') ||
+ (stream_byte(stream_in, offset + 3) != 'e') ||
+ (stream_byte(stream_in, offset + 4) != 'a') ||
+ (stream_byte(stream_in, offset + 5) != 'm')) {
/* no stream marker */
return NSPDFERROR_NOTFOUND;
}
@@ -686,7 +708,7 @@ cos_parse_stream(struct nspdf_doc *doc,
//printf("detected stream\n");
/* parsed object was a dictionary and there is a stream marker */
- res = nspdf__stream_skip_ws(doc->stream, &offset);
+ res = nspdf__stream_skip_ws(stream_in, &offset);
if (res != NSPDFERROR_OK) {
return res;
}
@@ -706,33 +728,33 @@ cos_parse_stream(struct nspdf_doc *doc,
stream->length = stream_length;
//printf("stream length %d\n", stream_length);
- stream->data = doc->start + offset;
+ stream->data = stream_in->data + offset;
stream->alloc = 0; /* stream is pointing at non malloced data */
offset += stream->length;
/* possible whitespace after stream data */
- res = nspdf__stream_skip_ws(doc->stream, &offset);
+ res = nspdf__stream_skip_ws(stream_in, &offset);
if (res != NSPDFERROR_OK) {
return res;
}
- if ((DOC_BYTE(doc, offset ) != 'e') &&
- (DOC_BYTE(doc, offset + 1) != 'n') &&
- (DOC_BYTE(doc, offset + 2) != 'd') &&
- (DOC_BYTE(doc, offset + 3) != 's') &&
- (DOC_BYTE(doc, offset + 4) != 't') &&
- (DOC_BYTE(doc, offset + 5) != 'r') &&
- (DOC_BYTE(doc, offset + 6) != 'e') &&
- (DOC_BYTE(doc, offset + 7) != 'a') &&
- (DOC_BYTE(doc, offset + 8) != 'm')) {
+ if ((stream_byte(stream_in, offset ) != 'e') ||
+ (stream_byte(stream_in, offset + 1) != 'n') ||
+ (stream_byte(stream_in, offset + 2) != 'd') ||
+ (stream_byte(stream_in, offset + 3) != 's') ||
+ (stream_byte(stream_in, offset + 4) != 't') ||
+ (stream_byte(stream_in, offset + 5) != 'r') ||
+ (stream_byte(stream_in, offset + 6) != 'e') ||
+ (stream_byte(stream_in, offset + 7) != 'a') ||
+ (stream_byte(stream_in, offset + 8) != 'm')) {
/* no endstream marker */
return NSPDFERROR_SYNTAX;
}
offset += 9;
//printf("detected endstream\n");
- res = nspdf__stream_skip_ws(doc->stream, &offset);
+ res = nspdf__stream_skip_ws(stream_in, &offset);
if (res != NSPDFERROR_OK) {
return res;
}
@@ -787,9 +809,10 @@ cos_parse_stream(struct nspdf_doc *doc,
* integer
*/
static nspdferror
-cos_attempt_decode_reference(struct nspdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
+cos_attempt_parse_reference(struct nspdf_doc *doc,
+ struct cos_stream *stream,
+ uint64_t *offset_out,
+ struct cos_object **cosobj_out)
{
nspdferror res;
uint64_t offset;
@@ -798,7 +821,7 @@ cos_attempt_decode_reference(struct nspdf_doc *doc,
offset = *offset_out;
- res = cos_parse_number(doc->stream, &offset, &generation);
+ res = cos_parse_number(stream, &offset, &generation);
if (res != NSPDFERROR_OK) {
/* no error if next token could not be decoded as a number */
return NSPDFERROR_OK;
@@ -819,14 +842,14 @@ cos_attempt_decode_reference(struct nspdf_doc *doc,
}
/* two int in a row, look for the R */
- c = DOC_BYTE(doc, offset);
+ c = stream_byte(stream, offset);
if (c == 'R') {
struct cos_reference *nref; /* new reference */
//printf("found object reference\n");
offset ++;
- nspdf__stream_skip_ws(doc->stream, &offset);
+ nspdf__stream_skip_ws(stream, &offset);
nref = calloc(1, sizeof(struct cos_reference));
if (nref == NULL) {
@@ -846,27 +869,27 @@ cos_attempt_decode_reference(struct nspdf_doc *doc,
*offset_out = offset;
} else if ((c == 'o') &&
- (DOC_BYTE(doc, offset + 1) == 'b') &&
- (DOC_BYTE(doc, offset + 2) == 'j')) {
+ (stream_byte(stream, offset + 1) == 'b') &&
+ (stream_byte(stream, offset + 2) == 'j')) {
struct cos_object *indirect; /* indirect object */
//printf("indirect\n");
offset += 3;
- res = nspdf__stream_skip_ws(doc->stream, &offset);
+ res = nspdf__stream_skip_ws(stream, &offset);
if (res != NSPDFERROR_OK) {
cos_free_object(generation);
return res;
}
//printf("decoding\n");
- res = cos_parse_object(doc, &offset, &indirect);
+ res = cos_parse_object(doc, stream, &offset, &indirect);
if (res != NSPDFERROR_OK) {
cos_free_object(generation);
return res;
}
/* attempt to parse input as a stream */
- res = cos_parse_stream(doc, &offset, &indirect);
+ res = cos_parse_stream(doc, stream, &offset, &indirect);
if ((res != NSPDFERROR_OK) &&
(res != NSPDFERROR_NOTFOUND)) {
cos_free_object(indirect);
@@ -880,12 +903,12 @@ cos_attempt_decode_reference(struct nspdf_doc *doc,
indirect->type);
*/
- if ((DOC_BYTE(doc, offset ) != 'e') &&
- (DOC_BYTE(doc, offset + 1) != 'n') &&
- (DOC_BYTE(doc, offset + 2) != 'd') &&
- (DOC_BYTE(doc, offset + 1) != 'o') &&
- (DOC_BYTE(doc, offset + 2) != 'b') &&
- (DOC_BYTE(doc, offset + 3) != 'j')) {
+ if ((stream_byte(stream, offset ) != 'e') ||
+ (stream_byte(stream, offset + 1) != 'n') ||
+ (stream_byte(stream, offset + 2) != 'd') ||
+ (stream_byte(stream, offset + 3) != 'o') ||
+ (stream_byte(stream, offset + 4) != 'b') ||
+ (stream_byte(stream, offset + 5) != 'j')) {
cos_free_object(indirect);
cos_free_object(generation);
return NSPDFERROR_SYNTAX;
@@ -893,7 +916,7 @@ cos_attempt_decode_reference(struct nspdf_doc *doc,
offset += 6;
//printf("endobj\n");
- res = nspdf__stream_skip_ws(doc->stream, &offset);
+ res = nspdf__stream_skip_ws(stream, &offset);
if (res != NSPDFERROR_OK) {
cos_free_object(indirect);
cos_free_object(generation);
@@ -968,8 +991,9 @@ cos_attempt_decode_reference(struct nspdf_doc *doc,
*/
nspdferror
cos_parse_object(struct nspdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
+ struct cos_stream *stream,
+ uint64_t *offset_out,
+ struct cos_object **cosobj_out)
{
uint64_t offset;
nspdferror res;
@@ -977,8 +1001,12 @@ cos_parse_object(struct nspdf_doc *doc,
offset = *offset_out;
+ if (offset >= stream->length) {
+ return NSPDFERROR_RANGE;
+ }
+
/* object could be any type use first char to try and select */
- switch (DOC_BYTE(doc, offset)) {
+ switch (stream_byte(stream, offset)) {
case '-':
case '+':
@@ -993,45 +1021,45 @@ cos_parse_object(struct nspdf_doc *doc,
case '7':
case '8':
case '9':
- res = cos_parse_number(doc->stream, &offset, &cosobj);
+ res = cos_parse_number(stream, &offset, &cosobj);
/* if type is positive integer try to check for reference */
if ((res == 0) &&
(cosobj->type == COS_TYPE_INT) &&
(cosobj->u.i > 0)) {
- res = cos_attempt_decode_reference(doc, &offset, &cosobj);
+ res = cos_attempt_parse_reference(doc, stream, &offset, &cosobj);
}
break;
case '<':
- if (DOC_BYTE(doc, offset + 1) == '<') {
- res = cos_decode_dictionary(doc, &offset, &cosobj);
+ if (stream_byte(stream, offset + 1) == '<') {
+ res = cos_parse_dictionary(doc, stream, &offset, &cosobj);
} else {
- res = cos_decode_hex_string(doc, &offset, &cosobj);
+ res = cos_parse_hex_string(stream, &offset, &cosobj);
}
break;
case '(':
- res = cos_decode_string(doc, &offset, &cosobj);
+ res = cos_parse_string(stream, &offset, &cosobj);
break;
case '/':
- res = cos_decode_name(doc, &offset, &cosobj);
+ res = cos_parse_name(stream, &offset, &cosobj);
break;
case '[':
- res = cos_parse_list(doc, &offset, &cosobj);
+ res = cos_parse_list(doc, stream, &offset, &cosobj);
break;
case 't':
case 'T':
case 'f':
case 'F':
- res = cos_decode_boolean(doc, &offset, &cosobj);
+ res = cos_parse_boolean(stream, &offset, &cosobj);
break;
case 'n':
case 'N':
- res = cos_decode_null(doc, &offset, &cosobj);
+ res = cos_parse_null(stream, &offset, &cosobj);
break;
default:
@@ -1046,14 +1074,33 @@ cos_parse_object(struct nspdf_doc *doc,
return res;
}
+
+static nspdferror
+parse_content_operation(struct nspdf_doc *doc,
+ struct cos_stream *stream,
+ unsigned int *offset_out,
+ struct content_operation *operation_out)
+{
+ unsigned int offset;
+
+ offset = *offset_out;
+
+ offset+=stream->length;
+
+ *offset_out = offset;
+ return NSPDFERROR_OK;
+}
+
nspdferror
cos_parse_content_stream(struct nspdf_doc *doc,
struct cos_stream *stream,
struct cos_object **content_out)
{
+ nspdferror res;
struct cos_object *cosobj;
+ unsigned int offset;
- printf("%.*s", (int)stream->length, stream->data);
+ //printf("%.*s", (int)stream->length, stream->data);
cosobj = calloc(1, sizeof(struct cos_object));
if (cosobj == NULL) {
@@ -1061,7 +1108,48 @@ cos_parse_content_stream(struct nspdf_doc *doc,
}
cosobj->type = COS_TYPE_CONTENT;
+ cosobj->u.content = calloc(1, sizeof (struct cos_content));
+ if (cosobj->u.content == NULL) {
+ goto cos_parse_content_stream_error;
+ cos_free_object(cosobj);
+ return NSPDFERROR_NOMEM;
+ }
+
+ offset = 0;
+
+ while (offset < stream->length) {
+ struct content_operation cop;
+
+ /* ensure there is space in the operations array */
+ if (cosobj->u.content->alloc < (cosobj->u.content->length + 1)) {
+ struct content_operation *newops;
+ newops = realloc(cosobj->u.content->operations,
+ sizeof(struct content_operation) *
+ (cosobj->u.content->alloc + 32));
+ if (newops == NULL) {
+ res = NSPDFERROR_NOMEM;
+ goto cos_parse_content_stream_error;
+ }
+ cosobj->u.content->operations = newops;
+ cosobj->u.content->alloc += 32;
+ }
+
+ res = parse_content_operation(
+ doc,
+ stream,
+ &offset,
+ cosobj->u.content->operations + cosobj->u.content->length);
+ if (res != NSPDFERROR_OK) {
+ goto cos_parse_content_stream_error;
+ }
+ cosobj->u.content->length++;
+ }
+
*content_out = cosobj;
return NSPDFERROR_OK;
+
+cos_parse_content_stream_error:
+ cos_free_object(cosobj);
+ return res;
}
diff --git a/src/cos_parse.h b/src/cos_parse.h
index 8f48108..e7f1ce0 100644
--- a/src/cos_parse.h
+++ b/src/cos_parse.h
@@ -24,7 +24,7 @@ struct cos_stream;
*
* lex and parse a byte stream to generate a COS object.
*/
-nspdferror cos_parse_object(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out);
+nspdferror cos_parse_object(struct nspdf_doc *doc, struct cos_stream *stream, uint64_t *offset_out, struct cos_object **cosobj_out);
/**
* Parse content stream into content operations object
diff --git a/src/document.c b/src/document.c
index b7a36d2..36d4c63 100644
--- a/src/document.c
+++ b/src/document.c
@@ -170,7 +170,7 @@ decode_trailer(struct nspdf_doc *doc,
offset += 7;
nspdf__stream_skip_ws(doc->stream, &offset);
- res = cos_parse_object(doc, &offset, &trailer);
+ res = cos_parse_object(doc, doc->stream, &offset, &trailer);
if (res != 0) {
return res;
}
diff --git a/src/page.c b/src/page.c
index 7b6bee8..3844122 100644
--- a/src/page.c
+++ b/src/page.c
@@ -156,7 +156,7 @@ nspdf__render_content_stream(struct nspdf_doc *doc,
res = cos_get_content(doc, content_entry, &content_operations);
if (res == NSPDFERROR_OK) {
- printf("%p", content_operations);
+ printf("%p\n", content_operations);
}
return res;
diff --git a/src/xref.c b/src/xref.c
index 2fb9301..cdd4088 100644
--- a/src/xref.c
+++ b/src/xref.c
@@ -171,7 +171,7 @@ nspdf__xref_get_referenced(struct nspdf_doc *doc, struct cos_object **cobj_out)
if (entry->object == NULL) {
/* indirect object has never been parsed */
offset = entry->offset;
- res = cos_parse_object(doc, &offset, &indirect);
+ res = cos_parse_object(doc, doc->stream, &offset, &indirect);
if (res != NSPDFERROR_OK) {
//printf("failed to decode indirect object\n");
return res;
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=b1e0e4414ecd3161c0...
commit b1e0e4414ecd3161c0f947daceb8643b5889e51c
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
start to alter parsing to read from cos_stream object
diff --git a/include/nspdf/document.h b/include/nspdf/document.h
index d7cbb0f..3c222cf 100644
--- a/include/nspdf/document.h
+++ b/include/nspdf/document.h
@@ -40,7 +40,7 @@ nspdferror nspdf_document_destroy(struct nspdf_doc *doc);
* ready to render pages. The passed buffer ownership is transfered and must
* not be altered untill the document is destroyed.
*/
-nspdferror nspdf_document_parse(struct nspdf_doc *doc, const uint8_t *buffer, uint64_t buffer_length);
+nspdferror nspdf_document_parse(struct nspdf_doc *doc, const uint8_t *buffer, unsigned int buffer_length);
#endif /* NSPDF_DOCUMENT_H_ */
diff --git a/src/cos_object.c b/src/cos_object.c
index 3dc5efa..4398822 100644
--- a/src/cos_object.c
+++ b/src/cos_object.c
@@ -17,6 +17,7 @@
#include <nspdf/errors.h>
#include "cos_object.h"
+#include "cos_parse.h"
#include "pdf_doc.h"
@@ -369,6 +370,54 @@ cos_get_stream(struct nspdf_doc *doc,
/*
+ * get object from object reference
+ */
+nspdferror
+cos_get_object(struct nspdf_doc *doc,
+ struct cos_object *cobj,
+ struct cos_object **value_out)
+{
+ nspdferror res;
+ res = nspdf__xref_get_referenced(doc, &cobj);
+ if (res == NSPDFERROR_OK) {
+ *value_out = cobj;
+ }
+ return res;
+}
+
+
+nspdferror
+cos_get_content(struct nspdf_doc *doc,
+ struct cos_object *cobj,
+ struct cos_content **content_out)
+{
+ nspdferror res;
+ struct cos_object *content_obj;
+
+ res = nspdf__xref_get_referenced(doc, &cobj);
+ if (res == NSPDFERROR_OK) {
+ if (cobj->type == COS_TYPE_STREAM) {
+ res = cos_parse_content_stream(doc, cobj->u.stream, &content_obj);
+ if (res == NSPDFERROR_OK) {
+ /* replace stream object with parsed content operations */
+ struct cos_object tmpobj;
+ tmpobj = *cobj;
+ *cobj = *content_obj;
+ *content_obj = tmpobj;
+ cos_free_object(content_obj);
+
+ *content_out = cobj->u.content;
+ }
+ } else if (cobj->type == COS_TYPE_CONTENT) {
+ *content_out = cobj->u.content;
+ } else {
+ res = NSPDFERROR_TYPE;
+ }
+ }
+ return res;
+}
+
+/*
* get a value for a key from a dictionary
*/
nspdferror
diff --git a/src/cos_object.h b/src/cos_object.h
index 2e763e2..9b98694 100644
--- a/src/cos_object.h
+++ b/src/cos_object.h
@@ -16,6 +16,7 @@
#define NSPDF__COS_OBJECT_H_
struct nspdf_doc;
+struct content_operation;
enum cos_type {
COS_TYPE_NULL, /* 0 */
@@ -30,6 +31,7 @@ enum cos_type {
COS_TYPE_NUMBERTREE,
COS_TYPE_STREAM,
COS_TYPE_REFERENCE, /* 11 */
+ COS_TYPE_CONTENT, /* 12 - parsed content stream */
};
struct cos_object;
@@ -59,10 +61,13 @@ struct cos_array {
struct cos_object **values;
};
+/**
+ * COS string data
+ */
struct cos_string {
- uint8_t *data; /**< string data */
- size_t length; /**< string length */
+ unsigned int length; /**< string length */
size_t alloc; /**< memory allocation for string */
+ uint8_t *data; /**< string data */
};
struct cos_reference {
@@ -71,12 +76,22 @@ struct cos_reference {
};
struct cos_stream {
- const uint8_t *data; /**< decoded stream data */
- int64_t length; /**< decoded stream length */
+ unsigned int length; /**< decoded stream length */
size_t alloc; /**< memory allocated for stream */
+ const uint8_t *data; /**< decoded stream data */
};
+/**
+ * Synthetic parsed content object.
+ *
+ */
+struct cos_content {
+ unsigned int length; /**< number of content operations */
+ unsigned int alloc; /**< number of allocated operations */
+ struct content_operation *operations;
+};
+
struct cos_object {
int type;
union {
@@ -107,6 +122,8 @@ struct cos_object {
/** reference */
struct cos_reference *reference;
+ /** parsed content stream */
+ struct cos_content *content;
} u;
};
@@ -264,5 +281,31 @@ nspdferror cos_get_array(struct nspdf_doc *doc, struct cos_object *cobj, struct
*/
nspdferror cos_get_stream(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_stream **stream_out);
+/**
+ * get a direct cos object.
+ *
+ * Obtain a direct object if the passed object was a reference it is
+ * dereferenced from the cross reference table.
+ *
+ * \param doc The document the cos object belongs to.
+ * \param cobj A cos object.
+ * \param object_out The result object.
+ * \return NSERROR_OK and \p object_out updated,
+ */
+nspdferror cos_get_object(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_object **object_out);
+
+/**
+ * get a parsed content object
+ *
+ * Get the parsed content from a cos object, if the object is an object
+ * reference it will be dereferenced first.
+ * The parsed content object is *not* a normal COS object rather it is the
+ * internal result of parsing a PDF content stream.
+ * This object type is used to replace the stream object in the cross reference
+ * table after its initial parse to avoid the need to keep and repeatedly
+ * parse the filtered stream data.
+ *
+ */
+nspdferror cos_get_content(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_content **content_out);
#endif
diff --git a/src/cos_parse.c b/src/cos_parse.c
index 21ba0d7..c196019 100644
--- a/src/cos_parse.c
+++ b/src/cos_parse.c
@@ -57,11 +57,15 @@ static uint8_t xtoi(uint8_t x)
return x;
}
+/**
+ * parse a number
+ */
static nspdferror
-cos_decode_number(struct nspdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
+cos_parse_number(struct cos_stream *stream,
+ uint64_t *offset_out,
+ struct cos_object **cosobj_out)
{
+ nspdferror res;
struct cos_object *cosobj;
uint8_t c; /* current byte from source data */
unsigned int len; /* number of decimal places in number */
@@ -71,7 +75,7 @@ cos_decode_number(struct nspdf_doc *doc,
offset = *offset_out;
for (len = 0; len < sizeof(num); len++) {
- c = DOC_BYTE(doc, offset);
+ c = stream_byte(stream, offset);
if ((bclass[c] & BC_DCML) != BC_DCML) {
int64_t result = 0; /* parsed result */
uint64_t tens;
@@ -85,7 +89,10 @@ cos_decode_number(struct nspdf_doc *doc,
result += (num[len - 1] * tens);
}
- doc_skip_ws(doc, &offset);
+ res = nspdf__stream_skip_ws(stream, &offset);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
cosobj = calloc(1, sizeof(struct cos_object));
if (cosobj == NULL) {
@@ -230,7 +237,7 @@ cos_decode_string(struct nspdf_doc *doc,
cos_string_append(cstring, c);
}
- doc_skip_ws(doc, &offset);
+ nspdf__stream_skip_ws(doc->stream, &offset);
*cosobj_out = cosobj;
*offset_out = offset;
@@ -279,7 +286,7 @@ cos_decode_hex_string(struct nspdf_doc *doc,
cos_string_append(cstring, value);
}
offset++;
- doc_skip_ws(doc, &offset);
+ nspdf__stream_skip_ws(doc->stream, &offset);
*cosobj_out = cosobj;
*offset_out = offset;
@@ -323,7 +330,7 @@ cos_decode_dictionary(struct nspdf_doc *doc,
return NSPDFERROR_SYNTAX; /* syntax error */
}
offset += 2;
- doc_skip_ws(doc, &offset);
+ nspdf__stream_skip_ws(doc->stream, &offset);
//printf("found a dictionary\n");
@@ -371,7 +378,7 @@ cos_decode_dictionary(struct nspdf_doc *doc,
}
offset += 2; /* skip closing >> */
- doc_skip_ws(doc, &offset);
+ nspdf__stream_skip_ws(doc->stream, &offset);
*cosobj_out = cosobj;
*offset_out = offset;
@@ -403,7 +410,7 @@ cos_parse_list(struct nspdf_doc *doc,
offset++;
/* advance offset to next token */
- res = doc_skip_ws(doc, &offset);
+ res = nspdf__stream_skip_ws(doc->stream, &offset);
if (res != NSPDFERROR_OK) {
return res;
}
@@ -449,7 +456,7 @@ cos_parse_list(struct nspdf_doc *doc,
}
offset++; /* skip closing ] */
- doc_skip_ws(doc, &offset);
+ nspdf__stream_skip_ws(doc->stream, &offset);
*cosobj_out = cosobj;
*offset_out = offset;
@@ -499,7 +506,7 @@ cos_decode_name(struct nspdf_doc *doc,
//printf("name: %s\n", name);
- doc_skip_ws(doc, &offset);
+ nspdf__stream_skip_ws(doc->stream, &offset);
cosobj = calloc(1, sizeof(struct cos_object));
if (cosobj == NULL) {
@@ -575,7 +582,7 @@ cos_decode_boolean(struct nspdf_doc *doc,
return -1; /* syntax error */
}
- doc_skip_ws(doc, &offset);
+ nspdf__stream_skip_ws(doc->stream, &offset);
cosobj = calloc(1, sizeof(struct cos_object));
if (cosobj == NULL) {
@@ -626,7 +633,7 @@ cos_decode_null(struct nspdf_doc *doc,
return -1; /* syntax error */
}
- doc_skip_ws(doc, &offset);
+ nspdf__stream_skip_ws(doc->stream, &offset);
cosobj = calloc(1, sizeof(struct cos_object));
if (cosobj == NULL) {
@@ -656,6 +663,7 @@ cos_parse_stream(struct nspdf_doc *doc,
uint64_t offset;
struct cos_object *stream_filter;
struct cos_stream *stream;
+ int64_t stream_length;
offset = *offset_out;
stream_dict = *cosobj_out;
@@ -678,7 +686,7 @@ cos_parse_stream(struct nspdf_doc *doc,
//printf("detected stream\n");
/* parsed object was a dictionary and there is a stream marker */
- res = doc_skip_ws(doc, &offset);
+ res = nspdf__stream_skip_ws(doc->stream, &offset);
if (res != NSPDFERROR_OK) {
return res;
}
@@ -688,10 +696,15 @@ cos_parse_stream(struct nspdf_doc *doc,
return NSPDFERROR_NOMEM;
}
- res = cos_get_dictionary_int(doc, stream_dict, "Length", &stream->length);
+ res = cos_get_dictionary_int(doc, stream_dict, "Length", &stream_length);
if (res != NSPDFERROR_OK) {
return res;
}
+ if (stream_length < 0) {
+ return NSPDFERROR_RANGE;
+ }
+ stream->length = stream_length;
+
//printf("stream length %d\n", stream_length);
stream->data = doc->start + offset;
stream->alloc = 0; /* stream is pointing at non malloced data */
@@ -699,7 +712,7 @@ cos_parse_stream(struct nspdf_doc *doc,
offset += stream->length;
/* possible whitespace after stream data */
- res = doc_skip_ws(doc, &offset);
+ res = nspdf__stream_skip_ws(doc->stream, &offset);
if (res != NSPDFERROR_OK) {
return res;
}
@@ -719,7 +732,7 @@ cos_parse_stream(struct nspdf_doc *doc,
offset += 9;
//printf("detected endstream\n");
- res = doc_skip_ws(doc, &offset);
+ res = nspdf__stream_skip_ws(doc->stream, &offset);
if (res != NSPDFERROR_OK) {
return res;
}
@@ -785,7 +798,7 @@ cos_attempt_decode_reference(struct nspdf_doc *doc,
offset = *offset_out;
- res = cos_decode_number(doc, &offset, &generation);
+ res = cos_parse_number(doc->stream, &offset, &generation);
if (res != NSPDFERROR_OK) {
/* no error if next token could not be decoded as a number */
return NSPDFERROR_OK;
@@ -813,7 +826,7 @@ cos_attempt_decode_reference(struct nspdf_doc *doc,
//printf("found object reference\n");
offset ++;
- doc_skip_ws(doc, &offset);
+ nspdf__stream_skip_ws(doc->stream, &offset);
nref = calloc(1, sizeof(struct cos_reference));
if (nref == NULL) {
@@ -839,7 +852,7 @@ cos_attempt_decode_reference(struct nspdf_doc *doc,
//printf("indirect\n");
offset += 3;
- res = doc_skip_ws(doc, &offset);
+ res = nspdf__stream_skip_ws(doc->stream, &offset);
if (res != NSPDFERROR_OK) {
cos_free_object(generation);
return res;
@@ -880,7 +893,7 @@ cos_attempt_decode_reference(struct nspdf_doc *doc,
offset += 6;
//printf("endobj\n");
- res = doc_skip_ws(doc, &offset);
+ res = nspdf__stream_skip_ws(doc->stream, &offset);
if (res != NSPDFERROR_OK) {
cos_free_object(indirect);
cos_free_object(generation);
@@ -980,7 +993,7 @@ cos_parse_object(struct nspdf_doc *doc,
case '7':
case '8':
case '9':
- res = cos_decode_number(doc, &offset, &cosobj);
+ res = cos_parse_number(doc->stream, &offset, &cosobj);
/* if type is positive integer try to check for reference */
if ((res == 0) &&
(cosobj->type == COS_TYPE_INT) &&
@@ -1032,3 +1045,23 @@ cos_parse_object(struct nspdf_doc *doc,
return res;
}
+
+nspdferror
+cos_parse_content_stream(struct nspdf_doc *doc,
+ struct cos_stream *stream,
+ struct cos_object **content_out)
+{
+ struct cos_object *cosobj;
+
+ printf("%.*s", (int)stream->length, stream->data);
+
+ cosobj = calloc(1, sizeof(struct cos_object));
+ if (cosobj == NULL) {
+ return NSPDFERROR_NOMEM;
+ }
+ cosobj->type = COS_TYPE_CONTENT;
+
+ *content_out = cosobj;
+
+ return NSPDFERROR_OK;
+}
diff --git a/src/cos_parse.h b/src/cos_parse.h
index 0bca79f..8f48108 100644
--- a/src/cos_parse.h
+++ b/src/cos_parse.h
@@ -17,12 +17,18 @@
struct nspdf_doc;
struct cos_object;
+struct cos_stream;
/**
- * Decode input stream into an object
+ * Parse input stream into an object
*
* lex and parse a byte stream to generate a COS object.
*/
nspdferror cos_parse_object(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out);
+/**
+ * Parse content stream into content operations object
+ */
+nspdferror cos_parse_content_stream(struct nspdf_doc *doc, struct cos_stream *stream, struct cos_object **content_out);
+
#endif
diff --git a/src/document.c b/src/document.c
index bbe948d..b7a36d2 100644
--- a/src/document.c
+++ b/src/document.c
@@ -92,7 +92,7 @@ decode_startxref(struct nspdf_doc *doc,
}
offset += 9;
- res = doc_skip_ws(doc, &offset);
+ res = nspdf__stream_skip_ws(doc->stream, &offset);
if (res != NSPDFERROR_OK) {
return res;
}
@@ -168,7 +168,7 @@ decode_trailer(struct nspdf_doc *doc,
return -1;
}
offset += 7;
- doc_skip_ws(doc, &offset);
+ nspdf__stream_skip_ws(doc->stream, &offset);
res = cos_parse_object(doc, &offset, &trailer);
if (res != 0) {
@@ -422,13 +422,20 @@ static nspdferror check_header(struct nspdf_doc *doc)
nspdferror
nspdf_document_parse(struct nspdf_doc *doc,
const uint8_t *buffer,
- uint64_t buffer_length)
+ unsigned int buffer_length)
{
nspdferror res;
doc->start = buffer;
doc->length = buffer_length;
+ doc->stream = calloc(1, sizeof(struct cos_stream));
+ if (doc->stream == NULL) {
+ return NSPDFERROR_NOMEM;
+ }
+ doc->stream->data = buffer;
+ doc->stream->length = buffer_length;
+
res = check_header(doc);
if (res != 0) {
printf("header check failed\n");
diff --git a/src/page.c b/src/page.c
index acc97d7..7b6bee8 100644
--- a/src/page.c
+++ b/src/page.c
@@ -148,10 +148,18 @@ nspdf_page_count(struct nspdf_doc *doc, unsigned int *pages_out)
static nspdferror
nspdf__render_content_stream(struct nspdf_doc *doc,
- struct cos_stream *content_stream)
+ struct page_table_entry *page_entry,
+ struct cos_object *content_entry)
{
- printf("%.*s", (int)content_stream->length, content_stream->data);
- return NSPDFERROR_OK;
+ nspdferror res;
+ struct cos_content *content_operations;
+
+ res = cos_get_content(doc, content_entry, &content_operations);
+ if (res == NSPDFERROR_OK) {
+ printf("%p", content_operations);
+ }
+
+ return res;
}
/* exported interface documented in nspdf/page.h */
@@ -160,7 +168,6 @@ nspdf_page_render(struct nspdf_doc *doc, unsigned int page_number)
{
struct page_table_entry *page_entry;
struct cos_object *content_array;
- struct cos_stream *content_stream;
nspdferror res;
page_entry = doc->page_table + page_number;
@@ -184,31 +191,16 @@ nspdf_page_render(struct nspdf_doc *doc, unsigned int page_number)
content_stream_index,
&content_entry);
if (res != NSPDFERROR_OK) {
- return res;
- }
-
- res = cos_get_stream(doc, content_entry, &content_stream);
- if (res != NSPDFERROR_OK) {
- return res;
+ break;
}
- res = nspdf__render_content_stream(doc, content_stream);
+ res = nspdf__render_content_stream(doc, page_entry, content_entry);
if (res != NSPDFERROR_OK) {
- return res;
+ break;
}
}
} else if (res == NSPDFERROR_TYPE) {
- res = cos_get_stream(doc, page_entry->contents, &content_stream);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- res = nspdf__render_content_stream(doc, content_stream);
- if (res != NSPDFERROR_OK) {
- return res;
- }
- } else {
- return res;
+ res = nspdf__render_content_stream(doc, page_entry, page_entry->contents);
}
return res;
diff --git a/src/pdf_doc.c b/src/pdf_doc.c
index 997a3d7..955f737 100644
--- a/src/pdf_doc.c
+++ b/src/pdf_doc.c
@@ -19,29 +19,27 @@
#include "cos_object.h"
#include "pdf_doc.h"
-/**
- * move offset to next non whitespace byte
- */
-nspdferror doc_skip_ws(struct nspdf_doc *doc, uint64_t *offset)
+nspdferror nspdf__stream_skip_ws(struct cos_stream *stream, uint64_t *offset)
{
uint8_t c;
/* TODO sort out keeping offset in range */
- c = DOC_BYTE(doc, *offset);
+ c = stream_byte(stream, *offset);
while ((bclass[c] & (BC_WSPC | BC_CMNT) ) != 0) {
(*offset)++;
/* skip comments */
if ((bclass[c] & BC_CMNT) != 0) {
- c = DOC_BYTE(doc, *offset);
+ c = stream_byte(stream, *offset);
while ((bclass[c] & BC_EOLM ) == 0) {
(*offset)++;
- c = DOC_BYTE(doc, *offset);
+ c = stream_byte(stream, *offset);
}
}
- c = DOC_BYTE(doc, *offset);
+ c = stream_byte(stream, *offset);
}
return NSPDFERROR_OK;
}
+
/**
* move offset to next non eol byte
*/
diff --git a/src/pdf_doc.h b/src/pdf_doc.h
index e362ea6..27a730a 100644
--- a/src/pdf_doc.h
+++ b/src/pdf_doc.h
@@ -18,11 +18,18 @@
struct xref_table_entry;
struct page_table_entry;
-/** pdf document */
+/**
+ * pdf document
+ */
struct nspdf_doc {
const uint8_t *start; /* start of pdf document in input stream */
- uint64_t length;
+ unsigned int length;
+
+ /**
+ * input data stream
+ */
+ struct cos_stream *stream;
int major;
int minor;
@@ -46,8 +53,14 @@ struct nspdf_doc {
/* byte data acessory, allows for more complex buffer handling in future */
#define DOC_BYTE(doc, offset) (doc->start[(offset)])
+static inline uint8_t
+stream_byte(struct cos_stream *stream, unsigned int offset)
+{
+ return *(stream->data + offset);
+}
+
/* helpers in pdf_doc.c */
-nspdferror doc_skip_ws(struct nspdf_doc *doc, uint64_t *offset);
+nspdferror nspdf__stream_skip_ws(struct cos_stream *stream, uint64_t *offset);
nspdferror doc_skip_eol(struct nspdf_doc *doc, uint64_t *offset);
nspdferror doc_read_uint(struct nspdf_doc *doc, uint64_t *offset_out, uint64_t *result_out);
@@ -68,9 +81,6 @@ nspdferror nspdf__xref_allocate(struct nspdf_doc *doc, int64_t size);
nspdferror nspdf__decode_page_tree(struct nspdf_doc *doc, struct cos_object *page_tree_node, unsigned int *page_index);
/* cos stream filters */
-nspdferror
-nspdf__cos_stream_filter(struct nspdf_doc *doc,
- const char *filter_name,
- struct cos_stream **stream_out);
+nspdferror nspdf__cos_stream_filter(struct nspdf_doc *doc, const char *filter_name, struct cos_stream **stream_out);
#endif
diff --git a/src/xref.c b/src/xref.c
index 298c750..2fb9301 100644
--- a/src/xref.c
+++ b/src/xref.c
@@ -68,7 +68,7 @@ nspdferror nspdf__xref_parse(struct nspdf_doc *doc, uint64_t *offset_out)
}
offset += 4;
- res = doc_skip_ws(doc, &offset);
+ res = nspdf__stream_skip_ws(doc->stream, &offset);
if (res != NSPDFERROR_OK) {
return res;
}
@@ -79,7 +79,7 @@ nspdferror nspdf__xref_parse(struct nspdf_doc *doc, uint64_t *offset_out)
res = doc_read_uint(doc, &offset, &objnumber);
while (res == NSPDFERROR_OK) {
uint64_t lastobj;
- res = doc_skip_ws(doc, &offset);
+ res = nspdf__stream_skip_ws(doc->stream, &offset);
if (res != NSPDFERROR_OK) {
return res;
}
@@ -89,7 +89,7 @@ nspdferror nspdf__xref_parse(struct nspdf_doc *doc, uint64_t *offset_out)
return res;
}
- res = doc_skip_ws(doc, &offset);
+ res = nspdf__stream_skip_ws(doc->stream, &offset);
if (res != NSPDFERROR_OK) {
return res;
}
@@ -169,7 +169,7 @@ nspdf__xref_get_referenced(struct nspdf_doc *doc, struct cos_object **cobj_out)
}
if (entry->object == NULL) {
- /* indirect object has never been decoded */
+ /* indirect object has never been parsed */
offset = entry->offset;
res = cos_parse_object(doc, &offset, &indirect);
if (res != NSPDFERROR_OK) {
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=c27eb52f96f8070c4b...
commit c27eb52f96f8070c4be77a387e603508fc4092ce
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
extend page content stream parse to cope with arrays of streams
diff --git a/src/page.c b/src/page.c
index a6a9d52..acc97d7 100644
--- a/src/page.c
+++ b/src/page.c
@@ -12,6 +12,7 @@
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
+#include <stdio.h>
#include <nspdf/page.h>
#include "cos_object.h"
@@ -145,18 +146,68 @@ nspdf_page_count(struct nspdf_doc *doc, unsigned int *pages_out)
return NSPDFERROR_OK;
}
+static nspdferror
+nspdf__render_content_stream(struct nspdf_doc *doc,
+ struct cos_stream *content_stream)
+{
+ printf("%.*s", (int)content_stream->length, content_stream->data);
+ return NSPDFERROR_OK;
+}
+
/* exported interface documented in nspdf/page.h */
nspdferror
nspdf_page_render(struct nspdf_doc *doc, unsigned int page_number)
{
struct page_table_entry *page_entry;
- struct cos_stream *stream;
+ struct cos_object *content_array;
+ struct cos_stream *content_stream;
nspdferror res;
page_entry = doc->page_table + page_number;
- res = cos_get_stream(doc, page_entry->contents, &stream);
- if (res != NSPDFERROR_OK) {
+ /* contents may be an array of stream objects or just a single one */
+ res = cos_get_array(doc, page_entry->contents, &content_array);
+ if (res == NSPDFERROR_OK) {
+ unsigned int content_stream_count;
+ unsigned int content_stream_index;
+
+ res = cos_get_array_size(doc, content_array, &content_stream_count);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+ for (content_stream_index = 0;
+ content_stream_index < content_stream_count;
+ content_stream_index++) {
+ struct cos_object *content_entry;
+ res = cos_get_array_value(doc,
+ content_array,
+ content_stream_index,
+ &content_entry);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+
+ res = cos_get_stream(doc, content_entry, &content_stream);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+
+ res = nspdf__render_content_stream(doc, content_stream);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+ }
+ } else if (res == NSPDFERROR_TYPE) {
+ res = cos_get_stream(doc, page_entry->contents, &content_stream);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+
+ res = nspdf__render_content_stream(doc, content_stream);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+ } else {
return res;
}
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=096ab0ff30ae4a0f25...
commit 096ab0ff30ae4a0f257ef26ae9df119defb3e3a7
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
fix list parse and construction
change lists to be represented by pointer arrays grown in 32 entry
blocks instead of linked list. This also ensures lists are constructed
in the correct order and makes enumeration and indexing much more
efficient.
diff --git a/src/cos_object.c b/src/cos_object.c
index b4de0f6..3dc5efa 100644
--- a/src/cos_object.c
+++ b/src/cos_object.c
@@ -23,7 +23,7 @@
nspdferror cos_free_object(struct cos_object *cos_obj)
{
struct cos_dictionary_entry *dentry;
- struct cos_array_entry *aentry;
+ unsigned int aentry;
switch (cos_obj->type) {
case COS_TYPE_NAME:
@@ -50,16 +50,13 @@ nspdferror cos_free_object(struct cos_object *cos_obj)
break;
case COS_TYPE_ARRAY:
- aentry = cos_obj->u.array;
- while (aentry != NULL) {
- struct cos_array_entry *oaentry;
-
- cos_free_object(aentry->value);
-
- oaentry = aentry;
- aentry = aentry->next;
- free(oaentry);
+ if (cos_obj->u.array->alloc > 0) {
+ for (aentry = 0; aentry < cos_obj->u.array->length; aentry++) {
+ cos_free_object(*(cos_obj->u.array->values + aentry));
+ }
+ free(cos_obj->u.array->values);
}
+ free(cos_obj->u.array);
break;
case COS_TYPE_STREAM:
@@ -381,25 +378,16 @@ cos_get_array_value(struct nspdf_doc *doc,
struct cos_object **value_out)
{
nspdferror res;
- struct cos_array_entry *entry;
res = nspdf__xref_get_referenced(doc, &array);
if (res == NSPDFERROR_OK) {
if (array->type != COS_TYPE_ARRAY) {
res = NSPDFERROR_TYPE;
} else {
- unsigned int cur_index = 0;
- res = NSPDFERROR_RANGE;
-
- entry = array->u.array;
- while (entry != NULL) {
- if (cur_index == index) {
- *value_out = entry->value;
- res = NSPDFERROR_OK;
- break;
- }
- cur_index++;
- entry = entry->next;
+ if (index >= array->u.array->length) {
+ res = NSPDFERROR_RANGE;
+ } else {
+ *value_out = *(array->u.array->values + index);
}
}
}
@@ -428,21 +416,13 @@ cos_get_array_size(struct nspdf_doc *doc,
unsigned int *size_out)
{
nspdferror res;
- unsigned int array_size = 0;
- struct cos_array_entry *array_entry;
res = nspdf__xref_get_referenced(doc, &cobj);
if (res == NSPDFERROR_OK) {
if (cobj->type != COS_TYPE_ARRAY) {
res = NSPDFERROR_TYPE;
} else {
- /* walk array list to enumerate entries */
- array_entry = cobj->u.array;
- while (array_entry != NULL) {
- array_size++;
- array_entry = array_entry->next;
- }
- *size_out = array_size;
+ *size_out = cobj->u.array->length;
}
}
return res;
diff --git a/src/cos_object.h b/src/cos_object.h
index b90ef15..2e763e2 100644
--- a/src/cos_object.h
+++ b/src/cos_object.h
@@ -45,12 +45,18 @@ struct cos_dictionary_entry {
struct cos_object *value;
};
-struct cos_array_entry {
- /** next value in array */
- struct cos_array_entry *next;
+/**
+ * array of COS objects
+ */
+struct cos_array {
+ /** number of values */
+ unsigned int length;
- /** value */
- struct cos_object *value;
+ /** number of allocated values */
+ unsigned int alloc;
+
+ /** array of object pointers */
+ struct cos_object **values;
};
struct cos_string {
@@ -96,7 +102,7 @@ struct cos_object {
struct cos_dictionary_entry *dictionary;
/* array */
- struct cos_array_entry *array;
+ struct cos_array *array;
/** reference */
struct cos_reference *reference;
diff --git a/src/cos_parse.c b/src/cos_parse.c
index de85b0c..21ba0d7 100644
--- a/src/cos_parse.c
+++ b/src/cos_parse.c
@@ -380,16 +380,16 @@ cos_decode_dictionary(struct nspdf_doc *doc,
}
/**
- * decode a list
+ * parse a list
*/
static nspdferror
-cos_decode_list(struct nspdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
+cos_parse_list(struct nspdf_doc *doc,
+ uint64_t *offset_out,
+ struct cos_object **cosobj_out)
{
uint64_t offset;
struct cos_object *cosobj;
- struct cos_array_entry *entry;
+ struct cos_array *array;
struct cos_object *value;
nspdferror res;
@@ -398,7 +398,7 @@ cos_decode_list(struct nspdf_doc *doc,
/* sanity check first token is list open */
if (DOC_BYTE(doc, offset) != '[') {
printf("not a [\n");
- return NSPDFERROR_SYNTAX; /* syntax error */
+ return NSPDFERROR_SYNTAX;
}
offset++;
@@ -409,13 +409,20 @@ cos_decode_list(struct nspdf_doc *doc,
}
//printf("found a list\n");
-
+ /* setup array object */
cosobj = calloc(1, sizeof(struct cos_object));
if (cosobj == NULL) {
return NSPDFERROR_NOMEM;
}
cosobj->type = COS_TYPE_ARRAY;
+ array = calloc(1, sizeof(struct cos_array));
+ if (array == NULL) {
+ cos_free_object(cosobj);
+ return NSPDFERROR_NOMEM;
+ }
+ cosobj->u.array = array;
+
while (DOC_BYTE(doc, offset) != ']') {
res = cos_parse_object(doc, &offset, &value);
@@ -425,17 +432,20 @@ cos_decode_list(struct nspdf_doc *doc,
return res;
}
- /* add entry to array */
- entry = calloc(1, sizeof(struct cos_array_entry));
- if (entry == NULL) {
- cos_free_object(cosobj);
- return NSPDFERROR_NOMEM;
+ if (array->alloc < (array->length + 1)) {
+ struct cos_object **nvalues;
+ nvalues = realloc(array->values,
+ sizeof(struct cos_object *) * (array->alloc + 32));
+ if (nvalues == NULL) {
+ cos_free_object(cosobj);
+ return NSPDFERROR_NOMEM;
+ }
+ array->values = nvalues;
+ array->alloc += 32;
}
- entry->value = value;
- entry->next = cosobj->u.array;
-
- cosobj->u.array = entry;
+ *(array->values + array->length) = value;
+ array->length++;
}
offset++; /* skip closing ] */
@@ -996,7 +1006,7 @@ cos_parse_object(struct nspdf_doc *doc,
break;
case '[':
- res = cos_decode_list(doc, &offset, &cosobj);
+ res = cos_parse_list(doc, &offset, &cosobj);
break;
case 't':
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=a59a9fcbb5dd67f436...
commit a59a9fcbb5dd67f4368e88b9caa773b9c56811f9
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
support flateDecode stream filter
diff --git a/Makefile b/Makefile
index 7024de4..f6c261f 100644
--- a/Makefile
+++ b/Makefile
@@ -45,10 +45,10 @@ ifneq ($(findstring clean,$(MAKECMDGOALS)),clean)
endif
endif
-REQUIRED_LIBS := nspdf
+REQUIRED_LIBS := nspdf z
TESTCFLAGS := -g -O2
-TESTLDFLAGS := -l$(COMPONENT) $(TESTLDFLAGS)
+TESTLDFLAGS := -l$(COMPONENT) -lz $(TESTLDFLAGS)
include $(NSBUILD)/Makefile.top
diff --git a/src/cos_stream_filter.c b/src/cos_stream_filter.c
index 6f407de..0c08442 100644
--- a/src/cos_stream_filter.c
+++ b/src/cos_stream_filter.c
@@ -13,21 +13,104 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
+#include <zlib.h>
#include <nspdf/errors.h>
#include "cos_object.h"
#include "pdf_doc.h"
+static nspdferror
+cos_stream_inflate(struct nspdf_doc *doc, struct cos_stream **stream_out)
+{
+ int ret;
+ z_stream strm;
+ struct cos_stream *stream_in;
+ struct cos_stream *stream_res;
+
+ stream_in = *stream_out;
+
+ stream_res = calloc(1, sizeof(struct cos_stream));
+
+ //printf("inflating from %d bytes\n", stream_in->length);
+
+ strm.zalloc = Z_NULL;
+ strm.zfree = Z_NULL;
+ strm.opaque = Z_NULL;
+ strm.avail_in = 0;
+ strm.next_in = Z_NULL;
+
+ ret = inflateInit(&strm);
+ if (ret != Z_OK) {
+ return NSPDFERROR_NOTFOUND;
+ }
+
+ strm.next_in = (void *)stream_in->data;
+ strm.avail_in = stream_in->length;
+
+ do {
+ int64_t available; /* available buffer space for decompression */
+ available = stream_res->alloc - stream_res->length;
+
+ if (available < (strm.avail_in << 1)) {
+ uint8_t *newdata;
+ size_t newlength;
+
+ newlength = stream_res->alloc + (stream_in->length << 1);
+ newdata = realloc((void *)stream_res->data, newlength);
+ if (newdata == NULL) {
+ free((void *)stream_res->data);
+ free(stream_res);
+ inflateEnd(&strm);
+ return NSPDFERROR_NOMEM;
+ }
+
+ //printf("realloc %d\n", newlength);
+
+ stream_res->data = newdata;
+ stream_res->alloc = newlength;
+ available = stream_res->alloc - stream_res->length;
+ }
+
+ strm.avail_out = available;
+ strm.next_out = (void*)(stream_res->data + stream_res->length);
+ ret = inflate(&strm, Z_NO_FLUSH);
+ /** \todo check zlib return code */
+
+ stream_res->length += (available - strm.avail_out);
+
+ } while (ret != Z_STREAM_END);
+
+ //printf("allocated %d\n", stream_res->alloc);
+
+ //printf("length %d\n", stream_res->length);
+
+ inflateEnd(&strm);
+
+ if (stream_in->alloc != 0) {
+ free((void*)stream_in->data);
+ }
+ free(stream_in);
+
+ *stream_out = stream_res;
+
+ return NSPDFERROR_OK;
+}
+
nspdferror
nspdf__cos_stream_filter(struct nspdf_doc *doc,
const char *filter_name,
struct cos_stream **stream_out)
{
- struct cos_stream *stream_in;
+ nspdferror res;
- stream_in = *stream_out;
+ //printf("applying filter %s\n", filter_name);
- printf("applying filter %s\n", filter_name);
- return NSPDFERROR_OK;
+ if (strcmp(filter_name, "FlateDecode") == 0) {
+ res = cos_stream_inflate(doc, stream_out);
+ } else {
+ res = NSPDFERROR_NOTFOUND;
+ }
+
+ return res;
}
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=b668680da5aa093082...
commit b668680da5aa0930820df70f1b182243a7a8cde4
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
split out stream filtering
diff --git a/src/Makefile b/src/Makefile
index a2d1ae8..35576c2 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,3 +1,3 @@
-DIR_SOURCES := document.c byte_class.c cos_parse.c cos_object.c pdf_doc.c meta.c page.c xref.c
+DIR_SOURCES := document.c byte_class.c cos_parse.c cos_object.c pdf_doc.c meta.c page.c xref.c cos_stream_filter.c
include $(NSBUILD)/Makefile.subdir
diff --git a/src/cos_parse.c b/src/cos_parse.c
index 5448ad4..de85b0c 100644
--- a/src/cos_parse.c
+++ b/src/cos_parse.c
@@ -27,6 +27,7 @@
/** Maximum length of cos name */
#define NAME_MAX_LENGTH 127
+
static nspdferror
cos_string_append(struct cos_string *s, uint8_t c)
{
@@ -717,8 +718,16 @@ cos_parse_stream(struct nspdf_doc *doc,
/* optional filter */
res = cos_get_dictionary_value(doc, stream_dict, "Filter", &stream_filter);
if (res == NSPDFERROR_OK) {
- /** \todo filter stream */
- printf("applying filter %s\n", stream_filter->u.n);
+ const char *filter_name;
+ res = cos_get_name(doc, stream_filter, &filter_name);
+ if (res == NSPDFERROR_OK) {
+ res = nspdf__cos_stream_filter(doc, filter_name, &stream);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+ } else {
+ /** \todo array of filter stream */
+ }
}
/* allocate stream object */
diff --git a/src/cos_stream_filter.c b/src/cos_stream_filter.c
new file mode 100644
index 0000000..6f407de
--- /dev/null
+++ b/src/cos_stream_filter.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2018 Vincent Sanders <vince(a)netsurf-browser.org>
+ *
+ * This file is part of libnspdf.
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <nspdf/errors.h>
+
+#include "cos_object.h"
+#include "pdf_doc.h"
+
+nspdferror
+nspdf__cos_stream_filter(struct nspdf_doc *doc,
+ const char *filter_name,
+ struct cos_stream **stream_out)
+{
+ struct cos_stream *stream_in;
+
+ stream_in = *stream_out;
+
+ printf("applying filter %s\n", filter_name);
+ return NSPDFERROR_OK;
+}
diff --git a/src/pdf_doc.h b/src/pdf_doc.h
index b7e6546..e362ea6 100644
--- a/src/pdf_doc.h
+++ b/src/pdf_doc.h
@@ -46,16 +46,18 @@ struct nspdf_doc {
/* byte data acessory, allows for more complex buffer handling in future */
#define DOC_BYTE(doc, offset) (doc->start[(offset)])
-/* helpers in pdf_doc.h */
+/* helpers in pdf_doc.c */
nspdferror doc_skip_ws(struct nspdf_doc *doc, uint64_t *offset);
nspdferror doc_skip_eol(struct nspdf_doc *doc, uint64_t *offset);
nspdferror doc_read_uint(struct nspdf_doc *doc, uint64_t *offset_out, uint64_t *result_out);
+/* cross reference table handlers */
/**
* parse xref from file
*/
nspdferror nspdf__xref_parse(struct nspdf_doc *doc, uint64_t *offset_out);
+
/**
* get an object dereferencing through xref table if necessary
*/
@@ -65,4 +67,10 @@ nspdferror nspdf__xref_allocate(struct nspdf_doc *doc, int64_t size);
nspdferror nspdf__decode_page_tree(struct nspdf_doc *doc, struct cos_object *page_tree_node, unsigned int *page_index);
+/* cos stream filters */
+nspdferror
+nspdf__cos_stream_filter(struct nspdf_doc *doc,
+ const char *filter_name,
+ struct cos_stream **stream_out);
+
#endif
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=615323e574ffcecd85...
commit 615323e574ffcecd8525711c39beffcac4156624
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
fix header guards and copyright notices
diff --git a/src/byte_class.c b/src/byte_class.c
index e881cf5..fff0965 100644
--- a/src/byte_class.c
+++ b/src/byte_class.c
@@ -1,3 +1,12 @@
+/*
+ * Copyright 2018 Vincent Sanders <vince(a)netsurf-browser.org>
+ *
+ * This file is part of libnspdf.
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
#include <stdint.h>
#include "byte_class.h"
diff --git a/src/byte_class.h b/src/byte_class.h
index 0ccfbdf..0f81331 100644
--- a/src/byte_class.h
+++ b/src/byte_class.h
@@ -1,3 +1,20 @@
+/*
+ * Copyright 2018 Vincent Sanders <vince(a)netsurf-browser.org>
+ *
+ * This file is part of libnspdf.
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
+/**
+ * \file
+ * NetSurf PDF library byte classification
+ */
+
+#ifndef NSPDF__BYTE_CLASS_H_
+#define NSPDF__BYTE_CLASS_H_
+
#define BC_RGLR 0 /* regular character */
#define BC_WSPC 1 /* character is whitespace */
#define BC_EOLM (1<<1) /* character signifies end of line */
@@ -8,3 +25,5 @@
#define BC_CMNT (1<<6) /* character is a comment */
const uint8_t *bclass;
+
+#endif
diff --git a/src/cos_object.c b/src/cos_object.c
index 335e14b..b4de0f6 100644
--- a/src/cos_object.c
+++ b/src/cos_object.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2017 Vincent Sanders <vince(a)netsurf-browser.org>
+ * Copyright 2018 Vincent Sanders <vince(a)netsurf-browser.org>
*
* This file is part of libnspsl
*
diff --git a/src/cos_object.h b/src/cos_object.h
index 8d0f910..b90ef15 100644
--- a/src/cos_object.h
+++ b/src/cos_object.h
@@ -1,3 +1,20 @@
+/*
+ * Copyright 2018 Vincent Sanders <vince(a)netsurf-browser.org>
+ *
+ * This file is part of libnspdf.
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
+/**
+ * \file
+ * NetSurf PDF library COS objects
+ */
+
+#ifndef NSPDF__COS_OBJECT_H_
+#define NSPDF__COS_OBJECT_H_
+
struct nspdf_doc;
enum cos_type {
@@ -240,3 +257,6 @@ nspdferror cos_get_array(struct nspdf_doc *doc, struct cos_object *cobj, struct
* NSERROR_TYPE if the \p cobj is not a array
*/
nspdferror cos_get_stream(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_stream **stream_out);
+
+
+#endif
diff --git a/src/cos_parse.c b/src/cos_parse.c
index 8fe181c..5448ad4 100644
--- a/src/cos_parse.c
+++ b/src/cos_parse.c
@@ -1,3 +1,12 @@
+/*
+ * Copyright 2018 Vincent Sanders <vince(a)netsurf-browser.org>
+ *
+ * This file is part of libnspdf.
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
diff --git a/src/cos_parse.h b/src/cos_parse.h
index adfb835..0bca79f 100644
--- a/src/cos_parse.h
+++ b/src/cos_parse.h
@@ -1,3 +1,20 @@
+/*
+ * Copyright 2018 Vincent Sanders <vince(a)netsurf-browser.org>
+ *
+ * This file is part of libnspdf.
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
+/**
+ * \file
+ * NetSurf PDF library parsing cos objects from file
+ */
+
+#ifndef NSPDF__COS_PARSE_H_
+#define NSPDF__COS_PARSE_H_
+
struct nspdf_doc;
struct cos_object;
@@ -8,3 +25,4 @@ struct cos_object;
*/
nspdferror cos_parse_object(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out);
+#endif
diff --git a/src/pdf_doc.h b/src/pdf_doc.h
index 5c25878..b7e6546 100644
--- a/src/pdf_doc.h
+++ b/src/pdf_doc.h
@@ -1,3 +1,19 @@
+/*
+ * Copyright 2018 Vincent Sanders <vince(a)netsurf-browser.org>
+ *
+ * This file is part of libnspdf.
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
+/**
+ * \file
+ * NetSurf PDF library pdf document
+ */
+
+#ifndef NSPDF__PDF_DOC_H_
+#define NSPDF__PDF_DOC_H_
struct xref_table_entry;
struct page_table_entry;
@@ -48,3 +64,5 @@ nspdferror nspdf__xref_get_referenced(struct nspdf_doc *doc, struct cos_object *
nspdferror nspdf__xref_allocate(struct nspdf_doc *doc, int64_t size);
nspdferror nspdf__decode_page_tree(struct nspdf_doc *doc, struct cos_object *page_tree_node, unsigned int *page_index);
+
+#endif
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=119b565de393fdd797...
commit 119b565de393fdd797e1a3c4f629c936092e9091
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
parse stream objects, no filters handled yet
diff --git a/src/cos_object.c b/src/cos_object.c
index 80e4431..335e14b 100644
--- a/src/cos_object.c
+++ b/src/cos_object.c
@@ -357,8 +357,9 @@ cos_get_stream(struct nspdf_doc *doc,
struct cos_stream **stream_out)
{
nspdferror res;
-
+ //printf("%p %d\n", cobj, cobj->type);
res = nspdf__xref_get_referenced(doc, &cobj);
+ //printf("%p %d res:%d\n", cobj, cobj->type, res);
if (res == NSPDFERROR_OK) {
if (cobj->type != COS_TYPE_STREAM) {
res = NSPDFERROR_TYPE;
diff --git a/src/cos_object.h b/src/cos_object.h
index 077be3b..8d0f910 100644
--- a/src/cos_object.h
+++ b/src/cos_object.h
@@ -1,18 +1,18 @@
struct nspdf_doc;
enum cos_type {
- COS_TYPE_NULL,
+ COS_TYPE_NULL, /* 0 */
COS_TYPE_BOOL,
COS_TYPE_INT,
COS_TYPE_REAL,
COS_TYPE_NAME,
COS_TYPE_STRING,
- COS_TYPE_ARRAY,
+ COS_TYPE_ARRAY, /* 6 */
COS_TYPE_DICTIONARY,
COS_TYPE_NAMETREE,
COS_TYPE_NUMBERTREE,
COS_TYPE_STREAM,
- COS_TYPE_REFERENCE,
+ COS_TYPE_REFERENCE, /* 11 */
};
struct cos_object;
@@ -37,22 +37,20 @@ struct cos_array_entry {
};
struct cos_string {
- uint8_t *data;
- size_t length;
- size_t alloc;
+ uint8_t *data; /**< string data */
+ size_t length; /**< string length */
+ size_t alloc; /**< memory allocation for string */
};
struct cos_reference {
- /** id of indirect object */
- uint64_t id;
-
- /* generation of indirect object */
- uint64_t generation;
+ uint64_t id; /**< id of indirect object */
+ uint64_t generation; /**< generation of indirect object */
};
struct cos_stream {
- uint8_t *data;
- size_t length;
+ const uint8_t *data; /**< decoded stream data */
+ int64_t length; /**< decoded stream length */
+ size_t alloc; /**< memory allocated for stream */
};
diff --git a/src/cos_parse.c b/src/cos_parse.c
index ca3d802..8fe181c 100644
--- a/src/cos_parse.c
+++ b/src/cos_parse.c
@@ -310,7 +310,7 @@ cos_decode_dictionary(struct nspdf_doc *doc,
if ((DOC_BYTE(doc, offset) != '<') ||
(DOC_BYTE(doc, offset + 1) != '<')) {
- return -1; /* syntax error */
+ return NSPDFERROR_SYNTAX; /* syntax error */
}
offset += 2;
doc_skip_ws(doc, &offset);
@@ -319,7 +319,7 @@ cos_decode_dictionary(struct nspdf_doc *doc,
cosobj = calloc(1, sizeof(struct cos_object));
if (cosobj == NULL) {
- return -1; /* memory error */
+ return NSPDFERROR_NOMEM;
}
cosobj->type = COS_TYPE_DICTIONARY;
@@ -337,7 +337,6 @@ cos_decode_dictionary(struct nspdf_doc *doc,
printf("key was %d not a name %d\n", key->type, COS_TYPE_NAME);
return NSPDFERROR_SYNTAX;
}
- //printf("key: %s\n", key->u.n);
res = cos_parse_object(doc, &offset, &value);
if (res != NSPDFERROR_OK) {
@@ -352,6 +351,7 @@ cos_decode_dictionary(struct nspdf_doc *doc,
/* todo free up any dictionary entries already created */
return NSPDFERROR_NOMEM;
}
+ //printf("key:%s value(type):%d\n", key->u.n, value->type);
entry->key = key;
entry->value = value;
@@ -622,6 +622,110 @@ cos_decode_null(struct nspdf_doc *doc,
return NSPDFERROR_OK;
}
+/**
+ * parse a stream object
+ */
+static nspdferror
+cos_parse_stream(struct nspdf_doc *doc,
+ uint64_t *offset_out,
+ struct cos_object **cosobj_out)
+{
+ struct cos_object *cosobj;
+ nspdferror res;
+ struct cos_object *stream_dict;
+ uint64_t offset;
+ struct cos_object *stream_filter;
+ struct cos_stream *stream;
+
+ offset = *offset_out;
+ stream_dict = *cosobj_out;
+
+ if (stream_dict->type != COS_TYPE_DICTIONARY) {
+ /* cannot be a stream if indirect object is not a dict */
+ return NSPDFERROR_NOTFOUND;
+ }
+
+ if ((DOC_BYTE(doc, offset ) != 's') &&
+ (DOC_BYTE(doc, offset + 1) != 't') &&
+ (DOC_BYTE(doc, offset + 2) != 'r') &&
+ (DOC_BYTE(doc, offset + 1) != 'e') &&
+ (DOC_BYTE(doc, offset + 2) != 'a') &&
+ (DOC_BYTE(doc, offset + 3) != 'm')) {
+ /* no stream marker */
+ return NSPDFERROR_NOTFOUND;
+ }
+ offset += 6;
+ //printf("detected stream\n");
+
+ /* parsed object was a dictionary and there is a stream marker */
+ res = doc_skip_ws(doc, &offset);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+
+ stream = calloc(1, sizeof(struct cos_stream));
+ if (stream == NULL) {
+ return NSPDFERROR_NOMEM;
+ }
+
+ res = cos_get_dictionary_int(doc, stream_dict, "Length", &stream->length);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+ //printf("stream length %d\n", stream_length);
+ stream->data = doc->start + offset;
+ stream->alloc = 0; /* stream is pointing at non malloced data */
+
+ offset += stream->length;
+
+ /* possible whitespace after stream data */
+ res = doc_skip_ws(doc, &offset);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+
+ if ((DOC_BYTE(doc, offset ) != 'e') &&
+ (DOC_BYTE(doc, offset + 1) != 'n') &&
+ (DOC_BYTE(doc, offset + 2) != 'd') &&
+ (DOC_BYTE(doc, offset + 3) != 's') &&
+ (DOC_BYTE(doc, offset + 4) != 't') &&
+ (DOC_BYTE(doc, offset + 5) != 'r') &&
+ (DOC_BYTE(doc, offset + 6) != 'e') &&
+ (DOC_BYTE(doc, offset + 7) != 'a') &&
+ (DOC_BYTE(doc, offset + 8) != 'm')) {
+ /* no endstream marker */
+ return NSPDFERROR_SYNTAX;
+ }
+ offset += 9;
+ //printf("detected endstream\n");
+
+ res = doc_skip_ws(doc, &offset);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+
+ //printf("returning with offset at %d\n", offset);
+ /* optional filter */
+ res = cos_get_dictionary_value(doc, stream_dict, "Filter", &stream_filter);
+ if (res == NSPDFERROR_OK) {
+ /** \todo filter stream */
+ printf("applying filter %s\n", stream_filter->u.n);
+ }
+
+ /* allocate stream object */
+ cosobj = calloc(1, sizeof(struct cos_object));
+ if (cosobj == NULL) {
+ free(stream);
+ return NSPDFERROR_NOMEM;
+ }
+ cosobj->type = COS_TYPE_STREAM;
+ cosobj->u.stream = stream;
+
+ *cosobj_out = cosobj;
+ *offset_out = offset;
+
+ return NSPDFERROR_OK;
+}
/**
* attempt to decode input data into a reference, indirect or stream object
@@ -719,7 +823,21 @@ cos_attempt_decode_reference(struct nspdf_doc *doc,
cos_free_object(generation);
return res;
}
- //printf("parsed object type %d\nendobj\n",indirect->type);
+
+ /* attempt to parse input as a stream */
+ res = cos_parse_stream(doc, &offset, &indirect);
+ if ((res != NSPDFERROR_OK) &&
+ (res != NSPDFERROR_NOTFOUND)) {
+ cos_free_object(indirect);
+ cos_free_object(generation);
+ return res;
+ }
+
+ /*printf("parsed indirect object num:%d gen:%d type %d\n",
+ (*cosobj_out)->u.i,
+ generation->u.i,
+ indirect->type);
+ */
if ((DOC_BYTE(doc, offset ) != 'e') &&
(DOC_BYTE(doc, offset + 1) != 'n') &&
@@ -732,7 +850,7 @@ cos_attempt_decode_reference(struct nspdf_doc *doc,
return NSPDFERROR_SYNTAX;
}
offset += 6;
- //printf("skipping\n");
+ //printf("endobj\n");
res = doc_skip_ws(doc, &offset);
if (res != NSPDFERROR_OK) {
@@ -746,6 +864,8 @@ cos_attempt_decode_reference(struct nspdf_doc *doc,
*cosobj_out = indirect;
*offset_out = offset;
+
+ //printf("returning object\n");
}
cos_free_object(generation);
diff --git a/src/page.c b/src/page.c
index 5d2a117..a6a9d52 100644
--- a/src/page.c
+++ b/src/page.c
@@ -120,12 +120,13 @@ nspdf__decode_page_tree(struct nspdf_doc *doc,
}
/*
- printf("page index:%d page:%p resources:%p mediabox:%p contents:%p\n",
+ printf("page index:%d page:%p resources:%p mediabox:%p contents:%p contents type:%d\n",
*page_index,
page,
page->resources,
page->mediabox,
- page->contents);
+ page->contents,
+ page->contents->type);
*/
(*page_index)++;
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=e8dbf1fa8049169e69...
commit e8dbf1fa8049169e6918cce20e98e309a793cffe
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
split out cross reference table handling to separate module
diff --git a/include/nspdf/page.h b/include/nspdf/page.h
index 119ef22..8c1d7fc 100644
--- a/include/nspdf/page.h
+++ b/include/nspdf/page.h
@@ -19,6 +19,8 @@
struct nspdf_doc;
-nspdferror nspdf_count_pages(struct nspdf_doc *doc, unsigned int *pages_out);
+nspdferror nspdf_page_count(struct nspdf_doc *doc, unsigned int *pages_out);
+
+nspdferror nspdf_page_render(struct nspdf_doc *doc, unsigned int page_num);
#endif /* NSPDF_META_H_ */
diff --git a/src/Makefile b/src/Makefile
index 09bde65..a2d1ae8 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,3 +1,3 @@
-DIR_SOURCES := document.c byte_class.c cos_parse.c cos_object.c pdf_doc.c meta.c page.c
+DIR_SOURCES := document.c byte_class.c cos_parse.c cos_object.c pdf_doc.c meta.c page.c xref.c
include $(NSBUILD)/Makefile.subdir
diff --git a/src/cos_object.c b/src/cos_object.c
index 494c7ff..80e4431 100644
--- a/src/cos_object.c
+++ b/src/cos_object.c
@@ -119,7 +119,7 @@ cos_get_dictionary_value(struct nspdf_doc *doc,
nspdferror res;
struct cos_dictionary_entry *entry;
- res = xref_get_referenced(doc, &dict);
+ res = nspdf__xref_get_referenced(doc, &dict);
if (res == NSPDFERROR_OK) {
if (dict->type != COS_TYPE_DICTIONARY) {
res = NSPDFERROR_TYPE;
@@ -264,7 +264,7 @@ cos_get_int(struct nspdf_doc *doc,
{
nspdferror res;
- res = xref_get_referenced(doc, &cobj);
+ res = nspdf__xref_get_referenced(doc, &cobj);
if (res == NSPDFERROR_OK) {
if (cobj->type != COS_TYPE_INT) {
res = NSPDFERROR_TYPE;
@@ -282,7 +282,7 @@ cos_get_name(struct nspdf_doc *doc,
{
nspdferror res;
- res = xref_get_referenced(doc, &cobj);
+ res = nspdf__xref_get_referenced(doc, &cobj);
if (res == NSPDFERROR_OK) {
if (cobj->type != COS_TYPE_NAME) {
res = NSPDFERROR_TYPE;
@@ -301,7 +301,7 @@ cos_get_dictionary(struct nspdf_doc *doc,
{
nspdferror res;
- res = xref_get_referenced(doc, &cobj);
+ res = nspdf__xref_get_referenced(doc, &cobj);
if (res == NSPDFERROR_OK) {
if (cobj->type != COS_TYPE_DICTIONARY) {
res = NSPDFERROR_TYPE;
@@ -320,7 +320,7 @@ cos_get_array(struct nspdf_doc *doc,
{
nspdferror res;
- res = xref_get_referenced(doc, &cobj);
+ res = nspdf__xref_get_referenced(doc, &cobj);
if (res == NSPDFERROR_OK) {
if (cobj->type != COS_TYPE_ARRAY) {
res = NSPDFERROR_TYPE;
@@ -339,7 +339,7 @@ cos_get_string(struct nspdf_doc *doc,
{
nspdferror res;
- res = xref_get_referenced(doc, &cobj);
+ res = nspdf__xref_get_referenced(doc, &cobj);
if (res == NSPDFERROR_OK) {
if (cobj->type != COS_TYPE_STRING) {
res = NSPDFERROR_TYPE;
@@ -351,6 +351,25 @@ cos_get_string(struct nspdf_doc *doc,
}
+nspdferror
+cos_get_stream(struct nspdf_doc *doc,
+ struct cos_object *cobj,
+ struct cos_stream **stream_out)
+{
+ nspdferror res;
+
+ res = nspdf__xref_get_referenced(doc, &cobj);
+ if (res == NSPDFERROR_OK) {
+ if (cobj->type != COS_TYPE_STREAM) {
+ res = NSPDFERROR_TYPE;
+ } else {
+ *stream_out = cobj->u.stream;
+ }
+ }
+ return res;
+}
+
+
/*
* get a value for a key from a dictionary
*/
@@ -363,7 +382,7 @@ cos_get_array_value(struct nspdf_doc *doc,
nspdferror res;
struct cos_array_entry *entry;
- res = xref_get_referenced(doc, &array);
+ res = nspdf__xref_get_referenced(doc, &array);
if (res == NSPDFERROR_OK) {
if (array->type != COS_TYPE_ARRAY) {
res = NSPDFERROR_TYPE;
@@ -411,7 +430,7 @@ cos_get_array_size(struct nspdf_doc *doc,
unsigned int array_size = 0;
struct cos_array_entry *array_entry;
- res = xref_get_referenced(doc, &cobj);
+ res = nspdf__xref_get_referenced(doc, &cobj);
if (res == NSPDFERROR_OK) {
if (cobj->type != COS_TYPE_ARRAY) {
res = NSPDFERROR_TYPE;
diff --git a/src/cos_object.h b/src/cos_object.h
index d0bd5ea..077be3b 100644
--- a/src/cos_object.h
+++ b/src/cos_object.h
@@ -50,6 +50,12 @@ struct cos_reference {
uint64_t generation;
};
+struct cos_stream {
+ uint8_t *data;
+ size_t length;
+};
+
+
struct cos_object {
int type;
union {
@@ -69,7 +75,7 @@ struct cos_object {
struct cos_string *s;
/** stream data */
- uint8_t *stream;
+ struct cos_stream *stream;
/* dictionary */
struct cos_dictionary_entry *dictionary;
@@ -221,3 +227,18 @@ nspdferror cos_get_dictionary(struct nspdf_doc *doc, struct cos_object *cobj, st
* NSERROR_TYPE if the \p cobj is not a array
*/
nspdferror cos_get_array(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out);
+
+/**
+ * get the stream value of a cos object.
+ *
+ * Get the value from a cos object, if the object is an object reference it
+ * will be dereferenced first. The dereferencing will parse any previously
+ * unreferenced indirect objects as required.
+ *
+ * \param doc The document the cos object belongs to.
+ * \param cobj A cos object of stream type.
+ * \param stream_out The result value.
+ * \return NSERROR_OK and \p stream_out updated,
+ * NSERROR_TYPE if the \p cobj is not a array
+ */
+nspdferror cos_get_stream(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_stream **stream_out);
diff --git a/src/document.c b/src/document.c
index 3dea95e..bbe948d 100644
--- a/src/document.c
+++ b/src/document.c
@@ -31,43 +31,6 @@
#define STARTXREF_SEARCH_SIZE 1024
-static nspdferror
-doc_read_uint(struct nspdf_doc *doc,
- uint64_t *offset_out,
- uint64_t *result_out)
-{
- uint8_t c; /* current byte from source data */
- unsigned int len; /* number of decimal places in number */
- uint8_t num[21]; /* temporary buffer for decimal values */
- uint64_t offset; /* current offset of source data */
- uint64_t result=0; /* parsed result */
- uint64_t tens;
-
- offset = *offset_out;
-
- for (len = 0; len < sizeof(num); len++) {
- c = DOC_BYTE(doc, offset);
- if ((bclass[c] & BC_DCML) != BC_DCML) {
- if (len == 0) {
- return -2; /* parse error no decimals in input */
- }
- /* sum value from each place */
- for (tens = 1; len > 0; tens = tens * 10, len--) {
- result += (num[len - 1] * tens);
- }
-
- *offset_out = offset;
- *result_out = result;
-
- return NSPDFERROR_OK;
- }
- num[len] = c - '0';
- offset++;
- }
- return -1; /* number too long */
-}
-
-
/**
* finds the startxref marker at the end of input
*/
@@ -224,95 +187,6 @@ decode_trailer(struct nspdf_doc *doc,
}
-static nspdferror
-decode_xref(struct nspdf_doc *doc, uint64_t *offset_out)
-{
- uint64_t offset;
- nspdferror res;
- uint64_t objnumber; /* current object number */
- uint64_t objcount;
-
- offset = *offset_out;
-
- /* xref object header */
- if ((DOC_BYTE(doc, offset ) != 'x') &&
- (DOC_BYTE(doc, offset + 1) != 'r') &&
- (DOC_BYTE(doc, offset + 2) != 'e') &&
- (DOC_BYTE(doc, offset + 3) != 'f')) {
- return NSPDFERROR_SYNTAX;
- }
- offset += 4;
-
- res = doc_skip_ws(doc, &offset);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- /* subsections
- * <first object number> <number of references in subsection>
- */
- res = doc_read_uint(doc, &offset, &objnumber);
- while (res == NSPDFERROR_OK) {
- uint64_t lastobj;
- res = doc_skip_ws(doc, &offset);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- res = doc_read_uint(doc, &offset, &objcount);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- res = doc_skip_ws(doc, &offset);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- //printf("decoding subsection %lld %lld\n", objnumber, objcount);
-
- lastobj = objnumber + objcount;
- for (; objnumber < lastobj ; objnumber++) {
- /* each entry is a fixed format */
- uint64_t objindex;
- uint64_t objgeneration;
-
- /* object index */
- res = doc_read_uint(doc, &offset, &objindex);
- if (res != NSPDFERROR_OK) {
- return res;
- }
- offset++; /* skip space */
-
- res = doc_read_uint(doc, &offset, &objgeneration);
- if (res != NSPDFERROR_OK) {
- return res;
- }
- offset++; /* skip space */
-
- if ((DOC_BYTE(doc, offset++) == 'n')) {
- if (objnumber < doc->xref_size) {
- struct xref_table_entry *indobj;
- indobj = doc->xref_table + objnumber;
-
- indobj->ref.id = objnumber;
- indobj->ref.generation = objgeneration;
- indobj->offset = objindex;
-
- //printf("xref %lld %lld -> %lld\n", objnumber, objgeneration, objindex);
- } else {
- printf("index out of bounds\n");
- }
- }
-
- offset += 2; /* skip EOL */
- }
-
- res = doc_read_uint(doc, &offset, &objnumber);
- }
-
- return NSPDFERROR_OK;
-}
/**
@@ -367,12 +241,10 @@ decode_xref_trailer(struct nspdf_doc *doc, uint64_t xref_offset)
goto decode_xref_trailer_failed;
}
- doc->xref_table = calloc(size, sizeof(struct xref_table_entry));
- if (doc->xref_table == NULL) {
- res = NSPDFERROR_NOMEM;
+ res = nspdf__xref_allocate(doc, size);
+ if (res != NSPDFERROR_OK) {
goto decode_xref_trailer_failed;
}
- doc->xref_size = size;
res = cos_extract_dictionary_value(trailer, "Encrypt", &doc->encrypt);
if ((res != NSPDFERROR_OK) && (res != NSPDFERROR_NOTFOUND)) {
@@ -403,7 +275,7 @@ decode_xref_trailer(struct nspdf_doc *doc, uint64_t xref_offset)
offset = xref_offset;
/** @todo deal with XrefStm (number) in trailer */
- res = decode_xref(doc, &offset);
+ res = nspdf__xref_parse(doc, &offset);
if (res != NSPDFERROR_OK) {
printf("failed to decode xref table\n");
goto decode_xref_trailer_failed;
diff --git a/src/page.c b/src/page.c
index bca6dbc..5d2a117 100644
--- a/src/page.c
+++ b/src/page.c
@@ -17,13 +17,20 @@
#include "cos_object.h"
#include "pdf_doc.h"
+/** page entry */
+struct page_table_entry {
+ struct cos_object *resources;
+ struct cos_object *mediabox;
+ struct cos_object *contents;
+};
+
/**
* recursively decodes a page tree
*/
nspdferror
nspdf__decode_page_tree(struct nspdf_doc *doc,
- struct cos_object *page_tree_node,
- unsigned int *page_index)
+ struct cos_object *page_tree_node,
+ unsigned int *page_index)
{
nspdferror res;
const char *type;
@@ -131,8 +138,26 @@ nspdf__decode_page_tree(struct nspdf_doc *doc,
/* exported interface documented in nspdf/page.h */
nspdferror
-nspdf_count_pages(struct nspdf_doc *doc, unsigned int *pages_out)
+nspdf_page_count(struct nspdf_doc *doc, unsigned int *pages_out)
{
*pages_out = doc->page_table_size;
return NSPDFERROR_OK;
}
+
+/* exported interface documented in nspdf/page.h */
+nspdferror
+nspdf_page_render(struct nspdf_doc *doc, unsigned int page_number)
+{
+ struct page_table_entry *page_entry;
+ struct cos_stream *stream;
+ nspdferror res;
+
+ page_entry = doc->page_table + page_number;
+
+ res = cos_get_stream(doc, page_entry->contents, &stream);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+
+ return res;
+}
diff --git a/src/pdf_doc.c b/src/pdf_doc.c
index 281025c..997a3d7 100644
--- a/src/pdf_doc.c
+++ b/src/pdf_doc.c
@@ -57,51 +57,39 @@ nspdferror doc_skip_eol(struct nspdf_doc *doc, uint64_t *offset)
return NSPDFERROR_OK;
}
-static struct cos_object cos_null_obj = {
- .type = COS_TYPE_NULL,
-};
nspdferror
-xref_get_referenced(struct nspdf_doc *doc, struct cos_object **cobj_out)
+doc_read_uint(struct nspdf_doc *doc,
+ uint64_t *offset_out,
+ uint64_t *result_out)
{
- nspdferror res;
- struct cos_object *cobj;
- struct cos_object *indirect;
- uint64_t offset;
- struct xref_table_entry *entry;
+ uint8_t c; /* current byte from source data */
+ unsigned int len; /* number of decimal places in number */
+ uint8_t num[21]; /* temporary buffer for decimal values */
+ uint64_t offset; /* current offset of source data */
+ uint64_t result=0; /* parsed result */
+ uint64_t tens;
- cobj = *cobj_out;
+ offset = *offset_out;
- if (cobj->type != COS_TYPE_REFERENCE) {
- /* not passed a reference object so just return what was passed */
- return NSPDFERROR_OK;
- }
-
- entry = doc->xref_table + cobj->u.reference->id;
+ for (len = 0; len < sizeof(num); len++) {
+ c = DOC_BYTE(doc, offset);
+ if ((bclass[c] & BC_DCML) != BC_DCML) {
+ if (len == 0) {
+ return -2; /* parse error no decimals in input */
+ }
+ /* sum value from each place */
+ for (tens = 1; len > 0; tens = tens * 10, len--) {
+ result += (num[len - 1] * tens);
+ }
- /* check if referenced object is in range and exists. return null object if
- * not
- */
- if ((cobj->u.reference->id >= doc->xref_size) ||
- (cobj->u.reference->id == 0) ||
- (entry->ref.id == 0)) {
- *cobj_out = &cos_null_obj;
- return NSPDFERROR_OK;
- }
+ *offset_out = offset;
+ *result_out = result;
- if (entry->object == NULL) {
- /* indirect object has never been decoded */
- offset = entry->offset;
- res = cos_parse_object(doc, &offset, &indirect);
- if (res != NSPDFERROR_OK) {
- printf("failed to decode indirect object\n");
- return res;
+ return NSPDFERROR_OK;
}
-
- entry->object = indirect;
+ num[len] = c - '0';
+ offset++;
}
-
- *cobj_out = entry->object;
-
- return NSPDFERROR_OK;
+ return -1; /* number too long */
}
diff --git a/src/pdf_doc.h b/src/pdf_doc.h
index a75c90e..5c25878 100644
--- a/src/pdf_doc.h
+++ b/src/pdf_doc.h
@@ -1,21 +1,6 @@
-/** indirect object */
-struct xref_table_entry {
- /* reference identifier */
- struct cos_reference ref;
- /** offset of object */
- uint64_t offset;
-
- /* indirect object if already decoded */
- struct cos_object *object;
-};
-
-/** page entry */
-struct page_table_entry {
- struct cos_object *resources;
- struct cos_object *mediabox;
- struct cos_object *contents;
-};
+struct xref_table_entry;
+struct page_table_entry;
/** pdf document */
struct nspdf_doc {
@@ -29,7 +14,7 @@ struct nspdf_doc {
/**
* Indirect object cross reference table
*/
- uint64_t xref_size;
+ uint64_t xref_table_size;
struct xref_table_entry *xref_table;
struct cos_object *root;
@@ -45,9 +30,21 @@ struct nspdf_doc {
/* byte data accessor, allows for more complex buffer handling in future */
#define DOC_BYTE(doc, offset) (doc->start[(offset)])
+/* helpers in pdf_doc.c */
nspdferror doc_skip_ws(struct nspdf_doc *doc, uint64_t *offset);
nspdferror doc_skip_eol(struct nspdf_doc *doc, uint64_t *offset);
+nspdferror doc_read_uint(struct nspdf_doc *doc, uint64_t *offset_out, uint64_t *result_out);
+
+/**
+ * parse xref from file
+ */
+nspdferror nspdf__xref_parse(struct nspdf_doc *doc, uint64_t *offset_out);
+
+/**
+ * get an object dereferencing through xref table if necessary
+ */
+nspdferror nspdf__xref_get_referenced(struct nspdf_doc *doc, struct cos_object **cobj_out);
-nspdferror xref_get_referenced(struct nspdf_doc *doc, struct cos_object **cobj_out);
+nspdferror nspdf__xref_allocate(struct nspdf_doc *doc, int64_t size);
nspdferror nspdf__decode_page_tree(struct nspdf_doc *doc, struct cos_object *page_tree_node, unsigned int *page_index);
diff --git a/src/xref.c b/src/xref.c
new file mode 100644
index 0000000..298c750
--- /dev/null
+++ b/src/xref.c
@@ -0,0 +1,186 @@
+/*
+ * Copyright 2018 Vincent Sanders <vince(a)netsurf-browser.org>
+ *
+ * This file is part of libnspdf.
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include <nspdf/errors.h>
+
+#include "cos_parse.h"
+#include "cos_object.h"
+#include "pdf_doc.h"
+
+
+/** indirect object */
+struct xref_table_entry {
+ /* reference identifier */
+ struct cos_reference ref;
+
+ /** offset of object */
+ uint64_t offset;
+
+ /* indirect object if already decoded */
+ struct cos_object *object;
+};
+
+static struct cos_object cos_null_obj = {
+ .type = COS_TYPE_NULL,
+};
+
+nspdferror nspdf__xref_allocate(struct nspdf_doc *doc, int64_t size)
+{
+ if (doc->xref_table != NULL) {
+ /** \todo handle freeing xref table */
+ return NSPDFERROR_SYNTAX;
+ }
+ doc->xref_table_size = size;
+
+ doc->xref_table = calloc(doc->xref_table_size,
+ sizeof(struct xref_table_entry));
+ if (doc->xref_table == NULL) {
+ return NSPDFERROR_NOMEM;
+ }
+ return NSPDFERROR_OK;
+}
+
+nspdferror nspdf__xref_parse(struct nspdf_doc *doc, uint64_t *offset_out)
+{
+ uint64_t offset;
+ nspdferror res;
+ uint64_t objnumber; /* current object number */
+ uint64_t objcount;
+
+ offset = *offset_out;
+
+ /* xref object header */
+ if ((DOC_BYTE(doc, offset ) != 'x') &&
+ (DOC_BYTE(doc, offset + 1) != 'r') &&
+ (DOC_BYTE(doc, offset + 2) != 'e') &&
+ (DOC_BYTE(doc, offset + 3) != 'f')) {
+ return NSPDFERROR_SYNTAX;
+ }
+ offset += 4;
+
+ res = doc_skip_ws(doc, &offset);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+
+ /* subsections
+ * <first object number> <number of references in subsection>
+ */
+ res = doc_read_uint(doc, &offset, &objnumber);
+ while (res == NSPDFERROR_OK) {
+ uint64_t lastobj;
+ res = doc_skip_ws(doc, &offset);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+
+ res = doc_read_uint(doc, &offset, &objcount);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+
+ res = doc_skip_ws(doc, &offset);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+
+ //printf("decoding subsection %lld %lld\n", objnumber, objcount);
+
+ lastobj = objnumber + objcount;
+ for (; objnumber < lastobj ; objnumber++) {
+ /* each entry is a fixed format */
+ uint64_t objindex;
+ uint64_t objgeneration;
+
+ /* object index */
+ res = doc_read_uint(doc, &offset, &objindex);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+ offset++; /* skip space */
+
+ res = doc_read_uint(doc, &offset, &objgeneration);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+ offset++; /* skip space */
+
+ if ((DOC_BYTE(doc, offset++) == 'n')) {
+ if (objnumber < doc->xref_table_size) {
+ struct xref_table_entry *indobj;
+ indobj = doc->xref_table + objnumber;
+
+ indobj->ref.id = objnumber;
+ indobj->ref.generation = objgeneration;
+ indobj->offset = objindex;
+
+ //printf("xref %lld %lld -> %lld\n", objnumber, objgeneration, objindex);
+ } else {
+ //printf("index out of bounds\n");
+ }
+ }
+
+ offset += 2; /* skip EOL */
+ }
+
+ res = doc_read_uint(doc, &offset, &objnumber);
+ }
+
+ return NSPDFERROR_OK;
+}
+
+
+nspdferror
+nspdf__xref_get_referenced(struct nspdf_doc *doc, struct cos_object **cobj_out)
+{
+ nspdferror res;
+ struct cos_object *cobj;
+ struct cos_object *indirect;
+ uint64_t offset;
+ struct xref_table_entry *entry;
+
+ cobj = *cobj_out;
+
+ if (cobj->type != COS_TYPE_REFERENCE) {
+ /* not passed a reference object so just return what was passed */
+ return NSPDFERROR_OK;
+ }
+
+ entry = doc->xref_table + cobj->u.reference->id;
+
+ /* check if referenced object is in range and exists. return null object if
+ * not
+ */
+ if ((cobj->u.reference->id >= doc->xref_table_size) ||
+ (cobj->u.reference->id == 0) ||
+ (entry->ref.id == 0)) {
+ *cobj_out = &cos_null_obj;
+ return NSPDFERROR_OK;
+ }
+
+ if (entry->object == NULL) {
+ /* indirect object has never been decoded */
+ offset = entry->offset;
+ res = cos_parse_object(doc, &offset, &indirect);
+ if (res != NSPDFERROR_OK) {
+ //printf("failed to decode indirect object\n");
+ return res;
+ }
+
+ entry->object = indirect;
+ }
+
+ *cobj_out = entry->object;
+
+ return NSPDFERROR_OK;
+}
diff --git a/test/parsepdf.c b/test/parsepdf.c
index ad6c6c4..7a64f4b 100644
--- a/test/parsepdf.c
+++ b/test/parsepdf.c
@@ -93,13 +93,18 @@ int main(int argc, char **argv)
printf("Title:%s\n", lwc_string_data(title));
}
- res = nspdf_count_pages(doc, &page_count);
+ res = nspdf_page_count(doc, &page_count);
if (res != NSPDFERROR_OK) {
printf("page count failed (%d)\n", res);
return res;
}
printf("Pages:%d\n", page_count);
+ res = nspdf_page_render(doc, 0);
+ if (res != NSPDFERROR_OK) {
+ printf("page render failed (%d)\n", res);
+ return res;
+ }
res = nspdf_document_destroy(doc);
if (res != NSPDFERROR_OK) {
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=ad6da4a71f2b837a79...
commit ad6da4a71f2b837a791401e658a16bf6903fd3b5
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
split out page handling into new module
diff --git a/include/nspdf/page.h b/include/nspdf/page.h
new file mode 100644
index 0000000..119ef22
--- /dev/null
+++ b/include/nspdf/page.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2018 Vincent Sanders <vince(a)netsurf-browser.org>
+ *
+ * This file is part of libnspdf.
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
+/**
+ * \file
+ * NetSurf PDF library page manipulation.
+ */
+
+#ifndef NSPDF_PAGE_H_
+#define NSPDF_PAGE_H_
+
+#include <nspdf/errors.h>
+
+struct nspdf_doc;
+
+nspdferror nspdf_count_pages(struct nspdf_doc *doc, unsigned int *pages_out);
+
+#endif /* NSPDF_PAGE_H_ */
diff --git a/src/Makefile b/src/Makefile
index c4ddc7f..09bde65 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,3 +1,3 @@
-DIR_SOURCES := document.c byte_class.c cos_parse.c cos_object.c pdf_doc.c meta.c
+DIR_SOURCES := document.c byte_class.c cos_parse.c cos_object.c pdf_doc.c meta.c page.c
include $(NSBUILD)/Makefile.subdir
diff --git a/src/document.c b/src/document.c
index cef2c1a..3dea95e 100644
--- a/src/document.c
+++ b/src/document.c
@@ -461,117 +461,6 @@ static nspdferror decode_trailers(struct nspdf_doc *doc)
}
-/**
- * recursively decodes a page tree
- */
-static nspdferror
-decode_page_tree(struct nspdf_doc *doc,
- struct cos_object *page_tree_node,
- unsigned int *page_index)
-{
- nspdferror res;
- const char *type;
-
- // Type = Pages
- res = cos_get_dictionary_name(doc, page_tree_node, "Type", &type);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- if (strcmp(type, "Pages") == 0) {
- struct cos_object *kids;
- unsigned int kids_size;
- unsigned int kids_index;
-
- if (doc->page_table == NULL) {
- /* allocate top level page table */
- int64_t count;
-
- res = cos_get_dictionary_int(doc, page_tree_node, "Count", &count);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- doc->page_table = calloc(count, sizeof(struct page_table_entry));
- if (doc->page_table == NULL) {
- return NSPDFERROR_NOMEM;
- }
- doc->page_table_size = count;
- }
-
- res = cos_get_dictionary_array(doc, page_tree_node, "Kids", &kids);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- res = cos_get_array_size(doc, kids, &kids_size);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- for (kids_index = 0; kids_index < kids_size; kids_index++) {
- struct cos_object *kid;
-
- res = cos_get_array_dictionary(doc, kids, kids_index, &kid);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- res = decode_page_tree(doc, kid, page_index);
- if (res != NSPDFERROR_OK) {
- return res;
- }
- }
-
- } else if (strcmp(type, "Page") == 0) {
- struct page_table_entry *page;
-
- page = doc->page_table + (*page_index);
-
- /* required heritable resources */
- res = cos_heritable_dictionary_dictionary(doc,
- page_tree_node,
- "Resources",
- &(page->resources));
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- /* required heritable mediabox */
- res = cos_heritable_dictionary_array(doc,
- page_tree_node,
- "MediaBox",
- &(page->mediabox));
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- /* optional page contents */
- res = cos_get_dictionary_value(doc,
- page_tree_node,
- "Contents",
- &(page->contents));
- if ((res != NSPDFERROR_OK) &&
- (res != NSPDFERROR_NOTFOUND)) {
- return res;
- }
-
- /*
- printf("page index:%d page:%p resources:%p mediabox:%p contents:%p\n",
- *page_index,
- page,
- page->resources,
- page->mediabox,
- page->contents);
- */
-
- (*page_index)++;
- res = NSPDFERROR_OK;
- } else {
- res = NSPDFERROR_FORMAT;
- }
- return res;
-}
static nspdferror decode_catalog(struct nspdf_doc *doc)
@@ -602,7 +491,7 @@ static nspdferror decode_catalog(struct nspdf_doc *doc)
return res;
}
- res = decode_page_tree(doc, pages, &page_index);
+ res = nspdf__decode_page_tree(doc, pages, &page_index);
if (res != NSPDFERROR_OK) {
return res;
}
diff --git a/src/page.c b/src/page.c
new file mode 100644
index 0000000..bca6dbc
--- /dev/null
+++ b/src/page.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright 2018 Vincent Sanders <vince(a)netsurf-browser.org>
+ *
+ * This file is part of libnspdf.
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <nspdf/page.h>
+
+#include "cos_object.h"
+#include "pdf_doc.h"
+
+/**
+ * recursively decodes a page tree
+ */
+nspdferror
+nspdf__decode_page_tree(struct nspdf_doc *doc,
+ struct cos_object *page_tree_node,
+ unsigned int *page_index)
+{
+ nspdferror res;
+ const char *type;
+
+ // Type = Pages
+ res = cos_get_dictionary_name(doc, page_tree_node, "Type", &type);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+
+ if (strcmp(type, "Pages") == 0) {
+ struct cos_object *kids;
+ unsigned int kids_size;
+ unsigned int kids_index;
+
+ if (doc->page_table == NULL) {
+ /* allocate top level page table */
+ int64_t count;
+
+ res = cos_get_dictionary_int(doc, page_tree_node, "Count", &count);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+
+ doc->page_table = calloc(count, sizeof(struct page_table_entry));
+ if (doc->page_table == NULL) {
+ return NSPDFERROR_NOMEM;
+ }
+ doc->page_table_size = count;
+ }
+
+ res = cos_get_dictionary_array(doc, page_tree_node, "Kids", &kids);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+
+ res = cos_get_array_size(doc, kids, &kids_size);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+
+ for (kids_index = 0; kids_index < kids_size; kids_index++) {
+ struct cos_object *kid;
+
+ res = cos_get_array_dictionary(doc, kids, kids_index, &kid);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+
+ res = nspdf__decode_page_tree(doc, kid, page_index);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+ }
+
+ } else if (strcmp(type, "Page") == 0) {
+ struct page_table_entry *page;
+
+ page = doc->page_table + (*page_index);
+
+ /* required heritable resources */
+ res = cos_heritable_dictionary_dictionary(doc,
+ page_tree_node,
+ "Resources",
+ &(page->resources));
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+
+ /* required heritable mediabox */
+ res = cos_heritable_dictionary_array(doc,
+ page_tree_node,
+ "MediaBox",
+ &(page->mediabox));
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+
+ /* optional page contents */
+ res = cos_get_dictionary_value(doc,
+ page_tree_node,
+ "Contents",
+ &(page->contents));
+ if ((res != NSPDFERROR_OK) &&
+ (res != NSPDFERROR_NOTFOUND)) {
+ return res;
+ }
+
+ /*
+ printf("page index:%d page:%p resources:%p mediabox:%p contents:%p\n",
+ *page_index,
+ page,
+ page->resources,
+ page->mediabox,
+ page->contents);
+ */
+
+ (*page_index)++;
+ res = NSPDFERROR_OK;
+ } else {
+ res = NSPDFERROR_FORMAT;
+ }
+ return res;
+}
+
+/* exported interface documented in nspdf/page.h */
+nspdferror
+nspdf_count_pages(struct nspdf_doc *doc, unsigned int *pages_out)
+{
+ *pages_out = doc->page_table_size;
+ return NSPDFERROR_OK;
+}
diff --git a/src/pdf_doc.h b/src/pdf_doc.h
index e9bdc14..a75c90e 100644
--- a/src/pdf_doc.h
+++ b/src/pdf_doc.h
@@ -49,3 +49,5 @@ nspdferror doc_skip_ws(struct nspdf_doc *doc, uint64_t *offset);
nspdferror doc_skip_eol(struct nspdf_doc *doc, uint64_t *offset);
nspdferror xref_get_referenced(struct nspdf_doc *doc, struct cos_object **cobj_out);
+
+nspdferror nspdf__decode_page_tree(struct nspdf_doc *doc, struct cos_object *page_tree_node, unsigned int *page_index);
diff --git a/test/parsepdf.c b/test/parsepdf.c
index 1d7a086..ad6c6c4 100644
--- a/test/parsepdf.c
+++ b/test/parsepdf.c
@@ -18,6 +18,7 @@
#include <nspdf/document.h>
#include <nspdf/meta.h>
+#include <nspdf/page.h>
static nspdferror
read_whole_pdf(const char *fname, uint8_t **buffer, uint64_t *buffer_length)
@@ -62,6 +63,7 @@ int main(int argc, char **argv)
struct nspdf_doc *doc;
nspdferror res;
struct lwc_string_s *title;
+ unsigned int page_count;
if (argc < 2) {
fprintf(stderr, "Usage %s <filename>\n", argv[0]);
@@ -91,6 +93,14 @@ int main(int argc, char **argv)
printf("Title:%s\n", lwc_string_data(title));
}
+ res = nspdf_count_pages(doc, &page_count);
+ if (res != NSPDFERROR_OK) {
+ printf("page count failed (%d)\n", res);
+ return res;
+ }
+ printf("Pages:%d\n", page_count);
+
+
res = nspdf_document_destroy(doc);
if (res != NSPDFERROR_OK) {
printf("failed to destroy document (%d)\n", res);
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=a65babe2fbd341f9d1...
commit a65babe2fbd341f9d12a50a9530682ef1e015b58
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
add metadata interface
diff --git a/Makefile b/Makefile
index 470505f..7024de4 100644
--- a/Makefile
+++ b/Makefile
@@ -34,6 +34,17 @@ else
endif
CFLAGS := $(CFLAGS) -D_POSIX_C_SOURCE=200809L
+# wapcaplet
+ifneq ($(findstring clean,$(MAKECMDGOALS)),clean)
+ ifneq ($(PKGCONFIG),)
+ CFLAGS := $(CFLAGS) $(shell $(PKGCONFIG) libwapcaplet --cflags)
+ LDFLAGS := $(LDFLAGS) $(shell $(PKGCONFIG) libwapcaplet --libs)
+ else
+ CFLAGS := $(CFLAGS) -I$(PREFIX)/include
+ LDFLAGS := $(LDFLAGS) -lwapcaplet
+ endif
+endif
+
REQUIRED_LIBS := nspdf
TESTCFLAGS := -g -O2
@@ -42,7 +53,9 @@ TESTLDFLAGS := -l$(COMPONENT) $(TESTLDFLAGS)
include $(NSBUILD)/Makefile.top
# Extra installation rules
-I := /$(INCLUDEDIR)
-INSTALL_ITEMS := $(INSTALL_ITEMS) $(I):include/nspdf.h
+I := /$(INCLUDEDIR)/nspdf
+INSTALL_ITEMS := $(INSTALL_ITEMS) $(I):include/nspdf/document.h
+INSTALL_ITEMS := $(INSTALL_ITEMS) $(I):include/nspdf/meta.h
+INSTALL_ITEMS := $(INSTALL_ITEMS) $(I):include/nspdf/errors.h
INSTALL_ITEMS := $(INSTALL_ITEMS) /$(LIBDIR)/pkgconfig:lib$(COMPONENT).pc.in
INSTALL_ITEMS := $(INSTALL_ITEMS) /$(LIBDIR):$(OUTPUT)
diff --git a/include/nspdf/document.h b/include/nspdf/document.h
index 4e4931d..d7cbb0f 100644
--- a/include/nspdf/document.h
+++ b/include/nspdf/document.h
@@ -15,6 +15,7 @@
#ifndef NSPDF_DOCUMENT_H_
#define NSPDF_DOCUMENT_H_
+#include <stdint.h>
#include <nspdf/errors.h>
struct nspdf_doc;
diff --git a/include/nspdf/meta.h b/include/nspdf/meta.h
new file mode 100644
index 0000000..a91183e
--- /dev/null
+++ b/include/nspdf/meta.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2018 Vincent Sanders <vince(a)netsurf-browser.org>
+ *
+ * This file is part of libnspdf.
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
+/**
+ * \file
+ * NetSurf PDF library meta data about document.
+ */
+
+#ifndef NSPDF_META_H_
+#define NSPDF_META_H_
+
+#include <nspdf/errors.h>
+
+struct nspdf_doc;
+struct lwc_string_s;
+
+nspdferror nspdf_get_title(struct nspdf_doc *doc, struct lwc_string_s **title);
+
+#endif /* NSPDF_META_H_ */
diff --git a/src/Makefile b/src/Makefile
index ed0b4ba..c4ddc7f 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,3 +1,3 @@
-DIR_SOURCES := document.c byte_class.c cos_parse.c cos_object.c pdf_doc.c
+DIR_SOURCES := document.c byte_class.c cos_parse.c cos_object.c pdf_doc.c meta.c
include $(NSBUILD)/Makefile.subdir
diff --git a/src/cos_object.c b/src/cos_object.c
index 2fa3a93..494c7ff 100644
--- a/src/cos_object.c
+++ b/src/cos_object.c
@@ -173,6 +173,22 @@ cos_get_dictionary_name(struct nspdf_doc *doc,
}
nspdferror
+cos_get_dictionary_string(struct nspdf_doc *doc,
+ struct cos_object *dict,
+ const char *key,
+ struct cos_string **string_out)
+{
+ nspdferror res;
+ struct cos_object *dict_value;
+
+ res = cos_get_dictionary_value(doc, dict, key, &dict_value);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+ return cos_get_string(doc, dict_value, string_out);
+}
+
+nspdferror
cos_get_dictionary_dictionary(struct nspdf_doc *doc,
struct cos_object *dict,
const char *key,
@@ -278,7 +294,6 @@ cos_get_name(struct nspdf_doc *doc,
}
-
nspdferror
cos_get_dictionary(struct nspdf_doc *doc,
struct cos_object *cobj,
@@ -297,6 +312,7 @@ cos_get_dictionary(struct nspdf_doc *doc,
return res;
}
+
nspdferror
cos_get_array(struct nspdf_doc *doc,
struct cos_object *cobj,
@@ -315,6 +331,26 @@ cos_get_array(struct nspdf_doc *doc,
return res;
}
+
+nspdferror
+cos_get_string(struct nspdf_doc *doc,
+ struct cos_object *cobj,
+ struct cos_string **string_out)
+{
+ nspdferror res;
+
+ res = xref_get_referenced(doc, &cobj);
+ if (res == NSPDFERROR_OK) {
+ if (cobj->type != COS_TYPE_STRING) {
+ res = NSPDFERROR_TYPE;
+ } else {
+ *string_out = cobj->u.s;
+ }
+ }
+ return res;
+}
+
+
/*
* get a value for a key from a dictionary
*/
diff --git a/src/cos_object.h b/src/cos_object.h
index a40c691..d0bd5ea 100644
--- a/src/cos_object.h
+++ b/src/cos_object.h
@@ -85,6 +85,7 @@ struct cos_object {
nspdferror cos_free_object(struct cos_object *cos_obj);
+
/**
* extract a value for a key from a dictionary
*
@@ -100,6 +101,7 @@ nspdferror cos_free_object(struct cos_object *cos_obj);
*/
nspdferror cos_extract_dictionary_value(struct cos_object *dict, const char *key, struct cos_object **value_out);
+
/**
* get a value for a key from a dictionary
*
@@ -118,27 +120,104 @@ nspdferror cos_get_dictionary_int(struct nspdf_doc *doc, struct cos_object *dict
nspdferror cos_get_dictionary_name(struct nspdf_doc *doc, struct cos_object *dict, const char *key, const char **value_out);
+
+nspdferror cos_get_dictionary_string(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_string **string_out);
+
+
nspdferror cos_get_dictionary_dictionary(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
+
nspdferror cos_heritable_dictionary_dictionary(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
+
nspdferror cos_get_dictionary_array(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
+
nspdferror cos_heritable_dictionary_array(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
-nspdferror cos_get_int(struct nspdf_doc *doc, struct cos_object *cobj, int64_t *value_out);
+nspdferror cos_get_array_size(struct nspdf_doc *doc, struct cos_object *cobj, unsigned int *size_out);
-nspdferror cos_get_name(struct nspdf_doc *doc, struct cos_object *cobj, const char **value_out);
+nspdferror cos_get_array_value(struct nspdf_doc *doc, struct cos_object *array, unsigned int index, struct cos_object **value_out);
-nspdferror cos_get_dictionary(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out);
+nspdferror cos_get_array_dictionary(struct nspdf_doc *doc, struct cos_object *arrau, unsigned int index, struct cos_object **value_out);
-nspdferror cos_get_array(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out);
+/**
+ * get the integer value of a cos object.
+ *
+ * Get the value from a cos object, if the object is an object reference it
+ * will be dereferenced first. The dereferencing will parse any previously
+ * unreferenced indirect objects as required.
+ *
+ * \param doc The document the cos object belongs to.
+ * \param cobj A cos object of integer type.
+ * \param value_out The result value.
+ * \return NSPDFERROR_OK and \p value_out updated,
+ * NSPDFERROR_TYPE if the \p cobj is not an integer
+ */
+nspdferror cos_get_int(struct nspdf_doc *doc, struct cos_object *cobj, int64_t *value_out);
-nspdferror cos_get_array_size(struct nspdf_doc *doc, struct cos_object *cobj, unsigned int *size_out);
+/**
+ * get the name value of a cos object.
+ *
+ * Get the value from a cos object, if the object is an object reference it
+ * will be dereferenced first. The dereferencing will parse any previously
+ * unreferenced indirect objects as required.
+ *
+ * \param doc The document the cos object belongs to.
+ * \param cobj A cos object of name type.
+ * \param name_out The result value.
+ * \return NSPDFERROR_OK and \p name_out updated,
+ * NSPDFERROR_TYPE if the \p cobj is not a name
+ */
+nspdferror cos_get_name(struct nspdf_doc *doc, struct cos_object *cobj, const char **name_out);
-nspdferror cos_get_array_value(struct nspdf_doc *doc, struct cos_object *array, unsigned int index, struct cos_object **value_out);
-nspdferror cos_get_array_dictionary(struct nspdf_doc *doc, struct cos_object *arrau, unsigned int index, struct cos_object **value_out);
+/**
+ * get the string value of a cos object.
+ *
+ * Get the value from a cos object, if the object is an object reference it
+ * will be dereferenced first. The dereferencing will parse any previously
+ * unreferenced indirect objects as required.
+ *
+ * \param doc The document the cos object belongs to.
+ * \param cobj A cos object of string type.
+ * \param string_out The result value.
+ * \return NSPDFERROR_OK and \p string_out updated,
+ * NSPDFERROR_TYPE if the \p cobj is not a string
+ */
+nspdferror cos_get_string(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_string **string_out);
+
+
+/**
+ * get the dictionary value of a cos object.
+ *
+ * Get the value from a cos object, if the object is an object reference it
+ * will be dereferenced first. The dereferencing will parse any previously
+ * unreferenced indirect objects as required.
+ *
+ * \param doc The document the cos object belongs to.
+ * \param cobj A cos object of dictionary type.
+ * \param value_out The result value.
+ * \return NSPDFERROR_OK and \p value_out updated,
+ * NSPDFERROR_TYPE if the \p cobj is not a dictionary
+ */
+nspdferror cos_get_dictionary(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out);
+
+
+/**
+ * get the array value of a cos object.
+ *
+ * Get the value from a cos object, if the object is an object reference it
+ * will be dereferenced first. The dereferencing will parse any previously
+ * unreferenced indirect objects as required.
+ *
+ * \param doc The document the cos object belongs to.
+ * \param cobj A cos object of array type.
+ * \param value_out The result value.
+ * \return NSPDFERROR_OK and \p value_out updated,
+ * NSPDFERROR_TYPE if the \p cobj is not an array
+ */
+nspdferror cos_get_array(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out);
diff --git a/src/document.c b/src/document.c
index 9be0ab5..cef2c1a 100644
--- a/src/document.c
+++ b/src/document.c
@@ -419,9 +419,9 @@ decode_xref_trailer_failed:
/**
* decode non-linear pdf trailer data
*
- * PDF have a structure nominally defined as header, body, cross reference table
- * and trailer. The body, cross reference table and trailer sections may be
- * repeated in a scheme known as "incremental updates"
+ * PDF have a structure nominally defined as header, body, cross reference
+ * table and trailer. The body, cross reference table and trailer sections may
+ * be repeated in a scheme known as "incremental updates"
*
* The strategy used here is to locate the end of the last trailer block which
* contains a startxref token followed by a byte offset into the file of the
diff --git a/src/meta.c b/src/meta.c
new file mode 100644
index 0000000..02566b2
--- /dev/null
+++ b/src/meta.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2018 Vincent Sanders <vince(a)netsurf-browser.org>
+ *
+ * This file is part of libnspdf.
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
+#include <libwapcaplet/libwapcaplet.h>
+
+#include <nspdf/meta.h>
+
+#include "cos_object.h"
+#include "pdf_doc.h"
+
+static nspdferror lwc2nspdferr(lwc_error ret)
+{
+ nspdferror res;
+
+ switch (ret) {
+ case lwc_error_ok:
+ res = NSPDFERROR_OK;
+ break;
+
+ case lwc_error_oom:
+ res = NSPDFERROR_NOMEM;
+ break;
+
+ case lwc_error_range:
+ res = NSPDFERROR_RANGE;
+ break;
+
+ default:
+ res = NSPDFERROR_NOTFOUND;
+ break;
+ }
+ return res;
+}
+
+nspdferror nspdf_get_title(struct nspdf_doc *doc, struct lwc_string_s **title)
+{
+ struct cos_string *cos_title;
+ nspdferror res;
+
+ if (doc->info == NULL) {
+ return NSPDFERROR_NOTFOUND;
+ }
+
+ res = cos_get_dictionary_string(doc, doc->info, "Title", &cos_title);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+
+ res = lwc2nspdferr(lwc_intern_string((const char *)cos_title->data,
+ cos_title->length,
+ title));
+
+ return res;
+}
diff --git a/test/parsepdf.c b/test/parsepdf.c
index 3482af5..1d7a086 100644
--- a/test/parsepdf.c
+++ b/test/parsepdf.c
@@ -14,7 +14,10 @@
#include <stdbool.h>
#include <string.h>
+#include <libwapcaplet/libwapcaplet.h>
+
#include <nspdf/document.h>
+#include <nspdf/meta.h>
static nspdferror
read_whole_pdf(const char *fname, uint8_t **buffer, uint64_t *buffer_length)
@@ -58,6 +61,7 @@ int main(int argc, char **argv)
uint64_t buffer_length;
struct nspdf_doc *doc;
nspdferror res;
+ struct lwc_string_s *title;
if (argc < 2) {
fprintf(stderr, "Usage %s <filename>\n", argv[0]);
@@ -82,6 +86,11 @@ int main(int argc, char **argv)
return res;
}
+ res = nspdf_get_title(doc, &title);
+ if (res == NSPDFERROR_OK) {
+ printf("Title:%s\n", lwc_string_data(title));
+ }
+
res = nspdf_document_destroy(doc);
if (res != NSPDFERROR_OK) {
printf("failed to destroy document (%d)\n", res);
diff --git a/test/runtest.sh b/test/runtest.sh
index 1aa83c7..c62ec11 100755
--- a/test/runtest.sh
+++ b/test/runtest.sh
@@ -1,4 +1,4 @@
#!/bin/sh
TEST_PATH=$1
-${TEST_PATH}/test_parsepdf ~/Downloads/HiKey_User_Guide_Rev0.2.pdf
+${TEST_PATH}/test_parsepdf test/files/sn74ls173a.pdf
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=31b1f792826f51e927...
commit 31b1f792826f51e9271475d124c3a1df4aa5116b
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
make an actual library
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..fe0a4e8
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,19 @@
+Copyright 2018 Vincent Sanders <vince(a)netsurf-browser.org>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..470505f
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,48 @@
+#!/bin/make
+#
+# Makefile for libnspdf
+#
+# Copyright 2018 Vincent Sanders <vince(a)netsurf-browser.org>
+
+# Component settings
+COMPONENT := nspdf
+COMPONENT_VERSION := 0.0.1
+# Default to a static library
+COMPONENT_TYPE ?= lib-static
+
+# Setup the tooling
+PREFIX ?= /opt/netsurf
+NSSHARED ?= $(PREFIX)/share/netsurf-buildsystem
+include $(NSSHARED)/makefiles/Makefile.tools
+
+# Reevaluate when used, as BUILDDIR won't be defined yet
+TESTRUNNER = test/runtest.sh $(BUILDDIR) $(EXEEXT)
+
+# Toolchain flags
+WARNFLAGS := -Wall -W -Wundef -Wpointer-arith -Wcast-align \
+ -Wwrite-strings -Wstrict-prototypes -Wmissing-prototypes \
+ -Wmissing-declarations -Wnested-externs
+
+CFLAGS := -D_GNU_SOURCE -D_DEFAULT_SOURCE \
+ -I$(CURDIR)/include/ -I$(CURDIR)/src \
+ $(WARNFLAGS) $(CFLAGS)
+ifneq ($(GCCVER),2)
+ CFLAGS := $(CFLAGS) -std=c99
+else
+ # __inline__ is a GCCism
+ CFLAGS := $(CFLAGS) -Dinline="__inline__"
+endif
+CFLAGS := $(CFLAGS) -D_POSIX_C_SOURCE=200809L
+
+REQUIRED_LIBS := nspdf
+
+TESTCFLAGS := -g -O2
+TESTLDFLAGS := -l$(COMPONENT) $(TESTLDFLAGS)
+
+include $(NSBUILD)/Makefile.top
+
+# Extra installation rules
+I := /$(INCLUDEDIR)
+INSTALL_ITEMS := $(INSTALL_ITEMS) $(I):include/nspdf.h
+INSTALL_ITEMS := $(INSTALL_ITEMS) /$(LIBDIR)/pkgconfig:lib$(COMPONENT).pc.in
+INSTALL_ITEMS := $(INSTALL_ITEMS) /$(LIBDIR):$(OUTPUT)
diff --git a/README b/README
new file mode 100644
index 0000000..96602d4
--- /dev/null
+++ b/README
@@ -0,0 +1,4 @@
+NetSurf Portable Document Format handling library
+=================================================
+
+library to handle manipulating PDF files
diff --git a/include/nspdf/document.h b/include/nspdf/document.h
new file mode 100644
index 0000000..4e4931d
--- /dev/null
+++ b/include/nspdf/document.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2018 Vincent Sanders <vince(a)netsurf-browser.org>
+ *
+ * This file is part of libnspdf.
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
+/**
+ * \file
+ * NetSurf PDF library document handling
+ */
+
+#ifndef NSPDF_DOCUMENT_H_
+#define NSPDF_DOCUMENT_H_
+
+#include <nspdf/errors.h>
+
+struct nspdf_doc;
+
+/**
+ * create a new PDF document
+ */
+nspdferror nspdf_document_create(struct nspdf_doc **doc_out);
+
+/**
+ * destroys a previously created document
+ *
+ * any allocated resources are freed but any buffers passed for parse are not
+ * altered and may now be freed by the caller.
+ */
+nspdferror nspdf_document_destroy(struct nspdf_doc *doc);
+
+/**
+ * parse a PDF from a memory buffer
+ *
+ * reads all metadata and validates header, trailer, xref table and page tree
+ * ready to render pages. The passed buffer ownership is transferred and must
+ * not be altered until the document is destroyed.
+ */
+nspdferror nspdf_document_parse(struct nspdf_doc *doc, const uint8_t *buffer, uint64_t buffer_length);
+
+
+#endif /* NSPDF_DOCUMENT_H_ */
diff --git a/include/nspdf/errors.h b/include/nspdf/errors.h
new file mode 100644
index 0000000..f2142ff
--- /dev/null
+++ b/include/nspdf/errors.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2018 Vincent Sanders <vince(a)netsurf-browser.org>
+ *
+ * This file is part of libnspdf.
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
+/**
+ * \file
+ * NetSurf PDF library return codes
+ */
+
+#ifndef NSPDF_ERRORS_H_
+#define NSPDF_ERRORS_H_
+
+typedef enum {
+ NSPDFERROR_OK, /**< no error */
+ NSPDFERROR_NOMEM, /**< memory allocation error */
+ NSPDFERROR_SYNTAX, /**< syntax error in parse */
+ NSPDFERROR_SIZE, /**< not enough input data */
+ NSPDFERROR_RANGE, /**< value outside type range */
+ NSPDFERROR_TYPE, /**< wrong type error */
+ NSPDFERROR_NOTFOUND, /**< key not found */
+ NSPDFERROR_FORMAT, /**< objects do not conform to expected format */
+} nspdferror;
+
+#endif
diff --git a/libnspdf.pc.in b/libnspdf.pc.in
new file mode 100644
index 0000000..0898deb
--- /dev/null
+++ b/libnspdf.pc.in
@@ -0,0 +1,10 @@
+prefix=PREFIX
+exec_prefix=${prefix}
+libdir=${exec_prefix}/LIBDIR
+includedir=${prefix}/INCLUDEDIR
+
+Name: libnspdf
+Description: NetSurf PDF library
+Version: VERSION
+Libs: -L${libdir} LIBRARIES
+Cflags: -I${includedir}
diff --git a/src/Makefile b/src/Makefile
index af806f3..ed0b4ba 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,14 +1,3 @@
-#
+DIR_SOURCES := document.c byte_class.c cos_parse.c cos_object.c pdf_doc.c
-CFLAGS+=-g -Wall -Wextra
-
-OBJS=xref.o byte_class.o cos_decode.o cos_object.o pdf_doc.o
-
-.PHONY:all clean
-
-all:xref
-
-xref:$(OBJS)
-
-clean:
- ${RM} xref $(OBJS)
+include $(NSBUILD)/Makefile.subdir
diff --git a/src/cos_decode.c b/src/cos_decode.c
deleted file mode 100644
index 8873060..0000000
--- a/src/cos_decode.c
+++ /dev/null
@@ -1,884 +0,0 @@
-#include <stdint.h>
-#include <stddef.h>
-#include <stdbool.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "byte_class.h"
-#include "nspdferror.h"
-#include "cos_object.h"
-#include "pdf_doc.h"
-
-/** increments in which cos string allocations are extended */
-#define COS_STRING_ALLOC 32
-
-/** Maximum length of cos name */
-#define NAME_MAX_LENGTH 127
-
-static nspdferror
-cos_string_append(struct cos_string *s, uint8_t c)
-{
- //printf("appending 0x%x to %p len %d alloc %d\n", c, s->data, s->length, s->alloc);
- if (s->length == s->alloc) {
- uint8_t *ns;
- ns = realloc(s->data, s->alloc + COS_STRING_ALLOC);
- if (ns == NULL) {
- return NSPDFERROR_NOMEM;
- }
- s->data = ns;
- s->alloc += COS_STRING_ALLOC;
- }
- s->data[s->length++] = c;
- return NSPDFERROR_OK;
-}
-
-static uint8_t xtoi(uint8_t x)
-{
- if (x >= '0' && x <= '9') {
- x = x - '0';
- } else if (x >= 'a' && x <='f') {
- x = x - 'a' + 10;
- } else if (x >= 'A' && x <='F') {
- x = x - 'A' + 10;
- }
- return x;
-}
-
-static nspdferror
-cos_decode_number(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- struct cos_object *cosobj;
- uint8_t c; /* current byte from source data */
- unsigned int len; /* number of decimal places in number */
- uint8_t num[21]; /* temporary buffer for decimal values */
- uint64_t offset; /* current offset of source data */
-
- offset = *offset_out;
-
- for (len = 0; len < sizeof(num); len++) {
- c = DOC_BYTE(doc, offset);
- if ((bclass[c] & BC_DCML) != BC_DCML) {
- int64_t result = 0; /* parsed result */
- uint64_t tens;
-
- if (len == 0) {
- /* parse error no decimals in input */
- return NSPDFERROR_SYNTAX;
- }
- /* sum value from each place */
- for (tens = 1; len > 0; tens = tens * 10, len--) {
- result += (num[len - 1] * tens);
- }
-
- doc_skip_ws(doc, &offset);
-
- cosobj = calloc(1, sizeof(struct cos_object));
- if (cosobj == NULL) {
- return NSPDFERROR_NOMEM;
- }
-
- cosobj->type = COS_TYPE_INT;
- cosobj->u.i = result;
-
- *cosobj_out = cosobj;
-
- *offset_out = offset;
-
- return NSPDFERROR_OK;
- }
- num[len] = c - '0';
- offset++;
- }
- return NSPDFERROR_RANGE; /* number too long */
-}
-
-
-/**
- * decode literal string
- *
- */
-static nspdferror
-cos_decode_string(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- uint64_t offset;
- struct cos_object *cosobj;
- uint8_t c;
- unsigned int pdepth = 1; /* depth of open parens */
- struct cos_string *cstring;
-
- offset = *offset_out;
-
- c = DOC_BYTE(doc, offset++);
- if (c != '(') {
- return NSPDFERROR_SYNTAX;
- }
-
- cstring = calloc(1, sizeof(*cstring));
- if (cstring == NULL) {
- return NSPDFERROR_NOMEM;
- }
-
- cosobj = calloc(1, sizeof(*cosobj));
- if (cosobj == NULL) {
- return NSPDFERROR_NOMEM;
- }
- cosobj->type = COS_TYPE_STRING;
- cosobj->u.s = cstring;
-
- while (pdepth > 0) {
- c = DOC_BYTE(doc, offset++);
-
- if (c == ')') {
- pdepth--;
- if (pdepth == 0) {
- break;
- }
- } else if (c == '(') {
- pdepth++;
- } else if ((bclass[c] & BC_EOLM ) != 0) {
- /* unescaped end of line characters are translated to a single
- * newline
- */
- c = DOC_BYTE(doc, offset);
- while ((bclass[c] & BC_EOLM) != 0) {
- offset++;
- c = DOC_BYTE(doc, offset);
- }
- c = '\n';
- } else if (c == '\\') {
- /* escaped chars */
- c = DOC_BYTE(doc, offset++);
- switch (c) {
- case 'n':
- c = '\n';
- break;
-
- case 'r':
- c = '\r';
- break;
-
- case 't':
- c = '\t';
- break;
-
- case 'b':
- c = '\b';
- break;
-
- case 'f':
- c = '\f';
- break;
-
- case '(':
- c = '(';
- break;
-
- case ')':
- c = ')';
- break;
-
- case '\\':
- c = '\\';
- break;
-
- default:
-
- if ((bclass[c] & BC_EOLM) != 0) {
- /* escaped end of line, swallow it */
- c = DOC_BYTE(doc, offset++);
- while ((bclass[c] & BC_EOLM) != 0) {
- c = DOC_BYTE(doc, offset++);
- }
- } else if ((bclass[c] & BC_OCTL) != 0) {
- /* octal value */
- uint8_t val;
- val = (c - '0');
- c = DOC_BYTE(doc, offset);
- if ((bclass[c] & BC_OCTL) != 0) {
- offset++;
- val = (val << 3) | (c - '0');
- c = DOC_BYTE(doc, offset);
- if ((bclass[c] & BC_OCTL) != 0) {
- offset++;
- val = (val << 3) | (c - '0');
- c = val;
- }
- }
- } /* else invalid (skip backslash) */
- break;
- }
- }
-
- /* c contains the character to add to the string */
- cos_string_append(cstring, c);
- }
-
- doc_skip_ws(doc, &offset);
-
- *cosobj_out = cosobj;
- *offset_out = offset;
-
- return NSPDFERROR_OK;
-}
-
-/**
- * decode hex encoded string
- */
-static nspdferror
-cos_decode_hex_string(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- uint64_t offset;
- struct cos_object *cosobj;
- uint8_t c;
- uint8_t value = 0;
- struct cos_string *cstring;
- bool first = true;
-
- offset = *offset_out;
-
- c = DOC_BYTE(doc, offset++);
- if (c != '<') {
- return NSPDFERROR_SYNTAX;
- }
-
- cstring = calloc(1, sizeof(*cstring));
- if (cstring == NULL) {
- return NSPDFERROR_NOMEM;
- }
-
- cosobj = calloc(1, sizeof(*cosobj));
- if (cosobj == NULL) {
- return NSPDFERROR_NOMEM;
- }
- cosobj->type = COS_TYPE_STRING;
- cosobj->u.s = cstring;
-
- for (; offset < doc->length; offset++) {
- c = DOC_BYTE(doc, offset);
- if (c == '>') {
- if (first == false) {
- cos_string_append(cstring, value);
- }
- offset++;
- doc_skip_ws(doc, &offset);
-
- *cosobj_out = cosobj;
- *offset_out = offset;
-
- return NSPDFERROR_OK;
- } else if ((bclass[c] & BC_HEXL) != 0) {
- if (first) {
- value = xtoi(c) << 4;
- first = false;
- } else {
- value |= xtoi(c);
- first = true;
- cos_string_append(cstring, value);
- }
- } else if ((bclass[c] & BC_WSPC) == 0) {
- break; /* unknown byte value in string */
- }
- }
- return NSPDFERROR_SYNTAX;
-}
-
-/**
- * decode a dictionary object
- */
-static nspdferror
-cos_decode_dictionary(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- uint64_t offset;
- struct cos_object *cosobj;
- struct cos_dictionary_entry *entry;
- struct cos_object *key;
- struct cos_object *value;
- int res;
-
- offset = *offset_out;
-
- if ((DOC_BYTE(doc, offset) != '<') ||
- (DOC_BYTE(doc, offset + 1) != '<')) {
- return -1; /* syntax error */
- }
- offset += 2;
- doc_skip_ws(doc, &offset);
-
- //printf("found a dictionary\n");
-
- cosobj = calloc(1, sizeof(struct cos_object));
- if (cosobj == NULL) {
- return -1; /* memory error */
- }
- cosobj->type = COS_TYPE_DICTIONARY;
-
- while ((DOC_BYTE(doc, offset) != '>') &&
- (DOC_BYTE(doc, offset + 1) != '>')) {
-
- res = cos_decode_object(doc, &offset, &key);
- if (res != NSPDFERROR_OK) {
- /* todo free up any dictionary entries already created */
- printf("key object decode failed\n");
- return res;
- }
- if (key->type != COS_TYPE_NAME) {
- /* key value pairs without a name */
- printf("key was %d not a name %d\n", key->type, COS_TYPE_NAME);
- return NSPDFERROR_SYNTAX;
- }
- //printf("key: %s\n", key->u.n);
-
- res = cos_decode_object(doc, &offset, &value);
- if (res != NSPDFERROR_OK) {
- printf("Unable to decode value object in dictionary\n");
- /* todo free up any dictionary entries already created */
- return res;
- }
-
- /* add dictionary entry */
- entry = calloc(1, sizeof(struct cos_dictionary_entry));
- if (entry == NULL) {
- /* todo free up any dictionary entries already created */
- return NSPDFERROR_NOMEM;
- }
-
- entry->key = key;
- entry->value = value;
- entry->next = cosobj->u.dictionary;
-
- cosobj->u.dictionary = entry;
-
- }
- offset += 2; /* skip closing >> */
- doc_skip_ws(doc, &offset);
-
- *cosobj_out = cosobj;
- *offset_out = offset;
-
- return NSPDFERROR_OK;
-}
-
-/**
- * decode a list
- */
-static nspdferror
-cos_decode_list(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- uint64_t offset;
- struct cos_object *cosobj;
- struct cos_array_entry *entry;
- struct cos_object *value;
- nspdferror res;
-
- offset = *offset_out;
-
- /* sanity check first token is list open */
- if (DOC_BYTE(doc, offset) != '[') {
- printf("not a [\n");
- return NSPDFERROR_SYNTAX; /* syntax error */
- }
- offset++;
-
- /* advance offset to next token */
- res = doc_skip_ws(doc, &offset);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- //printf("found a list\n");
-
- cosobj = calloc(1, sizeof(struct cos_object));
- if (cosobj == NULL) {
- return NSPDFERROR_NOMEM;
- }
- cosobj->type = COS_TYPE_ARRAY;
-
- while (DOC_BYTE(doc, offset) != ']') {
-
- res = cos_decode_object(doc, &offset, &value);
- if (res != NSPDFERROR_OK) {
- cos_free_object(cosobj);
- printf("Unable to decode value object in list\n");
- return res;
- }
-
- /* add entry to array */
- entry = calloc(1, sizeof(struct cos_array_entry));
- if (entry == NULL) {
- cos_free_object(cosobj);
- return NSPDFERROR_NOMEM;
- }
-
- entry->value = value;
- entry->next = cosobj->u.array;
-
- cosobj->u.array = entry;
- }
- offset++; /* skip closing ] */
-
- doc_skip_ws(doc, &offset);
-
- *cosobj_out = cosobj;
- *offset_out = offset;
-
- return NSPDFERROR_OK;
-}
-
-
-/**
- * decode a name object
- *
- * \todo deal with # symbols on pdf versions 1.2 and later
- */
-static nspdferror
-cos_decode_name(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- uint64_t offset;
- struct cos_object *cosobj;
- uint8_t c;
- char name[NAME_MAX_LENGTH + 1];
- int idx = 0;
-
- offset = *offset_out;
-
- c = DOC_BYTE(doc, offset++);
- if (c != '/') {
- return -1; /* names must be prefixed with a / */
- }
- //printf("found a name\n");
-
- c = DOC_BYTE(doc, offset);
- while ((idx <= NAME_MAX_LENGTH) &&
- ((bclass[c] & (BC_WSPC | BC_DELM)) == 0)) {
- offset++;
- //printf("%c", c);
- name[idx++] = c;
- c = DOC_BYTE(doc, offset);
- }
- //printf("\nidx: %d\n", idx);
- if (idx > NAME_MAX_LENGTH) {
- /* name length exceeded implementation limit */
- return -1;
- }
- name[idx] = 0;
-
- //printf("name: %s\n", name);
-
- doc_skip_ws(doc, &offset);
-
- cosobj = calloc(1, sizeof(struct cos_object));
- if (cosobj == NULL) {
- return NSPDFERROR_NOMEM; /* memory error */
- }
-
- cosobj->type = COS_TYPE_NAME;
- cosobj->u.n = strdup(name);
-
- *cosobj_out = cosobj;
-
- *offset_out = offset;
-
- return NSPDFERROR_OK;
-}
-
-/**
- * decode a cos boolean object
- */
-static int
-cos_decode_boolean(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- uint64_t offset;
- struct cos_object *cosobj;
- uint8_t c;
- bool value;
-
- offset = *offset_out;
-
- c = DOC_BYTE(doc, offset++);
- if ((c == 't') || (c == 'T')) {
- /* true branch */
-
- c = DOC_BYTE(doc, offset++);
- if ((c != 'r') && (c != 'R')) {
- return -1; /* syntax error */
- }
- c = DOC_BYTE(doc, offset++);
- if ((c != 'u') && (c != 'U')) {
- return -1; /* syntax error */
- }
- c = DOC_BYTE(doc, offset++);
- if ((c != 'e') && (c != 'E')) {
- return -1; /* syntax error */
- }
- value = true;
-
- } else if ((c == 'f') || (c == 'F')) {
- /* false branch */
-
- c = DOC_BYTE(doc, offset++);
- if ((c != 'a') && (c != 'A')) {
- return -1; /* syntax error */
- }
- c = DOC_BYTE(doc, offset++);
- if ((c != 'l') && (c != 'L')) {
- return -1; /* syntax error */
- }
- c = DOC_BYTE(doc, offset++);
- if ((c != 's') && (c != 'S')) {
- return -1; /* syntax error */
- }
- c = DOC_BYTE(doc, offset++);
- if ((c != 'e') && (c != 'E')) {
- return -1; /* syntax error */
- }
-
- value = false;
-
- } else {
- return -1; /* syntax error */
- }
-
- doc_skip_ws(doc, &offset);
-
- cosobj = calloc(1, sizeof(struct cos_object));
- if (cosobj == NULL) {
- return NSPDFERROR_NOMEM; /* memory error */
- }
-
- cosobj->type = COS_TYPE_BOOL;
- cosobj->u.b = value;
-
- *cosobj_out = cosobj;
-
- *offset_out = offset;
-
- return NSPDFERROR_OK;
-}
-
-/**
- * decode the null object.
- */
-static nspdferror
-cos_decode_null(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- uint64_t offset;
- struct cos_object *cosobj;
- uint8_t c;
-
- offset = *offset_out;
-
- c = DOC_BYTE(doc, offset++);
- if ((c != 'n') && (c != 'N')) {
- return -1; /* syntax error */
- }
-
- c = DOC_BYTE(doc, offset++);
- if ((c != 'u') && (c != 'U')) {
- return -1; /* syntax error */
- }
-
- c = DOC_BYTE(doc, offset++);
- if ((c != 'l') && (c != 'L')) {
- return -1; /* syntax error */
- }
-
- c = DOC_BYTE(doc, offset++);
- if ((c != 'l') && (c != 'L')) {
- return -1; /* syntax error */
- }
-
- doc_skip_ws(doc, &offset);
-
- cosobj = calloc(1, sizeof(struct cos_object));
- if (cosobj == NULL) {
- return NSPDFERROR_NOMEM;
- }
-
- cosobj->type = COS_TYPE_NULL;
-
- *cosobj_out = cosobj;
-
- *offset_out = offset;
-
- return NSPDFERROR_OK;
-}
-
-
-/**
- * attempt to decode input data into a reference, indirect or stream object
- *
- * The input data already had a positive integer decoded from it:
- * - if another positive integer follows and a R character after that it is a
- * reference,
- *
- * - if another positive integer follows and 'obj' after that:
- * - a direct object followed by 'endobj' it is an indirect object.
- *
- * - a direct dictionary object followed by 'stream', then stream data,
- * then 'endstream' then 'endobj' it is a stream object
- *
- * \param doc the pdf document
- * \param offset_out offset of current cursor in input data
- * \param cosobj_out the object to return into, on input contains the first
- * integer
- */
-static nspdferror
-cos_attempt_decode_reference(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- nspdferror res;
- uint64_t offset;
- uint8_t c;
- struct cos_object *generation; /* generation object */
-
- offset = *offset_out;
-
- res = cos_decode_number(doc, &offset, &generation);
- if (res != NSPDFERROR_OK) {
- /* no error if next token could not be decoded as a number */
- return NSPDFERROR_OK;
- }
-
- if (generation->type != COS_TYPE_INT) {
- /* next object was not an integer so not a reference */
- cos_free_object(generation);
- return NSPDFERROR_OK;
- }
-
- if (generation->u.i < 0) {
- /* integer was negative so not a reference (generations must be
- * non-negative
- */
- cos_free_object(generation);
- return NSPDFERROR_OK;
- }
-
- /* two int in a row, look for the R */
- c = DOC_BYTE(doc, offset);
- if (c == 'R') {
- struct cos_reference *nref; /* new reference */
-
- //printf("found object reference\n");
- offset ++;
-
- doc_skip_ws(doc, &offset);
-
- nref = calloc(1, sizeof(struct cos_reference));
- if (nref == NULL) {
- cos_free_object(generation);
- return NSPDFERROR_NOMEM; /* memory error */
- }
-
- nref->id = (*cosobj_out)->u.i;
- nref->generation = generation->u.i;
-
- /* overwrite input object for output (it has to be an int which has no
- * allocation to free)
- */
- (*cosobj_out)->type = COS_TYPE_REFERENCE;
- (*cosobj_out)->u.reference = nref;
-
- *offset_out = offset;
-
- } else if ((c == 'o') &&
- (DOC_BYTE(doc, offset + 1) == 'b') &&
- (DOC_BYTE(doc, offset + 2) == 'j')) {
- struct cos_object *indirect; /* indirect object */
- //printf("indirect\n");
- offset += 3;
-
- res = doc_skip_ws(doc, &offset);
- if (res != NSPDFERROR_OK) {
- cos_free_object(generation);
- return res;
- }
- //printf("decoding\n");
-
- res = cos_decode_object(doc, &offset, &indirect);
- if (res != NSPDFERROR_OK) {
- cos_free_object(generation);
- return res;
- }
- //printf("parsed object type %d\nendobj\n",indirect->type);
-
- if ((DOC_BYTE(doc, offset ) != 'e') &&
- (DOC_BYTE(doc, offset + 1) != 'n') &&
- (DOC_BYTE(doc, offset + 2) != 'd') &&
- (DOC_BYTE(doc, offset + 1) != 'o') &&
- (DOC_BYTE(doc, offset + 2) != 'b') &&
- (DOC_BYTE(doc, offset + 3) != 'j')) {
- cos_free_object(indirect);
- cos_free_object(generation);
- return NSPDFERROR_SYNTAX;
- }
- offset += 6;
- //printf("skipping\n");
-
- res = doc_skip_ws(doc, &offset);
- if (res != NSPDFERROR_OK) {
- cos_free_object(indirect);
- cos_free_object(generation);
- return res;
- }
-
- cos_free_object(*cosobj_out);
-
- *cosobj_out = indirect;
-
- *offset_out = offset;
- }
-
- cos_free_object(generation);
- return NSPDFERROR_OK;
-}
-
-
-/*
- * Decode input stream into an object
- *
- * lex and parse a byte stream to generate COS objects
- *
- * lexing the input.
- * check first character:
- *
- * < either a hex string or a dictionary
- * second char < means dictionary else hex string
- * - either an integer or real
- * + either an integer or real
- * 0-9 an integer, unsigned integer or real
- * . a real number
- * ( a string
- * / a name
- * [ a list
- * t|T boolean true
- * f|F boolean false
- * n|N null
- *
- * Grammar is:
- * cos_object:
- * TOK_NULL |
- * TOK_BOOLEAN |
- * TOK_INT |
- * TOK_REAL |
- * TOK_NAME |
- * TOK_STRING |
- * list |
- * dictionary |
- * object_reference |
- * indirect_object;
- *
- * list:
- * '[' listargs ']';
- *
- * listargs:
- * cos_object
- * |
- * listargs cos_object
- * ;
- *
- * object_reference:
- * TOK_UINT TOK_UINT 'R';
- *
- * indirect_object:
- * TOK_UINT TOK_UINT 'obj' cos_object 'endobj'
- * |
- * TOK_UINT TOK_UINT 'obj' dictionary 'stream' streamdata 'endstream' 'endobj'
- * ;
- */
-nspdferror
-cos_decode_object(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- uint64_t offset;
- nspdferror res;
- struct cos_object *cosobj;
-
- offset = *offset_out;
-
- /* object could be any type use first char to try and select */
- switch (DOC_BYTE(doc, offset)) {
-
- case '-':
- case '+':
- case '.':
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- res = cos_decode_number(doc, &offset, &cosobj);
- /* if type is positive integer try to check for reference */
- if ((res == 0) &&
- (cosobj->type == COS_TYPE_INT) &&
- (cosobj->u.i > 0)) {
- res = cos_attempt_decode_reference(doc, &offset, &cosobj);
- }
- break;
-
- case '<':
- if (DOC_BYTE(doc, offset + 1) == '<') {
- res = cos_decode_dictionary(doc, &offset, &cosobj);
- } else {
- res = cos_decode_hex_string(doc, &offset, &cosobj);
- }
- break;
-
- case '(':
- res = cos_decode_string(doc, &offset, &cosobj);
- break;
-
- case '/':
- res = cos_decode_name(doc, &offset, &cosobj);
- break;
-
- case '[':
- res = cos_decode_list(doc, &offset, &cosobj);
- break;
-
- case 't':
- case 'T':
- case 'f':
- case 'F':
- res = cos_decode_boolean(doc, &offset, &cosobj);
- break;
-
- case 'n':
- case 'N':
- res = cos_decode_null(doc, &offset, &cosobj);
- break;
-
- default:
- res = NSPDFERROR_SYNTAX; /* syntax error */
- }
-
- if (res == NSPDFERROR_OK) {
- *cosobj_out = cosobj;
- *offset_out = offset;
- }
-
- return res;
-}
diff --git a/src/cos_object.c b/src/cos_object.c
index 5bfd423..2fa3a93 100644
--- a/src/cos_object.c
+++ b/src/cos_object.c
@@ -14,7 +14,8 @@
#include <stdio.h>
#include <string.h>
-#include "nspdferror.h"
+#include <nspdf/errors.h>
+
#include "cos_object.h"
#include "pdf_doc.h"
@@ -110,7 +111,7 @@ cos_extract_dictionary_value(struct cos_object *dict,
* get a value for a key from a dictionary
*/
nspdferror
-cos_get_dictionary_value(struct pdf_doc *doc,
+cos_get_dictionary_value(struct nspdf_doc *doc,
struct cos_object *dict,
const char *key,
struct cos_object **value_out)
@@ -140,7 +141,7 @@ cos_get_dictionary_value(struct pdf_doc *doc,
}
nspdferror
-cos_get_dictionary_int(struct pdf_doc *doc,
+cos_get_dictionary_int(struct nspdf_doc *doc,
struct cos_object *dict,
const char *key,
int64_t *value_out)
@@ -156,7 +157,7 @@ cos_get_dictionary_int(struct pdf_doc *doc,
}
nspdferror
-cos_get_dictionary_name(struct pdf_doc *doc,
+cos_get_dictionary_name(struct nspdf_doc *doc,
struct cos_object *dict,
const char *key,
const char **value_out)
@@ -172,7 +173,7 @@ cos_get_dictionary_name(struct pdf_doc *doc,
}
nspdferror
-cos_get_dictionary_dictionary(struct pdf_doc *doc,
+cos_get_dictionary_dictionary(struct nspdf_doc *doc,
struct cos_object *dict,
const char *key,
struct cos_object **value_out)
@@ -188,7 +189,7 @@ cos_get_dictionary_dictionary(struct pdf_doc *doc,
}
nspdferror
-cos_heritable_dictionary_dictionary(struct pdf_doc *doc,
+cos_heritable_dictionary_dictionary(struct nspdf_doc *doc,
struct cos_object *dict,
const char *key,
struct cos_object **value_out)
@@ -206,7 +207,7 @@ cos_heritable_dictionary_dictionary(struct pdf_doc *doc,
}
nspdferror
-cos_get_dictionary_array(struct pdf_doc *doc,
+cos_get_dictionary_array(struct nspdf_doc *doc,
struct cos_object *dict,
const char *key,
struct cos_object **value_out)
@@ -222,7 +223,7 @@ cos_get_dictionary_array(struct pdf_doc *doc,
}
nspdferror
-cos_heritable_dictionary_array(struct pdf_doc *doc,
+cos_heritable_dictionary_array(struct nspdf_doc *doc,
struct cos_object *dict,
const char *key,
struct cos_object **value_out)
@@ -241,7 +242,7 @@ cos_heritable_dictionary_array(struct pdf_doc *doc,
}
nspdferror
-cos_get_int(struct pdf_doc *doc,
+cos_get_int(struct nspdf_doc *doc,
struct cos_object *cobj,
int64_t *value_out)
{
@@ -259,7 +260,7 @@ cos_get_int(struct pdf_doc *doc,
}
nspdferror
-cos_get_name(struct pdf_doc *doc,
+cos_get_name(struct nspdf_doc *doc,
struct cos_object *cobj,
const char **value_out)
{
@@ -279,7 +280,7 @@ cos_get_name(struct pdf_doc *doc,
nspdferror
-cos_get_dictionary(struct pdf_doc *doc,
+cos_get_dictionary(struct nspdf_doc *doc,
struct cos_object *cobj,
struct cos_object **value_out)
{
@@ -297,7 +298,7 @@ cos_get_dictionary(struct pdf_doc *doc,
}
nspdferror
-cos_get_array(struct pdf_doc *doc,
+cos_get_array(struct nspdf_doc *doc,
struct cos_object *cobj,
struct cos_object **value_out)
{
@@ -318,7 +319,7 @@ cos_get_array(struct pdf_doc *doc,
* get a value for a key from a dictionary
*/
nspdferror
-cos_get_array_value(struct pdf_doc *doc,
+cos_get_array_value(struct nspdf_doc *doc,
struct cos_object *array,
unsigned int index,
struct cos_object **value_out)
@@ -350,7 +351,7 @@ cos_get_array_value(struct pdf_doc *doc,
}
nspdferror
-cos_get_array_dictionary(struct pdf_doc *doc,
+cos_get_array_dictionary(struct nspdf_doc *doc,
struct cos_object *array,
unsigned int index,
struct cos_object **value_out)
@@ -366,7 +367,7 @@ cos_get_array_dictionary(struct pdf_doc *doc,
}
nspdferror
-cos_get_array_size(struct pdf_doc *doc,
+cos_get_array_size(struct nspdf_doc *doc,
struct cos_object *cobj,
unsigned int *size_out)
{
diff --git a/src/cos_object.h b/src/cos_object.h
index 48241c6..a40c691 100644
--- a/src/cos_object.h
+++ b/src/cos_object.h
@@ -1,4 +1,4 @@
-struct pdf_doc;
+struct nspdf_doc;
enum cos_type {
COS_TYPE_NULL,
@@ -83,13 +83,6 @@ struct cos_object {
} u;
};
-/**
- * Decode input stream into an object
- *
- * lex and parse a byte stream to generate a COS object.
- */
-nspdferror cos_decode_object(struct pdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out);
-
nspdferror cos_free_object(struct cos_object *cos_obj);
/**
@@ -117,35 +110,35 @@ nspdferror cos_extract_dictionary_value(struct cos_object *dict, const char *key
* NSPDFERROR_TYPE if the object passed in \p dict is not a dictionary.
* NSPDFERROR_NOTFOUND if the key is not present in the dictionary.
*/
-nspdferror cos_get_dictionary_value(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
+nspdferror cos_get_dictionary_value(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
-nspdferror cos_get_dictionary_int(struct pdf_doc *doc, struct cos_object *dict, const char *key, int64_t *value_out);
+nspdferror cos_get_dictionary_int(struct nspdf_doc *doc, struct cos_object *dict, const char *key, int64_t *value_out);
-nspdferror cos_get_dictionary_name(struct pdf_doc *doc, struct cos_object *dict, const char *key, const char **value_out);
+nspdferror cos_get_dictionary_name(struct nspdf_doc *doc, struct cos_object *dict, const char *key, const char **value_out);
-nspdferror cos_get_dictionary_dictionary(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
+nspdferror cos_get_dictionary_dictionary(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
-nspdferror cos_heritable_dictionary_dictionary(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
+nspdferror cos_heritable_dictionary_dictionary(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
-nspdferror cos_get_dictionary_array(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
+nspdferror cos_get_dictionary_array(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
-nspdferror cos_heritable_dictionary_array(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
+nspdferror cos_heritable_dictionary_array(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
-nspdferror cos_get_int(struct pdf_doc *doc, struct cos_object *cobj, int64_t *value_out);
+nspdferror cos_get_int(struct nspdf_doc *doc, struct cos_object *cobj, int64_t *value_out);
-nspdferror cos_get_name(struct pdf_doc *doc, struct cos_object *cobj, const char **value_out);
+nspdferror cos_get_name(struct nspdf_doc *doc, struct cos_object *cobj, const char **value_out);
-nspdferror cos_get_dictionary(struct pdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out);
+nspdferror cos_get_dictionary(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out);
-nspdferror cos_get_array(struct pdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out);
+nspdferror cos_get_array(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out);
-nspdferror cos_get_array_size(struct pdf_doc *doc, struct cos_object *cobj, unsigned int *size_out);
+nspdferror cos_get_array_size(struct nspdf_doc *doc, struct cos_object *cobj, unsigned int *size_out);
-nspdferror cos_get_array_value(struct pdf_doc *doc, struct cos_object *array, unsigned int index, struct cos_object **value_out);
+nspdferror cos_get_array_value(struct nspdf_doc *doc, struct cos_object *array, unsigned int index, struct cos_object **value_out);
-nspdferror cos_get_array_dictionary(struct pdf_doc *doc, struct cos_object *arrau, unsigned int index, struct cos_object **value_out);
+nspdferror cos_get_array_dictionary(struct nspdf_doc *doc, struct cos_object *arrau, unsigned int index, struct cos_object **value_out);
diff --git a/src/cos_parse.c b/src/cos_parse.c
new file mode 100644
index 0000000..ca3d802
--- /dev/null
+++ b/src/cos_parse.c
@@ -0,0 +1,886 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <nspdf/errors.h>
+
+#include "cos_parse.h"
+#include "byte_class.h"
+#include "cos_object.h"
+#include "pdf_doc.h"
+
+/** increments in which cos string allocations are extended */
+#define COS_STRING_ALLOC 32
+
+/** Maximum length of cos name */
+#define NAME_MAX_LENGTH 127
+
+/**
+ * Append a single byte to a cos string.
+ *
+ * When the buffer is full it is extended by COS_STRING_ALLOC bytes before
+ * the byte is stored.
+ *
+ * \param s the string to extend
+ * \param c the byte to place at the end of the string
+ * \return NSPDFERROR_OK on success or NSPDFERROR_NOMEM if the buffer could
+ *         not be extended, in which case the string is left unchanged.
+ */
+static nspdferror
+cos_string_append(struct cos_string *s, uint8_t c)
+{
+    if (s->length == s->alloc) {
+        /* no spare capacity, grow the buffer by one increment */
+        uint8_t *grown = realloc(s->data, s->alloc + COS_STRING_ALLOC);
+
+        if (grown == NULL) {
+            return NSPDFERROR_NOMEM;
+        }
+        s->alloc += COS_STRING_ALLOC;
+        s->data = grown;
+    }
+
+    s->data[s->length] = c;
+    s->length += 1;
+
+    return NSPDFERROR_OK;
+}
+
+/**
+ * Convert an ASCII hexadecimal digit into its numeric value.
+ *
+ * \param x the byte to convert
+ * \return 0 to 15 for a hexadecimal digit, any other byte is returned
+ *         unchanged.
+ */
+static uint8_t xtoi(uint8_t x)
+{
+    if ((x >= '0') && (x <= '9')) {
+        return x - '0';
+    }
+    if ((x >= 'a') && (x <= 'f')) {
+        return x - 'a' + 10;
+    }
+    if ((x >= 'A') && (x <= 'F')) {
+        return x - 'A' + 10;
+    }
+    return x;
+}
+
+/**
+ * decode a run of decimal digits into a new COS integer object
+ *
+ * Only decimal digits are accepted; whitespace after the digits is skipped.
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the first digit, updated past the number and
+ *                   any following whitespace on success
+ * \param cosobj_out updated with the newly allocated integer object
+ * \return NSPDFERROR_OK on success, NSPDFERROR_SYNTAX if no digit is present
+ *         at the offset, NSPDFERROR_RANGE if the number has too many digits
+ *         or NSPDFERROR_NOMEM if the object could not be allocated.
+ */
+static nspdferror
+cos_decode_number(struct nspdf_doc *doc,
+                  uint64_t *offset_out,
+                  struct cos_object **cosobj_out)
+{
+    uint8_t digits[21]; /* value of each decimal place, most significant first */
+    unsigned int ndigits = 0; /* number of entries used in digits */
+    unsigned int place; /* index of the digit being accumulated */
+    uint64_t cursor = *offset_out; /* current offset of source data */
+    uint64_t total = 0; /* accumulated value */
+    uint8_t byte; /* current byte from source data */
+    struct cos_object *numobj;
+
+    /* gather digits up to the first byte that is not a decimal */
+    for (;;) {
+        if (ndigits == sizeof(digits)) {
+            return NSPDFERROR_RANGE; /* number too long */
+        }
+        byte = DOC_BYTE(doc, cursor);
+        if ((bclass[byte] & BC_DCML) != BC_DCML) {
+            break;
+        }
+        digits[ndigits] = byte - '0';
+        ndigits++;
+        cursor++;
+    }
+
+    if (ndigits == 0) {
+        /* parse error no decimals in input */
+        return NSPDFERROR_SYNTAX;
+    }
+
+    /* fold the digits into a value, most significant place first */
+    for (place = 0; place < ndigits; place++) {
+        total = (total * 10) + digits[place];
+    }
+
+    doc_skip_ws(doc, &cursor);
+
+    numobj = calloc(1, sizeof(*numobj));
+    if (numobj == NULL) {
+        return NSPDFERROR_NOMEM;
+    }
+    numobj->type = COS_TYPE_INT;
+    numobj->u.i = (int64_t)total;
+
+    *cosobj_out = numobj;
+    *offset_out = cursor;
+
+    return NSPDFERROR_OK;
+}
+
+
+/**
+ * decode literal string
+ *
+ * Parses a parenthesised literal string. Balanced unescaped parentheses are
+ * kept as part of the string, unescaped end of line bytes are folded into a
+ * single newline and backslash escapes (named characters, one to three digit
+ * octal values and line continuations) are resolved.
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the opening '(', updated past the string and
+ *                   any following whitespace on success
+ * \param cosobj_out updated with the newly allocated string object
+ * \return NSPDFERROR_OK on success, NSPDFERROR_SYNTAX if the input does not
+ *         start with '(' or the string is not closed before the end of the
+ *         document, NSPDFERROR_NOMEM on allocation failure.
+ */
+static nspdferror
+cos_decode_string(struct nspdf_doc *doc,
+                  uint64_t *offset_out,
+                  struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj;
+    uint8_t c;
+    unsigned int pdepth = 1; /* depth of open parens */
+    struct cos_string *cstring;
+    nspdferror res;
+    bool emit; /* whether c is to be appended to the string */
+
+    offset = *offset_out;
+
+    c = DOC_BYTE(doc, offset++);
+    if (c != '(') {
+        return NSPDFERROR_SYNTAX;
+    }
+
+    cstring = calloc(1, sizeof(*cstring));
+    if (cstring == NULL) {
+        return NSPDFERROR_NOMEM;
+    }
+
+    cosobj = calloc(1, sizeof(*cosobj));
+    if (cosobj == NULL) {
+        free(cstring); /* do not leak the string on failure */
+        return NSPDFERROR_NOMEM;
+    }
+    cosobj->type = COS_TYPE_STRING;
+    cosobj->u.s = cstring;
+
+    while (pdepth > 0) {
+        if (offset >= doc->length) {
+            /* ran out of data before the closing paren. Only the main loop
+             * is guarded, escape look-ahead still relies on DOC_BYTE.
+             */
+            res = NSPDFERROR_SYNTAX;
+            goto cos_decode_string_failed;
+        }
+
+        emit = true;
+        c = DOC_BYTE(doc, offset++);
+
+        if (c == ')') {
+            pdepth--;
+            if (pdepth == 0) {
+                break;
+            }
+        } else if (c == '(') {
+            pdepth++;
+        } else if ((bclass[c] & BC_EOLM) != 0) {
+            /* unescaped end of line characters are translated to a single
+             * newline
+             */
+            while ((bclass[DOC_BYTE(doc, offset)] & BC_EOLM) != 0) {
+                offset++;
+            }
+            c = '\n';
+        } else if (c == '\\') {
+            /* escaped chars */
+            c = DOC_BYTE(doc, offset++);
+            switch (c) {
+            case 'n':
+                c = '\n';
+                break;
+
+            case 'r':
+                c = '\r';
+                break;
+
+            case 't':
+                c = '\t';
+                break;
+
+            case 'b':
+                c = '\b';
+                break;
+
+            case 'f':
+                c = '\f';
+                break;
+
+            case '(':
+            case ')':
+            case '\\':
+                /* escaped delimiter stands for itself */
+                break;
+
+            default:
+                if ((bclass[c] & BC_EOLM) != 0) {
+                    /* escaped end of line, swallow it. The byte after the
+                     * line end is left for the next iteration so parens
+                     * and escapes following a continuation are processed
+                     * normally.
+                     */
+                    while ((bclass[DOC_BYTE(doc, offset)] & BC_EOLM) != 0) {
+                        offset++;
+                    }
+                    emit = false;
+                } else if ((bclass[c] & BC_OCTL) != 0) {
+                    /* octal value of one to three digits */
+                    uint8_t val;
+                    val = (c - '0');
+                    c = DOC_BYTE(doc, offset);
+                    if ((bclass[c] & BC_OCTL) != 0) {
+                        offset++;
+                        val = (val << 3) | (c - '0');
+                        c = DOC_BYTE(doc, offset);
+                        if ((bclass[c] & BC_OCTL) != 0) {
+                            offset++;
+                            val = (val << 3) | (c - '0');
+                        }
+                    }
+                    /* the decoded value is used however many digits were
+                     * present, not the look-ahead byte
+                     */
+                    c = val;
+                } /* else invalid (skip backslash) */
+                break;
+            }
+        }
+
+        if (emit) {
+            /* c contains the character to add to the string */
+            res = cos_string_append(cstring, c);
+            if (res != NSPDFERROR_OK) {
+                goto cos_decode_string_failed;
+            }
+        }
+    }
+
+    doc_skip_ws(doc, &offset);
+
+    *cosobj_out = cosobj;
+    *offset_out = offset;
+
+    return NSPDFERROR_OK;
+
+cos_decode_string_failed:
+    free(cstring->data);
+    free(cstring);
+    free(cosobj);
+
+    return res;
+}
+
+/**
+ * decode hex encoded string
+ *
+ * Pairs of hexadecimal digits between '<' and '>' each produce one byte,
+ * whitespace between digits is ignored. If the final pair is missing its
+ * second digit the low nibble of the last byte is zero.
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the opening '<', updated past the string and
+ *                   any following whitespace on success
+ * \param cosobj_out updated with the newly allocated string object
+ * \return NSPDFERROR_OK on success, NSPDFERROR_SYNTAX if the input does not
+ *         start with '<', contains a byte that is neither hex nor whitespace
+ *         or is not closed before the end of the document, NSPDFERROR_NOMEM
+ *         on allocation failure.
+ */
+static nspdferror
+cos_decode_hex_string(struct nspdf_doc *doc,
+                      uint64_t *offset_out,
+                      struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj;
+    uint8_t c;
+    uint8_t value = 0;
+    struct cos_string *cstring;
+    bool first = true; /* next hex digit is the high nibble */
+    nspdferror res;
+
+    offset = *offset_out;
+
+    c = DOC_BYTE(doc, offset++);
+    if (c != '<') {
+        return NSPDFERROR_SYNTAX;
+    }
+
+    cstring = calloc(1, sizeof(*cstring));
+    if (cstring == NULL) {
+        return NSPDFERROR_NOMEM;
+    }
+
+    cosobj = calloc(1, sizeof(*cosobj));
+    if (cosobj == NULL) {
+        free(cstring); /* do not leak the string on failure */
+        return NSPDFERROR_NOMEM;
+    }
+    cosobj->type = COS_TYPE_STRING;
+    cosobj->u.s = cstring;
+
+    for (; offset < doc->length; offset++) {
+        c = DOC_BYTE(doc, offset);
+        if (c == '>') {
+            if (first == false) {
+                /* odd number of digits, emit the pending high nibble */
+                res = cos_string_append(cstring, value);
+                if (res != NSPDFERROR_OK) {
+                    goto cos_decode_hex_string_failed;
+                }
+            }
+            offset++;
+            doc_skip_ws(doc, &offset);
+
+            *cosobj_out = cosobj;
+            *offset_out = offset;
+
+            return NSPDFERROR_OK;
+        } else if ((bclass[c] & BC_HEXL) != 0) {
+            if (first) {
+                value = xtoi(c) << 4;
+                first = false;
+            } else {
+                value |= xtoi(c);
+                first = true;
+                res = cos_string_append(cstring, value);
+                if (res != NSPDFERROR_OK) {
+                    goto cos_decode_hex_string_failed;
+                }
+            }
+        } else if ((bclass[c] & BC_WSPC) == 0) {
+            break; /* unknown byte value in string */
+        }
+    }
+
+    /* bad byte in the string or no closing '>' before end of data */
+    res = NSPDFERROR_SYNTAX;
+
+cos_decode_hex_string_failed:
+    free(cstring->data);
+    free(cstring);
+    free(cosobj);
+
+    return res;
+}
+
+/**
+ * decode a dictionary object
+ *
+ * Parses name/value pairs between '<<' and '>>'. Each entry is linked at
+ * the head of the dictionary entry list.
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the opening '<<', updated past the closing
+ *                   '>>' and any following whitespace on success
+ * \param cosobj_out updated with the newly allocated dictionary object
+ * \return NSPDFERROR_OK on success, NSPDFERROR_SYNTAX if the input does not
+ *         start with '<<' or a key is not a name, NSPDFERROR_NOMEM on
+ *         allocation failure or the error from decoding a key or value.
+ */
+static nspdferror
+cos_decode_dictionary(struct nspdf_doc *doc,
+                      uint64_t *offset_out,
+                      struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj;
+    struct cos_dictionary_entry *entry;
+    struct cos_object *key;
+    struct cos_object *value;
+    nspdferror res;
+
+    offset = *offset_out;
+
+    if ((DOC_BYTE(doc, offset) != '<') ||
+        (DOC_BYTE(doc, offset + 1) != '<')) {
+        return NSPDFERROR_SYNTAX;
+    }
+    offset += 2;
+    doc_skip_ws(doc, &offset);
+
+    cosobj = calloc(1, sizeof(struct cos_object));
+    if (cosobj == NULL) {
+        return NSPDFERROR_NOMEM;
+    }
+    cosobj->type = COS_TYPE_DICTIONARY;
+
+    /* keep parsing entries until both bytes of the closing '>>' are seen.
+     * A lone '>' falls through to the object parser and is reported as a
+     * syntax error there.
+     */
+    while ((DOC_BYTE(doc, offset) != '>') ||
+           (DOC_BYTE(doc, offset + 1) != '>')) {
+
+        res = cos_parse_object(doc, &offset, &key);
+        if (res != NSPDFERROR_OK) {
+            printf("key object decode failed\n");
+            /* NOTE(review): relies on cos_free_object releasing entries
+             * already linked into the dictionary - confirm
+             */
+            cos_free_object(cosobj);
+            return res;
+        }
+        if (key->type != COS_TYPE_NAME) {
+            /* key value pairs without a name */
+            printf("key was %d not a name %d\n", key->type, COS_TYPE_NAME);
+            cos_free_object(key);
+            cos_free_object(cosobj);
+            return NSPDFERROR_SYNTAX;
+        }
+
+        res = cos_parse_object(doc, &offset, &value);
+        if (res != NSPDFERROR_OK) {
+            printf("Unable to decode value object in dictionary\n");
+            cos_free_object(key);
+            cos_free_object(cosobj);
+            return res;
+        }
+
+        /* add dictionary entry */
+        entry = calloc(1, sizeof(struct cos_dictionary_entry));
+        if (entry == NULL) {
+            cos_free_object(value);
+            cos_free_object(key);
+            cos_free_object(cosobj);
+            return NSPDFERROR_NOMEM;
+        }
+
+        entry->key = key;
+        entry->value = value;
+        entry->next = cosobj->u.dictionary;
+
+        cosobj->u.dictionary = entry;
+    }
+    offset += 2; /* skip closing >> */
+    doc_skip_ws(doc, &offset);
+
+    *cosobj_out = cosobj;
+    *offset_out = offset;
+
+    return NSPDFERROR_OK;
+}
+
+/**
+ * decode a list
+ *
+ * Parses the objects between '[' and ']' into a COS array object.
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the opening '[', updated past the closing ']'
+ *                   and any following whitespace on success
+ * \param cosobj_out updated with the newly allocated array object
+ * \return NSPDFERROR_OK on success, NSPDFERROR_SYNTAX if the input does not
+ *         start with '[', NSPDFERROR_NOMEM on allocation failure or the
+ *         error from skipping whitespace or decoding a value.
+ */
+static nspdferror
+cos_decode_list(struct nspdf_doc *doc,
+                uint64_t *offset_out,
+                struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj;
+    struct cos_array_entry *entry;
+    struct cos_object *value;
+    nspdferror res;
+
+    offset = *offset_out;
+
+    /* sanity check first token is list open */
+    if (DOC_BYTE(doc, offset) != '[') {
+        printf("not a [\n");
+        return NSPDFERROR_SYNTAX; /* syntax error */
+    }
+    offset++;
+
+    /* advance offset to next token */
+    res = doc_skip_ws(doc, &offset);
+    if (res != NSPDFERROR_OK) {
+        return res;
+    }
+
+    cosobj = calloc(1, sizeof(struct cos_object));
+    if (cosobj == NULL) {
+        return NSPDFERROR_NOMEM;
+    }
+    cosobj->type = COS_TYPE_ARRAY;
+
+    while (DOC_BYTE(doc, offset) != ']') {
+
+        res = cos_parse_object(doc, &offset, &value);
+        if (res != NSPDFERROR_OK) {
+            cos_free_object(cosobj);
+            printf("Unable to decode value object in list\n");
+            return res;
+        }
+
+        /* add entry to array */
+        entry = calloc(1, sizeof(struct cos_array_entry));
+        if (entry == NULL) {
+            /* the value is not yet linked into the array so it has to be
+             * released separately
+             */
+            cos_free_object(value);
+            cos_free_object(cosobj);
+            return NSPDFERROR_NOMEM;
+        }
+
+        /* NOTE(review): entries are linked at the head of the list so the
+         * list holds values in reverse document order - confirm the array
+         * accessors expect this
+         */
+        entry->value = value;
+        entry->next = cosobj->u.array;
+
+        cosobj->u.array = entry;
+    }
+    offset++; /* skip closing ] */
+
+    doc_skip_ws(doc, &offset);
+
+    *cosobj_out = cosobj;
+    *offset_out = offset;
+
+    return NSPDFERROR_OK;
+}
+
+
+/**
+ * decode a name object
+ *
+ * The name is every byte after the leading '/' up to the next whitespace or
+ * delimiter byte and is limited to NAME_MAX_LENGTH bytes.
+ *
+ * \todo deal with # symbols on pdf versions 1.2 and later
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the leading '/', updated past the name and any
+ *                   following whitespace on success
+ * \param cosobj_out updated with the newly allocated name object
+ * \return NSPDFERROR_OK on success, NSPDFERROR_SYNTAX if the input does not
+ *         start with '/', NSPDFERROR_RANGE if the name is longer than
+ *         NAME_MAX_LENGTH or NSPDFERROR_NOMEM on allocation failure.
+ */
+static nspdferror
+cos_decode_name(struct nspdf_doc *doc,
+                uint64_t *offset_out,
+                struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj;
+    uint8_t c;
+    char name[NAME_MAX_LENGTH + 1];
+    int idx = 0;
+
+    offset = *offset_out;
+
+    c = DOC_BYTE(doc, offset++);
+    if (c != '/') {
+        return NSPDFERROR_SYNTAX; /* names must be prefixed with a / */
+    }
+
+    /* idx is allowed to reach NAME_MAX_LENGTH + 1 so an over long name can
+     * be detected, the final store still lands inside the buffer
+     */
+    c = DOC_BYTE(doc, offset);
+    while ((idx <= NAME_MAX_LENGTH) &&
+           ((bclass[c] & (BC_WSPC | BC_DELM)) == 0)) {
+        offset++;
+        name[idx++] = c;
+        c = DOC_BYTE(doc, offset);
+    }
+    if (idx > NAME_MAX_LENGTH) {
+        /* name length exceeded implementation limit */
+        return NSPDFERROR_RANGE;
+    }
+    name[idx] = 0;
+
+    doc_skip_ws(doc, &offset);
+
+    cosobj = calloc(1, sizeof(struct cos_object));
+    if (cosobj == NULL) {
+        return NSPDFERROR_NOMEM; /* memory error */
+    }
+
+    cosobj->type = COS_TYPE_NAME;
+    cosobj->u.n = strdup(name);
+    if (cosobj->u.n == NULL) {
+        /* a name object without its string is of no use to callers */
+        free(cosobj);
+        return NSPDFERROR_NOMEM;
+    }
+
+    *cosobj_out = cosobj;
+
+    *offset_out = offset;
+
+    return NSPDFERROR_OK;
+}
+
+/**
+ * decode a cos boolean object
+ *
+ * Accepts the keywords true and false with each letter in either case.
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the keyword, updated past the keyword and any
+ *                   following whitespace on success
+ * \param cosobj_out updated with the newly allocated boolean object
+ * \return NSPDFERROR_OK on success, NSPDFERROR_SYNTAX if the input is
+ *         neither keyword or NSPDFERROR_NOMEM on allocation failure.
+ */
+static int
+cos_decode_boolean(struct nspdf_doc *doc,
+                   uint64_t *offset_out,
+                   struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj;
+    uint8_t c;
+    bool value;
+    const char *keyword; /* lower case keyword selected by the first byte */
+    unsigned int idx;
+
+    offset = *offset_out;
+
+    /* the first byte selects which keyword the rest must match */
+    c = DOC_BYTE(doc, offset++);
+    if ((c == 't') || (c == 'T')) {
+        keyword = "true";
+        value = true;
+    } else if ((c == 'f') || (c == 'F')) {
+        keyword = "false";
+        value = false;
+    } else {
+        return NSPDFERROR_SYNTAX;
+    }
+
+    /* remaining letters may each be lower or upper case */
+    for (idx = 1; keyword[idx] != 0; idx++) {
+        c = DOC_BYTE(doc, offset++);
+        if ((c != keyword[idx]) &&
+            (c != (keyword[idx] - 'a' + 'A'))) {
+            return NSPDFERROR_SYNTAX;
+        }
+    }
+
+    doc_skip_ws(doc, &offset);
+
+    cosobj = calloc(1, sizeof(struct cos_object));
+    if (cosobj == NULL) {
+        return NSPDFERROR_NOMEM; /* memory error */
+    }
+
+    cosobj->type = COS_TYPE_BOOL;
+    cosobj->u.b = value;
+
+    *cosobj_out = cosobj;
+
+    *offset_out = offset;
+
+    return NSPDFERROR_OK;
+}
+
+/**
+ * decode the null object.
+ *
+ * Accepts the keyword null with each letter in either case.
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the keyword, updated past the keyword and any
+ *                   following whitespace on success
+ * \param cosobj_out updated with the newly allocated null object
+ * \return NSPDFERROR_OK on success, NSPDFERROR_SYNTAX if the input is not
+ *         the null keyword or NSPDFERROR_NOMEM on allocation failure.
+ */
+static nspdferror
+cos_decode_null(struct nspdf_doc *doc,
+                uint64_t *offset_out,
+                struct cos_object **cosobj_out)
+{
+    static const char keyword[] = "null";
+    uint64_t offset;
+    struct cos_object *cosobj;
+    uint8_t c;
+    unsigned int idx;
+
+    offset = *offset_out;
+
+    /* each letter may be lower or upper case */
+    for (idx = 0; keyword[idx] != 0; idx++) {
+        c = DOC_BYTE(doc, offset++);
+        if ((c != keyword[idx]) &&
+            (c != (keyword[idx] - 'a' + 'A'))) {
+            return NSPDFERROR_SYNTAX;
+        }
+    }
+
+    doc_skip_ws(doc, &offset);
+
+    cosobj = calloc(1, sizeof(struct cos_object));
+    if (cosobj == NULL) {
+        return NSPDFERROR_NOMEM;
+    }
+
+    cosobj->type = COS_TYPE_NULL;
+
+    *cosobj_out = cosobj;
+
+    *offset_out = offset;
+
+    return NSPDFERROR_OK;
+}
+
+
+/**
+ * attempt to decode input data into a reference or indirect object
+ *
+ * The input data already had a positive integer decoded from it:
+ *  - if another non-negative integer follows and an R character after that
+ *      it is a reference,
+ *
+ *  - if another non-negative integer follows and 'obj' after that, a direct
+ *      object followed by 'endobj' is an indirect object.
+ *
+ * If neither form matches the input object and offset are left untouched
+ * and no error is reported.
+ *
+ * \todo stream objects (a dictionary followed by 'stream', stream data,
+ *       'endstream' then 'endobj') are not decoded and are reported as a
+ *       syntax error.
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of current cursor in input data
+ * \param cosobj_out the object to return into, on input contains the first
+ *                   integer. It is only modified or replaced on success.
+ * \return NSPDFERROR_OK if a reference or indirect object was decoded or the
+ *         input was neither, NSPDFERROR_SYNTAX if an indirect object is not
+ *         terminated by 'endobj', NSPDFERROR_NOMEM on allocation failure or
+ *         the error from decoding the indirect object.
+ */
+static nspdferror
+cos_attempt_decode_reference(struct nspdf_doc *doc,
+                             uint64_t *offset_out,
+                             struct cos_object **cosobj_out)
+{
+    nspdferror res;
+    uint64_t offset;
+    uint8_t c;
+    struct cos_object *generation; /* generation object */
+
+    offset = *offset_out;
+
+    res = cos_decode_number(doc, &offset, &generation);
+    if (res != NSPDFERROR_OK) {
+        /* no error if next token could not be decoded as a number */
+        return NSPDFERROR_OK;
+    }
+
+    if (generation->type != COS_TYPE_INT) {
+        /* next object was not an integer so not a reference */
+        cos_free_object(generation);
+        return NSPDFERROR_OK;
+    }
+
+    if (generation->u.i < 0) {
+        /* integer was negative so not a reference (generations must be
+         * non-negative)
+         */
+        cos_free_object(generation);
+        return NSPDFERROR_OK;
+    }
+
+    /* two int in a row, look for the R */
+    c = DOC_BYTE(doc, offset);
+    if (c == 'R') {
+        struct cos_reference *nref; /* new reference */
+
+        offset++;
+
+        doc_skip_ws(doc, &offset);
+
+        nref = calloc(1, sizeof(struct cos_reference));
+        if (nref == NULL) {
+            cos_free_object(generation);
+            return NSPDFERROR_NOMEM; /* memory error */
+        }
+
+        nref->id = (*cosobj_out)->u.i;
+        nref->generation = generation->u.i;
+
+        /* overwrite input object for output (it has to be an int which has no
+         * allocation to free)
+         */
+        (*cosobj_out)->type = COS_TYPE_REFERENCE;
+        (*cosobj_out)->u.reference = nref;
+
+        *offset_out = offset;
+
+    } else if ((c == 'o') &&
+               (DOC_BYTE(doc, offset + 1) == 'b') &&
+               (DOC_BYTE(doc, offset + 2) == 'j')) {
+        struct cos_object *indirect; /* indirect object */
+
+        offset += 3;
+
+        res = doc_skip_ws(doc, &offset);
+        if (res != NSPDFERROR_OK) {
+            cos_free_object(generation);
+            return res;
+        }
+
+        res = cos_parse_object(doc, &offset, &indirect);
+        if (res != NSPDFERROR_OK) {
+            cos_free_object(generation);
+            return res;
+        }
+
+        /* the object must be followed by the complete endobj keyword, any
+         * mismatching byte is an error
+         */
+        if ((DOC_BYTE(doc, offset    ) != 'e') ||
+            (DOC_BYTE(doc, offset + 1) != 'n') ||
+            (DOC_BYTE(doc, offset + 2) != 'd') ||
+            (DOC_BYTE(doc, offset + 3) != 'o') ||
+            (DOC_BYTE(doc, offset + 4) != 'b') ||
+            (DOC_BYTE(doc, offset + 5) != 'j')) {
+            cos_free_object(indirect);
+            cos_free_object(generation);
+            return NSPDFERROR_SYNTAX;
+        }
+        offset += 6;
+
+        res = doc_skip_ws(doc, &offset);
+        if (res != NSPDFERROR_OK) {
+            cos_free_object(indirect);
+            cos_free_object(generation);
+            return res;
+        }
+
+        /* the decoded object replaces the integer passed in */
+        cos_free_object(*cosobj_out);
+
+        *cosobj_out = indirect;
+
+        *offset_out = offset;
+    }
+
+    cos_free_object(generation);
+    return NSPDFERROR_OK;
+}
+
+
+/*
+ * Parse input stream into an object
+ *
+ * lex and parse a byte stream to generate COS objects
+ *
+ * lexing the input.
+ *  check first character:
+ *
+ * < either a hex string or a dictionary
+ *     second char < means dictionary else hex string
+ * - either an integer or real
+ * + either an integer or real
+ * 0-9 an integer, unsigned integer or real
+ * . a real number
+ * ( a string
+ * / a name
+ * [ a list
+ * t|T boolean true
+ * f|F boolean false
+ * n|N null
+ *
+ * Numbers are currently handed to a decoder that accepts decimal digits
+ * only, so input starting with '-', '+' or '.' is reported as a syntax
+ * error.
+ *
+ * Grammar is:
+ * cos_object:
+ *   TOK_NULL |
+ *   TOK_BOOLEAN |
+ *   TOK_INT |
+ *   TOK_REAL |
+ *   TOK_NAME |
+ *   TOK_STRING |
+ *   list |
+ *   dictionary |
+ *   object_reference |
+ *   indirect_object;
+ *
+ * list:
+ *   '[' listargs ']';
+ *
+ * listargs:
+ *   cos_object
+ *   |
+ *   listargs cos_object
+ *   ;
+ *
+ * object_reference:
+ *   TOK_UINT TOK_UINT 'R';
+ *
+ * indirect_object:
+ *   TOK_UINT TOK_UINT 'obj' cos_object 'endobj'
+ *   |
+ *   TOK_UINT TOK_UINT 'obj' dictionary 'stream' streamdata 'endstream' 'endobj'
+ *   ;
+ *
+ * On success the object and the offset following it are returned through
+ * cosobj_out and offset_out, on failure neither is modified.
+ */
+nspdferror
+cos_parse_object(struct nspdf_doc *doc,
+                 uint64_t *offset_out,
+                 struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    nspdferror res;
+    struct cos_object *cosobj;
+
+    offset = *offset_out;
+
+    /* object could be any type use first char to try and select */
+    switch (DOC_BYTE(doc, offset)) {
+
+    case '-':
+    case '+':
+    case '.':
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+    case '8':
+    case '9':
+        res = cos_decode_number(doc, &offset, &cosobj);
+        /* if type is positive integer try to check for reference */
+        if ((res == NSPDFERROR_OK) &&
+            (cosobj->type == COS_TYPE_INT) &&
+            (cosobj->u.i > 0)) {
+            res = cos_attempt_decode_reference(doc, &offset, &cosobj);
+            if (res != NSPDFERROR_OK) {
+                /* the reference decoder only takes ownership of the
+                 * integer on success so it must be released here
+                 */
+                cos_free_object(cosobj);
+            }
+        }
+        break;
+
+    case '<':
+        if (DOC_BYTE(doc, offset + 1) == '<') {
+            res = cos_decode_dictionary(doc, &offset, &cosobj);
+        } else {
+            res = cos_decode_hex_string(doc, &offset, &cosobj);
+        }
+        break;
+
+    case '(':
+        res = cos_decode_string(doc, &offset, &cosobj);
+        break;
+
+    case '/':
+        res = cos_decode_name(doc, &offset, &cosobj);
+        break;
+
+    case '[':
+        res = cos_decode_list(doc, &offset, &cosobj);
+        break;
+
+    case 't':
+    case 'T':
+    case 'f':
+    case 'F':
+        res = cos_decode_boolean(doc, &offset, &cosobj);
+        break;
+
+    case 'n':
+    case 'N':
+        res = cos_decode_null(doc, &offset, &cosobj);
+        break;
+
+    default:
+        res = NSPDFERROR_SYNTAX; /* syntax error */
+    }
+
+    if (res == NSPDFERROR_OK) {
+        *cosobj_out = cosobj;
+        *offset_out = offset;
+    }
+
+    return res;
+}
diff --git a/src/cos_parse.h b/src/cos_parse.h
new file mode 100644
index 0000000..adfb835
--- /dev/null
+++ b/src/cos_parse.h
@@ -0,0 +1,10 @@
+struct nspdf_doc;
+struct cos_object;
+
+/**
+ * Decode input stream into an object
+ *
+ * lex and parse a byte stream to generate a COS object.
+ *
+ * \param doc the pdf document to read bytes from
+ * \param offset_out offset in the document at which to start parsing, updated
+ *                   to the offset following the object on success
+ * \param cosobj_out updated with the parsed object on success
+ * \return NSPDFERROR_OK on success else an error code, in which case neither
+ *         output parameter is modified.
+ */
+nspdferror cos_parse_object(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out);
+
diff --git a/src/document.c b/src/document.c
new file mode 100644
index 0000000..9be0ab5
--- /dev/null
+++ b/src/document.c
@@ -0,0 +1,690 @@
+/*
+ * Copyright 2018 Vincent Sanders <vince(a)netsurf-browser.org>
+ *
+ * This file is part of libnspdf.
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include <nspdf/document.h>
+
+#include "cos_parse.h"
+#include "byte_class.h"
+#include "cos_object.h"
+#include "pdf_doc.h"
+
+#define SLEN(x) (sizeof((x)) - 1)
+
+#define STARTXREF_TOK "startxref"
+
+/* Number of bytes to search back from file end to find xref start token,
+ * convention says 1024 bytes
+ */
+#define STARTXREF_SEARCH_SIZE 1024
+
+
+/**
+ * read an unsigned decimal integer from the document
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the first digit, updated to the first byte
+ *                   after the digits on success
+ * \param result_out updated with the value read on success
+ * \return NSPDFERROR_OK on success, NSPDFERROR_SYNTAX if no digit is present
+ *         at the offset or NSPDFERROR_RANGE if the number has too many
+ *         digits.
+ */
+static nspdferror
+doc_read_uint(struct nspdf_doc *doc,
+              uint64_t *offset_out,
+              uint64_t *result_out)
+{
+    uint8_t c; /* current byte from source data */
+    unsigned int len; /* number of decimal places in number */
+    uint8_t num[21]; /* temporary buffer for decimal values */
+    uint64_t offset; /* current offset of source data */
+    uint64_t result = 0; /* parsed result */
+    uint64_t tens;
+
+    offset = *offset_out;
+
+    for (len = 0; len < sizeof(num); len++) {
+        c = DOC_BYTE(doc, offset);
+        if ((bclass[c] & BC_DCML) != BC_DCML) {
+            if (len == 0) {
+                /* parse error no decimals in input */
+                return NSPDFERROR_SYNTAX;
+            }
+            /* sum value from each place */
+            for (tens = 1; len > 0; tens = tens * 10, len--) {
+                result += (num[len - 1] * tens);
+            }
+
+            *offset_out = offset;
+            *result_out = result;
+
+            return NSPDFERROR_OK;
+        }
+        num[len] = c - '0';
+        offset++;
+    }
+    return NSPDFERROR_RANGE; /* number too long */
+}
+
+
+/**
+ * finds the startxref marker at the end of input
+ *
+ * Searches backwards from the end of the document, looking no further back
+ * than STARTXREF_SEARCH_SIZE bytes from the end.
+ *
+ * \param doc the pdf document
+ * \param offset_out updated with the offset of the marker on success
+ * \return NSPDFERROR_OK if the marker was found else NSPDFERROR_SYNTAX
+ */
+static nspdferror find_startxref(struct nspdf_doc *doc, uint64_t *offset_out)
+{
+    uint64_t offset; /* offset of characters being considered for startxref */
+    uint64_t earliest; /* earliest offset to search for startxref */
+
+    if (doc->length < SLEN(STARTXREF_TOK)) {
+        /* too short to hold the token, avoid the unsigned subtraction
+         * below wrapping round
+         */
+        return NSPDFERROR_SYNTAX;
+    }
+    offset = doc->length - SLEN(STARTXREF_TOK);
+
+    if (doc->length < STARTXREF_SEARCH_SIZE) {
+        earliest = 0;
+    } else {
+        earliest = doc->length - STARTXREF_SEARCH_SIZE;
+    }
+
+    for (;;) {
+        if ((DOC_BYTE(doc, offset    ) == 's') &&
+            (DOC_BYTE(doc, offset + 1) == 't') &&
+            (DOC_BYTE(doc, offset + 2) == 'a') &&
+            (DOC_BYTE(doc, offset + 3) == 'r') &&
+            (DOC_BYTE(doc, offset + 4) == 't') &&
+            (DOC_BYTE(doc, offset + 5) == 'x') &&
+            (DOC_BYTE(doc, offset + 6) == 'r') &&
+            (DOC_BYTE(doc, offset + 7) == 'e') &&
+            (DOC_BYTE(doc, offset + 8) == 'f')) {
+            *offset_out = offset;
+            return NSPDFERROR_OK;
+        }
+        if (offset == earliest) {
+            /* the earliest offset has been tested as well, stop before the
+             * unsigned offset can wrap
+             */
+            break;
+        }
+        offset--;
+    }
+    return NSPDFERROR_SYNTAX;
+}
+
+
+/**
+ * decodes a startxref field
+ *
+ * The field is the startxref token, whitespace, an unsigned decimal offset,
+ * end of line bytes and finally the %%EOF marker.
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the startxref token, updated to the offset of
+ *                   the %%EOF marker on success
+ * \param start_xref_out updated with the decoded offset value on success
+ * \return NSPDFERROR_OK on success, NSPDFERROR_SYNTAX if the token or the
+ *         %%EOF marker is missing or the error from reading the value.
+ */
+static nspdferror
+decode_startxref(struct nspdf_doc *doc,
+                 uint64_t *offset_out,
+                 uint64_t *start_xref_out)
+{
+    static const char eof_marker[] = "%%EOF";
+    uint64_t cursor = *offset_out; /* current offset in the field */
+    uint64_t xref_value; /* decoded offset of the xref table */
+    unsigned int idx;
+    nspdferror res;
+
+    /* field must open with the startxref token */
+    for (idx = 0; idx < SLEN(STARTXREF_TOK); idx++) {
+        if (DOC_BYTE(doc, cursor + idx) != STARTXREF_TOK[idx]) {
+            return NSPDFERROR_SYNTAX;
+        }
+    }
+    cursor += SLEN(STARTXREF_TOK);
+
+    res = doc_skip_ws(doc, &cursor);
+    if (res == NSPDFERROR_OK) {
+        res = doc_read_uint(doc, &cursor, &xref_value);
+    }
+    if (res == NSPDFERROR_OK) {
+        res = doc_skip_eol(doc, &cursor);
+    }
+    if (res != NSPDFERROR_OK) {
+        return res;
+    }
+
+    /* value must be followed by the end of file marker */
+    for (idx = 0; idx < SLEN(eof_marker); idx++) {
+        if (DOC_BYTE(doc, cursor + idx) != eof_marker[idx]) {
+            printf("missing EOF marker\n");
+            return NSPDFERROR_SYNTAX;
+        }
+    }
+
+    *offset_out = cursor;
+    *start_xref_out = xref_value;
+
+    return NSPDFERROR_OK;
+}
+
+
+/**
+ * finds the next trailer
+ *
+ * Searches forwards from the given offset for the trailer keyword.
+ *
+ * \param doc the pdf document
+ * \param offset_out offset to start searching from, updated with the offset
+ *                   of the keyword on success
+ * \return NSPDFERROR_OK if the keyword was found else NSPDFERROR_SYNTAX
+ */
+static nspdferror find_trailer(struct nspdf_doc *doc, uint64_t *offset_out)
+{
+    uint64_t offset; /* offset of characters being considered for trailer */
+
+    /* stop once the whole keyword no longer fits before the end of the
+     * document so no byte beyond the document length is examined
+     */
+    for (offset = *offset_out;
+         (offset + SLEN("trailer")) <= doc->length;
+         offset++) {
+        if ((DOC_BYTE(doc, offset    ) == 't') &&
+            (DOC_BYTE(doc, offset + 1) == 'r') &&
+            (DOC_BYTE(doc, offset + 2) == 'a') &&
+            (DOC_BYTE(doc, offset + 3) == 'i') &&
+            (DOC_BYTE(doc, offset + 4) == 'l') &&
+            (DOC_BYTE(doc, offset + 5) == 'e') &&
+            (DOC_BYTE(doc, offset + 6) == 'r')) {
+            *offset_out = offset;
+            return NSPDFERROR_OK;
+        }
+    }
+    return NSPDFERROR_SYNTAX;
+}
+
+
+/**
+ * decode a trailer
+ *
+ * A trailer is the trailer keyword followed by a dictionary object.
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the trailer keyword, updated past the
+ *                   dictionary on success
+ * \param trailer_out updated with the trailer dictionary on success
+ * \return NSPDFERROR_OK on success, NSPDFERROR_SYNTAX if the keyword is
+ *         missing, NSPDFERROR_TYPE if the object following it is not a
+ *         dictionary or the error from parsing the object.
+ */
+static nspdferror
+decode_trailer(struct nspdf_doc *doc,
+               uint64_t *offset_out,
+               struct cos_object **trailer_out)
+{
+    struct cos_object *trailer;
+    nspdferror res;
+    uint64_t offset;
+
+    offset = *offset_out;
+
+    /* trailer object header, any mismatching byte is an error */
+    if ((DOC_BYTE(doc, offset    ) != 't') ||
+        (DOC_BYTE(doc, offset + 1) != 'r') ||
+        (DOC_BYTE(doc, offset + 2) != 'a') ||
+        (DOC_BYTE(doc, offset + 3) != 'i') ||
+        (DOC_BYTE(doc, offset + 4) != 'l') ||
+        (DOC_BYTE(doc, offset + 5) != 'e') ||
+        (DOC_BYTE(doc, offset + 6) != 'r')) {
+        return NSPDFERROR_SYNTAX;
+    }
+    offset += 7;
+    doc_skip_ws(doc, &offset);
+
+    res = cos_parse_object(doc, &offset, &trailer);
+    if (res != NSPDFERROR_OK) {
+        return res;
+    }
+
+    if (trailer->type != COS_TYPE_DICTIONARY) {
+        cos_free_object(trailer);
+        return NSPDFERROR_TYPE;
+    }
+
+    *trailer_out = trailer;
+    *offset_out = offset;
+
+    return NSPDFERROR_OK;
+}
+
+
+/**
+ * decode a cross reference table into the document xref table
+ *
+ * The table is the xref keyword followed by subsections. Each subsection
+ * starts with the first object number and the number of entries and is
+ * followed by that many fixed format entries. Entries marked 'n' are stored
+ * in the document xref table, decoding stops at the first token that is not
+ * a subsection header.
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the xref keyword
+ * \return NSPDFERROR_OK on success, NSPDFERROR_SYNTAX if the keyword is
+ *         missing or the error from reading a subsection or entry.
+ */
+static nspdferror
+decode_xref(struct nspdf_doc *doc, uint64_t *offset_out)
+{
+    uint64_t offset;
+    nspdferror res;
+    uint64_t objnumber; /* current object number */
+    uint64_t objcount;
+
+    offset = *offset_out;
+
+    /* xref object header, any mismatching byte is an error */
+    if ((DOC_BYTE(doc, offset    ) != 'x') ||
+        (DOC_BYTE(doc, offset + 1) != 'r') ||
+        (DOC_BYTE(doc, offset + 2) != 'e') ||
+        (DOC_BYTE(doc, offset + 3) != 'f')) {
+        return NSPDFERROR_SYNTAX;
+    }
+    offset += 4;
+
+    res = doc_skip_ws(doc, &offset);
+    if (res != NSPDFERROR_OK) {
+        return res;
+    }
+
+    /* subsections
+     * <first object number> <number of references in subsection>
+     */
+    res = doc_read_uint(doc, &offset, &objnumber);
+    while (res == NSPDFERROR_OK) {
+        uint64_t lastobj;
+        res = doc_skip_ws(doc, &offset);
+        if (res != NSPDFERROR_OK) {
+            return res;
+        }
+
+        res = doc_read_uint(doc, &offset, &objcount);
+        if (res != NSPDFERROR_OK) {
+            return res;
+        }
+
+        res = doc_skip_ws(doc, &offset);
+        if (res != NSPDFERROR_OK) {
+            return res;
+        }
+
+        lastobj = objnumber + objcount;
+        for (; objnumber < lastobj ; objnumber++) {
+            /* each entry is a fixed format */
+            uint64_t objindex;
+            uint64_t objgeneration;
+
+            /* object index */
+            res = doc_read_uint(doc, &offset, &objindex);
+            if (res != NSPDFERROR_OK) {
+                return res;
+            }
+            offset++; /* skip space */
+
+            res = doc_read_uint(doc, &offset, &objgeneration);
+            if (res != NSPDFERROR_OK) {
+                return res;
+            }
+            offset++; /* skip space */
+
+            /* only in-use ('n') entries are recorded */
+            if ((DOC_BYTE(doc, offset++) == 'n')) {
+                if (objnumber < doc->xref_size) {
+                    struct xref_table_entry *indobj;
+                    indobj = doc->xref_table + objnumber;
+
+                    indobj->ref.id = objnumber;
+                    indobj->ref.generation = objgeneration;
+                    indobj->offset = objindex;
+                } else {
+                    printf("index out of bounds\n");
+                }
+            }
+
+            offset += 2; /* skip EOL */
+        }
+
+        /* a failure to read the next subsection header ends the table */
+        res = doc_read_uint(doc, &offset, &objnumber);
+    }
+
+    return NSPDFERROR_OK;
+}
+
+
+/**
+ * recursively parse trailers and xref tables
+ *
+ * Finds and decodes the trailer following the xref table at the given
+ * offset. The first trailer decoded sizes the document xref table and
+ * supplies the Root, Encrypt, Info and ID values. If the trailer has a Prev
+ * entry the previous xref section is decoded first so entries from this
+ * section overwrite earlier ones.
+ *
+ * \param doc the pdf document
+ * \param xref_offset offset of the xref table to decode
+ * \return NSPDFERROR_OK on success else an error code
+ */
+static nspdferror
+decode_xref_trailer(struct nspdf_doc *doc, uint64_t xref_offset)
+{
+    nspdferror res;
+    uint64_t offset; /* the current data offset */
+    uint64_t startxref; /* the value of the startxref field */
+    struct cos_object *trailer; /* the current trailer */
+    int64_t prev;
+
+    offset = xref_offset;
+
+    res = find_trailer(doc, &offset);
+    if (res != NSPDFERROR_OK) {
+        printf("failed to find last trailer\n");
+        return res;
+    }
+
+    res = decode_trailer(doc, &offset, &trailer);
+    if (res != NSPDFERROR_OK) {
+        printf("failed to decode trailer\n");
+        return res;
+    }
+
+    res = decode_startxref(doc, &offset, &startxref);
+    if (res != NSPDFERROR_OK) {
+        printf("failed to decode startxref\n");
+        goto decode_xref_trailer_failed;
+    }
+
+    if (startxref != xref_offset) {
+        printf("startxref and Prev value disagree\n");
+    }
+
+    if (doc->xref_table == NULL) {
+        /* extract Size from trailer and create xref table large enough */
+        int64_t size;
+
+        res = cos_get_dictionary_int(doc, trailer, "Size", &size);
+        if (res != NSPDFERROR_OK) {
+            printf("trailer has no integer Size value\n");
+            goto decode_xref_trailer_failed;
+        }
+        if (size < 0) {
+            /* a negative size must not reach the allocator where it would
+             * be converted to an unrelated unsigned count
+             */
+            res = NSPDFERROR_RANGE;
+            goto decode_xref_trailer_failed;
+        }
+
+        res = cos_extract_dictionary_value(trailer, "Root", &doc->root);
+        if (res != NSPDFERROR_OK) {
+            printf("no Root!\n");
+            goto decode_xref_trailer_failed;
+        }
+
+        doc->xref_table = calloc(size, sizeof(struct xref_table_entry));
+        if (doc->xref_table == NULL) {
+            res = NSPDFERROR_NOMEM;
+            goto decode_xref_trailer_failed;
+        }
+        doc->xref_size = size;
+
+        /* the remaining trailer values are optional */
+        res = cos_extract_dictionary_value(trailer, "Encrypt", &doc->encrypt);
+        if ((res != NSPDFERROR_OK) && (res != NSPDFERROR_NOTFOUND)) {
+            goto decode_xref_trailer_failed;
+        }
+
+        res = cos_extract_dictionary_value(trailer, "Info", &doc->info);
+        if ((res != NSPDFERROR_OK) && (res != NSPDFERROR_NOTFOUND)) {
+            goto decode_xref_trailer_failed;
+        }
+
+        res = cos_extract_dictionary_value(trailer, "ID", &doc->id);
+        if ((res != NSPDFERROR_OK) && (res != NSPDFERROR_NOTFOUND)) {
+            goto decode_xref_trailer_failed;
+        }
+
+    }
+
+    /* check for prev ID key in trailer and recurse call if present */
+    res = cos_get_dictionary_int(doc, trailer, "Prev", &prev);
+    if (res == NSPDFERROR_OK) {
+        res = decode_xref_trailer(doc, prev);
+        if (res != NSPDFERROR_OK) {
+            goto decode_xref_trailer_failed;
+        }
+    }
+
+    offset = xref_offset;
+    /** @todo deal with XrefStm (number) in trailer */
+
+    res = decode_xref(doc, &offset);
+    if (res != NSPDFERROR_OK) {
+        printf("failed to decode xref table\n");
+        goto decode_xref_trailer_failed;
+    }
+
+    /* success also passes through here, the trailer is always released */
+decode_xref_trailer_failed:
+    cos_free_object(trailer);
+
+    return res;
+}
+
+
+/**
+ * decode non-linear pdf trailer data
+ *
+ * A PDF is nominally laid out as header, body, cross reference table and
+ * trailer. With "incremental updates" the body, cross reference table and
+ * trailer sections may be repeated.
+ *
+ * The last trailer block is located first. It ends with a startxref token,
+ * the byte offset in the file of the start of the cross reference table and
+ * a literal '%%EOF'.
+ *
+ * That offset is the start of a chain of xref/trailers which is walked back
+ * until a trailer has no Prev entry. The xref tables are then decoded
+ * forwards so that later object entries overwrite earlier ones.
+ *
+ * The trailer has to be searched for forwards from each xref table. The
+ * Prev entry does not point at the previous trailer (whose startxref would
+ * have given the associated xref table), it points at the previous xref
+ * block which must be skipped to reach the trailer that follows it.
+ *
+ * \param doc the pdf document
+ * \return NSPDFERROR_OK on success else an error code
+ */
+static nspdferror decode_trailers(struct nspdf_doc *doc)
+{
+    uint64_t cursor; /* the current data offset */
+    uint64_t first_xref; /* the value of the first startxref field */
+    nspdferror err;
+
+    err = find_startxref(doc, &cursor);
+    if (err != NSPDFERROR_OK) {
+        printf("failed to find startxref\n");
+        return err;
+    }
+
+    err = decode_startxref(doc, &cursor, &first_xref);
+    if (err != NSPDFERROR_OK) {
+        printf("failed to decode startxref\n");
+        return err;
+    }
+
+    /* recurse down the xref and trailers */
+    err = decode_xref_trailer(doc, first_xref);
+
+    return err;
+}
+
+
+/**
+ * recursively decodes a page tree
+ *
+ * A Pages node has each of its Kids decoded in turn, the first Pages node
+ * encountered also sizes the document page table from its Count value. A
+ * Page node fills in the page table entry at the current page index and
+ * advances the index.
+ *
+ * \param doc the pdf document
+ * \param page_tree_node the page tree node dictionary to decode
+ * \param page_index index of the next page table entry to fill in, updated
+ *                   as pages are decoded
+ * \return NSPDFERROR_OK on success, NSPDFERROR_FORMAT if the node is neither
+ *         a Pages nor a Page node or a Page is found before any page table
+ *         exists, NSPDFERROR_RANGE if Count is negative or the tree holds
+ *         more pages than the page table, NSPDFERROR_NOMEM on allocation
+ *         failure or the error from a dictionary lookup.
+ */
+static nspdferror
+decode_page_tree(struct nspdf_doc *doc,
+                 struct cos_object *page_tree_node,
+                 unsigned int *page_index)
+{
+    nspdferror res;
+    const char *type;
+
+    // Type = Pages
+    res = cos_get_dictionary_name(doc, page_tree_node, "Type", &type);
+    if (res != NSPDFERROR_OK) {
+        return res;
+    }
+
+    if (strcmp(type, "Pages") == 0) {
+        struct cos_object *kids;
+        unsigned int kids_size;
+        unsigned int kids_index;
+
+        if (doc->page_table == NULL) {
+            /* allocate top level page table */
+            int64_t count;
+
+            res = cos_get_dictionary_int(doc, page_tree_node, "Count", &count);
+            if (res != NSPDFERROR_OK) {
+                return res;
+            }
+            if (count < 0) {
+                /* a negative count cannot size the table */
+                return NSPDFERROR_RANGE;
+            }
+
+            doc->page_table = calloc(count, sizeof(struct page_table_entry));
+            if (doc->page_table == NULL) {
+                return NSPDFERROR_NOMEM;
+            }
+            doc->page_table_size = count;
+        }
+
+        res = cos_get_dictionary_array(doc, page_tree_node, "Kids", &kids);
+        if (res != NSPDFERROR_OK) {
+            return res;
+        }
+
+        res = cos_get_array_size(doc, kids, &kids_size);
+        if (res != NSPDFERROR_OK) {
+            return res;
+        }
+
+        for (kids_index = 0; kids_index < kids_size; kids_index++) {
+            struct cos_object *kid;
+
+            res = cos_get_array_dictionary(doc, kids, kids_index, &kid);
+            if (res != NSPDFERROR_OK) {
+                return res;
+            }
+
+            res = decode_page_tree(doc, kid, page_index);
+            if (res != NSPDFERROR_OK) {
+                return res;
+            }
+        }
+
+    } else if (strcmp(type, "Page") == 0) {
+        struct page_table_entry *page;
+
+        if (doc->page_table == NULL) {
+            /* a page outside of any Pages node has no table to go in */
+            return NSPDFERROR_FORMAT;
+        }
+        if ((*page_index) >= doc->page_table_size) {
+            /* more pages in the tree than the Count value allowed for,
+             * refuse to write beyond the end of the page table
+             */
+            return NSPDFERROR_RANGE;
+        }
+
+        page = doc->page_table + (*page_index);
+
+        /* required heritable resources */
+        res = cos_heritable_dictionary_dictionary(doc,
+                                                  page_tree_node,
+                                                  "Resources",
+                                                  &(page->resources));
+        if (res != NSPDFERROR_OK) {
+            return res;
+        }
+
+        /* required heritable mediabox */
+        res = cos_heritable_dictionary_array(doc,
+                                             page_tree_node,
+                                             "MediaBox",
+                                             &(page->mediabox));
+        if (res != NSPDFERROR_OK) {
+            return res;
+        }
+
+        /* optional page contents */
+        res = cos_get_dictionary_value(doc,
+                                       page_tree_node,
+                                       "Contents",
+                                       &(page->contents));
+        if ((res != NSPDFERROR_OK) &&
+            (res != NSPDFERROR_NOTFOUND)) {
+            return res;
+        }
+
+        (*page_index)++;
+        res = NSPDFERROR_OK;
+    } else {
+        res = NSPDFERROR_FORMAT;
+    }
+    return res;
+}
+
+
+/**
+ * decode the document catalog
+ *
+ * Obtains the catalog dictionary from the document root, checks that its
+ * Type is Catalog and decodes the page tree found under its Pages entry.
+ *
+ * \param doc the pdf document
+ * \return NSPDFERROR_OK on success, NSPDFERROR_FORMAT if the root is not a
+ *         Catalog or the error from a lookup or from decoding the page tree.
+ */
+static nspdferror decode_catalog(struct nspdf_doc *doc)
+{
+    struct cos_object *catalog_dict;
+    struct cos_object *page_root;
+    const char *type_name;
+    unsigned int next_page = 0; /* index of next page table entry */
+    nspdferror err;
+
+    err = cos_get_dictionary(doc, doc->root, &catalog_dict);
+    if (err != NSPDFERROR_OK) {
+        return err;
+    }
+
+    /* Type = Catalog */
+    err = cos_get_dictionary_name(doc, catalog_dict, "Type", &type_name);
+    if (err != NSPDFERROR_OK) {
+        return err;
+    }
+    if (strcmp(type_name, "Catalog") != 0) {
+        return NSPDFERROR_FORMAT;
+    }
+
+    /* Pages */
+    err = cos_get_dictionary_dictionary(doc, catalog_dict, "Pages", &page_root);
+    if (err != NSPDFERROR_OK) {
+        return err;
+    }
+
+    /* result of the page tree decode is the result of the catalog decode */
+    return decode_page_tree(doc, page_root, &next_page);
+}
+
+/* exported interface documented in nspdf/document.h */
+nspdferror nspdf_document_create(struct nspdf_doc **doc_out)
+{
+    /* zeroed so every table and object pointer starts out NULL */
+    struct nspdf_doc *created = calloc(1, sizeof(*created));
+
+    if (created != NULL) {
+        *doc_out = created;
+        return NSPDFERROR_OK;
+    }
+
+    return NSPDFERROR_NOMEM;
+}
+
+/* exported interface documented in nspdf/document.h */
+nspdferror nspdf_document_destroy(struct nspdf_doc *doc)
+{
+    if (doc == NULL) {
+        /* nothing to release */
+        return NSPDFERROR_OK;
+    }
+
+    /* release the tables allocated while parsing the document.
+     *
+     * NOTE(review): objects referenced from the table entries and the
+     * root, encrypt, info and id values are not released here - confirm
+     * where their ownership lies
+     */
+    free(doc->page_table);
+    free(doc->xref_table);
+
+    free(doc);
+
+    return NSPDFERROR_OK;
+}
+
+
+/**
+ * find the PDF comment marker to identify the start of the document
+ *
+ * Searches the first 1024 bytes of the document for the '%PDF-1.' marker.
+ * When found the document start and length are adjusted so that offset
+ * zero is the marker.
+ *
+ * \param doc the pdf document
+ * \return NSPDFERROR_OK if the marker was found else NSPDFERROR_NOTFOUND
+ */
+static nspdferror check_header(struct nspdf_doc *doc)
+{
+    uint64_t offset; /* offset of characters being considered for header */
+
+    /* stop once the whole marker no longer fits before the end of the
+     * document so short inputs are not read beyond their length
+     */
+    for (offset = 0;
+         (offset < 1024) && ((offset + SLEN("%PDF-1.")) <= doc->length);
+         offset++) {
+        if ((DOC_BYTE(doc, offset) == '%') &&
+            (DOC_BYTE(doc, offset + 1) == 'P') &&
+            (DOC_BYTE(doc, offset + 2) == 'D') &&
+            (DOC_BYTE(doc, offset + 3) == 'F') &&
+            (DOC_BYTE(doc, offset + 4) == '-') &&
+            (DOC_BYTE(doc, offset + 5) == '1') &&
+            (DOC_BYTE(doc, offset + 6) == '.')) {
+            doc->start += offset;
+            doc->length -= offset;
+
+            /* \todo read number for minor */
+            return NSPDFERROR_OK;
+        }
+    }
+    return NSPDFERROR_NOTFOUND;
+}
+
+/* exported interface documented in nspdf/document.h */
+nspdferror
+nspdf_document_parse(struct nspdf_doc *doc,
+                     const uint8_t *buffer,
+                     uint64_t buffer_length)
+{
+    nspdferror err;
+
+    /* the document refers to the caller's buffer, it is not copied */
+    doc->length = buffer_length;
+    doc->start = buffer;
+
+    /* locate the start of the pdf data within the buffer */
+    err = check_header(doc);
+    if (err != NSPDFERROR_OK) {
+        printf("header check failed\n");
+        return err;
+    }
+
+    /* build the cross reference table from the trailers */
+    err = decode_trailers(doc);
+    if (err != NSPDFERROR_OK) {
+        printf("failed to decode trailers (%d)\n", err);
+        return err;
+    }
+
+    /* build the page table from the catalog */
+    err = decode_catalog(doc);
+    if (err != NSPDFERROR_OK) {
+        printf("failed to decode catalog (%d)\n", err);
+        return err;
+    }
+
+    return NSPDFERROR_OK;
+}
diff --git a/src/nspdferror.h b/src/nspdferror.h
deleted file mode 100644
index 3e26813..0000000
--- a/src/nspdferror.h
+++ /dev/null
@@ -1,10 +0,0 @@
-typedef enum {
- NSPDFERROR_OK,
- NSPDFERROR_NOMEM,
- NSPDFERROR_SYNTAX, /**< syntax error in parse */
- NSPDFERROR_SIZE, /**< not enough input data */
- NSPDFERROR_RANGE, /**< value outside type range */
- NSPDFERROR_TYPE, /**< wrong type error */
- NSPDFERROR_NOTFOUND, /**< key not found */
- NSPDFERROR_FORMAT, /**< objects do not cornform to expected format */
-} nspdferror;
diff --git a/src/pdf_doc.c b/src/pdf_doc.c
index 4a5cad1..281025c 100644
--- a/src/pdf_doc.c
+++ b/src/pdf_doc.c
@@ -12,7 +12,9 @@
#include <stdbool.h>
#include <stdio.h>
-#include "nspdferror.h"
+#include <nspdf/errors.h>
+
+#include "cos_parse.h"
#include "byte_class.h"
#include "cos_object.h"
#include "pdf_doc.h"
@@ -20,7 +22,7 @@
/**
* move offset to next non whitespace byte
*/
-nspdferror doc_skip_ws(struct pdf_doc *doc, uint64_t *offset)
+nspdferror doc_skip_ws(struct nspdf_doc *doc, uint64_t *offset)
{
uint8_t c;
/* TODO sort out keeping offset in range */
@@ -43,7 +45,7 @@ nspdferror doc_skip_ws(struct pdf_doc *doc, uint64_t *offset)
/**
* move offset to next non eol byte
*/
-nspdferror doc_skip_eol(struct pdf_doc *doc, uint64_t *offset)
+nspdferror doc_skip_eol(struct nspdf_doc *doc, uint64_t *offset)
{
uint8_t c;
/* TODO sort out keeping offset in range */
@@ -60,7 +62,7 @@ static struct cos_object cos_null_obj = {
};
nspdferror
-xref_get_referenced(struct pdf_doc *doc, struct cos_object **cobj_out)
+xref_get_referenced(struct nspdf_doc *doc, struct cos_object **cobj_out)
{
nspdferror res;
struct cos_object *cobj;
@@ -90,7 +92,7 @@ xref_get_referenced(struct pdf_doc *doc, struct cos_object **cobj_out)
if (entry->object == NULL) {
/* indirect object has never been decoded */
offset = entry->offset;
- res = cos_decode_object(doc, &offset, &indirect);
+ res = cos_parse_object(doc, &offset, &indirect);
if (res != NSPDFERROR_OK) {
printf("failed to decode indirect object\n");
return res;
diff --git a/src/pdf_doc.h b/src/pdf_doc.h
index 986556f..e9bdc14 100644
--- a/src/pdf_doc.h
+++ b/src/pdf_doc.h
@@ -18,11 +18,9 @@ struct page_table_entry {
};
/** pdf document */
-struct pdf_doc {
- uint8_t *buffer;
- uint64_t buffer_length;
+struct nspdf_doc {
- uint8_t *start; /* start of pdf document in input stream */
+ const uint8_t *start; /* start of pdf document in input stream */
uint64_t length;
int major;
@@ -47,7 +45,7 @@ struct pdf_doc {
/* byte data accessor, allows for more complex buffer handling in future */
#define DOC_BYTE(doc, offset) (doc->start[(offset)])
-nspdferror doc_skip_ws(struct pdf_doc *doc, uint64_t *offset);
-nspdferror doc_skip_eol(struct pdf_doc *doc, uint64_t *offset);
+nspdferror doc_skip_ws(struct nspdf_doc *doc, uint64_t *offset);
+nspdferror doc_skip_eol(struct nspdf_doc *doc, uint64_t *offset);
-nspdferror xref_get_referenced(struct pdf_doc *doc, struct cos_object **cobj_out);
+nspdferror xref_get_referenced(struct nspdf_doc *doc, struct cos_object **cobj_out);
diff --git a/src/xref.c b/src/xref.c
deleted file mode 100644
index 452aa19..0000000
--- a/src/xref.c
+++ /dev/null
@@ -1,700 +0,0 @@
-#include <stdio.h>
-#include <stdint.h>
-#include <inttypes.h>
-#include <stdlib.h>
-#include <stdbool.h>
-#include <string.h>
-
-#include "nspdferror.h"
-#include "byte_class.h"
-#include "cos_object.h"
-#include "pdf_doc.h"
-
-#define SLEN(x) (sizeof((x)) - 1)
-
-
-int
-read_whole_pdf(struct pdf_doc *doc, const char *fname)
-{
- FILE *f;
- off_t len;
- uint8_t *buf;
- size_t rd;
-
- f = fopen(fname, "r");
- if (f == NULL) {
- perror("pdf open");
- return 1;
- }
-
- fseek(f, 0, SEEK_END);
- len = ftello(f);
-
- buf = malloc(len);
- fseek(f, 0, SEEK_SET);
-
- rd = fread(buf, len, 1, f);
- if (rd != 1) {
- perror("pdf read");
- free(buf);
- return 1;
- }
-
- fclose(f);
-
- doc->start = doc->buffer = buf;
- doc->length = doc->buffer_length = len;
-
- return 0;
-}
-
-
-#define STARTXREF_TOK "startxref"
-/* Number of bytes to search back from file end to find xref start token, convention says 1024 bytes */
-#define STARTXREF_SEARCH_SIZE 1024
-
-
-
-
-static nspdferror
-doc_read_uint(struct pdf_doc *doc, uint64_t *offset_out, uint64_t *result_out)
-{
- uint8_t c; /* current byte from source data */
- unsigned int len; /* number of decimal places in number */
- uint8_t num[21]; /* temporary buffer for decimal values */
- uint64_t offset; /* current offset of source data */
- uint64_t result=0; /* parsed result */
- uint64_t tens;
-
- offset = *offset_out;
-
- for (len = 0; len < sizeof(num); len++) {
- c = DOC_BYTE(doc, offset);
- if ((bclass[c] & BC_DCML) != BC_DCML) {
- if (len == 0) {
- return -2; /* parse error no decimals in input */
- }
- /* sum value from each place */
- for (tens = 1; len > 0; tens = tens * 10, len--) {
- result += (num[len - 1] * tens);
- }
-
- *offset_out = offset;
- *result_out = result;
-
- return NSPDFERROR_OK;
- }
- num[len] = c - '0';
- offset++;
- }
- return -1; /* number too long */
-}
-
-/**
- * finds the startxref marker at the end of input
- */
-nspdferror find_startxref(struct pdf_doc *doc, uint64_t *offset_out)
-{
- uint64_t offset; /* offset of characters being considered for startxref */
- uint64_t earliest; /* earliest offset to serch for startxref */
-
- offset = doc->length - SLEN(STARTXREF_TOK);
-
- if (doc->length < STARTXREF_SEARCH_SIZE) {
- earliest = 0;
- } else {
- earliest = doc->length - STARTXREF_SEARCH_SIZE;
- }
-
- for (;offset > earliest; offset--) {
- if ((DOC_BYTE(doc, offset ) == 's') &&
- (DOC_BYTE(doc, offset + 1) == 't') &&
- (DOC_BYTE(doc, offset + 2) == 'a') &&
- (DOC_BYTE(doc, offset + 3) == 'r') &&
- (DOC_BYTE(doc, offset + 4) == 't') &&
- (DOC_BYTE(doc, offset + 5) == 'x') &&
- (DOC_BYTE(doc, offset + 6) == 'r') &&
- (DOC_BYTE(doc, offset + 7) == 'e') &&
- (DOC_BYTE(doc, offset + 8) == 'f')) {
- *offset_out = offset;
- return NSPDFERROR_OK;
- }
- }
- return NSPDFERROR_SYNTAX;
-}
-
-/**
- * decodes a startxref field
- */
-nspdferror decode_startxref(struct pdf_doc *doc, uint64_t *offset_out, uint64_t *start_xref_out)
-{
- uint64_t offset; /* offset of characters being considered for startxref */
- uint64_t start_xref;
- nspdferror res;
-
- offset = *offset_out;
-
- if ((DOC_BYTE(doc, offset ) != 's') ||
- (DOC_BYTE(doc, offset + 1) != 't') ||
- (DOC_BYTE(doc, offset + 2) != 'a') ||
- (DOC_BYTE(doc, offset + 3) != 'r') ||
- (DOC_BYTE(doc, offset + 4) != 't') ||
- (DOC_BYTE(doc, offset + 5) != 'x') ||
- (DOC_BYTE(doc, offset + 6) != 'r') ||
- (DOC_BYTE(doc, offset + 7) != 'e') ||
- (DOC_BYTE(doc, offset + 8) != 'f')) {
- return NSPDFERROR_SYNTAX;
- }
- offset += 9;
-
- res = doc_skip_ws(doc, &offset);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- res = doc_read_uint(doc, &offset, &start_xref);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- res = doc_skip_eol(doc, &offset);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- if ((DOC_BYTE(doc, offset ) != '%') ||
- (DOC_BYTE(doc, offset + 1) != '%') ||
- (DOC_BYTE(doc, offset + 2) != 'E') ||
- (DOC_BYTE(doc, offset + 3) != 'O') ||
- (DOC_BYTE(doc, offset + 4) != 'F')) {
- printf("missing EOF marker\n");
- return NSPDFERROR_SYNTAX;
- }
-
- *offset_out = offset;
- *start_xref_out = start_xref;
-
- return NSPDFERROR_OK;
-}
-
-
-/**
- * finds the next trailer
- */
-nspdferror find_trailer(struct pdf_doc *doc, uint64_t *offset_out)
-{
- uint64_t offset; /* offset of characters being considered for trailer */
-
- for (offset = *offset_out;offset < doc->length; offset++) {
- if ((DOC_BYTE(doc, offset ) == 't') &&
- (DOC_BYTE(doc, offset + 1) == 'r') &&
- (DOC_BYTE(doc, offset + 2) == 'a') &&
- (DOC_BYTE(doc, offset + 3) == 'i') &&
- (DOC_BYTE(doc, offset + 4) == 'l') &&
- (DOC_BYTE(doc, offset + 5) == 'e') &&
- (DOC_BYTE(doc, offset + 6) == 'r')) {
- *offset_out = offset;
- return NSPDFERROR_OK;
- }
- }
- return NSPDFERROR_SYNTAX;
-}
-
-/**
- * find the PDF comment marker to identify the start of the document
- */
-int check_header(struct pdf_doc *doc)
-{
- uint64_t offset; /* offset of characters being considered for startxref */
-
- for (offset = 0; offset < 1024; offset++) {
- if ((DOC_BYTE(doc, offset) == '%') &&
- (DOC_BYTE(doc, offset + 1) == 'P') &&
- (DOC_BYTE(doc, offset + 2) == 'D') &&
- (DOC_BYTE(doc, offset + 3) == 'F') &&
- (DOC_BYTE(doc, offset + 4) == '-') &&
- (DOC_BYTE(doc, offset + 5) == '1') &&
- (DOC_BYTE(doc, offset + 6) == '.')) {
- doc->start = doc->buffer + offset;
- doc->length -= offset;
- /* read number for minor */
- return 0;
- }
- }
- return -1;
-}
-
-
-nspdferror
-decode_trailer(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **trailer_out)
-{
- struct cos_object *trailer;
- int res;
- uint64_t offset;
-
- offset = *offset_out;
-
- /* trailer object header */
- if ((DOC_BYTE(doc, offset ) != 't') &&
- (DOC_BYTE(doc, offset + 1) != 'r') &&
- (DOC_BYTE(doc, offset + 2) != 'a') &&
- (DOC_BYTE(doc, offset + 3) != 'i') &&
- (DOC_BYTE(doc, offset + 4) != 'l') &&
- (DOC_BYTE(doc, offset + 5) != 'e') &&
- (DOC_BYTE(doc, offset + 6) != 'r')) {
- return -1;
- }
- offset += 7;
- doc_skip_ws(doc, &offset);
-
- res = cos_decode_object(doc, &offset, &trailer);
- if (res != 0) {
- return res;
- }
-
- if (trailer->type != COS_TYPE_DICTIONARY) {
- cos_free_object(trailer);
- return -1;
- }
-
- *trailer_out = trailer;
- *offset_out = offset;
-
- return NSPDFERROR_OK;
-}
-
-nspdferror
-decode_xref(struct pdf_doc *doc, uint64_t *offset_out)
-{
- uint64_t offset;
- nspdferror res;
- uint64_t objnumber; /* current object number */
- uint64_t objcount;
-
- offset = *offset_out;
-
- /* xref object header */
- if ((DOC_BYTE(doc, offset ) != 'x') &&
- (DOC_BYTE(doc, offset + 1) != 'r') &&
- (DOC_BYTE(doc, offset + 2) != 'e') &&
- (DOC_BYTE(doc, offset + 3) != 'f')) {
- return NSPDFERROR_SYNTAX;
- }
- offset += 4;
-
- res = doc_skip_ws(doc, &offset);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- /* subsections
- * <first object number> <number of references in subsection>
- */
- res = doc_read_uint(doc, &offset, &objnumber);
- while (res == NSPDFERROR_OK) {
- uint64_t lastobj;
- res = doc_skip_ws(doc, &offset);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- res = doc_read_uint(doc, &offset, &objcount);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- res = doc_skip_ws(doc, &offset);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- //printf("decoding subsection %lld %lld\n", objnumber, objcount);
-
- lastobj = objnumber + objcount;
- for (; objnumber < lastobj ; objnumber++) {
- /* each entry is a fixed format */
- uint64_t objindex;
- uint64_t objgeneration;
-
- /* object index */
- res = doc_read_uint(doc, &offset, &objindex);
- if (res != NSPDFERROR_OK) {
- return res;
- }
- offset++; /* skip space */
-
- res = doc_read_uint(doc, &offset, &objgeneration);
- if (res != NSPDFERROR_OK) {
- return res;
- }
- offset++; /* skip space */
-
- if ((DOC_BYTE(doc, offset++) == 'n')) {
- if (objnumber < doc->xref_size) {
- struct xref_table_entry *indobj;
- indobj = doc->xref_table + objnumber;
-
- indobj->ref.id = objnumber;
- indobj->ref.generation = objgeneration;
- indobj->offset = objindex;
-
- //printf("xref %lld %lld -> %lld\n", objnumber, objgeneration, objindex);
- } else {
- printf("index out of bounds\n");
- }
- }
-
- offset += 2; /* skip EOL */
- }
-
- res = doc_read_uint(doc, &offset, &objnumber);
- }
-
- return NSPDFERROR_OK;
-}
-
-
-/**
- * recursively parse trailers and xref tables
- */
-nspdferror decode_xref_trailer(struct pdf_doc *doc, uint64_t xref_offset)
-{
- nspdferror res;
- uint64_t offset; /* the current data offset */
- uint64_t startxref; /* the value of the startxref field */
- struct cos_object *trailer; /* the current trailer */
- int64_t prev;
-
- offset = xref_offset;
-
- res = find_trailer(doc, &offset);
- if (res != NSPDFERROR_OK) {
- printf("failed to find last trailer\n");
- return res;
- }
-
- res = decode_trailer(doc, &offset, &trailer);
- if (res != NSPDFERROR_OK) {
- printf("failed to decode trailer\n");
- return res;
- }
-
- res = decode_startxref(doc, &offset, &startxref);
- if (res != NSPDFERROR_OK) {
- printf("failed to decode startxref\n");
- goto decode_xref_trailer_failed;
- }
-
- if (startxref != xref_offset) {
- printf("startxref and Prev value disagree\n");
- }
-
- if (doc->xref_table == NULL) {
- /* extract Size from trailer and create xref table large enough */
- int64_t size;
-
- res = cos_get_dictionary_int(doc, trailer, "Size", &size);
- if (res != NSPDFERROR_OK) {
- printf("trailer has no integer Size value\n");
- goto decode_xref_trailer_failed;
- }
-
- res = cos_extract_dictionary_value(trailer, "Root", &doc->root);
- if (res != NSPDFERROR_OK) {
- printf("no Root!\n");
- goto decode_xref_trailer_failed;
- }
-
- doc->xref_table = calloc(size, sizeof(struct xref_table_entry));
- if (doc->xref_table == NULL) {
- res = NSPDFERROR_NOMEM;
- goto decode_xref_trailer_failed;
- }
- doc->xref_size = size;
-
- res = cos_extract_dictionary_value(trailer, "Encrypt", &doc->encrypt);
- if ((res != NSPDFERROR_OK) && (res != NSPDFERROR_NOTFOUND)) {
- goto decode_xref_trailer_failed;
- }
-
- res = cos_extract_dictionary_value(trailer, "Info", &doc->info);
- if ((res != NSPDFERROR_OK) && (res != NSPDFERROR_NOTFOUND)) {
- goto decode_xref_trailer_failed;
- }
-
- res = cos_extract_dictionary_value(trailer, "ID", &doc->id);
- if ((res != NSPDFERROR_OK) && (res != NSPDFERROR_NOTFOUND)) {
- goto decode_xref_trailer_failed;
- }
-
- }
-
- /* check for prev ID key in trailer and recurse call if present */
- res = cos_get_dictionary_int(doc, trailer, "Prev", &prev);
- if (res == NSPDFERROR_OK) {
- res = decode_xref_trailer(doc, prev);
- if (res != NSPDFERROR_OK) {
- goto decode_xref_trailer_failed;
- }
- }
-
- offset = xref_offset;
- /** @todo deal with XrefStm (number) in trailer */
-
- res = decode_xref(doc, &offset);
- if (res != NSPDFERROR_OK) {
- printf("failed to decode xref table\n");
- goto decode_xref_trailer_failed;
- }
-
-decode_xref_trailer_failed:
- cos_free_object(trailer);
-
- return res;
-}
-
-/**
- * decode non-linear pdf trailer data
- *
- * PDF have a structure nominally defined as header, body, cross reference table
- * and trailer. The body, cross reference table and trailer sections may be
- * repeated in a scheme known as "incremental updates"
- *
- * The strategy used here is to locate the end of the last trailer block which
- * contains a startxref token followed by a byte offset into the file of the
- * beginning of the cross reference table followed by a literal '%%EOF'
- *
- * the initial offset is used to walk back down a chain of xref/trailers until
- * the trailer does not contain a Prev entry and decode xref tables forwards to
- * overwrite earlier object entries with later ones.
- *
- * It is necessary to search forwards from the xref table to find the trailer
- * block because instead of the Prev entry pointing to the previous trailer
- * (from which we could have extracted the startxref to find the associated
- * xref table) it points to the previous xref block which we have to skip to
- * find the subsequent trailer.
- *
- */
-nspdferror decode_trailers(struct pdf_doc *doc)
-{
- nspdferror res;
- uint64_t offset; /* the current data offset */
- uint64_t startxref; /* the value of the first startxref field */
-
- res = find_startxref(doc, &offset);
- if (res != NSPDFERROR_OK) {
- printf("failed to find startxref\n");
- return res;
- }
-
- res = decode_startxref(doc, &offset, &startxref);
- if (res != NSPDFERROR_OK) {
- printf("failed to decode startxref\n");
- return res;
- }
-
- /* recurse down the xref and trailers */
- return decode_xref_trailer(doc, startxref);
-}
-
-/**
- * recursively decodes a page tree
- */
-nspdferror
-decode_page_tree(struct pdf_doc *doc,
- struct cos_object *page_tree_node,
- unsigned int *page_index)
-{
- nspdferror res;
- const char *type;
-
- // Type = Pages
- res = cos_get_dictionary_name(doc, page_tree_node, "Type", &type);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- if (strcmp(type, "Pages") == 0) {
- struct cos_object *kids;
- unsigned int kids_size;
- unsigned int kids_index;
-
- if (doc->page_table == NULL) {
- /* allocate top level page table */
- int64_t count;
-
- res = cos_get_dictionary_int(doc, page_tree_node, "Count", &count);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- doc->page_table = calloc(count, sizeof(struct page_table_entry));
- if (doc->page_table == NULL) {
- return NSPDFERROR_NOMEM;
- }
- doc->page_table_size = count;
- }
-
- res = cos_get_dictionary_array(doc, page_tree_node, "Kids", &kids);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- res = cos_get_array_size(doc, kids, &kids_size);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- for (kids_index = 0; kids_index < kids_size; kids_index++) {
- struct cos_object *kid;
-
- res = cos_get_array_dictionary(doc, kids, kids_index, &kid);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- res = decode_page_tree(doc, kid, page_index);
- if (res != NSPDFERROR_OK) {
- return res;
- }
- }
-
- } else if (strcmp(type, "Page") == 0) {
- struct page_table_entry *page;
-
- page = doc->page_table + (*page_index);
-
- /* required heritable resources */
- res = cos_heritable_dictionary_dictionary(doc,
- page_tree_node,
- "Resources",
- &(page->resources));
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- /* required heritable mediabox */
- res = cos_heritable_dictionary_array(doc,
- page_tree_node,
- "MediaBox",
- &(page->mediabox));
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- /* optional page contents */
- res = cos_get_dictionary_value(doc,
- page_tree_node,
- "Contents",
- &(page->contents));
- if ((res != NSPDFERROR_OK) &&
- (res != NSPDFERROR_NOTFOUND)) {
- return res;
- }
-
- printf("page index:%d page:%p resources:%p mediabox:%p contents:%p\n",
- *page_index,
- page,
- page->resources,
- page->mediabox,
- page->contents);
-
- (*page_index)++;
- res = NSPDFERROR_OK;
- } else {
- res = NSPDFERROR_FORMAT;
- }
- return res;
-}
-
-nspdferror decode_catalog(struct pdf_doc *doc)
-{
- nspdferror res;
- struct cos_object *catalog;
- const char *type;
- struct cos_object *pages;
- unsigned int page_index = 0;
-
- res = cos_get_dictionary(doc, doc->root, &catalog);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- // Type = Catalog
- res = cos_get_dictionary_name(doc, catalog, "Type", &type);
- if (res != NSPDFERROR_OK) {
- return res;
- }
- if (strcmp(type, "Catalog") != 0) {
- return NSPDFERROR_FORMAT;
- }
-
- // Pages
- res = cos_get_dictionary_dictionary(doc, catalog, "Pages", &pages);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- res = decode_page_tree(doc, pages, &page_index);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- return res;
-}
-
-nspdferror new_pdf_doc(struct pdf_doc **doc_out)
-{
- struct pdf_doc *doc;
- doc = calloc(1, sizeof(struct pdf_doc));
- if (doc == NULL) {
- return NSPDFERROR_NOMEM;
- }
- *doc_out = doc;
- return NSPDFERROR_OK;
-}
-
-int main(int argc, char **argv)
-{
- struct pdf_doc *doc;
- int res;
-
- if (argc < 2) {
- fprintf(stderr, "Usage %s <filename>\n", argv[0]);
- return 1;
- }
-
- res = new_pdf_doc(&doc);
- if (res != NSPDFERROR_OK) {
- printf("failed to read file\n");
- return res;
- }
-
- res = read_whole_pdf(doc, argv[1]);
- if (res != 0) {
- printf("failed to read file\n");
- return res;
- }
-
- res = check_header(doc);
- if (res != 0) {
- printf("header check failed\n");
- return res;
- }
-
- res = decode_trailers(doc);
- if (res != NSPDFERROR_OK) {
- printf("failed to decode trailers (%d)\n", res);
- return res;
- }
-
- res = decode_catalog(doc);
- if (res != NSPDFERROR_OK) {
- printf("failed to decode catalog (%d)\n", res);
- return res;
- }
-
- return 0;
-}
diff --git a/test/Makefile b/test/Makefile
new file mode 100644
index 0000000..e3a2929
--- /dev/null
+++ b/test/Makefile
@@ -0,0 +1,3 @@
+DIR_TEST_ITEMS := parsepdf:parsepdf.c
+
+include $(NSBUILD)/Makefile.subdir
diff --git a/test/parsepdf.c b/test/parsepdf.c
new file mode 100644
index 0000000..3482af5
--- /dev/null
+++ b/test/parsepdf.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2018 Vincent Sanders <vince(a)netsurf-browser.org>
+ *
+ * This file is part of libnspdf.
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include <nspdf/document.h>
+
+/**
+ * read the whole of a file into a newly allocated buffer
+ *
+ * \param fname path of the file to read
+ * \param buffer updated with the allocated data on success, the caller frees it
+ * \param buffer_length updated with the size of the data on success
+ * \return NSPDFERROR_OK on success,
+ *         NSPDFERROR_NOTFOUND if the file cannot be opened or measured,
+ *         NSPDFERROR_NOMEM if the buffer cannot be allocated,
+ *         NSPDFERROR_SIZE if the file is empty or cannot be read completely
+ */
+static nspdferror
+read_whole_pdf(const char *fname, uint8_t **buffer, uint64_t *buffer_length)
+{
+    FILE *f;
+    off_t len;
+    uint8_t *buf;
+    nspdferror res;
+
+    /* binary mode, the data must not be subject to newline translation */
+    f = fopen(fname, "rb");
+    if (f == NULL) {
+        perror("pdf open");
+        return NSPDFERROR_NOTFOUND;
+    }
+
+    if (fseek(f, 0, SEEK_END) != 0) {
+        perror("pdf seek");
+        res = NSPDFERROR_NOTFOUND;
+        goto read_whole_pdf_close;
+    }
+
+    len = ftello(f);
+    if (len < 0) {
+        perror("pdf tell");
+        res = NSPDFERROR_NOTFOUND;
+        goto read_whole_pdf_close;
+    }
+    if (len == 0) {
+        fprintf(stderr, "pdf read: file is empty\n");
+        res = NSPDFERROR_SIZE;
+        goto read_whole_pdf_close;
+    }
+
+    if (fseek(f, 0, SEEK_SET) != 0) {
+        perror("pdf seek");
+        res = NSPDFERROR_NOTFOUND;
+        goto read_whole_pdf_close;
+    }
+
+    buf = malloc((size_t)len);
+    if (buf == NULL) {
+        perror("pdf alloc");
+        res = NSPDFERROR_NOMEM;
+        goto read_whole_pdf_close;
+    }
+
+    /* a single item of the full length so a short read counts as a failure */
+    if (fread(buf, (size_t)len, 1, f) != 1) {
+        perror("pdf read");
+        free(buf);
+        res = NSPDFERROR_SIZE;
+        goto read_whole_pdf_close;
+    }
+
+    *buffer = buf;
+    *buffer_length = (uint64_t)len;
+    res = NSPDFERROR_OK;
+
+read_whole_pdf_close:
+    /* the stream is closed on the failure paths as well as on success */
+    fclose(f);
+
+    return res;
+}
+
+
+/**
+ * parse the PDF file named by the first command line argument
+ *
+ * \return 0 on success, 1 on a usage error, otherwise the value of the
+ *         nspdferror reported by the step that failed
+ */
+int main(int argc, char **argv)
+{
+    uint8_t *buffer = NULL;
+    uint64_t buffer_length = 0;
+    struct nspdf_doc *doc = NULL;
+    nspdferror res;
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage %s <filename>\n", argv[0]);
+        return 1;
+    }
+
+    res = read_whole_pdf(argv[1], &buffer, &buffer_length);
+    if (res != NSPDFERROR_OK) {
+        /* nothing has been handed to us yet so there is nothing to free */
+        printf("failed to read file\n");
+        return res;
+    }
+
+    res = nspdf_document_create(&doc);
+    if (res != NSPDFERROR_OK) {
+        printf("failed to create a document\n");
+        goto main_free_buffer;
+    }
+
+    res = nspdf_document_parse(doc, buffer, buffer_length);
+    if (res != NSPDFERROR_OK) {
+        printf("document parse failed (%d)\n", res);
+        /* the parse error is the one reported, the document still goes */
+        (void)nspdf_document_destroy(doc);
+        goto main_free_buffer;
+    }
+
+    res = nspdf_document_destroy(doc);
+    if (res != NSPDFERROR_OK) {
+        printf("failed to destroy document (%d)\n", res);
+    }
+
+main_free_buffer:
+    /* released after the document because the document points into it */
+    free(buffer);
+
+    return (res == NSPDFERROR_OK) ? 0 : (int)res;
+}
diff --git a/test/runtest.sh b/test/runtest.sh
new file mode 100755
index 0000000..1aa83c7
--- /dev/null
+++ b/test/runtest.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+# Run the parsepdf test program found in the directory given as the first
+# argument against a sample document.
+TEST_PATH=$1
+
+# quoted so a build directory containing spaces is not split into words
+"${TEST_PATH}/test_parsepdf" ~/Downloads/HiKey_User_Guide_Rev0.2.pdf
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=5422dd50a49fe1a282...
commit 5422dd50a49fe1a282271f22cd324f815e592e07
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
decode page tree
diff --git a/src/cos_object.c b/src/cos_object.c
index a5bd738..5bfd423 100644
--- a/src/cos_object.c
+++ b/src/cos_object.c
@@ -188,6 +188,59 @@ cos_get_dictionary_dictionary(struct pdf_doc *doc,
}
nspdferror
+cos_heritable_dictionary_dictionary(struct pdf_doc *doc,
+                                    struct cos_object *dict,
+                                    const char *key,
+                                    struct cos_object **value_out)
+{
+    struct cos_object *found;
+    nspdferror status;
+
+    /* look the key up in this dictionary and hand back its dictionary value */
+    status = cos_get_dictionary_value(doc, dict, key, &found);
+    /* \todo on NSPDFERROR_NOTFOUND get parent entry and extract key from that dictionary instead */
+    if (status == NSPDFERROR_OK) {
+        status = cos_get_dictionary(doc, found, value_out);
+    }
+    return status;
+}
+
+/*
+ * get the array stored under a key in a dictionary
+ */
+nspdferror
+cos_get_dictionary_array(struct pdf_doc *doc,
+                         struct cos_object *dict,
+                         const char *key,
+                         struct cos_object **value_out)
+{
+    struct cos_object *found;
+    nspdferror status;
+
+    status = cos_get_dictionary_value(doc, dict, key, &found);
+    if (status == NSPDFERROR_OK) {
+        /* cos_get_array() rejects a value that is not an array */
+        status = cos_get_array(doc, found, value_out);
+    }
+    return status;
+}
+
+/*
+ * get the array stored under a key in a dictionary
+ *
+ * only the given dictionary is searched, lookup in a parent is still to do
+ */
+nspdferror
+cos_heritable_dictionary_array(struct pdf_doc *doc,
+                               struct cos_object *dict,
+                               const char *key,
+                               struct cos_object **value_out)
+{
+    struct cos_object *found;
+    nspdferror status;
+
+    status = cos_get_dictionary_value(doc, dict, key, &found);
+    /* \todo on NSPDFERROR_NOTFOUND get parent entry and extract key from that dictionary instead */
+    if (status == NSPDFERROR_OK) {
+        status = cos_get_array(doc, found, value_out);
+    }
+    return status;
+}
+
+nspdferror
cos_get_int(struct pdf_doc *doc,
struct cos_object *cobj,
int64_t *value_out)
@@ -242,3 +295,98 @@ cos_get_dictionary(struct pdf_doc *doc,
}
return res;
}
+
+/*
+ * get an object as an array
+ *
+ * the object is passed through xref_get_referenced() before its type is
+ * checked, NSPDFERROR_TYPE is returned if it is not an array
+ */
+nspdferror
+cos_get_array(struct pdf_doc *doc,
+              struct cos_object *cobj,
+              struct cos_object **value_out)
+{
+    nspdferror status;
+
+    status = xref_get_referenced(doc, &cobj);
+    if (status != NSPDFERROR_OK) {
+        return status;
+    }
+
+    if (cobj->type != COS_TYPE_ARRAY) {
+        return NSPDFERROR_TYPE;
+    }
+
+    *value_out = cobj;
+    return NSPDFERROR_OK;
+}
+
+/*
+ * get the value at a given index of an array
+ *
+ * returns NSPDFERROR_TYPE if the object is not an array and NSPDFERROR_RANGE
+ * if the array has no entry at the index
+ */
+nspdferror
+cos_get_array_value(struct pdf_doc *doc,
+                    struct cos_object *array,
+                    unsigned int index,
+                    struct cos_object **value_out)
+{
+    struct cos_array_entry *node;
+    unsigned int position;
+    nspdferror status;
+
+    status = xref_get_referenced(doc, &array);
+    if (status != NSPDFERROR_OK) {
+        return status;
+    }
+
+    if (array->type != COS_TYPE_ARRAY) {
+        return NSPDFERROR_TYPE;
+    }
+
+    /* the entries are a linked list so the index is reached by walking it */
+    position = 0;
+    for (node = array->u.array; node != NULL; node = node->next) {
+        if (position == index) {
+            *value_out = node->value;
+            return NSPDFERROR_OK;
+        }
+        position++;
+    }
+
+    /* the list ended before the requested index was reached */
+    return NSPDFERROR_RANGE;
+}
+
+/*
+ * get the dictionary at a given index of an array
+ */
+nspdferror
+cos_get_array_dictionary(struct pdf_doc *doc,
+                         struct cos_object *array,
+                         unsigned int index,
+                         struct cos_object **value_out)
+{
+    struct cos_object *element;
+    nspdferror status;
+
+    status = cos_get_array_value(doc, array, index, &element);
+    if (status == NSPDFERROR_OK) {
+        /* cos_get_dictionary() rejects an element that is not a dictionary */
+        status = cos_get_dictionary(doc, element, value_out);
+    }
+    return status;
+}
+
+/*
+ * get the number of entries in an array
+ *
+ * size_out is only written when NSPDFERROR_OK is returned
+ */
+nspdferror
+cos_get_array_size(struct pdf_doc *doc,
+                   struct cos_object *cobj,
+                   unsigned int *size_out)
+{
+    struct cos_array_entry *node;
+    unsigned int count = 0;
+    nspdferror status;
+
+    status = xref_get_referenced(doc, &cobj);
+    if (status != NSPDFERROR_OK) {
+        return status;
+    }
+
+    if (cobj->type != COS_TYPE_ARRAY) {
+        return NSPDFERROR_TYPE;
+    }
+
+    /* walk array list to enumerate entries */
+    for (node = cobj->u.array; node != NULL; node = node->next) {
+        count++;
+    }
+
+    *size_out = count;
+    return NSPDFERROR_OK;
+}
diff --git a/src/cos_object.h b/src/cos_object.h
index 2ded7ec..48241c6 100644
--- a/src/cos_object.h
+++ b/src/cos_object.h
@@ -127,6 +127,13 @@ nspdferror cos_get_dictionary_name(struct pdf_doc *doc, struct cos_object *dict,
nspdferror cos_get_dictionary_dictionary(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
+nspdferror cos_heritable_dictionary_dictionary(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out); /* dictionary value of key in dict, searching a parent is still a todo in cos_object.c */
+/* array value of key in dict */
+nspdferror cos_get_dictionary_array(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
+/* array value of key in dict, searching a parent is still a todo in cos_object.c */
+nspdferror cos_heritable_dictionary_array(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
+
+
nspdferror cos_get_int(struct pdf_doc *doc, struct cos_object *cobj, int64_t *value_out);
@@ -134,3 +141,11 @@ nspdferror cos_get_name(struct pdf_doc *doc, struct cos_object *cobj, const char
nspdferror cos_get_dictionary(struct pdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out);
+
+/* get an object as an array, NSPDFERROR_TYPE if it is not one */
+nspdferror cos_get_array(struct pdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out);
+
+/* get the number of entries in an array */
+nspdferror cos_get_array_size(struct pdf_doc *doc, struct cos_object *cobj, unsigned int *size_out);
+
+/* get the value at an index of an array, NSPDFERROR_RANGE if there is no such entry */
+nspdferror cos_get_array_value(struct pdf_doc *doc, struct cos_object *array, unsigned int index, struct cos_object **value_out);
+
+/* get the dictionary at an index of an array */
+nspdferror cos_get_array_dictionary(struct pdf_doc *doc, struct cos_object *array, unsigned int index, struct cos_object **value_out);
diff --git a/src/pdf_doc.h b/src/pdf_doc.h
index b37e3b2..986556f 100644
--- a/src/pdf_doc.h
+++ b/src/pdf_doc.h
@@ -10,6 +10,12 @@ struct xref_table_entry {
struct cos_object *object;
};
+/** page entry
+ *
+ * the members are filled in from a page dictionary by decode_page_tree()
+ */
+struct page_table_entry {
+ struct cos_object *resources; /**< dictionary found under the "Resources" key */
+ struct cos_object *mediabox; /**< array found under the "MediaBox" key */
+ struct cos_object *contents; /**< value of the optional "Contents" key */
+};
/** pdf document */
struct pdf_doc {
@@ -33,6 +39,9 @@ struct pdf_doc {
struct cos_object *info;
struct cos_object *id;
+ /* page reference table */
+ uint64_t page_table_size;
+ struct page_table_entry *page_table;
};
/* byte data accessor, allows for more complex buffer handling in future */
diff --git a/src/xref.c b/src/xref.c
index 8239f45..452aa19 100644
--- a/src/xref.c
+++ b/src/xref.c
@@ -499,12 +499,123 @@ nspdferror decode_trailers(struct pdf_doc *doc)
return decode_xref_trailer(doc, startxref);
}
+/**
+ * recursively decodes a page tree
+ *
+ * A node of type "Pages" has each dictionary in its "Kids" array decoded in
+ * turn. The first such node encountered also allocates the page table using
+ * its "Count" value. A node of type "Page" fills in the next free entry of
+ * the page table.
+ *
+ * The "Count" value and the number of pages come from the document being
+ * parsed, so both are checked before the page table is allocated or written.
+ *
+ * \todo the recursion depth is not limited
+ *
+ * \param doc The document being decoded
+ * \param page_tree_node The page tree node to decode
+ * \param page_index Index of the next page table entry to fill, incremented
+ *                   for every page decoded
+ * \return NSPDFERROR_OK on success,
+ *         NSPDFERROR_RANGE if "Count" cannot be used as a table size,
+ *         NSPDFERROR_NOMEM if the page table cannot be allocated,
+ *         NSPDFERROR_FORMAT if the node is of an unknown type or a page has
+ *         no page table entry available to it,
+ *         otherwise the error from the failing dictionary or array access
+ */
+nspdferror
+decode_page_tree(struct pdf_doc *doc,
+                 struct cos_object *page_tree_node,
+                 unsigned int *page_index)
+{
+    nspdferror res;
+    const char *type;
+
+    // Type = Pages
+    res = cos_get_dictionary_name(doc, page_tree_node, "Type", &type);
+    if (res != NSPDFERROR_OK) {
+        return res;
+    }
+
+    if (strcmp(type, "Pages") == 0) {
+        struct cos_object *kids;
+        unsigned int kids_size;
+        unsigned int kids_index;
+
+        if (doc->page_table == NULL) {
+            /* allocate top level page table */
+            int64_t count;
+
+            res = cos_get_dictionary_int(doc, page_tree_node, "Count", &count);
+            if (res != NSPDFERROR_OK) {
+                return res;
+            }
+
+            /* a negative or oversized count must not reach calloc */
+            if ((count < 0) || ((uint64_t)count > SIZE_MAX)) {
+                return NSPDFERROR_RANGE;
+            }
+
+            doc->page_table = calloc((size_t)count,
+                                     sizeof(struct page_table_entry));
+            if (doc->page_table == NULL) {
+                return NSPDFERROR_NOMEM;
+            }
+            doc->page_table_size = (uint64_t)count;
+        }
+
+        res = cos_get_dictionary_array(doc, page_tree_node, "Kids", &kids);
+        if (res != NSPDFERROR_OK) {
+            return res;
+        }
+
+        res = cos_get_array_size(doc, kids, &kids_size);
+        if (res != NSPDFERROR_OK) {
+            return res;
+        }
+
+        for (kids_index = 0; kids_index < kids_size; kids_index++) {
+            struct cos_object *kid;
+
+            res = cos_get_array_dictionary(doc, kids, kids_index, &kid);
+            if (res != NSPDFERROR_OK) {
+                return res;
+            }
+
+            res = decode_page_tree(doc, kid, page_index);
+            if (res != NSPDFERROR_OK) {
+                return res;
+            }
+        }
+
+    } else if (strcmp(type, "Page") == 0) {
+        struct page_table_entry *page;
+
+        /*
+         * either a page appeared before any Pages node allocated the table
+         * or the tree holds more pages than the Count the table was sized by
+         */
+        if ((doc->page_table == NULL) ||
+            (*page_index >= doc->page_table_size)) {
+            return NSPDFERROR_FORMAT;
+        }
+
+        page = doc->page_table + (*page_index);
+
+        /* required heritable resources */
+        res = cos_heritable_dictionary_dictionary(doc,
+                                                  page_tree_node,
+                                                  "Resources",
+                                                  &(page->resources));
+        if (res != NSPDFERROR_OK) {
+            return res;
+        }
+
+        /* required heritable mediabox */
+        res = cos_heritable_dictionary_array(doc,
+                                             page_tree_node,
+                                             "MediaBox",
+                                             &(page->mediabox));
+        if (res != NSPDFERROR_OK) {
+            return res;
+        }
+
+        /* optional page contents */
+        res = cos_get_dictionary_value(doc,
+                                       page_tree_node,
+                                       "Contents",
+                                       &(page->contents));
+        if ((res != NSPDFERROR_OK) &&
+            (res != NSPDFERROR_NOTFOUND)) {
+            return res;
+        }
+
+        /* %u and void pointers so the arguments match their conversions */
+        printf("page index:%u page:%p resources:%p mediabox:%p contents:%p\n",
+               *page_index,
+               (void *)page,
+               (void *)page->resources,
+               (void *)page->mediabox,
+               (void *)page->contents);
+
+        (*page_index)++;
+        res = NSPDFERROR_OK;
+    } else {
+        res = NSPDFERROR_FORMAT;
+    }
+    return res;
+}
+
nspdferror decode_catalog(struct pdf_doc *doc)
{
nspdferror res;
struct cos_object *catalog;
const char *type;
struct cos_object *pages;
+ unsigned int page_index = 0;
res = cos_get_dictionary(doc, doc->root, &catalog);
if (res != NSPDFERROR_OK) {
@@ -526,14 +637,10 @@ nspdferror decode_catalog(struct pdf_doc *doc)
return res;
}
- // Type = Pages
- res = cos_get_dictionary_name(doc, pages, "Type", &type);
+ res = decode_page_tree(doc, pages, &page_index);
if (res != NSPDFERROR_OK) {
return res;
}
- if (strcmp(type, "Pages") != 0) {
- return NSPDFERROR_FORMAT;
- }
return res;
}
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=7da4a1d7b029ab640a...
commit 7da4a1d7b029ab640a9ae2b95e745d29c998a7b0
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
add more data acessors
diff --git a/src/cos_object.c b/src/cos_object.c
index 5ec41a9..a5bd738 100644
--- a/src/cos_object.c
+++ b/src/cos_object.c
@@ -71,35 +71,14 @@ nspdferror cos_free_object(struct cos_object *cos_obj)
return NSPDFERROR_OK;
}
-nspdferror
-cos_dictionary_get_value(struct cos_object *dict,
- const char *key,
- struct cos_object **value_out)
-{
- struct cos_dictionary_entry *entry;
- if (dict->type != COS_TYPE_DICTIONARY) {
- return NSPDFERROR_TYPE;
- }
-
- entry = dict->u.dictionary;
- while (entry != NULL) {
- if (strcmp(entry->key->u.n, key) == 0) {
- *value_out = entry->value;
- return NSPDFERROR_OK;
- }
- entry = entry->next;
- }
- return NSPDFERROR_NOTFOUND;
-}
-
-/**
+/*
* extracts a value for a key in a dictionary.
*
* this finds and returns a value for a given key removing it from a dictionary
*/
nspdferror
-cos_dictionary_extract_value(struct cos_object *dict,
+cos_extract_dictionary_value(struct cos_object *dict,
const char *key,
struct cos_object **value_out)
{
@@ -126,6 +105,88 @@ cos_dictionary_extract_value(struct cos_object *dict,
return NSPDFERROR_NOTFOUND;
}
+
+/*
+ * get a value for a key from a dictionary
+ */
+nspdferror
+cos_get_dictionary_value(struct pdf_doc *doc,
+ struct cos_object *dict,
+ const char *key,
+ struct cos_object **value_out)
+{
+ nspdferror res;
+ struct cos_dictionary_entry *entry;
+
+ res = xref_get_referenced(doc, &dict);
+ if (res == NSPDFERROR_OK) {
+ if (dict->type != COS_TYPE_DICTIONARY) {
+ res = NSPDFERROR_TYPE;
+ } else {
+ res = NSPDFERROR_NOTFOUND;
+
+ entry = dict->u.dictionary;
+ while (entry != NULL) {
+ if (strcmp(entry->key->u.n, key) == 0) {
+ *value_out = entry->value;
+ res = NSPDFERROR_OK;
+ break;
+ }
+ entry = entry->next;
+ }
+ }
+ }
+ return res;
+}
+
+nspdferror
+cos_get_dictionary_int(struct pdf_doc *doc,
+ struct cos_object *dict,
+ const char *key,
+ int64_t *value_out)
+{
+ nspdferror res;
+ struct cos_object *dict_value;
+
+ res = cos_get_dictionary_value(doc, dict, key, &dict_value);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+ return cos_get_int(doc, dict_value, value_out);
+}
+
+nspdferror
+cos_get_dictionary_name(struct pdf_doc *doc,
+ struct cos_object *dict,
+ const char *key,
+ const char **value_out)
+{
+ nspdferror res;
+ struct cos_object *dict_value;
+
+ res = cos_get_dictionary_value(doc, dict, key, &dict_value);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+ return cos_get_name(doc, dict_value, value_out);
+}
+
+nspdferror
+cos_get_dictionary_dictionary(struct pdf_doc *doc,
+ struct cos_object *dict,
+ const char *key,
+ struct cos_object **value_out)
+{
+ nspdferror res;
+ struct cos_object *dict_value;
+
+ res = cos_get_dictionary_value(doc, dict, key, &dict_value);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+ return cos_get_dictionary(doc, dict_value, value_out);
+}
+
nspdferror
cos_get_int(struct pdf_doc *doc,
struct cos_object *cobj,
@@ -145,6 +206,26 @@ cos_get_int(struct pdf_doc *doc,
}
nspdferror
+cos_get_name(struct pdf_doc *doc,
+ struct cos_object *cobj,
+ const char **value_out)
+{
+ nspdferror res;
+
+ res = xref_get_referenced(doc, &cobj);
+ if (res == NSPDFERROR_OK) {
+ if (cobj->type != COS_TYPE_NAME) {
+ res = NSPDFERROR_TYPE;
+ } else {
+ *value_out = cobj->u.n;
+ }
+ }
+ return res;
+}
+
+
+
+nspdferror
cos_get_dictionary(struct pdf_doc *doc,
struct cos_object *cobj,
struct cos_object **value_out)
diff --git a/src/cos_object.h b/src/cos_object.h
index 8d1449d..2ded7ec 100644
--- a/src/cos_object.h
+++ b/src/cos_object.h
@@ -92,13 +92,45 @@ nspdferror cos_decode_object(struct pdf_doc *doc, uint64_t *offset_out, struct c
nspdferror cos_free_object(struct cos_object *cos_obj);
-nspdferror cos_dictionary_get_value(struct cos_object *dict, const char *key, struct cos_object **value_out);
+/**
+ * extract a value for a key from a dictionary
+ *
+ * This retrieves the value of a given key in a dictionary and removes it from
+ * the dictionary.
+ *
+ * \param dict The dictionary
+ * \param key The key to lookup
+ * \param value_out The value object associated with the key
+ * \return NSPDFERROR_OK and value_out updated on success.
+ * NSPDFERROR_TYPE if the object passed in \p dict is not a dictionary.
+ * NSPDFERROR_NOTFOUND if the key is not present in the dictionary.
+ */
+nspdferror cos_extract_dictionary_value(struct cos_object *dict, const char *key, struct cos_object **value_out);
+
+/**
+ * get a value for a key from a dictionary
+ *
+ * \param dict The dictionary
+ * \param key The key to lookup
+ * \param value_out The value object associated with the key
+ * \return NSPDFERROR_OK and value_out updated on success.
+ * NSPDFERROR_TYPE if the object passed in \p dict is not a dictionary.
+ * NSPDFERROR_NOTFOUND if the key is not present in the dictionary.
+ */
+nspdferror cos_get_dictionary_value(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
+
+
+nspdferror cos_get_dictionary_int(struct pdf_doc *doc, struct cos_object *dict, const char *key, int64_t *value_out);
+
-nspdferror cos_dictionary_extract_value(struct cos_object *dict, const char *key, struct cos_object **value_out);
+nspdferror cos_get_dictionary_name(struct pdf_doc *doc, struct cos_object *dict, const char *key, const char **value_out);
+nspdferror cos_get_dictionary_dictionary(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
nspdferror cos_get_int(struct pdf_doc *doc, struct cos_object *cobj, int64_t *value_out);
-nspdferror cos_get_dictionary(struct pdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out);
+
+nspdferror cos_get_name(struct pdf_doc *doc, struct cos_object *cobj, const char **value_out);
+nspdferror cos_get_dictionary(struct pdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out);
diff --git a/src/nspdferror.h b/src/nspdferror.h
index c6042eb..3e26813 100644
--- a/src/nspdferror.h
+++ b/src/nspdferror.h
@@ -6,4 +6,5 @@ typedef enum {
NSPDFERROR_RANGE, /**< value outside type range */
NSPDFERROR_TYPE, /**< wrong type error */
NSPDFERROR_NOTFOUND, /**< key not found */
+ NSPDFERROR_FORMAT, /**< objects do not conform to expected format */
} nspdferror;
diff --git a/src/xref.c b/src/xref.c
index b1748be..8239f45 100644
--- a/src/xref.c
+++ b/src/xref.c
@@ -365,7 +365,6 @@ nspdferror decode_xref_trailer(struct pdf_doc *doc, uint64_t xref_offset)
uint64_t offset; /* the current data offset */
uint64_t startxref; /* the value of the startxref field */
struct cos_object *trailer; /* the current trailer */
- struct cos_object *cobj_prev;
int64_t prev;
offset = xref_offset;
@@ -394,22 +393,15 @@ nspdferror decode_xref_trailer(struct pdf_doc *doc, uint64_t xref_offset)
if (doc->xref_table == NULL) {
/* extract Size from trailer and create xref table large enough */
- struct cos_object *cobj_size;
int64_t size;
- res = cos_dictionary_get_value(trailer, "Size", &cobj_size);
+ res = cos_get_dictionary_int(doc, trailer, "Size", &size);
if (res != NSPDFERROR_OK) {
- printf("trailer has no Size value\n");
+ printf("trailer has no integer Size value\n");
goto decode_xref_trailer_failed;
}
- res = cos_get_int(doc, cobj_size, &size);
- if (res != NSPDFERROR_OK) {
- printf("trailer Size not int\n");
- goto decode_xref_trailer_failed;
- }
-
- res = cos_dictionary_extract_value(trailer, "Root", &doc->root);
+ res = cos_extract_dictionary_value(trailer, "Root", &doc->root);
if (res != NSPDFERROR_OK) {
printf("no Root!\n");
goto decode_xref_trailer_failed;
@@ -422,17 +414,17 @@ nspdferror decode_xref_trailer(struct pdf_doc *doc, uint64_t xref_offset)
}
doc->xref_size = size;
- res = cos_dictionary_extract_value(trailer, "Encrypt", &doc->encrypt);
+ res = cos_extract_dictionary_value(trailer, "Encrypt", &doc->encrypt);
if ((res != NSPDFERROR_OK) && (res != NSPDFERROR_NOTFOUND)) {
goto decode_xref_trailer_failed;
}
- res = cos_dictionary_extract_value(trailer, "Info", &doc->info);
+ res = cos_extract_dictionary_value(trailer, "Info", &doc->info);
if ((res != NSPDFERROR_OK) && (res != NSPDFERROR_NOTFOUND)) {
goto decode_xref_trailer_failed;
}
- res = cos_dictionary_extract_value(trailer, "ID", &doc->id);
+ res = cos_extract_dictionary_value(trailer, "ID", &doc->id);
if ((res != NSPDFERROR_OK) && (res != NSPDFERROR_NOTFOUND)) {
goto decode_xref_trailer_failed;
}
@@ -440,14 +432,8 @@ nspdferror decode_xref_trailer(struct pdf_doc *doc, uint64_t xref_offset)
}
/* check for prev ID key in trailer and recurse call if present */
- res = cos_dictionary_get_value(trailer, "Prev", &cobj_prev);
+ res = cos_get_dictionary_int(doc, trailer, "Prev", &prev);
if (res == NSPDFERROR_OK) {
- res = cos_get_int(doc, cobj_prev, &prev);
- if (res != NSPDFERROR_OK) {
- printf("trailer Prev not int\n");
- goto decode_xref_trailer_failed;
- }
-
res = decode_xref_trailer(doc, prev);
if (res != NSPDFERROR_OK) {
goto decode_xref_trailer_failed;
@@ -517,9 +503,38 @@ nspdferror decode_catalog(struct pdf_doc *doc)
{
nspdferror res;
struct cos_object *catalog;
+ const char *type;
+ struct cos_object *pages;
res = cos_get_dictionary(doc, doc->root, &catalog);
-
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+
+ // Type = Catalog
+ res = cos_get_dictionary_name(doc, catalog, "Type", &type);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+ if (strcmp(type, "Catalog") != 0) {
+ return NSPDFERROR_FORMAT;
+ }
+
+ // Pages
+ res = cos_get_dictionary_dictionary(doc, catalog, "Pages", &pages);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+
+ // Type = Pages
+ res = cos_get_dictionary_name(doc, pages, "Type", &type);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+ if (strcmp(type, "Pages") != 0) {
+ return NSPDFERROR_FORMAT;
+ }
+
return res;
}
@@ -539,6 +554,11 @@ int main(int argc, char **argv)
struct pdf_doc *doc;
int res;
+ if (argc < 2) {
+ fprintf(stderr, "Usage %s <filename>\n", argv[0]);
+ return 1;
+ }
+
res = new_pdf_doc(&doc);
if (res != NSPDFERROR_OK) {
printf("failed to read file\n");
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=fb3cf89cbbf5ba7a28...
commit fb3cf89cbbf5ba7a2844f6016a88c6c4429ecda1
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
decode indirect objects
diff --git a/src/cos_decode.c b/src/cos_decode.c
index a1ca318..8873060 100644
--- a/src/cos_decode.c
+++ b/src/cos_decode.c
@@ -396,7 +396,7 @@ cos_decode_list(struct pdf_doc *doc,
return res;
}
- printf("found a list\n");
+ //printf("found a list\n");
cosobj = calloc(1, sizeof(struct cos_object));
if (cosobj == NULL) {
@@ -647,13 +647,12 @@ cos_attempt_decode_reference(struct pdf_doc *doc,
nspdferror res;
uint64_t offset;
uint8_t c;
- struct cos_object *generation; /* generation object, reused for output */
- struct cos_reference *nref; /* new reference */
+ struct cos_object *generation; /* generation object */
offset = *offset_out;
res = cos_decode_number(doc, &offset, &generation);
- if (res != 0) {
+ if (res != NSPDFERROR_OK) {
/* no error if next token could not be decoded as a number */
return NSPDFERROR_OK;
}
@@ -673,36 +672,81 @@ cos_attempt_decode_reference(struct pdf_doc *doc,
}
/* two int in a row, look for the R */
- c = DOC_BYTE(doc, offset++);
- if (c != 'R') {
- /* no R so not a reference */
- cos_free_object(generation);
- return NSPDFERROR_OK;
- }
+ c = DOC_BYTE(doc, offset);
+ if (c == 'R') {
+ struct cos_reference *nref; /* new reference */
- /* found reference */
+ //printf("found object reference\n");
+ offset ++;
- //printf("found reference\n");
- doc_skip_ws(doc, &offset);
+ doc_skip_ws(doc, &offset);
- nref = calloc(1, sizeof(struct cos_reference));
- if (nref == NULL) {
- /** \todo free objects */
- return NSPDFERROR_NOMEM; /* memory error */
- }
+ nref = calloc(1, sizeof(struct cos_reference));
+ if (nref == NULL) {
+ cos_free_object(generation);
+ return NSPDFERROR_NOMEM; /* memory error */
+ }
- nref->id = (*cosobj_out)->u.i;
- nref->generation = generation->u.i;
+ nref->id = (*cosobj_out)->u.i;
+ nref->generation = generation->u.i;
- cos_free_object(*cosobj_out);
+ /* overwrite input object for output (it has to be an int which has no
+ * allocation to free)
+ */
+ (*cosobj_out)->type = COS_TYPE_REFERENCE;
+ (*cosobj_out)->u.reference = nref;
- generation->type = COS_TYPE_REFERENCE;
- generation->u.reference = nref;
+ *offset_out = offset;
- *cosobj_out = generation;
+ } else if ((c == 'o') &&
+ (DOC_BYTE(doc, offset + 1) == 'b') &&
+ (DOC_BYTE(doc, offset + 2) == 'j')) {
+ struct cos_object *indirect; /* indirect object */
+ //printf("indirect\n");
+ offset += 3;
- *offset_out = offset;
+ res = doc_skip_ws(doc, &offset);
+ if (res != NSPDFERROR_OK) {
+ cos_free_object(generation);
+ return res;
+ }
+ //printf("decoding\n");
+
+ res = cos_decode_object(doc, &offset, &indirect);
+ if (res != NSPDFERROR_OK) {
+ cos_free_object(generation);
+ return res;
+ }
+ //printf("parsed object type %d\nendobj\n",indirect->type);
+
+ if ((DOC_BYTE(doc, offset ) != 'e') &&
+ (DOC_BYTE(doc, offset + 1) != 'n') &&
+ (DOC_BYTE(doc, offset + 2) != 'd') &&
+ (DOC_BYTE(doc, offset + 1) != 'o') &&
+ (DOC_BYTE(doc, offset + 2) != 'b') &&
+ (DOC_BYTE(doc, offset + 3) != 'j')) {
+ cos_free_object(indirect);
+ cos_free_object(generation);
+ return NSPDFERROR_SYNTAX;
+ }
+ offset += 6;
+ //printf("skipping\n");
+
+ res = doc_skip_ws(doc, &offset);
+ if (res != NSPDFERROR_OK) {
+ cos_free_object(indirect);
+ cos_free_object(generation);
+ return res;
+ }
+
+ cos_free_object(*cosobj_out);
+
+ *cosobj_out = indirect;
+
+ *offset_out = offset;
+ }
+ cos_free_object(generation);
return NSPDFERROR_OK;
}
diff --git a/src/cos_object.c b/src/cos_object.c
index 3abe7e8..5ec41a9 100644
--- a/src/cos_object.c
+++ b/src/cos_object.c
@@ -59,6 +59,7 @@ nspdferror cos_free_object(struct cos_object *cos_obj)
aentry = aentry->next;
free(oaentry);
}
+ break;
case COS_TYPE_STREAM:
free(cos_obj->u.stream);
diff --git a/src/pdf_doc.c b/src/pdf_doc.c
index 7bba54f..4a5cad1 100644
--- a/src/pdf_doc.c
+++ b/src/pdf_doc.c
@@ -99,7 +99,7 @@ xref_get_referenced(struct pdf_doc *doc, struct cos_object **cobj_out)
entry->object = indirect;
}
- cobj = entry->object;
+ *cobj_out = entry->object;
return NSPDFERROR_OK;
}
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=4a0316bbd73df526e4...
commit 4a0316bbd73df526e42207cda82a12a79da4abaf
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
fix null object decode and improve reference object decode a bit
diff --git a/src/cos_decode.c b/src/cos_decode.c
index 730c771..a1ca318 100644
--- a/src/cos_decode.c
+++ b/src/cos_decode.c
@@ -612,6 +612,9 @@ cos_decode_null(struct pdf_doc *doc,
}
cosobj->type = COS_TYPE_NULL;
+
+ *cosobj_out = cosobj;
+
*offset_out = offset;
return NSPDFERROR_OK;
@@ -619,14 +622,20 @@ cos_decode_null(struct pdf_doc *doc,
/**
- * attempt to decode the stream into a reference
+ * attempt to decode input data into a reference, indirect or stream object
+ *
+ * The input data already had a positive integer decoded from it:
+ * - if another positive integer follows and a R character after that it is a
+ * reference,
*
- * The stream has already had a positive integer decoded from it. if another
- * positive integer follows and a R character after that it is a reference,
- * otherwise bail, but not finding a ref is not an error!
+ * - if another positive integer follows and 'obj' after that:
+ * - a direct object followed by 'endobj' it is an indirect object.
+ *
+ * - a direct dictionary object followed by 'stream', then stream data,
+ * then 'endstream' then 'endobj' it is a stream object
*
* \param doc the pdf document
- * \param offset_out offset of current cursor in stream
+ * \param offset_out offset of current cursor in input data
* \param cosobj_out the object to return into, on input contains the first
* integer
*/
@@ -635,31 +644,31 @@ cos_attempt_decode_reference(struct pdf_doc *doc,
uint64_t *offset_out,
struct cos_object **cosobj_out)
{
+ nspdferror res;
uint64_t offset;
- struct cos_object *cosobj; /* possible generation object */
uint8_t c;
- int res;
+ struct cos_object *generation; /* generation object, reused for output */
struct cos_reference *nref; /* new reference */
offset = *offset_out;
- res = cos_decode_number(doc, &offset, &cosobj);
+ res = cos_decode_number(doc, &offset, &generation);
if (res != 0) {
/* no error if next token could not be decoded as a number */
return NSPDFERROR_OK;
}
- if (cosobj->type != COS_TYPE_INT) {
+ if (generation->type != COS_TYPE_INT) {
/* next object was not an integer so not a reference */
- cos_free_object(cosobj);
+ cos_free_object(generation);
return NSPDFERROR_OK;
}
- if (cosobj->u.i < 0) {
+ if (generation->u.i < 0) {
/* integer was negative so not a reference (generations must be
* non-negative)
*/
- cos_free_object(cosobj);
+ cos_free_object(generation);
return NSPDFERROR_OK;
}
@@ -667,7 +676,7 @@ cos_attempt_decode_reference(struct pdf_doc *doc,
c = DOC_BYTE(doc, offset++);
if (c != 'R') {
/* no R so not a reference */
- cos_free_object(cosobj);
+ cos_free_object(generation);
return NSPDFERROR_OK;
}
@@ -683,14 +692,14 @@ cos_attempt_decode_reference(struct pdf_doc *doc,
}
nref->id = (*cosobj_out)->u.i;
- nref->generation = cosobj->u.i;
+ nref->generation = generation->u.i;
cos_free_object(*cosobj_out);
- cosobj->type = COS_TYPE_REFERENCE;
- cosobj->u.reference = nref;
+ generation->type = COS_TYPE_REFERENCE;
+ generation->u.reference = nref;
- *cosobj_out = cosobj;
+ *cosobj_out = generation;
*offset_out = offset;
diff --git a/src/cos_object.c b/src/cos_object.c
index f4cd4fd..3abe7e8 100644
--- a/src/cos_object.c
+++ b/src/cos_object.c
@@ -1,3 +1,12 @@
+/*
+ * Copyright 2017 Vincent Sanders <vince(a)netsurf-browser.org>
+ *
+ * This file is part of libnspdf
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
diff --git a/src/pdf_doc.c b/src/pdf_doc.c
index dd31b72..7bba54f 100644
--- a/src/pdf_doc.c
+++ b/src/pdf_doc.c
@@ -1,6 +1,16 @@
+/*
+ * Copyright 2017 Vincent Sanders <vince(a)netsurf-browser.org>
+ *
+ * This file is part of libnspdf
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
+#include <stdio.h>
#include "nspdferror.h"
#include "byte_class.h"
@@ -82,6 +92,7 @@ xref_get_referenced(struct pdf_doc *doc, struct cos_object **cobj_out)
offset = entry->offset;
res = cos_decode_object(doc, &offset, &indirect);
if (res != NSPDFERROR_OK) {
+ printf("failed to decode indirect object\n");
return res;
}
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=0c29558f8caf51da6c...
commit 0c29558f8caf51da6c4258fbd1ebfa341af5aab9
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
clean up error handling and start decoding of indirect objects
diff --git a/src/cos_decode.c b/src/cos_decode.c
index 3936e05..730c771 100644
--- a/src/cos_decode.c
+++ b/src/cos_decode.c
@@ -10,9 +10,13 @@
#include "cos_object.h"
#include "pdf_doc.h"
+/** increments in which cos string allocations are extended */
#define COS_STRING_ALLOC 32
-nspdferror
+/** Maximum length of cos name */
+#define NAME_MAX_LENGTH 127
+
+static nspdferror
cos_string_append(struct cos_string *s, uint8_t c)
{
//printf("appending 0x%x to %p len %d alloc %d\n", c, s->data, s->length, s->alloc);
@@ -29,7 +33,7 @@ cos_string_append(struct cos_string *s, uint8_t c)
return NSPDFERROR_OK;
}
-uint8_t xtoi(uint8_t x)
+static uint8_t xtoi(uint8_t x)
{
if (x >= '0' && x <= '9') {
x = x - '0';
@@ -41,7 +45,8 @@ uint8_t xtoi(uint8_t x)
return x;
}
-int cos_decode_number(struct pdf_doc *doc,
+static nspdferror
+cos_decode_number(struct pdf_doc *doc,
uint64_t *offset_out,
struct cos_object **cosobj_out)
{
@@ -60,7 +65,8 @@ int cos_decode_number(struct pdf_doc *doc,
uint64_t tens;
if (len == 0) {
- return -2; /* parse error no decimals in input */
+ /* parse error no decimals in input */
+ return NSPDFERROR_SYNTAX;
}
/* sum value from each place */
for (tens = 1; len > 0; tens = tens * 10, len--) {
@@ -71,7 +77,7 @@ int cos_decode_number(struct pdf_doc *doc,
cosobj = calloc(1, sizeof(struct cos_object));
if (cosobj == NULL) {
- return -1; /* memory error */
+ return NSPDFERROR_NOMEM;
}
cosobj->type = COS_TYPE_INT;
@@ -81,20 +87,20 @@ int cos_decode_number(struct pdf_doc *doc,
*offset_out = offset;
- return 0;
+ return NSPDFERROR_OK;
}
num[len] = c - '0';
offset++;
}
- return -1; /* number too long */
+ return NSPDFERROR_RANGE; /* number too long */
}
/**
- * literal string processing
+ * decode literal string
*
*/
-nspdferror
+static nspdferror
cos_decode_string(struct pdf_doc *doc,
uint64_t *offset_out,
struct cos_object **cosobj_out)
@@ -220,8 +226,10 @@ cos_decode_string(struct pdf_doc *doc,
return NSPDFERROR_OK;
}
-
-nspdferror
+/**
+ * decode hex encoded string
+ */
+static nspdferror
cos_decode_hex_string(struct pdf_doc *doc,
uint64_t *offset_out,
struct cos_object **cosobj_out)
@@ -281,8 +289,11 @@ cos_decode_hex_string(struct pdf_doc *doc,
return NSPDFERROR_SYNTAX;
}
-
-int cos_decode_dictionary(struct pdf_doc *doc,
+/**
+ * decode a dictionary object
+ */
+static nspdferror
+cos_decode_dictionary(struct pdf_doc *doc,
uint64_t *offset_out,
struct cos_object **cosobj_out)
{
@@ -302,7 +313,7 @@ int cos_decode_dictionary(struct pdf_doc *doc,
offset += 2;
doc_skip_ws(doc, &offset);
- printf("found a dictionary\n");
+ //printf("found a dictionary\n");
cosobj = calloc(1, sizeof(struct cos_object));
if (cosobj == NULL) {
@@ -314,7 +325,7 @@ int cos_decode_dictionary(struct pdf_doc *doc,
(DOC_BYTE(doc, offset + 1) != '>')) {
res = cos_decode_object(doc, &offset, &key);
- if (res != 0) {
+ if (res != NSPDFERROR_OK) {
/* todo free up any dictionary entries already created */
printf("key object decode failed\n");
return res;
@@ -322,12 +333,12 @@ int cos_decode_dictionary(struct pdf_doc *doc,
if (key->type != COS_TYPE_NAME) {
/* key value pairs without a name */
printf("key was %d not a name %d\n", key->type, COS_TYPE_NAME);
- return -1; /* syntax error */
+ return NSPDFERROR_SYNTAX;
}
- printf("key: %s\n", key->u.n);
+ //printf("key: %s\n", key->u.n);
res = cos_decode_object(doc, &offset, &value);
- if (res != 0) {
+ if (res != NSPDFERROR_OK) {
printf("Unable to decode value object in dictionary\n");
/* todo free up any dictionary entries already created */
return res;
@@ -337,7 +348,7 @@ int cos_decode_dictionary(struct pdf_doc *doc,
entry = calloc(1, sizeof(struct cos_dictionary_entry));
if (entry == NULL) {
/* todo free up any dictionary entries already created */
- return -1; /* memory error */
+ return NSPDFERROR_NOMEM;
}
entry->key = key;
@@ -353,11 +364,13 @@ int cos_decode_dictionary(struct pdf_doc *doc,
*cosobj_out = cosobj;
*offset_out = offset;
- return 0;
+ return NSPDFERROR_OK;
}
-
-nspdferror
+/**
+ * decode a list
+ */
+static nspdferror
cos_decode_list(struct pdf_doc *doc,
uint64_t *offset_out,
struct cos_object **cosobj_out)
@@ -419,19 +432,19 @@ cos_decode_list(struct pdf_doc *doc,
*cosobj_out = cosobj;
*offset_out = offset;
- return 0;
+ return NSPDFERROR_OK;
}
-#define NAME_MAX_LENGTH 127
/**
* decode a name object
*
* \todo deal with # symbols on pdf versions 1.2 and later
*/
-int cos_decode_name(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
+static nspdferror
+cos_decode_name(struct pdf_doc *doc,
+ uint64_t *offset_out,
+ struct cos_object **cosobj_out)
{
uint64_t offset;
struct cos_object *cosobj;
@@ -445,7 +458,7 @@ int cos_decode_name(struct pdf_doc *doc,
if (c != '/') {
return -1; /* names must be prefixed with a / */
}
- printf("found a name\n");
+ //printf("found a name\n");
c = DOC_BYTE(doc, offset);
while ((idx <= NAME_MAX_LENGTH) &&
@@ -468,7 +481,7 @@ int cos_decode_name(struct pdf_doc *doc,
cosobj = calloc(1, sizeof(struct cos_object));
if (cosobj == NULL) {
- return -1; /* memory error */
+ return NSPDFERROR_NOMEM; /* memory error */
}
cosobj->type = COS_TYPE_NAME;
@@ -478,13 +491,16 @@ int cos_decode_name(struct pdf_doc *doc,
*offset_out = offset;
- return 0;
+ return NSPDFERROR_OK;
}
-
-int cos_decode_boolean(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
+/**
+ * decode a cos boolean object
+ */
+static int
+cos_decode_boolean(struct pdf_doc *doc,
+ uint64_t *offset_out,
+ struct cos_object **cosobj_out)
{
uint64_t offset;
struct cos_object *cosobj;
@@ -541,7 +557,7 @@ int cos_decode_boolean(struct pdf_doc *doc,
cosobj = calloc(1, sizeof(struct cos_object));
if (cosobj == NULL) {
- return -1; /* memory error */
+ return NSPDFERROR_NOMEM; /* memory error */
}
cosobj->type = COS_TYPE_BOOL;
@@ -551,13 +567,16 @@ int cos_decode_boolean(struct pdf_doc *doc,
*offset_out = offset;
- return 0;
-
+ return NSPDFERROR_OK;
}
-int cos_decode_null(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
+/**
+ * decode the null object.
+ */
+static nspdferror
+cos_decode_null(struct pdf_doc *doc,
+ uint64_t *offset_out,
+ struct cos_object **cosobj_out)
{
uint64_t offset;
struct cos_object *cosobj;
@@ -569,14 +588,17 @@ int cos_decode_null(struct pdf_doc *doc,
if ((c != 'n') && (c != 'N')) {
return -1; /* syntax error */
}
+
c = DOC_BYTE(doc, offset++);
if ((c != 'u') && (c != 'U')) {
return -1; /* syntax error */
}
+
c = DOC_BYTE(doc, offset++);
if ((c != 'l') && (c != 'L')) {
return -1; /* syntax error */
}
+
c = DOC_BYTE(doc, offset++);
if ((c != 'l') && (c != 'L')) {
return -1; /* syntax error */
@@ -586,15 +608,16 @@ int cos_decode_null(struct pdf_doc *doc,
cosobj = calloc(1, sizeof(struct cos_object));
if (cosobj == NULL) {
- return -1; /* memory error */
+ return NSPDFERROR_NOMEM;
}
cosobj->type = COS_TYPE_NULL;
*offset_out = offset;
- return 0;
+ return NSPDFERROR_OK;
}
+
/**
* attempt to decode the stream into a reference
*
@@ -607,9 +630,10 @@ int cos_decode_null(struct pdf_doc *doc,
* \param cosobj_out the object to return into, on input contains the first
* integer
*/
-int cos_attempt_decode_reference(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
+static nspdferror
+cos_attempt_decode_reference(struct pdf_doc *doc,
+ uint64_t *offset_out,
+ struct cos_object **cosobj_out)
{
uint64_t offset;
struct cos_object *cosobj; /* possible generation object */
@@ -621,13 +645,14 @@ int cos_attempt_decode_reference(struct pdf_doc *doc,
res = cos_decode_number(doc, &offset, &cosobj);
if (res != 0) {
- return 0; /* no error if object could not be decoded */
+ /* no error if next token could not be decoded as a number */
+ return NSPDFERROR_OK;
}
if (cosobj->type != COS_TYPE_INT) {
/* next object was not an integer so not a reference */
cos_free_object(cosobj);
- return 0;
+ return NSPDFERROR_OK;
}
if (cosobj->u.i < 0) {
@@ -635,8 +660,7 @@ int cos_attempt_decode_reference(struct pdf_doc *doc,
* non-negative
*/
cos_free_object(cosobj);
- return 0;
-
+ return NSPDFERROR_OK;
}
/* two int in a row, look for the R */
@@ -644,18 +668,18 @@ int cos_attempt_decode_reference(struct pdf_doc *doc,
if (c != 'R') {
/* no R so not a reference */
cos_free_object(cosobj);
- return 0;
+ return NSPDFERROR_OK;
}
/* found reference */
- printf("found reference\n");
+ //printf("found reference\n");
doc_skip_ws(doc, &offset);
nref = calloc(1, sizeof(struct cos_reference));
if (nref == NULL) {
- /* todo free objects */
- return -1; /* memory error */
+ /** \todo free objects */
+ return NSPDFERROR_NOMEM; /* memory error */
}
nref->id = (*cosobj_out)->u.i;
@@ -670,10 +694,11 @@ int cos_attempt_decode_reference(struct pdf_doc *doc,
*offset_out = offset;
- return 0;
+ return NSPDFERROR_OK;
}
-/**
+
+/*
* Decode input stream into an object
*
* lex and parse a byte stream to generate COS objects
@@ -704,7 +729,8 @@ int cos_attempt_decode_reference(struct pdf_doc *doc,
* TOK_STRING |
* list |
* dictionary |
- * object_reference;
+ * object_reference |
+ * indirect_object;
*
* list:
* '[' listargs ']';
@@ -717,13 +743,20 @@ int cos_attempt_decode_reference(struct pdf_doc *doc,
*
* object_reference:
* TOK_UINT TOK_UINT 'R';
+ *
+ * indirect_object:
+ * TOK_UINT TOK_UINT 'obj' cos_object 'endobj'
+ * |
+ * TOK_UINT TOK_UINT 'obj' dictionary 'stream' streamdata 'endstream' 'endobj'
+ * ;
*/
-int cos_decode_object(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
+nspdferror
+cos_decode_object(struct pdf_doc *doc,
+ uint64_t *offset_out,
+ struct cos_object **cosobj_out)
{
uint64_t offset;
- int res;
+ nspdferror res;
struct cos_object *cosobj;
offset = *offset_out;
@@ -786,11 +819,10 @@ int cos_decode_object(struct pdf_doc *doc,
break;
default:
- res = -1; /* syntax error */
+ res = NSPDFERROR_SYNTAX; /* syntax error */
}
-
- if (res == 0) {
+ if (res == NSPDFERROR_OK) {
*cosobj_out = cosobj;
*offset_out = offset;
}
diff --git a/src/cos_object.c b/src/cos_object.c
index 96c669e..f4cd4fd 100644
--- a/src/cos_object.c
+++ b/src/cos_object.c
@@ -7,6 +7,7 @@
#include "nspdferror.h"
#include "cos_object.h"
+#include "pdf_doc.h"
nspdferror cos_free_object(struct cos_object *cos_obj)
@@ -115,25 +116,38 @@ cos_dictionary_extract_value(struct cos_object *dict,
return NSPDFERROR_NOTFOUND;
}
-nspdferror cos_get_int(struct cos_object *cobj, int64_t *value_out)
+nspdferror
+cos_get_int(struct pdf_doc *doc,
+ struct cos_object *cobj,
+ int64_t *value_out)
{
- if (cobj->type != COS_TYPE_INT) {
- return NSPDFERROR_TYPE;
+ nspdferror res;
+
+ res = xref_get_referenced(doc, &cobj);
+ if (res == NSPDFERROR_OK) {
+ if (cobj->type != COS_TYPE_INT) {
+ res = NSPDFERROR_TYPE;
+ } else {
+ *value_out = cobj->u.i;
+ }
}
- *value_out = cobj->u.i;
- return NSPDFERROR_OK;
+ return res;
}
nspdferror
-cos_get_dictionary(struct cos_object *cobj,
+cos_get_dictionary(struct pdf_doc *doc,
+ struct cos_object *cobj,
struct cos_object **value_out)
{
- if (cobj->type == COS_TYPE_REFERENCE) {
-
- }
- if (cobj->type != COS_TYPE_DICTIONARY) {
- return NSPDFERROR_TYPE;
+ nspdferror res;
+
+ res = xref_get_referenced(doc, &cobj);
+ if (res == NSPDFERROR_OK) {
+ if (cobj->type != COS_TYPE_DICTIONARY) {
+ res = NSPDFERROR_TYPE;
+ } else {
+ *value_out = cobj;
+ }
}
- *value_out = cobj;
- return NSPDFERROR_OK;
+ return res;
}
diff --git a/src/cos_object.h b/src/cos_object.h
index 65b3ed5..8d1449d 100644
--- a/src/cos_object.h
+++ b/src/cos_object.h
@@ -83,7 +83,12 @@ struct cos_object {
} u;
};
-int cos_decode_object(struct pdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out);
+/**
+ * Decode input stream into an object
+ *
+ * lex and parse a byte stream to generate a COS object.
+ */
+nspdferror cos_decode_object(struct pdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out);
nspdferror cos_free_object(struct cos_object *cos_obj);
@@ -91,8 +96,9 @@ nspdferror cos_dictionary_get_value(struct cos_object *dict, const char *key, st
nspdferror cos_dictionary_extract_value(struct cos_object *dict, const char *key, struct cos_object **value_out);
-nspdferror cos_get_int(struct cos_object *cobj, int64_t *value_out);
-nspdferror cos_get_dictionary(struct cos_object *cobj, struct cos_object **value_out);
+nspdferror cos_get_int(struct pdf_doc *doc, struct cos_object *cobj, int64_t *value_out);
+
+nspdferror cos_get_dictionary(struct pdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out);
diff --git a/src/pdf_doc.c b/src/pdf_doc.c
index 9b92bd0..dd31b72 100644
--- a/src/pdf_doc.c
+++ b/src/pdf_doc.c
@@ -1,4 +1,3 @@
-
#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
@@ -11,7 +10,7 @@
/**
* move offset to next non whitespace byte
*/
-int doc_skip_ws(struct pdf_doc *doc, uint64_t *offset)
+nspdferror doc_skip_ws(struct pdf_doc *doc, uint64_t *offset)
{
uint8_t c;
/* TODO sort out keeping offset in range */
@@ -28,13 +27,13 @@ int doc_skip_ws(struct pdf_doc *doc, uint64_t *offset)
}
c = DOC_BYTE(doc, *offset);
}
- return 0;
+ return NSPDFERROR_OK;
}
/**
* move offset to next non eol byte
*/
-int doc_skip_eol(struct pdf_doc *doc, uint64_t *offset)
+nspdferror doc_skip_eol(struct pdf_doc *doc, uint64_t *offset)
{
uint8_t c;
/* TODO sort out keeping offset in range */
@@ -43,5 +42,53 @@ int doc_skip_eol(struct pdf_doc *doc, uint64_t *offset)
(*offset)++;
c = DOC_BYTE(doc, *offset);
}
- return 0;
+ return NSPDFERROR_OK;
+}
+
+static struct cos_object cos_null_obj = {
+ .type = COS_TYPE_NULL,
+};
+
+nspdferror
+xref_get_referenced(struct pdf_doc *doc, struct cos_object **cobj_out)
+{
+ nspdferror res;
+ struct cos_object *cobj;
+ struct cos_object *indirect;
+ uint64_t offset;
+ struct xref_table_entry *entry;
+
+ cobj = *cobj_out;
+
+ if (cobj->type != COS_TYPE_REFERENCE) {
+ /* not passed a reference object so just return what was passed */
+ return NSPDFERROR_OK;
+ }
+
+ entry = doc->xref_table + cobj->u.reference->id;
+
+ /* check if referenced object is in range and exists. return null object if
+ * not
+ */
+ if ((cobj->u.reference->id >= doc->xref_size) ||
+ (cobj->u.reference->id == 0) ||
+ (entry->ref.id == 0)) {
+ *cobj_out = &cos_null_obj;
+ return NSPDFERROR_OK;
+ }
+
+ if (entry->object == NULL) {
+ /* indirect object has never been decoded */
+ offset = entry->offset;
+ res = cos_decode_object(doc, &offset, &indirect);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+
+ entry->object = indirect;
+ }
+
+ cobj = entry->object;
+
+ return NSPDFERROR_OK;
}
diff --git a/src/pdf_doc.h b/src/pdf_doc.h
index 696c121..b37e3b2 100644
--- a/src/pdf_doc.h
+++ b/src/pdf_doc.h
@@ -7,7 +7,7 @@ struct xref_table_entry {
uint64_t offset;
/* indirect object if already decoded */
- struct cos_object *o;
+ struct cos_object *object;
};
@@ -38,5 +38,7 @@ struct pdf_doc {
/* byte data acessory, allows for more complex buffer handling in future */
#define DOC_BYTE(doc, offset) (doc->start[(offset)])
-int doc_skip_ws(struct pdf_doc *doc, uint64_t *offset);
-int doc_skip_eol(struct pdf_doc *doc, uint64_t *offset);
+nspdferror doc_skip_ws(struct pdf_doc *doc, uint64_t *offset);
+nspdferror doc_skip_eol(struct pdf_doc *doc, uint64_t *offset);
+
+nspdferror xref_get_referenced(struct pdf_doc *doc, struct cos_object **cobj_out);
diff --git a/src/xref.c b/src/xref.c
index 5e5ac8b..b1748be 100644
--- a/src/xref.c
+++ b/src/xref.c
@@ -225,10 +225,6 @@ int check_header(struct pdf_doc *doc)
}
-
-
-
-
nspdferror
decode_trailer(struct pdf_doc *doc,
uint64_t *offset_out,
@@ -407,7 +403,7 @@ nspdferror decode_xref_trailer(struct pdf_doc *doc, uint64_t xref_offset)
goto decode_xref_trailer_failed;
}
- res = cos_get_int(cobj_size, &size);
+ res = cos_get_int(doc, cobj_size, &size);
if (res != NSPDFERROR_OK) {
printf("trailer Size not int\n");
goto decode_xref_trailer_failed;
@@ -446,7 +442,7 @@ nspdferror decode_xref_trailer(struct pdf_doc *doc, uint64_t xref_offset)
/* check for prev ID key in trailer and recurse call if present */
res = cos_dictionary_get_value(trailer, "Prev", &cobj_prev);
if (res == NSPDFERROR_OK) {
- res = cos_get_int(cobj_prev, &prev);
+ res = cos_get_int(doc, cobj_prev, &prev);
if (res != NSPDFERROR_OK) {
printf("trailer Prev not int\n");
goto decode_xref_trailer_failed;
@@ -522,7 +518,7 @@ nspdferror decode_catalog(struct pdf_doc *doc)
nspdferror res;
struct cos_object *catalog;
- res = cos_get_dictionary(doc->root, &catalog);
+ res = cos_get_dictionary(doc, doc->root, &catalog);
return res;
}
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=897a8900bf77db2d80...
commit 897a8900bf77db2d804b0f78bc4b41371e05347f
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
split code up a bit
diff --git a/src/Makefile b/src/Makefile
index f9ca22c..af806f3 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -2,11 +2,13 @@
CFLAGS+=-g -Wall -Wextra
+OBJS=xref.o byte_class.o cos_decode.o cos_object.o pdf_doc.o
+
.PHONY:all clean
all:xref
-xref:xref.o byte_class.o
+xref:$(OBJS)
clean:
- ${RM} xref xref.o
+ ${RM} xref $(OBJS)
diff --git a/src/cos_decode.c b/src/cos_decode.c
new file mode 100644
index 0000000..3936e05
--- /dev/null
+++ b/src/cos_decode.c
@@ -0,0 +1,799 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "byte_class.h"
+#include "nspdferror.h"
+#include "cos_object.h"
+#include "pdf_doc.h"
+
+#define COS_STRING_ALLOC 32
+
+/**
+ * append one byte to a cos string
+ *
+ * The backing buffer is extended by COS_STRING_ALLOC bytes whenever it is
+ * full.
+ *
+ * \param s the string to extend
+ * \param c the byte to add
+ * \return NSPDFERROR_OK on success or NSPDFERROR_NOMEM if the buffer could
+ *         not be grown, in which case the string is unchanged
+ */
+nspdferror
+cos_string_append(struct cos_string *s, uint8_t c)
+{
+    uint8_t *grown;
+
+    if (s->length == s->alloc) {
+        /* buffer exhausted, extend it by one allocation unit */
+        grown = realloc(s->data, s->alloc + COS_STRING_ALLOC);
+        if (grown == NULL) {
+            return NSPDFERROR_NOMEM;
+        }
+        s->alloc += COS_STRING_ALLOC;
+        s->data = grown;
+    }
+
+    s->data[s->length] = c;
+    s->length++;
+
+    return NSPDFERROR_OK;
+}
+
+/**
+ * convert an ASCII hexadecimal digit to its numeric value
+ *
+ * \param x the character to convert
+ * \return 0 to 15 for a hexadecimal digit of either case, any other byte is
+ *         returned unchanged
+ */
+uint8_t xtoi(uint8_t x)
+{
+    if ((x >= '0') && (x <= '9')) {
+        return x - '0';
+    }
+    if ((x >= 'a') && (x <= 'f')) {
+        return x - 'a' + 10;
+    }
+    if ((x >= 'A') && (x <= 'F')) {
+        return x - 'A' + 10;
+    }
+    return x;
+}
+
+/**
+ * decode a run of decimal digits into an integer object
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the first digit, updated to the next non
+ *                   whitespace byte after the number on success
+ * \param cosobj_out updated with a new COS_TYPE_INT object on success
+ * \return 0 on success, -2 if there were no digits at the offset, -1 if the
+ *         number had too many digits or memory was exhausted
+ */
+int cos_decode_number(struct pdf_doc *doc,
+                      uint64_t *offset_out,
+                      struct cos_object **cosobj_out)
+{
+    struct cos_object *cosobj;
+    uint8_t digits[21]; /* value of each decimal digit read */
+    unsigned int count; /* number of digits read */
+    uint64_t offset; /* current offset of source data */
+    uint64_t place; /* multiplier of the decimal place being summed */
+    int64_t result = 0; /* parsed result */
+    uint8_t c; /* current byte from source data */
+
+    offset = *offset_out;
+
+    /* gather digits until a non decimal byte or the buffer is full */
+    for (count = 0; count < sizeof(digits); count++) {
+        c = DOC_BYTE(doc, offset);
+        if ((bclass[c] & BC_DCML) != BC_DCML) {
+            break;
+        }
+        digits[count] = c - '0';
+        offset++;
+    }
+
+    if (count == sizeof(digits)) {
+        return -1; /* number too long */
+    }
+    if (count == 0) {
+        return -2; /* parse error no decimals in input */
+    }
+
+    /* sum value from each place, least significant digit first */
+    for (place = 1; count > 0; place = place * 10, count--) {
+        result += (digits[count - 1] * place);
+    }
+
+    doc_skip_ws(doc, &offset);
+
+    cosobj = calloc(1, sizeof(struct cos_object));
+    if (cosobj == NULL) {
+        return -1; /* memory error */
+    }
+    cosobj->type = COS_TYPE_INT;
+    cosobj->u.i = result;
+
+    *cosobj_out = cosobj;
+    *offset_out = offset;
+
+    return 0;
+}
+
+
+/**
+ * decode a literal string object
+ *
+ * Parses a parenthesis delimited string. Balanced unescaped parentheses are
+ * kept as part of the string, unescaped end of line sequences become a single
+ * newline, a backslash followed by an end of line is dropped and the usual
+ * backslash escapes (including one to three digit octal values) are
+ * converted.
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the opening parenthesis, updated to the next
+ *                   non whitespace byte after the string on success
+ * \param cosobj_out updated with a new COS_TYPE_STRING object on success
+ * \return NSPDFERROR_OK on success, NSPDFERROR_SYNTAX if the string is not
+ *         opened or never closed, NSPDFERROR_NOMEM on memory exhaustion. On
+ *         error nothing is returned and nothing is leaked.
+ */
+nspdferror
+cos_decode_string(struct pdf_doc *doc,
+                  uint64_t *offset_out,
+                  struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj;
+    uint8_t c;
+    unsigned int pdepth = 1; /* depth of open parens */
+    struct cos_string *cstring;
+    nspdferror res;
+    bool emit; /* whether c is to be added to the string */
+
+    offset = *offset_out;
+
+    c = DOC_BYTE(doc, offset++);
+    if (c != '(') {
+        return NSPDFERROR_SYNTAX;
+    }
+
+    cstring = calloc(1, sizeof(*cstring));
+    if (cstring == NULL) {
+        return NSPDFERROR_NOMEM;
+    }
+
+    cosobj = calloc(1, sizeof(*cosobj));
+    if (cosobj == NULL) {
+        free(cstring);
+        return NSPDFERROR_NOMEM;
+    }
+    cosobj->type = COS_TYPE_STRING;
+    cosobj->u.s = cstring;
+
+    while (pdepth > 0) {
+        if (offset >= doc->length) {
+            /* ran out of data before the closing parenthesis */
+            cos_free_object(cosobj);
+            return NSPDFERROR_SYNTAX;
+        }
+
+        emit = true;
+        c = DOC_BYTE(doc, offset++);
+
+        if (c == ')') {
+            pdepth--;
+            if (pdepth == 0) {
+                break;
+            }
+        } else if (c == '(') {
+            pdepth++;
+        } else if ((bclass[c] & BC_EOLM) != 0) {
+            /* unescaped end of line characters are translated to a single
+             * newline
+             */
+            while ((bclass[DOC_BYTE(doc, offset)] & BC_EOLM) != 0) {
+                offset++;
+            }
+            c = '\n';
+        } else if (c == '\\') {
+            /* escaped chars */
+            c = DOC_BYTE(doc, offset++);
+            switch (c) {
+            case 'n':
+                c = '\n';
+                break;
+
+            case 'r':
+                c = '\r';
+                break;
+
+            case 't':
+                c = '\t';
+                break;
+
+            case 'b':
+                c = '\b';
+                break;
+
+            case 'f':
+                c = '\f';
+                break;
+
+            case '(':
+            case ')':
+            case '\\':
+                /* escaped delimiter stands for itself */
+                break;
+
+            default:
+                if ((bclass[c] & BC_EOLM) != 0) {
+                    /* escaped end of line, swallow it and add nothing. The
+                     * byte after the line end is left for the next pass so
+                     * it is still interpreted normally.
+                     */
+                    while ((bclass[DOC_BYTE(doc, offset)] & BC_EOLM) != 0) {
+                        offset++;
+                    }
+                    emit = false;
+                } else if ((bclass[c] & BC_OCTL) != 0) {
+                    /* octal value of one to three digits */
+                    uint8_t val;
+                    val = (c - '0');
+                    c = DOC_BYTE(doc, offset);
+                    if ((bclass[c] & BC_OCTL) != 0) {
+                        offset++;
+                        val = (val << 3) | (c - '0');
+                        c = DOC_BYTE(doc, offset);
+                        if ((bclass[c] & BC_OCTL) != 0) {
+                            offset++;
+                            val = (val << 3) | (c - '0');
+                        }
+                    }
+                    /* the value is the result however many digits it had */
+                    c = val;
+                } /* else invalid (skip backslash) */
+                break;
+            }
+        }
+
+        if (emit) {
+            /* c contains the character to add to the string */
+            res = cos_string_append(cstring, c);
+            if (res != NSPDFERROR_OK) {
+                cos_free_object(cosobj);
+                return res;
+            }
+        }
+    }
+
+    doc_skip_ws(doc, &offset);
+
+    *cosobj_out = cosobj;
+    *offset_out = offset;
+
+    return NSPDFERROR_OK;
+}
+
+
+/**
+ * decode a hexadecimal string object
+ *
+ * Parses hexadecimal digit pairs between angle brackets. Whitespace between
+ * digits is ignored and a trailing unpaired digit is treated as the high
+ * nibble of a final byte.
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the opening '<', updated to the next non
+ *                   whitespace byte after the closing '>' on success
+ * \param cosobj_out updated with a new COS_TYPE_STRING object on success
+ * \return NSPDFERROR_OK on success, NSPDFERROR_SYNTAX for a malformed or
+ *         unterminated string, NSPDFERROR_NOMEM on memory exhaustion. On
+ *         error nothing is returned and nothing is leaked.
+ */
+nspdferror
+cos_decode_hex_string(struct pdf_doc *doc,
+                      uint64_t *offset_out,
+                      struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj;
+    uint8_t c;
+    uint8_t value = 0;
+    struct cos_string *cstring;
+    bool first = true; /* next digit is the high nibble */
+    nspdferror res = NSPDFERROR_SYNTAX;
+
+    offset = *offset_out;
+
+    c = DOC_BYTE(doc, offset++);
+    if (c != '<') {
+        return NSPDFERROR_SYNTAX;
+    }
+
+    cstring = calloc(1, sizeof(*cstring));
+    if (cstring == NULL) {
+        return NSPDFERROR_NOMEM;
+    }
+
+    cosobj = calloc(1, sizeof(*cosobj));
+    if (cosobj == NULL) {
+        free(cstring);
+        return NSPDFERROR_NOMEM;
+    }
+    cosobj->type = COS_TYPE_STRING;
+    cosobj->u.s = cstring;
+
+    for (; offset < doc->length; offset++) {
+        c = DOC_BYTE(doc, offset);
+        if (c == '>') {
+            if (first == false) {
+                /* flush the unpaired high nibble */
+                res = cos_string_append(cstring, value);
+                if (res != NSPDFERROR_OK) {
+                    break;
+                }
+            }
+            offset++;
+            doc_skip_ws(doc, &offset);
+
+            *cosobj_out = cosobj;
+            *offset_out = offset;
+
+            return NSPDFERROR_OK;
+        } else if ((bclass[c] & BC_HEXL) != 0) {
+            if (first) {
+                value = xtoi(c) << 4;
+                first = false;
+            } else {
+                value |= xtoi(c);
+                first = true;
+                res = cos_string_append(cstring, value);
+                if (res != NSPDFERROR_OK) {
+                    break;
+                }
+                res = NSPDFERROR_SYNTAX;
+            }
+        } else if ((bclass[c] & BC_WSPC) == 0) {
+            break; /* unknown byte value in string */
+        }
+    }
+
+    /* malformed, unterminated or out of memory; release the partial string */
+    cos_free_object(cosobj);
+
+    return res;
+}
+
+
+/**
+ * decode a dictionary object
+ *
+ * Parses "<<" followed by name and value object pairs up to the closing
+ * ">>".
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the opening "<<", updated to the next non
+ *                   whitespace byte after the closing ">>" on success
+ * \param cosobj_out updated with a new COS_TYPE_DICTIONARY object on success
+ * \return 0 on success else non zero, in which case every object created
+ *         during the decode has been freed
+ */
+int cos_decode_dictionary(struct pdf_doc *doc,
+                          uint64_t *offset_out,
+                          struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj;
+    struct cos_dictionary_entry *entry;
+    struct cos_object *key;
+    struct cos_object *value;
+    int res;
+
+    offset = *offset_out;
+
+    if ((DOC_BYTE(doc, offset) != '<') ||
+        (DOC_BYTE(doc, offset + 1) != '<')) {
+        return -1; /* syntax error */
+    }
+    offset += 2;
+    doc_skip_ws(doc, &offset);
+
+    printf("found a dictionary\n");
+
+    cosobj = calloc(1, sizeof(struct cos_object));
+    if (cosobj == NULL) {
+        return -1; /* memory error */
+    }
+    cosobj->type = COS_TYPE_DICTIONARY;
+
+    /* only both bytes of ">>" together end the dictionary; a lone '>' is
+     * left for the key decode to reject as a syntax error
+     */
+    while ((DOC_BYTE(doc, offset) != '>') ||
+           (DOC_BYTE(doc, offset + 1) != '>')) {
+
+        res = cos_decode_object(doc, &offset, &key);
+        if (res != 0) {
+            printf("key object decode failed\n");
+            cos_free_object(cosobj);
+            return res;
+        }
+        if (key->type != COS_TYPE_NAME) {
+            /* key value pairs without a name */
+            printf("key was %d not a name %d\n", key->type, COS_TYPE_NAME);
+            cos_free_object(key);
+            cos_free_object(cosobj);
+            return -1; /* syntax error */
+        }
+        printf("key: %s\n", key->u.n);
+
+        res = cos_decode_object(doc, &offset, &value);
+        if (res != 0) {
+            printf("Unable to decode value object in dictionary\n");
+            cos_free_object(key);
+            cos_free_object(cosobj);
+            return res;
+        }
+
+        /* add dictionary entry */
+        entry = calloc(1, sizeof(struct cos_dictionary_entry));
+        if (entry == NULL) {
+            cos_free_object(key);
+            cos_free_object(value);
+            cos_free_object(cosobj);
+            return -1; /* memory error */
+        }
+
+        entry->key = key;
+        entry->value = value;
+        entry->next = cosobj->u.dictionary;
+
+        cosobj->u.dictionary = entry;
+
+    }
+    offset += 2; /* skip closing >> */
+    doc_skip_ws(doc, &offset);
+
+    *cosobj_out = cosobj;
+    *offset_out = offset;
+
+    return 0;
+}
+
+
+/**
+ * decode an array object
+ *
+ * Parses '[' followed by any number of objects up to the closing ']'. The
+ * entries are linked in the order they appear in the document.
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the opening '[', updated to the next non
+ *                   whitespace byte after the closing ']' on success
+ * \param cosobj_out updated with a new COS_TYPE_ARRAY object on success
+ * \return NSPDFERROR_OK on success else an error code, in which case every
+ *         object created during the decode has been freed
+ */
+nspdferror
+cos_decode_list(struct pdf_doc *doc,
+                uint64_t *offset_out,
+                struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj;
+    struct cos_array_entry *entry;
+    struct cos_array_entry **tail; /* link the next entry is stored in */
+    struct cos_object *value;
+    nspdferror res;
+
+    offset = *offset_out;
+
+    /* sanity check first token is list open */
+    if (DOC_BYTE(doc, offset) != '[') {
+        printf("not a [\n");
+        return NSPDFERROR_SYNTAX; /* syntax error */
+    }
+    offset++;
+
+    /* advance offset to next token */
+    res = doc_skip_ws(doc, &offset);
+    if (res != NSPDFERROR_OK) {
+        return res;
+    }
+
+    printf("found a list\n");
+
+    cosobj = calloc(1, sizeof(struct cos_object));
+    if (cosobj == NULL) {
+        return NSPDFERROR_NOMEM;
+    }
+    cosobj->type = COS_TYPE_ARRAY;
+    tail = &cosobj->u.array;
+
+    while (DOC_BYTE(doc, offset) != ']') {
+
+        res = cos_decode_object(doc, &offset, &value);
+        if (res != NSPDFERROR_OK) {
+            cos_free_object(cosobj);
+            printf("Unable to decode value object in list\n");
+            return res;
+        }
+
+        /* add entry to array */
+        entry = calloc(1, sizeof(struct cos_array_entry));
+        if (entry == NULL) {
+            /* value is not yet owned by the array so free it separately */
+            cos_free_object(value);
+            cos_free_object(cosobj);
+            return NSPDFERROR_NOMEM;
+        }
+
+        /* append so array order matches document order; the entry next
+         * pointer is already NULL from calloc
+         */
+        entry->value = value;
+        *tail = entry;
+        tail = &entry->next;
+    }
+    offset++; /* skip closing ] */
+
+    doc_skip_ws(doc, &offset);
+
+    *cosobj_out = cosobj;
+    *offset_out = offset;
+
+    return NSPDFERROR_OK;
+}
+
+#define NAME_MAX_LENGTH 127
+
+/**
+ * decode a name object
+ *
+ * The name is every byte after the leading '/' up to the next whitespace or
+ * delimiter byte and may be at most NAME_MAX_LENGTH bytes long.
+ *
+ * \todo deal with # symbols on pdf versions 1.2 and later
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the leading '/', updated to the next non
+ *                   whitespace byte after the name on success
+ * \param cosobj_out updated with a new COS_TYPE_NAME object on success
+ * \return 0 on success, -1 if there is no leading '/', the name is too long
+ *         or memory is exhausted
+ */
+int cos_decode_name(struct pdf_doc *doc,
+                    uint64_t *offset_out,
+                    struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj;
+    uint8_t c;
+    char name[NAME_MAX_LENGTH + 1];
+    int idx = 0;
+
+    offset = *offset_out;
+
+    c = DOC_BYTE(doc, offset++);
+    if (c != '/') {
+        return -1; /* names must be prefixed with a / */
+    }
+    printf("found a name\n");
+
+    /* idx may reach NAME_MAX_LENGTH + 1 which marks an over long name; the
+     * last byte written is still inside the buffer
+     */
+    c = DOC_BYTE(doc, offset);
+    while ((idx <= NAME_MAX_LENGTH) &&
+           ((bclass[c] & (BC_WSPC | BC_DELM)) == 0)) {
+        offset++;
+        name[idx++] = c;
+        c = DOC_BYTE(doc, offset);
+    }
+    if (idx > NAME_MAX_LENGTH) {
+        /* name length exceeded implementation limit */
+        return -1;
+    }
+    name[idx] = 0;
+
+    doc_skip_ws(doc, &offset);
+
+    cosobj = calloc(1, sizeof(struct cos_object));
+    if (cosobj == NULL) {
+        return -1; /* memory error */
+    }
+
+    cosobj->type = COS_TYPE_NAME;
+    cosobj->u.n = strdup(name);
+    if (cosobj->u.n == NULL) {
+        /* never hand back a name object without its name */
+        free(cosobj);
+        return -1; /* memory error */
+    }
+
+    *cosobj_out = cosobj;
+
+    *offset_out = offset;
+
+    return 0;
+}
+
+
+/**
+ * match the remaining letters of a keyword regardless of case
+ *
+ * One byte is consumed for every letter compared, including the one that
+ * fails to match.
+ *
+ * \param doc the pdf document
+ * \param offset cursor in the document, advanced past the bytes compared
+ * \param lower the letters to match, in lower case
+ * \return true if every letter matched else false
+ */
+static bool
+cos_match_nocase(struct pdf_doc *doc, uint64_t *offset, const char *lower)
+{
+    uint8_t c;
+
+    for (; *lower != '\0'; lower++) {
+        c = DOC_BYTE(doc, *offset);
+        (*offset)++;
+        if ((c != (uint8_t)*lower) &&
+            (c != (uint8_t)(*lower - 'a' + 'A'))) {
+            return false;
+        }
+    }
+    return true;
+}
+
+/**
+ * decode a boolean object
+ *
+ * Accepts the keywords true and false in any mix of upper and lower case.
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the keyword, updated to the next non
+ *                   whitespace byte after it on success
+ * \param cosobj_out updated with a new COS_TYPE_BOOL object on success
+ * \return 0 on success, -1 on a syntax or memory error
+ */
+int cos_decode_boolean(struct pdf_doc *doc,
+                       uint64_t *offset_out,
+                       struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj;
+    uint8_t first;
+    bool value;
+
+    offset = *offset_out;
+
+    /* the first letter selects which keyword the rest must spell */
+    first = DOC_BYTE(doc, offset++);
+    switch (first) {
+    case 't':
+    case 'T':
+        if (!cos_match_nocase(doc, &offset, "rue")) {
+            return -1; /* syntax error */
+        }
+        value = true;
+        break;
+
+    case 'f':
+    case 'F':
+        if (!cos_match_nocase(doc, &offset, "alse")) {
+            return -1; /* syntax error */
+        }
+        value = false;
+        break;
+
+    default:
+        return -1; /* syntax error */
+    }
+
+    doc_skip_ws(doc, &offset);
+
+    cosobj = calloc(1, sizeof(struct cos_object));
+    if (cosobj == NULL) {
+        return -1; /* memory error */
+    }
+    cosobj->type = COS_TYPE_BOOL;
+    cosobj->u.b = value;
+
+    *cosobj_out = cosobj;
+    *offset_out = offset;
+
+    return 0;
+}
+
+/**
+ * decode a null object
+ *
+ * Accepts the keyword null in any mix of upper and lower case.
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the keyword, updated to the next non
+ *                   whitespace byte after it on success
+ * \param cosobj_out updated with a new COS_TYPE_NULL object on success
+ * \return 0 on success, -1 on a syntax or memory error
+ */
+int cos_decode_null(struct pdf_doc *doc,
+                    uint64_t *offset_out,
+                    struct cos_object **cosobj_out)
+{
+    static const char lower[] = "null";
+    static const char upper[] = "NULL";
+    uint64_t offset;
+    struct cos_object *cosobj;
+    uint8_t c;
+    unsigned int idx;
+
+    offset = *offset_out;
+
+    for (idx = 0; idx < (sizeof(lower) - 1); idx++) {
+        c = DOC_BYTE(doc, offset++);
+        if ((c != (uint8_t)lower[idx]) && (c != (uint8_t)upper[idx])) {
+            return -1; /* syntax error */
+        }
+    }
+
+    doc_skip_ws(doc, &offset);
+
+    cosobj = calloc(1, sizeof(struct cos_object));
+    if (cosobj == NULL) {
+        return -1; /* memory error */
+    }
+
+    cosobj->type = COS_TYPE_NULL;
+
+    /* the caller takes ownership of the new object */
+    *cosobj_out = cosobj;
+    *offset_out = offset;
+
+    return 0;
+}
+
+/**
+ * attempt to decode the stream into a reference
+ *
+ * The stream has already had a positive integer decoded from it. if another
+ * positive integer follows and a R character after that it is a reference,
+ * otherwise bail, but not finding a ref is not an error!
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of current cursor in stream
+ * \param cosobj_out the object to return into, on input contains the first
+ * integer
+ */
+int cos_attempt_decode_reference(struct pdf_doc *doc,
+                                 uint64_t *offset_out,
+                                 struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj; /* possible generation object */
+    uint8_t c;
+    int res;
+    struct cos_reference *nref; /* new reference */
+
+    offset = *offset_out;
+
+    res = cos_decode_number(doc, &offset, &cosobj);
+    if (res != 0) {
+        return 0; /* no error if object could not be decoded */
+    }
+
+    if (cosobj->type != COS_TYPE_INT) {
+        /* next object was not an integer so not a reference */
+        cos_free_object(cosobj);
+        return 0;
+    }
+
+    if (cosobj->u.i < 0) {
+        /* integer was negative so not a reference (generations must be
+         * non-negative)
+         */
+        cos_free_object(cosobj);
+        return 0;
+    }
+
+    /* two int in a row, look for the R */
+    c = DOC_BYTE(doc, offset++);
+    if (c != 'R') {
+        /* no R so not a reference */
+        cos_free_object(cosobj);
+        return 0;
+    }
+
+    /* found reference */
+
+    printf("found reference\n");
+    doc_skip_ws(doc, &offset);
+
+    nref = calloc(1, sizeof(struct cos_reference));
+    if (nref == NULL) {
+        /* release the generation object created here; the first integer
+         * still belongs to the caller and is left untouched
+         */
+        cos_free_object(cosobj);
+        return -1; /* memory error */
+    }
+
+    nref->id = (*cosobj_out)->u.i;
+    nref->generation = cosobj->u.i;
+
+    /* the first integer is replaced by the reference so release it */
+    cos_free_object(*cosobj_out);
+
+    /* reuse the generation object as the reference object */
+    cosobj->type = COS_TYPE_REFERENCE;
+    cosobj->u.reference = nref;
+
+    *cosobj_out = cosobj;
+
+    *offset_out = offset;
+
+    return 0;
+}
+
+/**
+ * Decode input stream into an object
+ *
+ * lex and parse a byte stream to generate COS objects
+ *
+ * lexing the input.
+ * check first character:
+ *
+ * < either a hex string or a dictionary
+ * second char < means dictionary else hex string
+ * - either an integer or real
+ * + either an integer or real
+ * 0-9 an integer, unsigned integer or real
+ * . a real number
+ * ( a string
+ * / a name
+ * [ a list
+ * t|T boolean true
+ * f|F boolean false
+ * n|N null
+ *
+ * Grammar is:
+ * cos_object:
+ * TOK_NULL |
+ * TOK_BOOLEAN |
+ * TOK_INT |
+ * TOK_REAL |
+ * TOK_NAME |
+ * TOK_STRING |
+ * list |
+ * dictionary |
+ * object_reference;
+ *
+ * list:
+ * '[' listargs ']';
+ *
+ * listargs:
+ * cos_object
+ * |
+ * listargs cos_object
+ * ;
+ *
+ * object_reference:
+ * TOK_UINT TOK_UINT 'R';
+ */
+int cos_decode_object(struct pdf_doc *doc,
+                      uint64_t *offset_out,
+                      struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    int res;
+    struct cos_object *cosobj = NULL; /* never hand back an unset pointer */
+
+    offset = *offset_out;
+
+    /* object could be any type use first char to try and select */
+    switch (DOC_BYTE(doc, offset)) {
+
+    case '-':
+    case '+':
+    case '.':
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+    case '8':
+    case '9':
+        res = cos_decode_number(doc, &offset, &cosobj);
+        /* if type is positive integer try to check for reference */
+        if ((res == 0) &&
+            (cosobj->type == COS_TYPE_INT) &&
+            (cosobj->u.i > 0)) {
+            res = cos_attempt_decode_reference(doc, &offset, &cosobj);
+            if (res != 0) {
+                /* the attempt failed part way and left the integer with
+                 * us; it is not being returned so release it
+                 */
+                cos_free_object(cosobj);
+                cosobj = NULL;
+            }
+        }
+        break;
+
+    case '<':
+        if (DOC_BYTE(doc, offset + 1) == '<') {
+            res = cos_decode_dictionary(doc, &offset, &cosobj);
+        } else {
+            res = cos_decode_hex_string(doc, &offset, &cosobj);
+        }
+        break;
+
+    case '(':
+        res = cos_decode_string(doc, &offset, &cosobj);
+        break;
+
+    case '/':
+        res = cos_decode_name(doc, &offset, &cosobj);
+        break;
+
+    case '[':
+        res = cos_decode_list(doc, &offset, &cosobj);
+        break;
+
+    case 't':
+    case 'T':
+    case 'f':
+    case 'F':
+        res = cos_decode_boolean(doc, &offset, &cosobj);
+        break;
+
+    case 'n':
+    case 'N':
+        res = cos_decode_null(doc, &offset, &cosobj);
+        break;
+
+    default:
+        res = -1; /* syntax error */
+        break;
+    }
+
+    /* outputs are only updated when an object was decoded */
+    if (res == 0) {
+        *cosobj_out = cosobj;
+        *offset_out = offset;
+    }
+
+    return res;
+}
diff --git a/src/cos_object.c b/src/cos_object.c
new file mode 100644
index 0000000..96c669e
--- /dev/null
+++ b/src/cos_object.c
@@ -0,0 +1,139 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "nspdferror.h"
+#include "cos_object.h"
+
+
+/**
+ * free a cos object and everything it owns
+ *
+ * Dictionary keys and values and array values are freed recursively.
+ *
+ * \param cos_obj the object to free, must not be NULL
+ * \return NSPDFERROR_OK
+ */
+nspdferror cos_free_object(struct cos_object *cos_obj)
+{
+    struct cos_dictionary_entry *dentry;
+    struct cos_array_entry *aentry;
+
+    switch (cos_obj->type) {
+    case COS_TYPE_NAME:
+        free(cos_obj->u.n);
+        break;
+
+    case COS_TYPE_STRING:
+        free(cos_obj->u.s->data);
+        free(cos_obj->u.s);
+        break;
+
+    case COS_TYPE_DICTIONARY:
+        dentry = cos_obj->u.dictionary;
+        while (dentry != NULL) {
+            struct cos_dictionary_entry *odentry;
+
+            cos_free_object(dentry->key);
+            cos_free_object(dentry->value);
+
+            odentry = dentry;
+            dentry = dentry->next;
+            free(odentry);
+        }
+        break;
+
+    case COS_TYPE_ARRAY:
+        aentry = cos_obj->u.array;
+        while (aentry != NULL) {
+            struct cos_array_entry *oaentry;
+
+            cos_free_object(aentry->value);
+
+            oaentry = aentry;
+            aentry = aentry->next;
+            free(oaentry);
+        }
+        /* must not fall through: u.stream shares storage with u.array and
+         * would free the already released first entry a second time
+         */
+        break;
+
+    case COS_TYPE_STREAM:
+        free(cos_obj->u.stream);
+        break;
+
+    case COS_TYPE_REFERENCE:
+        free(cos_obj->u.reference);
+        break;
+
+    default:
+        /* remaining types own no separate storage */
+        break;
+    }
+    free(cos_obj);
+
+    return NSPDFERROR_OK;
+}
+
+/**
+ * look up the value for a key in a dictionary
+ *
+ * The value remains owned by the dictionary.
+ *
+ * \param dict the dictionary object to search
+ * \param key the name to look for
+ * \param value_out updated with the value object if the key is found
+ * \return NSPDFERROR_OK if found, NSPDFERROR_TYPE if dict is not a
+ *         dictionary, NSPDFERROR_NOTFOUND if the key is not present
+ */
+nspdferror
+cos_dictionary_get_value(struct cos_object *dict,
+                         const char *key,
+                         struct cos_object **value_out)
+{
+    struct cos_dictionary_entry *cur;
+
+    if (dict->type != COS_TYPE_DICTIONARY) {
+        return NSPDFERROR_TYPE;
+    }
+
+    for (cur = dict->u.dictionary; cur != NULL; cur = cur->next) {
+        if (strcmp(cur->key->u.n, key) != 0) {
+            continue;
+        }
+        *value_out = cur->value;
+        return NSPDFERROR_OK;
+    }
+
+    return NSPDFERROR_NOTFOUND;
+}
+
+/**
+ * remove a key from a dictionary and return its value
+ *
+ * The matching entry is unlinked, its key object and the entry itself are
+ * freed and the value is passed to the caller.
+ *
+ * \param dict the dictionary object to search
+ * \param key the name to look for
+ * \param value_out updated with the value object if the key is found
+ * \return NSPDFERROR_OK if found, NSPDFERROR_TYPE if dict is not a
+ *         dictionary, NSPDFERROR_NOTFOUND if the key is not present
+ */
+nspdferror
+cos_dictionary_extract_value(struct cos_object *dict,
+                             const char *key,
+                             struct cos_object **value_out)
+{
+    struct cos_dictionary_entry **link; /* pointer that refers to cur */
+    struct cos_dictionary_entry *cur;
+
+    if (dict->type != COS_TYPE_DICTIONARY) {
+        return NSPDFERROR_TYPE;
+    }
+
+    for (link = &dict->u.dictionary; *link != NULL; link = &cur->next) {
+        cur = *link;
+        if (strcmp(cur->key->u.n, key) != 0) {
+            continue;
+        }
+
+        /* unlink the entry before releasing it */
+        *value_out = cur->value;
+        *link = cur->next;
+        cos_free_object(cur->key);
+        free(cur);
+        return NSPDFERROR_OK;
+    }
+
+    return NSPDFERROR_NOTFOUND;
+}
+
+/**
+ * get the value of an integer object
+ *
+ * \param cobj the object to read
+ * \param value_out updated with the integer value on success
+ * \return NSPDFERROR_OK on success, NSPDFERROR_TYPE if the object is not an
+ *         integer
+ */
+nspdferror cos_get_int(struct cos_object *cobj, int64_t *value_out)
+{
+    if (cobj->type == COS_TYPE_INT) {
+        *value_out = cobj->u.i;
+        return NSPDFERROR_OK;
+    }
+    return NSPDFERROR_TYPE;
+}
+
+/**
+ * get an object as a dictionary
+ *
+ * References are not resolved here, so a reference object is reported as a
+ * type error like any other non dictionary.
+ *
+ * \param cobj the object to check
+ * \param value_out updated with the object itself on success
+ * \return NSPDFERROR_OK on success, NSPDFERROR_TYPE if the object is not a
+ *         dictionary
+ */
+nspdferror
+cos_get_dictionary(struct cos_object *cobj,
+                   struct cos_object **value_out)
+{
+    if (cobj->type == COS_TYPE_DICTIONARY) {
+        *value_out = cobj;
+        return NSPDFERROR_OK;
+    }
+    return NSPDFERROR_TYPE;
+}
diff --git a/src/cos_object.h b/src/cos_object.h
new file mode 100644
index 0000000..65b3ed5
--- /dev/null
+++ b/src/cos_object.h
@@ -0,0 +1,98 @@
+struct pdf_doc;
+
+/** kinds of object, stored in the type member of struct cos_object */
+enum cos_type {
+ COS_TYPE_NULL,
+ COS_TYPE_BOOL,
+ COS_TYPE_INT,
+ COS_TYPE_REAL,
+ COS_TYPE_NAME,
+ COS_TYPE_STRING,
+ COS_TYPE_ARRAY,
+ COS_TYPE_DICTIONARY,
+ COS_TYPE_NAMETREE,
+ COS_TYPE_NUMBERTREE,
+ COS_TYPE_STREAM,
+ COS_TYPE_REFERENCE,
+};
+
+struct cos_object;
+
+/** one key/value pair in the singly linked list that forms a dictionary */
+struct cos_dictionary_entry {
+ /** next key/value in dictionary */
+ struct cos_dictionary_entry *next;
+
+ /** key (name) */
+ struct cos_object *key;
+
+ /** value */
+ struct cos_object *value;
+};
+
+/** one value in the singly linked list that forms an array */
+struct cos_array_entry {
+ /** next value in array */
+ struct cos_array_entry *next;
+
+ /** value */
+ struct cos_object *value;
+};
+
+/** growable byte string */
+struct cos_string {
+ /** string bytes */
+ uint8_t *data;
+ /** number of bytes of data in use */
+ size_t length;
+ /** number of bytes allocated for data */
+ size_t alloc;
+};
+
+/** identifies an indirect object */
+struct cos_reference {
+ /** id of indirect object */
+ uint64_t id;
+
+ /* generation of indirect object */
+ uint64_t generation;
+};
+
+/** a single object; type selects which member of the union is valid */
+struct cos_object {
+ /** one of enum cos_type */
+ int type;
+ union {
+ /** boolean */
+ bool b;
+
+ /** integer */
+ int64_t i;
+
+ /** real */
+ double r;
+
+ /** name */
+ char *n;
+
+ /** string */
+ struct cos_string *s;
+
+ /** stream data */
+ uint8_t *stream;
+
+ /* dictionary */
+ struct cos_dictionary_entry *dictionary;
+
+ /* array */
+ struct cos_array_entry *array;
+
+ /** reference */
+ struct cos_reference *reference;
+
+ } u;
+};
+
+int cos_decode_object(struct pdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out);
+
+nspdferror cos_free_object(struct cos_object *cos_obj);
+
+nspdferror cos_dictionary_get_value(struct cos_object *dict, const char *key, struct cos_object **value_out);
+
+nspdferror cos_dictionary_extract_value(struct cos_object *dict, const char *key, struct cos_object **value_out);
+
+nspdferror cos_get_int(struct cos_object *cobj, int64_t *value_out);
+
+nspdferror cos_get_dictionary(struct cos_object *cobj, struct cos_object **value_out);
+
+
diff --git a/src/pdf_doc.c b/src/pdf_doc.c
new file mode 100644
index 0000000..9b92bd0
--- /dev/null
+++ b/src/pdf_doc.c
@@ -0,0 +1,47 @@
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+
+#include "nspdferror.h"
+#include "byte_class.h"
+#include "cos_object.h"
+#include "pdf_doc.h"
+
+/**
+ * advance offset past whitespace and comments
+ *
+ * A comment byte causes everything up to, but not including, the next end of
+ * line byte to be skipped as well.
+ *
+ * \param doc the pdf document
+ * \param offset cursor in the document, updated to the next byte that is
+ *               neither whitespace nor part of a comment
+ * \return always 0
+ */
+int doc_skip_ws(struct pdf_doc *doc, uint64_t *offset)
+{
+    uint8_t c;
+
+    /* TODO sort out keeping offset in range */
+    for (c = DOC_BYTE(doc, *offset);
+         (bclass[c] & (BC_WSPC | BC_CMNT)) != 0;
+         c = DOC_BYTE(doc, *offset)) {
+        (*offset)++;
+        if ((bclass[c] & BC_CMNT) == 0) {
+            continue;
+        }
+        /* skip comments */
+        while ((bclass[DOC_BYTE(doc, *offset)] & BC_EOLM) == 0) {
+            (*offset)++;
+        }
+    }
+    return 0;
+}
+
+/**
+ * advance offset past any end of line bytes
+ *
+ * \param doc the pdf document
+ * \param offset cursor in the document, updated to the next byte that is not
+ *               an end of line byte
+ * \return always 0
+ */
+int doc_skip_eol(struct pdf_doc *doc, uint64_t *offset)
+{
+    /* TODO sort out keeping offset in range */
+    while ((bclass[DOC_BYTE(doc, *offset)] & BC_EOLM) != 0) {
+        (*offset)++;
+    }
+    return 0;
+}
diff --git a/src/pdf_doc.h b/src/pdf_doc.h
new file mode 100644
index 0000000..696c121
--- /dev/null
+++ b/src/pdf_doc.h
@@ -0,0 +1,42 @@
+/** indirect object entry in the cross reference table */
+struct xref_table_entry {
+ /* reference identifier */
+ struct cos_reference ref;
+
+ /** offset of object */
+ uint64_t offset;
+
+ /* indirect object if already decoded */
+ struct cos_object *o;
+};
+
+
+/** pdf document */
+struct pdf_doc {
+ /* NOTE(review): presumably the whole input as read from file - confirm
+ * against read_whole_pdf()
+ */
+ uint8_t *buffer;
+ uint64_t buffer_length;
+
+ uint8_t *start; /* start of pdf document in input stream */
+ uint64_t length; /* number of bytes addressable from start */
+
+ /* NOTE(review): presumably the version from the file header - confirm
+ * against check_header()
+ */
+ int major;
+ int minor;
+
+ /**
+ * Indirect object cross reference table
+ */
+ uint64_t xref_size;
+ struct xref_table_entry *xref_table;
+
+ /* NOTE(review): presumably the objects named by the trailer dictionary -
+ * confirm against decode_xref_trailer()
+ */
+ struct cos_object *root;
+ struct cos_object *encrypt;
+ struct cos_object *info;
+ struct cos_object *id;
+
+};
+
+/* byte data accessor, allows for more complex buffer handling in future.
+ * offset is relative to the start of the pdf document, no range check is
+ * made
+ */
+#define DOC_BYTE(doc, offset) (doc->start[(offset)])
+
+int doc_skip_ws(struct pdf_doc *doc, uint64_t *offset);
+int doc_skip_eol(struct pdf_doc *doc, uint64_t *offset);
diff --git a/src/xref.c b/src/xref.c
index d6a07b8..5e5ac8b 100644
--- a/src/xref.c
+++ b/src/xref.c
@@ -7,136 +7,12 @@
#include "nspdferror.h"
#include "byte_class.h"
+#include "cos_object.h"
+#include "pdf_doc.h"
#define SLEN(x) (sizeof((x)) - 1)
-enum cos_type {
- COS_TYPE_NULL,
- COS_TYPE_BOOL,
- COS_TYPE_INT,
- COS_TYPE_REAL,
- COS_TYPE_NAME,
- COS_TYPE_STRING,
- COS_TYPE_ARRAY,
- COS_TYPE_DICTIONARY,
- COS_TYPE_NAMETREE,
- COS_TYPE_NUMBERTREE,
- COS_TYPE_STREAM,
- COS_TYPE_REFERENCE,
-};
-
-struct cos_object;
-
-struct cos_dictionary_entry {
- /** next key/value in dictionary */
- struct cos_dictionary_entry *next;
-
- /** key (name) */
- struct cos_object *key;
-
- /** value */
- struct cos_object *value;
-};
-
-struct cos_array_entry {
- /** next value in array */
- struct cos_array_entry *next;
-
- /** value */
- struct cos_object *value;
-};
-
-struct cos_string {
- uint8_t *data;
- size_t length;
- size_t alloc;
-};
-
-struct cos_reference {
- /** id of indirect object */
- uint64_t id;
-
- /* generation of indirect object */
- uint64_t generation;
-};
-
-struct cos_object {
- int type;
- union {
- /** boolean */
- bool b;
-
- /** integer */
- int64_t i;
-
- /** real */
- double r;
-
- /** name */
- char *n;
-
- /** string */
- struct cos_string *s;
-
- /** stream data */
- uint8_t *stream;
-
- /* dictionary */
- struct cos_dictionary_entry *dictionary;
-
- /* array */
- struct cos_array_entry *array;
-
- /** reference */
- struct cos_reference *reference;
-
- } u;
-};
-
-
-/** indirect object */
-struct cos_indirect_object {
- /* reference identifier */
- struct cos_reference ref;
-
- /** offset of object */
- uint64_t offset;
-
- /* direct object if already decoded */
- struct cos_object *o;
-};
-
-
-/** pdf document */
-struct pdf_doc {
- uint8_t *buffer;
- uint64_t buffer_length;
-
- uint8_t *start; /* start of pdf document in input stream */
- uint64_t length;
-
- int major;
- int minor;
-
- /**
- * Indirect object cross reference table
- */
- uint64_t xref_size;
- struct cos_indirect_object *xref_table;
-
- struct cos_object *root;
- struct cos_object *encrypt;
- struct cos_object *info;
- struct cos_object *id;
-
-};
-
-
-int cos_decode_object(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out);
-
int
read_whole_pdf(struct pdf_doc *doc, const char *fname)
{
@@ -178,46 +54,7 @@ read_whole_pdf(struct pdf_doc *doc, const char *fname)
#define STARTXREF_SEARCH_SIZE 1024
-/* byte data acessory, allows for more complex buffer handling in future */
-#define DOC_BYTE(doc, offset) (doc->start[(offset)])
-
-/**
- * move offset to next non whitespace byte
- */
-static int doc_skip_ws(struct pdf_doc *doc, uint64_t *offset)
-{
- uint8_t c;
- /* TODO sort out keeping offset in range */
- c = DOC_BYTE(doc, *offset);
- while ((bclass[c] & (BC_WSPC | BC_CMNT) ) != 0) {
- (*offset)++;
- /* skip comments */
- if ((bclass[c] & BC_CMNT) != 0) {
- c = DOC_BYTE(doc, *offset);
- while ((bclass[c] & BC_EOLM ) == 0) {
- (*offset)++;
- c = DOC_BYTE(doc, *offset);
- }
- }
- c = DOC_BYTE(doc, *offset);
- }
- return 0;
-}
-/**
- * move offset to next non eol byte
- */
-static int doc_skip_eol(struct pdf_doc *doc, uint64_t *offset)
-{
- uint8_t c;
- /* TODO sort out keeping offset in range */
- c = DOC_BYTE(doc, *offset);
- while ((bclass[c] & BC_EOLM) != 0) {
- (*offset)++;
- c = DOC_BYTE(doc, *offset);
- }
- return 0;
-}
static nspdferror
doc_read_uint(struct pdf_doc *doc, uint64_t *offset_out, uint64_t *result_out)
@@ -388,842 +225,7 @@ int check_header(struct pdf_doc *doc)
}
-nspdferror cos_free_object(struct cos_object *cos_obj)
-{
- struct cos_dictionary_entry *dentry;
- struct cos_array_entry *aentry;
-
- switch (cos_obj->type) {
- case COS_TYPE_NAME:
- free(cos_obj->u.n);
- break;
-
- case COS_TYPE_STRING:
- free(cos_obj->u.s->data);
- free(cos_obj->u.s);
- break;
-
- case COS_TYPE_DICTIONARY:
- dentry = cos_obj->u.dictionary;
- while (dentry != NULL) {
- struct cos_dictionary_entry *odentry;
-
- cos_free_object(dentry->key);
- cos_free_object(dentry->value);
-
- odentry = dentry;
- dentry = dentry->next;
- free(odentry);
- }
- break;
- case COS_TYPE_ARRAY:
- aentry = cos_obj->u.array;
- while (aentry != NULL) {
- struct cos_array_entry *oaentry;
-
- cos_free_object(aentry->value);
-
- oaentry = aentry;
- aentry = aentry->next;
- free(oaentry);
- }
-
- case COS_TYPE_STREAM:
- free(cos_obj->u.stream);
- break;
-
- }
- free(cos_obj);
-
- return NSPDFERROR_OK;
-}
-
-int cos_decode_number(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- struct cos_object *cosobj;
- uint8_t c; /* current byte from source data */
- unsigned int len; /* number of decimal places in number */
- uint8_t num[21]; /* temporary buffer for decimal values */
- uint64_t offset; /* current offset of source data */
-
- offset = *offset_out;
-
- for (len = 0; len < sizeof(num); len++) {
- c = DOC_BYTE(doc, offset);
- if ((bclass[c] & BC_DCML) != BC_DCML) {
- int64_t result = 0; /* parsed result */
- uint64_t tens;
-
- if (len == 0) {
- return -2; /* parse error no decimals in input */
- }
- /* sum value from each place */
- for (tens = 1; len > 0; tens = tens * 10, len--) {
- result += (num[len - 1] * tens);
- }
-
- doc_skip_ws(doc, &offset);
-
- cosobj = calloc(1, sizeof(struct cos_object));
- if (cosobj == NULL) {
- return -1; /* memory error */
- }
-
- cosobj->type = COS_TYPE_INT;
- cosobj->u.i = result;
-
- *cosobj_out = cosobj;
-
- *offset_out = offset;
-
- return 0;
- }
- num[len] = c - '0';
- offset++;
- }
- return -1; /* number too long */
-}
-
-#define COS_STRING_ALLOC 32
-
-nspdferror
-cos_string_append(struct cos_string *s, uint8_t c)
-{
- //printf("appending 0x%x to %p len %d alloc %d\n", c, s->data, s->length, s->alloc);
- if (s->length == s->alloc) {
- uint8_t *ns;
- ns = realloc(s->data, s->alloc + COS_STRING_ALLOC);
- if (ns == NULL) {
- return NSPDFERROR_NOMEM;
- }
- s->data = ns;
- s->alloc += COS_STRING_ALLOC;
- }
- s->data[s->length++] = c;
- return NSPDFERROR_OK;
-}
-
-/**
- * literal string processing
- *
- */
-nspdferror
-cos_decode_string(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- uint64_t offset;
- struct cos_object *cosobj;
- uint8_t c;
- unsigned int pdepth = 1; /* depth of open parens */
- struct cos_string *cstring;
-
- offset = *offset_out;
-
- c = DOC_BYTE(doc, offset++);
- if (c != '(') {
- return NSPDFERROR_SYNTAX;
- }
-
- cstring = calloc(1, sizeof(*cstring));
- if (cstring == NULL) {
- return NSPDFERROR_NOMEM;
- }
-
- cosobj = calloc(1, sizeof(*cosobj));
- if (cosobj == NULL) {
- return NSPDFERROR_NOMEM;
- }
- cosobj->type = COS_TYPE_STRING;
- cosobj->u.s = cstring;
-
- while (pdepth > 0) {
- c = DOC_BYTE(doc, offset++);
-
- if (c == ')') {
- pdepth--;
- if (pdepth == 0) {
- break;
- }
- } else if (c == '(') {
- pdepth++;
- } else if ((bclass[c] & BC_EOLM ) != 0) {
- /* unescaped end of line characters are translated to a single
- * newline
- */
- c = DOC_BYTE(doc, offset);
- while ((bclass[c] & BC_EOLM) != 0) {
- offset++;
- c = DOC_BYTE(doc, offset);
- }
- c = '\n';
- } else if (c == '\\') {
- /* escaped chars */
- c = DOC_BYTE(doc, offset++);
- switch (c) {
- case 'n':
- c = '\n';
- break;
-
- case 'r':
- c = '\r';
- break;
-
- case 't':
- c = '\t';
- break;
-
- case 'b':
- c = '\b';
- break;
-
- case 'f':
- c = '\f';
- break;
-
- case '(':
- c = '(';
- break;
-
- case ')':
- c = ')';
- break;
-
- case '\\':
- c = '\\';
- break;
-
- default:
-
- if ((bclass[c] & BC_EOLM) != 0) {
- /* escaped end of line, swallow it */
- c = DOC_BYTE(doc, offset++);
- while ((bclass[c] & BC_EOLM) != 0) {
- c = DOC_BYTE(doc, offset++);
- }
- } else if ((bclass[c] & BC_OCTL) != 0) {
- /* octal value */
- uint8_t val;
- val = (c - '0');
- c = DOC_BYTE(doc, offset);
- if ((bclass[c] & BC_OCTL) != 0) {
- offset++;
- val = (val << 3) | (c - '0');
- c = DOC_BYTE(doc, offset);
- if ((bclass[c] & BC_OCTL) != 0) {
- offset++;
- val = (val << 3) | (c - '0');
- c = val;
- }
- }
- } /* else invalid (skip backslash) */
- break;
- }
- }
-
- /* c contains the character to add to the string */
- cos_string_append(cstring, c);
- }
-
- doc_skip_ws(doc, &offset);
-
- *cosobj_out = cosobj;
- *offset_out = offset;
-
- return NSPDFERROR_OK;
-}
-
-uint8_t xtoi(uint8_t x)
-{
- if (x >= '0' && x <= '9') {
- x = x - '0';
- } else if (x >= 'a' && x <='f') {
- x = x - 'a' + 10;
- } else if (x >= 'A' && x <='F') {
- x = x - 'A' + 10;
- }
- return x;
-}
-
-nspdferror
-cos_decode_hex_string(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- uint64_t offset;
- struct cos_object *cosobj;
- uint8_t c;
- uint8_t value = 0;
- struct cos_string *cstring;
- bool first = true;
-
- offset = *offset_out;
-
- c = DOC_BYTE(doc, offset++);
- if (c != '<') {
- return NSPDFERROR_SYNTAX;
- }
-
- cstring = calloc(1, sizeof(*cstring));
- if (cstring == NULL) {
- return NSPDFERROR_NOMEM;
- }
-
- cosobj = calloc(1, sizeof(*cosobj));
- if (cosobj == NULL) {
- return NSPDFERROR_NOMEM;
- }
- cosobj->type = COS_TYPE_STRING;
- cosobj->u.s = cstring;
-
- for (; offset < doc->length; offset++) {
- c = DOC_BYTE(doc, offset);
- if (c == '>') {
- if (first == false) {
- cos_string_append(cstring, value);
- }
- offset++;
- doc_skip_ws(doc, &offset);
-
- *cosobj_out = cosobj;
- *offset_out = offset;
-
- return NSPDFERROR_OK;
- } else if ((bclass[c] & BC_HEXL) != 0) {
- if (first) {
- value = xtoi(c) << 4;
- first = false;
- } else {
- value |= xtoi(c);
- first = true;
- cos_string_append(cstring, value);
- }
- } else if ((bclass[c] & BC_WSPC) == 0) {
- break; /* unknown byte value in string */
- }
- }
- return NSPDFERROR_SYNTAX;
-}
-
-
-int cos_decode_dictionary(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- uint64_t offset;
- struct cos_object *cosobj;
- struct cos_dictionary_entry *entry;
- struct cos_object *key;
- struct cos_object *value;
- int res;
-
- offset = *offset_out;
-
- if ((DOC_BYTE(doc, offset) != '<') ||
- (DOC_BYTE(doc, offset + 1) != '<')) {
- return -1; /* syntax error */
- }
- offset += 2;
- doc_skip_ws(doc, &offset);
-
- printf("found a dictionary\n");
-
- cosobj = calloc(1, sizeof(struct cos_object));
- if (cosobj == NULL) {
- return -1; /* memory error */
- }
- cosobj->type = COS_TYPE_DICTIONARY;
-
- while ((DOC_BYTE(doc, offset) != '>') &&
- (DOC_BYTE(doc, offset + 1) != '>')) {
-
- res = cos_decode_object(doc, &offset, &key);
- if (res != 0) {
- /* todo free up any dictionary entries already created */
- printf("key object decode failed\n");
- return res;
- }
- if (key->type != COS_TYPE_NAME) {
- /* key value pairs without a name */
- printf("key was %d not a name %d\n", key->type, COS_TYPE_NAME);
- return -1; /* syntax error */
- }
- printf("key: %s\n", key->u.n);
-
- res = cos_decode_object(doc, &offset, &value);
- if (res != 0) {
- printf("Unable to decode value object in dictionary\n");
- /* todo free up any dictionary entries already created */
- return res;
- }
-
- /* add dictionary entry */
- entry = calloc(1, sizeof(struct cos_dictionary_entry));
- if (entry == NULL) {
- /* todo free up any dictionary entries already created */
- return -1; /* memory error */
- }
-
- entry->key = key;
- entry->value = value;
- entry->next = cosobj->u.dictionary;
-
- cosobj->u.dictionary = entry;
-
- }
- offset += 2; /* skip closing >> */
- doc_skip_ws(doc, &offset);
-
- *cosobj_out = cosobj;
- *offset_out = offset;
-
- return 0;
-}
-
-
-nspdferror
-cos_decode_list(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- uint64_t offset;
- struct cos_object *cosobj;
- struct cos_array_entry *entry;
- struct cos_object *value;
- nspdferror res;
-
- offset = *offset_out;
-
- /* sanity check first token is list open */
- if (DOC_BYTE(doc, offset) != '[') {
- printf("not a [\n");
- return NSPDFERROR_SYNTAX; /* syntax error */
- }
- offset++;
-
- /* advance offset to next token */
- res = doc_skip_ws(doc, &offset);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- printf("found a list\n");
-
- cosobj = calloc(1, sizeof(struct cos_object));
- if (cosobj == NULL) {
- return NSPDFERROR_NOMEM;
- }
- cosobj->type = COS_TYPE_ARRAY;
-
- while (DOC_BYTE(doc, offset) != ']') {
-
- res = cos_decode_object(doc, &offset, &value);
- if (res != NSPDFERROR_OK) {
- cos_free_object(cosobj);
- printf("Unable to decode value object in list\n");
- return res;
- }
-
- /* add entry to array */
- entry = calloc(1, sizeof(struct cos_array_entry));
- if (entry == NULL) {
- cos_free_object(cosobj);
- return NSPDFERROR_NOMEM;
- }
-
- entry->value = value;
- entry->next = cosobj->u.array;
-
- cosobj->u.array = entry;
- }
- offset++; /* skip closing ] */
-
- doc_skip_ws(doc, &offset);
-
- *cosobj_out = cosobj;
- *offset_out = offset;
-
- return 0;
-}
-
-#define NAME_MAX_LENGTH 127
-
-/**
- * decode a name object
- *
- * \todo deal with # symbols on pdf versions 1.2 and later
- */
-int cos_decode_name(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- uint64_t offset;
- struct cos_object *cosobj;
- uint8_t c;
- char name[NAME_MAX_LENGTH + 1];
- int idx = 0;
-
- offset = *offset_out;
-
- c = DOC_BYTE(doc, offset++);
- if (c != '/') {
- return -1; /* names must be prefixed with a / */
- }
- printf("found a name\n");
-
- c = DOC_BYTE(doc, offset);
- while ((idx <= NAME_MAX_LENGTH) &&
- ((bclass[c] & (BC_WSPC | BC_DELM)) == 0)) {
- offset++;
- //printf("%c", c);
- name[idx++] = c;
- c = DOC_BYTE(doc, offset);
- }
- //printf("\nidx: %d\n", idx);
- if (idx > NAME_MAX_LENGTH) {
- /* name length exceeded implementation limit */
- return -1;
- }
- name[idx] = 0;
-
- //printf("name: %s\n", name);
-
- doc_skip_ws(doc, &offset);
-
- cosobj = calloc(1, sizeof(struct cos_object));
- if (cosobj == NULL) {
- return -1; /* memory error */
- }
-
- cosobj->type = COS_TYPE_NAME;
- cosobj->u.n = strdup(name);
-
- *cosobj_out = cosobj;
-
- *offset_out = offset;
-
- return 0;
-}
-
-
-int cos_decode_boolean(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- uint64_t offset;
- struct cos_object *cosobj;
- uint8_t c;
- bool value;
-
- offset = *offset_out;
-
- c = DOC_BYTE(doc, offset++);
- if ((c == 't') || (c == 'T')) {
- /* true branch */
-
- c = DOC_BYTE(doc, offset++);
- if ((c != 'r') && (c != 'R')) {
- return -1; /* syntax error */
- }
- c = DOC_BYTE(doc, offset++);
- if ((c != 'u') && (c != 'U')) {
- return -1; /* syntax error */
- }
- c = DOC_BYTE(doc, offset++);
- if ((c != 'e') && (c != 'E')) {
- return -1; /* syntax error */
- }
- value = true;
-
- } else if ((c == 'f') || (c == 'F')) {
- /* false branch */
-
- c = DOC_BYTE(doc, offset++);
- if ((c != 'a') && (c != 'A')) {
- return -1; /* syntax error */
- }
- c = DOC_BYTE(doc, offset++);
- if ((c != 'l') && (c != 'L')) {
- return -1; /* syntax error */
- }
- c = DOC_BYTE(doc, offset++);
- if ((c != 's') && (c != 'S')) {
- return -1; /* syntax error */
- }
- c = DOC_BYTE(doc, offset++);
- if ((c != 'e') && (c != 'E')) {
- return -1; /* syntax error */
- }
-
- value = false;
-
- } else {
- return -1; /* syntax error */
- }
-
- doc_skip_ws(doc, &offset);
-
- cosobj = calloc(1, sizeof(struct cos_object));
- if (cosobj == NULL) {
- return -1; /* memory error */
- }
-
- cosobj->type = COS_TYPE_BOOL;
- cosobj->u.b = value;
-
- *cosobj_out = cosobj;
-
- *offset_out = offset;
-
- return 0;
-
-}
-
-int cos_decode_null(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- uint64_t offset;
- struct cos_object *cosobj;
- uint8_t c;
-
- offset = *offset_out;
-
- c = DOC_BYTE(doc, offset++);
- if ((c != 'n') && (c != 'N')) {
- return -1; /* syntax error */
- }
- c = DOC_BYTE(doc, offset++);
- if ((c != 'u') && (c != 'U')) {
- return -1; /* syntax error */
- }
- c = DOC_BYTE(doc, offset++);
- if ((c != 'l') && (c != 'L')) {
- return -1; /* syntax error */
- }
- c = DOC_BYTE(doc, offset++);
- if ((c != 'l') && (c != 'L')) {
- return -1; /* syntax error */
- }
-
- doc_skip_ws(doc, &offset);
-
- cosobj = calloc(1, sizeof(struct cos_object));
- if (cosobj == NULL) {
- return -1; /* memory error */
- }
-
- cosobj->type = COS_TYPE_NULL;
- *offset_out = offset;
-
- return 0;
-}
-
-/**
- * attempt to decode the stream into a reference
- *
- * The stream has already had a positive integer decoded from it. if another
- * positive integer follows and a R character after that it is a reference,
- * otherwise bail, but not finding a ref is not an error!
- *
- * \param doc the pdf document
- * \param offset_out offset of current cursor in stream
- * \param cosobj_out the object to return into, on input contains the first
- * integer
- */
-int cos_attempt_decode_reference(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- uint64_t offset;
- struct cos_object *cosobj; /* possible generation object */
- uint8_t c;
- int res;
- struct cos_reference *nref; /* new reference */
-
- offset = *offset_out;
-
- res = cos_decode_number(doc, &offset, &cosobj);
- if (res != 0) {
- return 0; /* no error if object could not be decoded */
- }
-
- if (cosobj->type != COS_TYPE_INT) {
- /* next object was not an integer so not a reference */
- cos_free_object(cosobj);
- return 0;
- }
-
- if (cosobj->u.i < 0) {
- /* integer was negative so not a reference (generations must be
- * non-negative
- */
- cos_free_object(cosobj);
- return 0;
-
- }
-
- /* two int in a row, look for the R */
- c = DOC_BYTE(doc, offset++);
- if (c != 'R') {
- /* no R so not a reference */
- cos_free_object(cosobj);
- return 0;
- }
-
- /* found reference */
-
- printf("found reference\n");
- doc_skip_ws(doc, &offset);
-
- nref = calloc(1, sizeof(struct cos_reference));
- if (nref == NULL) {
- /* todo free objects */
- return -1; /* memory error */
- }
-
- nref->id = (*cosobj_out)->u.i;
- nref->generation = cosobj->u.i;
-
- cos_free_object(*cosobj_out);
-
- cosobj->type = COS_TYPE_REFERENCE;
- cosobj->u.reference = nref;
-
- *cosobj_out = cosobj;
-
- *offset_out = offset;
-
- return 0;
-}
-
-/**
- * Decode input stream into an object
- *
- * lex and parse a byte stream to generate COS objects
- *
- * lexing the input.
- * check first character:
- *
- * < either a hex string or a dictionary
- * second char < means dictionary else hex string
- * - either an integer or real
- * + either an integer or real
- * 0-9 an integer, unsigned integer or real
- * . a real number
- * ( a string
- * / a name
- * [ a list
- * t|T boolean true
- * f|F boolean false
- * n|N null
- *
- * Grammar is:
- * cos_object:
- * TOK_NULL |
- * TOK_BOOLEAN |
- * TOK_INT |
- * TOK_REAL |
- * TOK_NAME |
- * TOK_STRING |
- * list |
- * dictionary |
- * object_reference;
- *
- * list:
- * '[' listargs ']';
- *
- * listargs:
- * cos_object
- * |
- * listargs cos_object
- * ;
- *
- * object_reference:
- * TOK_UINT TOK_UINT 'R';
- */
-int cos_decode_object(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- uint64_t offset;
- int res;
- struct cos_object *cosobj;
-
- offset = *offset_out;
-
- /* object could be any type use first char to try and select */
- switch (DOC_BYTE(doc, offset)) {
-
- case '-':
- case '+':
- case '.':
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- res = cos_decode_number(doc, &offset, &cosobj);
- /* if type is positive integer try to check for reference */
- if ((res == 0) &&
- (cosobj->type == COS_TYPE_INT) &&
- (cosobj->u.i > 0)) {
- res = cos_attempt_decode_reference(doc, &offset, &cosobj);
- }
- break;
-
- case '<':
- if (DOC_BYTE(doc, offset + 1) == '<') {
- res = cos_decode_dictionary(doc, &offset, &cosobj);
- } else {
- res = cos_decode_hex_string(doc, &offset, &cosobj);
- }
- break;
-
- case '(':
- res = cos_decode_string(doc, &offset, &cosobj);
- break;
-
- case '/':
- res = cos_decode_name(doc, &offset, &cosobj);
- break;
-
- case '[':
- res = cos_decode_list(doc, &offset, &cosobj);
- break;
-
- case 't':
- case 'T':
- case 'f':
- case 'F':
- res = cos_decode_boolean(doc, &offset, &cosobj);
- break;
-
- case 'n':
- case 'N':
- res = cos_decode_null(doc, &offset, &cosobj);
- break;
-
- default:
- res = -1; /* syntax error */
- }
-
-
- if (res == 0) {
- *cosobj_out = cosobj;
- *offset_out = offset;
- }
-
- return res;
-}
@@ -1335,7 +337,7 @@ decode_xref(struct pdf_doc *doc, uint64_t *offset_out)
if ((DOC_BYTE(doc, offset++) == 'n')) {
if (objnumber < doc->xref_size) {
- struct cos_indirect_object *indobj;
+ struct xref_table_entry *indobj;
indobj = doc->xref_table + objnumber;
indobj->ref.id = objnumber;
@@ -1357,69 +359,6 @@ decode_xref(struct pdf_doc *doc, uint64_t *offset_out)
return NSPDFERROR_OK;
}
-nspdferror
-cos_dictionary_get_value(struct cos_object *dict,
- const char *key,
- struct cos_object **value_out)
-{
- struct cos_dictionary_entry *entry;
-
- if (dict->type != COS_TYPE_DICTIONARY) {
- return NSPDFERROR_TYPE;
- }
-
- entry = dict->u.dictionary;
- while (entry != NULL) {
- if (strcmp(entry->key->u.n, key) == 0) {
- *value_out = entry->value;
- return NSPDFERROR_OK;
- }
- entry = entry->next;
- }
- return NSPDFERROR_NOTFOUND;
-}
-
-/**
- * extracts a value for a key in a dictionary.
- *
- * this finds and returns a value for a given key removing it from a dictionary
- */
-nspdferror
-cos_dictionary_extract_value(struct cos_object *dict,
- const char *key,
- struct cos_object **value_out)
-{
- struct cos_dictionary_entry *entry;
- struct cos_dictionary_entry **prev;
-
- if (dict->type != COS_TYPE_DICTIONARY) {
- return NSPDFERROR_TYPE;
- }
-
- prev = &dict->u.dictionary;
- entry = *prev;
- while (entry != NULL) {
- if (strcmp(entry->key->u.n, key) == 0) {
- *value_out = entry->value;
- *prev = entry->next;
- cos_free_object(entry->key);
- free(entry);
- return NSPDFERROR_OK;
- }
- prev = &entry->next;
- entry = *prev;
- }
- return NSPDFERROR_NOTFOUND;
-}
-
-nspdferror cos_get_int(struct cos_object *cobj, int64_t *value_out)
-{
- if (cobj->type != COS_TYPE_INT) {
- return NSPDFERROR_TYPE;
- }
- *value_out = cobj->u.i;
- return NSPDFERROR_OK;
-}
/**
* recursively parse trailers and xref tables
@@ -1480,7 +419,7 @@ nspdferror decode_xref_trailer(struct pdf_doc *doc, uint64_t xref_offset)
goto decode_xref_trailer_failed;
}
- doc->xref_table = calloc(size, sizeof(struct cos_indirect_object));
+ doc->xref_table = calloc(size, sizeof(struct xref_table_entry));
if (doc->xref_table == NULL) {
res = NSPDFERROR_NOMEM;
goto decode_xref_trailer_failed;
@@ -1580,7 +519,12 @@ nspdferror decode_trailers(struct pdf_doc *doc)
nspdferror decode_catalog(struct pdf_doc *doc)
{
- return NSPDFERROR_OK;
+ nspdferror res;
+ struct cos_object *catalog;
+
+ res = cos_get_dictionary(doc->root, &catalog);
+
+ return res;
}
nspdferror new_pdf_doc(struct pdf_doc **doc_out)
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=b22daf7d9b39210a0c...
commit b22daf7d9b39210a0c7f8b5522a884a828b656b0
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
extract all trailer data
diff --git a/src/xref.c b/src/xref.c
index bdae8c7..d6a07b8 100644
--- a/src/xref.c
+++ b/src/xref.c
@@ -125,12 +125,11 @@ struct pdf_doc {
uint64_t xref_size;
struct cos_indirect_object *xref_table;
- /**
- * trailer object
- *
- * @todo probably unecessary and should extract just what is required
- */
- struct cos_object *trailer;
+ struct cos_object *root;
+ struct cos_object *encrypt;
+ struct cos_object *info;
+ struct cos_object *id;
+
};
@@ -1358,7 +1357,10 @@ decode_xref(struct pdf_doc *doc, uint64_t *offset_out)
return NSPDFERROR_OK;
}
-nspdferror cos_dictionary_get_value(struct cos_object *dict, const char *key, struct cos_object **value_out)
+nspdferror
+cos_dictionary_get_value(struct cos_object *dict,
+ const char *key,
+ struct cos_object **value_out)
{
struct cos_dictionary_entry *entry;
@@ -1377,6 +1379,39 @@ nspdferror cos_dictionary_get_value(struct cos_object *dict, const char *key, st
return NSPDFERROR_NOTFOUND;
}
+/**
+ * extracts a value for a key in a dictionary.
+ *
+ * this finds and returns a value for a given key removing it from a dictionary
+ */
+nspdferror
+cos_dictionary_extract_value(struct cos_object *dict,
+ const char *key,
+ struct cos_object **value_out)
+{
+ struct cos_dictionary_entry *entry;
+ struct cos_dictionary_entry **prev;
+
+ if (dict->type != COS_TYPE_DICTIONARY) {
+ return NSPDFERROR_TYPE;
+ }
+
+ prev = &dict->u.dictionary;
+ entry = *prev;
+ while (entry != NULL) {
+ if (strcmp(entry->key->u.n, key) == 0) {
+ *value_out = entry->value;
+ *prev = entry->next;
+ cos_free_object(entry->key);
+ free(entry);
+ return NSPDFERROR_OK;
+ }
+ prev = &entry->next;
+ entry = *prev;
+ }
+ return NSPDFERROR_NOTFOUND;
+}
+
nspdferror cos_get_int(struct cos_object *cobj, int64_t *value_out)
{
if (cobj->type != COS_TYPE_INT) {
@@ -1402,7 +1437,7 @@ nspdferror decode_xref_trailer(struct pdf_doc *doc, uint64_t xref_offset)
res = find_trailer(doc, &offset);
if (res != NSPDFERROR_OK) {
- printf("failed to decode startxref\n");
+ printf("failed to find last trailer\n");
return res;
}
@@ -1415,14 +1450,14 @@ nspdferror decode_xref_trailer(struct pdf_doc *doc, uint64_t xref_offset)
res = decode_startxref(doc, &offset, &startxref);
if (res != NSPDFERROR_OK) {
printf("failed to decode startxref\n");
- return res;
+ goto decode_xref_trailer_failed;
}
if (startxref != xref_offset) {
printf("startxref and Prev value disagree\n");
}
- if (doc->trailer == NULL) {
+ if (doc->xref_table == NULL) {
/* extract Size from trailer and create xref table large enough */
struct cos_object *cobj_size;
int64_t size;
@@ -1430,19 +1465,43 @@ nspdferror decode_xref_trailer(struct pdf_doc *doc, uint64_t xref_offset)
res = cos_dictionary_get_value(trailer, "Size", &cobj_size);
if (res != NSPDFERROR_OK) {
printf("trailer has no Size value\n");
- return res;
+ goto decode_xref_trailer_failed;
}
+
res = cos_get_int(cobj_size, &size);
if (res != NSPDFERROR_OK) {
printf("trailer Size not int\n");
- return res;
+ goto decode_xref_trailer_failed;
+ }
+
+ res = cos_dictionary_extract_value(trailer, "Root", &doc->root);
+ if (res != NSPDFERROR_OK) {
+ printf("no Root!\n");
+ goto decode_xref_trailer_failed;
}
+
doc->xref_table = calloc(size, sizeof(struct cos_indirect_object));
if (doc->xref_table == NULL) {
- return NSPDFERROR_NOMEM;
+ res = NSPDFERROR_NOMEM;
+ goto decode_xref_trailer_failed;
}
doc->xref_size = size;
- doc->trailer = trailer;
+
+ res = cos_dictionary_extract_value(trailer, "Encrypt", &doc->encrypt);
+ if ((res != NSPDFERROR_OK) && (res != NSPDFERROR_NOTFOUND)) {
+ goto decode_xref_trailer_failed;
+ }
+
+ res = cos_dictionary_extract_value(trailer, "Info", &doc->info);
+ if ((res != NSPDFERROR_OK) && (res != NSPDFERROR_NOTFOUND)) {
+ goto decode_xref_trailer_failed;
+ }
+
+ res = cos_dictionary_extract_value(trailer, "ID", &doc->id);
+ if ((res != NSPDFERROR_OK) && (res != NSPDFERROR_NOTFOUND)) {
+ goto decode_xref_trailer_failed;
+ }
+
}
/* check for prev ID key in trailer and recurse call if present */
@@ -1451,24 +1510,26 @@ nspdferror decode_xref_trailer(struct pdf_doc *doc, uint64_t xref_offset)
res = cos_get_int(cobj_prev, &prev);
if (res != NSPDFERROR_OK) {
printf("trailer Prev not int\n");
- return res;
+ goto decode_xref_trailer_failed;
}
res = decode_xref_trailer(doc, prev);
if (res != NSPDFERROR_OK) {
- return res;
+ goto decode_xref_trailer_failed;
}
}
offset = xref_offset;
+ /** @todo deal with XrefStm (number) in trailer */
res = decode_xref(doc, &offset);
if (res != NSPDFERROR_OK) {
printf("failed to decode xref table\n");
- return res;
+ goto decode_xref_trailer_failed;
}
- /** @todo free trailer? */
+decode_xref_trailer_failed:
+ cos_free_object(trailer);
return res;
}
@@ -1517,6 +1578,10 @@ nspdferror decode_trailers(struct pdf_doc *doc)
return decode_xref_trailer(doc, startxref);
}
+nspdferror decode_catalog(struct pdf_doc *doc)
+{
+ return NSPDFERROR_OK;
+}
nspdferror new_pdf_doc(struct pdf_doc **doc_out)
{
@@ -1558,5 +1623,11 @@ int main(int argc, char **argv)
return res;
}
+ res = decode_catalog(doc);
+ if (res != NSPDFERROR_OK) {
+ printf("failed to decode catalog (%d)\n", res);
+ return res;
+ }
+
return 0;
}
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=ce39609925fe0e1c63...
commit ce39609925fe0e1c63ca7042ccfb67d84b9ea499
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
fix xref table building
diff --git a/src/nspdferror.h b/src/nspdferror.h
new file mode 100644
index 0000000..c6042eb
--- /dev/null
+++ b/src/nspdferror.h
@@ -0,0 +1,9 @@
+typedef enum {
+ NSPDFERROR_OK,
+ NSPDFERROR_NOMEM,
+ NSPDFERROR_SYNTAX, /**< syntax error in parse */
+ NSPDFERROR_SIZE, /**< not enough input data */
+ NSPDFERROR_RANGE, /**< value outside type range */
+ NSPDFERROR_TYPE, /**< wrong type error */
+ NSPDFERROR_NOTFOUND, /**< key not found */
+} nspdferror;
diff --git a/src/xref.c b/src/xref.c
index 94b0ee0..bdae8c7 100644
--- a/src/xref.c
+++ b/src/xref.c
@@ -5,17 +5,11 @@
#include <stdbool.h>
#include <string.h>
+#include "nspdferror.h"
#include "byte_class.h"
#define SLEN(x) (sizeof((x)) - 1)
-typedef enum {
- NSPDFERROR_OK,
- NSPDFERROR_NOMEM,
- NSPDFERROR_SYNTAX, /* syntax error in parse */
- NSPDFERROR_SIZE, /* not enough input data */
- NSPDFERROR_RANGE, /* value outside type range */
-} nspdferror;
enum cos_type {
COS_TYPE_NULL,
@@ -101,21 +95,19 @@ struct cos_object {
};
-/** linked list of indirect objects */
+/** indirect object */
struct cos_indirect_object {
- /** next in list */
- struct cos_indirect_object *next;
-
/* reference identifier */
struct cos_reference ref;
/** offset of object */
uint64_t offset;
- /* direct object */
+ /* direct object if already decoded */
struct cos_object *o;
};
+
/** pdf document */
struct pdf_doc {
uint8_t *buffer;
@@ -127,11 +119,18 @@ struct pdf_doc {
int major;
int minor;
- /** start of current xref table */
- uint64_t startxref;
-
- /** indirect objects from document body */
- struct cos_indirect_object *cos_list;
+ /**
+ * Indirect object cross reference table
+ */
+ uint64_t xref_size;
+ struct cos_indirect_object *xref_table;
+
+ /**
+ * trailer object
+ *
+ * @todo probably unecessary and should extract just what is required
+ */
+ struct cos_object *trailer;
};
@@ -389,29 +388,6 @@ int check_header(struct pdf_doc *doc)
return -1;
}
-/* add indirect object */
-int cos_indirect_object_add(struct pdf_doc *doc,
- uint64_t obj_number,
- uint64_t obj_offset,
- uint64_t obj_generation)
-{
- struct cos_indirect_object *nobj;
- nobj = calloc(1, sizeof(struct cos_indirect_object));
-
- if (nobj == NULL) {
- return -1;
- }
- nobj->next = doc->cos_list;
- nobj->ref.id = obj_number;
- nobj->ref.generation = obj_generation;
- nobj->offset = obj_offset;
-
- doc->cos_list = nobj;
-
- printf("xref %"PRIu64" %"PRIu64" %"PRIu64"\n",
- obj_number, obj_offset, obj_generation);
- return 0;
-}
nspdferror cos_free_object(struct cos_object *cos_obj)
{
@@ -1292,12 +1268,13 @@ decode_trailer(struct pdf_doc *doc,
return NSPDFERROR_OK;
}
-int decode_xref(struct pdf_doc *doc, uint64_t *offset_out)
+nspdferror
+decode_xref(struct pdf_doc *doc, uint64_t *offset_out)
{
- int res;
- uint64_t objnum; /* current object number */
- uint64_t lastobjnum;
uint64_t offset;
+ nspdferror res;
+ uint64_t objnumber; /* current object number */
+ uint64_t objcount;
offset = *offset_out;
@@ -1306,58 +1283,107 @@ int decode_xref(struct pdf_doc *doc, uint64_t *offset_out)
(DOC_BYTE(doc, offset + 1) != 'r') &&
(DOC_BYTE(doc, offset + 2) != 'e') &&
(DOC_BYTE(doc, offset + 3) != 'f')) {
- return -1;
+ return NSPDFERROR_SYNTAX;
}
offset += 4;
- doc_skip_ws(doc, &offset);
- /* first object number in table */
- res = doc_read_uint(doc, &offset, &objnum);
- while (res == 0) {
- doc_skip_ws(doc, &offset);
+ res = doc_skip_ws(doc, &offset);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+
+ /* subsections
+ * <first object number> <number of references in subsection>
+ */
+ res = doc_read_uint(doc, &offset, &objnumber);
+ while (res == NSPDFERROR_OK) {
+ uint64_t lastobj;
+ res = doc_skip_ws(doc, &offset);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
- /* last object number in table */
- res = doc_read_uint(doc, &offset, &lastobjnum);
- if (res != 0) {
+ res = doc_read_uint(doc, &offset, &objcount);
+ if (res != NSPDFERROR_OK) {
return res;
}
- doc_skip_ws(doc, &offset);
- lastobjnum += objnum;
+ res = doc_skip_ws(doc, &offset);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
- /* object index entries */
- while (objnum < lastobjnum) {
- uint64_t obj_start;
- uint64_t obj_generation;
+ //printf("decoding subsection %lld %lld\n", objnumber, objcount);
- /* object offset */
- res = doc_read_uint(doc, &offset, &obj_start);
- if (res != 0) {
+ lastobj = objnumber + objcount;
+ for (; objnumber < lastobj ; objnumber++) {
+ /* each entry is a fixed format */
+ uint64_t objindex;
+ uint64_t objgeneration;
+
+ /* object index */
+ res = doc_read_uint(doc, &offset, &objindex);
+ if (res != NSPDFERROR_OK) {
return res;
}
- doc_skip_ws(doc, &offset);
+ offset++; /* skip space */
- res = doc_read_uint(doc, &offset, &obj_generation);
- if (res != 0) {
+ res = doc_read_uint(doc, &offset, &objgeneration);
+ if (res != NSPDFERROR_OK) {
return res;
}
- doc_skip_ws(doc, &offset);
+ offset++; /* skip space */
+
+ if ((DOC_BYTE(doc, offset++) == 'n')) {
+ if (objnumber < doc->xref_size) {
+ struct cos_indirect_object *indobj;
+ indobj = doc->xref_table + objnumber;
+
+ indobj->ref.id = objnumber;
+ indobj->ref.generation = objgeneration;
+ indobj->offset = objindex;
- if ((DOC_BYTE(doc, offset) == 'n')) {
- cos_indirect_object_add(doc, objnum, obj_start, obj_generation);
+ //printf("xref %lld %lld -> %lld\n", objnumber, objgeneration, objindex);
+ } else {
+ printf("index out of bounds\n");
+ }
}
- offset++;
- doc_skip_ws(doc, &offset);
- objnum++;
+ offset += 2; /* skip EOL */
}
- // printf("at objnum %"PRIu64"\n", objnum);
- /* first object number in table */
- res = doc_read_uint(doc, &offset, &objnum);
+ res = doc_read_uint(doc, &offset, &objnumber);
}
- *offset_out = offset;
- return 0;
+
+ return NSPDFERROR_OK;
+}
+
+nspdferror cos_dictionary_get_value(struct cos_object *dict, const char *key, struct cos_object **value_out)
+{
+ struct cos_dictionary_entry *entry;
+
+ if (dict->type != COS_TYPE_DICTIONARY) {
+ return NSPDFERROR_TYPE;
+ }
+
+ entry = dict->u.dictionary;
+ while (entry != NULL) {
+ if (strcmp(entry->key->u.n, key) == 0) {
+ *value_out = entry->value;
+ return NSPDFERROR_OK;
+ }
+ entry = entry->next;
+ }
+ return NSPDFERROR_NOTFOUND;
+}
+
+nspdferror cos_get_int(struct cos_object *cobj, int64_t *value_out)
+{
+ if (cobj->type != COS_TYPE_INT) {
+ return NSPDFERROR_TYPE;
+ }
+ *value_out = cobj->u.i;
+ return NSPDFERROR_OK;
}
/**
@@ -1369,6 +1395,8 @@ nspdferror decode_xref_trailer(struct pdf_doc *doc, uint64_t xref_offset)
uint64_t offset; /* the current data offset */
uint64_t startxref; /* the value of the startxref field */
struct cos_object *trailer; /* the current trailer */
+ struct cos_object *cobj_prev;
+ int64_t prev;
offset = xref_offset;
@@ -1394,20 +1422,53 @@ nspdferror decode_xref_trailer(struct pdf_doc *doc, uint64_t xref_offset)
printf("startxref and Prev value disagree\n");
}
- /* extract Size from trailer and create xref table large enough */
+ if (doc->trailer == NULL) {
+ /* extract Size from trailer and create xref table large enough */
+ struct cos_object *cobj_size;
+ int64_t size;
+
+ res = cos_dictionary_get_value(trailer, "Size", &cobj_size);
+ if (res != NSPDFERROR_OK) {
+ printf("trailer has no Size value\n");
+ return res;
+ }
+ res = cos_get_int(cobj_size, &size);
+ if (res != NSPDFERROR_OK) {
+ printf("trailer Size not int\n");
+ return res;
+ }
+ doc->xref_table = calloc(size, sizeof(struct cos_indirect_object));
+ if (doc->xref_table == NULL) {
+ return NSPDFERROR_NOMEM;
+ }
+ doc->xref_size = size;
+ doc->trailer = trailer;
+ }
/* check for prev ID key in trailer and recurse call if present */
+ res = cos_dictionary_get_value(trailer, "Prev", &cobj_prev);
+ if (res == NSPDFERROR_OK) {
+ res = cos_get_int(cobj_prev, &prev);
+ if (res != NSPDFERROR_OK) {
+ printf("trailer Prev not int\n");
+ return res;
+ }
- /*
+ res = decode_xref_trailer(doc, prev);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+ }
+ offset = xref_offset;
- res = decode_xref(&doc, &startxref);
- if (res != 0) {
+ res = decode_xref(doc, &offset);
+ if (res != NSPDFERROR_OK) {
printf("failed to decode xref table\n");
return res;
}
- */
+ /** @todo free trailer? */
return res;
}
@@ -1457,24 +1518,41 @@ nspdferror decode_trailers(struct pdf_doc *doc)
}
+nspdferror new_pdf_doc(struct pdf_doc **doc_out)
+{
+ struct pdf_doc *doc;
+ doc = calloc(1, sizeof(struct pdf_doc));
+ if (doc == NULL) {
+ return NSPDFERROR_NOMEM;
+ }
+ *doc_out = doc;
+ return NSPDFERROR_OK;
+}
+
int main(int argc, char **argv)
{
- struct pdf_doc doc;
+ struct pdf_doc *doc;
int res;
- res = read_whole_pdf(&doc, argv[1]);
+ res = new_pdf_doc(&doc);
+ if (res != NSPDFERROR_OK) {
+ printf("failed to read file\n");
+ return res;
+ }
+
+ res = read_whole_pdf(doc, argv[1]);
if (res != 0) {
printf("failed to read file\n");
return res;
}
- res = check_header(&doc);
+ res = check_header(doc);
if (res != 0) {
printf("header check failed\n");
return res;
}
- res = decode_trailers(&doc);
+ res = decode_trailers(doc);
if (res != NSPDFERROR_OK) {
printf("failed to decode trailers (%d)\n", res);
return res;
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=d4b12bd7bf85a3fa26...
commit d4b12bd7bf85a3fa26f5a65d4dc3a5aaf02cb572
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
fix hex string decode and trailer parse
diff --git a/src/byte_class.c b/src/byte_class.c
index 59c0206..e881cf5 100644
--- a/src/byte_class.c
+++ b/src/byte_class.c
@@ -17,54 +17,54 @@
* end of line - characters that signify an end of line
*/
const uint8_t byte_classification[] = {
- BC_WSPC, /* 00 - NULL */
- BC_RGLR, /* 01 */
- BC_RGLR, /* 02 */
- BC_RGLR, /* 03 */
- BC_RGLR, /* 04 */
- BC_RGLR, /* 05 */
- BC_RGLR, /* 06 */
- BC_RGLR, /* 07 */
- BC_RGLR, /* 08 */
- BC_WSPC, /* 09 - HT */
- BC_WSPC | BC_EOLM, /* 0A - LF */
- BC_RGLR, /* 0B */
- BC_WSPC, /* 0C - FF */
- BC_WSPC | BC_EOLM, /* 0D - CR */
- BC_RGLR, /* 0E */
- BC_RGLR, /* 0F */
- BC_RGLR,
- BC_RGLR,
- BC_RGLR,
- BC_RGLR, /* 10 - 13 */
- BC_RGLR,
- BC_RGLR,
- BC_RGLR,
- BC_RGLR, /* 14 - 17 */
- BC_RGLR,
- BC_RGLR,
- BC_RGLR,
- BC_RGLR, /* 18 - 1B */
- BC_RGLR,
- BC_RGLR,
- BC_RGLR,
- BC_RGLR, /* 1C - 1F */
- BC_WSPC, /* 20 - SP */
- BC_RGLR,
- BC_RGLR,
- BC_RGLR, /* 20 - 23 */
- BC_RGLR, /* 24 - '$' */
- BC_DELM | BC_CMNT, /* 25 - '%' */
- BC_RGLR,
- BC_RGLR, /* 26 - 27 */
- BC_DELM,
- BC_DELM, /* '(' ')' */
- BC_RGLR,
- BC_RGLR, /* 2A - 2B */
- BC_RGLR,
- BC_RGLR, /* 2C - 2D */
- BC_RGLR,
- BC_DELM, /* '.' '/' */
+ BC_WSPC, /* 00 - NULL */
+ BC_RGLR, /* 01 */
+ BC_RGLR, /* 02 */
+ BC_RGLR, /* 03 */
+ BC_RGLR, /* 04 */
+ BC_RGLR, /* 05 */
+ BC_RGLR, /* 06 */
+ BC_RGLR, /* 07 */
+ BC_RGLR, /* 08 */
+ BC_WSPC, /* 09 - HT */
+ BC_WSPC | BC_EOLM, /* 0A - LF */
+ BC_RGLR, /* 0B */
+ BC_WSPC, /* 0C - FF */
+ BC_WSPC | BC_EOLM, /* 0D - CR */
+ BC_RGLR, /* 0E */
+ BC_RGLR, /* 0F */
+ BC_RGLR, /* 10 */
+ BC_RGLR, /* 11 */
+ BC_RGLR, /* 12 */
+ BC_RGLR, /* 13 */
+ BC_RGLR, /* 14 */
+ BC_RGLR, /* 15 */
+ BC_RGLR, /* 16 */
+ BC_RGLR, /* 17 */
+ BC_RGLR, /* 18 */
+ BC_RGLR, /* 19 */
+ BC_RGLR, /* 1A */
+ BC_RGLR, /* 1B */
+ BC_RGLR, /* 1C */
+ BC_RGLR, /* 1D */
+ BC_RGLR, /* 1E */
+ BC_RGLR, /* 1F */
+ BC_WSPC, /* 20 - SP */
+ BC_RGLR, /* 21 */
+ BC_RGLR, /* 22 */
+ BC_RGLR, /* 23 */
+ BC_RGLR, /* 24 - '$' */
+ BC_DELM | BC_CMNT, /* 25 - '%' */
+ BC_RGLR, /* 26 */
+ BC_RGLR, /* 27 */
+ BC_DELM, /* 28 - '(' */
+ BC_DELM, /* 29 - ')' */
+ BC_RGLR, /* 2A */
+ BC_RGLR, /* 2B */
+ BC_RGLR, /* 2C */
+ BC_RGLR, /* 2D */
+ BC_RGLR, /* 2E - '.' */
+ BC_DELM, /* 2F - '/' */
BC_OCTL | BC_DCML | BC_HEXL, /* 30 - '0' */
BC_OCTL | BC_DCML | BC_HEXL, /* 31 - '1' */
BC_OCTL | BC_DCML | BC_HEXL, /* 32 - '2' */
@@ -73,76 +73,76 @@ const uint8_t byte_classification[] = {
BC_OCTL | BC_DCML | BC_HEXL, /* 35 - '5' */
BC_OCTL | BC_DCML | BC_HEXL, /* 36 - '6' */
BC_OCTL | BC_DCML | BC_HEXL, /* 37 - '7' */
- BC_DCML | BC_HEXL,
- BC_DCML | BC_HEXL, /* '8' '9' */
- BC_RGLR,
- BC_RGLR, /* ':' ';' */
- BC_DELM,
- BC_RGLR, /* '<' '=' */
- BC_DELM,
- BC_RGLR, /* '>' '?' */
- BC_RGLR,
- BC_RGLR,
- BC_RGLR,
- BC_RGLR, /* 40 - 43 */
- BC_RGLR,
- BC_HEXL,
- BC_HEXL,
- BC_HEXL, /* 44 - 47 */
- BC_HEXL,
- BC_HEXL,
- BC_HEXL,
- BC_RGLR, /* 48 - 4B */
- BC_RGLR,
- BC_RGLR,
- BC_RGLR,
- BC_RGLR, /* 4C - 4F */
- BC_RGLR,
- BC_RGLR,
- BC_RGLR,
- BC_RGLR, /* 50 - 53 */
- BC_RGLR,
- BC_RGLR,
- BC_RGLR,
- BC_RGLR, /* 54 - 57 */
- BC_RGLR,
- BC_RGLR, /* 58 - 59 */
- BC_RGLR,
- BC_DELM, /* 'Z' '[' */
- BC_RGLR,
- BC_DELM, /* '\' ']' */
- BC_RGLR,
- BC_RGLR, /* 5E - 5F */
- BC_RGLR,
- BC_RGLR,
- BC_RGLR,
- BC_RGLR,
- BC_RGLR,
- BC_HEXL,
- BC_HEXL,
- BC_HEXL, /* 60 - 67 */
- BC_HEXL,
- BC_HEXL,
- BC_HEXL,
- BC_RGLR,
- BC_RGLR,
- BC_RGLR,
- BC_RGLR,
- BC_RGLR, /* 68 - 6F */
- BC_RGLR,
- BC_RGLR,
- BC_RGLR,
- BC_RGLR,
- BC_RGLR,
- BC_RGLR,
- BC_RGLR,
- BC_RGLR, /* 70 - 77 */
- BC_RGLR,
- BC_RGLR, /* 78 - 79 */
- BC_RGLR,
- BC_DELM, /* 'z' '{' */
- BC_RGLR,
- BC_DELM, /* '|' '}' */
+ BC_DCML | BC_HEXL, /* 38 - '8' */
+ BC_DCML | BC_HEXL, /* 39 - '9' */
+ BC_RGLR, /* 3A - ':' */
+ BC_RGLR, /* 3B - ';' */
+ BC_DELM, /* 3C - '<' */
+ BC_RGLR, /* 3D - '=' */
+ BC_DELM, /* 3E - '>' */
+ BC_RGLR, /* 3F - '?' */
+ BC_RGLR, /* 40 - */
+ BC_HEXL, /* 41 - A */
+ BC_HEXL, /* 42 - B */
+ BC_HEXL, /* 43 - C */
+ BC_HEXL, /* 44 - D */
+ BC_HEXL, /* 45 - E */
+ BC_HEXL, /* 46 - F */
+ BC_RGLR, /* 47 - G */
+ BC_RGLR, /* 48 - H */
+ BC_RGLR, /* 49 - I */
+ BC_RGLR, /* 4A - J */
+ BC_RGLR, /* 4B - K */
+ BC_RGLR, /* 4C - L */
+ BC_RGLR, /* 4D - M */
+ BC_RGLR, /* 4E - N */
+ BC_RGLR, /* 4F - O */
+ BC_RGLR, /* 50 - P */
+ BC_RGLR, /* 51 - Q */
+ BC_RGLR, /* 52 - R */
+ BC_RGLR, /* 53 - S */
+ BC_RGLR, /* 54 - T */
+ BC_RGLR, /* 55 - U */
+ BC_RGLR, /* 56 - V */
+ BC_RGLR, /* 57 - W */
+ BC_RGLR, /* 58 - X */
+ BC_RGLR, /* 59 - Y */
+ BC_RGLR, /* 5A - 'Z' */
+ BC_DELM, /* 5B - '[' */
+ BC_RGLR, /* 5C - '\' */
+ BC_DELM, /* 5D - ']' */
+ BC_RGLR, /* 5E */
+ BC_RGLR, /* 5F */
+ BC_RGLR, /* 60 */
+ BC_HEXL, /* 61 - a */
+ BC_HEXL, /* 62 - b */
+ BC_HEXL, /* 63 - c */
+ BC_HEXL, /* 64 - d */
+ BC_HEXL, /* 65 - e */
+ BC_HEXL, /* 66 - f */
+ BC_RGLR, /* 67 - g */
+ BC_RGLR, /* 68 - h */
+ BC_RGLR, /* 69 - i */
+ BC_RGLR, /* 6A - j */
+ BC_RGLR, /* 6B - k */
+ BC_RGLR, /* 6C - l */
+ BC_RGLR, /* 6D - m */
+ BC_RGLR, /* 6E - n */
+ BC_RGLR, /* 6F - o */
+ BC_RGLR, /* 70 - p */
+ BC_RGLR, /* 71 - q */
+ BC_RGLR, /* 72 - r */
+ BC_RGLR, /* 73 - s */
+ BC_RGLR, /* 74 - t */
+ BC_RGLR, /* 75 - u */
+ BC_RGLR, /* 76 - v */
+ BC_RGLR, /* 77 - w */
+ BC_RGLR, /* 78 - x */
+ BC_RGLR, /* 79 - y */
+ BC_RGLR, /* 7A - 'z' */
+ BC_DELM, /* 7B - '{' */
+ BC_RGLR, /* 7C - '|' */
+ BC_DELM, /* 7D - '}' */
BC_RGLR,
BC_RGLR, /* 7E - 7F */
BC_RGLR,
diff --git a/src/xref.c b/src/xref.c
index 173eb9e..94b0ee0 100644
--- a/src/xref.c
+++ b/src/xref.c
@@ -206,7 +206,23 @@ static int doc_skip_ws(struct pdf_doc *doc, uint64_t *offset)
return 0;
}
-static int doc_read_uint(struct pdf_doc *doc, uint64_t *offset_out, uint64_t *result_out)
+/**
+ * move offset to next non eol byte
+ */
+static int doc_skip_eol(struct pdf_doc *doc, uint64_t *offset)
+{
+ uint8_t c;
+ /* TODO sort out keeping offset in range */
+ c = DOC_BYTE(doc, *offset);
+ while ((bclass[c] & BC_EOLM) != 0) {
+ (*offset)++;
+ c = DOC_BYTE(doc, *offset);
+ }
+ return 0;
+}
+
+static nspdferror
+doc_read_uint(struct pdf_doc *doc, uint64_t *offset_out, uint64_t *result_out)
{
uint8_t c; /* current byte from source data */
unsigned int len; /* number of decimal places in number */
@@ -231,7 +247,7 @@ static int doc_read_uint(struct pdf_doc *doc, uint64_t *offset_out, uint64_t *re
*offset_out = offset;
*result_out = result;
- return 0;
+ return NSPDFERROR_OK;
}
num[len] = c - '0';
offset++;
@@ -242,7 +258,7 @@ static int doc_read_uint(struct pdf_doc *doc, uint64_t *offset_out, uint64_t *re
/**
* finds the startxref marker at the end of input
*/
-int find_startxref(struct pdf_doc *doc, uint64_t *start_xref_out)
+nspdferror find_startxref(struct pdf_doc *doc, uint64_t *offset_out)
{
uint64_t offset; /* offset of characters being considered for startxref */
uint64_t earliest; /* earliest offset to search for startxref */
@@ -265,14 +281,89 @@ int find_startxref(struct pdf_doc *doc, uint64_t *start_xref_out)
(DOC_BYTE(doc, offset + 6) == 'r') &&
(DOC_BYTE(doc, offset + 7) == 'e') &&
(DOC_BYTE(doc, offset + 8) == 'f')) {
- offset += 9;
- doc_skip_ws(doc, &offset);
- return doc_read_uint(doc, &offset, start_xref_out);
+ *offset_out = offset;
+ return NSPDFERROR_OK;
}
}
- return -1;
+ return NSPDFERROR_SYNTAX;
}
+/**
+ * decodes a startxref field
+ */
+nspdferror decode_startxref(struct pdf_doc *doc, uint64_t *offset_out, uint64_t *start_xref_out)
+{
+ uint64_t offset; /* offset of characters being considered for startxref */
+ uint64_t start_xref;
+ nspdferror res;
+
+ offset = *offset_out;
+
+ if ((DOC_BYTE(doc, offset ) != 's') ||
+ (DOC_BYTE(doc, offset + 1) != 't') ||
+ (DOC_BYTE(doc, offset + 2) != 'a') ||
+ (DOC_BYTE(doc, offset + 3) != 'r') ||
+ (DOC_BYTE(doc, offset + 4) != 't') ||
+ (DOC_BYTE(doc, offset + 5) != 'x') ||
+ (DOC_BYTE(doc, offset + 6) != 'r') ||
+ (DOC_BYTE(doc, offset + 7) != 'e') ||
+ (DOC_BYTE(doc, offset + 8) != 'f')) {
+ return NSPDFERROR_SYNTAX;
+ }
+ offset += 9;
+
+ res = doc_skip_ws(doc, &offset);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+
+ res = doc_read_uint(doc, &offset, &start_xref);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+
+ res = doc_skip_eol(doc, &offset);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
+
+ if ((DOC_BYTE(doc, offset ) != '%') ||
+ (DOC_BYTE(doc, offset + 1) != '%') ||
+ (DOC_BYTE(doc, offset + 2) != 'E') ||
+ (DOC_BYTE(doc, offset + 3) != 'O') ||
+ (DOC_BYTE(doc, offset + 4) != 'F')) {
+ printf("missing EOF marker\n");
+ return NSPDFERROR_SYNTAX;
+ }
+
+ *offset_out = offset;
+ *start_xref_out = start_xref;
+
+ return NSPDFERROR_OK;
+}
+
+
+/**
+ * finds the next trailer
+ */
+nspdferror find_trailer(struct pdf_doc *doc, uint64_t *offset_out)
+{
+ uint64_t offset; /* offset of characters being considered for trailer */
+
+ for (offset = *offset_out;offset < doc->length; offset++) {
+ if ((DOC_BYTE(doc, offset ) == 't') &&
+ (DOC_BYTE(doc, offset + 1) == 'r') &&
+ (DOC_BYTE(doc, offset + 2) == 'a') &&
+ (DOC_BYTE(doc, offset + 3) == 'i') &&
+ (DOC_BYTE(doc, offset + 4) == 'l') &&
+ (DOC_BYTE(doc, offset + 5) == 'e') &&
+ (DOC_BYTE(doc, offset + 6) == 'r')) {
+ *offset_out = offset;
+ return NSPDFERROR_OK;
+ }
+ }
+ return NSPDFERROR_SYNTAX;
+}
/**
* find the PDF comment marker to identify the start of the document
@@ -333,6 +424,7 @@ nspdferror cos_free_object(struct cos_object *cos_obj)
break;
case COS_TYPE_STRING:
+ free(cos_obj->u.s->data);
free(cos_obj->u.s);
break;
@@ -569,15 +661,29 @@ cos_decode_string(struct pdf_doc *doc,
return NSPDFERROR_OK;
}
+uint8_t xtoi(uint8_t x)
+{
+ if (x >= '0' && x <= '9') {
+ x = x - '0';
+ } else if (x >= 'a' && x <='f') {
+ x = x - 'a' + 10;
+ } else if (x >= 'A' && x <='F') {
+ x = x - 'A' + 10;
+ }
+ return x;
+}
+
nspdferror
cos_decode_hex_string(struct pdf_doc *doc,
uint64_t *offset_out,
struct cos_object **cosobj_out)
{
uint64_t offset;
- // struct cos_object *cosobj;
+ struct cos_object *cosobj;
uint8_t c;
- // uint8_t byte;
+ uint8_t value = 0;
+ struct cos_string *cstring;
+ bool first = true;
offset = *offset_out;
@@ -585,14 +691,46 @@ cos_decode_hex_string(struct pdf_doc *doc,
if (c != '<') {
return NSPDFERROR_SYNTAX;
}
- doc_skip_ws(doc, &offset);
- while (c != '>') {
- c = DOC_BYTE(doc, offset++);
- doc_skip_ws(doc, &offset);
+ cstring = calloc(1, sizeof(*cstring));
+ if (cstring == NULL) {
+ return NSPDFERROR_NOMEM;
}
- return -1;
+ cosobj = calloc(1, sizeof(*cosobj));
+ if (cosobj == NULL) {
+ return NSPDFERROR_NOMEM;
+ }
+ cosobj->type = COS_TYPE_STRING;
+ cosobj->u.s = cstring;
+
+ for (; offset < doc->length; offset++) {
+ c = DOC_BYTE(doc, offset);
+ if (c == '>') {
+ if (first == false) {
+ cos_string_append(cstring, value);
+ }
+ offset++;
+ doc_skip_ws(doc, &offset);
+
+ *cosobj_out = cosobj;
+ *offset_out = offset;
+
+ return NSPDFERROR_OK;
+ } else if ((bclass[c] & BC_HEXL) != 0) {
+ if (first) {
+ value = xtoi(c) << 4;
+ first = false;
+ } else {
+ value |= xtoi(c);
+ first = true;
+ cos_string_append(cstring, value);
+ }
+ } else if ((bclass[c] & BC_WSPC) == 0) {
+ break; /* unknown byte value in string */
+ }
+ }
+ return NSPDFERROR_SYNTAX;
}
@@ -640,8 +778,6 @@ int cos_decode_dictionary(struct pdf_doc *doc,
}
printf("key: %s\n", key->u.n);
- printf("%c\n", DOC_BYTE(doc, offset));
-
res = cos_decode_object(doc, &offset, &value);
if (res != 0) {
printf("Unable to decode value object in dictionary\n");
@@ -729,6 +865,7 @@ cos_decode_list(struct pdf_doc *doc,
cosobj->u.array = entry;
}
offset++; /* skip closing ] */
+
doc_skip_ws(doc, &offset);
*cosobj_out = cosobj;
@@ -762,12 +899,13 @@ int cos_decode_name(struct pdf_doc *doc,
}
printf("found a name\n");
- c = DOC_BYTE(doc, offset++);
+ c = DOC_BYTE(doc, offset);
while ((idx <= NAME_MAX_LENGTH) &&
((bclass[c] & (BC_WSPC | BC_DELM)) == 0)) {
+ offset++;
//printf("%c", c);
name[idx++] = c;
- c = DOC_BYTE(doc, offset++);
+ c = DOC_BYTE(doc, offset);
}
//printf("\nidx: %d\n", idx);
if (idx > NAME_MAX_LENGTH) {
@@ -1114,10 +1252,16 @@ int cos_decode_object(struct pdf_doc *doc,
-int decode_trailer(struct pdf_doc *doc, uint64_t offset)
+nspdferror
+decode_trailer(struct pdf_doc *doc,
+ uint64_t *offset_out,
+ struct cos_object **trailer_out)
{
struct cos_object *trailer;
int res;
+ uint64_t offset;
+
+ offset = *offset_out;
/* trailer object header */
if ((DOC_BYTE(doc, offset ) != 't') &&
@@ -1142,7 +1286,10 @@ int decode_trailer(struct pdf_doc *doc, uint64_t offset)
return -1;
}
- return 0;
+ *trailer_out = trailer;
+ *offset_out = offset;
+
+ return NSPDFERROR_OK;
}
int decode_xref(struct pdf_doc *doc, uint64_t *offset_out)
@@ -1213,39 +1360,123 @@ int decode_xref(struct pdf_doc *doc, uint64_t *offset_out)
return 0;
}
-int main(int argc, char **argv)
+/**
+ * recursively parse trailers and xref tables
+ */
+nspdferror decode_xref_trailer(struct pdf_doc *doc, uint64_t xref_offset)
{
- struct pdf_doc doc;
- int res;
- uint64_t startxref;
+ nspdferror res;
+ uint64_t offset; /* the current data offset */
+ uint64_t startxref; /* the value of the startxref field */
+ struct cos_object *trailer; /* the current trailer */
- res = read_whole_pdf(&doc, argv[1]);
- if (res != 0) {
- printf("failed to read file\n");
+ offset = xref_offset;
+
+ res = find_trailer(doc, &offset);
+ if (res != NSPDFERROR_OK) {
+ printf("failed to decode startxref\n");
return res;
}
- res = check_header(&doc);
- if (res != 0) {
- printf("header check failed\n");
+ res = decode_trailer(doc, &offset, &trailer);
+ if (res != NSPDFERROR_OK) {
+ printf("failed to decode trailer\n");
return res;
}
- res = find_startxref(&doc, &startxref);
- if (res != 0) {
- printf("failed to find startxref\n");
+ res = decode_startxref(doc, &offset, &startxref);
+ if (res != NSPDFERROR_OK) {
+ printf("failed to decode startxref\n");
return res;
}
+ if (startxref != xref_offset) {
+ printf("startxref and Prev value disagree\n");
+ }
+
+ /* extract Size from trailer and create xref table large enough */
+
+ /* check for prev ID key in trailer and recurse call if present */
+
+ /*
+
+
res = decode_xref(&doc, &startxref);
if (res != 0) {
printf("failed to decode xref table\n");
return res;
}
- res = decode_trailer(&doc, startxref);
+ */
+
+ return res;
+}
+
+/**
+ * decode non-linear pdf trailer data
+ *
+ * PDF have a structure nominally defined as header, body, cross reference table
+ * and trailer. The body, cross reference table and trailer sections may be
+ * repeated in a scheme known as "incremental updates"
+ *
+ * The strategy used here is to locate the end of the last trailer block which
+ * contains a startxref token followed by a byte offset into the file of the
+ * beginning of the cross reference table followed by a literal '%%EOF'
+ *
+ * the initial offset is used to walk back down a chain of xref/trailers until
+ * the trailer does not contain a Prev entry and decode xref tables forwards to
+ * overwrite earlier object entries with later ones.
+ *
+ * It is necessary to search forwards from the xref table to find the trailer
+ * block because instead of the Prev entry pointing to the previous trailer
+ * (from which we could have extracted the startxref to find the associated
+ * xref table) it points to the previous xref block which we have to skip to
+ * find the subsequent trailer.
+ *
+ */
+nspdferror decode_trailers(struct pdf_doc *doc)
+{
+ nspdferror res;
+ uint64_t offset; /* the current data offset */
+ uint64_t startxref; /* the value of the first startxref field */
+
+ res = find_startxref(doc, &offset);
+ if (res != NSPDFERROR_OK) {
+ printf("failed to find startxref\n");
+ return res;
+ }
+
+ res = decode_startxref(doc, &offset, &startxref);
+ if (res != NSPDFERROR_OK) {
+ printf("failed to decode startxref\n");
+ return res;
+ }
+
+ /* recurse down the xref and trailers */
+ return decode_xref_trailer(doc, startxref);
+}
+
+
+int main(int argc, char **argv)
+{
+ struct pdf_doc doc;
+ int res;
+
+ res = read_whole_pdf(&doc, argv[1]);
if (res != 0) {
- printf("failed to decode trailer\n");
+ printf("failed to read file\n");
+ return res;
+ }
+
+ res = check_header(&doc);
+ if (res != 0) {
+ printf("header check failed\n");
+ return res;
+ }
+
+ res = decode_trailers(&doc);
+ if (res != NSPDFERROR_OK) {
+ printf("failed to decode trailers (%d)\n", res);
return res;
}
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=d26bc9f19191e5dd7d...
commit d26bc9f19191e5dd7d233302f73f226c89cb797f
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
write parse of strings
diff --git a/src/Makefile b/src/Makefile
index ac8c347..f9ca22c 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,6 +1,6 @@
#
-CFLAGS+=-g -Wall
+CFLAGS+=-g -Wall -Wextra
.PHONY:all clean
diff --git a/src/byte_class.c b/src/byte_class.c
index a8ab735..59c0206 100644
--- a/src/byte_class.c
+++ b/src/byte_class.c
@@ -11,6 +11,7 @@
* decimal - characters that appear in decimal values 0123456789
* hexadecimal - characters that appear in hex values 0123456789ABCDEF
* delimiter - The characters used to separate tokens ()[]{}<>/%
+ * comment - the % character used to introduce a comment
* whitespace - separate syntactic constructs like names and numbers treated
* as a single character except in comments, strings and streams
* end of line - characters that signify an end of line
@@ -52,8 +53,8 @@ const uint8_t byte_classification[] = {
BC_RGLR,
BC_RGLR,
BC_RGLR, /* 20 - 23 */
- BC_RGLR,
- BC_DELM, /* '$' '%' */
+ BC_RGLR, /* 24 - '$' */
+ BC_DELM | BC_CMNT, /* 25 - '%' */
BC_RGLR,
BC_RGLR, /* 26 - 27 */
BC_DELM,
@@ -64,14 +65,14 @@ const uint8_t byte_classification[] = {
BC_RGLR, /* 2C - 2D */
BC_RGLR,
BC_DELM, /* '.' '/' */
- BC_DCML | BC_HEXL,
- BC_DCML | BC_HEXL, /* '0' '1' */
- BC_DCML | BC_HEXL,
- BC_DCML | BC_HEXL, /* '2' '3' */
- BC_DCML | BC_HEXL,
- BC_DCML | BC_HEXL, /* '4' '5' */
- BC_DCML | BC_HEXL,
- BC_DCML | BC_HEXL, /* '6' '7' */
+ BC_OCTL | BC_DCML | BC_HEXL, /* 30 - '0' */
+ BC_OCTL | BC_DCML | BC_HEXL, /* 31 - '1' */
+ BC_OCTL | BC_DCML | BC_HEXL, /* 32 - '2' */
+ BC_OCTL | BC_DCML | BC_HEXL, /* 33 - '3' */
+ BC_OCTL | BC_DCML | BC_HEXL, /* 34 - '4' */
+ BC_OCTL | BC_DCML | BC_HEXL, /* 35 - '5' */
+ BC_OCTL | BC_DCML | BC_HEXL, /* 36 - '6' */
+ BC_OCTL | BC_DCML | BC_HEXL, /* 37 - '7' */
BC_DCML | BC_HEXL,
BC_DCML | BC_HEXL, /* '8' '9' */
BC_RGLR,
@@ -181,4 +182,4 @@ const uint8_t byte_classification[] = {
BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR, /* F8 - FF */
};
-const uint8_t *blcass = &byte_classification[0];
+const uint8_t *bclass = &byte_classification[0];
diff --git a/src/byte_class.h b/src/byte_class.h
index 011acda..0ccfbdf 100644
--- a/src/byte_class.h
+++ b/src/byte_class.h
@@ -1,8 +1,10 @@
#define BC_RGLR 0 /* regular character */
#define BC_WSPC 1 /* character is whitespace */
#define BC_EOLM (1<<1) /* character signifies end of line */
-#define BC_DCML (1<<2) /* character is a decimal */
-#define BC_HEXL (1<<3) /* character is a hexadecimal */
-#define BC_DELM (1<<4) /* character is a delimiter */
+#define BC_OCTL (1<<2) /* character is octal */
+#define BC_DCML (1<<3) /* character is decimal */
+#define BC_HEXL (1<<4) /* character is hexadecimal */
+#define BC_DELM (1<<5) /* character is a delimiter */
+#define BC_CMNT (1<<6) /* character is a comment */
const uint8_t *bclass;
diff --git a/src/xref.c b/src/xref.c
index afb223f..173eb9e 100644
--- a/src/xref.c
+++ b/src/xref.c
@@ -9,6 +9,14 @@
#define SLEN(x) (sizeof((x)) - 1)
+typedef enum {
+ NSPDFERROR_OK,
+ NSPDFERROR_NOMEM,
+ NSPDFERROR_SYNTAX, /* syntax error in parse */
+ NSPDFERROR_SIZE, /* not enough input data */
+ NSPDFERROR_RANGE, /* value outside type range */
+} nspdferror;
+
enum cos_type {
COS_TYPE_NULL,
COS_TYPE_BOOL,
@@ -45,6 +53,11 @@ struct cos_array_entry {
struct cos_object *value;
};
+struct cos_string {
+ uint8_t *data;
+ size_t length;
+ size_t alloc;
+};
struct cos_reference {
/** id of indirect object */
@@ -70,7 +83,7 @@ struct cos_object {
char *n;
/** string */
- char *s;
+ struct cos_string *s;
/** stream data */
uint8_t *stream;
@@ -170,14 +183,24 @@ read_whole_pdf(struct pdf_doc *doc, const char *fname)
/* byte data acessory, allows for more complex buffer handling in future */
#define DOC_BYTE(doc, offset) (doc->start[(offset)])
-/* find next non whitespace byte */
+/**
+ * move offset to next non whitespace byte
+ */
static int doc_skip_ws(struct pdf_doc *doc, uint64_t *offset)
{
uint8_t c;
-
+ /* TODO sort out keeping offset in range */
c = DOC_BYTE(doc, *offset);
- while ((bclass[c] & BC_WSPC) != 0) {
+ while ((bclass[c] & (BC_WSPC | BC_CMNT) ) != 0) {
(*offset)++;
+ /* skip comments */
+ if ((bclass[c] & BC_CMNT) != 0) {
+ c = DOC_BYTE(doc, *offset);
+ while ((bclass[c] & BC_EOLM ) == 0) {
+ (*offset)++;
+ c = DOC_BYTE(doc, *offset);
+ }
+ }
c = DOC_BYTE(doc, *offset);
}
return 0;
@@ -186,7 +209,7 @@ static int doc_skip_ws(struct pdf_doc *doc, uint64_t *offset)
static int doc_read_uint(struct pdf_doc *doc, uint64_t *offset_out, uint64_t *result_out)
{
uint8_t c; /* current byte from source data */
- int len; /* number of decimal places in number */
+ unsigned int len; /* number of decimal places in number */
uint8_t num[21]; /* temporary buffer for decimal values */
uint64_t offset; /* current offset of source data */
uint64_t result=0; /* parsed result */
@@ -299,9 +322,10 @@ int cos_indirect_object_add(struct pdf_doc *doc,
return 0;
}
-int cos_free_object(struct cos_object *cos_obj)
+nspdferror cos_free_object(struct cos_object *cos_obj)
{
struct cos_dictionary_entry *dentry;
+ struct cos_array_entry *aentry;
switch (cos_obj->type) {
case COS_TYPE_NAME:
@@ -326,6 +350,18 @@ int cos_free_object(struct cos_object *cos_obj)
}
break;
+ case COS_TYPE_ARRAY:
+ aentry = cos_obj->u.array;
+ while (aentry != NULL) {
+ struct cos_array_entry *oaentry;
+
+ cos_free_object(aentry->value);
+
+ oaentry = aentry;
+ aentry = aentry->next;
+ free(oaentry);
+ }
+
case COS_TYPE_STREAM:
free(cos_obj->u.stream);
break;
@@ -333,7 +369,7 @@ int cos_free_object(struct cos_object *cos_obj)
}
free(cos_obj);
- return 0;
+ return NSPDFERROR_OK;
}
int cos_decode_number(struct pdf_doc *doc,
@@ -342,7 +378,7 @@ int cos_decode_number(struct pdf_doc *doc,
{
struct cos_object *cosobj;
uint8_t c; /* current byte from source data */
- int len; /* number of decimal places in number */
+ unsigned int len; /* number of decimal places in number */
uint8_t num[21]; /* temporary buffer for decimal values */
uint64_t offset; /* current offset of source data */
@@ -384,27 +420,170 @@ int cos_decode_number(struct pdf_doc *doc,
return -1; /* number too long */
}
-int cos_decode_string(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
+#define COS_STRING_ALLOC 32
+
+nspdferror
+cos_string_append(struct cos_string *s, uint8_t c)
{
- return -1;
+ //printf("appending 0x%x to %p len %d alloc %d\n", c, s->data, s->length, s->alloc);
+ if (s->length == s->alloc) {
+ uint8_t *ns;
+ ns = realloc(s->data, s->alloc + COS_STRING_ALLOC);
+ if (ns == NULL) {
+ return NSPDFERROR_NOMEM;
+ }
+ s->data = ns;
+ s->alloc += COS_STRING_ALLOC;
+ }
+ s->data[s->length++] = c;
+ return NSPDFERROR_OK;
+}
+
+/**
+ * literal string processing
+ *
+ */
+nspdferror
+cos_decode_string(struct pdf_doc *doc,
+ uint64_t *offset_out,
+ struct cos_object **cosobj_out)
+{
+ uint64_t offset;
+ struct cos_object *cosobj;
+ uint8_t c;
+ unsigned int pdepth = 1; /* depth of open parens */
+ struct cos_string *cstring;
+
+ offset = *offset_out;
+
+ c = DOC_BYTE(doc, offset++);
+ if (c != '(') {
+ return NSPDFERROR_SYNTAX;
+ }
+
+ cstring = calloc(1, sizeof(*cstring));
+ if (cstring == NULL) {
+ return NSPDFERROR_NOMEM;
+ }
+
+ cosobj = calloc(1, sizeof(*cosobj));
+ if (cosobj == NULL) {
+ return NSPDFERROR_NOMEM;
+ }
+ cosobj->type = COS_TYPE_STRING;
+ cosobj->u.s = cstring;
+
+ while (pdepth > 0) {
+ c = DOC_BYTE(doc, offset++);
+
+ if (c == ')') {
+ pdepth--;
+ if (pdepth == 0) {
+ break;
+ }
+ } else if (c == '(') {
+ pdepth++;
+ } else if ((bclass[c] & BC_EOLM ) != 0) {
+ /* unescaped end of line characters are translated to a single
+ * newline
+ */
+ c = DOC_BYTE(doc, offset);
+ while ((bclass[c] & BC_EOLM) != 0) {
+ offset++;
+ c = DOC_BYTE(doc, offset);
+ }
+ c = '\n';
+ } else if (c == '\\') {
+ /* escaped chars */
+ c = DOC_BYTE(doc, offset++);
+ switch (c) {
+ case 'n':
+ c = '\n';
+ break;
+
+ case 'r':
+ c = '\r';
+ break;
+
+ case 't':
+ c = '\t';
+ break;
+
+ case 'b':
+ c = '\b';
+ break;
+
+ case 'f':
+ c = '\f';
+ break;
+
+ case '(':
+ c = '(';
+ break;
+
+ case ')':
+ c = ')';
+ break;
+
+ case '\\':
+ c = '\\';
+ break;
+
+ default:
+
+ if ((bclass[c] & BC_EOLM) != 0) {
+ /* escaped end of line, swallow it */
+ c = DOC_BYTE(doc, offset++);
+ while ((bclass[c] & BC_EOLM) != 0) {
+ c = DOC_BYTE(doc, offset++);
+ }
+ } else if ((bclass[c] & BC_OCTL) != 0) {
+ /* octal value */
+ uint8_t val;
+ val = (c - '0');
+ c = DOC_BYTE(doc, offset);
+ if ((bclass[c] & BC_OCTL) != 0) {
+ offset++;
+ val = (val << 3) | (c - '0');
+ c = DOC_BYTE(doc, offset);
+ if ((bclass[c] & BC_OCTL) != 0) {
+ offset++;
+ val = (val << 3) | (c - '0');
+ c = val;
+ }
+ }
+ } /* else invalid (skip backslash) */
+ break;
+ }
+ }
+
+ /* c contains the character to add to the string */
+ cos_string_append(cstring, c);
+ }
+
+ doc_skip_ws(doc, &offset);
+
+ *cosobj_out = cosobj;
+ *offset_out = offset;
+
+ return NSPDFERROR_OK;
}
-int cos_decode_hex_string(struct pdf_doc *doc,
+nspdferror
+cos_decode_hex_string(struct pdf_doc *doc,
uint64_t *offset_out,
struct cos_object **cosobj_out)
{
uint64_t offset;
- struct cos_object *cosobj;
+ // struct cos_object *cosobj;
uint8_t c;
- uint8_t byte;
+ // uint8_t byte;
offset = *offset_out;
c = DOC_BYTE(doc, offset++);
if (c != '<') {
- return -1; /* syntax error */
+ return NSPDFERROR_SYNTAX;
}
doc_skip_ws(doc, &offset);
@@ -461,8 +640,11 @@ int cos_decode_dictionary(struct pdf_doc *doc,
}
printf("key: %s\n", key->u.n);
+ printf("%c\n", DOC_BYTE(doc, offset));
+
res = cos_decode_object(doc, &offset, &value);
if (res != 0) {
+ printf("Unable to decode value object in dictionary\n");
/* todo free up any dictionary entries already created */
return res;
}
@@ -490,52 +672,61 @@ int cos_decode_dictionary(struct pdf_doc *doc,
return 0;
}
-int cos_decode_list(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
+
+nspdferror
+cos_decode_list(struct pdf_doc *doc,
+ uint64_t *offset_out,
+ struct cos_object **cosobj_out)
{
uint64_t offset;
struct cos_object *cosobj;
struct cos_array_entry *entry;
struct cos_object *value;
- int res;
+ nspdferror res;
offset = *offset_out;
+ /* sanity check first token is list open */
if (DOC_BYTE(doc, offset) != '[') {
- return -1; /* syntax error */
+ printf("not a [\n");
+ return NSPDFERROR_SYNTAX; /* syntax error */
}
offset++;
- doc_skip_ws(doc, &offset);
+
+ /* advance offset to next token */
+ res = doc_skip_ws(doc, &offset);
+ if (res != NSPDFERROR_OK) {
+ return res;
+ }
printf("found a list\n");
cosobj = calloc(1, sizeof(struct cos_object));
if (cosobj == NULL) {
- return -1; /* memory error */
+ return NSPDFERROR_NOMEM;
}
cosobj->type = COS_TYPE_ARRAY;
while (DOC_BYTE(doc, offset) != ']') {
res = cos_decode_object(doc, &offset, &value);
- if (res != 0) {
- /* todo free up any array entries already created */
+ if (res != NSPDFERROR_OK) {
+ cos_free_object(cosobj);
+ printf("Unable to decode value object in list\n");
return res;
}
- /* add array entry */
+ /* add entry to array */
entry = calloc(1, sizeof(struct cos_array_entry));
if (entry == NULL) {
- /* todo free up any array entries already created */
- return -1; /* memory error */
+ cos_free_object(cosobj);
+ return NSPDFERROR_NOMEM;
}
entry->value = value;
entry->next = cosobj->u.array;
cosobj->u.array = entry;
-
}
offset++; /* skip closing ] */
doc_skip_ws(doc, &offset);
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=5de69a618c7858f997...
commit 5de69a618c7858f997e9944c06837d951fc129aa
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
split out byte class
diff --git a/src/Makefile b/src/Makefile
index f137a81..ac8c347 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -6,7 +6,7 @@ CFLAGS+=-g -Wall
all:xref
-xref:xref.o
+xref:xref.o byte_class.o
clean:
${RM} xref xref.o
diff --git a/src/byte_class.c b/src/byte_class.c
new file mode 100644
index 0000000..a8ab735
--- /dev/null
+++ b/src/byte_class.c
@@ -0,0 +1,184 @@
+#include <stdint.h>
+
+#include "byte_class.h"
+
+/**
+ * pdf byte classification
+ *
+ * spec defines three classes which this implementation further subdivides for
+ * comments, strings and streams:
+ * regular - the default class
+ * decimal - characters that appear in decimal values 0123456789
+ * hexadecimal - characters that appear in hex values 0123456789ABCDEF
+ * delimiter - The characters used to separate tokens ()[]{}<>/%
+ * whitespace - separate syntactic constructs like names and numbers treated
+ * as a single character except in comments, strings and streams
+ * end of line - characters that signify an end of line
+ */
+const uint8_t byte_classification[] = {
+ BC_WSPC, /* 00 - NULL */
+ BC_RGLR, /* 01 */
+ BC_RGLR, /* 02 */
+ BC_RGLR, /* 03 */
+ BC_RGLR, /* 04 */
+ BC_RGLR, /* 05 */
+ BC_RGLR, /* 06 */
+ BC_RGLR, /* 07 */
+ BC_RGLR, /* 08 */
+ BC_WSPC, /* 09 - HT */
+ BC_WSPC | BC_EOLM, /* 0A - LF */
+ BC_RGLR, /* 0B */
+ BC_WSPC, /* 0C - FF */
+ BC_WSPC | BC_EOLM, /* 0D - CR */
+ BC_RGLR, /* 0E */
+ BC_RGLR, /* 0F */
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR, /* 10 - 13 */
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR, /* 14 - 17 */
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR, /* 18 - 1B */
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR, /* 1C - 1F */
+ BC_WSPC, /* 20 - SP */
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR, /* 20 - 23 */
+ BC_RGLR,
+ BC_DELM, /* '$' '%' */
+ BC_RGLR,
+ BC_RGLR, /* 26 - 27 */
+ BC_DELM,
+ BC_DELM, /* '(' ')' */
+ BC_RGLR,
+ BC_RGLR, /* 2A - 2B */
+ BC_RGLR,
+ BC_RGLR, /* 2C - 2D */
+ BC_RGLR,
+ BC_DELM, /* '.' '/' */
+ BC_DCML | BC_HEXL,
+ BC_DCML | BC_HEXL, /* '0' '1' */
+ BC_DCML | BC_HEXL,
+ BC_DCML | BC_HEXL, /* '2' '3' */
+ BC_DCML | BC_HEXL,
+ BC_DCML | BC_HEXL, /* '4' '5' */
+ BC_DCML | BC_HEXL,
+ BC_DCML | BC_HEXL, /* '6' '7' */
+ BC_DCML | BC_HEXL,
+ BC_DCML | BC_HEXL, /* '8' '9' */
+ BC_RGLR,
+ BC_RGLR, /* ':' ';' */
+ BC_DELM,
+ BC_RGLR, /* '<' '=' */
+ BC_DELM,
+ BC_RGLR, /* '>' '?' */
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR, /* 40 - 43 */
+ BC_RGLR,
+ BC_HEXL,
+ BC_HEXL,
+ BC_HEXL, /* 44 - 47 */
+ BC_HEXL,
+ BC_HEXL,
+ BC_HEXL,
+ BC_RGLR, /* 48 - 4B */
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR, /* 4C - 4F */
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR, /* 50 - 53 */
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR, /* 54 - 57 */
+ BC_RGLR,
+ BC_RGLR, /* 58 - 59 */
+ BC_RGLR,
+ BC_DELM, /* 'Z' '[' */
+ BC_RGLR,
+ BC_DELM, /* '\' ']' */
+ BC_RGLR,
+ BC_RGLR, /* 5E - 5F */
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR,
+ BC_HEXL,
+ BC_HEXL,
+ BC_HEXL, /* 60 - 67 */
+ BC_HEXL,
+ BC_HEXL,
+ BC_HEXL,
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR, /* 68 - 6F */
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR, /* 70 - 77 */
+ BC_RGLR,
+ BC_RGLR, /* 78 - 79 */
+ BC_RGLR,
+ BC_DELM, /* 'z' '{' */
+ BC_RGLR,
+ BC_DELM, /* '|' '}' */
+ BC_RGLR,
+ BC_RGLR, /* 7E - 7F */
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR,
+ BC_RGLR, /* 80 - 83 */
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR, /* 84 - 87 */
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR,
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR, /* 88 - 8F */
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR,
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR, /* 90 - 97 */
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR,
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR, /* 98 - 9F */
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR,
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR, /* A0 - A7 */
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR,
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR, /* A8 - AF */
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR,
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR, /* B0 - B7 */
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR,
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR, /* B8 - BF */
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR,
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR, /* C0 - C7 */
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR,
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR, /* C8 - CF */
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR,
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR, /* D0 - D7 */
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR,
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR, /* D8 - DF */
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR,
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR, /* E0 - E7 */
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR,
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR, /* E8 - EF */
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR,
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR, /* F0 - F7 */
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR,
+ BC_RGLR, BC_RGLR, BC_RGLR, BC_RGLR, /* F8 - FF */
+};
+
+const uint8_t *blcass = &byte_classification[0];
diff --git a/src/byte_class.h b/src/byte_class.h
new file mode 100644
index 0000000..011acda
--- /dev/null
+++ b/src/byte_class.h
@@ -0,0 +1,8 @@
+#define BC_RGLR 0 /* regular character */
+#define BC_WSPC 1 /* character is whitespace */
+#define BC_EOLM (1<<1) /* character signifies end of line */
+#define BC_DCML (1<<2) /* character is a decimal */
+#define BC_HEXL (1<<3) /* character is a hexadecimal */
+#define BC_DELM (1<<4) /* character is a delimiter */
+
+const uint8_t *bclass;
diff --git a/src/xref.c b/src/xref.c
index 76ed194..afb223f 100644
--- a/src/xref.c
+++ b/src/xref.c
@@ -5,95 +5,9 @@
#include <stdbool.h>
#include <string.h>
-#define SLEN(x) (sizeof((x)) - 1)
-
-#define BC_NONE 0
-#define BC_WSPC 1 /* character is whitespace */
-#define BC_EOLM (1<<1) /* character signifies end of line */
-#define BC_DCML (1<<2) /* character is a decimal */
-#define BC_HEXL (1<<3) /* character is a hexadecimal */
-#define BC_DELM (1<<4) /* character is a delimiter */
+#include "byte_class.h"
-/**
- * pdf byte classification
- */
-uint8_t bclass[] = {
- BC_WSPC, BC_NONE, BC_NONE, BC_NONE, /* 00 - 03 */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 04 - 07 */
- BC_NONE, BC_WSPC, BC_WSPC | BC_EOLM, BC_NONE, /* 08 - 0B */
- BC_WSPC, BC_WSPC | BC_EOLM, BC_NONE, BC_NONE, /* 0C - 0F */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 10 - 13 */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 14 - 17 */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 18 - 1B */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 1C - 1F */
- BC_WSPC, BC_NONE, BC_NONE, BC_NONE, /* 20 - 23 */
- BC_NONE, BC_DELM, /* '$' '%' */
- BC_NONE, BC_NONE, /* 26 - 27 */
- BC_DELM, BC_DELM, /* '(' ')' */
- BC_NONE, BC_NONE, /* 2A - 2B */
- BC_NONE, BC_NONE, /* 2C - 2D */
- BC_NONE, BC_DELM, /* '.' '/' */
- BC_DCML | BC_HEXL, BC_DCML | BC_HEXL, /* '0' '1' */
- BC_DCML | BC_HEXL, BC_DCML | BC_HEXL, /* '2' '3' */
- BC_DCML | BC_HEXL, BC_DCML | BC_HEXL, /* '4' '5' */
- BC_DCML | BC_HEXL, BC_DCML | BC_HEXL, /* '6' '7' */
- BC_DCML | BC_HEXL, BC_DCML | BC_HEXL, /* '8' '9' */
- BC_NONE, BC_NONE, /* ':' ';' */
- BC_DELM, BC_NONE, /* '<' '=' */
- BC_DELM, BC_NONE, /* '>' '?' */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 40 - 43 */
- BC_NONE, BC_HEXL, BC_HEXL, BC_HEXL, /* 44 - 47 */
- BC_HEXL, BC_HEXL, BC_HEXL, BC_NONE, /* 48 - 4B */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 4C - 4F */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 50 - 53 */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 54 - 57 */
- BC_NONE, BC_NONE, /* 58 - 59 */
- BC_NONE, BC_DELM, /* 'Z' '[' */
- BC_NONE, BC_DELM, /* '\' ']' */
- BC_NONE, BC_NONE, /* 5E - 5F */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE,
- BC_NONE, BC_HEXL, BC_HEXL, BC_HEXL, /* 60 - 67 */
- BC_HEXL, BC_HEXL, BC_HEXL, BC_NONE,
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 68 - 6F */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE,
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 70 - 77 */
- BC_NONE, BC_NONE, /* 78 - 79 */
- BC_NONE, BC_DELM, /* 'z' '{' */
- BC_NONE, BC_DELM, /* '|' '}' */
- BC_NONE, BC_NONE, /* 7E - 7F */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 80 - 83 */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 84 - 87 */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE,
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 88 - 8F */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE,
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 90 - 97 */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE,
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 98 - 9F */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE,
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* A0 - A7 */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE,
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* A8 - AF */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE,
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* B0 - B7 */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE,
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* B8 - BF */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE,
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* C0 - C7 */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE,
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* C8 - CF */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE,
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* D0 - D7 */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE,
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* D8 - DF */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE,
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* E0 - E7 */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE,
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* E8 - EF */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE,
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* F0 - F7 */
- BC_NONE, BC_NONE, BC_NONE, BC_NONE,
- BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* F8 - FF */
-};
+#define SLEN(x) (sizeof((x)) - 1)
enum cos_type {
COS_TYPE_NULL,
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=d1ee370eaa428fafb2...
commit d1ee370eaa428fafb27cce60624bdf73ca35716d
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
add clean target
diff --git a/src/Makefile b/src/Makefile
index 997d964..f137a81 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -2,8 +2,11 @@
CFLAGS+=-g -Wall
-.PHONY:all
+.PHONY:all clean
all:xref
xref:xref.o
+
+clean:
+ ${RM} xref xref.o
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=286bf6db27789424b0...
commit 286bf6db27789424b07f2d6f0479f0a0527e91a4
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
arrays work
diff --git a/src/xref.c b/src/xref.c
index e1f3a9d..76ed194 100644
--- a/src/xref.c
+++ b/src/xref.c
@@ -123,6 +123,14 @@ struct cos_dictionary_entry {
struct cos_object *value;
};
+struct cos_array_entry {
+ /** next value in array */
+ struct cos_array_entry *next;
+
+ /** value */
+ struct cos_object *value;
+};
+
struct cos_reference {
/** id of indirect object */
@@ -156,6 +164,9 @@ struct cos_object {
/* dictionary */
struct cos_dictionary_entry *dictionary;
+ /* array */
+ struct cos_array_entry *array;
+
/** reference */
struct cos_reference *reference;
@@ -470,6 +481,24 @@ int cos_decode_hex_string(struct pdf_doc *doc,
uint64_t *offset_out,
struct cos_object **cosobj_out)
{
+ uint64_t offset;
+ struct cos_object *cosobj;
+ uint8_t c;
+ uint8_t byte;
+
+ offset = *offset_out;
+
+ c = DOC_BYTE(doc, offset++);
+ if (c != '<') {
+ return -1; /* syntax error */
+ }
+ doc_skip_ws(doc, &offset);
+
+ while (c != '>') {
+ c = DOC_BYTE(doc, offset++);
+ doc_skip_ws(doc, &offset);
+ }
+
return -1;
}
@@ -551,7 +580,56 @@ int cos_decode_list(struct pdf_doc *doc,
uint64_t *offset_out,
struct cos_object **cosobj_out)
{
- return -1;
+ uint64_t offset;
+ struct cos_object *cosobj;
+ struct cos_array_entry *entry;
+ struct cos_object *value;
+ int res;
+
+ offset = *offset_out;
+
+ if (DOC_BYTE(doc, offset) != '[') {
+ return -1; /* syntax error */
+ }
+ offset++;
+ doc_skip_ws(doc, &offset);
+
+ printf("found a list\n");
+
+ cosobj = calloc(1, sizeof(struct cos_object));
+ if (cosobj == NULL) {
+ return -1; /* memory error */
+ }
+ cosobj->type = COS_TYPE_ARRAY;
+
+ while (DOC_BYTE(doc, offset) != ']') {
+
+ res = cos_decode_object(doc, &offset, &value);
+ if (res != 0) {
+ /* todo free up any array entries already created */
+ return res;
+ }
+
+ /* add array entry */
+ entry = calloc(1, sizeof(struct cos_array_entry));
+ if (entry == NULL) {
+ /* todo free up any array entries already created */
+ return -1; /* memory error */
+ }
+
+ entry->value = value;
+ entry->next = cosobj->u.array;
+
+ cosobj->u.array = entry;
+
+ }
+ offset++; /* skip closing ] */
+ doc_skip_ws(doc, &offset);
+
+ *cosobj_out = cosobj;
+ *offset_out = offset;
+
+ return 0;
}
#define NAME_MAX_LENGTH 127
@@ -750,7 +828,7 @@ int cos_attempt_decode_reference(struct pdf_doc *doc,
offset = *offset_out;
- res = cos_decode_object(doc, &offset, &cosobj);
+ res = cos_decode_number(doc, &offset, &cosobj);
if (res != 0) {
return 0; /* no error if object could not be decoded */
}
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=7220b02603cfd33775...
commit 7220b02603cfd33775e56da19fe9f5fb1da08aa0
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
got references parsing
diff --git a/src/xref.c b/src/xref.c
index 6af8132..e1f3a9d 100644
--- a/src/xref.c
+++ b/src/xref.c
@@ -3,6 +3,7 @@
#include <inttypes.h>
#include <stdlib.h>
#include <stdbool.h>
+#include <string.h>
#define SLEN(x) (sizeof((x)) - 1)
@@ -14,7 +15,7 @@
#define BC_DELM (1<<4) /* character is a delimiter */
/**
- * byte classification
+ * pdf byte classification
*/
uint8_t bclass[] = {
BC_WSPC, BC_NONE, BC_NONE, BC_NONE, /* 00 - 03 */
@@ -126,7 +127,7 @@ struct cos_dictionary_entry {
struct cos_reference {
/** id of indirect object */
uint64_t id;
-
+
/* generation of indirect object */
uint64_t generation;
};
@@ -136,19 +137,19 @@ struct cos_object {
union {
/** boolean */
bool b;
-
+
/** integer */
int64_t i;
-
+
/** real */
double r;
-
+
/** name */
char *n;
-
+
/** string */
char *s;
-
+
/** stream data */
uint8_t *stream;
@@ -157,7 +158,7 @@ struct cos_object {
/** reference */
struct cos_reference *reference;
-
+
} u;
};
@@ -169,10 +170,10 @@ struct cos_indirect_object {
/* reference identifier */
struct cos_reference ref;
-
+
/** offset of object */
uint64_t offset;
-
+
/* direct object */
struct cos_object *o;
};
@@ -367,7 +368,7 @@ int cos_indirect_object_add(struct pdf_doc *doc,
nobj->offset = obj_offset;
doc->cos_list = nobj;
-
+
printf("xref %"PRIu64" %"PRIu64" %"PRIu64"\n",
obj_number, obj_offset, obj_generation);
return 0;
@@ -390,10 +391,10 @@ int cos_free_object(struct cos_object *cos_obj)
dentry = cos_obj->u.dictionary;
while (dentry != NULL) {
struct cos_dictionary_entry *odentry;
-
+
cos_free_object(dentry->key);
cos_free_object(dentry->value);
-
+
odentry = dentry;
dentry = dentry->next;
free(odentry);
@@ -414,7 +415,48 @@ int cos_decode_number(struct pdf_doc *doc,
uint64_t *offset_out,
struct cos_object **cosobj_out)
{
- return -1;
+ struct cos_object *cosobj;
+ uint8_t c; /* current byte from source data */
+ int len; /* number of decimal places in number */
+ uint8_t num[21]; /* temporary buffer for decimal values */
+ uint64_t offset; /* current offset of source data */
+
+ offset = *offset_out;
+
+ for (len = 0; len < sizeof(num); len++) {
+ c = DOC_BYTE(doc, offset);
+ if ((bclass[c] & BC_DCML) != BC_DCML) {
+ int64_t result = 0; /* parsed result */
+ uint64_t tens;
+
+ if (len == 0) {
+ return -2; /* parse error no decimals in input */
+ }
+ /* sum value from each place */
+ for (tens = 1; len > 0; tens = tens * 10, len--) {
+ result += (num[len - 1] * tens);
+ }
+
+ doc_skip_ws(doc, &offset);
+
+ cosobj = calloc(1, sizeof(struct cos_object));
+ if (cosobj == NULL) {
+ return -1; /* memory error */
+ }
+
+ cosobj->type = COS_TYPE_INT;
+ cosobj->u.i = result;
+
+ *cosobj_out = cosobj;
+
+ *offset_out = offset;
+
+ return 0;
+ }
+ num[len] = c - '0';
+ offset++;
+ }
+ return -1; /* number too long */
}
int cos_decode_string(struct pdf_doc *doc,
@@ -459,19 +501,22 @@ int cos_decode_dictionary(struct pdf_doc *doc,
return -1; /* memory error */
}
cosobj->type = COS_TYPE_DICTIONARY;
-
+
while ((DOC_BYTE(doc, offset) != '>') &&
(DOC_BYTE(doc, offset + 1) != '>')) {
-
+
res = cos_decode_object(doc, &offset, &key);
if (res != 0) {
/* todo free up any dictionary entries already created */
+ printf("key object decode failed\n");
return res;
}
if (key->type != COS_TYPE_NAME) {
/* key value pairs without a name */
+ printf("key was %d not a name %d\n", key->type, COS_TYPE_NAME);
return -1; /* syntax error */
}
+ printf("key: %s\n", key->u.n);
res = cos_decode_object(doc, &offset, &value);
if (res != 0) {
@@ -491,7 +536,7 @@ int cos_decode_dictionary(struct pdf_doc *doc,
entry->next = cosobj->u.dictionary;
cosobj->u.dictionary = entry;
-
+
}
offset += 2; /* skip closing >> */
doc_skip_ws(doc, &offset);
@@ -511,6 +556,11 @@ int cos_decode_list(struct pdf_doc *doc,
#define NAME_MAX_LENGTH 127
+/**
+ * decode a name object
+ *
+ * \todo deal with # symbols on pdf versions 1.2 and later
+ */
int cos_decode_name(struct pdf_doc *doc,
uint64_t *offset_out,
struct cos_object **cosobj_out)
@@ -519,13 +569,31 @@ int cos_decode_name(struct pdf_doc *doc,
struct cos_object *cosobj;
uint8_t c;
char name[NAME_MAX_LENGTH + 1];
+ int idx = 0;
offset = *offset_out;
c = DOC_BYTE(doc, offset++);
+ if (c != '/') {
+ return -1; /* names must be prefixed with a / */
+ }
+ printf("found a name\n");
+ c = DOC_BYTE(doc, offset++);
+ while ((idx <= NAME_MAX_LENGTH) &&
+ ((bclass[c] & (BC_WSPC | BC_DELM)) == 0)) {
+ //printf("%c", c);
+ name[idx++] = c;
+ c = DOC_BYTE(doc, offset++);
+ }
+ //printf("\nidx: %d\n", idx);
+ if (idx > NAME_MAX_LENGTH) {
+ /* name length exceeded implementation limit */
+ return -1;
+ }
+ name[idx] = 0;
-
+ //printf("name: %s\n", name);
doc_skip_ws(doc, &offset);
@@ -534,8 +602,8 @@ int cos_decode_name(struct pdf_doc *doc,
return -1; /* memory error */
}
- cosobj->type = COS_TYPE_BOOL;
- cosobj->u.b = value;
+ cosobj->type = COS_TYPE_NAME;
+ cosobj->u.n = strdup(name);
*cosobj_out = cosobj;
@@ -553,7 +621,7 @@ int cos_decode_boolean(struct pdf_doc *doc,
struct cos_object *cosobj;
uint8_t c;
bool value;
-
+
offset = *offset_out;
c = DOC_BYTE(doc, offset++);
@@ -573,7 +641,7 @@ int cos_decode_boolean(struct pdf_doc *doc,
return -1; /* syntax error */
}
value = true;
-
+
} else if ((c == 'f') || (c == 'F')) {
/* false branch */
@@ -613,7 +681,7 @@ int cos_decode_boolean(struct pdf_doc *doc,
*cosobj_out = cosobj;
*offset_out = offset;
-
+
return 0;
}
@@ -625,7 +693,7 @@ int cos_decode_null(struct pdf_doc *doc,
uint64_t offset;
struct cos_object *cosobj;
uint8_t c;
-
+
offset = *offset_out;
c = DOC_BYTE(doc, offset++);
@@ -644,7 +712,7 @@ int cos_decode_null(struct pdf_doc *doc,
if ((c != 'l') && (c != 'L')) {
return -1; /* syntax error */
}
-
+
doc_skip_ws(doc, &offset);
cosobj = calloc(1, sizeof(struct cos_object));
@@ -658,6 +726,83 @@ int cos_decode_null(struct pdf_doc *doc,
return 0;
}
+/**
+ * attempt to decode the stream into a reference
+ *
+ * The stream has already had a positive integer decoded from it. if another
+ * positive integer follows and a R character after that it is a reference,
+ * otherwise bail, but not finding a ref is not an error!
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of current cursor in stream
+ * \param cosobj_out the object to return into, on input contains the first
+ * integer
+ */
+int cos_attempt_decode_reference(struct pdf_doc *doc,
+ uint64_t *offset_out,
+ struct cos_object **cosobj_out)
+{
+ uint64_t offset;
+ struct cos_object *cosobj; /* possible generation object */
+ uint8_t c;
+ int res;
+ struct cos_reference *nref; /* new reference */
+
+ offset = *offset_out;
+
+ res = cos_decode_object(doc, &offset, &cosobj);
+ if (res != 0) {
+ return 0; /* no error if object could not be decoded */
+ }
+
+ if (cosobj->type != COS_TYPE_INT) {
+ /* next object was not an integer so not a reference */
+ cos_free_object(cosobj);
+ return 0;
+ }
+
+ if (cosobj->u.i < 0) {
+ /* integer was negative so not a reference (generations must be
+ * non-negative)
+ */
+ cos_free_object(cosobj);
+ return 0;
+
+ }
+
+ /* two int in a row, look for the R */
+ c = DOC_BYTE(doc, offset++);
+ if (c != 'R') {
+ /* no R so not a reference */
+ cos_free_object(cosobj);
+ return 0;
+ }
+
+ /* found reference */
+
+ printf("found reference\n");
+ doc_skip_ws(doc, &offset);
+
+ nref = calloc(1, sizeof(struct cos_reference));
+ if (nref == NULL) {
+ /* todo free objects */
+ return -1; /* memory error */
+ }
+
+ nref->id = (*cosobj_out)->u.i;
+ nref->generation = cosobj->u.i;
+
+ cos_free_object(*cosobj_out);
+
+ cosobj->type = COS_TYPE_REFERENCE;
+ cosobj->u.reference = nref;
+
+ *cosobj_out = cosobj;
+
+ *offset_out = offset;
+
+ return 0;
+}
/**
* Decode input stream into an object
@@ -678,7 +823,7 @@ int cos_decode_null(struct pdf_doc *doc,
* [ a list
* t|T boolean true
* f|F boolean false
- * n|N null
+ * n|N null
*
* Grammar is:
* cos_object:
@@ -711,12 +856,12 @@ int cos_decode_object(struct pdf_doc *doc,
uint64_t offset;
int res;
struct cos_object *cosobj;
-
+
offset = *offset_out;
/* object could be any type use first char to try and select */
switch (DOC_BYTE(doc, offset)) {
-
+
case '-':
case '+':
case '.':
@@ -731,7 +876,12 @@ int cos_decode_object(struct pdf_doc *doc,
case '8':
case '9':
res = cos_decode_number(doc, &offset, &cosobj);
- /* if type is uint try to check for reference */
+ /* if type is positive integer try to check for reference */
+ if ((res == 0) &&
+ (cosobj->type == COS_TYPE_INT) &&
+ (cosobj->u.i > 0)) {
+ res = cos_attempt_decode_reference(doc, &offset, &cosobj);
+ }
break;
case '<':
@@ -769,7 +919,13 @@ int cos_decode_object(struct pdf_doc *doc,
default:
res = -1; /* syntax error */
}
-
+
+
+ if (res == 0) {
+ *cosobj_out = cosobj;
+ *offset_out = offset;
+ }
+
return res;
}
@@ -792,7 +948,7 @@ int decode_trailer(struct pdf_doc *doc, uint64_t offset)
}
offset += 7;
doc_skip_ws(doc, &offset);
-
+
res = cos_decode_object(doc, &offset, &trailer);
if (res != 0) {
return res;
@@ -802,7 +958,7 @@ int decode_trailer(struct pdf_doc *doc, uint64_t offset)
cos_free_object(trailer);
return -1;
}
-
+
return 0;
}
@@ -812,9 +968,9 @@ int decode_xref(struct pdf_doc *doc, uint64_t *offset_out)
uint64_t objnum; /* current object number */
uint64_t lastobjnum;
uint64_t offset;
-
+
offset = *offset_out;
-
+
/* xref object header */
if ((DOC_BYTE(doc, offset ) != 'x') &&
(DOC_BYTE(doc, offset + 1) != 'r') &&
@@ -866,7 +1022,7 @@ int decode_xref(struct pdf_doc *doc, uint64_t *offset_out)
objnum++;
}
// printf("at objnum %"PRIu64"\n", objnum);
-
+
/* first object number in table */
res = doc_read_uint(doc, &offset, &objnum);
}
@@ -909,6 +1065,6 @@ int main(int argc, char **argv)
printf("failed to decode trailer\n");
return res;
}
-
+
return 0;
}
commitdiff http://git.netsurf-browser.org/libnspdf.git/commit/?id=4c3a26f186e50b6b88...
commit 4c3a26f186e50b6b8821122b9cb4def1eb0fffeb
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
initial xref and basic cos object decode
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d6ff91a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*~
+*.o
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 0000000..997d964
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,9 @@
+#
+
+CFLAGS+=-g -Wall
+
+.PHONY:all
+
+all:xref
+
+xref:xref.o
diff --git a/src/xref.c b/src/xref.c
new file mode 100644
index 0000000..6af8132
--- /dev/null
+++ b/src/xref.c
@@ -0,0 +1,914 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#define SLEN(x) (sizeof((x)) - 1)
+
+#define BC_NONE 0
+#define BC_WSPC 1 /* character is whitespace */
+#define BC_EOLM (1<<1) /* character signifies end of line */
+#define BC_DCML (1<<2) /* character is a decimal */
+#define BC_HEXL (1<<3) /* character is a hexadecimal */
+#define BC_DELM (1<<4) /* character is a delimiter */
+
+/**
+ * byte classification
+ */
+uint8_t bclass[] = {
+ BC_WSPC, BC_NONE, BC_NONE, BC_NONE, /* 00 - 03 */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 04 - 07 */
+ BC_NONE, BC_WSPC, BC_WSPC | BC_EOLM, BC_NONE, /* 08 - 0B */
+ BC_WSPC, BC_WSPC | BC_EOLM, BC_NONE, BC_NONE, /* 0C - 0F */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 10 - 13 */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 14 - 17 */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 18 - 1B */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 1C - 1F */
+ BC_WSPC, BC_NONE, BC_NONE, BC_NONE, /* 20 - 23 */
+ BC_NONE, BC_DELM, /* '$' '%' */
+ BC_NONE, BC_NONE, /* 26 - 27 */
+ BC_DELM, BC_DELM, /* '(' ')' */
+ BC_NONE, BC_NONE, /* 2A - 2B */
+ BC_NONE, BC_NONE, /* 2C - 2D */
+ BC_NONE, BC_DELM, /* '.' '/' */
+ BC_DCML | BC_HEXL, BC_DCML | BC_HEXL, /* '0' '1' */
+ BC_DCML | BC_HEXL, BC_DCML | BC_HEXL, /* '2' '3' */
+ BC_DCML | BC_HEXL, BC_DCML | BC_HEXL, /* '4' '5' */
+ BC_DCML | BC_HEXL, BC_DCML | BC_HEXL, /* '6' '7' */
+ BC_DCML | BC_HEXL, BC_DCML | BC_HEXL, /* '8' '9' */
+ BC_NONE, BC_NONE, /* ':' ';' */
+ BC_DELM, BC_NONE, /* '<' '=' */
+ BC_DELM, BC_NONE, /* '>' '?' */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 40 - 43 */
+ BC_NONE, BC_HEXL, BC_HEXL, BC_HEXL, /* 44 - 47 */
+ BC_HEXL, BC_HEXL, BC_HEXL, BC_NONE, /* 48 - 4B */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 4C - 4F */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 50 - 53 */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 54 - 57 */
+ BC_NONE, BC_NONE, /* 58 - 59 */
+ BC_NONE, BC_DELM, /* 'Z' '[' */
+ BC_NONE, BC_DELM, /* '\' ']' */
+ BC_NONE, BC_NONE, /* 5E - 5F */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE,
+ BC_NONE, BC_HEXL, BC_HEXL, BC_HEXL, /* 60 - 67 */
+ BC_HEXL, BC_HEXL, BC_HEXL, BC_NONE,
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 68 - 6F */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE,
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 70 - 77 */
+ BC_NONE, BC_NONE, /* 78 - 79 */
+ BC_NONE, BC_DELM, /* 'z' '{' */
+ BC_NONE, BC_DELM, /* '|' '}' */
+ BC_NONE, BC_NONE, /* 7E - 7F */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 80 - 83 */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 84 - 87 */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE,
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 88 - 8F */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE,
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 90 - 97 */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE,
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* 98 - 9F */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE,
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* A0 - A7 */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE,
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* A8 - AF */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE,
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* B0 - B7 */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE,
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* B8 - BF */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE,
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* C0 - C7 */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE,
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* C8 - CF */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE,
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* D0 - D7 */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE,
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* D8 - DF */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE,
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* E0 - E7 */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE,
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* E8 - EF */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE,
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* F0 - F7 */
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE,
+ BC_NONE, BC_NONE, BC_NONE, BC_NONE, /* F8 - FF */
+};
+
+enum cos_type {
+ COS_TYPE_NULL,
+ COS_TYPE_BOOL,
+ COS_TYPE_INT,
+ COS_TYPE_REAL,
+ COS_TYPE_NAME,
+ COS_TYPE_STRING,
+ COS_TYPE_ARRAY,
+ COS_TYPE_DICTIONARY,
+ COS_TYPE_NAMETREE,
+ COS_TYPE_NUMBERTREE,
+ COS_TYPE_STREAM,
+ COS_TYPE_REFERENCE,
+};
+
+struct cos_object;
+
+struct cos_dictionary_entry {
+ /** next key/value in dictionary */
+ struct cos_dictionary_entry *next;
+
+ /** key (name) */
+ struct cos_object *key;
+
+ /** value */
+ struct cos_object *value;
+};
+
+
+struct cos_reference {
+ /** id of indirect object */
+ uint64_t id;
+
+ /* generation of indirect object */
+ uint64_t generation;
+};
+
+struct cos_object {
+ int type;
+ union {
+ /** boolean */
+ bool b;
+
+ /** integer */
+ int64_t i;
+
+ /** real */
+ double r;
+
+ /** name */
+ char *n;
+
+ /** string */
+ char *s;
+
+ /** stream data */
+ uint8_t *stream;
+
+ /* dictionary */
+ struct cos_dictionary_entry *dictionary;
+
+ /** reference */
+ struct cos_reference *reference;
+
+ } u;
+};
+
+
+/** linked list of indirect objects */
+struct cos_indirect_object {
+ /** next in list */
+ struct cos_indirect_object *next;
+
+ /* reference identifier */
+ struct cos_reference ref;
+
+ /** offset of object */
+ uint64_t offset;
+
+ /* direct object */
+ struct cos_object *o;
+};
+
+/** pdf document */
+struct pdf_doc {
+ uint8_t *buffer;
+ uint64_t buffer_length;
+
+ uint8_t *start; /* start of pdf document in input stream */
+ uint64_t length;
+
+ int major;
+ int minor;
+
+ /** start of current xref table */
+ uint64_t startxref;
+
+ /** indirect objects from document body */
+ struct cos_indirect_object *cos_list;
+};
+
+
+int cos_decode_object(struct pdf_doc *doc,
+ uint64_t *offset_out,
+ struct cos_object **cosobj_out);
+
+int
+read_whole_pdf(struct pdf_doc *doc, const char *fname)
+{
+ FILE *f;
+ off_t len;
+ uint8_t *buf;
+ size_t rd;
+
+ f = fopen(fname, "r");
+ if (f == NULL) {
+ perror("pdf open");
+ return 1;
+ }
+
+ fseek(f, 0, SEEK_END);
+ len = ftello(f);
+
+ buf = malloc(len);
+ fseek(f, 0, SEEK_SET);
+
+ rd = fread(buf, len, 1, f);
+ if (rd != 1) {
+ perror("pdf read");
+ free(buf);
+ return 1;
+ }
+
+ fclose(f);
+
+ doc->start = doc->buffer = buf;
+ doc->length = doc->buffer_length = len;
+
+ return 0;
+}
+
+
+#define STARTXREF_TOK "startxref"
+/* Number of bytes to search back from file end to find xref start token, convention says 1024 bytes */
+#define STARTXREF_SEARCH_SIZE 1024
+
+
+/* byte data accessor, allows for more complex buffer handling in future */
+#define DOC_BYTE(doc, offset) (doc->start[(offset)])
+
+/* find next non whitespace byte */
+static int doc_skip_ws(struct pdf_doc *doc, uint64_t *offset)
+{
+ uint8_t c;
+
+ c = DOC_BYTE(doc, *offset);
+ while ((bclass[c] & BC_WSPC) != 0) {
+ (*offset)++;
+ c = DOC_BYTE(doc, *offset);
+ }
+ return 0;
+}
+
+static int doc_read_uint(struct pdf_doc *doc, uint64_t *offset_out, uint64_t *result_out)
+{
+ uint8_t c; /* current byte from source data */
+ int len; /* number of decimal places in number */
+ uint8_t num[21]; /* temporary buffer for decimal values */
+ uint64_t offset; /* current offset of source data */
+ uint64_t result=0; /* parsed result */
+ uint64_t tens;
+
+ offset = *offset_out;
+
+ for (len = 0; len < sizeof(num); len++) {
+ c = DOC_BYTE(doc, offset);
+ if ((bclass[c] & BC_DCML) != BC_DCML) {
+ if (len == 0) {
+ return -2; /* parse error no decimals in input */
+ }
+ /* sum value from each place */
+ for (tens = 1; len > 0; tens = tens * 10, len--) {
+ result += (num[len - 1] * tens);
+ }
+
+ *offset_out = offset;
+ *result_out = result;
+
+ return 0;
+ }
+ num[len] = c - '0';
+ offset++;
+ }
+ return -1; /* number too long */
+}
+
+/**
+ * finds the startxref marker at the end of input
+ */
+int find_startxref(struct pdf_doc *doc, uint64_t *start_xref_out)
+{
+ uint64_t offset; /* offset of characters being considered for startxref */
+ uint64_t earliest; /* earliest offset to search for startxref */
+
+ offset = doc->length - SLEN(STARTXREF_TOK);
+
+ if (doc->length < STARTXREF_SEARCH_SIZE) {
+ earliest = 0;
+ } else {
+ earliest = doc->length - STARTXREF_SEARCH_SIZE;
+ }
+
+ for (;offset > earliest; offset--) {
+ if ((DOC_BYTE(doc, offset ) == 's') &&
+ (DOC_BYTE(doc, offset + 1) == 't') &&
+ (DOC_BYTE(doc, offset + 2) == 'a') &&
+ (DOC_BYTE(doc, offset + 3) == 'r') &&
+ (DOC_BYTE(doc, offset + 4) == 't') &&
+ (DOC_BYTE(doc, offset + 5) == 'x') &&
+ (DOC_BYTE(doc, offset + 6) == 'r') &&
+ (DOC_BYTE(doc, offset + 7) == 'e') &&
+ (DOC_BYTE(doc, offset + 8) == 'f')) {
+ offset += 9;
+ doc_skip_ws(doc, &offset);
+ return doc_read_uint(doc, &offset, start_xref_out);
+ }
+ }
+ return -1;
+}
+
+
+/**
+ * find the PDF comment marker to identify the start of the document
+ */
+int check_header(struct pdf_doc *doc)
+{
+ uint64_t offset; /* offset of characters being considered for the %PDF-1. header */
+
+ for (offset = 0; offset < 1024; offset++) {
+ if ((DOC_BYTE(doc, offset) == '%') &&
+ (DOC_BYTE(doc, offset + 1) == 'P') &&
+ (DOC_BYTE(doc, offset + 2) == 'D') &&
+ (DOC_BYTE(doc, offset + 3) == 'F') &&
+ (DOC_BYTE(doc, offset + 4) == '-') &&
+ (DOC_BYTE(doc, offset + 5) == '1') &&
+ (DOC_BYTE(doc, offset + 6) == '.')) {
+ doc->start = doc->buffer + offset;
+ doc->length -= offset;
+ /* read number for minor */
+ return 0;
+ }
+ }
+ return -1;
+}
+
+/**
+ * Record an indirect object on the document.
+ *
+ * A new entry is allocated, filled in from the arguments and pushed onto the
+ * head of the document's indirect object list. The entry is also printed.
+ *
+ * \param doc The document owning the list.
+ * \param obj_number The object number.
+ * \param obj_offset The offset recorded for the object.
+ * \param obj_generation The object generation.
+ * \return 0 on success, -1 if the entry could not be allocated.
+ */
+int cos_indirect_object_add(struct pdf_doc *doc,
+                            uint64_t obj_number,
+                            uint64_t obj_offset,
+                            uint64_t obj_generation)
+{
+    struct cos_indirect_object *entry = calloc(1, sizeof(*entry));
+
+    if (entry == NULL) {
+        return -1;
+    }
+
+    entry->ref.id = obj_number;
+    entry->ref.generation = obj_generation;
+    entry->offset = obj_offset;
+
+    /* link in at the head of the list */
+    entry->next = doc->cos_list;
+    doc->cos_list = entry;
+
+    printf("xref %"PRIu64" %"PRIu64" %"PRIu64"\n",
+           obj_number, obj_offset, obj_generation);
+
+    return 0;
+}
+
+/**
+ * Release a COS object and the storage it refers to.
+ *
+ * Names, strings and streams have their payload freed. Dictionaries have
+ * every entry's key and value freed recursively before the entry itself.
+ * Other types carry nothing extra to release.
+ *
+ * \param cos_obj The object to free.
+ * \return Always 0.
+ */
+int cos_free_object(struct cos_object *cos_obj)
+{
+    struct cos_dictionary_entry *cur;
+    struct cos_dictionary_entry *following;
+
+    switch (cos_obj->type) {
+    case COS_TYPE_NAME:
+        free(cos_obj->u.n);
+        break;
+
+    case COS_TYPE_STRING:
+        free(cos_obj->u.s);
+        break;
+
+    case COS_TYPE_STREAM:
+        free(cos_obj->u.stream);
+        break;
+
+    case COS_TYPE_DICTIONARY:
+        for (cur = cos_obj->u.dictionary; cur != NULL; cur = following) {
+            cos_free_object(cur->key);
+            cos_free_object(cur->value);
+
+            /* fetch the link before the entry is released */
+            following = cur->next;
+            free(cur);
+        }
+        break;
+
+    default:
+        /* nothing beyond the object itself to release */
+        break;
+    }
+
+    free(cos_obj);
+
+    return 0;
+}
+
+/**
+ * Decode a numeric object.
+ *
+ * Not implemented yet: no input is consumed and no object is produced.
+ *
+ * \param doc Unused.
+ * \param offset_out Unused.
+ * \param cosobj_out Unused.
+ * \return Always -1.
+ */
+int cos_decode_number(struct pdf_doc *doc,
+                      uint64_t *offset_out,
+                      struct cos_object **cosobj_out)
+{
+    (void)doc;
+    (void)offset_out;
+    (void)cosobj_out;
+
+    return -1;
+}
+
+/**
+ * Decode a literal string object.
+ *
+ * Not implemented yet: no input is consumed and no object is produced.
+ *
+ * \param doc Unused.
+ * \param offset_out Unused.
+ * \param cosobj_out Unused.
+ * \return Always -1.
+ */
+int cos_decode_string(struct pdf_doc *doc,
+                      uint64_t *offset_out,
+                      struct cos_object **cosobj_out)
+{
+    (void)doc;
+    (void)offset_out;
+    (void)cosobj_out;
+
+    return -1;
+}
+
+/**
+ * Decode a hexadecimal string object.
+ *
+ * Not implemented yet: no input is consumed and no object is produced.
+ *
+ * \param doc Unused.
+ * \param offset_out Unused.
+ * \param cosobj_out Unused.
+ * \return Always -1.
+ */
+int cos_decode_hex_string(struct pdf_doc *doc,
+                          uint64_t *offset_out,
+                          struct cos_object **cosobj_out)
+{
+    (void)doc;
+    (void)offset_out;
+    (void)cosobj_out;
+
+    return -1;
+}
+
+
+/**
+ * Decode a dictionary object.
+ *
+ * The input must start with "<<". Key/value pairs are decoded with
+ * cos_decode_object() until the closing ">>" is reached; every key must be a
+ * name object. Entries are pushed onto the head of the dictionary list, so
+ * they end up in reverse order of appearance.
+ *
+ * On any failure everything built so far is released and the outputs are
+ * left untouched.
+ *
+ * \param doc The document being decoded.
+ * \param offset_out On entry the offset of the opening "<<"; on success
+ *                   updated to the first non-whitespace byte after ">>".
+ * \param cosobj_out On success receives the new dictionary object.
+ * \return 0 on success, -1 on syntax or memory error, otherwise the error
+ *         returned while decoding a key or value.
+ */
+int cos_decode_dictionary(struct pdf_doc *doc,
+                          uint64_t *offset_out,
+                          struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj;
+    struct cos_dictionary_entry *entry;
+    struct cos_object *key;
+    struct cos_object *value;
+    int res;
+
+    offset = *offset_out;
+
+    if ((DOC_BYTE(doc, offset) != '<') ||
+        (DOC_BYTE(doc, offset + 1) != '<')) {
+        return -1; /* syntax error */
+    }
+    offset += 2;
+    doc_skip_ws(doc, &offset);
+
+    printf("found a dictionary\n");
+
+    cosobj = calloc(1, sizeof(struct cos_object));
+    if (cosobj == NULL) {
+        return -1; /* memory error */
+    }
+    cosobj->type = COS_TYPE_DICTIONARY;
+
+    /* Continue until both bytes of the ">>" terminator are present. A
+     * lone '>' (for example the end of a hex string) must not end the
+     * dictionary.
+     */
+    while ((DOC_BYTE(doc, offset) != '>') ||
+           (DOC_BYTE(doc, offset + 1) != '>')) {
+
+        key = NULL;
+        value = NULL;
+
+        res = cos_decode_object(doc, &offset, &key);
+        if (res != 0) {
+            cos_free_object(cosobj);
+            return res;
+        }
+        if (key == NULL) {
+            /* decoder produced nothing to use as a key */
+            cos_free_object(cosobj);
+            return -1;
+        }
+        if (key->type != COS_TYPE_NAME) {
+            /* key value pairs without a name */
+            cos_free_object(key);
+            cos_free_object(cosobj);
+            return -1; /* syntax error */
+        }
+
+        res = cos_decode_object(doc, &offset, &value);
+        if ((res == 0) && (value == NULL)) {
+            /* decoder produced nothing to store as the value */
+            res = -1;
+        }
+        if (res != 0) {
+            cos_free_object(key);
+            cos_free_object(cosobj);
+            return res;
+        }
+
+        /* add dictionary entry */
+        entry = calloc(1, sizeof(struct cos_dictionary_entry));
+        if (entry == NULL) {
+            cos_free_object(key);
+            cos_free_object(value);
+            cos_free_object(cosobj);
+            return -1; /* memory error */
+        }
+
+        entry->key = key;
+        entry->value = value;
+        entry->next = cosobj->u.dictionary;
+
+        cosobj->u.dictionary = entry;
+
+    }
+    offset += 2; /* skip closing >> */
+    doc_skip_ws(doc, &offset);
+
+    *cosobj_out = cosobj;
+    *offset_out = offset;
+
+    return 0;
+}
+
+/**
+ * Decode a list object.
+ *
+ * Not implemented yet: no input is consumed and no object is produced.
+ *
+ * \param doc Unused.
+ * \param offset_out Unused.
+ * \param cosobj_out Unused.
+ * \return Always -1.
+ */
+int cos_decode_list(struct pdf_doc *doc,
+                    uint64_t *offset_out,
+                    struct cos_object **cosobj_out)
+{
+    (void)doc;
+    (void)offset_out;
+    (void)cosobj_out;
+
+    return -1;
+}
+
+#define NAME_MAX_LENGTH 127
+
+/**
+ * Test whether a byte ends a name token.
+ *
+ * \param c The byte to test.
+ * \return 1 for PDF whitespace and delimiter characters, 0 otherwise.
+ */
+static int cos_name_char_ends(uint8_t c)
+{
+    switch (c) {
+    case 0:    /* NUL */
+    case 9:    /* horizontal tab */
+    case 10:   /* line feed */
+    case 12:   /* form feed */
+    case 13:   /* carriage return */
+    case 32:   /* space */
+    case '(':
+    case ')':
+    case '<':
+    case '>':
+    case '[':
+    case ']':
+    case '{':
+    case '}':
+    case '/':
+    case '%':
+        return 1;
+
+    default:
+        return 0;
+    }
+}
+
+/**
+ * Decode a name object.
+ *
+ * A name is a '/' followed by a run of characters that ends at the first
+ * whitespace or delimiter byte, or at the end of the document. The text
+ * after the '/' is copied into a freshly allocated, NUL terminated buffer
+ * owned by the returned object.
+ *
+ * NOTE(review): "#xx" escape sequences are stored verbatim, not decoded.
+ * NOTE(review): assumes the u.n member of cos_object points at a character
+ * buffer, which is how cos_free_object() treats it; confirm against the
+ * structure declaration.
+ *
+ * \param doc The document being decoded.
+ * \param offset_out On entry the offset of the leading '/'; on success
+ *                   updated to the first non-whitespace byte after the name.
+ * \param cosobj_out On success receives the new name object.
+ * \return 0 on success, -1 on syntax error, over-long name or memory error.
+ */
+int cos_decode_name(struct pdf_doc *doc,
+                    uint64_t *offset_out,
+                    struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj;
+    uint8_t c;
+    char name[NAME_MAX_LENGTH + 1];
+    int len = 0; /* characters collected so far */
+    int idx;
+
+    offset = *offset_out;
+
+    c = DOC_BYTE(doc, offset++);
+    if (c != '/') {
+        return -1; /* syntax error */
+    }
+
+    /* collect the characters of the name */
+    while (offset < doc->length) {
+        c = DOC_BYTE(doc, offset);
+        if (cos_name_char_ends(c)) {
+            break;
+        }
+        if (len >= NAME_MAX_LENGTH) {
+            return -1; /* name too long */
+        }
+        name[len++] = (char)c;
+        offset++;
+    }
+
+    doc_skip_ws(doc, &offset);
+
+    cosobj = calloc(1, sizeof(struct cos_object));
+    if (cosobj == NULL) {
+        return -1; /* memory error */
+    }
+
+    /* calloc zero fills, so the copy is always NUL terminated */
+    cosobj->u.n = calloc(1, (size_t)len + 1);
+    if (cosobj->u.n == NULL) {
+        free(cosobj);
+        return -1; /* memory error */
+    }
+    for (idx = 0; idx < len; idx++) {
+        ((char *)cosobj->u.n)[idx] = name[idx];
+    }
+
+    cosobj->type = COS_TYPE_NAME;
+
+    *cosobj_out = cosobj;
+
+    *offset_out = offset;
+
+    return 0;
+}
+
+
+/**
+ * Decode a boolean object.
+ *
+ * Accepts the keywords "true" and "false" in any mix of upper and lower
+ * case. Whitespace following the keyword is skipped.
+ *
+ * \param doc The document being decoded.
+ * \param offset_out On entry the offset of the keyword; on success updated
+ *                   to the first non-whitespace byte after it.
+ * \param cosobj_out On success receives the new boolean object.
+ * \return 0 on success, -1 on syntax or memory error.
+ */
+int cos_decode_boolean(struct pdf_doc *doc,
+                       uint64_t *offset_out,
+                       struct cos_object **cosobj_out)
+{
+    const char *rest_lower; /* remainder of the keyword in lower case */
+    const char *rest_upper; /* remainder of the keyword in upper case */
+    struct cos_object *result;
+    uint64_t pos = *offset_out;
+    uint8_t c;
+    bool value;
+    int idx;
+
+    /* the first character selects which keyword is expected */
+    c = DOC_BYTE(doc, pos++);
+    if ((c == 't') || (c == 'T')) {
+        rest_lower = "rue";
+        rest_upper = "RUE";
+        value = true;
+    } else if ((c == 'f') || (c == 'F')) {
+        rest_lower = "alse";
+        rest_upper = "ALSE";
+        value = false;
+    } else {
+        return -1; /* syntax error */
+    }
+
+    /* every remaining character must match in either case */
+    for (idx = 0; rest_lower[idx] != '\0'; idx++) {
+        c = DOC_BYTE(doc, pos++);
+        if ((c != rest_lower[idx]) && (c != rest_upper[idx])) {
+            return -1; /* syntax error */
+        }
+    }
+
+    doc_skip_ws(doc, &pos);
+
+    result = calloc(1, sizeof(struct cos_object));
+    if (result == NULL) {
+        return -1; /* memory error */
+    }
+
+    result->type = COS_TYPE_BOOL;
+    result->u.b = value;
+
+    *cosobj_out = result;
+    *offset_out = pos;
+
+    return 0;
+}
+
+/**
+ * Decode a null object.
+ *
+ * Accepts the keyword "null" in any mix of upper and lower case. Whitespace
+ * following the keyword is skipped.
+ *
+ * \param doc The document being decoded.
+ * \param offset_out On entry the offset of the keyword; on success updated
+ *                   to the first non-whitespace byte after it.
+ * \param cosobj_out On success receives the new null object.
+ * \return 0 on success, -1 on syntax or memory error.
+ */
+int cos_decode_null(struct pdf_doc *doc,
+                    uint64_t *offset_out,
+                    struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj;
+    uint8_t c;
+
+    offset = *offset_out;
+
+    c = DOC_BYTE(doc, offset++);
+    if ((c != 'n') && (c != 'N')) {
+        return -1; /* syntax error */
+    }
+    c = DOC_BYTE(doc, offset++);
+    if ((c != 'u') && (c != 'U')) {
+        return -1; /* syntax error */
+    }
+    c = DOC_BYTE(doc, offset++);
+    if ((c != 'l') && (c != 'L')) {
+        return -1; /* syntax error */
+    }
+    c = DOC_BYTE(doc, offset++);
+    if ((c != 'l') && (c != 'L')) {
+        return -1; /* syntax error */
+    }
+
+    doc_skip_ws(doc, &offset);
+
+    cosobj = calloc(1, sizeof(struct cos_object));
+    if (cosobj == NULL) {
+        return -1; /* memory error */
+    }
+
+    cosobj->type = COS_TYPE_NULL;
+
+    /* hand the object to the caller; otherwise it would be leaked and the
+     * caller would be left with an unset pointer
+     */
+    *cosobj_out = cosobj;
+
+    *offset_out = offset;
+
+    return 0;
+}
+
+
+/**
+ * Decode input stream into an object
+ *
+ * lex and parse a byte stream to generate COS objects
+ *
+ * lexing the input.
+ *  check first character:
+ *
+ *  <   either a hex string or a dictionary
+ *        second char < means dictionary else hex string
+ *  -   either an integer or real
+ *  +   either an integer or real
+ *  0-9 an integer, unsigned integer or real
+ *  .   a real number
+ *  (   a string
+ *  /   a name
+ *  [   a list
+ *  t|T boolean true
+ *  f|F boolean false
+ *  n|N null
+ *
+ * Grammar is:
+ * cos_object:
+ *   TOK_NULL |
+ *   TOK_BOOLEAN |
+ *   TOK_INT |
+ *   TOK_REAL |
+ *   TOK_NAME |
+ *   TOK_STRING |
+ *   list |
+ *   dictionary |
+ *   object_reference;
+ *
+ * list:
+ *   '[' listargs ']';
+ *
+ * listargs:
+ *   cos_object
+ *   |
+ *   listargs cos_object
+ *   ;
+ *
+ * object_reference:
+ *   TOK_UINT TOK_UINT 'R';
+ *
+ * \param doc The document being decoded.
+ * \param offset_out On entry the offset of the first byte of the object; on
+ *                   success updated to the offset the type decoder stopped
+ *                   at. Unchanged on failure.
+ * \param cosobj_out On success receives the decoded object. Unchanged on
+ *                   failure.
+ * \return 0 on success, -1 on syntax error, otherwise the error returned by
+ *         the type specific decoder.
+ */
+int cos_decode_object(struct pdf_doc *doc,
+                      uint64_t *offset_out,
+                      struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    int res;
+    struct cos_object *cosobj = NULL;
+
+    offset = *offset_out;
+
+    /* object could be any type use first char to try and select */
+    switch (DOC_BYTE(doc, offset)) {
+
+    case '-':
+    case '+':
+    case '.':
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+    case '8':
+    case '9':
+        res = cos_decode_number(doc, &offset, &cosobj);
+        /* if type is uint try to check for reference */
+        break;
+
+    case '<':
+        if (DOC_BYTE(doc, offset + 1) == '<') {
+            res = cos_decode_dictionary(doc, &offset, &cosobj);
+        } else {
+            res = cos_decode_hex_string(doc, &offset, &cosobj);
+        }
+        break;
+
+    case '(':
+        res = cos_decode_string(doc, &offset, &cosobj);
+        break;
+
+    case '/':
+        res = cos_decode_name(doc, &offset, &cosobj);
+        break;
+
+    case '[':
+        res = cos_decode_list(doc, &offset, &cosobj);
+        break;
+
+    case 't':
+    case 'T':
+    case 'f':
+    case 'F':
+        res = cos_decode_boolean(doc, &offset, &cosobj);
+        break;
+
+    case 'n':
+    case 'N':
+        res = cos_decode_null(doc, &offset, &cosobj);
+        break;
+
+    default:
+        res = -1; /* syntax error */
+        break;
+    }
+
+    if (res != 0) {
+        return res;
+    }
+
+    if (cosobj == NULL) {
+        /* a decoder reported success without producing an object */
+        return -1;
+    }
+
+    /* publish the result so the caller sees the object and moves on */
+    *cosobj_out = cosobj;
+    *offset_out = offset;
+
+    return 0;
+}
+
+
+
+/**
+ * Decode the document trailer.
+ *
+ * Expects the keyword "trailer" at the given offset followed by an object,
+ * which must be a dictionary. The dictionary is only validated here; it is
+ * not kept, so it is released before returning.
+ *
+ * \param doc The document being decoded.
+ * \param offset Offset of the "trailer" keyword.
+ * \return 0 on success, -1 if the keyword is missing or the object is not a
+ *         dictionary, otherwise the error from cos_decode_object().
+ */
+int decode_trailer(struct pdf_doc *doc, uint64_t offset)
+{
+    struct cos_object *trailer;
+    int res;
+
+    /* trailer object header: reject if any character differs */
+    if ((DOC_BYTE(doc, offset    ) != 't') ||
+        (DOC_BYTE(doc, offset + 1) != 'r') ||
+        (DOC_BYTE(doc, offset + 2) != 'a') ||
+        (DOC_BYTE(doc, offset + 3) != 'i') ||
+        (DOC_BYTE(doc, offset + 4) != 'l') ||
+        (DOC_BYTE(doc, offset + 5) != 'e') ||
+        (DOC_BYTE(doc, offset + 6) != 'r')) {
+        return -1;
+    }
+    offset += 7;
+    doc_skip_ws(doc, &offset);
+
+    res = cos_decode_object(doc, &offset, &trailer);
+    if (res != 0) {
+        return res;
+    }
+
+    if (trailer->type != COS_TYPE_DICTIONARY) {
+        cos_free_object(trailer);
+        return -1;
+    }
+
+    /* nothing retains the trailer yet, so do not leak it */
+    cos_free_object(trailer);
+
+    return 0;
+}
+
+/**
+ * Decode a cross reference table.
+ *
+ * Expects the keyword "xref" at the given offset followed by zero or more
+ * subsections. Each subsection is a first object number and an entry count,
+ * then one entry per object made of an offset, a generation and a one
+ * character type. Entries of type 'n' are added to the document with
+ * cos_indirect_object_add(); other entries are skipped.
+ *
+ * Decoding stops at the first position where a subsection's first object
+ * number cannot be read.
+ *
+ * \param doc The document being decoded.
+ * \param offset_out On entry the offset of the "xref" keyword; on success
+ *                   updated to the offset decoding stopped at.
+ * \return 0 on success, -1 if the keyword is missing, otherwise the error
+ *         from reading a number or adding an object.
+ */
+int decode_xref(struct pdf_doc *doc, uint64_t *offset_out)
+{
+    int res;
+    uint64_t objnum; /* current object number */
+    uint64_t lastobjnum; /* entry count, then one past the last object */
+    uint64_t offset;
+
+    offset = *offset_out;
+
+    /* xref object header: reject if any character differs */
+    if ((DOC_BYTE(doc, offset    ) != 'x') ||
+        (DOC_BYTE(doc, offset + 1) != 'r') ||
+        (DOC_BYTE(doc, offset + 2) != 'e') ||
+        (DOC_BYTE(doc, offset + 3) != 'f')) {
+        return -1;
+    }
+    offset += 4;
+    doc_skip_ws(doc, &offset);
+
+    /* first object number in table */
+    res = doc_read_uint(doc, &offset, &objnum);
+    while (res == 0) {
+        doc_skip_ws(doc, &offset);
+
+        /* number of entries in this subsection */
+        res = doc_read_uint(doc, &offset, &lastobjnum);
+        if (res != 0) {
+            return res;
+        }
+        doc_skip_ws(doc, &offset);
+
+        /* turn the count into the object number that ends the subsection */
+        lastobjnum += objnum;
+
+        /* object index entries */
+        while (objnum < lastobjnum) {
+            uint64_t obj_start;
+            uint64_t obj_generation;
+
+            /* object offset */
+            res = doc_read_uint(doc, &offset, &obj_start);
+            if (res != 0) {
+                return res;
+            }
+            doc_skip_ws(doc, &offset);
+
+            /* object generation */
+            res = doc_read_uint(doc, &offset, &obj_generation);
+            if (res != 0) {
+                return res;
+            }
+            doc_skip_ws(doc, &offset);
+
+            /* only entries of type 'n' are recorded */
+            if (DOC_BYTE(doc, offset) == 'n') {
+                res = cos_indirect_object_add(doc,
+                                              objnum,
+                                              obj_start,
+                                              obj_generation);
+                if (res != 0) {
+                    return res;
+                }
+            }
+            offset++; /* skip the entry type character */
+            doc_skip_ws(doc, &offset);
+
+            objnum++;
+        }
+
+        /* first object number of the next subsection, if there is one */
+        res = doc_read_uint(doc, &offset, &objnum);
+    }
+    *offset_out = offset;
+    return 0;
+}
+
+/**
+ * Program entry point.
+ *
+ * Reads the PDF named by the first command line argument, checks its
+ * header, locates the startxref value and decodes the cross reference table
+ * and trailer found there.
+ *
+ * \param argc Number of command line arguments.
+ * \param argv Command line arguments; argv[1] is the file to read.
+ * \return 0 on success, 1 if no file was given, otherwise the error from
+ *         the step that failed.
+ */
+int main(int argc, char **argv)
+{
+    struct pdf_doc doc;
+    int res;
+    uint64_t startxref;
+
+    /* argv[1] is only valid when a file name was actually supplied */
+    if (argc < 2) {
+        printf("no input file specified\n");
+        return 1;
+    }
+
+    res = read_whole_pdf(&doc, argv[1]);
+    if (res != 0) {
+        printf("failed to read file\n");
+        return res;
+    }
+
+    res = check_header(&doc);
+    if (res != 0) {
+        printf("header check failed\n");
+        return res;
+    }
+
+    res = find_startxref(&doc, &startxref);
+    if (res != 0) {
+        printf("failed to find startxref\n");
+        return res;
+    }
+
+    /* on success startxref is moved past the table to the trailer */
+    res = decode_xref(&doc, &startxref);
+    if (res != 0) {
+        printf("failed to decode xref table\n");
+        return res;
+    }
+
+    res = decode_trailer(&doc, startxref);
+    if (res != 0) {
+        printf("failed to decode trailer\n");
+        return res;
+    }
+
+    return 0;
+}
-----------------------------------------------------------------------
--
PDF Manipulation Library
5 years, 10 months