[project @ 2004-03-25 00:31:45 by jmb]

Make text export use stdio. Move save_text.[ch] to desktop. svn path=/import/netsurf/; revision=666
author: John Mark Bell <jmb@netsurf-browser.org> 2004-03-25 00:31:45 +0000
committer: John Mark Bell <jmb@netsurf-browser.org> 2004-03-25 00:31:45 +0000
commit: c18775f4fb469dc1e60b345a9fd0f83788ffcf50 (patch)
tree: d93e1054f960a5a3c7af33086e95f2316d3106c1 /desktop/save_text.c
parent: 7d0b1443916345947ee72ff07ebb56493d09598e (diff)
download: netsurf-c18775f4fb469dc1e60b345a9fd0f83788ffcf50.tar.gz
netsurf-c18775f4fb469dc1e60b345a9fd0f83788ffcf50.tar.bz2
1 files changed, 117 insertions, 0 deletions
diff --git a/desktop/save_text.c b/desktop/save_text.c
new file mode 100644
index 000000000..916567717
--- /dev/null
+++ b/desktop/save_text.c
@@ -0,0 +1,117 @@
+/*
+ * This file is part of NetSurf, http://netsurf.sourceforge.net/
+ * Licensed under the GNU General Public License,
+ *                http://www.opensource.org/licenses/gpl-license
+ * Copyright 2004 John M Bell <jmb202@ecs.soton.ac.uk>
+ */
+
+#include <stdbool.h>
+#include <string.h>
+
+#include "libxml/HTMLtree.h"
+
+#include "netsurf/utils/config.h"
+#include "netsurf/content/content.h"
+#include "netsurf/desktop/save_text.h"
+#include "netsurf/utils/log.h"
+#include "netsurf/utils/utils.h"
+
+#ifdef WITH_TEXT_EXPORT
+
+static void extract_text(xmlDoc *doc);
+static void extract_text_from_tree(xmlNode *n);
+
+static FILE *out;
+
+/* Write the text of HTML content c to the file at path. Content of any other
+ * type is ignored, and nothing is reported if the file cannot be opened. */
+void save_as_text(struct content *c, char *path) {
+	htmlParserCtxtPtr toSave;
+
+	if (c->type != CONTENT_HTML) return;
+
+	out = fopen(path, "w+");
+	if (!out) return;
+
+	toSave = htmlCreateMemoryParserCtxt(c->source_data, c->source_size);
+	if (toSave != NULL) {	/* context creation can fail */
+		htmlParseDocument(toSave);
+		/* without a document tree there is nothing to export */
+		if (toSave->myDoc != NULL) extract_text(toSave->myDoc);
+		xmlFreeDoc(toSave->myDoc);
+		htmlFreeParserCtxt(toSave);
+	}
+	fclose(out);
+	out = NULL;	/* the shared stream is closed; don't leave it dangling */
+}
+
+/* Locate the root <html> element of doc and write the text beneath it to out.
+ * Does nothing if doc is NULL or its first element node is not <html>. */
+void extract_text(xmlDoc *doc)
+{
+	xmlNode *html;
+
+	if (doc == NULL) return;
+
+	/* skip leading non-element nodes to reach the first element */
+	for (html = doc->children; html != NULL; html = html->next)
+		if (html->type == XML_ELEMENT_NODE) break;
+
+	if (html != NULL && strcmp((const char *)html->name, "html") == 0)
+		extract_text_from_tree(html);
+}
+
+/* Number of newlines to write after an element called name: 2 for h1-h3,
+ * lists and the title, 1 for the other line-breaking elements, 0 otherwise. */
+static int newlines_after(const char *name)
+{
+	static const char *const two[] = {
+		"dl", "h1", "h2", "h3", "ol", "title", "ul", NULL
+	};
+	static const char *const one[] = {
+		"applet", "br", "div", "dt", "h4", "h5", "h6", "li",
+		"object", "p", "tr", NULL
+	};
+	int i;
+
+	for (i = 0; two[i] != NULL; i++) {
+		if (strcmp(name, two[i]) == 0) return 2;
+	}
+	for (i = 0; one[i] != NULL; i++) {
+		if (strcmp(name, one[i]) == 0) return 1;
+	}
+	return 0;
+}
+
+/* Write the text content of the subtree rooted at n to out, in document order.
+ * Text nodes go through squash_tolat1() first; nodes that are neither elements
+ * nor text are skipped together with their children. Line breaks are written
+ * after an element's children, as decided by newlines_after(). */
+void extract_text_from_tree(xmlNode *n)
+{
+	xmlNode *child;
+	char *text;
+	int i, need_nl;
+
+	if (n->type == XML_TEXT_NODE) {
+		/* squash_tolat1() hands back a buffer we release with xfree() */
+		text = squash_tolat1(n->content);
+		fputs(text, out);
+		xfree(text);
+		return;
+	}
+	if (n->type != XML_ELEMENT_NODE) return;
+
+	/* same cast as extract_text(): strcmp() wants a plain char string */
+	need_nl = newlines_after((const char *)n->name);
+
+	for (child = n->children; child != NULL; child = child->next) {
+		extract_text_from_tree(child);
+	}
+
+	for (i = 0; i < need_nl; i++) {
+		fputc('\n', out);
+	}
+}
+
+#endif
author	John Mark Bell <jmb@netsurf-browser.org>	2004-03-25 00:31:45 +0000
committer	John Mark Bell <jmb@netsurf-browser.org>	2004-03-25 00:31:45 +0000
commit	c18775f4fb469dc1e60b345a9fd0f83788ffcf50 (patch)
tree	d93e1054f960a5a3c7af33086e95f2316d3106c1 /desktop/save_text.c
parent	7d0b1443916345947ee72ff07ebb56493d09598e (diff)
download	netsurf-c18775f4fb469dc1e60b345a9fd0f83788ffcf50.tar.gz netsurf-c18775f4fb469dc1e60b345a9fd0f83788ffcf50.tar.bz2