summary refs log tree commit diff
path: root/ruby
diff options
context:
space:
mode:
author: Steven G. Johnson <stevenj@mit.edu> 2014-07-15 15:29:52 -0400
committer: Steven G. Johnson <stevenj@mit.edu> 2014-07-15 15:29:52 -0400
commit: ab9520d18845248ef79ee98e8d671f8eecfec288 (patch)
tree: 92c5d4df269de5321b6eeb27206fded2316afb22 /ruby
download: libutf8proc-ab9520d18845248ef79ee98e8d671f8eecfec288.tar.gz
download: libutf8proc-ab9520d18845248ef79ee98e8d671f8eecfec288.tar.bz2
import of utf8proc-v1.1.6v1.1.6
Diffstat (limited to 'ruby')
-rw-r--r-- ruby/extconf.rb 2
-rw-r--r-- ruby/gem/LICENSE 64
-rw-r--r-- ruby/gem/utf8proc.gemspec 12
-rw-r--r-- ruby/utf8proc.rb 98
-rw-r--r-- ruby/utf8proc_native.c 160
5 files changed, 336 insertions, 0 deletions
diff --git a/ruby/extconf.rb b/ruby/extconf.rb
new file mode 100644
index 0000000..6dbb095
--- /dev/null
+++ b/ruby/extconf.rb
@@ -0,0 +1,2 @@
+require 'mkmf'
+# Write the Makefile that builds the "utf8proc_native" extension.
+create_makefile('utf8proc_native')
diff --git a/ruby/gem/LICENSE b/ruby/gem/LICENSE
new file mode 100644
index 0000000..504e4c5
--- /dev/null
+++ b/ruby/gem/LICENSE
@@ -0,0 +1,64 @@
+
+Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+
+
+This software distribution contains derived data from a modified version of
+the Unicode data files. The following license applies to that data:
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright (c) 1991-2007 Unicode, Inc. All rights reserved. Distributed
+under the Terms of Use in http://www.unicode.org/copyright.html.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of the Unicode data files and any associated documentation (the "Data
+Files") or Unicode software and any associated documentation (the
+"Software") to deal in the Data Files or Software without restriction,
+including without limitation the rights to use, copy, modify, merge,
+publish, distribute, and/or sell copies of the Data Files or Software, and
+to permit persons to whom the Data Files or Software are furnished to do
+so, provided that (a) the above copyright notice(s) and this permission
+notice appear with all copies of the Data Files or Software, (b) both the
+above copyright notice(s) and this permission notice appear in associated
+documentation, and (c) there is clear notice in each modified Data File or
+in the Software as well as in the documentation associated with the Data
+File(s) or Software that the data or software has been modified.
+
+THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
+THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
+INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR
+CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
+USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THE DATA FILES OR SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder shall
+not be used in advertising or otherwise to promote the sale, use or other
+dealings in these Data Files or Software without prior written
+authorization of the copyright holder.
+
+
+Unicode and the Unicode logo are trademarks of Unicode, Inc., and may be
+registered in some jurisdictions. All other trademarks and registered
+trademarks mentioned herein are the property of their respective owners.
+
diff --git a/ruby/gem/utf8proc.gemspec b/ruby/gem/utf8proc.gemspec
new file mode 100644
index 0000000..09f74dc
--- /dev/null
+++ b/ruby/gem/utf8proc.gemspec
@@ -0,0 +1,12 @@
+require 'rubygems'
+SPEC = Gem::Specification.new do |s|
+ s.name = 'utf8proc'
+ s.version = '1.1.6'
+ s.author = 'Public Software Group e. V., Berlin, Germany'
+ s.homepage = 'http://www.public-software-group.org/utf8proc'
+ s.summary = 'UTF-8 Unicode string processing'
+ s.files = ['LICENSE', 'lib/utf8proc.rb', 'ext/utf8proc_native.c']
+ s.require_path = 'lib/'
+ s.extensions = ['ext/extconf.rb']
+ s.has_rdoc = false
+end
diff --git a/ruby/utf8proc.rb b/ruby/utf8proc.rb
new file mode 100644
index 0000000..83f1649
--- /dev/null
+++ b/ruby/utf8proc.rb
@@ -0,0 +1,98 @@
+# Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+#
+# File name: ruby/utf8proc.rb
+#
+# Description:
+# The part of the ruby wrapper for libutf8proc that is written in ruby.
+#
+
+
+require 'utf8proc_native'
+
+
+module Utf8Proc
+
+ SpecialChars = {
+ :HT => "\x09",
+ :LF => "\x0A",
+ :VT => "\x0B",
+ :FF => "\x0C",
+ :CR => "\x0D",
+ :FS => "\x1C",
+ :GS => "\x1D",
+ :RS => "\x1E",
+ :US => "\x1F",
+ :LS => "\xE2\x80\xA8",
+ :PS => "\xE2\x80\xA9",
+ }
+
+ module StringExtensions
+ def utf8map(*option_array)
+ options = 0
+ option_array.each do |option|
+ flag = Utf8Proc::Options[option]
+ raise ArgumentError, "Unknown argument given to String#utf8map." unless
+ flag
+ options |= flag
+ end
+ return Utf8Proc::utf8map(self, options)
+ end
+ def utf8map!(*option_array)
+ self.replace(self.utf8map(*option_array))
+ end
+ def utf8nfd; utf8map( :stable, :decompose); end
+ def utf8nfd!; utf8map!(:stable, :decompose); end
+ def utf8nfc; utf8map( :stable, :compose); end
+ def utf8nfc!; utf8map!(:stable, :compose); end
+ def utf8nfkd; utf8map( :stable, :decompose, :compat); end
+ def utf8nfkd!; utf8map!(:stable, :decompose, :compat); end
+ def utf8nfkc; utf8map( :stable, :compose, :compat); end
+ def utf8nfkc!; utf8map!(:stable, :compose, :compat); end
+ def utf8chars
+ result = self.utf8map(:charbound).split("\377")
+ result.shift if result.first == ""
+ result
+ end
+ def char_ary
+ # depecated, use String#utf8chars instead
+ utf8chars
+ end
+ end
+
+ module IntegerExtensions
+ def utf8
+ return Utf8Proc::utf8char(self)
+ end
+ end
+
+end
+
+
+# Give every String the utf8map / utf8nf* / utf8chars helpers.
+String.send(:include, Utf8Proc::StringExtensions)
+
+# Give every Integer the #utf8 helper.
+Integer.send(:include, Utf8Proc::IntegerExtensions)
+
diff --git a/ruby/utf8proc_native.c b/ruby/utf8proc_native.c
new file mode 100644
index 0000000..9e702a9
--- /dev/null
+++ b/ruby/utf8proc_native.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+/*
+ * File name: ruby/utf8proc_native.c
+ *
+ * Description:
+ * Native part of the ruby wrapper for libutf8proc.
+ */
+
+
+#include "../utf8proc.c"
+#include "ruby.h"
+
+/* Fallback accessors for ruby headers that do not define them. */
+#if !defined(RSTRING_PTR)
+#  define RSTRING_PTR(s) (RSTRING(s)->ptr)
+#endif
+#if !defined(RSTRING_LEN)
+#  define RSTRING_LEN(s) (RSTRING(s)->len)
+#endif
+
+/* Holder for the temporary code point buffer used by utf8proc_ruby_map; it
+ * is wrapped in a ruby data object there so a free function is attached. */
+struct utf8proc_ruby_mapenv_struct {
+  int32_t *buffer;
+};
+typedef struct utf8proc_ruby_mapenv_struct utf8proc_ruby_mapenv_t;
+
+/*
+ * Free function attached to the data object created in utf8proc_ruby_map.
+ * Both the struct (Data_Make_Struct) and its buffer (ALLOC_N) come from
+ * ruby's allocator, so they are released with xfree rather than libc free.
+ * env->buffer may be NULL here; xfree accepts that.
+ */
+void utf8proc_ruby_mapenv_free(utf8proc_ruby_mapenv_t *env) {
+  xfree(env->buffer);
+  xfree(env);
+}
+
+/* Ruby objects created once in Init_utf8proc_native. */
+VALUE utf8proc_ruby_module,                 /* the Utf8Proc module */
+      utf8proc_ruby_options,                /* hash behind Utf8Proc::Options */
+      utf8proc_ruby_eUnicodeError,          /* base class of the two below */
+      utf8proc_ruby_eInvalidUtf8Error,
+      utf8proc_ruby_eCodeNotAssignedError;
+
+/*
+ * Raises the ruby exception that corresponds to a negative utf8proc result
+ * code, using utf8proc_errmsg(result) as the message. Codes without a
+ * dedicated mapping raise RuntimeError.
+ */
+VALUE utf8proc_ruby_map_error(ssize_t result) {
+  VALUE error_class = rb_eRuntimeError;
+  if (result == UTF8PROC_ERROR_NOMEM) {
+    error_class = rb_eNoMemError;
+  } else if (result == UTF8PROC_ERROR_OVERFLOW ||
+             result == UTF8PROC_ERROR_INVALIDOPTS) {
+    error_class = rb_eArgError;
+  } else if (result == UTF8PROC_ERROR_INVALIDUTF8) {
+    error_class = utf8proc_ruby_eInvalidUtf8Error;
+  } else if (result == UTF8PROC_ERROR_NOTASSIGNED) {
+    error_class = utf8proc_ruby_eCodeNotAssignedError;
+  }
+  rb_raise(error_class, "%s", utf8proc_errmsg(result));
+  /* rb_raise hands control to ruby's exception handling; this return only
+   * satisfies the signature. */
+  return Qnil;
+}
+
+/*
+ * Utf8Proc.utf8map(str, options): runs str through utf8proc_decompose and
+ * utf8proc_reencode with the given option bits and returns the result as a
+ * new ruby string.
+ *
+ * str_param     - converted with StringValue.
+ * options_param - integer bit mask; UTF8PROC_NULLTERM is always cleared
+ *                 because the string length is passed explicitly.
+ *
+ * Raises through utf8proc_ruby_map_error when utf8proc reports an error.
+ */
+VALUE utf8proc_ruby_map(VALUE self, VALUE str_param, VALUE options_param) {
+  VALUE str;
+  int options;
+  /* volatile keeps the wrapper object on the stack, so the conservative GC
+   * cannot reclaim env while this function is still using it. */
+  volatile VALUE env_obj;
+  utf8proc_ruby_mapenv_t *env;
+  ssize_t result;
+  VALUE retval;
+  str = StringValue(str_param);
+  options = NUM2INT(options_param) & ~UTF8PROC_NULLTERM;
+  /* The wrapper releases env (and any buffer still attached) if an
+   * exception unwinds past this function. */
+  env_obj = Data_Make_Struct(rb_cObject, utf8proc_ruby_mapenv_t, NULL,
+                             utf8proc_ruby_mapenv_free, env);
+  /* Sizing pass: no output buffer, the result is the required length. */
+  result = utf8proc_decompose(RSTRING_PTR(str), RSTRING_LEN(str),
+                              NULL, 0, options);
+  if (result < 0) {
+    utf8proc_ruby_map_error(result);
+    return Qnil;  /* needed to prevent problems with optimization */
+  }
+  env->buffer = ALLOC_N(int32_t, result+1);
+  result = utf8proc_decompose(RSTRING_PTR(str), RSTRING_LEN(str),
+                              env->buffer, result, options);
+  if (result >= 0) {
+    /* Re-encodes in place: afterwards the buffer holds `result` bytes. */
+    result = utf8proc_reencode(env->buffer, result, options);
+  }
+  if (result < 0) {
+    /* ALLOC_N memory belongs to ruby's allocator, hence xfree. */
+    xfree(env->buffer);
+    env->buffer = NULL;
+    utf8proc_ruby_map_error(result);
+    return Qnil;  /* needed to prevent problems with optimization */
+  }
+  retval = rb_str_new((char *)env->buffer, result);
+  xfree(env->buffer);
+  env->buffer = NULL;
+  (void)env_obj;
+  return retval;
+}
+
+/*
+ * Utf8Proc.utf8char(code): returns a ruby string holding the bytes that
+ * utf8proc_encode_char produces for the given code point. Raises
+ * ArgumentError when utf8proc_codepoint_valid rejects the value.
+ */
+static VALUE utf8proc_ruby_char(VALUE self, VALUE code_param) {
+  char encoded[4];
+  ssize_t length;
+  int codepoint = NUM2INT(code_param);
+  if (!utf8proc_codepoint_valid(codepoint)) {
+    rb_raise(rb_eArgError, "Invalid Unicode code point");
+  }
+  length = utf8proc_encode_char(codepoint, encoded);
+  return rb_str_new(encoded, length);
+}
+
+/* Stores the integer flag `field` in utf8proc_ruby_options under the symbol
+ * named by the C string `sym`. */
+#define register_utf8proc_option(sym, field) \
+  rb_hash_aset(utf8proc_ruby_options, \
+               ID2SYM(rb_intern(sym)), \
+               INT2FIX(field))
+
+/*
+ * Extension entry point. Defines the Utf8Proc module with its module
+ * functions utf8map and utf8char, the UnicodeError exception hierarchy, and
+ * the frozen Utf8Proc::Options hash mapping option symbols to UTF8PROC_*
+ * flag values.
+ */
+void Init_utf8proc_native(void) {
+  utf8proc_ruby_module = rb_define_module("Utf8Proc");
+  rb_define_module_function(utf8proc_ruby_module, "utf8map",
+                            utf8proc_ruby_map, 2);
+  rb_define_module_function(utf8proc_ruby_module, "utf8char",
+                            utf8proc_ruby_char, 1);
+  utf8proc_ruby_eUnicodeError = rb_define_class_under(utf8proc_ruby_module,
+    "UnicodeError", rb_eStandardError);
+  utf8proc_ruby_eInvalidUtf8Error = rb_define_class_under(
+    utf8proc_ruby_module, "InvalidUtf8Error", utf8proc_ruby_eUnicodeError);
+  utf8proc_ruby_eCodeNotAssignedError = rb_define_class_under(
+    utf8proc_ruby_module, "CodeNotAssignedError",
+    utf8proc_ruby_eUnicodeError);
+  utf8proc_ruby_options = rb_hash_new();
+  /* Attach the hash to the module before filling it: a plain C global is
+   * not a GC root, so the hash must be reachable from ruby while the
+   * calls below allocate. */
+  rb_define_const(utf8proc_ruby_module, "Options", utf8proc_ruby_options);
+  register_utf8proc_option("stable", UTF8PROC_STABLE);
+  register_utf8proc_option("compat", UTF8PROC_COMPAT);
+  register_utf8proc_option("compose", UTF8PROC_COMPOSE);
+  register_utf8proc_option("decompose", UTF8PROC_DECOMPOSE);
+  register_utf8proc_option("ignore", UTF8PROC_IGNORE);
+  register_utf8proc_option("rejectna", UTF8PROC_REJECTNA);
+  register_utf8proc_option("nlf2ls", UTF8PROC_NLF2LS);
+  register_utf8proc_option("nlf2ps", UTF8PROC_NLF2PS);
+  register_utf8proc_option("nlf2lf", UTF8PROC_NLF2LF);
+  register_utf8proc_option("stripcc", UTF8PROC_STRIPCC);
+  register_utf8proc_option("casefold", UTF8PROC_CASEFOLD);
+  register_utf8proc_option("charbound", UTF8PROC_CHARBOUND);
+  register_utf8proc_option("lump", UTF8PROC_LUMP);
+  register_utf8proc_option("stripmark", UTF8PROC_STRIPMARK);
+  /* Frozen only after population so the table cannot be changed later. */
+  OBJ_FREEZE(utf8proc_ruby_options);
+}
+