first
This commit is contained in:
commit
64be7f056a
6 changed files with 397 additions and 0 deletions
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
/zig-cache
|
||||
/zig-out
|
20
LICENSE
Normal file
20
LICENSE
Normal file
|
@ -0,0 +1,20 @@
|
|||
Copyright © 2024 Jeffrey C. Ollie
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
6
README.md
Normal file
6
README.md
Normal file
|
@ -0,0 +1,6 @@
|
|||
Punycode encoding/decoding for Zig
|
||||
==================================
|
||||
|
||||
[Zig](https://ziglang.org) implementation of Punycode ([RFC 5891](https://datatracker.ietf.org/doc/html/rfc5891)/[RFC 3492](https://datatracker.ietf.org/doc/html/rfc3492)) encoding and decoding for Internationalized Domain Names.
|
||||
|
||||
Based on [punycode](https://github.com/bnoordhuis/punycode) by Ben Noordhuis with some fixes from [Michael Hempel-Jørgensen](https://github.com/bnoordhuis/punycode/pull/4).
|
21
build.zig
Normal file
21
build.zig
Normal file
|
@ -0,0 +1,21 @@
|
|||
const std = @import("std");
|
||||
|
||||
/// Build-script entry point.
///
/// Registers the library as the module "punycode" and wires a `test` step
/// that compiles and runs the unit tests found in `src/root.zig` using the
/// standard target / optimize options.
pub fn build(b: *std.Build) void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    // Both the exported module and the test binary are rooted at the same file.
    const root_source: std.Build.LazyPath = .{ .path = "src/root.zig" };

    // The returned module handle is not needed here; registration is enough.
    _ = b.addModule("punycode", .{ .root_source_file = root_source });

    const tests = b.addTest(.{
        .root_source_file = root_source,
        .target = target,
        .optimize = optimize,
    });
    const run_tests = b.addRunArtifact(tests);

    // `zig build test` runs the unit-test binary.
    b.step("test", "Run unit tests").dependOn(&run_tests.step);
}
|
14
build.zig.zon
Normal file
14
build.zig.zon
Normal file
|
@ -0,0 +1,14 @@
|
|||
.{
|
||||
.name = "punycode",
|
||||
.version = "1.0.0",
|
||||
|
||||
.dependencies = .{},
|
||||
|
||||
.paths = .{
|
||||
"build.zig",
|
||||
"build.zig.zon",
|
||||
"src",
|
||||
"LICENSE",
|
||||
"README.md",
|
||||
},
|
||||
}
|
334
src/root.zig
Normal file
334
src/root.zig
Normal file
|
@ -0,0 +1,334 @@
|
|||
// Copyright © 2024 Jeffrey C. Ollie
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
const std = @import("std");
|
||||
|
||||
/// Returns a namespace implementing the Bootstring algorithm (RFC 3492) for
/// the given comptime parameter set.
///
/// The returned type exposes two public functions:
///   * `encode(alloc, []const u21) ![]const u8`
///   * `decode(alloc, []const u8) ![]const u21`
///
/// Code points below 128 are treated as basic code points and `-` is used as
/// the delimiter. Digits are `a`-`z` (0..25) followed by `0`-`9` (26..35).
pub fn BootString(
    comptime options: struct {
        base: usize,
        tmin: usize,
        tmax: usize,
        skew: usize,
        damp: usize,
        initial_n: usize,
        initial_bias: usize,
    },
) type {
    return struct {
        /// Limit used by the overflow guards. The running values (`delta`,
        /// `i`, `w`) are `usize`, so that is the width that must not be
        /// exceeded. Using the 21-bit code point width here would reject valid
        /// input, because a delta is a code point gap multiplied by a length.
        const max_int: usize = std.math.maxInt(usize);

        /// Bias adaptation: rescales `delta` and derives the bias for the
        /// next variable-length integer. `is_first` selects the `damp`
        /// divisor instead of 2.
        fn adaptBias(delta: usize, n_points: usize, is_first: bool) usize {
            const divisor: usize = if (is_first) options.damp else 2;
            var scaled = delta / divisor;
            scaled += scaled / n_points;

            const span = options.base - options.tmin;
            var k: usize = 0;
            while (scaled > (span * options.tmax) / 2) : (k += options.base) {
                scaled /= span;
            }
            return k + ((span + 1) * scaled) / (scaled + options.skew);
        }

        /// Threshold `t` for digit position `k` under the current `bias`,
        /// clamped to `tmin..tmax`.
        fn threshold(k: usize, bias: usize) usize {
            if (k <= bias) return options.tmin;
            if (k >= bias + options.tmax) return options.tmax;
            return k - bias;
        }

        /// Maps a digit value to its ASCII form: 0..25 -> 'a'..'z',
        /// 26 and above -> '0'.. .
        fn encodeDigit(digit: usize) u8 {
            std.debug.assert(digit <= options.base - options.tmin);
            if (digit < 26) return @intCast('a' + digit);
            return @intCast('0' + (digit - 26));
        }

        /// Appends `delta` to `destination` as a generalized variable-length
        /// integer using the thresholds implied by `bias`.
        fn encodeVarInt(destination: *std.ArrayList(u8), bias: usize, delta: usize) !void {
            var q = delta;
            var k: usize = options.base;
            while (true) : (k += options.base) {
                const t = threshold(k, bias);
                if (q < t) break;
                try destination.append(encodeDigit(t + (q - t) % (options.base - t)));
                q = (q - t) / (options.base - t);
            }
            // The final digit is always below the threshold.
            try destination.append(encodeDigit(q));
        }

        /// Encodes a sequence of code points.
        ///
        /// The caller owns the returned slice and must free it with `alloc`.
        /// Returns `error.OverFlow` if the delta arithmetic would exceed
        /// `usize`, or an allocator error.
        pub fn encode(alloc: std.mem.Allocator, source: []const u21) ![]const u8 {
            var destination = try std.ArrayList(u8).initCapacity(alloc, source.len * 3);
            errdefer destination.deinit();

            // Basic code points are copied verbatim, in order.
            for (source) |codepoint| {
                if (codepoint < 128) try destination.append(@intCast(codepoint));
            }

            const basic_count = destination.items.len;
            // `h` counts the code points that have been handled so far.
            var h = basic_count;

            // The delimiter is only emitted when something precedes it.
            if (basic_count > 0) try destination.append('-');

            var n: usize = options.initial_n;
            var bias: usize = options.initial_bias;
            var delta: usize = 0;

            while (h < source.len) {
                // Smallest code point that has not been handled yet. One always
                // exists here because h < source.len.
                var m: usize = std.math.maxInt(u21);
                for (source) |codepoint| {
                    if (codepoint >= n and codepoint < m) m = codepoint;
                }

                if (m - n > (max_int - delta) / (h + 1)) return error.OverFlow;
                delta += (m - n) * (h + 1);
                n = m;

                for (source) |codepoint| {
                    if (codepoint < n) {
                        delta = std.math.add(usize, delta, 1) catch return error.OverFlow;
                    } else if (codepoint == n) {
                        try encodeVarInt(&destination, bias, delta);
                        bias = adaptBias(delta, h + 1, h == basic_count);
                        delta = 0;
                        h += 1;
                    }
                }

                delta = std.math.add(usize, delta, 1) catch return error.OverFlow;
                n += 1;
            }

            return destination.toOwnedSlice();
        }

        /// Maps an ASCII digit to its value (letters are case-insensitive).
        fn decodeDigit(c: u8) !u21 {
            return switch (c) {
                'a'...'z' => c - 'a',
                'A'...'Z' => c - 'A',
                '0'...'9' => 26 + (c - '0'),
                else => error.InvalidDigit,
            };
        }

        /// Decodes an encoded ASCII string back into code points.
        ///
        /// The caller owns the returned slice and must free it with `alloc`.
        ///
        /// Errors:
        ///   * `error.NotAscii`     - `source` contains a byte >= 128.
        ///   * `error.InvalidDigit` - a byte after the delimiter is not a
        ///     valid digit, or the input ends in the middle of a
        ///     variable-length integer.
        ///   * `error.OverFlow`     - the arithmetic would exceed `usize`, or
        ///     the decoded code point does not fit in `u21`.
        pub fn decode(alloc: std.mem.Allocator, source: []const u8) ![]const u21 {
            for (source) |char| {
                if (char >= 128) return error.NotAscii;
            }

            var destination = try std.ArrayList(u21).initCapacity(alloc, source.len);
            errdefer destination.deinit();

            // Everything before the last delimiter is literal. With no
            // delimiter (or one at index 0) there are no literal code points
            // and nothing is skipped.
            const b: usize = std.mem.lastIndexOfScalar(u8, source, '-') orelse 0;
            for (source[0..b]) |c| try destination.append(c);

            var i: usize = 0;
            var n: usize = options.initial_n;
            var bias: usize = options.initial_bias;

            var si: usize = if (b > 0) b + 1 else 0;
            // `di` is always the output length plus one.
            var di: usize = b + 1;
            while (si < source.len) : (di += 1) {
                const org_i = i;
                var w: usize = 1;
                var k: usize = options.base;

                while (true) : (k += options.base) {
                    // Truncated input: the previous digit announced a
                    // continuation but there is nothing left to read.
                    if (si >= source.len) return error.InvalidDigit;
                    const digit: usize = try decodeDigit(source[si]);
                    si += 1;

                    if (digit > (max_int - i) / w) return error.OverFlow;
                    i += digit * w;

                    const t = threshold(k, bias);
                    if (digit < t) break;

                    if (w > max_int / (options.base - t)) return error.OverFlow;
                    w *= options.base - t;
                }

                bias = adaptBias(i - org_i, di, org_i == 0);

                // Keeps `n` within u21 so the cast below cannot fail.
                if (i / di > std.math.maxInt(u21) - n) return error.OverFlow;
                n += i / di;
                i %= di;

                try destination.insert(i, @intCast(n));
                i += 1;
            }

            return try destination.toOwnedSlice();
        }
    };
}
|
||||
|
||||
/// Bootstring instantiated with the parameter values used by Punycode.
/// `initial_n` is 0x80, the first code point outside the ASCII range.
const PunyCode = BootString(.{
    .base = 36,
    .tmin = 1,
    .tmax = 26,
    .skew = 38,
    .damp = 700,
    .initial_bias = 72,
    .initial_n = 0x80,
});

// Re-export the public declarations of `PunyCode` at the top level of this file.
pub usingnamespace PunyCode;
|
||||
|
||||
// Round-trip check: every table entry must encode to the expected string and
// decode back to the original code points.
test "test" {
    const Case = struct {
        // Human-readable form of the input (not consulted by the assertions).
        utf8: []const u8,
        // The input as code points.
        unicode: []const u21,
        // The expected encoded form.
        punycode: []const u8,
    };

    const cases = [_]Case{
        .{
            .utf8 = "ü",
            .unicode = &.{0xfc},
            .punycode = "tda",
        },
        .{
            .utf8 = "Bach",
            .unicode = &.{ 'B', 'a', 'c', 'h' },
            .punycode = "Bach-",
        },
        .{
            .utf8 = "bücher",
            .unicode = &.{ 'b', 0xFC, 'c', 'h', 'e', 'r' },
            .punycode = "bcher-kva",
        },
        .{
            .utf8 = "Willst du die Blüthe des frühen, die Früchte des späteren Jahres",
            .unicode = &.{ 'W', 'i', 'l', 'l', 's', 't', ' ', 'd', 'u', ' ', 'd', 'i', 'e', ' ', 'B', 'l', 0xFC, 't', 'h', 'e', ' ', 'd', 'e', 's', ' ', 'f', 'r', 0xFC, 'h', 'e', 'n', ',', ' ', 'd', 'i', 'e', ' ', 'F', 'r', 0xFC, 'c', 'h', 't', 'e', ' ', 'd', 'e', 's', ' ', 's', 'p', 0xE4, 't', 'e', 'r', 'e', 'n', ' ', 'J', 'a', 'h', 'r', 'e', 's' },
            .punycode = "Willst du die Blthe des frhen, die Frchte des spteren Jahres-x9e96lkal",
        },
        .{
            .utf8 = "ليهمابتكلموشعربي؟",
            .unicode = &.{ 0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643, 0x0644, 0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A, 0x061F },
            .punycode = "egbpdaj6bu4bxfgehfvwxn",
        },
        .{
            .utf8 = "他们为什么不说中文",
            .unicode = &.{ 0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D, 0x6587 },
            .punycode = "ihqwcrb4cv8a8dqg056pqjye",
        },
        .{
            .utf8 = "他們爲什麽不說中文",
            .unicode = &.{ 0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D, 0x6587 },
            .punycode = "ihqwctvzc91f659drss3x8bo0yb",
        },
        .{
            .utf8 = "Pročprostěnemluvíčesky",
            .unicode = &.{ 0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073, 0x0074, 0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076, 0x00ED, 0x010D, 0x0065, 0x0073, 0x006B, 0x0079 },
            .punycode = "Proprostnemluvesky-uyb24dma41a",
        },
        .{
            .utf8 = "למההםפשוטלאמדבריםעברית",
            .unicode = &.{ 0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5, 0x05D8, 0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9, 0x05DD, 0x05E2, 0x05D1, 0x05E8, 0x05D9, 0x05EA },
            .punycode = "4dbcagdahymbxekheh6e0a7fei0b",
        },
        .{
            .utf8 = "यहलोगहिन्दीक्योंनहींबोलसकतेहैं",
            .unicode = &.{ 0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928, 0x094D, 0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902, 0x0928, 0x0939, 0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938, 0x0915, 0x0924, 0x0947, 0x0939, 0x0948, 0x0902 },
            .punycode = "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd",
        },
        .{
            .utf8 = "なぜみんな日本語を話してくれないのか",
            .unicode = &.{ 0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E, 0x3092, 0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044, 0x306E, 0x304B },
            .punycode = "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa",
        },
        .{
            .utf8 = "세계의모든사람들이한국어를이해한다면얼마나좋을까",
            .unicode = &.{ 0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774, 0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74, 0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C },
            .punycode = "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c",
        },
        .{
            .utf8 = "почемужеонинеговорятпорусски",
            .unicode = &.{ 0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435, 0x043E, 0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432, 0x043E, 0x0440, 0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443, 0x0441, 0x0441, 0x043A, 0x0438 },
            .punycode = "b1abfaaepdrnnbgefbadotcwatmq2g4l",
        },
        .{
            .utf8 = "PorquénopuedensimplementehablarenEspañol",
            .unicode = &.{ 0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F, 0x0070, 0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069, 0x006D, 0x0070, 0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074, 0x0065, 0x0068, 0x0061, 0x0062, 0x006C, 0x0061, 0x0072, 0x0065, 0x006E, 0x0045, 0x0073, 0x0070, 0x0061, 0x00F1, 0x006F, 0x006C },
            .punycode = "PorqunopuedensimplementehablarenEspaol-fmd56a",
        },
        .{
            .utf8 = "TạisaohọkhôngthểchỉnóitiếngViệt",
            .unicode = &.{ 0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD, 0x006B, 0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3, 0x0063, 0x0068, 0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069, 0x1EBF, 0x006E, 0x0067, 0x0056, 0x0069, 0x1EC7, 0x0074 },
            .punycode = "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g",
        },
        .{
            .utf8 = "3年B組金八先生",
            .unicode = &.{ 0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F },
            .punycode = "3B-ww4c5e180e575a65lsy2b",
        },
        .{
            .utf8 = "安室奈美恵-with-SUPER-MONKEYS",
            .unicode = &.{ 0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069, 0x0074, 0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052, 0x002D, 0x004D, 0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053 },
            .punycode = "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n",
        },
        .{
            .utf8 = "Hello-Another-Way-それぞれの場所",
            .unicode = &.{ 0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E, 0x006F, 0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061, 0x0079, 0x002D, 0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834, 0x6240 },
            .punycode = "Hello-Another-Way--fc4qua05auwb3674vfr0b",
        },
        .{
            .utf8 = "ひとつ屋根の下2",
            .unicode = &.{ 0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032 },
            .punycode = "2-u9tlzr9756bt3uc0v",
        },
        .{
            .utf8 = "MajiでKoiする5秒前",
            .unicode = &.{ 0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069, 0x3059, 0x308B, 0x0035, 0x79D2, 0x524D },
            .punycode = "MajiKoi5-783gue6qz075azm5e",
        },
        .{
            .utf8 = "パフィーdeルンバ",
            .unicode = &.{ 0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0 },
            .punycode = "de-jg4avhby1noc0d",
        },
        .{
            .utf8 = "そのスピードで",
            .unicode = &.{ 0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067 },
            .punycode = "d9juau41awczczp",
        },
        .{
            .utf8 = "-> $1.00 <-",
            .unicode = &.{ 0x002D, 0x003E, 0x0020, 0x0024, 0x0031, 0x002E, 0x0030, 0x0030, 0x0020, 0x003C, 0x002D },
            .punycode = "-> $1.00 <--",
        },
    };

    // The testing allocator reports leaks, so both results are freed.
    const allocator = std.testing.allocator;

    for (cases) |case| {
        const encoded = try PunyCode.encode(allocator, case.unicode);
        defer allocator.free(encoded);
        try std.testing.expectEqualStrings(case.punycode, encoded);

        const decoded = try PunyCode.decode(allocator, case.punycode);
        defer allocator.free(decoded);
        try std.testing.expectEqualSlices(u21, case.unicode, decoded);
    }
}
|
Loading…
Reference in a new issue