This commit is contained in:
Jeffrey C. Ollie 2024-03-02 12:49:40 -06:00
commit 64be7f056a
Signed by: jeff
GPG key ID: 6F86035A6D97044E
6 changed files with 397 additions and 0 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
/zig-cache
/zig-out

20
LICENSE Normal file
View file

@ -0,0 +1,20 @@
Copyright © 2024 Jeffrey C. Ollie
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

6
README.md Normal file
View file

@ -0,0 +1,6 @@
Punycode encoding/decoding for Zig
==================================
[Zig](https://ziglang.org) implementation of Punycode ([RFC 5891](https://datatracker.ietf.org/doc/html/rfc5891)/[RFC 3492](https://datatracker.ietf.org/doc/html/rfc3492)) encoding and decoding for Internationalized Domain Names.
Based on [punycode](https://github.com/bnoordhuis/punycode) by Ben Noordhuis with some fixes from [Michael Hempel-Jørgensen](https://github.com/bnoordhuis/punycode/pull/4).

21
build.zig Normal file
View file

@ -0,0 +1,21 @@
const std = @import("std");
/// Describe the build graph for this package.
///
/// Registers a module named "punycode" rooted at `src/root.zig`, and a
/// `test` step that compiles and runs the `test` blocks of that same file
/// for the target / optimization mode selected on the command line.
pub fn build(b: *std.Build) void {
    const resolved_target = b.standardTargetOptions(.{});
    const optimize_mode = b.standardOptimizeOption(.{});

    // Make the library importable by dependents under the name "punycode".
    _ = b.addModule("punycode", .{ .root_source_file = .{ .path = "src/root.zig" } });

    // Test executable built from the library's root source file.
    const tests = b.addTest(.{
        .optimize = optimize_mode,
        .target = resolved_target,
        .root_source_file = .{ .path = "src/root.zig" },
    });
    const run_tests = b.addRunArtifact(tests);

    // The "test" step depends on running the test executable.
    b.step("test", "Run unit tests").dependOn(&run_tests.step);
}

14
build.zig.zon Normal file
View file

@ -0,0 +1,14 @@
// Package manifest for the `punycode` package.
.{
.name = "punycode",
.version = "1.0.0",
// The package declares no dependencies.
.dependencies = .{},
// Files and directories that make up the package contents.
.paths = .{
"build.zig",
"build.zig.zon",
"src",
"LICENSE",
"README.md",
},
}

334
src/root.zig Normal file
View file

@ -0,0 +1,334 @@
// Copyright © 2024 Jeffrey C. Ollie
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
const std = @import("std");
/// Build a Bootstring (RFC 3492) encoder/decoder specialised at compile time
/// for the parameter set in `options`.
///
/// The returned namespace exposes two public functions:
///   * `encode(alloc, source)` - code points -> ASCII Bootstring text
///   * `decode(alloc, source)` - ASCII Bootstring text -> code points
///
/// Code points below 128 are treated as "basic" and copied literally, `-` is
/// the delimiter, and digits are written as `a`-`z` (0..25) followed by
/// `0`-`9` (26..35), so `options.base` is expected to be at most 36.
pub fn BootString(
    comptime options: struct {
        base: usize,
        tmin: usize,
        tmax: usize,
        skew: usize,
        damp: usize,
        initial_n: usize,
        initial_bias: usize,
    },
) type {
    return struct {
        /// Upper bound of the integer type used for `delta`, `i` and `w`.
        /// Overflow checks must be made against this bound rather than
        /// against the range of a code point, because `delta` legitimately
        /// grows far beyond 21 bits (it is multiplied by the output length).
        const max_int: usize = std.math.maxInt(usize);

        /// Threshold `t` for digit position `k` under the current `bias`,
        /// i.e. `k - bias` clamped to the range `tmin..tmax`.
        fn threshold(k: usize, bias: usize) usize {
            if (k <= bias) return options.tmin;
            if (k >= bias + options.tmax) return options.tmax;
            return k - bias;
        }

        /// Bias adaptation: compute the bias to use for the next delta from
        /// the delta just processed. `n_points` is the number of code points
        /// handled so far (including the current one) and `is_first` selects
        /// the stronger `damp` divisor used for the very first delta.
        fn adapt_bias(delta: usize, n_points: usize, is_first: bool) usize {
            const divisor: usize = if (is_first) options.damp else 2;
            var scaled = delta / divisor;
            scaled += scaled / n_points;
            var k: usize = 0;
            while (scaled > ((options.base - options.tmin) * options.tmax) / 2) : (k += options.base) {
                scaled /= options.base - options.tmin;
            }
            return k + (((options.base - options.tmin + 1) * scaled) / (scaled + options.skew));
        }

        /// Map a digit value to its ASCII character: 0..25 -> 'a'..'z',
        /// 26..35 -> '0'..'9'.
        fn encode_digit(c: usize) u8 {
            std.debug.assert(c <= options.base - options.tmin);
            return if (c > 25) @intCast(c - 26 + '0') else @intCast(c + 'a');
        }

        /// Append `delta` to `destination` as a generalized variable-length
        /// integer, using thresholds derived from `bias`.
        fn encode_var_int(destination: *std.ArrayList(u8), bias: usize, delta: usize) !void {
            var k: usize = options.base;
            var q: usize = delta;
            while (true) : (k += options.base) {
                const t = threshold(k, bias);
                if (q < t) break;
                try destination.append(encode_digit(t + (q - t) % (options.base - t)));
                q = (q - t) / (options.base - t);
            }
            // The final (most significant) digit is the one below the threshold.
            try destination.append(encode_digit(q));
        }

        /// Encode `source` (a sequence of code points) into Bootstring text.
        ///
        /// The caller owns the returned slice and must free it with `alloc`.
        ///
        /// Errors:
        ///   * `error.OverFlow`    - `delta` would not fit in a `usize`
        ///   * `error.OutOfMemory` - allocation failed
        pub fn encode(alloc: std.mem.Allocator, source: []const u21) ![]const u8 {
            var destination = try std.ArrayList(u8).initCapacity(alloc, source.len * 3);
            errdefer destination.deinit();

            // Basic code points are copied through verbatim, in order.
            for (source) |codepoint| {
                if (codepoint < 128) try destination.append(@intCast(codepoint));
            }
            const initial_bytes_written = destination.items.len;
            // `h` counts the code points that have been handled so far.
            var h = initial_bytes_written;
            if (initial_bytes_written > 0) try destination.append('-');

            var n: usize = options.initial_n;
            var bias: usize = options.initial_bias;
            var delta: usize = 0;
            while (h < source.len) {
                // Smallest code point not yet handled (>= n).
                var m: usize = max_int;
                for (source) |codepoint| {
                    if (codepoint >= n and codepoint < m) m = codepoint;
                }

                // Advance the decoder state machine to <m, 0>, guarding the
                // multiplication against overflow of the integer type.
                if (m - n > (max_int - delta) / (h + 1)) return error.OverFlow;
                delta += (m - n) * (h + 1);
                n = m;

                for (source) |codepoint| {
                    if (codepoint < n) {
                        // Zig traps on overflow instead of wrapping, so use a
                        // checked add rather than testing for wrap-to-zero.
                        delta = std.math.add(usize, delta, 1) catch return error.OverFlow;
                    } else if (codepoint == n) {
                        try encode_var_int(&destination, bias, delta);
                        bias = adapt_bias(delta, h + 1, h == initial_bytes_written);
                        delta = 0;
                        h += 1;
                    }
                }

                n += 1;
                delta = std.math.add(usize, delta, 1) catch return error.OverFlow;
            }
            return destination.toOwnedSlice();
        }

        /// Map an ASCII digit character to its value (case-insensitive for
        /// letters), or fail with `error.InvalidDigit`.
        fn decode_digit(c: u8) !usize {
            switch (c) {
                '0'...'9' => return 26 + c - '0',
                'a'...'z' => return c - 'a',
                'A'...'Z' => return c - 'A',
                else => return error.InvalidDigit,
            }
        }

        /// Decode Bootstring text in `source` into a sequence of code points.
        ///
        /// The caller owns the returned slice and must free it with `alloc`.
        ///
        /// Errors:
        ///   * `error.NotAscii`       - `source` contains a byte >= 0x80
        ///   * `error.InvalidDigit`   - a non-alphanumeric byte appears where
        ///                              a digit is required
        ///   * `error.TruncatedInput` - `source` ends in the middle of a
        ///                              variable-length integer
        ///   * `error.OverFlow`       - an intermediate value would not fit
        ///                              in a `usize`, or the resulting code
        ///                              point would not fit in a `u21`
        ///   * `error.OutOfMemory`    - allocation failed
        pub fn decode(alloc: std.mem.Allocator, source: []const u8) ![]const u21 {
            for (source) |char| {
                if (char >= 0x80) return error.NotAscii;
            }

            var destination = try std.ArrayList(u21).initCapacity(alloc, source.len);
            errdefer destination.deinit();

            // Everything before the last delimiter is literal basic code
            // points; with no delimiter (or one at index 0) nothing is copied.
            const b: usize = std.mem.lastIndexOfScalar(u8, source, '-') orelse 0;
            for (source[0..b]) |c| try destination.append(c);

            var i: usize = 0;
            var n: usize = options.initial_n;
            var bias: usize = options.initial_bias;
            // Skip the delimiter only if basic code points were copied.
            var si: usize = if (b > 0) b + 1 else 0;
            // `di` is always the current output length plus one.
            var di: usize = b + 1;
            while (si < source.len) : (di += 1) {
                const org_i = i;
                var w: usize = 1;
                var k: usize = options.base;
                // Decode one generalized variable-length integer into `i`.
                while (true) : (k += options.base) {
                    // A digit is required here; running out of input means
                    // the encoded integer was cut short.
                    if (si >= source.len) return error.TruncatedInput;
                    const digit = try decode_digit(source[si]);
                    si += 1;
                    if (digit > (max_int - i) / w) return error.OverFlow;
                    i += digit * w;
                    const t = threshold(k, bias);
                    if (digit < t) break;
                    if (w > max_int / (options.base - t)) return error.OverFlow;
                    w *= options.base - t;
                }
                bias = adapt_bias(i - org_i, di, org_i == 0);

                // `i / di` is how far `n` advances; the remainder is the
                // insertion position. Keep `n` within the range of `u21`.
                if (i / di > std.math.maxInt(u21) - n) return error.OverFlow;
                n += i / di;
                i %= di;
                try destination.insert(i, @intCast(n));
                i += 1;
            }
            return try destination.toOwnedSlice();
        }
    };
}
/// Punycode: the Bootstring instance with the parameter values that
/// RFC 3492 section 5 assigns to Punycode.
const PunyCode = BootString(
    .{
        .base = 36,
        .tmin = 1,
        .tmax = 26,
        .skew = 38,
        .damp = 700,
        .initial_n = 128,
        .initial_bias = 72,
    },
);

// Explicit re-exports instead of `pub usingnamespace PunyCode;`: the public
// surface is the same two functions, but it is visible at a glance and does
// not rely on `usingnamespace`, which newer Zig releases no longer support.

/// Encode a sequence of code points as Punycode. See `PunyCode.encode`.
pub const encode = PunyCode.encode;
/// Decode Punycode text into a sequence of code points. See `PunyCode.decode`.
pub const decode = PunyCode.decode;
// Table-driven round-trip test for the Punycode encoder and decoder.
//
// Each case carries:
//   utf8     - the text as a human-readable UTF-8 literal (not read by the
//              loop below; it documents what `unicode` spells)
//   unicode  - the same text as a sequence of code points
//   punycode - the expected Punycode encoding of `unicode`
//
// NOTE(review): many of these look like the sample strings from RFC 3492
// section 7.1 - confirm before citing them as such.
test "test" {
const cases = [_]struct {
utf8: []const u8,
unicode: []const u21,
punycode: []const u8,
}{
.{
.utf8 = "ü",
.unicode = &.{0xfc},
.punycode = "tda",
},
.{
.utf8 = "Bach",
.unicode = &.{ 'B', 'a', 'c', 'h' },
.punycode = "Bach-",
},
.{
.utf8 = "bücher",
.unicode = &.{ 'b', 0xFC, 'c', 'h', 'e', 'r' },
.punycode = "bcher-kva",
},
.{
.utf8 = "Willst du die Blüthe des frühen, die Früchte des späteren Jahres",
.unicode = &.{ 'W', 'i', 'l', 'l', 's', 't', ' ', 'd', 'u', ' ', 'd', 'i', 'e', ' ', 'B', 'l', 0xFC, 't', 'h', 'e', ' ', 'd', 'e', 's', ' ', 'f', 'r', 0xFC, 'h', 'e', 'n', ',', ' ', 'd', 'i', 'e', ' ', 'F', 'r', 0xFC, 'c', 'h', 't', 'e', ' ', 'd', 'e', 's', ' ', 's', 'p', 0xE4, 't', 'e', 'r', 'e', 'n', ' ', 'J', 'a', 'h', 'r', 'e', 's' },
.punycode = "Willst du die Blthe des frhen, die Frchte des spteren Jahres-x9e96lkal",
},
.{
.utf8 = "ليهمابتكلموشعربي؟",
.unicode = &.{ 0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643, 0x0644, 0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A, 0x061F },
.punycode = "egbpdaj6bu4bxfgehfvwxn",
},
.{
.utf8 = "他们为什么不说中文",
.unicode = &.{ 0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D, 0x6587 },
.punycode = "ihqwcrb4cv8a8dqg056pqjye",
},
.{
.utf8 = "他們爲什麽不說中文",
.unicode = &.{ 0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D, 0x6587 },
.punycode = "ihqwctvzc91f659drss3x8bo0yb",
},
.{
.utf8 = "Pročprostěnemluvíčesky",
.unicode = &.{ 0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073, 0x0074, 0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076, 0x00ED, 0x010D, 0x0065, 0x0073, 0x006B, 0x0079 },
.punycode = "Proprostnemluvesky-uyb24dma41a",
},
.{
.utf8 = "למההםפשוטלאמדבריםעברית",
.unicode = &.{ 0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5, 0x05D8, 0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9, 0x05DD, 0x05E2, 0x05D1, 0x05E8, 0x05D9, 0x05EA },
.punycode = "4dbcagdahymbxekheh6e0a7fei0b",
},
.{
.utf8 = "यहलोगहिन्दीक्योंनहींबोलसकतेहैं",
.unicode = &.{ 0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928, 0x094D, 0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902, 0x0928, 0x0939, 0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938, 0x0915, 0x0924, 0x0947, 0x0939, 0x0948, 0x0902 },
.punycode = "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd",
},
.{
.utf8 = "なぜみんな日本語を話してくれないのか",
.unicode = &.{ 0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E, 0x3092, 0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044, 0x306E, 0x304B },
.punycode = "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa",
},
.{
.utf8 = "세계의모든사람들이한국어를이해한다면얼마나좋을까",
.unicode = &.{ 0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774, 0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74, 0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C },
.punycode = "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c",
},
.{
.utf8 = "почемужеонинеговорятпорусски",
.unicode = &.{ 0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435, 0x043E, 0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432, 0x043E, 0x0440, 0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443, 0x0441, 0x0441, 0x043A, 0x0438 },
.punycode = "b1abfaaepdrnnbgefbadotcwatmq2g4l",
},
.{
.utf8 = "PorquénopuedensimplementehablarenEspañol",
.unicode = &.{ 0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F, 0x0070, 0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069, 0x006D, 0x0070, 0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074, 0x0065, 0x0068, 0x0061, 0x0062, 0x006C, 0x0061, 0x0072, 0x0065, 0x006E, 0x0045, 0x0073, 0x0070, 0x0061, 0x00F1, 0x006F, 0x006C },
.punycode = "PorqunopuedensimplementehablarenEspaol-fmd56a",
},
.{
.utf8 = "TạisaohọkhôngthểchỉnóitiếngViệt",
.unicode = &.{ 0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD, 0x006B, 0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3, 0x0063, 0x0068, 0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069, 0x1EBF, 0x006E, 0x0067, 0x0056, 0x0069, 0x1EC7, 0x0074 },
.punycode = "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g",
},
.{
.utf8 = "3年B組金八先生",
.unicode = &.{ 0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F },
.punycode = "3B-ww4c5e180e575a65lsy2b",
},
.{
.utf8 = "安室奈美恵-with-SUPER-MONKEYS",
.unicode = &.{ 0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069, 0x0074, 0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052, 0x002D, 0x004D, 0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053 },
.punycode = "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n",
},
.{
.utf8 = "Hello-Another-Way-それぞれの場所",
.unicode = &.{ 0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E, 0x006F, 0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061, 0x0079, 0x002D, 0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834, 0x6240 },
.punycode = "Hello-Another-Way--fc4qua05auwb3674vfr0b",
},
.{
.utf8 = "ひとつ屋根の下2",
.unicode = &.{ 0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032 },
.punycode = "2-u9tlzr9756bt3uc0v",
},
.{
.utf8 = "MajiでKoiする5秒前",
.unicode = &.{ 0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069, 0x3059, 0x308B, 0x0035, 0x79D2, 0x524D },
.punycode = "MajiKoi5-783gue6qz075azm5e",
},
.{
.utf8 = "パフィーdeルンバ",
.unicode = &.{ 0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0 },
.punycode = "de-jg4avhby1noc0d",
},
.{
.utf8 = "そのスピードで",
.unicode = &.{ 0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067 },
.punycode = "d9juau41awczczp",
},
.{
.utf8 = "-> $1.00 <-",
.unicode = &.{ 0x002D, 0x003E, 0x0020, 0x0024, 0x0031, 0x002E, 0x0030, 0x0030, 0x0020, 0x003C, 0x002D },
.punycode = "-> $1.00 <--",
},
};
for (cases) |case| {
// Encoding direction: code points -> expected Punycode text.
{
const punycode = try PunyCode.encode(std.testing.allocator, case.unicode);
defer std.testing.allocator.free(punycode);
try std.testing.expectEqualStrings(case.punycode, punycode);
}
// Decoding direction: Punycode text -> expected code points.
{
const unicode = try PunyCode.decode(std.testing.allocator, case.punycode);
defer std.testing.allocator.free(unicode);
try std.testing.expectEqualSlices(u21, case.unicode, unicode);
}
}
}