Skip to content

Commit 170738f

Browse files
committed
Started work on IDN support
1 parent 0775115 commit 170738f

10 files changed

+321
-20
lines changed

Diff for: IDEAS

+2-1
Original file line numberDiff line numberDiff line change
@@ -71,4 +71,5 @@
7171
- IDNA::Punycode
7272
- Convert at create time, and when displaying?
7373
- Has to be done on a per-domain-component basis
74-
74+
- Call valid_domain_name everywhere
75+
- All pages that show domain name have to be updated

Diff for: IDNA/Punycode.pm

+240
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
package IDNA::Punycode;
2+
3+
use strict;
4+
our $VERSION = 0.03;
5+
6+
require Exporter;
7+
our @ISA = qw(Exporter);
8+
our @EXPORT = qw(encode_punycode decode_punycode idn_prefix);
9+
10+
use integer;
11+
12+
our $DEBUG = 0;
13+
our $PREFIX = 'xn--';
14+
15+
use constant BASE => 36;
16+
use constant TMIN => 1;
17+
use constant TMAX => 26;
18+
use constant SKEW => 38;
19+
use constant DAMP => 700;
20+
use constant INITIAL_BIAS => 72;
21+
use constant INITIAL_N => 128;
22+
23+
my $Delimiter = chr 0x2D;
24+
my $BasicRE = qr/[\x00-\x7f]/;
25+
26+
sub _croak { require Carp; Carp::croak(@_); }
27+
28+
sub idn_prefix {
29+
$PREFIX = shift;
30+
}
31+
32+
sub digit_value {
33+
my $code = shift;
34+
return ord($code) - ord("A") if $code =~ /[A-Z]/;
35+
return ord($code) - ord("a") if $code =~ /[a-z]/;
36+
return ord($code) - ord("0") + 26 if $code =~ /[0-9]/;
37+
return;
38+
}
39+
40+
sub code_point {
41+
my $digit = shift;
42+
return $digit + ord('a') if 0 <= $digit && $digit <= 25;
43+
return $digit + ord('0') - 26 if 26 <= $digit && $digit <= 36;
44+
die 'NOT COME HERE';
45+
}
46+
47+
sub adapt {
48+
my($delta, $numpoints, $firsttime) = @_;
49+
$delta = $firsttime ? $delta / DAMP : $delta / 2;
50+
$delta += $delta / $numpoints;
51+
my $k = 0;
52+
while ($delta > ((BASE - TMIN) * TMAX) / 2) {
53+
$delta /= BASE - TMIN;
54+
$k += BASE;
55+
}
56+
return $k + (((BASE - TMIN + 1) * $delta) / ($delta + SKEW));
57+
}
58+
59+
sub decode_punycode {
60+
my $code = shift;
61+
62+
my $n = INITIAL_N;
63+
my $i = 0;
64+
my $bias = INITIAL_BIAS;
65+
my @output;
66+
67+
if ($PREFIX) {
68+
if ($code !~ /^$PREFIX/) {
69+
return $code;
70+
}
71+
$code =~ s/^$PREFIX//;
72+
}
73+
74+
if ($code =~ s/(.*)$Delimiter//o) {
75+
push @output, map ord, split //, $1;
76+
return _croak('non-basic code point') unless $1 =~ /^$BasicRE*$/o;
77+
}
78+
79+
while ($code) {
80+
my $oldi = $i;
81+
my $w = 1;
82+
LOOP:
83+
for (my $k = BASE; 1; $k += BASE) {
84+
my $cp = substr($code, 0, 1, '');
85+
my $digit = digit_value($cp);
86+
defined $digit or return _croak("invalid punycode input");
87+
$i += $digit * $w;
88+
my $t = ($k <= $bias) ? TMIN
89+
: ($k >= $bias + TMAX) ? TMAX : $k - $bias;
90+
last LOOP if $digit < $t;
91+
$w *= (BASE - $t);
92+
}
93+
$bias = adapt($i - $oldi, @output + 1, $oldi == 0);
94+
warn "bias becomes $bias" if $DEBUG;
95+
$n += $i / (@output + 1);
96+
$i = $i % (@output + 1);
97+
splice(@output, $i, 0, $n);
98+
warn join " ", map sprintf('%04x', $_), @output if $DEBUG;
99+
$i++;
100+
}
101+
return join '', map chr, @output;
102+
}
103+
104+
sub encode_punycode {
105+
my $input = shift;
106+
# my @input = split //, $input; # doesn't work in 5.6.x!
107+
my @input = map substr($input, $_, 1), 0..length($input)-1;
108+
109+
my $n = INITIAL_N;
110+
my $delta = 0;
111+
my $bias = INITIAL_BIAS;
112+
113+
my @output;
114+
my @basic = grep /$BasicRE/, @input;
115+
my $h = my $b = @basic;
116+
#push @output, @basic, $Delimiter if $b > 0;
117+
push @output, @basic if $b > 0;
118+
warn "basic codepoints: (@output)" if $DEBUG;
119+
120+
if ($h < @input) {
121+
$PREFIX && unshift(@output, $PREFIX);
122+
push(@output, $Delimiter);
123+
} else {
124+
return join '', @output;
125+
}
126+
127+
while ($h < @input) {
128+
my $m = min(grep { $_ >= $n } map ord, @input);
129+
warn sprintf "next code point to insert is %04x", $m if $DEBUG;
130+
$delta += ($m - $n) * ($h + 1);
131+
$n = $m;
132+
for my $i (@input) {
133+
my $c = ord($i);
134+
$delta++ if $c < $n;
135+
if ($c == $n) {
136+
my $q = $delta;
137+
LOOP:
138+
for (my $k = BASE; 1; $k += BASE) {
139+
my $t = ($k <= $bias) ? TMIN :
140+
($k >= $bias + TMAX) ? TMAX : $k - $bias;
141+
last LOOP if $q < $t;
142+
my $cp = code_point($t + (($q - $t) % (BASE - $t)));
143+
push @output, chr($cp);
144+
$q = ($q - $t) / (BASE - $t);
145+
}
146+
push @output, chr(code_point($q));
147+
$bias = adapt($delta, $h + 1, $h == $b);
148+
warn "bias becomes $bias" if $DEBUG;
149+
$delta = 0;
150+
$h++;
151+
}
152+
}
153+
$delta++;
154+
$n++;
155+
}
156+
return join '', @output;
157+
}
158+
159+
sub min {
160+
my $min = shift;
161+
for (@_) { $min = $_ if $_ <= $min }
162+
return $min;
163+
}
164+
165+
1;
166+
__END__
167+
168+
=head1 NAME
169+
170+
IDNA::Punycode - encodes Unicode string in Punycode
171+
172+
=head1 SYNOPSIS
173+
174+
use IDNA::Punycode;
175+
idn_prefix('xn--');
176+
$punycode = encode_punycode($unicode);
177+
$unicode = decode_punycode($punycode);
178+
179+
=head1 DESCRIPTION
180+
181+
IDNA::Punycode is a module to encode / decode Unicode strings into
182+
Punycode, an efficient encoding of Unicode for use with IDNA.
183+
184+
This module requires Perl 5.6.0 or over to handle UTF8 flagged Unicode
185+
strings.
186+
187+
=head1 FUNCTIONS
188+
189+
This module exports following functions by default.
190+
191+
=over 4
192+
193+
=item encode_punycode
194+
195+
$punycode = encode_punycode($unicode);
196+
197+
takes Unicode string (UTF8-flagged variable) and returns Punycode
198+
encoding for it.
199+
200+
=item decode_punycode
201+
202+
$unicode = decode_punycode($punycode)
203+
204+
takes Punycode encoding and returns original Unicode string.
205+
206+
=item idn_prefix
207+
208+
idn_prefix($prefix);
209+
210+
causes encode_punycode() to add $prefix to ACE-string after conversion.
211+
As a side-effect decode_punycode() will only consider strings
212+
beginning with $prefix as punycode representations.
213+
214+
According to RFC 3490 the ACE prefix "xn--" had been chosen as the
215+
standard. Thus, "xn--" is also the default ACE prefix. For compatibility
216+
I'm leaving idn_prefix() in the module. Use C<idn_prefix(undef)> to
217+
get the old behaviour.
218+
219+
=back
220+
221+
These functions throws exceptionsn on failure. You can catch 'em via
222+
C<eval>.
223+
224+
=head1 AUTHORS
225+
226+
Tatsuhiko Miyagawa E<lt>[email protected]E<gt> is the original
227+
author and wrote almost all the code.
228+
229+
Robert Urban E<lt>[email protected]E<gt> added C<idn_prefix()>.
230+
231+
This library is free software; you can redistribute it and/or modify
232+
it under the same terms as Perl itself.
233+
234+
=head1 SEE ALSO
235+
236+
http://www.ietf.org/internet-drafts/draft-ietf-idn-punycode-01.txt
237+
238+
L<Encode::Punycode>
239+
240+
=cut

Diff for: create-domain.pl

+4-4
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ package virtual_server;
2727
while(@ARGV > 0) {
2828
local $a = shift(@ARGV);
2929
if ($a eq "--domain") {
30-
$domain = lc(shift(@ARGV));
30+
$domain = shift(@ARGV);
3131
}
3232
elsif ($a eq "--desc") {
3333
$owner = shift(@ARGV);
@@ -216,9 +216,9 @@ package virtual_server;
216216
}
217217

218218
# Validate args and work out defaults for those unset
219-
$domain =~ /^[A-Za-z0-9\.\-]+$/ || &usage($text{'setup_edomain'});
220-
$domain =~ /^\./ && &usage($text{'setup_edomain'});
221-
$domain =~ /\.$/ && &usage($text{'setup_edomain'});
219+
$domain = lc(&parse_domain_name($domain));
220+
$err = &valid_domain_name($domain);
221+
&usage($err) if ($err);
222222
&lock_domain_name($domain);
223223
foreach $d (&list_domains()) {
224224
usage($text{'setup_edomain2'}) if (lc($d->{'dom'}) eq lc($domain));

Diff for: domain_setup.cgi

+3-4
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,9 @@ if ($in{'subdom'}) {
4343
&error(&text('setup_emax', $dmax)) if ($dleft == 0);
4444

4545
# Validate inputs (check domain name to see if in use)
46-
$in{'dom'} =~ /^[A-Za-z0-9\.\-]+$/ || &error($text{'setup_edomain'});
47-
$in{'dom'} =~ /^\./ && &error($text{'setup_edomain'});
48-
$in{'dom'} =~ /\.$/ && &error($text{'setup_edomain'});
49-
$in{'dom'} = lc($in{'dom'});
46+
$in{'dom'} = lc(&parse_domain_name($in{'dom'}));
47+
$err = &valid_domain_name($in{'dom'});
48+
&error($err) if ($err);
5049
&lock_domain_name($in{'dom'});
5150
if ($subdom) {
5251
# Append super-domain

Diff for: edit_domain.cgi

+8-2
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,15 @@ print &ui_hidden_table_start($text{'edit_header'}, "width=100%", 4,
3030
"basic", 1);
3131

3232
# Domain name, with link
33+
$dname = &show_domain_name($d);
3334
print &ui_table_row($text{'edit_domain'},
34-
$d->{'web'} ? "<tt><a href=http://$d->{'dom'}/>$d->{'dom'}</a></tt>"
35-
: "<tt>$d->{'dom'}</tt>", undef, \@tds);
35+
$d->{'web'} ? "<tt><a href=http://$d->{'dom'}/>$dname</a></tt>"
36+
: "<tt>$dname</tt>", undef, \@tds);
37+
38+
if ($dname ne $d->{'dom'}) {
39+
print &ui_table_row($text{'edit_xndomain'},
40+
"<tt>$d->{'dom'}</tt>");
41+
}
3642

3743
# Username
3844
print &ui_table_row($text{'edit_user'},

Diff for: lang/en

+4-1
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,9 @@ form_proxysect=IP address and forwarding
295295

296296
setup_err=Failed to create virtual server
297297
setup_evital=Critial feature $1 was not properly created - Virtual server creation halted.
298-
setup_edomain=Missing or invalid domain name
298+
setup_edomain=Missing or invalid domain name - only letters, numbers and the following characters are allowed : . - _
299+
setup_edomain2=Domain names cannot start or end with a .
300+
setup_edomain3=The final component of a domain name cannot contain non-english letters
299301
setup_esubdomain=Invalid sub-domain name - no dots are allowed
300302
setup_edomain2=You are already hosting this domain
301303
setup_eip=Missing or invalid IP address
@@ -800,6 +802,7 @@ edit_header=Virtual server details
800802
edit_headerc=Configurable settings
801803
edit_headers=Related virtual servers
802804
edit_domain=Domain name
805+
edit_xndomain=Real DNS domain name
803806
edit_user=Administration username
804807
edit_group=Administration group
805808
edit_nogroup=None created

Diff for: list-domains.pl

+4
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,10 @@ package virtual_server;
9292
print " Type: ",($d->{'alias'} ? "Alias" :
9393
$d->{'parent'} ? "Sub-server" :
9494
"Top-level server"),"\n";
95+
$dname = &show_domain_name($d);
96+
if ($dname ne $d->{'dom'}) {
97+
print " International domain name: $dname\n";
98+
}
9599
if ($d->{'alias'}) {
96100
$aliasdom = &get_domain_by("id", $d->{'alias'});
97101
print " Real domain: $aliasdom->{'dom'}\n";

Diff for: mass_create.cgi

+4-3
Original file line numberDiff line numberDiff line change
@@ -58,16 +58,17 @@ foreach $line (@lines) {
5858
$lnum++;
5959
next if ($line !~ /\S/);
6060
local ($dname, $owner, $pass, $user, $pname, $ip, $aname) = split(/:/, $line, -1);
61-
$dname = lc($dname);
61+
$dname = lc(&parse_domain_name($dname));
6262
$user = lc($user);
6363

6464
# Validate domain details
6565
if (!$dname || !$owner) {
6666
&line_error($text{'cmass_edname'});
6767
next;
6868
}
69-
if ($dname !~ /^[A-Za-z0-9\.\-]+$/) {
70-
&line_error($text{'setup_edomain'});
69+
$err = &valid_domain_name($dname);
70+
if ($err) {
71+
&line_error($err);
7172
next;
7273
}
7374
if ($owner =~ /:/) {

Diff for: rename.cgi

+3-2
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,9 @@ $d = &get_domain($in{'dom'});
1010
&can_rename_domains() || &error($text{'rename_ecannot'});
1111

1212
# Validate inputs
13-
$in{'new'} =~ /^[A-Za-z0-9\.\-]+$/ || &error($text{'rename_enew'});
14-
$in{'new'} = lc($in{'new'});
13+
$in{'new'} = lc(&parse_domain_name($in{'new'}));
14+
$err = &valid_domain_name($in{'new'});
15+
&error($err) if ($err);
1516
$newdom = $in{'new'} ne $d->{'dom'} ? 1 : 0;
1617
if (!$d->{'parent'} && &can_rename_domains() == 2 &&
1718
($in{'user_mode'} == 2 || $newdom)) {

0 commit comments

Comments
 (0)