Started work on IDN support

jcameron · jcameron · commit 170738f283f7 · 2008-04-12T20:12:13.000Z
diff --git a/IDEAS b/IDEAS
@@ -71,4 +71,5 @@
 	- IDNA::Punycode
 	- Convert at create time, and when displaying?
 	- Has to be done on a per-domain-component basis
-
+	- Call valid_domain_name everywhere
+	- All pages that show domain name have to be updated
diff --git a/IDNA/Punycode.pm b/IDNA/Punycode.pm
@@ -0,0 +1,240 @@
+package IDNA::Punycode;
+
+use strict;
+our $VERSION = 0.03;
+
+require Exporter;
+our @ISA	= qw(Exporter);
+our @EXPORT = qw(encode_punycode decode_punycode idn_prefix);
+
+use integer;
+
+# Set to a true value to emit trace output via warn() while encoding/decoding.
+our $DEBUG = 0;
+# ACE prefix added by encode_punycode() to encoded labels and stripped by
+# decode_punycode(); change it with idn_prefix().
+our $PREFIX = 'xn--';
+
+# Numeric parameters of the Punycode algorithm used by adapt(),
+# encode_punycode() and decode_punycode().
+use constant BASE => 36;
+use constant TMIN => 1;
+use constant TMAX => 26;
+use constant SKEW => 38;
+use constant DAMP => 700;
+use constant INITIAL_BIAS => 72;
+use constant INITIAL_N => 128;
+
+# Separator ("-", 0x2D) between the literal basic characters and the
+# encoded part of a Punycode string.
+my $Delimiter = chr 0x2D;
+# Matches a single basic code point (0x00-0x7f).
+my $BasicRE   = qr/[\x00-\x7f]/;
+
+# Raise a fatal error with the given message via Carp::croak.
+# Carp is loaded on demand, only when an error is actually raised.
+sub _croak {
+	my @message = @_;
+	require Carp;
+	Carp::croak(@message);
+}
+
+# Set the ACE prefix stored in $PREFIX.
+# Takes the new prefix as its only argument (may be undef) and returns
+# the value just stored.
+sub idn_prefix {
+	my ($new_prefix) = @_;
+	return $PREFIX = $new_prefix;
+}
+
+# Map one Punycode digit character to its numeric value:
+# letters (either case) give 0..25 and "0".."9" give 26..35.
+# Returns nothing (undef in scalar context) for any other character.
+sub digit_value {
+	my ($char) = @_;
+	if ($char =~ /[A-Z]/) {
+		return ord($char) - ord("A");
+	}
+	if ($char =~ /[a-z]/) {
+		return ord($char) - ord("a");
+	}
+	if ($char =~ /[0-9]/) {
+		return ord($char) - ord("0") + 26;
+	}
+	return;
+}
+
+# Map a Punycode digit value to the code point of its character:
+# 0..25 give "a".."z" and 26..35 give "0".."9".
+# Dies for any value outside 0..35; base-36 has no digit 36, which the
+# previous upper bound let through as ":".
+sub code_point {
+	my $digit = shift;
+	return $digit + ord('a') if 0 <= $digit && $digit <= 25;
+	return $digit + ord('0') - 26 if 26 <= $digit && $digit <= 35;
+	die 'NOT COME HERE';
+}
+
+# Compute the next bias value from the delta just processed.
+#   $delta     - delta that was just encoded or decoded
+#   $numpoints - number of code points handled so far, including this one
+#   $firsttime - true for the first delta of a string (divides by DAMP
+#                instead of 2)
+# All divisions are integer divisions because of the file-level
+# "use integer".
+sub adapt {
+	my ($delta, $numpoints, $firsttime) = @_;
+
+	if ($firsttime) {
+		$delta = $delta / DAMP;
+	} else {
+		$delta = $delta / 2;
+	}
+	$delta = $delta + $delta / $numpoints;
+
+	my $threshold = ((BASE - TMIN) * TMAX) / 2;
+	my $k = 0;
+	until ($delta <= $threshold) {
+		$delta = $delta / (BASE - TMIN);
+		$k = $k + BASE;
+	}
+	return $k + (((BASE - TMIN + 1) * $delta) / ($delta + SKEW));
+}
+
+# Decode a Punycode string back into the original character string.
+#   $code - the encoded string.  When $PREFIX is set, a string that does
+#           not start with it is returned unchanged; otherwise the prefix
+#           is removed before decoding.
+# Returns the decoded string.
+# Croaks with 'non-basic code point' if the literal part before the last
+# delimiter contains a character outside 0x00-0x7f, and with
+# 'invalid punycode input' if the encoded part contains a character that
+# is not a Punycode digit or ends in the middle of a number.
+sub decode_punycode {
+	my $code = shift;
+
+	my $n	  = INITIAL_N;
+	my $i	  = 0;
+	my $bias   = INITIAL_BIAS;
+	my @output;
+
+	if ($PREFIX) {
+		# \Q..\E: the prefix is a literal string, not a pattern, so any
+		# regex metacharacters in a custom prefix must not be interpreted.
+		if ($code !~ /^\Q$PREFIX\E/) {
+			return $code;
+		}
+		$code =~ s/^\Q$PREFIX\E//;
+	}
+
+	# Everything before the last delimiter is copied through literally.
+	if ($code =~ s/(.*)$Delimiter//o) {
+		# Save the capture before running any further pattern operation.
+		my $basic = $1;
+		return _croak('non-basic code point') unless $basic =~ /^$BasicRE*$/o;
+		push @output, map ord, split //, $basic;
+	}
+
+	# Test the length, not the truth value: a remaining "0" is false in
+	# Perl and would otherwise end decoding without reporting bad input.
+	while (length $code) {
+		my $oldi = $i;
+		my $w	= 1;
+		# Read one variable-length integer, one digit at a time.
+		LOOP:
+		for (my $k = BASE; 1; $k += BASE) {
+			my $cp = substr($code, 0, 1, '');
+			my $digit = digit_value($cp);
+			defined $digit or return _croak("invalid punycode input");
+			$i += $digit * $w;
+			my $t = ($k <= $bias) ? TMIN
+			: ($k >= $bias + TMAX) ? TMAX : $k - $bias;
+			last LOOP if $digit < $t;
+			$w *= (BASE - $t);
+		}
+		$bias = adapt($i - $oldi, @output + 1, $oldi == 0);
+		warn "bias becomes $bias" if $DEBUG;
+		# $i encodes both the code point increment and the insert position.
+		$n += $i / (@output + 1);
+		$i = $i % (@output + 1);
+		splice(@output, $i, 0, $n);
+		warn join " ", map sprintf('%04x', $_), @output if $DEBUG;
+		$i++;
+	}
+	return join '', map chr, @output;
+}
+
+# Encode a character string as Punycode.
+#   $input - the string to encode (a UTF8-flagged string when it contains
+#            non-ASCII characters).
+# Returns the input unchanged when every character is a basic code point
+# (0x00-0x7f).  Otherwise returns $PREFIX (if set), the basic characters
+# in their original order, a delimiter if there was at least one basic
+# character, and then the encoded non-basic characters.
+sub encode_punycode {
+	my $input = shift;
+	# my @input = split //, $input; # doesn't work in 5.6.x!
+	my @input = map substr($input, $_, 1), 0..length($input)-1;
+
+	my $n	 = INITIAL_N;
+	my $delta = 0;
+	my $bias  = INITIAL_BIAS;
+
+	my @output;
+	my @basic = grep /$BasicRE/, @input;
+	# $b = number of basic code points, $h = code points handled so far
+	my $h = my $b = @basic;
+	push @output, @basic;
+	warn "basic codepoints: (@output)" if $DEBUG;
+
+	if ($h < @input) {
+		$PREFIX && unshift(@output, $PREFIX);
+		# The delimiter is written only when basic code points precede it
+		# (RFC 3492 section 6.3); a label with no basic characters must
+		# encode as e.g. "xn--tda", not "xn---tda".
+		push(@output, $Delimiter) if $b > 0;
+	} else {
+		return join '', @output;
+	}
+
+	while ($h < @input) {
+		# Next code point to handle: the smallest one not yet encoded.
+		my $m = min(grep { $_ >= $n } map ord, @input);
+		warn sprintf "next code point to insert is %04x", $m if $DEBUG;
+		$delta += ($m - $n) * ($h + 1);
+		$n = $m;
+		for my $i (@input) {
+			my $c = ord($i);
+			$delta++ if $c < $n;
+			if ($c == $n) {
+				# Write $delta as a variable-length integer.
+				my $q = $delta;
+				LOOP:
+				for (my $k = BASE; 1; $k += BASE) {
+					my $t = ($k <= $bias) ? TMIN :
+					($k >= $bias + TMAX) ? TMAX : $k - $bias;
+					last LOOP if $q < $t;
+					my $cp = code_point($t + (($q - $t) % (BASE - $t)));
+					push @output, chr($cp);
+					$q = ($q - $t) / (BASE - $t);
+				}
+				push @output, chr(code_point($q));
+				$bias = adapt($delta, $h + 1, $h == $b);
+				warn "bias becomes $bias" if $DEBUG;
+				$delta = 0;
+				$h++;
+			}
+		}
+		$delta++;
+		$n++;
+	}
+	return join '', @output;
+}
+
+# Return the numerically smallest of the arguments, or undef when called
+# with an empty list.
+sub min {
+	my ($smallest, @rest) = @_;
+	foreach my $candidate (@rest) {
+		if ($candidate <= $smallest) {
+			$smallest = $candidate;
+		}
+	}
+	return $smallest;
+}
+
+1;
+__END__
+
+=head1 NAME
+
+IDNA::Punycode - encodes Unicode string in Punycode
+
+=head1 SYNOPSIS
+
+  use IDNA::Punycode;
+  idn_prefix('xn--');
+  $punycode = encode_punycode($unicode);
+  $unicode  = decode_punycode($punycode);
+
+=head1 DESCRIPTION
+
+IDNA::Punycode is a module to encode / decode Unicode strings into
+Punycode, an efficient encoding of Unicode for use with IDNA.
+
+This module requires Perl 5.6.0 or later to handle UTF8-flagged Unicode
+strings.
+
+=head1 FUNCTIONS
+
+This module exports the following functions by default.
+
+=over 4
+
+=item encode_punycode
+
+  $punycode = encode_punycode($unicode);
+
+takes a Unicode string (UTF8-flagged variable) and returns the Punycode
+encoding for it.
+
+=item decode_punycode
+
+  $unicode = decode_punycode($punycode)
+
+takes a Punycode encoding and returns the original Unicode string.
+
+=item idn_prefix
+
+  idn_prefix($prefix);
+
+causes encode_punycode() to add $prefix to ACE-string after conversion.
+As a side-effect decode_punycode() will only consider strings
+beginning with $prefix as punycode representations.
+
+According to RFC 3490 the ACE prefix "xn--" had been chosen as the
+standard.  Thus, "xn--" is also the default ACE prefix.  For compatibility
+I'm leaving idn_prefix() in the module.  Use C<idn_prefix(undef)> to
+get the old behaviour.
+
+=back
+
+These functions throw exceptions on failure. You can catch them via
+C<eval>.
+
+=head1 AUTHORS
+
+Tatsuhiko Miyagawa E<lt>miyagawa@bulknews.netE<gt> is the original
+author and wrote almost all the code.
+
+Robert Urban E<lt>urban@UNIX-Beratung.deE<gt> added C<idn_prefix()>.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 SEE ALSO
+
+http://www.ietf.org/internet-drafts/draft-ietf-idn-punycode-01.txt
+(since published as RFC 3492)
+
+L<Encode::Punycode>
+
+=cut
diff --git a/create-domain.pl b/create-domain.pl
@@ -27,7 +27,7 @@ package virtual_server;
 while(@ARGV > 0) {
 	local $a = shift(@ARGV);
 	if ($a eq "--domain") {
-		$domain = lc(shift(@ARGV));
+		$domain = shift(@ARGV);
 		}
 	elsif ($a eq "--desc") {
 		$owner = shift(@ARGV);
@@ -216,9 +216,9 @@ package virtual_server;
 	}
 
 # Validate args and work out defaults for those unset
-$domain =~ /^[A-Za-z0-9\.\-]+$/ || &usage($text{'setup_edomain'});
-$domain =~ /^\./ && &usage($text{'setup_edomain'});
-$domain =~ /\.$/ && &usage($text{'setup_edomain'});
+$domain = lc(&parse_domain_name($domain));
+$err = &valid_domain_name($domain);
+&usage($err) if ($err);
 &lock_domain_name($domain);
 foreach $d (&list_domains()) {
         usage($text{'setup_edomain2'}) if (lc($d->{'dom'}) eq lc($domain));
diff --git a/domain_setup.cgi b/domain_setup.cgi
@@ -43,10 +43,9 @@ if ($in{'subdom'}) {
 &error(&text('setup_emax', $dmax)) if ($dleft == 0);
 
 # Validate inputs (check domain name to see if in use)
-$in{'dom'} =~ /^[A-Za-z0-9\.\-]+$/ || &error($text{'setup_edomain'});
-$in{'dom'} =~ /^\./ && &error($text{'setup_edomain'});
-$in{'dom'} =~ /\.$/ && &error($text{'setup_edomain'});
-$in{'dom'} = lc($in{'dom'});
+$in{'dom'} = lc(&parse_domain_name($in{'dom'}));
+$err = &valid_domain_name($in{'dom'});
+&error($err) if ($err);
 &lock_domain_name($in{'dom'});
 if ($subdom) {
 	# Append super-domain
diff --git a/edit_domain.cgi b/edit_domain.cgi
@@ -30,9 +30,15 @@ print &ui_hidden_table_start($text{'edit_header'}, "width=100%", 4,
 			     "basic", 1);
 
 # Domain name, with link
+$dname = &show_domain_name($d);
 print &ui_table_row($text{'edit_domain'},
-	$d->{'web'} ? "<tt><a href=http://$d->{'dom'}/>$d->{'dom'}</a></tt>"
-		    : "<tt>$d->{'dom'}</tt>", undef, \@tds);
+	$d->{'web'} ? "<tt><a href=http://$d->{'dom'}/>$dname</a></tt>"
+		    : "<tt>$dname</tt>", undef, \@tds);
+
+if ($dname ne $d->{'dom'}) {
+	print &ui_table_row($text{'edit_xndomain'},
+		"<tt>$d->{'dom'}</tt>");
+	}
 
 # Username
 print &ui_table_row($text{'edit_user'},
diff --git a/lang/en b/lang/en
@@ -295,7 +295,9 @@ form_proxysect=IP address and forwarding
 
 setup_err=Failed to create virtual server
 setup_evital=Critial feature $1 was not properly created - Virtual server creation halted.
-setup_edomain=Missing or invalid domain name
+setup_edomain=Missing or invalid domain name - only letters, numbers and the following characters are allowed : . - _
+setup_edomain2=Domain names cannot start or end with a .
+setup_edomain3=The final component of a domain name cannot contain non-english letters
 setup_esubdomain=Invalid sub-domain name - no dots are allowed
 setup_edomain2=You are already hosting this domain
 setup_eip=Missing or invalid IP address
@@ -800,6 +802,7 @@ edit_header=Virtual server details
 edit_headerc=Configurable settings
 edit_headers=Related virtual servers
 edit_domain=Domain name
+edit_xndomain=Real DNS domain name
 edit_user=Administration username
 edit_group=Administration group
 edit_nogroup=None created
diff --git a/list-domains.pl b/list-domains.pl
@@ -92,6 +92,10 @@ package virtual_server;
 		print "    Type: ",($d->{'alias'} ? "Alias" :
 				    $d->{'parent'} ? "Sub-server" :
 						     "Top-level server"),"\n";
+		$dname = &show_domain_name($d);
+		if ($dname ne $d->{'dom'}) {
+			print "    International domain name: $dname\n";
+			}
 		if ($d->{'alias'}) {
 			$aliasdom = &get_domain_by("id", $d->{'alias'});
 			print "    Real domain: $aliasdom->{'dom'}\n";
diff --git a/mass_create.cgi b/mass_create.cgi
@@ -58,16 +58,17 @@ foreach $line (@lines) {
 	$lnum++;
 	next if ($line !~ /\S/);
 	local ($dname, $owner, $pass, $user, $pname, $ip, $aname) = split(/:/, $line, -1);
-	$dname = lc($dname);
+	$dname = lc(&parse_domain_name($dname));
 	$user = lc($user);
 
 	# Validate domain details
 	if (!$dname || !$owner) {
 		&line_error($text{'cmass_edname'});
 		next;
 		}
-	if ($dname !~ /^[A-Za-z0-9\.\-]+$/) {
-		&line_error($text{'setup_edomain'});
+	$err = &valid_domain_name($dname);
+	if ($err) {
+		&line_error($err);
 		next;
 		}
 	if ($owner =~ /:/) {
diff --git a/rename.cgi b/rename.cgi
@@ -10,8 +10,9 @@ $d = &get_domain($in{'dom'});
 &can_rename_domains() || &error($text{'rename_ecannot'});
 
 # Validate inputs
-$in{'new'} =~ /^[A-Za-z0-9\.\-]+$/ || &error($text{'rename_enew'});
-$in{'new'} = lc($in{'new'});
+$in{'new'} = lc(&parse_domain_name($in{'new'}));
+$err = &valid_domain_name($in{'new'});
+&error($err) if ($err);
 $newdom = $in{'new'} ne $d->{'dom'} ? 1 : 0;
 if (!$d->{'parent'} && &can_rename_domains() == 2 &&
     ($in{'user_mode'} == 2 || $newdom)) {
diff --git a/virtual-server-lib-funcs.pl b/virtual-server-lib-funcs.pl