[ Index ] |
PHP Cross Reference of Unnamed Project |
[Summary view] [Print] [Text view]
#
# $Id: UTF7.pm,v 2.4 2006/06/03 20:28:48 dankogai Exp $
#
package Encode::Unicode::UTF7;
use strict;
use warnings;
no warnings 'redefine';
use base qw(Encode::Encoding);
__PACKAGE__->Define('UTF-7');
our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use MIME::Base64;
use Encode;

#
# Algorithms taken from Unicode::String by Gisle Aas
#

# When true (at load time), the RFC 2152 "Set O" punctuation is emitted
# as-is by encode() instead of being Base64-escaped.
our $OPTIONAL_DIRECT_CHARS = 1;

# Punctuation that is always passed through directly.
my $specials = quotemeta q{'(),-./:?};

# The list below MUST be single-quoted: inside a double-quoted string
# Perl interpolates "$%" (the format page-number variable), which would
# silently drop '$' and '%' from the set and add a stray '0'.
$OPTIONAL_DIRECT_CHARS
  and $specials .= quotemeta '!"#$%&*;<=>@[]^_`{|}';

# \s will not work because it matches U+3000 IDEOGRAPHIC SPACE.
# We use an explicit [\n\r\t\ ] instead.
my $re_asis    = qr/(?:[\n\r\t\ A-Za-z0-9$specials])/;
my $re_encoded = qr/(?:[^\n\r\t\ A-Za-z0-9$specials])/;

# Shifted sequences carry the text as big-endian UTF-16.
my $e_utf16 = find_encoding("UTF-16BE");

sub needs_lines { 1 }

# encode($obj, $str [, $chk])
#
# Converts the character string $str to UTF-7 and returns the result.
# Runs of direct characters are copied unchanged; a lone "+" becomes
# "+-"; every other run is written as "+<base64 of UTF-16BE>-" with the
# Base64 "=" padding removed.  When $chk is true the caller's source
# argument ($_[1]) is set to the empty string.  $obj is not used.
sub encode($$;$) {
    my ( $obj, $str, $chk ) = @_;
    my $len = length($str);
    pos($str) = 0;
    my $bytes = '';
    while ( pos($str) < $len ) {
        if ( $str =~ /\G($re_asis+)/gc ) {
            $bytes .= $1;
        }
        elsif ( $str =~ /\G($re_encoded+)/gc ) {
            if ( $1 eq "+" ) {

                # A single literal plus sign has its own short escape.
                $bytes .= "+-";
            }
            else {
                my $s = $1;
                my $base64 = encode_base64( $e_utf16->encode($s), '' );
                $base64 =~ s/=+$//;
                $bytes .= "+$base64-";
            }
        }
        else {

            # The two classes are complements, so one of them must match.
            die "This should not happen! (pos=" . pos($str) . ")";
        }
    }
    $_[1] = '' if $chk;
    return $bytes;
}

# decode($obj, $bytes [, $chk])
#
# Converts the UTF-7 string $bytes back to a character string and
# returns it.  Text outside "+" escapes is copied unchanged, "+-" yields
# a literal "+", and "+<base64>" (with an optional closing "-") is
# re-padded, Base64-decoded and interpreted as UTF-16BE.  A "+" that
# starts none of these is kept literally, with a warning when $^W is
# set.  When $chk is true the caller's source argument ($_[1]) is set
# to the empty string.  $obj is not used.
sub decode($$;$) {
    my ( $obj, $bytes, $chk ) = @_;
    my $len = length($bytes);
    my $str = "";

    # pos($bytes) is undef until the first successful match below.
    no warnings 'uninitialized';
    while ( pos($bytes) < $len ) {
        if ( $bytes =~ /\G([^+]+)/gc ) {
            $str .= $1;
        }
        elsif ( $bytes =~ /\G\+-/gc ) {
            $str .= "+";
        }
        elsif ( $bytes =~ /\G\+([A-Za-z0-9+\/]+)-?/gc ) {
            my $base64 = $1;

            # encode() strips the "=" padding; restore it before decoding.
            my $pad = length($base64) % 4;
            $base64 .= "=" x ( 4 - $pad ) if $pad;
            $str .= $e_utf16->decode( decode_base64($base64) );
        }
        elsif ( $bytes =~ /\G\+/gc ) {
            $^W and warn "Bad UTF7 data escape";
            $str .= "+";
        }
        else {
            die "This should not happen " . pos($bytes);
        }
    }
    $_[1] = '' if $chk;
    return $str;
}
1;
__END__

=head1 NAME

Encode::Unicode::UTF7 -- UTF-7 encoding

=head1 SYNOPSIS

    use Encode qw/encode decode/;
    $utf7 = encode("UTF-7", $utf8);
    $utf8 = decode("UTF-7", $utf7);

=head1 ABSTRACT

This module implements the UTF-7 encoding documented in RFC 2152.  UTF-7,
as its name suggests, is a 7-bit re-encoded version of UTF-16BE.  It
was designed to be MTA-safe and was expected to become a standard way to
exchange Unicode text via mail.  But with the advent of UTF-8 and
8-bit compliant MTAs, UTF-7 is hardly ever used.

UTF-7 was not supported by Encode until version 1.95 because of that.
But Unicode::String, a module by Gisle Aas which adds Unicode support
to non-utf8-savvy perl, did support UTF-7, so UTF-7 support was added
so that Encode can supersede Unicode::String 100%.

=head1 In Practice

When you want to encode Unicode for mails and web pages, however, do
not use UTF-7 unless you are sure your recipients and readers can
handle it.  Very few MUAs and WWW browsers support it these days (only
Mozilla seems to support it).  For general cases, use UTF-8 for the
message body and MIME-Header for headers instead.

=head1 SEE ALSO

L<Encode>, L<Encode::Unicode>, L<Unicode::String>

RFC 2152 L<http://www.ietf.org/rfc/rfc2152.txt>

=cut
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Tue Mar 17 22:47:18 2015 | Cross-referenced by PHPXref 0.7.1 |