求助,谁能用 Perl 写一个基于正向或逆向最大匹配算法的分词程序?
转贴一个:
# ---------------------------------------------------------------------------
# Dictionary loading.
#
# word.txt is read one word per line.  Every word becomes a key of %MapDict
# (the value is the word's length) and $MaxLen records the length of the
# longest word.  Both are package globals because Segment() reads them.
# No decoding layer is applied, so lengths are counted in whatever units the
# file handle delivers (raw bytes by default).
# ---------------------------------------------------------------------------
print "Loading Dictionary...\n";
our %MapDict;
our $MaxLen = 0;
open(my $DictIn, '<', 'word.txt')
    or die "Cannot open word.txt: $!\n";
while ( defined( my $Line = <$DictIn> ) ) {
    # Strip only the line terminator (LF or CRLF).  chop() would also eat the
    # last character of a final line that has no trailing newline.
    $Line =~ s/[\r\n]+\z//;
    next if $Line eq '';    # a blank line is not a dictionary word
    my $WordLen = length($Line);
    $MapDict{$Line} = $WordLen;
    $MaxLen = $WordLen if $WordLen > $MaxLen;
}
close($DictIn);

# ---------------------------------------------------------------------------
# Interactive loop: read a sentence, print its segmentation, repeat.
# The loop ends on "exit", on "q", or when STDIN reaches end-of-file; without
# the EOF test a closed STDIN would make the loop spin forever.
# ---------------------------------------------------------------------------
print "请输入要切分的句子!\n(press \"exit\" or \"q\" to quit!)\n=>";
while ( defined( my $Sent = <STDIN> ) ) {
    $Sent =~ s/[\r\n]+\z//;
    last if $Sent eq 'exit' || $Sent eq 'q';
    my $Result = Segment($Sent);
    print "$Result\n";
    print "=>";
}
exit;
# Segment($Input)
#
# Splits $Input into words using forward maximum matching: at each position
# the longest prefix (at most $MaxLen units long) that is a key of %MapDict
# is taken as the next word.  When no prefix of two or more units is in the
# dictionary, a single fallback unit is emitted instead.
#
# Parameters:
#   $Input - the sentence to segment.
#
# Returns:
#   The segmented sentence, with a '/' appended after every word.  An empty
#   or undefined $Input yields the empty string.
#
# Reads the package globals %MapDict (word => length) and $MaxLen (length of
# the longest dictionary word); it does not modify them.
sub Segment
{
    my ($Input) = @_;

    # Bind to the dictionary globals populated by the loading code.
    our (%MapDict, $MaxLen);

    my $Segmented = '';
    return $Segmented unless defined $Input;

    # A code point above 0xFF can only occur in a decoded character string.
    # There one fallback unit is exactly one character.  Otherwise the input
    # is treated as raw bytes in a double-byte encoding (lead byte has the
    # high bit set) -- NOTE(review): presumably GB2312/GBK; confirm with the
    # encoding of word.txt.
    my $IsWide = $Input =~ /[^\x00-\xFF]/ ? 1 : 0;

    my $Limit    = defined $MaxLen ? $MaxLen : 0;
    my $Remained = $Input;

    while ( length($Remained) > 0 ) {
        my $Rest = length($Remained);
        my $Len  = 0;

        # Never probe beyond the text that is actually left.
        my $Longest = $Limit < $Rest ? $Limit : $Rest;

        # Length-1 candidates are skipped on purpose: the fallback below
        # produces the same output for them.
        for ( my $Try = $Longest; $Try > 1; $Try-- ) {
            if ( exists $MapDict{ substr($Remained, 0, $Try) } ) {
                $Len = $Try;
                last;
            }
        }

        if ( $Len == 0 ) {
            # No dictionary word starts here: emit one character.
            $Len = ( !$IsWide && ( ord($Remained) & 0x80 ) ) ? 2 : 1;
            # A dangling lead byte at the very end is emitted on its own.
            $Len = $Rest if $Len > $Rest;
        }

        $Segmented .= substr($Remained, 0, $Len) . '/';
        $Remained = substr($Remained, $Len);
    }

    return $Segmented;
}