2 # by ChaosLawful@SMTH at 2006-3-23
6 use Fcntl ':seek'; # for constants SEEK_*
# Main conversion loop: for every dictionary base name, walk the offset table
# in "<name>.idx", fetch the matching record from "<name>.lib", decode it and
# hand the word/explanation pair to toTextDict for output to "ncce_<name>.tab".
8 my @ncce_dicts=("ec","ce");
10 for my $ncce_dict (@ncce_dicts) {
# Open the index and data files for reading and the output file for writing
# (the leading ">" selects write mode); the script dies, without a message,
# if any of the three cannot be opened.
# NOTE(review): the files are processed as raw bytes; confirm that binmode is
# applied in the lines not shown here, otherwise this is not portable.
12 my $idxFH=new FileHandle("$ncce_dict.idx") or die;
13 my $libFH=new FileHandle("$ncce_dict.lib") or die;
14 my $outFH=new FileHandle(">ncce_$ncce_dict.tab") or die;
# The first 4 bytes of the index file hold the record count.
# NOTE(review): "L" unpacks in the host's native byte order; if the files are
# always little-endian, "V" would be the portable template -- confirm.
19 sysread($idxFH,$buf,4);
20 my ($totalRecord)=unpack("L",$buf); # got total record number
# Entry N's 4-byte offset is stored at byte N*4 of the index file, i.e. the
# offset table starts right after the 4-byte count read above.
22 for my $idxNo (1..$totalRecord) {
# NOTE(review): perlfunc advises sysseek, not seek, for positioning a handle
# that is then read with sysread, because buffering makes seek's effect on the
# read position unpredictable -- confirm this works on all target platforms.
23 seek($idxFH,$idxNo*4,SEEK_SET); # find offset
24 sysread($idxFH,$buf,4);
26 my ($off)=unpack("L",$buf); # byte offset of this entry inside the lib file
27 seek($libFH,$off,SEEK_SET); # seek into lib file
29 # and read corresponding entry record
# Add 0x1e to every byte of the record and repack the results as bytes.
# NOTE(review): a source byte above 0xe1 exceeds 0xff after the addition;
# confirm whether such bytes occur and how pack "C" should treat them.
34 $buf=pack "C*",map $_+0x1e,unpack "C*",$buf; # decrypt record
# After decoding, byte 0x1e separates the fields: the first is the word and
# the second its explanation; any further fields are ignored.
37 my @fields=split(/\x1e/,$buf); # split entry into word and explanation
38 toTextDict($outFH,$fields[0],$fields[1]); # output
# Writes one dictionary entry to $fh as a single "<word><TAB><explanation>"
# line after normalising the text and re-encoding it from cp936 to UTF-8.
# Arguments: $fh      - output file handle to print to
#            $word    - headword, as raw (still cp936-encoded) bytes
#            $explain - explanation text, as raw (still cp936-encoded) bytes
# NOTE(review): the enclosing `sub` line is not visible in this excerpt;
# presumably this is the body of toTextDict, the routine called by the main loop.
44 my ($fh,$word,$explain)=@_;
45 # Kingsoft custom dictionary's export format:
46 # every line contains one entry, whose format is:
47 # <word>|<explanation>
48 # where <explanation>:=<literal>[\r\n<explanation>]
# NOTE(review): the description above uses "|" as the separator, but the print
# at the end of this routine writes a TAB -- confirm which one is intended.
50 # strip leading and trailing spaces, squeeze inner spaces
59 # split NCCE entry into multiple explanations, optional
# Each ";" plus any whitespace after it is replaced by the two literal
# characters backslash and "n" (not by a real newline), which keeps the whole
# entry on one output line.
60 $explain=~s/;\s*/\\n/gs;
61 # convert a fullwidth comma (bytes \xa3\xac) that is followed by a word character into a halfwidth comma, optional
# Only the character after the comma is checked (lookahead); the preceding one is not.
# NOTE(review): this runs on undecoded bytes, so \xa3\xac could presumably
# also match across the boundary of two adjacent double-byte characters -- verify.
62 $word=~s/\xa3\xac(?=\w)/,/gs;
63 $explain=~s/\xa3\xac(?=\w)/,/gs;
# Re-encode both fields from cp936 to UTF-8 (encode/decode presumably come
# from the Encode module; its `use` line is not visible in this excerpt).
65 $word=encode("utf-8",decode("cp936",$word));
66 $explain=encode("utf-8",decode("cp936",$explain));
# Emit the entry as one TAB-separated, newline-terminated line.
67 print $fh "$word\t$explain\n";