# Unified diff against misc/mythes-1.2.4/th_gen_idx.pl (result is written to
# misc/build/mythes-1.2.4/th_gen_idx.pl).  The patch contains three groups of
# changes:
#
#   1. Header (hunk "-1,11 +1,26"): removes the "#!/usr/bin/perl" line and the
#      descriptive usage comment, and adds a ":" / "eval 'exec perl -wS ...'
#      if 0;" preamble followed by the Apache License 2.0 notice.
#   2. New subs (hunk "-13,6 +28,27"): adds get_outfile(), which scans @ARGV
#      and returns the argument following "-o" (or "" if there is none), and
#      usage(), which prints "usage: $0 -o outfile < input" and exits with
#      status 99.
#   3. Output (hunks "-24,7 +60,11" and "-51,9 +91,13"): the script now calls
#      usage() when no "-o" argument was given, prints the output file name,
#      and writes the encoding line, the entry count and the sorted index to
#      OUTFILE instead of STDOUT.  Input is still read from STDIN.
#
# NOTE(review): in this copy every patch line is prefixed with its ordinal
# (1..93) and the lines are joined by spaces; this looks like an extraction
# artifact -- TODO confirm against the pristine patch before applying it.
# NOTE(review): the added code uses the two-argument form of open with a
# bareword handle, its die message does not include $!, and the result of
# close is not checked; consider addressing this when the patch is refreshed.
# NOTE(review): comments can only be placed here, ahead of the "---" header;
# inserting lines inside a hunk would invalidate the @@ line counts.
1--- misc/mythes-1.2.4/th_gen_idx.pl 2010-06-16 06:37:34 2+++ misc/build/mythes-1.2.4/th_gen_idx.pl 2026-06-11 08:10:31 3@@ -1,11 +1,26 @@ 4-#!/usr/bin/perl 5- 6-# perl program to take a thesaurus structured text data file 7-# and create the proper sorted index file (.idx) 8+: 9+eval 'exec perl -wS $0 ${1+"$@"}' 10+ if 0; 11+#************************************************************** 12 # 13-# typically invoked as follows: 14-# cat th_en_US_new.dat | ./th_gen_idx.pl > th_en_US_new.idx 15+# Licensed to the Apache Software Foundation (ASF) under one 16+# or more contributor license agreements. See the NOTICE file 17+# distributed with this work for additional information 18+# regarding copyright ownership. The ASF licenses this file 19+# to you under the Apache License, Version 2.0 (the 20+# "License"); you may not use this file except in compliance 21+# with the License. You may obtain a copy of the License at 22 # 23+# http://www.apache.org/licenses/LICENSE-2.0 24+# 25+# Unless required by applicable law or agreed to in writing, 26+# software distributed under the License is distributed on an 27+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 28+# KIND, either express or implied. See the License for the 29+# specific language governing permissions and limitations 30+# under the License. 
31+# 32+#************************************************************** 33 34 sub by_entry { 35 my ($aent, $aoff) = split('\|',$a); 36@@ -13,6 +28,27 @@ 37 $aent cmp $bent; 38 } 39 40+#FIXME: someone may want "infile" or even parameter parsing 41+sub get_outfile { 42+ my $next_is_file = 0; 43+ foreach ( @ARGV ) { 44+ if ( $next_is_file ) { 45+ return $_ 46+ } 47+ if ( $_ eq "-o" ) { 48+ $next_is_file = 1; 49+ } 50+ } 51+ return ""; 52+} 53+ 54+sub usage { 55+ print "usage:\n"; 56+ print "$0 -o outfile < input\n"; 57+ 58+ exit 99; 59+} 60+ 61 # main routine 62 my $ne = 0; # number of entries in index 63 my @tindex=(); # the index itself 64@@ -24,7 +60,11 @@ 65 my $meaning=""; # current meaning and synonyms 66 my $p; # misc uses 67 my $encoding; # encoding used by text file 68+my $outfile = ""; 69 70+$outfile = get_outfile(); 71+usage() if ( $outfile eq "" ); 72+ 73 # top line of thesaurus provides encoding 74 $encoding=<STDIN>; 75 $foffset = $foffset + length($encoding); 76@@ -51,9 +91,13 @@ 77 # now we have all of the information 78 # so sort it and then output the encoding, count and index data 79 @tindex = sort by_entry @tindex; 80-print STDOUT "$encoding\n"; 81-print STDOUT "$ne\n"; 82+ 83+print "$outfile\n"; 84+open OUTFILE, ">$outfile" or die "ERROR: Can't open $outfile for writing!"; 85+print OUTFILE "$encoding\n"; 86+print OUTFILE "$ne\n"; 87 foreach $one (@tindex) { 88- print STDOUT "$one\n"; 89+ print OUTFILE "$one\n"; 90 } 91+close OUTFILE; 92 93