Logoj0ke.net Open Build Service > Projects > home:jg > hunspell > wordlist2hunspell
Sign Up | Log In

File wordlist2hunspell of Package hunspell

x
 
#!/bin/sh
#
#    (C) 2008 Caolán McNamara <caolanm@redhat.com>
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

# This creates a LANG_TERRITORY .aff & .dic from a wordlist.
# It is only a simple wordlist spellchecking dictionary output, no
# knowledge of language rules can be extrapolated to shrink the
# wordlist or provide .aff rules for extending wordstems

# Command-line driver.
#   $1 - wordlist file; every non-empty line becomes one entry in the .dic
#   $2 - locale name (e.g. br_FR); used as the basename of the output files
# Writes <locale>.aff and <locale>.dic into the current directory.
# Exits 1 when fewer than two arguments are given or the wordlist is unreadable.
if [ $# -lt 2 ]; then
    echo "Usage: wordlist2hunspell wordlist_file locale"
    echo "e.g. wordlist2hunspell breton.words br_FR to create br_FR.dic and br_FR.aff in cwd"
    exit 1
fi

wordlist=$1
locale_name=$2
aff_file="${locale_name}.aff"
dic_file="${locale_name}.dic"

# Stop before any output file is created if the wordlist cannot be read;
# otherwise a bogus .aff/.dic pair would be produced and reported as created.
if [ ! -r "$wordlist" ]; then
    echo "wordlist2hunspell: cannot read wordlist file: $wordlist" >&2
    exit 1
fi

# Run the tools below in the target locale's UTF-8 variant
# (NOTE(review): presumably so the character-splitting sed works per
# character rather than per byte; this needs that locale to be installed).
export LANG="${locale_name}.utf8"

echo "# A basic .aff for a raw wordlist, created through wordlist2hunspell" > "$aff_file"
echo SET UTF-8 >> "$aff_file"

#see https://bugzilla.redhat.com/show_bug.cgi?id=462184 for the "C" hacks
# Build the TRY list: put every character of the wordlist on its own line,
# count the occurrences of each one and emit the characters most-frequent
# first, joined into a single string.
# The wordlist is fed through stdin so a name starting with '-' is never
# parsed as an option. The '\n' in the sed replacement relies on sed
# translating it to a newline (GNU sed does).
try_chars=$(sed 's/./&\n/g' < "$wordlist" |
    sed '/^$/d' |
    LC_ALL=C sort -n |
    LC_ALL=C uniq -c |
    LC_ALL=C sort -rn |
    tr -s ' ' |
    cut -d ' ' -f 3 |
    tr -d '\n')
# Quoted printf keeps characters such as '*', '?', '[' or '\' literal instead
# of letting the shell glob-expand them or echo interpret them.
printf '%s\n' "TRY${try_chars:+ $try_chars}" >> "$aff_file"

# First line of the .dic is the number of entries; some wc implementations
# pad the count with blanks, so strip them.
sed '/^$/d' < "$wordlist" | wc -l | tr -d ' ' > "$dic_file"
# Followed by the byte-wise sorted wordlist with empty lines removed.
LC_ALL=C sort < "$wordlist" | sed '/^$/d' >> "$dic_file"

echo "Basic ${locale_name}.dic and ${locale_name}.aff created"