xref: /AOO41X/main/i18npool/source/localedata/data/currency-check.awk (revision cdf0e10c4e3984b49a9502b011690b615761d4a3)
1*cdf0e10cSrcweir#!/usr/bin/gawk -f
2*cdf0e10cSrcweir# Usage: gawk -f currency-check.awk *.xml
3*cdf0e10cSrcweir# Check any
4*cdf0e10cSrcweir# <FormatCode>...[$xxx-...]...</FormatCode>
5*cdf0e10cSrcweir# against every
6*cdf0e10cSrcweir# <CurrencySymbol>xxx</CurrencySymbol>
7*cdf0e10cSrcweir# definition of the same XML file and output symbols if no match was found.
8*cdf0e10cSrcweir# For formatindex="12" to formatindex="15" and for formatindex="17" it is
9*cdf0e10cSrcweir# checked if the used currency symbol is the usedInCompatibleFormatCodes
10*cdf0e10cSrcweir# currency symbol as it is needed by the number formatter.
11*cdf0e10cSrcweir# Also generates output if the generic currency symbol (UTF8 string 0xC2A4)
12*cdf0e10cSrcweir# is used instead of a real currency symbol.
13*cdf0e10cSrcweir# Author: Eike Rathke <er@openoffice.org>
14*cdf0e10cSrcweir
# No input file has been seen yet; an empty name makes the per-file rule
# (file != FILENAME) fire on the very first record.
BEGIN { file = "" }
18*cdf0e10cSrcweir
19*cdf0e10cSrcweir
# Fires on the first record of each input file: report on the file that was
# just finished (if there was one), then reset all per-file state.
FILENAME != file {
    # Must run before the counters below are cleared, checkIt() reads them.
    if ( file )
        checkIt()

    # String state: replaceFrom/replaceTo handling and ref-locale marker.
    sRefCurrencyFromLocale = sMatchReplace = sReplaceTo = sReplaceFrom = ""
    # Counters and flags.
    crlf = bFormatAuto = nCurrencies = nFormats = line = 0

    file = FILENAME
}
34*cdf0e10cSrcweir
# Main rule, run for every input record.
#  - <LC_FORMAT ...>      remembers the replaceFrom/replaceTo substitution.
#  - <FormatElement ...>  remembers whether the following currency format code
#                         is one of the automatic ones (formatindex 12-15, 17).
#  - <FormatCode>...      collects every currency symbol used in [$xxx-...]
#                         into Formats[], FormatLine[] and FormatAuto[].
#  - <LC_CURRENCY ref=..> reads the currency definitions of the referenced
#                         locale file instead of this one.
#  - anything else        is handed to getCurrencyParams().
{
    ++line
    # If run under Unix a CrLf spoils ...$ line end checks, so DOS line
    # endings are rejected outright.
    if ( /\x0D$/ )
    {
        print "Error: not Unix line ending in line " line
        crlf = 1    # makes the END rule skip the final checkIt()
        exit(1)
    }
    if ( $1 ~ /^<LC_FORMAT(>|$)/ )
    {
        if ( $0 ~ /replaceFrom="\[CURRENCY\]"/ )
        {
            sReplaceFrom = "\\[CURRENCY\\]"
            # Allow leading blanks, exactly like the literal <FormatCode>
            # pattern below; otherwise indented format codes that use the
            # [CURRENCY] placeholder would never be checked.
            sMatchReplace = "^[[:blank:]]*<FormatCode>.*" sReplaceFrom
        }
        for ( j=2; j<=NF; ++j )
        {
            if ( $j ~ /^replaceTo="/ )
            {
                # 11 characters of 'replaceTo="' plus the closing quote,
                # plus an optional trailing '>' or '/>'.
                l = 12
                if ( $j ~ />$/ )
                    ++l
                if ( $j ~ /\/>$/ )
                    ++l
                sReplaceTo = substr( $j, 12, length($j)-l )
            }
        }
    }
    else if ( $1 ~ /^<FormatElement(>|$)/ )
    {
        if ( $0 ~ /usage="CURRENCY"/ )
        {
            if ( $0 ~ /formatindex="1[23457]"/ )
                bFormatAuto = 1
            else
                bFormatAuto = 0
        }
    }
    else if ( $0 ~ /^[[:blank:]]*<FormatCode>.*\[\$.*-[0-9a-fA-F]+\]/ ||
            (sMatchReplace && $0 ~ sMatchReplace ) )
    {
        if ( sReplaceFrom )
            gsub( sReplaceFrom, sReplaceTo )
        split( $0, arr, /<|>/ )
        # arr[3] is the element content; cut it at every "[$" and "-hex]" so
        # that the currency symbols end up as separate pieces.
        nCode = split( arr[3], code, /(\[\$)|(-[0-9a-fA-F]+\])/ )
        # Walk the pieces in order (for-in order is unspecified) so that the
        # report order is stable.
        for ( j=1; j<=nCode; ++j )
        {
            # Pieces containing digit placeholders or a NatNum modifier are
            # number format code, not a currency symbol.
            if ( code[j] && code[j] !~ /#|0|\[NatNum/ )
            {
                FormatLine[nFormats] = file " line " line
                FormatAuto[nFormats] = bFormatAuto
                Formats[nFormats++] = code[j]
            }
        }
        bFormatAuto = 0
    }
    else if ( $1 ~ /^<LC_CURRENCY(>|$)/ )
    {
        for ( j=2; j<=NF; ++j )
        {
            if ( $j ~ /^ref="/ )
            {
                # 5 characters of 'ref="' plus the closing quote, plus an
                # optional trailing '>' or '/>'.
                l = 6
                if ( $j ~ />$/ )
                    ++l
                if ( $j ~ /\/>$/ )
                    ++l
                locale = substr( $j, 6, length($j)-l )
                sRefCurrencyFromLocale = file
                oldfile = file
                oldline = line
                # getline <file overwrites $0 and NF; keep the current record
                # so the surrounding field loop continues on the right data.
                oldrecord = $0
                # Look for the referenced locale next to the referring file.
                refdir = oldfile
                sub( /[^\/]*$/, "", refdir )
                file = refdir locale ".xml"
                line = 0
                while ( (getline <file) > 0 )
                {
                    ++line
                    getCurrencyParams()
                }
                close( file )
                if ( !line )
                    print "ref locale not available: " file \
                        " (from " oldfile " line " oldline ")"
                file = oldfile
                line = oldline
                $0 = oldrecord
                sRefCurrencyFromLocale = ""
            }
        }
    }
    else
        getCurrencyParams()
}
128*cdf0e10cSrcweir
129*cdf0e10cSrcweir
# Report on the last input file, unless processing was aborted because of
# a CrLf line ending or no file was read at all.
END {
    if ( file )
    {
        if ( !crlf )
            checkIt()
    }
}
134*cdf0e10cSrcweir
135*cdf0e10cSrcweir
# Inspect the current record ($0) for one element of a <Currency> definition
# and store it in slot nCurrencies of the global arrays:
#   SymbolDefault[], SymbolCompati[]   flags from the <Currency ...> tag
#   IDs[], IDLine[]                    <CurrencyID> content and location
#   Symbols[], SymbolLine[]            <CurrencySymbol> content and location
#   BankSymbols[], BankSymbolLine[]    <BankSymbol> content and location
# </Currency> finishes the slot by incrementing nCurrencies.
# Reads the globals file, line and sRefCurrencyFromLocale for the location
# text. The names after the gap in the parameter list are local variables.
# Assumes that each element is on a line on its own!
function getCurrencyParams(    where, parts) {
    if ( $1 ~ /^<Currency(>|$)/ )
    {
        SymbolDefault[nCurrencies] = ( $0 ~ /default="true"/ )
        SymbolCompati[nCurrencies] = ( $0 ~ /usedInCompatibleFormatCodes="true"/ )
        # nCurrencies restarts at 0 for every file but the arrays persist, so
        # clear the slot; otherwise a currency lacking a child element would
        # silently inherit the value stored by a previously checked file.
        IDs[nCurrencies] = ""
        IDLine[nCurrencies] = ""
        Symbols[nCurrencies] = ""
        SymbolLine[nCurrencies] = ""
        BankSymbols[nCurrencies] = ""
        BankSymbolLine[nCurrencies] = ""
        return
    }

    # Location text shared by all three content elements.
    where = file " line " line
    if ( sRefCurrencyFromLocale )
        where = where " (referenced from " sRefCurrencyFromLocale ")"

    if ( $0 ~ /^[[:blank:]]*<CurrencyID>/ )
    {
        # Splitting at '<' and '>' leaves the element content in parts[3].
        split( $0, parts, /<|>/ )
        IDLine[nCurrencies] = where
        IDs[nCurrencies] = parts[3]
    }
    else if ( $0 ~ /^[[:blank:]]*<CurrencySymbol>/ )
    {
        split( $0, parts, /<|>/ )
        SymbolLine[nCurrencies] = where
        Symbols[nCurrencies] = parts[3]
    }
    else if ( $0 ~ /^[[:blank:]]*<BankSymbol>/ )
    {
        split( $0, parts, /<|>/ )
        BankSymbolLine[nCurrencies] = where
        BankSymbols[nCurrencies] = parts[3]
    }
    else if ( $1 ~ /^<\/Currency>/ )
    {
        ++nCurrencies
    }
}
184*cdf0e10cSrcweir
185*cdf0e10cSrcweir
# Report all problems found in the file that was just read (global `file').
#  1. Every collected format currency symbol (Formats[]) must be a defined
#     currency symbol, must not be the generic currency sign, and for the
#     automatic format codes must be the usedInCompatibleFormatCodes symbol.
#  2. If any format was bad, list all defined symbols for reference;
#     otherwise verify that exactly one default and exactly one compatible
#     currency is defined.
#  3. Every CurrencyID must look like an ISO 4217 code and equal BankSymbol.
# Prints one line per finding and a blank separator line if anything was
# reported. The names after the gap in the parameter list are local variables.
function checkIt(    j, state, bad, bDef, bHasDefault, bHasCompati, generic) {
    generic = "\xc2\xa4"    # UTF-8 encoding of the generic currency sign
    bad = 0
    for ( j=0; j<nFormats; ++j )
    {
        state = FormatInSymbol( Formats[j] )
        if ( Formats[j] == generic )
        {
            bad = 1
            print "    bad: `" Formats[j] "'   (" FormatLine[j] ")"
        }
        else if ( state == 0 )
        {
            bad = 1
            print "unknown: `" Formats[j] "'   (" FormatLine[j] ")"
        }
        else if ( FormatAuto[j] && state < 2 )
        {
            # Symbol is defined, but it is not the compatible one.
            bad = 1
            print "badauto: `" Formats[j] "'   (" FormatLine[j] ")"
        }
    }
    if ( bad )
    {
        # List every defined symbol so the reported formats can be compared.
        for ( j=0; j<nCurrencies; ++j )
        {
            bDef = 0
            if ( Symbols[j] == generic )
                print "def bad: `" Symbols[j] "'   (" SymbolLine[j] ")"
            if ( SymbolDefault[j] )
            {
                bDef = 1
                print "default: `" Symbols[j] "'   (" SymbolLine[j] ")"
            }
            if ( SymbolCompati[j] )
            {
                bDef = 1
                print "compati: `" Symbols[j] "'   (" SymbolLine[j] ")"
            }
            if ( !bDef )
                print "defined: `" Symbols[j] "'   (" SymbolLine[j] ")"
        }
    }
    else
    {
        bHasDefault = 0
        bHasCompati = 0
        for ( j=0; j<nCurrencies; ++j )
        {
            if ( Symbols[j] == generic )
            {
                bad = 1
                print "def bad: `" Symbols[j] "'   (" SymbolLine[j] ")"
            }
            if ( SymbolDefault[j] )
            {
                if ( !bHasDefault )
                    bHasDefault = 1
                else
                {
                    bad = 1
                    print "dupe default: `" Symbols[j] "'   (" SymbolLine[j] ")"
                }
            }
            if ( SymbolCompati[j] )
            {
                if ( !bHasCompati )
                    bHasCompati = 1
                else
                {
                    bad = 1
                    print "dupe compati: `" Symbols[j] "'   (" SymbolLine[j] ")"
                }
            }
        }
        if ( !bHasDefault )
        {
            bad = 1
            print "  no default: (" file ")"
        }
        if ( !bHasCompati )
        {
            bad = 1
            print "  no compati: (" file ")"
        }
    }
    for ( j=0; j<nCurrencies; ++j )
    {
        # Check if CurrencyID at least resembles some ISO 4217 code.
        # The only exception is zh_MO that had an erroneous original data set
        # with BankSymbol="P" (stored as ISO code in documents, hence copied to
        # CurrencyID now) and needs that entry for legacy documents. The file
        # is matched by basename so the exception also applies when the script
        # is given a path such as data/zh_MO.xml.
        # There is a strange bug in gawk 3.1.4 that does a match of [A-Z] on
        # lower case except 'a', regardless of IGNORECASE setting, hence this
        # ugly notation. [[:upper:]] wouldn't be correct since we want only
        # ASCII to match.
        if ( IDs[j] !~ /^[ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ]$/ \
              && !(file ~ /(^|\/)zh_MO\.xml$/ && IDs[j] == "P") )
        {
            bad = 1
            print "no ISO 4217 code: `" IDs[j] "'   (" IDLine[j] ")"
        }
        # CurrencyID should equal BankSymbol for now.
        if ( IDs[j] != BankSymbols[j] )
        {
            bad = 1
            print "not equal: CurrencyID `" IDs[j] "' != BankSymbol `" BankSymbols[j] \
                  "'   (" IDLine[j] " and " BankSymbolLine[j] ")"
        }
    }
    if ( bad )
        print ""
}
298*cdf0e10cSrcweir
299*cdf0e10cSrcweir
# Look up a currency symbol used in a format code among the currency symbols
# defined for the current file (Symbols[0 .. nCurrencies-1]).
#   format  the symbol text taken from a [$xxx-...] format code
# Returns 2 if a currency with that symbol has SymbolCompati set,
#         1 if the symbol is defined but none of the matches is compatible,
#         0 if the symbol is not defined at all.
# The names after the gap in the parameter list are local variables, so the
# lookup no longer overwrites same-named variables of the caller.
function FormatInSymbol( format,    nSym, state ) {
    state = 0
    for ( nSym=0; nSym<nCurrencies; ++nSym )
    {
        if ( format != Symbols[nSym] )
            continue
        # Two currencies can have the same symbol (e.g. az_AZ.xml 'man.'
        # for AZM and AZN), continue to lookup if the match isn't the
        # compatible one.
        if ( SymbolCompati[nSym] )
            return 2
        state = 1
    }
    return state
}
317*cdf0e10cSrcweir
318*cdf0e10cSrcweir# vim: ts=4 sw=4 expandtab
319