xref: /AOO41X/main/i18npool/source/localedata/data/currency-check.awk (revision 5b501c92293051a25b12b7eb43a2a29471ec1458)
1cdf0e10cSrcweir#!/usr/bin/gawk -f
2*5b501c92SAndrew Rist# *************************************************************
3*5b501c92SAndrew Rist#
4*5b501c92SAndrew Rist#  Licensed to the Apache Software Foundation (ASF) under one
5*5b501c92SAndrew Rist#  or more contributor license agreements.  See the NOTICE file
6*5b501c92SAndrew Rist#  distributed with this work for additional information
7*5b501c92SAndrew Rist#  regarding copyright ownership.  The ASF licenses this file
8*5b501c92SAndrew Rist#  to you under the Apache License, Version 2.0 (the
9*5b501c92SAndrew Rist#  "License"); you may not use this file except in compliance
10*5b501c92SAndrew Rist#  with the License.  You may obtain a copy of the License at
11*5b501c92SAndrew Rist#
12*5b501c92SAndrew Rist#    http://www.apache.org/licenses/LICENSE-2.0
13*5b501c92SAndrew Rist#
14*5b501c92SAndrew Rist#  Unless required by applicable law or agreed to in writing,
15*5b501c92SAndrew Rist#  software distributed under the License is distributed on an
16*5b501c92SAndrew Rist#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17*5b501c92SAndrew Rist#  KIND, either express or implied.  See the License for the
18*5b501c92SAndrew Rist#  specific language governing permissions and limitations
19*5b501c92SAndrew Rist#  under the License.
20*5b501c92SAndrew Rist#
21*5b501c92SAndrew Rist# *************************************************************
# Usage: gawk -f currency-check.awk *.xml
# Checks every
# <FormatCode>...[$xxx-...]...</FormatCode>
# against every
# <CurrencySymbol>xxx</CurrencySymbol>
# definition of the same XML file and outputs the symbols for which no match
# was found.
# For formatindex="12" to formatindex="15" and for formatindex="17" it is
# also checked whether the currency symbol used is the
# usedInCompatibleFormatCodes currency symbol, as it is needed by the number
# formatter.
# Also generates output if the generic currency symbol (UTF-8 byte sequence
# 0xC2 0xA4) is used instead of a real currency symbol.
# Author: Eike Rathke <er@openoffice.org>
34cdf0e10cSrcweir
# No input file has been seen yet. Because the name is empty, the very first
# record makes the "file != FILENAME" rule fire without running a check.
BEGIN { file = "" }
38cdf0e10cSrcweir
39cdf0e10cSrcweir
# Fires on the first record of every input file: evaluates the file that was
# just finished (if any) and resets all per-file state for the new one.
file != FILENAME {
    # Report on the previous file before its state is thrown away.
    if ( file )
        checkIt()

    # Position bookkeeping for the new file.
    file = FILENAME
    line = 0
    crlf = 0

    # Nothing collected yet.
    nFormats = nCurrencies = 0
    bFormatAuto = 0

    # No LC_FORMAT replaceFrom/replaceTo pair and no ref locale seen yet.
    sReplaceFrom = sReplaceTo = sMatchReplace = ""
    sRefCurrencyFromLocale = ""
}
54cdf0e10cSrcweir
# Per-record rule: classifies the current line of a locale data XML file and
# collects what checkIt() needs later on:
#   - <LC_FORMAT ...>      remembers the replaceFrom/replaceTo pair
#   - <FormatElement ...>  remembers whether the next currency format code is
#                          one of formatindex 12,13,14,15,17
#   - <FormatCode>...      extracts the currency symbols used in [$xxx-...]
#   - <LC_CURRENCY ref=..> pulls the currency definitions from the referenced
#                          locale file
#   - anything else        is handed to getCurrencyParams()
{
    ++line
    # If run under Unix a CrLf spoils ...$ line end checks, so a DOS line
    # ending aborts the whole run. crlf tells the END rule to skip the check.
    if ( /\x0D$/ )
    {
        print "Error: not Unix line ending in line " line
        crlf = 1
        exit(1)
    }
    if ( $1 ~ /^<LC_FORMAT(>|$)/ )
    {
        if ( $0 ~ /replaceFrom="\[CURRENCY\]"/ )
        {
            sReplaceFrom = "\\[CURRENCY\\]"
            sMatchReplace = "^<FormatCode>.*" sReplaceFrom
        }
        for ( j=2; j<=NF; ++j )
        {
            if ( $j ~ /^replaceTo="/ )
            {
                # 11 characters of replaceTo=" plus the closing quote, plus
                # one more for a trailing > and yet one more for a trailing />
                l = 12
                if ( $j ~ />$/ )
                    ++l
                if ( $j ~ /\/>$/ )
                    ++l
                sReplaceTo = substr( $j, 12, length($j)-l )
            }
        }
    }
    else if ( $1 ~ /^<FormatElement(>|$)/ )
    {
        if ( $0 ~ /usage="CURRENCY"/ )
        {
            if ( $0 ~ /formatindex="1[23457]"/ )
                bFormatAuto = 1
            else
                bFormatAuto = 0
        }
    }
    else if ( $0 ~ /^[[:blank:]]*<FormatCode>.*\[\$.*-[0-9a-fA-F]+\]/ ||
            (sMatchReplace && $0 ~ sMatchReplace ) )
    {
        if ( sReplaceFrom )
            gsub( sReplaceFrom, sReplaceTo )
        # arr[3] is the text between <FormatCode> and </FormatCode>
        split( $0, arr, /<|>/ )
        # Cut at every [$ and every -hex] so that the currency symbols end up
        # as separate pieces.
        split( arr[3], code, /(\[\$)|(-[0-9a-fA-F]+\])/ )
        for ( j in code )
        {
            # Pieces containing #, 0 or [NatNum are number format parts, not
            # currency symbols.
            if ( code[j] && code[j] !~ /\#|0|\[NatNum/ )
            {
                FormatLine[nFormats] = file " line " line
                FormatAuto[nFormats] = bFormatAuto
                Formats[nFormats++] = code[j]
            }
        }
        bFormatAuto = 0
    }
    else if ( $1 ~ /^<LC_CURRENCY(>|$)/ )
    {
        for ( j=2; j<=NF; ++j )
        {
            if ( $j ~ /^ref="/ )
            {
                # 5 characters of ref=" plus the closing quote, plus one more
                # for a trailing > and yet one more for a trailing />
                l = 6
                if ( $j ~ />$/ )
                    ++l
                if ( $j ~ /\/>$/ )
                    ++l
                locale = substr( $j, 6, length($j)-l )
                sRefCurrencyFromLocale = file
                oldfile = file
                oldline = line
                # getline below overwrites $0 and NF with the records of the
                # referenced file; keep the current record so that the
                # enclosing field loop can continue on the right data.
                oldrecord = $0
                file = locale ".xml"
                line = 0
                while ( (getline <file) > 0 )
                {
                    ++line
                    getCurrencyParams()
                }
                close( file )
                if ( !line )
                    print "ref locale not available: " file \
                        " (from " oldfile " line " oldline ")"
                file = oldfile
                line = oldline
                sRefCurrencyFromLocale = ""
                # Restoring $0 re-splits the fields and resets NF.
                $0 = oldrecord
            }
        }
    }
    else
        getCurrencyParams()
}
148cdf0e10cSrcweir
149cdf0e10cSrcweir
# After the last record: check the final input file, unless the run was
# aborted because of a DOS line ending or no file was read at all.
END {
    if ( !crlf && file )
        checkIt()
}
154cdf0e10cSrcweir
155cdf0e10cSrcweir
# Evaluates the current record ($0) as part of a <Currency> element and
# stores what it finds in slot nCurrencies of the global arrays
# SymbolDefault, SymbolCompati, IDs, Symbols, BankSymbols and the matching
# IDLine, SymbolLine, BankSymbolLine location strings.
# Reads the globals file, line and sRefCurrencyFromLocale to build the
# location string. </Currency> closes the slot by incrementing nCurrencies.
# where and parts are local variables, callers pass no arguments.
function getCurrencyParams(    where, parts) {
    # Assumes that each element is on a line on its own!
    where = file " line " line
    if ( sRefCurrencyFromLocale )
        where = where " (referenced from " sRefCurrencyFromLocale ")"

    if ( $1 ~ /^<Currency(>|$)/ )
    {
        SymbolDefault[nCurrencies] = ( $0 ~ /default="true"/ ) ? 1 : 0
        SymbolCompati[nCurrencies] = \
            ( $0 ~ /usedInCompatibleFormatCodes="true"/ ) ? 1 : 0
        # The arrays survive from one input file to the next, only the
        # counters are reset. Clear the slot so that a missing child element
        # is not silently filled with a value left over from an earlier
        # file, and point its location at the <Currency> element itself.
        IDs[nCurrencies] = ""
        Symbols[nCurrencies] = ""
        BankSymbols[nCurrencies] = ""
        IDLine[nCurrencies] = where
        SymbolLine[nCurrencies] = where
        BankSymbolLine[nCurrencies] = where
    }
    else if ( $0 ~ /^[[:blank:]]*<CurrencyID>/ )
    {
        # parts[3] is the text between the opening and the closing tag
        split( $0, parts, /<|>/ )
        IDLine[nCurrencies] = where
        IDs[nCurrencies] = parts[3]
    }
    else if ( $0 ~ /^[[:blank:]]*<CurrencySymbol>/ )
    {
        split( $0, parts, /<|>/ )
        SymbolLine[nCurrencies] = where
        Symbols[nCurrencies] = parts[3]
    }
    else if ( $0 ~ /^[[:blank:]]*<BankSymbol>/ )
    {
        split( $0, parts, /<|>/ )
        BankSymbolLine[nCurrencies] = where
        BankSymbols[nCurrencies] = parts[3]
    }
    else if ( $1 ~ /^<\/Currency>/ )
    {
        ++nCurrencies
    }
}
204cdf0e10cSrcweir
205cdf0e10cSrcweir
# Evaluates everything collected for the current file (global file) and
# prints the findings:
#   - format code currency symbols that are the generic currency symbol,
#     are not defined at all, or are not the usedInCompatibleFormatCodes
#     symbol although the format requires it,
#   - if such a format problem exists, the list of all defined symbols,
#     otherwise duplicate or missing default / compatible currencies,
#   - CurrencyIDs that do not look like an ISO 4217 code or differ from the
#     BankSymbol.
# An empty line is printed after the findings of a file, if there were any.
# All names after the gap in the parameter list are local variables, callers
# pass no arguments.
function checkIt(    j, bad, state, bDef, bHasDefault, bHasCompati, base) {
    bad = 0
    for ( j=0; j<nFormats; ++j )
    {
        # 0: unknown symbol, 1: known symbol, 2: known and compatible symbol
        state = FormatInSymbol( Formats[j] )
        if ( Formats[j] == "\xc2\xa4" )
        {
            bad = 1
            print "    bad: `" Formats[j] "'   (" FormatLine[j] ")"
        }
        else if ( state == 0 )
        {
            bad = 1
            print "unknown: `" Formats[j] "'   (" FormatLine[j] ")"
        }
        else if ( FormatAuto[j] && state < 2 )
        {
            bad = 1
            print "badauto: `" Formats[j] "'   (" FormatLine[j] ")"
        }
    }
    if ( bad )
    {
        # Some format code is wrong: list every defined symbol as a hint.
        for ( j=0; j<nCurrencies; ++j )
        {
            bDef = 0
            if ( Symbols[j] == "\xc2\xa4" )
                print "def bad: `" Symbols[j] "'   (" SymbolLine[j] ")"
            if ( SymbolDefault[j] )
            {
                bDef = 1
                print "default: `" Symbols[j] "'   (" SymbolLine[j] ")"
            }
            if ( SymbolCompati[j] )
            {
                bDef = 1
                print "compati: `" Symbols[j] "'   (" SymbolLine[j] ")"
            }
            if ( !bDef )
                print "defined: `" Symbols[j] "'   (" SymbolLine[j] ")"
        }
    }
    else
    {
        # Format codes are fine: there must be exactly one default and
        # exactly one usedInCompatibleFormatCodes currency.
        bHasDefault = 0
        bHasCompati = 0
        for ( j=0; j<nCurrencies; ++j )
        {
            if ( Symbols[j] == "\xc2\xa4" )
            {
                bad = 1
                print "def bad: `" Symbols[j] "'   (" SymbolLine[j] ")"
            }
            if ( SymbolDefault[j] )
            {
                if ( !bHasDefault )
                    bHasDefault = 1
                else
                {
                    bad = 1
                    print "dupe default: `" Symbols[j] "'   (" SymbolLine[j] ")"
                }
            }
            if ( SymbolCompati[j] )
            {
                if ( !bHasCompati )
                    bHasCompati = 1
                else
                {
                    bad = 1
                    print "dupe compati: `" Symbols[j] "'   (" SymbolLine[j] ")"
                }
            }
        }
        if ( !bHasDefault )
        {
            bad = 1
            print "  no default: (" file ")"
        }
        if ( !bHasCompati )
        {
            bad = 1
            print "  no compati: (" file ")"
        }
    }
    # Compare on the bare file name so that the zh_MO exception below also
    # holds when the input file was given with a directory prefix.
    base = file
    sub( /^.*\//, "", base )
    for ( j=0; j<nCurrencies; ++j )
    {
        # Check if CurrencyID at least resembles some ISO 4217 code.
        # The only exception is zh_MO that had an erroneous original data set
        # with BankSymbol="P" (stored as ISO code in documents, hence copied to
        # CurrencyID now) and needs that entry for legacy documents.
        # There is a strange bug in gawk 3.1.4 that does a match of [A-Z] on
        # lower case except 'a', regardless of IGNORECASE setting, hence this
        # ugly notation. [[:upper:]] wouldn't be correct since we want only
        # ASCII to match.
        if ( IDs[j] !~ /^[ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ]$/ \
              && !(base == "zh_MO.xml" && IDs[j] == "P") )
        {
            bad = 1
            print "no ISO 4217 code: `" IDs[j] "'   (" IDLine[j] ")"
        }
        # CurrencyID should equal BankSymbol for now.
        if ( IDs[j] != BankSymbols[j] )
        {
            bad = 1
            print "not equal: CurrencyID `" IDs[j] "' != BankSymbol `" BankSymbols[j] \
                  "'   (" IDLine[j] " and " BankSymbolLine[j] ")"
        }
    }
    if ( bad )
        print ""
}
318cdf0e10cSrcweir
319cdf0e10cSrcweir
# Looks up a currency symbol taken from a format code in the currencies
# collected so far (global arrays Symbols and SymbolCompati, entries
# 0..nCurrencies-1).
# format: the symbol to look for
# Returns 0 if no currency has that symbol, 2 if a currency with that symbol
# has its SymbolCompati flag set, and 1 if the symbol is defined but none of
# the matching currencies is the compatible one.
# idx and found are local variables, callers pass only format.
function FormatInSymbol( format,    idx, found ) {
    found = 0
    for ( idx=0; idx<nCurrencies; ++idx )
    {
        if ( format != Symbols[idx] )
            continue
        # Two currencies can have the same symbol (e.g. az_AZ.xml 'man.'
        # for AZM and AZN), so keep looking as long as the match isn't the
        # compatible one.
        if ( SymbolCompati[idx] )
            return 2
        found = 1
    }
    return found
}
337cdf0e10cSrcweir
338cdf0e10cSrcweir# vim: ts=4 sw=4 expandtab
339