#!/usr/bin/gawk -f
# Usage: gawk -f currency-check.awk *.xml
# Check any
# <FormatCode>...[$xxx-...]...</FormatCode>
# against every
# <CurrencySymbol>xxx</CurrencySymbol>
# definition of the same XML file and output symbols if no match was found.
# For formatindex="12" to formatindex="15" and for formatindex="17" it is
# checked if the used currency symbol is the usedInCompatibleFormatCodes
# currency symbol as it is needed by the number formatter.
# Also generates output if the generic currency symbol (UTF8 string 0xC2A4)
# is used instead of a real currency symbol.
# Author: Eike Rathke <er@openoffice.org>

BEGIN {
    file = ""
}


# A new input file starts: report on the previous file (if any) and reset
# all per-file state.
file != FILENAME {
    if ( file )
        checkIt()
    file = FILENAME
    line = 0
    nFormats = 0
    nCurrencies = 0
    bFormatAuto = 0
    sReplaceFrom = ""
    sReplaceTo = ""
    sMatchReplace = ""
    sRefCurrencyFromLocale = ""
    crlf = 0
    # Resetting the counters alone is not enough: a <Currency> that lacks
    # one of its child elements would otherwise inherit the value stored at
    # the same index while processing the previous file.
    delete FormatLine
    delete FormatAuto
    delete Formats
    delete SymbolDefault
    delete SymbolCompati
    delete IDLine
    delete IDs
    delete SymbolLine
    delete Symbols
    delete BankSymbolLine
    delete BankSymbols
}


# Per-line dispatcher: collects currency symbols used in format codes and
# the currency definitions of the file (or of a referenced locale).
{
    ++line
    # If run under Unix a CrLf spoils ...$ line end checks. DOS line endings
    # are boo anyways.
    if ( /\x0D$/ )
    {
        print "Error: not Unix line ending in line " line
        crlf = 1
        exit(1)
    }
    if ( $1 ~ /^<LC_FORMAT(>|$)/ )
    {
        if ( $0 ~ /replaceFrom="\[CURRENCY\]"/ )
        {
            sReplaceFrom = "\\[CURRENCY\\]"
            # Allow leading blanks, consistent with the literal
            # <FormatCode> test below, so indented lines are checked too.
            sMatchReplace = "^[[:blank:]]*<FormatCode>.*" sReplaceFrom
        }
        for ( j=2; j<=NF; ++j )
        {
            if ( $j ~ /^replaceTo="/ )
            {
                # 11 characters of 'replaceTo="' plus the closing quote,
                # plus an optional trailing '>' or '/>'.
                l = 12
                if ( $j ~ />$/ )
                    ++l
                if ( $j ~ /\/>$/ )
                    ++l
                sReplaceTo = substr( $j, 12, length($j)-l )
            }
        }
    }
    else if ( $1 ~ /^<FormatElement(>|$)/ )
    {
        if ( $0 ~ /usage="CURRENCY"/ )
        {
            if ( $0 ~ /formatindex="1[23457]"/ )
                bFormatAuto = 1
            else
                bFormatAuto = 0
        }
    }
    else if ( $0 ~ /^[[:blank:]]*<FormatCode>.*\[\$.*-[0-9a-fA-F]+\]/ ||
            (sMatchReplace && $0 ~ sMatchReplace ) )
    {
        if ( sReplaceFrom )
            gsub( sReplaceFrom, sReplaceTo )
        split( $0, arr, /<|>/ )
        # Walk the pieces in source order; 'for (k in code)' would visit
        # them in an unspecified order and shuffle the report.
        nParts = split( arr[3], code, /(\[\$)|(-[0-9a-fA-F]+\])/ )
        for ( k=1; k<=nParts; ++k )
        {
            # Pieces containing '#', '0' or '[NatNum' are taken to be
            # number format code, not a currency symbol.
            if ( code[k] && code[k] !~ /#|0|\[NatNum/ )
            {
                FormatLine[nFormats] = file " line " line
                FormatAuto[nFormats] = bFormatAuto
                Formats[nFormats++] = code[k]
            }
        }
        bFormatAuto = 0
    }
    else if ( $1 ~ /^<LC_CURRENCY(>|$)/ )
    {
        for ( j=2; j<=NF; ++j )
        {
            if ( $j ~ /^ref="/ )
            {
                # 5 characters of 'ref="' plus the closing quote, plus an
                # optional trailing '>' or '/>'.
                l = 6
                if ( $j ~ />$/ )
                    ++l
                if ( $j ~ /\/>$/ )
                    ++l
                locale = substr( $j, 6, length($j)-l )
                sRefCurrencyFromLocale = file
                oldfile = file
                oldline = line
                # getline below overwrites $0 and NF while this loop is
                # still iterating over the fields of the current record.
                oldrecord = $0
                # Look for the referenced locale next to the current file.
                file = dirPrefix( oldfile ) locale ".xml"
                line = 0
                while ( (getline <file) > 0 )
                {
                    ++line
                    getCurrencyParams()
                }
                close( file )
                if ( !line )
                    print "ref locale not available: " file \
                        " (from " oldfile " line " oldline ")"
                file = oldfile
                line = oldline
                $0 = oldrecord
                sRefCurrencyFromLocale = ""
            }
        }
    }
    else
        getCurrencyParams()
}


END {
    if ( file && !crlf )
        checkIt()
}


# Returns the directory part of path including the trailing '/', or "" if
# path has no directory component.
function dirPrefix( path ) {
    if ( match( path, /^.*\// ) )
        return substr( path, 1, RLENGTH )
    return ""
}


# Returns the "<file> line <n>" location string for the current line, with a
# "(referenced from ...)" suffix while a referenced locale is being read.
function lineRef() {
    if ( sRefCurrencyFromLocale )
        return file " line " line \
            " (referenced from " sRefCurrencyFromLocale ")"
    return file " line " line
}


# Examines the current record ($0) for <Currency> data and stores it at
# index nCurrencies; the index advances on </Currency>.
function getCurrencyParams() {
    # Assumes that each element is on a line on its own!
    if ( $1 ~ /^<Currency(>|$)/ )
    {
        if ( $0 ~ /default="true"/ )
            SymbolDefault[nCurrencies] = 1
        else
            SymbolDefault[nCurrencies] = 0
        if ( $0 ~ /usedInCompatibleFormatCodes="true"/ )
            SymbolCompati[nCurrencies] = 1
        else
            SymbolCompati[nCurrencies] = 0
    }
    else if ( $0 ~ /^[[:blank:]]*<CurrencyID>/ )
    {
        split( $0, arr, /<|>/ )
        IDLine[nCurrencies] = lineRef()
        IDs[nCurrencies] = arr[3]
    }
    else if ( $0 ~ /^[[:blank:]]*<CurrencySymbol>/ )
    {
        split( $0, arr, /<|>/ )
        SymbolLine[nCurrencies] = lineRef()
        Symbols[nCurrencies] = arr[3]
    }
    else if ( $0 ~ /^[[:blank:]]*<BankSymbol>/ )
    {
        split( $0, arr, /<|>/ )
        BankSymbolLine[nCurrencies] = lineRef()
        BankSymbols[nCurrencies] = arr[3]
    }
    else if ( $1 ~ /^<\/Currency>/ )
    {
        ++nCurrencies
    }
}


# Prints the findings for the file named by the global 'file': currency
# symbols used in format codes that are unknown, generic, or not the
# compatible one where required; duplicate or missing default/compatible
# flags; and implausible CurrencyID values. Takes no arguments; the names in
# the parameter list are awk-style local variables.
# NOTE(review): the labels " bad: ", " no default: " and " no compati: " look
# like their leading padding was collapsed (the other labels are wider) --
# confirm against the upstream file before relying on column alignment.
function checkIt(    j, state, bad, bDef, bHasDefault, bHasCompati) {
    bad = 0
    for ( j=0; j<nFormats; ++j )
    {
        state = FormatInSymbol( Formats[j] )
        if ( Formats[j] == "\xc2\xa4" )
        {
            bad = 1
            print " bad: `" Formats[j] "' (" FormatLine[j] ")"
        }
        else if ( state == 0 )
        {
            bad = 1
            print "unknown: `" Formats[j] "' (" FormatLine[j] ")"
        }
        else if ( FormatAuto[j] && state < 2 )
        {
            bad = 1
            print "badauto: `" Formats[j] "' (" FormatLine[j] ")"
        }
    }
    if ( bad )
    {
        # Something was wrong with the format codes: list every currency
        # definition so the report shows what would have been valid.
        for ( j=0; j<nCurrencies; ++j )
        {
            bDef = 0
            if ( Symbols[j] == "\xc2\xa4" )
                print "def bad: `" Symbols[j] "' (" SymbolLine[j] ")"
            if ( SymbolDefault[j] )
            {
                bDef = 1
                print "default: `" Symbols[j] "' (" SymbolLine[j] ")"
            }
            if ( SymbolCompati[j] )
            {
                bDef = 1
                print "compati: `" Symbols[j] "' (" SymbolLine[j] ")"
            }
            if ( !bDef )
                print "defined: `" Symbols[j] "' (" SymbolLine[j] ")"
        }
    }
    else
    {
        # Format codes are fine: verify there is exactly one default and
        # exactly one compatible currency.
        bHasDefault = 0
        bHasCompati = 0
        for ( j=0; j<nCurrencies; ++j )
        {
            if ( Symbols[j] == "\xc2\xa4" )
            {
                bad = 1
                print "def bad: `" Symbols[j] "' (" SymbolLine[j] ")"
            }
            if ( SymbolDefault[j] )
            {
                if ( !bHasDefault )
                    bHasDefault = 1
                else
                {
                    bad = 1
                    print "dupe default: `" Symbols[j] "' (" SymbolLine[j] ")"
                }
            }
            if ( SymbolCompati[j] )
            {
                if ( !bHasCompati )
                    bHasCompati = 1
                else
                {
                    bad = 1
                    print "dupe compati: `" Symbols[j] "' (" SymbolLine[j] ")"
                }
            }
        }
        if ( !bHasDefault )
        {
            bad = 1
            print " no default: (" file ")"
        }
        if ( !bHasCompati )
        {
            bad = 1
            print " no compati: (" file ")"
        }
    }
    for ( j=0; j<nCurrencies; ++j )
    {
        # Check if CurrencyID at least resembles some ISO 4217 code.
        # The only exception is zh_MO that had an erroneous original data set
        # with BankSymbol="P" (stored as ISO code in documents, hence copied to
        # CurrencyID now) and needs that entry for legacy documents.
        # There is a strange bug in gawk 3.1.4 that does a match of [A-Z] on
        # lower case except 'a', regardless of IGNORECASE setting, hence this
        # ugly notation. [[:upper:]] wouldn't be correct since we want only
        # ASCII to match.
        # The file name is matched with an optional directory prefix so the
        # exception also applies when the input is given with a path.
        if ( IDs[j] !~ /^[ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ]$/ \
              && !(file ~ /(^|\/)zh_MO\.xml$/ && IDs[j] == "P") )
        {
            bad = 1
            print "no ISO 4217 code: `" IDs[j] "' (" IDLine[j] ")"
        }
        # CurrencyID should equal BankSymbol for now.
        if ( IDs[j] != BankSymbols[j] )
        {
            bad = 1
            print "not equal: CurrencyID `" IDs[j] "' != BankSymbol `" BankSymbols[j] \
                "' (" IDLine[j] " and " BankSymbolLine[j] ")"
        }
    }
    # Separate the reports of different files by an empty line.
    if ( bad )
        print ""
}


# Looks up 'format' among the collected currency symbols.
# Returns 2 if it equals the symbol of a currency flagged
# usedInCompatibleFormatCodes, 1 if it only equals symbols of other
# currencies, 0 if it matches none. 'state' and 'nSym' are locals.
function FormatInSymbol( format,    state, nSym ) {
    state = 0
    for ( nSym=0; nSym<nCurrencies; ++nSym )
    {
        if ( format == Symbols[nSym] )
        {
            # Two currencies can have the same symbol (e.g. az_AZ.xml 'man.'
            # for AZM and AZN), continue to lookup if the match isn't the
            # compatible one.
            if ( SymbolCompati[nSym] )
                return 2
            else
                state = 1
        }
    }
    return state
}

# vim: ts=4 sw=4 expandtab