#!/usr/bin/gawk -f
# *************************************************************
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# *************************************************************
# Usage: gawk -f currency-check.awk *.xml
# Check any
# <FormatCode>...[$xxx-...]...</FormatCode>
# against every
# <CurrencySymbol>xxx</CurrencySymbol>
# definition of the same XML file and output symbols if no match was found.
# For formatindex="12" to formatindex="15" and for formatindex="17" it is
# checked if the used currency symbol is the usedInCompatibleFormatCodes
# currency symbol as it is needed by the number formatter.
# Also generates output if the generic currency symbol (UTF8 string 0xC2A4)
# is used instead of a real currency symbol.
# Author: Eike Rathke <er@openoffice.org>

# Start with no current file so that the first record triggers the per-file
# setup rule below.
BEGIN {
    file = ""
}


# A new input file starts: report on the file just finished (if any) and
# reset all per-file state.
file != FILENAME {
    if ( file )
        checkIt()
    file = FILENAME
    line = 0
    nFormats = 0
    nCurrencies = 0
    bFormatAuto = 0
    sReplaceFrom = ""
    sReplaceTo = ""
    sMatchReplace = ""
    sRefCurrencyFromLocale = ""
    crlf = 0
    # Forget everything collected for the previous file. Without this, a
    # <Currency> that lacks e.g. its <BankSymbol> would silently be compared
    # against the value the previous file left at the same index.
    delete FormatLine
    delete FormatAuto
    delete Formats
    delete SymbolDefault
    delete SymbolCompati
    delete IDLine
    delete IDs
    delete SymbolLine
    delete Symbols
    delete BankSymbolLine
    delete BankSymbols
}

# Per-record dispatcher: collects the currency symbols used in format codes
# and the currency definitions of the current file (or of a referenced
# locale file).
{
    ++line
    # If run under Unix a CrLf spoils ...$ line end checks. DOS line endings
    # are boo anyways.
    if ( /\x0D$/ )
    {
        print "Error: not Unix line ending in line " line
        crlf = 1
        exit(1)
    }
    if ( $1 ~ /^<LC_FORMAT(>|$)/ )
    {
        if ( $0 ~ /replaceFrom="\[CURRENCY\]"/ )
        {
            sReplaceFrom = "\\[CURRENCY\\]"
            # Allow leading blanks, consistent with the literal <FormatCode>
            # pattern used further down; otherwise indented format codes
            # containing the placeholder are never checked.
            sMatchReplace = "^[[:blank:]]*<FormatCode>.*" sReplaceFrom
        }
        for ( j=2; j<=NF; ++j )
        {
            if ( $j ~ /^replaceTo="/ )
            {
                # 11 characters of `replaceTo="' precede the value; strip
                # them plus the closing quote and an optional `>' or `/>'.
                l = 12
                if ( $j ~ />$/ )
                    ++l
                if ( $j ~ /\/>$/ )
                    ++l
                sReplaceTo = substr( $j, 12, length($j)-l )
            }
        }
    }
    else if ( $1 ~ /^<FormatElement(>|$)/ )
    {
        if ( $0 ~ /usage="CURRENCY"/ )
        {
            # formatindex 12..15 and 17 must use the
            # usedInCompatibleFormatCodes currency symbol.
            if ( $0 ~ /formatindex="1[23457]"/ )
                bFormatAuto = 1
            else
                bFormatAuto = 0
        }
    }
    else if ( $0 ~ /^[[:blank:]]*<FormatCode>.*\[\$.*-[0-9a-fA-F]+\]/ ||
            (sMatchReplace && $0 ~ sMatchReplace ) )
    {
        if ( sReplaceFrom )
            gsub( sReplaceFrom, sReplaceTo )
        split( $0, arr, /<|>/ )
        # Cut the element content at every `[$' and `-<hex>]' so that the
        # currency symbols end up as separate pieces.
        nCode = split( arr[3], code, /(\[\$)|(-[0-9a-fA-F]+\])/ )
        # Walk the pieces by index: `for (j in code)' has no defined order
        # and would make the report order unpredictable.
        for ( j=1; j<=nCode; ++j )
        {
            # Skip empty pieces and pieces that are number format parts
            # rather than a currency symbol.
            if ( code[j] && code[j] !~ /[#0]|\[NatNum/ )
            {
                FormatLine[nFormats] = file " line " line
                FormatAuto[nFormats] = bFormatAuto
                Formats[nFormats++] = code[j]
            }
        }
        bFormatAuto = 0
    }
    else if ( $1 ~ /^<LC_CURRENCY(>|$)/ )
    {
        for ( j=2; j<=NF; ++j )
        {
            if ( $j ~ /^ref="/ )
            {
                # 5 characters of `ref="' precede the value; strip them plus
                # the closing quote and an optional `>' or `/>'.
                l = 6
                if ( $j ~ />$/ )
                    ++l
                if ( $j ~ /\/>$/ )
                    ++l
                locale = substr( $j, 6, length($j)-l )
                sRefCurrencyFromLocale = file
                oldfile = file
                oldline = line
                # `getline <file' replaces $0 and NF, which this loop is
                # still iterating over; keep the record to restore it below.
                sSavedRecord = $0
                file = locale ".xml"
                line = 0
                while ( (getline <file) > 0 )
                {
                    ++line
                    getCurrencyParams()
                }
                close( file )
                if ( !line )
                    print "ref locale not available: " file \
                        " (from " oldfile " line " oldline ")"
                file = oldfile
                line = oldline
                sRefCurrencyFromLocale = ""
                $0 = sSavedRecord
            }
        }
    }
    else
        getCurrencyParams()
}


# Report on the last file, unless processing was aborted because of a CrLf
# line ending.
END {
    if ( file && !crlf )
        checkIt()
}


# Inspect the current record ($0) and record the data of a <Currency>
# element in the global arrays SymbolDefault, SymbolCompati, IDs, Symbols and
# BankSymbols (plus the matching *Line arrays) at index nCurrencies; the
# closing </Currency> tag advances nCurrencies.
# Reads the globals file, line and sRefCurrencyFromLocale for the position
# text. `arr' is a local scratch array, not an argument.
function getCurrencyParams(    arr) {
    # Assumes that each element is on a line on its own!
    if ( $1 ~ /^<Currency(>|$)/ )
    {
        if ( $0 ~ /default="true"/ )
            SymbolDefault[nCurrencies] = 1
        else
            SymbolDefault[nCurrencies] = 0
        if ( $0 ~ /usedInCompatibleFormatCodes="true"/ )
            SymbolCompati[nCurrencies] = 1
        else
            SymbolCompati[nCurrencies] = 0
    }
    else if ( $0 ~ /^[[:blank:]]*<CurrencyID>/ )
    {
        # Splitting at `<' and `>' leaves the element content in arr[3].
        split( $0, arr, /<|>/ )
        if ( sRefCurrencyFromLocale )
            IDLine[nCurrencies] = file " line " line \
                " (referenced from " sRefCurrencyFromLocale ")"
        else
            IDLine[nCurrencies] = file " line " line
        IDs[nCurrencies] = arr[3]
    }
    else if ( $0 ~ /^[[:blank:]]*<CurrencySymbol>/ )
    {
        split( $0, arr, /<|>/ )
        if ( sRefCurrencyFromLocale )
            SymbolLine[nCurrencies] = file " line " line \
                " (referenced from " sRefCurrencyFromLocale ")"
        else
            SymbolLine[nCurrencies] = file " line " line
        Symbols[nCurrencies] = arr[3]
    }
    else if ( $0 ~ /^[[:blank:]]*<BankSymbol>/ )
    {
        split( $0, arr, /<|>/ )
        if ( sRefCurrencyFromLocale )
            BankSymbolLine[nCurrencies] = file " line " line \
                " (referenced from " sRefCurrencyFromLocale ")"
        else
            BankSymbolLine[nCurrencies] = file " line " line
        BankSymbols[nCurrencies] = arr[3]
    }
    else if ( $1 ~ /^<\/Currency>/ )
    {
        ++nCurrencies
    }
}


# Evaluate everything collected for the current file and print one line per
# finding, followed by an empty line if there was any finding.
# Reads the globals file, nFormats, nCurrencies and the Format*/Symbol*/ID*/
# BankSymbol* arrays. All names in the parameter list are local variables,
# not arguments.
function checkIt(    j, bad, state, bDef, bHasDefault, bHasCompati) {
    bad = 0
    for ( j=0; j<nFormats; ++j )
    {
        state = FormatInSymbol( Formats[j] )
        # "\xc2\xa4" is the generic currency sign in UTF-8.
        if ( Formats[j] == "\xc2\xa4" )
        {
            bad = 1
            print " bad: `" Formats[j] "' (" FormatLine[j] ")"
        }
        else if ( state == 0 )
        {
            bad = 1
            print "unknown: `" Formats[j] "' (" FormatLine[j] ")"
        }
        else if ( FormatAuto[j] && state < 2 )
        {
            bad = 1
            print "badauto: `" Formats[j] "' (" FormatLine[j] ")"
        }
    }
    if ( bad )
    {
        # Some format code was rejected: list every defined symbol with its
        # role.
        for ( j=0; j<nCurrencies; ++j )
        {
            bDef = 0
            if ( Symbols[j] == "\xc2\xa4" )
                print "def bad: `" Symbols[j] "' (" SymbolLine[j] ")"
            if ( SymbolDefault[j] )
            {
                bDef = 1
                print "default: `" Symbols[j] "' (" SymbolLine[j] ")"
            }
            if ( SymbolCompati[j] )
            {
                bDef = 1
                print "compati: `" Symbols[j] "' (" SymbolLine[j] ")"
            }
            if ( !bDef )
                print "defined: `" Symbols[j] "' (" SymbolLine[j] ")"
        }
    }
    else
    {
        # Format codes are fine: check that exactly one currency is the
        # default and exactly one is the compatible one.
        bHasDefault = 0
        bHasCompati = 0
        for ( j=0; j<nCurrencies; ++j )
        {
            if ( Symbols[j] == "\xc2\xa4" )
            {
                bad = 1
                print "def bad: `" Symbols[j] "' (" SymbolLine[j] ")"
            }
            if ( SymbolDefault[j] )
            {
                if ( !bHasDefault )
                    bHasDefault = 1
                else
                {
                    bad = 1
                    print "dupe default: `" Symbols[j] "' (" SymbolLine[j] ")"
                }
            }
            if ( SymbolCompati[j] )
            {
                if ( !bHasCompati )
                    bHasCompati = 1
                else
                {
                    bad = 1
                    print "dupe compati: `" Symbols[j] "' (" SymbolLine[j] ")"
                }
            }
        }
        if ( !bHasDefault )
        {
            bad = 1
            print " no default: (" file ")"
        }
        if ( !bHasCompati )
        {
            bad = 1
            print " no compati: (" file ")"
        }
    }
    for ( j=0; j<nCurrencies; ++j )
    {
        # Check if CurrencyID at least resembles some ISO 4217 code.
        # The only exception is zh_MO that had an erroneous original data set
        # with BankSymbol="P" (stored as ISO code in documents, hence copied to
        # CurrencyID now) and needs that entry for legacy documents.
        # There is a strange bug in gawk 3.1.4 that does a match of [A-Z] on
        # lower case except 'a', regardless of IGNORECASE setting, hence this
        # ugly notation. [[:upper:]] wouldn't be correct since we want only
        # ASCII to match.
        if ( IDs[j] !~ /^[ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ]$/ \
              && !(file == "zh_MO.xml" && IDs[j] == "P") )
        {
            bad = 1
            print "no ISO 4217 code: `" IDs[j] "' (" IDLine[j] ")"
        }
        # CurrencyID should equal BankSymbol for now.
        if ( IDs[j] != BankSymbols[j] )
        {
            bad = 1
            print "not equal: CurrencyID `" IDs[j] "' != BankSymbol `" BankSymbols[j] \
                "' (" IDLine[j] " and " BankSymbolLine[j] ")"
        }
    }
    if ( bad )
        print ""
}


# Look up `format' among the currency symbols collected in Symbols[].
# Returns 2 if a currency flagged in SymbolCompati[] has that symbol, 1 if
# only other currencies have it, 0 if no currency has it.
# `state' and `nSym' are local variables, not arguments.
function FormatInSymbol( format,    state, nSym ) {
    state = 0
    for ( nSym=0; nSym<nCurrencies; ++nSym )
    {
        if ( format == Symbols[nSym] )
        {
            # Two currencies can have the same symbol (e.g. az_AZ.xml 'man.'
            # for AZM and AZN), continue to lookup if the match isn't the
            # compatible one.
            if ( SymbolCompati[nSym] )
                return 2
            else
                state = 1
        }
    }
    return state
}

# vim: ts=4 sw=4 expandtab