Skip to content

Commit

Permalink
Merge pull request #233 from gocardless/joesouthan/iban-registry
Browse files Browse the repository at this point in the history
Update the IBAN registry and add new countries
  • Loading branch information
JoeSouthan authored Mar 28, 2023
2 parents f3c59e1 + a4386a1 commit 5564f04
Show file tree
Hide file tree
Showing 11 changed files with 2,083 additions and 492 deletions.
2 changes: 1 addition & 1 deletion .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ inherit_gem:
require: rubocop-rails

AllCops:
TargetRubyVersion: 2.5
TargetRubyVersion: 3.2

# Limit lines to 90 characters.
Layout/LineLength:
Expand Down
335 changes: 221 additions & 114 deletions bin/build_structure_file.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

# rubocop:disable Layout/LineLength

# Script for parsing the IBAN registry (IBAN_Registry.txt) and IBAN structures
# (IBANSTRUCTURE.xml) files from SWIFT.
require "csv"
Expand All @@ -22,148 +24,253 @@ class Country

# Root parse target for the IBAN structure XML document: `Report.parse` is
# called with the XML text and the result exposes a `countries` collection.
# NOTE(review): relies on SAXMachine's `elements` DSL; that both declarations
# below accumulate into the same `countries` list is assumed, not shown here —
# confirm against the sax-machine documentation.
class Report
include SAXMachine
# Each <ibanstructure> element is built as a Country and exposed via `countries`.
elements "ibanstructure", as: :countries, class: Country
# Same mapping for <ibanstructure_v2> elements.
# NOTE(review): presumably the element name used by newer exports — confirm.
elements "ibanstructure_v2", as: :countries, class: Country
end

# rubocop:disable Metrics/AbcSize
def get_iban_structures(iban_structures_file, iban_registry_file)
bban_formats = get_bban_formats(iban_registry_file)

report = Report.parse(iban_structures_file)
report.countries.each_with_object({}) do |country, hash|
hash[country.country_code] = {
bank_code_position: country.bank_code_position.to_i,
bank_code_length: country.bank_code_length.to_i,
branch_code_position: country.branch_code_position.to_i,
branch_code_length: country.branch_code_length.to_i,
account_number_position: country.account_number_position.to_i,
account_number_length: country.account_number_length.to_i,
total_length: country.total_length.to_i,
national_id_length: country.national_id_length.to_i,
}.merge(bban_formats[country.country_code])
class IbanRegistryTextFile
attr_accessor :lines, :registry

# Positions of the IBAN registry data elements within one country's record.
# Only the elements this script reads get a named constant.
#
#  0 Data element
#  1 Name of country
#  2 IBAN prefix country code (ISO 3166)
#  3 Country code includes other countries/territories
#  4 SEPA country
#  5 SEPA country also includes
#  6 Domestic account number example
#  7 BBAN
#  8 BBAN structure
#  9 BBAN length
# 10 Bank identifier position within the BBAN
# 11 Bank identifier pattern
# 12 Branch identifier position within the BBAN
# 13 Branch identifier pattern
# 14 Bank identifier example
# 15 Branch identifier example
# 16 BBAN example
# 17 IBAN
# 18 IBAN structure
# 19 IBAN length
# 20 Effective date
# 21 IBAN electronic format example
COUNTRY_CODE = 2
DOMESTIC_ACCOUNT_NUMBER_EXAMPLE = 6
BBAN_STRUCTURE = 8
BANK_IDENTIFIER_POSITION = 10
BANK_IDENTIFIER_PATTERN = 11
BRANCH_IDENTIFIER_POSITION = 12
BRANCH_IDENTIFIER_PATTERN = 13
BBAN_EXAMPLE = 16
IBAN_EXAMPLE = 21

# Every named position above, in ascending order.
FILE_ELEMENTS = [
  COUNTRY_CODE,
  DOMESTIC_ACCOUNT_NUMBER_EXAMPLE,
  BBAN_STRUCTURE,
  BANK_IDENTIFIER_POSITION,
  BANK_IDENTIFIER_PATTERN,
  BRANCH_IDENTIFIER_POSITION,
  BRANCH_IDENTIFIER_PATTERN,
  BBAN_EXAMPLE,
  IBAN_EXAMPLE,
].freeze

# Loads the tab-separated IBAN registry export and returns a parsed instance.
#
# The file is read as ISO-8859-1 with a header row, turned into an array of
# rows and transposed. The first row of the transposed data is discarded and
# the rest is passed to `new`; `parse` is run on the instance before it is
# returned.
#
# @param path [String] location of the registry file, resolved against this
#   script's directory
# @return [IbanRegistryTextFile] the instance, after `parse` has been called
def self.call(path = "../data/raw/IBAN_Registry.txt")
  absolute_path = File.expand_path(path, __dir__)
  table = CSV.read(
    absolute_path,
    col_sep: "\t",
    headers: true,
    encoding: Encoding::ISO_8859_1,
  )

  records = table.to_a.transpose
  records.shift

  instance = new(records)
  instance.parse
  instance
end
end
# rubocop:enable Metrics/AbcSize

# Positions of the IBAN registry data elements within one country's record.
# Only the elements this script reads get a named constant.
#
#  0 Data element
#  1 Name of country
#  2 IBAN prefix country code (ISO 3166)
#  3 Country code includes other countries/territories
#  4 SEPA country
#  5 SEPA country also includes
#  6 Domestic account number example
#  7 BBAN
#  8 BBAN structure
#  9 BBAN length
# 10 Bank identifier position within the BBAN
# 11 Bank identifier pattern
# 12 Branch identifier position within the BBAN
# 13 Branch identifier pattern
# 14 Bank identifier example
# 15 Branch identifier example
# 16 BBAN example
# 17 IBAN
# 18 IBAN structure
# 19 IBAN length
# 20 Effective date
# 21 IBAN electronic format example
COUNTRY_CODE = 2
BBAN_STRUCTURE = 8
BANK_IDENTIFIER_PATTERN = 11
BRANCH_IDENTIFIER_PATTERN = 13

# Every named position above, in ascending order.
FILE_ELEMENTS = [
  COUNTRY_CODE,
  BBAN_STRUCTURE,
  BANK_IDENTIFIER_PATTERN,
  BRANCH_IDENTIFIER_PATTERN,
].freeze

def get_bban_formats(iban_registry_file)
iban_registry_file.each_with_object({}) do |line, hash|
bban_structure = line[BBAN_STRUCTURE].strip

bank_code_structure = line[BANK_IDENTIFIER_PATTERN].strip
branch_code_structure = line[BRANCH_IDENTIFIER_PATTERN]&.strip

bank_code_structure = "" if bank_code_structure == "N/A"

country_code = line[COUNTRY_CODE].strip
hash[country_code] = convert_swift_convention(bban_structure,
bank_code_structure,
branch_code_structure)

# Stores the registry records and starts with an empty registry.
#
# @param lines [Array] the records later walked by `parse`
def initialize(lines)
  @registry = {}
  @lines = lines
end
end

# IBAN Registry has BBAN format (which seems to be accurate), and Bank
# identifier length, which contains something roughly like the format for the
# bank code and usually the branch code where applicable. This is a best attempt
# to convert those from weird SWIFT-talk into regexes, and then work out the
# account number format regex by taking the bank and branch code regexes off
# the front of the BBAN format.
#
# This works about 70% of the time, the rest are overridden in
# structure_additions.yml
def convert_swift_convention(bban, bank, branch)
bban_regex = iban_registry_to_regex(bban)
bank_regex = iban_registry_to_regex(bank)
branch_regex = branch.nil? ? nil : iban_registry_to_regex(branch)

non_account_number_regex = [bank_regex, branch_regex].join
account_number_start = (bban_regex.index(non_account_number_regex) || 0) +
non_account_number_regex.length
account_number_regex = bban_regex[account_number_start..-1]

{
bban_format: bban_regex,
bank_code_format: bank_regex,
branch_code_format: branch_regex,
account_number_format: account_number_regex,
}.compact
# Fills `registry` with one entry per record in `lines`, keyed by the record's
# cleaned country code.
#
# Each entry holds the IBAN, BBAN and domestic account number examples plus
# whatever `convert_swift_convention` derives from the structure columns. If
# that conversion returns nil an empty hash is used instead, so only the
# examples are stored. Keys whose value is nil are dropped.
def parse
  lines.each do |record|
    field = ->(index) { clean_string(record[index]) }
    code = field.call(COUNTRY_CODE)

    formats = convert_swift_convention(
      country_code: code,
      bban_structure: field.call(BBAN_STRUCTURE),
      bank_code_structure: field.call(BANK_IDENTIFIER_PATTERN),
      branch_code_structure: field.call(BRANCH_IDENTIFIER_PATTERN),
      bank_identifier_position: field.call(BANK_IDENTIFIER_POSITION),
      branch_identifier_position: field.call(BRANCH_IDENTIFIER_POSITION),
    )

    examples = {
      iban_example: field.call(IBAN_EXAMPLE),
      bban_example: field.call(BBAN_EXAMPLE),
      domestic_account_number_example: field.call(DOMESTIC_ACCOUNT_NUMBER_EXAMPLE),
    }

    registry[code] = examples.merge(formats || {}).compact
  end
end

private

# Normalises a raw registry cell.
#
# @param string [String, nil] the cell value; left untouched
# @return [String, nil] a copy with surrounding whitespace removed, or nil
#   when the cell is nil or is "N/A" after stripping
def clean_string(string)
  return nil if string.nil?

  # Non-destructive on purpose: `strip!` would alter the caller's data in
  # place and raise FrozenError when handed a frozen string.
  stripped = string.strip
  stripped == "N/A" ? nil : stripped
end

# Turns the SWIFT-notation columns of one registry record into regex sources.
#
# The registry's BBAN structure appears to be accurate; the bank and branch
# identifier patterns only roughly describe the bank/branch codes. This is a
# best attempt: the BBAN structure is cut into its fixed-length segments, every
# segment ending at or before the last position covered by the bank/branch
# identifier ranges is treated as bank detail, and the remaining segments are
# taken to be the account number.
#
# This works about 90% of the time, the rest are overridden in
# structure_additions.yml
#
# Returns a hash with :bban_format, :bank_code_format, :branch_code_format
# (nil when no branch structure is given) and :account_number_format. If any
# StandardError is raised along the way, the inputs and the error are printed
# to stdout and nil is returned.
def convert_swift_convention( # rubocop:todo Metrics/AbcSize
  country_code:,
  bban_structure:,
  branch_code_structure:,
  bank_code_structure: nil,
  bank_identifier_position: nil,
  branch_identifier_position: nil
)
  bban_regex = iban_registry_to_regex(bban_structure)
  bank_regex = iban_registry_to_regex(bank_code_structure)
  branch_regex = iban_registry_to_regex(branch_code_structure) unless branch_code_structure.nil?

  segments = create_bban_ranges(bban_structure)
  identifier_ranges = [bank_identifier_position, branch_identifier_position].
    map { |position| convert_string_range(position) }.
    compact.
    uniq
  last_identifier_index = identifier_ranges.map(&:last).max

  # Keep only the segments that extend past the bank/branch identifiers.
  account_segments = segments.reject do |_, range|
    last_identifier_index >= range.last
  end
  account_number_regex = iban_registry_to_regex(account_segments.map(&:first).join)

  {
    bban_format: bban_regex.source,
    bank_code_format: bank_regex.source,
    branch_code_format: branch_regex&.source,
    account_number_format: account_number_regex.source,
  }
rescue StandardError => e
  puts(
    "-----------------",
    "Issue with: #{country_code}",
    "\t #{e.message}",
    "\t #{e.backtrace}",
    "\t -----------------",
    "\t country_code: #{country_code}",
    "\t bban_structure: #{bban_structure}",
    "\t branch_code_structure: #{branch_code_structure}",
    "\t bank_code_structure: #{bank_code_structure}",
    "\t bank_identifier_position: #{bank_identifier_position}",
    "\t branch_identifier_position: #{branch_identifier_position}",
  )
end

# Splits a SWIFT structure string into its fixed-length segments and pairs each
# one with the zero-based, inclusive index range it occupies. For example
# "4!n4!n12!c" gives [["4!n", 0..3], ["4!n", 4..7], ["12!c", 8..19]].
# Text that is not of the form <digits>!<n|a|c> is ignored.
def create_bban_ranges(bban_structure)
  offset = 0

  bban_structure.scan(/((\d+)![anc])/).map do |token, width|
    first_index = offset
    offset += width.to_i
    [token, first_index..(offset - 1)]
  end
end

# Converts a "start-end" position string such as "1-4" into a Range with both
# bounds shifted down by one (0..3).
# NOTE(review): the shift suggests the registry positions are 1-based — confirm.
#
# @param str [String, nil] the position text
# @return [Range, nil] nil when str is nil or does not supply both bounds
def convert_string_range(str)
  return nil if str.nil?

  start_val, end_val = str.split("-").map(&:to_i)
  # Explicit guard instead of a blanket rescue, so genuine errors are not
  # silently converted into "no range".
  return nil if start_val.nil? || end_val.nil?

  (start_val - 1)..(end_val - 1)
end

# Builds a Regexp from a SWIFT structure string. Every <length>!<type> token
# becomes a character class repeated exactly <length> times: n is a digit,
# a an upper-case letter, c an upper-case letter or digit. All other text is
# passed through to the pattern unchanged.
def iban_registry_to_regex(swift_string)
  character_classes = { "n" => '\d', "a" => "[A-Z]", "c" => "[A-Z0-9]" }

  pattern = swift_string.gsub(/(\d+)!([nac])/) do
    length = Regexp.last_match(1)
    type = Regexp.last_match(2)
    "#{character_classes.fetch(type)}{#{length}}"
  end

  Regexp.new(pattern)
end
end

def iban_registry_to_regex(swift_string)
swift_string.gsub(/(\d+)!([nac])/, '\2{\1}').
gsub("n", '\d').
gsub("a", "[A-Z]").
gsub("c", "[A-Z0-9]")
# Combines the numeric layout found in the IBAN structure XML with the
# per-country data already collected from the IBAN registry.
class IbanStructureFile
  # Attributes read from each parsed country and stored as integers.
  NUMERIC_FIELDS = %i[
    bank_code_position
    bank_code_length
    branch_code_position
    branch_code_length
    account_number_position
    account_number_length
    total_length
    national_id_length
  ].freeze

  attr_accessor :report, :iban_registry_file

  # Reads the XML at `path` (resolved against this script's directory) and
  # returns the hash produced by #parse.
  def self.call(iban_registry_file, path: "../data/raw/IBANSTRUCTURE.xml")
    xml = File.read(File.expand_path(path, __dir__))
    new(iban_registry_file: iban_registry_file, iban_structures_file: xml).parse
  end

  # @param iban_registry_file object whose `registry` is looked up by country
  #   code in #parse
  # @param iban_structures_file XML text handed to Report.parse
  def initialize(iban_registry_file:, iban_structures_file:)
    @report = Report.parse(iban_structures_file)
    @iban_registry_file = iban_registry_file
  end

  # Returns a hash keyed by country code. Each value holds the numeric layout
  # fields converted with `to_i`, followed by the registry entry for that
  # country; countries with no registry entry get the numeric fields only.
  # Registry values win when a key occurs in both.
  def parse
    report.countries.to_h do |country|
      code = country.country_code
      layout = NUMERIC_FIELDS.to_h { |field| [field, country.public_send(field).to_i] }
      registry_entry = iban_registry_file.registry[code] || {}

      [code, layout.merge(registry_entry)]
    end
  end
end

# Applies per-country overrides on top of the generated structures, in place.
#
# Only countries already present in `structures` are touched; additions for
# unknown countries are ignored. After merging, nil-valued keys are stripped
# from that country's hash, so an override of nil removes the key.
#
# @param structures [Hash] country code => attribute hash; mutated
# @param additions [Hash] country code => attribute overrides
# @return [Hash] the same `structures` object
def merge_structures(structures, additions)
  additions.each_pair do |key, value|
    next unless structures.key?(key)

    # A single merge-and-compact; the earlier plain `merge!` of the same
    # value was redundant.
    structures[key].merge!(value).compact!
  end

  structures
end

# Reads the YAML file at `path` (resolved against this script's directory) and
# returns its parsed content. Parsing uses YAML.safe_load with Range, Symbol
# and Regexp as the permitted classes.
def load_yaml_file(path)
  absolute_path = File.expand_path(path, __dir__)
  contents = File.read(absolute_path)

  YAML.safe_load(contents, permitted_classes: [Range, Symbol, Regexp])
end

# Only parse the files if this file is run as an executable (not required in,
# as it is in the specs)
if __FILE__ == $PROGRAM_NAME
  # Previous output, loaded before it is overwritten so that newly added
  # countries can be reported at the end.
  old_file = load_yaml_file("../data/structures.yml")

  iban_registry_file = IbanRegistryTextFile.call
  iban_structures = IbanStructureFile.call(iban_registry_file)

  # Overrides for generated structures, then entries merged in wholesale.
  structure_additions = load_yaml_file("../data/raw/structure_additions.yml")
  complete_structures = merge_structures(iban_structures, structure_additions)

  pseudo_ibans = load_yaml_file("../data/raw/pseudo_ibans.yml")
  complete_structures.merge!(pseudo_ibans)

  output_file_path = File.expand_path("../data/structures.yml", __dir__)
  File.write(output_file_path, complete_structures.to_yaml)

  # Only keys present now but absent from the old file are new. A symmetric
  # difference would also include removed countries, for which
  # complete_structures[country] is nil and the lookup below would raise.
  new_countries = complete_structures.keys - old_file.keys
  puts "New countries:"
  new_countries.each do |country|
    details = complete_structures[country]
    puts "#{country} #{details[:iban_example]} #{details[:domestic_account_number_example]}"
  end
end

# rubocop:enable Layout/LineLength
Loading

0 comments on commit 5564f04

Please sign in to comment.