diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml index 91f3d244d..9eadec655 100644 --- a/.github/workflows/ruby.yml +++ b/.github/workflows/ruby.yml @@ -32,3 +32,5 @@ jobs: bundler-cache: true # runs 'bundle install' and caches installed gems automatically - name: Run tests run: bundle exec rspec spec + - name: Run strict tests + run: MODE=strict bundle exec rspec spec diff --git a/lib/monetize.rb b/lib/monetize.rb index dfedaba7d..361b1bb81 100644 --- a/lib/monetize.rb +++ b/lib/monetize.rb @@ -4,7 +4,8 @@ require 'monetize/core_extensions' require 'monetize/errors' require 'monetize/version' -require 'monetize/parser' +require 'monetize/optimistic_parser' +require 'monetize/strict_parser' require 'monetize/collection' module Monetize @@ -26,6 +27,10 @@ class << self # human text that we're dealing with fractions of cents. attr_accessor :expect_whole_subunits + # Specify which of the previously registered parsers should be used when parsing an input + # unless overridden using the :parser keyword option for the .parse and .parse! methods. + attr_accessor :default_parser + def parse(input, currency = Money.default_currency, options = {}) parse! input, currency, options rescue Error @@ -36,7 +41,7 @@ def parse!(input, currency = Money.default_currency, options = {}) return input if input.is_a?(Money) return from_numeric(input, currency) if input.is_a?(Numeric) - parser = Monetize::Parser.new(input, currency, options) + parser = fetch_parser(input, currency, options) amount, currency = parser.parse Money.from_amount(amount, currency) @@ -77,5 +82,29 @@ def extract_cents(input, currency = Money.default_currency) money = parse(input, currency) money.cents if money end + + # Registers a new parser class along with the default options. It can then be used by + # providing a :parser option when parsing an input or by specifying a default parser + # using Monetize.default_parser=. 
+ def register_parser(name, klass, options = {}) + @parsers ||= {} + @parsers[name] = [klass, options] + end + + private + + attr_reader :parsers + + def fetch_parser(input, currency, options) + parser_name = options[:parser] || default_parser + parser_klass, parser_options = parsers.fetch(parser_name) do + raise ArgumentError, "Parser not registered: #{parser_name}" + end + parser_klass.new(input, currency, parser_options.merge(options)) + end end end + +Monetize.register_parser(:optimistic, Monetize::OptimisticParser) +Monetize.register_parser(:strict, Monetize::StrictParser) +Monetize.default_parser = :optimistic diff --git a/lib/monetize/optimistic_parser.rb b/lib/monetize/optimistic_parser.rb new file mode 100644 index 000000000..dd1c41e16 --- /dev/null +++ b/lib/monetize/optimistic_parser.rb @@ -0,0 +1,168 @@ +# encoding: utf-8 + +require 'monetize/parser' + +module Monetize + class OptimisticParser < Parser + MULTIPLIER_REGEXP = Regexp.new(format('^(.*?\d)(%s)\b([^\d]*)$', MULTIPLIER_SUFFIXES.keys.join('|')), 'i') + + DEFAULT_DECIMAL_MARK = '.'.freeze + + def initialize(input, fallback_currency, options) + @input = input.to_s.strip + @fallback_currency = fallback_currency + @options = options + end + + def parse + currency = Money::Currency.wrap(parse_currency) + + multiplier_exp, input = extract_multiplier + + num = input.gsub(/(?:^#{currency.symbol}|[^\d.,'-]+)/, '') + + negative, num = extract_sign(num) + + num.chop! 
if num =~ /[\.|,]$/ + + major, minor = extract_major_minor(num, currency) + + amount = to_big_decimal([major, minor].join(DEFAULT_DECIMAL_MARK)) + amount = apply_multiplier(multiplier_exp, amount) + amount = apply_sign(negative, amount) + + [amount, currency] + end + + private + + private + + attr_reader :input, :fallback_currency, :options + + def to_big_decimal(value) + BigDecimal(value) + rescue ::ArgumentError => err + fail ParseError, err.message + end + + def parse_currency + computed_currency = nil + computed_currency = input[/[A-Z]{2,3}/] + computed_currency = nil unless CURRENCY_SYMBOLS.value?(computed_currency) + computed_currency ||= compute_currency if assume_from_symbol? + + + computed_currency || fallback_currency || Money.default_currency + end + + def assume_from_symbol? + options.fetch(:assume_from_symbol) { Monetize.assume_from_symbol } + end + + def expect_whole_subunits? + options.fetch(:expect_whole_subunits) { Monetize.expect_whole_subunits } + end + + def apply_multiplier(multiplier_exp, amount) + amount * 10**multiplier_exp + end + + def apply_sign(negative, amount) + negative ? amount * -1 : amount + end + + def compute_currency + match = input.match(currency_symbol_regex) + CURRENCY_SYMBOLS[match.to_s] if match + end + + def extract_major_minor(num, currency) + used_delimiters = num.scan(/[^\d]/).uniq + + case used_delimiters.length + when 0 + [num, 0] + when 2 + thousands_separator, decimal_mark = used_delimiters + split_major_minor(num.gsub(thousands_separator, ''), decimal_mark) + when 1 + extract_major_minor_with_single_delimiter(num, currency, used_delimiters.first) + else + fail ParseError, 'Invalid amount' + end + end + + def minor_has_correct_dp_for_currency_subunit?(minor, currency) + minor.length == currency.subunit_to_unit.to_s.length - 1 + end + + def extract_major_minor_with_single_delimiter(num, currency, delimiter) + if expect_whole_subunits? 
+ possible_major, possible_minor = split_major_minor(num, delimiter) + if minor_has_correct_dp_for_currency_subunit?(possible_minor, currency) + split_major_minor(num, delimiter) + else + extract_major_minor_with_tentative_delimiter(num, delimiter) + end + else + if delimiter == currency.decimal_mark + split_major_minor(num, delimiter) + elsif Monetize.enforce_currency_delimiters && delimiter == currency.thousands_separator + [num.gsub(delimiter, ''), 0] + else + extract_major_minor_with_tentative_delimiter(num, delimiter) + end + end + end + + def extract_major_minor_with_tentative_delimiter(num, delimiter) + if num.scan(delimiter).length > 1 + # Multiple matches; treat as thousands separator + [num.gsub(delimiter, ''), '00'] + else + possible_major, possible_minor = split_major_minor(num, delimiter) + + # Doesn't look like thousands separator + is_decimal_mark = possible_minor.length != 3 || + possible_major.length > 3 || + possible_major.to_i == 0 || + (!expect_whole_subunits? && delimiter == '.') + + if is_decimal_mark + [possible_major, possible_minor] + else + ["#{possible_major}#{possible_minor}", '00'] + end + end + end + + def extract_multiplier + if (matches = MULTIPLIER_REGEXP.match(input)) + multiplier_suffix = matches[2].upcase + [MULTIPLIER_SUFFIXES[multiplier_suffix], "#{$1}#{$3}"] + else + [0, input] + end + end + + def extract_sign(input) + result = (input =~ /^-+(.*)$/ || input =~ /^(.*)-+$/) ? [true, $1] : [false, input] + fail ParseError, 'Invalid amount (hyphen)' if result[1].include?('-') + result + end + + def regex_safe_symbols + CURRENCY_SYMBOLS.keys.map { |key| Regexp.escape(key) }.join('|') + end + + def split_major_minor(num, delimiter) + major, minor = num.split(delimiter) + [major, minor || '00'] + end + + def currency_symbol_regex + /(? 
3, 'M' => 6, 'B' => 9, 'T' => 12 } MULTIPLIER_SUFFIXES.default = 0 - MULTIPLIER_REGEXP = Regexp.new(format('^(.*?\d)(%s)\b([^\d]*)$', MULTIPLIER_SUFFIXES.keys.join('|')), 'i') - DEFAULT_DECIMAL_MARK = '.'.freeze - - def initialize(input, fallback_currency = Money.default_currency, options = {}) - @input = input.to_s.strip - @fallback_currency = fallback_currency - @options = options + def initialize(input, fallback_currency, options) + raise NotImplementedError, 'Monetize::Parser subclasses must implement #initialize' end def parse - currency = Money::Currency.wrap(parse_currency) - - multiplier_exp, input = extract_multiplier - - num = input.gsub(/(?:^#{currency.symbol}|[^\d.,'-]+)/, '') - - negative, num = extract_sign(num) - - num.chop! if num =~ /[\.|,]$/ - - major, minor = extract_major_minor(num, currency) - - amount = to_big_decimal([major, minor].join(DEFAULT_DECIMAL_MARK)) - amount = apply_multiplier(multiplier_exp, amount) - amount = apply_sign(negative, amount) - - [amount, currency] - end - - private - - def to_big_decimal(value) - BigDecimal(value) - rescue ::ArgumentError => err - fail ParseError, err.message - end - - attr_reader :input, :fallback_currency, :options - - def parse_currency - computed_currency = nil - computed_currency = input[/[A-Z]{2,3}/] - computed_currency = nil unless Monetize::Parser::CURRENCY_SYMBOLS.value?(computed_currency) - computed_currency ||= compute_currency if assume_from_symbol? - - - computed_currency || fallback_currency || Money.default_currency - end - - def assume_from_symbol? - options.fetch(:assume_from_symbol) { Monetize.assume_from_symbol } - end - - def expect_whole_subunits? - options.fetch(:expect_whole_subunits) { Monetize.expect_whole_subunits } - end - - def apply_multiplier(multiplier_exp, amount) - amount * 10**multiplier_exp - end - - def apply_sign(negative, amount) - negative ? 
amount * -1 : amount - end - - def compute_currency - match = input.match(currency_symbol_regex) - CURRENCY_SYMBOLS[match.to_s] if match - end - - def extract_major_minor(num, currency) - used_delimiters = num.scan(/[^\d]/).uniq - - case used_delimiters.length - when 0 - [num, 0] - when 2 - thousands_separator, decimal_mark = used_delimiters - split_major_minor(num.gsub(thousands_separator, ''), decimal_mark) - when 1 - extract_major_minor_with_single_delimiter(num, currency, used_delimiters.first) - else - fail ParseError, 'Invalid amount' - end - end - - def minor_has_correct_dp_for_currency_subunit?(minor, currency) - minor.length == currency.subunit_to_unit.to_s.length - 1 - end - - def extract_major_minor_with_single_delimiter(num, currency, delimiter) - if expect_whole_subunits? - possible_major, possible_minor = split_major_minor(num, delimiter) - if minor_has_correct_dp_for_currency_subunit?(possible_minor, currency) - split_major_minor(num, delimiter) - else - extract_major_minor_with_tentative_delimiter(num, delimiter) - end - else - if delimiter == currency.decimal_mark - split_major_minor(num, delimiter) - elsif Monetize.enforce_currency_delimiters && delimiter == currency.thousands_separator - [num.gsub(delimiter, ''), 0] - else - extract_major_minor_with_tentative_delimiter(num, delimiter) - end - end - end - - def extract_major_minor_with_tentative_delimiter(num, delimiter) - if num.scan(delimiter).length > 1 - # Multiple matches; treat as thousands separator - [num.gsub(delimiter, ''), '00'] - else - possible_major, possible_minor = split_major_minor(num, delimiter) - - # Doesn't look like thousands separator - is_decimal_mark = possible_minor.length != 3 || - possible_major.length > 3 || - possible_major.to_i == 0 || - (!expect_whole_subunits? 
&& delimiter == '.') - - if is_decimal_mark - [possible_major, possible_minor] - else - ["#{possible_major}#{possible_minor}", '00'] - end - end - end - - def extract_multiplier - if (matches = MULTIPLIER_REGEXP.match(input)) - multiplier_suffix = matches[2].upcase - [MULTIPLIER_SUFFIXES[multiplier_suffix], "#{$1}#{$3}"] - else - [0, input] - end - end - - def extract_sign(input) - result = (input =~ /^-+(.*)$/ || input =~ /^(.*)-+$/) ? [true, $1] : [false, input] - fail ParseError, 'Invalid amount (hyphen)' if result[1].include?('-') - result - end - - def regex_safe_symbols - CURRENCY_SYMBOLS.keys.map { |key| Regexp.escape(key) }.join('|') - end - - def split_major_minor(num, delimiter) - major, minor = num.split(delimiter) - [major, minor || '00'] - end - - def currency_symbol_regex - /(?" - creates a named capture + # "\k" - backreferences a named capture + # "?!" - negative lookahead (next character(-s) can't be the contents of this group) + AMOUNT_REGEXP = %r{ + ^ + (?: # whole units + (?: # try to capture units separated by thousands + \d{1,3} # must start with 3 or less whole numbers + (?:(?#{THOUSAND_SEPARATORS})\d{3})? # first occurrence of separated thousands, captures the separator + (?:\k\d{3})* # other iterations with the same exact separator + ) + |\d+ # fallback to non thousands-separated units + ) + (?: # this group captures subunits + (?!\k) # disallow captured thousands separator as decimals separator + (?#{DECIMAL_MARKS}) # captured decimal separator + \d+ # subunits + )? + $ + }ix.freeze + + def initialize(input, fallback_currency = Money.default_currency, options = {}) + @input = input.to_s + @options = options + @fallback_currency = Money::Currency.wrap(fallback_currency) + # This shouldn't be here, however String#to_money defaults currency to nil. Ideally we want + # the default to always be Money.default_currency unless specified. In that case an explicit + # nil would indicate that the currency must be determined from the input. 
+ @fallback_currency ||= Money.default_currency + end + + def parse + tokens = Tokenizer.new(input, options).process + + unless ALLOWED_FORMATS.include?(tokens.map(&:first)) + raise ParseError, "invalid input - #{tokens.map(&:first)}" + end + + parts = Struct.new(:amount, :sign, :symbol, :currency_iso).new + tokens.each { |token| parts[token.type] = token } + + currency = + if parts.currency_iso + parse_currency_iso(parts.currency_iso.match.to_s) + elsif parts.symbol && assume_from_symbol? + parse_symbol(parts.symbol.match.to_s) + else + fallback_currency + end + + amount = parse_amount(currency, parts.amount.match, parts.sign&.match) + + [amount, currency] + end + + private + + ALLOWED_FORMATS = [ + [:amount], # 9.99 + [:sign, :amount], # -9.99 + [:symbol, :amount], # £9.99 + [:sign, :symbol, :amount], # -£9.99 + [:symbol, :sign, :amount], # £-9.99 + [:symbol, :amount, :sign], # £9.99- + [:amount, :symbol], # 9.99£ + [:sign, :amount, :symbol], # -9.99£ + [:currency_iso, :amount], # GBP 9.99 + [:currency_iso, :sign, :amount], # GBP -9.99 + [:amount, :currency_iso], # 9.99 GBP + [:sign, :amount, :currency_iso], # -9.99 GBP + [:symbol, :amount, :currency_iso], # £9.99 GBP + [:sign, :symbol, :amount, :currency_iso], # -£9.99 GBP + ].freeze + + attr_reader :input, :fallback_currency, :options + + def parse_amount(currency, amount_match, sign) + amount = amount_match[:amount] + multiplier = amount_match[:multiplier] + + matches = amount.match(AMOUNT_REGEXP) + + unless matches + raise ParseError, 'the provided input does not contain a valid amount' + end + + thousands_separator = matches[:ts] + decimal_separator = matches[:ds] + + # A single thousands separator without a decimal separator might be considered a decimal + # separator in some cases (e.g. '1.001 TND' is likely 1.001 and not 1001). Here we need to + # check if the currency allows 3+ subunits. 
+ if thousands_separator && + !decimal_separator && + currency.subunit_to_unit > 100 && + amount.count(thousands_separator) == 1 + _, possible_subunits = amount.split(thousands_separator) + + if possible_subunits.length > 2 + decimal_separator = thousands_separator + thousands_separator = nil + end + end + + amount.gsub!(thousands_separator, '') if thousands_separator + amount.gsub!(decimal_separator, '.') if decimal_separator + amount = amount.to_f + + amount = apply_multiplier(amount, multiplier) + amount = apply_sign(amount, sign.to_s) + + amount + end + + def parse_symbol(symbol) + Money::Currency.wrap(CURRENCY_SYMBOLS[symbol]) + end + + def parse_currency_iso(currency_iso) + Money::Currency.wrap(currency_iso) + end + + def assume_from_symbol? + options.fetch(:assume_from_symbol) { Monetize.assume_from_symbol } + end + + def apply_multiplier(num, multiplier) + return num unless multiplier + + exponent = MULTIPLIER_SUFFIXES[multiplier.to_s.upcase] + num * 10**exponent + end + + def apply_sign(num, sign) + sign == '-' ? num * -1 : num + end + end +end diff --git a/lib/monetize/tokenizer.rb b/lib/monetize/tokenizer.rb new file mode 100644 index 000000000..645f65d0d --- /dev/null +++ b/lib/monetize/tokenizer.rb @@ -0,0 +1,77 @@ +require 'monetize/parser' + +module Monetize + class Tokenizer + SYMBOLS = Monetize::Parser::CURRENCY_SYMBOLS.keys.map { |symbol| Regexp.escape(symbol) }.freeze + THOUSAND_SEPARATORS = /[\.\ ,]/.freeze + DECIMAL_MARKS = /[\.,]/.freeze + MULTIPLIERS = Monetize::Parser::MULTIPLIER_SUFFIXES.keys.join('|').freeze + + REPLACEMENT_SYMBOL = '§'.freeze + SYMBOL_REGEXP = Regexp.new(SYMBOLS.join('|')).freeze + CURRENCY_ISO_REGEXP = /(? # amount group + \d+ # starts with at least one digit + (?:#{THOUSAND_SEPARATORS}\d{3})* # separated into groups of 3 digits by a thousands separator + (?!\d) # not followed by a digit + (?:#{DECIMAL_MARKS}\d+)? # might have decimal mark followed by decimal part + ) + (?#{MULTIPLIERS})? 
# optional multiplier + }ix.freeze + + class Token < Struct.new(:type, :match); end + + def initialize(input, options = {}) + @original_input = input + @options = options + end + + def process + # matches are removed from the input string to avoid overlapping matches + input = original_input.dup + result = [] + + result += match(input, :currency_iso, CURRENCY_ISO_REGEXP) + result += match(input, :symbol, SYMBOL_REGEXP) + result += match(input, :sign, SIGN_REGEXP) + result += match(input, :amount, AMOUNT_REGEXP) + + # allow only unmatched empty spaces, nothing else + unless input.gsub(REPLACEMENT_SYMBOL, '').strip.empty? + raise ParseError, 'non-exhaustive match' + end + + result.sort_by { |token| token.match.offset(0).first } + end + + private + + attr_reader :original_input, :options + + def match(input, type, regexp) + tokens = [] + input.gsub!(regexp) do + tokens << Token.new(type, Regexp.last_match) + # Replace the matches from the input with § to avoid overlapping matches. Stripping + # out the matches is dangerous because it can bring irrelevant things together: + # '12USD34' will become '1234' after removing currency, which is NOT expected. 
+ REPLACEMENT_SYMBOL * Regexp.last_match.to_s.length + end + + tokens + end + + def preview(result) + preview_input = original_input.dup + result.reverse.each do |token| + offset = token.match.offset(0) + preview_input.slice!(offset.first, token.match.to_s.length) + preview_input.insert(offset.first, "<#{token.type}>") + end + + puts preview_input + end + end +end diff --git a/spec/monetize_spec.rb b/spec/monetize_spec.rb index 00aad8002..14af6b230 100644 --- a/spec/monetize_spec.rb +++ b/spec/monetize_spec.rb @@ -36,6 +36,17 @@ } JSON + # Dummy parser that always returns an amount and currency specified via options + class TestParser < Monetize::Parser + def initialize(input, currency, options) + @options = options + end + + def parse + [@options[:amount], @options[:currency]] + end + end + describe '.parse' do it 'parses european-formatted inputs under 10EUR' do expect(Monetize.parse('EUR 5,95')).to eq Money.new(595, 'EUR') @@ -56,7 +67,7 @@ Monetize.assume_from_symbol = false end - Monetize::Parser::CURRENCY_SYMBOLS.each_pair do |symbol, iso_code| + Monetize::OptimisticParser::CURRENCY_SYMBOLS.each_pair do |symbol, iso_code| context iso_code do let(:currency) { Money::Currency.find(iso_code) } let(:amount) { 5_95 } @@ -390,6 +401,12 @@ expect(Monetize.parse('£10.00')).to eq Money.new(10_00, 'GBP') end end + + context 'when specified parser does not exist' do + it 'returns nil' do + expect(Monetize.parse('100 USD', nil, parser: :foo)).to eq(nil) + end + end end describe '.parse!' 
do @@ -406,6 +423,14 @@ it 'raises ArgumentError with invalid format' do expect { Monetize.parse!('11..0') }.to raise_error Monetize::ParseError end + + context 'when specified parser does not exist' do + it 'raises ArgumentError' do + expect do + Monetize.parse!('100 USD', nil, parser: :foo) + end.to raise_error(Monetize::ArgumentError, 'Parser not registered: foo') + end + end end describe '.parse_collection' do @@ -630,4 +655,39 @@ expect(4.635.to_money).to eq '4.635'.to_money end end + + describe '.register_parser' do + it 'registers a new parser with a provided name' do + Monetize.register_parser(:test, TestParser, amount: 42, currency: 'GBP') + + expect(Monetize.parse!('test', nil, parser: :test)).to eq(Money.new(42_00, 'GBP')) + end + + it 'registers the same parser with a different name' do + Monetize.register_parser(:test_1, TestParser, amount: 1, currency: 'GBP') + Monetize.register_parser(:test_2, TestParser, amount: 2, currency: 'USD') + + expect(Monetize.parse!('test', nil, parser: :test_1)).to eq(Money.new(1_00, 'GBP')) + expect(Monetize.parse!('test', nil, parser: :test_2)).to eq(Money.new(2_00, 'USD')) + end + + it 'overrides existing parser with the same name' do + Monetize.register_parser(:test, TestParser, amount: 42, currency: 'GBP') + Monetize.register_parser(:test, TestParser, amount: 99, currency: 'USD') + + expect(Monetize.parse!('test', nil, parser: :test)).to eq(Money.new(99_00, 'USD')) + end + end + + describe '.default_parser=' do + before { Monetize.register_parser(:test, TestParser, amount: 1, currency: 'USD') } + after { Monetize.default_parser = :optimistic } + + it 'specifies which parser to use by default' do + expect(Monetize.parse!('99 GBP')).to eq(Money.new(99_00, 'GBP')) + + Monetize.default_parser = :test + expect(Monetize.parse!('99 GBP')).to eq(Money.new(1_00, 'USD')) + end + end end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 51f6235e2..ed318e121 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ 
-4,4 +4,10 @@ RSpec.configure do |config| config.order = 'random' + if ENV['MODE'] == 'strict' + config.before(:each) do + Monetize.default_parser = :strict + end + end end +