From d6820b2f6a5f4e47be7c1308a9e5878a4db61c97 Mon Sep 17 00:00:00 2001
From: Kevin Sellers
Date: Mon, 1 May 2023 12:50:40 +0000
Subject: [PATCH] Add support for Amazon Ion (#1017)

---
 lib/rouge/demos/ion     |  20 ++++
 lib/rouge/lexers/ion.rb | 185 ++++++++++++++++++++++++++++++++++
 spec/lexers/ion_spec.rb | 213 ++++++++++++++++++++++++++++++++++++++++
 spec/visual/samples/ion |  23 +++++
 4 files changed, 441 insertions(+)
 create mode 100644 lib/rouge/demos/ion
 create mode 100644 lib/rouge/lexers/ion.rb
 create mode 100644 spec/lexers/ion_spec.rb
 create mode 100644 spec/visual/samples/ion

diff --git a/lib/rouge/demos/ion b/lib/rouge/demos/ion
new file mode 100644
index 0000000000..d9a699d2f6
--- /dev/null
+++ b/lib/rouge/demos/ion
@@ -0,0 +1,20 @@
+/**
+ * Sample Ion document.
+ */
+{
+  key: "value",
+  'float': 1_234e-5,
+  "annotation": type:: null.string,
+  list: 'things':: ['symbol', nan, +inf, -inf, "other"],
+  sexp: (concat ('symbol' [list] "string" null)),
+  date: date:: 1970-01-01T00:00Z,
+  "long string": str:: (
+    '''
+    long strings are neat
+    '''
+  ),
+  struct: {
+    nested: { value: int:: 12_345 },
+  },
+  blob: {{ SGVsbG8sIFdvcmxkIQ== }}, // Hello, World!
+}
diff --git a/lib/rouge/lexers/ion.rb b/lib/rouge/lexers/ion.rb
new file mode 100644
index 0000000000..7e74c19d50
--- /dev/null
+++ b/lib/rouge/lexers/ion.rb
@@ -0,0 +1,185 @@
+# -*- coding: utf-8 -*- #
+# frozen_string_literal: true
+
+module Rouge
+  module Lexers
+    # Lexer for the text form of Amazon Ion: annotations, comments,
+    # constants, timestamps, numbers, symbols, strings and the four
+    # container kinds (struct, list, s-expression, blob).
+    class Ion < RegexLexer
+      title 'Ion'
+      desc 'Amazon Ion (https://amazon-ion.github.io/ion-docs)'
+      tag 'ion'
+      filenames '*.ion'
+      mimetypes 'application/ion'
+
+      # Fragments shared by several states. A quoted symbol (q) has to stop
+      # at the first unescaped single quote, otherwise adjacent symbols such
+      # as 'a', 'b' on one line are swallowed into a single token.
+      escapes = %r{\\(?:[\\abtR"'/?\\]|x\h{2}|u\h{4}|U\h{8})}
+      q = %r{'(?:#{escapes}|\\'|[^'\n\r])+'}
+      qq = %r{"(?:#{escapes}|\\"|[^"\n\r])+"}
+      qqq = %r{'''}
+      symbol = %r{[A-Za-z_\$](?:[0-9A-Za-z_\$])*}
+
+      state :qq do
+        rule qq, Literal::String::Double
+      end
+
+      # Body of a ''' long string; a lone quote is still string content.
+      state :qqq do
+        rule %r{'''}, Literal::String::Double, :pop!
+        rule %r{[^']+}m, Literal::String::Double
+        rule %r{'}, Literal::String::Double
+      end
+
+      state :quotes do
+        rule qqq, Literal::String::Double, :qqq
+        mixin :qq
+        rule symbol, Literal::String::Symbol
+        rule q, Literal::String::Symbol
+      end
+
+      # A bare or quoted symbol followed by `::` is highlighted as an annotation.
+      state :annotation do
+        annotation = %r{(?:[\u{0020}-\u{0026}]|[\u{0028}-\u{005B}]|[\u{005D}-\u{FFFF}]|[\t\b\f ])+}
+        rule %r{('#{annotation}'|#{symbol})(\s*)(::)} do
+          groups Name::Decorator, Text::Whitespace, Operator
+        end
+      end
+
+      state :comments do
+        rule %r{/\*.*?\*/}m, Comment::Multiline
+        rule %r{//.*?$}, Comment::Single
+      end
+
+      state :constants do
+        rule %r{(?:true|false)\b}, Name::Builtin
+        rule %r{null(?:\.(?:blob|bool|clob|decimal|float|int|list|null|sexp|string|struct|symbol|timestamp))?\b}, Name::Builtin
+      end
+
+      state :numbers do
+        rule %r{0b[01]+(?:_[01]+)*\b}, Literal::Number::Bin
+        rule %r{0x\h+(?:_\h+)*\b}, Literal::Number::Hex
+        rule %r{(?:nan|[+-]inf)\b}, Literal::Number::Float
+
+        integer = %r{-?(?:0|[1-9]\d*(?:_\d+)*)}
+        rule %r{#{integer}[.dD][+-]?(?:#{integer})*(?:[dDeE][+-]?#{integer})?}, Literal::Number::Float
+        rule %r{#{integer}[dDeE][+-]?#{integer}}, Literal::Number::Float
+        rule integer, Literal::Number::Integer
+      end
+
+      state :timestamps do
+        year = %r{000[1-9]|00[1-9]\d|0[1-9]\d{2}|[1-9]\d{3}}
+        month = %r{0[1-9]|1[0-2]}
+        day = %r{0[1-9]|[12]\d|3[01]}
+        date = %r{#{year}-#{month}-#{day}}
+
+        hour = %r{[01]\d|2[0-3]}
+        minute = %r{[0-5]\d}
+        second = %r{[0-5]\d(?:\.\d+)?}
+        offset = %r{Z|[+-]#{hour}:#{minute}}
+        time = %r{#{hour}:#{minute}(?::#{second})?#{offset}}
+
+        rule %r{#{date}(T#{time}?)?|#{year}(?:-#{month})?T}, Literal::Date
+      end
+
+      state :whitespace do
+        rule %r{\s+}, Text::Whitespace
+      end
+
+      state :blob do
+        rule %r/}}/, Punctuation::Indicator, :pop!
+
+        rule qqq, Literal::String::Double, :qqq
+        mixin :qq
+        mixin :whitespace
+
+        # no attempt to validate the Base64 blob
+        rule %r{(?:[A-Za-z0-9/\+=]+)}, Literal
+      end
+
+      state :containers do
+        rule %r/{{/, Punctuation::Indicator, :blob
+        rule %r{\[}, Punctuation::Indicator, :list
+        rule %r{\(}, Punctuation::Indicator, :sexp
+        rule %r/{/, Punctuation::Indicator, :struct
+      end
+
+      state :list do
+        rule %r{]}, Punctuation::Indicator, :pop!
+
+        mixin :containers
+        mixin :comments
+        mixin :annotation
+        mixin :whitespace
+        mixin :constants
+        mixin :timestamps
+        mixin :numbers
+        mixin :quotes
+
+        rule %r{[,=;]}, Punctuation
+      end
+
+      state :sexp do
+        rule %r{\)}, Punctuation::Indicator, :pop!
+        rule %r{(?:\+\+|--|<<|>>|\&\&|\.\.|\|\||[-+\*/=<>|&$^.#!%?@`~])}, Operator
+
+        mixin :containers
+        mixin :comments
+        mixin :annotation
+        mixin :whitespace
+        mixin :constants
+        mixin :timestamps
+        mixin :numbers
+        mixin :quotes
+      end
+
+      state :struct do
+        rule %r/}/, Punctuation::Indicator, :pop!
+
+        # a field name followed by `:` pushes the :value state
+        rule %r{(#{q}|#{qq}|#{symbol})(\s*)(:)} do
+          groups Name::Label, Text::Whitespace, Punctuation
+          push :value
+        end
+
+        mixin :containers
+        mixin :comments
+        mixin :whitespace
+      end
+
+      state :value do
+        mixin :containers
+        mixin :comments
+        mixin :whitespace
+        mixin :annotation
+        mixin :constants
+        mixin :timestamps
+        mixin :numbers
+        mixin :quotes
+
+        rule %r{,}, Punctuation, :pop!
+
+        # a closing brace ends the value and the enclosing struct
+        rule %r/(})/ do
+          groups Punctuation::Indicator
+          pop!(2)
+        end
+      end
+
+      state :root do
+        rule %r{\s*\A\$(?:ion_1_0|ion_symbol_table)\b}, Name::Builtin::Pseudo
+
+        mixin :comments
+        mixin :annotation
+        mixin :constants
+        mixin :timestamps
+        mixin :numbers
+        mixin :containers
+        mixin :quotes
+        mixin :whitespace
+      end
+    end
+  end
+end
diff --git a/spec/lexers/ion_spec.rb b/spec/lexers/ion_spec.rb
new file mode 100644
index 0000000000..f89b1d19d5
--- /dev/null
+++ b/spec/lexers/ion_spec.rb
@@ -0,0 +1,213 @@
+# -*- coding: utf-8 -*- #
+# frozen_string_literal: true
+
+describe Rouge::Lexers::Ion do
+  let(:subject) { Rouge::Lexers::Ion.new }
+
+  describe 'guessing' do
+    include Support::Guessing
+
+    it 'guesses by filename' do
+      assert_guess :filename => 'file.ion'
+    end
+
+    it 'guesses by mimetype' do
+      assert_guess :mimetype => 'application/ion'
+    end
+  end
+
+  describe 'lexing' do
+    include Support::Lexing
+
+    it 'handles a basic file' do
+      ion = <<~ION
+        /**
+         * comment
+         */
+        'annotation' :: {
+         a: 1,
+         'b': 2.0,
+         "c": float:: 1.23e-4,
+         "d": 'null':: null.symbol,
+         "e": 'timestamps':: [
+         1970-01-01T00:00Z,
+         1970T,
+         ],
+         f: 'constants and such':: [
+         nancy, nan,
+         "+infinity", +inf, -inf,
+         truee, false, true,
+         null.null, null,
+         nullish,
+         ],
+         "g": (add [0b101, 0xdeadbeef, -1_000] "hello"),
+         h: { i: '''string''', },
+         j: {{ aW9u }},
+        }
+      ION
+
+      assert_no_errors ion
+
+      assert_tokens_equal ion,
+        ["Comment.Multiline", "/**\n * comment\n */"],
+        ["Text.Whitespace", "\n"],
+        ["Name.Decorator", "'annotation'"],
+        ["Text.Whitespace", " "],
+        ["Operator", "::"],
+        ["Text.Whitespace", " "],
+        ["Punctuation.Indicator", "{"],
+        ["Text.Whitespace", "\n "],
+        ["Name.Label", "a"],
+        ["Punctuation", ":"],
+        ["Text.Whitespace", " "],
+        ["Literal.Number.Integer", "1"],
+        ["Punctuation", ","],
+        ["Text.Whitespace", "\n "],
+        ["Name.Label", "'b'"],
+        ["Punctuation", ":"],
+        ["Text.Whitespace", " "],
+        ["Literal.Number.Float", "2.0"],
+        ["Punctuation", ","],
+        ["Text.Whitespace", "\n "],
+        ["Name.Label", "\"c\""],
+        ["Punctuation", ":"],
+        ["Text.Whitespace", " "],
+        ["Name.Decorator", "float"],
+        ["Operator", "::"],
+        ["Text.Whitespace", " "],
+        ["Literal.Number.Float", "1.23e-4"],
+        ["Punctuation", ","],
+        ["Text.Whitespace", "\n "],
+        ["Name.Label", "\"d\""],
+        ["Punctuation", ":"],
+        ["Text.Whitespace", " "],
+        ["Name.Decorator", "'null'"],
+        ["Operator", "::"],
+        ["Text.Whitespace", " "],
+        ["Name.Builtin", "null.symbol"],
+        ["Punctuation", ","],
+        ["Text.Whitespace", "\n "],
+        ["Name.Label", "\"e\""],
+        ["Punctuation", ":"],
+        ["Text.Whitespace", " "],
+        ["Name.Decorator", "'timestamps'"],
+        ["Operator", "::"],
+        ["Text.Whitespace", " "],
+        ["Punctuation.Indicator", "["],
+        ["Text.Whitespace", "\n "],
+        ["Literal.Date", "1970-01-01T00:00Z"],
+        ["Punctuation", ","],
+        ["Text.Whitespace", "\n "],
+        ["Literal.Date", "1970T"],
+        ["Punctuation", ","],
+        ["Text.Whitespace", "\n "],
+        ["Punctuation.Indicator", "]"],
+        ["Punctuation", ","],
+        ["Text.Whitespace", "\n "],
+        ["Name.Label", "f"],
+        ["Punctuation", ":"],
+        ["Text.Whitespace", " "],
+        ["Name.Decorator", "'constants and such'"],
+        ["Operator", "::"],
+        ["Text.Whitespace", " "],
+        ["Punctuation.Indicator", "["],
+        ["Text.Whitespace", "\n "],
+        ["Literal.String.Symbol", "nancy"],
+        ["Punctuation", ","],
+        ["Text.Whitespace", " "],
+        ["Literal.Number.Float", "nan"],
+        ["Punctuation", ","],
+        ["Text.Whitespace", "\n "],
+        ["Literal.String.Double", "\"+infinity\""],
+        ["Punctuation", ","],
+        ["Text.Whitespace", " "],
+        ["Literal.Number.Float", "+inf"],
+        ["Punctuation", ","],
+        ["Text.Whitespace", " "],
+        ["Literal.Number.Float", "-inf"],
+        ["Punctuation", ","],
+        ["Text.Whitespace", "\n "],
+        ["Literal.String.Symbol", "truee"],
+        ["Punctuation", ","],
+        ["Text.Whitespace", " "],
+        ["Name.Builtin", "false"],
+        ["Punctuation", ","],
+        ["Text.Whitespace", " "],
+        ["Name.Builtin", "true"],
+        ["Punctuation", ","],
+        ["Text.Whitespace", "\n "],
+        ["Name.Builtin", "null.null"],
+        ["Punctuation", ","],
+        ["Text.Whitespace", " "],
+        ["Name.Builtin", "null"],
+        ["Punctuation", ","],
+        ["Text.Whitespace", "\n "],
+        ["Literal.String.Symbol", "nullish"],
+        ["Punctuation", ","],
+        ["Text.Whitespace", "\n "],
+        ["Punctuation.Indicator", "]"],
+        ["Punctuation", ","],
+        ["Text.Whitespace", "\n "],
+        ["Name.Label", "\"g\""],
+        ["Punctuation", ":"],
+        ["Text.Whitespace", " "],
+        ["Punctuation.Indicator", "("],
+        ["Literal.String.Symbol", "add"],
+        ["Text.Whitespace", " "],
+        ["Punctuation.Indicator", "["],
+        ["Literal.Number.Bin", "0b101"],
+        ["Punctuation", ","],
+        ["Text.Whitespace", " "],
+        ["Literal.Number.Hex", "0xdeadbeef"],
+        ["Punctuation", ","],
+        ["Text.Whitespace", " "],
+        ["Literal.Number.Integer", "-1_000"],
+        ["Punctuation.Indicator", "]"],
+        ["Text.Whitespace", " "],
+        ["Literal.String.Double", "\"hello\""],
+        ["Punctuation.Indicator", ")"],
+        ["Punctuation", ","],
+        ["Text.Whitespace", "\n "],
+        ["Name.Label", "h"],
+        ["Punctuation", ":"],
+        ["Text.Whitespace", " "],
+        ["Punctuation.Indicator", "{"],
+        ["Text.Whitespace", " "],
+        ["Name.Label", "i"],
+        ["Punctuation", ":"],
+        ["Text.Whitespace", " "],
+        ["Literal.String.Double", "'''string'''"],
+        ["Punctuation", ","],
+        ["Text.Whitespace", " "],
+        ["Punctuation.Indicator", "}"],
+        ["Punctuation", ","],
+        ["Text.Whitespace", "\n "],
+        ["Name.Label", "j"],
+        ["Punctuation", ":"],
+        ["Text.Whitespace", " "],
+        ["Punctuation.Indicator", "{{"],
+        ["Text.Whitespace", " "],
+        ["Literal", "aW9u"],
+        ["Text.Whitespace", " "],
+        ["Punctuation.Indicator", "}}"],
+        ["Punctuation", ","],
+        ["Text.Whitespace", "\n"],
+        ["Punctuation.Indicator", "}"],
+        ["Text.Whitespace", "\n"]
+    end
+
+    it 'lexes adjacent quoted symbols as separate tokens' do
+      ion = "['a', 'b']"
+
+      assert_no_errors ion
+
+      assert_tokens_equal ion,
+        ["Punctuation.Indicator", "["],
+        ["Literal.String.Symbol", "'a'"],
+        ["Punctuation", ","],
+        ["Text.Whitespace", " "],
+        ["Literal.String.Symbol", "'b'"],
+        ["Punctuation.Indicator", "]"]
+    end
+  end
+end
diff --git a/spec/visual/samples/ion b/spec/visual/samples/ion
new file mode 100644
index 0000000000..4863134b8a
--- /dev/null
+++ b/spec/visual/samples/ion
@@ -0,0 +1,23 @@
+/**
+ * comment
+ */
+'annotation' :: {
+  a: 1,
+  'b': 2.0,
+  "c": float:: 1.23e-4,
+  "d": 'null':: null.symbol,
+  "e": 'timestamps':: [
+    1970-01-01T00:00Z,
+    1970T,
+  ],
+  f: 'constants and such':: [
+    nancy, nan,
+    "+infinity", +inf, -inf,
+    truee, false, true,
+    null.null, null,
+    nullish,
+  ],
+  "g": (add [0b101, 0xdeadbeef, -1_000] "hello"),
+  h: { i: '''string''', },
+  j: {{ aW9u }},
+}