From ebe61d378152378b6d827a26e004e3b4ed67d66a Mon Sep 17 00:00:00 2001
From: msagca <51697294+msagca@users.noreply.github.com>
Date: Sun, 7 May 2023 06:15:42 +0300
Subject: [PATCH 1/5] add support for ANTLR

---
 lib/rouge/demos/antlr     | 19 ++++++++++++
 lib/rouge/lexers/antlr.rb | 63 +++++++++++++++++++++++++++++++++++++++
 spec/lexers/antlr_spec.rb | 15 ++++++++++
 spec/visual/samples/antlr | 45 ++++++++++++++++++++++++++++
 4 files changed, 142 insertions(+)
 create mode 100644 lib/rouge/demos/antlr
 create mode 100644 lib/rouge/lexers/antlr.rb
 create mode 100644 spec/lexers/antlr_spec.rb
 create mode 100644 spec/visual/samples/antlr

diff --git a/lib/rouge/demos/antlr b/lib/rouge/demos/antlr
new file mode 100644
index 0000000000..adc9fd42bb
--- /dev/null
+++ b/lib/rouge/demos/antlr
@@ -0,0 +1,19 @@
+grammar awk;
+options { caseInsensitive = true; }
+/* parser rules */
+program: item_list item? EOF;
+param_list: name (',' name)* @{ action code };
+terminated_statement:
+  IF '(' expr ')' newline_opt terminated_statement (
+    ELSE newline_opt terminated_statement
+  )? # if_stmt
+  | WHILE '(' expr ')' newline_opt terminated_statement #while_stmt
+  | FOR '(' simple_statement_opt ';' expr_opt ';' simple_statement_opt ')' newline_opt
+    terminated_statement # for_stmt;
+COMMENT: '#' .*? NEWLINE -> channel(HIDDEN);
+ESC_NEWLINE: '\\' NEWLINE -> skip;
+STRING: '"' (~["\\\r\n] | ESCAPE_SEQUENCE)* '"';
+WORD: [A-Za-z_] [A-Za-z_0-9]*;
+// fragments
+fragment EXPONENT_PART: [eE] [+\-]? DIGIT_SEQUENCE;
+fragment HEX_CONSTANT: '0' [xX] [0-9A-Fa-f]+;
diff --git a/lib/rouge/lexers/antlr.rb b/lib/rouge/lexers/antlr.rb
new file mode 100644
index 0000000000..8f0c8e97f7
--- /dev/null
+++ b/lib/rouge/lexers/antlr.rb
@@ -0,0 +1,63 @@
+# -*- coding: utf-8 -*- #
+# frozen_string_literal: true
+
+module Rouge
+  module Lexers
+    class ANTLR < RegexLexer
+      title "ANTLR"
+      desc "ANother Tool for Language Recognition"
+      tag 'antlr'
+      filenames '*.g4'
+      word = /[a-z][a-zA-Z0-9_]*/
+      WORD = /[A-Z][a-zA-Z0-9_]*/
+      def self.keywords
+        @keywords ||= Set.new %w(
+          import fragment lexer parser grammar protected public private returns
+          locals throws catch finally mode options tokens channels channel type
+          popMode pushMode skip more
+        )
+      end
+      state :string do
+        rule %r/'/, Str, :pop!
+        rule %r/\\./, Str::Escape
+        rule %r/[^\\'\n]+/, Str
+      end
+      state :charset do
+        rule %r/\]/, Name::Variable, :pop!
+        rule %r/\\./, Str::Escape
+        rule %r/[^\\\[\]\n]+/, Name::Variable
+      end
+      state :label do
+        rule %r/[a-zA-Z0-9_]+/, Name::Label, :pop!
+        rule %r/\s+/, Text
+      end
+      state :block do
+        rule %r/}/, Punctuation, :pop!
+        rule %r/\\./, Str::Escape
+        rule %r/[^\\{}\s]+/, Name::Builtin
+        rule %r/\s+/, Text
+      end
+      state :root do
+        rule %r/\s+/, Text
+        rule %r(//.*?$), Comment::Single
+        rule %r(/\*.*?\*/)m, Comment::Multiline
+        rule %r/'/, Str, :string
+        rule %r/\[/, Name::Variable, :charset
+        rule %r/#/, Name::Label, :label
+        rule %r/{/, Punctuation, :block
+        rule %r/0|[1-9][0-9]*/, Num::Integer
+        rule %r/[-?@*+<=>~$.]/, Operator
+        rule %r/[;:()|]/, Punctuation
+        rule %r/\\./, Str::Escape
+        rule WORD, Name::Class
+        rule word do |m|
+          if self.class.keywords.include? m[0]
+            token Keyword
+          else
+            token Name::Function
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/spec/lexers/antlr_spec.rb b/spec/lexers/antlr_spec.rb
new file mode 100644
index 0000000000..4e1f9b7ceb
--- /dev/null
+++ b/spec/lexers/antlr_spec.rb
@@ -0,0 +1,15 @@
+# -*- coding: utf-8 -*- #
+# frozen_string_literal: true
+
+describe Rouge::Lexers::ANTLR do
+  let(:subject) { Rouge::Lexers::ANTLR.new }
+
+  describe 'guessing' do
+    include Support::Guessing
+
+    it 'guesses by filename' do
+      assert_guess :filename => 'foo.g4'
+    end
+
+  end
+end
diff --git a/spec/visual/samples/antlr b/spec/visual/samples/antlr
new file mode 100644
index 0000000000..fdb42ecd33
--- /dev/null
+++ b/spec/visual/samples/antlr
@@ -0,0 +1,45 @@
+grammar MyGrammar;
+// options
+options {
+  language = Ruby;
+  output = AST;
+  backtrack = true;
+}
+// lexer commands
+@header {
+  require 'strscan'
+}
+@lexer::members {
+  def scan_number
+    # implementation here
+  end
+}
+// lexer modes
+mode COMMAND_MODE;
+// lexer rules
+WORD: [a-zA-Z]+;
+NUMBER: [0-9]+;
+// parser rules
+parse: expr+;
+expr: WORD (PLUS | MINUS) expr | NUMBER;
+// rule actions
+expr
+  returns[int value]
+  @init {
+  $value = 0
+}:
+  WORD { $value = lookup_word($WORD.text) }
+  | NUMBER { $value = $NUMBER.text.to_i };
+// options and lexer commands in lexer rule
+WORD:
+  [a-zA-Z]+ {
+       # option example: sets the token type
+       $type = $options.myToken
+
+       # lexer command example: calls the method defined in @lexer::members
+       scan_number
+     };
+// lexer modes in lexer rule
+COMMAND: '/' -> pushMode(COMMAND_MODE);
+mode COMMAND_MODE;
+COMMAND_MODE_COMMAND: ~[\r\n]+ -> type(COMMAND);

From 7d2e389579ddd222e61149e4c4b2eda3348522df Mon Sep 17 00:00:00 2001
From: msagca <51697294+msagca@users.noreply.github.com>
Date: Mon, 8 May 2023 15:24:39 +0300
Subject: [PATCH 2/5] handle nested braces

---
 lib/rouge/demos/antlr     | 4 ++--
 lib/rouge/lexers/antlr.rb | 5 +++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/lib/rouge/demos/antlr b/lib/rouge/demos/antlr
index adc9fd42bb..8946dbab42 100644
--- a/lib/rouge/demos/antlr
+++ b/lib/rouge/demos/antlr
@@ -1,8 +1,8 @@
 grammar awk;
-options { caseInsensitive = true; }
+options { an option; }
 /* parser rules */
 program: item_list item? EOF;
-param_list: name (',' name)* @{ action code };
+param_list: name (',' name)* @{ {action code} };
 terminated_statement:
   IF '(' expr ')' newline_opt terminated_statement (
     ELSE newline_opt terminated_statement
diff --git a/lib/rouge/lexers/antlr.rb b/lib/rouge/lexers/antlr.rb
index 8f0c8e97f7..bd7842078d 100644
--- a/lib/rouge/lexers/antlr.rb
+++ b/lib/rouge/lexers/antlr.rb
@@ -32,7 +32,8 @@ def self.keywords
         rule %r/\s+/, Text
       end
       state :block do
-        rule %r/}/, Punctuation, :pop!
+        rule %r/{/, Name::Builtin, :block
+        rule %r/}/, Name::Builtin, :pop!
         rule %r/\\./, Str::Escape
         rule %r/[^\\{}\s]+/, Name::Builtin
         rule %r/\s+/, Text
@@ -44,7 +45,7 @@ def self.keywords
         rule %r/'/, Str, :string
         rule %r/\[/, Name::Variable, :charset
         rule %r/#/, Name::Label, :label
-        rule %r/{/, Punctuation, :block
+        rule %r/{/, Name::Builtin, :block
         rule %r/0|[1-9][0-9]*/, Num::Integer
         rule %r/[-?@*+<=>~$.]/, Operator
         rule %r/[;:()|]/, Punctuation

From a104d0db28ff1fcd9d30c5cf416b1f34f005decc Mon Sep 17 00:00:00 2001
From: msagca <51697294+msagca@users.noreply.github.com>
Date: Mon, 8 May 2023 22:39:38 +0300
Subject: [PATCH 3/5] improve antlr lexer

---
 lib/rouge/demos/antlr     |   3 +-
 lib/rouge/lexers/antlr.rb | 112 ++++++++++++++++++++++++++++----------
 2 files changed, 83 insertions(+), 32 deletions(-)

diff --git a/lib/rouge/demos/antlr b/lib/rouge/demos/antlr
index 8946dbab42..842cc77819 100644
--- a/lib/rouge/demos/antlr
+++ b/lib/rouge/demos/antlr
@@ -1,8 +1,7 @@
 grammar awk;
-options { an option; }
 /* parser rules */
 program: item_list item? EOF;
-param_list: name (',' name)* @{ {action code} };
+param_list /*comment*/ : name (',' name)* ;
 terminated_statement:
   IF '(' expr ')' newline_opt terminated_statement (
     ELSE newline_opt terminated_statement
diff --git a/lib/rouge/lexers/antlr.rb b/lib/rouge/lexers/antlr.rb
index bd7842078d..c7dee609d9 100644
--- a/lib/rouge/lexers/antlr.rb
+++ b/lib/rouge/lexers/antlr.rb
@@ -8,54 +8,106 @@ class ANTLR < RegexLexer
       desc "ANother Tool for Language Recognition"
       tag 'antlr'
       filenames '*.g4'
-      word = /[a-z][a-zA-Z0-9_]*/
-      WORD = /[A-Z][a-zA-Z0-9_]*/
       def self.keywords
         @keywords ||= Set.new %w(
-          import fragment lexer parser grammar protected public private returns
-          locals throws catch finally mode options tokens channels channel type
-          popMode pushMode skip more
+          catch channel channels finally fragment grammar import lexer locals
+          mode more options parser popMode private protected public pushMode
+          returns skip throws tokens type
         )
       end
+      identifier = %r/[A-Za-z][a-zA-Z0-9_]*/
+      integer = %r/0|[1-9][0-9]*/
+      parse_rule_name = false
+      rule_name = %r/[a-z][a-zA-Z0-9_]*/
+      token_name = %r/[A-Z][a-zA-Z0-9_]*/
+      state :whitespace do
+        rule %r/\s+/, Text
+      end
+      state :comment_and_whitespace do
+        mixin :whitespace
+        rule %r(//.*?$), Comment::Single
+        rule %r(/\*.*?\*/)m, Comment::Multiline
+      end
+      state :escape_sequence do
+        rule %r/\\./, Str::Escape
+      end
       state :string do
+        mixin :escape_sequence
         rule %r/'/, Str, :pop!
-        rule %r/\\./, Str::Escape
         rule %r/[^\\'\n]+/, Str
       end
-      state :charset do
-        rule %r/\]/, Name::Variable, :pop!
-        rule %r/\\./, Str::Escape
-        rule %r/[^\\\[\]\n]+/, Name::Variable
+      state :options_spec do
+        mixin :comment_and_whitespace
+        rule %r/{/, Punctuation
+        rule %r/}/, Punctuation, :pop!
+        rule %r/=/, Operator, :option_value
+        rule identifier, Name::Variable
       end
-      state :label do
-        rule %r/[a-zA-Z0-9_]+/, Name::Label, :pop!
-        rule %r/\s+/, Text
+      state :option_value do
+        mixin :comment_and_whitespace
+        rule %r/;/, Punctuation, :pop!
+        rule %r/\./, Punctuation # literal dot between identifiers; an unescaped . would shadow every rule below
+        rule %r/'/, Str, :string
+        rule %r/{/, Punctuation, :action_block
+        rule identifier, Name::Constant
+        rule integer, Num::Integer
+      end
-      state :block do
-        rule %r/{/, Name::Builtin, :block
-        rule %r/}/, Name::Builtin, :pop!
-        rule %r/\\./, Str::Escape
-        rule %r/[^\\{}\s]+/, Name::Builtin
-        rule %r/\s+/, Text
+      state :action_block do
+        mixin :escape_sequence
+        mixin :whitespace
+        rule %r/[^\\{}\s]+/, Name::Function
+        rule %r/{/, Punctuation, :action_block
+        rule %r/}/, Punctuation, :pop!
+      end
+      state :arg_action_block do
+        mixin :escape_sequence
+        mixin :whitespace
+        rule %r/[^\\\[\]]+/, Str
+        rule %r/\]/, Str, :pop!
+      end
+      state :label do
+        mixin :comment_and_whitespace
+        rule rule_name, Name::Label, :pop!
       end
       state :root do
-        rule %r/\s+/, Text
-        rule %r(//.*?$), Comment::Single
-        rule %r(/\*.*?\*/)m, Comment::Multiline
+        mixin :comment_and_whitespace
         rule %r/'/, Str, :string
-        rule %r/\[/, Name::Variable, :charset
+        rule %r/[|,.()]/, Punctuation
+        rule %r/[@<>=~\-+?*]/, Operator
+        rule %r/{/, Punctuation, :action_block
+        rule %r/\[/, Str, :arg_action_block
         rule %r/#/, Name::Label, :label
-        rule %r/{/, Name::Builtin, :block
         rule %r/0|[1-9][0-9]*/, Num::Integer
-        rule %r/[-?@*+<=>~$.]/, Operator
-        rule %r/[;:()|]/, Punctuation
-        rule %r/\\./, Str::Escape
-        rule WORD, Name::Class
-        rule word do |m|
+        rule %r/:/ do
+          token Punctuation
+          parse_rule_name = false
+        end
+        rule %r/;/ do
+          token Punctuation
+          parse_rule_name = true
+        end
+        rule token_name do
+          if parse_rule_name
+            token Name::Label
+          else
+            token Name::Class
+          end
+        end
+        rule rule_name do |m|
           if self.class.keywords.include? m[0]
             token Keyword
+            case m[0]
+            when 'grammar'
+              parse_rule_name = true
+            when 'options'
+              push :options_spec
+            when 'throws'
+              parse_rule_name = false
+            end
+          elsif parse_rule_name
+            token Name::Label
           else
-            token Name::Function
+            token Name::Variable
           end
         end
       end

From 8b742f6633821b2c7ec10f959e1f7889547b1dbd Mon Sep 17 00:00:00 2001
From: msagca <51697294+msagca@users.noreply.github.com>
Date: Mon, 8 May 2023 23:27:29 +0300
Subject: [PATCH 4/5] tiny styling update

---
 lib/rouge/lexers/antlr.rb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/rouge/lexers/antlr.rb b/lib/rouge/lexers/antlr.rb
index c7dee609d9..66fea899ee 100644
--- a/lib/rouge/lexers/antlr.rb
+++ b/lib/rouge/lexers/antlr.rb
@@ -72,12 +72,12 @@ def self.keywords
       state :root do
         mixin :comment_and_whitespace
         rule %r/'/, Str, :string
-        rule %r/[|,.()]/, Punctuation
         rule %r/[@<>=~\-+?*]/, Operator
+        rule %r/[|,.()]/, Punctuation
         rule %r/{/, Punctuation, :action_block
         rule %r/\[/, Str, :arg_action_block
         rule %r/#/, Name::Label, :label
-        rule %r/0|[1-9][0-9]*/, Num::Integer
+        rule integer, Num::Integer
         rule %r/:/ do
           token Punctuation
           parse_rule_name = false

From c67ff20fec3f0fcb142f80069282659d52cffbe0 Mon Sep 17 00:00:00 2001
From: msagca <51697294+msagca@users.noreply.github.com>
Date: Sun, 4 Jun 2023 23:43:14 +0300
Subject: [PATCH 5/5] fix a bug

---
 lib/rouge/demos/antlr     | 11 ++++++-----
 lib/rouge/lexers/antlr.rb | 26 ++++++++++++--------------
 spec/visual/samples/antlr | 14 +++-----------
 3 files changed, 21 insertions(+), 30 deletions(-)

diff --git a/lib/rouge/demos/antlr b/lib/rouge/demos/antlr
index 842cc77819..b1aca03fbb 100644
--- a/lib/rouge/demos/antlr
+++ b/lib/rouge/demos/antlr
@@ -1,7 +1,8 @@
-grammar awk;
-/* parser rules */
+grammar GrammarName;
+options { anOption = optionValue; }
 program: item_list item? EOF;
-param_list /*comment*/ : name (',' name)* ;
+param_list /* multi-line
+comment */ : name (',' name)* ;
 terminated_statement:
   IF '(' expr ')' newline_opt terminated_statement (
     ELSE newline_opt terminated_statement
@@ -12,7 +13,7 @@ terminated_statement:
 COMMENT: '#' .*? NEWLINE -> channel(HIDDEN);
 ESC_NEWLINE: '\\' NEWLINE -> skip;
 STRING: '"' (~["\\\r\n] | ESCAPE_SEQUENCE)* '"';
-WORD: [A-Za-z_] [A-Za-z_0-9]*;
-// fragments
+mode WORD_MODE;
+WORD: [A-Za-z_] [A-Za-z_0-9]*; // single line comment
 fragment EXPONENT_PART: [eE] [+\-]? DIGIT_SEQUENCE;
 fragment HEX_CONSTANT: '0' [xX] [0-9A-Fa-f]+;
diff --git a/lib/rouge/lexers/antlr.rb b/lib/rouge/lexers/antlr.rb
index 66fea899ee..fb4a70bfd8 100644
--- a/lib/rouge/lexers/antlr.rb
+++ b/lib/rouge/lexers/antlr.rb
@@ -15,11 +15,11 @@ def self.keywords
           returns skip throws tokens type
         )
       end
+      start { @get_label = true } # reset on every fresh lex; an ivar keeps the flag per lexer instead of in a shared class-body closure
       identifier = %r/[A-Za-z][a-zA-Z0-9_]*/
       integer = %r/0|[1-9][0-9]*/
-      parse_rule_name = false
-      rule_name = %r/[a-z][a-zA-Z0-9_]*/
-      token_name = %r/[A-Z][a-zA-Z0-9_]*/
+      lowercase_name = %r/[a-z][a-zA-Z0-9_]*/
+      uppercase_name = %r/[A-Z][a-zA-Z0-9_]*/
       state :whitespace do
         rule %r/\s+/, Text
       end
@@ -41,7 +41,7 @@ def self.keywords
         rule %r/{/, Punctuation
         rule %r/}/, Punctuation, :pop!
         rule %r/=/, Operator, :option_value
-        rule identifier, Name::Variable
+        rule identifier, Name::Attribute
       end
       state :option_value do
         mixin :comment_and_whitespace
@@ -67,7 +67,7 @@ def self.keywords
       end
       state :label do
         mixin :comment_and_whitespace
-        rule rule_name, Name::Label, :pop!
+        rule identifier, Name::Label, :pop! # alternative labels may start with either case (e.g. MulDiv)
       end
       state :root do
         mixin :comment_and_whitespace
@@ -80,31 +80,29 @@ def self.keywords
         rule integer, Num::Integer
         rule %r/:/ do
           token Punctuation
-          parse_rule_name = false
+          @get_label = false # past the rule header: names are references now
         end
         rule %r/;/ do
           token Punctuation
-          parse_rule_name = true
+          @get_label = true # rule finished: the next name declares a rule
         end
-        rule token_name do
-          if parse_rule_name
+        rule uppercase_name do
+          if @get_label
             token Name::Label
           else
             token Name::Class
           end
         end
-        rule rule_name do |m|
+        rule lowercase_name do |m|
           if self.class.keywords.include? m[0]
             token Keyword
             case m[0]
-            when 'grammar'
-              parse_rule_name = true
             when 'options'
               push :options_spec
             when 'throws'
-              parse_rule_name = false
+              @get_label = false # names after throws are not labelled as rule declarations
             end
-          elsif parse_rule_name
+          elsif @get_label
             token Name::Label
           else
             token Name::Variable
diff --git a/spec/visual/samples/antlr b/spec/visual/samples/antlr
index fdb42ecd33..540e68544c 100644
--- a/spec/visual/samples/antlr
+++ b/spec/visual/samples/antlr
@@ -1,11 +1,9 @@
 grammar MyGrammar;
-// options
 options {
   language = Ruby;
   output = AST;
   backtrack = true;
 }
-// lexer commands
 @header {
   require 'strscan'
 }
@@ -14,32 +12,26 @@ options {
     # implementation here
   end
 }
-// lexer modes
 mode COMMAND_MODE;
-// lexer rules
 WORD: [a-zA-Z]+;
 NUMBER: [0-9]+;
-// parser rules
-parse: expr+;
+parse: expr+; // single line comment
 expr: WORD (PLUS | MINUS) expr | NUMBER;
-// rule actions
 expr
   returns[int value]
   @init {
   $value = 0
 }:
-  WORD { $value = lookup_word($WORD.text) }
+  WORD { $value = lookup_word($WORD.text) } /* multi-line
+comment */
   | NUMBER { $value = $NUMBER.text.to_i };
-// options and lexer commands in lexer rule
 WORD:
   [a-zA-Z]+ {
        # option example: sets the token type
        $type = $options.myToken
-
        # lexer command example: calls the method defined in @lexer::members
        scan_number
      };
-// lexer modes in lexer rule
 COMMAND: '/' -> pushMode(COMMAND_MODE);
 mode COMMAND_MODE;
 COMMAND_MODE_COMMAND: ~[\r\n]+ -> type(COMMAND);