diff --git a/lib/rouge/demos/antlr b/lib/rouge/demos/antlr
new file mode 100644
index 0000000000..b1aca03fbb
--- /dev/null
+++ b/lib/rouge/demos/antlr
@@ -0,0 +1,19 @@
+grammar GrammarName;
+options { anOption = optionValue; }
+program: item_list item? EOF;
+param_list /* multi-line
+comment */ : name (',' name)* ;
+terminated_statement:
+    IF '(' expr ')' newline_opt terminated_statement (
+      ELSE newline_opt terminated_statement
+    )? # if_stmt
+    | WHILE '(' expr ')' newline_opt terminated_statement #while_stmt
+    | FOR '(' simple_statement_opt ';' expr_opt ';' simple_statement_opt ')' newline_opt
+      terminated_statement # for_stmt;
+COMMENT: '#' .*? NEWLINE -> channel(HIDDEN);
+ESC_NEWLINE: '\\' NEWLINE -> skip;
+STRING: '"' (~["\\\r\n] | ESCAPE_SEQUENCE)* '"';
+mode WORD_MODE;
+WORD: [A-Za-z_] [A-Za-z_0-9]*; // single line comment
+fragment EXPONENT_PART: [eE] [+\-]? DIGIT_SEQUENCE;
+fragment HEX_CONSTANT: '0' [xX] [0-9A-Fa-f]+;
diff --git a/lib/rouge/lexers/antlr.rb b/lib/rouge/lexers/antlr.rb
new file mode 100644
index 0000000000..fb4a70bfd8
--- /dev/null
+++ b/lib/rouge/lexers/antlr.rb
@@ -0,0 +1,150 @@
+# -*- coding: utf-8 -*- #
+# frozen_string_literal: true
+
+module Rouge
+  module Lexers
+    # Lexer for ANTLR grammar files (matched by the '*.g4' filename glob).
+    #
+    # A name is emitted as Name::Label while a declaration is expected (at
+    # the start of input and after every ';' seen in :root). After a ':' and
+    # until the next ';', uppercase names are emitted as Name::Class and
+    # lowercase names as Name::Variable.
+    class ANTLR < RegexLexer
+      title "ANTLR"
+      desc "ANother Tool for Language Recognition"
+      tag 'antlr'
+      filenames '*.g4'
+
+      # Lowercase names that are highlighted as Keyword instead of as a name.
+      def self.keywords
+        @keywords ||= Set.new %w(
+          catch channel channels finally fragment grammar import lexer locals
+          mode more options parser popMode private protected public pushMode
+          returns skip throws tokens type
+        )
+      end
+
+      identifier = %r/[A-Za-z][a-zA-Z0-9_]*/
+      integer = %r/0|[1-9][0-9]*/
+      lowercase_name = %r/[a-z][a-zA-Z0-9_]*/
+      uppercase_name = %r/[A-Z][a-zA-Z0-9_]*/
+
+      # @get_label says whether the next name is a declaration. It lives on
+      # the lexer instance and is re-initialised whenever the lexer is reset;
+      # a local captured by the rule blocks would be shared by every instance
+      # and would carry over from one input to the next.
+      start { @get_label = true }
+
+      state :whitespace do
+        rule %r/\s+/, Text
+      end
+
+      state :comment_and_whitespace do
+        mixin :whitespace
+        rule %r(//.*?$), Comment::Single
+        rule %r(/\*.*?\*/)m, Comment::Multiline
+      end
+
+      state :escape_sequence do
+        rule %r/\\./, Str::Escape
+      end
+
+      # Inside a single-quoted literal, after the opening quote.
+      state :string do
+        mixin :escape_sequence
+        rule %r/'/, Str, :pop!
+        rule %r/[^\\'\n]+/, Str
+      end
+
+      # Entered after the 'options' keyword; left at the closing brace.
+      state :options_spec do
+        mixin :comment_and_whitespace
+        rule %r/{/, Punctuation
+        rule %r/}/, Punctuation, :pop!
+        rule %r/=/, Operator, :option_value
+        rule identifier, Name::Attribute
+      end
+
+      # Right-hand side of an option assignment, up to the terminating ';'.
+      state :option_value do
+        mixin :comment_and_whitespace
+        rule %r/;/, Punctuation, :pop!
+        # The dot must be escaped: a bare /./ matches any character except a
+        # newline and would keep the rules below it from ever being tried.
+        rule %r/\./, Punctuation
+        rule %r/'/, Str, :string
+        rule %r/{/, Punctuation, :action_block
+        rule identifier, Name::Constant
+        rule integer, Num::Integer
+      end
+
+      # Brace-delimited block; pushes itself again for nested braces.
+      state :action_block do
+        mixin :escape_sequence
+        mixin :whitespace
+        rule %r/[^\\{}\s]+/, Name::Function
+        rule %r/{/, Punctuation, :action_block
+        rule %r/}/, Punctuation, :pop!
+      end
+
+      # Bracket-delimited text, entered after '[' in :root.
+      state :arg_action_block do
+        mixin :escape_sequence
+        mixin :whitespace
+        rule %r/[^\\\[\]]+/, Str
+        rule %r/\]/, Str, :pop!
+      end
+
+      # Name following '#'. Any identifier is accepted, not only lowercase
+      # ones, so that an uppercase label also ends the state.
+      state :label do
+        mixin :comment_and_whitespace
+        rule identifier, Name::Label, :pop!
+      end
+
+      state :root do
+        mixin :comment_and_whitespace
+        rule %r/'/, Str, :string
+        rule %r/[@<>=~\-+?*]/, Operator
+        rule %r/[|,.()]/, Punctuation
+        rule %r/{/, Punctuation, :action_block
+        rule %r/\[/, Str, :arg_action_block
+        rule %r/#/, Name::Label, :label
+        rule integer, Num::Integer
+
+        # ':' opens a rule body: names after it are references.
+        rule %r/:/ do
+          token Punctuation
+          @get_label = false
+        end
+
+        # ';' closes the rule: the next name is a declaration again.
+        rule %r/;/ do
+          token Punctuation
+          @get_label = true
+        end
+
+        rule uppercase_name do
+          token(@get_label ? Name::Label : Name::Class)
+        end
+
+        rule lowercase_name do |m|
+          if self.class.keywords.include? m[0]
+            token Keyword
+            case m[0]
+            when 'options'
+              push :options_spec
+            when 'throws'
+              # Names after 'throws' are emitted as references, not labels.
+              @get_label = false
+            end
+          elsif @get_label
+            token Name::Label
+          else
+            token Name::Variable
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/spec/lexers/antlr_spec.rb b/spec/lexers/antlr_spec.rb
new file mode 100644
index 0000000000..4e1f9b7ceb
--- /dev/null
+++ b/spec/lexers/antlr_spec.rb
@@ -0,0 +1,15 @@
+# -*- coding: utf-8 -*- #
+# frozen_string_literal: true
+
+describe Rouge::Lexers::ANTLR do
+  let(:subject) { Rouge::Lexers::ANTLR.new }
+
+  describe 'guessing' do
+    include Support::Guessing
+
+    it 'guesses by filename' do
+      assert_guess :filename => 'foo.g4'
+    end
+
+  end
+end
diff --git a/spec/visual/samples/antlr b/spec/visual/samples/antlr
new file mode 100644
index 0000000000..540e68544c
--- /dev/null
+++ b/spec/visual/samples/antlr
@@ -0,0 +1,37 @@
+grammar MyGrammar;
+options {
+  language = Ruby;
+  output = AST;
+  backtrack = true;
+}
+@header {
+  require 'strscan'
+}
+@lexer::members {
+  def scan_number
+    # implementation here
+  end
+}
+mode COMMAND_MODE;
+WORD: [a-zA-Z]+;
+NUMBER: [0-9]+;
+parse: expr+; // single line comment
+expr: WORD (PLUS | MINUS) expr | NUMBER;
+expr
+  returns[int value]
+  @init {
+    $value = 0
+}:
+  WORD { $value = lookup_word($WORD.text) } /* multi-line
+comment */
+  | NUMBER { $value = $NUMBER.text.to_i };
+WORD:
+  [a-zA-Z]+ {
+    # option example: sets the token type
+    $type = $options.myToken
+    # lexer command example: calls the method defined in @lexer::members
+    scan_number
+  };
+COMMAND: '/' -> pushMode(COMMAND_MODE);
+mode COMMAND_MODE;
+COMMAND_MODE_COMMAND: ~[\r\n]+ -> type(COMMAND);