diff --git a/lib/rouge/lexers/python.rb b/lib/rouge/lexers/python.rb index b47b64aa76..b8ba11d70b 100644 --- a/lib/rouge/lexers/python.rb +++ b/lib/rouge/lexers/python.rb @@ -68,12 +68,34 @@ def self.exceptions end identifier = /[[:alpha:]_][[:alnum:]_]*/ + lower_identifier = /[[:lower:]_][[:alnum:]_]*/ + upper_identifier = /[[:upper:]_][[:alnum:]_]*/ dotted_identifier = /[[:alpha:]_.][[:alnum:]_.]*/ def current_string @string_register ||= StringRegister.new end + # Yield a token for an identifier. Handle keywords/builtins, attr accesses + def token_for_identifier(word, fallback) + if self.class.keywords.include? word + token Keyword + elsif not in_state?(:dot) and self.class.exceptions.include? word + token Name::Builtin + elsif not in_state?(:dot) and self.class.builtins.include? word + token Name::Builtin + elsif not in_state?(:dot) and self.class.builtins_pseudo.include? word + token Name::Builtin::Pseudo + else + token fallback + end + + # Reset attr access state + if in_state?(:dot) + pop! + end + end + state :root do rule %r/\n+/m, Text rule %r/^(:)(\s*)([ru]{,2}""".*?""")/mi do @@ -84,10 +106,16 @@ def current_string rule %r/[^\S\n]+/, Text rule %r(#(.*)?\n?), Comment::Single - rule %r/[\[\]{}:(),;.]/, Punctuation + rule %r/[\[\]{}:(),;]/, Punctuation rule %r/\\\n/, Text rule %r/\\/, Text + # Push the :dot state to the stack to keep track of attr accesses + rule %r/\./ do + token Punctuation + push :dot if not (in_state?(:generic_string) or in_state?(:dot)) + end + rule %r/@#{dotted_identifier}/i, Name::Decorator rule %r/(in|is|and|or|not)\b/, Operator::Word @@ -116,9 +144,6 @@ def current_string push :classname end - rule %r/([a-z_]\w*)[ \t]*(?=(\(.*\)))/m, Name::Function - rule %r/([A-Z_]\w*)[ \t]*(?=(\(.*\)))/m, Name::Class - # TODO: not in python 3 rule %r/`.*?`/, Str::Backtick rule %r/([rfbu]{0,2})('''|"""|['"])/i do |m| @@ -127,22 +152,18 @@ def current_string push :generic_string end - # using negative lookbehind so we don't match property names - rule %r/(?