second commit
This commit is contained in:
436
env/lib/python3.11/site-packages/pygments/lexers/textfmts.py
vendored
Normal file
436
env/lib/python3.11/site-packages/pygments/lexers/textfmts.py
vendored
Normal file
@ -0,0 +1,436 @@
|
||||
"""
|
||||
pygments.lexers.textfmts
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Lexers for various text formats.
|
||||
|
||||
:copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
|
||||
:license: BSD, see LICENSE for details.
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
from pygments.lexers import guess_lexer, get_lexer_by_name
|
||||
from pygments.lexer import RegexLexer, bygroups, default, include
|
||||
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
|
||||
Number, Generic, Literal, Punctuation
|
||||
from pygments.util import ClassNotFound
|
||||
|
||||
__all__ = ['IrcLogsLexer', 'TodotxtLexer', 'HttpLexer', 'GettextLexer',
|
||||
'NotmuchLexer', 'KernelLogLexer']
|
||||
|
||||
|
||||
class IrcLogsLexer(RegexLexer):
    """
    Lexer for IRC logs in *irssi*, *xchat* or *weechat* style.

    The nick, ``/me`` and join/part rules all start with the optional
    ``timestamp`` group, so a line is recognised with or without one.
    """

    name = 'IRC logs'
    url = 'https://en.wikipedia.org/wiki/Internet_Relay_Chat'
    aliases = ['irc']
    filenames = ['*.weechatlog']
    mimetypes = ['text/x-irclog']
    version_added = ''

    # VERBOSE permits the commented multi-line patterns below; MULTILINE
    # makes ``^`` and ``$`` anchor at every line of the log.
    flags = re.MULTILINE | re.VERBOSE

    # Optional leading timestamp, captured as one group.
    timestamp = r"""
        (
          # irssi / xchat and others
          (?: \[|\()?                  # Opening bracket or paren for the timestamp
            (?:                        # Timestamp
                (?: (?:\d{1,4} [-/])*  # Date as - or /-separated groups of digits
                    (?:\d{1,4})
                 [T ])?                # Date/time separator: T or space
                (?: \d?\d [:.])*       # Time as :/.-separated groups of 1 or 2 digits
                    (?: \d?\d)
            )
          (?: \]|\))?\s+               # Closing bracket or paren for the timestamp
        |
          # weechat
          \d{4}\s\w{3}\s\d{2}\s        # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        |
          # xchat
          \w{3}\s\d{2}\s               # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        )?
    """

    tokens = {
        'root': [
            # Log start/end banner
            (r'^\*\*\*\*(.*)\*\*\*\*$', Comment),
            # Hack: a line that holds nothing but the nick
            (f"^{timestamp}" + r'(\s*<[^>]*>\s*)$',
             bygroups(Comment.Preproc, Name.Tag)),
            # Normal messages; the text after the nick is lexed in 'msg'
            (f"^{timestamp}" + r"""
                (\s*<.*?>\s*)          # Nick """,
             bygroups(Comment.Preproc, Name.Tag),
             'msg'),
            # /me messages
            (f"^{timestamp}" + r"""
                (\s*[*]\s+)            # Star
                (\S+\s+.*?\n)          # Nick + rest of message """,
             bygroups(Comment.Preproc, Keyword, Generic.Inserted)),
            # Join/part messages
            (f"^{timestamp}" + r"""
                (\s*(?:\*{3}|<?-[!@=P]?->?)\s*)  # Star(s) or symbols
                (\S+\s+)                     # Nick + Space
                (.*?\n)                         # Rest of message """,
             bygroups(Comment.Preproc, Keyword, String, Comment)),
            # Anything else is plain text
            (r"^.*?\n", Text),
        ],
        'msg': [
            # Leading "word:" prefix; the lookahead skips URL schemes ("http://")
            (r"\S+:(?!//)", Name.Attribute),  # Prefix
            # Remainder of the line, then back to 'root'
            (r".*\n", Text, '#pop'),
        ],
    }
|
||||
|
||||
|
||||
class GettextLexer(RegexLexer):
    """
    Lexer for Gettext catalog files (``*.po`` / ``*.pot``).

    Each rule matches one whole line of the catalog.
    """

    name = 'Gettext Catalog'
    url = 'https://www.gnu.org/software/gettext'
    aliases = ['pot', 'po']
    filenames = ['*.pot', '*.po']
    mimetypes = ['application/x-gettext', 'text/x-gettext', 'text/gettext']
    version_added = '0.9'

    tokens = {
        'root': [
            # "#, ..." lines
            (r'^#,\s.*?$', Keyword.Type),
            # "#: ..." lines
            (r'^#:\s.*?$', Keyword.Declaration),
            # Every other comment form, including a bare "#"
            (r'^(#|#\.\s|#\|\s|#~\s|#\s).*$', Comment.Single),
            # Quoted line that opens with a "Name:" field
            (r'^(")([A-Za-z-]+:)(.*")$',
             bygroups(String, Name.Property, String)),
            # Any other quoted line
            (r'^".*"$', String),
            # Keyword followed by a quoted string
            (r'^(msgid|msgid_plural|msgstr|msgctxt)(\s+)(".*")$',
             bygroups(Name.Variable, Text, String)),
            # Indexed form: msgstr[N] "..."
            (r'^(msgstr\[)(\d)(\])(\s+)(".*")$',
             bygroups(Name.Variable, Number.Integer, Name.Variable, Text,
                      String)),
        ]
    }
|
||||
|
||||
|
||||
class HttpLexer(RegexLexer):
    """
    Lexer for HTTP sessions.

    The request or status line and the headers are tokenized directly.  The
    body is handed to the lexer registered for the MIME type announced by
    the ``Content-Type`` header, falling back to plain text.
    """

    name = 'HTTP'
    aliases = ['http']
    url = 'https://httpwg.org/specs'
    version_added = '1.5'

    # DOTALL lets the single ``.+`` rule in 'content' span the whole body.
    flags = re.DOTALL

    def get_tokens_unprocessed(self, text, stack=('root',)):
        """Reset the content-type state."""
        # Forget the Content-Type remembered from any previous run.
        self.content_type = None
        return RegexLexer.get_tokens_unprocessed(self, text, stack)

    def header_callback(self, match):
        """Emit tokens for one ``Name: value`` line and note Content-Type."""
        if match.group(1).lower() == 'content-type':
            # Keep only the media type; drop parameters such as "; charset=".
            media_type = match.group(5).strip().partition(';')[0]
            self.content_type = media_type.strip()

        # Groups 1-6: name, spaces, colon, spaces, value, line ending.
        token_types = (Name.Attribute, Text, Operator, Text, Literal, Text)
        for group_no, token_type in enumerate(token_types, start=1):
            yield match.start(group_no), token_type, match.group(group_no)

    def continuous_header_callback(self, match):
        """Emit tokens for an indented header continuation line."""
        # Groups 1-3: leading whitespace, value, line ending.
        for group_no, token_type in enumerate((Text, Literal, Text), start=1):
            yield match.start(group_no), token_type, match.group(group_no)

    def content_callback(self, match):
        """Lex the body with the lexer for the recorded content type."""
        body = match.group()
        start = match.start()
        mimetype = getattr(self, 'content_type', None)

        if mimetype:
            from pygments.lexers import get_lexer_for_mimetype

            candidates = [mimetype]
            if '+' in mimetype:
                # application/calendar+xml can be treated as application/xml
                # if there's not a better match.
                candidates.append(
                    re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2', mimetype))

            for candidate in candidates:
                try:
                    lexer = get_lexer_for_mimetype(candidate)
                except ClassNotFound:
                    continue
                # Shift the sub-lexer's indices to positions in the full text.
                for idx, token, value in lexer.get_tokens_unprocessed(body):
                    yield start + idx, token, value
                return

        # No content type, or no lexer registered for it.
        yield start, Text, body

    tokens = {
        'root': [
            # Request line: METHOD target HTTP/version
            (r'([a-zA-Z][-_a-zA-Z]+)( +)([^ ]+)( +)'
             r'(HTTP)(/)(1\.[01]|2(?:\.0)?|3)(\r?\n|\Z)',
             bygroups(Name.Function, Text, Name.Namespace, Text,
                      Keyword.Reserved, Operator, Number, Text),
             'headers'),
            # Status line: HTTP/version code [reason]
            (r'(HTTP)(/)(1\.[01]|2(?:\.0)?|3)( +)(\d{3})(?:( +)([^\r\n]*))?(\r?\n|\Z)',
             bygroups(Keyword.Reserved, Operator, Number, Text, Number, Text,
                      Name.Exception, Text),
             'headers'),
        ],
        'headers': [
            (r'([^\s:]+)( *)(:)( *)([^\r\n]*)(\r?\n|\Z)', header_callback),
            (r'([\t ]+)([^\r\n]+)(\r?\n|\Z)', continuous_header_callback),
            # The empty line ends the header section.
            (r'\r?\n', Text, 'content')
        ],
        'content': [
            (r'.+', content_callback)
        ]
    }

    def analyse_text(text):
        """Report whether ``text`` opens with a request or a status line."""
        request_line = r'^([a-zA-Z][-_a-zA-Z]+)( +)([^ ]+)( +)(HTTP)(/)(1\.[01]|2(?:\.0)?|3)(\r?\n|\Z)'
        status_line = r'^(HTTP)(/)(1\.[01]|2(?:\.0)?|3)( +)(\d{3})(?:( +)([^\r\n]*))?(\r?\n|\Z)'
        return (re.search(request_line, text) is not None
                or re.search(status_line, text) is not None)
|
||||
|
||||
|
||||
class TodotxtLexer(RegexLexer):
    """
    Lexer for Todo.txt todo list format.

    A task is lexed from 'root'; its first token selects either the
    'complete' or the 'incomplete' state, which consumes the rest of the
    line and pops back to 'root' at the newline.
    """

    name = 'Todotxt'
    url = 'http://todotxt.com/'
    aliases = ['todotxt']
    version_added = '2.0'
    # *.todotxt is not a standard extension for Todo.txt files; including it
    # makes testing easier, and also makes autodetecting file type easier.
    filenames = ['todo.txt', '*.todotxt']
    mimetypes = ['text/x-todo']

    # Aliases mapping standard token types of Todo.txt format concepts
    CompleteTaskText = Operator  # Chosen to de-emphasize complete tasks
    IncompleteTaskText = Text  # Incomplete tasks should look like plain text

    # Priority should have most emphasis to indicate importance of tasks
    Priority = Generic.Heading
    # Dates should have next most emphasis because time is important
    Date = Generic.Subheading

    # Project and context should have equal weight, and be in different colors
    Project = Generic.Error
    Context = String

    # If tag functionality is added, it should have the same weight as Project
    # and Context, and a different color. Generic.Traceback would work well.

    # Regex patterns for building up rules; dates, priorities, projects, and
    # contexts are all atomic
    # TODO: Make date regex more ISO 8601 compliant
    date_regex = r'\d{4,}-\d{2}-\d{2}'
    priority_regex = r'\([A-Z]\)'
    project_regex = r'\+\S+'
    context_regex = r'@\S+'

    # Compound regex expressions, assembled from the atoms above
    complete_one_date_regex = rf'(x )({date_regex})'
    complete_two_date_regex = rf'{complete_one_date_regex}( )({date_regex})'
    priority_date_regex = rf'({priority_regex})( )({date_regex})'

    tokens = {
        # Should parse starting at beginning of line; each line is a task
        'root': [
            # Complete task entry points: two total:
            # 1. Complete task with two dates
            (complete_two_date_regex,
             bygroups(CompleteTaskText, Date, CompleteTaskText, Date),
             'complete'),
            # 2. Complete task with one date
            (complete_one_date_regex,
             bygroups(CompleteTaskText, Date),
             'complete'),

            # Incomplete task entry points: six total:
            # 1. Priority plus date
            (priority_date_regex,
             bygroups(Priority, IncompleteTaskText, Date),
             'incomplete'),
            # 2. Priority only
            (priority_regex, Priority, 'incomplete'),
            # 3. Leading date
            (date_regex, Date, 'incomplete'),
            # 4. Leading context
            (context_regex, Context, 'incomplete'),
            # 5. Leading project
            (project_regex, Project, 'incomplete'),
            # 6. Non-whitespace catch-all
            (r'\S+', IncompleteTaskText, 'incomplete'),
        ],

        # Parse a complete task
        'complete': [
            # Newline indicates end of task, should return to root
            (r'\s*\n', CompleteTaskText, '#pop'),
            # Tokenize contexts and projects
            (context_regex, Context),
            (project_regex, Project),
            # Tokenize non-whitespace text
            (r'\S+', CompleteTaskText),
            # Tokenize whitespace not containing a newline
            (r'\s+', CompleteTaskText),
        ],

        # Parse an incomplete task
        'incomplete': [
            # Newline indicates end of task, should return to root
            (r'\s*\n', IncompleteTaskText, '#pop'),
            # Tokenize contexts and projects
            (context_regex, Context),
            (project_regex, Project),
            # Tokenize non-whitespace text
            (r'\S+', IncompleteTaskText),
            # Tokenize whitespace not containing a newline
            (r'\s+', IncompleteTaskText),
        ],
    }
|
||||
|
||||
|
||||
class NotmuchLexer(RegexLexer):
    """
    For Notmuch email text format.

    Additional options accepted:

    `body_lexer`
        If given, highlight the contents of the message body with the specified
        lexer, else guess it according to the body content (default: ``None``).
    """

    name = 'Notmuch'
    url = 'https://notmuchmail.org/'
    aliases = ['notmuch']
    version_added = '2.5'

    def _highlight_code(self, match):
        """
        Lex the text of a message part with a delegated lexer.

        The lexer is the one named by ``self.body_lexer`` when that option
        is set, otherwise one guessed from the content.  If the lookup or
        the guess raises ``ClassNotFound``, the plain ``text`` lexer is used.

        Yields ``(index, tokentype, value)`` tuples whose index is a position
        in the full input, not in the part body.
        """
        code = match.group(1)
        # The delegated lexer counts from the start of ``code``; remember
        # where ``code`` sits in the whole text so indices can be shifted.
        offset = match.start(1)

        try:
            if self.body_lexer:
                lexer = get_lexer_by_name(self.body_lexer)
            else:
                lexer = guess_lexer(code.strip())
        except ClassNotFound:
            lexer = get_lexer_by_name('text')

        for index, token, value in lexer.get_tokens_unprocessed(code):
            yield offset + index, token, value

    tokens = {
        'root': [
            # Message opener; its attributes are read before the contents.
            (r'\fmessage\{\s*', Keyword, ('message', 'message-attr')),
        ],
        'message-attr': [
            (r'(\s*id:\s*)(\S+)', bygroups(Name.Attribute, String)),
            (r'(\s*(?:depth|match|excluded):\s*)(\d+)',
             bygroups(Name.Attribute, Number.Integer)),
            (r'(\s*filename:\s*)(.+\n)',
             bygroups(Name.Attribute, String)),
            # Anything else ends the attribute list.
            default('#pop'),
        ],
        'message': [
            (r'\fmessage\}\n', Keyword, '#pop'),
            (r'\fheader\{\n', Keyword, 'header'),
            (r'\fbody\{\n', Keyword, 'body'),
        ],
        'header': [
            (r'\fheader\}\n', Keyword, '#pop'),
            (r'((?:Subject|From|To|Cc|Date):\s*)(.*\n)',
             bygroups(Name.Attribute, String)),
            # Line ending in two parenthesised groups.
            (r'(.*)(\s*\(.*\))(\s*\(.*\)\n)',
             bygroups(Generic.Strong, Literal, Name.Tag)),
        ],
        'body': [
            (r'\fpart\{\n', Keyword, 'part'),
            (r'\f(part|attachment)\{\s*', Keyword, ('part', 'part-attr')),
            (r'\fbody\}\n', Keyword, '#pop'),
        ],
        'part-attr': [
            (r'(ID:\s*)(\d+)', bygroups(Name.Attribute, Number.Integer)),
            (r'(,\s*)((?:Filename|Content-id):\s*)([^,]+)',
             bygroups(Punctuation, Name.Attribute, String)),
            (r'(,\s*)(Content-type:\s*)(.+\n)',
             bygroups(Punctuation, Name.Attribute, String)),
            # Anything else ends the attribute list.
            default('#pop'),
        ],
        'part': [
            (r'\f(?:part|attachment)\}\n', Keyword, '#pop'),
            # A nested part pushes another 'part' level.
            (r'\f(?:part|attachment)\{\s*', Keyword, ('#push', 'part-attr')),
            (r'^Non-text part: .*\n', Comment),
            # Everything up to the closing marker is the part's text.
            (r'(?s)(.*?(?=\f(?:part|attachment)\}\n))', _highlight_code),
        ],
    }

    def analyse_text(text):
        """Return 1.0 when ``text`` begins with a message opener, else 0.0."""
        return 1.0 if text.startswith('\fmessage{') else 0.0

    def __init__(self, **options):
        """Store the optional ``body_lexer`` name, then initialise the base."""
        self.body_lexer = options.get('body_lexer', None)
        RegexLexer.__init__(self, **options)
|
||||
|
||||
|
||||
class KernelLogLexer(RegexLexer):
    """
    For Linux Kernel log ("dmesg") output.

    Lines that carry a decoded ``facility:level :`` prefix are styled by
    that level.  Lines that begin directly with ``[`` are classified by
    keywords found in the message instead.
    """
    name = 'Kernel log'
    aliases = ['kmsg', 'dmesg']
    filenames = ['*.kmsg', '*.dmesg']
    url = 'https://fr.wikipedia.org/wiki/Dmesg'
    version_added = '2.6'

    tokens = {
        'root': [
            # The level name is padded with spaces before the colon
            # ("info  :", "err   :"); " *" accepts any amount of padding so
            # a rule does not depend on an exact space count.
            (r'^[^:]+:debug *: (?=\[)', Text, 'debug'),
            (r'^[^:]+:info *: (?=\[)', Text, 'info'),
            (r'^[^:]+:warn *: (?=\[)', Text, 'warn'),
            (r'^[^:]+:notice *: (?=\[)', Text, 'warn'),
            (r'^[^:]+:err *: (?=\[)', Text, 'error'),
            (r'^[^:]+:crit *: (?=\[)', Text, 'error'),
            # The two most severe levels are styled as errors as well.
            (r'^[^:]+:alert *: (?=\[)', Text, 'error'),
            (r'^[^:]+:emerg *: (?=\[)', Text, 'error'),
            # No level prefix: decide from the message text.
            (r'^(?=\[)', Text, 'unknown'),
        ],
        'unknown': [
            (r'^(?=.+(warning|notice|audit|deprecated))', Text, 'warn'),
            (r'^(?=.+(error|critical|fail|Bug))', Text, 'error'),
            default('info'),
        ],
        'base': [
            # Bracketed timestamp
            (r'\[[0-9. ]+\] ', Number),
            # Text up to the first colon right after the timestamp
            (r'(?<=\] ).+?:', Keyword),
            (r'\n', Text, '#pop'),
        ],
        # Each level state differs only in the token used for the message.
        'debug': [
            include('base'),
            (r'.+\n', Comment, '#pop')
        ],
        'info': [
            include('base'),
            (r'.+\n', Text, '#pop')
        ],
        'warn': [
            include('base'),
            (r'.+\n', Generic.Strong, '#pop')
        ],
        'error': [
            include('base'),
            (r'.+\n', Generic.Error, '#pop')
        ]
    }
|
Reference in New Issue
Block a user