second commit

This commit is contained in:
2024-12-27 22:31:23 +09:00
parent 2353324570
commit 10a0f110ca
8819 changed files with 1307198 additions and 28 deletions

View File

@ -0,0 +1,27 @@
# Public names of this package: the shared parser state class followed by the
# block-level rule functions that are imported just below.
__all__ = (
    "StateBlock",
    "paragraph",
    "heading",
    "lheading",
    "code",
    "fence",
    "hr",
    "list_block",
    "reference",
    "blockquote",
    "html_block",
    "table",
)
from .blockquote import blockquote
from .code import code
from .fence import fence
from .heading import heading
from .hr import hr
from .html_block import html_block
from .lheading import lheading
from .list import list_block
from .paragraph import paragraph
from .reference import reference
from .state_block import StateBlock
from .table import table

View File

@ -0,0 +1,299 @@
# Block quotes
from __future__ import annotations
import logging
from ..common.utils import isStrSpace
from .state_block import StateBlock
LOGGER = logging.getLogger(__name__)
def _enter_quote_line(
    state: StateBlock, line: int, pos: int
) -> tuple[tuple[int, int, int, int], bool]:
    """Re-base the markers of ``line`` so that it starts after its ``>`` marker.

    :param state: block parser state whose per-line caches are rewritten
    :param line: index of the line being entered
    :param pos: offset in ``state.src`` of the character right after ``>``
    :return: ``(saved, is_empty)`` where ``saved`` is the previous
        ``(bMarks, bsCount, sCount, tShift)`` of the line (needed to undo the
        change later) and ``is_empty`` tells whether nothing but whitespace
        follows the marker.
    """
    line_end = state.eMarks[line]
    saved = (
        state.bMarks[line],
        state.bsCount[line],
        state.sCount[line],
        state.tShift[line],
    )

    # set offset past spaces and ">"
    initial = offset = state.sCount[line] + 1

    try:
        after_marker: str | None = state.src[pos]
    except IndexError:
        after_marker = None

    # Always defined per line, so the tab expansion below never depends on a
    # value left over from a previous line.
    adjust_tab = False
    space_after_marker = after_marker in (" ", "\t")

    # skip one optional space after '>'
    if after_marker == " " or (
        after_marker == "\t" and (state.bsCount[line] + offset) % 4 == 3
    ):
        # ' >   test '        or        '  >\t  test '
        #     ^ -- position start of line here (a tab here has width 1)
        pos += 1
        initial += 1
        offset += 1
    elif after_marker == "\t":
        # ' >\t  test '
        #    ^ -- position start of line here + shift bsCount slightly
        #         to make extra space appear
        adjust_tab = True

    state.bMarks[line] = pos

    # measure the (tab-expanded) indentation of the quoted content
    while pos < line_end:
        ch = state.src[pos]
        if not isStrSpace(ch):
            break
        if ch == "\t":
            offset += (
                4 - (offset + state.bsCount[line] + (1 if adjust_tab else 0)) % 4
            )
        else:
            offset += 1
        pos += 1

    # bsCount must be computed from the *old* sCount, hence the order here
    state.bsCount[line] = state.sCount[line] + 1 + (1 if space_after_marker else 0)
    state.sCount[line] = offset - initial
    state.tShift[line] = pos - state.bMarks[line]

    return saved, pos >= line_end


def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule: parse a ``>`` block quote starting at ``startLine``.

    :param state: block parser state (mutated while parsing, line caches are
        restored before returning)
    :param startLine: first line to examine
    :param endLine: line index at which scanning must stop
    :param silent: validation mode; only report whether a quote starts here
    :return: ``True`` if a block quote was recognised
    """
    LOGGER.debug(
        "entering blockquote: %s, %s, %s, %s", state, startLine, endLine, silent
    )

    oldLineMax = state.lineMax
    pos = state.bMarks[startLine] + state.tShift[startLine]

    if state.is_code_block(startLine):
        return False

    # check the block quote marker
    try:
        if state.src[pos] != ">":
            return False
    except IndexError:
        return False
    pos += 1

    # we know that it's going to be a valid blockquote,
    # so no point trying to find the end of it in silent mode
    if silent:
        return True

    saved, lastLineEmpty = _enter_quote_line(state, startLine, pos)
    # one (bMarks, bsCount, sCount, tShift) entry per touched line,
    # in order, starting at startLine
    savedLines = [saved]

    terminatorRules = state.md.block.ruler.getRules("blockquote")

    oldParentType = state.parentType
    state.parentType = "blockquote"

    # Search the end of the block
    #
    # Block ends with either:
    #  1. an empty line outside:
    #     ```
    #     > test
    #
    #     ```
    #  2. an empty line inside:
    #     ```
    #     >
    #     test
    #     ```
    #  3. another tag:
    #     ```
    #     > test
    #      - - -
    #     ```
    nextLine = startLine + 1
    while nextLine < endLine:
        # check if it's outdented, i.e. it's inside list item and indented
        # less than said list item:
        #
        # ```
        # 1. anything
        #    > current blockquote
        # 2. checking this line
        # ```
        isOutdented = state.sCount[nextLine] < state.blkIndent

        pos = state.bMarks[nextLine] + state.tShift[nextLine]

        if pos >= state.eMarks[nextLine]:
            # Case 1: line is not inside the blockquote, and this line is empty.
            break

        if state.src[pos] == ">" and not isOutdented:
            # This line is inside the blockquote.
            saved, lastLineEmpty = _enter_quote_line(state, nextLine, pos + 1)
            savedLines.append(saved)
            nextLine += 1
            continue

        # Case 2: line is not inside the blockquote, and the last line was empty.
        if lastLineEmpty:
            break

        # Case 3: another tag found.
        terminate = any(
            terminatorRule(state, nextLine, endLine, True)
            for terminatorRule in terminatorRules
        )

        lineSnapshot = (
            state.bMarks[nextLine],
            state.bsCount[nextLine],
            state.sCount[nextLine],
            state.tShift[nextLine],
        )

        if terminate:
            # Quirk to enforce "hard termination mode" for paragraphs;
            # normally if you call `tokenize(state, startLine, nextLine)`,
            # paragraphs will look below nextLine for paragraph continuation,
            # but if blockquote is terminated by another tag, they shouldn't
            state.lineMax = nextLine

            if state.blkIndent != 0:
                # state.blkIndent was non-zero, we now set it to zero,
                # so we need to re-calculate all offsets to appear as
                # if indent wasn't changed
                savedLines.append(lineSnapshot)
                state.sCount[nextLine] -= state.blkIndent

            break

        savedLines.append(lineSnapshot)

        # A negative indentation means that this is a paragraph continuation
        state.sCount[nextLine] = -1

        nextLine += 1

    oldIndent = state.blkIndent
    state.blkIndent = 0

    token = state.push("blockquote_open", "blockquote", 1)
    token.markup = ">"
    token.map = lines = [startLine, 0]

    state.md.block.tokenize(state, startLine, nextLine)

    token = state.push("blockquote_close", "blockquote", -1)
    token.markup = ">"

    state.lineMax = oldLineMax
    state.parentType = oldParentType
    lines[1] = state.line

    # Restore the original per-line caches; this might not be necessary since
    # the parser has already been here, but just to make sure we can do that.
    for line, (bMark, bsCount, sCount, tShift) in enumerate(
        savedLines, start=startLine
    ):
        state.bMarks[line] = bMark
        state.tShift[line] = tShift
        state.sCount[line] = sCount
        state.bsCount[line] = bsCount

    state.blkIndent = oldIndent

    return True

View File

@ -0,0 +1,35 @@
"""Code block (4 spaces padded)."""
import logging
from .state_block import StateBlock
LOGGER = logging.getLogger(__name__)
def code(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule: emit a ``code_block`` token for an indented code block.

    The block spans ``startLine`` and every following line that is either
    empty or itself reported as a code block by ``state.is_code_block``;
    trailing empty lines are not included.

    :return: ``True`` if ``startLine`` opens an indented code block
    """
    LOGGER.debug("entering code: %s, %s, %s, %s", state, startLine, endLine, silent)

    if not state.is_code_block(startLine):
        return False

    cursor = startLine + 1
    # line index just past the last line known to belong to the block
    blockEnd = cursor

    while cursor < endLine:
        if state.isEmpty(cursor):
            # may be an inner blank line; only counted once more code follows
            cursor += 1
        elif state.is_code_block(cursor):
            cursor += 1
            blockEnd = cursor
        else:
            break

    state.line = blockEnd

    token = state.push("code_block", "code", 0)
    token.content = (
        state.getLines(startLine, blockEnd, 4 + state.blkIndent, False) + "\n"
    )
    token.map = [startLine, state.line]

    return True

View File

@ -0,0 +1,101 @@
# fences (``` lang, ~~~ lang)
import logging
from .state_block import StateBlock
LOGGER = logging.getLogger(__name__)
def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule: parse a fenced code block opened by ``` or ~~~.

    :param state: block parser state
    :param startLine: line expected to hold the opening fence
    :param endLine: line index at which scanning must stop
    :param silent: validation mode; return as soon as the opener is recognised
    :return: ``True`` if a fence starts at ``startLine``
    """
    LOGGER.debug("entering fence: %s, %s, %s, %s", state, startLine, endLine, silent)

    openStart = state.bMarks[startLine] + state.tShift[startLine]
    lineEnd = state.eMarks[startLine]

    if state.is_code_block(startLine):
        return False

    # an opening fence needs at least three characters
    if openStart + 3 > lineEnd:
        return False

    marker = state.src[openStart]
    if marker not in ("~", "`"):
        return False

    # scan marker length
    openEnd = state.skipCharsStr(openStart, marker)
    fenceLen = openEnd - openStart
    if fenceLen < 3:
        return False

    markup = state.src[openStart:openEnd]
    params = state.src[openEnd:lineEnd]

    # the info string of a backtick fence may not contain a backtick
    if marker == "`" and marker in params:
        return False

    # Since start is found, we can report success here in validation mode
    if silent:
        return True

    # search end of block; when the loop runs out of lines the block is
    # auto-closed by the end of the document / parent
    closed = False
    nextLine = startLine + 1
    while nextLine < endLine:
        lineStart = state.bMarks[nextLine] + state.tShift[nextLine]
        lineEnd = state.eMarks[nextLine]

        if lineStart < lineEnd and state.sCount[nextLine] < state.blkIndent:
            # non-empty line with negative indent should stop the list:
            # - ```
            #  test
            break

        try:
            startsWithMarker = state.src[lineStart] == marker
        except IndexError:
            break

        if startsWithMarker and not state.is_code_block(nextLine):
            runEnd = state.skipCharsStr(lineStart, marker)
            # closing code fence must be at least as long as the opening one
            # and may be followed by spaces only
            if runEnd - lineStart >= fenceLen and state.skipSpaces(runEnd) >= lineEnd:
                closed = True
                break

        nextLine += 1

    # If a fence has heading spaces, they should be removed from its inner block
    contentIndent = state.sCount[startLine]

    state.line = nextLine + (1 if closed else 0)

    token = state.push("fence", "code", 0)
    token.info = params
    token.content = state.getLines(startLine + 1, nextLine, contentIndent, True)
    token.markup = markup
    token.map = [startLine, state.line]

    return True

View File

@ -0,0 +1,68 @@
"""ATX heading (#, ##, ...)."""
from __future__ import annotations
import logging
from ..common.utils import isStrSpace
from .state_block import StateBlock
LOGGER = logging.getLogger(__name__)
def heading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule: parse an ATX heading (``#`` to ``######``).

    :param state: block parser state
    :param startLine: line to examine
    :param endLine: unused by this rule (part of the common rule signature)
    :param silent: validation mode; do not emit tokens
    :return: ``True`` if ``startLine`` is a heading
    """
    LOGGER.debug("entering heading: %s, %s, %s, %s", state, startLine, endLine, silent)

    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    if state.is_code_block(startLine):
        return False

    # Test the bounds *before* indexing: on an empty last line ``pos`` can be
    # equal to ``len(state.src)`` and indexing would raise IndexError.
    if pos >= maximum or state.src[pos] != "#":
        return False

    # count heading level
    level = 1
    pos += 1
    ch: str | None
    try:
        ch = state.src[pos]
    except IndexError:
        ch = None
    while ch == "#" and pos < maximum and level <= 6:
        level += 1
        pos += 1
        try:
            ch = state.src[pos]
        except IndexError:
            ch = None

    # more than six '#', or the marker run is not followed by a space / EOL
    if level > 6 or (pos < maximum and not isStrSpace(ch)):
        return False

    if silent:
        return True

    # Let's cut tails like '    ###  ' from the end of string
    maximum = state.skipSpacesBack(maximum, pos)
    tmp = state.skipCharsStrBack(maximum, "#", pos)
    if tmp > pos and isStrSpace(state.src[tmp - 1]):
        maximum = tmp

    state.line = startLine + 1

    tag = "h" + str(level)
    markup = "#" * level

    token = state.push("heading_open", tag, 1)
    token.markup = markup
    token.map = [startLine, state.line]

    token = state.push("inline", "", 0)
    token.content = state.src[pos:maximum].strip()
    token.map = [startLine, state.line]
    token.children = []

    token = state.push("heading_close", tag, -1)
    token.markup = markup

    return True

View File

@ -0,0 +1,55 @@
"""Horizontal rule
At least 3 of these characters on a line * - _
"""
import logging
from ..common.utils import isStrSpace
from .state_block import StateBlock
LOGGER = logging.getLogger(__name__)
def hr(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule: parse a thematic break (``***``, ``---`` or ``___``).

    The line must consist of at least three identical marker characters,
    optionally mixed with whitespace.

    :param state: block parser state
    :param startLine: line to examine
    :param endLine: unused by this rule (part of the common rule signature)
    :param silent: validation mode; do not emit tokens
    :return: ``True`` if ``startLine`` is a thematic break
    """
    LOGGER.debug("entering hr: %s, %s, %s, %s", state, startLine, endLine, silent)

    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    if state.is_code_block(startLine):
        return False

    try:
        marker = state.src[pos]
    except IndexError:
        return False
    pos += 1

    # Check hr marker
    if marker not in ("*", "-", "_"):
        return False

    # markers can be mixed with spaces, but there should be at least 3 of them
    cnt = 1
    while pos < maximum:
        ch = state.src[pos]
        pos += 1
        if ch != marker and not isStrSpace(ch):
            return False
        if ch == marker:
            cnt += 1

    if cnt < 3:
        return False

    if silent:
        return True

    state.line = startLine + 1

    token = state.push("hr", "hr", 0)
    token.map = [startLine, state.line]
    # ``cnt`` already counts every marker on the line (it starts at 1 for the
    # first one), so the markup is exactly ``cnt`` markers.  The JS original
    # ``Array(cnt + 1).join(marker)`` also yields ``cnt`` copies; multiplying
    # by ``cnt + 1`` produced one marker too many.
    token.markup = marker * cnt

    return True

View File

@ -0,0 +1,90 @@
# HTML block
from __future__ import annotations
import logging
import re
from ..common.html_blocks import block_names
from ..common.html_re import HTML_OPEN_CLOSE_TAG_STR
from .state_block import StateBlock
LOGGER = logging.getLogger(__name__)
# An array of opening and corresponding closing sequences for html tags,
# last argument defines whether it can terminate a paragraph or not
#
# Each entry is ``(opening pattern, closing pattern, can_terminate_paragraph)``.
# ``html_block`` picks the first entry whose opening pattern matches the start
# line and then scans forward until the closing pattern matches a line.
HTML_SEQUENCES: list[tuple[re.Pattern[str], re.Pattern[str], bool]] = [
    # raw-text elements: closed by the matching end tag
    (
        re.compile(r"^<(script|pre|style|textarea)(?=(\s|>|$))", re.IGNORECASE),
        re.compile(r"<\/(script|pre|style|textarea)>", re.IGNORECASE),
        True,
    ),
    # comment
    (re.compile(r"^<!--"), re.compile(r"-->"), True),
    # processing instruction
    (re.compile(r"^<\?"), re.compile(r"\?>"), True),
    # declaration
    (re.compile(r"^<![A-Z]"), re.compile(r">"), True),
    # CDATA section
    (re.compile(r"^<!\[CDATA\["), re.compile(r"\]\]>"), True),
    # known block-level tag names: closed by an empty line (``^$``)
    (
        re.compile("^</?(" + "|".join(block_names) + ")(?=(\\s|/?>|$))", re.IGNORECASE),
        re.compile(r"^$"),
        True,
    ),
    # any other complete open/close tag alone on its line: closed by an empty
    # line; flagged ``False`` so it is not reported in silent (terminator) mode
    (re.compile(HTML_OPEN_CLOSE_TAG_STR + "\\s*$"), re.compile(r"^$"), False),
]
def html_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule: parse a raw HTML block.

    Only active when the ``html`` option of the parser is truthy.

    :param state: block parser state
    :param startLine: line to examine
    :param endLine: line index at which scanning must stop
    :param silent: validation mode; the result then tells whether the matched
        sequence is allowed to terminate a paragraph
    :return: ``True`` if an HTML block starts at ``startLine``
    """
    LOGGER.debug(
        "entering html_block: %s, %s, %s, %s", state, startLine, endLine, silent
    )
    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    if state.is_code_block(startLine):
        return False

    if not state.md.options.get("html", None):
        return False

    # Bounds check first: on an empty last line ``pos`` can be equal to
    # ``len(state.src)`` and indexing would raise IndexError.
    if pos >= maximum or state.src[pos] != "<":
        return False

    lineText = state.src[pos:maximum]

    # first sequence whose opening pattern matches the start line
    html_seq = next(
        (seq for seq in HTML_SEQUENCES if seq[0].search(lineText)),
        None,
    )
    if html_seq is None:
        return False

    opening, closing, canTerminateParagraph = html_seq

    if silent:
        # true if this sequence can be a terminator, false otherwise
        return canTerminateParagraph

    nextLine = startLine + 1

    # If we are here - we detected HTML block.
    # Let's roll down till block end.
    if not closing.search(lineText):
        while nextLine < endLine:
            if state.sCount[nextLine] < state.blkIndent:
                break

            pos = state.bMarks[nextLine] + state.tShift[nextLine]
            maximum = state.eMarks[nextLine]
            lineText = state.src[pos:maximum]

            if closing.search(lineText):
                # a non-empty closing line belongs to the block,
                # a terminating empty line does not
                if len(lineText) != 0:
                    nextLine += 1
                break
            nextLine += 1

    state.line = nextLine

    token = state.push("html_block", "", 0)
    token.map = [startLine, nextLine]
    token.content = state.getLines(startLine, nextLine, state.blkIndent, True)

    return True

View File

@ -0,0 +1,86 @@
# lheading (---, ==)
import logging
from .state_block import StateBlock
LOGGER = logging.getLogger(__name__)
def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule: parse a setext heading (text underlined with ``===`` or ``---``).

    :param state: block parser state
    :param startLine: first line of the candidate heading text
    :param endLine: line index at which scanning must stop
    :param silent: unused by this rule (part of the common rule signature)
    :return: ``True`` if a valid underline was found and tokens were emitted
    """
    LOGGER.debug("entering lheading: %s, %s, %s, %s", state, startLine, endLine, silent)

    level = None
    nextLine = startLine + 1
    ruler = state.md.block.ruler
    terminatorRules = ruler.getRules("paragraph")

    if state.is_code_block(startLine):
        return False

    oldParentType = state.parentType
    state.parentType = "paragraph"  # use paragraph to match terminatorRules

    try:
        # jump line-by-line until empty one or EOF
        while nextLine < endLine and not state.isEmpty(nextLine):
            # this would be a code block normally, but after paragraph
            # it's considered a lazy continuation regardless of what's there
            if state.sCount[nextLine] - state.blkIndent > 3:
                nextLine += 1
                continue

            # Check for underline in setext header
            if state.sCount[nextLine] >= state.blkIndent:
                pos = state.bMarks[nextLine] + state.tShift[nextLine]
                maximum = state.eMarks[nextLine]

                if pos < maximum:
                    marker = state.src[pos]

                    if marker in ("-", "="):
                        pos = state.skipCharsStr(pos, marker)
                        pos = state.skipSpaces(pos)

                        # the underline may hold nothing but markers and
                        # trailing spaces
                        if pos >= maximum:
                            level = 1 if marker == "=" else 2
                            break

            # quirk for blockquotes, this line should already be checked by that rule
            if state.sCount[nextLine] < 0:
                nextLine += 1
                continue

            # Some tags can terminate paragraph without empty line.
            if any(
                terminatorRule(state, nextLine, endLine, True)
                for terminatorRule in terminatorRules
            ):
                break

            nextLine += 1
    finally:
        # Restore on every exit path.  Previously the "no underline" return
        # below left parentType stuck at "paragraph" for the following rules.
        state.parentType = oldParentType

    if not level:
        # Didn't find valid underline
        return False

    content = state.getLines(startLine, nextLine, state.blkIndent, False).strip()

    state.line = nextLine + 1

    token = state.push("heading_open", "h" + str(level), 1)
    token.markup = marker
    token.map = [startLine, state.line]

    token = state.push("inline", "", 0)
    token.content = content
    # the inline content excludes the underline line
    token.map = [startLine, state.line - 1]
    token.children = []

    token = state.push("heading_close", "h" + str(level), -1)
    token.markup = marker

    return True

View File

@ -0,0 +1,345 @@
# Lists
import logging
from ..common.utils import isStrSpace
from .state_block import StateBlock
LOGGER = logging.getLogger(__name__)
# Search `[-+*][\n ]`, returns next pos after marker on success
# or -1 on fail.
def skipBulletListMarker(state: StateBlock, startLine: int) -> int:
    """Match a bullet list marker (``*``, ``-`` or ``+``) at the start of a line.

    :return: offset right after the marker, or ``-1`` if the line does not
        start with a bullet marker followed by whitespace or end of line
    """
    cursor = state.bMarks[startLine] + state.tShift[startLine]
    lineEnd = state.eMarks[startLine]

    try:
        bullet = state.src[cursor]
    except IndexError:
        return -1

    if bullet not in ("*", "-", "+"):
        return -1

    cursor += 1

    # " -test " - is not a list item
    if cursor < lineEnd and not isStrSpace(state.src[cursor]):
        return -1

    return cursor
# Search `\d+[.)][\n ]`, returns next pos after marker on success
# or -1 on fail.
def skipOrderedListMarker(state: StateBlock, startLine: int) -> int:
    """Match an ordered list marker (1-9 ASCII digits plus ``.`` or ``)``).

    :return: offset right after the marker, or ``-1`` if the line does not
        start with an ordered marker followed by whitespace or end of line
    """
    start = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    # List marker should have at least 2 chars (digit + dot)
    if start + 1 >= maximum:
        return -1

    if not "0" <= state.src[start] <= "9":
        return -1

    pos = start + 1
    while pos < maximum:
        ch = state.src[pos]
        pos += 1

        if "0" <= ch <= "9":
            # List marker should have no more than 9 digits
            # (prevents integer overflow in browsers)
            if pos - start >= 10:
                return -1
        elif ch in (")", "."):
            # found valid marker
            break
        else:
            return -1
    else:
        # EOL reached before any delimiter -> fail
        return -1

    # " 1.test " - is not a list item
    if pos < maximum and not isStrSpace(state.src[pos]):
        return -1

    return pos
def markTightParagraphs(state: StateBlock, idx: int) -> None:
    """Hide the paragraph wrappers that sit two levels below ``state.level``.

    Scans ``state.tokens`` from ``idx + 2`` up to (not including) the last two
    tokens; for every ``paragraph_open`` at level ``state.level + 2`` both that
    token and the token two positions later are flagged ``hidden``.
    """
    tokens = state.tokens
    wantedLevel = state.level + 2
    stop = len(tokens) - 2

    i = idx + 2
    while i < stop:
        candidate = tokens[i]
        if candidate.level == wantedLevel and candidate.type == "paragraph_open":
            tokens[i + 2].hidden = True
            candidate.hidden = True
            # jump over the two tokens that follow the opener
            i += 2
        i += 1
def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule: parse a bullet or ordered list and all of its items.

    :param state: block parser state (temporarily modified for each item)
    :param startLine: line expected to hold the first list marker
    :param endLine: line index at which scanning must stop
    :param silent: validation mode; only report whether a list starts here
    :return: ``True`` if a list starts at ``startLine``
    """
    LOGGER.debug("entering list: %s, %s, %s, %s", state, startLine, endLine, silent)

    if state.is_code_block(startLine):
        return False

    # Special case:
    #  - item 1
    #   - item 2
    #    - item 3
    #     - item 4
    #      - this one is a paragraph continuation
    if (
        state.listIndent >= 0
        and state.sCount[startLine] - state.listIndent >= 4
        and state.sCount[startLine] < state.blkIndent
    ):
        return False

    # limit conditions when list can interrupt
    # a paragraph (validation mode only)
    # Next list item should still terminate previous list item
    #
    # This code can fail if plugins use blkIndent as well as lists,
    # but I hope the spec gets fixed long before that happens.
    interruptsParagraph = bool(
        silent
        and state.parentType == "paragraph"
        and state.sCount[startLine] >= state.blkIndent
    )

    # Detect list type and position after marker
    posAfterMarker = skipOrderedListMarker(state, startLine)
    isOrdered = posAfterMarker >= 0
    if isOrdered:
        start = state.bMarks[startLine] + state.tShift[startLine]
        markerValue = int(state.src[start : posAfterMarker - 1])

        # If we're starting a new ordered list right after
        # a paragraph, it should start with 1.
        if interruptsParagraph and markerValue != 1:
            return False
    else:
        posAfterMarker = skipBulletListMarker(state, startLine)
        if posAfterMarker < 0:
            return False

    # If we're starting a new unordered list right after
    # a paragraph, first line should not be empty.
    if (
        interruptsParagraph
        and state.skipSpaces(posAfterMarker) >= state.eMarks[startLine]
    ):
        return False

    # We should terminate list on style change. Remember first one to compare.
    markerChar = state.src[posAfterMarker - 1]

    # For validation mode we can terminate immediately
    if silent:
        return True

    # Start list
    listTokIdx = len(state.tokens)

    if isOrdered:
        token = state.push("ordered_list_open", "ol", 1)
        if markerValue != 1:
            token.attrs = {"start": markerValue}
    else:
        token = state.push("bullet_list_open", "ul", 1)

    token.map = listLines = [startLine, 0]
    token.markup = markerChar

    #
    # Iterate list items
    #
    tight = True
    prevEmptyEnd = False
    terminatorRules = state.md.block.ruler.getRules("list")

    oldParentType = state.parentType
    state.parentType = "list"

    # first line of the item currently being parsed
    itemStart = startLine
    nextLine = startLine

    while nextLine < endLine:
        pos = posAfterMarker
        maximum = state.eMarks[nextLine]

        initial = offset = (
            state.sCount[nextLine]
            + posAfterMarker
            - (state.bMarks[itemStart] + state.tShift[itemStart])
        )

        # measure the (tab-expanded) whitespace between marker and content
        while pos < maximum:
            ch = state.src[pos]
            if ch == "\t":
                offset += 4 - (offset + state.bsCount[nextLine]) % 4
            elif ch == " ":
                offset += 1
            else:
                break
            pos += 1

        contentStart = pos

        if contentStart >= maximum:
            # trimming space in "-    \n  3" case, indent is 1 here
            indentAfterMarker = 1
        else:
            indentAfterMarker = offset - initial

        # If we have more than 4 spaces, the indent is 1
        # (the rest is just indented code block)
        if indentAfterMarker > 4:
            indentAfterMarker = 1

        # "  -  test"
        #  ^^^^^ - calculating total length of this thing
        indent = initial + indentAfterMarker

        # Run subparser & write tokens
        token = state.push("list_item_open", "li", 1)
        token.markup = markerChar
        token.map = itemLines = [itemStart, 0]
        if isOrdered:
            token.info = state.src[start : posAfterMarker - 1]

        # change current state, then restore it after parser subcall
        oldTight = state.tight
        oldTShift = state.tShift[itemStart]
        oldSCount = state.sCount[itemStart]

        #  - example list
        # ^ listIndent position will be here
        #   ^ blkIndent position will be here
        oldListIndent = state.listIndent
        state.listIndent = state.blkIndent
        state.blkIndent = indent

        state.tight = True
        state.tShift[itemStart] = contentStart - state.bMarks[itemStart]
        state.sCount[itemStart] = offset

        if contentStart >= maximum and state.isEmpty(itemStart + 1):
            # workaround for this case
            # (list item is empty, list terminates before "foo"):
            # ~~~~~~~~
            #   -
            #
            #     foo
            # ~~~~~~~~
            state.line = min(state.line + 2, endLine)
        else:
            # NOTE in list.js this was:
            # state.md.block.tokenize(state, startLine, endLine, True)
            # but tokenize does not take the final parameter
            state.md.block.tokenize(state, itemStart, endLine)

        # If any of list item is tight, mark list as tight
        if (not state.tight) or prevEmptyEnd:
            tight = False

        # Item become loose if finish with empty line,
        # but we should filter last element, because it means list finish
        prevEmptyEnd = (state.line - itemStart) > 1 and state.isEmpty(state.line - 1)

        state.blkIndent = state.listIndent
        state.listIndent = oldListIndent
        state.tShift[itemStart] = oldTShift
        state.sCount[itemStart] = oldSCount
        state.tight = oldTight

        token = state.push("list_item_close", "li", -1)
        token.markup = markerChar

        nextLine = itemStart = state.line
        itemLines[1] = nextLine

        if nextLine >= endLine:
            break

        #
        # Try to check if list is terminated or continued.
        #
        if state.sCount[nextLine] < state.blkIndent:
            break

        if state.is_code_block(itemStart):
            break

        # fail if terminating block found
        if any(
            terminatorRule(state, nextLine, endLine, True)
            for terminatorRule in terminatorRules
        ):
            break

        # fail if list has another type
        if isOrdered:
            posAfterMarker = skipOrderedListMarker(state, nextLine)
            if posAfterMarker < 0:
                break
            start = state.bMarks[nextLine] + state.tShift[nextLine]
        else:
            posAfterMarker = skipBulletListMarker(state, nextLine)
            if posAfterMarker < 0:
                break

        if markerChar != state.src[posAfterMarker - 1]:
            break

    # Finalize list
    if isOrdered:
        token = state.push("ordered_list_close", "ol", -1)
    else:
        token = state.push("bullet_list_close", "ul", -1)

    token.markup = markerChar

    listLines[1] = nextLine
    state.line = nextLine

    state.parentType = oldParentType

    # mark paragraphs tight if needed
    if tight:
        markTightParagraphs(state, listTokIdx)

    return True

View File

@ -0,0 +1,65 @@
"""Paragraph."""
import logging
from .state_block import StateBlock
LOGGER = logging.getLogger(__name__)
def paragraph(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule: collect consecutive lines into a paragraph.

    Scanning is bounded by ``state.lineMax`` rather than by the ``endLine``
    argument, which is only logged.

    :return: always ``True``
    """
    LOGGER.debug(
        "entering paragraph: %s, %s, %s, %s", state, startLine, endLine, silent
    )

    terminators = state.md.block.ruler.getRules("paragraph")
    lastLine = state.lineMax

    savedParentType = state.parentType
    state.parentType = "paragraph"

    # jump line-by-line until empty one or EOF
    nextLine = startLine + 1
    while nextLine < lastLine and not state.isEmpty(nextLine):
        # this would be a code block normally, but after paragraph
        # it's considered a lazy continuation regardless of what's there
        lazyContinuation = state.sCount[nextLine] - state.blkIndent > 3

        # quirk for blockquotes: a negative indent marks a line that was
        # already checked by that rule
        checkedByQuote = state.sCount[nextLine] < 0

        if not (lazyContinuation or checkedByQuote):
            # Some tags can terminate paragraph without empty line.
            if any(rule(state, nextLine, lastLine, True) for rule in terminators):
                break

        nextLine += 1

    content = state.getLines(startLine, nextLine, state.blkIndent, False).strip()

    state.line = nextLine

    opener = state.push("paragraph_open", "p", 1)
    opener.map = [startLine, state.line]

    inline = state.push("inline", "", 0)
    inline.content = content
    inline.map = [startLine, state.line]
    inline.children = []

    state.push("paragraph_close", "p", -1)

    state.parentType = savedParentType

    return True

View File

@ -0,0 +1,215 @@
import logging
from ..common.utils import charCodeAt, isSpace, normalizeReference
from .state_block import StateBlock
LOGGER = logging.getLogger(__name__)
def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) -> bool:
    """Block rule: parse a link reference definition ``[label]: destination 'title'``.

    On success the definition is stored in ``state.env["references"]`` (or in
    ``state.env["duplicate_refs"]`` when the label is already defined).

    :param state: block parser state
    :param startLine: first line of the candidate definition
    :param _endLine: unused; scanning is bounded by ``state.lineMax``
    :param silent: validation mode; do not record the reference
    :return: ``True`` if a valid definition starts at ``startLine``
    """
    LOGGER.debug(
        "entering reference: %s, %s, %s, %s", state, startLine, _endLine, silent
    )

    lines = 0
    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]
    nextLine = startLine + 1

    if state.is_code_block(startLine):
        return False

    # Bounds check first: on an empty last line ``pos`` can be equal to
    # ``len(state.src)`` and indexing would raise IndexError.
    if pos >= maximum or state.src[pos] != "[":
        return False

    # Simple check to quickly interrupt scan on [link](url) at the start of line.
    # Can be useful on practice: https://github.com/markdown-it/markdown-it/issues/54
    while pos < maximum:
        # an unescaped "]" must be directly followed by ":"
        if state.src[pos] == "]" and state.src[pos - 1] != "\\":
            if pos + 1 == maximum:
                return False
            if state.src[pos + 1] != ":":
                return False
            break
        pos += 1

    endLine = state.lineMax

    # jump line-by-line until empty one or EOF
    terminatorRules = state.md.block.ruler.getRules("reference")

    oldParentType = state.parentType
    state.parentType = "reference"

    try:
        while nextLine < endLine and not state.isEmpty(nextLine):
            # this would be a code block normally, but after paragraph
            # it's considered a lazy continuation regardless of what's there
            if state.sCount[nextLine] - state.blkIndent > 3:
                nextLine += 1
                continue

            # quirk for blockquotes, this line should already be checked by that rule
            if state.sCount[nextLine] < 0:
                nextLine += 1
                continue

            # Some tags can terminate paragraph without empty line.
            if any(
                terminatorRule(state, nextLine, endLine, True)
                for terminatorRule in terminatorRules
            ):
                break

            nextLine += 1
    finally:
        # parentType only matters while the terminator rules run.  Restoring
        # it here covers every return below; previously only the final
        # success path restored it, so failures leaked "reference".
        state.parentType = oldParentType

    string = state.getLines(startLine, nextLine, state.blkIndent, False).strip()
    maximum = len(string)

    labelEnd = None
    pos = 1
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if ch == 0x5B:  # /* [ */
            return False
        elif ch == 0x5D:  # /* ] */
            labelEnd = pos
            break
        elif ch == 0x0A:  # /* \n */
            lines += 1
        elif ch == 0x5C:  # /* \ */
            # skip the escaped character, still counting an escaped newline
            pos += 1
            if pos < maximum and charCodeAt(string, pos) == 0x0A:
                lines += 1
        pos += 1

    if (
        labelEnd is None or labelEnd < 0 or charCodeAt(string, labelEnd + 1) != 0x3A
    ):  # /* : */
        return False

    # [label]:   destination   'title'
    #         ^^^ skip optional whitespace here
    pos = labelEnd + 2
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if ch == 0x0A:
            lines += 1
        elif isSpace(ch):
            pass
        else:
            break
        pos += 1

    # [label]:   destination   'title'
    #            ^^^^^^^^^^^ parse this
    res = state.md.helpers.parseLinkDestination(string, pos, maximum)
    if not res.ok:
        return False

    href = state.md.normalizeLink(res.str)
    if not state.md.validateLink(href):
        return False

    pos = res.pos
    lines += res.lines

    # save cursor state, we could require to rollback later
    destEndPos = pos
    destEndLineNo = lines

    # [label]:   destination   'title'
    #                       ^^^ skipping those spaces
    start = pos
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if ch == 0x0A:
            lines += 1
        elif isSpace(ch):
            pass
        else:
            break
        pos += 1

    # [label]:   destination   'title'
    #                          ^^^^^^^ parse this
    res = state.md.helpers.parseLinkTitle(string, pos, maximum)
    if pos < maximum and start != pos and res.ok:
        title = res.str
        pos = res.pos
        lines += res.lines
    else:
        title = ""
        pos = destEndPos
        lines = destEndLineNo

    # skip trailing spaces until the rest of the line
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if not isSpace(ch):
            break
        pos += 1

    if pos < maximum and charCodeAt(string, pos) != 0x0A and title:
        # garbage at the end of the line after title,
        # but it could still be a valid reference if we roll back
        title = ""
        pos = destEndPos
        lines = destEndLineNo
        while pos < maximum:
            ch = charCodeAt(string, pos)
            if not isSpace(ch):
                break
            pos += 1

    if pos < maximum and charCodeAt(string, pos) != 0x0A:
        # garbage at the end of the line
        return False

    label = normalizeReference(string[1:labelEnd])
    if not label:
        # CommonMark 0.20 disallows empty labels
        return False

    # Reference can not terminate anything. This check is for safety only.
    if silent:
        return True

    if "references" not in state.env:
        state.env["references"] = {}

    state.line = startLine + lines + 1

    # note, this is not part of markdown-it JS, but is useful for renderers
    if state.md.options.get("inline_definitions", False):
        token = state.push("definition", "", 0)
        token.meta = {
            "id": label,
            "title": title,
            "url": href,
            "label": string[1:labelEnd],
        }
        token.map = [startLine, state.line]

    if label not in state.env["references"]:
        state.env["references"][label] = {
            "title": title,
            "href": href,
            "map": [startLine, state.line],
        }
    else:
        # the first definition wins; later ones are only recorded
        state.env.setdefault("duplicate_refs", []).append(
            {
                "title": title,
                "href": href,
                "label": label,
                "map": [startLine, state.line],
            }
        )

    return True

View File

@ -0,0 +1,261 @@
from __future__ import annotations
from typing import TYPE_CHECKING, Literal
from ..common.utils import isStrSpace
from ..ruler import StateBase
from ..token import Token
from ..utils import EnvType
if TYPE_CHECKING:
from markdown_it.main import MarkdownIt
class StateBlock(StateBase):
    """Mutable parsing state that is passed to every block-level rule.

    On construction the source is scanned once and per-line caches
    (``bMarks``, ``eMarks``, ``tShift``, ``sCount``, ``bsCount``) are filled so
    that rules can jump between lines without re-scanning the text.
    """

    def __init__(
        self, src: str, md: MarkdownIt, env: EnvType, tokens: list[Token]
    ) -> None:
        """Store the inputs and build the per-line caches.

        :param src: source text to parse
        :param md: parser instance; its block ruler is queried once here
        :param env: environment object, stored as-is
        :param tokens: output list that :meth:`push` appends tokens to
        """
        self.src = src

        # link to parser instance
        self.md = md

        self.env = env

        #
        # Internal state variables
        #

        self.tokens = tokens

        self.bMarks: list[int] = []  # line begin offsets for fast jumps
        self.eMarks: list[int] = []  # line end offsets for fast jumps
        # offsets of the first non-space characters (tabs not expanded)
        self.tShift: list[int] = []
        self.sCount: list[int] = []  # indents for each line (tabs expanded)

        # An amount of virtual spaces (tabs expanded) between beginning
        # of each line (bMarks) and real beginning of that line.
        #
        # It exists only as a hack because blockquotes override bMarks
        # losing information in the process.
        #
        # It's used only when expanding tabs, you can think about it as
        # an initial tab length, e.g. bsCount=21 applied to string `\t123`
        # means first tab should be expanded to 4-21%4 === 3 spaces.
        #
        self.bsCount: list[int] = []

        # block parser variables
        self.blkIndent = 0  # required block content indent (for example, if we are
        # inside a list, it would be positioned after list marker)
        self.line = 0  # line index in src
        self.lineMax = 0  # lines count
        self.tight = False  # loose/tight mode for lists
        self.ddIndent = -1  # indent of the current dd block (-1 if there isn't any)
        self.listIndent = -1  # indent of the current list block (-1 if there isn't any)

        # can be 'blockquote', 'list', 'root', 'paragraph' or 'reference'
        # used in lists to determine if they interrupt a paragraph
        self.parentType = "root"

        self.level = 0

        # renderer
        self.result = ""

        # Create caches
        # Generate markers.
        indent_found = False

        start = pos = indent = offset = 0
        length = len(self.src)

        for pos, character in enumerate(self.src):
            if not indent_found:
                if isStrSpace(character):
                    # still inside the leading whitespace of the current line
                    indent += 1

                    if character == "\t":
                        # a tab advances to the next multiple of 4
                        offset += 4 - offset % 4
                    else:
                        offset += 1
                    continue
                else:
                    indent_found = True

            if character == "\n" or pos == length - 1:
                if character != "\n":
                    # last line without a trailing newline: the end mark sits
                    # one past the final character
                    pos += 1
                self.bMarks.append(start)
                self.eMarks.append(pos)
                self.tShift.append(indent)
                self.sCount.append(offset)
                self.bsCount.append(0)

                indent_found = False
                indent = 0
                offset = 0
                start = pos + 1

        # Push fake entry to simplify cache bounds checks
        self.bMarks.append(length)
        self.eMarks.append(length)
        self.tShift.append(0)
        self.sCount.append(0)
        self.bsCount.append(0)

        self.lineMax = len(self.bMarks) - 1  # don't count last fake line

        # pre-check if code blocks are enabled, to speed up is_code_block method
        self._code_enabled = "code" in self.md["block"].ruler.get_active_rules()

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}"
            f"(line={self.line},level={self.level},tokens={len(self.tokens)})"
        )

    def push(self, ttype: str, tag: str, nesting: Literal[-1, 0, 1]) -> Token:
        """Create a block token, append it to ``self.tokens`` and return it.

        A closing token (``nesting < 0``) lowers ``self.level`` before the
        token's level is recorded; an opening token (``nesting > 0``) raises it
        afterwards, so an open/close pair ends up with the same level.
        """
        token = Token(ttype, tag, nesting)
        token.block = True
        if nesting < 0:
            self.level -= 1  # closing tag
        token.level = self.level
        if nesting > 0:
            self.level += 1  # opening tag
        self.tokens.append(token)
        return token

    def isEmpty(self, line: int) -> bool:
        """Return True if ``line`` has nothing after its leading whitespace."""
        return (self.bMarks[line] + self.tShift[line]) >= self.eMarks[line]

    def skipEmptyLines(self, from_pos: int) -> int:
        """Return the index of the first non-empty line at or after ``from_pos``.

        ``self.lineMax`` is returned when every remaining line is empty;
        ``from_pos`` is returned unchanged when it is already past the end.
        """
        while from_pos < self.lineMax:
            try:
                if (self.bMarks[from_pos] + self.tShift[from_pos]) < self.eMarks[
                    from_pos
                ]:
                    break
            except IndexError:
                # a line index without cache entries is treated as empty
                pass
            from_pos += 1
        return from_pos

    def skipSpaces(self, pos: int) -> int:
        """Skip spaces from given position.

        Returns the first position at or after ``pos`` whose character is not
        accepted by ``isStrSpace``, or the end of the source.
        """
        while True:
            try:
                current = self.src[pos]
            except IndexError:
                break
            if not isStrSpace(current):
                break
            pos += 1
        return pos

    def skipSpacesBack(self, pos: int, minimum: int) -> int:
        """Skip spaces from given position in reverse.

        Scans backwards starting at ``pos - 1`` and never goes below
        ``minimum``; returns the position just after the last non-space
        character found, or ``minimum`` if there is none.
        """
        if pos <= minimum:
            return pos
        while pos > minimum:
            pos -= 1
            if not isStrSpace(self.src[pos]):
                return pos + 1
        return pos

    def skipChars(self, pos: int, code: int) -> int:
        """Skip character code from given position.

        ``code`` is an integer code point; see :meth:`skipCharsStr` for the
        string-based variant.
        """
        while True:
            try:
                # compare code points straight from ``src`` rather than through
                # a separate character-code view of the source
                current = ord(self.src[pos])
            except IndexError:
                break
            if current != code:
                break
            pos += 1
        return pos

    def skipCharsStr(self, pos: int, ch: str) -> int:
        """Skip character string from given position."""
        while True:
            try:
                current = self.src[pos]
            except IndexError:
                break
            if current != ch:
                break
            pos += 1
        return pos

    def skipCharsBack(self, pos: int, code: int, minimum: int) -> int:
        """Skip character code reverse from given position - 1.

        ``code`` is an integer code point; the scan never goes below
        ``minimum``.
        """
        if pos <= minimum:
            return pos
        while pos > minimum:
            pos -= 1
            if code != ord(self.src[pos]):
                return pos + 1
        return pos

    def skipCharsStrBack(self, pos: int, ch: str, minimum: int) -> int:
        """Skip character string reverse from given position - 1."""
        if pos <= minimum:
            return pos
        while pos > minimum:
            pos -= 1
            if ch != self.src[pos]:
                return pos + 1
        return pos

    def getLines(self, begin: int, end: int, indent: int, keepLastLF: bool) -> str:
        """Cut lines range from source.

        :param begin: index of the first line to include
        :param end: index one past the last line to include
        :param indent: number of leading columns (tabs expanded) to strip
            from each line
        :param keepLastLF: whether the final line keeps its line terminator
        :return: the concatenated lines, or ``""`` when ``begin >= end``
        """
        if begin >= end:
            return ""

        chunks: list[str] = []

        for line in range(begin, end):
            lineIndent = 0
            lineStart = first = self.bMarks[line]
            # every line but the last keeps its terminator; the last one only
            # when the caller asks for it
            last = (
                self.eMarks[line] + 1
                if line + 1 < end or keepLastLF
                else self.eMarks[line]
            )

            while (first < last) and (lineIndent < indent):
                ch = self.src[first]
                if isStrSpace(ch):
                    if ch == "\t":
                        lineIndent += 4 - (lineIndent + self.bsCount[line]) % 4
                    else:
                        lineIndent += 1
                elif first - lineStart < self.tShift[line]:
                    # a non-space character that still lies inside tShift is
                    # counted as one column of indent
                    lineIndent += 1
                else:
                    break
                first += 1

            if lineIndent > indent:
                # partially expanding tabs in code blocks, e.g '\t\tfoobar'
                # with indent=2 becomes '  \tfoobar'
                chunks.append((" " * (lineIndent - indent)) + self.src[first:last])
            else:
                chunks.append(self.src[first:last])

        return "".join(chunks)

    def is_code_block(self, line: int) -> bool:
        """Check if line is a code block,
        i.e. the code block rule is enabled and text is indented by more than 3 spaces.
        """
        return self._code_enabled and (self.sCount[line] - self.blkIndent) >= 4

View File

@ -0,0 +1,236 @@
# GFM table, https://github.github.com/gfm/#tables-extension-
from __future__ import annotations
import re
from ..common.utils import charStrAt, isStrSpace
from .state_block import StateBlock
# Matches a single cell of the delimiter row: one or more dashes with an
# optional colon on either side (e.g. `---`, `:--`, `--:`, `:-:`).
headerLineRe = re.compile(r"^:?-+:?$")
# Matches a pipe at the very start or at the very end of a string.
# NOTE(review): not referenced in the visible part of this module.
enclosingPipesRe = re.compile(r"^\||\|$")
def getLine(state: StateBlock, line: int) -> str:
    """Return the text of ``line`` without its leading indentation.

    The slice starts at the first non-space character of the line
    (``bMarks + tShift``) and stops just before the line's end mark.
    """
    first = state.bMarks[line] + state.tShift[line]
    return state.src[first : state.eMarks[line]]
def escapedSplit(string: str) -> list[str]:
    """Split a table row into cells on every unescaped pipe.

    A pipe directly preceded by a backslash does not separate cells: the
    backslash is dropped and the pipe is kept as literal cell text.  A leading
    or trailing pipe yields an empty first or last cell, and the result always
    has at least one element.
    """
    cells: list[str] = []
    pending = ""  # text already collected for the cell being built
    segmentStart = 0  # start of the not-yet-copied part of that cell
    escaped = False  # whether the previous character was a backslash

    for index, char in enumerate(string):
        if char == "|":
            if escaped:
                # escaped pipe, '\|': copy everything up to (not including)
                # the backslash, then restart the segment at the pipe itself
                pending += string[segmentStart : index - 1]
                segmentStart = index
            else:
                # pipe separating cells, '|'
                cells.append(pending + string[segmentStart:index])
                pending = ""
                segmentStart = index + 1
        escaped = char == "\\"

    cells.append(pending + string[segmentStart:])
    return cells
def _splitRow(lineText: str) -> list[str]:
    """Split a row into cells and drop the empty cell left by an edge pipe."""
    cells = escapedSplit(lineText)
    if cells and cells[0] == "":
        cells.pop(0)
    if cells and cells[-1] == "":
        cells.pop()
    return cells


def table(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule that recognises a GFM table starting at ``startLine``.

    The line at ``startLine`` is the header row and the following line must be
    the delimiter row (``---``, ``:--``, ``--:``, ``:-:`` cells separated by
    pipes).  Subsequent lines are consumed as body rows until an empty line, a
    less-indented line, an indented-code line or a terminator rule is hit.

    :param state: block parser state; tokens are pushed onto it
    :param startLine: index of the candidate header row
    :param endLine: index one past the last line that may be consumed
    :param silent: validate only; return before pushing any token or
        changing ``state``
    :return: True if a table was recognised, False otherwise
    """
    # should have at least two lines
    if startLine + 2 > endLine:
        return False

    nextLine = startLine + 1

    if state.sCount[nextLine] < state.blkIndent:
        return False

    if state.is_code_block(nextLine):
        return False

    # first character of the second line should be '|', '-', ':',
    # and no other characters are allowed but spaces;
    # basically, this is the equivalent of /^[-:|][-:|\s]*$/ regexp
    delimiterChars = ("|", "-", ":")
    pos = state.bMarks[nextLine] + state.tShift[nextLine]
    lineEnd = state.eMarks[nextLine]

    if pos >= lineEnd:
        return False
    firstCh = state.src[pos]
    pos += 1
    if firstCh not in delimiterChars:
        return False

    if pos >= lineEnd:
        return False
    secondCh = state.src[pos]
    pos += 1
    if secondCh not in delimiterChars and not isStrSpace(secondCh):
        return False

    # if first character is '-', then second character must not be a space
    # (due to parsing ambiguity with list)
    if firstCh == "-" and isStrSpace(secondCh):
        return False

    if any(
        ch not in delimiterChars and not isStrSpace(ch)
        for ch in state.src[pos:lineEnd]
    ):
        return False

    # derive one alignment per column from the delimiter row
    delimiterCells = getLine(state, nextLine).split("|")
    lastIndex = len(delimiterCells) - 1
    aligns: list[str] = []
    for index, cell in enumerate(delimiterCells):
        spec = cell.strip()
        if not spec:
            # allow empty columns before and after table, but not in between columns;
            # e.g. allow ` |---| `, disallow ` ---||--- `
            if index == 0 or index == lastIndex:
                continue
            return False

        if not headerLineRe.search(spec):
            return False
        if spec.endswith(":"):
            aligns.append("center" if spec.startswith(":") else "right")
        elif spec.startswith(":"):
            aligns.append("left")
        else:
            aligns.append("")

    headerText = getLine(state, startLine).strip()
    if "|" not in headerText:
        return False
    if state.is_code_block(startLine):
        return False
    headerCells = _splitRow(headerText)

    # header row will define an amount of columns in the entire table,
    # and align row should be exactly the same (the rest of the rows can differ)
    columnCount = len(headerCells)
    if columnCount == 0 or columnCount != len(aligns):
        return False

    if silent:
        return True

    oldParentType = state.parentType
    state.parentType = "table"

    # use 'blockquote' lists for termination because it's
    # the most similar to tables
    terminatorRules = state.md.block.ruler.getRules("blockquote")

    token = state.push("table_open", "table", 1)
    # the end line is patched in once the extent of the table is known
    token.map = tableLines = [startLine, 0]

    token = state.push("thead_open", "thead", 1)
    token.map = [startLine, startLine + 1]

    token = state.push("tr_open", "tr", 1)
    token.map = [startLine, startLine + 1]

    for cell, align in zip(headerCells, aligns):
        token = state.push("th_open", "th", 1)
        if align:
            token.attrs = {"style": "text-align:" + align}

        token = state.push("inline", "", 0)
        # note in markdown-it this map was removed in v12.0.0 however, we keep it,
        # since it is helpful to propagate to children tokens
        token.map = [startLine, startLine + 1]
        token.content = cell.strip()
        token.children = []

        state.push("th_close", "th", -1)

    state.push("tr_close", "tr", -1)
    state.push("thead_close", "thead", -1)

    tbodyLines: list[int] | None = None
    nextLine = startLine + 2
    while nextLine < endLine:
        if state.sCount[nextLine] < state.blkIndent:
            break

        # any terminator rule matching in silent mode ends the table
        if any(rule(state, nextLine, endLine, True) for rule in terminatorRules):
            break

        rowText = getLine(state, nextLine).strip()
        if not rowText:
            break
        if state.is_code_block(nextLine):
            break
        rowCells = _splitRow(rowText)

        if nextLine == startLine + 2:
            # the body is opened lazily, on the first body row
            token = state.push("tbody_open", "tbody", 1)
            token.map = tbodyLines = [startLine + 2, 0]

        token = state.push("tr_open", "tr", 1)
        token.map = [nextLine, nextLine + 1]

        # always emit exactly one cell per header column: surplus cells in the
        # row are ignored, missing ones are rendered empty
        for index, align in enumerate(aligns):
            token = state.push("td_open", "td", 1)
            if align:
                token.attrs = {"style": "text-align:" + align}

            token = state.push("inline", "", 0)
            # note in markdown-it this map was removed in v12.0.0 however, we keep it,
            # since it is helpful to propagate to children tokens
            token.map = [nextLine, nextLine + 1]
            token.content = rowCells[index].strip() if index < len(rowCells) else ""
            token.children = []

            state.push("td_close", "td", -1)

        state.push("tr_close", "tr", -1)
        nextLine += 1

    if tbodyLines is not None:
        state.push("tbody_close", "tbody", -1)
        tbodyLines[1] = nextLine

    state.push("table_close", "table", -1)
    tableLines[1] = nextLine

    state.parentType = oldParentType
    state.line = nextLine
    return True