second commit

2024-12-27 22:31:23 +09:00
parent 2353324570
commit 10a0f110ca
8819 changed files with 1307198 additions and 28 deletions


@@ -0,0 +1,31 @@
__all__ = (
"StateInline",
"text",
"fragments_join",
"link_pairs",
"linkify",
"escape",
"newline",
"backtick",
"emphasis",
"image",
"link",
"autolink",
"entity",
"html_inline",
"strikethrough",
)
from . import emphasis, strikethrough
from .autolink import autolink
from .backticks import backtick
from .balance_pairs import link_pairs
from .entity import entity
from .escape import escape
from .fragments_join import fragments_join
from .html_inline import html_inline
from .image import image
from .link import link
from .linkify import linkify
from .newline import newline
from .state_inline import StateInline
from .text import text
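
A quick smoke test of the rule set above (a sketch, assuming this commit vendors markdown-it-py and the package is importable as markdown_it.rules_inline): rendering one line that mixes several constructs exercises the text, emphasis, backtick, and autolink rules end to end.

# Hedged smoke test; markdown_it / MarkdownIt are the upstream package names.
from markdown_it import MarkdownIt

md = MarkdownIt()
print(md.render("*emphasis*, `code`, and <https://example.com>"))
# expected roughly:
# <p><em>emphasis</em>, <code>code</code>, and
# <a href="https://example.com">https://example.com</a></p>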


@@ -0,0 +1,77 @@
# Process autolinks '<protocol:...>'
import re
from .state_inline import StateInline
EMAIL_RE = re.compile(
r"^([a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)$" # noqa: E501
)
AUTOLINK_RE = re.compile(r"^([a-zA-Z][a-zA-Z0-9+.\-]{1,31}):([^<>\x00-\x20]*)$")
def autolink(state: StateInline, silent: bool) -> bool:
pos = state.pos
if state.src[pos] != "<":
return False
start = state.pos
maximum = state.posMax
while True:
pos += 1
if pos >= maximum:
return False
ch = state.src[pos]
if ch == "<":
return False
if ch == ">":
break
url = state.src[start + 1 : pos]
if AUTOLINK_RE.search(url) is not None:
fullUrl = state.md.normalizeLink(url)
if not state.md.validateLink(fullUrl):
return False
if not silent:
token = state.push("link_open", "a", 1)
token.attrs = {"href": fullUrl}
token.markup = "autolink"
token.info = "auto"
token = state.push("text", "", 0)
token.content = state.md.normalizeLinkText(url)
token = state.push("link_close", "a", -1)
token.markup = "autolink"
token.info = "auto"
state.pos += len(url) + 2
return True
if EMAIL_RE.search(url) is not None:
fullUrl = state.md.normalizeLink("mailto:" + url)
if not state.md.validateLink(fullUrl):
return False
if not silent:
token = state.push("link_open", "a", 1)
token.attrs = {"href": fullUrl}
token.markup = "autolink"
token.info = "auto"
token = state.push("text", "", 0)
token.content = state.md.normalizeLinkText(url)
token = state.push("link_close", "a", -1)
token.markup = "autolink"
token.info = "auto"
state.pos += len(url) + 2
return True
return False
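
The rule can also be driven on its own, which makes the token shape visible. A sketch, assuming the markdown_it.rules_inline import path; StateInline(src, md, env, outTokens) is the constructor defined later in this commit.

from markdown_it import MarkdownIt
from markdown_it.rules_inline import StateInline, autolink  # assumed path

md = MarkdownIt()
state = StateInline("<https://example.com>", md, {}, [])
assert autolink(state, silent=False)
print([t.type for t in state.tokens])
# ['link_open', 'text', 'link_close']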


@@ -0,0 +1,72 @@
# Parse backticks
import re
from .state_inline import StateInline
regex = re.compile("^ (.+) $")  # NOTE: appears to be unused in this module
def backtick(state: StateInline, silent: bool) -> bool:
pos = state.pos
if state.src[pos] != "`":
return False
start = pos
pos += 1
maximum = state.posMax
# scan marker length
while pos < maximum and (state.src[pos] == "`"):
pos += 1
marker = state.src[start:pos]
openerLength = len(marker)
if state.backticksScanned and state.backticks.get(openerLength, 0) <= start:
if not silent:
state.pending += marker
state.pos += openerLength
return True
matchStart = matchEnd = pos
# Nothing found in the cache, scan until the end of the line (or until marker is found)
while True:
try:
matchStart = state.src.index("`", matchEnd)
except ValueError:
break
matchEnd = matchStart + 1
# scan marker length
while matchEnd < maximum and (state.src[matchEnd] == "`"):
matchEnd += 1
closerLength = matchEnd - matchStart
if closerLength == openerLength:
# Found matching closer length.
if not silent:
token = state.push("code_inline", "code", 0)
token.markup = marker
token.content = state.src[pos:matchStart].replace("\n", " ")
if (
token.content.startswith(" ")
and token.content.endswith(" ")
and len(token.content.strip()) > 0
):
token.content = token.content[1:-1]
state.pos = matchEnd
return True
# Some different length found, put it in cache as upper limit of where closer can be found
state.backticks[closerLength] = matchStart
# Scanned through the end, didn't find anything
state.backticksScanned = True
if not silent:
state.pending += marker
state.pos += openerLength
return True
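
The single-space stripping near the end implements the CommonMark rule that lets a code span begin or end with a backtick. A rendering sketch (assuming markdown-it-py):

from markdown_it import MarkdownIt

md = MarkdownIt()
print(md.render("`` `backticks` ``"))  # <p><code>`backticks`</code></p>
print(md.render("` a `"))   # one space stripped from each side: <code>a</code>
print(md.render("`  `"))    # all-space content is kept as-is: <code>  </code>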


@@ -0,0 +1,137 @@
"""Balance paired characters (*, _, etc) in inline tokens."""
from __future__ import annotations
from .state_inline import Delimiter, StateInline
def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None:
"""For each opening emphasis-like marker find a matching closing one."""
if not delimiters:
return
openersBottom = {}
maximum = len(delimiters)
# headerIdx is the first delimiter of the run containing the current closer
headerIdx = 0
lastTokenIdx = -2 # needs any value lower than -1
jumps: list[int] = []
closerIdx = 0
while closerIdx < maximum:
closer = delimiters[closerIdx]
jumps.append(0)
# markers belong to same delimiter run if:
# - they have adjacent tokens
# - AND markers are the same
#
if (
delimiters[headerIdx].marker != closer.marker
or lastTokenIdx != closer.token - 1
):
headerIdx = closerIdx
lastTokenIdx = closer.token
# Length is only used for emphasis-specific "rule of 3",
# if it's not defined (in strikethrough or 3rd party plugins),
# we can default it to 0 to disable those checks.
#
closer.length = closer.length or 0
if not closer.close:
closerIdx += 1
continue
# Previously calculated lower bounds (previous fails)
# for each marker, each delimiter length modulo 3,
# and for whether this closer can be an opener;
# https://github.com/commonmark/cmark/commit/34250e12ccebdc6372b8b49c44fab57c72443460
if closer.marker not in openersBottom:
openersBottom[closer.marker] = [-1, -1, -1, -1, -1, -1]
minOpenerIdx = openersBottom[closer.marker][
(3 if closer.open else 0) + (closer.length % 3)
]
openerIdx = headerIdx - jumps[headerIdx] - 1
newMinOpenerIdx = openerIdx
while openerIdx > minOpenerIdx:
opener = delimiters[openerIdx]
if opener.marker != closer.marker:
openerIdx -= jumps[openerIdx] + 1
continue
if opener.open and opener.end < 0:
isOddMatch = False
# from spec:
#
# If one of the delimiters can both open and close emphasis, then the
# sum of the lengths of the delimiter runs containing the opening and
# closing delimiters must not be a multiple of 3 unless both lengths
# are multiples of 3.
#
if (
(opener.close or closer.open)
and ((opener.length + closer.length) % 3 == 0)
and (opener.length % 3 != 0 or closer.length % 3 != 0)
):
isOddMatch = True
if not isOddMatch:
# If previous delimiter cannot be an opener, we can safely skip
# the entire sequence in future checks. This is required to make
# sure algorithm has linear complexity (see *_*_*_*_*_... case).
#
if openerIdx > 0 and not delimiters[openerIdx - 1].open:
lastJump = jumps[openerIdx - 1] + 1
else:
lastJump = 0
jumps[closerIdx] = closerIdx - openerIdx + lastJump
jumps[openerIdx] = lastJump
closer.open = False
opener.end = closerIdx
opener.close = False
newMinOpenerIdx = -1
# treat next token as start of run,
# it optimizes skips in **<...>**a**<...>** pathological case
lastTokenIdx = -2
break
openerIdx -= jumps[openerIdx] + 1
if newMinOpenerIdx != -1:
# If match for this delimiter run failed, we want to set lower bound for
# future lookups. This is required to make sure algorithm has linear
# complexity.
#
# See details here:
# https://github.com/commonmark/cmark/issues/178#issuecomment-270417442
#
openersBottom[closer.marker][
(3 if closer.open else 0) + ((closer.length or 0) % 3)
] = newMinOpenerIdx
closerIdx += 1
def link_pairs(state: StateInline) -> None:
tokens_meta = state.tokens_meta
maximum = len(state.tokens_meta)
processDelimiters(state, state.delimiters)
curr = 0
while curr < maximum:
curr_meta = tokens_meta[curr]
if curr_meta and "delimiters" in curr_meta:
processDelimiters(state, curr_meta["delimiters"])
curr += 1
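
The "rule of 3" referenced above can be observed end to end; a sketch, assuming markdown-it-py, with the output expected per CommonMark:

from markdown_it import MarkdownIt

md = MarkdownIt()
# The inner "**" runs pair with each other, not with the outer single "*":
# combined lengths of 1 + 2 are a multiple of 3 while neither run's length
# is, so that cross-pairing is rejected as an "odd match".
print(md.render("*foo**bar**baz*"))
# <p><em>foo<strong>bar</strong>baz</em></p>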


@@ -0,0 +1,102 @@
# Process *this* and _that_
#
from __future__ import annotations
from .state_inline import Delimiter, StateInline
def tokenize(state: StateInline, silent: bool) -> bool:
"""Insert each marker as a separate text token, and add it to delimiter list"""
start = state.pos
marker = state.src[start]
if silent:
return False
if marker not in ("_", "*"):
return False
scanned = state.scanDelims(state.pos, marker == "*")
for _ in range(scanned.length):
token = state.push("text", "", 0)
token.content = marker
state.delimiters.append(
Delimiter(
marker=ord(marker),
length=scanned.length,
token=len(state.tokens) - 1,
end=-1,
open=scanned.can_open,
close=scanned.can_close,
)
)
state.pos += scanned.length
return True
def _postProcess(state: StateInline, delimiters: list[Delimiter]) -> None:
i = len(delimiters) - 1
while i >= 0:
startDelim = delimiters[i]
# /* _ */ /* * */
if startDelim.marker != 0x5F and startDelim.marker != 0x2A:
i -= 1
continue
# Process only opening markers
if startDelim.end == -1:
i -= 1
continue
endDelim = delimiters[startDelim.end]
# If the previous delimiter has the same marker and is adjacent to this one,
# merge those into one strong delimiter.
#
# `<em><em>whatever</em></em>` -> `<strong>whatever</strong>`
#
isStrong = (
i > 0
and delimiters[i - 1].end == startDelim.end + 1
# check that first two markers match and adjacent
and delimiters[i - 1].marker == startDelim.marker
and delimiters[i - 1].token == startDelim.token - 1
# check that last two markers are adjacent (we can safely assume they match)
and delimiters[startDelim.end + 1].token == endDelim.token + 1
)
ch = chr(startDelim.marker)
token = state.tokens[startDelim.token]
token.type = "strong_open" if isStrong else "em_open"
token.tag = "strong" if isStrong else "em"
token.nesting = 1
token.markup = ch + ch if isStrong else ch
token.content = ""
token = state.tokens[endDelim.token]
token.type = "strong_close" if isStrong else "em_close"
token.tag = "strong" if isStrong else "em"
token.nesting = -1
token.markup = ch + ch if isStrong else ch
token.content = ""
if isStrong:
state.tokens[delimiters[i - 1].token].content = ""
state.tokens[delimiters[startDelim.end + 1].token].content = ""
i -= 1
i -= 1
def postProcess(state: StateInline) -> None:
"""Walk through delimiter list and replace text tokens with tags."""
_postProcess(state, state.delimiters)
for token in state.tokens_meta:
if token and "delimiters" in token:
_postProcess(state, token["delimiters"])
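
A rendering sketch of the merge performed in _postProcess (assuming markdown-it-py): two adjacent matched pairs of the same marker collapse into <strong>, and a triple run yields nested tags.

from markdown_it import MarkdownIt

md = MarkdownIt()
print(md.render("*em* **strong** ***both***"))
# <p><em>em</em> <strong>strong</strong> <em><strong>both</strong></em></p>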


@@ -0,0 +1,53 @@
# Process html entity - &#123;, &#xAF;, &quot;, ...
import re
from ..common.entities import entities
from ..common.utils import fromCodePoint, isValidEntityCode
from .state_inline import StateInline
DIGITAL_RE = re.compile(r"^&#((?:x[a-f0-9]{1,6}|[0-9]{1,7}));", re.IGNORECASE)
NAMED_RE = re.compile(r"^&([a-z][a-z0-9]{1,31});", re.IGNORECASE)
def entity(state: StateInline, silent: bool) -> bool:
pos = state.pos
maximum = state.posMax
if state.src[pos] != "&":
return False
if pos + 1 >= maximum:
return False
if state.src[pos + 1] == "#":
if match := DIGITAL_RE.search(state.src[pos:]):
if not silent:
match1 = match.group(1)
code = (
int(match1[1:], 16) if match1[0].lower() == "x" else int(match1, 10)
)
token = state.push("text_special", "", 0)
token.content = (
fromCodePoint(code)
if isValidEntityCode(code)
else fromCodePoint(0xFFFD)
)
token.markup = match.group(0)
token.info = "entity"
state.pos += len(match.group(0))
return True
else:
if (match := NAMED_RE.search(state.src[pos:])) and match.group(1) in entities:
if not silent:
token = state.push("text_special", "", 0)
token.content = entities[match.group(1)]
token.markup = match.group(0)
token.info = "entity"
state.pos += len(match.group(0))
return True
return False
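
A sketch (assuming markdown-it-py): named, decimal, and hex forms decode to the same character, and invalid code points fall back to U+FFFD as in the branch above.

from markdown_it import MarkdownIt

md = MarkdownIt()
print(md.render("&copy; &#169; &#xA9;"))  # <p>© © ©</p>
print(md.render("&#0;"))  # invalid code point -> U+FFFD replacement character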


@@ -0,0 +1,92 @@
"""
Process escaped chars and hardbreaks
"""
from ..common.utils import isStrSpace
from .state_inline import StateInline
def escape(state: StateInline, silent: bool) -> bool:
"""Process escaped chars and hardbreaks."""
pos = state.pos
maximum = state.posMax
if state.src[pos] != "\\":
return False
pos += 1
# '\' at the end of the inline block
if pos >= maximum:
return False
ch1 = state.src[pos]
ch1_ord = ord(ch1)
if ch1 == "\n":
if not silent:
state.push("hardbreak", "br", 0)
pos += 1
# skip leading whitespaces from next line
while pos < maximum:
ch = state.src[pos]
if not isStrSpace(ch):
break
pos += 1
state.pos = pos
return True
escapedStr = state.src[pos]
if ch1_ord >= 0xD800 and ch1_ord <= 0xDBFF and pos + 1 < maximum:
ch2 = state.src[pos + 1]
ch2_ord = ord(ch2)
if ch2_ord >= 0xDC00 and ch2_ord <= 0xDFFF:
escapedStr += ch2
pos += 1
origStr = "\\" + escapedStr
if not silent:
token = state.push("text_special", "", 0)
token.content = escapedStr if ch1 in _ESCAPED else origStr
token.markup = origStr
token.info = "escape"
state.pos = pos + 1
return True
_ESCAPED = {
"!",
'"',
"#",
"$",
"%",
"&",
"'",
"(",
")",
"*",
"+",
",",
"-",
".",
"/",
":",
";",
"<",
"=",
">",
"?",
"@",
"[",
"\\",
"]",
"^",
"_",
"`",
"{",
"|",
"}",
"~",
}
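
A sketch (assuming markdown-it-py): only the punctuation in _ESCAPED loses its backslash, other escapes stay literal, and a backslash before a newline becomes a hard break.

from markdown_it import MarkdownIt

md = MarkdownIt()
print(md.render(r"\*not emphasized\*"))  # <p>*not emphasized*</p>
print(md.render(r"\q stays literal"))    # <p>\q stays literal</p>
print(md.render("line\\\nbreak"))        # <p>line<br />\nbreak</p>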


@@ -0,0 +1,43 @@
from .state_inline import StateInline
def fragments_join(state: StateInline) -> None:
"""
Clean up tokens after emphasis and strikethrough postprocessing:
merge adjacent text nodes into one and re-calculate all token levels
This is necessary because initially emphasis delimiter markers (``*, _, ~``)
are treated as their own separate text tokens. Then emphasis rule either
leaves them as text (needed to merge with adjacent text) or turns them
into opening/closing tags (which messes up levels inside).
"""
level = 0
maximum = len(state.tokens)
curr = last = 0
while curr < maximum:
# re-calculate levels after emphasis/strikethrough turns some text nodes
# into opening/closing tags
if state.tokens[curr].nesting < 0:
level -= 1 # closing tag
state.tokens[curr].level = level
if state.tokens[curr].nesting > 0:
level += 1 # opening tag
if (
state.tokens[curr].type == "text"
and curr + 1 < maximum
and state.tokens[curr + 1].type == "text"
):
# collapse two adjacent text nodes
state.tokens[curr + 1].content = (
state.tokens[curr].content + state.tokens[curr + 1].content
)
else:
if curr != last:
state.tokens[last] = state.tokens[curr]
last += 1
curr += 1
if curr != last:
del state.tokens[last:]
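
The effect is easiest to see on an unmatched marker; a sketch, assuming markdown-it-py and its parseInline API:

from markdown_it import MarkdownIt

md = MarkdownIt()
children = md.parseInline("a*b", {})[0].children
print([(t.type, t.content) for t in children])
# [('text', 'a*b')]
# the lone "*" stayed a text token and was merged back into its neighbours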


@@ -0,0 +1,43 @@
# Process html tags
from ..common.html_re import HTML_TAG_RE
from ..common.utils import isLinkClose, isLinkOpen
from .state_inline import StateInline
def isLetter(ch: int) -> bool:
lc = ch | 0x20 # to lower case
# /* a */ and /* z */
return (lc >= 0x61) and (lc <= 0x7A)
def html_inline(state: StateInline, silent: bool) -> bool:
pos = state.pos
if not state.md.options.get("html", None):
return False
# Check start
maximum = state.posMax
if state.src[pos] != "<" or pos + 2 >= maximum:
return False
# Quick fail on second char
ch = state.src[pos + 1]
if ch not in ("!", "?", "/") and not isLetter(ord(ch)):
return False
match = HTML_TAG_RE.search(state.src[pos:])
if not match:
return False
if not silent:
token = state.push("html_inline", "", 0)
token.content = state.src[pos : pos + len(match.group(0))]
if isLinkOpen(token.content):
state.linkLevel += 1
if isLinkClose(token.content):
state.linkLevel -= 1
state.pos += len(match.group(0))
return True
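
A sketch (assuming markdown-it-py, whose default "commonmark" preset sets the html option): the rule only fires when html is enabled.

from markdown_it import MarkdownIt

md = MarkdownIt()
print(md.render("a <b>bold</b> tag"))  # <p>a <b>bold</b> tag</p>
md.options["html"] = False
print(md.render("a <b>bold</b> tag"))  # <p>a &lt;b&gt;bold&lt;/b&gt; tag</p>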


@@ -0,0 +1,148 @@
# Process ![image](<src> "title")
from __future__ import annotations
from ..common.utils import isStrSpace, normalizeReference
from ..token import Token
from .state_inline import StateInline
def image(state: StateInline, silent: bool) -> bool:
label = None
href = ""
oldPos = state.pos
max = state.posMax
if state.src[state.pos] != "!":
return False
if state.pos + 1 < state.posMax and state.src[state.pos + 1] != "[":
return False
labelStart = state.pos + 2
labelEnd = state.md.helpers.parseLinkLabel(state, state.pos + 1, False)
# parser failed to find ']', so it's not a valid link
if labelEnd < 0:
return False
pos = labelEnd + 1
if pos < max and state.src[pos] == "(":
#
# Inline link
#
# [link]( <href> "title" )
# ^^ skipping these spaces
pos += 1
while pos < max:
ch = state.src[pos]
if not isStrSpace(ch) and ch != "\n":
break
pos += 1
if pos >= max:
return False
# [link]( <href> "title" )
# ^^^^^^ parsing link destination
start = pos
res = state.md.helpers.parseLinkDestination(state.src, pos, state.posMax)
if res.ok:
href = state.md.normalizeLink(res.str)
if state.md.validateLink(href):
pos = res.pos
else:
href = ""
# [link]( <href> "title" )
# ^^ skipping these spaces
start = pos
while pos < max:
ch = state.src[pos]
if not isStrSpace(ch) and ch != "\n":
break
pos += 1
# [link]( <href> "title" )
# ^^^^^^^ parsing link title
res = state.md.helpers.parseLinkTitle(state.src, pos, state.posMax)
if pos < max and start != pos and res.ok:
title = res.str
pos = res.pos
# [link]( <href> "title" )
# ^^ skipping these spaces
while pos < max:
ch = state.src[pos]
if not isStrSpace(ch) and ch != "\n":
break
pos += 1
else:
title = ""
if pos >= max or state.src[pos] != ")":
state.pos = oldPos
return False
pos += 1
else:
#
# Link reference
#
if "references" not in state.env:
return False
# /* [ */
if pos < max and state.src[pos] == "[":
start = pos + 1
pos = state.md.helpers.parseLinkLabel(state, pos)
if pos >= 0:
label = state.src[start:pos]
pos += 1
else:
pos = labelEnd + 1
else:
pos = labelEnd + 1
# covers label == '' and label is None
# (collapsed reference link and shortcut reference link respectively)
if not label:
label = state.src[labelStart:labelEnd]
label = normalizeReference(label)
ref = state.env["references"].get(label, None)
if not ref:
state.pos = oldPos
return False
href = ref["href"]
title = ref["title"]
#
# We found the end of the link, and know for a fact it's a valid link
# so all that's left to do is to call tokenizer.
#
if not silent:
content = state.src[labelStart:labelEnd]
tokens: list[Token] = []
state.md.inline.parse(content, state.md, state.env, tokens)
token = state.push("image", "img", 0)
token.attrs = {"src": href, "alt": ""}
token.children = tokens or None
token.content = content
if title:
token.attrSet("title", title)
# note, this is not part of markdown-it JS, but is useful for renderers
if label and state.md.options.get("store_labels", False):
token.meta["label"] = label
state.pos = pos
state.posMax = max
return True
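
A rendering sketch (assuming markdown-it-py): the label is parsed into token.children and rendered as plain text for the alt attribute.

from markdown_it import MarkdownIt

md = MarkdownIt()
print(md.render('![alt *text*](img.png "title")'))
# <p><img src="img.png" alt="alt text" title="title" /></p>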


@@ -0,0 +1,151 @@
# Process [link](<to> "stuff")
from ..common.utils import isStrSpace, normalizeReference
from .state_inline import StateInline
def link(state: StateInline, silent: bool) -> bool:
href = ""
title = ""
label = None
oldPos = state.pos
maximum = state.posMax
start = state.pos
parseReference = True
if state.src[state.pos] != "[":
return False
labelStart = state.pos + 1
labelEnd = state.md.helpers.parseLinkLabel(state, state.pos, True)
# parser failed to find ']', so it's not a valid link
if labelEnd < 0:
return False
pos = labelEnd + 1
if pos < maximum and state.src[pos] == "(":
#
# Inline link
#
# might have found a valid shortcut link, disable reference parsing
parseReference = False
# [link]( <href> "title" )
# ^^ skipping these spaces
pos += 1
while pos < maximum:
ch = state.src[pos]
if not isStrSpace(ch) and ch != "\n":
break
pos += 1
if pos >= maximum:
return False
# [link]( <href> "title" )
# ^^^^^^ parsing link destination
start = pos
res = state.md.helpers.parseLinkDestination(state.src, pos, state.posMax)
if res.ok:
href = state.md.normalizeLink(res.str)
if state.md.validateLink(href):
pos = res.pos
else:
href = ""
# [link]( <href> "title" )
# ^^ skipping these spaces
start = pos
while pos < maximum:
ch = state.src[pos]
if not isStrSpace(ch) and ch != "\n":
break
pos += 1
# [link]( <href> "title" )
# ^^^^^^^ parsing link title
res = state.md.helpers.parseLinkTitle(state.src, pos, state.posMax)
if pos < maximum and start != pos and res.ok:
title = res.str
pos = res.pos
# [link]( <href> "title" )
# ^^ skipping these spaces
while pos < maximum:
ch = state.src[pos]
if not isStrSpace(ch) and ch != "\n":
break
pos += 1
if pos >= maximum or state.src[pos] != ")":
# parsing a valid shortcut link failed, fallback to reference
parseReference = True
pos += 1
if parseReference:
#
# Link reference
#
if "references" not in state.env:
return False
if pos < maximum and state.src[pos] == "[":
start = pos + 1
pos = state.md.helpers.parseLinkLabel(state, pos)
if pos >= 0:
label = state.src[start:pos]
pos += 1
else:
pos = labelEnd + 1
else:
pos = labelEnd + 1
# covers label == '' and label is None
# (collapsed reference link and shortcut reference link respectively)
if not label:
label = state.src[labelStart:labelEnd]
label = normalizeReference(label)
ref = (
state.env["references"][label] if label in state.env["references"] else None
)
if not ref:
state.pos = oldPos
return False
href = ref["href"]
title = ref["title"]
#
# We found the end of the link, and know for a fact it's a valid link
# so all that's left to do is to call tokenizer.
#
if not silent:
state.pos = labelStart
state.posMax = labelEnd
token = state.push("link_open", "a", 1)
token.attrs = {"href": href}
if title:
token.attrSet("title", title)
# note, this is not part of markdown-it JS, but is useful for renderers
if label and state.md.options.get("store_labels", False):
token.meta["label"] = label
state.linkLevel += 1
state.md.inline.tokenize(state)
state.linkLevel -= 1
token = state.push("link_close", "a", -1)
state.pos = pos
state.posMax = maximum
return True
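
A sketch (assuming markdown-it-py) covering both branches: the inline form, and a shortcut reference resolved through env["references"], which the block-level reference rule populates.

from markdown_it import MarkdownIt

md = MarkdownIt()
src = '[inline](https://example.com "t")\n\n[ref]\n\n[ref]: https://example.com "t"'
print(md.render(src))
# <p><a href="https://example.com" title="t">inline</a></p>
# <p><a href="https://example.com" title="t">ref</a></p>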


@@ -0,0 +1,61 @@
"""Process links like https://example.org/"""
import re
from .state_inline import StateInline
# RFC3986: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
SCHEME_RE = re.compile(r"(?:^|[^a-z0-9.+-])([a-z][a-z0-9.+-]*)$", re.IGNORECASE)
def linkify(state: StateInline, silent: bool) -> bool:
"""Rule for identifying plain-text links."""
if not state.md.options.linkify:
return False
if state.linkLevel > 0:
return False
if not state.md.linkify:
raise ModuleNotFoundError("Linkify enabled but not installed.")
pos = state.pos
maximum = state.posMax
if (
(pos + 3) > maximum
or state.src[pos] != ":"
or state.src[pos + 1] != "/"
or state.src[pos + 2] != "/"
):
return False
if not (match := SCHEME_RE.match(state.pending)):
return False
proto = match.group(1)
if not (link := state.md.linkify.match_at_start(state.src[pos - len(proto) :])):
return False
url: str = link.url
# disallow '*' at the end of the link (conflicts with emphasis)
url = url.rstrip("*")
full_url = state.md.normalizeLink(url)
if not state.md.validateLink(full_url):
return False
if not silent:
state.pending = state.pending[: -len(proto)]
token = state.push("link_open", "a", 1)
token.attrs = {"href": full_url}
token.markup = "linkify"
token.info = "auto"
token = state.push("text", "", 0)
token.content = state.md.normalizeLinkText(url)
token = state.push("link_close", "a", -1)
token.markup = "linkify"
token.info = "auto"
state.pos += len(url) - len(proto)
return True
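
A usage sketch: the rule needs the linkify option set, the disabled-by-default rule enabled, and the optional linkify-it-py dependency installed (hence the ModuleNotFoundError guard above).

from markdown_it import MarkdownIt

md = MarkdownIt("commonmark", {"linkify": True}).enable("linkify")
print(md.render("see https://example.com for details"))
# <p>see <a href="https://example.com">https://example.com</a> for details</p>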


@@ -0,0 +1,43 @@
"""Proceess '\n'."""
from ..common.utils import charStrAt, isStrSpace
from .state_inline import StateInline
def newline(state: StateInline, silent: bool) -> bool:
pos = state.pos
if state.src[pos] != "\n":
return False
pmax = len(state.pending) - 1
maximum = state.posMax
# '  \n' -> hardbreak
# Lookup in pending chars is bad practice! Don't copy to other rules!
# (In the upstream JS implementation the pending string is stored in
# "concat mode"; indexed lookups force conversion to a flat string.)
if not silent:
if pmax >= 0 and charStrAt(state.pending, pmax) == " ":
if pmax >= 1 and charStrAt(state.pending, pmax - 1) == " ":
# Find whitespaces tail of pending chars.
ws = pmax - 1
while ws >= 1 and charStrAt(state.pending, ws - 1) == " ":
ws -= 1
state.pending = state.pending[:ws]
state.push("hardbreak", "br", 0)
else:
state.pending = state.pending[:-1]
state.push("softbreak", "br", 0)
else:
state.push("softbreak", "br", 0)
pos += 1
# skip heading spaces for next line
while pos < maximum and isStrSpace(state.src[pos]):
pos += 1
state.pos = pos
return True
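
A sketch (assuming markdown-it-py): two trailing spaces produce a hardbreak, a bare newline a softbreak (rendered as a plain newline by default).

from markdown_it import MarkdownIt

md = MarkdownIt()
print(md.render("hard  \nbreak"))  # <p>hard<br />\nbreak</p>
print(md.render("soft\nbreak"))    # <p>soft\nbreak</p>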


@@ -0,0 +1,166 @@
from __future__ import annotations
from collections import namedtuple
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Literal
from .._compat import DATACLASS_KWARGS
from ..common.utils import isMdAsciiPunct, isPunctChar, isWhiteSpace
from ..ruler import StateBase
from ..token import Token
from ..utils import EnvType
if TYPE_CHECKING:
from markdown_it import MarkdownIt
@dataclass(**DATACLASS_KWARGS)
class Delimiter:
# Char code of the starting marker (number).
marker: int
# Total length of these series of delimiters.
length: int
# A position of the token this delimiter corresponds to.
token: int
# If this delimiter is matched as a valid opener, `end` will be
# equal to its position, otherwise it's `-1`.
end: int
# Boolean flags that determine if this delimiter could open or close
# an emphasis.
open: bool
close: bool
level: bool | None = None
Scanned = namedtuple("Scanned", ["can_open", "can_close", "length"])
class StateInline(StateBase):
def __init__(
self, src: str, md: MarkdownIt, env: EnvType, outTokens: list[Token]
) -> None:
self.src = src
self.env = env
self.md = md
self.tokens = outTokens
self.tokens_meta: list[dict[str, Any] | None] = [None] * len(outTokens)
self.pos = 0
self.posMax = len(self.src)
self.level = 0
self.pending = ""
self.pendingLevel = 0
# Stores { start: end } pairs. Useful for backtracking
# optimization of pair parsing (emphasis, strikethrough).
self.cache: dict[int, int] = {}
# List of emphasis-like delimiters for current tag
self.delimiters: list[Delimiter] = []
# Stack of delimiter lists for upper level tags
self._prev_delimiters: list[list[Delimiter]] = []
# backticklength => last seen position
self.backticks: dict[int, int] = {}
self.backticksScanned = False
# Counter used to disable inline linkify-it execution
# inside <a> and markdown links
self.linkLevel = 0
def __repr__(self) -> str:
return (
f"{self.__class__.__name__}"
f"(pos=[{self.pos} of {self.posMax}], token={len(self.tokens)})"
)
def pushPending(self) -> Token:
token = Token("text", "", 0)
token.content = self.pending
token.level = self.pendingLevel
self.tokens.append(token)
self.pending = ""
return token
def push(self, ttype: str, tag: str, nesting: Literal[-1, 0, 1]) -> Token:
"""Push new token to "stream".
If pending text exists - flush it as text token
"""
if self.pending:
self.pushPending()
token = Token(ttype, tag, nesting)
token_meta = None
if nesting < 0:
# closing tag
self.level -= 1
self.delimiters = self._prev_delimiters.pop()
token.level = self.level
if nesting > 0:
# opening tag
self.level += 1
self._prev_delimiters.append(self.delimiters)
self.delimiters = []
token_meta = {"delimiters": self.delimiters}
self.pendingLevel = self.level
self.tokens.append(token)
self.tokens_meta.append(token_meta)
return token
def scanDelims(self, start: int, canSplitWord: bool) -> Scanned:
"""
Scan a sequence of emphasis-like markers, and determine whether
it can start an emphasis sequence or end an emphasis sequence.
- start - position to scan from (it should point at a valid marker);
- canSplitWord - determine if these markers can be found inside a word
"""
pos = start
maximum = self.posMax
marker = self.src[start]
# treat beginning of the line as a whitespace
lastChar = self.src[start - 1] if start > 0 else " "
while pos < maximum and self.src[pos] == marker:
pos += 1
count = pos - start
# treat end of the line as a whitespace
nextChar = self.src[pos] if pos < maximum else " "
isLastPunctChar = isMdAsciiPunct(ord(lastChar)) or isPunctChar(lastChar)
isNextPunctChar = isMdAsciiPunct(ord(nextChar)) or isPunctChar(nextChar)
isLastWhiteSpace = isWhiteSpace(ord(lastChar))
isNextWhiteSpace = isWhiteSpace(ord(nextChar))
left_flanking = not (
isNextWhiteSpace
or (isNextPunctChar and not (isLastWhiteSpace or isLastPunctChar))
)
right_flanking = not (
isLastWhiteSpace
or (isLastPunctChar and not (isNextWhiteSpace or isNextPunctChar))
)
if not canSplitWord:
can_open = left_flanking and ((not right_flanking) or isLastPunctChar)
can_close = right_flanking and ((not left_flanking) or isNextPunctChar)
else:
can_open = left_flanking
can_close = right_flanking
return Scanned(can_open, can_close, count)
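
scanDelims is easiest to understand from its output; a sketch, assuming the markdown_it.rules_inline import path:

from markdown_it import MarkdownIt
from markdown_it.rules_inline import StateInline  # assumed path

md = MarkdownIt()
state = StateInline("**word**", md, {}, [])
print(state.scanDelims(0, True))  # Scanned(can_open=True, can_close=False, length=2)
print(state.scanDelims(6, True))  # Scanned(can_open=False, can_close=True, length=2)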


@@ -0,0 +1,127 @@
# ~~strike through~~
from __future__ import annotations
from .state_inline import Delimiter, StateInline
def tokenize(state: StateInline, silent: bool) -> bool:
"""Insert each marker as a separate text token, and add it to delimiter list"""
start = state.pos
ch = state.src[start]
if silent:
return False
if ch != "~":
return False
scanned = state.scanDelims(state.pos, True)
length = scanned.length
if length < 2:
return False
if length % 2:
token = state.push("text", "", 0)
token.content = ch
length -= 1
i = 0
while i < length:
token = state.push("text", "", 0)
token.content = ch + ch
state.delimiters.append(
Delimiter(
marker=ord(ch),
length=0, # disable "rule of 3" length checks meant for emphasis
token=len(state.tokens) - 1,
end=-1,
open=scanned.can_open,
close=scanned.can_close,
)
)
i += 2
state.pos += scanned.length
return True
def _postProcess(state: StateInline, delimiters: list[Delimiter]) -> None:
loneMarkers = []
maximum = len(delimiters)
i = 0
while i < maximum:
startDelim = delimiters[i]
if startDelim.marker != 0x7E: # /* ~ */
i += 1
continue
if startDelim.end == -1:
i += 1
continue
endDelim = delimiters[startDelim.end]
token = state.tokens[startDelim.token]
token.type = "s_open"
token.tag = "s"
token.nesting = 1
token.markup = "~~"
token.content = ""
token = state.tokens[endDelim.token]
token.type = "s_close"
token.tag = "s"
token.nesting = -1
token.markup = "~~"
token.content = ""
if (
state.tokens[endDelim.token - 1].type == "text"
and state.tokens[endDelim.token - 1].content == "~"
):
loneMarkers.append(endDelim.token - 1)
i += 1
# If a marker sequence has an odd number of characters, it's split
# like this: `~~~~~` -> `~` + `~~` + `~~`, leaving one marker at the
# start of the sequence.
#
# So, we have to move all those markers after subsequent s_close tags.
#
while loneMarkers:
i = loneMarkers.pop()
j = i + 1
while (j < len(state.tokens)) and (state.tokens[j].type == "s_close"):
j += 1
j -= 1
if i != j:
token = state.tokens[j]
state.tokens[j] = state.tokens[i]
state.tokens[i] = token
def postProcess(state: StateInline) -> None:
"""Walk through delimiter list and replace text tokens with tags."""
tokens_meta = state.tokens_meta
maximum = len(state.tokens_meta)
_postProcess(state, state.delimiters)
curr = 0
while curr < maximum:
try:
curr_meta = tokens_meta[curr]
except IndexError:
pass
else:
if curr_meta and "delimiters" in curr_meta:
_postProcess(state, curr_meta["delimiters"])
curr += 1
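
A sketch (assuming markdown-it-py, where strikethrough is not part of the commonmark preset and must be enabled): note how the lone marker from an odd-length run ends up outside the <s> tags, per the relocation loop above.

from markdown_it import MarkdownIt

md = MarkdownIt().enable("strikethrough")
print(md.render("~~struck~~ and ~~~odd~~~"))
# expected roughly: <p><s>struck</s> and ~<s>odd</s>~</p>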


@@ -0,0 +1,53 @@
# Skip text characters for the text token, place them in the pending
# buffer, and increment the current position
from .state_inline import StateInline
# Rule to skip pure text
# '{}$%@~+=:' reserved for extensions
# !!!! Don't confuse with "Markdown ASCII Punctuation" chars
# http://spec.commonmark.org/0.15/#ascii-punctuation-character
_TerminatorChars = {
"\n",
"!",
"#",
"$",
"%",
"&",
"*",
"+",
"-",
":",
"<",
"=",
">",
"@",
"[",
"\\",
"]",
"^",
"_",
"`",
"{",
"}",
"~",
}
def text(state: StateInline, silent: bool) -> bool:
pos = state.pos
posMax = state.posMax
while (pos < posMax) and state.src[pos] not in _TerminatorChars:
pos += 1
if pos == state.pos:
return False
if not silent:
state.pending += state.src[state.pos : pos]
state.pos = pos
return True
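
A sketch of the rule in isolation (assuming the markdown_it.rules_inline import path): everything up to the first terminator character is buffered in state.pending rather than pushed as individual tokens.

from markdown_it import MarkdownIt
from markdown_it.rules_inline import StateInline, text  # assumed path

md = MarkdownIt()
state = StateInline("hello *world*", md, {}, [])
assert text(state, silent=False)
print(state.pos, repr(state.pending))  # 6 'hello '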