#cython: language_level=3

from __future__ import print_function
from typing import Optional

from cpython.mem cimport PyMem_Malloc, PyMem_Free
from cpython cimport PyObject_GetBuffer, PyBuffer_Release, PyBUF_SIMPLE, \
                     Py_buffer, PyBytes_AsString

from .python cimport PyMemoryView_Check, PyMemoryView_GET_BUFFER


from .errors import (HttpParserError,
                     HttpParserCallbackError,
                     HttpParserInvalidStatusError,
                     HttpParserInvalidMethodError,
                     HttpParserInvalidURLError,
                     HttpParserUpgrade)

cimport cython
from . cimport cparser


__all__ = ('HttpRequestParser', 'HttpResponseParser')


@cython.internal
cdef class HttpParser:
    """Shared implementation behind HttpRequestParser / HttpResponseParser.

    Owns one ``llhttp_t`` and one ``llhttp_settings_t`` allocated with
    ``PyMem_Malloc`` and forwards llhttp callbacks to the optional ``on_*``
    attributes looked up on the *protocol* object given to ``_init()``.
    """

    cdef:
        cparser.llhttp_t* _cparser
        cparser.llhttp_settings_t* _csettings

        # Header name/value fragments accumulated across llhttp callbacks
        # until a complete header can be handed to ``on_header``.
        bytes _current_header_name
        bytes _current_header_value

        # Protocol callbacks (each is the bound attribute or None).
        _proto_on_url, _proto_on_status, _proto_on_body, \
        _proto_on_header, _proto_on_headers_complete, \
        _proto_on_message_complete, _proto_on_chunk_header, \
        _proto_on_chunk_complete, _proto_on_message_begin

        # Exception captured inside a C callback; feed_data() attaches it
        # as ``__context__`` of the HttpParserCallbackError it raises.
        object _last_error

        # Buffer used by feed_data() for non-memoryview input.
        Py_buffer py_buf

    def __cinit__(self):
        # PyMem_Malloc returns ``void*``; Cython requires an explicit cast
        # to assign it to a typed pointer.
        self._cparser = <cparser.llhttp_t*> \
            PyMem_Malloc(sizeof(cparser.llhttp_t))
        if self._cparser is NULL:
            raise MemoryError()

        self._csettings = <cparser.llhttp_settings_t*> \
            PyMem_Malloc(sizeof(cparser.llhttp_settings_t))
        if self._csettings is NULL:
            raise MemoryError()

    def __dealloc__(self):
        PyMem_Free(self._cparser)
        PyMem_Free(self._csettings)

    cdef _init(self, protocol, cparser.llhttp_type_t mode):
        """Initialise the llhttp state and wire up *protocol* callbacks.

        ``mode`` is passed straight to ``llhttp_init``. Callbacks for
        ``on_header``, ``on_body``, ``on_message_begin`` and
        ``on_message_complete`` are installed only when the protocol
        defines them; the headers-complete and chunk callbacks are always
        installed.
        """
        cparser.llhttp_settings_init(self._csettings)

        cparser.llhttp_init(self._cparser, mode, self._csettings)
        # Back-pointer used by the cb_on_* functions to recover ``self``.
        self._cparser.data = <void*>self

        self._current_header_name = None
        self._current_header_value = None

        self._proto_on_header = getattr(protocol, 'on_header', None)
        if self._proto_on_header is not None:
            self._csettings.on_header_field = cb_on_header_field
            self._csettings.on_header_value = cb_on_header_value
        self._proto_on_headers_complete = getattr(
            protocol, 'on_headers_complete', None)
        self._csettings.on_headers_complete = cb_on_headers_complete

        self._proto_on_body = getattr(protocol, 'on_body', None)
        if self._proto_on_body is not None:
            self._csettings.on_body = cb_on_body

        self._proto_on_message_begin = getattr(
            protocol, 'on_message_begin', None)
        if self._proto_on_message_begin is not None:
            self._csettings.on_message_begin = cb_on_message_begin

        self._proto_on_message_complete = getattr(
            protocol, 'on_message_complete', None)
        if self._proto_on_message_complete is not None:
            self._csettings.on_message_complete = cb_on_message_complete

        self._proto_on_chunk_header = getattr(
            protocol, 'on_chunk_header', None)
        self._csettings.on_chunk_header = cb_on_chunk_header

        self._proto_on_chunk_complete = getattr(
            protocol, 'on_chunk_complete', None)
        self._csettings.on_chunk_complete = cb_on_chunk_complete

        self._last_error = None

    cdef _maybe_call_on_header(self):
        """Emit the pending header, if a value has been accumulated.

        The pending name/value are cleared *before* the protocol callback
        runs, so an exception in the callback does not leave them behind.
        """
        if self._current_header_value is not None:
            current_header_name = self._current_header_name
            current_header_value = self._current_header_value

            self._current_header_name = self._current_header_value = None

            if self._proto_on_header is not None:
                self._proto_on_header(current_header_name,
                                      current_header_value)

    cdef _on_header_field(self, bytes field):
        """Handle a header-name fragment, flushing any completed header."""
        self._maybe_call_on_header()
        if self._current_header_name is None:
            self._current_header_name = field
        else:
            self._current_header_name += field

    cdef _on_header_value(self, bytes val):
        """Handle a header-value fragment, appending to any pending value."""
        if self._current_header_value is None:
            self._current_header_value = val
        else:
            # This is unlikely, as mostly HTTP headers are one-line
            self._current_header_value += val

    cdef _on_headers_complete(self):
        """Flush the last header, then notify ``on_headers_complete``."""
        self._maybe_call_on_header()

        if self._proto_on_headers_complete is not None:
            self._proto_on_headers_complete()

    cdef _on_chunk_header(self):
        """Notify ``on_chunk_header``.

        Raises HttpParserError if a header name or value is still pending.
        """
        if (self._current_header_value is not None or
                self._current_header_name is not None):
            raise HttpParserError('invalid headers state')

        if self._proto_on_chunk_header is not None:
            self._proto_on_chunk_header()

    cdef _on_chunk_complete(self):
        """Flush any pending header, then notify ``on_chunk_complete``."""
        self._maybe_call_on_header()

        if self._proto_on_chunk_complete is not None:
            self._proto_on_chunk_complete()

    ### Public API ###

    def set_dangerous_leniencies(
        self,
        lenient_headers: Optional[bool] = None,
        lenient_chunked_length: Optional[bool] = None,
        lenient_keep_alive: Optional[bool] = None,
        lenient_transfer_encoding: Optional[bool] = None,
        lenient_version: Optional[bool] = None,
        lenient_data_after_close: Optional[bool] = None,
        lenient_optional_lf_after_cr: Optional[bool] = None,
        lenient_optional_cr_before_lf: Optional[bool] = None,
        lenient_optional_crlf_after_chunk: Optional[bool] = None,
        lenient_spaces_after_chunk_size: Optional[bool] = None,
    ):
        """Toggle llhttp leniency flags.

        Every argument that is not ``None`` is forwarded to the matching
        ``llhttp_set_lenient_*`` function; arguments left as ``None`` do
        not touch the corresponding flag.
        """
        cdef cparser.llhttp_t* parser = self._cparser
        if lenient_headers is not None:
            cparser.llhttp_set_lenient_headers(
                parser, lenient_headers)
        if lenient_chunked_length is not None:
            cparser.llhttp_set_lenient_chunked_length(
                parser, lenient_chunked_length)
        if lenient_keep_alive is not None:
            cparser.llhttp_set_lenient_keep_alive(
                parser, lenient_keep_alive)
        if lenient_transfer_encoding is not None:
            cparser.llhttp_set_lenient_transfer_encoding(
                parser, lenient_transfer_encoding)
        if lenient_version is not None:
            cparser.llhttp_set_lenient_version(
                parser, lenient_version)
        if lenient_data_after_close is not None:
            cparser.llhttp_set_lenient_data_after_close(
                parser, lenient_data_after_close)
        if lenient_optional_lf_after_cr is not None:
            cparser.llhttp_set_lenient_optional_lf_after_cr(
                parser, lenient_optional_lf_after_cr)
        if lenient_optional_cr_before_lf is not None:
            cparser.llhttp_set_lenient_optional_cr_before_lf(
                parser, lenient_optional_cr_before_lf)
        if lenient_optional_crlf_after_chunk is not None:
            cparser.llhttp_set_lenient_optional_crlf_after_chunk(
                parser, lenient_optional_crlf_after_chunk)
        if lenient_spaces_after_chunk_size is not None:
            cparser.llhttp_set_lenient_spaces_after_chunk_size(
                parser, lenient_spaces_after_chunk_size)

    def get_http_version(self):
        """Return the parsed HTTP version as a ``'major.minor'`` string."""
        cdef cparser.llhttp_t* parser = self._cparser
        return '{}.{}'.format(parser.http_major, parser.http_minor)

    def should_keep_alive(self):
        """Return ``llhttp_should_keep_alive()`` as a bool."""
        return bool(cparser.llhttp_should_keep_alive(self._cparser))

    def should_upgrade(self):
        """Return the parser's ``upgrade`` flag as a bool."""
        cdef cparser.llhttp_t* parser = self._cparser
        return bool(parser.upgrade)

    def feed_data(self, data):
        """Run the parser over *data*.

        *data* is either a memoryview (its buffer is used directly) or any
        object accepted by ``PyObject_GetBuffer(..., PyBUF_SIMPLE)``.

        Raises:
            HttpParserUpgrade: the parser paused for a protocol upgrade;
                the argument is the offset into *data* reported by
                ``llhttp_get_error_pos``.
            HttpParserError (or a subclass): llhttp returned any other
                non-OK code. For HttpParserCallbackError the exception
                saved by the failing callback, if any, is attached as
                ``__context__``.
        """
        cdef:
            size_t data_len
            cparser.llhttp_errno_t err
            Py_buffer *buf
            bint owning_buf = False
            const char* err_pos

        if PyMemoryView_Check(data):
            buf = PyMemoryView_GET_BUFFER(data)
            data_len = <size_t>buf.len
            err = cparser.llhttp_execute(
                self._cparser,
                <char*>buf.buf,
                data_len)

        else:
            buf = &self.py_buf
            PyObject_GetBuffer(data, buf, PyBUF_SIMPLE)
            # Only a buffer acquired here must be released below.
            owning_buf = True
            data_len = <size_t>buf.len

            err = cparser.llhttp_execute(
                self._cparser,
                <char*>buf.buf,
                data_len)

        try:
            if self._cparser.upgrade == 1 and err == cparser.HPE_PAUSED_UPGRADE:
                err_pos = cparser.llhttp_get_error_pos(self._cparser)

                # Immediately free the parser from "error" state, simulating
                # http-parser behavior here because 1) we never had the API to
                # allow users manually "resume after upgrade", and 2) the use
                # case for resuming parsing is very rare.
                cparser.llhttp_resume_after_upgrade(self._cparser)

                # The err_pos here is specific for the input buf. So if we ever
                # switch to the llhttp behavior (re-raise HttpParserUpgrade for
                # successive calls to feed_data() until resume_after_upgrade is
                # called), we have to store the result and keep our own state.
                raise HttpParserUpgrade(err_pos - <char*>buf.buf)
        finally:
            if owning_buf:
                PyBuffer_Release(buf)

        if err != cparser.HPE_OK:
            ex = parser_error_from_errno(
                self._cparser,
                <cparser.llhttp_errno_t> self._cparser.error)
            if isinstance(ex, HttpParserCallbackError):
                if self._last_error is not None:
                    ex.__context__ = self._last_error
                    self._last_error = None
            raise ex


cdef class HttpRequestParser(HttpParser):
    """HTTP request parser; additionally dispatches ``on_url``."""

    def __init__(self, protocol):
        self._init(protocol, cparser.HTTP_REQUEST)

        self._proto_on_url = getattr(protocol, 'on_url', None)
        if self._proto_on_url is not None:
            self._csettings.on_url = cb_on_url

    def get_method(self):
        """Return the request method name from ``llhttp_method_name()``."""
        cdef cparser.llhttp_t* parser = self._cparser
        return cparser.llhttp_method_name(
            <cparser.llhttp_method_t> parser.method)


cdef class HttpResponseParser(HttpParser):
    """HTTP response parser; additionally dispatches ``on_status``."""

    def __init__(self, protocol):
        self._init(protocol, cparser.HTTP_RESPONSE)

        self._proto_on_status = getattr(protocol, 'on_status', None)
        if self._proto_on_status is not None:
            self._csettings.on_status = cb_on_status

    def get_status_code(self):
        """Return the parser's ``status_code`` field."""
        cdef cparser.llhttp_t* parser = self._cparser
        return parser.status_code


# --- llhttp C callbacks ------------------------------------------------------
#
# Each callback recovers the HttpParser from ``parser.data`` (set in
# HttpParser._init), invokes the Python-level handler, and on any exception
# stores it in ``_last_error`` and returns a non-zero code to llhttp instead
# of letting the exception propagate.

cdef int cb_on_message_begin(cparser.llhttp_t* parser) except -1:
    cdef HttpParser pyparser = <HttpParser>parser.data
    try:
        pyparser._proto_on_message_begin()
    except BaseException as ex:
        pyparser._last_error = ex
        return -1
    else:
        return 0


cdef int cb_on_url(cparser.llhttp_t* parser,
                   const char *at, size_t length) except -1:
    cdef HttpParser pyparser = <HttpParser>parser.data
    try:
        pyparser._proto_on_url(at[:length])
    except BaseException as ex:
        cparser.llhttp_set_error_reason(parser, "`on_url` callback error")
        pyparser._last_error = ex
        return cparser.HPE_USER
    else:
        return 0


cdef int cb_on_status(cparser.llhttp_t* parser,
                      const char *at, size_t length) except -1:
    cdef HttpParser pyparser = <HttpParser>parser.data
    try:
        pyparser._proto_on_status(at[:length])
    except BaseException as ex:
        cparser.llhttp_set_error_reason(parser, "`on_status` callback error")
        pyparser._last_error = ex
        return cparser.HPE_USER
    else:
        return 0


cdef int cb_on_header_field(cparser.llhttp_t* parser,
                            const char *at, size_t length) except -1:
    cdef HttpParser pyparser = <HttpParser>parser.data
    try:
        pyparser._on_header_field(at[:length])
    except BaseException as ex:
        cparser.llhttp_set_error_reason(
            parser, "`on_header_field` callback error")
        pyparser._last_error = ex
        return cparser.HPE_USER
    else:
        return 0


cdef int cb_on_header_value(cparser.llhttp_t* parser,
                            const char *at, size_t length) except -1:
    cdef HttpParser pyparser = <HttpParser>parser.data
    try:
        pyparser._on_header_value(at[:length])
    except BaseException as ex:
        cparser.llhttp_set_error_reason(
            parser, "`on_header_value` callback error")
        pyparser._last_error = ex
        return cparser.HPE_USER
    else:
        return 0


cdef int cb_on_headers_complete(cparser.llhttp_t* parser) except -1:
    cdef HttpParser pyparser = <HttpParser>parser.data
    try:
        pyparser._on_headers_complete()
    except BaseException as ex:
        pyparser._last_error = ex
        return -1
    else:
        # Report 1 instead of 0 to llhttp when the upgrade flag is set.
        if pyparser._cparser.upgrade:
            return 1
        else:
            return 0


cdef int cb_on_body(cparser.llhttp_t* parser,
                    const char *at, size_t length) except -1:
    cdef HttpParser pyparser = <HttpParser>parser.data
    try:
        pyparser._proto_on_body(at[:length])
    except BaseException as ex:
        cparser.llhttp_set_error_reason(parser, "`on_body` callback error")
        pyparser._last_error = ex
        return cparser.HPE_USER
    else:
        return 0


cdef int cb_on_message_complete(cparser.llhttp_t* parser) except -1:
    cdef HttpParser pyparser = <HttpParser>parser.data
    try:
        pyparser._proto_on_message_complete()
    except BaseException as ex:
        pyparser._last_error = ex
        return -1
    else:
        return 0


cdef int cb_on_chunk_header(cparser.llhttp_t* parser) except -1:
    cdef HttpParser pyparser = <HttpParser>parser.data
    try:
        pyparser._on_chunk_header()
    except BaseException as ex:
        pyparser._last_error = ex
        return -1
    else:
        return 0


cdef int cb_on_chunk_complete(cparser.llhttp_t* parser) except -1:
    cdef HttpParser pyparser = <HttpParser>parser.data
    try:
        pyparser._on_chunk_complete()
    except BaseException as ex:
        pyparser._last_error = ex
        return -1
    else:
        return 0


cdef parser_error_from_errno(cparser.llhttp_t* parser,
                             cparser.llhttp_errno_t errno):
    """Build (but do not raise) the exception matching an llhttp errno.

    The message is ``llhttp_get_error_reason(parser)`` decoded as latin-1.
    Callback-related codes map to HttpParserCallbackError, the invalid
    status/method/URL codes to their dedicated classes, and everything
    else to HttpParserError.
    """
    cdef bytes reason = cparser.llhttp_get_error_reason(parser)

    if errno in (cparser.HPE_CB_MESSAGE_BEGIN,
                 cparser.HPE_CB_HEADERS_COMPLETE,
                 cparser.HPE_CB_MESSAGE_COMPLETE,
                 cparser.HPE_CB_CHUNK_HEADER,
                 cparser.HPE_CB_CHUNK_COMPLETE,
                 cparser.HPE_USER):
        cls = HttpParserCallbackError

    elif errno == cparser.HPE_INVALID_STATUS:
        cls = HttpParserInvalidStatusError

    elif errno == cparser.HPE_INVALID_METHOD:
        cls = HttpParserInvalidMethodError

    elif errno == cparser.HPE_INVALID_URL:
        cls = HttpParserInvalidURLError

    else:
        cls = HttpParserError

    return cls(reason.decode('latin-1'))