week06

2024-12-09 18:22:38 +09:00
parent ab0cbebefc
commit c4c4547706
959 changed files with 174888 additions and 6 deletions
--- a/env/lib/python3.12/site-packages/pyshark/tshark/init.py
+++ b/env/lib/python3.12/site-packages/pyshark/tshark/init.py
--- a/env/lib/python3.12/site-packages/pyshark/tshark/pycache/init.cpython-312.pyc
+++ b/env/lib/python3.12/site-packages/pyshark/tshark/pycache/init.cpython-312.pyc
--- a/env/lib/python3.12/site-packages/pyshark/tshark/pycache/tshark.cpython-312.pyc
+++ b/env/lib/python3.12/site-packages/pyshark/tshark/pycache/tshark.cpython-312.pyc
--- a/env/lib/python3.12/site-packages/pyshark/tshark/output_parser/init.py
+++ b/env/lib/python3.12/site-packages/pyshark/tshark/output_parser/init.py
--- a/env/lib/python3.12/site-packages/pyshark/tshark/output_parser/pycache/init.cpython-312.pyc
+++ b/env/lib/python3.12/site-packages/pyshark/tshark/output_parser/pycache/init.cpython-312.pyc
--- a/env/lib/python3.12/site-packages/pyshark/tshark/output_parser/pycache/base_parser.cpython-312.pyc
+++ b/env/lib/python3.12/site-packages/pyshark/tshark/output_parser/pycache/base_parser.cpython-312.pyc
--- a/env/lib/python3.12/site-packages/pyshark/tshark/output_parser/pycache/tshark_ek.cpython-312.pyc
+++ b/env/lib/python3.12/site-packages/pyshark/tshark/output_parser/pycache/tshark_ek.cpython-312.pyc
--- a/env/lib/python3.12/site-packages/pyshark/tshark/output_parser/pycache/tshark_json.cpython-312.pyc
+++ b/env/lib/python3.12/site-packages/pyshark/tshark/output_parser/pycache/tshark_json.cpython-312.pyc
--- a/env/lib/python3.12/site-packages/pyshark/tshark/output_parser/pycache/tshark_xml.cpython-312.pyc
+++ b/env/lib/python3.12/site-packages/pyshark/tshark/output_parser/pycache/tshark_xml.cpython-312.pyc
--- a/env/lib/python3.12/site-packages/pyshark/tshark/output_parser/base_parser.py
+++ b/env/lib/python3.12/site-packages/pyshark/tshark/output_parser/base_parser.py
@ -0,0 +1,30 @@
+class BaseTsharkOutputParser:
+    """Common base for parsers that turn raw tshark output into packet objects.
+
+    Subclasses provide `_extract_packet_from_data` (cut one packet's raw data off the
+    buffer) and `_parse_single_packet` (convert that raw data into a packet).
+    """
+    # Maximum number of bytes requested from the stream in a single read.
+    DEFAULT_BATCH_SIZE = 1 << 16
+
+    async def get_packets_from_stream(self, stream, existing_data, got_first_packet=True):
+        """Coroutine that tries to produce one packet from buffered and/or streamed data.
+
+        :param stream: object exposing an awaitable `read(n)`.
+        :param existing_data: data read earlier that has not been consumed yet.
+        :param got_first_packet: forwarded unchanged to `_extract_packet_from_data`.
+        :return: a tuple of (packet, remaining_data). packet is None when the buffered data did
+        not hold a complete packet; in that case remaining_data also contains the newly read data.
+        :raises EOFError if no packet was buffered and the stream returned no more data.
+        """
+        raw_packet, leftover = self._extract_packet_from_data(existing_data,
+                                                              got_first_packet=got_first_packet)
+        if raw_packet:
+            # A complete packet was already buffered, no need to touch the stream.
+            return self._parse_single_packet(raw_packet), leftover
+
+        chunk = await stream.read(self.DEFAULT_BATCH_SIZE)
+        leftover += chunk
+        if chunk:
+            return None, leftover
+        raise EOFError()
+
+    def _parse_single_packet(self, packet):
+        """Converts one packet's raw data into a packet object. Must be overridden."""
+        raise NotImplementedError()
+
+    def _extract_packet_from_data(self, data, got_first_packet=True):
+        """Returns (first packet's raw data, data remaining after it). Must be overridden."""
+        raise NotImplementedError()
--- a/env/lib/python3.12/site-packages/pyshark/tshark/output_parser/tshark_ek.py
+++ b/env/lib/python3.12/site-packages/pyshark/tshark/output_parser/tshark_ek.py
@ -0,0 +1,59 @@
+import json
+import os
+
+from pyshark.tshark.output_parser.base_parser import BaseTsharkOutputParser
+
+try:
+    import ujson
+    USE_UJSON = True
+except ImportError:
+    USE_UJSON = False
+
+from pyshark.packet.layers.ek_layer import EkLayer
+from pyshark.packet.packet import Packet
+
+# The platform line separator as bytes, so it can be searched for directly in raw (bytes) output.
+_ENCODED_OS_LINESEP = os.linesep.encode()
+
+
+class TsharkEkJsonParser(BaseTsharkOutputParser):
+    """Parser for line-delimited "ek" JSON output, one JSON document per line."""
+
+    def _parse_single_packet(self, packet):
+        """Builds a packet object from the raw bytes of one JSON line."""
+        return packet_from_ek_packet(packet)
+
+    def _extract_packet_from_data(self, data, got_first_packet=True):
+        """Returns a packet's data and any remaining data after reading that first packet.
+
+        :param data: bytes buffer; leading whitespace is stripped before searching.
+        :param got_first_packet: unused by this parser.
+        :return: a tuple of (packet_bytes, remaining_data). packet_bytes is None when no complete
+        line is buffered yet.
+        """
+        # The separator may be longer than one byte (e.g. "\r\n"), so always skip its full length.
+        linesep_length = len(_ENCODED_OS_LINESEP)
+        start_index = 0
+        data = data.lstrip()
+        if data.startswith(b'{"ind'):
+            # Skip the 'index' JSONs, generated for Elastic.
+            # See: https://bugs.wireshark.org/bugzilla/show_bug.cgi?id=16656
+            index_line_end = data.find(_ENCODED_OS_LINESEP)
+            if index_line_end == -1:
+                # The index line itself is still incomplete.
+                return None, data
+            start_index = index_line_end + linesep_length
+        linesep_location = data.find(_ENCODED_OS_LINESEP, start_index)
+        if linesep_location == -1:
+            return None, data
+
+        return data[start_index:linesep_location], data[linesep_location + linesep_length:]
+
+
+def packet_from_ek_packet(json_pkt):
+    """Creates a Packet from the raw bytes of a single "ek" JSON packet line.
+
+    :param json_pkt: bytes of one JSON document holding a 'layers' object with a 'frame' entry.
+    :return: Packet whose layers follow the order listed in 'frame_frame_protocols', followed by
+    any layers not listed there.
+    :raises KeyError if 'layers', 'frame', 'frame_frame_protocols', 'frame_frame_len' or
+    'frame_frame_time_epoch' is missing.
+    """
+    if USE_UJSON:
+        pkt_dict = ujson.loads(json_pkt)
+    else:
+        pkt_dict = json.loads(json_pkt.decode('utf-8'))
+
+    # We use the frame dict here and not the object access because it's faster.
+    frame_dict = pkt_dict['layers'].pop('frame')
+    layers = []
+    for layer in frame_dict['frame_frame_protocols'].split(':'):
+        layer_dict = pkt_dict['layers'].pop(layer, None)
+        if layer_dict is not None:
+            layers.append(EkLayer(layer, layer_dict))
+    # Add all leftovers
+    for name, layer in pkt_dict['layers'].items():
+        layers.append(EkLayer(name, layer))
+
+    # The interface key previously read 'rame_frame_interface_id' (missing leading "f"), which
+    # never matched, so interface_captured was always None.
+    return Packet(layers=layers, frame_info=EkLayer('frame', frame_dict),
+                  number=int(frame_dict.get('frame_frame_number', 0)),
+                  length=int(frame_dict['frame_frame_len']),
+                  sniff_time=frame_dict['frame_frame_time_epoch'],
+                  interface_captured=frame_dict.get('frame_frame_interface_id'))
--- a/env/lib/python3.12/site-packages/pyshark/tshark/output_parser/tshark_json.py
+++ b/env/lib/python3.12/site-packages/pyshark/tshark/output_parser/tshark_json.py
@ -0,0 +1,112 @@
+import json
+import os
+
+from packaging import version
+
+from pyshark.packet.layers.json_layer import JsonLayer
+from pyshark.packet.packet import Packet
+from pyshark.tshark.output_parser.base_parser import BaseTsharkOutputParser
+from pyshark.tshark import tshark
+
+try:
+    import ujson
+    USE_UJSON = True
+except ImportError:
+    USE_UJSON = False
+
+
+class TsharkJsonParser(BaseTsharkOutputParser):
+    """Parser for tshark JSON output (a JSON array of packet objects)."""
+
+    def __init__(self, tshark_version=None):
+        """
+        :param tshark_version: parsed version of the tshark producing the output, or None when
+        unknown. An unknown version is handled like a tshark >= 3.0.0.
+        """
+        super().__init__()
+        self._tshark_version = tshark_version
+
+    def _parse_single_packet(self, packet):
+        """Builds a packet object from the raw bytes of one JSON packet object."""
+        if not self._tshark_version:
+            # Unknown version: comparing None against a version would raise TypeError, so assume a
+            # recent tshark, the same assumption _get_json_separators makes.
+            json_has_duplicate_keys = True
+        else:
+            json_has_duplicate_keys = tshark.tshark_supports_duplicate_keys(self._tshark_version)
+        return packet_from_json_packet(packet, deduplicate_fields=json_has_duplicate_keys)
+
+    def _extract_packet_from_data(self, data, got_first_packet=True):
+        """Returns a packet's data and any remaining data after reading that first packet.
+
+        :param data: bytes buffer of (partial) JSON output.
+        :param got_first_packet: when False, everything before the first "{" is skipped.
+        :return: a tuple of (packet_bytes, remaining_data). packet_bytes is None when no complete
+        packet is buffered yet, in which case data is returned unchanged.
+        """
+        tag_start = 0
+        if not got_first_packet:
+            tag_start = data.find(b"{")
+            if tag_start == -1:
+                return None, data
+        packet_separator, end_separator, end_tag_strip_length = self._get_json_separators()
+        found_separator = None
+
+        tag_end = data.find(packet_separator)
+        if tag_end == -1:
+            # Not end of packet, maybe it has end of entire file?
+            tag_end = data.find(end_separator)
+            if tag_end != -1:
+                found_separator = end_separator
+        else:
+            # Found a single packet, just add the separator without extras
+            found_separator = packet_separator
+
+        if found_separator:
+            # Keep the packet's closing brace but drop the trailing separator characters.
+            tag_end += len(found_separator) - end_tag_strip_length
+            return data[tag_start:tag_end].strip().strip(b","), data[tag_end + 1:]
+        return None, data
+
+    def _get_json_separators(self):
+        """Returns the separators between packets in a JSON output
+
+        Returns a tuple of (packet_separator, end_of_file_separator, characters_to_disregard).
+        The latter variable being the number of characters to ignore in order to pass the packet (i.e. extra newlines,
+        commas, parentheses).
+        """
+        if not self._tshark_version or self._tshark_version >= version.parse("3.0.0"):
+            return f"{os.linesep}  }},{os.linesep}".encode(), f"}}{os.linesep}]".encode(), 1 + len(os.linesep)
+        else:
+            return f"}}{os.linesep}{os.linesep}  ,".encode(), f"}}{os.linesep}{os.linesep}]".encode(), 1
+
+
+def duplicate_object_hook(ordered_pairs):
+    """Make lists out of duplicate keys.
+
+    Intended for use as a `json.loads` `object_pairs_hook`.
+
+    :param ordered_pairs: iterable of (key, value) pairs in document order.
+    :return: dict in which a key seen once maps to its value and a key seen several times maps to
+    a list of its values in order of appearance.
+    """
+    json_dict = {}
+    for key, val in ordered_pairs:
+        # Test for membership rather than truthiness, otherwise a first value such as "", 0 or
+        # None would be silently overwritten by the next duplicate.
+        if key not in json_dict:
+            json_dict[key] = val
+            continue
+
+        existing_val = json_dict[key]
+        if isinstance(existing_val, list):
+            # NOTE: a value that is itself a list cannot be told apart from an accumulated list of
+            # duplicates, so a later duplicate is appended to it.
+            existing_val.append(val)
+        else:
+            json_dict[key] = [existing_val, val]
+
+    return json_dict
+
+
+def packet_from_json_packet(json_pkt, deduplicate_fields=True):
+    """Builds a Pyshark Packet out of the raw bytes of one tshark JSON packet object.
+
+    :param json_pkt: bytes of a single JSON packet object.
+    :param deduplicate_fields: when True the JSON is decoded with `duplicate_object_hook`, so
+    repeated keys are collected into lists. This path always uses the standard json module (ujson
+    has no hook support) and is therefore the slower one.
+    :return: Packet whose layers follow the order given by 'frame.protocols', followed by any
+    layers that are not listed there.
+    """
+    if not deduplicate_fields and USE_UJSON:
+        pkt_dict = ujson.loads(json_pkt)
+    else:
+        decoded_pkt = json_pkt.decode('utf-8')
+        if deduplicate_fields:
+            pkt_dict = json.loads(decoded_pkt, object_pairs_hook=duplicate_object_hook)
+        else:
+            pkt_dict = json.loads(decoded_pkt)
+
+    # Work on the plain dict rather than on layer objects, it is faster.
+    remaining_layers = pkt_dict['_source']['layers']
+    frame_dict = remaining_layers.pop('frame')
+
+    ordered_layers = []
+    for protocol in frame_dict['frame.protocols'].split(':'):
+        layer_dict = remaining_layers.pop(protocol, None)
+        if layer_dict is None:
+            continue
+        ordered_layers.append(JsonLayer(protocol, layer_dict))
+    # Whatever was not named in the protocol list goes at the end.
+    ordered_layers.extend(JsonLayer(name, layer) for name, layer in remaining_layers.items())
+
+    return Packet(layers=ordered_layers, frame_info=JsonLayer('frame', frame_dict),
+                  number=int(frame_dict.get('frame.number', 0)),
+                  length=int(frame_dict['frame.len']),
+                  sniff_time=frame_dict['frame.time_epoch'],
+                  interface_captured=frame_dict.get('frame.interface_id'))
--- a/env/lib/python3.12/site-packages/pyshark/tshark/output_parser/tshark_xml.py
+++ b/env/lib/python3.12/site-packages/pyshark/tshark/output_parser/tshark_xml.py
@ -0,0 +1,118 @@
+"""This module contains functions to turn TShark XML parts into Packet objects."""
+import lxml.objectify
+
+from pyshark.packet.layers.xml_layer import XmlLayer
+from pyshark.packet.packet import Packet
+from pyshark.packet.packet_summary import PacketSummary
+
+from pyshark.tshark.output_parser.base_parser import BaseTsharkOutputParser
+
+# Translation table for str.translate: every listed code point maps to None and is thus deleted.
+# Covers the control characters below 0x20 (except tab, line feed and carriage return), the
+# range 0xd800-0xdfff and the code points 0xfffe and 0xffff.
+DEL_BAD_XML_CHARS = dict.fromkeys(code for code in range(0x20) if code not in (0x09, 0x0a, 0x0d))
+DEL_BAD_XML_CHARS.update(dict.fromkeys(range(0xd800, 0xe000)))
+DEL_BAD_XML_CHARS.update(dict.fromkeys(range(0xfffe, 0x10000)))
+
+
+class TsharkXmlParser(BaseTsharkOutputParser):
+    """Parser for tshark XML output: full packets, or packet summaries when requested."""
+    # Number of bytes read per attempt while looking for the summary structure header.
+    SUMMARIES_BATCH_SIZE = 64
+
+    def __init__(self, parse_summaries=False):
+        """
+        :param parse_summaries: when True, the "structure" tag is read first and packets are
+        returned as packet summaries built against it.
+        """
+        super().__init__()
+        self._parse_summaries = parse_summaries
+        self._psml_structure = None
+
+    async def get_packets_from_stream(self, stream, existing_data, got_first_packet=True):
+        """Coroutine returning (packet, remaining_data); see the base class for the contract.
+
+        In summary mode the structure header is consumed once, before the first packet. It used
+        to be looked for on every call, which threw away `existing_data` and kept reading the
+        stream in search of a structure tag that only appears once.
+        """
+        if self._parse_summaries and self._psml_structure is None:
+            existing_data = await self._get_psml_struct(stream, existing_data)
+        return await super().get_packets_from_stream(stream, existing_data, got_first_packet=got_first_packet)
+
+    def _parse_single_packet(self, packet):
+        """Builds a packet (or a packet summary, if a structure was read) from one XML packet."""
+        return packet_from_xml_packet(packet, psml_structure=self._psml_structure)
+
+    def _extract_packet_from_data(self, data, got_first_packet=True):
+        """Gets data containing a (part of) tshark xml.
+
+        If a "packet" tag is found in it, returns the tag data and the remaining data.
+        Otherwise returns a falsy tag and the data left to search.
+
+        :param data: bytes of a partial tshark xml.
+        :param got_first_packet: unused by this parser.
+        :return: a tuple of (tag, data). tag will be falsy if none is found.
+        """
+        return _extract_tag_from_xml_data(data, tag_name=b"packet")
+
+    async def _get_psml_struct(self, fd, initial_data=b""):
+        """Reads until the PSML (packet summary xml) "structure" tag is available and stores it.
+
+        A coroutine. The parsed structure is kept in `self._psml_structure`; it stays None when
+        the stream ends before a complete structure tag was seen.
+
+        :param fd: object exposing an awaitable `read(n)`.
+        :param initial_data: data that was already read from the stream.
+        :return: the data left over after the structure tag (or everything read, if none found).
+        """
+        psml_struct = None
+
+        # If summaries are read, we need the psml structure which appears on top of the file.
+        while not psml_struct:
+            psml_struct, initial_data = _extract_tag_from_xml_data(initial_data, b"structure")
+            if psml_struct:
+                self._psml_structure = psml_structure_from_xml(psml_struct)
+                break
+            new_data = await fd.read(self.SUMMARIES_BATCH_SIZE)
+            if not new_data:
+                # Stream ended without a structure tag, hand back whatever was read.
+                break
+            initial_data += new_data
+        return initial_data
+
+
+def psml_structure_from_xml(psml_structure):
+    """Returns the 'section' elements of a PSML structure tag.
+
+    :param psml_structure: an objectified XML element, or raw XML that is parsed into one first.
+    :return: list of the 'section' elements found by `findall`.
+    """
+    if isinstance(psml_structure, lxml.objectify.ObjectifiedElement):
+        structure_element = psml_structure
+    else:
+        structure_element = lxml.objectify.fromstring(psml_structure)
+    return structure_element.findall('section')
+
+
+def packet_from_xml_packet(xml_pkt, psml_structure=None):
+    """
+    Gets a TShark XML packet object, bytes or string, and returns a pyshark Packet object.
+
+    :param xml_pkt: bytes, str or objectified xml element.
+    :param psml_structure: a list of the fields in each packet summary in the psml data. If given, packets will
+    be returned as a PacketSummary object.
+    :return: Packet object, or PacketSummary object when psml_structure is given.
+    """
+    if not isinstance(xml_pkt, lxml.objectify.ObjectifiedElement):
+        parser = lxml.objectify.makeparser(huge_tree=True, recover=True, encoding='utf-8')
+        if isinstance(xml_pkt, (bytes, bytearray)):
+            # Undecodable bytes are dropped rather than failing the whole packet.
+            xml_pkt = xml_pkt.decode(errors='ignore')
+        # Text input (as the docstring allows) has no .decode and is used as-is; either way the
+        # characters that are invalid in XML are removed before parsing.
+        xml_pkt = xml_pkt.translate(DEL_BAD_XML_CHARS)
+        xml_pkt = lxml.objectify.fromstring(xml_pkt.encode('utf-8'), parser)
+    if psml_structure:
+        return _packet_from_psml_packet(xml_pkt, psml_structure)
+    return _packet_from_pdml_packet(xml_pkt)
+
+
+def _packet_from_psml_packet(psml_packet, structure):
+    """Builds a PacketSummary from the structure and the packet's 'section' elements."""
+    sections = psml_packet.findall('section')
+    return PacketSummary(structure, sections)
+
+
+def _packet_from_pdml_packet(pdml_packet):
+    """Builds a Packet from an objectified PDML packet element.
+
+    The first 'proto' child is used as general info (number, length, timestamp, captured
+    length), the second as the frame layer, and the rest become the packet's layers.
+
+    :raises IndexError if the packet has fewer than two 'proto' children.
+    """
+    all_layers = list(map(XmlLayer, pdml_packet.proto))
+    geninfo = all_layers[0]
+    frame = all_layers[1]
+    protocol_layers = all_layers[2:]
+    return Packet(layers=protocol_layers,
+                  frame_info=frame,
+                  number=geninfo.get_field_value('num'),
+                  length=geninfo.get_field_value('len'),
+                  sniff_time=geninfo.get_field_value('timestamp', raw=True),
+                  captured_length=geninfo.get_field_value('caplen'),
+                  interface_captured=frame.get_field_value('interface_id', raw=True))
+
+
+def _extract_tag_from_xml_data(data, tag_name=b"packet"):
+    """Gets data containing a (part of) tshark xml.
+
+    If the given tag is found in it, returns the tag data and the remaining data.
+    Otherwise returns None and the data that is still worth searching later.
+
+    A closing tag with no opening tag before it cannot belong to a usable element, so the data
+    up to and including it is dropped and the search continues in what follows. Before, such
+    input produced a bogus slice starting at index -1.
+
+    :param data: bytes of a partial tshark xml.
+    :param tag_name: A bytes string of the tag name
+    :return: a tuple of (tag, data). tag will be None if none is found.
+    """
+    opening_tag = b"<" + tag_name + b">"
+    closing_tag = b"</" + tag_name + b">"
+    while True:
+        closing_index = data.find(closing_tag)
+        if closing_index == -1:
+            return None, data
+        tag_end = closing_index + len(closing_tag)
+        # Only an opening tag located before the closing tag can start this element.
+        tag_start = data.find(opening_tag, 0, closing_index)
+        if tag_start != -1:
+            return data[tag_start:tag_end], data[tag_end:]
+        data = data[tag_end:]
--- a/env/lib/python3.12/site-packages/pyshark/tshark/tshark.py
+++ b/env/lib/python3.12/site-packages/pyshark/tshark/tshark.py
@ -0,0 +1,169 @@
+"""Module used for the actual running of TShark"""
+import json
+
+from packaging import version
+import os
+import subprocess
+import sys
+import re
+
+from pyshark.config import get_config
+
+
+class TSharkNotFoundException(Exception):
+    """Raised when the requested executable is not found in any of the searched locations."""
+
+
+class TSharkVersionException(Exception):
+    """Raised when the TShark version cannot be parsed or lacks a required feature."""
+
+
+# Matches text of the form "<digits>. <name>", optionally followed by " (<alias>)".
+# Group 1 is the name (no whitespace), group 2 the parenthesised alias when present.
+_TSHARK_INTERFACE_ALIAS_PATTERN = re.compile(r"[0-9]*\. ([^\s]*)(?: \((.*)\))?")
+
+
+def get_process_path(tshark_path=None, process_name="tshark"):
+    """Locates the executable called `process_name` (tshark by default).
+
+    Candidates are tried in this order: the directory of the user supplied path, the path from
+    the configuration, then the platform's usual locations. The first one that exists wins.
+
+    :param tshark_path: Path of the tshark binary; only its directory part is used.
+    :param process_name: Name of the executable to look for (without ".exe").
+    :return: The first existing candidate path (with forward slashes on Windows).
+    :raises TSharkNotFoundException in case the executable is not found in any location.
+    """
+    on_windows = sys.platform.startswith("win")
+    possible_paths = []
+
+    # Path taken from the configuration, when a configuration is available.
+    config = get_config()
+    if config:
+        possible_paths.append(config.get(process_name, f"{process_name}_path"))
+
+    # The user supplied location takes precedence over everything else.
+    if tshark_path is not None:
+        executable_name = f"{process_name}.exe" if on_windows else process_name
+        possible_paths.insert(0, os.path.join(os.path.dirname(tshark_path), executable_name))
+
+    if on_windows:
+        # Windows: the Wireshark folder under each Program Files directory.
+        for env in ("ProgramFiles(x86)", "ProgramFiles"):
+            program_files = os.getenv(env)
+            if program_files is None:
+                continue
+            possible_paths.append(
+                os.path.join(program_files, "Wireshark", f"{process_name}.exe")
+            )
+    else:
+        # Linux, etc.: every directory on the system's path.
+        os_path = os.getenv(
+            "PATH",
+            "/usr/bin:/usr/sbin:/usr/lib/tshark:/usr/local/bin"
+        )
+        possible_paths.extend(os.path.join(directory, process_name)
+                              for directory in os_path.split(":"))
+    if sys.platform.startswith("darwin"):
+        possible_paths.append(f"/Applications/Wireshark.app/Contents/MacOS/{process_name}")
+
+    for candidate in possible_paths:
+        if not os.path.exists(candidate):
+            continue
+        return candidate.replace("\\", "/") if on_windows else candidate
+    raise TSharkNotFoundException(
+        "TShark not found. Try adding its location to the configuration file. "
+        f"Searched these paths: {possible_paths}"
+    )
+
+
+def get_tshark_version(tshark_path=None):
+    """Runs the executable with "-v" and returns the version found on its first output line.
+
+    :param tshark_path: Path of the tshark binary, forwarded to get_process_path.
+    :return: the parsed "#.#.#" version.
+    :raises TSharkVersionException if the first output line holds no "#.#.#" version.
+    """
+    command = [get_process_path(tshark_path), "-v"]
+    with open(os.devnull, "w") as devnull:
+        raw_output = subprocess.check_output(command, stderr=devnull)
+
+    first_line = raw_output.decode("ascii").splitlines()[0]
+    # A version is three dot separated numbers preceded by whitespace.
+    match = re.match(r'.*\s(\d+\.\d+\.\d+).*', first_line)
+    if match is None:
+        raise TSharkVersionException("Unable to parse TShark version from: {}".format(first_line))
+
+    return version.parse(match.group(1))
+
+
+def tshark_supports_duplicate_keys(tshark_version):
+    """Returns True when the given parsed version is 2.6.7 or newer."""
+    minimum_version = version.parse("2.6.7")
+    return tshark_version >= minimum_version
+
+
+def tshark_supports_json(tshark_version):
+    """Returns True when the given parsed version is 2.2.0 or newer."""
+    minimum_version = version.parse("2.2.0")
+    return tshark_version >= minimum_version
+
+
+def get_tshark_display_filter_flag(tshark_version):
+    """Picks the display filter flag: '-Y' from version 1.10.0 onwards, '-R' before that."""
+    return "-Y" if tshark_version >= version.parse("1.10.0") else "-R"
+
+
+def get_tshark_interfaces(tshark_path=None):
+    """Lists the interfaces reported by running the executable with "-D".
+
+    For each kept output line the second space separated token is returned.
+    Used internally to capture on multiple interfaces.
+
+    :param tshark_path: Path of the tshark binary, forwarded to get_process_path.
+    :raises IndexError if a kept output line contains no space.
+    """
+    command = [get_process_path(tshark_path), "-D"]
+    with open(os.devnull, "w") as devnull:
+        listing = subprocess.check_output(command, stderr=devnull).decode("utf-8")
+
+    interfaces = []
+    for line in listing.splitlines():
+        # Lines holding a backslash-backslash-dot-backslash device prefix are left out.
+        if '\\\\.\\' in line:
+            continue
+        interfaces.append(line.split(" ")[1])
+    return interfaces
+
+
+def get_all_tshark_interfaces_names(tshark_path=None):
+    """Collects every interface name, aliases included, from the output of "-D".
+
+    :param tshark_path: Path of the tshark binary, forwarded to get_process_path.
+    :return: flat list holding, per matching line, the name followed by its alias (if any).
+    """
+    command = [get_process_path(tshark_path), "-D"]
+    with open(os.devnull, "w") as devnull:
+        listing = subprocess.check_output(command, stderr=devnull).decode("utf-8")
+
+    names = []
+    for line in listing.splitlines():
+        found = _TSHARK_INTERFACE_ALIAS_PATTERN.findall(line)
+        if not found:
+            continue
+        # Only the first match counts; an absent alias shows up as an empty string and is skipped.
+        names.extend(filter(None, found[0]))
+    return names
+
+
+def get_ek_field_mapping(tshark_path=None):
+    """Returns the per-layer field mapping reported by "-G elastic-mapping".
+
+    The "mappings" object of the output is used directly when it has both "dynamic" and
+    "properties"; otherwise its "doc" entry, or failing that its "pcap_file" entry, is used.
+
+    :param tshark_path: Path of the tshark binary, forwarded to get_process_path.
+    :return: the "properties" of the "layers" entry of the selected mapping.
+    :raises TSharkVersionException if none of the known layouts is present.
+    """
+    command = [get_process_path(tshark_path), "-G", "elastic-mapping"]
+    with open(os.devnull, "w") as devnull:
+        raw_mapping = subprocess.check_output(command, stderr=devnull).decode("ascii")
+
+    mapping = json.loads(raw_mapping, object_pairs_hook=_duplicate_object_hook)["mappings"]
+
+    # wireshark 4 puts the data straight under "mappings"; 3.5 to < 4 nests it in "doc"
+    # and versions before 3.5 in "pcap_file".
+    if not ("dynamic" in mapping and "properties" in mapping):
+        for nested_key in ("doc", "pcap_file"):
+            if nested_key in mapping:
+                mapping = mapping[nested_key]
+                break
+        else:
+            raise TSharkVersionException("Your tshark version does not support elastic-mapping. Please upgrade.")
+
+    return mapping["properties"]["layers"]["properties"]
+
+
+def _duplicate_object_hook(ordered_pairs):
+    """Resolve duplicate keys by keeping the most useful value.
+
+    Intended for use as a `json.loads` `object_pairs_hook`.
+
+    :param ordered_pairs: iterable of (key, value) pairs in document order.
+    :return: dict with one value per key. A later duplicate replaces the stored value unless the
+    stored value is non-empty and the duplicate is a dict whose "properties" is an empty dict.
+    """
+    json_dict = {}
+    for key, val in ordered_pairs:
+        existing_val = json_dict.get(key)
+        if not existing_val:
+            json_dict[key] = val
+            continue
+
+        # There are duplicates without any data for some reason, if it's that - drop it
+        # Otherwise, override. Only dicts can be "without data" in this sense; other values have
+        # no .get and used to raise AttributeError here.
+        is_empty_duplicate = isinstance(val, dict) and val.get("properties") == {}
+        if not is_empty_duplicate:
+            json_dict[key] = val
+
+    return json_dict