This commit is contained in:
2024-12-09 18:22:38 +09:00
parent ab0cbebefc
commit c4c4547706
959 changed files with 174888 additions and 6 deletions

View File

@ -0,0 +1,17 @@
import sys
class UnsupportedVersionException(Exception):
pass
if sys.version_info[0] < 3 or (sys.version_info[0] == 3 and sys.version_info[1] < 5):
raise UnsupportedVersionException("Your version of Python is unsupported. "
"Pyshark requires Python >= 3.5 and Wireshark >= 2.2.0. "
"Please upgrade, or use pyshark-legacy or pyshark version 0.3.8.")
from pyshark.capture.live_capture import LiveCapture
from pyshark.capture.live_ring_capture import LiveRingCapture
from pyshark.capture.file_capture import FileCapture
from pyshark.capture.remote_capture import RemoteCapture
from pyshark.capture.inmem_capture import InMemCapture
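
A minimal usage sketch of the public API exported above (the capture file path is hypothetical and a local tshark installation is assumed):

import pyshark

# Read a capture file and print a summary of every packet.
cap = pyshark.FileCapture("example.pcap")  # hypothetical path
for packet in cap:
    print(packet)
cap.close()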

View File

@ -0,0 +1,15 @@
import pathlib
import shutil
import appdirs
def get_cache_dir(tshark_version) -> pathlib.Path:
cache_dir = pathlib.Path(appdirs.user_cache_dir(appname="pyshark", version=tshark_version))
if not cache_dir.exists():
cache_dir.mkdir(parents=True)
return cache_dir
def clear_cache(tshark_version=None):
shutil.rmtree(get_cache_dir(tshark_version))
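
A short sketch of how these cache helpers might be used (the version string is illustrative):

from pyshark import cache

# Resolve (and create, if missing) the per-tshark-version cache directory.
cache_dir = cache.get_cache_dir("4.0.6")  # illustrative version string
print(cache_dir)

# Remove everything cached for that version.
cache.clear_cache("4.0.6")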

View File

@ -0,0 +1,466 @@
import asyncio
import os
import threading
import subprocess
import concurrent.futures
import sys
import logging
import warnings
from pyshark import ek_field_mapping
from pyshark.packet.packet import Packet
from pyshark.tshark.output_parser import tshark_ek
from pyshark.tshark.output_parser import tshark_json
from pyshark.tshark.output_parser import tshark_xml
from pyshark.tshark.tshark import get_process_path, get_tshark_display_filter_flag, \
tshark_supports_json, TSharkVersionException, get_tshark_version, tshark_supports_duplicate_keys
if sys.version_info < (3, 8):
asyncTimeoutError = concurrent.futures.TimeoutError
else:
asyncTimeoutError = asyncio.exceptions.TimeoutError
class TSharkCrashException(Exception):
pass
class UnknownEncyptionStandardException(Exception):
pass
class RawMustUseJsonException(Exception):
"""If the use_raw argument is True, so should the use_json argument"""
class StopCapture(Exception):
"""Exception that the user can throw anywhere in packet-handling to stop the capture process."""
pass
class Capture:
"""Base class for packet captures."""
SUMMARIES_BATCH_SIZE = 64
DEFAULT_LOG_LEVEL = logging.CRITICAL
SUPPORTED_ENCRYPTION_STANDARDS = ["wep", "wpa-pwk", "wpa-pwd", "wpa-psk"]
def __init__(self, display_filter=None, only_summaries=False, eventloop=None,
decryption_key=None, encryption_type="wpa-pwd", output_file=None,
decode_as=None, disable_protocol=None, tshark_path=None,
override_prefs=None, capture_filter=None, use_json=False, include_raw=False,
use_ek=False, custom_parameters=None, debug=False):
self.loaded = False
self.tshark_path = tshark_path
self._override_prefs = override_prefs
self.debug = debug
self.use_json = use_json
self._use_ek = use_ek
self.include_raw = include_raw
self._packets = []
self._current_packet = 0
self._display_filter = display_filter
self._capture_filter = capture_filter
self._only_summaries = only_summaries
self._output_file = output_file
self._running_processes = set()
self._decode_as = decode_as
self._disable_protocol = disable_protocol
self._log = logging.Logger(
self.__class__.__name__, level=self.DEFAULT_LOG_LEVEL)
self._closed = False
self._custom_parameters = custom_parameters
self._eof_reached = False
self._last_error_line = None
self._stderr_handling_tasks = []
self.__tshark_version = None
if include_raw and not (use_json or use_ek):
raise RawMustUseJsonException(
"use_json/use_ek must be True if include_raw")
if self.debug:
self.set_debug()
self.eventloop = eventloop
if self.eventloop is None:
self._setup_eventloop()
if encryption_type and encryption_type.lower() in self.SUPPORTED_ENCRYPTION_STANDARDS:
self.encryption = (decryption_key, encryption_type.lower())
else:
standards = ", ".join(self.SUPPORTED_ENCRYPTION_STANDARDS)
raise UnknownEncyptionStandardException(f"Only the following standards are supported: {standards}.")
def __getitem__(self, item):
"""Gets the packet in the given index.
:param item: packet index
:return: Packet object.
"""
return self._packets[item]
def __len__(self):
return len(self._packets)
def next(self) -> Packet:
return self.next_packet()
# Allows for child classes to call next() from super() without 2to3 "fixing"
# the call
def next_packet(self) -> Packet:
if self._current_packet >= len(self._packets):
raise StopIteration()
cur_packet = self._packets[self._current_packet]
self._current_packet += 1
return cur_packet
def clear(self):
"""Empties the capture of any saved packets."""
self._packets = []
self._current_packet = 0
def reset(self):
"""Starts iterating packets from the first one."""
self._current_packet = 0
def load_packets(self, packet_count=0, timeout=None):
"""Reads the packets from the source (cap, interface, etc.) and adds it to the internal list.
If 0 as the packet_count is given, reads forever
:param packet_count: The amount of packets to add to the packet list (0 to read forever)
:param timeout: If given, automatically stops after a given amount of time.
"""
initial_packet_amount = len(self._packets)
def keep_packet(pkt):
self._packets.append(pkt)
if packet_count != 0 and len(self._packets) - initial_packet_amount >= packet_count:
raise StopCapture()
try:
self.apply_on_packets(
keep_packet, timeout=timeout, packet_count=packet_count)
self.loaded = True
except asyncTimeoutError:
pass
def set_debug(self, set_to=True, log_level=logging.DEBUG):
"""Sets the capture to debug mode (or turns it off if specified)."""
if set_to:
handler = logging.StreamHandler(sys.stdout)
handler.setFormatter(logging.Formatter(
"%(asctime)s - %(name)s - %(levelname)s - %(message)s"))
self._log.addHandler(handler)
self._log.level = log_level
self.debug = set_to
def _verify_capture_parameters(self):
"""Optionally verify that the capture's parameters are valid.
Should raise an exception if they are not valid.
"""
pass
def _setup_eventloop(self):
"""Sets up a new eventloop as the current one according to the OS."""
if os.name == "nt":
current_eventloop = asyncio.get_event_loop_policy().get_event_loop()
if isinstance(current_eventloop, asyncio.ProactorEventLoop):
self.eventloop = current_eventloop
else:
# On Python before 3.8, Proactor is not the default eventloop type, so we have to create a new one.
# If there was an existing eventloop this can create issues, since we effectively disable it here.
if asyncio.all_tasks():
warnings.warn("The running eventloop has tasks but pyshark must set a new eventloop to continue. "
"Existing tasks may not run.")
self.eventloop = asyncio.ProactorEventLoop()
asyncio.set_event_loop(self.eventloop)
else:
try:
self.eventloop = asyncio.get_event_loop_policy().get_event_loop()
except RuntimeError:
if threading.current_thread() != threading.main_thread():
# Ran not in main thread, make a new eventloop
self.eventloop = asyncio.new_event_loop()
asyncio.set_event_loop(self.eventloop)
else:
raise
if os.name == "posix" and isinstance(threading.current_thread(), threading._MainThread):
# The default child watchers (ThreadedChildWatcher) attach_loop method is empty!
# While using pyshark with ThreadedChildWatcher, asyncio could raise a ChildProcessError
# "Unknown child process pid %d, will report returncode 255"
# This led to a TSharkCrashException in _cleanup_subprocess.
# Using the SafeChildWatcher fixes this issue, but it is slower.
# SafeChildWatcher O(n) -> large numbers of processes are slow
# ThreadedChildWatcher O(1) -> independent of process number
# asyncio.get_child_watcher().attach_loop(self.eventloop)
asyncio.set_child_watcher(asyncio.SafeChildWatcher())
asyncio.get_child_watcher().attach_loop(self.eventloop)
def _packets_from_tshark_sync(self, packet_count=None, existing_process=None):
"""Returns a generator of packets.
This is the sync version of packets_from_tshark. It waits for the completion of each coroutine and
reimplements reading packets in a sync way, yielding each packet as it arrives.
:param packet_count: If given, stops after this amount of packets is captured.
"""
# NOTE: This has code duplication with the async version, think about how to solve this
tshark_process = existing_process or self.eventloop.run_until_complete(
self._get_tshark_process())
parser = self._setup_tshark_output_parser()
packets_captured = 0
data = b""
try:
while True:
try:
packet, data = self.eventloop.run_until_complete(
parser.get_packets_from_stream(tshark_process.stdout, data,
got_first_packet=packets_captured > 0))
except EOFError:
self._log.debug("EOF reached (sync)")
self._eof_reached = True
break
if packet:
packets_captured += 1
yield packet
if packet_count and packets_captured >= packet_count:
break
finally:
if tshark_process in self._running_processes:
self.eventloop.run_until_complete(
self._cleanup_subprocess(tshark_process))
def apply_on_packets(self, callback, timeout=None, packet_count=None):
"""Runs through all packets and calls the given callback (a function) with each one as it is read.
If the capture is infinite (i.e. a live capture), it will run forever, otherwise it will complete after all
packets have been read.
Example usage:
def print_callback(pkt):
print(pkt)
capture.apply_on_packets(print_callback)
If a timeout is given (in seconds), raises a TimeoutError if processing does not complete in time.
"""
coro = self.packets_from_tshark(callback, packet_count=packet_count)
if timeout is not None:
coro = asyncio.wait_for(coro, timeout)
return self.eventloop.run_until_complete(coro)
async def packets_from_tshark(self, packet_callback, packet_count=None, close_tshark=True):
"""
A coroutine which creates a tshark process, runs the given callback on each packet that is received from it and
closes the process when it is done.
Do not use interactively. Can be used in order to insert packets into your own eventloop.
"""
tshark_process = await self._get_tshark_process(packet_count=packet_count)
try:
await self._go_through_packets_from_fd(tshark_process.stdout, packet_callback, packet_count=packet_count)
except StopCapture:
pass
finally:
if close_tshark:
await self.close_async()
async def _go_through_packets_from_fd(self, fd, packet_callback, packet_count=None):
"""A coroutine which goes through a stream and calls a given callback for each XML packet seen in it."""
packets_captured = 0
self._log.debug("Starting to go through packets")
parser = self._setup_tshark_output_parser()
data = b""
while True:
try:
packet, data = await parser.get_packets_from_stream(fd, data,
got_first_packet=packets_captured > 0)
except EOFError:
self._log.debug("EOF reached")
self._eof_reached = True
break
if packet:
packets_captured += 1
try:
packet_callback(packet)
except StopCapture:
self._log.debug("User-initiated capture stop in callback")
break
if packet_count and packets_captured >= packet_count:
break
def _create_stderr_handling_task(self, stderr):
self._stderr_handling_tasks.append(asyncio.ensure_future(self._handle_process_stderr_forever(stderr)))
async def _handle_process_stderr_forever(self, stderr):
while True:
stderr_line = await stderr.readline()
if not stderr_line:
break
stderr_line = stderr_line.decode().strip()
self._last_error_line = stderr_line
self._log.debug(stderr_line)
def _get_tshark_path(self):
return get_process_path(self.tshark_path)
def _get_tshark_version(self):
if self.__tshark_version is None:
self.__tshark_version = get_tshark_version(self.tshark_path)
return self.__tshark_version
async def _get_tshark_process(self, packet_count=None, stdin=None):
"""Returns a new tshark process with previously-set parameters."""
self._verify_capture_parameters()
output_parameters = []
if self.use_json or self._use_ek:
if not tshark_supports_json(self._get_tshark_version()):
raise TSharkVersionException(
"JSON only supported on Wireshark >= 2.2.0")
if self.use_json:
output_type = "json"
if tshark_supports_duplicate_keys(self._get_tshark_version()):
output_parameters.append("--no-duplicate-keys")
elif self._use_ek:
output_type = "ek"
else:
output_type = "psml" if self._only_summaries else "pdml"
parameters = [self._get_tshark_path(), "-l", "-n", "-T", output_type] + \
self.get_parameters(packet_count=packet_count) + output_parameters
self._log.debug(
"Creating TShark subprocess with parameters: " + " ".join(parameters))
self._log.debug("Executable: %s", parameters[0])
tshark_process = await asyncio.create_subprocess_exec(*parameters,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
stdin=stdin)
self._create_stderr_handling_task(tshark_process.stderr)
self._created_new_process(parameters, tshark_process)
return tshark_process
def _created_new_process(self, parameters, process, process_name="TShark"):
self._log.debug(
process_name + f" subprocess (pid {process.pid}) created")
if process.returncode is not None and process.returncode != 0:
raise TSharkCrashException(
f"{process_name} seems to have crashed. Try updating it. (command ran: '{' '.join(parameters)}')")
self._running_processes.add(process)
async def _cleanup_subprocess(self, process):
"""Kill the given process and properly closes any pipes connected to it."""
self._log.debug(f"Cleanup Subprocess (pid {process.pid})")
if process.returncode is None:
try:
process.kill()
return await asyncio.wait_for(process.wait(), 1)
except asyncTimeoutError:
self._log.debug(
"Waiting for process to close failed, may have zombie process.")
except ProcessLookupError:
pass
except OSError:
if os.name != "nt":
raise
elif process.returncode > 0:
if process.returncode != 1 or self._eof_reached:
raise TSharkCrashException(f"TShark (pid {process.pid}) seems to have crashed (retcode: {process.returncode}).\n"
f"Last error line: {self._last_error_line}\n"
"Try rerunning in debug mode [ capture_obj.set_debug() ] or try updating tshark.")
def _setup_tshark_output_parser(self):
if self.use_json:
return tshark_json.TsharkJsonParser(self._get_tshark_version())
if self._use_ek:
ek_field_mapping.MAPPING.load_mapping(str(self._get_tshark_version()),
tshark_path=self.tshark_path)
return tshark_ek.TsharkEkJsonParser()
return tshark_xml.TsharkXmlParser(parse_summaries=self._only_summaries)
def close(self):
self.eventloop.run_until_complete(self.close_async())
async def close_async(self):
for process in self._running_processes.copy():
await self._cleanup_subprocess(process)
self._running_processes.clear()
# Wait for all stderr handling to finish
await asyncio.gather(*self._stderr_handling_tasks)
def __del__(self):
if self._running_processes:
self.close()
def __enter__(self): return self
async def __aenter__(self): return self
def __exit__(self, exc_type, exc_val, exc_tb): self.close()
async def __aexit__(self, exc_type, exc_val,
exc_tb): await self.close_async()
def get_parameters(self, packet_count=None):
"""Returns the special tshark parameters to be used according to the configuration of this class."""
params = []
if self._capture_filter:
params += ["-f", self._capture_filter]
if self._display_filter:
params += [get_tshark_display_filter_flag(self._get_tshark_version(),),
self._display_filter]
# Raw is only enabled when JSON is also enabled.
if self.include_raw:
params += ["-x"]
if packet_count:
params += ["-c", str(packet_count)]
if self._custom_parameters:
if isinstance(self._custom_parameters, list):
params += self._custom_parameters
elif isinstance(self._custom_parameters, dict):
for key, val in self._custom_parameters.items():
params += [key, val]
else:
raise TypeError("Custom parameters type not supported.")
if all(self.encryption):
params += ["-o", "wlan.enable_decryption:TRUE", "-o", 'uat:80211_keys:"' + self.encryption[1] + '","' +
self.encryption[0] + '"']
if self._override_prefs:
for preference_name, preference_value in self._override_prefs.items():
if all(self.encryption) and preference_name in ("wlan.enable_decryption", "uat:80211_keys"):
continue # skip if override preferences also given via --encryption options
params += ["-o", f"{preference_name}:{preference_value}"]
if self._output_file:
params += ["-w", self._output_file]
if self._decode_as:
for criterion, decode_as_proto in self._decode_as.items():
params += ["-d",
",".join([criterion.strip(), decode_as_proto.strip()])]
if self._disable_protocol:
params += ["--disable-protocol", self._disable_protocol.strip()]
return params
def __iter__(self):
if self.loaded:
return iter(self._packets)
else:
return self._packets_from_tshark_sync()
def __repr__(self):
return f"<{self.__class__.__name__} ({len(self._packets)} packets)>"

View File

@ -0,0 +1,93 @@
import pathlib
from pyshark.capture.capture import Capture
from pyshark.packet.packet import Packet
class FileCapture(Capture):
"""A class representing a capture read from a file."""
def __init__(self, input_file=None, keep_packets=True, display_filter=None, only_summaries=False,
decryption_key=None, encryption_type="wpa-pwk", decode_as=None,
disable_protocol=None, tshark_path=None, override_prefs=None,
use_json=False, use_ek=False,
output_file=None, include_raw=False, eventloop=None, custom_parameters=None,
debug=False):
"""Creates a packet capture object by reading from file.
:param keep_packets: Whether to keep packets after reading them via next(). Set to False to conserve memory when
reading large capture files; packets can then only be iterated over, not indexed or re-read.
:param input_file: File path of the capture (PCAP, PCAPNG)
:param display_filter: A display (wireshark) filter to apply on the cap before reading it.
:param only_summaries: Only produce packet summaries, much faster but includes very little information.
:param decryption_key: Optional key used to encrypt and decrypt captured traffic.
:param encryption_type: Standard of encryption used in captured traffic (must be either 'WEP', 'WPA-PWD', or
'WPA-PWK'. Defaults to WPA-PWK).
:param decode_as: A dictionary of {decode_criterion_string: decode_as_protocol} that are used to tell tshark
to decode protocols in situations it wouldn't usually, for instance {'tcp.port==8888': 'http'} would make
it attempt to decode any port 8888 traffic as HTTP. See tshark documentation for details.
:param tshark_path: Path of the tshark binary
:param override_prefs: A dictionary of tshark preferences to override, {PREFERENCE_NAME: PREFERENCE_VALUE, ...}.
:param disable_protocol: Tells tshark to remove a dissector for a specific protocol.
:param use_ek: Uses tshark in EK JSON mode. It is faster than XML but has slightly less data.
:param use_json: DEPRECATED. Use use_ek instead.
:param output_file: A string of a file to write every read packet into (useful when filtering).
:param custom_parameters: A dict of custom parameters to pass to tshark, i.e. {"--param": "value"}
or else a list of parameters in the format ["--foo", "bar", "--baz", "foo"].
"""
super(FileCapture, self).__init__(display_filter=display_filter, only_summaries=only_summaries,
decryption_key=decryption_key, encryption_type=encryption_type,
decode_as=decode_as, disable_protocol=disable_protocol,
tshark_path=tshark_path, override_prefs=override_prefs,
use_json=use_json, use_ek=use_ek, output_file=output_file,
include_raw=include_raw, eventloop=eventloop,
custom_parameters=custom_parameters, debug=debug)
self.input_filepath = pathlib.Path(input_file)
if not self.input_filepath.exists():
raise FileNotFoundError(f"[Errno 2] No such file or directory: {self.input_filepath}")
if not self.input_filepath.is_file():
raise FileNotFoundError(f"{self.input_filepath} is a directory")
self.keep_packets = keep_packets
self._packet_generator = self._packets_from_tshark_sync()
def next(self) -> Packet:
"""Returns the next packet in the cap.
If the capture's keep_packets flag is True, will also keep it in the internal packet list.
"""
if not self.keep_packets:
return self._packet_generator.send(None)
elif self._current_packet >= len(self._packets):
packet = self._packet_generator.send(None)
self._packets += [packet]
return super(FileCapture, self).next_packet()
def __getitem__(self, packet_index):
if not self.keep_packets:
raise NotImplementedError("Cannot use getitem if packets are not kept")
# We may not yet have this packet
while packet_index >= len(self._packets):
try:
self.next()
except StopIteration:
# We read the whole file, and there's still not such packet.
raise KeyError(f"Packet of index {packet_index} does not exist in capture")
return super(FileCapture, self).__getitem__(packet_index)
def get_parameters(self, packet_count=None):
return super(FileCapture, self).get_parameters(packet_count=packet_count) + [
"-r", self.input_filepath.as_posix()]
def _verify_capture_parameters(self):
try:
with self.input_filepath.open("rb"):
pass
except PermissionError:
raise PermissionError(f"Permission denied for file {self.input_filepath}")
def __repr__(self):
if self.keep_packets:
return f"<{self.__class__.__name__} {self.input_filepath.as_posix()}>"
else:
return f"<{self.__class__.__name__} {self.input_filepath.as_posix()} ({len(self._packets)} packets)>"

View File

@ -0,0 +1,200 @@
import asyncio
import datetime
import itertools
import subprocess
import os
import struct
import time
import warnings
from packaging import version
from pyshark.capture.capture import Capture, StopCapture
DEFAULT_TIMEOUT = 30
class LinkTypes(object):
NULL = 0
ETHERNET = 1
IEEE802_5 = 6
PPP = 9
IEEE802_11 = 105
class InMemCapture(Capture):
def __init__(self, bpf_filter=None, display_filter=None, only_summaries=False,
decryption_key=None, encryption_type='wpa-pwk', decode_as=None,
disable_protocol=None, tshark_path=None, override_prefs=None, use_json=False, use_ek=False,
linktype=LinkTypes.ETHERNET, include_raw=False, eventloop=None, custom_parameters=None,
debug=False):
"""Creates a new in-mem capture, a capture capable of receiving binary packets and parsing them using tshark.
Significantly faster if packets are added in a batch.
:param bpf_filter: BPF filter to use on packets.
:param display_filter: Display (wireshark) filter to use.
:param only_summaries: Only produce packet summaries, much faster but includes very little information
:param decryption_key: Key used to encrypt and decrypt captured traffic.
:param encryption_type: Standard of encryption used in captured traffic (must be either 'WEP', 'WPA-PWD',
or 'WPA-PWK'. Defaults to WPA-PWK).
:param decode_as: A dictionary of {decode_criterion_string: decode_as_protocol} that are used to tell tshark
to decode protocols in situations it wouldn't usually, for instance {'tcp.port==8888': 'http'} would make
it attempt to decode any port 8888 traffic as HTTP. See tshark documentation for details.
:param tshark_path: Path of the tshark binary
:param override_prefs: A dictionary of tshark preferences to override, {PREFERENCE_NAME: PREFERENCE_VALUE, ...}.
:param disable_protocol: Tells tshark to remove a dissector for a specific protocol.
:param custom_parameters: A dict of custom parameters to pass to tshark, i.e. {"--param": "value"}
or else a list of parameters in the format ["--foo", "bar", "--baz", "foo"].
"""
super(InMemCapture, self).__init__(display_filter=display_filter, only_summaries=only_summaries,
decryption_key=decryption_key, encryption_type=encryption_type,
decode_as=decode_as, disable_protocol=disable_protocol,
tshark_path=tshark_path, override_prefs=override_prefs,
use_json=use_json, use_ek=use_ek,
include_raw=include_raw, eventloop=eventloop,
custom_parameters=custom_parameters, debug=debug)
self.bpf_filter = bpf_filter
self._packets_to_write = None
self._current_linktype = linktype
self._current_tshark = None
def get_parameters(self, packet_count=None):
"""Returns the special tshark parameters to be used according to the configuration of this class."""
params = super(InMemCapture, self).get_parameters(
packet_count=packet_count)
params += ['-i', '-']
return params
async def _get_tshark_process(self, packet_count=None):
if self._current_tshark:
return self._current_tshark
proc = await super(InMemCapture, self)._get_tshark_process(packet_count=packet_count, stdin=subprocess.PIPE)
self._current_tshark = proc
# Create PCAP header
header = struct.pack("IHHIIII", 0xa1b2c3d4, 2, 4,
0, 0, 0x7fff, self._current_linktype)
proc.stdin.write(header)
return proc
def _get_json_separators(self):
""""Returns the separators between packets in a JSON output
Returns a tuple of (packet_separator, end_of_file_separator, characters_to_disregard).
The latter variable being the number of characters to ignore in order to pass the packet (i.e. extra newlines,
commas, parenthesis).
"""
if self._get_tshark_version() >= version.parse("2.6.7"):
return f"{os.linesep} }}".encode(), f"}}{os.linesep}]".encode(), 0
else:
return f"}}{os.linesep}{os.linesep}".encode(), f"}}{os.linesep}{os.linesep}]".encode(), 1
def _write_packet(self, packet, sniff_time):
if sniff_time is None:
now = time.time()
elif isinstance(sniff_time, datetime.datetime):
now = sniff_time.timestamp()
else:
now = float(sniff_time)
secs = int(now)
usecs = int((now * 1000000) % 1000000)
# Write packet header
self._current_tshark.stdin.write(struct.pack(
"IIII", secs, usecs, len(packet), len(packet)))
self._current_tshark.stdin.write(packet)
def parse_packet(self, binary_packet, sniff_time=None, timeout=DEFAULT_TIMEOUT):
"""Parses a single binary packet and returns its parsed version.
DOES NOT CLOSE tshark. It must be closed manually by calling close() when you're done
working with it.
Use parse_packets when parsing multiple packets for faster parsing
"""
if sniff_time is not None:
sniff_time = [sniff_time]
return self.parse_packets([binary_packet], sniff_time, timeout)[0]
def parse_packets(self, binary_packets, sniff_times=None, timeout=DEFAULT_TIMEOUT):
"""Parses binary packets and return a list of parsed packets.
DOES NOT CLOSE tshark. It must be closed manually by calling close() when you're done
working with it.
"""
if self.eventloop is None:
self._setup_eventloop()
return self.eventloop.run_until_complete(self.parse_packets_async(binary_packets, sniff_times, timeout))
async def parse_packets_async(self, binary_packets, sniff_times=None, timeout=DEFAULT_TIMEOUT):
"""A coroutine which parses binary packets and return a list of parsed packets.
DOES NOT CLOSE tshark. It must be closed manually by calling close() when you're done
working with it.
"""
parsed_packets = []
if sniff_times is None:
sniff_times = []
if not self._current_tshark:
await self._get_tshark_process()
for binary_packet, sniff_time in itertools.zip_longest(binary_packets, sniff_times):
self._write_packet(binary_packet, sniff_time)
def callback(pkt):
parsed_packets.append(pkt)
if len(parsed_packets) == len(binary_packets):
raise StopCapture()
await self._get_parsed_packet_from_tshark(callback, timeout)
return parsed_packets
async def _get_parsed_packet_from_tshark(self, callback, timeout):
await self._current_tshark.stdin.drain()
try:
await asyncio.wait_for(self.packets_from_tshark(callback, close_tshark=False), timeout)
except asyncio.TimeoutError:
await self.close_async()
raise asyncio.TimeoutError("Timed out while waiting for tshark to parse packet. "
"Try rerunning with cap.set_debug() to see tshark errors. "
"Closing tshark..")
async def close_async(self):
self._current_tshark = None
await super(InMemCapture, self).close_async()
def feed_packet(self, binary_packet, linktype=LinkTypes.ETHERNET, timeout=DEFAULT_TIMEOUT):
"""
DEPRECATED. Use parse_packet instead.
This function adds the packet to the packets list, and also closes and reopens tshark for
each packet.
==============
Gets a binary (string) packet and parses & adds it to this capture.
Returns the added packet.
Use feed_packets if you have multiple packets to insert.
By default, assumes the packet is an ethernet packet. For another link type, supply the linktype argument (most
can be found in the class LinkTypes)
"""
warnings.warn(
"Deprecated method. Use InMemCapture.parse_packet() instead.")
self._current_linktype = linktype
pkt = self.parse_packet(binary_packet, timeout=timeout)
self.close()
self._packets.append(pkt)
return pkt
def feed_packets(self, binary_packets, linktype=LinkTypes.ETHERNET, timeout=DEFAULT_TIMEOUT):
"""Gets a list of binary packets, parses them using tshark and returns their parsed values.
Keeps the packets in the internal packet list as well.
By default, assumes the packets are ethernet packets. For another link type, supply the linktype argument (most
can be found in the class LinkTypes)
"""
self._current_linktype = linktype
parsed_packets = self.parse_packets(binary_packets, timeout=timeout)
self._packets.extend(parsed_packets)
self.close()
return parsed_packets
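
A hedged sketch of parsing a hand-built Ethernet/ARP frame in memory (illustrative bytes; a local tshark installation is assumed):

from pyshark import InMemCapture

# Minimal broadcast ARP request: Ethernet header followed by a 28-byte ARP body.
frame = bytes.fromhex(
    "ffffffffffff"               # destination MAC (broadcast)
    "001122334455"               # source MAC
    "0806"                       # EtherType: ARP
    "0001" "0800" "06" "04"      # hw type, proto type, hw len, proto len
    "0001"                       # opcode: request
    "001122334455" "0a000001"    # sender MAC / IP
    "000000000000" "0a000002")   # target MAC / IP

cap = InMemCapture()
pkt = cap.parse_packet(frame)
print(pkt)
cap.close()    # parse_packet deliberately leaves tshark running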

View File

@ -0,0 +1,132 @@
import os
import asyncio
import subprocess
from packaging import version
from pyshark.capture.capture import Capture
from pyshark.tshark import tshark
from pyshark.tshark.tshark import get_tshark_interfaces, get_process_path
class UnknownInterfaceException(Exception):
pass
class LiveCapture(Capture):
"""Represents a live capture on a network interface."""
def __init__(self, interface=None, bpf_filter=None, display_filter=None, only_summaries=False,
decryption_key=None, encryption_type='wpa-pwk', output_file=None, decode_as=None,
disable_protocol=None, tshark_path=None, override_prefs=None, capture_filter=None,
monitor_mode=False, use_json=False, use_ek=False,
include_raw=False, eventloop=None, custom_parameters=None,
debug=False):
"""Creates a new live capturer on a given interface. Does not start the actual capture itself.
:param interface: Name of the interface to sniff on or a list of names (str). If not given, runs on all interfaces.
:param bpf_filter: BPF filter to use on packets.
:param display_filter: Display (wireshark) filter to use.
:param only_summaries: Only produce packet summaries, much faster but includes very little information
:param decryption_key: Optional key used to encrypt and decrypt captured traffic.
:param encryption_type: Standard of encryption used in captured traffic (must be either 'WEP', 'WPA-PWD', or
'WPA-PWK'. Defaults to WPA-PWK).
:param output_file: Additionally save live captured packets to this file.
:param decode_as: A dictionary of {decode_criterion_string: decode_as_protocol} that are used to tell tshark
to decode protocols in situations it wouldn't usually, for instance {'tcp.port==8888': 'http'} would make
it attempt to decode any port 8888 traffic as HTTP. See tshark documentation for details.
:param tshark_path: Path of the tshark binary
:param override_prefs: A dictionary of tshark preferences to override, {PREFERENCE_NAME: PREFERENCE_VALUE, ...}.
:param capture_filter: Capture (wireshark) filter to use.
:param disable_protocol: Tells tshark to remove a dissector for a specific protocol.
:param use_ek: Uses tshark in EK JSON mode. It is faster than XML but has slightly less data.
:param use_json: DEPRECATED. Use use_ek instead.
:param custom_parameters: A dict of custom parameters to pass to tshark, i.e. {"--param": "value"} or
else a list of parameters in the format ["--foo", "bar", "--baz", "foo"].
"""
super(LiveCapture, self).__init__(display_filter=display_filter, only_summaries=only_summaries,
decryption_key=decryption_key, encryption_type=encryption_type,
output_file=output_file, decode_as=decode_as, disable_protocol=disable_protocol,
tshark_path=tshark_path, override_prefs=override_prefs,
capture_filter=capture_filter, use_json=use_json, use_ek=use_ek,
include_raw=include_raw,
eventloop=eventloop, custom_parameters=custom_parameters,
debug=debug)
self.bpf_filter = bpf_filter
self.monitor_mode = monitor_mode
all_interfaces = get_tshark_interfaces(tshark_path)
if interface is None:
self.interfaces = all_interfaces
elif isinstance(interface, str):
self.interfaces = [interface]
else:
self.interfaces = interface
def get_parameters(self, packet_count=None):
"""Returns the special tshark parameters to be used according to the configuration of this class."""
params = super(LiveCapture, self).get_parameters(packet_count=packet_count)
# Read from STDIN
params += ["-i", "-"]
return params
def _verify_capture_parameters(self):
all_interfaces_names = tshark.get_all_tshark_interfaces_names(self.tshark_path)
all_interfaces_lowercase = [interface.lower() for interface in all_interfaces_names]
for each_interface in self.interfaces:
if each_interface.startswith("rpcap://"):
continue
if each_interface.isnumeric():
continue
if each_interface.lower() not in all_interfaces_lowercase:
raise UnknownInterfaceException(
f"Interface '{each_interface}' does not exist, unable to initiate capture. "
f"Perhaps permissions are missing?\n"
f"Possible interfaces: {os.linesep.join(all_interfaces_names)}")
def _get_dumpcap_parameters(self):
# Don't report packet counts.
params = ["-q"]
if self._get_tshark_version() < version.parse("2.5.0"):
# Tshark versions older than 2.5 don't support pcapng. This flag forces dumpcap to output pcap.
params += ["-P"]
if self.bpf_filter:
params += ["-f", self.bpf_filter]
if self.monitor_mode:
params += ["-I"]
for interface in self.interfaces:
params += ["-i", interface]
# Write to STDOUT
params += ["-w", "-"]
return params
async def _get_tshark_process(self, packet_count=None, stdin=None):
read, write = os.pipe()
dumpcap_params = [get_process_path(process_name="dumpcap", tshark_path=self.tshark_path)] + self._get_dumpcap_parameters()
self._log.debug("Creating Dumpcap subprocess with parameters: %s", " ".join(dumpcap_params))
dumpcap_process = await asyncio.create_subprocess_exec(*dumpcap_params, stdout=write,
stderr=subprocess.PIPE)
self._create_stderr_handling_task(dumpcap_process.stderr)
self._created_new_process(dumpcap_params, dumpcap_process, process_name="Dumpcap")
tshark = await super(LiveCapture, self)._get_tshark_process(packet_count=packet_count, stdin=read)
return tshark
# Backwards compatibility
sniff = Capture.load_packets
def sniff_continuously(self, packet_count=None):
"""Captures from the set interface, returning a generator which returns packets continuously.
Can be used as follows:
for packet in capture.sniff_continuously():
print('Woo, another packet:', packet)
Note: you can also call capture.apply_on_packets(packet_callback) which should have a slight performance boost.
:param packet_count: number of packets to capture before stopping.
"""
# Retained for backwards compatibility and to add documentation.
return self._packets_from_tshark_sync(packet_count=packet_count)
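
A hedged usage sketch (the interface name is hypothetical; live capture typically requires elevated privileges and a local tshark/dumpcap):

import pyshark

cap = pyshark.LiveCapture(interface="eth0")        # hypothetical interface
for pkt in cap.sniff_continuously(packet_count=5):
    print(pkt)
cap.close()

# Alternatively, buffer packets for a fixed time window via the sniff alias:
cap = pyshark.LiveCapture(interface="eth0")
cap.sniff(timeout=10)
print(len(cap), "packets captured")
cap.close()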

View File

@ -0,0 +1,57 @@
from pyshark import LiveCapture
class LiveRingCapture(LiveCapture):
"""Represents a live ringbuffer capture on a network interface."""
def __init__(self, ring_file_size=1024, num_ring_files=1, ring_file_name='/tmp/pyshark.pcap', interface=None,
bpf_filter=None, display_filter=None, only_summaries=False, decryption_key=None,
encryption_type='wpa-pwk', decode_as=None, disable_protocol=None,
tshark_path=None, override_prefs=None, capture_filter=None,
use_json=False, use_ek=False, include_raw=False, eventloop=None,
custom_parameters=None, debug=False):
"""
Creates a new live capturer on a given interface. Does not start the actual capture itself.
:param ring_file_size: Size of the ring file in kB, default is 1024
:param num_ring_files: Number of ring files to keep, default is 1
:param ring_file_name: Name of the ring file, default is /tmp/pyshark.pcap
:param interface: Name of the interface to sniff on or a list of names (str). If not given, runs on all interfaces.
:param bpf_filter: BPF filter to use on packets.
:param display_filter: Display (wireshark) filter to use.
:param only_summaries: Only produce packet summaries, much faster but includes very little information
:param decryption_key: Optional key used to encrypt and decrypt captured traffic.
:param encryption_type: Standard of encryption used in captured traffic (must be either 'WEP', 'WPA-PWD', or
'WPA-PWK'. Defaults to WPA-PWK).
:param decode_as: A dictionary of {decode_criterion_string: decode_as_protocol} that are used to tell tshark
to decode protocols in situations it wouldn't usually, for instance {'tcp.port==8888': 'http'} would make
it attempt to decode any port 8888 traffic as HTTP. See tshark documentation for details.
:param tshark_path: Path of the tshark binary
:param override_prefs: A dictionary of tshark preferences to override, {PREFERENCE_NAME: PREFERENCE_VALUE, ...}.
:param capture_filter: Capture (wireshark) filter to use.
:param disable_protocol: Tells tshark to remove a dissector for a specific protocol.
:param use_ek: Uses tshark in EK JSON mode. It is faster than XML but has slightly less data.
:param use_json: DEPRECATED. Use use_ek instead.
:param custom_parameters: A dict of custom parameters to pass to tshark, i.e. {"--param": "value"}
or else a list of parameters in the format ["--foo", "bar", "--baz", "foo"].
"""
super(LiveRingCapture, self).__init__(interface, bpf_filter=bpf_filter, display_filter=display_filter, only_summaries=only_summaries,
decryption_key=decryption_key, encryption_type=encryption_type,
tshark_path=tshark_path, decode_as=decode_as, disable_protocol=disable_protocol,
override_prefs=override_prefs, capture_filter=capture_filter,
use_json=use_json, use_ek=use_ek, include_raw=include_raw, eventloop=eventloop,
custom_parameters=custom_parameters, debug=debug)
self.ring_file_size = ring_file_size
self.num_ring_files = num_ring_files
self.ring_file_name = ring_file_name
def get_parameters(self, packet_count=None):
params = super(LiveRingCapture, self).get_parameters(packet_count=packet_count)
params += ['-b', 'filesize:' + str(self.ring_file_size), '-b', 'files:' + str(self.num_ring_files),
'-w', self.ring_file_name, '-P', '-V']
return params
def _get_dumpcap_parameters(self):
params = super(LiveRingCapture, self)._get_dumpcap_parameters()
params += ['-P']
return params
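
A hedged usage sketch (interface name and ring file path are hypothetical):

import pyshark

# tshark rotates between 3 ring files of roughly 2 MB each.
cap = pyshark.LiveRingCapture(interface="eth0",          # hypothetical interface
                              ring_file_size=2048,       # kB
                              num_ring_files=3,
                              ring_file_name="/tmp/ring.pcap")
for pkt in cap.sniff_continuously(packet_count=10):
    print(pkt)
cap.close()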

View File

@ -0,0 +1,52 @@
import os
from pyshark.capture.capture import Capture
class PipeCapture(Capture):
def __init__(self, pipe, display_filter=None, only_summaries=False,
decryption_key=None, encryption_type='wpa-pwk', decode_as=None,
disable_protocol=None, tshark_path=None, override_prefs=None, use_json=False,
include_raw=False, eventloop=None, custom_parameters=None, debug=False):
"""Receives a file-like and reads the packets from there (pcap format).
:param bpf_filter: BPF filter to use on packets.
:param display_filter: Display (wireshark) filter to use.
:param only_summaries: Only produce packet summaries, much faster but includes very little information
:param decryption_key: Key used to encrypt and decrypt captured traffic.
:param encryption_type: Standard of encryption used in captured traffic (must be either 'WEP', 'WPA-PWD',
or 'WPA-PWK'. Defaults to WPA-PWK).
:param decode_as: A dictionary of {decode_criterion_string: decode_as_protocol} that are used to tell tshark
to decode protocols in situations it wouldn't usually, for instance {'tcp.port==8888': 'http'} would make
it attempt to decode any port 8888 traffic as HTTP. See tshark documentation for details.
:param tshark_path: Path of the tshark binary
:param override_prefs: A dictionary of tshark preferences to override, {PREFERENCE_NAME: PREFERENCE_VALUE, ...}.
:param disable_protocol: Tells tshark to remove a dissector for a specific protocol.
:param custom_parameters: A dict of custom parameters to pass to tshark, i.e. {"--param": "value"}
or else a list of parameters in the format ["--foo", "bar", "--baz", "foo"].
"""
super(PipeCapture, self).__init__(display_filter=display_filter,
only_summaries=only_summaries,
decryption_key=decryption_key,
encryption_type=encryption_type,
decode_as=decode_as, disable_protocol=disable_protocol,
tshark_path=tshark_path, override_prefs=override_prefs,
use_json=use_json, include_raw=include_raw, eventloop=eventloop,
custom_parameters=custom_parameters, debug=debug)
self._pipe = pipe
def get_parameters(self, packet_count=None):
"""
Returns the special tshark parameters to be used according to the configuration of this class.
"""
params = super(PipeCapture, self).get_parameters(packet_count=packet_count)
params += ['-r', '-']
return params
async def _get_tshark_process(self, packet_count=None):
return await super(PipeCapture, self)._get_tshark_process(packet_count=packet_count, stdin=self._pipe)
def close(self):
# Close pipe
os.close(self._pipe)
super(PipeCapture, self).close()
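
A hedged sketch of feeding PipeCapture from an OS-level pipe; the module path pyshark.capture.pipe_capture and the pcap path are assumptions, and writing the whole file up front only works for captures smaller than the pipe buffer:

import os
from pyshark.capture.pipe_capture import PipeCapture   # assumed module path

read_fd, write_fd = os.pipe()
with open("small.pcap", "rb") as f:                    # hypothetical path
    os.write(write_fd, f.read())
os.close(write_fd)

cap = PipeCapture(pipe=read_fd)
for pkt in cap:
    print(pkt)
cap.close()   # also closes the read end of the pipe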

View File

@ -0,0 +1,66 @@
from pyshark import LiveCapture
class RemoteCapture(LiveCapture):
"""A capture which is performed on a remote machine which has an rpcapd service running."""
def __init__(
self,
remote_host,
remote_interface,
*args,
remote_port=2002,
bpf_filter=None,
only_summaries=False,
decryption_key=None,
encryption_type="wpa-pwk",
decode_as=None,
disable_protocol=None,
tshark_path=None,
override_prefs=None,
eventloop=None,
debug=False,
**kwargs
):
"""
Creates a new remote capture which will connect to a remote machine which is running rpcapd. Use the sniff()
method to get packets.
Note: The remote machine should have rpcapd running in null authentication mode (-n). Be warned that the traffic
is unencrypted!
Note:
*args and **kwargs are passed to LiveCapture's __init__ method.
:param remote_host: The remote host to capture on (IP or hostname). Should be running rpcapd.
:param remote_interface: The remote interface on the remote machine to capture on. Note that on windows it is
not the device display name but the true interface name (i.e. \\Device\\NPF_..).
:param remote_port: The remote port the rpcapd service is listening on
:param bpf_filter: A BPF (tcpdump) filter to apply on the cap before reading.
:param only_summaries: Only produce packet summaries, much faster but includes very little information
:param decryption_key: Key used to encrypt and decrypt captured traffic.
:param encryption_type: Standard of encryption used in captured traffic (must be either 'WEP', 'WPA-PWD',
or 'WPA-PWK'. Defaults to WPA-PWK).
:param decode_as: A dictionary of {decode_criterion_string: decode_as_protocol} that are used to tell tshark
to decode protocols in situations it wouldn't usually, for instance {'tcp.port==8888': 'http'} would make
it attempt to decode any port 8888 traffic as HTTP. See tshark documentation for details.
:param tshark_path: Path of the tshark binary
:param override_prefs: A dictionary of tshark preferences to override, {PREFERENCE_NAME: PREFERENCE_VALUE, ...}.
:param disable_protocol: Tells tshark to remove a dissector for a specific protocol.
"""
interface = f'rpcap://{remote_host}:{remote_port}/{remote_interface}'
super(RemoteCapture, self).__init__(
interface,
*args,
bpf_filter=bpf_filter,
only_summaries=only_summaries,
decryption_key=decryption_key,
encryption_type=encryption_type,
tshark_path=tshark_path,
decode_as=decode_as,
disable_protocol=disable_protocol,
override_prefs=override_prefs,
eventloop=eventloop,
debug=debug,
**kwargs
)
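
A hedged usage sketch (host, port and interface are hypothetical; the remote machine must run rpcapd -n):

import pyshark

cap = pyshark.RemoteCapture("192.168.1.101", "eth0", remote_port=2002)
for pkt in cap.sniff_continuously(packet_count=5):
    print(pkt)
cap.close()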

View File

@ -0,0 +1,13 @@
[tshark]
# Specify the path to the tshark executable.
# If the configured path does not exist, these locations will be searched:
# (Linux): /usr/bin/tshark
# (Linux): /usr/sbin/tshark
# (Linux): /usr/lib/tshark/tshark
# (Linux): /usr/local/bin/tshark
# (Windows): %ProgramFiles%\Wireshark\tshark.exe
# (Windows): %ProgramFiles(x86)%\Wireshark\tshark.exe
tshark_path = C:\Program Files\Wireshark\tshark.exe
[dumpcap]
dumpcap_path = C:\Program Files\Wireshark\dumpcap.exe

View File

@ -0,0 +1,22 @@
from pathlib import Path
from configparser import ConfigParser
import pyshark
fp_config_path = Path.cwd() / 'config.ini' # get config from the current directory
pyshark_config_path = Path(pyshark.__file__).parent / 'config.ini'
def get_config():
if fp_config_path.exists():
config_path = fp_config_path
elif pyshark_config_path.exists():
config_path = pyshark_config_path
else:
return None
config = ConfigParser()
config.read(config_path)
return config
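
A hedged sketch of consuming this helper, assuming the module is importable as pyshark.config (the module path is not shown in the diff):

from pyshark.config import get_config   # assumed module path

config = get_config()
if config is not None and config.has_option("tshark", "tshark_path"):
    print(config.get("tshark", "tshark_path"))
else:
    print("No config.ini found; pyshark will fall back to searching default locations.")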

View File

@ -0,0 +1,91 @@
import binascii
import json
from pyshark import cache
from pyshark.tshark import tshark
_MAPPING_CACHE_NAME = "ek_field_mapping.json"
class FieldNotFound(Exception):
pass
class ProtocolMappingNotInitialized(Exception):
pass
class _EkFieldMapping:
def __init__(self):
self._protocol_to_mapping = {}
def load_mapping(self, tshark_version, tshark_path=None):
if self._protocol_to_mapping:
return
mapping_cache_file = cache.get_cache_dir(tshark_version).joinpath(_MAPPING_CACHE_NAME)
if mapping_cache_file.exists():
self._protocol_to_mapping = json.load(mapping_cache_file.open())
else:
self._protocol_to_mapping = tshark.get_ek_field_mapping(tshark_path=tshark_path)
mapping_cache_file.open("w").write(json.dumps(self._protocol_to_mapping))
def cast_field_value(self, protocol, field_name, field_value):
"""Casts the field value to its proper type according to the mapping"""
if isinstance(field_value, list):
return [self.cast_field_value(protocol, field_name, item) for item in field_value]
if not isinstance(field_value, str):
return field_value
field_type = self.get_field_type(protocol, field_name)
if field_type == str:
return field_value
if field_type == int and field_value.startswith("0x"):
return int(field_value, 16)
if field_type == bytes:
try:
return binascii.unhexlify(field_value.replace(":", ""))
except binascii.Error:
return field_value
try:
return field_type(field_value)
except ValueError:
return field_value
def get_field_type(self, protocol, field_name):
"""Gets the Python type for the given field (only for EK fields).
If we are unfamiliar with the type, str will be returned.
"""
if not self._protocol_to_mapping:
raise ProtocolMappingNotInitialized("Protocol mapping not initialized. Call load_mapping() first")
if protocol not in self._protocol_to_mapping:
raise FieldNotFound(f"Type mapping for protocol {protocol} not found")
fields = self._protocol_to_mapping[protocol]["properties"]
if field_name not in fields:
return str
return self._get_python_type_for_field_type(fields[field_name]["type"])
def clear(self):
self._protocol_to_mapping.clear()
@classmethod
def _get_python_type_for_field_type(cls, field_type):
if field_type in ("integer", "long", "short"):
return int
if field_type == "float":
return float
if field_type == "date":
# We don't use datetime.datetime because these can be timedeltas as well.
# Better let the user decide.
return float
if field_type == "byte":
return bytes
# Other known types are IP. Retain as str
return str
MAPPING = _EkFieldMapping()
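
A hedged sketch of priming the singleton mapping the way Capture does for EK mode (assumes a local tshark installation; the protocol/field keys in the lookup are illustrative and may not exist in every mapping):

from pyshark import ek_field_mapping
from pyshark.tshark.tshark import get_tshark_version

# Populate the mapping once per process; it is cached on disk afterwards.
ek_field_mapping.MAPPING.load_mapping(str(get_tshark_version()))

# EK layers then use it to cast string field values to proper Python types.
try:
    print(ek_field_mapping.MAPPING.cast_field_value("ip", "ip_ip_ttl", "64"))
except ek_field_mapping.FieldNotFound:
    pass   # illustrative keys; skip if this mapping names them differently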

View File

@ -0,0 +1,41 @@
import sys
import functools
import termcolor
class Pickleable(object):
"""
Base class that implements getstate/setstate, since most of the classes are overriding getattr.
"""
def __getstate__(self):
return self.__dict__
def __setstate__(self, data):
self.__dict__.update(data)
class SlotsPickleable(object):
__slots__ = []
def __getstate__(self):
ret = {}
for slot in self.__slots__:
ret[slot] = getattr(self, slot)
return ret
def __setstate__(self, data):
for key, val in data.items():
setattr(self, key, val)
@functools.wraps(termcolor.colored)
def colored(text, *args, **kwargs):
try:
enable_color = sys.stdout.isatty()
except (AttributeError, NotImplementedError, FileNotFoundError):
enable_color = False
if enable_color:
return termcolor.colored(text, *args, **kwargs)
return text
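
A small sketch of the TTY-aware wrapper defined above:

from pyshark.packet.common import colored

# ANSI colors are only emitted when stdout is a terminal; otherwise the text
# passes through unchanged (useful when piping output to a file).
print(colored("Layer TCP:", "green", attrs=["bold"]))
print(colored("plain text when not a TTY"))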

View File

@ -0,0 +1 @@
TRANSPORT_LAYERS = ['UDP', 'TCP']

View File

@ -0,0 +1,115 @@
import binascii
import typing
from pyshark.packet.common import Pickleable, SlotsPickleable
class LayerField(SlotsPickleable):
"""Holds all data about a field of a layer, both its actual value and its name and nice representation."""
# Note: We use this object with slots and not just a dict because
# it's much more memory-efficient (cuts about a third of the memory).
__slots__ = ['name', 'showname', 'raw_value', 'show', 'hide', 'pos', 'size', 'unmaskedvalue']
def __init__(self, name=None, showname=None, value=None, show=None, hide=None, pos=None, size=None, unmaskedvalue=None):
self.name = name
self.showname = showname
self.raw_value = value
self.show = show
self.pos = pos
self.size = size
self.unmaskedvalue = unmaskedvalue
if hide and hide == 'yes':
self.hide = True
else:
self.hide = False
def __repr__(self):
return f'<LayerField {self.name}: {self.get_default_value()}>'
def get_default_value(self) -> str:
"""Gets the best 'value' string this field has."""
val = self.show
if not val:
val = self.raw_value
if not val:
val = self.showname
return val
@property
def showname_value(self) -> typing.Union[str, None]:
"""The "pretty value" (as displayed by Wireshark) of the field."""
if self.showname and ': ' in self.showname:
return self.showname.split(': ', 1)[1]
return None
@property
def showname_key(self) -> typing.Union[str, None]:
"""The "pretty name" (as displayed by Wireshark) of the field."""
if self.showname and ': ' in self.showname:
return self.showname.split(': ', 1)[0]
return None
@property
def binary_value(self) -> bytes:
"""Converts this field to binary (assuming it's a binary string)"""
str_raw_value = str(self.raw_value)
if len(str_raw_value) % 2 == 1:
str_raw_value = '0' + str_raw_value
return binascii.unhexlify(str_raw_value)
@property
def int_value(self) -> int:
"""Returns the int value of this field (assuming it's represented as a decimal integer)."""
return int(self.raw_value)
@property
def hex_value(self) -> int:
"""Returns the int value of this field if it's in base 16
(either as a normal number or in a "0xFFFF"-style hex value)
"""
return int(self.raw_value, 16)
base16_value = hex_value
class LayerFieldsContainer(str, Pickleable):
"""An object which contains one or more fields (of the same name).
When accessing a member such as showname or raw_value, the corresponding member of the main (first) field saved
in this container is returned.
"""
def __new__(cls, main_field, *args, **kwargs):
if hasattr(main_field, 'get_default_value'):
obj = str.__new__(cls, main_field.get_default_value(), *args, **kwargs)
else:
obj = str.__new__(cls, main_field, *args, **kwargs)
obj.fields = [main_field]
return obj
def __dir__(self):
return dir(type(self)) + list(self.__dict__.keys()) + dir(self.main_field)
def add_field(self, field):
self.fields.append(field)
@property
def all_fields(self):
"""Returns all fields in a list, the main field followed by the alternate fields."""
return self.fields
@property
def main_field(self):
return self.fields[0]
@property
def alternate_fields(self):
"""Return the alternate values of this field containers (non-main ones)."""
return self.fields[1:]
def __getattr__(self, item):
return getattr(self.main_field, item)
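
A hedged sketch of building these objects directly, the way the XML parser would (field values are illustrative):

from pyshark.packet.fields import LayerField, LayerFieldsContainer

field = LayerField(name="ip.src", showname="Source: 10.0.0.1",
                   value="0a000001", show="10.0.0.1")
container = LayerFieldsContainer(field)

print(container)                 # "10.0.0.1" -- the container is itself a str
print(container.showname_value)  # "10.0.0.1"
print(container.binary_value)    # b'\n\x00\x00\x01'
print(container.hex_value)       # 167772161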

View File

@ -0,0 +1,71 @@
import os
import typing
import io
import sys
from pyshark.packet import common
DATA_LAYER_NAME = "DATA"
class BaseLayer(common.SlotsPickleable):
"""An object representing a Packet layer."""
__slots__ = ["_layer_name"]
def __init__(self, layer_name):
self._layer_name = layer_name
def get_field(self, name):
raise NotImplementedError()
@property
def field_names(self) -> typing.List[str]:
"""Gets all XML field names of this layer."""
raise NotImplementedError()
def has_field(self, name):
return name in self.field_names
@property
def layer_name(self):
return self._layer_name
def get(self, item, default=None):
"""Gets a field in the layer, or the default if not found.
Works the same way as getattr, but returns the given default if the field was not found"""
try:
return getattr(self, item)
except AttributeError:
return default
def __dir__(self):
return dir(type(self)) + self.field_names
def __getattr__(self, item):
val = self.get_field(item)
if val is None:
raise AttributeError(f"{item} does not exist in Layer")
return val
def pretty_print(self, writer=None):
if not writer:
writer = sys.stdout
if self.layer_name == DATA_LAYER_NAME:
writer.write('DATA')
return
text = f'Layer {self.layer_name.upper()}:{os.linesep}'
writer.write(common.colored(text, color="yellow", attrs=["bold"]))
self._pretty_print_layer_fields(writer)
def _pretty_print_layer_fields(self, terminal_writer: io.IOBase):
raise NotImplementedError()
def __repr__(self):
return f'<{self.layer_name.upper()} Layer>'
def __str__(self):
writer = io.StringIO()
self.pretty_print(writer=writer)
return writer.getvalue()
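
A minimal concrete subclass backed by a plain dict, only to illustrate the abstract interface (the DictLayer name is hypothetical; the real parsers supply their own layer subclasses):

from pyshark.packet.layers.base import BaseLayer

class DictLayer(BaseLayer):
    __slots__ = ["_fields"]

    def __init__(self, layer_name, fields):
        super().__init__(layer_name)
        self._fields = fields

    def get_field(self, name):
        return self._fields.get(name)

    @property
    def field_names(self):
        return list(self._fields)

    def _pretty_print_layer_fields(self, terminal_writer):
        for name in self.field_names:
            terminal_writer.write(f"\t{name}: {self._fields[name]}\n")

layer = DictLayer("ip", {"src": "10.0.0.1", "dst": "10.0.0.2"})
print(layer.src)             # "10.0.0.1", resolved via __getattr__
print(layer.get("ttl", 64))  # 64, the default for a missing field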

View File

@ -0,0 +1,185 @@
import abc
import os
import io
import typing
from pyshark.packet.common import colored
from pyshark import ek_field_mapping
from pyshark.packet.layers.base import BaseLayer
class _EkLayerHelperFuncsMixin(abc.ABC):
"""For methods shared between the EK layer and sublayers"""
def get_field_as_list(self, name) -> list:
"""Helper function to get a certain field always as a list.
Some fields may appear once or more in the packet. The field will appear as a list if it appears more
than once. In order to avoid checking certain fields if they're lists or not, this function will
return the field inside a list at all times.
For example, in a DNS packet there may be one or more responses.
A packet with one response (www.google.com) will return:
>>> print(pkt.dns.resp_name)
"www.google.com"
While a packet with two responses will return:
>>> print(pkt.dns.resp_name)
["www.google.com", "www.google2.com"]
To avoid this changing behaviour, use:
>>> print(pkt.dns.get_field_as_list("resp_name"))
["www.google.com"]
"""
field_value = self.get_field(name)
if isinstance(field_value, list):
return field_value
return [field_value]
class EkLayer(BaseLayer, _EkLayerHelperFuncsMixin):
__slots__ = ["_layer_name", "_fields_dict"]
def __init__(self, layer_name, layer_dict):
super().__init__(layer_name)
self._fields_dict = layer_dict
def get_field(self, name) -> typing.Union["EkMultiField", None, str, int, bool, bytes, list]:
name = name.replace(".", "_")
if name in self._fields_dict:
# For cases like "text"
return self._get_field_value(name)
for prefix in self._get_possible_layer_prefixes():
nested_field = self._get_nested_field(prefix, name)
if nested_field is not None:
return nested_field
return None
def has_field(self, name) -> bool:
"""Checks if the field exists, either a nested field or a regular field"""
return name in self.field_names or name in self.all_field_names
@property
def field_names(self):
return list({field_name.split("_", 1)[0] for field_name in self.all_field_names})
@property
def all_field_names(self):
"""Gets all field names, including subfields"""
names = set()
for field_name in self._fields_dict:
for prefix in self._get_possible_layer_prefixes():
if field_name.startswith(prefix):
names.add(_remove_ek_prefix(prefix, field_name))
break
return list(names)
def _get_field_value(self, full_field_name):
"""Gets the field value, optionally casting it using the cached field mapping"""
field_value = self._fields_dict[full_field_name]
return ek_field_mapping.MAPPING.cast_field_value(self._layer_name, full_field_name, field_value)
def _get_nested_field(self, prefix, name):
"""Gets a field that is directly on the layer
Returns either a multifield or a raw value.
"""
# TODO: Optimize
field_ek_name = f"{prefix}_{name}"
if field_ek_name in self._fields_dict:
if self._field_has_subfields(field_ek_name):
return EkMultiField(self, self._fields_dict, name,
value=self._get_field_value(field_ek_name))
return self._get_field_value(field_ek_name)
for possible_nested_name in self._fields_dict:
if possible_nested_name.startswith(f"{field_ek_name}_"):
return EkMultiField(self, self._fields_dict, name, value=None)
return None
def _field_has_subfields(self, field_ek_name):
field_ek_name_with_ext = f"{field_ek_name}_"
for field_name in self._fields_dict:
if field_name.startswith(field_ek_name_with_ext):
return True
return False
def _pretty_print_layer_fields(self, file: io.IOBase):
for field_name in self.field_names:
field = self.get_field(field_name)
self._pretty_print_field(field_name, field, file, indent=1)
def _pretty_print_field(self, field_name, field, file, indent=0):
prefix = "\t" * indent
if isinstance(field, EkMultiField):
file.write(colored(f"{prefix}{field_name}: ", "green", attrs=["bold"]))
if field.value is not None:
file.write(str(field.value))
file.write(os.linesep)
for subfield in field.subfields:
self._pretty_print_field(subfield, field.get_field(subfield), file,
indent=indent + 1)
else:
file.write(colored(f"{prefix}{field_name}: ", "green", attrs=["bold"]))
file.write(f"{field}{os.linesep}")
def _get_possible_layer_prefixes(self):
"""Gets the possible prefixes for a field under this layer.
The order matters, longest must be first
"""
return [f"{self._layer_name}_{self._layer_name}", self._layer_name]
class EkMultiField(_EkLayerHelperFuncsMixin):
__slots__ = ["_containing_layer", "_full_name", "_all_fields", "value"]
def __init__(self, containing_layer: EkLayer, all_fields, full_name, value=None):
self._containing_layer = containing_layer
self._full_name = full_name
self._all_fields = all_fields
self.value = value
def get_field(self, field_name):
return self._containing_layer.get_field(f"{self._full_name}_{field_name}")
@property
def subfields(self):
names = set()
for field_name in self._containing_layer.all_field_names:
if field_name != self._full_name and field_name.startswith(f"{self._full_name}_"):
names.add(field_name[len(self._full_name):].split("_")[1])
return list(names)
@property
def field_name(self):
return self._full_name.split("_")[-1]
def __getattr__(self, item):
value = self.get_field(item)
if value is None:
raise AttributeError(f"Subfield {item} not found")
return value
def __repr__(self):
value = f": {self.value}" if self.value else ""
return f"<EkMultiField {self.field_name}{value}>"
def __dir__(self) -> typing.Iterable[str]:
return dir(type(self)) + self.subfields
def _remove_ek_prefix(prefix, value):
"""Removes prefix given and the underscore after it"""
return value[len(prefix) + 1:]
def _get_subfields(all_fields, field_ek_name):
subfield_names = []
for field in all_fields:
if field != field_ek_name and field.startswith(field_ek_name):
subfield_names.append(_remove_ek_prefix(field_ek_name, field))
return subfield_names
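
# --- Illustrative usage sketch (editor's addition, not part of the original module). ---
# Shows how EkLayer/EkMultiField are typically reached from a capture opened with
# use_ek=True. "example.pcap" is a hypothetical file containing TCP traffic; the layer
# and field names below depend on the packets and on tshark's "-T ek" naming, and
# attribute access is assumed to resolve through get_field.
if __name__ == "__main__":
    import pyshark

    cap = pyshark.FileCapture("example.pcap", use_ek=True)
    pkt = cap[0]
    tcp = pkt.tcp                       # attribute access resolves the layer by name
    print(tcp.field_names)              # top-level field names with the EK prefix stripped
    flags = tcp.flags                   # an EkMultiField, since the TCP flags field has subfields
    print(flags.value, flags.subfields)
    print(flags.syn)                    # subfield access goes back through the containing layer
    cap.close()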

View File

@ -0,0 +1,200 @@
import os
import io
from pyshark.packet.common import colored
from pyshark.packet.fields import LayerField
from pyshark.packet.fields import LayerFieldsContainer
from pyshark.packet.layers.base import BaseLayer
class JsonLayer(BaseLayer):
__slots__ = [
"duplicate_layers",
"_showname_fields_converted_to_regular",
"_full_name",
"_is_intermediate",
"_wrapped_fields",
"value",
"_all_fields"
] + BaseLayer.__slots__
def __init__(self, layer_name, layer_dict, full_name=None, is_intermediate=False):
"""Creates a JsonLayer. All sublayers and fields are created lazily later."""
super().__init__(layer_name)
self.duplicate_layers = []
self._showname_fields_converted_to_regular = False
if not full_name:
self._full_name = self._layer_name
else:
self._full_name = full_name
self._is_intermediate = is_intermediate
self._wrapped_fields = {}
if isinstance(layer_dict, list):
self.duplicate_layers = [JsonLayer(layer_name, duplicate_dict,
full_name=full_name, is_intermediate=is_intermediate)
for duplicate_dict in layer_dict[1:]]
layer_dict = layer_dict[0]
if not isinstance(layer_dict, dict):
self.value = layer_dict
self._all_fields = {}
return
self._all_fields = layer_dict
def get_field(self, name):
"""Gets a field by its full or partial name."""
# We only make the wrappers here (lazily) to avoid creating a ton of objects needlessly.
self._convert_showname_field_names_to_field_names()
field = self._wrapped_fields.get(name)
if field is None:
is_fake = False
field = self._get_internal_field_by_name(name)
if field is None:
# Might be a "fake" field in JSON
is_fake = self._is_fake_field(name)
if not is_fake:
raise AttributeError(f"No such field {name}")
field = self._make_wrapped_field(name, field, is_fake=is_fake)
self._wrapped_fields[name] = field
return field
@property
def field_names(self):
self._convert_showname_field_names_to_field_names()
return list(set([self._sanitize_field_name(name) for name in self._all_fields
if name.startswith(self._full_name)] +
[name.rsplit('.', 1)[1] for name in self._all_fields if '.' in name]))
def has_field(self, dotted_name) -> bool:
"""Checks whether the layer has the given field name.
Can get a dotted name, i.e. layer.sublayer.subsublayer.field
"""
parts = dotted_name.split('.')
cur_layer = self
for part in parts:
if part in cur_layer.field_names:
cur_layer = cur_layer.get_field(part)
else:
return False
return True
def _pretty_print_layer_fields(self, file: io.IOBase):
for field_line in self._get_all_field_lines():
if ':' in field_line:
field_name, field_line = field_line.split(':', 1)
file.write(colored(field_name + ':', "green", ["bold"]))
file.write(colored(field_line, attrs=["bold"]))
def _get_all_field_lines(self):
"""Returns all lines that represent the fields of the layer (both their names and values)."""
for field in self._get_all_fields_with_alternates():
yield from self._get_field_or_layer_repr(field)
def _get_field_or_layer_repr(self, field):
if isinstance(field, JsonLayer):
yield "\t" + field.layer_name + ":" + os.linesep
for line in field._get_all_field_lines():
yield "\t" + line
elif isinstance(field, list):
for subfield_or_layer in field:
yield from self._get_field_or_layer_repr(subfield_or_layer)
else:
yield f"\t{self._sanitize_field_name(field.name)}: {field.raw_value}{os.linesep}"
def _sanitize_field_name(self, field_name):
return field_name.replace(self._full_name + '.', '')
def _field_name_from_showname(self, field_name):
"""Converts a 'showname'-like field key to a regular field name
Sometimes in the JSON, there are "text" type fields which might look like this:
"my_layer":
{
"my_layer.some_field": 1,
"Something Special: it's special": {
"my_layer.special_field": "it's special"
}
}
We convert the showname key into the field name. The internals will turn into a fake layer.
In this case the field will be accessible by pkt.my_layer.something_special.special_field
"""
showname_key = field_name.split(":", 1)[0]
return self._full_name + "." + showname_key.lower().replace(" ", "_")
def _get_all_fields_with_alternates(self):
return [self.get_field(name) for name in self.field_names]
def _convert_showname_field_names_to_field_names(self):
"""Converts all fields that don't have a proper name (they have a showname name) to a regular name
See self._field_name_from_showname docs for more.
"""
if self._showname_fields_converted_to_regular:
return
for field_name in list(self._all_fields):
if ":" in field_name:
field_value = self._all_fields.pop(field_name)
if isinstance(field_value, dict):
# Save the showname
field_value["showname"] = field_name
# Convert the old name to the new name.
self._all_fields[
self._field_name_from_showname(field_name)] = field_value
self._showname_fields_converted_to_regular = True
def _get_internal_field_by_name(self, name):
"""Gets the field by name, or None if not found."""
field = self._all_fields.get(name, self._all_fields.get(f"{self._full_name}.{name}"))
if field is not None:
return field
for field_name in self._all_fields:
# Specific name
if field_name.endswith(f'.{name}'):
return self._all_fields[field_name]
def _is_fake_field(self, name):
# Some fields include parts that are not reflected in the JSON dictionary
# i.e. a possible json is:
# {
# foo: {
# foo.bar.baz: {
# foo.baz: 3
# }
# }
# So in this case we must create a fake layer for "bar".
field_full_name = f"{self._full_name}.{name}."
for name, field in self._all_fields.items():
if name.startswith(field_full_name):
return True
return False
def _make_wrapped_field(self, name, field, is_fake=False, full_name=None):
"""Creates the field lazily.
If it's a simple field, wraps it in a container that adds extra features.
If it's a nested layer, creates a layer for it.
If it's an intermediate layer, copies over the relevant fields and creates a new layer for
it.
"""
if not full_name:
full_name = f"{self._full_name}.{name}"
if is_fake:
# Populate with all fields that are supposed to be inside of it
field = {key: value for key, value in self._all_fields.items()
if key.startswith(full_name)}
if isinstance(field, dict):
if name.endswith('_tree'):
name = name.replace('_tree', '')
full_name = f'{self._full_name}.{name}'
return JsonLayer(name, field, full_name=full_name, is_intermediate=is_fake)
elif isinstance(field, list):
            # For whatever reason, list-type objects go back to using the original parent name
return [self._make_wrapped_field(name, field_part,
full_name=self._full_name.split('.')[0])
for field_part in field]
return LayerFieldsContainer(LayerField(name=name, value=field))
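
# --- Illustrative usage sketch (editor's addition, not part of the original module). ---
# Shows JsonLayer field access for a capture opened with use_json=True. "example.pcap"
# is a hypothetical file; an "ip" layer with the usual ip.src/ip.dst fields is assumed.
if __name__ == "__main__":
    import pyshark

    cap = pyshark.FileCapture("example.pcap", use_json=True)
    ip = cap[0].ip
    print(ip.field_names)                            # sanitized names, e.g. 'src', 'dst', 'version'
    print(ip.get_field("src"), ip.get_field("dst"))  # resolves full or partial names lazily
    print(ip.has_field("src"), ip.has_field("no_such_field"))
    cap.close()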

View File

@ -0,0 +1,142 @@
import os
import typing
import io
from pyshark.packet.common import colored
from pyshark.packet.fields import LayerField, LayerFieldsContainer
from pyshark.packet.layers import base
class XmlLayer(base.BaseLayer):
__slots__ = [
"raw_mode",
"_all_fields"
] + base.BaseLayer.__slots__
def __init__(self, xml_obj=None, raw_mode=False):
super().__init__(xml_obj.attrib['name'])
self.raw_mode = raw_mode
self._all_fields = {}
# We copy over all the fields from the XML object
# Note: we don't read lazily from the XML because the lxml objects are very memory-inefficient
# so we'd rather not save them.
for field in xml_obj.findall('.//field'):
attributes = dict(field.attrib)
field_obj = LayerField(**attributes)
if attributes['name'] in self._all_fields:
# Field name already exists, add this field to the container.
self._all_fields[attributes['name']].add_field(field_obj)
else:
self._all_fields[attributes['name']] = LayerFieldsContainer(field_obj)
def get_field(self, name) -> typing.Union[LayerFieldsContainer, None]:
"""Gets the XML field object of the given name."""
# Quicker in case the exact name was used.
field = self._all_fields.get(name)
if field is not None:
return field
for field_name, field in self._all_fields.items():
if self._sanitize_field_name(name) == self._sanitize_field_name(field_name):
return field
return None
def get_field_value(self, name, raw=False) -> typing.Union[LayerFieldsContainer, None]:
"""Tries getting the value of the given field.
Tries it in the following order: show (standard nice display), value (raw value),
showname (extended nice display).
:param name: The name of the field
:param raw: Only return raw value
:return: str of value
"""
field = self.get_field(name)
if field is None:
return None
if raw:
return field.raw_value
return field
@property
def field_names(self) -> typing.List[str]:
"""Gets all XML field names of this layer."""
return [self._sanitize_field_name(field_name) for field_name in self._all_fields]
@property
def layer_name(self):
if self._layer_name == 'fake-field-wrapper':
return base.DATA_LAYER_NAME
return super().layer_name
def __getattr__(self, item):
val = self.get_field(item)
if val is None:
            raise AttributeError(f"No attribute named {item}")
if self.raw_mode:
return val.raw_value
return val
@property
def _field_prefix(self) -> str:
"""Prefix to field names in the XML."""
if self.layer_name == 'geninfo':
return ''
return self.layer_name + '.'
def _sanitize_field_name(self, field_name):
"""Sanitizes an XML field name
        An XML field might have characters which would make it inaccessible as a Python attribute.
"""
field_name = field_name.replace(self._field_prefix, '')
return field_name.replace('.', '_').replace('-', '_').lower()
def _pretty_print_layer_fields(self, file: io.IOBase):
for field_line in self._get_all_field_lines():
if ':' in field_line:
field_name, field_line = field_line.split(':', 1)
file.write(colored(field_name + ':', "green", attrs=["bold"]))
file.write(colored(field_line, attrs=["bold"]))
def _get_all_fields_with_alternates(self):
all_fields = list(self._all_fields.values())
all_fields += sum([field.alternate_fields for field in all_fields
if isinstance(field, LayerFieldsContainer)], [])
return all_fields
def _get_all_field_lines(self):
"""Returns all lines that represent the fields of the layer (both their names and values)."""
for field in self._get_all_fields_with_alternates():
yield from self._get_field_or_layer_repr(field)
def _get_field_or_layer_repr(self, field):
field_repr = self._get_field_repr(field)
if field_repr:
yield f"\t{field_repr}{os.linesep}"
def _get_field_repr(self, field):
if field.hide:
return
if field.showname:
return field.showname
elif field.show:
return field.show
elif field.raw_value:
return f"{self._sanitize_field_name(field.name)}: {field.raw_value}"
def get_field_by_showname(self, showname) -> typing.Union[LayerFieldsContainer, None]:
"""Gets a field by its "showname"
        This is the name that appears in Wireshark's detailed display, e.g. in
        'User-Agent: Mozilla...', the showname is 'User-Agent'.
Returns None if not found.
"""
for field in self._get_all_fields_with_alternates():
if field.showname_key == showname:
# Return it if "XXX: whatever == XXX"
return field
return None
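
# --- Illustrative usage sketch (editor's addition, not part of the original module). ---
# XmlLayer is the default layer type (PDML mode, i.e. no use_json/use_ek). "example.pcap"
# is a hypothetical file; showname text varies with the Wireshark version.
if __name__ == "__main__":
    import pyshark

    cap = pyshark.FileCapture("example.pcap")
    ip = cap[0].ip
    print(ip.field_names)                               # e.g. ['version', 'src', 'dst', ...]
    print(ip.get_field_value("src"))                    # display value
    print(ip.get_field_value("src", raw=True))          # raw value as tshark reported it
    print(ip.get_field_by_showname("Source Address"))   # the field, or None if that showname is absent
    cap.close()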

View File

@ -0,0 +1,143 @@
import datetime
import os
import binascii
import typing
from pyshark.packet import consts
from pyshark.packet.common import Pickleable
from pyshark.packet.layers.base import BaseLayer
class Packet(Pickleable):
"""A packet object which contains layers.
Layers can be accessed via index or name.
"""
def __init__(self, layers=None, frame_info=None, number=None,
length=None, captured_length=None, sniff_time=None, interface_captured=None):
"""
Creates a Packet object with the given layers and info.
:param layers: A list of BaseLayer objects.
        :param frame_info: Layer object for the entire packet frame (information like frame length, packet number, etc.)
        :param length: Length of the actual packet.
        :param captured_length: The length of the packet that was actually captured (could be less than length)
:param sniff_time: The time the packet was captured (timestamp)
:param interface_captured: The interface the packet was captured in.
"""
if layers is None:
self.layers = []
else:
self.layers = layers
self.frame_info = frame_info
self.number = number
self.interface_captured = interface_captured
self.captured_length = captured_length
self.length = length
self.sniff_timestamp = sniff_time
def __getitem__(self, item):
"""
Gets a layer according to its index or its name
:param item: layer index or name
:return: BaseLayer object.
"""
if isinstance(item, int):
return self.layers[item]
for layer in self.layers:
if layer.layer_name.lower() == item.lower():
return layer
raise KeyError('Layer does not exist in packet')
def __contains__(self, item):
"""Checks if the layer is inside the packet.
:param item: name of the layer
"""
try:
self[item]
return True
except KeyError:
return False
def __dir__(self):
return dir(type(self)) + list(self.__dict__.keys()) + [l.layer_name for l in self.layers]
def get_raw_packet(self) -> bytes:
assert "FRAME_RAW" in self, "Packet contains no raw data. In order to contains it, " \
"make sure that use_json and include_raw are set to True " \
"in the Capture object"
raw_packet = b''
byte_values = [''.join(x) for x in zip(self.frame_raw.value[0::2], self.frame_raw.value[1::2])]
for value in byte_values:
raw_packet += binascii.unhexlify(value)
return raw_packet
def __len__(self):
return int(self.length)
def __bool__(self):
return True
@property
def sniff_time(self) -> datetime.datetime:
try:
timestamp = float(self.sniff_timestamp)
except ValueError:
# If the value after the decimal point is negative, discard it
# Google: wireshark fractional second
timestamp = float(self.sniff_timestamp.split(".")[0])
return datetime.datetime.fromtimestamp(timestamp)
def __repr__(self):
transport_protocol = ''
if self.transport_layer != self.highest_layer and self.transport_layer is not None:
transport_protocol = self.transport_layer + '/'
return f'<{transport_protocol}{self.highest_layer} Packet>'
def __str__(self):
s = self._packet_string
for layer in self.layers:
s += str(layer)
return s
@property
def _packet_string(self):
"""A simple pretty string that represents the packet."""
return f'Packet (Length: {self.length}){os.linesep}'
def pretty_print(self):
for layer in self.layers:
layer.pretty_print()
# Alias
show = pretty_print
def __getattr__(self, item):
"""
Allows layers to be retrieved via get attr. For instance: pkt.ip
"""
for layer in self.layers:
if layer.layer_name.lower() == item.lower():
return layer
raise AttributeError(f"No attribute named {item}")
@property
    def highest_layer(self) -> str:
return self.layers[-1].layer_name.upper()
@property
    def transport_layer(self) -> typing.Optional[str]:
for layer in consts.TRANSPORT_LAYERS:
if layer in self:
return layer
def get_multiple_layers(self, layer_name) -> typing.List[BaseLayer]:
"""Returns a list of all the layers in the packet that are of the layer type (an incase-sensitive string).
This is in order to retrieve layers which appear multiple times in the same packet (i.e. double VLAN)
which cannot be retrieved by easier means.
"""
return [layer for layer in self.layers if layer.layer_name.lower() == layer_name.lower()]
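
# --- Illustrative usage sketch (editor's addition, not part of the original module). ---
# Shows the typical ways a Packet is inspected. "example.pcap" is a hypothetical file;
# which layers are present depends on the captured traffic.
if __name__ == "__main__":
    import pyshark

    cap = pyshark.FileCapture("example.pcap")
    pkt = cap[0]
    if "IP" in pkt:                           # __contains__ is case-insensitive
        print(pkt["ip"], pkt.ip)              # __getitem__ and __getattr__ reach the same layer
    print(pkt.highest_layer, pkt.transport_layer, pkt.sniff_time, len(pkt))
    print(pkt.get_multiple_layers("vlan"))    # every VLAN layer, e.g. for QinQ frames
    pkt.pretty_print()
    cap.close()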

View File

@ -0,0 +1,27 @@
class PacketSummary(object):
"""A simple object containing a psml summary.
Can contain various summary information about a packet.
"""
def __init__(self, structure, values):
self._fields = {}
self._field_order = []
for key, val in zip(structure, values):
key, val = str(key), str(val)
self._fields[key] = val
self._field_order.append(key)
setattr(self, key.lower().replace('.', '').replace(',', ''), val)
def __repr__(self):
protocol, src, dst = self._fields.get('Protocol', '?'), self._fields.get('Source', '?'),\
self._fields.get('Destination', '?')
return f'<{self.__class__.__name__} {protocol}: {src} to {dst}>'
def __str__(self):
return self.summary_line
@property
def summary_line(self) -> str:
return ' '.join([self._fields[key] for key in self._field_order])
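
# --- Illustrative usage sketch (editor's addition, not part of the original module). ---
# PacketSummary objects are what a capture yields when only_summaries=True.
# "example.pcap" is hypothetical; the attribute names assume tshark's default psml
# columns (No., Time, Source, Destination, Protocol, Length, Info).
if __name__ == "__main__":
    import pyshark

    cap = pyshark.FileCapture("example.pcap", only_summaries=True)
    summary = cap[0]
    print(summary.summary_line)                           # the whole psml summary line
    print(summary.protocol, summary.source, summary.destination)
    cap.close()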

View File

@ -0,0 +1,30 @@
class BaseTsharkOutputParser:
DEFAULT_BATCH_SIZE = 2 ** 16
async def get_packets_from_stream(self, stream, existing_data, got_first_packet=True):
"""A coroutine which returns a single packet if it can be read from the given StreamReader.
        :return: A tuple of (packet, remaining_data). The packet will be None if there was not enough data to
        create a packet. remaining_data is the leftover data which was not enough to create a packet from.
        :raises EOFError: if EOF was reached.
        """
        # Try to extract a packet from the already-buffered data first
packet, existing_data = self._extract_packet_from_data(existing_data,
got_first_packet=got_first_packet)
if packet:
packet = self._parse_single_packet(packet)
return packet, existing_data
new_data = await stream.read(self.DEFAULT_BATCH_SIZE)
existing_data += new_data
if not new_data:
raise EOFError()
return None, existing_data
def _parse_single_packet(self, packet):
raise NotImplementedError()
def _extract_packet_from_data(self, data, got_first_packet=True):
"""Returns a packet's data and any remaining data after reading that first packet"""
raise NotImplementedError()
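
# --- Illustrative sketch (editor's addition, not part of the original module). ---
# A minimal, hypothetical subclass showing the contract the two abstract methods fill:
# split the byte stream into per-packet chunks and turn each chunk into an object.
# Here every newline-terminated line is treated as one "packet".
class _LineParser(BaseTsharkOutputParser):
    def _parse_single_packet(self, packet):
        return packet.decode("utf-8", errors="replace")

    def _extract_packet_from_data(self, data, got_first_packet=True):
        newline = data.find(b"\n")
        if newline == -1:
            return None, data                 # not enough buffered data for a full line yet
        return data[:newline], data[newline + 1:]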

View File

@ -0,0 +1,59 @@
import json
import os
from pyshark.tshark.output_parser.base_parser import BaseTsharkOutputParser
try:
import ujson
USE_UJSON = True
except ImportError:
USE_UJSON = False
from pyshark.packet.layers.ek_layer import EkLayer
from pyshark.packet.packet import Packet
_ENCODED_OS_LINESEP = os.linesep.encode()
class TsharkEkJsonParser(BaseTsharkOutputParser):
def _parse_single_packet(self, packet):
return packet_from_ek_packet(packet)
def _extract_packet_from_data(self, data, got_first_packet=True):
"""Returns a packet's data and any remaining data after reading that first packet"""
start_index = 0
data = data.lstrip()
if data.startswith(b'{"ind'):
# Skip the 'index' JSONs, generated for Elastic.
# See: https://bugs.wireshark.org/bugzilla/show_bug.cgi?id=16656
start_index = data.find(_ENCODED_OS_LINESEP) + 1
linesep_location = data.find(_ENCODED_OS_LINESEP, start_index)
if linesep_location == -1:
return None, data
return data[start_index:linesep_location], data[linesep_location + 1:]
def packet_from_ek_packet(json_pkt):
if USE_UJSON:
pkt_dict = ujson.loads(json_pkt)
else:
pkt_dict = json.loads(json_pkt.decode('utf-8'))
# We use the frame dict here and not the object access because it's faster.
frame_dict = pkt_dict['layers'].pop('frame')
layers = []
for layer in frame_dict['frame_frame_protocols'].split(':'):
layer_dict = pkt_dict['layers'].pop(layer, None)
if layer_dict is not None:
layers.append(EkLayer(layer, layer_dict))
# Add all leftovers
for name, layer in pkt_dict['layers'].items():
layers.append(EkLayer(name, layer))
return Packet(layers=layers, frame_info=EkLayer('frame', frame_dict),
number=int(frame_dict.get('frame_frame_number', 0)),
length=int(frame_dict['frame_frame_len']),
sniff_time=frame_dict['frame_frame_time_epoch'],
                  interface_captured=frame_dict.get('frame_frame_interface_id'))
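
# --- Illustrative sketch (editor's addition, not part of the original module). ---
# Demonstrates how the parser splits "tshark -T ek" output into packet lines. The
# sample bytes are hypothetical; real input arrives from the tshark stdout stream.
if __name__ == "__main__":
    sep = _ENCODED_OS_LINESEP
    parser = TsharkEkJsonParser()
    buffered = (b'{"index":{"_index":"packets-2024-12-09"}}' + sep
                + b'{"timestamp":"1733735000000","layers":{}}' + sep)
    packet_line, remaining = parser._extract_packet_from_data(buffered)
    print(packet_line)   # the packet JSON line; the Elastic "index" preamble is skipped
    print(remaining)     # leftover bytes kept for the next read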

View File

@ -0,0 +1,112 @@
import json
import os
from packaging import version
from pyshark.packet.layers.json_layer import JsonLayer
from pyshark.packet.packet import Packet
from pyshark.tshark.output_parser.base_parser import BaseTsharkOutputParser
from pyshark.tshark import tshark
try:
import ujson
USE_UJSON = True
except ImportError:
USE_UJSON = False
class TsharkJsonParser(BaseTsharkOutputParser):
def __init__(self, tshark_version=None):
super().__init__()
self._tshark_version = tshark_version
def _parse_single_packet(self, packet):
json_has_duplicate_keys = tshark.tshark_supports_duplicate_keys(self._tshark_version)
return packet_from_json_packet(packet, deduplicate_fields=json_has_duplicate_keys)
def _extract_packet_from_data(self, data, got_first_packet=True):
"""Returns a packet's data and any remaining data after reading that first packet"""
tag_start = 0
if not got_first_packet:
tag_start = data.find(b"{")
if tag_start == -1:
return None, data
packet_separator, end_separator, end_tag_strip_length = self._get_json_separators()
found_separator = None
tag_end = data.find(packet_separator)
if tag_end == -1:
            # Not the end of a packet; maybe this is the end of the entire file?
tag_end = data.find(end_separator)
if tag_end != -1:
found_separator = end_separator
else:
# Found a single packet, just add the separator without extras
found_separator = packet_separator
if found_separator:
tag_end += len(found_separator) - end_tag_strip_length
return data[tag_start:tag_end].strip().strip(b","), data[tag_end + 1:]
return None, data
def _get_json_separators(self):
""""Returns the separators between packets in a JSON output
Returns a tuple of (packet_separator, end_of_file_separator, characters_to_disregard).
        The last value is the number of characters to skip in order to move past the packet (i.e. extra newlines,
        commas, parentheses).
"""
if not self._tshark_version or self._tshark_version >= version.parse("3.0.0"):
return f"{os.linesep} }},{os.linesep}".encode(), f"}}{os.linesep}]".encode(), 1 + len(os.linesep)
else:
return f"}}{os.linesep}{os.linesep} ,".encode(), f"}}{os.linesep}{os.linesep}]".encode(), 1
def duplicate_object_hook(ordered_pairs):
"""Make lists out of duplicate keys."""
json_dict = {}
for key, val in ordered_pairs:
existing_val = json_dict.get(key)
if not existing_val:
json_dict[key] = val
else:
if isinstance(existing_val, list):
existing_val.append(val)
else:
json_dict[key] = [existing_val, val]
return json_dict
def packet_from_json_packet(json_pkt, deduplicate_fields=True):
"""Creates a Pyshark Packet from a tshark json single packet.
Before tshark 2.6, there could be duplicate keys in a packet json, which creates the need for
deduplication and slows it down significantly.
"""
if deduplicate_fields:
# NOTE: We can use ujson here for ~25% speed-up, however since we can't use hooks in ujson
# we lose the ability to view duplicates. This might still be a good option later on.
pkt_dict = json.loads(json_pkt.decode('utf-8'), object_pairs_hook=duplicate_object_hook)
else:
if USE_UJSON:
pkt_dict = ujson.loads(json_pkt)
else:
pkt_dict = json.loads(json_pkt.decode('utf-8'))
# We use the frame dict here and not the object access because it's faster.
frame_dict = pkt_dict['_source']['layers'].pop('frame')
layers = []
for layer in frame_dict['frame.protocols'].split(':'):
layer_dict = pkt_dict['_source']['layers'].pop(layer, None)
if layer_dict is not None:
layers.append(JsonLayer(layer, layer_dict))
# Add all leftovers
for name, layer in pkt_dict['_source']['layers'].items():
layers.append(JsonLayer(name, layer))
return Packet(layers=layers, frame_info=JsonLayer('frame', frame_dict),
number=int(frame_dict.get('frame.number', 0)),
length=int(frame_dict['frame.len']),
sniff_time=frame_dict['frame.time_epoch'],
interface_captured=frame_dict.get('frame.interface_id'))
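
# --- Illustrative sketch (editor's addition, not part of the original module). ---
# Shows what duplicate_object_hook does with repeated JSON keys, which json.loads
# would otherwise silently collapse to the last value. The sample JSON is hypothetical.
if __name__ == "__main__":
    sample = '{"ip.opt.type": "1", "ip.opt.type": "7", "ip.ttl": "64"}'
    print(json.loads(sample, object_pairs_hook=duplicate_object_hook))
    # -> {'ip.opt.type': ['1', '7'], 'ip.ttl': '64'}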

View File

@ -0,0 +1,118 @@
"""This module contains functions to turn TShark XML parts into Packet objects."""
import lxml.objectify
from pyshark.packet.layers.xml_layer import XmlLayer
from pyshark.packet.packet import Packet
from pyshark.packet.packet_summary import PacketSummary
from pyshark.tshark.output_parser.base_parser import BaseTsharkOutputParser
# Prepare dictionary used with str.translate for removing invalid XML characters
DEL_BAD_XML_CHARS = {bad_char: None for bad_char in range(0x00, 0x20) if bad_char not in (0x09, 0x0a, 0x0d)}
DEL_BAD_XML_CHARS.update({bad_char: None for bad_char in range(0xd800, 0xe000)})
DEL_BAD_XML_CHARS.update({bad_char: None for bad_char in range(0xfffe, 0x10000)})
class TsharkXmlParser(BaseTsharkOutputParser):
SUMMARIES_BATCH_SIZE = 64
def __init__(self, parse_summaries=False):
super().__init__()
self._parse_summaries = parse_summaries
self._psml_structure = None
async def get_packets_from_stream(self, stream, existing_data, got_first_packet=True):
if self._parse_summaries:
existing_data = await self._get_psml_struct(stream)
return await super().get_packets_from_stream(stream, existing_data, got_first_packet=got_first_packet)
def _parse_single_packet(self, packet):
return packet_from_xml_packet(packet, psml_structure=self._psml_structure)
def _extract_packet_from_data(self, data, got_first_packet=True):
"""Gets data containing a (part of) tshark xml.
If the given tag is found in it, returns the tag data and the remaining data.
Otherwise returns None and the same data.
:param data: string of a partial tshark xml.
:return: a tuple of (tag, data). tag will be None if none is found.
"""
return _extract_tag_from_xml_data(data, tag_name=b"packet")
async def _get_psml_struct(self, fd):
"""Gets the current PSML (packet summary xml) structure in a tuple ((None, leftover_data)),
only if the capture is configured to return it, else returns (None, leftover_data).
A coroutine.
"""
initial_data = b""
psml_struct = None
        # If summaries are read, we need the psml structure which appears at the top of the file.
while not psml_struct:
new_data = await fd.read(self.SUMMARIES_BATCH_SIZE)
initial_data += new_data
psml_struct, initial_data = _extract_tag_from_xml_data(initial_data, b"structure")
if psml_struct:
self._psml_structure = psml_structure_from_xml(psml_struct)
elif not new_data:
return initial_data
return initial_data
def psml_structure_from_xml(psml_structure):
if not isinstance(psml_structure, lxml.objectify.ObjectifiedElement):
psml_structure = lxml.objectify.fromstring(psml_structure)
return psml_structure.findall('section')
def packet_from_xml_packet(xml_pkt, psml_structure=None):
"""
    Gets a TShark XML packet object or string, and returns a pyshark Packet object.
:param xml_pkt: str or xml object.
:param psml_structure: a list of the fields in each packet summary in the psml data. If given, packets will
be returned as a PacketSummary object.
:return: Packet object.
"""
if not isinstance(xml_pkt, lxml.objectify.ObjectifiedElement):
parser = lxml.objectify.makeparser(huge_tree=True, recover=True, encoding='utf-8')
xml_pkt = xml_pkt.decode(errors='ignore').translate(DEL_BAD_XML_CHARS)
xml_pkt = lxml.objectify.fromstring(xml_pkt.encode('utf-8'), parser)
if psml_structure:
return _packet_from_psml_packet(xml_pkt, psml_structure)
return _packet_from_pdml_packet(xml_pkt)
def _packet_from_psml_packet(psml_packet, structure):
return PacketSummary(structure, psml_packet.findall('section'))
def _packet_from_pdml_packet(pdml_packet):
layers = [XmlLayer(proto) for proto in pdml_packet.proto]
geninfo, frame, layers = layers[0], layers[1], layers[2:]
return Packet(layers=layers, frame_info=frame, number=geninfo.get_field_value('num'),
length=geninfo.get_field_value('len'), sniff_time=geninfo.get_field_value('timestamp', raw=True),
captured_length=geninfo.get_field_value('caplen'),
interface_captured=frame.get_field_value('interface_id', raw=True))
def _extract_tag_from_xml_data(data, tag_name=b"packet"):
"""Gets data containing a (part of) tshark xml.
If the given tag is found in it, returns the tag data and the remaining data.
Otherwise returns None and the same data.
:param data: string of a partial tshark xml.
:param tag_name: A bytes string of the tag name
:return: a tuple of (tag, data). tag will be None if none is found.
"""
opening_tag = b"<" + tag_name + b">"
closing_tag = opening_tag.replace(b"<", b"</")
tag_end = data.find(closing_tag)
if tag_end != -1:
tag_end += len(closing_tag)
tag_start = data.find(opening_tag)
return data[tag_start:tag_end], data[tag_end:]
return None, data
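
# --- Illustrative sketch (editor's addition, not part of the original module). ---
# Demonstrates how a complete <packet> element is carved out of a partially read PDML
# stream. The sample bytes are hypothetical.
if __name__ == "__main__":
    buffered = b"<pdml><packet><proto name='ip'/></packet><packet><proto na"
    tag, leftover = _extract_tag_from_xml_data(buffered, tag_name=b"packet")
    print(tag)       # b"<packet><proto name='ip'/></packet>"
    print(leftover)  # b"<packet><proto na" -- kept until the rest of the element arrives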

View File

@ -0,0 +1,169 @@
"""Module used for the actual running of TShark"""
import json
from packaging import version
import os
import subprocess
import sys
import re
from pyshark.config import get_config
class TSharkNotFoundException(Exception):
pass
class TSharkVersionException(Exception):
pass
_TSHARK_INTERFACE_ALIAS_PATTERN = re.compile(r"[0-9]*\. ([^\s]*)(?: \((.*)\))?")
def get_process_path(tshark_path=None, process_name="tshark"):
"""Finds the path of the tshark executable.
If the user has provided a path
or specified a location in config.ini it will be used. Otherwise default
locations will be searched.
:param tshark_path: Path of the tshark binary
:raises TSharkNotFoundException in case TShark is not found in any location.
"""
possible_paths = []
# Check if `config.ini` exists in the current directory or the pyshark directory
config = get_config()
if config:
possible_paths.append(config.get(process_name, f"{process_name}_path"))
# Add the user provided path to the search list
if tshark_path is not None:
user_tshark_path = os.path.join(os.path.dirname(tshark_path),
f"{process_name}.exe" if sys.platform.startswith("win") else process_name)
possible_paths.insert(0, user_tshark_path)
    # Windows search order: configuration file's path, then common install paths.
if sys.platform.startswith("win"):
for env in ("ProgramFiles(x86)", "ProgramFiles"):
program_files = os.getenv(env)
if program_files is not None:
possible_paths.append(
os.path.join(program_files, "Wireshark", f"{process_name}.exe")
)
# Linux, etc. search order: configuration file's path, the system's path
else:
os_path = os.getenv(
"PATH",
"/usr/bin:/usr/sbin:/usr/lib/tshark:/usr/local/bin"
)
for path in os_path.split(":"):
possible_paths.append(os.path.join(path, process_name))
if sys.platform.startswith("darwin"):
possible_paths.append(f"/Applications/Wireshark.app/Contents/MacOS/{process_name}")
for path in possible_paths:
if os.path.exists(path):
if sys.platform.startswith("win"):
path = path.replace("\\", "/")
return path
raise TSharkNotFoundException(
"TShark not found. Try adding its location to the configuration file. "
f"Searched these paths: {possible_paths}"
)
def get_tshark_version(tshark_path=None):
parameters = [get_process_path(tshark_path), "-v"]
with open(os.devnull, "w") as null:
version_output = subprocess.check_output(parameters, stderr=null).decode("ascii")
version_line = version_output.splitlines()[0]
pattern = r'.*\s(\d+\.\d+\.\d+).*' # match " #.#.#" version pattern
m = re.match(pattern, version_line)
if not m:
raise TSharkVersionException("Unable to parse TShark version from: {}".format(version_line))
version_string = m.groups()[0] # Use first match found
return version.parse(version_string)
def tshark_supports_duplicate_keys(tshark_version):
return tshark_version >= version.parse("2.6.7")
def tshark_supports_json(tshark_version):
return tshark_version >= version.parse("2.2.0")
def get_tshark_display_filter_flag(tshark_version):
"""Returns '-Y' for tshark versions >= 1.10.0 and '-R' for older versions."""
if tshark_version >= version.parse("1.10.0"):
return "-Y"
else:
return "-R"
def get_tshark_interfaces(tshark_path=None):
"""Returns a list of interface numbers from the output tshark -D.
Used internally to capture on multiple interfaces.
"""
parameters = [get_process_path(tshark_path), "-D"]
with open(os.devnull, "w") as null:
tshark_interfaces = subprocess.check_output(parameters, stderr=null).decode("utf-8")
return [line.split(" ")[1] for line in tshark_interfaces.splitlines() if '\\\\.\\' not in line]
def get_all_tshark_interfaces_names(tshark_path=None):
"""Returns a list of all possible interface names. Some interfaces may have aliases"""
parameters = [get_process_path(tshark_path), "-D"]
with open(os.devnull, "w") as null:
tshark_interfaces = subprocess.check_output(parameters, stderr=null).decode("utf-8")
all_interface_names = []
for line in tshark_interfaces.splitlines():
matches = _TSHARK_INTERFACE_ALIAS_PATTERN.findall(line)
if matches:
all_interface_names.extend([name for name in matches[0] if name])
return all_interface_names
def get_ek_field_mapping(tshark_path=None):
parameters = [get_process_path(tshark_path), "-G", "elastic-mapping"]
with open(os.devnull, "w") as null:
mapping = subprocess.check_output(parameters, stderr=null).decode("ascii")
mapping = json.loads(
mapping,
object_pairs_hook=_duplicate_object_hook)["mappings"]
# If using wireshark 4, the key "mapping" contains what we want,
if "dynamic" in mapping and "properties" in mapping:
pass
# if using wireshark 3.5 to < 4 the data in "mapping.doc",
elif "doc" in mapping:
mapping = mapping["doc"]
# or "mapping.pcap_file" if using wireshark < 3.5
elif "pcap_file" in mapping:
mapping = mapping["pcap_file"]
else:
raise TSharkVersionException(f"Your tshark version does not support elastic-mapping. Please upgrade.")
return mapping["properties"]["layers"]["properties"]
def _duplicate_object_hook(ordered_pairs):
"""Make lists out of duplicate keys."""
json_dict = {}
for key, val in ordered_pairs:
existing_val = json_dict.get(key)
if not existing_val:
json_dict[key] = val
else:
# There are duplicates without any data for some reason, if it's that - drop it
# Otherwise, override
if val.get("properties") != {}:
json_dict[key] = val
return json_dict
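
# --- Illustrative sketch (editor's addition, not part of the original module). ---
# Typical use of the helpers above. Requires a local TShark installation; the exact
# path, version and interface list printed will vary per system.
if __name__ == "__main__":
    tshark_bin = get_process_path()                 # raises TSharkNotFoundException if missing
    current_version = get_tshark_version()
    print(tshark_bin, current_version)
    print("JSON output supported:", tshark_supports_json(current_version))
    print("Display filter flag:", get_tshark_display_filter_flag(current_version))
    print("Capture interfaces:", get_tshark_interfaces())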