This commit is contained in:
2024-12-09 18:22:38 +09:00
parent ab0cbebefc
commit c4c4547706
959 changed files with 174888 additions and 6 deletions

View File

@ -0,0 +1,466 @@
import asyncio
import os
import threading
import subprocess
import concurrent.futures
import sys
import logging
import warnings
from pyshark import ek_field_mapping
from pyshark.packet.packet import Packet
from pyshark.tshark.output_parser import tshark_ek
from pyshark.tshark.output_parser import tshark_json
from pyshark.tshark.output_parser import tshark_xml
from pyshark.tshark.tshark import get_process_path, get_tshark_display_filter_flag, \
tshark_supports_json, TSharkVersionException, get_tshark_version, tshark_supports_duplicate_keys
# Select the timeout exception class that matches the running interpreter:
# the concurrent.futures one before Python 3.8, the asyncio one afterwards.
asyncTimeoutError = (
    concurrent.futures.TimeoutError
    if sys.version_info < (3, 8)
    else asyncio.exceptions.TimeoutError
)
class TSharkCrashException(Exception):
    """Raised when a spawned subprocess is found to have exited with a failing return code."""


class UnknownEncyptionStandardException(Exception):
    """Raised when the requested encryption type is not one of the supported standards."""


class RawMustUseJsonException(Exception):
    """If the use_raw argument is True, so should the use_json argument"""


class StopCapture(Exception):
    """Exception that the user can throw anywhere in packet-handling to stop the capture process."""
class Capture:
"""Base class for packet captures."""
SUMMARIES_BATCH_SIZE = 64
DEFAULT_LOG_LEVEL = logging.CRITICAL
SUPPORTED_ENCRYPTION_STANDARDS = ["wep", "wpa-pwk", "wpa-pwd", "wpa-psk"]
def __init__(self, display_filter=None, only_summaries=False, eventloop=None,
             decryption_key=None, encryption_type="wpa-pwd", output_file=None,
             decode_as=None, disable_protocol=None, tshark_path=None,
             override_prefs=None, capture_filter=None, use_json=False, include_raw=False,
             use_ek=False, custom_parameters=None, debug=False):
    """Stores the capture configuration and prepares the eventloop.

    :param eventloop: Eventloop to run on; when None a new one is set up via _setup_eventloop().
    :param encryption_type: One of SUPPORTED_ENCRYPTION_STANDARDS (compared case-insensitively).
    :param include_raw: Requires use_json or use_ek to be True as well.
    :param debug: If True, set_debug() is called during construction.
    :raises RawMustUseJsonException: include_raw was requested without use_json/use_ek.
    :raises UnknownEncyptionStandardException: encryption_type is empty or not supported.
    """
    # Packet bookkeeping.
    self.loaded = False
    self._packets = []
    self._current_packet = 0

    # tshark configuration.
    self.tshark_path = tshark_path
    self.use_json = use_json
    self._use_ek = use_ek
    self.include_raw = include_raw
    self._display_filter = display_filter
    self._capture_filter = capture_filter
    self._only_summaries = only_summaries
    self._output_file = output_file
    self._decode_as = decode_as
    self._disable_protocol = disable_protocol
    self._override_prefs = override_prefs
    self._custom_parameters = custom_parameters

    # Runtime state.
    self.debug = debug
    self._running_processes = set()
    self._log = logging.Logger(
        self.__class__.__name__, level=self.DEFAULT_LOG_LEVEL)
    self._closed = False
    self._eof_reached = False
    self._last_error_line = None
    self._stderr_handling_tasks = []
    self.__tshark_version = None

    if include_raw and not use_json and not use_ek:
        raise RawMustUseJsonException(
            "use_json/use_ek must be True if include_raw")

    if self.debug:
        self.set_debug()

    self.eventloop = eventloop
    if self.eventloop is None:
        self._setup_eventloop()

    normalized_type = encryption_type.lower() if encryption_type else None
    if normalized_type not in self.SUPPORTED_ENCRYPTION_STANDARDS:
        standards = ", ".join(self.SUPPORTED_ENCRYPTION_STANDARDS)
        raise UnknownEncyptionStandardException(f"Only the following standards are supported: {standards}.")
    self.encryption = (decryption_key, normalized_type)
def __getitem__(self, item):
    """Looks up an already-stored packet.

    :param item: index into the internal packet list
    :return: The stored Packet object at that position.
    """
    stored_packets = self._packets
    return stored_packets[item]
def __len__(self):
    """Returns the number of packets currently held in the internal list."""
    stored_packets = self._packets
    return len(stored_packets)
def next(self) -> Packet:
    """Returns the next stored packet; a thin alias of next_packet()."""
    upcoming = self.next_packet()
    return upcoming
# Allows for child classes to call next() from super() without 2to3 "fixing"
# the call
def next_packet(self) -> Packet:
    """Returns the packet under the iteration cursor and advances the cursor.

    :raises StopIteration: when the cursor is past the last stored packet.
    """
    position = self._current_packet
    if position >= len(self._packets):
        raise StopIteration()
    packet = self._packets[position]
    self._current_packet = position + 1
    return packet
def clear(self):
    """Drops every saved packet and rewinds the iteration cursor."""
    # A fresh list is bound (rather than emptying the old one in place).
    self._packets, self._current_packet = [], 0
def reset(self):
    """Rewinds the iteration cursor so iteration restarts at the first stored packet."""
    first_position = 0
    self._current_packet = first_position
def load_packets(self, packet_count=0, timeout=None):
    """Reads packets from the source (cap, interface, etc.) into the internal list.

    A timeout is swallowed silently; in that case the packets read so far are kept
    but the capture is not marked as loaded.

    :param packet_count: The amount of packets to add to the packet list (0 to read forever)
    :param timeout: If given, automatically stops after a given amount of time.
    """
    already_stored = len(self._packets)

    def _store(pkt):
        self._packets.append(pkt)
        if packet_count == 0:
            return
        if len(self._packets) - already_stored >= packet_count:
            raise StopCapture()

    try:
        self.apply_on_packets(
            _store, timeout=timeout, packet_count=packet_count)
    except asyncTimeoutError:
        return
    self.loaded = True
def set_debug(self, set_to=True, log_level=logging.DEBUG):
"""Sets the capture to debug mode (or turns it off if specified)."""
if set_to:
handler = logging.StreamHandler(sys.stdout)
handler.setFormatter(logging.Formatter(
"%(asctime)s - %(name)s - %(levelname)s - %(message)s"))
self._log.addHandler(handler)
self._log.level = log_level
self.debug = set_to
def _verify_capture_parameters(self):
    """Hook for validating the capture's parameters before tshark is started.

    The base implementation accepts everything; overrides should raise an exception
    when the parameters are not valid.
    """
    return None
def _setup_eventloop(self):
    """Sets up a new eventloop as the current one according to the OS.

    On Windows a ProactorEventLoop is required; elsewhere the current loop is reused,
    or a new one is created when running outside the main thread.

    :raises RuntimeError: on non-Windows systems, when no eventloop can be obtained
        in the main thread.
    """
    in_main_thread = threading.current_thread() is threading.main_thread()
    if os.name == "nt":
        current_eventloop = asyncio.get_event_loop_policy().get_event_loop()
        if isinstance(current_eventloop, asyncio.ProactorEventLoop):
            self.eventloop = current_eventloop
        else:
            # On Python before 3.8, Proactor is not the default eventloop type, so we have to create a new one.
            # If there was an existing eventloop this can create issues, since we effectively disable it here.
            # The loop is passed explicitly: without an argument all_tasks() needs a
            # *running* loop and raises RuntimeError when called from plain sync code.
            if asyncio.all_tasks(current_eventloop):
                warnings.warn("The running eventloop has tasks but pyshark must set a new eventloop to continue. "
                              "Existing tasks may not run.")
            self.eventloop = asyncio.ProactorEventLoop()
            asyncio.set_event_loop(self.eventloop)
    else:
        try:
            self.eventloop = asyncio.get_event_loop_policy().get_event_loop()
        except RuntimeError:
            if not in_main_thread:
                # Ran not in main thread, make a new eventloop
                self.eventloop = asyncio.new_event_loop()
                asyncio.set_event_loop(self.eventloop)
            else:
                raise
    # The child watcher API is only touched when the running interpreter still
    # provides it, so its absence cannot break eventloop setup.
    has_child_watchers = hasattr(asyncio, "SafeChildWatcher") and hasattr(asyncio, "set_child_watcher")
    if os.name == "posix" and in_main_thread and has_child_watchers:
        # The default child watchers (ThreadedChildWatcher) attach_loop method is empty!
        # While using pyshark with ThreadedChildWatcher, asyncio could raise a ChildProcessError
        # "Unknown child process pid %d, will report returncode 255"
        # This led to a TSharkCrashException in _cleanup_subprocess.
        # Using the SafeChildWatcher fixes this issue, but it is slower.
        # SafeChildWatcher O(n) -> large numbers of processes are slow
        # ThreadedChildWatcher O(1) -> independent of process number
        asyncio.set_child_watcher(asyncio.SafeChildWatcher())
        asyncio.get_child_watcher().attach_loop(self.eventloop)
def _packets_from_tshark_sync(self, packet_count=None, existing_process=None):
    """Returns a generator of packets.

    Synchronous counterpart of packets_from_tshark: each read is driven to completion
    on the eventloop and every parsed packet is yielded as soon as it arrives. When the
    generator finishes (or is closed) the process is cleaned up if it is still tracked
    as running.

    :param packet_count: If given, stops after this amount of packets is captured.
    :param existing_process: A process to read from instead of spawning a new tshark.
    """
    # NOTE: This has code duplication with the async version, think about how to solve this
    if existing_process:
        process = existing_process
    else:
        process = self.eventloop.run_until_complete(self._get_tshark_process())
    output_parser = self._setup_tshark_output_parser()
    yielded = 0
    leftover = b""
    try:
        while True:
            pending_read = output_parser.get_packets_from_stream(
                process.stdout, leftover, got_first_packet=yielded > 0)
            try:
                packet, leftover = self.eventloop.run_until_complete(pending_read)
            except EOFError:
                self._log.debug("EOF reached (sync)")
                self._eof_reached = True
                break

            if packet:
                yielded += 1
                yield packet
            if packet_count and yielded >= packet_count:
                break
    finally:
        if process in self._running_processes:
            self.eventloop.run_until_complete(
                self._cleanup_subprocess(process))
def apply_on_packets(self, callback, timeout=None, packet_count=None):
    """Runs through all packets and calls the given callback (a function) with each one as it is read.

    If the capture is infinite (i.e. a live capture), it will run forever, otherwise it will complete after all
    packets have been read.

    Example usage:
    def print_callback(pkt):
        print(pkt)
    capture.apply_on_packets(print_callback)

    If a timeout is given, raises a Timeout error if not complete before the timeout (in seconds)
    """
    pending = self.packets_from_tshark(callback, packet_count=packet_count)
    if timeout is None:
        return self.eventloop.run_until_complete(pending)
    return self.eventloop.run_until_complete(asyncio.wait_for(pending, timeout))
async def packets_from_tshark(self, packet_callback, packet_count=None, close_tshark=True):
    """Coroutine that obtains a tshark process and feeds each packet it emits to the callback.

    A StopCapture raised while going through the packets ends the run quietly. Unless
    close_tshark is False, the capture is closed afterwards in every case.

    Do not use interactively. Can be used in order to insert packets into your own eventloop.
    """
    process = await self._get_tshark_process(packet_count=packet_count)
    packet_stream = process.stdout
    try:
        await self._go_through_packets_from_fd(
            packet_stream, packet_callback, packet_count=packet_count)
    except StopCapture:
        pass
    finally:
        if close_tshark:
            await self.close_async()
async def _go_through_packets_from_fd(self, fd, packet_callback, packet_count=None):
    """Coroutine that reads packets from a stream and hands each one to the callback.

    Stops at end of stream, when the callback raises StopCapture, or once packet_count
    packets were handled.
    """
    handled = 0
    self._log.debug("Starting to go through packets")
    output_parser = self._setup_tshark_output_parser()
    leftover = b""

    while True:
        try:
            packet, leftover = await output_parser.get_packets_from_stream(
                fd, leftover, got_first_packet=handled > 0)
        except EOFError:
            self._log.debug("EOF reached")
            self._eof_reached = True
            return

        if packet:
            handled += 1
            try:
                packet_callback(packet)
            except StopCapture:
                self._log.debug("User-initiated capture stop in callback")
                return

        if packet_count and handled >= packet_count:
            return
def _create_stderr_handling_task(self, stderr):
    """Schedules the stderr reader for the given stream and remembers the resulting task."""
    reader_task = asyncio.ensure_future(self._handle_process_stderr_forever(stderr))
    self._stderr_handling_tasks.append(reader_task)
async def _handle_process_stderr_forever(self, stderr):
    """Reads a subprocess' stderr line by line until it is exhausted.

    Each line is stored as the last error line (used in crash messages) and written
    to the debug log.

    :param stderr: A stream exposing an awaitable readline() that yields bytes.
    """
    while True:
        stderr_line = await stderr.readline()
        if not stderr_line:
            break
        # Undecodable bytes are replaced instead of raising: a UnicodeDecodeError here
        # would end this task and be re-raised when the tasks are gathered on close.
        stderr_line = stderr_line.decode(errors="replace").strip()
        self._last_error_line = stderr_line
        self._log.debug(stderr_line)
def _get_tshark_path(self):
    """Resolves the path of the tshark executable from the configured tshark_path."""
    configured_path = self.tshark_path
    return get_process_path(configured_path)
def _get_tshark_version(self):
    """Returns the tshark version, querying it only on first use and caching the answer."""
    cached = self.__tshark_version
    if cached is None:
        cached = get_tshark_version(self.tshark_path)
        self.__tshark_version = cached
    return cached
async def _get_tshark_process(self, packet_count=None, stdin=None):
    """Returns a new tshark process with previously-set parameters.

    The output format is chosen from the capture's flags: json, ek, or psml/pdml
    (summaries/full XML). The new process' stderr is drained by a background task and
    the process is registered as running.

    :param packet_count: Forwarded to get_parameters().
    :param stdin: Passed through as the subprocess' stdin.
    :raises TSharkVersionException: JSON/EK output requested on a tshark without JSON support.
    """
    self._verify_capture_parameters()

    extra_output_flags = []
    if (self.use_json or self._use_ek) and not tshark_supports_json(self._get_tshark_version()):
        raise TSharkVersionException(
            "JSON only supported on Wireshark >= 2.2.0")

    if self.use_json:
        output_type = "json"
        if tshark_supports_duplicate_keys(self._get_tshark_version()):
            extra_output_flags.append("--no-duplicate-keys")
    elif self._use_ek:
        output_type = "ek"
    elif self._only_summaries:
        output_type = "psml"
    else:
        output_type = "pdml"

    command = [self._get_tshark_path(), "-l", "-n", "-T", output_type]
    command = command + self.get_parameters(packet_count=packet_count) + extra_output_flags

    self._log.debug(
        "Creating TShark subprocess with parameters: " + " ".join(command))
    self._log.debug("Executable: %s", command[0])
    process = await asyncio.create_subprocess_exec(*command,
                                                   stdout=subprocess.PIPE,
                                                   stderr=subprocess.PIPE,
                                                   stdin=stdin)
    self._create_stderr_handling_task(process.stderr)
    self._created_new_process(command, process)
    return process
def _created_new_process(self, parameters, process, process_name="TShark"):
    """Registers a freshly spawned subprocess as running.

    :param parameters: The command line that was executed (used in the error message).
    :param process: The process object that was just created.
    :param process_name: Human-readable name used in log and error messages.
    :raises TSharkCrashException: the process already exited with a non-zero return code.
    """
    self._log.debug(
        f"{process_name} subprocess (pid {process.pid}) created")
    exit_code = process.returncode
    already_failed = exit_code is not None and exit_code != 0
    if already_failed:
        raise TSharkCrashException(
            f"{process_name} seems to have crashed. Try updating it. (command ran: '{' '.join(parameters)}')")
    self._running_processes.add(process)
async def _cleanup_subprocess(self, process):
    """Kills the given process if it is still running, otherwise checks how it exited.

    :return: The result of waiting for the killed process, or None.
    :raises TSharkCrashException: the process had already exited with a positive return
        code other than 1, or with any positive return code after EOF was reached.
    """
    self._log.debug(f"Cleanup Subprocess (pid {process.pid})")
    exit_code = process.returncode

    if exit_code is None:
        # Still running: kill it and give it one second to go away.
        try:
            process.kill()
            return await asyncio.wait_for(process.wait(), 1)
        except asyncTimeoutError:
            self._log.debug(
                "Waiting for process to close failed, may have zombie process.")
        except ProcessLookupError:
            pass
        except OSError:
            if os.name != "nt":
                raise
        return None

    if exit_code > 0 and (exit_code != 1 or self._eof_reached):
        raise TSharkCrashException(f"TShark (pid {process.pid}) seems to have crashed (retcode: {exit_code}).\n"
                                   f"Last error line: {self._last_error_line}\n"
                                   "Try rerunning in debug mode [ capture_obj.set_debug() ] or try updating tshark.")
    return None
def _setup_tshark_output_parser(self):
    """Builds the parser matching the output format tshark was asked to produce.

    JSON takes precedence over EK; without either, the XML parser is used.
    """
    if self.use_json:
        return tshark_json.TsharkJsonParser(self._get_tshark_version())
    if not self._use_ek:
        return tshark_xml.TsharkXmlParser(parse_summaries=self._only_summaries)
    # EK output: the field mapping is loaded before the parser is created.
    ek_field_mapping.MAPPING.load_mapping(str(self._get_tshark_version()),
                                          tshark_path=self.tshark_path)
    return tshark_ek.TsharkEkJsonParser()
def close(self):
    """Blocking wrapper that runs close_async() to completion on the capture's eventloop."""
    shutdown = self.close_async()
    self.eventloop.run_until_complete(shutdown)
async def close_async(self):
    """Cleans up every running subprocess and waits for the stderr readers to finish.

    The list of stderr tasks is emptied before waiting, so finished tasks are neither
    kept forever nor awaited again by later calls (a failure in one of them is reported
    once instead of on every subsequent close).
    """
    for process in self._running_processes.copy():
        await self._cleanup_subprocess(process)
    self._running_processes.clear()

    # Wait for all stderr handling to finish
    pending_tasks, self._stderr_handling_tasks = self._stderr_handling_tasks, []
    await asyncio.gather(*pending_tasks)
def __del__(self):
    """Closes the capture on garbage collection when subprocesses are still registered."""
    if not self._running_processes:
        return
    self.close()
def __enter__(self):
    """Sync context manager entry: the capture itself is the managed object."""
    return self


async def __aenter__(self):
    """Async context manager entry: the capture itself is the managed object."""
    return self


def __exit__(self, exc_type, exc_val, exc_tb):
    """Sync context manager exit: closes the capture."""
    self.close()


async def __aexit__(self, exc_type, exc_val, exc_tb):
    """Async context manager exit: closes the capture without blocking."""
    await self.close_async()
def get_parameters(self, packet_count=None):
    """Returns the special tshark parameters to be used according to the configuration of this class.

    Custom parameters and the output file are converted to strings, so that the result
    can be joined for logging and passed to the subprocess call even when numbers or
    path objects were supplied. A dict entry whose value is None adds only its key
    (a flag without a value).

    :param packet_count: If given (and non-zero), adds "-c <packet_count>".
    :return: A list of command-line argument strings.
    :raises TypeError: custom_parameters is neither a list nor a dict.
    """
    params = []
    if self._capture_filter:
        params += ["-f", self._capture_filter]

    if self._display_filter:
        params += [get_tshark_display_filter_flag(self._get_tshark_version(),),
                   self._display_filter]

    # Raw is only enabled when JSON is also enabled.
    if self.include_raw:
        params += ["-x"]

    if packet_count:
        params += ["-c", str(packet_count)]

    if self._custom_parameters:
        if isinstance(self._custom_parameters, list):
            params += [str(parameter) for parameter in self._custom_parameters]
        elif isinstance(self._custom_parameters, dict):
            for key, val in self._custom_parameters.items():
                params.append(str(key))
                if val is not None:
                    params.append(str(val))
        else:
            raise TypeError("Custom parameters type not supported.")

    # Decryption is only configured when both a key and a standard are present.
    decryption_configured = all(self.encryption)
    if decryption_configured:
        params += ["-o", "wlan.enable_decryption:TRUE", "-o", 'uat:80211_keys:"' + self.encryption[1] + '","' +
                   self.encryption[0] + '"']

    if self._override_prefs:
        for preference_name, preference_value in self._override_prefs.items():
            if decryption_configured and preference_name in ("wlan.enable_decryption", "uat:80211_keys"):
                continue  # skip if override preferences also given via --encryption options
            params += ["-o", f"{preference_name}:{preference_value}"]

    if self._output_file:
        params += ["-w", str(self._output_file)]

    if self._decode_as:
        for criterion, decode_as_proto in self._decode_as.items():
            params += ["-d",
                       ",".join([criterion.strip(), decode_as_proto.strip()])]

    if self._disable_protocol:
        params += ["--disable-protocol", self._disable_protocol.strip()]

    return params
def __iter__(self):
    """Iterates the stored packets once loaded, otherwise streams packets from tshark."""
    if not self.loaded:
        return self._packets_from_tshark_sync()
    return iter(self._packets)
def __repr__(self):
    """Shows the concrete class name together with the number of stored packets."""
    class_name = self.__class__.__name__
    packet_total = len(self._packets)
    return f"<{class_name} ({packet_total} packets)>"

View File

@ -0,0 +1,93 @@
import pathlib
from pyshark.capture.capture import Capture
from pyshark.packet.packet import Packet
class FileCapture(Capture):
"""A class representing a capture read from a file."""
def __init__(self, input_file=None, keep_packets=True, display_filter=None, only_summaries=False,
             decryption_key=None, encryption_type="wpa-pwk", decode_as=None,
             disable_protocol=None, tshark_path=None, override_prefs=None,
             use_json=False, use_ek=False,
             output_file=None, include_raw=False, eventloop=None, custom_parameters=None,
             debug=False):
    """Creates a packet capture object by reading from file.

    :param input_file: File path of the capture (PCAP, PCAPNG)
    :param keep_packets: Whether to keep packets after reading them via next(). Turn off to conserve
        memory when reading large caps.
    :param display_filter: A display (wireshark) filter to apply on the cap before reading it.
    :param only_summaries: Only produce packet summaries, much faster but includes very little information.
    :param decryption_key: Optional key used to encrypt and decrypt captured traffic.
    :param encryption_type: Standard of encryption used in captured traffic (must be either 'WEP', 'WPA-PWD', or
        'WPA-PWK'. Defaults to WPA-PWK).
    :param decode_as: A dictionary of {decode_criterion_string: decode_as_protocol} that are used to tell tshark
        to decode protocols in situations it wouldn't usually, for instance {'tcp.port==8888': 'http'} would make
        it attempt to decode any port 8888 traffic as HTTP. See tshark documentation for details.
    :param disable_protocol: Tells tshark to remove a dissector for a specific protocol.
    :param tshark_path: Path of the tshark binary
    :param override_prefs: A dictionary of tshark preferences to override, {PREFERENCE_NAME: PREFERENCE_VALUE, ...}.
    :param use_ek: Uses tshark in EK JSON mode. It is faster than XML but has slightly less data.
    :param use_json: DEPRECATED. Use use_ek instead.
    :param output_file: A string of a file to write every read packet into (useful when filtering).
    :param custom_parameters: A dict of custom parameters to pass to tshark, i.e. {"--param": "value"}
        or else a list of parameters in the format ["--foo", "bar", "--baz", "foo"].
    :raises FileNotFoundError: input_file does not exist or is not a regular file.
    """
    super().__init__(
        display_filter=display_filter, only_summaries=only_summaries,
        decryption_key=decryption_key, encryption_type=encryption_type,
        decode_as=decode_as, disable_protocol=disable_protocol,
        tshark_path=tshark_path, override_prefs=override_prefs,
        use_json=use_json, use_ek=use_ek, output_file=output_file,
        include_raw=include_raw, eventloop=eventloop,
        custom_parameters=custom_parameters, debug=debug)

    capture_path = pathlib.Path(input_file)
    self.input_filepath = capture_path
    if not capture_path.exists():
        raise FileNotFoundError(f"[Errno 2] No such file or directory: {self.input_filepath}")
    if not capture_path.is_file():
        raise FileNotFoundError(f"{self.input_filepath} is a directory")

    self.keep_packets = keep_packets
    # The generator body does not run until the first packet is requested.
    self._packet_generator = self._packets_from_tshark_sync()
def next(self) -> Packet:
    """Returns the next packet in the cap.

    With keep_packets off the packet comes straight from the tshark generator. Otherwise a
    new packet is read only when the iteration cursor has caught up with the stored
    packets, and the packet under the cursor is returned.
    """
    if not self.keep_packets:
        return self._packet_generator.send(None)

    if self._current_packet >= len(self._packets):
        fresh_packet = self._packet_generator.send(None)
        self._packets.append(fresh_packet)
    return super().next_packet()
def __getitem__(self, packet_index):
    """Returns the packet at the given index, reading from the file as far as needed.

    A negative index counts from the end of the capture, so the whole file is read
    before it is resolved. Reading goes through next() and therefore also advances the
    iteration cursor.

    :param packet_index: Position of the wanted packet.
    :raises NotImplementedError: the capture was created with keep_packets=False.
    :raises KeyError: the capture holds no packet at that index.
    """
    if not self.keep_packets:
        raise NotImplementedError("Cannot use getitem if packets are not kept")

    if isinstance(packet_index, int) and packet_index < 0:
        # "Counting from the end" is only meaningful once every packet was read;
        # otherwise the index would be applied to the packets loaded so far.
        while True:
            try:
                self.next()
            except StopIteration:
                break
        if -packet_index > len(self._packets):
            raise KeyError(f"Packet of index {packet_index} does not exist in capture")

    # We may not yet have this packet
    while packet_index >= len(self._packets):
        try:
            self.next()
        except StopIteration:
            # We read the whole file, and there's still not such packet.
            raise KeyError(f"Packet of index {packet_index} does not exist in capture") from None
    return super(FileCapture, self).__getitem__(packet_index)
def get_parameters(self, packet_count=None):
    """Returns the base tshark parameters followed by "-r <input file>"."""
    inherited = super().get_parameters(packet_count=packet_count)
    read_from_file = ["-r", self.input_filepath.as_posix()]
    return inherited + read_from_file
def _verify_capture_parameters(self):
    """Checks that the input file can be opened for reading.

    :raises PermissionError: the file exists but may not be read.
    """
    try:
        handle = self.input_filepath.open("rb")
    except PermissionError:
        raise PermissionError(f"Permission denied for file {self.input_filepath}")
    handle.close()
def __repr__(self):
    """Shows the class name and input file, plus the stored packet count when packets are kept.

    The count is only reported with keep_packets on; with it off, next() returns packets
    without storing them, so a count would always read zero.
    """
    capture_path = self.input_filepath.as_posix()
    if self.keep_packets:
        return f"<{self.__class__.__name__} {capture_path} ({len(self._packets)} packets)>"
    return f"<{self.__class__.__name__} {capture_path}>"

View File

@ -0,0 +1,200 @@
import asyncio
import datetime
import itertools
import subprocess
import os
import struct
import time
import warnings
from packaging import version
from pyshark.capture.capture import Capture, StopCapture
DEFAULT_TIMEOUT = 30
class LinkTypes:
    """Numeric link types that can be given as the linktype of an in-memory capture."""

    NULL = 0
    ETHERNET = 1
    IEEE802_5 = 6
    PPP = 9
    IEEE802_11 = 105
class InMemCapture(Capture):
def __init__(self, bpf_filter=None, display_filter=None, only_summaries=False,
             decryption_key=None, encryption_type='wpa-pwk', decode_as=None,
             disable_protocol=None, tshark_path=None, override_prefs=None, use_json=False, use_ek=False,
             linktype=LinkTypes.ETHERNET, include_raw=False, eventloop=None, custom_parameters=None,
             debug=False):
    """Creates a new in-mem capture, a capture capable of receiving binary packets and parsing them using tshark.

    Significantly faster if packets are added in a batch.

    :param bpf_filter: BPF filter to use on packets.
    :param display_filter: Display (wireshark) filter to use.
    :param only_summaries: Only produce packet summaries, much faster but includes very little information
    :param decryption_key: Key used to encrypt and decrypt captured traffic.
    :param encryption_type: Standard of encryption used in captured traffic (must be either 'WEP', 'WPA-PWD',
        or 'WPA-PWK'. Defaults to WPA-PWK).
    :param decode_as: A dictionary of {decode_criterion_string: decode_as_protocol} that are used to tell tshark
        to decode protocols in situations it wouldn't usually, for instance {'tcp.port==8888': 'http'} would make
        it attempt to decode any port 8888 traffic as HTTP. See tshark documentation for details.
    :param disable_protocol: Tells tshark to remove a dissector for a specific protocol.
    :param tshark_path: Path of the tshark binary
    :param override_prefs: A dictionary of tshark preferences to override, {PREFERENCE_NAME: PREFERENCE_VALUE, ...}.
    :param linktype: Link type written into the PCAP header sent to tshark (see LinkTypes).
    :param custom_parameters: A dict of custom parameters to pass to tshark, i.e. {"--param": "value"}
        or else a list of parameters in the format ["--foo", "bar", "--baz", "foo"].
    """
    super().__init__(
        display_filter=display_filter, only_summaries=only_summaries,
        decryption_key=decryption_key, encryption_type=encryption_type,
        decode_as=decode_as, disable_protocol=disable_protocol,
        tshark_path=tshark_path, override_prefs=override_prefs,
        use_json=use_json, use_ek=use_ek,
        include_raw=include_raw, eventloop=eventloop,
        custom_parameters=custom_parameters, debug=debug)
    self.bpf_filter = bpf_filter
    self._current_linktype = linktype
    # No tshark process exists until the first packets are parsed.
    self._current_tshark = None
    self._packets_to_write = None
def get_parameters(self, packet_count=None):
    """Returns the base tshark parameters followed by "-i -", so tshark reads from its stdin."""
    inherited = super().get_parameters(packet_count=packet_count)
    inherited.extend(['-i', '-'])
    return inherited
async def _get_tshark_process(self, packet_count=None):
    """Returns the tshark process of this capture, starting it on first use.

    A newly started process gets a stdin pipe, and a PCAP header carrying the current
    link type is written to it before the process is handed back.
    """
    running = self._current_tshark
    if running:
        return running

    tshark_process = await super()._get_tshark_process(
        packet_count=packet_count, stdin=subprocess.PIPE)
    self._current_tshark = tshark_process

    # Create PCAP header
    pcap_header = struct.pack("IHHIIII", 0xa1b2c3d4, 2, 4,
                              0, 0, 0x7fff, self._current_linktype)
    tshark_process.stdin.write(pcap_header)
    return tshark_process
def _get_json_separators(self):
    """Returns the separators between packets in a JSON output.

    Returns a tuple of (packet_separator, end_of_file_separator, characters_to_disregard).
    The latter variable being the number of characters to ignore in order to pass the packet (i.e. extra newlines,
    commas, parentheses).

    Both separators are returned as bytes for every tshark version.
    """
    if self._get_tshark_version() >= version.parse("2.6.7"):
        return f"{os.linesep} }}".encode(), f"}}{os.linesep}]".encode(), 0
    # Older tshark output: the end-of-file separator is encoded here as well, so the
    # type of the tuple's members does not depend on the tshark version.
    return f'}}{os.linesep}{os.linesep}'.encode(), f"}}{os.linesep}{os.linesep}]".encode(), 1
def _write_packet(self, packet, sniff_time):
    """Writes one binary packet, preceded by its packet header, to tshark's stdin.

    :param packet: The raw packet bytes.
    :param sniff_time: Capture time as a datetime, a number of seconds, or None for "now".
    """
    if sniff_time is None:
        timestamp = time.time()
    elif isinstance(sniff_time, datetime.datetime):
        timestamp = sniff_time.timestamp()
    else:
        timestamp = float(sniff_time)

    whole_seconds = int(timestamp)
    microseconds = int((timestamp * 1000000) % 1000000)
    packet_length = len(packet)

    # Write packet header
    tshark_stdin = self._current_tshark.stdin
    tshark_stdin.write(struct.pack(
        "IIII", whole_seconds, microseconds, packet_length, packet_length))
    tshark_stdin.write(packet)
def parse_packet(self, binary_packet, sniff_time=None, timeout=DEFAULT_TIMEOUT):
    """Parses a single binary packet and returns its parsed version.

    DOES NOT CLOSE tshark. It must be closed manually by calling close() when you're done
    working with it.
    Use parse_packets when parsing multiple packets for faster parsing
    """
    sniff_times = None if sniff_time is None else [sniff_time]
    parsed = self.parse_packets([binary_packet], sniff_times, timeout)
    return parsed[0]
def parse_packets(self, binary_packets, sniff_times=None, timeout=DEFAULT_TIMEOUT):
    """Parses binary packets and return a list of parsed packets.

    Blocking wrapper around parse_packets_async(); an eventloop is set up first if the
    capture has none.

    DOES NOT CLOSE tshark. It must be closed manually by calling close() when you're done
    working with it.
    """
    if self.eventloop is None:
        self._setup_eventloop()
    parsing = self.parse_packets_async(binary_packets, sniff_times, timeout)
    return self.eventloop.run_until_complete(parsing)
async def parse_packets_async(self, binary_packets, sniff_times=None, timeout=DEFAULT_TIMEOUT):
    """A coroutine which parses binary packets and return a list of parsed packets.

    DOES NOT CLOSE tshark. It must be closed manually by calling close() when you're done
    working with it.

    :param binary_packets: A sized collection of raw packets.
    :param sniff_times: Optional capture times, matched to the packets by position; packets
        without a matching entry are stamped with the current time.
    :param timeout: Seconds to wait for tshark before giving up.
    :return: The parsed packets, in the order tshark produced them.
    :raises ValueError: more sniff times than packets were given.
    """
    parsed_packets = []
    if sniff_times is None:
        sniff_times = []
    expected_total = len(binary_packets)
    if expected_total == 0:
        # Nothing is written to tshark, so no packet would ever come back: waiting
        # would only end with the timeout (which also closes tshark).
        return parsed_packets
    if len(sniff_times) > expected_total:
        # zip_longest would pair the surplus times with None packets and fail in the
        # middle of writing to tshark's stdin.
        raise ValueError("sniff_times must not be longer than binary_packets")

    if not self._current_tshark:
        await self._get_tshark_process()
    for binary_packet, sniff_time in itertools.zip_longest(binary_packets, sniff_times):
        self._write_packet(binary_packet, sniff_time)

    def callback(pkt):
        parsed_packets.append(pkt)
        # Stop reading as soon as every written packet has been parsed.
        if len(parsed_packets) == expected_total:
            raise StopCapture()

    await self._get_parsed_packet_from_tshark(callback, timeout)
    return parsed_packets
async def _get_parsed_packet_from_tshark(self, callback, timeout):
    """Flushes tshark's stdin, then reads parsed packets into the callback within the timeout.

    tshark is left running on success. On a timeout the capture is closed and a
    TimeoutError with an explanatory message is raised.
    """
    await self._current_tshark.stdin.drain()
    reading = self.packets_from_tshark(callback, close_tshark=False)
    try:
        await asyncio.wait_for(reading, timeout)
    except asyncio.TimeoutError:
        await self.close_async()
        message = ("Timed out while waiting for tshark to parse packet. "
                   "Try rerunning with cap.set_debug() to see tshark errors. "
                   "Closing tshark..")
        raise asyncio.TimeoutError(message)
async def close_async(self):
    """Forgets the current tshark process, then performs the regular capture shutdown."""
    # Dropping the reference first makes the next parse start a new tshark.
    self._current_tshark = None
    await super().close_async()
def feed_packet(self, binary_packet, linktype=LinkTypes.ETHERNET, timeout=DEFAULT_TIMEOUT):
    """
    DEPRECATED. Use parse_packet instead.
    This function adds the packet to the packets list, and also closes and reopens tshark for
    each packet.
    ==============

    Gets a binary (string) packet and parses & adds it to this capture.
    Returns the added packet.

    Use feed_packets if you have multiple packets to insert.

    By default, assumes the packet is an ethernet packet. For another link type, supply the linktype argument (most
    can be found in the class LinkTypes)
    """
    warnings.warn(
        "Deprecated method. Use InMemCapture.parse_packet() instead.")
    self._current_linktype = linktype
    parsed = self.parse_packet(binary_packet, timeout=timeout)
    self.close()
    self._packets.append(parsed)
    return parsed
def feed_packets(self, binary_packets, linktype=LinkTypes.ETHERNET, timeout=DEFAULT_TIMEOUT):
    """Gets a list of binary packets, parses them using tshark and returns their parsed values.

    The parsed packets are also added to the internal packet list, and tshark is closed
    afterwards.

    By default, assumes the packets are ethernet packets. For another link type, supply the linktype argument (most
    can be found in the class LinkTypes)
    """
    self._current_linktype = linktype
    freshly_parsed = self.parse_packets(binary_packets, timeout=timeout)
    self._packets.extend(freshly_parsed)
    self.close()
    return freshly_parsed

View File

@ -0,0 +1,132 @@
import os
import asyncio
import subprocess
from packaging import version
from pyshark.capture.capture import Capture
from pyshark.tshark import tshark
from pyshark.tshark.tshark import get_tshark_interfaces, get_process_path
class UnknownInterfaceException(Exception):
    """Raised when a requested capture interface is not among the interfaces tshark reports."""
class LiveCapture(Capture):
"""Represents a live capture on a network interface."""
def __init__(self, interface=None, bpf_filter=None, display_filter=None, only_summaries=False,
             decryption_key=None, encryption_type='wpa-pwk', output_file=None, decode_as=None,
             disable_protocol=None, tshark_path=None, override_prefs=None, capture_filter=None,
             monitor_mode=False, use_json=False, use_ek=False,
             include_raw=False, eventloop=None, custom_parameters=None,
             debug=False):
    """Creates a new live capturer on a given interface. Does not start the actual capture itself.

    :param interface: Name of the interface to sniff on or a list of names (str). If not given, runs on all interfaces.
    :param bpf_filter: BPF filter to use on packets.
    :param display_filter: Display (wireshark) filter to use.
    :param only_summaries: Only produce packet summaries, much faster but includes very little information
    :param decryption_key: Optional key used to encrypt and decrypt captured traffic.
    :param encryption_type: Standard of encryption used in captured traffic (must be either 'WEP', 'WPA-PWD', or
        'WPA-PWK'. Defaults to WPA-PWK).
    :param output_file: Additionally save live captured packets to this file.
    :param decode_as: A dictionary of {decode_criterion_string: decode_as_protocol} that are used to tell tshark
        to decode protocols in situations it wouldn't usually, for instance {'tcp.port==8888': 'http'} would make
        it attempt to decode any port 8888 traffic as HTTP. See tshark documentation for details.
    :param disable_protocol: Tells tshark to remove a dissector for a specific protocol.
    :param tshark_path: Path of the tshark binary
    :param override_prefs: A dictionary of tshark preferences to override, {PREFERENCE_NAME: PREFERENCE_VALUE, ...}.
    :param capture_filter: Capture (wireshark) filter to use.
    :param monitor_mode: If True, dumpcap is started with its "-I" flag.
    :param use_ek: Uses tshark in EK JSON mode. It is faster than XML but has slightly less data.
    :param use_json: DEPRECATED. Use use_ek instead.
    :param custom_parameters: A dict of custom parameters to pass to tshark, i.e. {"--param": "value"} or
        else a list of parameters in the format ["--foo", "bar", "--baz", "foo"].
    """
    super().__init__(
        display_filter=display_filter, only_summaries=only_summaries,
        decryption_key=decryption_key, encryption_type=encryption_type,
        output_file=output_file, decode_as=decode_as, disable_protocol=disable_protocol,
        tshark_path=tshark_path, override_prefs=override_prefs,
        capture_filter=capture_filter, use_json=use_json, use_ek=use_ek,
        include_raw=include_raw,
        eventloop=eventloop, custom_parameters=custom_parameters,
        debug=debug)
    self.bpf_filter = bpf_filter
    self.monitor_mode = monitor_mode

    # The interface list is queried unconditionally, as before.
    every_interface = get_tshark_interfaces(tshark_path)
    if interface is None:
        chosen = every_interface
    elif isinstance(interface, str):
        chosen = [interface]
    else:
        chosen = interface
    self.interfaces = chosen
def get_parameters(self, packet_count=None):
    """Returns the base tshark parameters followed by "-i -"."""
    inherited = super().get_parameters(packet_count=packet_count)
    # Read from STDIN
    inherited.extend(["-i", "-"])
    return inherited
def _verify_capture_parameters(self):
    """Checks every configured interface against the interface names tshark reports.

    Interfaces starting with "rpcap://" and purely numeric interfaces are accepted
    without a lookup; all others are compared case-insensitively.

    :raises UnknownInterfaceException: an interface is not known to tshark.
    """
    known_names = tshark.get_all_tshark_interfaces_names(self.tshark_path)
    known_lowercase = [name.lower() for name in known_names]

    for each_interface in self.interfaces:
        if each_interface.startswith("rpcap://") or each_interface.isnumeric():
            continue
        if each_interface.lower() in known_lowercase:
            continue
        raise UnknownInterfaceException(
            f"Interface '{each_interface}' does not exist, unable to initiate capture. "
            f"Perhaps permissions are missing?\n"
            f"Possible interfaces: {os.linesep.join(known_names)}")
def _get_dumpcap_parameters(self):
    """Builds the argument list (without the executable) for the dumpcap subprocess."""
    # Don't report packet counts.
    flags = ["-q"]
    if self._get_tshark_version() < version.parse("2.5.0"):
        # Tshark versions older than 2.5 don't support pcapng. This flag forces dumpcap to output pcap.
        flags.append("-P")
    if self.bpf_filter:
        flags.extend(["-f", self.bpf_filter])
    if self.monitor_mode:
        flags.append("-I")
    for interface_name in self.interfaces:
        flags.extend(["-i", interface_name])
    # Write to STDOUT
    flags.extend(["-w", "-"])
    return flags
async def _get_tshark_process(self, packet_count=None, stdin=None):
    """Spawn dumpcap and a tshark process connected to it through an OS pipe.

    dumpcap is started with its stdout attached to the write end of a fresh
    ``os.pipe()``; the read end is handed to the parent class'
    ``_get_tshark_process`` as tshark's stdin.

    The parent-side copies of both pipe ends are closed as soon as the child
    that inherits them has been spawned (or spawning failed). Leaving them open
    leaks two file descriptors per capture, and an open write end in this
    process prevents the reader from ever seeing EOF once dumpcap exits.

    :param packet_count: forwarded unchanged to the parent implementation.
    :param stdin: accepted for signature compatibility; not used, the pipe's
        read end is always passed on instead.
    :return: the process object returned by the parent ``_get_tshark_process``.
    """
    read, write = os.pipe()
    try:
        try:
            dumpcap_params = [get_process_path(process_name="dumpcap", tshark_path=self.tshark_path)] \
                + self._get_dumpcap_parameters()
            self._log.debug("Creating Dumpcap subprocess with parameters: %s", " ".join(dumpcap_params))
            dumpcap_process = await asyncio.create_subprocess_exec(*dumpcap_params, stdout=write,
                                                                   stderr=subprocess.PIPE)
        finally:
            # dumpcap (if it started) owns its own duplicate of the write end by now.
            os.close(write)
        self._create_stderr_handling_task(dumpcap_process.stderr)
        self._created_new_process(dumpcap_params, dumpcap_process, process_name="Dumpcap")
        return await super(LiveCapture, self)._get_tshark_process(packet_count=packet_count, stdin=read)
    finally:
        # tshark (if it started) has inherited the read end; our copy is no longer needed.
        os.close(read)
# Backwards compatibility: expose Capture.load_packets on this class under the name `sniff`.
sniff = Capture.load_packets
def sniff_continuously(self, packet_count=None):
    """Capture from the configured interface(s), handing back packets continuously.

    Example::

        for packet in capture.sniff_continuously():
            print('Woo, another packet:', packet)

    Note: capture.apply_on_packets(packet_callback) is an alternative which should have a
    slight performance boost.

    :param packet_count: an amount of packets to capture, then stop.
    :return: the result of ``_packets_from_tshark_sync`` (a generator of packets).
    """
    # Thin wrapper retained for backwards compatibility and to carry this documentation.
    packet_stream = self._packets_from_tshark_sync(packet_count=packet_count)
    return packet_stream

View File

@ -0,0 +1,57 @@
from pyshark import LiveCapture
class LiveRingCapture(LiveCapture):
    """Represents a live ringbuffer capture on a network interface."""

    def __init__(self, ring_file_size=1024, num_ring_files=1, ring_file_name='/tmp/pyshark.pcap', interface=None,
                 bpf_filter=None, display_filter=None, only_summaries=False, decryption_key=None,
                 encryption_type='wpa-pwk', decode_as=None, disable_protocol=None,
                 tshark_path=None, override_prefs=None, capture_filter=None,
                 use_json=False, use_ek=False, include_raw=False, eventloop=None,
                 custom_parameters=None, debug=False):
        """
        Creates a new live ring-buffer capturer on a given interface. Does not start the actual capture itself.

        :param ring_file_size: Size of the ring file in kB, default is 1024
        :param num_ring_files: Number of ring files to keep, default is 1
        :param ring_file_name: Name of the ring file, default is /tmp/pyshark.pcap
        :param interface: Name of the interface to sniff on or a list of names (str). If not given, runs on all
        interfaces.
        :param bpf_filter: BPF filter to use on packets.
        :param display_filter: Display (wireshark) filter to use.
        :param only_summaries: Only produce packet summaries, much faster but includes very little information
        :param decryption_key: Optional key used to encrypt and decrypt captured traffic.
        :param encryption_type: Standard of encryption used in captured traffic (must be either 'WEP', 'WPA-PWD', or
        'WPA-PWK'. Defaults to WPA-PWK).
        :param decode_as: A dictionary of {decode_criterion_string: decode_as_protocol} that are used to tell tshark
        to decode protocols in situations it wouldn't usually, for instance {'tcp.port==8888': 'http'} would make
        it attempt to decode any port 8888 traffic as HTTP. See tshark documentation for details.
        :param disable_protocol: Tells tshark to remove a dissector for a specific protocol.
        :param tshark_path: Path of the tshark binary
        :param override_prefs: A dictionary of tshark preferences to override, {PREFERENCE_NAME: PREFERENCE_VALUE, ...}.
        :param capture_filter: Capture (wireshark) filter to use.
        :param use_json: DEPRECATED. Use use_ek instead.
        :param use_ek: Uses tshark in EK JSON mode. It is faster than XML but has slightly less data.
        :param include_raw: Forwarded unchanged to LiveCapture.
        :param eventloop: Forwarded unchanged to LiveCapture.
        :param custom_parameters: A dict of custom parameters to pass to tshark, i.e. {"--param": "value"}
        or else a list of parameters in the format ["--foo", "bar", "--baz", "foo"].
        :param debug: Forwarded unchanged to LiveCapture.
        """
        live_capture_options = dict(
            bpf_filter=bpf_filter, display_filter=display_filter, only_summaries=only_summaries,
            decryption_key=decryption_key, encryption_type=encryption_type,
            tshark_path=tshark_path, decode_as=decode_as, disable_protocol=disable_protocol,
            override_prefs=override_prefs, capture_filter=capture_filter,
            use_json=use_json, use_ek=use_ek, include_raw=include_raw, eventloop=eventloop,
            custom_parameters=custom_parameters, debug=debug,
        )
        super().__init__(interface, **live_capture_options)
        # Ring-buffer settings, consumed by get_parameters().
        self.ring_file_size = ring_file_size
        self.num_ring_files = num_ring_files
        self.ring_file_name = ring_file_name

    def get_parameters(self, packet_count=None):
        """Return the parent's tshark parameters followed by the ring-buffer options.

        :param packet_count: forwarded unchanged to the parent implementation.
        """
        tshark_args = super().get_parameters(packet_count=packet_count)
        tshark_args.extend([
            '-b', f'filesize:{self.ring_file_size!s}',
            '-b', f'files:{self.num_ring_files!s}',
            '-w', self.ring_file_name,
            '-P', '-V',
        ])
        return tshark_args

    def _get_dumpcap_parameters(self):
        """Return the parent's dumpcap parameters with ``-P`` appended."""
        dumpcap_args = super()._get_dumpcap_parameters()
        dumpcap_args.append('-P')
        return dumpcap_args

View File

@ -0,0 +1,52 @@
import os
from pyshark.capture.capture import Capture
class PipeCapture(Capture):
    """A capture that feeds tshark from a caller-supplied pipe instead of an interface or file."""

    def __init__(self, pipe, display_filter=None, only_summaries=False,
                 decryption_key=None, encryption_type='wpa-pwk', decode_as=None,
                 disable_protocol=None, tshark_path=None, override_prefs=None, use_json=False,
                 include_raw=False, eventloop=None, custom_parameters=None, debug=False):
        """Receives a file-like and reads the packets from there (pcap format).

        :param pipe: File descriptor (int) or file-like object that is handed to tshark as its stdin.
        :param display_filter: Display (wireshark) filter to use.
        :param only_summaries: Only produce packet summaries, much faster but includes very little information
        :param decryption_key: Key used to encrypt and decrypt captured traffic.
        :param encryption_type: Standard of encryption used in captured traffic (must be either 'WEP', 'WPA-PWD',
        or 'WPA-PWK'. Defaults to WPA-PWK).
        :param decode_as: A dictionary of {decode_criterion_string: decode_as_protocol} that are used to tell tshark
        to decode protocols in situations it wouldn't usually, for instance {'tcp.port==8888': 'http'} would make
        it attempt to decode any port 8888 traffic as HTTP. See tshark documentation for details.
        :param disable_protocol: Tells tshark to remove a dissector for a specific protocol.
        :param tshark_path: Path of the tshark binary
        :param override_prefs: A dictionary of tshark preferences to override, {PREFERENCE_NAME: PREFERENCE_VALUE, ...}.
        :param use_json: Forwarded unchanged to Capture.
        :param include_raw: Forwarded unchanged to Capture.
        :param eventloop: Forwarded unchanged to Capture.
        :param custom_parameters: A dict of custom parameters to pass to tshark, i.e. {"--param": "value"}
        or else a list of parameters in the format ["--foo", "bar", "--baz", "foo"].
        :param debug: Forwarded unchanged to Capture.
        """
        super(PipeCapture, self).__init__(display_filter=display_filter,
                                          only_summaries=only_summaries,
                                          decryption_key=decryption_key,
                                          encryption_type=encryption_type,
                                          decode_as=decode_as, disable_protocol=disable_protocol,
                                          tshark_path=tshark_path, override_prefs=override_prefs,
                                          use_json=use_json, include_raw=include_raw, eventloop=eventloop,
                                          custom_parameters=custom_parameters, debug=debug)
        self._pipe = pipe

    def get_parameters(self, packet_count=None):
        """
        Returns the special tshark parameters to be used according to the configuration of this class.

        Appends ``-r -`` to the parent's parameters.
        """
        params = super(PipeCapture, self).get_parameters(packet_count=packet_count)
        params += ['-r', '-']
        return params

    async def _get_tshark_process(self, packet_count=None):
        """Start tshark through the parent implementation, with the stored pipe as its stdin."""
        return await super(PipeCapture, self)._get_tshark_process(packet_count=packet_count, stdin=self._pipe)

    def close(self):
        """Close the pipe and then the capture itself.

        A file-like pipe is closed through its own ``close()`` method; anything
        else is treated as a raw file descriptor and passed to ``os.close()``.
        The parent's ``close()`` always runs, even when closing the pipe raises,
        so a bad or already-closed pipe cannot prevent the capture's own cleanup.
        """
        try:
            pipe_close = getattr(self._pipe, 'close', None)
            if callable(pipe_close):
                # File-like object: os.close() would reject it with TypeError.
                pipe_close()
            else:
                os.close(self._pipe)
        finally:
            super(PipeCapture, self).close()

View File

@ -0,0 +1,66 @@
from pyshark import LiveCapture
class RemoteCapture(LiveCapture):
    """A capture which is performed on a remote machine which has an rpcapd service running."""

    def __init__(
        self,
        remote_host,
        remote_interface,
        *args,
        remote_port=2002,
        bpf_filter=None,
        only_summaries=False,
        decryption_key=None,
        encryption_type="wpa-pwk",
        decode_as=None,
        disable_protocol=None,
        tshark_path=None,
        override_prefs=None,
        eventloop=None,
        debug=False,
        **kwargs
    ):
        """
        Creates a new remote capture which will connect to a remote machine which is running rpcapd. Use the sniff()
        method to get packets.

        Note: The remote machine should have rpcapd running in null authentication mode (-n). Be warned that the
        traffic is unencrypted!

        Note:
            *args and **kwargs are passed to LiveCapture's __init__ method.

        :param remote_host: The remote host to capture on (IP or hostname). Should be running rpcapd.
        :param remote_interface: The remote interface on the remote machine to capture on. Note that on windows it is
        not the device display name but the true interface name (i.e. \\Device\\NPF_..).
        :param remote_port: The remote port the rpcapd service is listening on
        :param bpf_filter: A BPF (tcpdump) filter to apply on the cap before reading.
        :param only_summaries: Only produce packet summaries, much faster but includes very little information
        :param decryption_key: Key used to encrypt and decrypt captured traffic.
        :param encryption_type: Standard of encryption used in captured traffic (must be either 'WEP', 'WPA-PWD',
        or 'WPA-PWK'. Defaults to WPA-PWK).
        :param decode_as: A dictionary of {decode_criterion_string: decode_as_protocol} that are used to tell tshark
        to decode protocols in situations it wouldn't usually, for instance {'tcp.port==8888': 'http'} would make
        it attempt to decode any port 8888 traffic as HTTP. See tshark documentation for details.
        :param disable_protocol: Tells tshark to remove a dissector for a specific protocol.
        :param tshark_path: Path of the tshark binary
        :param override_prefs: A dictionary of tshark preferences to override, {PREFERENCE_NAME: PREFERENCE_VALUE, ...}.
        :param eventloop: Forwarded unchanged to LiveCapture.
        :param debug: Forwarded unchanged to LiveCapture.
        """
        # The remote endpoint is addressed as a single rpcap:// interface string.
        rpcap_url = f'rpcap://{remote_host}:{remote_port}/{remote_interface}'
        super().__init__(
            rpcap_url,
            *args,
            bpf_filter=bpf_filter,
            only_summaries=only_summaries,
            decryption_key=decryption_key,
            encryption_type=encryption_type,
            decode_as=decode_as,
            disable_protocol=disable_protocol,
            tshark_path=tshark_path,
            override_prefs=override_prefs,
            eventloop=eventloop,
            debug=debug,
            **kwargs
        )