diff --git a/Disk.py b/Disk.py index df9c5c6..2a84c89 100644 --- a/Disk.py +++ b/Disk.py @@ -1,2 +1,78 @@ from Snapshot import Snapshot +import datetime + +class Disk(object): + """ + Keeps snapshots assigned to corresponding disks, + so we can filter by disks and check for disk replica anomalies. + """ + def __init__(self, disk): + """At init we create empty list we can append Snapshot() objects to""" + self.disk_name = disk + self.snapshots = list() + + def add_snapshot(self, snapshot): + """ + After we created Disk() we want to add to it some Snapshot() + This function performs necessary sanity checks to make sure that snapshot is assigned to correct disk + :param snapshot: Snapshot() object we will try to assign to disk + """ + if isinstance(snapshot,Snapshot): + disk = snapshot.get_disk_name() + if snapshot.get_disk_name() == self.disk_name: + self.snapshots.append(snapshot) + else: + raise AssertionError + + def get_latest_snapshot_ctime(self): + """ + We look for the latest taken snapshot, so we can check for failures + :return: Epoch time of creation of the most recent snapshot + """ + latest = max(self.snapshots, key=lambda snapshot: snapshot.get_snapshot_creation_time_epoch()) + assert isinstance(latest,Snapshot) + return latest.get_snapshot_creation_time_epoch() + + def get_snapshot_cumulative_size(self, human_readable = True): + """ + Gets cumulative size of snapshots for the Disk() so we can look if it isn't 0 bytes or too small + :param human_readable: For console app we want to use human-readable size values + :return: cumulative size of snapshots human-readable or if not in bytes + """ + ## TODO Probably should add initial disk size to this or make seperate method + if human_readable: + return Snapshot.sizeof_fmt\ + (sum(snapshot.get_snapshot_used_size(human_readable=False) for snapshot in self.snapshots)) + else: + return sum(snapshot.get_snapshot_used_size(human_readable=False) for snapshot in self.snapshots) + + def check_if_old(self): + """ + We check if we received snapshot from the last day + :return: True if we didn't receive snapshot, False if everything is working fine here. + """ + latest_date = self.get_latest_snapshot_ctime() + now = datetime.datetime.now().timestamp() + acceptable_time = 24*60*60 # 1 day + return True if (now - latest_date > acceptable_time) else False + + def check_if_zero_bytes(self): + ## TODO Check if empty snapshot is for sure bad snapshot. If the last but one isn't empty, it might be ok. + """ + We check if the snapshot is empty. It's probably useless if it is + :return: True if snapshot is zero bytes in size. False, if it's fine. + """ + size = self.get_snapshot_cumulative_size(human_readable=False) + return True if (size == 0) else False + + def check_for_problems(self): + ## TODO Some verbose information what is probably wrong about latest snapshot. + """ + We combine the factors that make snapshot bad. E.g if snapshot isn't empty but it's old it's still bad. + :return: False if everyhing about latest snapshot is fine. True if something went wrong + """ + return True if any([self.check_if_zero_bytes(), self.check_if_old()]) else False + + def __repr__(self): + return self.disk_name + " " + str(self.check_for_problems()) diff --git a/Snapshot.py b/Snapshot.py index e8389f9..5405fa9 100644 --- a/Snapshot.py +++ b/Snapshot.py @@ -2,7 +2,15 @@ import re import datetime class Snapshot(object): + """ + Snapshot class we use to convert zfs list string to proper object we can use to analyze replicas. + """ def __init__(self, snapshot_string): + """ + We compile some regexes at init so we can use them quickly later. + We keep tuples of data returned by zfs list command. We do this because it returns Tab-Seperated Data + :param snapshot_string: string to parse into Snapshot() class object + """ self.snapshot_string = snapshot_string self.re_split_tabs = re.compile(r'\t+') self.re_name = re.compile(r"[^/]+$") @@ -17,36 +25,80 @@ class Snapshot(object): return self.snapshot_string def parse_string_to_table(self): + """ + We split Tab-Seperaed data returned by zfs list to tuples so we can use it without hassle. + :return: Tuple containing name, ctime, used_size, ref_size of snapshot + """ array = re.split(self.re_split_tabs, self.snapshot_string) return tuple(array) def get_snapshot_name(self): + """ + We grab the name only from tuple + :return: Whole name of the snapshot as str() + """ name_str = self.snapshot_tuple[0] name = next(self.re_name.finditer(name_str)) return name.group() def get_snapshot_zvol(self): + """ + We grab the zvol only from tuple + :return: Zvol where replica is being kept - as str() + """ name_str = self.snapshot_tuple[0] zvol = next(self.re_zvol.finditer(name_str)) return zvol.group() def get_snapshot_creation_time(self): + """ + We grab the epoch time from the data in the tuple + :return: UTC Human-Readable Date of creation of the snapshot + """ time_epoch = int(self.snapshot_tuple[1]) return str(datetime.datetime.utcfromtimestamp(time_epoch)) - def get_snapshot_used_size(self): - used_str = self.snapshot_tuple[2] - return self.sizeof_fmt(int(used_str)) + def get_snapshot_creation_time_epoch(self): + """ + We will probably also make use of the epoch time + :return: Epoch time straight from tuple + """ + return int(self.snapshot_tuple[1]) - def get_snapshot_referenced_size(self): + def get_snapshot_used_size(self, human_readable = True): + """ + We grab used size of the snapshot + :param human_readable: For backend we want it in Bytes. But we want the user to quickly see how big snapshot is. + :return: Used Size of the snapshot Human-Readable or not + """ + used_str = self.snapshot_tuple[2] + if human_readable: + return self.sizeof_fmt(int(used_str)) + else: return int(used_str) + + def get_snapshot_referenced_size(self, human_readable = True): + """ + We grab referenced size of the snapshot + :param human_readable: For backend we want it in Bytes. But we want the user to quickly see how big snapshot is. + :return: Referenced Size of the snapshot Human-Readable or not + """ referenced_str = self.snapshot_tuple[3] - return self.sizeof_fmt(int(referenced_str)) + if human_readable: + return self.sizeof_fmt(int(referenced_str)) + else: + return int(referenced_str) def get_disk_name(self): return self.disk_name @staticmethod def sizeof_fmt(num, suffix='B'): + """ + Function taken from stackexchange. Used to make size in Bytes Human-readable + :param num: Bytes value we want to convert + :param suffix: suffix we want to add to SI Prefix like Ki, Gi etc. E.g. B for GiB + :return: Human-Readable size string + """ ## FIXME WOW taken from stack but it's too fucking slow. Probably because of division. Have to profile for unit in ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']: if abs(num) < 1024.0: diff --git a/zfssm_client.py b/zfssm_client.py index 73887f8..19c0796 100644 --- a/zfssm_client.py +++ b/zfssm_client.py @@ -4,19 +4,29 @@ import Pyro4 from Snapshot import Snapshot import time from zfssmd_worker import DEFAULT_REFRESH_INTERVAL -## FIXME Only debug -import pprint from itertools import product +from Disk import Disk PYRO_URI = "PYRO:058b7dde9ec53de9235cfc57a07ce17a9eabfce3@./u:/run/zfssmd.sock" class ZFSSMDClient(object): + """ + CLient class we use to connect to pyro4 daemon to collect information. + """ def __init__(self, uri=PYRO_URI): + """ + On init we initiate the connection + :param uri: Pyro4 Daemon uri to connect to + """ self.uri = uri self.pyro_conn = Pyro4.Proxy(self.uri) - ## FIXME Check for old timestamp def check_old_timestamp(self, timestamp): + """ + Sanity checking to make sure that the list was refreshed recently. + :param timestamp: Epoch timestamp taken from get_current_list() by self.get_snapshot_list() + :return: False if everything works as intended. + """ target_interval = DEFAULT_REFRESH_INTERVAL * 2 if time.time() - timestamp <= target_interval: return False @@ -24,6 +34,10 @@ class ZFSSMDClient(object): return True def get_snapshot_list(self): + """ + Parses list taken from the daemon and runs timestamp sanity check. + :return: Just snapshot string list + """ response = self.pyro_conn.get_current_list() timestamp = next(iter(response)) if (self.check_old_timestamp(timestamp)): @@ -32,6 +46,10 @@ class ZFSSMDClient(object): return slist def make_snapshot_objects(self): + """ + Creates Snapshot() objects from snapshot string + :return: list of created Snapshot() objects + """ snapshot_object_list = list() for snapshot in self.get_snapshot_list(): snapshot_obj = Snapshot(snapshot) @@ -39,17 +57,29 @@ class ZFSSMDClient(object): return snapshot_object_list def make_disk_objects(self): - disk_object_list = list() - #disk_list = list() - #snapshot_list = self.make_snapshot_objects() - #for snapshot in snapshot_list: - # disk = snapshot.get_disk_name() - # if disk not in disk_list: - # disk_list.append(disk) - #map(lambda (x,y):) + """ + Creates Disk() objects and assigns Snapshot() objects to corresponding Disk(). + Makes sure that there are no duplicate disks and that snapshots are correctly assigned. + :return: list of created Disk() objects + """ + ## TODO avoid this fucking many of loops + snapshots = self.make_snapshot_objects() + disk_list=set() + disk_object_list=list() + for snapshot in snapshots: + disk_list.add(snapshot.disk_name) + for disk in disk_list: + disk_obj = Disk(disk) + disk_object_list.append(disk_obj) + for snapshot in snapshots: + disk_obj.add_snapshot(snapshot) + return disk_object_list def list_snapshots(self): - ## TODO make d + """ + Just a helper function to get all snapshot parameters + :return: dict of snapshots where key is the name of the snapshot and value is a list of snapshot properties. + """ snapshot_dict = dict() for o in self.make_snapshot_objects(): name = o.get_snapshot_name() @@ -59,6 +89,3 @@ class ZFSSMDClient(object): ref = o.get_snapshot_referenced_size() snapshot_dict.update ({name:[zvol,ctime,used,ref]}) return snapshot_dict - -pp = pprint.PrettyPrinter() -pp.pprint(ZFSSMDClient().make_disk_objects()) \ No newline at end of file diff --git a/zfssmd.py b/zfssmd.py index 82de665..c45a090 100644 --- a/zfssmd.py +++ b/zfssmd.py @@ -14,14 +14,27 @@ UNIX_PID=Path('/run/zfssmd.pid') CONN_ID='058b7dde9ec53de9235cfc57a07ce17a9eabfce3' class ZfsSnapshotManagerDaemon(object): - # TODO docstrings class and methods + """Pyro4 Daemon we create to speed up listing of snapshots by caching it in memory + When we start daemon we refresh the list by calling zfs list command. + We keep listening on unix socket for refresh requests or client requests. + """ + snapshots = list() - last_refreshed=0 + last_refreshed=99999999 # We use arbitrarily large number to make sure we are not interfering with ZFSSMDClient.check_old_timestamp() + def __init__(self): + """ + Refreshes snapshot list on daemon launch + :param self.last_refreshed keeps tabs on when was the last cache refresh + """ self.last_refreshed = self.call_zfs_list_snapshots() @Pyro4.expose def call_zfs_list_snapshots(self): + """ + Calls zfs list command and mutates the self.snapshots variable to contain latest list of snapshots. + :return: refresh timestamp + """ command = 'zfs list -Hp -t snapshot -o name,creation,used,referenced -s name' try: p = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) @@ -35,10 +48,19 @@ class ZfsSnapshotManagerDaemon(object): @Pyro4.expose def get_current_list(self): + """ + Getter exposed for client connection + :return: dict with key being last refresh time and value being list of snapshots + """ return {self.last_refreshed:self.snapshots} def check_start_conditions(): + """ + Makes sure daemon can launch by checking for PID and sock files. If they exist it logs warning, deletes temp files, + creates new PID file. At this point if we detect error we kill the daemon immediately. + :return: False as intended + """ if UNIX_PID.is_file(): log.warning("Daemon already running or didn't exit gracefully. Trying to clean up.") try: @@ -66,12 +88,23 @@ def check_start_conditions(): log.debug("Socket not set. Continuing.") def sigterm_handler(_signo, _stack_frame): + """ + Handles SIGINT and SIGTERM. Same action no point in creating two functions. + Deletes PID and sock file on exit + """ + log.warning("Received Termination signal. Cleaning up.") UNIX_SOCK.unlink() UNIX_PID.unlink() raise SystemExit(0) def start_daemon(): + """ + Handles starting all the intended actions with starting this program. + Spawns signal handler, checks start conditions, registers new daemon with URI + and launches scheduled interval refresh thread. + :return: False + """ signal.signal(signal.SIGTERM, sigterm_handler) signal.signal(signal.SIGINT, sigterm_handler) check_start_conditions() diff --git a/zfssmd_worker.py b/zfssmd_worker.py index 5e6bb96..49141f7 100644 --- a/zfssmd_worker.py +++ b/zfssmd_worker.py @@ -4,11 +4,20 @@ import Pyro4 import logging as log DEFAULT_REFRESH_INTERVAL=(5 * 60) +PYRO4_URI="PYRO:058b7dde9ec53de9235cfc57a07ce17a9eabfce3@./u:/run/zfssmd.sock" class ZFSSMDaemonRefresh(object): - ## TODO Docstrings - def __init__(self, interval=DEFAULT_REFRESH_INTERVAL): + """ + Spawns as thread to the daemon. Requests snapshot list refresh on interval. + """ + def __init__(self, interval=DEFAULT_REFRESH_INTERVAL, uri=PYRO4_URI): + """ + starts the thread + :param interval: interval to perform a request to daemon + :param uri: uri returned by Pyro4 daemon on launch + """ self.interval = interval + self.uri = uri thread = threading.Thread(target=self.run, args=()) thread.daemon = True @@ -17,8 +26,8 @@ class ZFSSMDaemonRefresh(object): def run(self): while True: ## FIXME Rework it as root process cursor if it's even possible to avoid RPC. For now it's not bad. - uri = "PYRO:058b7dde9ec53de9235cfc57a07ce17a9eabfce3@./u:/run/zfssmd.sock" - zfssmd_connection = Pyro4.Proxy(uri) + _uri = self.uri + zfssmd_connection = Pyro4.Proxy(_uri) last_ref = zfssmd_connection.call_zfs_list_snapshots() log.debug("Refreshed list " + str(last_ref)) time.sleep(self.interval) \ No newline at end of file