create disk class, added docstrings

2019-10-04 12:28:19 +02:00 · 2019-10-04 12:28:19 +02:00 · f80d8d0fe2
parent ec94aa9e25
commit f80d8d0fe2
5 changed files with 223 additions and 26 deletions
--- a/Disk.py
+++ b/Disk.py
@ -1,2 +1,78 @@
 from Snapshot import Snapshot
+import datetime
+
+class Disk(object):
+    """
+    Keeps snapshots assigned to corresponding disks,
+    so we can filter by disks and check for disk replica anomalies.
+    """
+    def __init__(self, disk):
+        """At init we create empty list we can append Snapshot() objects to"""
+        self.disk_name = disk
+        self.snapshots = list()
+
+    def add_snapshot(self, snapshot):
+        """
+        After we created Disk() we want to add to it some Snapshot()
+        This function performs necessary sanity checks to make sure that snapshot is assigned to correct disk
+        :param snapshot: Snapshot() object we will try to assign to disk
+        """
+        if isinstance(snapshot,Snapshot):
+            disk = snapshot.get_disk_name()
+            if snapshot.get_disk_name() == self.disk_name:
+                self.snapshots.append(snapshot)
+        else:
+            raise AssertionError
+
+    def get_latest_snapshot_ctime(self):
+        """
+        We look for the latest taken snapshot, so we can check for failures
+        :return: Epoch time of creation of the most recent snapshot
+        """
+        latest = max(self.snapshots, key=lambda snapshot: snapshot.get_snapshot_creation_time_epoch())
+        assert isinstance(latest,Snapshot)
+        return latest.get_snapshot_creation_time_epoch()
+
+    def get_snapshot_cumulative_size(self, human_readable = True):
+        """
+        Gets cumulative size of snapshots for the Disk() so we can look if it isn't 0 bytes or too small
+        :param human_readable: For console app we want to use human-readable size values
+        :return: cumulative size of snapshots human-readable or if not in bytes
+        """
+        ## TODO Probably should add initial disk size to this or make seperate method
+        if human_readable:
+            return Snapshot.sizeof_fmt\
+                (sum(snapshot.get_snapshot_used_size(human_readable=False) for snapshot in self.snapshots))
+        else:
+            return sum(snapshot.get_snapshot_used_size(human_readable=False) for snapshot in self.snapshots)
+
+    def check_if_old(self):
+        """
+        We check if we received snapshot from the last day
+        :return: True if we didn't receive snapshot, False if everything is working fine here.
+        """
+        latest_date = self.get_latest_snapshot_ctime()
+        now = datetime.datetime.now().timestamp()
+        acceptable_time = 24*60*60 # 1 day
+        return True if (now - latest_date > acceptable_time) else False
+
+    def check_if_zero_bytes(self):
+        ## TODO Check if empty snapshot is for sure bad snapshot. If the last but one isn't empty, it might be ok.
+        """
+        We check if the snapshot is empty. It's probably useless if it is
+        :return: True if snapshot is zero bytes in size. False, if it's fine.
+        """
+        size = self.get_snapshot_cumulative_size(human_readable=False)
+        return True if (size == 0) else False
+
+    def check_for_problems(self):
+        ## TODO Some verbose information what is probably wrong about latest snapshot.
+        """
+        We combine the factors that make snapshot bad. E.g if snapshot isn't empty but it's old it's still bad.
+        :return: False if everyhing about latest snapshot is fine. True if something went wrong
+        """
+        return True if any([self.check_if_zero_bytes(), self.check_if_old()]) else False
+
+    def __repr__(self):
+        return self.disk_name + " " + str(self.check_for_problems())

--- a/Snapshot.py
+++ b/Snapshot.py
@ -2,7 +2,15 @@ import re
 import datetime

 class Snapshot(object):
+    """
+    Snapshot class we use to convert zfs list string to proper object we can use to analyze replicas.
+    """
    def __init__(self, snapshot_string):
+        """
+        We compile some regexes at init so we can use them quickly later.
+        We keep tuples of data returned by zfs list command. We do this because it returns Tab-Seperated Data
+        :param snapshot_string: string to parse into Snapshot() class object
+        """
        self.snapshot_string = snapshot_string
        self.re_split_tabs = re.compile(r'\t+')
        self.re_name = re.compile(r"[^/]+$")
@ -17,36 +25,80 @@ class Snapshot(object):
        return self.snapshot_string

    def parse_string_to_table(self):
+        """
+        We split Tab-Seperaed data returned by zfs list to tuples so we can use it without hassle.
+        :return: Tuple containing name, ctime, used_size, ref_size of snapshot
+        """
        array = re.split(self.re_split_tabs, self.snapshot_string)
        return tuple(array)

    def get_snapshot_name(self):
+        """
+        We grab the name only from tuple
+        :return: Whole name of the snapshot as str()
+        """
        name_str = self.snapshot_tuple[0]
        name = next(self.re_name.finditer(name_str))
        return name.group()

    def get_snapshot_zvol(self):
+        """
+        We grab the zvol only from tuple
+        :return: Zvol where replica is being kept - as str()
+        """
        name_str = self.snapshot_tuple[0]
        zvol = next(self.re_zvol.finditer(name_str))
        return zvol.group()

    def get_snapshot_creation_time(self):
+        """
+        We grab the epoch time from the data in the tuple
+        :return: UTC Human-Readable Date of creation of the snapshot
+        """
        time_epoch = int(self.snapshot_tuple[1])
        return str(datetime.datetime.utcfromtimestamp(time_epoch))

-    def get_snapshot_used_size(self):
-        used_str = self.snapshot_tuple[2]
-        return self.sizeof_fmt(int(used_str))
+    def get_snapshot_creation_time_epoch(self):
+        """
+        We will probably also make use of the epoch time
+        :return: Epoch time straight from tuple
+        """
+        return int(self.snapshot_tuple[1])

-    def get_snapshot_referenced_size(self):
+    def get_snapshot_used_size(self, human_readable = True):
+        """
+        We grab used size of the snapshot
+        :param human_readable: For backend we want it in Bytes. But we want the user to quickly see how big snapshot is.
+        :return: Used Size of the snapshot Human-Readable or not
+        """
+        used_str = self.snapshot_tuple[2]
+        if human_readable:
+            return self.sizeof_fmt(int(used_str))
+        else: return int(used_str)
+
+    def get_snapshot_referenced_size(self, human_readable = True):
+        """
+         We grab referenced size of the snapshot
+         :param human_readable: For backend we want it in Bytes. But we want the user to quickly see how big snapshot is.
+         :return: Referenced Size of the snapshot Human-Readable or not
+         """
        referenced_str = self.snapshot_tuple[3]
-        return self.sizeof_fmt(int(referenced_str))
+        if human_readable:
+            return self.sizeof_fmt(int(referenced_str))
+        else:
+            return int(referenced_str)

    def get_disk_name(self):
        return self.disk_name

    @staticmethod
    def sizeof_fmt(num, suffix='B'):
+        """
+        Function taken from stackexchange. Used to make size in Bytes Human-readable
+        :param num: Bytes value we want to convert
+        :param suffix: suffix we want to add to SI Prefix like Ki, Gi etc. E.g. B for GiB
+        :return: Human-Readable size string
+        """
    ## FIXME WOW taken from stack but it's too fucking slow. Probably because of division. Have to profile
        for unit in ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']:
            if abs(num) < 1024.0:
--- a/zfssm_client.py
+++ b/zfssm_client.py
@ -4,19 +4,29 @@ import Pyro4
 from Snapshot import Snapshot
 import time
 from zfssmd_worker import DEFAULT_REFRESH_INTERVAL
-## FIXME Only debug
-import pprint
 from itertools import product
+from Disk import Disk

 PYRO_URI = "PYRO:058b7dde9ec53de9235cfc57a07ce17a9eabfce3@./u:/run/zfssmd.sock"

 class ZFSSMDClient(object):
+    """
+    CLient class we use to connect to pyro4 daemon to collect information.
+    """
    def __init__(self, uri=PYRO_URI):
+        """
+        On init we initiate the connection
+        :param uri: Pyro4 Daemon uri to connect to
+        """
        self.uri = uri
        self.pyro_conn = Pyro4.Proxy(self.uri)

-    ## FIXME Check for old timestamp
    def check_old_timestamp(self, timestamp):
+        """
+        Sanity checking to make sure that the list was refreshed recently.
+        :param timestamp: Epoch timestamp taken from get_current_list() by self.get_snapshot_list()
+        :return: False if everything works as intended.
+        """
        target_interval = DEFAULT_REFRESH_INTERVAL * 2
        if time.time() - timestamp <= target_interval:
            return False
@ -24,6 +34,10 @@ class ZFSSMDClient(object):
            return True

    def get_snapshot_list(self):
+        """
+        Parses list taken from the daemon and runs timestamp sanity check.
+        :return: Just snapshot string list
+        """
        response = self.pyro_conn.get_current_list()
        timestamp = next(iter(response))
        if (self.check_old_timestamp(timestamp)):
@ -32,6 +46,10 @@ class ZFSSMDClient(object):
        return slist

    def make_snapshot_objects(self):
+        """
+        Creates Snapshot() objects from snapshot string
+        :return: list of created Snapshot() objects
+        """
        snapshot_object_list = list()
        for snapshot in self.get_snapshot_list():
            snapshot_obj = Snapshot(snapshot)
@ -39,17 +57,29 @@ class ZFSSMDClient(object):
        return snapshot_object_list

    def make_disk_objects(self):
-        disk_object_list = list()
-        #disk_list = list()
-        #snapshot_list = self.make_snapshot_objects()
-        #for snapshot in snapshot_list:
-        #    disk = snapshot.get_disk_name()
-        #    if disk not in disk_list:
-        #        disk_list.append(disk)
-        #map(lambda (x,y):)
+        """
+        Creates Disk() objects and assigns Snapshot() objects to corresponding Disk().
+        Makes sure that there are no duplicate disks and that snapshots are correctly assigned.
+        :return: list of created Disk() objects
+        """
+        ## TODO avoid this fucking many of loops
+        snapshots = self.make_snapshot_objects()
+        disk_list=set()
+        disk_object_list=list()
+        for snapshot in snapshots:
+            disk_list.add(snapshot.disk_name)
+        for disk in disk_list:
+            disk_obj = Disk(disk)
+            disk_object_list.append(disk_obj)
+            for snapshot in snapshots:
+                disk_obj.add_snapshot(snapshot)
+        return disk_object_list

    def list_snapshots(self):
-        ## TODO make d
+        """
+        Just a helper function to get all snapshot parameters
+        :return: dict of snapshots where key is the name of the snapshot and value is a list of snapshot properties.
+        """
        snapshot_dict = dict()
        for o in self.make_snapshot_objects():
            name = o.get_snapshot_name()
@ -59,6 +89,3 @@ class ZFSSMDClient(object):
            ref = o.get_snapshot_referenced_size()
            snapshot_dict.update ({name:[zvol,ctime,used,ref]})
        return snapshot_dict
-
-pp = pprint.PrettyPrinter()
-pp.pprint(ZFSSMDClient().make_disk_objects())
--- a/zfssmd.py
+++ b/zfssmd.py
@ -14,14 +14,27 @@ UNIX_PID=Path('/run/zfssmd.pid')
 CONN_ID='058b7dde9ec53de9235cfc57a07ce17a9eabfce3'

 class ZfsSnapshotManagerDaemon(object):
-    # TODO docstrings class and methods
+    """Pyro4 Daemon we create to speed up listing of snapshots by caching it in memory
+    When we start daemon we refresh the list by calling zfs list command.
+    We keep listening on unix socket for refresh requests or client requests.
+    """
+
    snapshots = list()
-    last_refreshed=0
+    last_refreshed=99999999 # We use arbitrarily large number to make sure we are not interfering with ZFSSMDClient.check_old_timestamp()
+
    def __init__(self):
+        """
+        Refreshes snapshot list on daemon launch
+        :param self.last_refreshed keeps tabs on when was the last cache refresh
+        """
        self.last_refreshed = self.call_zfs_list_snapshots()

    @Pyro4.expose
    def call_zfs_list_snapshots(self):
+        """
+        Calls zfs list command and mutates the self.snapshots variable to contain latest list of snapshots.
+        :return: refresh timestamp
+        """
        command = 'zfs list -Hp -t snapshot -o name,creation,used,referenced -s name'
        try:
            p = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
@ -35,10 +48,19 @@ class ZfsSnapshotManagerDaemon(object):

    @Pyro4.expose
    def get_current_list(self):
+        """
+        Getter exposed for client connection
+        :return: dict with key being last refresh time and value being list of snapshots
+        """
        return {self.last_refreshed:self.snapshots}


 def check_start_conditions():
+    """
+    Makes sure daemon can launch by checking for PID and sock files. If they exist it logs warning, deletes temp files,
+    creates new PID file. At this point if we detect error we kill the daemon immediately.
+    :return: False as intended
+    """
    if UNIX_PID.is_file():
        log.warning("Daemon already running or didn't exit gracefully. Trying to clean up.")
        try:
@ -66,12 +88,23 @@ def check_start_conditions():
        log.debug("Socket not set. Continuing.")

 def sigterm_handler(_signo, _stack_frame):
+    """
+    Handles SIGINT and SIGTERM. Same action no point in creating two functions.
+    Deletes PID and sock file on exit
+    """
+
    log.warning("Received Termination signal. Cleaning up.")
    UNIX_SOCK.unlink()
    UNIX_PID.unlink()
    raise SystemExit(0)

 def start_daemon():
+    """
+    Handles starting all the intended actions with starting this program.
+    Spawns signal handler, checks start conditions, registers new daemon with URI
+    and launches scheduled interval refresh thread.
+    :return: False
+    """
    signal.signal(signal.SIGTERM, sigterm_handler)
    signal.signal(signal.SIGINT, sigterm_handler)
    check_start_conditions()
--- a/zfssmd_worker.py
+++ b/zfssmd_worker.py
@ -4,11 +4,20 @@ import Pyro4
 import logging as log

 DEFAULT_REFRESH_INTERVAL=(5 * 60)
+PYRO4_URI="PYRO:058b7dde9ec53de9235cfc57a07ce17a9eabfce3@./u:/run/zfssmd.sock"

 class ZFSSMDaemonRefresh(object):
-    ## TODO Docstrings
-    def __init__(self, interval=DEFAULT_REFRESH_INTERVAL):
+    """
+    Spawns as thread to the daemon. Requests snapshot list refresh on interval.
+    """
+    def __init__(self, interval=DEFAULT_REFRESH_INTERVAL, uri=PYRO4_URI):
+        """
+        starts the thread
+        :param interval: interval to perform a request to daemon
+        :param uri: uri returned by Pyro4 daemon on launch
+        """
        self.interval = interval
+        self.uri = uri

        thread = threading.Thread(target=self.run, args=())
        thread.daemon = True
@ -17,8 +26,8 @@ class ZFSSMDaemonRefresh(object):
    def run(self):
        while True:
            ## FIXME Rework it as root process cursor if it's even possible to avoid RPC. For now it's not bad.
-            uri = "PYRO:058b7dde9ec53de9235cfc57a07ce17a9eabfce3@./u:/run/zfssmd.sock"
-            zfssmd_connection = Pyro4.Proxy(uri)
+            _uri = self.uri
+            zfssmd_connection = Pyro4.Proxy(_uri)
            last_ref = zfssmd_connection.call_zfs_list_snapshots()
            log.debug("Refreshed list " + str(last_ref))
            time.sleep(self.interval)