Adjustments to the new confluence plugin

Fix infinite fetching of the same batches. Move query implementation out of the investigator. Handle page/comment object initialization in init. Plus a couple of style adjustments and cleanup.
psss · Sep 30, 2019 · c7c2c69 · c7c2c69
1 parent 7eaec9b
commit c7c2c69
Showing 1 changed file with 51 additions and 91 deletions.
diff --git a/did/plugins/confluence.py b/did/plugins/confluence.py
@@ -5,7 +5,7 @@
 Configuration example (GSS authentication)::
 
     [confluence]
-    type =  confluence
+    type = confluence
     url = https://docs.jboss.org/
 
 Configuration example (basic authentication)::
@@ -23,7 +23,7 @@
   SSL verification (default: true)
 * ``auth_url`` parameter is optional. If not provided,
   ``url + "/step-auth-gss"`` will be used for authentication.
-* ``auth_type`` parameter is optional, default value is 'gss'.
+* ``auth_type`` parameter is optional, default value is ``gss``.
 * ``auth_username`` and ``auth_password`` are only valid for
   basic authentication.
 """
@@ -42,7 +42,7 @@
 from did.stats import Stats, StatsGroup
 
 # Maximum number of results fetched at once
-MAX_RESULTS = 1000
+MAX_RESULTS = 100
 
 # Maximum number of batches
 MAX_BATCHES = 100
@@ -62,81 +62,59 @@ class Confluence(object):
     """ Confluence investigator """
 
     @staticmethod
-    def search(query, content_type, stats):
+    def search(query, stats, expand=None):
         """ Perform page/comment search for given stats instance """
         log.debug("Search query: {0}".format(query))
         content = []
-        expand = None
-
-        if content_type == ConfluenceComment:
-            expand = "body.editor"
-            query = query + " AND type=comment"
-        elif content_type == ConfluencePage:
-            query = query + " AND type=page"
 
         # Fetch data from the server in batches of MAX_RESULTS issues
         for batch in range(MAX_BATCHES):
             response = stats.parent.session.get(
                 "{0}/rest/api/content/search?{1}".format(
-                    stats.parent.url,
-                    urllib.urlencode(
-                        {
-                            "cql": query,
-                            "limit": MAX_RESULTS,
-                            "expand": expand,
-                            "startAt": batch * MAX_RESULTS,
-                        }
-                    ),
-                )
-            )
+                    stats.parent.url, urllib.urlencode({
+                        "cql": query,
+                        "limit": MAX_RESULTS,
+                        "expand": expand,
+                        "start": batch * MAX_RESULTS})))
             data = response.json()
             log.debug(
                 "Batch {0} result: {1} fetched".format(
-                    batch, listed(data["results"], "title")
-                )
-            )
+                    batch, listed(data["results"], "object")))
             log.data(pretty(data))
             content.extend(data["results"])
             # If all issues fetched, we're done
-            if len(data) >= data["size"]:
+            if data['_links'].get('next') is None:
                 break
-        ret_data = []
-        for c in content:
-            if content_type == ConfluenceComment:
-                ret_data.append(
-                    ConfluenceComment(c["body"]["editor"]["value"], c["title"])
-                )
-            elif content_type == ConfluencePage:
-                ret_data.append(ConfluencePage(c["title"]))
-        return ret_data
+        return content
 
 
 class ConfluencePage(Confluence):
     """ Confluence page results """
 
-    def __init__(self, title=None):
-        """ Initialize issue """
-        self.title = title
+    def __init__(self, page):
+        """ Initialize the page """
+        self.title = page['title']
 
     def __unicode__(self):
-        """  Confluence title for displaying """
+        """ Page title for displaying """
         return "{}".format(self.title)
 
 
 class ConfluenceComment(Confluence):
     """ Confluence comment results """
 
-    def __init__(self, body=None, title=None):
+    def __init__(self, comment):
         """ Initialize issue """
-        self.title = title
-        self.body = body
+        # Remove the 'Re:' prefix
+        self.title = re.sub('^Re: ', '', comment['title'])
+        self.body = comment['body']['editor']['value']
+        # Remove html tags
+        self.body = re.sub('</p><p>', ' ', self.body)
+        self.body = re.sub('<[^<]+?>', '', self.body)
 
     def __unicode__(self):
         """ Confluence title & comment snippet for displaying """
-        # remove "Re: " and html tags
-        return "{}: {}".format(
-            self.title[3:], re.sub("<[^<]+?>", "", self.body)
-        )
+        return "{}: {}".format(self.title, self.body)
 
 
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -150,24 +128,23 @@ class PageCreated(Stats):
     def fetch(self):
         log.info("Searching for pages created by {0}".format(self.user))
         query = (
-            "creator = '{0}' AND type=page "
-            "AND created >= {1} AND created <= {2}".format(
-                self.parent.login, self.options.since, self.options.until
-            )
-        )
-        self.stats = Confluence.search(query, ConfluencePage, self)
+            "type=page AND creator = '{0}' "
+            "AND created >= {1} AND created < {2}".format(
+                self.user.login, self.options.since, self.options.until))
+        self.stats = [
+            ConfluencePage(page) for page in Confluence.search(query, self)]
 
 
 class CommentAdded(Stats):
     def fetch(self):
         log.info("Searching for comments added by {0}".format(self.user))
         query = (
-            "creator = '{0}' AND type=comment "
-            "AND created >= {1} AND created <= {2}".format(
-                self.parent.login, self.options.since, self.options.until
-            )
-        )
-        self.stats = Confluence.search(query, ConfluenceComment, self)
+            "type=comment AND creator = '{0}' "
+            "AND created >= {1} AND created < {2}".format(
+                self.user.login, self.options.since, self.options.until))
+        self.stats = [
+            ConfluenceComment(comment) for comment in Confluence.search(
+                query, self, expand="body.editor")]
 
 
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -188,8 +165,7 @@ def __init__(self, option, name=None, parent=None, user=None):
         config = dict(Config().section(option))
         if "url" not in config:
             raise ReportError(
-                "No Confluence url set in the [{0}] section".format(option)
-            )
+                "No Confluence url set in the [{0}] section".format(option))
         self.url = config["url"].rstrip("/")
         # Optional authentication url
         if "auth_url" in config:
@@ -201,9 +177,7 @@ def __init__(self, option, name=None, parent=None, user=None):
             if config["auth_type"] not in AUTH_TYPES:
                 raise ReportError(
                     "Unsupported authentication type: {0}".format(
-                        config["auth_type"]
-                    )
-                )
+                        config["auth_type"]))
             self.auth_type = config["auth_type"]
         else:
             self.auth_type = "gss"
@@ -212,38 +186,30 @@ def __init__(self, option, name=None, parent=None, user=None):
             if "auth_username" not in config:
                 raise ReportError(
                     "`auth_username` not set in the [{0}] section".format(
-                        option
-                    )
-                )
+                        option))
             self.auth_username = config["auth_username"]
             if "auth_password" not in config:
                 raise ReportError(
                     "`auth_password` not set in the [{0}] section".format(
-                        option
-                    )
-                )
+                        option))
             self.auth_password = config["auth_password"]
         else:
             if "auth_username" in config:
                 raise ReportError(
                     "`auth_username` is only valid for basic authentication"
-                    + " (section [{0}])".format(option)
-                )
+                    + " (section [{0}])".format(option))
             if "auth_password" in config:
                 raise ReportError(
                     "`auth_password` is only valid for basic authentication"
-                    + " (section [{0}])".format(option)
-                )
+                    + " (section [{0}])".format(option))
         # SSL verification
         if "ssl_verify" in config:
             try:
                 self.ssl_verify = distutils.util.strtobool(
-                    config["ssl_verify"]
-                )
+                    config["ssl_verify"])
             except Exception as error:
                 raise ReportError(
-                    "Error when parsing 'ssl_verify': {0}".format(error)
-                )
+                    "Error when parsing 'ssl_verify': {0}".format(error))
         else:
             self.ssl_verify = SSL_VERIFY
 
@@ -253,15 +219,13 @@ def __init__(self, option, name=None, parent=None, user=None):
         # Create the list of stats
         self.stats = [
             PageCreated(
-                option=option + "-created",
+                option=option + "-pages",
                 parent=self,
-                name="Confluence pages created in {}".format(option),
-            ),
+                name="Pages created in {}".format(option)),
             CommentAdded(
-                option=option + "-comment-added",
+                option=option + "-comments",
                 parent=self,
-                name="Confluence comments added in {}".format(option),
-            ),
+                name="Comments added in {}".format(option)),
         ]
 
     @property
@@ -273,23 +237,19 @@ def session(self):
             # Disable SSL warning when ssl_verify is False
             if not self.ssl_verify:
                 requests.packages.urllib3.disable_warnings(
-                    InsecureRequestWarning
-                )
+                    InsecureRequestWarning)
             if self.auth_type == "basic":
                 basic_auth = (self.auth_username, self.auth_password)
                 response = self._session.get(
-                    self.auth_url, auth=basic_auth, verify=self.ssl_verify
-                )
+                    self.auth_url, auth=basic_auth, verify=self.ssl_verify)
             else:
                 gssapi_auth = HTTPSPNEGOAuth(mutual_authentication=DISABLED)
                 response = self._session.get(
-                    self.auth_url, auth=gssapi_auth, verify=self.ssl_verify
-                )
+                    self.auth_url, auth=gssapi_auth, verify=self.ssl_verify)
             try:
                 response.raise_for_status()
             except requests.exceptions.HTTPError as error:
                 log.error(error)
                 raise ReportError(
-                    "Confluence authentication failed. Try kinit."
-                )
+                    "Confluence authentication failed. Try kinit.")
         return self._session