summaryrefslogtreecommitdiffstats
path: root/stats/stats-bodhi.py
blob: 8f0665daf51d21395770f3f1833f59f71bfcd7de (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Query Bodhi on Fedora updates (https://admin.fedoraproject.org/updates/)
for updates, retrieve statistics about user comments.

@author: Lukas Brabec <lbrabec@redhat.com>
"""

from fedora.client import BodhiClient
from fedora.accounts import fas2
import math
import threading as t
import operator
import cgi
import re
import sys
import datetime
from optparse import OptionParser
from getpass import getpass


class BodhiClientNG(BodhiClient):
    """
    BodhiClient variant whose ``query`` method accepts time constraints.

    BodhiClient has support for time constraints, but its ``query`` method
    doesn't take such parameters. This class inherits from BodhiClient and
    overrides ``query`` so that ``start_date``, ``end_date`` and ``page`` can
    be sent along with the usual filters.
    """
    def query(self, release=None, status=None, type_=None, bugs=None,
              request=None, mine=None, package=None, username=None, limit=10,
              start_date=None, end_date=None, page=None):
        """ Query bodhi for a list of updates.

        Every argument that evaluates as false (``None``, ``0``, ``''``,
        ``False``, ...) is left out of the request entirely.

        :kwarg release: The release that you wish to query updates for.
        :kwarg status: The update status (``pending``, ``testing``, ``stable``,
            ``obsolete``)
        :kwarg type_: The type of this update: ``security``, ``bugfix``,
            ``enhancement``, and ``newpackage``.
        :kwarg bugs: A list of Red Hat Bugzilla ID's
        :kwarg request: An update request to query for
            ``testing``, ``stable``, ``unpush``, ``obsolete`` or None.
        :kwarg mine: If True, only query the users updates.  Default: False.
            A true value also makes the request authenticated.
        :kwarg package: A package name or a name-version-release.
        :kwarg username: Sent to the server as the ``username`` filter.
        :kwarg limit: The maximum number of updates to display.  Default: 10.
        :kwarg start_date: Lower time bound, sent to the server unchanged
            (this script passes ``'YYYY-MM-DD HH:MM:SS'`` strings).
        :kwarg end_date: Upper time bound, sent to the server unchanged.
        :kwarg page: Page number of the result set, sent as
            ``updates_tgp_no``.
        :returns: Whatever ``send_request('list', ...)`` returns.
        """
        params = {
            'updates_tgp_limit': limit,
            'username': username,
            'release': release,
            'package': package,
            'request': request,
            'status': status,
            'type_': type_,
            'bugs': bugs,
            'mine': mine,
            'start_date': start_date,
            'end_date': end_date
        }
        if page:
            params['updates_tgp_no'] = page
        # Only "my updates" queries need an authenticated request.
        auth = bool(mine)
        # Build a filtered copy rather than deleting keys while iterating:
        # the latter only works where items() returns a list (Python 2) and
        # raises RuntimeError on Python 3.
        params = dict((key, value) for key, value in params.items() if value)
        return self.send_request('list', req_params=params, auth=auth)


# --- Settings filled in from the command line by parse_args() ---
RELEASE = None
START_DATE = None
END_DATE = None
THRESHOLD = None
USERNAME = None

# FAS account system client used to look up human names; stays None unless
# main() logs in with a username
FAS = None

# --- Tuning knobs ---
# Updates queried per thread; lower values hammer the server more (more
# requests), but the retrieval is faster overall
LIMIT = 50
# Number of simultaneous requests
THREADS = 10


def parse_args():
    """
    Parse the command line and publish the result in the module globals.

    Sets RELEASE, START_DATE, END_DATE, THRESHOLD and USERNAME. The start
    date is extended to the first second of the day and the end date to the
    last second of the day; options that were not given leave None behind.

    :returns: the ``optparse`` options object.
    """
    global RELEASE, START_DATE, END_DATE, THRESHOLD, USERNAME

    opt_parser = OptionParser()
    opt_parser.add_option(
        '-r', '--release',
        help="Fedora release to query; examples: "
             "'F19','F18','EL-6','EL-5' (default: all)")
    opt_parser.add_option(
        '-s', '--startdate',
        help="Start date, format 'YYYY-MM-DD' (default: no constraint)")
    opt_parser.add_option(
        '-e', '--enddate',
        help="End date, format 'YYYY-MM-DD' (default: today)")
    opt_parser.add_option(
        '-t', '--threshold', type='int', default=1,
        help="Users with lower score are omitted from the results "
             "(default: %default)")
    opt_parser.add_option(
        '-u', '--username',
        help="FAS username (will prompt for password). If you provide this, "
             "the results will also contain people real names, not just FAS "
             "login names.")

    # Positional arguments are accepted by optparse but not used here.
    options = opt_parser.parse_args()[0]

    RELEASE = options.release

    if options.startdate:
        START_DATE = options.startdate + " 00:00:00"
    else:
        START_DATE = None

    if options.enddate:
        END_DATE = options.enddate + " 23:59:59"
    else:
        END_DATE = None

    THRESHOLD = options.threshold
    USERNAME = options.username or None
    return options


def create_name(username):
    """
    Turn a login name into a display name plus a "Red Hatter" flag.

    :param username: login name of the comment author.
    :returns: tuple ``(display_name, red_hatter)``. Without a FAS connection
        the flag is None. Otherwise it is True when "@redhat.com" occurs in
        the FAS e-mail address or in the username itself. The display name
        is ``"Human Name (username)"`` when FAS knows a human name that
        differs from the username, and the bare username in every other case.
    """
    if not FAS:
        return username, None

    person = FAS.person_by_username(username)

    # Unknown to FAS: only the username itself can be inspected.
    if person == {}:
        return username, ("@redhat.com" in username)

    human_name = person.human_name
    red_hatter = ("@redhat.com" in person.email or "@redhat.com" in username)

    # No human name, or one identical to the login: nothing to add.
    if not human_name or username == human_name:
        return username, red_hatter

    return human_name + u" ("+username+u")", red_hatter


class BodhiStats(object):
    """
    Class for stats retrieval

    Downloads updates from Bodhi page by page (get_updates), counts for every
    user the number of updates they commented on (parse_updates), optionally
    resolves real names through FAS (get_FAS_names) and renders the result as
    an HTML page (get_HTML).
    """

    # One table row of the HTML report: wiki link, displayed name, count.
    _ROW = (u'<tr><td>'
            u'<a href="https://fedoraproject.org/wiki/User:%s">'
            u'%s'
            u'</a></td><td align="center">%d</td></tr>')

    def __init__(self, start_date=None, end_date=None, release=None, limit=50):
        """
        Create the Bodhi client and ask the server how many updates match.

        :param start_date: lower time bound passed to every query, or None.
        :param end_date: upper time bound passed to every query, or None.
        :param release: release filter passed to every query, or None.
        :param limit: number of updates requested per page.
        """
        # returned list of updates
        self.updates = []
        # bodhi client for communication with server
        self.client = BodhiClientNG()
        # dictionary with statistics: user -> number of commented updates
        self.people = {}
        # list of bots, not included in stats
        self.bots = [u'bodhi', u'autoqa']
        # stats retrieval is in threads, lock for mutual exclusion
        self.mutex = t.Lock()
        # params from command line
        self.start_date = start_date
        self.end_date = end_date
        self.release = release
        self.limit = limit
        # quickly get number of updates; use the constraints this instance
        # was given, not the module-level globals, so that the count always
        # matches what get_updates() will download
        self.cnt = int(self.client.query(start_date=self.start_date,
                                         end_date=self.end_date,
                                         release=self.release).num_items) + 1
        # minimal score for being listed by name; replaced by parse_updates()
        self.threshold = 1
        # authors over threshold
        self.authors = []
        # authors under threshold
        self.others = []
        # nickname -> realname (if provided, otherwise nickname)
        self.real_names = {}
        # list of non-redhatters
        self.nonrh = []

    @staticmethod
    def _escape(text):
        """Escape ``&``, ``<``, ``>`` and ``"`` for use in HTML text and
        double-quoted attribute values."""
        # Imported here so the helper works on both Python 2 and Python 3;
        # cgi.escape() no longer exists since Python 3.8.
        from xml.sax.saxutils import escape
        return escape(text, {u'"': u'&quot;'})

    def get_updates(self, page):
        """
        Download one page of updates and add it to ``self.updates``.

        Meant to run as a thread target. Progress is reported on stderr using
        the module-level ``done`` counter and ``threads`` list.

        :param page: number of the result page to request.
        """
        global threads
        global done
        ret = self.client.query(
            limit=self.limit,
            start_date=self.start_date,
            end_date=self.end_date,
            release=self.release,
            page=page)
        # Shared list and counter are only touched while holding the lock.
        with self.mutex:
            self.updates += ret.updates
            done += 1
            sys.stderr.write("\rReceiving updates: %3.0f%%      "
                             % (100.0*done/len(threads)))

    def parse_updates(self, threshold):
        """
        Count commented updates per user and split users by ``threshold``.

        A user scores one point per update they commented on, no matter how
        many comments they left there. Comments by the update's submitter and
        by the bots in ``self.bots`` do not score.

        Afterwards ``self.authors`` holds ``(user, score)`` pairs with
        ``score >= threshold`` and ``self.others`` the remaining pairs, both
        sorted by score, highest first. If no start/end date was given, it is
        filled in from the oldest/newest comment seen (bot and submitter
        comments included); with no comments at all the dates stay unset.

        :param threshold: minimal score for being listed in ``self.authors``.
        """
        oldest = None
        newest = None
        for item in self.updates:
            # set of authors of comments for each update
            counted = set()
            for comment in item.comments:
                day = datetime.datetime.strptime(
                    comment.timestamp, "%Y-%m-%d %H:%M:%S").date()
                if oldest is None or day < oldest:
                    oldest = day
                if newest is None or day > newest:
                    newest = day

                author = comment.author
                if author == item.submitter or author in self.bots \
                        or author in counted:
                    continue
                counted.add(author)
                self.people[author] = self.people.get(author, 0) + 1

        # items() instead of iteritems(): same result, works on Python 2 and 3
        ranked = sorted(self.people.items(), key=operator.itemgetter(1),
                        reverse=True)
        self.threshold = threshold
        self.others = [a for a in ranked if a[1] < threshold]
        self.authors = [a for a in ranked if a[1] >= threshold]

        # Only invent a date range when there was a comment to take it from.
        if not self.start_date and oldest is not None:
            self.start_date = str(oldest)
        if not self.end_date and newest is not None:
            self.end_date = str(newest)

    def get_FAS_names(self):
        """
        Fill ``self.real_names`` and ``self.nonrh`` for the listed authors.

        Each author in ``self.authors`` is passed to ``create_name``; '@' in
        the resulting name is replaced by ' at '. Names whose Red Hatter flag
        is false or unknown are collected in ``self.nonrh``.
        """
        if FAS:
            sys.stderr.write("Asking FAS for real names...\n")

        for author in self.authors:
            name, red_hatter = create_name(author[0])
            name = name.replace('@', ' at ')
            self.real_names[author[0]] = name
            if not red_hatter:
                self.nonrh.append(name)

    def get_HTML(self):
        """
        Render the statistics as an HTML page.

        Needs ``self.real_names`` to contain every user in ``self.authors``.
        The summary row about users below the threshold is dropped when the
        threshold is 1 or lower. The list of possible non-Red-Hatters is
        written to stderr as a side effect.

        :returns: the page as a unicode string.
        """
        template = u'''\
<!DOCTYPE HTML>
<html>
<head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/></head>
<body>
  <p>Test period: <b>[FIXME]</b> (%(start_date)s - %(end_date)s)<br/> \
     Testers: <b>%(testers)d</b><br/> \
     Comments<a style="text-decoration:none" \
         href="#comments"><sup>1</sup></a>: <b>%(comments)d</b> \
  </p>
  <table>
    <tr style="text-align:center">
      <th style="vertical-align:middle">Name</th>
      <th style="vertical-align:middle">Updates commented</th>
    </tr>
    %(lines)s
    <tr id="threshold">
      <td colspan="2"><i>...and also %(others)d other reporters who \
      created less than %(threshold)d reports each, but %(other_comments)d \
      reports combined!</i></td>
    </tr>
  </table>
  <p style="color:#808080;font-size:smaller"> \
  <sup id="comments">1</sup> If a person provides multiple comments to a single \
  update, it is considered as a single comment. Karma value is not taken into account. \
  </p>
</body>
</html>
'''
        # Both the login and the displayed name come from outside (Bodhi and
        # FAS), so both are escaped before they are put into the markup.
        lines = u'\n'.join([
            self._ROW % (self._escape(login),
                         self._escape(self.real_names[login]),
                         score)
            for login, score in self.authors])

        everyone = self.authors + self.others
        output = template % {
            'start_date': self.start_date.split()[0] if self.start_date else self.start_date,
            'end_date': self.end_date.split()[0] if self.end_date else self.end_date,
            'testers': len(everyone),
            'comments': sum([a[1] for a in everyone]),
            'lines': lines,
            'others': len(self.others),
            'threshold': self.threshold,
            'other_comments': sum([o[1] for o in self.others])
        }

        # remove unnecessary lines: with a threshold of 1 or lower nobody can
        # score below it, so the "others" summary row would always be empty
        if self.threshold <= 1:
            output = re.sub(u'<tr id="threshold">.*?</tr>', u'', output, flags=re.DOTALL)

        sys.stderr.write("List of contributors that *might* not be Red Hatters (don't "
        "have a @redhat.com email address): %s\n" % ", ".join(self.nonrh))

        return output


# Shared state of the download threads. Retrieving the stats in many small
# parts simultaneously is much faster than issuing one huge query.
# Number of result pages received so far
done = 0
# One thread per result page
threads = []


def main():
    """
    Script entry point.

    Parses the command line, optionally logs in to FAS, downloads all
    matching updates in batches of THREADS parallel requests, computes the
    statistics and prints the HTML report (UTF-8 encoded) on stdout. All
    progress messages go to stderr. Exits with status 1 when the FAS
    credentials are rejected.
    """
    global FAS
    global threads

    parse_args()

    if USERNAME:
        password = getpass(prompt='FAS password: ')
        sys.stderr.write('Verifying FAS credentials...\n')
        FAS = fas2.AccountSystem(username=USERNAME, password=password)
        if not FAS.verify_password(USERNAME, password):
            sys.stderr.write('FAS login credentials not valid!\n')
            sys.exit(1)
    else:
        sys.stderr.write('FAS enquiry not requested, the output will not '
            'contain people real names, just login names.\n')

    sys.stderr.write('Finding out the total number of updates...\n')
    stats = BodhiStats(start_date=START_DATE,
                       end_date=END_DATE,
                       release=RELEASE,
                       limit=LIMIT)

    # Integer ceiling division. On Python 2, `cnt / limit` with two ints is
    # floored before math.ceil() ever sees it, so the last, partially filled
    # page was never requested.
    requests = (stats.cnt + stats.limit - 1) // stats.limit
    sys.stderr.write("There will be %d requests in total...\n" % requests)
    for page in range(1, requests + 1):
        threads.append(t.Thread(target=stats.get_updates, args=(page,)))

    # Run at most THREADS requests at a time: start one batch, wait for all
    # of its threads, then continue with the next batch.
    for first in range(0, len(threads), THREADS):
        batch = threads[first:first + THREADS]
        sys.stderr.write("Sending %d requests...\n" % len(batch))
        for thread in batch:
            thread.start()
        for thread in batch:
            thread.join()
        sys.stderr.write("\n")

    sys.stderr.write("Parsing...\n")
    stats.parse_updates(THRESHOLD)

    stats.get_FAS_names()

    sys.stderr.write("Generating output...\n")
    # Write the encoded page followed by a newline. Python 3 needs bytes to
    # go to the underlying binary buffer; Python 2's stdout takes them
    # directly and has no `buffer` attribute.
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    out.write(stats.get_HTML().encode('utf-8') + b'\n')

    sys.stderr.write("Done.\n")


if __name__ == '__main__':
    main()