summaryrefslogtreecommitdiffstats
path: root/stats/stats-wiki.py
blob: 12d3d52236778107db31e598869bf28844b59f26 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Query Fedora wiki for release validation matrices and retrieve statistics about
user contribution (number of executed test cases).

Currently it queries for base, desktop and installation validation matrices:
http://fedoraproject.org/wiki/Category:Release_validation

@author Kamil Páral <kparal@redhat.com>
@licence GNU AGPL 3+ <http://www.gnu.org/licenses/agpl-3.0.html>
"""

import optparse
import urllib
import sys
import json
import re
import logging
import cgi

# Wiki API endpoint; the query arguments are appended right after the '?'
API_URL = 'https://fedoraproject.org/w/api.php?'

# Names of the wiki categories whose member pages are scanned
INSTALLATION_NAME = 'Category:Installation validation testing'
DESKTOP_NAME = 'Category:Desktop validation testing'
BASE_NAME = 'Category:Base validation testing'

# Matches one {{result|<pass|warn|fail>|<user>[|<bug>...]}} wiki template.
# Groups: outcome, user name, remainder of the template (bug references)
USER_PATTERN = re.compile(r'{{result\|(pass|warn|fail)\|([^}\|]+)([^}]*)}}')

# Both are filled in by parse_args()
options = None
RELEASE = None

# The root logger is used, with everything down to DEBUG let through
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# Collected statistics: user name -> UserStats
results = dict()

class UserStats(object):
    '''Per-user tally: a report counter plus a set of referenced bugs'''

    def __init__(self):
        # bug identifiers referenced by this user; a set, so each is kept once
        self.bugs = set()
        # running count of reports, starts at zero
        self.reports = 0


def parse_args():
    '''Parse the command line and store the outcome in module globals.

    The parsed option values go to the global ``options`` and the single
    positional argument goes to the global ``RELEASE``. If the number of
    positional arguments is not exactly one, the parser reports an error
    (optparse prints the message and exits).
    '''
    global options, RELEASE

    usage = 'usage: %prog [options] RELEASE'
    description = '''\
Query Fedora wiki for release validation matrices and retrieve statistics \
about user contribution (number of executed test cases). Generate the output \
as an HTML file (ugly code, but suitable for blog embedding) printed to \
standard output.

Arguments:
  RELEASE - the Fedora release number to query for, e.g. "17"
'''
    filter_help = (
        "Query only wiki pages containing KEYWORD in their name. This way "
        "you can easily query just a specific milestone (Alpha/Beta/Final) or "
        "type (Base/Desktop/Install) or release candidate. Can be used multiple "
        "times (any of them must match).")

    def raw_description(self, formatter):
        return self.description

    # hand the description back untouched so that its newlines survive
    optparse.OptionParser.format_description = raw_description

    parser = optparse.OptionParser(usage=usage, description=description)
    parser.add_option('-f', '--filter', metavar='KEYWORD', action='append',
                      help=filter_help)

    options, positional = parser.parse_args()

    # exactly one positional argument (the release) is accepted
    if len(positional) != 1:
        parser.error('Invalid number of arguments')

    RELEASE = positional[0]


def api_request(args):
    '''Send a wiki api request with provided args.

    The arguments are joined with '&' and appended to API_URL as they are.

    @param args HTTP args, a list of 'key=value' strings
    @return response body from the server
    '''
    url = API_URL + '&'.join(args)
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        # release the connection even if reading fails
        response.close()


def list_pages(category):
    '''List all pages within a category.

    Only titles starting with 'Test Results:Fedora <RELEASE>' are kept. When
    --filter keywords were given, a title must also contain at least one of
    them; each title is returned once, no matter how many keywords it matches.

    @param category wiki page name of the category
    @return the list of names of matching pages within this category
    '''
    # NOTE(review): only a single reply is read; if the server splits a large
    # category into several replies, the rest is not fetched - confirm
    args = ['action=query',
            'list=categorymembers',
            'cmtitle=%s' % category,
            'cmsort=timestamp',
            'cmdir=asc',
            'cmlimit=max',
            'format=json',
            ]
    logger.debug('Querying %s', category)
    response = api_request(args)
    data = json.loads(response)

    prefix = 'Test Results:Fedora %s' % RELEASE
    keywords = options.filter

    pages = []
    for page in data['query']['categorymembers']:
        title = page['title']
        if not title.startswith(prefix):
            # not a test results page of the requested release
            continue
        if keywords and not any(keyword in title for keyword in keywords):
            # filters are in effect and none of them matches
            continue
        pages.append(title)

    return pages


def parse_matrix(name):
    '''Parse a wiki page with results matrix and record what it contains.

    Every {{result|...}} template found outside of the example "Key" section
    adds one report, plus any bug numbers it references, to the entry of the
    given user in the module-level ``results`` dict.

    @param name wiki page name
    @return None, the global ``results`` dict is updated in place
    '''
    args = ['action=query',
            'titles=%s' % name,
            'prop=revisions',
            'rvprop=content',
            'format=json',
            ]
    logger.debug('Querying %s', name)
    response = api_request(args)
    data = json.loads(response)
    # a single title was requested, take the first page of the reply
    page = next(iter(data['query']['pages'].values()))
    text = page['revisions'][0]['*']

    # filter out example results matrix; a page without the "Key" section
    # (or without any heading after it) must not abort the whole run
    pos1 = text.find('== Key ==')
    if pos1 != -1:
        pos2 = text.find('==', pos1 + 10)
        if pos2 != -1:
            text = text[:pos1] + text[pos2:]
        else:
            text = text[:pos1]

    # find and save all user results
    for _result, user, bugs in USER_PATTERN.findall(text):
        # filter out "previous <build> run"
        if 'previous ' in user:
            continue

        if user not in results:
            results[user] = UserStats()
        stats = results[user]
        stats.reports += 1

        # parse bugs
        bugs = bugs.strip().strip('|').strip()
        if not bugs:
            continue

        for bug in bugs.split('|'):
            bug = bug.strip()
            if not bug:
                # nothing between two separators
                continue
            # sometimes people write 123456#c7, remove the suffix
            if '#' in bug:
                bare = bug.split('#')[0].strip()
                if bare.isdigit():
                    bug = bare
            if not bug.isdigit():
                # unparseable entries are reported, but still recorded
                logger.debug("WARN: Unparseable bug number: %s", bug)
            stats.bugs.add(bug)


def finalize_results():
    '''Do some post-processing of the results, like eliminating duplicate
    user names that differ only in letter case.

    Of several spellings of one name, the one encountered first while
    iterating over ``results`` is kept; the reports and bugs of the other
    spellings are merged into it and their entries are removed.
    '''

    # == remove duplicate user names that differ in letter case ==
    kept_names = {}  # lower-cased name -> spelling that stays in results

    # iterate over a snapshot, entries are deleted from results on the way
    for name in list(results):
        key = name.lower()
        if key not in kept_names:
            kept_names[key] = name
            continue

        # eliminate the duplicate
        kept = results[kept_names[key]]
        kept.reports += results[name].reports
        kept.bugs.update(results[name].bugs)
        del results[name]


def print_results():
    '''Print results in an HTML format.

    Reads the module-level ``results`` and ``RELEASE`` and writes one HTML
    page to standard output: summary counts followed by a table with one row
    per user, ordered by the number of reports (highest first).

    User names and bug identifiers come from wiki pages, so they are
    HTML-escaped wherever they are inserted. Non-ASCII output is written as
    UTF-8, the charset declared in the page header.
    '''

    template = '''\
<!DOCTYPE HTML>
<html>
<head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/></head>
<body>
  <p>Test period: <b>Fedora %(release)s [FIXME] - Fedora %(release)s [FIXME]</b><br/> \
     Testers: <b>%(all_testers)d</b><br/> \
     Reports: <b>%(all_reports)d</b><br/> \
     Unique referenced bugs: <b>%(all_bugs)d</b> \
  </p>
  <table>
    <tr style="text-align:center">
        <th style="vertical-align:middle">Name</th>
        <th style="vertical-align:middle">Reports submitted</th>
        <th style="vertical-align:middle">Referenced bugs<a style="text-decoration:none" \
        href="#ref"><sup>1</sup></a></th>
    </tr>
%(rows)s
  </table>
  <p style="color:#808080;font-size:smaller"> \
  <sup id="ref">1</sup> This is a list of bug reports linked to the wiki \
  results. They don't have to be reported by that concrete person. \
  </p>
</body>
</html>
'''

    # sort according to score
    ladder = sorted(results, key=lambda name: results[name].reports,
                    reverse=True)

    # format results
    rows = []
    for name in ladder:
        stats = results[name]
        # escape once, use for both the link target and the visible text
        name_html = cgi.escape(name, quote=True)
        cells = [
            '<td><a href="https://fedoraproject.org/wiki/User:%s">%s</a></td>'
            % (name_html, name_html),
            '<td align="center">%d</td>' % stats.reports,
        ]
        if stats.bugs:
            links = []
            for bug in sorted(stats.bugs):
                bug_html = cgi.escape(bug, quote=True)
                links.append(
                    '<a href="https://bugzilla.redhat.com/show_bug.cgi?id=%s">%s</a>'
                    % (bug_html, bug_html))
            cells.append(
                '<td><span style="font-size:smaller">%s</span> (%d)</td>'
                % (' '.join(links), len(stats.bugs)))
        else:
            cells.append('<td/>')
        rows.append('<tr>%s</tr>' % ''.join(cells))

    output = template % {
        'release': RELEASE,
        'all_testers': len(ladder),
        'all_reports': sum(results[name].reports for name in ladder),
        # union starting from an empty set also covers "no results at all"
        'all_bugs': len(set().union(*[results[name].bugs
                                      for name in ladder])),
        'rows': '\n'.join(rows),
    }

    # print; a unicode result is encoded explicitly, because a redirected
    # stdout would otherwise refuse non-ASCII characters
    if not isinstance(output, str):
        output = output.encode('utf-8')
    sys.stdout.write(output + '\n')


def main():
    '''Run the whole job: parse arguments, fetch, post-process, print.'''
    logging.basicConfig(format='%(message)s')

    parse_args()

    # walk through the categories one by one and parse each listed page
    categories = (BASE_NAME, DESKTOP_NAME, INSTALLATION_NAME)
    for category in categories:
        for page in list_pages(category):
            parse_matrix(page)

    # post-process the collected results, then write the report
    finalize_results()
    print_results()


if __name__ == '__main__':
    # an interrupt from the keyboard ends the script with exit status 1
    try:
        main()
    except KeyboardInterrupt:
        raise SystemExit(1)