# Usuari:TronaBot/Python/ranking.py

#!/usr/bin/env python
#-*- encoding: utf8 -*-

# Copyleft (!C) 2013 w:ca:User:Coet
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import argparse
import codecs
import os
import re
import sys
import time
from datetime import datetime, timedelta
try:
	import MySQLdb
	sql_available = True
except:
	#we aren't on toolserver, api will be required
	sql_available = False

#loading superfolder (pywikipedia)
sys.path.append(os.path.split(os.getcwd())[0])
import wikipedia as pywikilib
import query as api

#user modules
sys.path.append("/home/pasqual/public_html/pyuserlib")
import common
from dateutil.relativedelta import relativedelta as time_delta
from func import format_number

def last_hours_api_ts(h):
	"""Return a MediaWiki API timestamp *h* hours before the local clock.

	h -- number of hours to subtract (int or float).

	NOTE(review): the value comes from datetime.now() (naive local time)
	but is formatted with a trailing "Z"; confirm the wiki expects that.
	"""
	# Bug fix: the original referenced the unimported name ``timedelta``
	# and raised NameError on every call (``time_delta`` is relativedelta).
	date = datetime.now() - timedelta(hours=h)
	return date.strftime("%Y-%m-%dT%H:%M:%SZ")

class API(object):
	def __init__(self, site):
		self.site=site

	def query(self, params):
		return api.GetData(params, self.site)

	def allusers(self):
		#action=query&list=allusers&auprop=groups&aulimit=max&auwitheditsonly&auactiveusers&auexcludegroup=bot&auprop=editcount
		aufrom=True
		users=[]
		params = {
			"action":"query",
			"list": "allusers",
			"aulimit":"max",
			"auwitheditsonly":"",
			#"auactiveusers":"",
			"auprop":["editcount","groups"],
			"auexcludegroup":["bot"],
		}
		while aufrom:
			if isinstance(aufrom, basestring):
				params['aufrom'] = aufrom
			data = self.query(params)
			users += data['query']['allusers']
			print time.strftime("[%H:%M:%S]"), len(users)
			aufrom = data['query-continue']['allusers']['aufrom'] if data.has_key("query-continue") else None
		return users

	def last_edit(self, user):
		#action=query&list=usercontribs&ucuser=Coet&ucprop=title|timestamp&uclimit=1
		params = {
			"action":"query",
			"list": "usercontribs",
			"ucuser": user,
			"uclimit": 1,
			"ucprop": "timestamp"

		}
		data = self.query(params)
		return data['query']['usercontribs'][0]['timestamp']

class SQL(object):
	def __init__(self, lang="ca", host=None, db=None, fname=None, cfg='~/.my.cnf', ):
		self.host = host or "%swiki-p.db.toolserver.org" % lang
		self.db = db or '%swiki_p' % lang
		self.cfg = cfg
		path= "/home/pasqual/temp/"
		self.fname = fname and "%s%s.log" % (path,fname) or "%ssql-%s.log"% (path, time.strftime("%y%m%d_%H%M%S"))

	def start(self):
		self.conn = MySQLdb.connect(host=self.host, db=self.db, read_default_file=self.cfg, use_unicode=True)
		self.cursor = self.conn.cursor()

	def stop(self):
		self.cursor.close()
		self.conn.close()

	def query(self, sql, **kwargs):
		self.start()
		if kwargs:
			self.cursor.execute(sql.format(**kwargs))
		else:
			self.cursor.execute(sql)
		result = self.cursor.fetchall()
		self.stop()
		return result

	def output(self, sql, **kwargs):
		new_sql = """mysql -h {host} -e "USE {db}; {sql}" > {fname}"""
		if kwargs:
			sql = sql.format(**kwargs)
		sql = new_sql.format(
			host=self.host,
			db=self.db,
			sql=sql,
			fname = self.fname
		)
		print sql
		os.system(sql)


def by_api(fname):
	"""Build the top-500 ranking through the MediaWiki API.

	Returns a list of (editcount, [(name, last_edit_ts, is_sysop), ...])
	pairs in descending editcount order, and caches it as JSON in *fname*.
	Users found in the module-level ``skip`` list are ignored.
	"""
	client = API(site)
	# Bucket the remaining users by their edit count.
	grouped = {}
	for user in client.allusers():
		if user['name'] in skip:
			continue
		grouped.setdefault(user['editcount'], []).append((user['name'], user['groups']))
	goods = []
	ranked = 0
	done = False
	# Walk the buckets from the highest edit count down, stopping once
	# 500 users have been collected.
	for edits in sorted(grouped.keys(), reverse=True):
		bucket = []
		goods.append((edits, bucket))
		for name, groups in grouped[edits]:
			bucket.append((name, client.last_edit(name), "sysop" in groups))
			ranked += 1
			if ranked >= 500:
				done = True
				break
		if done:
			break
	f = codecs.open(fname, "w", "utf-8")
	pywikilib.json.dump(goods, f, indent=4, encoding="utf8")
	f.close()
	#os.startfile(fname)
	return goods

def by_sql(fname):
	"""Build the ranking from the Toolserver SQL replica.

	Returns the same structure as by_api(): a list of
	(editcount, [(name, last_edit_ts, group), ...]) pairs in descending
	editcount order, cached as JSON in *fname*.  Last-edit timestamps
	still come from the API, since the query does not provide them.
	"""
	sql = SQL(fname="ranking")
	query = (
		#"SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; "
		"SELECT user_name, user_editcount, ug_group "
		"FROM user "
		"LEFT JOIN user_groups "
		"ON user_id = ug_user "
		"WHERE user_editcount >= {min} "
		"ORDER BY user_editcount DESC LIMIT {limit};"
	)
	min_edits = 25  # renamed from ``min`` to stop shadowing the builtin
	users = sql.query(query, min=min_edits, limit=5000)
	users_by_num_of_edits = set()
	for row in users:
		username = unicode(row[0], "utf-8")
		if username in skip:
			continue
		# Bug fix: the original guarded this with ``len(row)==1``, which a
		# three-column SELECT never satisfies, so the set stayed empty.
		users_by_num_of_edits.add((username, row[1], row[2]))
	pywikilib.output(u"%s usuaris amb més de %i edicions." % (len(users_by_num_of_edits), min_edits))

	goods_dict = {}
	api_client = API(site)  # renamed: ``api`` shadowed the imported module
	for user, editcount, group in users_by_num_of_edits:
		# Bug fix: entries were appended to the then-undefined ``goods``
		# instead of ``goods_dict``.
		goods_dict.setdefault(editcount, []).append((user, api_client.last_edit(user), group))
	# Bug fix: the final list was assembled by iterating an empty list and
	# rebinding ``goods`` on every pass (and used an undefined
	# ``last_edit``); build it from goods_dict, highest edit count first.
	goods = []
	for ec in reversed(sorted(goods_dict.keys())):
		goods.append((ec, goods_dict[ec]))
	f = codecs.open(fname, "w", "utf-8")
	pywikilib.json.dump(goods, f, indent=4, encoding="utf8")
	f.close()
	#os.startfile(fname)
	return goods

def by_cache(fname):
	"""Load a previously cached ranking (UTF-8 JSON) from *fname*.

	Fix: the file handle is now closed even when the JSON parse raises;
	previously an error leaked the open file.
	"""
	f = codecs.open(fname, "r", "utf-8")
	try:
		return pywikilib.json.load(f, encoding="utf8")
	finally:
		f.close()

def test_sql():
	"""Dump the raw ranking query to disk via SQL.output() (debugging aid)."""
	helper = SQL(fname="ranking")
	ranking_query = (
		#"SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; "
		"SELECT user_name, user_editcount, ug_group " #, rev_timestamp "
		"FROM user "
		"LEFT JOIN user_groups "
		"ON user_id = ug_user "
		#"RIGHT JOIN revision "
		#"ON user_id = rev_user "
		"WHERE user_editcount >= {min} "
		"ORDER BY user_editcount DESC LIMIT {limit};"
	)
	helper.output(ranking_query, min=25, limit=5000)

def main():
	fname="logs/ranking_500_cawiki.log"
	if args.api:
		users = by_api(fname)
	elif sql_available and args.sql:
		users = by_sql(fname)
	elif args.cache:
		users = by_cache(fname)
	else:
		users ={}

	if args.testsql:
		test_sql()
		return

	text=[u"{{/begin|500}}"]
	i=0;j=1
	for noe in users:
		for user, ts, groups  in users[i][1]:
			#user = user[0]; ts=user[1
			ts=common.Date(ts)
			td = time_delta(datetime.now(),ts.to_datetime())
			fmt_usr = (td.years*365)+(td.months*30)+td.days >= 30 and u'<span style="color:gray">%s</span>' % user or user
			text.append(
				u"|-\n| {idx} || [[Usuari:{usr}|{fmt_usr}]]{grp} || [[Especial:Contributions/{usr}|{noe}]] || {lc}". format(
					idx = j,
					fmt_usr = fmt_usr,
					grp = groups and " (Admin)" or "",
					noe=format_number(noe[0],0),
					usr=user,
					lc=ts.to_cest().strftime("%Y-%m-%d %H:%M")
				)
			)
			j+=1
		i+=1
		if i>len(users):break
	print time.strftime("[%H:%M:%S]"), j-1
	text+=["{{/end}}","{{nobots|allow=TronaBot}}"]
	text="\n".join(text)
	fname=fname.replace(".log","_text.log")
	f=codecs.open(fname, "w","utf-8")
	f.write(text)
	f.close()
	if args.openfiles:os.startfile(fname)
	if args.edit:page.put(text, u"Bot: actualització")

if __name__ == '__main__':
	#python2.7 ranking_usuaris.py -AE
	parser = argparse.ArgumentParser()
	parser.add_argument("--sql", "-S",action="store_true", default=False)
	parser.add_argument("--api", "-A",action="store_true", default=False)
	parser.add_argument("--cache", "-C",action="store_true", default=False)
	parser.add_argument("--edit", "-E",action="store_true", default=False)
	parser.add_argument("--openfiles", "-O",action="store_true", default=False)
	parser.add_argument("--testsql", "-t",action="store_true", default=False)
	args = parser.parse_args()
	site = pywikilib.getSite()
	title = u"Viquipèdia:Llista de viquipedistes per nombre d'edicions"
	page = pywikilib.Page(site, title)
	bot_page = pywikilib.Page(site, u"%s/unflaggedbots" % title)
	exception_page = pywikilib.Page(site, u"%s/exclude" % title)
	re_link = re.compile(ur"\* *\[\[Us(?:er|uari|ària) *:([^\]|]+?)(?:\|[^\]]*)?\]\]")
	bots = re_link.findall(bot_page.get())
	users = re_link.findall(exception_page.get())
	skip = bots + users
	pywikilib.output("S'ha inclós %i excepcions. (%i usuaris i %i bots.)" % (len(skip), len(users), len(bots)))
	main()
	pywikilib.stopme()