# Source page: Usuari:TronaBot/Python/vandallevel.py

#!/usr/bin/python2.7
#-*- coding:utf8 -*-
#
# Copyleft (!C) 2013 Coet@cawiki
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import codecs as cs, os, re, sys, time, locale

# My scripts live in folders placed at the same level as the pywikipedia
# folder, so the following lines add those folders to the import path.
on_win = sys.platform.startswith("win")
# NOTE(review): pre-ternary `cond and a or b` idiom; safe here only because
# the Windows path string is non-empty (truthy).
home = on_win and r"E:\\iShare\SugarSync\My Python scripts" \
	   or "/home/pasqual/public_html/"
for folder in ("pywikilib", "pyuserlib"):
	sys.path.append(os.path.join(home, folder))

# from pywikipedia
import query as api, wikipedia as pywikilib
# from pyuserlib
from common import Date, ArgumentHandler, File, yellow, printf

def get_limits(days):
	"""Return (start, end) API timestamps delimiting the day `days` days ago.

	`start` is that day at 23:59:00 and `end` the same day at 00:00:00;
	start is the later instant because the API is queried newest-first.
	"""
	target_day = Date().substract(days=days)
	start = Date(target_day.replace(hour=23, minute=59, second=0)).to_api()
	midnight = Date(start).replace(hour=0, minute=0, second=0)
	end = Date(midnight).to_api()
	return start, end

def queries(start, end):
	"""
	Getting recentchanges and patrol, delete and block logs between start and end timestamps.
	start and end must be API timestamps.
	"""
	rccontinue=True
	lecontinue=True
	params = {
		"action": "query",
	}

	logevents = {
		"lelimit": "max",
		"lestart": start,
		"leend": end,
		"leprop": ["user", "title", "timestamp", "details", "comment", "type"],
	}
	recentchanges = {
		"rcprop": ["user", "timestamp", "comment", "patrolled"],
		"rclimit": "max",
		"rctype": "edit",
		"rcstart": start,
		"rcend": end,
	}

	while rccontinue or lecontinue:
		params['list']=[]
		if rccontinue:
			params['list'].append("recentchanges")
			params.update(recentchanges)
			if isinstance(rccontinue, basestring):
				params['rccontinue']=rccontinue
		if lecontinue:
			params['list'].append("logevents")
			params.update(logevents)
			if isinstance(lecontinue, basestring):
				params['lecontinue']=lecontinue

		data = api.GetData(params)

		if not data.has_key("query"):
			print "#ERR: NO DATA!!!"
			print params
			print data
			break

		if data['query'].has_key("recentchanges"):
			for rcevt in data['query']['recentchanges']:
				if rcevt.has_key('commenthidden'):
					#new 2014-05-18_10:43:55
					yield {"user": rcevt['user'], "type": "hidden", "timestamp" :rcevt['timestamp']}
				elif re.search(ur"Revertides les edicions de:? |Bot: Rv\. edic\. ", rcevt['comment']):
					yield {"user": rcevt['user'], "type": "revert", "timestamp" :rcevt['timestamp']}
				elif re.search(ur"Reversió a l'edició del", rcevt['comment']):
					yield {"user": rcevt['user'], "type": "revert", "timestamp" :rcevt['timestamp']}
				elif re.search(ur"^ *rv(?: va?nd)?", rcevt['comment'], re.I):
					yield {"user": rcevt['user'], "type": "revert", "timestamp" :rcevt['timestamp']}
				elif re.search(ur"Es desfà la revisió |Desfets els canvis ", rcevt['comment']):
					yield {"user": rcevt['user'], "type": "undo", "timestamp" :rcevt['timestamp']}
				elif rcevt.has_key("unpatrolled"):
					yield {"user": rcevt['user'], "type": "unpatrolled", "timestamp" :rcevt['timestamp']}

		if data['query'].has_key("logevents"):
			for logevt in data['query']['logevents']:
				if logevt['action'] not in ("patrol", "delete", "block"):continue
				if logevt['action']=="patrol" and logevt['patrol']['auto']==0:
					yield {"user": logevt['user'], "type": "patrol", "timestamp": logevt['timestamp']}
				elif logevt['type']=='deleted' and logevt['comment'].startswith("[[VP:CSR#G3|G3]]: Vandalisme:"):
					yield {"user": logevt['user'], "type": "delete", "timestamp": logevt['timestamp']}
				elif logevt['type']=='block':
					yield {"user": logevt['user'], "type": "block", "timestamp": logevt['timestamp']}

		rccontinue = data['query-continue']['recentchanges']['rccontinue'] if data.has_key('query-continue') and data['query-continue'].has_key("recentchanges") else None
		lecontinue = data['query-continue']['logevents']['lecontinue'] if data.has_key("query-continue") and data['query-continue'].has_key("logevents") else None

def last_days():
	"""
	Logging:
		minimum 3 days ago, maximum 31 days ago, or many days between max & min

	Collects per-day/per-hour counters of anti-vandalism events, renders
	them as sortable wikitables and, depending on the parsed arguments,
	publishes the whole page (args.edit), patches only the newest rows into
	the page already online (args.update), or just prints the result.
	"""
	today = Date().to_cest()
	yesterday = Date(today).substract(days=1)
	print "today:",today.strftime("%Y-%m-%d")
	print "yesterday", yesterday.strftime("%Y-%m-%d")

	# dates[YYYY-MM-DD][HH] -> per-hour copy of init_dict (event counters).
	dates = {}
	days = args.days or 31
	if args.update: days = 3
	logtxt=""
	for i in range(days):
		start, end = get_limits(i)
		d = Date(start)
		s = d.to_datetime().strftime("%Y-%m-%d"), d.short_local_weekday()
		print "{0[0]} {0[1]}" .format (s)
		logtxt += "{0[0]} {0[1]}\n" .format (s)
		for evt in queries(start, end):
			logtxt +="%r\n" % evt
			# Bucket each event by local (CEST) date and hour.
			d = Date(evt['timestamp']).to_cest()
			date = Date(d).to_datetime().strftime("%Y-%m-%d")
			hour = Date(d).to_datetime().strftime("%H")
			if not dates.has_key(date):
				dates[date]=dict([("%02i" % h, init_dict.copy()) for h in range(24)])
			dates[date][hour][evt['type']]+=1
	logevt.save(logtxt)
	logdata.backup(dates)
	del logtxt

	# Wikitable row accumulators: overall totals, block/delete, reverts,
	# undos, patrols, hidden and unpatrolled tables respectively.
	lines=""; bldl_lines = ""; rv_lines = ""; ud_lines = ""; ptl_lines = ""; hdn_lines = ""; upd_lines = ""
	# NOTE: `all` shadows the builtin of the same name inside this function.
	all=patrolled=unpatrolled=0
	for date in reversed(sorted(dates.keys())):
		day=Date(date)
		weekday = day.short_local_weekday()
		new_line = "|-\n| %s %s " % (date, weekday)
		d = dates[date]

		# all: 24 hourly totals over every event type, then the daily total.
		th = [sum([d[h]['revert'], d[h]['undo'], d[h]['block'], d[h]['delete'], d[h]['patrol'], d[h]['hidden'], d[h]['unpatrolled']]) for h in sorted(dates[date].keys())]
		td = sum(th)
		# Row layout: date cell, daily total, then 24 hour cells
		# (23 with a trailing space, the last one without).
		line = "%s|| %i " % (new_line, td)
		line += "|| %s " * 23
		line += "|| %s"
		line = line % tuple(th)
		lines+= "%s\n" % line
		all += td

		# block & delete ("b/s" is the unit marker matched by lastdays_re)
		bldl_th = [sum([d[h]['delete'], d[h]['block']]) for h in sorted(dates[date].keys())]
		bldl_td = sum(bldl_th)
		bldl_line = "%s|| %i b/s " % (new_line, bldl_td)
		bldl_line += "|| %s " * 23
		bldl_line += "|| %s"
		bldl_line = bldl_line % tuple(bldl_th)
		bldl_lines+= "%s\n" % bldl_line

		# revert (unit marker "rv.")
		rv_th = [d[h]['revert'] for h in sorted(dates[date].keys())]
		rv_td = sum(rv_th)
		rv_line = "%s|| %i rv. " % (new_line, rv_td)
		rv_line += "|| %s " * 23
		rv_line += "|| %s"
		rv_line = rv_line % tuple(rv_th)
		rv_lines+= "%s\n" % rv_line

		# undo (unit marker "df.")
		ud_th = [d[h]['undo'] for h in sorted(dates[date].keys())]
		ud_td = sum(ud_th)
		ud_line = "%s|| %i df. " % (new_line, ud_td)
		ud_line += "|| %s " * 23
		ud_line += "|| %s"
		ud_line = ud_line % tuple(ud_th)
		ud_lines+= "%s\n" % ud_line

		# patrol (unit marker "p.")
		ptl_th = [d[h]['patrol'] for h in sorted(dates[date].keys())]
		ptl_td = sum(ptl_th)
		ptl_line = "%s|| %i p. " % (new_line, ptl_td)
		ptl_line += "|| %s " * 23
		ptl_line += "|| %s"
		ptl_line = ptl_line % tuple(ptl_th)
		ptl_lines+= "%s\n" % ptl_line
		patrolled+=ptl_td

		# hidden (unit marker "o."); accumulated but not published below.
		hdn_th = [d[h]['hidden'] for h in sorted(dates[date].keys())]
		hdn_td = sum(hdn_th)
		hdn_line = "%s|| %i o. " % (new_line, hdn_td)
		hdn_line += "|| %s " * 23
		hdn_line += "|| %s"
		hdn_line = hdn_line % tuple(hdn_th)
		hdn_lines+= "%s\n" % hdn_line

		# unpatrolled (unit marker "n."); accumulated but not published below.
		upd_th = [d[h]['unpatrolled'] for h in sorted(dates[date].keys())]
		upd_td = sum(upd_th)
		upd_line = "%s|| %i n. " % (new_line, upd_td)
		upd_line += "|| %s " * 23
		upd_line += "|| %s"
		upd_line = upd_line % tuple(upd_th)
		upd_lines+= "%s\n" % upd_line
		unpatrolled +=  upd_td

		ttl=upd_td+ptl_td
		# NOTE(review): raises ZeroDivisionError when a day has neither
		# patrolled nor unpatrolled edits (ttl == 0) — confirm that never
		# happens in practice.
		print "{}.{}.{} {}\n\ttotal: {} unpatrolled: {:0.2f}% patrolled: {:0.2f}%" .format(
			day.year, day.month, day.day, day.short_local_weekday(),
			ttl, upd_td*100.0/ttl, ptl_td*100.0/ttl
		)

	# Shared table header: one column per hour of the day.
	hours = "".join(["!! %02i " %i for i in range(24)])
	# all
	text=u'==estadístiques (totals)==\n{| class="sortable wikitable" style="text-align:right"'\
			 '\n|-\n! data/hores !! total<br/>dia %s' % hours
	text +="\n%s|}" % lines

	# block & delete
	bl_del_text=u'==bloquejos i supressions==\n{| class="sortable wikitable" style="text-align:right"'\
			 '\n|-\n! data/hores !! total<br/>dia %s' % hours
	bl_del_text +="\n%s|}" % bldl_lines

	# revert
	rv_text=u'==reversions==\n{| class="sortable wikitable" style="text-align:right"'\
			 '\n|-\n! data/hores !! total<br/>dia %s' % hours
	rv_text +="\n%s|}" % rv_lines

	# undo
	ud_text=u'==revisions desfetes==\n{| class="sortable wikitable" style="text-align:right"'\
			 '\n|-\n! data/hores !! total<br/>dia %s' % hours
	ud_text +="\n%s|}" % ud_lines

	# patrol
	ptl_text=u'==edicions patrullades==\n{| class="sortable wikitable" style="text-align:right"'\
			 '\n|-\n! data/hores !! total<br/>dia %s' % hours
	ptl_text +="\n%s|}" % ptl_lines

	if args.edit and not args.update:
		# Full rebuild: publish every table on the stats page.
		header = u"A continuació es mostren els resultats sobre les reversions, edicions desfetes, edicions patrullades, articles esborrats per vandalisme i usuaris blocats."
		text = "\n\n".join([header, text, bl_del_text, rv_text, ud_text, ptl_text])
		page3.put(text, "actualitzant dades")
	elif args.update:
		# Incremental update: splice only the freshly computed rows into the
		# text already online.
		# NOTE(review): page3.get() is called twice for two identical copies.
		new_text = page3.get(); old_text = page3.get()
		# Rows without a "unit" group belong to the totals table.
		lines = [line for line in lines.splitlines() if lastdays_re.search(line) and not lastdays_re.search(line).group("unit")]
		old_lines = [line for line in old_text.splitlines() if lastdays_re.search(line) and not lastdays_re.search(line).group("unit")][:days]
		# list_lines(x, y): rows of text x whose unit marker equals y,
		# capped at `days` rows.
		list_lines = lambda x, y: [line for line in x.splitlines() if lastdays_re.search(line) and lastdays_re.search(line).group("unit") and lastdays_re.search(line).group("unit").strip() == y][:days]
		bldl_lines = list_lines(bldl_lines, "b/s")
		bldl_old_lines = list_lines(old_text, "b/s")
		rv_lines = list_lines(rv_lines, "rv.")
		rv_old_lines = list_lines(old_text, "rv.")
		ud_lines = list_lines(ud_lines, "df.")
		ud_old_lines = list_lines(old_text, "df.")
		ptl_lines = list_lines(ptl_lines, "p.")
		ptl_old_lines = list_lines(old_text, "p.")

		# A new day has started since the last run: prepend the new first
		# row above the old first row of every table.
		if lastdays_re.search(lines[0]).group("date") != lastdays_re.search(old_lines[0]).group("date"):
			line = "%s\n|-\n%s" % (lines[0], old_lines[0])
			new_text = new_text.replace(old_lines[0], line, 1)

			line = "%s\n|-\n%s" % (bldl_lines[0], bldl_old_lines[0])
			new_text = new_text.replace(bldl_old_lines[0], line, 1)

			line = "%s\n|-\n%s" % (rv_lines[0], rv_old_lines[0])
			new_text = new_text.replace(rv_old_lines[0], line, 1)

			line = "%s\n|-\n%s" % (ud_lines[0], ud_old_lines[0])
			new_text = new_text.replace(ud_old_lines[0], line, 1)

			line = "%s\n|-\n%s" % (ptl_lines[0], ptl_old_lines[0])
			new_text = new_text.replace(ptl_old_lines[0], line, 1)

		# Replace each overlapping day's online row with the recomputed one,
		# table by table, matching rows by their date.
		couples = (
			(lines, old_lines),
			(bldl_lines, bldl_old_lines),
			(rv_lines, rv_old_lines),
			(ud_lines, ud_old_lines),
			(ptl_lines,ptl_old_lines)
		)
		for new_lines, old_lines in couples:
			i=0
			for line in new_lines:
				# Stop before the oldest recomputed day (its data may be
				# incomplete at the range boundary).
				if i==days-1: break
				g = lastdays_re.search(line).groupdict()
				for old_line in old_lines:
					if g['date'] in old_line:
						new_text = new_text.replace(old_line, line, 1)
				i+=1

		pywikilib.showDiff(old_text, new_text)
		if args.edit:
			page3.put(new_text, "Bot, actualitzant")
		elif args.log:
			logfile.save(new_text)

	else:
		# Dry run: print the totals table only.
		pywikilib.output(text)
	t = unpatrolled+patrolled
	# NOTE(review): ZeroDivisionError if no patrolled/unpatrolled events at
	# all in the whole range (t == 0).
	print "RESULTS: all:{0} patr.:{1} ({3:0.2f}%) unpatr.:{2} ({4:0.2f}%)" .format(
		all,
		patrolled,
		unpatrolled,
		patrolled*100.0/t,
		unpatrolled*100.0/t
	)

def last_hours():
	"""
	Logging:
		1 day.

	Counts today's reverts/undos/patrols hour by hour, updates the
	vandalism-level template according to the current hour's activity
	and refreshes today's row on the hourly log page.
	"""
	dates = {}
	start, end = get_limits(0)
	# NOTE(review): "today" is derived from start minus one day — presumably
	# compensating for Date()/CEST offsets; confirm against common.Date.
	today = Date(Date(start).substract(days=1)).to_cest()
	yesterday = Date(Date(start).substract(days=2)).to_cest()
	print "today:", Date(today).to_datetime().strftime("%Y-%m-%d")
	print "yesterday:", Date(yesterday).to_datetime().strftime("%Y-%m-%d")

	start, end = get_limits(0)
	print Date(start).to_datetime().strftime("%Y-%m-%d"), Date(start).short_local_weekday()
	patrollers=set()
	for evt in queries(start, end):
		# Bucket events by local (CEST) date and hour, as in last_days().
		d = Date(evt['timestamp']).to_cest()
		date = Date(d).to_datetime().strftime("%Y-%m-%d")
		hour = Date(d).to_datetime().strftime("%H")
		if not dates.has_key(date):
			dates[date]=dict([("%02i" % h, init_dict.copy()) for h in range(24)])
		dates[date][hour][evt['type']]+=1

		if evt['type'] == "patrol":patrollers.add(evt['user'])

	lines=""
	reversions = undoings = patrolled = 0
	# Counters for the hour now in progress.
	# NOTE(review): `date` is whatever the last yielded event set it to;
	# raises NameError when queries() yields nothing — confirm the wiki
	# always has at least one event per day.
	current = dates[date]["%02i" % Date().hour]
	for date in reversed(sorted(dates.keys())):
		weekday = Date(date).short_local_weekday()
		line = "|-\n| %s %s. " % (date, weekday)
		d = dates[date]
		# Unlike last_days(), 'hidden' and 'unpatrolled' are excluded here.
		th= [sum([d[h]['revert'], d[h]['undo'], d[h]['block'], d[h]['delete'], d[h]['patrol']]) for h in sorted(dates[date].keys())]
		r = sum([d[h]['revert'] for h in sorted(dates[date].keys())])
		u = sum([d[h]['undo'] for h in sorted(dates[date].keys())])
		p = sum([d[h]['patrol'] for h in sorted(dates[date].keys())])
		reversions += r
		undoings += u
		patrolled += p
		td= sum(th)
		line += "|| %i " % td
		line += "|| %s " * 24
		line = line % tuple(th)
		lines+= "%s\n" % line

	hours = "".join(["!! %02i " %i for i in range(24)])
	text=u'==estadístiques reversions==\n{| class="sortable wikitable" style="text-align:right"'\
			 '\n|-\n! data/hores !! total<br/>dia %s' % hours
	text +="\n%s|}" % lines

	old_text = page.get()
	last_data = lasthour_re.search(old_text)
	last_rev = int(last_data.group("oldrv"))
	last_undo = int(last_data.group("oldud"))
	# Vandalism level from the current hour's reverts+undos:
	# level 1 = highest activity ... level 5 = lowest.
	growth = current['revert']+current['undo']
	level = 0
	if growth>=15:
		level=1
	elif growth in (12,13,14):
		level=2
	elif 8 <= growth <= 11:
		level=3
	elif growth in (5,6,7):
		level=4
	elif growth <=4:
		level=5

	new_data= u"| {} || {} || {} || {} || {}".format(date, reversions, undoings, len(patrollers), patrolled)
	# NOTE(review): group(1) of lasthour_re is the named "date" group, so
	# this replaces just the old date with a whole new row; the result is
	# only shown in the diff below, the real page update happens later from
	# group(0) — confirm this first replace is intentional.
	new_text = old_text.replace(last_data.group(1), new_data, 1)
	level_txt = template.get()
	old_level = int(level_re.search(level_txt).group(2))
	if args.edit:
		now = Date().to_cest()
		# Rewrite the template's |level= and |sign= parameters in place.
		new_level = level_re.sub(
			r"|level  = %i\n    |sign   = %s" % (
				level, Date(now).to_datetime().strftime("%Y-%m-%d a les %H:%M")
			),
			level_txt
		)
		template.put(new_level, "Bot: s'ha actualitzat el nivell. (%i rv/h n:%i)" % (growth, level))
		subtemplate.put("%i" % level, "Bot: s'ha actualitzat el nivell. (%i rv/h n:%i)" % (growth, level))

	if old_text != new_text and args.edit:
		pywikilib.showDiff(old_text, new_text)
		#f.save(new_text)

	if args.edit:
		old_text = page.get()
		if last_data.group("date") != date:
			# New day: insert today's row above the previous day's row.
			new_data = "%s\n|-\n%s" % (new_data, last_data.group(0))
		new_text = old_text.replace(last_data.group(0), new_data, 1)
		if new_text != old_text and args.edit:
			page.put(new_text, "Bot: s'ha actualitzat la taula.")

if __name__ == '__main__':
	# Per-hour counter template, copied for every (date, hour) bucket.
	init_dict = {"block": 0, "delete": 0, "revert": 0, "undo": 0, "patrol": 0, "hidden": 0, "unpatrolled": 0}
	# Parses one row of the daily stats tables: date, weekday, daily total,
	# optional unit marker (" b/s", " rv.", " df.", " p.", ...) and the 24
	# hourly cells.
	lastdays_re = re.compile(r"\| (?P<date>\d{4}-\d{2}-\d{2}) (?P<weekday>\w{2}\.) \|\| (?P<total>\d+)(?P<unit> [bdfhnoprsuv\./]+)?(?P<data>(?: \|\| \d+){24})")
	# Parses the summary row of the hourly log page.
	lasthour_re = re.compile(r"\| (?P<date>\d{4}-\d{2}-\d{2}) \|\| (?P<oldrv>\d+) \|\| (?P<oldud>\d+) \|\| (?P<patrollers>\d+) \|\| (?P<patrolled>\d+)")
	# Matches the |level= / |sign= parameters of the vandalism-level template.
	level_re = re.compile(r"(\|level\s+= (\d)\s*\|sign\s+= +(?:\(bot aturat\)|[\d/-]+ a les [\d:]+))", re.S)
	# NOTE(review): "wikipedia_e" looks like a custom family entry for the
	# bot account — confirm it exists in the local user-config.
	estatbot = pywikilib.getSite("ca", "wikipedia_e")
	page = pywikilib.Page(estatbot, u"Usuari:TronaBot/log:Reversions i patrullatge")
	# NOTE(review): page2 is created but never used in this script.
	page2 = pywikilib.Page(estatbot, u"Usuari:TronaBot/log:Reversors i patrulladors")
	page3 = pywikilib.Page(estatbot, u"Usuari:TronaBot/log:Activitat reversors per hores")
	template = pywikilib.Page(estatbot, u"Plantilla:Nivell vandalisme")
	subtemplate = pywikilib.Page(estatbot, u"Plantilla:Nivell vandalisme/Nivell")
	args = ArgumentHandler()
	args.parse_arguments()
	if args.ld:
		# Daily statistics mode: needs the three log files below.
		logfile = File("vandallevel")
		logevt = File("vdllvl_logevt")
		logdata =  File("vdllvl_logdata")
		last_days()
	if args.lh:
		# Hourly / vandalism-level mode.
		last_hours()