Usuari:TronaBot/Python/treulallengua.py

#!/usr/bin/python2.7
#-*- coding:utf8 -*-
#
u"""
treulallengua.py

Anem a treure les plantilles en anglés i posar les nostres.
* Link GA → Enllaç AB
* Link FA → Enllaç AD
* Link FL → Enllaç AD
Del francés
* Lien BA → Enllaç AB

Altres traduccions:
* Cite journal → Citar publicació

"""
import argparse, codecs as cs, os, re, sys, time
from datetime import datetime
# Make the parent of the current working directory importable.
# os.path.split() returns (head, tail); slicing off the tail and taking the
# first element leaves just the head, so `path` is the parent directory.
# NOTE(review): the modules imported on the next line are presumably located
# there -- confirm against the deployment layout.
path = os.path.join(os.path.split(os.getcwd())[:-1][0])
sys.path.append(path)
import query as api, wikipedia as pywikilib, pagegenerators as pg

def timedelta(td):
	"""Break a time span down into days, hours, minutes and seconds.

	td -- object exposing `days` and `seconds` attributes, such as a
	      datetime.timedelta.

	Returns the tuple (days, hours, minutes, seconds). The last three are
	derived from `td.seconds` only; `td.days` is passed through untouched.
	"""
	leftover = td.seconds
	hours = leftover // 3600
	leftover = leftover % 3600
	return td.days, hours, leftover // 60, leftover % 60

def to_regexp(s, sep=" *", sensitive=True):
	"""Turn a literal wiki title (or any literal text) into a regexp fragment.

	s         -- the literal text to match.
	sep       -- regexp fragment placed before and after the escaped text
	             (by default optional spaces).
	sensitive -- when true, the first character is matched in either case
	             and the rest of the text exactly as written; when false the
	             whole text is matched exactly as written.

	Returns the pattern as a string. Every regexp metacharacter in `s` is
	escaped, so the fragment only matches `s` itself (plus `sep`).
	"""
	def escape(text):
		# The backslash goes first so that the backslashes inserted for the
		# other metacharacters are not escaped a second time.
		for ch in "\\.?*+[](){}|^$":
			text = text.replace(ch, "\\" + ch)
		return text

	if sensitive and s:
		first, rest = s[0], s[1:]
		if first.upper() != first.lower():
			# Build the character class from the raw first character; doing
			# it after escaping would split an escape sequence in two.
			head = u"[%s%s]" % (first.upper(), first.lower())
		else:
			# Caseless character (digit, punctuation...): match it literally.
			head = escape(first)
		return u"%s%s%s%s" % (sep, head, escape(rest), sep)
	# Case-exact match requested, or nothing to build a first-letter class from.
	return u"%s%s%s" % (sep, escape(s), sep)

def get_refferred_page(page):
	"""Return a generator over the namespace-0 pages that refer to `page`.

	page -- title of the page (typically "Template:...") whose referring
	        pages are wanted.

	The referring-page generator is created with onlyTemplateInclusion=True
	and then filtered down to namespace 0.
	"""
	# A preloading wrapper (pg.PreloadingGenerator(..., pageNumber=50)) was
	# tried here at some point and is currently disabled.
	target = pywikilib.Page(pywikilib.getSite(), page)
	referrers = pg.ReferringPageGenerator(target, onlyTemplateInclusion=True)
	return pg.NamespaceFilterPageGenerator(referrers, 0)

# Matches one good/featured-article link template in its English, French or
# local spelling (first letter in either case, space or underscore inside the
# name), together with any whitespace that follows it.
_LINK_TEMPLATE_RE = re.compile(
	r"\{\{ *(?P<tpl>[Ll]ink[ _][FG]A|[Ll]ien[ _]BA|"
	u"[Ee]nllaç"
	r"[ _]A[BD]) *\|(?P<lang>[^\}]+?)\}\}\s*"
)

# Matches a line that starts with a category link or with a lowercase
# "[[xx:" prefix (interwiki-style link).
_FOOTER_LINE_RE = re.compile(r"^\[\[ *(?:[Cc]ategor(?:y|ia)|[a-z\-]{2,}) *:")


def _extract_link_templates(text):
	"""Strip every link template from `text` and return them translated.

	Returns (stripped_text, links_block). `links_block` is an empty string
	when no template was found; otherwise it holds the de-duplicated local
	templates, "AB" entries before "AD" entries, one per line, with a
	leading and a trailing newline.
	"""
	found = set()
	for match in _LINK_TEMPLATE_RE.finditer(text):
		# The last two letters of the name identify the kind whatever the
		# spelling: GA/BA/AB map to "AB", FA/AD map to "AD".
		kind = "AB" if match.group("tpl")[-2:] in ("GA", "BA", "AB") else "AD"
		found.add((kind, match.group("lang").strip()))
	stripped = _LINK_TEMPLATE_RE.sub(u"", text)
	if not found:
		return stripped, u""
	block = u"\n".join(u"{{Enllaç %s|%s}}" % pair for pair in sorted(found))
	return stripped, u"\n%s\n" % block


def _insert_after_last_content_line(text, links):
	"""Insert `links` right after the last line that is real content.

	"Real content" means a line that is neither blank nor a category or
	interwiki link. Returns (new_text, anchor_line), or (None, None) when
	the text has no such line.
	"""
	bare = text.splitlines()
	full = text.splitlines(True)
	for idx in range(len(bare) - 1, -1, -1):
		anchor = bare[idx]
		if anchor.strip() and not _FOOTER_LINE_RE.search(anchor):
			break
	else:
		return None, None
	# Leave a blank line before the links unless the anchor is itself a
	# template line.
	gap = u""
	if not anchor.startswith("{{") and not anchor.endswith("}}"):
		gap = u"\n"
	# Splice by line index, keeping the anchor's own line terminator after
	# the inserted block, so an identical line earlier in the page is never
	# mistaken for the anchor.
	ending = full[idx][len(anchor):]
	pieces = full[:idx] + [anchor, gap, links, ending] + full[idx + 1:]
	return u"".join(pieces), anchor


def link_FGA():
	"""Translate, de-duplicate, sort and relocate the good/featured link templates.

	For every namespace-0 page transcluding one of the known templates
	(Link GA, Link FA, Lien BA, Enllaç AB, Enllaç AD) this:
	  * collects all such templates and rewrites them as {{Enllaç AB|..}}
	    or {{Enllaç AD|..}}, dropping duplicates;
	  * removes the originals from the text;
	  * re-inserts the sorted block after the last line that is neither
	    blank nor a category/interwiki link.

	Reads the module-level `args`: page / pages restrict the pages treated,
	limit caps how many pages are processed, verbose shows the diff and
	edit saves the result.
	"""
	i = 0
	seen = set()
	for template in ("Link GA", "Link FA", "Lien BA", u"Enllaç AB", u"Enllaç AD"):
		if args.limit and i >= args.limit:
			break
		for page in get_refferred_page("Template:%s" % template):
			title = page.title()
			if args.page and title != args.page:
				continue
			if args.pages and title not in args.pages:
				continue
			if title in seen:
				# Already handled through another of the templates.
				continue
			if args.limit and i >= args.limit:
				break
			seen.add(title)
			i += 1
			old_text = page.get()
			stripped, links = _extract_link_templates(old_text)
			if not links:
				# Nothing recognisable to move; leave the page untouched.
				continue
			new_text, line_before = _insert_after_last_content_line(stripped, links)
			if new_text is None:
				pywikilib.output(
					u"[[%s]]: no content line to place the link templates after; skipped" % title
				)
				continue
			if old_text == new_text:
				continue
			if args.verbose:
				pywikilib.output(u"\n\n[%s] %i [[%s]]" % (time.strftime("%H:%M:%S"), i, title))
				pywikilib.output(u"LINE BEFORE: %s" % (line_before,))
				pywikilib.showDiff(old_text, new_text)
			if args.edit:
				summary = u"Bot: s'ha ordenat i traduït les plantilles d'enllaços d'articles bons i articles destacats."
				# A length change of two characters or less is reported as a
				# cosmetic edit.
				if len(old_text) - len(new_text) <= 2:
					summary = u"Bot: canvis estètics en relació a les plantilles d'enllaços d'articles bons i articles destacats"
				page.put(new_text, summary)


# Translation tables used by cites(), keyed by a short local name.
# Each entry holds:
#   "pair"        -- (source template name, local template name)
#   "tranlations" -- source parameter name -> local parameter name
#                    (the key is misspelled on purpose: cites() looks it up
#                    under exactly this spelling)
#   "sorting"     -- source parameter names in the order they are written
#                    out; a name listed here must also be a "tranlations" key
#   "skip"        -- source parameters that have no local counterpart
#   "least"       -- local parameter names expected in every citation
#   "unit"        -- local parameter name -> prefix put before its value
templates = {
	#cite journal → citar publicació
	u"publicació":{
		"pair": ("cite journal", u"citar publicació"),
		"tranlations" : {
			"author": "autor",
			"last": "cognom",
			"last1": "cognom",
			"first": "nom",
			"first1": "nom",
			"first2": "nom2",
			"last2": "cognom2",
			"first3": "nom3",
			"last3": "cognom3",
			"first4": "nom4",
			"last4": "cognom4",
			"authorlink": u"enllaçautor",
			"authorlink2": u"enllaçautor2",
			"authorlink3": u"enllaçautor3",
			"authorlink4": u"enllaçautor4",
			"coauthors": "coautors",
			"date": "data",
			"year": "any",
			"month": "mes",
			"url": "url",
			"title": "article",
			"publisher": "editorial",
			"format": "format",
			"location": "lloc",
			"quote": u"citació",
			"language": "llengua",
			"issn": "issn",
			"journal": u"publicació",
			"volume": "volum",
			"issue": "exemplar",
			"pages": u"pàgines",
			"doi": "doi",
			"accessdate": "consulta",
			"ref":"ref"
		},
		# Source-side names only: "issue" (not its translation "exemplar")
		# and "pages" must be listed or cites() drops them from the output.
		"sorting": (
			"author", "last", "last1", "first", "first1", "authorlink",
			"first2", "last2", "authorlink2", "first3", "last3", "authorlink3",
			"first4", "last4", "authorlink4", "coauthors", "date", "year",
			"month", "url", "title", "publisher", "format", "location", "quote",
			"language", "issn", "journal", "volume", "issue", "pages", "doi",
			"accessdate", "ref"
		),
		#params that aren't used in this language.
		"skip": (
			"editor-last","editor-first","editor-link","year","month",
			"trans_title","series","type","arxiv","oclc","pmid","pmc",
			"bibcode","archiveurl","archivedate","laysource","laysummary",
			"laydate","separator","postscript"
		),
		#the minimum params needed
		"least": (
			"cognom", "nom", "article", u"publicació",
			"volum", "data", u"pàgines"
		),
		"unit": {
			"volum": "vol.",
			u"pàgines": u"pàg."
		}
	},
	#cite book → citar llibre
	u"llibre":{
		"pair": ("cite book", u"citar llibre"),
		"tranlations" : {
			#author
			"author":"autor", "author1":u"autor", "authorlink":u"enllaçautor",
			"authorlink1":u"enllaçautor", "coauthors":"coautors",
			"coauthor":"coautors", "first":"nom", "first1":"nom",
			"given":"nom", "last1":"cognom", "last":"cognom", "surname":"cognom",
			#author2
			"first2":"nom2", "given2":"nom2", "last2":"cognom2",
			"surname2":"cognom2", "authorlink2":u"enllaçautor2",
			#author3
			"first3":"nom3", "given3":"nom3", "last3":"cognom3",
			"surname3":"cognom3", "authorlink3":u"enllaçautor3",
			#author4
			"first4":"nom4", "given4":"nom4", "last4":"cognom4",
			"surname4":"cognom4",
			#author5
			"first5":"nom5", "given5":"nom5", "last5":"cognom5",
			"surname5":"cognom5",
			#author6
			"first6":"nom6", "given6":"nom6", "last6":"cognom6",
			"surname6":"cognom6",
			#other contributors
			"others":"altres",
			#pointing
			"chapter": u"capítol", "chapterurl": u"urlcapítol",
			"title": u"títol", "volume":"volum", "edition":u"edició",
			"series": u"col·lecció", "serie": u"col·lecció",
			"page": u"pàgina", "pages": u"pàgines", "quote": u"citació",
			#lang, publisher, loc
			"language":"llengua", "publisher":"editorial",
			"location":"lloc", "place":"lloc",
			#date
			"date":"data", "year":"any", "month":"mes",
			#archive date
			"origdate": "dataarxiu", "origyear":"anyarxiu",
			"origmonth": "mesarxiu", "archiveurl": "url",
			"archivedate":u"dataarxiu",
			#url access data
			"accessdate": "consulta", "accessyear": u"anyaccés",
			"accessmonth": u"mesaccés",
		},
		# NOTE(review): this ordering is still the journal one. It names
		# parameters with no book translation (url, format, issn, journal,
		# exemplar, doi, ref) and omits the book-only ones; revise it before
		# running cites() on this entry.
		"sorting": (
			"author", "last", "last1", "first", "first1", "authorlink",
			"first2", "last2", "authorlink2", "first3", "last3", "authorlink3",
			"first4", "last4", "authorlink4", "coauthors", "date", "year",
			"month", "url", "title", "publisher", "format", "location", "quote",
			"language", "issn", "journal", "volume", "exemplar", "doi",
			"accessdate", "ref"
		),
		#params that aren't used in this language.
		"skip": (
			"editor1-link", "trans_title", "type", "at", "trans_chapter", "bibcode",
			"laysummary", "laydate", "author-mask", "author-name-separator",
			"author-separator", "display-authors", "separator", "postscript",
			"lastauthoramp", "origdate", "origyear", "nopp", "editor2-first",
			"editor2-last", "editor3-first", "editor3-last", "editor4-first",
			"editor4-last", "editor5-first", "editor5-last", "editor6-first",
			"editor6-last", "editor7-first", "editor7-last", "editor8-first",
			"editor8-last"
		),
		#the minimum params needed
		"least": (
			"nom", "cognom", u"títol", "editorial", "lloc",
			"data", u"pàgines", "isbn", "ref"
		),
		"unit": {
			"volum": "vol.",
			u"pàgines": u"pàg."
		}
	}
}

def cites(template):
	"""Rewrite a foreign citation template into its local equivalent.

	template -- key into the module-level `templates` table (for example
	            u"publicació").

	Every namespace-0 page transcluding the source template is fetched, each
	use of the template is rebuilt with translated parameter names in the
	configured order, and the old wikitext is replaced by the new one.
	Parameters that are skipped, positional or unknown are written to
	logs/transtemplate-<template>.log and are left out of the rewritten
	template. Reads the module-level `args`: verbose shows the diff, edit
	saves the page.
	"""
	spec = templates[template]
	source = spec["pair"][0]
	target = spec["pair"][1]
	translated_params = spec['tranlations']
	skip_params = spec['skip']
	unit = spec['unit']
	source_re = re.compile(to_regexp(source))
	page_count = 0
	# The context manager closes the log even when a page raises midway.
	with cs.open("logs/transtemplate-%s.log" % template, "a", "utf-8") as log:
		for page in get_refferred_page(u"Template:%s" % source):
			title = page.title()
			pywikilib.output(
				u"[%s] %i [[%s]]" % (time.strftime("%H:%M:%S"), page_count, title)
			)
			if page.namespace() != 0:
				continue
			page_count += 1
			old_text = new_text = page.get(get_redirect=True)
			for tpl in page.templatesWithParams():
				if not source_re.search(tpl[0]):
					continue
				tpl_dict = {}
				param_ptns = []
				for raw in tpl[1]:
					# Split on the first "=" only: values such as URLs may
					# contain further "=" signs.
					key, has_eq, value = raw.partition("=")
					key = key.strip()
					value = value.strip()
					if key in translated_params:
						local_key = translated_params[key]
						# Prefix the unit only to a non-empty value.
						if value and local_key in unit:
							value = u"%s %s" % (unit[local_key], value)
					elif key in skip_params:
						#unused keys
						log.write("[%s] [[%s]] skipped key: %s, value: %s\n" % (
							time.strftime("%H:%M:%S"), title, key, value)
						)
					elif not has_eq:
						# Positional argument: it has a value but no name.
						value = key
						key = ""
						log.write("[%s] [[%s]] argument: %s\n" % (
							time.strftime("%H:%M:%S"), title, value)
						)
					else:
						#unknown field
						log.write("[%s] [[%s]] unknown key: %s, value: %s\n" % (
							time.strftime("%H:%M:%S"), title, key, value)
						)
					stripped = raw.strip()
					# An empty parameter contributes an empty pattern.
					param_ptns.append(to_regexp(stripped, "") if stripped else "")
					tpl_dict[key] = value

				# Regexp matching this exact template call in the wikitext,
				# tolerating whitespace around the name and the pipes.
				tpl_ptn = r"(?s)\{\{%s\|\s*%s\s*\}\}" % (
					to_regexp(tpl[0], r"\s*"), r"\s*\|\s*".join(param_ptns)
				)

				#check that the minimum params are all included.
				# The required names are local ones, so translate the keys
				# found on the page before comparing.
				present = set(translated_params.get(key, key) for key in tpl_dict)
				missing = [key for key in spec['least'] if key not in present]
				if missing:
					log.write(
						"[%s] [[%s]] missing keys: %s\n" % (
							time.strftime("%H:%M:%S"), title, ",".join(missing)
						)
					)

				#create the new cite template sorting fields.
				fields = [
					u"%s=%s" % (translated_params[key], tpl_dict[key])
					for key in spec["sorting"]
					if key in tpl_dict and key in translated_params
				]
				cite = u"{{%s}}" % u"|".join([target] + fields)

				#replacing the old cite template with the new one
				# A callable replacement keeps backslashes in the citation
				# from being read as group references.
				new_text = re.sub(tpl_ptn, lambda match: cite, new_text)

			changed = old_text != new_text
			if args.verbose and changed:
				pywikilib.showDiff(old_text, new_text)
			# Only save when something was actually rewritten.
			if args.edit and changed:
				page.put(new_text, u"Bot: traduïnt {{%s}} a {{%s}}" %(source, target))
			log.flush()

def main():
	"""Run the task selected on the command line and print the elapsed time.

	Reads the module-level `args` and `init_ts`. With the async flag set,
	pywikilib.async_put() is called first. Then either cites() or link_FGA()
	runs (cites takes precedence); a KeyboardInterrupt is caught and
	reported. Finally the time elapsed since `init_ts` is printed, leaving
	out every component that is zero.
	"""
	# getattr() because "async" is a reserved word in newer Python versions.
	if getattr(args, "async"):
		pywikilib.async_put()
	try:
		if args.cites:
			cites(u"publicació")
		elif args.linkFGA:
			link_FGA()

	except KeyboardInterrupt:
		print("cancelled by user")

	elapsed = timedelta(datetime.now() - datetime.fromtimestamp(init_ts))
	line = ""
	for layout, amount in zip(("%i d", " %i h", " %i m", " %i s"), elapsed):
		if amount:
			line += layout % amount
	print(line.strip())

if __name__ == '__main__':
	# Command-line options, in the order they are registered:
	# (option strings, keyword arguments for add_argument).
	_switch = {"action": "store_true", "default": False}
	_option_table = (
		(("--async", "-A"), _switch),
		(("--cites", "-c"), _switch),
		(("--edit", "-E"), _switch),
		(("--limit", "-L"), {"type": int}),
		(("--linkFGA", "-l"), _switch),
		(("--page", "-p"), {}),
		(("--pages", "-P"), {"nargs": "+"}),
		(("--test", "-T"), _switch),
		(("--verbose", "-v"), _switch),
	)
	parser = argparse.ArgumentParser()
	for _flags, _settings in _option_table:
		parser.add_argument(*_flags, **_settings)
	# `args` and `init_ts` are module globals read by main() and the tasks.
	args = parser.parse_args()
	init_ts = time.time()
	main()
	pywikilib.stopme()