Usuari:TronaBot/Python/treulallengua.py
< Usuari:TronaBot | Python
#!/usr/bin/python2.7
#-*- coding:utf8 -*-
#
u"""
treulallengua.py
Replace the English (and French) templates with our local Catalan ones.
* Link GA → Enllaç AB
* Link FA → Enllaç AD
* Link FL → Enllaç AD
From French:
* Lien BA → Enllaç AB
Other translations:
* Cite journal → Citar publicació
"""
import argparse, codecs as cs, os, re, sys, time
from datetime import datetime
path = os.path.join(os.path.split(os.getcwd())[:-1][0])
sys.path.append(path)
import query as api, wikipedia as pywikilib, pagegenerators as pg
def timedelta(td):
    """Split a ``datetime.timedelta`` into days, hours, minutes and seconds.

    The hour, minute and second components are derived from the object's
    ``.seconds`` attribute; the day count is taken from ``.days``.

    Returns a 4-tuple ``(days, hours, minutes, seconds)``.
    """
    whole_minutes, secs = divmod(td.seconds, 60)
    hrs, mins = divmod(whole_minutes, 60)
    return td.days, hrs, mins, secs
def to_regexp(s, sep=" *", sensitive=True):
    """Turn a wiki title into a regular-expression fragment.

    Every regex metacharacter in *s* is backslash-escaped and the result is
    wrapped in *sep* on both sides.

    s         -- the title (or any literal text) to convert.
    sep       -- regex fragment placed before and after the escaped text.
    sensitive -- when true, the first character is matched in either case
                 (e.g. ``cite`` becomes ``[Cc]ite``); the remaining characters
                 are always matched exactly.

    Returns the pattern as a (unicode) string. An empty *s* yields ``sep + sep``.
    """
    # Backslash, alternation and anchors are escaped too; otherwise they keep
    # their regex meaning and corrupt the pattern.
    special = "\\.?*+[](){}|^$"
    pieces = [("\\" + ch) if ch in special else ch for ch in s]
    if sensitive and pieces:
        # Case-fold on the ORIGINAL first character, never on the backslash of
        # an escaped one. Characters without case distinction stay as they are.
        upper, lower = s[0].upper(), s[0].lower()
        if upper != lower:
            pieces[0] = "[%s%s]" % (upper, lower)
    return u"%s%s%s" % (sep, u"".join(pieces), sep)
def get_refferred_page(page):
    """Return a generator of namespace-0 pages that transclude *page*.

    page -- full title of the referred page, e.g. "Template:Link FA".

    Only template inclusions are followed (``onlyTemplateInclusion=True``) and
    the result is filtered down to namespace 0.
    """
    target = pywikilib.Page(pywikilib.getSite(), page)
    transcluding = pg.ReferringPageGenerator(target, onlyTemplateInclusion=True)
    # The generator is returned without preloading; wrapping it in
    # pg.PreloadingGenerator(..., pageNumber=50) was considered by the author.
    return pg.NamespaceFilterPageGenerator(transcluding, 0)
# One featured/good-article link template (English, French or Catalan name,
# first letter in either case, space or underscore inside the name) together
# with the whitespace that follows it.
_LINK_TPL_RE = re.compile(
    r"\{\{ *(?P<tpl>[Ll]ink[ _][FG]A|[Ll]ien[ _]BA|[Ee]nlla" + u"ç"
    + r"[ _]A[BD]) *\|(?P<lang>[^\}]+?)\}\}\s*"
)
# A line that starts with a category link or an interwiki-style link.
_CAT_OR_INTERWIKI_RE = re.compile(
    r"^\[\[ *(?:[Cc]ategor(?:y|ia)|[a-z\-]{2,}) *:"
)


def _relocate_link_templates(text):
    """Translate, deduplicate, sort and relocate the FA/GA link templates.

    Returns ``(new_text, anchor_line)``. ``anchor_line`` is the line after
    which the templates were inserted. When *text* holds no link template, or
    no anchor line can be found, *text* is returned unchanged with an empty
    anchor.
    """
    rendered = set()
    for found in _LINK_TPL_RE.finditer(text):
        # Classify by the two-letter suffix so that "link GA", "Link_GA",
        # "Lien BA" and "Enllaç AB" all map to AB; everything else is AD.
        kind = "AB" if found.group("tpl")[-2:] in ("GA", "BA", "AB") else "AD"
        rendered.add(u"{{Enllaç %s|%s}}" % (kind, found.group("lang").strip()))
    if not rendered:
        return text, u""

    # Remove every occurrence (no count limit).
    stripped = _LINK_TPL_RE.sub(u"", text)

    good = sorted(l for l in rendered if l.startswith(u"{{Enllaç AB|"))
    featured = sorted(l for l in rendered if l.startswith(u"{{Enllaç AD|"))
    block = u"\n%s\n" % u"\n".join(good + featured)

    # Walk backwards to the last line that is neither blank nor a
    # category/interwiki link: the templates go right after it.
    plain = stripped.splitlines()
    with_ends = stripped.splitlines(True)
    anchor = None
    for idx in range(len(plain) - 1, -1, -1):
        candidate = plain[idx]
        if candidate.strip() and not _CAT_OR_INTERWIKI_RE.search(candidate):
            anchor = idx
            break
    if anchor is None:
        # Nothing to attach the templates to; leave the page alone.
        return text, u""

    line_before = plain[anchor]
    separator = ""
    if not line_before.startswith("{{") and not line_before.endswith("}}"):
        separator = "\n"
    # Insert by character offset of the anchor line itself, so an earlier
    # line with identical text is never touched.
    offset = sum(len(chunk) for chunk in with_ends[:anchor]) + len(line_before)
    new_text = stripped[:offset] + separator + block + stripped[offset:]
    return new_text, line_before


def link_FGA():
    """Normalise the featured/good-article link templates of every article.

    For each page transcluding one of the known link templates:
      * duplicated links are collapsed,
      * foreign template names are translated to the local ones
        (Enllaç AB / Enllaç AD),
      * the templates are sorted (AB first, then AD),
      * and they are relocated before the trailing block of categories and
        interwiki links.

    Reads the module-level ``args``: ``page``/``pages`` restrict the titles
    handled, ``limit`` caps the number of pages processed, ``verbose`` prints
    a diff and ``edit`` saves the page.
    """
    processed = 0
    for template in ("Link GA", "Link FA", "Lien BA", u"Enllaç AB", u"Enllaç AD"):
        if args.limit and processed >= args.limit:
            break
        for page in get_refferred_page("Template:%s" % template):
            if args.page and page.title() != args.page:
                continue
            if args.pages and page.title() not in args.pages:
                continue
            if args.limit and processed >= args.limit:
                break
            processed += 1
            old_text = page.get()
            new_text, line_before = _relocate_link_templates(old_text)
            if old_text == new_text:
                continue
            if args.verbose:
                pywikilib.output(u"\n\n[%s] %i [[%s]]" % (time.strftime("%H:%M:%S"), processed, page.title()))
                pywikilib.output(u"LINE BEFORE: %s" % (line_before,))
                pywikilib.showDiff(old_text, new_text)
            if args.edit:
                summary = u"Bot: s'ha ordenat i traduït les plantilles d'enllaços d'articles bons i articles destacats."
                # Small size differences are reported as cosmetic changes.
                if len(old_text) - len(new_text) <= 2:
                    summary = u"Bot: canvis estètics en relació a les plantilles d'enllaços d'articles bons i articles destacats"
                page.put(new_text, summary)
# Per-template translation tables, keyed by a short local name.
# Each entry holds:
#   "pair"        -- (source template name, local template name)
#   "tranlations" -- source parameter name -> local parameter name
#                    (the key is misspelled, but cites() reads exactly this
#                    spelling, so it is kept)
#   "sorting"     -- SOURCE parameter names in the order they are written out
#   "skip"        -- source parameters that aren't used in this language
#   "least"       -- the minimum (local) parameters needed
#   "unit"        -- local parameter name -> prefix put before its value
templates = {
    #cite journal → citar publicació
    u"publicació": {
        "pair": ("cite journal", u"citar publicació"),
        "tranlations": {
            "author": "autor",
            "last": r"cognom",
            "last1": r"cognom",
            "first": "nom",
            "first1": "nom",
            "first2": "nom2",
            "last2": r"cognom2",
            "first3": "nom3",
            "last3": r"cognom3",
            "first4": "nom4",
            "last4": r"cognom4",
            "authorlink": u"enllaçautor",
            "authorlink2": u"enllaçautor2",
            "authorlink3": u"enllaçautor3",
            "authorlink4": u"enllaçautor4",
            "coauthors": "coautors",
            "date": "data",
            "year": "any",
            "month": "mes",
            "url": "url",
            "title": "article",
            "publisher": "editorial",
            "format": "format",
            "location": "lloc",
            "quote": u"citació",
            "language": "llengua",
            "issn": "issn",
            "journal": u"publicació",
            "volume": "volum",
            "issue": "exemplar",
            "pages": u"pàgines",
            "doi": "doi",
            "accessdate": "consulta",
            "ref": "ref"
        },
        # Source-side names: "issue" (not its translation "exemplar") and
        # "pages" must be listed here or they are dropped from the output.
        "sorting": (
            "author", "last", "last1", "first", "first1", "authorlink",
            "first2", "last2", "authorlink2", "first3", "last3", "authorlink3",
            "first4", "last4", "authorlink4", "coauthors", "date", "year",
            "month", "url", "title", "publisher", "format", "location", "quote",
            "language", "issn", "journal", "volume", "issue", "pages", "doi",
            "accessdate", "ref"
        ),
        #params that aren't used in this language.
        "skip": (
            "editor-last", "editor-first", "editor-link", "year", "month",
            "trans_title", "series", "type", "arxiv", "oclc", "pmid", "pmc",
            "bibcode", "archiveurl", "archivedate", "laysource", "laysummary",
            "laydate", "separator", "postscript"
        ),
        #the minimum params needed
        "least": (
            "cognom", "nom", "article", u"publicació",
            "volum", "data", u"pàgines"
        ),
        "unit": {
            "volum": "vol.",
            u"pàgines": u"pàg."
        }
    },
    #cite book → citar llibre
    u"llibre": {
        "pair": ("cite book", u"citar llibre"),
        "tranlations": {
            #author
            "author": "autor", "author1": u"autor", "authorlink": u"enllaçautor",
            "authorlink1": u"enllaçautor", "coauthors": "coautors",
            "coauthor": "coautors", "first": "nom", "first1": "nom",
            "given": "nom", "last1": "cognom", "last": "cognom", "surname": "cognom",
            #author2
            "first2": "nom2", "given2": "nom2", "last2": "cognom2",
            "surname2": "cognom2", "authorlink2": u"enllaçautor2",
            #author3
            "first3": "nom3", "given3": "nom3", "last3": "cognom3",
            "surname3": "cognom3", "authorlink3": u"enllaçautor3",
            #author4
            "first4": "nom4", "given4": "nom4", "last4": "cognom4",
            "surname4": "cognom4",
            #author5
            "first5": "nom5", "given5": "nom5", "last5": "cognom5",
            "surname5": "cognom5",
            #author6
            "first6": "nom6", "given6": "nom6", "last6": "cognom6",
            "surname6": "cognom6",
            #other contributors
            "others": "altres",
            #pointing
            "chapter": u"capítol", "chapterurl": u"urlcapítol",
            "title": u"títol", "volume": "volum", "edition": u"edició",
            "series": u"col·lecció", "serie": u"col·lecció",
            "page": u"pàgina", "pages": u"pàgines", "quote": u"citació",
            #lang, publisher, loc
            "language": "llengua", "publisher": "editorial",
            "location": "lloc", "place": "lloc",
            #date
            "date": "data", "year": "any", "month": "mes",
            #archive date
            "origdate": "dataarxiu", "origyear": "anyarxiu",
            "origmonth": "mesarxiu", "archiveurl": "url",
            "archivedate": u"dataarxiu",
            #url access data
            "accessdate": "consulta", "accessyear": u"anyaccés",
            "accessmonth": u"mesaccés",
        },
        # NOTE(review): this ordering looks copied from the journal entry; it
        # names keys that have no translation above and omits book-specific
        # ones (chapter, edition, ...). Confirm before enabling book support.
        "sorting": (
            "author", "last", "last1", "first", "first1", "authorlink",
            "first2", "last2", "authorlink2", "first3", "last3", "authorlink3",
            "first4", "last4", "authorlink4", "coauthors", "date", "year",
            "month", "url", "title", "publisher", "format", "location", "quote",
            "language", "issn", "journal", "volume", "exemplar", "doi",
            "accessdate", "ref"
        ),
        #params that aren't used in this language.
        "skip": (
            "editor1-link", "trans_title", "type", "at", "trans_chapter", "bibcode",
            "laysummary", "laydate", "author-mask", "author-name-separator",
            "author-separator", "display-authors", "separator", "postscript",
            "lastauthoramp", "origdate", "origyear", "nopp", "editor2-first",
            "editor2-last", "editor3-first", "editor3-last", "editor4-first",
            "editor4-last", "editor5-first", "editor5-last", "editor6-first",
            "editor6-last", "editor7-first", "editor7-last", "editor8-first",
            "editor8-last"
        ),
        #the minimum params needed
        "least": (
            "nom", "cognom", u"títol", "editorial", "lloc",
            "data", u"pàgines", "isbn", "ref"
        ),
        "unit": {
            "volum": "vol.",
            u"pàgines": u"pàg."
        }
    }
}
def cites(template):
    """Translate a citation template to its local equivalent on every article.

    template -- key of the module-level ``templates`` table
                (e.g. u"publicació").

    For each namespace-0 page transcluding the source template, every
    occurrence is rewritten with the local template name, translated
    parameter names (in the table's "sorting" order) and unit prefixes.
    Skipped, positional and unknown parameters, and missing minimum
    parameters, are appended to ``logs/transtemplate-<template>.log``.

    Reads the module-level ``args``: ``verbose`` prints a diff, ``edit`` saves
    the page. Pages whose text did not change are neither shown nor saved.
    """
    spec = templates[template]
    source = spec["pair"][0]
    target = spec["pair"][1]
    translated_params = spec['tranlations']
    skip_params = spec['skip']
    unit = spec['unit']
    # Anchored at the end and used with match(), so that only the source
    # template itself is accepted, not a longer name containing it.
    source_re = re.compile(to_regexp(source) + "$")

    if not os.path.isdir("logs"):
        os.makedirs("logs")
    log = cs.open("logs/transtemplate-%s.log" % template, "a", "utf-8")
    try:
        page_count = 0
        for page in get_refferred_page(u"Template:%s" % source):
            pywikilib.output(
                u"[%s] %i [[%s]]" % (
                    time.strftime("%H:%M:%S"), page_count, page.title()
                )
            )
            if page.namespace() != 0:
                continue
            page_count += 1
            old_text = new_text = page.get(get_redirect=True)
            for tpl in page.templatesWithParams():
                if not source_re.match(tpl[0]):
                    continue
                #building a regexp pattern for the cite template
                tpl_ptn = r"(?s)\{\{%s\|\s*" % to_regexp(tpl[0], r"\s*")
                tpl_dict = {}
                total = len(tpl[1])
                for position, params in enumerate(tpl[1], 1):
                    raw = params.strip()
                    # Split on the FIRST "=" only: values such as URLs may
                    # contain further "=" characters.
                    name, has_eq, rest = raw.partition("=")
                    if has_eq:
                        key, value = name.strip(), rest.strip()
                    else:
                        # positional argument: no name, the text is the value
                        key, value = u"", raw
                    stamp = time.strftime("%H:%M:%S")
                    if not has_eq:
                        log.write("[%s] [[%s]] argument: %s\n" % (
                            stamp, page.title(), value)
                        )
                    elif key in translated_params:
                        local_key = translated_params[key]
                        # only prefix the unit when there is a value to prefix
                        if value and local_key in unit:
                            value = u"%s %s" % (unit[local_key], value)
                    elif key in skip_params:
                        #unused keys
                        log.write("[%s] [[%s]] skipped key: %s, value: %s\n" % (
                            stamp, page.title(), key, value)
                        )
                    else:
                        #unknown field
                        log.write("[%s] [[%s]] unknown key: %s, value: %s\n" % (
                            stamp, page.title(), key, value)
                        )
                    tail = r"\s*" if position == total else r"\s*\|\s*"
                    # The parameter text is matched literally.
                    tpl_ptn += u"%s%s" % (re.escape(raw), tail)
                    tpl_dict[key] = value
                tpl_ptn += r"\}\}"

                #check that the minimum params are all included.
                # "least" lists LOCAL names, so translate before comparing.
                present = set(
                    translated_params[key] for key in tpl_dict
                    if key in translated_params
                )
                missing = [key for key in spec['least'] if key not in present]
                if missing:
                    log.write(
                        "[%s] [[%s]] missing keys: %s\n" % (
                            time.strftime("%H:%M:%S"),
                            page.title(), ",".join(missing)
                        )
                    )

                #create the new cite template sorting fields.
                fields = [
                    u"%s=%s" % (translated_params[key], tpl_dict[key])
                    for key in spec["sorting"]
                    if key in tpl_dict and key in translated_params
                ]
                cite = u"{{%s}}" % u"|".join([target] + fields)

                #replacing the old cite template with the new one
                # A callable replacement keeps backslashes in values literal.
                new_text = re.sub(tpl_ptn, lambda found, text=cite: text, new_text)

            if old_text != new_text:
                if args.verbose:
                    pywikilib.showDiff(old_text, new_text)
                if args.edit:
                    page.put(new_text, u"Bot: traduïnt {{%s}} a {{%s}}" % (source, target))
            log.flush()
    finally:
        # close the log even when the run is interrupted or a page fails
        log.close()
def main():
    """Run the task selected on the command line and print the elapsed time.

    Reads the module-level ``args`` and ``init_ts``. ``--cites`` takes
    precedence over ``--linkFGA``. A keyboard interrupt stops the task and is
    reported instead of propagating.
    """
    # "async" is looked up by name rather than by attribute syntax.
    if getattr(args, "async"):
        pywikilib.async_put()
    try:
        if args.cites:
            cites(u"publicació")
        elif args.linkFGA:
            link_FGA()
    except KeyboardInterrupt:
        print("cancelled by user")
    elapsed = datetime.now() - datetime.fromtimestamp(init_ts)
    days, hours, minutes, seconds = timedelta(elapsed)
    # Components equal to zero are left out of the report.
    pieces = (
        "%i d" % days if days else "",
        " %i h" % hours if hours else "",
        " %i m" % minutes if minutes else "",
        " %i s" % seconds if seconds else "",
    )
    line = "%s%s%s%s" % pieces
    print(line.strip())
if __name__ == '__main__':
    # Command-line interface. ``args`` and ``init_ts`` are module globals read
    # by main(), link_FGA() and cites().
    parser = argparse.ArgumentParser()
    # main() calls pywikilib.async_put() when set
    parser.add_argument("--async", "-A", action="store_true", default=False)
    # run cites()
    parser.add_argument("--cites", "-c", action="store_true", default=False)
    # save the modified pages
    parser.add_argument("--edit", "-E", action="store_true", default=False)
    # maximum number of pages handled by link_FGA()
    parser.add_argument("--limit", "-L", type=int)
    # run link_FGA()
    parser.add_argument("--linkFGA", "-l", action="store_true", default=False)
    # restrict link_FGA() to one title / to a list of titles
    parser.add_argument("--page", "-p")
    parser.add_argument("--pages", "-P", nargs="+")
    # NOTE(review): not read anywhere in the visible source
    parser.add_argument("--test", "-T", action="store_true", default=False)
    # show a diff of every change
    parser.add_argument("--verbose", "-v", action="store_true", default=False)
    args = parser.parse_args()
    init_ts = time.time()
    try:
        main()
    finally:
        # always release the pywikipedia session, even if main() raised
        pywikilib.stopme()