Usuari:TronaBot/Python/ortobot.py
< Usuari:TronaBot | Python
Mòdul article requerix el fitxer Usuari:TronaBot/Python/common.py.
# -*- coding: utf-8 -*-
import codecs
import os
import random
import re
import sys
import time
import webbrowser
from platform import system as platfsys
# Pick the base directory for the current machine and make the library
# folders found under it importable (the imports below rely on this).
on_win = platfsys().lower() == "windows"
if on_win:
    home = r"E:\\iShare\SugarSync\My Python scripts"
else:
    home = "/home/pasqual/public_html/"
for folder in ("pywikilib", "pyuserlib"):
    sys.path.append(os.path.join(home, folder))
#pywikilib
import wikipedia as pywikilib, pagegenerators as pg
from replace import ReplaceRobot
from pywikibot import i18n
import editarticle
# Imports predefined replacements tasks from fixes.py
import fixes
#user implementations
from common import ArgumentHandler, File
from common import format_string, get_diffs
class Replacing(ReplaceRobot):
    """
    Replace robot that counts the changes made per replacement topic and
    writes those counts into the edit summary.

    Each replacement is a 3-tuple ``(old, new, topic)``: ``old`` is the
    pattern handed to ``replaceExcept``, ``new`` is the substitution (or a
    list of substitutions, one of which is picked at random) and ``topic``
    is the key under which the resulting changes are counted.
    """

    # "<:de:> word" is a placeholder left by replacements; it is resolved to
    # "d'word" when the word starts with a vowel or h, "de word" otherwise.
    _DE_MARKER = re.compile(u"<:de:> (\\w+)", re.U)
    # a e i o u, their accented forms (à è é í ò ó ú) and h, in any case.
    _ELIDING_INITIAL = re.compile(
        u"[aeiou\xe0\xe8\xe9\xed\xf2\xf3\xfah]", re.U | re.I)

    def __init__(
        self, generator, replacements, exceptions=None, acceptall=False,
        allowoverlap=False, recursive=False, addedCat=None, sleep=None,
        editSummary='', articles=None, exctitles=None, categories=None
    ):
        """
        Arguments:
            generator    - iterable yielding the pages to process
            replacements - list of (old, new, topic) tuples
            exceptions   - dict of exception lists; the keys 'inside' and
                           'inside-tags' are read by doReplacements()
            acceptall    - save every change without asking
            allowoverlap - passed through to replaceExcept()
            recursive    - repeat the replacements until the text is stable
            addedCat     - title of a category to add to changed pages
            sleep        - seconds to wait before each replacement, or None
            editSummary  - prefix of the generated edit summary
            articles     - file object; when given, titles are written to
                           it instead of saving the pages
            exctitles    - file object receiving titles excepted by the user
            categories   - dict mapping a topic to the label shown for it in
                           the edit summary (optional)
        """
        # Avoid sharing one dict between instances through the default.
        if exceptions is None:
            exceptions = {}
        self.generator = generator
        self.replacements = replacements
        self.exceptions = exceptions
        self.acceptall = acceptall
        self.allowoverlap = allowoverlap
        self.recursive = recursive
        if addedCat:
            # The library is imported under the name pywikilib in this file.
            site = pywikilib.getSite()
            self.addedCat = pywikilib.Page(site, addedCat, defaultNamespace=14)
        self.sleep = sleep
        # Some function to set default editSummary should probably be added
        self.edit_summary = editSummary
        self.articles = articles
        self.exctitles = exctitles
        # An edit counter to split the file by 100 titles if -save or -savenew
        # is on, and to display the number of edited articles otherwise.
        self.editcounter = 0
        # A counter for saved exceptions
        self.exceptcounter = 0
        ReplaceRobot.__init__(
            self, generator, replacements, exceptions, acceptall, allowoverlap,
            recursive, addedCat, sleep, editSummary, articles, exctitles
        )
        self.categories = categories
        # topic -> number of additions counted in the last doReplacements()
        self.counter = {}

    def _resolve_de(self, match):
        """Return "d'word" or "de word" for one "<:de:> word" placeholder."""
        word = match.group(1)
        if self._ELIDING_INITIAL.match(word):
            return u"d'%s" % word
        return u"de %s" % word

    def doReplacements(self, original_text):
        """
        Returns the text which is generated by applying all replacements to
        the given text.

        As a side effect the per-topic counters are reset and refilled, and
        when the text changed self.editSummary is rebuilt from them and
        printed.
        """
        new_text = old_text = original_text
        exceptions = []
        if "inside-tags" in self.exceptions:
            exceptions += self.exceptions['inside-tags']
        if "inside" in self.exceptions:
            exceptions += self.exceptions['inside']
        # Start counting from zero for this pass.
        for replacement in self.replacements:
            self.counter[replacement[-1]] = 0
        for old, new, topic in self.replacements:
            if isinstance(new, list):
                new = random.choice(new)
            if self.sleep is not None:
                time.sleep(self.sleep)
            new_text = pywikilib.replaceExcept(
                new_text, old, new, exceptions, allowoverlap=self.allowoverlap
            )
            if "<:de:>" in new_text:
                new_text = self._DE_MARKER.sub(self._resolve_de, new_text)
            if new_text != old_text:
                added, removed, kept = get_diffs(new_text, old_text)
                self.counter[topic] += len(added)
                old_text = new_text
        if new_text != original_text:
            # categories is optional (None when no fix supplies it).
            labels_by_topic = self.categories or {}
            counted = sorted(
                (count, topic) for topic, count in self.counter.items()
                if count > 0
            )
            labels = [
                "%s (%i)" % (labels_by_topic.get(topic, topic), count)
                for count, topic in counted
            ]
            # "a, b i c"; a changed text with no counted additions yields an
            # empty listing instead of failing on labels[-1].
            if len(labels) > 1:
                listing = " i ".join([", ".join(labels[:-1]), labels[-1]])
            else:
                listing = "".join(labels)
            self.editSummary = format_string(
                "$1 $2", self.edit_summary, listing)
            msg = format_string("&ysummary: $1", self.editSummary)
            pywikilib.output(msg)
        return new_text

    def run(self):
        """
        Starts the robot.
        """
        # Run the generator which will yield Pages which might need to be
        # changed.
        for page in self.generator:
            if self.isTitleExcepted(page.title()):
                pywikilib.output(
                    u'Skipping %s because the title is on the exceptions list.'
                    % page.title(asLink=True))
                continue
            # Remember the page being processed (read back by the resume
            # option). Best effort: a failure here must not stop the run,
            # but Ctrl-C still has to get through.
            try:
                File("lastpage", "ortobot").backup(page.title())
            except Exception:
                pass
            try:
                # Load the page's text from the wiki
                original_text = page.get(get_redirect=True)
                if not (self.articles or page.canBeEdited()):
                    pywikilib.output(u"You can't edit page %s"
                                     % page.title(asLink=True))
                    continue
            except pywikilib.NoPage:
                pywikilib.output(u'Page %s not found' % page.title(asLink=True))
                continue
            new_text = original_text
            while True:
                if self.isTextExcepted(new_text):
                    pywikilib.output(
                        u'Skipping %s because it contains text that is on the exceptions list.'
                        % page.title(asLink=True))
                    break
                new_text = self.doReplacements(new_text)
                if new_text == original_text:
                    pywikilib.output(u'No changes were necessary in %s'
                                     % page.title(asLink=True))
                    break
                if self.recursive:
                    # Repeat until a pass leaves the text untouched.
                    newest_text = self.doReplacements(new_text)
                    while newest_text != new_text:
                        new_text = newest_text
                        newest_text = self.doReplacements(new_text)
                if hasattr(self, "addedCat"):
                    cats = page.categories()
                    if self.addedCat not in cats:
                        cats.append(self.addedCat)
                        new_text = pywikilib.replaceCategoryLinks(new_text,
                                                                  cats)
                # Show the title of the page we're working on.
                # Highlight the title in purple.
                pywikilib.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                                 % page.title())
                pywikilib.showDiff(original_text, new_text)
                if self.acceptall:
                    break
                answers = ['Yes', 'No', 'Edit', 'open in Browser', 'All',
                           'Quit']
                hotkeys = ['y', 'N', 'e', 'b', 'a', 'q']
                if self.exctitles:
                    # Offer "no + except" only when an exceptions file exists.
                    answers.insert(2, 'no+eXcept')
                    hotkeys.insert(2, 'x')
                choice = pywikilib.inputChoice(
                    u'Do you want to accept these changes?',
                    answers, hotkeys, 'N')
                if choice == 'e':
                    editor = editarticle.TextEditor()
                    as_edited = editor.edit(original_text)
                    # if user didn't press Cancel
                    if as_edited and as_edited != new_text:
                        new_text = as_edited
                    continue
                if choice == 'b':
                    webbrowser.open("http://%s%s" % (
                        page.site.hostname(),
                        page.site.nice_get_address(page.title())
                    ))
                    # Same 'pywikibot-' key prefix as the other i18n prompts.
                    i18n.input('pywikibot-enter-finished-browser')
                    try:
                        original_text = page.get(get_redirect=True, force=True)
                    except pywikilib.NoPage:
                        pywikilib.output(u'Page %s has been deleted.'
                                         % page.title())
                        break
                    new_text = original_text
                    continue
                if choice == 'q':
                    self.writeEditCounter()
                    self.writeExceptCounter()
                    return
                if choice == 'a':
                    self.acceptall = True
                if choice == 'x':  # May happen only if self.exctitles isn't None
                    self.exctitles.write(
                        u"ur'^%s$',\n" % re.escape(page.title()))
                    self.exctitles.flush()
                    self.exceptcounter += 1
                if choice == 'y':
                    if not self.articles:
                        # Primary behaviour: working on wiki
                        page.put_async(new_text, self.editSummary)
                        self.editcounter += 1
                        # Bug: this increments even if put_async fails
                        # This is separately in two clauses of if for
                        # future purposes to get feedback form put_async
                    else:
                        # Save the title for later processing instead of editing
                        self.editcounter += 1
                        self.articles.write(u'#%s\n%s'
                                            % (page.title(asLink=True, textlink=True),
                                               self.splitLine()))
                        self.articles.flush()  # For the peace of our soul :-)
                # choice must be 'N'
                break
            if self.acceptall and new_text != original_text:
                if not self.articles:
                    # Primary behaviour: working on wiki
                    try:
                        page.put(new_text, self.editSummary)
                        self.editcounter += 1  # increment only on success
                    except pywikilib.EditConflict:
                        pywikilib.output(u'Skipping %s because of edit conflict'
                                         % (page.title(),))
                    except pywikilib.SpamfilterError as e:
                        pywikilib.output(
                            u'Cannot change %s because of blacklist entry %s'
                            % (page.title(), e.url))
                    # Tested before PageNotSaved so this handler stays
                    # reachable should LockedPage derive from it.
                    except pywikilib.LockedPage:
                        pywikilib.output(u'Skipping %s (locked page)'
                                         % (page.title(),))
                    except pywikilib.PageNotSaved as error:
                        pywikilib.error(u'putting page: %s'
                                        % (error.args,))
                else:
                    # Save the title for later processing instead of editing
                    self.editcounter += 1
                    self.articles.write(u'#%s\n%s'
                                        % (page.title(asLink=True, textlink=True),
                                           self.splitLine()))
                    self.articles.flush()
        # Finally:
        self.writeEditCounter()
        self.writeExceptCounter()
def main():
    """
    Configure a Replacing bot from the parsed command line and run it.

    Reads the module-level ``args`` object (the ArgumentHandler created in
    the ``__main__`` section). Replacements come either from the command
    line / a replacement file, from interactive prompts, or from a
    predefined entry of ``fixes.fixes``. Returns None; returns early when
    the fix is unknown, no page generator could be built, or an output file
    cannot be opened.

    Raises pywikilib.Error for an odd number of replacement arguments, for
    replacements combined with -fix, and for an unreadable replacement file.
    """
    def ask_if_flag(value, prompt):
        # An option given without a value is parsed as a bool: ask for it.
        if isinstance(value, bool):
            return pywikilib.input(prompt)
        return value

    add_cat = None
    gen = None
    # summary message
    summary_commandline = False
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of (original text, replacement text[, topic]) tuples.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title': [],
        'text-contains': [],
        'inside': [],
        'inside-tags': [],
        'require-title': [],  # using a separate requirements dict needs some
    }                         # major refactoring of code.
    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fix = None
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    # Title of the dumped article to start with (None: from the beginning).
    xmlStart = None
    useSql = False
    PageTitles = []
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # This is the maximum number of pages to load per query
    maxquerysize = 60
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pg.GeneratorFactory()
    # Load default summary message.
    # BUG WARNING: This is probably incompatible with the -lang parameter.
    editSummary = i18n.twtranslate(pywikilib.getSite(), 'replace-replacing',
                                   {'description': u''})
    # Between a regex and another (using -fix) sleep some time (not to waste
    # too much CPU
    sleep = None
    # Do not save the page titles, rather work on wiki
    filename = None   # The name of the file to save titles
    titlefile = None  # The file object itself
    # If we save, primary behaviour is append rather then new file
    append = True
    # Default: don't write titles to exception file and don't read them.
    excoutfilename = None  # The name of the file to save exceptions
    excoutfile = None      # The file object itself
    # excinfilename: reserved for later use (reading back exceptions)
    # If we save exceptions, primary behaviour is append
    excappend = True
    # Topic -> abridged label used when building the edit summary.
    categories = None

    # Read commandline parameters.
    if args.regex:
        regex = True
    if args.xmlstart:
        xmlStart = ask_if_flag(
            args.xmlstart,
            u'Please enter the dumped article to start with:')
    if args.xml:
        if isinstance(args.xml, bool):
            xmlFilename = i18n.input('pywikibot-enter-xml-filename')
        else:
            xmlFilename = args.xml
    if args.sql:
        useSql = True
    if args.page:
        PageTitles = args.page if isinstance(args.page, list) else [args.page]
        # Iterate over a copy: the list is modified while asking for titles.
        for page in list(PageTitles):
            if isinstance(page, bool):
                PageTitles.remove(page)
                PageTitles.append(pywikilib.input(
                    u'Which page do you want to change?'))
    if args.saveexcnew:
        excappend = False
        excoutfilename = ask_if_flag(
            args.saveexcnew,
            u'Please enter the filename to save the excepted titles' +
            u'\n(will be deleted if exists):')
    if args.saveexc:
        excoutfilename = ask_if_flag(
            args.saveexc,
            u'Please enter the filename to save the excepted titles:')
    if args.savenew:
        append = False
        filename = ask_if_flag(
            args.savenew,
            u'Please enter the filename to save the titles' +
            u'\n(will be deleted if exists):')
    if args.save:
        filename = ask_if_flag(
            args.save,
            u'Please enter the filename to save the titles:')
    if args.replacementfile:
        replacefile = ask_if_flag(
            args.replacementfile,
            u'Please enter the filename to read replacements from:')
        try:
            source = codecs.open(replacefile, 'r', 'utf-8')
            try:
                # Drop a leading byte order mark and the line endings.
                commandline_replacements.extend(
                    [x.lstrip(u'\uFEFF').rstrip('\r\n') for x in source])
            finally:
                source.close()
        except IOError:
            raise pywikilib.Error(
                '\n%s cannot be opened. Try again :-)' % replacefile)
    if args.excepttitle:
        exceptions['title'] = args.excepttitle
    if args.requiretitle:
        exceptions['require-title'] = args.requiretitle
    if args.excepttext:
        exceptions['text-contains'] = args.excepttext
    if args.exceptinside:
        exceptions['inside'] = args.exceptinside
    if args.exceptinsidetag:
        exceptions['inside-tags'] = args.exceptinsidetag
    if args.fix:
        fix = args.fix
    if args.sleep:
        sleep = args.sleep
    if args.always:
        acceptall = True
    if args.recursive:
        recursive = True
    if args.nocase:
        caseInsensitive = True
    if args.dotall:
        dotall = True
    if args.multiline:
        multiline = True
    if args.addcat:
        add_cat = args.addcat
    if args.summary:
        editSummary = args.summary
        summary_commandline = True
    if args.allowoverlap:
        allowoverlap = True
    if args.query:
        maxquerysize = args.query
    # Anything the generator factory does not consume and that is not an
    # option is taken as replacement text.
    for arg in args.raw:
        if not genFactory.handleArg(arg) and not arg.startswith("-"):
            commandline_replacements.append(arg)

    if pywikilib.verbose:
        pywikilib.output(u"commandline_replacements: " +
                         ', '.join(commandline_replacements))

    if len(commandline_replacements) % 2:
        raise pywikilib.Error('require even number of replacements.')
    elif len(commandline_replacements) == 2 and fix is None:
        replacements.append(
            (commandline_replacements[0], commandline_replacements[1])
        )
        if not summary_commandline:
            editSummary = i18n.twtranslate(
                pywikilib.getSite(),
                'replace-replacing',
                {
                    'description': ' (-%s +%s)' % (
                        commandline_replacements[0],
                        commandline_replacements[1]
                    )
                }
            )
    elif len(commandline_replacements) > 1:
        if fix is not None:
            raise pywikilib.Error(
                'Specifying -fix with replacements is undefined'
            )
        pairs = [
            (commandline_replacements[i], commandline_replacements[i + 1])
            for i in range(0, len(commandline_replacements), 2)
        ]
        replacements.extend(pairs)
        if not summary_commandline:
            replacementsDescription = '(%s)' % ', '.join(
                [('-' + pair[0] + ' +' + pair[1]) for pair in pairs]
            )
            editSummary = i18n.twtranslate(
                pywikilib.getSite(),
                'replace-replacing',
                {
                    'description': replacementsDescription
                }
            )
    elif fix is None:
        old = pywikilib.input(u'Please enter the text that should be replaced:')
        new = pywikilib.input(u'Please enter the new text:')
        change = '(-' + old + ' +' + new
        replacements.append((old, new))
        while True:
            old = pywikilib.input(
                u'Please enter another text that should be replaced,' +
                u'\nor press Enter to start:')
            if old == '':
                change += ')'
                break
            new = i18n.input('pywikibot-enter-new-text')
            change += ' & -%s +%s' % (old, new)
            replacements.append((old, new))
        if not summary_commandline:
            default_summary_message = i18n.twtranslate(pywikilib.getSite(),
                                                       'replace-replacing',
                                                       {'description': change})
            pywikilib.output(u'The summary message will default to: %s'
                             % default_summary_message)
            summary_message = pywikilib.input(
                u'Press Enter to use this default message, or enter a ' +
                u'description of the\nchanges your bot will make:')
            if summary_message == '':
                summary_message = default_summary_message
            editSummary = summary_message
    else:
        # Perform one of the predefined actions.
        fixname = fix  # Save the name for passing to exceptions function.
        try:
            fix = fixes.fixes[fix]
        except KeyError:
            pywikilib.output(u'Available predefined fixes are: %s'
                             % fixes.fixes.keys())
            return
        if "regex" in fix:
            regex = fix['regex']
        if "msg" in fix:
            if isinstance(fix['msg'], basestring):
                editSummary = i18n.twtranslate(pywikilib.getSite(),
                                               str(fix['msg']))
            else:
                editSummary = pywikilib.translate(pywikilib.getSite(),
                                                  fix['msg'])
        if fix.get("categories"):
            categories = fix['categories']
        if "exceptions" in fix:
            exceptions = fix['exceptions']
            # Try to append common extensions for multiple fixes.
            # It must be either a dictionary or a function that returns a dict.
            if 'include' in exceptions:
                incl = exceptions['include']
                if callable(incl):
                    baseExcDict = incl(fixname)
                else:
                    baseExcDict = incl
                if baseExcDict:
                    for key in baseExcDict:
                        try:
                            exceptions[key].extend(baseExcDict[key])
                        except KeyError:
                            exceptions[key] = baseExcDict[key]
        if "recursive" in fix:
            recursive = fix['recursive']
        if "nocase" in fix:
            caseInsensitive = fix['nocase']
        try:
            replacements = fix['replacements']
            # enable regex/replacements as a dictionary for different langs
            if isinstance(replacements, dict):
                replacements = replacements[pywikilib.getSite().lang]
        except KeyError:
            pywikilib.output(
                u"No replacements given in fix.")
            return

    # Set the regular expression flags
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE

    # Pre-compile all regular expressions here to save time later. A new
    # list is built so the table in fixes is left untouched. Entries without
    # a topic (command line / interactive pairs) are counted under their
    # original search text.
    compiled_replacements = []
    for entry in replacements:
        old, new = entry[0], entry[1]
        topic = entry[2] if len(entry) > 2 else old
        if not regex:
            old = re.escape(old)
        compiled_replacements.append((re.compile(old, flags), new, topic))
    replacements = compiled_replacements

    for exceptionCategory in [
            'title', 'require-title', 'text-contains', 'inside']:
        if exceptionCategory in exceptions:
            patterns = exceptions[exceptionCategory]
            if not regex:
                patterns = [re.escape(pattern) for pattern in patterns]
            patterns = [re.compile(pattern, flags) for pattern in patterns]
            exceptions[exceptionCategory] = patterns

    if xmlFilename:
        # NOTE(review): assumes the generator lives in the replace module
        # next to ReplaceRobot and expects plain (old, new) pairs, as in the
        # stock replace.py -- confirm. For a list of alternative
        # substitutions the first one is passed on.
        from replace import XmlDumpReplacePageGenerator
        plain_pairs = [
            (old, new[0] if isinstance(new, list) else new)
            for old, new, topic in replacements
        ]
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart,
                                          plain_pairs, exceptions)
    elif useSql:
        # NOTE(review): assumes prepareRegexForMySQL is provided by the
        # replace module -- confirm. The patterns are interpolated into the
        # query text, so they must come from a trusted operator.
        from replace import prepareRegexForMySQL
        whereClause = 'WHERE (%s)' % ' OR '.join(
            ["old_text RLIKE '%s'" % prepareRegexForMySQL(old.pattern)
             for old, new, topic in replacements])
        # Only the 'text-contains' patterns describe page text to avoid.
        text_exceptions = exceptions.get('text-contains', [])
        if text_exceptions:
            exceptClause = 'AND NOT (%s)' % ' OR '.join(
                ["old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                 for exc in text_exceptions])
        else:
            exceptClause = ''
        query = u"""
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pg.MySQLPageGenerator(query)
    elif PageTitles:
        pages = [pywikilib.Page(pywikilib.getSite(), PageTitle)
                 for PageTitle in PageTitles]
        gen = iter(pages)

    gen = genFactory.getCombinedGenerator(gen)
    if not gen:
        # syntax error, show help text from the top of this file
        pywikilib.showHelp('replace')
        return

    preloadingGen = pg.PreloadingGenerator(gen, pageNumber=maxquerysize)

    # Finally we open the file for page titles or set parameter article to None
    if filename:
        try:
            # This opens in strict error mode, that means bot will stop
            # on encoding errors with ValueError.
            # See http://docs.python.org/library/codecs.html#codecs.open
            titlefile = codecs.open(filename, encoding='utf-8',
                                    mode='a' if append else 'w')
        except IOError:
            pywikilib.output("%s cannot be opened for writing." %
                             filename)
            return
    # The same process with exceptions file:
    if excoutfilename:
        try:
            excoutfile = codecs.open(
                excoutfilename, encoding='utf-8',
                mode='a' if excappend else 'w')
        except IOError:
            pywikilib.output("%s cannot be opened for writing." %
                             excoutfilename)
            # Do not leave the titles file open behind us.
            if titlefile:
                titlefile.close()
            return

    bot = Replacing(preloadingGen, replacements, exceptions, acceptall,
                    allowoverlap, recursive, add_cat, sleep, editSummary,
                    titlefile, excoutfile, categories)
    try:
        bot.run()
    finally:
        # Just for the spirit of programming (they were flushed)
        if titlefile:
            titlefile.close()
        if excoutfile:
            excoutfile.close()
if __name__ == '__main__':
    # Script entry point: parse the command line, optionally restart from
    # the last recorded page, run the bot and always shut the library down.
    try:
        #pywikilib.verbose = True
        args = ArgumentHandler()
        args.parse_arguments()
        if args.resume:
            # Re-parse a canned option line that starts at the stored title.
            last_title = File("lastpage", "ortobot").load()
            pywikilib.output(u'last article: "%s"' % last_title)
            resume_template = (
                u'-fix:auto -family:wikipedia_o -recursive -always '
                u'-query:125 -sleep:1 -start:"%s"'
            )
            args.parse_arguments(resume_template % last_title)
        main()
    except KeyboardInterrupt:
        # The message text means "cancelled by the user".
        pywikilib.output(format_string("\n&r(:cancel·lat per l'usuari:)"))
    finally:
        pywikilib.stopme()