Usuari:TronaBot/Python/ortobot.py

Mòdul article requerix el fitxer Usuari:TronaBot/Python/common.py.
# -*- coding: utf-8 -*-

import sys, os, time, random, re
from platform import system as platfsys
import webbrowser
from platform import system as platfsys
# Pick the directory holding the pywikipedia framework and the user library
# according to the operating system, then make both importable.
on_win = platfsys().lower() == "windows"
if on_win:
	home = r"E:\\iShare\SugarSync\My Python scripts"
else:
	home = "/home/pasqual/public_html/"
sys.path.extend(
	os.path.join(home, folder) for folder in ("pywikilib", "pyuserlib")
)

#pywikilib
import wikipedia as pywikilib, pagegenerators as pg
from replace import ReplaceRobot
from pywikibot import i18n
import editarticle

# Imports predefined replacements tasks from fixes.py
import fixes

#user implementations
from common import ArgumentHandler, File
from common import format_string, get_diffs

class Replacing(ReplaceRobot):
	"""
	ReplaceRobot variant that counts the changes made per topic and builds
	the edit summary from those counts.

	Every replacement must be a 3-tuple ``(old, new, topic)``:
	``old`` is the pattern handed to ``pywikilib.replaceExcept``, ``new`` is
	the replacement (or a list of alternatives, one of which is picked at
	random) and ``topic`` is the key under which the changes are counted.
	"""

	# "<:de:> word" is a placeholder that doReplacements() turns into
	# "d'word" or "de word" depending on the first letter of the word.
	_DE_PLACEHOLDER = re.compile(u"<:de:> (\\w+)", re.U)
	# Initial letters that take the elided form. The pattern is unicode so
	# that the accented vowels are compared as characters (a byte-string
	# pattern would be compared byte by byte), and case-insensitive so that
	# capitalised words are handled as well.
	_ELIDING_INITIAL = re.compile(u"[aeiouàèéíòóúh]", re.U | re.I)

	def __init__(
			self, generator, replacements, exceptions=None, acceptall=False,
			allowoverlap=False, recursive=False, addedCat=None, sleep=None,
			editSummary='', articles=None, exctitles=None, categories=None
		):
		"""
		Arguments are those of ReplaceRobot plus:
			* categories - optional dict mapping a replacement topic to the
			               label shown for it in the edit summary.
		"""
		# A fresh dict per instance instead of a shared mutable default.
		if exceptions is None:
			exceptions = {}
		self.generator = generator
		self.replacements = replacements
		self.exceptions = exceptions
		self.acceptall = acceptall
		self.allowoverlap = allowoverlap
		self.recursive = recursive
		if addedCat:
			# The framework module is imported as "pywikilib" in this file.
			site = pywikilib.getSite()
			self.addedCat = pywikilib.Page(site, addedCat, defaultNamespace=14)
		self.sleep = sleep
		# Base text of the summary; doReplacements() appends the counts.
		self.edit_summary = editSummary
		self.articles = articles
		self.exctitles = exctitles

		# An edit counter to split the file by 100 titles if -save or -savenew
		# is on, and to display the number of edited articles otherwise.
		self.editcounter = 0
		# A counter for saved exceptions
		self.exceptcounter = 0
		ReplaceRobot.__init__(
			self, generator, replacements, exceptions, acceptall, allowoverlap,
			recursive, addedCat, sleep, editSummary, articles, exctitles
		)
		self.categories = categories
		# topic -> number of additions made by the last doReplacements() call
		self.counter = {}

	def _elide_de(self, match):
		"""Return "d'word" or "de word" for one "<:de:> word" match."""
		word = match.group(1)
		if self._ELIDING_INITIAL.match(word):
			return u"d'%s" % word
		return u"de %s" % word

	def _summarize_counts(self):
		"""
		Return the counted topics as text, e.g. "a (1), b (2) i c (5)",
		sorted by increasing count. Returns an empty string when nothing
		was counted.
		"""
		# self.categories is None unless the fix supplies topic labels.
		labels_by_topic = self.categories or {}
		counted = sorted((c, r) for r, c in self.counter.items() if c > 0)
		labels = ["%s (%i)" % (labels_by_topic.get(r, r), c)
				  for c, r in counted]
		if not labels:
			return ""
		return " i ".join(
			part for part in (", ".join(labels[:-1]), labels[-1]) if part
		)

	def doReplacements(self, original_text):
		"""
		Returns the text which is generated by applying all replacements to
		the given text.

		When the text changes, self.editSummary is rebuilt from
		self.edit_summary plus the per-topic counts and shown to the user.
		"""
		new_text = old_text = original_text
		exceptions = []
		if "inside-tags" in self.exceptions:
			exceptions += self.exceptions['inside-tags']
		if "inside" in self.exceptions:
			exceptions += self.exceptions['inside']
		# Restart the per-topic counters for this pass.
		for entry in self.replacements:
			self.counter[entry[-1]] = 0

		for old, new, topic in self.replacements:
			if isinstance(new, list):
				new = random.choice(new)
			if self.sleep is not None:
				time.sleep(self.sleep)
			new_text = pywikilib.replaceExcept(
				new_text, old, new, exceptions, allowoverlap=self.allowoverlap
			)
			if "<:de:>" in new_text:
				new_text = self._DE_PLACEHOLDER.sub(self._elide_de, new_text)
			if new_text != old_text:
				added, removed, kept = get_diffs(new_text, old_text)
				self.counter[topic] += len(added)
				old_text = new_text

		if new_text != original_text:
			self.editSummary = format_string(
				"$1 $2", self.edit_summary, self._summarize_counts()
			)
			pywikilib.output(format_string("&ysummary: $1", self.editSummary))

		return new_text

	def _save_title(self, page):
		"""Write the page title to the titles file instead of editing."""
		self.editcounter += 1
		self.articles.write(
			u'#%s\n%s'
			% (page.title(asLink=True, textlink=True), self.splitLine())
		)
		self.articles.flush() # For the peace of our soul :-)

	def run(self):
		"""
		Starts the robot.

		For every page of the generator: apply the replacements, show the
		diff, ask the user what to do (unless acceptall is set) and either
		save the page or, when a titles file was given, record its title.
		"""
		# Run the generator which will yield Pages which might need to be
		# changed.
		for page in self.generator:
			if self.isTitleExcepted(page.title()):
				pywikilib.output(
					u'Skipping %s because the title is on the exceptions list.'
					% page.title(asLink=True))
				continue
			# Remember the page being processed; the -resume option reads it
			# back to restart from there. This is best effort: a failure is
			# reported but must not stop the run. Only Exception is caught
			# so that Ctrl-C still interrupts the bot.
			try:
				File("lastpage", "ortobot").backup(page.title())
			except Exception as error:
				pywikilib.output(
					u'Could not record the last page: %r' % (error,))
			try:
				# Load the page's text from the wiki
				original_text = page.get(get_redirect=True)
				if not (self.articles or page.canBeEdited()):
					pywikilib.output(u"You can't edit page %s"
									 % page.title(asLink=True))
					continue
			except pywikilib.NoPage:
				pywikilib.output(u'Page %s not found' % page.title(asLink=True))
				continue
			new_text = original_text
			while True:
				if self.isTextExcepted(new_text):
					pywikilib.output(
						u'Skipping %s because it contains text that is on '
						u'the exceptions list.' % page.title(asLink=True))
					break
				new_text = self.doReplacements(new_text)
				if new_text == original_text:
					pywikilib.output(u'No changes were necessary in %s'
									 % page.title(asLink=True))
					break
				if self.recursive:
					# Repeat until a pass leaves the text untouched.
					newest_text = self.doReplacements(new_text)
					while newest_text != new_text:
						new_text = newest_text
						newest_text = self.doReplacements(new_text)
				if hasattr(self, "addedCat"):
					cats = page.categories()
					if self.addedCat not in cats:
						cats.append(self.addedCat)
						new_text = pywikilib.replaceCategoryLinks(new_text,
																  cats)
				# Show the title of the page we're working on.
				# Highlight the title in purple.
				pywikilib.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
								 % page.title())
				pywikilib.showDiff(original_text, new_text)
				if self.acceptall:
					break
				if self.exctitles:
					options = ['Yes', 'No', 'no+eXcept', 'Edit',
							   'open in Browser', 'All', 'Quit']
					hotkeys = ['y', 'N', 'x', 'e', 'b', 'a', 'q']
				else:
					options = ['Yes', 'No', 'Edit', 'open in Browser', 'All',
							   'Quit']
					hotkeys = ['y', 'N', 'e', 'b', 'a', 'q']
				choice = pywikilib.inputChoice(
					u'Do you want to accept these changes?',
					options, hotkeys, 'N')
				if choice == 'e':
					editor = editarticle.TextEditor()
					as_edited = editor.edit(original_text)
					# if user didn't press Cancel
					if as_edited and as_edited != new_text:
						new_text = as_edited
					continue
				if choice == 'b':
					webbrowser.open("http://%s%s" % (
						page.site.hostname(),
						page.site.nice_get_address(page.title())
					))
					# The i18n keys of the framework use the "pywikibot-"
					# prefix, like the other keys used in this file.
					i18n.input('pywikibot-enter-finished-browser')
					try:
						original_text = page.get(get_redirect=True, force=True)
					except pywikilib.NoPage:
						pywikilib.output(u'Page %s has been deleted.'
										 % page.title())
						break
					new_text = original_text
					continue
				if choice == 'q':
					self.writeEditCounter()
					self.writeExceptCounter()
					return
				if choice == 'a':
					self.acceptall = True
				if choice == 'x': #May happen only if self.exctitles isn't None
					self.exctitles.write(
						u"ur'^%s$',\n" % re.escape(page.title()))
					self.exctitles.flush()
					self.exceptcounter += 1
				if choice == 'y':
					if not self.articles:
						# Primary behaviour: working on wiki
						page.put_async(new_text, self.editSummary)
						self.editcounter += 1
						# Bug: this increments even if put_async fails
						# This is separately in two clauses of if for
						# future purposes to get feedback form put_async
					else:
						#Save the title for later processing instead of editing
						self._save_title(page)
				# choice must be 'N'
				break
			if self.acceptall and new_text != original_text:
				if not self.articles:
					#Primary behaviour: working on wiki
					try:
						page.put(new_text, self.editSummary)
						self.editcounter += 1 #increment only on success
					except pywikilib.EditConflict:
						pywikilib.output(u'Skipping %s because of edit conflict'
										 % (page.title(),))
					except pywikilib.SpamfilterError as e:
						pywikilib.output(
							u'Cannot change %s because of blacklist entry %s'
							% (page.title(), e.url))
					except pywikilib.PageNotSaved as error:
						pywikilib.error(u'putting page: %s'
										% (error.args,))
					except pywikilib.LockedPage:
						pywikilib.output(u'Skipping %s (locked page)'
										 % (page.title(),))
				else:
					#Save the title for later processing instead of editing
					self._save_title(page)

		#Finally:
		self.writeEditCounter()
		self.writeExceptCounter()

def main():
	"""
	Read the options, build the page generator and run the Replacing bot.

	The options are taken from the module-level ``args`` object, which the
	``__main__`` section creates and parses before calling this function.
	Replacements come from the command line, from a replacement file, from
	interactive input or from a predefined fix of the ``fixes`` module.

	Raises pywikilib.Error for an odd number of replacement texts, for
	replacements combined with -fix, and for an unreadable replacement file.
	"""
	# codecs is not among the imports at the top of the file.
	import codecs

	def given_or_asked(value, question):
		# An option whose parsed value is a bool carries no text of its own:
		# ask the user for it.
		if isinstance(value, bool):
			return pywikilib.input(question)
		return value

	gen = None
	# Array which will collect commandline parameters.
	# First element is original text, second element is replacement text.
	commandline_replacements = []
	# A list of tuples of original text and replacement text (and,
	# optionally, the topic under which the change is counted).
	replacements = []
	# Don't edit pages which contain certain texts.
	exceptions = {
		'title':         [],
		'text-contains': [],
		'inside':        [],
		'inside-tags':   [],
		'require-title': [], # using a seperate requirements dict needs some
	}                        # major refactoring of code.

	# the dump's path, either absolute or relative, which will be used
	# if -xml flag is present
	xmlFilename = None
	# The dumped article to start with (-xmlstart)
	xmlStart = None
	PageTitles = []
	# This factory is responsible for processing command line arguments
	# that are also used by other scripts and that determine on which pages
	# to work on.
	genFactory = pg.GeneratorFactory()
	# Load default summary message.
	# BUG WARNING: This is probably incompatible with the -lang parameter.
	editSummary = i18n.twtranslate(pywikilib.getSite(), 'replace-replacing',
								   {'description': u''})
	# Do not save the page titles, rather work on wiki
	filename = None # The name of the file to save titles
	titlefile = None # The file object itself
	# If we save, primary behaviour is append rather then new file
	append = True
	# Default: don't write titles to exception file and don't read them.
	excoutfilename = None # The name of the file to save exceptions
	excoutfile = None # The file object itself
	# excinfilename: reserved for later use (reading back exceptions)
	# If we save exceptions, primary behaviour is append
	excappend = True

	# Short topic names used in the edit summary (set by a fix)
	categories = None

	# Read commandline parameters.
	# Should the elements of 'replacements' and 'exceptions' be interpreted
	# as regular expressions?
	regex = bool(args.regex)
	if args.xmlstart:
		xmlStart = given_or_asked(
			args.xmlstart, u'Please enter the dumped article to start with:')
	if args.xml:
		if isinstance(args.xml, bool):
			xmlFilename = i18n.input('pywikibot-enter-xml-filename')
		else:
			xmlFilename = args.xml
	useSql = bool(args.sql)
	if args.page:
		PageTitles = args.page if isinstance(args.page, list) else [args.page]
		for page in list(PageTitles):
			if isinstance(page, bool):
				PageTitles.remove(page)
				PageTitles.append(pywikilib.input(
					u'Which page do you want to change?'))
	if args.saveexcnew:
		excappend = False
		excoutfilename = given_or_asked(
			args.saveexcnew,
			u'Please enter the filename to save the excepted titles' +
			u'\n(will be deleted if exists):')
	if args.saveexc:
		excoutfilename = given_or_asked(
			args.saveexc,
			u'Please enter the filename to save the excepted titles:')
	if args.savenew:
		append = False
		filename = given_or_asked(
			args.savenew,
			u'Please enter the filename to save the titles' +
			u'\n(will be deleted if exists):')
	if args.save:
		filename = given_or_asked(
			args.save, u'Please enter the filename to save the titles:')
	if args.replacementfile:
		replacefile = given_or_asked(
			args.replacementfile,
			u'Please enter the filename to read replacements from:')
		try:
			# "with" closes the file even if decoding a line fails.
			with codecs.open(replacefile, 'r', 'utf-8') as source:
				commandline_replacements.extend(
					[x.lstrip(u'\uFEFF').rstrip('\r\n') for x in source])
		except IOError:
			raise pywikilib.Error(
				'\n%s cannot be opened. Try again :-)' % replacefile)
	if args.excepttitle:
		exceptions['title'] = args.excepttitle
	if args.requiretitle:
		exceptions['require-title'] = args.requiretitle
	if args.excepttext:
		exceptions['text-contains'] = args.excepttext
	if args.exceptinside:
		exceptions['inside'] = args.exceptinside
	if args.exceptinsidetag:
		exceptions['inside-tags'] = args.exceptinsidetag
	# Predefined fixes from dictionary 'fixes' (see above).
	fix = args.fix or None
	# Between a regex and another (using -fix) sleep some time (not to waste
	# too much CPU
	sleep = args.sleep or None
	# will become True when the user presses a ('yes to all') or uses the
	# -always flag.
	acceptall = bool(args.always)
	# Do not recurse replacement
	recursive = bool(args.recursive)
	caseInsensitive = bool(args.nocase)
	dotall = bool(args.dotall)
	multiline = bool(args.multiline)
	add_cat = args.addcat or None
	# summary message
	summary_commandline = bool(args.summary)
	if summary_commandline:
		editSummary = args.summary
	# Do all hits when they overlap
	allowoverlap = bool(args.allowoverlap)
	# This is the maximum number of pages to load per query
	maxquerysize = args.query or 60
	for arg in args.raw:
		if not genFactory.handleArg(arg) and not arg.startswith("-"):
			commandline_replacements.append(arg)

	if pywikilib.verbose:
		pywikilib.output(u"commandline_replacements: " +
						 ', '.join(commandline_replacements))

	if len(commandline_replacements) % 2:
		raise pywikilib.Error('require even number of replacements.')
	elif len(commandline_replacements) == 2 and fix is None:
		replacements.append(
			(commandline_replacements[0], commandline_replacements[1])
		)
		if not summary_commandline:
			editSummary = i18n.twtranslate(
				pywikilib.getSite(),
				'replace-replacing',
				{
					'description': ' (-%s +%s)' % (
						commandline_replacements[0],
						commandline_replacements[1]
					)
				}
			)
	elif len(commandline_replacements) > 1:
		if fix is not None:
			raise pywikilib.Error(
				'Specifying -fix with replacements is undefined'
			)
		pairs = [
			(commandline_replacements[i], commandline_replacements[i + 1])
			for i in range(0, len(commandline_replacements), 2)
		]
		replacements.extend(pairs)
		if not summary_commandline:
			replacementsDescription = '(%s)' % ', '.join(
				[('-' + pair[0] + ' +' + pair[1]) for pair in pairs]
			)
			editSummary = i18n.twtranslate(
				pywikilib.getSite(),
				'replace-replacing',
				{
					'description': replacementsDescription
				}
			)
	elif fix is None:
		old = pywikilib.input(u'Please enter the text that should be replaced:')
		new = pywikilib.input(u'Please enter the new text:')
		change = '(-' + old + ' +' + new
		replacements.append((old, new))
		while True:
			old = pywikilib.input(
					u'Please enter another text that should be replaced,' +
					u'\nor press Enter to start:')
			if old == '':
				change += ')'
				break
			new = i18n.input('pywikibot-enter-new-text')
			change += ' & -%s +%s' % (old, new)
			replacements.append((old, new))
		if not summary_commandline:
			default_summary_message = i18n.twtranslate(pywikilib.getSite(),
													   'replace-replacing',
													   {'description': change})
			pywikilib.output(u'The summary message will default to: %s'
							 % default_summary_message)
			summary_message = pywikilib.input(
				u'Press Enter to use this default message, or enter a ' +
				u'description of the\nchanges your bot will make:')
			if summary_message == '':
				summary_message = default_summary_message
			editSummary = summary_message

	else:
		# Perform one of the predefined actions.
		fixname = fix # Save the name for passing to exceptions function.
		try:
			fix = fixes.fixes[fix]
		except KeyError:
			pywikilib.output(u'Available predefined fixes are: %s'
							 % fixes.fixes.keys())
			return
		if "regex" in fix:
			regex = fix['regex']
		if "msg" in fix:
			if isinstance(fix['msg'], basestring):
				editSummary = i18n.twtranslate(pywikilib.getSite(),
											   str(fix['msg']))
			else:
				editSummary = pywikilib.translate(pywikilib.getSite(),
												  fix['msg'])
		# Topic labels for the edit summary
		if fix.get("categories"):
			categories = fix['categories']

		if "exceptions" in fix:
			exceptions = fix['exceptions']
			# Try to append common extensions for multiple fixes.
			# It must be either a dictionary or a function that returns a dict.
			if 'include' in exceptions:
				incl = exceptions['include']
				baseExcDict = incl(fixname) if callable(incl) else incl
				if baseExcDict:
					for l in baseExcDict:
						try:
							exceptions[l].extend(baseExcDict[l])
						except KeyError:
							exceptions[l] = baseExcDict[l]
		if "recursive" in fix:
			recursive = fix['recursive']
		if "nocase" in fix:
			caseInsensitive = fix['nocase']
		try:
			replacements = fix['replacements']
			# enable regex/replacements as a dictionary for different langs
			if isinstance(replacements, dict):
				replacements = replacements[pywikilib.getSite().lang]
		except KeyError:
			pywikilib.output(
				u"No replacements given in fix.")
			return

	# Set the regular expression flags
	flags = re.UNICODE
	if caseInsensitive:
		flags = flags | re.IGNORECASE
	if dotall:
		flags = flags | re.DOTALL
	if multiline:
		flags = flags | re.MULTILINE

	# Pre-compile all regular expressions here to save time later.
	# Every entry becomes (compiled pattern, new text, topic). Entries given
	# as plain (old, new) pairs - the command line and interactive ones -
	# use the original text as their topic.
	for i, entry in enumerate(replacements):
		old, new = entry[0], entry[1]
		topic = entry[2] if len(entry) > 2 else old
		if not regex:
			old = re.escape(old)
		replacements[i] = re.compile(old, flags), new, topic

	for exceptionCategory in [
						'title', 'require-title', 'text-contains', 'inside']:
		if exceptionCategory in exceptions:
			patterns = exceptions[exceptionCategory]
			if not regex:
				patterns = [re.escape(pattern) for pattern in patterns]
			patterns = [re.compile(pattern, flags) for pattern in patterns]
			exceptions[exceptionCategory] = patterns

	if xmlFilename:
		from replace import XmlDumpReplacePageGenerator
		# NOTE(review): the stock generator is expected to unpack
		# (old, new) pairs, so the topics are dropped here - confirm
		# against the installed replace.py.
		gen = XmlDumpReplacePageGenerator(
			xmlFilename, xmlStart,
			[(old, new) for old, new, topic in replacements], exceptions)
	elif useSql:
		from replace import prepareRegexForMySQL
		whereClause = 'WHERE (%s)' % ' OR '.join(
			["old_text RLIKE '%s'" % prepareRegexForMySQL(old.pattern)
			 for old, new, topic in replacements])
		# 'exceptions' is a dict of pattern lists; the patterns that rule
		# out a page because of its text are under 'text-contains'.
		excluded = exceptions.get('text-contains', [])
		if excluded:
			exceptClause = 'AND NOT (%s)' % ' OR '.join(
				["old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
				 for exc in excluded])
		else:
			exceptClause = ''
		query = u"""
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
		gen = pg.MySQLPageGenerator(query)
	elif PageTitles:
		pages = [pywikilib.Page(pywikilib.getSite(), PageTitle)
				 for PageTitle in PageTitles]
		gen = iter(pages)

	gen = genFactory.getCombinedGenerator(gen)
	if not gen:
		# syntax error, show help text from the top of this file
		pywikilib.showHelp('replace')
		return

	preloadingGen = pg.PreloadingGenerator(gen, pageNumber=maxquerysize)

	# Finally we open the file for page titles or set parameter article to None
	if filename:
		try:
			# This opens in strict error mode, that means bot will stop
			# on encoding errors with ValueError.
			# See http://docs.python.org/library/codecs.html#codecs.open
			titlefile = codecs.open(filename, encoding='utf-8',
									mode='a' if append else 'w')
		except IOError:
			pywikilib.output("%s cannot be opened for writing." %
							 filename)
			return
	# The same process with exceptions file:
	if excoutfilename:
		try:
			excoutfile = codecs.open(
							excoutfilename, encoding='utf-8',
							mode='a' if excappend else 'w')
		except IOError:
			pywikilib.output("%s cannot be opened for writing." %
							 excoutfilename)
			# Do not leave the titles file open behind us.
			if titlefile:
				titlefile.close()
			return
	bot = Replacing(preloadingGen, replacements, exceptions, acceptall,
					   allowoverlap, recursive, add_cat, sleep, editSummary,
					   titlefile, excoutfile, categories)
	try:
		bot.run()
	finally:
		# Just for the spirit of programming (they were flushed)
		if titlefile:
			titlefile.close()
		if excoutfile:
			excoutfile.close()

if __name__ == '__main__':
	# Script entry point: parse the options, optionally restart from the
	# page recorded by the previous run, and always release the framework
	# with stopme() at the end.
	try:
		#pywikilib.verbose = True
		# main() reads the parsed options from this module-level name.
		args = ArgumentHandler()
		args.parse_arguments()
		if args.resume:
			last_title = File("lastpage", "ortobot").load()
			pywikilib.output(u'last article: "%s"' % last_title)
			# Parse a fixed command line that starts at the recorded page.
			args.parse_arguments(
				u'-fix:auto -family:wikipedia_o -recursive -always '
				u'-query:125 -sleep:1 -start:"%s"' % last_title
			)
		main()
	except KeyboardInterrupt:
		# The user pressed Ctrl-C: report it instead of showing a traceback.
		pywikilib.output(format_string("\n&r(:cancel·lat per l'usuari:)"))
	finally:
		pywikilib.stopme()