Usuari:KRLS/codi/treuCatInterwikisDinsCos.py

#!/usr/bin/python
# -*- coding: utf-8 -*-

import pywikibot, re

def _offer_link_fixes(current, scope):
	"""Interactively offer to neutralise the first interwiki link found in *scope*.

	*scope* is a fragment of *current* (either a bare ``[[:xx:...]]`` link or a
	whole ``<ref>...</ref>`` element containing one). Two fixes are proposed,
	in this order:

	1. if the link is piped (``[[:xx:Target|label]]``), drop ``:xx:Target|``
	   so that only ``[[label]]`` remains;
	2. drop only the ``:xx:`` language prefix.

	Returns a ``(text, skip)`` tuple: *text* is *current* with the accepted
	fix applied (or *current* unchanged), and *skip* is True when the operator
	answered ``s`` to stop working on this article.
	"""
	interwiki = re.findall(r'\[\[((:[a-z]{1,3}:).*?)\]\]', scope)
	if not interwiki:
		# A colon link without a 1-3 letter prefix (e.g. [[:Categoria:...]])
		# is not an interwiki; there is nothing to propose for it.
		return current, False
	target, prefix = interwiki[0]

	splitted = target.split('|')
	if len(splitted) > 1:
		# Only a piped link has a label that can survive the target removal;
		# for unpiped links this proposal is skipped altogether.
		tmp = current.replace(scope, scope.replace(splitted[0] + '|', ''))
		pywikibot.showDiff(current, tmp)
		answer = raw_input('Vols eliminar només l\'enllaç? [y] = Yes, [n] = No, [s] = Skip article\n')
		if answer == 'y':
			return tmp, False
		if answer == 's':
			return current, True

	tmp = current.replace(scope, scope.replace(prefix, ''))
	pywikibot.showDiff(current, tmp)
	answer = raw_input('Vols eliminar només l\'interwiki? [y] = Yes, [n] = No, [s] = Skip article\n')
	if answer == 'y':
		return tmp, False
	return current, answer == 's'


def main():
	"""Clean up articles tagged as having interwiki links inside their text.

	Walks (recursively) the Catalan Wikipedia category "Articles amb
	interviquis dins del text". An article with no ``[[:...]]`` link left
	gets the tracking category removed and is saved straight away. Otherwise
	every such link is reviewed interactively: when it sits inside a
	``<ref>`` the operator may delete the whole reference (plus the
	``<ref name=.../>`` re-uses of it), and in every case may strip the link
	target or just the language prefix. Answering ``s`` stops reviewing the
	remaining links of the article; edits accepted up to that point are still
	offered for saving in the final confirmation.

	TODO: optionally let the operator propose a replacement title for a link
	and substitute it.
	"""
	category_tag = "[[Categoria:Articles amb interviquis dins del text]]"
	site = pywikibot.Site("ca", "wikipedia")
	arts = pywikibot.Category(site, u"Articles amb interviquis dins del text").articlesList(recurse=1)
	for art in arts:
		# title is a method; printing the attribute would show the bound
		# method object rather than the page name.
		pywikibot.output(art.title())
		text = art.text
		match = re.findall(r"(\[\[:.*?\]\])", text)
		if not match:
			current = text.replace(category_tag, "")
			pywikibot.showDiff(text, current)
			art.put(current, u'No té interwikis dins el text. Elimino la "Categoria:Articles amb interviquis dins del text". #QQ18')
			continue

		current = text
		for elem in match:
			if elem not in current:
				# Already removed or rewritten by an earlier accepted edit
				# (duplicate link, or its enclosing reference was deleted).
				continue
			# NOTE(review): this pattern is not anchored to a single <ref>
			# element, so it may span from an earlier <ref ...> up to the
			# first </ref> after the link; check the diff before accepting.
			ref = u'(<ref(?: name=(.*?)>)?.*?{0}.*?</ref>)'.format(re.escape(elem))
			refMatch = re.findall(ref, current)
			scope = elem
			if refMatch:
				refArray = refMatch[0]
				tmp = current.replace(refArray[0], u"")
				if refArray[1]:
					# Also drop the self-closing re-uses of the named reference.
					tmp = tmp.replace(u'<ref name={0}/>'.format(refArray[1]), "")
				pywikibot.showDiff(current, tmp)
				answer = raw_input('Vols eliminar la referència sencera? [y] = Yes, [n] = No, [s] = Skip article\n')
				if answer == 'y':
					current = tmp
					continue
				if answer == 's':
					break
				# Reference kept: restrict the link fixes to the reference text.
				scope = refArray[0]
			current, skip = _offer_link_fixes(current, scope)
			if skip:
				break

		# Once no colon link is left, the tracking category no longer applies.
		if not re.findall(r"(\[\[:.*?\]\])", current):
			current = current.replace(category_tag, "")
		if text != current:
			pywikibot.showDiff(text, current)
			answer = raw_input('Finalment deso? [y] = Yes, [n] = No\n')
			if answer == 'y':
				art.put(current, u'Retiro interwikis dins el text. #QQ18')

# Run the interactive clean-up only when executed as a script, not on import.
if __name__ == '__main__':
	main()