User:Reza1615/BOT/merge-pages.py


To run this code, please read User:Reza1615/BOT first.

The sub-page from which the bot reads the item list should be formatted as in the example below.
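A hypothetical work-list line, inferred from the parsing in main() below (which scans each line for item links and merges all items on a line into the last one); the Q-IDs are made-up placeholders, not real duplicates:

<pre>
* [[Q1111111]] [[Q2222222]]
* [[Q3333333]] [[Q4444444]] [[Q5555555]]
</pre>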

<source lang='python'>

#!/usr/bin/python
# -*- coding: utf-8 -*-

from __future__ import unicode_literals

# Reza (User:reza1615)
# Distributed under the terms of the CC-BY-SA 3.0.

import wikipedia, pagegenerators, config
import query, time, login, codecs, re
import wikidata
from pywikibot import textlib

# ------------------- Summaries used on Wikidata; they should be in English -------

creat_summary = "Bot: Import page from {0}wiki".format('en')
update_link_summary = "Bot: Update site links from {0}wiki".format('en')
update_link_labels_summary = "Bot: Update site links and labels from {0}wiki".format('en')
update_Labels_summary = u'Bot: Update of labels.'

# -------------------------------------------------------------------------------

Orgine_Site = wikipedia.getSite('en', fam='wikipedia')
repo = Orgine_Site.data_repository()
SafeWork = True  # in safe mode, lines with sitelink conflicts are skipped entirely
mysite = wikipedia.getSite('wikidata', 'wikidata')

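# login_wiki(): log the bot account in to Wikidata. The credentials below
# are placeholders; replace them with the bot's own username and password.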
def login_wiki(mode):

   dataSite=wikipedia.getSite('wikidata','wikidata')
   passwords='yourBotPass'
   usernames='YourBotUser'
   botlog=login.LoginManager(password=passwords,username=usernames,site=dataSite)
   botlog.login()
   

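# save_file(): append a case to one of three report files (merged items that
# still need deletion, import errors, or sitelink conflicts), skipping cases
# that were already recorded.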
def save_file(case,type):

   if type=='merge':
       file = 'zzinterwiki_import_merged_need_deleted.txt'
   elif type=='error':
       file = 'zzinterwiki_import_errors.txt'    
   else:
       file = 'zzinterwiki_conflicts.txt'
   try:
       file_text = codecs.open(file, 'r', 'utf8')
       file_text = file_text.read().strip()
   except:
       file_text = u''
   if case not in file_text:
       with codecs.open(file, mode='a', encoding='utf8') as f:
           f.write(u'\n' + case)
           

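# check_item(): return True when the linked page has no Wikidata item yet
# (safe to add the sitelink); an existing item is reported as a conflict.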
def check_item(wiki,link):

   site=wikipedia.getSite(wiki.replace(u'_',u'-'),fam='wikipedia')
   page=wikipedia.Page(site,link)
   data=wikipedia.DataPage(page)
   try:
       items = data.get()
   except wikipedia.NoPage:
       # no item for this page yet, so the sitelink can be added safely
       return True
   except:
       wikipedia.output("\03{lightred}Item has been created. Skipping...\03{default}")
       return False
   # the page already has an item of its own: treat it as a conflict
   return False

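# set_label(): merge the new sitelinks into the item and, for languages that
# still lack a label, derive one from the sitelink title (dropping
# disambiguators and some language-specific prefixes).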
def set_label(data, new_langs, item):

    dic_item = data.get()
    old = dic_item['links'].copy()  # keep a copy so changes can be detected
    changes = False
    for cases in new_langs:
        # 'nb' links are stored under 'no'
        target = 'no' if cases == 'nb' else cases
        dic_item['links'][target] = new_langs[cases]
        if old != dic_item['links']:
            wikipedia.output('added ' + target + '......................')
    for langs in dic_item['links']:
        if langs == 'nb':
            langs = 'no'
        value = dic_item['links'][langs].strip()
        lang = langs.replace('wiki', '').replace('_', '-')
        try:
            value = unicode(value, 'UTF8')
        except:
            pass
        if lang != 'fa':
            # drop disambiguators such as "Title (film)" from the label
            value = value.split(u'(')[0].strip()
        if lang == 'es' or lang == 'pt' or lang == 'pt-br':
            value = value.replace(u"Anexo:", u"")
        if lang == 'cs':
            value = value.replace(u"Príloha:", u"")
        if lang == 'de-ch':
            value = value.replace(u"ß", u"ss")
        try:
            a = dic_item['label'][lang]
        except:
            # no label in this language yet: use the sitelink title
            item.labels[lang] = value
            changes = True
            wikipedia.output('\03{lightgreen}for ' + value + ' added as label of ' + lang + '\03{default}')
    if not changes:
        wikipedia.output("Doesn't need any update!")
    return item, changes

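# Update_data(): add the collected sitelinks to the target item, log
# conflicts through save_file(), and skip conflicting lines entirely when
# SafeWork is set. Returns True when the item was saved successfully.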
def Update_data(data_add, appenddict):

    item = wikidata.api.getItemById(data_add.title())
    summary = ''
    conflict = {}
    new_langs = {}
    for lang in appenddict:
        if lang == 'nb':
            lang = 'no'
        site_lang = lang
        interlangLinks = appenddict[lang]
        status = check_item(site_lang, interlangLinks)
        if not status:
            wikipedia.output(site_lang + ' has a conflict!')
            conflict[site_lang] = interlangLinks
            continue
        summary = update_link_summary
        item.sitelinks[lang + "wiki"] = interlangLinks
        new_langs[lang + "wiki"] = interlangLinks
    if conflict:
        item_conflict = u'* ' + data_add.title() + u' Conflict > '
        for i in conflict:
            item_conflict += u'[[:' + i + u':' + conflict[i] + u'|' + i + u'wiki]]-'
        save_file(item_conflict[:-1], 'conflict')
        if SafeWork:
            wikipedia.output('\03{lightred}-->' + data_add.title() + ' Passed! because of safe mode and conflict\03{default}')
            return False
    if summary:
        item, changes = set_label(data_add, new_langs, item)
        if changes:
            summary = update_link_labels_summary
        try:
            wikidata.api.save(item, summary)
            wikipedia.output('\03{lightblue}Page ' + data_add.title() + ' : ' + summary + '\03{default}')
            return True
        except Exception, e:
            try:
                wikipedia.output('\03{lightred}Page ' + data_add.title() + ' Passed! error was : ' + str(e) + ' \03{default}')
            except:
                wikipedia.output('\03{lightred}Page ' + data_add.title() + ' Passed!\03{default}')
    return False


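# find_diff(): queue the interwiki links that are not yet present
# (normalising 'nb' to 'no') and pass them to Update_data().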
def find_diff(my_data, interwiki_links):

    dictionary = {}
    appenddict = {}
    for lang in interwiki_links:
        if lang == 'nb':
            lang = 'no'
        L_lang = lang.replace(u'-', u'_')
        L_link = interwiki_links[lang]
        if not (L_lang in appenddict):
            if not ((L_lang + 'wiki') in dictionary):
                appenddict[L_lang] = L_link
                wikipedia.output('\03{lightblue}+ ' + L_lang + u' > ' + L_link + ' \03{default}')

    done = Update_data(my_data, appenddict)
    if done:
        return True
    return False

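# main(): read the work list from the sub-page; for every line, pool the
# descriptions, aliases and sitelinks of all listed items, empty the
# duplicates, and move everything onto the line's last item.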
def main():

    site = wikipedia.getSite('en')
    Orgine_Site = wikipedia.getSite('wikidata', fam='wikidata')
    Orgine_page = wikipedia.Page(Orgine_Site, 'User:Yamaha5/test')  # 'User:Soulkeeper/dups'
    text = Orgine_page.get()
    lines = text.split(u'\n')
    for line in lines:
        our_links = {}
        Regex = re.compile(ur'\[\[.*?\]\]')
        wikipedia.output('--------------------------------------------------')
        links = Regex.findall(line)
        lent = len(links)
        desc = {}
        cla = []
        alia = {}
        if links:
            count = 0
            line_pass = True
            for link in links:
                Last_Item = False
                link = link.replace(u'[[', u'').replace(u']]', u'').strip()
                wikipedia.output('\03{lightblue}working on ' + link + u'\03{default}')
                count += 1
                repo = site.data_repository()
                data = wikipedia.DataPage(repo, link)
                try:
                    item_dict = data.get()
                except:
                    wikipedia.output(link + ' has error')
                    continue

                # pool descriptions, claims and aliases from every item on the line
                if 'description' in item_dict:
                    description = item_dict['description']
                    for i in description:
                        desc[i] = description[i]
                if 'claims' in item_dict:
                    claims = item_dict['claims']
                    for i in claims:
                        if not i in cla:
                            cla.append(i)
                if 'aliases' in item_dict:
                    aliases = item_dict['aliases']
                    for i in aliases:
                        alia[i] = aliases[i]
                if 'links' in item_dict:
                    dictionary = item_dict['links']
                if our_links == {} and count == lent and desc == {} and alia == {}:
                    line_pass = False
                    break
                if count == lent:
                    # keep the last item of the line: grab one of its language
                    # codes for later lookups and stop without emptying it
                    for lang in dictionary:
                        break
                    lang = lang.replace(u'wiki', u'')
                    Last_Item = True
                    break
                item = wikidata.api.getItemById(link)
                countB = 0

                for lang in dictionary:
                    lang = lang.replace(u'wiki', u'')
                    wikipedia.output(lang + u'>' + dictionary[lang + u"wiki"])
                    if not lang in our_links:
                        our_links[lang] = dictionary[lang + u"wiki"]
                    # empty the duplicate item so its links can move to the kept one
                    item.sitelinks[lang + u"wiki"] = u''
                    item.labels[lang] = u''
                try:
                    wikidata.api.save(item, u'Bot: Merging Items')
                except Exception, e:
                    wikipedia.output('\03{lightred}Page Passed! error was : ' + str(e) + ' \03{default}')
                    continue
                if not Last_Item:
                    wikipedia.output('\03{lightred}Emptying ' + link + u'\03{default}')
            if desc or alia:
                siteLang = wikipedia.getSite(lang)
                Orgine_page2 = wikipedia.Page(siteLang, dictionary[lang + u"wiki"])
                data = wikipedia.DataPage(Orgine_page2)
                if not data.exists():
                    continue
                item = wikidata.api.getItemById(data.title())
                for i in desc:
                    if isinstance(desc[i], (list, dict)):
                        continue
                    item.descriptions[i] = desc[i]
                for i in alia:
                    if isinstance(alia[i], (list, dict)):
                        continue
                    item.aliases[i] = alia[i]
                try:
                    wikidata.api.save(item, u'Bot: adding aliases and descriptions')
                    wikipedia.output(u'Bot: adding aliases and descriptions')
                except:
                    continue
            if line_pass and our_links != {}:
                siteLang = wikipedia.getSite(lang)
                Orgine_page2 = wikipedia.Page(siteLang, dictionary[lang + u"wiki"])
                data = wikipedia.DataPage(Orgine_page2)
                if not data.exists():
                    save_file(line, 'error')
                    continue
                done = find_diff(data, our_links)
                wikipedia.output('\03{lightgreen}Filling ' + link + ' with new Langs\03{default}')
                if done:
                    wikipedia.output(u'\03{lightgreen}Item merged and updated!\03{default} ' + line)
                else:
                    wikipedia.output(u'\03{lightred}Item not merged!\03{default}')

if __name__ == "__main__":

   login_wiki(1)
   main()

</source>