پرش به محتوا

ویکی‌پدیا:رده‌دهی مقالات همسنگ/کد

از ویکی‌پدیا، دانشنامهٔ آزاد
#!/usr/bin/python3
# -*- coding: utf-8  -*-
"""
analogouscat.py - a script that adds an article to categories
that are analogous to the categories in which the enwiki version
of that article is.

For more information see [[fa:ویکی‌پدیا:درخواست‌های ربات/رده همسنگ]]
and [[fa:ویکی‌پدیا:رده‌دهی مقالات همسنگ]]

usage:
  python3 pwb.py analogous <generator> <option>

parameters:
  <generator> is any standard page generator such as -cat, -file, etc.
  For further information see pywikibot/pagegenerators.py

  <option> is any standard global pywikibot option such as -lang, etc.
  For further information see pywikibot/bot.py

examples:
  python3 pwb.py analogous -page:"نویهوفن"
  python3 pwb.py analogous -cat:"شیمی آلی"
  python3 pwb.py analogous -namespace:14 -recentchanges:10

future use cases (not implemented yet):
  python3 pwb.py analogous -namespace:0 -cat:Iran -lang:en
"""
#
# (C) w:fa:User:Reza1615, 2011-2020
# (C) w:fa:User:Huji, 2020
# Distributed under the terms of MIT License (MIT)
#


import pywikibot
from pywikibot import config
from pywikibot import pagegenerators as pg
import re
import sys
# import fa_cosmetic_changes_core

class AnalogousCatBot():
    """Find fawiki categories analogous to a page's enwiki categories.

    For every page produced by the generator, the bot looks up the
    enwiki counterpart through Wikidata sitelinks, walks the non-hidden
    categories of that enwiki page, and collects the fawiki categories
    linked to them that the fawiki page is not yet in.
    """

    def __init__(self, gen):
        """Set up sites and bookkeeping.

        :param gen: iterable of pages to process; consumed by ``run``.
        """
        self.gen = gen
        self.summary = ''
        # Maps an enwiki category title to the fawiki category title
        self.cache = {}
        # Titles of the pages already processed during this run
        self.done = set()
        self.fa_site = pywikibot.Site('fa', 'wikipedia')
        self.en_site = pywikibot.Site('en', 'wikipedia')
        # Compared with the on-wiki version page by check_version()
        self.version = '31'

    def check_version(self):
        """Return True if the on-wiki version page matches self.version.

        The text of the version page is stripped before the comparison.
        """
        p = pywikibot.Page(self.fa_site, 'ویکی‌پدیا:رده‌دهی مقالات همسنگ/نسخه')
        return p.get().strip() == self.version

    def cache_set(self, en_title, fa_title):
        """Remember that ``en_title`` corresponds to ``fa_title``."""
        self.cache[en_title] = fa_title

    def cache_get(self, en_title):
        """Return the cached fawiki title for ``en_title``.

        :return: the cached title, or ``False`` when nothing is cached.
        """
        return self.cache.get(en_title, False)

    def link_to_title(self, link):
        """Build a ``<namespace>:<title>`` string from a link object.

        Uses ``link.namespace.custom_name`` and ``link.title``; when the
        namespace name is empty the result starts with a colon.
        """
        return '%s:%s' % (link.namespace.custom_name, link.title)

    def add_categories(self, page, new_cats):
        """Add ``page`` to ``new_cats`` (not implemented yet).

        Currently a no-op: returns immediately for an empty list and
        does nothing otherwise.
        """
        if not new_cats:
            return

        # TODO: actually add the categories to the end of the page
        # TODO: remove any duplicative categories
        # TODO: run the page through fa_cosmetic_changes as a courtesy

    def process_page(self, page):
        """Collect the analogous categories missing from ``page``.

        The resulting list of fawiki category titles is handed to
        ``add_categories``. Pages without an enwiki sitelink are skipped.

        :param page: the fawiki page to process.
        """
        # Find the enwiki counterpart to the page
        # NOTE(review): ItemPage.fromPage presumably raises when the page
        # has no Wikidata item; that case is not handled here - confirm.
        item = pywikibot.ItemPage.fromPage(page)
        if 'enwiki' not in item.sitelinks:
            return
        en_link = item.sitelinks['enwiki']
        en_page = pywikibot.Page(self.en_site, self.link_to_title(en_link))

        # Categories the page is currently in. Materialised into a list
        # because the result is tested with `in` once per enwiki category
        # and a one-shot iterator would be exhausted by the first test.
        fa_cats = list(page.categories())

        # Categories the page should be added to
        new_cats = []

        for en_cat in en_page.categories():
            # Ignore hidden categories
            if en_cat.isHiddenCategory():
                continue

            # If we have already seen this category, use our cache.
            # Note that pywikibot also caches its Wikidata queries;
            # however, having a local cache makes our script nimbler.
            # The key must be the title string (title() is a method).
            en_title = en_cat.title()
            fa_title = self.cache_get(en_title)

            # Not in cache; find the fawiki analogous category
            if fa_title is False:
                cat_item = pywikibot.ItemPage.fromPage(en_cat)
                cat_item.get()

                # No analogous category exists in fawiki
                if 'fawiki' not in cat_item.sitelinks:
                    continue

                fa_title = self.link_to_title(cat_item.sitelinks['fawiki'])
                self.cache_set(en_title, fa_title)

            # If the fawiki page is already in that category, ignore it
            fa_cat = pywikibot.Page(self.fa_site, fa_title)
            if fa_cat in fa_cats:
                continue

            # TODO: check if the category is a redirect (needed?)
            # Add this category to the list of new categories
            new_cats.append(fa_title)

        self.add_categories(page, new_cats)

    def run(self):
        """Process every page of the generator once.

        Exits the interpreter (SystemExit) when ``check_version``
        reports a mismatch. Pages whose title was already processed in
        this run are skipped.
        """
        if not self.check_version():
            pywikibot.output("\03{lightred}Please update me!\03{default}")
            pywikibot.stopme()
            sys.exit()

        for page in self.gen:
            title = page.title()
            if title in self.done:
                continue
            print("Processing %s" % page)
            self.process_page(page)
            self.done.add(title)

def main(*args):
    """Parse arguments, build the page generator and run the bot.

    :param args: command-line style arguments. They are forwarded to
        ``pywikibot.handle_args``; pywikibot documents that it falls
        back to ``sys.argv`` when none are given (confirm for the
        installed version).
    :return: True after the bot has run.
    :raises SystemExit: when no page generator could be built.
    """
    gf = pg.GeneratorFactory()
    # Forward the caller's arguments instead of silently dropping them
    local_args = pywikibot.handle_args(args)

    for arg in local_args:
        # Let pywikibot process standard generator arguments
        if not gf.handleArg(arg):
            print('Unknown argument %s' % arg)

    gen = gf.getCombinedGenerator(preload=True)

    if gen is None:
        pywikibot.output('No generator found!')
        pywikibot.stopme()
        sys.exit()

    bot = AnalogousCatBot(gen)
    bot.run()
    return True

# Run the bot only when this file is executed as a script, not on import
if __name__ == '__main__':
    main()