ویکی‌پدیا:رده‌دهی مقالات همسنگ/کد: تفاوت میان نسخه‌ها

از ویکی‌پدیا، دانشنامهٔ آزاد
محتوای حذف‌شده محتوای افزوده‌شده
صفحه‌ای تازه حاوی «<source lang=python> </source>» ایجاد کرد
 
بدون خلاصۀ ویرایش
 
خط ۱: خط ۱:
<source lang=python>
<source lang=python>
#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""
analogouscat.py - a script that adds an article to categories
that are analogous to the categories in which the enwiki version
of that article is.


For more information see [[fa:ویکی‌پدیا:درخواست‌های ربات/رده همسنگ]]
and [[fa:ویکی‌پدیا:رده‌دهی مقالات همسنگ]]

usage:
python3 pwb.py analogous <generator> <option>

parameters:
<generator> is any standard page generator such as -cat, -file, etc.
For further information see pywikibot/pagegenerators.py

<option> is any standard global pywikibot option such as -lang, etc.
For further information see pywikibot/bot.py

examples:
python3 pwb.py analogous -page:"نویهوفن"
python3 pwb.py analogous -cat:"شیمی آلی"
python3 pwb.py analogous -namespace:14 -recentchanges:10

future use cases (not implemented yet):
python3 pwb.py analogous -namespace:0 -cat:Iran -lang:en
"""
#
# (C) w:fa:User:Reza1615, 2011-2020
# (C) w:fa:User:Huji, 2020
# Distributed under the terms of MIT License (MIT)
#


import pywikibot
from pywikibot import config
from pywikibot import pagegenerators as pg
import re
import sys
# import fa_cosmetic_changes_core

class AnalogousCatBot():
    """Find fawiki categories analogous to a page's enwiki categories.

    For every page yielded by the generator, the bot locates the page's
    enwiki counterpart through its Wikidata sitelinks, walks the
    non-hidden categories of that enwiki page, and collects the fawiki
    categories linked to the same Wikidata items which the page is not
    yet in.

    Writing the collected categories back to the page is not implemented
    yet; ``add_categories`` is still a stub.
    """

    def __init__(self, gen):
        """Store the page generator and set up sites and caches.

        :param gen: iterable of pages to process
        """
        self.gen = gen
        self.summary = ''
        # Maps an enwiki category title to its fawiki category title, or
        # to None when a lookup found no fawiki counterpart.
        self.cache = {}
        # Titles already handled in this run; a set keeps the membership
        # test in run() constant-time for long generators.
        self.done = set()
        self.fa_site = pywikibot.Site('fa', 'wikipedia')
        self.en_site = pywikibot.Site('en', 'wikipedia')
        self.version = '31'

    def check_version(self):
        """Return True if the on-wiki version page matches self.version."""
        p = pywikibot.Page(self.fa_site, 'ویکی‌پدیا:رده‌دهی مقالات همسنگ/نسخه')
        return p.get().strip() == self.version

    def cache_set(self, en_title, fa_title):
        """Remember *fa_title* (or None for "no counterpart") for *en_title*.

        :param en_title: enwiki category title used as the cache key
        :param fa_title: fawiki category title, or None
        """
        self.cache[en_title] = fa_title

    def cache_get(self, en_title):
        """Return the cached value for *en_title*.

        :return: the stored value (a title, or None for a cached miss), or
            False when *en_title* has never been cached
        """
        return self.cache.get(en_title, False)

    def link_to_title(self, link):
        """Build a ``<namespace>:<title>`` string from a sitelink.

        :param link: object exposing ``namespace.custom_name`` and ``title``
        :return: the namespace name and title joined by a colon
        """
        return '%s:%s' % (link.namespace.custom_name, link.title)

    def add_categories(self, page, new_cats):
        """Add *new_cats* to *page* (not implemented yet).

        :param page: the page that should receive the categories
        :param new_cats: list of category titles to add
        """
        if not new_cats:
            return

        # TODO: actually add the categories to the end of the page
        # TODO: remove any duplicative categories
        # TODO: run the page through fa_cosmetic_changes as a courtesy

    def process_page(self, page):
        """Collect the analogous categories *page* is missing.

        The result is handed to ``add_categories``. Pages whose Wikidata
        item has no enwiki sitelink are skipped.

        :param page: the fawiki page to process
        """
        # Find the enwiki counterpart to the page
        item = pywikibot.ItemPage.fromPage(page)
        if 'enwiki' not in item.sitelinks:
            return
        en_link = item.sitelinks['enwiki']
        en_page = pywikibot.Page(self.en_site, self.link_to_title(en_link))

        # Categories the page is currently in. pywikibot documents
        # categories() as returning a generator; it is materialized here
        # because membership is tested once per enwiki category and a
        # one-shot iterator would be consumed by the first test.
        fa_cats = list(page.categories())

        # Categories the page should be added to
        new_cats = []

        for en_cat in en_page.categories():
            # Ignore hidden categories
            if en_cat.isHiddenCategory():
                continue

            # title is a method on pywikibot pages; call it so that the
            # cache is keyed by the title string, not by a bound method.
            en_title = en_cat.title()

            # If we have already seen this category, use our cache.
            # Note that pywikibot also caches its Wikidata queries;
            # however, having a local cache makes our script nimbler.
            fa_title = self.cache_get(en_title)

            # Not in cache; find the fawiki analogous category
            if fa_title is False:
                cat_item = pywikibot.ItemPage.fromPage(en_cat)
                cat_item.get()
                if 'fawiki' in cat_item.sitelinks:
                    fa_link = cat_item.sitelinks['fawiki']
                    fa_title = self.link_to_title(fa_link)
                else:
                    fa_title = None
                # Misses are cached too, so a category without a fawiki
                # counterpart is not looked up again for every page.
                self.cache_set(en_title, fa_title)

            # No analogous category exists in fawiki
            if not fa_title:
                continue

            # If the fawiki page is already in that category, ignore it
            fa_cat = pywikibot.Page(self.fa_site, fa_title)
            if fa_cat in fa_cats:
                continue

            # TODO: check if the category is a redirect (needed?)
            new_cats.append(fa_title)

        self.add_categories(page, new_cats)

    def run(self):
        """Process every page of the generator once.

        Exits the interpreter when the on-wiki version check fails.
        """
        if not self.check_version():
            pywikibot.output("\03{lightred}Please update me!\03{default}")
            pywikibot.stopme()
            sys.exit()

        for page in self.gen:
            title = page.title()
            if title in self.done:
                continue
            pywikibot.output("Processing %s" % page)
            self.process_page(page)
            self.done.add(title)

def main(*args):
    """Parse arguments, build the page generator and run the bot.

    :param args: command line style arguments; when none are passed,
        pywikibot.handle_args reads them from the command line instead
    :return: True once the bot has finished
    """
    gf = pg.GeneratorFactory()
    # Forward explicitly passed arguments instead of dropping them
    local_args = pywikibot.handle_args(args)

    for arg in local_args:
        # Let pywikibot process standard arguments
        if not gf.handleArg(arg):
            pywikibot.output('Unknown argument %s' % arg)

    gen = gf.getCombinedGenerator(preload=True)

    if gen is None:
        pywikibot.output('No generator found!')
        pywikibot.stopme()
        sys.exit()

    bot = AnalogousCatBot(gen)
    bot.run()
    return True

# Run the bot only when executed as a script, not when imported
if __name__ == '__main__':
    main()


</source>
</source>

نسخهٔ کنونی تا ‏۲۸ مهٔ ۲۰۲۰، ساعت ۱۳:۳۳

#!/usr/bin/python3
# -*- coding: utf-8  -*-
"""
analogouscat.py - a script that adds an article to categories
that are analogous to the categories in which the enwiki version
of that article is.

For more information see [[fa:ویکی‌پدیا:درخواست‌های ربات/رده همسنگ]]
and [[fa:ویکی‌پدیا:رده‌دهی مقالات همسنگ]]

usage:
  python3 pwb.py analogous <generator> <option>

parameters:
  <generator> is any standard page generator such as -cat, -file, etc.
  For further information see pywikibot/pagegenerators.py

  <option> is any standard global pywikibot option such as -lang, etc.
  For further information see pywikibot/bot.py

examples:
  python3 pwb.py analogous -page:"نویهوفن"
  python3 pwb.py analogous -cat:"شیمی آلی"
  python3 pwb.py analogous -namespace:14 -recentchanges:10

future use cases (not implemented yet):
  python3 pwb.py analogous -namespace:0 -cat:Iran -lang:en
"""
#
# (C) w:fa:User:Reza1615, 2011-2020
# (C) w:fa:User:Huji, 2020
# Distributed under the terms of MIT License (MIT)
#


import pywikibot
from pywikibot import config
from pywikibot import pagegenerators as pg
import re
import sys
# import fa_cosmetic_changes_core

class AnalogousCatBot():
    """Find fawiki categories analogous to a page's enwiki categories.

    For every page yielded by the generator, the bot locates the page's
    enwiki counterpart through its Wikidata sitelinks, walks the
    non-hidden categories of that enwiki page, and collects the fawiki
    categories linked to the same Wikidata items which the page is not
    yet in.

    Writing the collected categories back to the page is not implemented
    yet; ``add_categories`` is still a stub.
    """

    def __init__(self, gen):
        """Store the page generator and set up sites and caches.

        :param gen: iterable of pages to process
        """
        self.gen = gen
        self.summary = ''
        # Maps an enwiki category title to its fawiki category title, or
        # to None when a lookup found no fawiki counterpart.
        self.cache = {}
        # Titles already handled in this run; a set keeps the membership
        # test in run() constant-time for long generators.
        self.done = set()
        self.fa_site = pywikibot.Site('fa', 'wikipedia')
        self.en_site = pywikibot.Site('en', 'wikipedia')
        self.version = '31'

    def check_version(self):
        """Return True if the on-wiki version page matches self.version."""
        p = pywikibot.Page(self.fa_site, 'ویکی‌پدیا:رده‌دهی مقالات همسنگ/نسخه')
        return p.get().strip() == self.version

    def cache_set(self, en_title, fa_title):
        """Remember *fa_title* (or None for "no counterpart") for *en_title*.

        :param en_title: enwiki category title used as the cache key
        :param fa_title: fawiki category title, or None
        """
        self.cache[en_title] = fa_title

    def cache_get(self, en_title):
        """Return the cached value for *en_title*.

        :return: the stored value (a title, or None for a cached miss), or
            False when *en_title* has never been cached
        """
        return self.cache.get(en_title, False)

    def link_to_title(self, link):
        """Build a ``<namespace>:<title>`` string from a sitelink.

        :param link: object exposing ``namespace.custom_name`` and ``title``
        :return: the namespace name and title joined by a colon
        """
        return '%s:%s' % (link.namespace.custom_name, link.title)

    def add_categories(self, page, new_cats):
        """Add *new_cats* to *page* (not implemented yet).

        :param page: the page that should receive the categories
        :param new_cats: list of category titles to add
        """
        if not new_cats:
            return

        # TODO: actually add the categories to the end of the page
        # TODO: remove any duplicative categories
        # TODO: run the page through fa_cosmetic_changes as a courtesy

    def process_page(self, page):
        """Collect the analogous categories *page* is missing.

        The result is handed to ``add_categories``. Pages whose Wikidata
        item has no enwiki sitelink are skipped.

        :param page: the fawiki page to process
        """
        # Find the enwiki counterpart to the page
        item = pywikibot.ItemPage.fromPage(page)
        if 'enwiki' not in item.sitelinks:
            return
        en_link = item.sitelinks['enwiki']
        en_page = pywikibot.Page(self.en_site, self.link_to_title(en_link))

        # Categories the page is currently in. pywikibot documents
        # categories() as returning a generator; it is materialized here
        # because membership is tested once per enwiki category and a
        # one-shot iterator would be consumed by the first test.
        fa_cats = list(page.categories())

        # Categories the page should be added to
        new_cats = []

        for en_cat in en_page.categories():
            # Ignore hidden categories
            if en_cat.isHiddenCategory():
                continue

            # title is a method on pywikibot pages; call it so that the
            # cache is keyed by the title string, not by a bound method.
            en_title = en_cat.title()

            # If we have already seen this category, use our cache.
            # Note that pywikibot also caches its Wikidata queries;
            # however, having a local cache makes our script nimbler.
            fa_title = self.cache_get(en_title)

            # Not in cache; find the fawiki analogous category
            if fa_title is False:
                cat_item = pywikibot.ItemPage.fromPage(en_cat)
                cat_item.get()
                if 'fawiki' in cat_item.sitelinks:
                    fa_link = cat_item.sitelinks['fawiki']
                    fa_title = self.link_to_title(fa_link)
                else:
                    fa_title = None
                # Misses are cached too, so a category without a fawiki
                # counterpart is not looked up again for every page.
                self.cache_set(en_title, fa_title)

            # No analogous category exists in fawiki
            if not fa_title:
                continue

            # If the fawiki page is already in that category, ignore it
            fa_cat = pywikibot.Page(self.fa_site, fa_title)
            if fa_cat in fa_cats:
                continue

            # TODO: check if the category is a redirect (needed?)
            new_cats.append(fa_title)

        self.add_categories(page, new_cats)

    def run(self):
        """Process every page of the generator once.

        Exits the interpreter when the on-wiki version check fails.
        """
        if not self.check_version():
            pywikibot.output("\03{lightred}Please update me!\03{default}")
            pywikibot.stopme()
            sys.exit()

        for page in self.gen:
            title = page.title()
            if title in self.done:
                continue
            pywikibot.output("Processing %s" % page)
            self.process_page(page)
            self.done.add(title)

def main(*args):
    """Parse arguments, build the page generator and run the bot.

    :param args: command line style arguments; when none are passed,
        pywikibot.handle_args reads them from the command line instead
    :return: True once the bot has finished
    """
    gf = pg.GeneratorFactory()
    # Forward explicitly passed arguments instead of dropping them
    local_args = pywikibot.handle_args(args)

    for arg in local_args:
        # Let pywikibot process standard arguments
        if not gf.handleArg(arg):
            pywikibot.output('Unknown argument %s' % arg)

    gen = gf.getCombinedGenerator(preload=True)

    if gen is None:
        pywikibot.output('No generator found!')
        pywikibot.stopme()
        sys.exit()

    bot = AnalogousCatBot(gen)
    bot.run()
    return True

# Run the bot only when executed as a script, not when imported
if __name__ == '__main__':
    main()