ویکی‌پدیا:درخواست‌های ربات/ربات استخراج جعبه از درون مقاله(جعبه)/ویرایش 0

از ویکی‌پدیا، دانشنامهٔ آزاد
#!/usr/bin/python
# -*- coding: utf-8  -*-
#
# Reza(User:reza1615), 2011
#
# Distributed under the terms of the CC-BY-SA 3.0 .
#!/usr/bin/python
# -*- coding: utf-8  -*-
import codecs
# Number of '@@@'-separated chunks seen by the main loop.
count = 0
# Input file holding the chunks to scan.
filesample = 'resultr2.txt'
# Read the whole input in one go; the context manager closes the handle as
# soon as the text is in memory instead of leaving it open for the whole run.
with codecs.open( filesample,'r' ,'utf8' ) as text2:
    text = text2.read()
title,interwiki = ' ',''
# If you want to import templates, enable (uncomment) olgoo below; otherwise leave it disabled.
#olgoo=u'الگو'
#olgoo=u' '
def box( section,interwiki ):
    """Collect the infobox lines of one section.

    The section is scanned line by line (blank lines are skipped):

    * A line containing ``^`` has every ``^`` removed and the remainder
      becomes the header ``\\nAAA[[<text>]]AAA``; this *replaces* anything
      collected so far.  The same text, with ``$`` appended after each
      character, is wrapped as ``[[en:...]]`` and becomes the new interwiki
      value.  (The ``$`` interleaving is kept exactly as the original wrote
      it; its consumer is not visible here.)
    * Otherwise the line's brace balance (count of ``{`` minus count of
      ``}``) decides: exactly 2 starts collecting, 0 is collected once
      collecting has started, and a negative balance appends the line and
      ends the scan.

    Parameters:
        section: text of one chunk, lines separated by ``\\n``.
        interwiki: value returned unchanged when no ``^`` line is found.

    Returns:
        A ``(linebox, interwiki)`` tuple.  If the section never reaches a
        line with a negative brace balance, whatever was collected so far
        is returned (previously the function fell through and returned
        ``None``, which broke the caller's tuple unpacking).
    """
    linebox = ' '
    newinterwiki = ''
    started = False
    for line in section.split( '\n' ):
        line = line.strip()
        if not line:
            continue
        if '^' in line:
            marked = line.replace( u'^','' )
            linebox = u'\nAAA[[' + marked + ']]AAA'
            # Accumulates across several marker lines, as the original did.
            newinterwiki += ''.join( ch + '$' for ch in marked )
            interwiki = u'[[en:' + newinterwiki + ']]'
            continue
        # str.count replaces the un-imported (and Python-2-only) string.count.
        balance = line.count( '{' ) - line.count( '}' )
        if balance < 0:
            linebox = linebox + '\n' + line
            return linebox,interwiki
        if balance == 2:
            linebox = linebox + '\n' + line
            started = True
        elif balance == 0 and started:
            linebox = linebox + '\n' + line
    # No line with a negative balance was found: hand back the partial result.
    return linebox,interwiki
            
# Markers of the template to extract -- change these names to target a
# different infobox.  Both capitalisations of "infobox" are accepted.
TEMPLATE_MARKERS = ( u'{{Infobox Settlement',u'{{infobox Settlement' )
# Export file; every extracted box is appended to it.
OUTPUT_FILE = 'FileBox.txt'

# Each '@@@'-separated chunk is used as-is.  An earlier variant that pulled
# the title and <text> body out of an XML dump (and unescaped &lt; &gt;
# &quot;) was already disabled and is not reproduced here.
for pag in text.split( u'@@@' ):
    count += 1
    section = pag.strip()
    if not any( marker in section for marker in TEMPLATE_MARKERS ):
        continue
    resultdata,interwiki = box( section,interwiki )
    resultdata = u'xxx\nxxx' + resultdata + '\n'
    # The with-statement closes the file; no explicit close() is needed.
    with codecs.open( OUTPUT_FILE,mode = 'a',encoding = 'utf8' ) as f:
        f.write( resultdata )

# Function-call form works on both Python 2 and Python 3.
print( count )