ویکی‌پدیا:درخواست‌های ربات/ربات صفحه خوان/ویرایش ۰

از ویکی‌پدیا، دانشنامهٔ آزاد

<syntaxhighlight lang="python">

#!/usr/bin/python
# -*- coding: utf-8 -*-

# Shared accumulator: maps a category name to the list of lines collected for
# it.  Pre-populated so the functions below work when this module is imported,
# not only when the ``__main__`` block has set the keys up first.
data = {
    'box': [],
    'interwiki': [],
    'image': [],
    'txt': [],
    'template': [],
}

# Namespace prefixes that are rewritten to the single spelling 'file:'.
_FILE_PREFIXES = ('image:', 'Image:', 'File:')


def _remove_chars(section, chars):
    """Return *section* with every occurrence of each character in *chars* deleted."""
    for char in chars:
        section = section.replace(char, '')
    return section


def _collect(key, section):
    """Append *section* to ``data[key]``, creating the list if it is missing."""
    data.setdefault(key, []).append(section)


def parseText( text ):
    """Sort the lines of *text* into the category lists held in ``data``.

    Each line is stripped of surrounding whitespace; empty lines are skipped.
    The first and last characters of a line decide where it goes:

    * starts with '{' and ends with '}'  -> ``templates`` (all braces removed)
    * starts with '{' but does not end with '}' -> ``boxs`` ('{' removed)
    * starts with '|'                    -> ``boxs`` (leading '|' dropped)
    * starts with '[' and ends with ']'  -> ``images`` when the line contains
      'file:' after prefix normalisation, otherwise ``interwikis``
      (all square brackets removed in both cases)
    * anything else                      -> ``txts``

    The collected ``data`` is pretty-printed to stdout and then returned.

    :param text: the text to classify, lines separated by '\\n'.
    :returns: the module-level ``data`` dict.
    """
    # Imported here because the module has no top-level import of pprint;
    # the only other import lives under the ``__main__`` guard.
    import pprint

    for line in text.split( '\n' ):
        section = line.strip()
        if not section:
            continue

        if section[0] == '{':
            if section[-1] == '}':
                # NOTE(review): the published source shows the first pattern
                # as '{}', which would leave the opening braces in place; it
                # is treated as '{' here to mirror the '}' removal -- confirm.
                templates( _remove_chars( section, '{}' ) )
            else:
                boxs( section.replace( '{', '' ) )
            continue

        if section[0] == '|':
            boxs( section[1:] )
            continue

        # The prefix rewrite is applied to every remaining line, so plain
        # text lines are stored with the normalised 'file:' spelling too.
        for prefix in _FILE_PREFIXES:
            section = section.replace( prefix, 'file:' )

        if section[0] == '[' and section[-1] == ']':
            unbracketed = _remove_chars( section, '[]' )
            if 'file:' in section:
                images( unbracketed )
            else:
                interwikis( unbracketed )
            continue

        # Everything else, including a line that starts with '}', is text.
        txts( section )

    pprint.pprint( data )
    return data


def templates( section ):
    """Record *section* under ``data['template']``."""
    _collect( 'template', section )


def interwikis( section ):
    """Record *section* under ``data['interwiki']``."""
    _collect( 'interwiki', section )


def images( section ):
    """Record *section* under ``data['image']``."""
    _collect( 'image', section )


def boxs( section ):
    """Record *section* under ``data['box']``."""
    _collect( 'box', section )


def txts( section ):
    """Record *section* under ``data['txt']``."""
    _collect( 'txt', section )


if __name__ == '__main__':
    # Script entry point: classify the lines of sample.txt and print the result.
    import pprint

    # Start from empty category lists for this run.
    data = {}
    data['box'] = []
    data['interwiki'] = []
    data['image'] = []
    data['txt'] = []
    data['template'] = []
    line = ''

    # The file is read as bytes, so it has to be *decoded* to text (the
    # original called .encode on the raw bytes); carriage returns are then
    # dropped so only '\n' separates lines.  The with-block closes the file.
    with open( 'sample.txt', 'rb' ) as handle:
        text = handle.read().decode( 'utf-8' ).replace( '\r', '' )
    data2 = parseText( text )

    #open('b2.py', 'w').write(repr(data))
    pprint.pprint( data2 )
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print( data )