ویکیپدیا:درخواستهای ربات/ربات صفحه خوان/ویرایش ۰
<syntaxhighlight lang="python">
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Module-level store filled by the recorder functions (templates, boxs,
# images, interwikis, txts). Every category starts with its own list so the
# recorders work even when this file is imported instead of run as a script.
data = {
    'box': [],
    'interwiki': [],
    'image': [],
    'txt': [],
    'template': [],
}


def _classify_section(section):
    """Return ``(category, cleaned_text)`` for one stripped, non-empty line.

    Categories are 'template', 'box', 'image', 'interwiki' and 'txt'.
    """
    if section[0] == '{':
        if section[-1] == '}':
            # NOTE(review): the replacement arguments were lost in the pasted
            # source; stripping every brace matches how the bracket branches
            # below treat '[' and ']' -- confirm against the original script.
            return 'template', section.replace('{', '').replace('}', '')
        # Opens with a brace but is not closed on the same line.
        return 'box', section.replace('{', '')

    if section[0] == '|':
        # A leading pipe is dropped and the rest is stored as a box line.
        return 'box', section[1:]

    # Fold the recognised file-namespace spellings into 'file:'. This also
    # rewrites lines that end up in 'txt', exactly as the original did.
    for prefix in ('image:', 'Image:', 'File:'):
        section = section.replace(prefix, 'file:')

    if section[0] == '[' and section[-1] == ']':
        cleaned = section.replace('[', '').replace(']', '')
        if 'file:' in section:
            return 'image', cleaned
        return 'interwiki', cleaned

    # Everything else, including file links that are not fully bracketed.
    return 'txt', section


def parseText(text):
    """Split *text* into lines, classify each one and record it in ``data``.

    Each non-blank line is stripped and handed to the recorder function for
    its category (``templates``, ``boxs``, ``images``, ``interwikis`` or
    ``txts``). After the last line the collected ``data`` is pretty-printed.

    Args:
        text: the page text as one string; lines are separated by '\\n'.

    Returns:
        The module-level ``data`` dict. The original returned nothing, so
        callers that ignore the return value are unaffected.
    """
    # Imported here so the function does not depend on the __main__ block
    # having imported pprint first.
    import pprint

    recorders = {
        'template': templates,
        'box': boxs,
        'image': images,
        'interwiki': interwikis,
        'txt': txts,
    }

    for raw_line in text.split('\n'):
        section = raw_line.strip()
        if not section:
            continue
        category, cleaned = _classify_section(section)
        recorders[category](cleaned)

    pprint.pprint(data)
    return data
def templates(section):
    """Append *section* to the 'template' list of the module-level ``data``.

    The list is created on first use, so this does not raise KeyError when
    ``data`` has not been given a 'template' entry yet.
    """
    data.setdefault('template', []).append(section)
def interwikis(section):
    """Append *section* to the 'interwiki' list of the module-level ``data``.

    The list is created on first use, so this does not raise KeyError when
    ``data`` has not been given an 'interwiki' entry yet.
    """
    data.setdefault('interwiki', []).append(section)
def images(section):
    """Append *section* to the 'image' list of the module-level ``data``.

    The list is created on first use, so this does not raise KeyError when
    ``data`` has not been given an 'image' entry yet.
    """
    data.setdefault('image', []).append(section)
def boxs(section):
    """Append *section* to the 'box' list of the module-level ``data``.

    The list is created on first use, so this does not raise KeyError when
    ``data`` has not been given a 'box' entry yet.
    """
    data.setdefault('box', []).append(section)
def txts(section):
    """Append *section* to the 'txt' list of the module-level ``data``.

    The list is created on first use, so this does not raise KeyError when
    ``data`` has not been given a 'txt' entry yet.
    """
    data.setdefault('txt', []).append(section)
if __name__ == '__main__':
    # Script entry point: parse sample.txt and print what was collected.
    import io
    import pprint  # also leaves `pprint` available as a module-level name

    # Fresh store with one empty list per category the recorders append to.
    data = {
        'box': [],
        'interwiki': [],
        'image': [],
        'txt': [],
        'template': [],
    }
    # Not read anywhere in the visible code; kept because it is a
    # module-level name that code outside this view might still use.
    line = ''

    # Decode the file as UTF-8 while reading. The original called .encode()
    # on the raw bytes, which fails on non-ASCII page text. newline='' keeps
    # the line endings untouched so carriage returns are stripped explicitly,
    # and the with-block closes the file handle.
    with io.open('sample.txt', encoding='utf-8', newline='') as handle:
        text = handle.read().replace('\r', '')

    data2 = parseText(text)
    # Print whatever parseText returned, then the module-level store.
    pprint.pprint(data2)
    print(data)