博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
python---练习---即时标记
阅读量:5749 次
发布时间:2019-06-18

本文共 5325 字,大约阅读时间需要 17 分钟。

hot3.png

这里只贴代码(python版本2.7.2)

markup.py

import re
import sys

# Star imports are kept as in the original: Parser needs blocks(), the rule
# classes and HTMLRenderer, and pulling in everything from the three sibling
# modules works no matter which of them defines which name.
from handlers import *
from util import *
from rules import *


class Parser:
    """Read a text file, apply rules to its blocks and drive a handler.

    Filters rewrite the text of a block (inline markup); rules decide what
    kind of block it is and emit it through the handler.
    """

    def __init__(self, handler):
        self.handler = handler
        self.rules = []
        self.filters = []

    def addRule(self, rule):
        """Append *rule*; rules are tried in the order they were added."""
        self.rules.append(rule)

    def addFilter(self, pattern, name):
        """Register a filter replacing matches of *pattern* via handler.sub(name)."""
        # Named apply_filter so the closure does not shadow the builtin filter().
        def apply_filter(block, handler):
            return re.sub(pattern, handler.sub(name), block)
        self.filters.append(apply_filter)

    def parse(self, file):
        """Parse *file* (an iterable of lines) and emit the whole document."""
        self.handler.start('document')
        for block in blocks(file):
            for apply_filter in self.filters:
                block = apply_filter(block, self.handler)
            for rule in self.rules:
                if rule.condition(block):
                    # A true return value means the block is fully handled,
                    # so the remaining rules are skipped.
                    if rule.action(block, self.handler):
                        break
        self.handler.end('document')


class BasicTextParser(Parser):
    """Concrete parser that installs its rules and filters in the constructor."""

    def __init__(self, handler):
        Parser.__init__(self, handler)
        # Order matters: the first rule whose action returns true wins, so the
        # catch-all ParagraphRule has to come last.
        self.addRule(ListRule())
        self.addRule(ListItemRule())
        self.addRule(TitleRule())
        self.addRule(HeadingRule())
        self.addRule(ParagraphRule())

        self.addFilter(r'\*(.*?)\*', 'emphasis')
        self.addFilter(r'(http://[\.a-zA-Z/]+)', 'url')
        self.addFilter(r'([\.a-zA-Z]+@[\.a-zA-Z]+[a-zA-Z])', 'mail')


# Only read stdin when run as a script (python markup.py < in.txt > out.html),
# so that importing this module does not block on input.
if __name__ == '__main__':
    handler = HTMLRenderer()
    parser = BasicTextParser(handler)
    parser.parse(sys.stdin)

handlers.py

class Handler:
    """Dispatch parser events to optional methods of a subclass.

    The parser calls start() and end() with a block name at the beginning and
    end of each block; these call the method named 'start_<name>' or
    'end_<name>' if the object has one. sub() is used for regular-expression
    substitution: called with a name such as 'emphasis', it returns a
    replacement function that can be passed to re.sub.
    """

    def callback(self, prefix, name, *args):
        """Call the method called prefix+name with *args*, if it exists.

        Returns the method's result, or None when there is no such method.
        """
        method = getattr(self, prefix + name, None)
        if callable(method):
            return method(*args)

    def start(self, name):
        self.callback('start_', name)

    def end(self, name):
        self.callback('end_', name)

    def sub(self, name):
        """Return a replacement function for re.sub bound to 'sub_<name>'."""
        def substitution(match):
            result = self.callback('sub_', name, match)
            if result is None:
                # No substitution available: keep the matched text unchanged.
                # (The original evaluated match.group(0) but discarded it.)
                return match.group(0)
            return result
        return substitution


class HTMLRenderer(Handler):
    """Concrete handler that renders HTML.

    The methods here are reached through the start(), end() and sub() methods
    of the Handler superclass and emit basic HTML tags on standard output.
    """

    # print(x) with a single argument behaves identically as a Python 2 print
    # statement and as the Python 3 print function.

    def start_document(self):
        print('<html><head><title>...</title></head><body>')

    def end_document(self):
        print('</body></html>')

    def start_paragraph(self):
        print('<p>')

    def end_paragraph(self):
        print('</p>')

    def start_heading(self):
        print('<h2>')

    def end_heading(self):
        print('</h2>')

    def start_list(self):
        print('<ul>')

    def end_list(self):
        print('</ul>')

    def start_listitem(self):
        print('<li>')

    def end_listitem(self):
        print('</li>')

    def start_title(self):
        print('<h1>')

    def end_title(self):
        print('</h1>')

    def sub_emphasis(self, match):
        return '<em>%s</em>' % match.group(1)

    def sub_url(self, match):
        return '<a href="%s">%s</a>' % (match.group(1), match.group(1))

    def sub_mail(self, match):
        return '<a href="mailto:%s">%s</a>' % (match.group(1), match.group(1))

    def feed(self, data):
        """Emit the text of a block."""
        print(data)

    rules.py(原文此处误标为 util.py;下面的 Rule 系列类对应 markup.py 中的 from rules import *)

    class Rule:
        """Base class for all rules.

        Subclasses provide a class attribute `type` (the block name passed to
        the handler) and a condition(block) method.
        """

        def action(self, block, handler):
            """Emit *block* wrapped in start/end events; return True (handled)."""
            # self.type, not the builtin type: the handler expects the block name.
            handler.start(self.type)
            handler.feed(block)
            handler.end(self.type)
            return True


    class HeadingRule(Rule):
        """A heading is a single line of at most 70 characters not ending in a colon."""

        type = 'heading'

        def condition(self, block):
            return '\n' not in block and len(block) <= 70 and block[-1] != ':'


    class TitleRule(HeadingRule):
        """The title is the first block of the document, provided it is a heading."""

        type = 'title'
        first = True

        def condition(self, block):
            if not self.first:
                return False
            # Only the very first block offered to this rule can be the title.
            self.first = False
            return HeadingRule.condition(self, block)


    class ListItemRule(Rule):
        """A list item is a paragraph that begins with a hyphen.

        The hyphen is removed as part of the formatting.
        """

        type = 'listitem'

        def condition(self, block):
            return block[0] == '-'

        def action(self, block, handler):
            handler.start(self.type)
            # Drop the leading hyphen and the whitespace around the item text.
            handler.feed(block[1:].strip())
            handler.end(self.type)
            return True


    class ListRule(ListItemRule):
        """A list begins between a non-item block and a following list item.

        It ends after the last consecutive list item.
        """

        type = 'list'
        inside = False

        def condition(self, block):
            # Every block is inspected so that list boundaries can be detected.
            return True

        def action(self, block, handler):
            is_item = ListItemRule.condition(self, block)
            if not self.inside and is_item:
                handler.start(self.type)
                self.inside = True
            elif self.inside and not is_item:
                handler.end(self.type)
                self.inside = False
            # Always False: the block itself still has to be handled by a later rule.
            return False


    class ParagraphRule(Rule):
        """A paragraph is simply a block not covered by any other rule."""

        type = 'paragraph'

        def condition(self, block):
            return True

    util.py(原文此处误标为 rules.py;下面的 lines/blocks 函数对应 markup.py 中的 from util import *)

    def lines(file):
        """Yield every line of *file*, followed by one final blank line.

        The trailing blank line acts as a sentinel so that blocks() also
        flushes the last block of a file that does not end in an empty line.
        """
        for line in file:
            yield line
        # After the loop, not inside it: emitting a blank line after every
        # input line would turn each line into a block of its own.
        yield '\n'


    def blocks(file):
        """Yield the blocks of *file*: runs of non-blank lines, joined and stripped."""
        block = []
        for line in lines(file):
            if line.strip():
                block.append(line)
            elif block:
                # A blank line ends the current block; repeated blank lines
                # are ignored because `block` is already empty.
                yield ''.join(block).strip()
                block = []

    这里举一个实例

    dos命令行下

    python markup.py < test_input.txt > test_input.html

    test_input.txt就是一个普通的文本

    如:

    Welcome to World Wide Spam. Inc.

     

     

    These are the corporate web pages of *World Wide Spam*, Inc. We hope you find your stay enjoyable, and that you will sample many of our products.

     

    A short history of the company

     

    World Wide Spam was started in the summer of 2000. The business concept was to ride the dot-com wave and to make money both through bulk email and by selling canned meat online.

     

    After receiving several complaints from customers who weren't satisfied by their bulk email, World Wide Spam altered their profile, and focused 100% on canned goods. Today, they rank as the world's 13,892 online supplier of SPAM.

     

    Destinations

     

    From this page you may visit several of our interesting web pages:

     

    - What is SPAM?(http://wwspam.fu/whatisspam)

     

    - How do they make it?(http://wwspam.fu/howtomakeit)

     

    - Why should I eat it?(http://wwspam.fu/whyeatit)

     

    How to get in touch with us

     

    You can get in touch with us in *many* ways: By phone (555-1234), by email (wwspam@wwspam.fu) or by visiting our customer feedback page (http://wwspam.fu/feedback)

    执行后,程序会按上述规则把文本格式化成 HTML,并输出到 test_input.html

    没错,我是在做书上的练习

    呵呵呵 这里主要是体现了类和方法的使用

     

    转载于:https://my.oschina.net/zhangdapeng89/blog/53910

    你可能感兴趣的文章
    SCCM的证书配置PKI
    查看>>
    看linux书籍做的一些重要笔记(2011.07.03更新)
    查看>>
    Exchange server 2010系列教程之一 安装Exchange 2010准备条件
    查看>>
    POI 生成 xls 文件使用总结(快速入门)
    查看>>
    CString、Char* ,char [20]、wchar_t、unsigned short转化
    查看>>
    从案例学RxAndroid开发(上)
    查看>>
    debian 下安装megacli
    查看>>
    线程同步--事件对象
    查看>>
    报表工具FastReport.Net v2016.4正式发布!
    查看>>
    rarlinux的安装及使用
    查看>>
    LAMP 源码详细安装过程 完结版
    查看>>
    IOS中类和对象还有,nil/Nil/NULL的区别
    查看>>
    探索音乐+社交的更多可能
    查看>>
    OSI参考模型
    查看>>
    2013,我静下心来!!!!!
    查看>>
    我的mbp / macOS 10.12.6
    查看>>
    java 线程安全问题之静态变量、实例变量、局部变量
    查看>>
    怎么选择和快速搭建个人博客
    查看>>
    WAF绕过方法从简单到高级
    查看>>
    PSEXEC执行命令原理分析
    查看>>