Pygments highlighter for Genshi

17 July 2008
10:12

By cmlenz as Python

Just testing

1 # -*- coding: utf-8 -*-
2
3 import re
4
5 from genshi.core import Attrs, QName, Stream, START, END, TEXT
6 from genshi.input import _coalesce
7 from pygments.lexers import get_lexer_by_name, get_lexer_for_mimetype, \
8 guess_lexer
9 from pygments.formatters import get_formatter_by_name
10 from pygments.styles import get_style_by_name
11
12
class PygmentsHighlighter(object):
    """Adapter for generating Genshi streams from the Pygments syntax
    highlighter.

    >>> source = '''#!/usr/bin/python
    ...
    ... def main():
    ...     print 'Hello, world')
    ... '''
    >>> stream = PygmentsHighlighter().generate(source.strip())
    >>> print stream.render('html', strip_whitespace=False)
    <span class="c">#!/usr/bin/python</span>
    <BLANKLINE>
    <span class="k">def</span> <span class="nf">main</span><span class="p">():</span>
        <span class="k">print</span> <span class="s">'Hello, world'</span><span class="p">)</span>
    <BLANKLINE>

    This class could be used directly in a markup template as follows:

    >>> from genshi.template import MarkupTemplate
    >>> tmpl = MarkupTemplate('''<div>Code snippet:
    ... <pre class="code">
    ... ${highlight(code, 'python')}
    ... </pre></div>''')
    >>> adapter = PygmentsHighlighter()
    >>> stream = tmpl.generate(code=source, highlight=adapter.generate)
    >>> print stream.render('html')
    <div>Code snippet:
    <pre class="code">
    <span class="c">#!/usr/bin/python</span>
    <BLANKLINE>
    <span class="k">def</span> <span class="nf">main</span><span class="p">():</span>
        <span class="k">print</span> <span class="s">'Hello, world'</span><span class="p">)</span>
    <BLANKLINE>
    </pre></div>
    """

    # Element used to wrap each highlighted token unless overridden.
    DEFAULT_TAG_NAME = QName('span')

    def __init__(self, style='default', noclasses=False, classprefix='',
                 tagname=DEFAULT_TAG_NAME):
        """Initialize the adapter.

        :param style: the Pygments style to use; either the name of the style
                      as a string, or a custom ``Style`` subclass
        :param noclasses: if set to `True`, token tags will use inline styles
                          instead of CSS classes
        :param classprefix: a prefix string that is prepended to all the CSS
                            class names in the output
        :param tagname: the name of the tag used to wrap tokens
        """
        self.noclasses = noclasses
        if isinstance(style, basestring):
            style = get_style_by_name(style)
        # The HTML formatter is never asked to render anything; it is only
        # consulted for the token -> CSS class / inline style mapping.
        self.formatter = get_formatter_by_name('html', style=style,
                                               classprefix=classprefix)
        self.tagname = tagname

    def generate(self, source, lexer=None, stripnl=False):
        """Generate a markup stream for the given code using the specified
        Pygments lexer.

        :param source: the source text to highlight as a string
        :param lexer: the Pygments lexer to use; this can be either the name
                      of the lexer as a string, a MIME type (any string
                      containing a ``/``), or a ``Lexer`` instance; if
                      `None`, an attempt is made to guess the lexer from the
                      file contents (for example using the shebang line)
        :param stripnl: passed through as the ``stripnl`` option whenever the
                        lexer is looked up or guessed here; it is not applied
                        to a ``Lexer`` instance supplied by the caller
        :return: the markup stream
        :rtype: `Stream`
        """
        if lexer is None:
            lexer = guess_lexer(source, stripnl=stripnl)
        elif isinstance(lexer, basestring):
            if '/' in lexer:
                lexer = get_lexer_for_mimetype(lexer, stripnl=stripnl)
            else:
                lexer = get_lexer_by_name(lexer, stripnl=stripnl)
        tokens = lexer.get_tokens(source)

        def _generate(formatter=self.formatter, tagname=self.tagname,
                      pos=(None, -1, -1)):
            # `attrs` is truthy only while a wrapper tag is currently open.
            attrs = prev_class = None

            for ttype, value in tokens:
                css_class = formatter._get_css_class(ttype)
                if css_class == prev_class:
                    # Same class as the previous token: keep the tag open.
                    yield TEXT, value, pos

                elif value:  # if no value, leave old tag open
                    if attrs:
                        yield END, tagname, pos
                    if css_class:
                        if self.noclasses:
                            style = formatter.class2style[css_class][0]
                            attrs = Attrs([(QName('style'), style)])
                        else:
                            attrs = Attrs([(QName('class'), css_class)])
                    else:
                        # Unclassified token: emit bare text, no wrapper.
                        attrs = Attrs()
                    prev_class = css_class
                    if attrs:
                        yield START, (tagname, attrs), pos
                    yield TEXT, value, pos

            if attrs:
                yield END, tagname, pos

        # Merge the adjacent TEXT events produced for same-class tokens.
        return Stream(_coalesce(_generate()))

    def lines(self, source, lexer=None, stripnl=False):
        """Generate a markup stream for every line in the given code using the
        specified Pygments lexer.

        Tags that span a line break are closed at the end of the line and
        reopened on the next one, so every yielded stream is balanced. The
        newline characters themselves are not part of the yielded streams.
        An empty line yields an empty stream; a trailing newline at the end
        of the highlighted output does not produce an extra empty stream.

        :param source: the source text to highlight as a string
        :param lexer: the Pygments lexer to use; this can be either the name
                      of the lexer as a string, or a ``Lexer`` instance; if
                      `None`, an attempt is made to guess the lexer from the
                      file contents (for example using the shebang line)
        :param stripnl: forwarded unchanged to `generate`
        :return: an iterator over markup streams, one for every line
        :rtype: `generator`
        """
        def _split_lines(stream):
            # START events of the tags that are open at the current position.
            stack = []

            def _wrapped(text, pos):
                # Emit `text` enclosed in a fresh copy of every open tag.
                for event in stack:
                    yield event
                yield TEXT, text, pos
                for event in reversed(stack):
                    yield END, event[1][0], event[2]

            for kind, data, pos in stream:
                if kind is TEXT:
                    for index, line in enumerate(data.split('\n')):
                        if index:
                            # Every piece after the first follows a newline;
                            # emit it as a bare marker for the caller.
                            yield TEXT, '\n', pos
                        if line:
                            for event in _wrapped(line, pos):
                                yield event
                elif kind is START:
                    # Held back; replayed around each piece of text instead.
                    stack.append((kind, data, pos))
                elif kind is END:
                    stack.pop()
                else:
                    yield kind, data, pos

        buf = []
        highlighted = self.generate(source, lexer=lexer, stripnl=stripnl)
        for kind, data, pos in _split_lines(highlighted):
            if kind is TEXT and data == '\n':
                yield Stream(buf)
                buf = []
            else:
                buf.append((kind, data, pos))
        if buf:
            yield Stream(buf)

Download Raw Source

Comments

No comments so far.