# -*- coding: utf-8 -*-

import re

from genshi.core import Attrs, QName, Stream, START, END, TEXT
from genshi.input import _coalesce
from pygments.lexers import get_lexer_by_name, get_lexer_for_mimetype, \
                            guess_lexer
from pygments.formatters import get_formatter_by_name
from pygments.styles import get_style_by_name


class PygmentsHighlighter(object):
    """Adapter for generating Genshi streams from the Pygments syntax
    highlighter.

    >>> source = '''#!/usr/bin/python
    ... 
    ... def main():
    ...    print 'Hello, world')
    ... '''
    >>> stream = PygmentsHighlighter().generate(source.strip())
    >>> print stream.render('html', strip_whitespace=False)
    <span class="c">#!/usr/bin/python</span>
    <BLANKLINE>
    <span class="k">def</span> <span class="nf">main</span><span class="p">():</span>
       <span class="k">print</span> <span class="s">'Hello, world'</span><span class="p">)</span>
    <BLANKLINE>

    This class could be used directly in a markup template as follows:

    >>> from genshi.template import MarkupTemplate
    >>> tmpl = MarkupTemplate('''<div>Code snippet:
    ... <pre class="code">
    ... ${highlight(code, 'python')}
    ... </pre></div>''')
    >>> adapter = PygmentsHighlighter()
    >>> stream = tmpl.generate(code=source, highlight=adapter.generate)
    >>> print stream.render('html')
    <div>Code snippet:
    <pre class="code">
    <span class="c">#!/usr/bin/python</span>
    <BLANKLINE>
    <span class="k">def</span> <span class="nf">main</span><span class="p">():</span>
       <span class="k">print</span> <span class="s">'Hello, world'</span><span class="p">)</span>
    <BLANKLINE>
    </pre></div>
    """

    # Tag used to wrap highlighted tokens unless the caller overrides it.
    DEFAULT_TAG_NAME = QName('span')

    def __init__(self, style='default', noclasses=False, classprefix='',
                 tagname=DEFAULT_TAG_NAME):
        """Initialize the adapter.

        :param style: the Pygments style to use; either the name of the style as
                      a string, or a custom ``Style`` subclass
        :param noclasses: if set to `True`, token tags will use inline styles
                          (a ``style`` attribute) instead of CSS classes
        :param classprefix: a prefix string that is prepended to all the CSS
                            class names in the output
        :param tagname: the name of the tag used to wrap tokens
        """
        self.noclasses = noclasses
        if isinstance(style, basestring):
            style = get_style_by_name(style)
        # The HTML formatter is never asked to render anything; it is only
        # used to map token types to CSS class names and inline styles.
        self.formatter = get_formatter_by_name('html', style=style,
                                               classprefix=classprefix)
        self.tagname = tagname

    def generate(self, source, lexer=None, stripnl=False):
        """Generate a markup stream for the given code using the specified
        Pygments lexer.

        :param source: the source text to highlight as a string
        :param lexer: the Pygments lexer to use; this can be either the name
                      of the lexer as a string, or a ``Lexer`` instance; if
                      `None`, an attempt is made to guess the lexer from the
                      file contents (for example using the shebang line).
                      A string containing a ``/`` is treated as a MIME type.
        :param stripnl: forwarded as the ``stripnl`` option to the lexer when
                        the lexer is guessed or looked up by name/MIME type;
                        it has no effect when a ``Lexer`` instance is passed
        :return: the markup stream
        :rtype: `Stream`
        """
        if lexer is None:
            lexer = guess_lexer(source, stripnl=stripnl)
        elif isinstance(lexer, basestring):
            if '/' in lexer:
                lexer = get_lexer_for_mimetype(lexer, stripnl=stripnl)
            else:
                lexer = get_lexer_by_name(lexer, stripnl=stripnl)
        tokens = lexer.get_tokens(source)

        formatter = self.formatter
        tagname = self.tagname
        noclasses = self.noclasses
        # The generated events do not originate from a file, so every event
        # carries the same placeholder position.
        pos = (None, -1, -1)

        def _generate():
            # `attrs` is truthy only while a wrapper tag is currently open.
            attrs = prev_class = None

            for ttype, value in tokens:
                css_class = formatter._get_css_class(ttype)
                if css_class == prev_class:
                    # Same class as the previous token: keep the tag open and
                    # just append the text.
                    yield TEXT, value, pos

                elif value: # if no value, leave old tag open
                    if attrs:
                        yield END, tagname, pos
                    if css_class:
                        if noclasses:
                            style = formatter.class2style[css_class][0]
                            attrs = Attrs([(QName('style'), style)])
                        else:
                            attrs = Attrs([(QName('class'), css_class)])
                    else:
                        # Tokens without a CSS class are emitted as bare text.
                        attrs = Attrs()
                    prev_class = css_class
                    if attrs:
                        yield START, (tagname, attrs), pos
                    yield TEXT, value, pos

            # Close the wrapper tag left open by the last token, if any.
            if attrs:
                yield END, tagname, pos

        return Stream(_coalesce(_generate()))

    def lines(self, source, lexer=None, stripnl=False):
        """Generate a markup stream for every line in the given code using the
        specified Pygments lexer.

        Tags that span several lines are closed at the end of each line and
        reopened at the start of the next one, so that every yielded stream
        is balanced on its own.

        :param source: the source text to highlight as a string
        :param lexer: the Pygments lexer to use; this can be either the name
                      of the lexer as a string, or a ``Lexer`` instance; if
                      `None`, an attempt is made to guess the lexer from the
                      file contents (for example using the shebang line)
        :param stripnl: passed through unchanged to `generate`
        :return: an iterator over markup streams, one for every line
        :rtype: `generator`
        """
        def _split_lines(stream):
            # START events of the tags that are currently open.
            stack = []

            def _wrapped(text, pos):
                # Emit `text` enclosed in all currently open tags.
                for start in stack:
                    yield start
                yield TEXT, text, pos
                for start in reversed(stack):
                    yield END, start[1][0], start[2]

            for kind, data, pos in stream:
                if kind is TEXT:
                    for index, line in enumerate(data.split('\n')):
                        # Every fragment but the first is preceded by a
                        # newline in the original text; emit it as a
                        # stand-alone event that marks the line boundary.
                        if index:
                            yield TEXT, '\n', pos
                        if line:
                            for event in _wrapped(line, pos):
                                yield event
                elif kind is START:
                    # Deferred: tags are only emitted around actual text.
                    stack.append((kind, data, pos))
                elif kind is END:
                    stack.pop()
                else:
                    yield kind, data, pos

        buf = []
        for event in _split_lines(self.generate(source, lexer=lexer,
                                                stripnl=stripnl)):
            if event[0] is TEXT and event[1] == '\n':
                # Line boundary: hand out the events collected so far (which
                # may be none, for an empty line) and start a new line.
                yield Stream(buf)
                buf = []
            else:
                buf.append(event)
        if buf:
            yield Stream(buf)


