SickRage/lib/bs4/tests/test_html5lib.py

"""Tests to ensure that the html5lib tree builder generates good trees."""

import warnings

# The html5lib-backed builder is optional: record whether it could be
# imported so the test class below can be skipped when it is unavailable.
try:
    from bs4.builder import HTML5TreeBuilder
    HTML5LIB_PRESENT = True
except ImportError:
    # No name is bound for the exception: it is never inspected, and the
    # old "except ImportError, e" spelling is a syntax error on Python 3.
    HTML5LIB_PRESENT = False
from bs4.element import SoupStrainer
from bs4.testing import (
    HTML5TreeBuilderSmokeTest,
    SoupTest,
    skipIf,
)

@skipIf(
    not HTML5LIB_PRESENT,
    "html5lib seems not to be present, not testing its tree builder.")
class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
    """Smoke tests for the html5lib tree builder.

    See ``HTML5TreeBuilderSmokeTest`` for the inherited tests. The methods
    defined here pin down output that is specific to html5lib. The whole
    class is skipped when ``HTML5TreeBuilder`` could not be imported.
    """

    @property
    def default_builder(self):
        """Return a new ``HTML5TreeBuilder`` instance."""
        return HTML5TreeBuilder()

    def test_soupstrainer(self):
        """``parse_only`` is ignored by html5lib, and a warning says so."""
        # The html5lib tree builder does not support SoupStrainers.
        strainer = SoupStrainer("b")
        markup = "<p>A <b>bold</b> statement.</p>"
        with warnings.catch_warnings(record=True) as w:
            # Record every warning, even one that was already shown earlier
            # in this process; otherwise the "show once" default filter can
            # leave ``w`` empty. The filter change is undone when the
            # ``catch_warnings`` context exits.
            warnings.simplefilter("always")
            soup = self.soup(markup, parse_only=strainer)

        # The strainer had no effect: the whole document was parsed.
        self.assertEqual(
            soup.decode(), self.document_for(markup))

        # Look through all recorded warnings rather than only the first, so
        # an unrelated warning recorded earlier cannot mask the expected one.
        messages = [str(warning.message) for warning in w]
        self.assertTrue(
            any("the html5lib tree builder doesn't support parse_only" in
                message for message in messages),
            "parse_only warning not found in %r" % (messages,))

    def test_correctly_nested_tables(self):
        """html5lib inserts <tbody> tags where other parsers don't."""
        markup = ('<table id="1">'
                  '<tr>'
                  "<td>Here's another table:"
                  '<table id="2">'
                  '<tr><td>foo</td></tr>'
                  '</table></td>')

        # Both the outer and the nested table gain a <tbody>, and the
        # unclosed outer <tr> and <table> are closed.
        self.assertSoupEquals(
            markup,
            '<table id="1"><tbody><tr><td>Here\'s another table:'
            '<table id="2"><tbody><tr><td>foo</td></tr></tbody></table>'
            '</td></tr></tbody></table>')

        # A table that already spells out thead/tbody/tfoot is unchanged.
        self.assertSoupEquals(
            "<table><thead><tr><td>Foo</td></tr></thead>"
            "<tbody><tr><td>Bar</td></tr></tbody>"
            "<tfoot><tr><td>Baz</td></tr></tfoot></table>")

    def test_xml_declaration_followed_by_doctype(self):
        """An XML declaration before the doctype leaves the tree connected."""
        markup = '''<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html>
<html>
  <head>
  </head>
  <body>
   <p>foo</p>
  </body>
</html>'''
        soup = self.soup(markup)
        # Verify that we can reach the <p> tag; this means the tree is connected.
        self.assertEqual(b"<p>foo</p>", soup.p.encode())

    def test_reparented_markup(self):
        """Mis-nested <em> markup is rebuilt and both <p> tags stay findable."""
        markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>'
        soup = self.soup(markup)
        self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode())
        self.assertEqual(2, len(soup.find_all('p')))

    def test_reparented_markup_ends_with_whitespace(self):
        """Same as ``test_reparented_markup`` with a trailing newline."""
        markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>\n'
        soup = self.soup(markup)
        # The trailing newline is kept, placed just before </body>.
        self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
        self.assertEqual(2, len(soup.find_all('p')))
Welcome to our SickBeard-TVRage Edition ... This version of SickBeard uses both TVDB and TVRage to search for and gather its series data, giving you access to shows that you couldn't download before because you were locked into only what TheTVDB had to offer. This edition is also based on the code we used in our XEM edition, so it comes with scene numbering support as well as all the other features our XEM edition has to offer. Before using this with your existing database (sickbeard.db), please make a backup copy of it and delete any other database files such as cache.db and failed.db, if present. We HIGHLY recommend starting out with no database files at all to make this a fresh start, but the choice is at your own risk! Enjoy! 2014-03-10 01:18:05 -04:00			`"""Tests to ensure that the html5lib tree builder generates good trees."""`

			`import warnings`

			`try:`
			`from bs4.builder import HTML5TreeBuilder`
			`HTML5LIB_PRESENT = True`
			`except ImportError, e:`
			`HTML5LIB_PRESENT = False`
			`from bs4.element import SoupStrainer`
			`from bs4.testing import (`
			`HTML5TreeBuilderSmokeTest,`
			`SoupTest,`
			`skipIf,`
			`)`

			`@skipIf(`
			`not HTML5LIB_PRESENT,`
			`"html5lib seems not to be present, not testing its tree builder.")`
			`class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):`
			"""See ``HTML5TreeBuilderSmokeTest``."""

			`@property`
			`def default_builder(self):`
			`return HTML5TreeBuilder()`

			`def test_soupstrainer(self):`
			`# The html5lib tree builder does not support SoupStrainers.`
			`strainer = SoupStrainer("b")`
			`markup = "<p>A <b>bold</b> statement.</p>"`
			`with warnings.catch_warnings(record=True) as w:`
			`soup = self.soup(markup, parse_only=strainer)`
			`self.assertEqual(`
			`soup.decode(), self.document_for(markup))`

			`self.assertTrue(`
			`"the html5lib tree builder doesn't support parse_only" in`
			`str(w[0].message))`

			`def test_correctly_nested_tables(self):`
			`"""html5lib inserts <tbody> tags where other parsers don't."""`
			`markup = ('<table id="1">'`
			`'<tr>'`
			`"<td>Here's another table:"`
			`'<table id="2">'`
			`'<tr><td>foo</td></tr>'`
			`'</table></td>')`

			`self.assertSoupEquals(`
			`markup,`
			`'<table id="1"><tbody><tr><td>Here\'s another table:'`
			`'<table id="2"><tbody><tr><td>foo</td></tr></tbody></table>'`
			`'</td></tr></tbody></table>')`

			`self.assertSoupEquals(`
			`"<table><thead><tr><td>Foo</td></tr></thead>"`
			`"<tbody><tr><td>Bar</td></tr></tbody>"`
			`"<tfoot><tr><td>Baz</td></tr></tfoot></table>")`

			`def test_xml_declaration_followed_by_doctype(self):`
			`markup = '''<?xml version="1.0" encoding="utf-8"?>`
			`<!DOCTYPE html>`
			`<html>`
			`<head>`
			`</head>`
			`<body>`
			`<p>foo</p>`
			`</body>`
			`</html>'''`
			`soup = self.soup(markup)`
			`# Verify that we can reach the <p> tag; this means the tree is connected.`
			`self.assertEqual(b"<p>foo</p>", soup.p.encode())`

			`def test_reparented_markup(self):`
			`markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>'`
			`soup = self.soup(markup)`
			`self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode())`
			`self.assertEqual(2, len(soup.find_all('p')))`


			`def test_reparented_markup_ends_with_whitespace(self):`
			`markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>\n'`
			`soup = self.soup(markup)`
			`self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())`
			`self.assertEqual(2, len(soup.find_all('p')))`