Sophie

Sophie

distrib > Mageia > 6 > x86_64 > media > core-updates > by-pkgid > d5ca09083fa1e0650b386d1b93516003 > files > 213

python-lxml-docs-4.2.5-1.mga6.noarch.rpm

<?xml version="1.0" encoding="ascii"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
          "DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
  <title>lxml.html.clean</title>
  <link rel="stylesheet" href="epydoc.css" type="text/css" />
  <script type="text/javascript" src="epydoc.js"></script>
</head>

<body bgcolor="white" text="black" link="blue" vlink="#204080"
      alink="#204080">
<!-- ==================== NAVIGATION BAR ==================== -->
<!-- Page-top navigation strip: four fixed links (Home, Trees, Indices,
     Help) followed by a right-aligned cell holding the project name. -->
<table class="navbar" border="0" width="100%" cellpadding="0" bgcolor="#a0c0ff" cellspacing="0">
  <tr valign="middle">
    <!-- Home link -->
    <th>&nbsp;&nbsp;&nbsp;<a href="lxml-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>

    <!-- Tree link -->
    <th>&nbsp;&nbsp;&nbsp;<a href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>

    <!-- Index link -->
    <th>&nbsp;&nbsp;&nbsp;<a href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>

    <!-- Help link -->
    <th>&nbsp;&nbsp;&nbsp;<a href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>

    <!-- Project homepage: nested one-cell table, pushed to the right edge -->
    <th class="navbar" align="right" width="100%">
      <table border="0" cellpadding="0" cellspacing="0">
        <tr>
          <th class="navbar" align="center"><a class="navbar" target="_top" href="/">lxml API</a></th>
        </tr>
      </table>
    </th>
  </tr>
</table>
<!-- Breadcrumb trail (left cell) and view options (right cell). -->
<table width="100%" cellpadding="0" cellspacing="0">
  <tr valign="top">
    <td width="100%">
      <!-- Path from the top-level package down to this module -->
      <span class="breadcrumbs">
        <a href="lxml-module.html">Package&nbsp;lxml</a> ::
        <a href="lxml.html-module.html">Package&nbsp;html</a> ::
        Module&nbsp;clean
      </span>
    </td>
    <td>
      <table cellpadding="0" cellspacing="0">
        <!-- hide/show private: clicking calls toggle_private().
             NOTE(review): presumably the script locates this link via its
             "privatelink" class; confirm before changing the element. -->
        <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
    onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
        <!-- frames / no frames: each link carries its own balanced pair of
             brackets, like the "[hide private]" option above. The tag
             breaks sit inside the markup so no stray whitespace is
             rendered between the brackets and the link text. -->
        <tr><td align="right"><span class="options"
            >[<a href="frames.html" target="_top">frames</a
            >]&nbsp;|&nbsp;[<a href="lxml.html.clean-pysrc.html"
            target="_top">no&nbsp;frames</a>]</span></td></tr>
      </table>
    </td>
  </tr>
</table>
<h1 class="epydoc">Source Code for <a href="lxml.html.clean-module.html">Module lxml.html.clean</a></h1>
<pre class="py-src">
<a name="L1"></a><tt class="py-lineno">  1</tt>  <tt class="py-line"><tt class="py-docstring">"""A cleanup tool for HTML.</tt> </tt>
<a name="L2"></a><tt class="py-lineno">  2</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L3"></a><tt class="py-lineno">  3</tt>  <tt class="py-line"><tt class="py-docstring">Removes unwanted tags and content.  See the `Cleaner` class for</tt> </tt>
<a name="L4"></a><tt class="py-lineno">  4</tt>  <tt class="py-line"><tt class="py-docstring">details.</tt> </tt>
<a name="L5"></a><tt class="py-lineno">  5</tt>  <tt class="py-line"><tt class="py-docstring">"""</tt> </tt>
<a name="L6"></a><tt class="py-lineno">  6</tt>  <tt class="py-line"> </tt>
<a name="L7"></a><tt class="py-lineno">  7</tt>  <tt class="py-line"><tt class="py-keyword">import</tt> <tt class="py-name">re</tt> </tt>
<a name="L8"></a><tt class="py-lineno">  8</tt>  <tt class="py-line"><tt class="py-keyword">import</tt> <tt id="link-0" class="py-name" targets="Method lxml.etree.PyErrorLog.copy()=lxml.etree.PyErrorLog-class.html#copy,Method lxml.etree._BaseErrorLog.copy()=lxml.etree._BaseErrorLog-class.html#copy,Method lxml.etree._ErrorLog.copy()=lxml.etree._ErrorLog-class.html#copy,Method lxml.etree._IDDict.copy()=lxml.etree._IDDict-class.html#copy,Method lxml.etree._ListErrorLog.copy()=lxml.etree._ListErrorLog-class.html#copy,Function lxml.tests.selftest2.copy()=lxml.tests.selftest2-module.html#copy"><a title="lxml.etree.PyErrorLog.copy
lxml.etree._BaseErrorLog.copy
lxml.etree._ErrorLog.copy
lxml.etree._IDDict.copy
lxml.etree._ListErrorLog.copy
lxml.tests.selftest2.copy" class="py-name" href="#" onclick="return doclink('link-0', 'copy', 'link-0');">copy</a></tt> </tt>
<a name="L9"></a><tt class="py-lineno">  9</tt>  <tt class="py-line"><tt class="py-keyword">try</tt><tt class="py-op">:</tt> </tt>
<a name="L10"></a><tt class="py-lineno"> 10</tt>  <tt class="py-line">    <tt class="py-keyword">from</tt> <tt class="py-name">urlparse</tt> <tt class="py-keyword">import</tt> <tt class="py-name">urlsplit</tt> </tt>
<a name="L11"></a><tt class="py-lineno"> 11</tt>  <tt class="py-line">    <tt class="py-keyword">from</tt> <tt class="py-name">urllib</tt> <tt class="py-keyword">import</tt> <tt class="py-name">unquote_plus</tt> </tt>
<a name="L12"></a><tt class="py-lineno"> 12</tt>  <tt class="py-line"><tt class="py-keyword">except</tt> <tt class="py-name">ImportError</tt><tt class="py-op">:</tt> </tt>
<a name="L13"></a><tt class="py-lineno"> 13</tt>  <tt class="py-line">    <tt class="py-comment"># Python 3</tt> </tt>
<a name="L14"></a><tt class="py-lineno"> 14</tt>  <tt class="py-line">    <tt class="py-keyword">from</tt> <tt class="py-name">urllib</tt><tt class="py-op">.</tt><tt id="link-1" class="py-name" targets="Method lxml.etree._ElementTree.parse()=lxml.etree._ElementTree-class.html#parse,Function lxml.etree.parse()=lxml.etree-module.html#parse,Function lxml.html.ElementSoup.parse()=lxml.html.ElementSoup-module.html#parse,Function lxml.html.html5parser.parse()=lxml.html.html5parser-module.html#parse,Function lxml.html.soupparser.parse()=lxml.html.soupparser-module.html#parse,Function lxml.objectify.parse()=lxml.objectify-module.html#parse,Method lxml.tests.common_imports.HelperTestCase.parse()=lxml.tests.common_imports.HelperTestCase-class.html#parse"><a title="lxml.etree._ElementTree.parse
lxml.etree.parse
lxml.html.ElementSoup.parse
lxml.html.html5parser.parse
lxml.html.soupparser.parse
lxml.objectify.parse
lxml.tests.common_imports.HelperTestCase.parse" class="py-name" href="#" onclick="return doclink('link-1', 'parse', 'link-1');">parse</a></tt> <tt class="py-keyword">import</tt> <tt class="py-name">urlsplit</tt><tt class="py-op">,</tt> <tt class="py-name">unquote_plus</tt> </tt>
<a name="L15"></a><tt class="py-lineno"> 15</tt>  <tt class="py-line"><tt class="py-keyword">from</tt> <tt id="link-2" class="py-name" targets="Package lxml=lxml-module.html"><a title="lxml" class="py-name" href="#" onclick="return doclink('link-2', 'lxml', 'link-2');">lxml</a></tt> <tt class="py-keyword">import</tt> <tt id="link-3" class="py-name" targets="Module lxml.etree=lxml.etree-module.html,Variable lxml.sax.ElementTreeContentHandler.etree=lxml.sax.ElementTreeContentHandler-class.html#etree,Variable lxml.tests.test_elementtree.CElementTreeTestCase.etree=lxml.tests.test_elementtree.CElementTreeTestCase-class.html#etree,Variable lxml.tests.test_elementtree._ETreeTestCaseBase.etree=lxml.tests.test_elementtree._ETreeTestCaseBase-class.html#etree,Variable lxml.tests.test_elementtree._XMLPullParserTest.etree=lxml.tests.test_elementtree._XMLPullParserTest-class.html#etree,Variable lxml.tests.test_io._IOTestCaseBase.etree=lxml.tests.test_io._IOTestCaseBase-class.html#etree"><a title="lxml.etree
lxml.sax.ElementTreeContentHandler.etree
lxml.tests.test_elementtree.CElementTreeTestCase.etree
lxml.tests.test_elementtree._ETreeTestCaseBase.etree
lxml.tests.test_elementtree._XMLPullParserTest.etree
lxml.tests.test_io._IOTestCaseBase.etree" class="py-name" href="#" onclick="return doclink('link-3', 'etree', 'link-3');">etree</a></tt> </tt>
<a name="L16"></a><tt class="py-lineno"> 16</tt>  <tt class="py-line"><tt class="py-keyword">from</tt> <tt id="link-4" class="py-name"><a title="lxml" class="py-name" href="#" onclick="return doclink('link-4', 'lxml', 'link-2');">lxml</a></tt><tt class="py-op">.</tt><tt id="link-5" class="py-name" targets="Package lxml.html=lxml.html-module.html,Method lxml.html.diff.href_token.html()=lxml.html.diff.href_token-class.html#html,Method lxml.html.diff.tag_token.html()=lxml.html.diff.tag_token-class.html#html,Method lxml.html.diff.token.html()=lxml.html.diff.token-class.html#html"><a title="lxml.html
lxml.html.diff.href_token.html
lxml.html.diff.tag_token.html
lxml.html.diff.token.html" class="py-name" href="#" onclick="return doclink('link-5', 'html', 'link-5');">html</a></tt> <tt class="py-keyword">import</tt> <tt id="link-6" class="py-name" targets="Module lxml.html.defs=lxml.html.defs-module.html"><a title="lxml.html.defs" class="py-name" href="#" onclick="return doclink('link-6', 'defs', 'link-6');">defs</a></tt> </tt>
<a name="L17"></a><tt class="py-lineno"> 17</tt>  <tt class="py-line"><tt class="py-keyword">from</tt> <tt id="link-7" class="py-name"><a title="lxml" class="py-name" href="#" onclick="return doclink('link-7', 'lxml', 'link-2');">lxml</a></tt><tt class="py-op">.</tt><tt id="link-8" class="py-name"><a title="lxml.html
lxml.html.diff.href_token.html
lxml.html.diff.tag_token.html
lxml.html.diff.token.html" class="py-name" href="#" onclick="return doclink('link-8', 'html', 'link-5');">html</a></tt> <tt class="py-keyword">import</tt> <tt id="link-9" class="py-name" targets="Function lxml.etree.fromstring()=lxml.etree-module.html#fromstring,Function lxml.html.html5parser.fromstring()=lxml.html.html5parser-module.html#fromstring,Function lxml.html.soupparser.fromstring()=lxml.html.soupparser-module.html#fromstring,Function lxml.objectify.fromstring()=lxml.objectify-module.html#fromstring"><a title="lxml.etree.fromstring
lxml.html.html5parser.fromstring
lxml.html.soupparser.fromstring
lxml.objectify.fromstring" class="py-name" href="#" onclick="return doclink('link-9', 'fromstring', 'link-9');">fromstring</a></tt><tt class="py-op">,</tt> <tt id="link-10" class="py-name" targets="Variable lxml.html.XHTML_NAMESPACE=lxml.html-module.html#XHTML_NAMESPACE"><a title="lxml.html.XHTML_NAMESPACE" class="py-name" href="#" onclick="return doclink('link-10', 'XHTML_NAMESPACE', 'link-10');">XHTML_NAMESPACE</a></tt> </tt>
<a name="L18"></a><tt class="py-lineno"> 18</tt>  <tt class="py-line"><tt class="py-keyword">from</tt> <tt id="link-11" class="py-name"><a title="lxml" class="py-name" href="#" onclick="return doclink('link-11', 'lxml', 'link-2');">lxml</a></tt><tt class="py-op">.</tt><tt id="link-12" class="py-name"><a title="lxml.html
lxml.html.diff.href_token.html
lxml.html.diff.tag_token.html
lxml.html.diff.token.html" class="py-name" href="#" onclick="return doclink('link-12', 'html', 'link-5');">html</a></tt> <tt class="py-keyword">import</tt> <tt class="py-name">xhtml_to_html</tt><tt class="py-op">,</tt> <tt class="py-name">_transform_result</tt> </tt>
<a name="L19"></a><tt class="py-lineno"> 19</tt>  <tt class="py-line"> </tt>
<a name="L20"></a><tt class="py-lineno"> 20</tt>  <tt class="py-line"><tt class="py-keyword">try</tt><tt class="py-op">:</tt> </tt>
<a name="L21"></a><tt class="py-lineno"> 21</tt>  <tt class="py-line">    <tt id="link-13" class="py-name" targets="Function lxml.html.clean.unichr()=lxml.html.clean-module.html#unichr"><a title="lxml.html.clean.unichr" class="py-name" href="#" onclick="return doclink('link-13', 'unichr', 'link-13');">unichr</a></tt> </tt>
<a name="L22"></a><tt class="py-lineno"> 22</tt>  <tt class="py-line"><tt class="py-keyword">except</tt> <tt class="py-name">NameError</tt><tt class="py-op">:</tt> </tt>
<a name="L23"></a><tt class="py-lineno"> 23</tt>  <tt class="py-line">    <tt class="py-comment"># Python 3</tt> </tt>
<a name="L24"></a><tt class="py-lineno"> 24</tt>  <tt class="py-line">    <tt id="link-14" class="py-name"><a title="lxml.html.clean.unichr" class="py-name" href="#" onclick="return doclink('link-14', 'unichr', 'link-13');">unichr</a></tt> <tt class="py-op">=</tt> <tt class="py-name">chr</tt> </tt>
<a name="L25"></a><tt class="py-lineno"> 25</tt>  <tt class="py-line"><tt class="py-keyword">try</tt><tt class="py-op">:</tt> </tt>
<a name="L26"></a><tt class="py-lineno"> 26</tt>  <tt class="py-line">    <tt class="py-name">unicode</tt> </tt>
<a name="L27"></a><tt class="py-lineno"> 27</tt>  <tt class="py-line"><tt class="py-keyword">except</tt> <tt class="py-name">NameError</tt><tt class="py-op">:</tt> </tt>
<a name="L28"></a><tt class="py-lineno"> 28</tt>  <tt class="py-line">    <tt class="py-comment"># Python 3</tt> </tt>
<a name="L29"></a><tt class="py-lineno"> 29</tt>  <tt class="py-line">    <tt class="py-name">unicode</tt> <tt class="py-op">=</tt> <tt id="link-15" class="py-name" targets="Class str=str-class.html"><a title="str" class="py-name" href="#" onclick="return doclink('link-15', 'str', 'link-15');">str</a></tt> </tt>
<a name="L30"></a><tt class="py-lineno"> 30</tt>  <tt class="py-line"><tt class="py-keyword">try</tt><tt class="py-op">:</tt> </tt>
<a name="L31"></a><tt class="py-lineno"> 31</tt>  <tt class="py-line">    <tt class="py-name">bytes</tt> </tt>
<a name="L32"></a><tt class="py-lineno"> 32</tt>  <tt class="py-line"><tt class="py-keyword">except</tt> <tt class="py-name">NameError</tt><tt class="py-op">:</tt> </tt>
<a name="L33"></a><tt class="py-lineno"> 33</tt>  <tt class="py-line">    <tt class="py-comment"># Python &lt; 2.6</tt> </tt>
<a name="L34"></a><tt class="py-lineno"> 34</tt>  <tt class="py-line">    <tt class="py-name">bytes</tt> <tt class="py-op">=</tt> <tt id="link-16" class="py-name"><a title="str" class="py-name" href="#" onclick="return doclink('link-16', 'str', 'link-15');">str</a></tt> </tt>
<a name="L35"></a><tt class="py-lineno"> 35</tt>  <tt class="py-line"><tt class="py-keyword">try</tt><tt class="py-op">:</tt> </tt>
<a name="L36"></a><tt class="py-lineno"> 36</tt>  <tt class="py-line">    <tt id="link-17" class="py-name" targets="Variable lxml.html.clean.basestring=lxml.html.clean-module.html#basestring"><a title="lxml.html.clean.basestring" class="py-name" href="#" onclick="return doclink('link-17', 'basestring', 'link-17');">basestring</a></tt> </tt>
<a name="L37"></a><tt class="py-lineno"> 37</tt>  <tt class="py-line"><tt class="py-keyword">except</tt> <tt class="py-name">NameError</tt><tt class="py-op">:</tt> </tt>
<a name="L38"></a><tt class="py-lineno"> 38</tt>  <tt class="py-line">    <tt id="link-18" class="py-name"><a title="lxml.html.clean.basestring" class="py-name" href="#" onclick="return doclink('link-18', 'basestring', 'link-17');">basestring</a></tt> <tt class="py-op">=</tt> <tt class="py-op">(</tt><tt id="link-19" class="py-name"><a title="str" class="py-name" href="#" onclick="return doclink('link-19', 'str', 'link-15');">str</a></tt><tt class="py-op">,</tt> <tt class="py-name">bytes</tt><tt class="py-op">)</tt> </tt>
<a name="L39"></a><tt class="py-lineno"> 39</tt>  <tt class="py-line"> </tt>
<a name="L40"></a><tt class="py-lineno"> 40</tt>  <tt class="py-line"> </tt>
<a name="L41"></a><tt class="py-lineno"> 41</tt>  <tt class="py-line"><tt class="py-name">__all__</tt> <tt class="py-op">=</tt> <tt class="py-op">[</tt><tt class="py-string">'clean_html'</tt><tt class="py-op">,</tt> <tt class="py-string">'clean'</tt><tt class="py-op">,</tt> <tt class="py-string">'Cleaner'</tt><tt class="py-op">,</tt> <tt class="py-string">'autolink'</tt><tt class="py-op">,</tt> <tt class="py-string">'autolink_html'</tt><tt class="py-op">,</tt> </tt>
<a name="L42"></a><tt class="py-lineno"> 42</tt>  <tt class="py-line">           <tt class="py-string">'word_break'</tt><tt class="py-op">,</tt> <tt class="py-string">'word_break_html'</tt><tt class="py-op">]</tt> </tt>
<a name="L43"></a><tt class="py-lineno"> 43</tt>  <tt class="py-line"> </tt>
<a name="L44"></a><tt class="py-lineno"> 44</tt>  <tt class="py-line"><tt class="py-comment"># Look at http://code.sixapart.com/trac/livejournal/browser/trunk/cgi-bin/cleanhtml.pl</tt> </tt>
<a name="L45"></a><tt class="py-lineno"> 45</tt>  <tt class="py-line"><tt class="py-comment">#   Particularly the CSS cleaning; most of the tag cleaning is integrated now</tt> </tt>
<a name="L46"></a><tt class="py-lineno"> 46</tt>  <tt class="py-line"><tt class="py-comment"># I have multiple kinds of schemes searched; but should schemes be</tt> </tt>
<a name="L47"></a><tt class="py-lineno"> 47</tt>  <tt class="py-line"><tt class="py-comment">#   whitelisted instead?</tt> </tt>
<a name="L48"></a><tt class="py-lineno"> 48</tt>  <tt class="py-line"><tt class="py-comment"># max height?</tt> </tt>
<a name="L49"></a><tt class="py-lineno"> 49</tt>  <tt class="py-line"><tt class="py-comment"># remove images?  Also in CSS?  background attribute?</tt> </tt>
<a name="L50"></a><tt class="py-lineno"> 50</tt>  <tt class="py-line"><tt class="py-comment"># Some way to whitelist object, iframe, etc (e.g., if you want to</tt> </tt>
<a name="L51"></a><tt class="py-lineno"> 51</tt>  <tt class="py-line"><tt class="py-comment">#   allow *just* embedded YouTube movies)</tt> </tt>
<a name="L52"></a><tt class="py-lineno"> 52</tt>  <tt class="py-line"><tt class="py-comment"># Log what was deleted and why?</tt> </tt>
<a name="L53"></a><tt class="py-lineno"> 53</tt>  <tt class="py-line"><tt class="py-comment"># style="behavior: ..." might be bad in IE?</tt> </tt>
<a name="L54"></a><tt class="py-lineno"> 54</tt>  <tt class="py-line"><tt class="py-comment"># Should we have something for just &lt;meta http-equiv&gt;?  That's the worst of the</tt> </tt>
<a name="L55"></a><tt class="py-lineno"> 55</tt>  <tt class="py-line"><tt class="py-comment">#   metas.</tt> </tt>
<a name="L56"></a><tt class="py-lineno"> 56</tt>  <tt class="py-line"><tt class="py-comment"># UTF-7 detections?  Example:</tt> </tt>
<a name="L57"></a><tt class="py-lineno"> 57</tt>  <tt class="py-line"><tt class="py-comment">#     &lt;HEAD&gt;&lt;META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=UTF-7"&gt; &lt;/HEAD&gt;+ADw-SCRIPT+AD4-alert('XSS');+ADw-/SCRIPT+AD4-</tt> </tt>
<a name="L58"></a><tt class="py-lineno"> 58</tt>  <tt class="py-line"><tt class="py-comment">#   you don't always have to have the charset set, if the page has no charset</tt> </tt>
<a name="L59"></a><tt class="py-lineno"> 59</tt>  <tt class="py-line"><tt class="py-comment">#   and there's UTF7-like code in it.</tt> </tt>
<a name="L60"></a><tt class="py-lineno"> 60</tt>  <tt class="py-line"><tt class="py-comment"># Look at these tests: http://htmlpurifier.org/live/smoketests/xssAttacks.php</tt> </tt>
<a name="L61"></a><tt class="py-lineno"> 61</tt>  <tt class="py-line"> </tt>
<a name="L62"></a><tt class="py-lineno"> 62</tt>  <tt class="py-line"> </tt>
<a name="L63"></a><tt class="py-lineno"> 63</tt>  <tt class="py-line"><tt class="py-comment"># This is an IE-specific construct you can have in a stylesheet to</tt> </tt>
<a name="L64"></a><tt class="py-lineno"> 64</tt>  <tt class="py-line"><tt class="py-comment"># run some Javascript:</tt> </tt>
<a name="L65"></a><tt class="py-lineno"> 65</tt>  <tt class="py-line"><tt id="link-20" class="py-name" targets="Variable lxml.html.clean._css_javascript_re=lxml.html.clean-module.html#_css_javascript_re"><a title="lxml.html.clean._css_javascript_re" class="py-name" href="#" onclick="return doclink('link-20', '_css_javascript_re', 'link-20');">_css_javascript_re</a></tt> <tt class="py-op">=</tt> <tt class="py-name">re</tt><tt class="py-op">.</tt><tt class="py-name">compile</tt><tt class="py-op">(</tt> </tt>
<a name="L66"></a><tt class="py-lineno"> 66</tt>  <tt class="py-line">    <tt class="py-string">r'expression\s*\(.*?\)'</tt><tt class="py-op">,</tt> <tt class="py-name">re</tt><tt class="py-op">.</tt><tt id="link-21" class="py-name" targets="Variable lxml.html.builder.S=lxml.html.builder-module.html#S"><a title="lxml.html.builder.S" class="py-name" href="#" onclick="return doclink('link-21', 'S', 'link-21');">S</a></tt><tt class="py-op">|</tt><tt class="py-name">re</tt><tt class="py-op">.</tt><tt id="link-22" class="py-name" targets="Variable lxml.html.builder.I=lxml.html.builder-module.html#I"><a title="lxml.html.builder.I" class="py-name" href="#" onclick="return doclink('link-22', 'I', 'link-22');">I</a></tt><tt class="py-op">)</tt> </tt>
<a name="L67"></a><tt class="py-lineno"> 67</tt>  <tt class="py-line"> </tt>
<a name="L68"></a><tt class="py-lineno"> 68</tt>  <tt class="py-line"><tt class="py-comment"># Do I have to worry about @\nimport?</tt> </tt>
<a name="L69"></a><tt class="py-lineno"> 69</tt>  <tt class="py-line"><tt id="link-23" class="py-name" targets="Variable lxml.html.clean._css_import_re=lxml.html.clean-module.html#_css_import_re"><a title="lxml.html.clean._css_import_re" class="py-name" href="#" onclick="return doclink('link-23', '_css_import_re', 'link-23');">_css_import_re</a></tt> <tt class="py-op">=</tt> <tt class="py-name">re</tt><tt class="py-op">.</tt><tt class="py-name">compile</tt><tt class="py-op">(</tt> </tt>
<a name="L70"></a><tt class="py-lineno"> 70</tt>  <tt class="py-line">    <tt class="py-string">r'@\s*import'</tt><tt class="py-op">,</tt> <tt class="py-name">re</tt><tt class="py-op">.</tt><tt id="link-24" class="py-name"><a title="lxml.html.builder.I" class="py-name" href="#" onclick="return doclink('link-24', 'I', 'link-22');">I</a></tt><tt class="py-op">)</tt> </tt>
<a name="L71"></a><tt class="py-lineno"> 71</tt>  <tt class="py-line"> </tt>
<a name="L72"></a><tt class="py-lineno"> 72</tt>  <tt class="py-line"><tt class="py-comment"># All kinds of schemes besides just javascript: that can cause</tt> </tt>
<a name="L73"></a><tt class="py-lineno"> 73</tt>  <tt class="py-line"><tt class="py-comment"># execution:</tt> </tt>
<a name="L74"></a><tt class="py-lineno"> 74</tt>  <tt class="py-line"><tt id="link-25" class="py-name" targets="Function lxml.html.clean._is_image_dataurl()=lxml.html.clean-module.html#_is_image_dataurl"><a title="lxml.html.clean._is_image_dataurl" class="py-name" href="#" onclick="return doclink('link-25', '_is_image_dataurl', 'link-25');">_is_image_dataurl</a></tt> <tt class="py-op">=</tt> <tt class="py-name">re</tt><tt class="py-op">.</tt><tt class="py-name">compile</tt><tt class="py-op">(</tt> </tt>
<a name="L75"></a><tt class="py-lineno"> 75</tt>  <tt class="py-line">    <tt class="py-string">r'^data:image/.+;base64'</tt><tt class="py-op">,</tt> <tt class="py-name">re</tt><tt class="py-op">.</tt><tt id="link-26" class="py-name"><a title="lxml.html.builder.I" class="py-name" href="#" onclick="return doclink('link-26', 'I', 'link-22');">I</a></tt><tt class="py-op">)</tt><tt class="py-op">.</tt><tt class="py-name">search</tt> </tt>
<a name="L76"></a><tt class="py-lineno"> 76</tt>  <tt class="py-line"><tt id="link-27" class="py-name" targets="Function lxml.html.clean._is_possibly_malicious_scheme()=lxml.html.clean-module.html#_is_possibly_malicious_scheme"><a title="lxml.html.clean._is_possibly_malicious_scheme" class="py-name" href="#" onclick="return doclink('link-27', '_is_possibly_malicious_scheme', 'link-27');">_is_possibly_malicious_scheme</a></tt> <tt class="py-op">=</tt> <tt class="py-name">re</tt><tt class="py-op">.</tt><tt class="py-name">compile</tt><tt class="py-op">(</tt> </tt>
<a name="L77"></a><tt class="py-lineno"> 77</tt>  <tt class="py-line">    <tt class="py-string">r'(?:javascript|jscript|livescript|vbscript|data|about|mocha):'</tt><tt class="py-op">,</tt> </tt>
<a name="L78"></a><tt class="py-lineno"> 78</tt>  <tt class="py-line">    <tt class="py-name">re</tt><tt class="py-op">.</tt><tt id="link-28" class="py-name"><a title="lxml.html.builder.I" class="py-name" href="#" onclick="return doclink('link-28', 'I', 'link-22');">I</a></tt><tt class="py-op">)</tt><tt class="py-op">.</tt><tt class="py-name">search</tt> </tt>
<a name="_is_javascript_scheme"></a><div id="_is_javascript_scheme-def"><a name="L79"></a><tt class="py-lineno"> 79</tt> <a class="py-toggle" href="#" id="_is_javascript_scheme-toggle" onclick="return toggle('_is_javascript_scheme');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="lxml.html.clean-module.html#_is_javascript_scheme">_is_javascript_scheme</a><tt class="py-op">(</tt><tt class="py-param">s</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="_is_javascript_scheme-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="_is_javascript_scheme-expanded"><a name="L80"></a><tt class="py-lineno"> 80</tt>  <tt class="py-line">    <tt class="py-keyword">if</tt> <tt id="link-29" class="py-name"><a title="lxml.html.clean._is_image_dataurl" class="py-name" href="#" onclick="return doclink('link-29', '_is_image_dataurl', 'link-25');">_is_image_dataurl</a></tt><tt class="py-op">(</tt><tt class="py-name">s</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L81"></a><tt class="py-lineno"> 81</tt>  <tt class="py-line">        <tt class="py-keyword">return</tt> <tt class="py-name">None</tt> </tt>
<a name="L82"></a><tt class="py-lineno"> 82</tt>  <tt class="py-line">    <tt class="py-keyword">return</tt> <tt id="link-30" class="py-name"><a title="lxml.html.clean._is_possibly_malicious_scheme" class="py-name" href="#" onclick="return doclink('link-30', '_is_possibly_malicious_scheme', 'link-27');">_is_possibly_malicious_scheme</a></tt><tt class="py-op">(</tt><tt class="py-name">s</tt><tt class="py-op">)</tt> </tt>
</div><a name="L83"></a><tt class="py-lineno"> 83</tt>  <tt class="py-line"> </tt>
<a name="L84"></a><tt class="py-lineno"> 84</tt>  <tt class="py-line"><tt id="link-31" class="py-name" targets="Function lxml.html.clean._substitute_whitespace()=lxml.html.clean-module.html#_substitute_whitespace"><a title="lxml.html.clean._substitute_whitespace" class="py-name" href="#" onclick="return doclink('link-31', '_substitute_whitespace', 'link-31');">_substitute_whitespace</a></tt> <tt class="py-op">=</tt> <tt class="py-name">re</tt><tt class="py-op">.</tt><tt class="py-name">compile</tt><tt class="py-op">(</tt><tt class="py-string">r'[\s\x00-\x08\x0B\x0C\x0E-\x19]+'</tt><tt class="py-op">)</tt><tt class="py-op">.</tt><tt class="py-name">sub</tt> </tt>
<a name="L85"></a><tt class="py-lineno"> 85</tt>  <tt class="py-line"><tt class="py-comment"># FIXME: should data: be blocked?</tt> </tt>
<a name="L86"></a><tt class="py-lineno"> 86</tt>  <tt class="py-line"> </tt>
<a name="L87"></a><tt class="py-lineno"> 87</tt>  <tt class="py-line"><tt class="py-comment"># FIXME: check against: http://msdn2.microsoft.com/en-us/library/ms537512.aspx</tt> </tt>
<a name="L88"></a><tt class="py-lineno"> 88</tt>  <tt class="py-line"><tt id="link-32" class="py-name" targets="Variable lxml.html.clean._conditional_comment_re=lxml.html.clean-module.html#_conditional_comment_re"><a title="lxml.html.clean._conditional_comment_re" class="py-name" href="#" onclick="return doclink('link-32', '_conditional_comment_re', 'link-32');">_conditional_comment_re</a></tt> <tt class="py-op">=</tt> <tt class="py-name">re</tt><tt class="py-op">.</tt><tt class="py-name">compile</tt><tt class="py-op">(</tt> </tt>
<a name="L89"></a><tt class="py-lineno"> 89</tt>  <tt class="py-line">    <tt class="py-string">r'\[if[\s\n\r]+.*?][\s\n\r]*&gt;'</tt><tt class="py-op">,</tt> <tt class="py-name">re</tt><tt class="py-op">.</tt><tt id="link-33" class="py-name"><a title="lxml.html.builder.I" class="py-name" href="#" onclick="return doclink('link-33', 'I', 'link-22');">I</a></tt><tt class="py-op">|</tt><tt class="py-name">re</tt><tt class="py-op">.</tt><tt id="link-34" class="py-name"><a title="lxml.html.builder.S" class="py-name" href="#" onclick="return doclink('link-34', 'S', 'link-21');">S</a></tt><tt class="py-op">)</tt> </tt>
<a name="L90"></a><tt class="py-lineno"> 90</tt>  <tt class="py-line"> </tt>
<a name="L91"></a><tt class="py-lineno"> 91</tt>  <tt class="py-line"><tt id="link-35" class="py-name" targets="Variable lxml.html.clean._find_styled_elements=lxml.html.clean-module.html#_find_styled_elements"><a title="lxml.html.clean._find_styled_elements" class="py-name" href="#" onclick="return doclink('link-35', '_find_styled_elements', 'link-35');">_find_styled_elements</a></tt> <tt class="py-op">=</tt> <tt id="link-36" class="py-name"><a title="lxml.etree
lxml.sax.ElementTreeContentHandler.etree
lxml.tests.test_elementtree.CElementTreeTestCase.etree
lxml.tests.test_elementtree._ETreeTestCaseBase.etree
lxml.tests.test_elementtree._XMLPullParserTest.etree
lxml.tests.test_io._IOTestCaseBase.etree" class="py-name" href="#" onclick="return doclink('link-36', 'etree', 'link-3');">etree</a></tt><tt class="py-op">.</tt><tt id="link-37" class="py-name" targets="Class lxml.etree.XPath=lxml.etree.XPath-class.html"><a title="lxml.etree.XPath" class="py-name" href="#" onclick="return doclink('link-37', 'XPath', 'link-37');">XPath</a></tt><tt class="py-op">(</tt> </tt>
<a name="L92"></a><tt class="py-lineno"> 92</tt>  <tt class="py-line">    <tt class="py-string">"descendant-or-self::*[@style]"</tt><tt class="py-op">)</tt> </tt>
<a name="L93"></a><tt class="py-lineno"> 93</tt>  <tt class="py-line"> </tt>
<a name="L94"></a><tt class="py-lineno"> 94</tt>  <tt class="py-line"><tt id="link-38" class="py-name" targets="Variable lxml.html.clean._find_external_links=lxml.html.clean-module.html#_find_external_links"><a title="lxml.html.clean._find_external_links" class="py-name" href="#" onclick="return doclink('link-38', '_find_external_links', 'link-38');">_find_external_links</a></tt> <tt class="py-op">=</tt> <tt id="link-39" class="py-name"><a title="lxml.etree
lxml.sax.ElementTreeContentHandler.etree
lxml.tests.test_elementtree.CElementTreeTestCase.etree
lxml.tests.test_elementtree._ETreeTestCaseBase.etree
lxml.tests.test_elementtree._XMLPullParserTest.etree
lxml.tests.test_io._IOTestCaseBase.etree" class="py-name" href="#" onclick="return doclink('link-39', 'etree', 'link-3');">etree</a></tt><tt class="py-op">.</tt><tt id="link-40" class="py-name"><a title="lxml.etree.XPath" class="py-name" href="#" onclick="return doclink('link-40', 'XPath', 'link-37');">XPath</a></tt><tt class="py-op">(</tt> </tt>
<a name="L95"></a><tt class="py-lineno"> 95</tt>  <tt class="py-line">    <tt class="py-op">(</tt><tt class="py-string">"descendant-or-self::a  [normalize-space(@href) and substring(normalize-space(@href),1,1) != '#'] |"</tt> </tt>
<a name="L96"></a><tt class="py-lineno"> 96</tt>  <tt class="py-line">     <tt class="py-string">"descendant-or-self::x:a[normalize-space(@href) and substring(normalize-space(@href),1,1) != '#']"</tt><tt class="py-op">)</tt><tt class="py-op">,</tt> </tt>
<a name="L97"></a><tt class="py-lineno"> 97</tt>  <tt class="py-line">    <tt class="py-name">namespaces</tt><tt class="py-op">=</tt><tt class="py-op">{</tt><tt class="py-string">'x'</tt><tt class="py-op">:</tt><tt id="link-41" class="py-name"><a title="lxml.html.XHTML_NAMESPACE" class="py-name" href="#" onclick="return doclink('link-41', 'XHTML_NAMESPACE', 'link-10');">XHTML_NAMESPACE</a></tt><tt class="py-op">}</tt><tt class="py-op">)</tt> </tt>
<a name="L98"></a><tt class="py-lineno"> 98</tt>  <tt class="py-line"> </tt>
<a name="L99"></a><tt class="py-lineno"> 99</tt>  <tt class="py-line"> </tt>
<a name="Cleaner"></a><div id="Cleaner-def"><a name="L100"></a><tt class="py-lineno">100</tt> <a class="py-toggle" href="#" id="Cleaner-toggle" onclick="return toggle('Cleaner');">-</a><tt class="py-line"><tt class="py-keyword">class</tt> <a class="py-def-name" href="lxml.html.clean.Cleaner-class.html">Cleaner</a><tt class="py-op">(</tt><tt class="py-base-class">object</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="Cleaner-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="Cleaner-expanded"><a name="L101"></a><tt class="py-lineno">101</tt>  <tt class="py-line">    <tt class="py-docstring">"""</tt> </tt>
<a name="L102"></a><tt class="py-lineno">102</tt>  <tt class="py-line"><tt class="py-docstring">    Instances clean the document of each of the possible offending</tt> </tt>
<a name="L103"></a><tt class="py-lineno">103</tt>  <tt class="py-line"><tt class="py-docstring">    elements.  The cleaning is controlled by attributes; you can</tt> </tt>
<a name="L104"></a><tt class="py-lineno">104</tt>  <tt class="py-line"><tt class="py-docstring">    override attributes in a subclass, or set them in the constructor.</tt> </tt>
<a name="L105"></a><tt class="py-lineno">105</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L106"></a><tt class="py-lineno">106</tt>  <tt class="py-line"><tt class="py-docstring">    ``scripts``:</tt> </tt>
<a name="L107"></a><tt class="py-lineno">107</tt>  <tt class="py-line"><tt class="py-docstring">        Removes any ``&lt;script&gt;`` tags.</tt> </tt>
<a name="L108"></a><tt class="py-lineno">108</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L109"></a><tt class="py-lineno">109</tt>  <tt class="py-line"><tt class="py-docstring">    ``javascript``:</tt> </tt>
<a name="L110"></a><tt class="py-lineno">110</tt>  <tt class="py-line"><tt class="py-docstring">        Removes any Javascript, like an ``onclick`` attribute. Also removes stylesheets</tt> </tt>
<a name="L111"></a><tt class="py-lineno">111</tt>  <tt class="py-line"><tt class="py-docstring">        as they could contain Javascript.</tt> </tt>
<a name="L112"></a><tt class="py-lineno">112</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L113"></a><tt class="py-lineno">113</tt>  <tt class="py-line"><tt class="py-docstring">    ``comments``:</tt> </tt>
<a name="L114"></a><tt class="py-lineno">114</tt>  <tt class="py-line"><tt class="py-docstring">        Removes any comments.</tt> </tt>
<a name="L115"></a><tt class="py-lineno">115</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L116"></a><tt class="py-lineno">116</tt>  <tt class="py-line"><tt class="py-docstring">    ``style``:</tt> </tt>
<a name="L117"></a><tt class="py-lineno">117</tt>  <tt class="py-line"><tt class="py-docstring">        Removes any style tags.</tt> </tt>
<a name="L118"></a><tt class="py-lineno">118</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L119"></a><tt class="py-lineno">119</tt>  <tt class="py-line"><tt class="py-docstring">    ``inline_style``:</tt> </tt>
<a name="L120"></a><tt class="py-lineno">120</tt>  <tt class="py-line"><tt class="py-docstring">        Removes any style attributes.  Defaults to the value of the ``style`` option.</tt> </tt>
<a name="L121"></a><tt class="py-lineno">121</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L122"></a><tt class="py-lineno">122</tt>  <tt class="py-line"><tt class="py-docstring">    ``links``:</tt> </tt>
<a name="L123"></a><tt class="py-lineno">123</tt>  <tt class="py-line"><tt class="py-docstring">        Removes any ``&lt;link&gt;`` tags</tt> </tt>
<a name="L124"></a><tt class="py-lineno">124</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L125"></a><tt class="py-lineno">125</tt>  <tt class="py-line"><tt class="py-docstring">    ``meta``:</tt> </tt>
<a name="L126"></a><tt class="py-lineno">126</tt>  <tt class="py-line"><tt class="py-docstring">        Removes any ``&lt;meta&gt;`` tags</tt> </tt>
<a name="L127"></a><tt class="py-lineno">127</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L128"></a><tt class="py-lineno">128</tt>  <tt class="py-line"><tt class="py-docstring">    ``page_structure``:</tt> </tt>
<a name="L129"></a><tt class="py-lineno">129</tt>  <tt class="py-line"><tt class="py-docstring">        Structural parts of a page: ``&lt;head&gt;``, ``&lt;html&gt;``, ``&lt;title&gt;``.</tt> </tt>
<a name="L130"></a><tt class="py-lineno">130</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L131"></a><tt class="py-lineno">131</tt>  <tt class="py-line"><tt class="py-docstring">    ``processing_instructions``:</tt> </tt>
<a name="L132"></a><tt class="py-lineno">132</tt>  <tt class="py-line"><tt class="py-docstring">        Removes any processing instructions.</tt> </tt>
<a name="L133"></a><tt class="py-lineno">133</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L134"></a><tt class="py-lineno">134</tt>  <tt class="py-line"><tt class="py-docstring">    ``embedded``:</tt> </tt>
<a name="L135"></a><tt class="py-lineno">135</tt>  <tt class="py-line"><tt class="py-docstring">        Removes any embedded objects (flash, iframes)</tt> </tt>
<a name="L136"></a><tt class="py-lineno">136</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L137"></a><tt class="py-lineno">137</tt>  <tt class="py-line"><tt class="py-docstring">    ``frames``:</tt> </tt>
<a name="L138"></a><tt class="py-lineno">138</tt>  <tt class="py-line"><tt class="py-docstring">        Removes any frame-related tags</tt> </tt>
<a name="L139"></a><tt class="py-lineno">139</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L140"></a><tt class="py-lineno">140</tt>  <tt class="py-line"><tt class="py-docstring">    ``forms``:</tt> </tt>
<a name="L141"></a><tt class="py-lineno">141</tt>  <tt class="py-line"><tt class="py-docstring">        Removes any form tags</tt> </tt>
<a name="L142"></a><tt class="py-lineno">142</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L143"></a><tt class="py-lineno">143</tt>  <tt class="py-line"><tt class="py-docstring">    ``annoying_tags``:</tt> </tt>
<a name="L144"></a><tt class="py-lineno">144</tt>  <tt class="py-line"><tt class="py-docstring">        Tags that aren't *wrong*, but are annoying.  ``&lt;blink&gt;`` and ``&lt;marquee&gt;``</tt> </tt>
<a name="L145"></a><tt class="py-lineno">145</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L146"></a><tt class="py-lineno">146</tt>  <tt class="py-line"><tt class="py-docstring">    ``remove_tags``:</tt> </tt>
<a name="L147"></a><tt class="py-lineno">147</tt>  <tt class="py-line"><tt class="py-docstring">        A list of tags to remove.  Only the tags will be removed,</tt> </tt>
<a name="L148"></a><tt class="py-lineno">148</tt>  <tt class="py-line"><tt class="py-docstring">        their content will get pulled up into the parent tag.</tt> </tt>
<a name="L149"></a><tt class="py-lineno">149</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L150"></a><tt class="py-lineno">150</tt>  <tt class="py-line"><tt class="py-docstring">    ``kill_tags``:</tt> </tt>
<a name="L151"></a><tt class="py-lineno">151</tt>  <tt class="py-line"><tt class="py-docstring">        A list of tags to kill.  Killing also removes the tag's content,</tt> </tt>
<a name="L152"></a><tt class="py-lineno">152</tt>  <tt class="py-line"><tt class="py-docstring">        i.e. the whole subtree, not just the tag itself.</tt> </tt>
<a name="L153"></a><tt class="py-lineno">153</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L154"></a><tt class="py-lineno">154</tt>  <tt class="py-line"><tt class="py-docstring">    ``allow_tags``:</tt> </tt>
<a name="L155"></a><tt class="py-lineno">155</tt>  <tt class="py-line"><tt class="py-docstring">        A list of tags to include (default include all).</tt> </tt>
<a name="L156"></a><tt class="py-lineno">156</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L157"></a><tt class="py-lineno">157</tt>  <tt class="py-line"><tt class="py-docstring">    ``remove_unknown_tags``:</tt> </tt>
<a name="L158"></a><tt class="py-lineno">158</tt>  <tt class="py-line"><tt class="py-docstring">        Remove any tags that aren't standard parts of HTML.</tt> </tt>
<a name="L159"></a><tt class="py-lineno">159</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L160"></a><tt class="py-lineno">160</tt>  <tt class="py-line"><tt class="py-docstring">    ``safe_attrs_only``:</tt> </tt>
<a name="L161"></a><tt class="py-lineno">161</tt>  <tt class="py-line"><tt class="py-docstring">        If true, only include 'safe' attributes (specifically the list</tt> </tt>
<a name="L162"></a><tt class="py-lineno">162</tt>  <tt class="py-line"><tt class="py-docstring">        from the feedparser HTML sanitisation web site).</tt> </tt>
<a name="L163"></a><tt class="py-lineno">163</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L164"></a><tt class="py-lineno">164</tt>  <tt class="py-line"><tt class="py-docstring">    ``safe_attrs``:</tt> </tt>
<a name="L165"></a><tt class="py-lineno">165</tt>  <tt class="py-line"><tt class="py-docstring">        A set of attribute names to override the default list of attributes</tt> </tt>
<a name="L166"></a><tt class="py-lineno">166</tt>  <tt class="py-line"><tt class="py-docstring">        considered 'safe' (when safe_attrs_only=True).</tt> </tt>
<a name="L167"></a><tt class="py-lineno">167</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L168"></a><tt class="py-lineno">168</tt>  <tt class="py-line"><tt class="py-docstring">    ``add_nofollow``:</tt> </tt>
<a name="L169"></a><tt class="py-lineno">169</tt>  <tt class="py-line"><tt class="py-docstring">        If true, then any ``&lt;a&gt;`` tags will have ``rel="nofollow"`` added to them.</tt> </tt>
<a name="L170"></a><tt class="py-lineno">170</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L171"></a><tt class="py-lineno">171</tt>  <tt class="py-line"><tt class="py-docstring">    ``host_whitelist``:</tt> </tt>
<a name="L172"></a><tt class="py-lineno">172</tt>  <tt class="py-line"><tt class="py-docstring">        A list or set of hosts that you can use for embedded content</tt> </tt>
<a name="L173"></a><tt class="py-lineno">173</tt>  <tt class="py-line"><tt class="py-docstring">        (for content like ``&lt;object&gt;``, ``&lt;link rel="stylesheet"&gt;``, etc).</tt> </tt>
<a name="L174"></a><tt class="py-lineno">174</tt>  <tt class="py-line"><tt class="py-docstring">        You can also implement/override the method</tt> </tt>
<a name="L175"></a><tt class="py-lineno">175</tt>  <tt class="py-line"><tt class="py-docstring">        ``allow_embedded_url(el, url)`` or ``allow_element(el)`` to</tt> </tt>
<a name="L176"></a><tt class="py-lineno">176</tt>  <tt class="py-line"><tt class="py-docstring">        implement more complex rules for what can be embedded.</tt> </tt>
<a name="L177"></a><tt class="py-lineno">177</tt>  <tt class="py-line"><tt class="py-docstring">        Anything that passes this test will be shown, regardless of</tt> </tt>
<a name="L178"></a><tt class="py-lineno">178</tt>  <tt class="py-line"><tt class="py-docstring">        the value of (for instance) ``embedded``.</tt> </tt>
<a name="L179"></a><tt class="py-lineno">179</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L180"></a><tt class="py-lineno">180</tt>  <tt class="py-line"><tt class="py-docstring">        Note that this parameter might not work as intended if you do not</tt> </tt>
<a name="L181"></a><tt class="py-lineno">181</tt>  <tt class="py-line"><tt class="py-docstring">        make the links absolute before doing the cleaning.</tt> </tt>
<a name="L182"></a><tt class="py-lineno">182</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L183"></a><tt class="py-lineno">183</tt>  <tt class="py-line"><tt class="py-docstring">        Note that you may also need to set ``whitelist_tags``.</tt> </tt>
<a name="L184"></a><tt class="py-lineno">184</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L185"></a><tt class="py-lineno">185</tt>  <tt class="py-line"><tt class="py-docstring">    ``whitelist_tags``:</tt> </tt>
<a name="L186"></a><tt class="py-lineno">186</tt>  <tt class="py-line"><tt class="py-docstring">        A set of tags that can be included with ``host_whitelist``.</tt> </tt>
<a name="L187"></a><tt class="py-lineno">187</tt>  <tt class="py-line"><tt class="py-docstring">        The default is ``iframe`` and ``embed``; you may wish to</tt> </tt>
<a name="L188"></a><tt class="py-lineno">188</tt>  <tt class="py-line"><tt class="py-docstring">        include other tags like ``script``, or you may want to</tt> </tt>
<a name="L189"></a><tt class="py-lineno">189</tt>  <tt class="py-line"><tt class="py-docstring">        implement ``allow_embedded_url`` for more control.  Set to None to</tt> </tt>
<a name="L190"></a><tt class="py-lineno">190</tt>  <tt class="py-line"><tt class="py-docstring">        include all tags.</tt> </tt>
<a name="L191"></a><tt class="py-lineno">191</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L192"></a><tt class="py-lineno">192</tt>  <tt class="py-line"><tt class="py-docstring">    This modifies the document *in place*.</tt> </tt>
<a name="L193"></a><tt class="py-lineno">193</tt>  <tt class="py-line"><tt class="py-docstring">    """</tt> </tt>
<a name="L194"></a><tt class="py-lineno">194</tt>  <tt class="py-line"> </tt>
<a name="L195"></a><tt class="py-lineno">195</tt>  <tt class="py-line">    <tt id="link-42" class="py-name" targets="Variable lxml.html.clean.Cleaner.scripts=lxml.html.clean.Cleaner-class.html#scripts"><a title="lxml.html.clean.Cleaner.scripts" class="py-name" href="#" onclick="return doclink('link-42', 'scripts', 'link-42');">scripts</a></tt> <tt class="py-op">=</tt> <tt class="py-name">True</tt> </tt>
<a name="L196"></a><tt class="py-lineno">196</tt>  <tt class="py-line">    <tt id="link-43" class="py-name" targets="Variable lxml.html.clean.Cleaner.javascript=lxml.html.clean.Cleaner-class.html#javascript"><a title="lxml.html.clean.Cleaner.javascript" class="py-name" href="#" onclick="return doclink('link-43', 'javascript', 'link-43');">javascript</a></tt> <tt class="py-op">=</tt> <tt class="py-name">True</tt> </tt>
<a name="L197"></a><tt class="py-lineno">197</tt>  <tt class="py-line">    <tt id="link-44" class="py-name" targets="Variable lxml.html.clean.Cleaner.comments=lxml.html.clean.Cleaner-class.html#comments"><a title="lxml.html.clean.Cleaner.comments" class="py-name" href="#" onclick="return doclink('link-44', 'comments', 'link-44');">comments</a></tt> <tt class="py-op">=</tt> <tt class="py-name">True</tt> </tt>
<a name="L198"></a><tt class="py-lineno">198</tt>  <tt class="py-line">    <tt id="link-45" class="py-name" targets="Variable lxml.html.clean.Cleaner.style=lxml.html.clean.Cleaner-class.html#style"><a title="lxml.html.clean.Cleaner.style" class="py-name" href="#" onclick="return doclink('link-45', 'style', 'link-45');">style</a></tt> <tt class="py-op">=</tt> <tt class="py-name">False</tt> </tt>
<a name="L199"></a><tt class="py-lineno">199</tt>  <tt class="py-line">    <tt id="link-46" class="py-name" targets="Variable lxml.html.clean.Cleaner.inline_style=lxml.html.clean.Cleaner-class.html#inline_style"><a title="lxml.html.clean.Cleaner.inline_style" class="py-name" href="#" onclick="return doclink('link-46', 'inline_style', 'link-46');">inline_style</a></tt> <tt class="py-op">=</tt> <tt class="py-name">None</tt> </tt>
<a name="L200"></a><tt class="py-lineno">200</tt>  <tt class="py-line">    <tt id="link-47" class="py-name" targets="Variable lxml.html.clean.Cleaner.links=lxml.html.clean.Cleaner-class.html#links"><a title="lxml.html.clean.Cleaner.links" class="py-name" href="#" onclick="return doclink('link-47', 'links', 'link-47');">links</a></tt> <tt class="py-op">=</tt> <tt class="py-name">True</tt> </tt>
<a name="L201"></a><tt class="py-lineno">201</tt>  <tt class="py-line">    <tt id="link-48" class="py-name" targets="Variable lxml.html.clean.Cleaner.meta=lxml.html.clean.Cleaner-class.html#meta"><a title="lxml.html.clean.Cleaner.meta" class="py-name" href="#" onclick="return doclink('link-48', 'meta', 'link-48');">meta</a></tt> <tt class="py-op">=</tt> <tt class="py-name">True</tt> </tt>
<a name="L202"></a><tt class="py-lineno">202</tt>  <tt class="py-line">    <tt id="link-49" class="py-name" targets="Variable lxml.html.clean.Cleaner.page_structure=lxml.html.clean.Cleaner-class.html#page_structure"><a title="lxml.html.clean.Cleaner.page_structure" class="py-name" href="#" onclick="return doclink('link-49', 'page_structure', 'link-49');">page_structure</a></tt> <tt class="py-op">=</tt> <tt class="py-name">True</tt> </tt>
<a name="L203"></a><tt class="py-lineno">203</tt>  <tt class="py-line">    <tt id="link-50" class="py-name" targets="Variable lxml.html.clean.Cleaner.processing_instructions=lxml.html.clean.Cleaner-class.html#processing_instructions"><a title="lxml.html.clean.Cleaner.processing_instructions" class="py-name" href="#" onclick="return doclink('link-50', 'processing_instructions', 'link-50');">processing_instructions</a></tt> <tt class="py-op">=</tt> <tt class="py-name">True</tt> </tt>
<a name="L204"></a><tt class="py-lineno">204</tt>  <tt class="py-line">    <tt id="link-51" class="py-name" targets="Variable lxml.html.clean.Cleaner.embedded=lxml.html.clean.Cleaner-class.html#embedded"><a title="lxml.html.clean.Cleaner.embedded" class="py-name" href="#" onclick="return doclink('link-51', 'embedded', 'link-51');">embedded</a></tt> <tt class="py-op">=</tt> <tt class="py-name">True</tt> </tt>
<a name="L205"></a><tt class="py-lineno">205</tt>  <tt class="py-line">    <tt id="link-52" class="py-name" targets="Variable lxml.html.clean.Cleaner.frames=lxml.html.clean.Cleaner-class.html#frames"><a title="lxml.html.clean.Cleaner.frames" class="py-name" href="#" onclick="return doclink('link-52', 'frames', 'link-52');">frames</a></tt> <tt class="py-op">=</tt> <tt class="py-name">True</tt> </tt>
<a name="L206"></a><tt class="py-lineno">206</tt>  <tt class="py-line">    <tt id="link-53" class="py-name" targets="Variable lxml.html.HtmlMixin.forms=lxml.html.HtmlMixin-class.html#forms,Variable lxml.html.clean.Cleaner.forms=lxml.html.clean.Cleaner-class.html#forms"><a title="lxml.html.HtmlMixin.forms
lxml.html.clean.Cleaner.forms" class="py-name" href="#" onclick="return doclink('link-53', 'forms', 'link-53');">forms</a></tt> <tt class="py-op">=</tt> <tt class="py-name">True</tt> </tt>
<a name="L207"></a><tt class="py-lineno">207</tt>  <tt class="py-line">    <tt id="link-54" class="py-name" targets="Variable lxml.html.clean.Cleaner.annoying_tags=lxml.html.clean.Cleaner-class.html#annoying_tags"><a title="lxml.html.clean.Cleaner.annoying_tags" class="py-name" href="#" onclick="return doclink('link-54', 'annoying_tags', 'link-54');">annoying_tags</a></tt> <tt class="py-op">=</tt> <tt class="py-name">True</tt> </tt>
<a name="L208"></a><tt class="py-lineno">208</tt>  <tt class="py-line">    <tt id="link-55" class="py-name" targets="Variable lxml.html.clean.Cleaner.remove_tags=lxml.html.clean.Cleaner-class.html#remove_tags"><a title="lxml.html.clean.Cleaner.remove_tags" class="py-name" href="#" onclick="return doclink('link-55', 'remove_tags', 'link-55');">remove_tags</a></tt> <tt class="py-op">=</tt> <tt class="py-name">None</tt> </tt>
<a name="L209"></a><tt class="py-lineno">209</tt>  <tt class="py-line">    <tt id="link-56" class="py-name" targets="Variable lxml.html.clean.Cleaner.allow_tags=lxml.html.clean.Cleaner-class.html#allow_tags"><a title="lxml.html.clean.Cleaner.allow_tags" class="py-name" href="#" onclick="return doclink('link-56', 'allow_tags', 'link-56');">allow_tags</a></tt> <tt class="py-op">=</tt> <tt class="py-name">None</tt> </tt>
<a name="L210"></a><tt class="py-lineno">210</tt>  <tt class="py-line">    <tt id="link-57" class="py-name" targets="Variable lxml.html.clean.Cleaner.kill_tags=lxml.html.clean.Cleaner-class.html#kill_tags"><a title="lxml.html.clean.Cleaner.kill_tags" class="py-name" href="#" onclick="return doclink('link-57', 'kill_tags', 'link-57');">kill_tags</a></tt> <tt class="py-op">=</tt> <tt class="py-name">None</tt> </tt>
<a name="L211"></a><tt class="py-lineno">211</tt>  <tt class="py-line">    <tt id="link-58" class="py-name" targets="Variable lxml.html.clean.Cleaner.remove_unknown_tags=lxml.html.clean.Cleaner-class.html#remove_unknown_tags"><a title="lxml.html.clean.Cleaner.remove_unknown_tags" class="py-name" href="#" onclick="return doclink('link-58', 'remove_unknown_tags', 'link-58');">remove_unknown_tags</a></tt> <tt class="py-op">=</tt> <tt class="py-name">True</tt> </tt>
<a name="L212"></a><tt class="py-lineno">212</tt>  <tt class="py-line">    <tt id="link-59" class="py-name" targets="Variable lxml.html.clean.Cleaner.safe_attrs_only=lxml.html.clean.Cleaner-class.html#safe_attrs_only"><a title="lxml.html.clean.Cleaner.safe_attrs_only" class="py-name" href="#" onclick="return doclink('link-59', 'safe_attrs_only', 'link-59');">safe_attrs_only</a></tt> <tt class="py-op">=</tt> <tt class="py-name">True</tt> </tt>
<a name="L213"></a><tt class="py-lineno">213</tt>  <tt class="py-line">    <tt id="link-60" class="py-name" targets="Variable lxml.html.clean.Cleaner.safe_attrs=lxml.html.clean.Cleaner-class.html#safe_attrs,Variable lxml.html.defs.safe_attrs=lxml.html.defs-module.html#safe_attrs"><a title="lxml.html.clean.Cleaner.safe_attrs
lxml.html.defs.safe_attrs" class="py-name" href="#" onclick="return doclink('link-60', 'safe_attrs', 'link-60');">safe_attrs</a></tt> <tt class="py-op">=</tt> <tt id="link-61" class="py-name"><a title="lxml.html.defs" class="py-name" href="#" onclick="return doclink('link-61', 'defs', 'link-6');">defs</a></tt><tt class="py-op">.</tt><tt id="link-62" class="py-name"><a title="lxml.html.clean.Cleaner.safe_attrs
lxml.html.defs.safe_attrs" class="py-name" href="#" onclick="return doclink('link-62', 'safe_attrs', 'link-60');">safe_attrs</a></tt> </tt>
<a name="L214"></a><tt class="py-lineno">214</tt>  <tt class="py-line">    <tt id="link-63" class="py-name" targets="Variable lxml.html.clean.Cleaner.add_nofollow=lxml.html.clean.Cleaner-class.html#add_nofollow"><a title="lxml.html.clean.Cleaner.add_nofollow" class="py-name" href="#" onclick="return doclink('link-63', 'add_nofollow', 'link-63');">add_nofollow</a></tt> <tt class="py-op">=</tt> <tt class="py-name">False</tt> </tt>
<a name="L215"></a><tt class="py-lineno">215</tt>  <tt class="py-line">    <tt id="link-64" class="py-name" targets="Variable lxml.html.clean.Cleaner.host_whitelist=lxml.html.clean.Cleaner-class.html#host_whitelist"><a title="lxml.html.clean.Cleaner.host_whitelist" class="py-name" href="#" onclick="return doclink('link-64', 'host_whitelist', 'link-64');">host_whitelist</a></tt> <tt class="py-op">=</tt> <tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L216"></a><tt class="py-lineno">216</tt>  <tt class="py-line">    <tt id="link-65" class="py-name" targets="Variable lxml.html.clean.Cleaner.whitelist_tags=lxml.html.clean.Cleaner-class.html#whitelist_tags"><a title="lxml.html.clean.Cleaner.whitelist_tags" class="py-name" href="#" onclick="return doclink('link-65', 'whitelist_tags', 'link-65');">whitelist_tags</a></tt> <tt class="py-op">=</tt> <tt id="link-66" class="py-name" targets="Method lxml.etree._Element.set()=lxml.etree._Element-class.html#set,Method lxml.etree._XSLTProcessingInstruction.set()=lxml.etree._XSLTProcessingInstruction-class.html#set,Method lxml.html.HtmlElement.set()=lxml.html.HtmlElement-class.html#set,Method lxml.html.HtmlMixin.set()=lxml.html.HtmlMixin-class.html#set"><a title="lxml.etree._Element.set
lxml.etree._XSLTProcessingInstruction.set
lxml.html.HtmlElement.set
lxml.html.HtmlMixin.set" class="py-name" href="#" onclick="return doclink('link-66', 'set', 'link-66');">set</a></tt><tt class="py-op">(</tt><tt class="py-op">[</tt><tt class="py-string">'iframe'</tt><tt class="py-op">,</tt> <tt class="py-string">'embed'</tt><tt class="py-op">]</tt><tt class="py-op">)</tt> </tt>
<a name="L217"></a><tt class="py-lineno">217</tt>  <tt class="py-line"> </tt>
<!-- Collapsible listing of Cleaner.__init__ (source lines 218-225): the "-def" div holds the signature line and its toggle link, the hidden "-collapsed" div is an empty placeholder, and the "-expanded" div holds the body. The listed body copies each keyword argument onto the instance with setattr, raises TypeError("Unknown parameter: ...") for any name the instance does not already have, and assigns the value of style to inline_style when inline_style is None and was not passed as a keyword. --><a name="Cleaner.__init__"></a><div id="Cleaner.__init__-def"><a name="L218"></a><tt class="py-lineno">218</tt> <a class="py-toggle" href="#" id="Cleaner.__init__-toggle" onclick="return toggle('Cleaner.__init__');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="lxml.html.clean.Cleaner-class.html#__init__">__init__</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-op">**</tt><tt class="py-param">kw</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="Cleaner.__init__-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="Cleaner.__init__-expanded"><a name="L219"></a><tt class="py-lineno">219</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt id="link-67" class="py-name" targets="Variable lxml.etree.DTD.name=lxml.etree.DTD-class.html#name,Variable lxml.etree._Entity.name=lxml.etree._Entity-class.html#name,Variable lxml.html.InputMixin.name=lxml.html.InputMixin-class.html#name,Variable lxml.objectify.PyType.name=lxml.objectify.PyType-class.html#name"><a title="lxml.etree.DTD.name
lxml.etree._Entity.name
lxml.html.InputMixin.name
lxml.objectify.PyType.name" class="py-name" href="#" onclick="return doclink('link-67', 'name', 'link-67');">name</a></tt><tt class="py-op">,</tt> <tt id="link-68" class="py-name" targets="Variable lxml.html.CheckboxGroup.value=lxml.html.CheckboxGroup-class.html#value,Variable lxml.html.InputElement.value=lxml.html.InputElement-class.html#value,Variable lxml.html.RadioGroup.value=lxml.html.RadioGroup-class.html#value,Variable lxml.html.SelectElement.value=lxml.html.SelectElement-class.html#value,Variable lxml.html.TextareaElement.value=lxml.html.TextareaElement-class.html#value"><a title="lxml.html.CheckboxGroup.value
lxml.html.InputElement.value
lxml.html.RadioGroup.value
lxml.html.SelectElement.value
lxml.html.TextareaElement.value" class="py-name" href="#" onclick="return doclink('link-68', 'value', 'link-68');">value</a></tt> <tt class="py-keyword">in</tt> <tt class="py-name">kw</tt><tt class="py-op">.</tt><tt id="link-69" class="py-name" targets="Method lxml.etree._Attrib.items()=lxml.etree._Attrib-class.html#items,Method lxml.etree._Element.items()=lxml.etree._Element-class.html#items,Method lxml.etree._IDDict.items()=lxml.etree._IDDict-class.html#items"><a title="lxml.etree._Attrib.items
lxml.etree._Element.items
lxml.etree._IDDict.items" class="py-name" href="#" onclick="return doclink('link-69', 'items', 'link-69');">items</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L220"></a><tt class="py-lineno">220</tt>  <tt class="py-line">            <tt class="py-keyword">if</tt> <tt class="py-keyword">not</tt> <tt id="link-70" class="py-name" targets="Method lxml.objectify.ObjectPath.hasattr()=lxml.objectify.ObjectPath-class.html#hasattr"><a title="lxml.objectify.ObjectPath.hasattr" class="py-name" href="#" onclick="return doclink('link-70', 'hasattr', 'link-70');">hasattr</a></tt><tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">,</tt> <tt id="link-71" class="py-name"><a title="lxml.etree.DTD.name
lxml.etree._Entity.name
lxml.html.InputMixin.name
lxml.objectify.PyType.name" class="py-name" href="#" onclick="return doclink('link-71', 'name', 'link-67');">name</a></tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L221"></a><tt class="py-lineno">221</tt>  <tt class="py-line">                <tt class="py-keyword">raise</tt> <tt class="py-name">TypeError</tt><tt class="py-op">(</tt> </tt>
<a name="L222"></a><tt class="py-lineno">222</tt>  <tt class="py-line">                    <tt class="py-string">"Unknown parameter: %s=%r"</tt> <tt class="py-op">%</tt> <tt class="py-op">(</tt><tt id="link-72" class="py-name"><a title="lxml.etree.DTD.name
lxml.etree._Entity.name
lxml.html.InputMixin.name
lxml.objectify.PyType.name" class="py-name" href="#" onclick="return doclink('link-72', 'name', 'link-67');">name</a></tt><tt class="py-op">,</tt> <tt id="link-73" class="py-name"><a title="lxml.html.CheckboxGroup.value
lxml.html.InputElement.value
lxml.html.RadioGroup.value
lxml.html.SelectElement.value
lxml.html.TextareaElement.value" class="py-name" href="#" onclick="return doclink('link-73', 'value', 'link-68');">value</a></tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L223"></a><tt class="py-lineno">223</tt>  <tt class="py-line">            <tt id="link-74" class="py-name" targets="Method lxml.objectify.ObjectPath.setattr()=lxml.objectify.ObjectPath-class.html#setattr"><a title="lxml.objectify.ObjectPath.setattr" class="py-name" href="#" onclick="return doclink('link-74', 'setattr', 'link-74');">setattr</a></tt><tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">,</tt> <tt id="link-75" class="py-name"><a title="lxml.etree.DTD.name
lxml.etree._Entity.name
lxml.html.InputMixin.name
lxml.objectify.PyType.name" class="py-name" href="#" onclick="return doclink('link-75', 'name', 'link-67');">name</a></tt><tt class="py-op">,</tt> <tt id="link-76" class="py-name"><a title="lxml.html.CheckboxGroup.value
lxml.html.InputElement.value
lxml.html.RadioGroup.value
lxml.html.SelectElement.value
lxml.html.TextareaElement.value" class="py-name" href="#" onclick="return doclink('link-76', 'value', 'link-68');">value</a></tt><tt class="py-op">)</tt> </tt>
<a name="L224"></a><tt class="py-lineno">224</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-77" class="py-name"><a title="lxml.html.clean.Cleaner.inline_style" class="py-name" href="#" onclick="return doclink('link-77', 'inline_style', 'link-46');">inline_style</a></tt> <tt class="py-keyword">is</tt> <tt class="py-name">None</tt> <tt class="py-keyword">and</tt> <tt class="py-string">'inline_style'</tt> <tt class="py-keyword">not</tt> <tt class="py-keyword">in</tt> <tt class="py-name">kw</tt><tt class="py-op">:</tt> </tt>
<a name="L225"></a><tt class="py-lineno">225</tt>  <tt class="py-line">            <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-78" class="py-name"><a title="lxml.html.clean.Cleaner.inline_style" class="py-name" href="#" onclick="return doclink('link-78', 'inline_style', 'link-46');">inline_style</a></tt> <tt class="py-op">=</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-79" class="py-name"><a title="lxml.html.clean.Cleaner.style" class="py-name" href="#" onclick="return doclink('link-79', 'style', 'link-45');">style</a></tt> </tt>
</div><a name="L226"></a><tt class="py-lineno">226</tt>  <tt class="py-line"> </tt>
<a name="L227"></a><tt class="py-lineno">227</tt>  <tt class="py-line">    <tt class="py-comment"># Used to look up the primary URL for a given tag that is up for</tt> </tt>
<a name="L228"></a><tt class="py-lineno">228</tt>  <tt class="py-line">    <tt class="py-comment"># removal:</tt> </tt>
<a name="L229"></a><tt class="py-lineno">229</tt>  <tt class="py-line">    <tt id="link-80" class="py-name" targets="Variable lxml.html.clean.Cleaner._tag_link_attrs=lxml.html.clean.Cleaner-class.html#_tag_link_attrs"><a title="lxml.html.clean.Cleaner._tag_link_attrs" class="py-name" href="#" onclick="return doclink('link-80', '_tag_link_attrs', 'link-80');">_tag_link_attrs</a></tt> <tt class="py-op">=</tt> <tt class="py-name">dict</tt><tt class="py-op">(</tt> </tt>
<a name="L230"></a><tt class="py-lineno">230</tt>  <tt class="py-line">        <tt class="py-name">script</tt><tt class="py-op">=</tt><tt class="py-string">'src'</tt><tt class="py-op">,</tt> </tt>
<a name="L231"></a><tt class="py-lineno">231</tt>  <tt class="py-line">        <tt class="py-name">link</tt><tt class="py-op">=</tt><tt class="py-string">'href'</tt><tt class="py-op">,</tt> </tt>
<a name="L232"></a><tt class="py-lineno">232</tt>  <tt class="py-line">        <tt class="py-comment"># From: http://java.sun.com/j2se/1.4.2/docs/guide/misc/applet.html</tt> </tt>
<a name="L233"></a><tt class="py-lineno">233</tt>  <tt class="py-line">        <tt class="py-comment"># From what I can tell, both attributes can contain a link:</tt> </tt>
<a name="L234"></a><tt class="py-lineno">234</tt>  <tt class="py-line">        <tt class="py-name">applet</tt><tt class="py-op">=</tt><tt class="py-op">[</tt><tt class="py-string">'code'</tt><tt class="py-op">,</tt> <tt class="py-string">'object'</tt><tt class="py-op">]</tt><tt class="py-op">,</tt> </tt>
<a name="L235"></a><tt class="py-lineno">235</tt>  <tt class="py-line">        <tt class="py-name">iframe</tt><tt class="py-op">=</tt><tt class="py-string">'src'</tt><tt class="py-op">,</tt> </tt>
<a name="L236"></a><tt class="py-lineno">236</tt>  <tt class="py-line">        <tt class="py-name">embed</tt><tt class="py-op">=</tt><tt class="py-string">'src'</tt><tt class="py-op">,</tt> </tt>
<a name="L237"></a><tt class="py-lineno">237</tt>  <tt class="py-line">        <tt class="py-name">layer</tt><tt class="py-op">=</tt><tt class="py-string">'src'</tt><tt class="py-op">,</tt> </tt>
<a name="L238"></a><tt class="py-lineno">238</tt>  <tt class="py-line">        <tt class="py-comment"># FIXME: there doesn't really seem to be a general way to figure out what</tt> </tt>
<a name="L239"></a><tt class="py-lineno">239</tt>  <tt class="py-line">        <tt class="py-comment"># links an &lt;object&gt; tag uses; links often go in &lt;param&gt; tags with values</tt> </tt>
<a name="L240"></a><tt class="py-lineno">240</tt>  <tt class="py-line">        <tt class="py-comment"># that we don't really know.  You'd have to have knowledge about specific</tt> </tt>
<a name="L241"></a><tt class="py-lineno">241</tt>  <tt class="py-line">        <tt class="py-comment"># kinds of plugins (probably keyed off classid), and match against those.</tt> </tt>
<a name="L242"></a><tt class="py-lineno">242</tt>  <tt class="py-line">        <tt class="py-comment">##object=?,</tt> </tt>
<a name="L243"></a><tt class="py-lineno">243</tt>  <tt class="py-line">        <tt class="py-comment"># FIXME: not looking at the action currently, because it is more complex</tt> </tt>
<a name="L244"></a><tt class="py-lineno">244</tt>  <tt class="py-line">        <tt class="py-comment"># than that -- if you keep the form, you should keep the form controls.</tt> </tt>
<a name="L245"></a><tt class="py-lineno">245</tt>  <tt class="py-line">        <tt class="py-comment">##form='action',</tt> </tt>
<a name="L246"></a><tt class="py-lineno">246</tt>  <tt class="py-line">        <tt class="py-name">a</tt><tt class="py-op">=</tt><tt class="py-string">'href'</tt><tt class="py-op">,</tt> </tt>
<a name="L247"></a><tt class="py-lineno">247</tt>  <tt class="py-line">        <tt class="py-op">)</tt> </tt>
<a name="L248"></a><tt class="py-lineno">248</tt>  <tt class="py-line"> </tt>
<a name="Cleaner.__call__"></a><div id="Cleaner.__call__-def"><a name="L249"></a><tt class="py-lineno">249</tt> <a class="py-toggle" href="#" id="Cleaner.__call__-toggle" onclick="return toggle('Cleaner.__call__');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="lxml.html.clean.Cleaner-class.html#__call__">__call__</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">doc</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="Cleaner.__call__-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="Cleaner.__call__-expanded"><a name="L250"></a><tt class="py-lineno">250</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L251"></a><tt class="py-lineno">251</tt>  <tt class="py-line"><tt class="py-docstring">        Cleans the document.</tt> </tt>
<a name="L252"></a><tt class="py-lineno">252</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L253"></a><tt class="py-lineno">253</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt id="link-81" class="py-name"><a title="lxml.objectify.ObjectPath.hasattr" class="py-name" href="#" onclick="return doclink('link-81', 'hasattr', 'link-70');">hasattr</a></tt><tt class="py-op">(</tt><tt class="py-name">doc</tt><tt class="py-op">,</tt> <tt class="py-string">'getroot'</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L254"></a><tt class="py-lineno">254</tt>  <tt class="py-line">            <tt class="py-comment"># ElementTree instance, instead of an element</tt> </tt>
<a name="L255"></a><tt class="py-lineno">255</tt>  <tt class="py-line">            <tt class="py-name">doc</tt> <tt class="py-op">=</tt> <tt class="py-name">doc</tt><tt class="py-op">.</tt><tt id="link-82" class="py-name" targets="Method lxml.etree._ElementTree.getroot()=lxml.etree._ElementTree-class.html#getroot"><a title="lxml.etree._ElementTree.getroot" class="py-name" href="#" onclick="return doclink('link-82', 'getroot', 'link-82');">getroot</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L256"></a><tt class="py-lineno">256</tt>  <tt class="py-line">        <tt class="py-comment"># convert XHTML to HTML</tt> </tt>
<a name="L257"></a><tt class="py-lineno">257</tt>  <tt class="py-line">        <tt class="py-name">xhtml_to_html</tt><tt class="py-op">(</tt><tt class="py-name">doc</tt><tt class="py-op">)</tt> </tt>
<a name="L258"></a><tt class="py-lineno">258</tt>  <tt class="py-line">        <tt class="py-comment"># Normalize a case that IE treats &lt;image&gt; like &lt;img&gt;, and that</tt> </tt>
<a name="L259"></a><tt class="py-lineno">259</tt>  <tt class="py-line">        <tt class="py-comment"># can confuse either this step or later steps.</tt> </tt>
<a name="L260"></a><tt class="py-lineno">260</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt class="py-name">el</tt> <tt class="py-keyword">in</tt> <tt class="py-name">doc</tt><tt class="py-op">.</tt><tt id="link-83" class="py-name" targets="Method lxml.etree._Element.iter()=lxml.etree._Element-class.html#iter,Method lxml.etree._ElementTree.iter()=lxml.etree._ElementTree-class.html#iter"><a title="lxml.etree._Element.iter
lxml.etree._ElementTree.iter" class="py-name" href="#" onclick="return doclink('link-83', 'iter', 'link-83');">iter</a></tt><tt class="py-op">(</tt><tt class="py-string">'image'</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L261"></a><tt class="py-lineno">261</tt>  <tt class="py-line">            <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-84" class="py-name" targets="Variable lxml.etree._Comment.tag=lxml.etree._Comment-class.html#tag,Variable lxml.etree._Element.tag=lxml.etree._Element-class.html#tag,Variable lxml.etree._Entity.tag=lxml.etree._Entity-class.html#tag,Variable lxml.etree._ProcessingInstruction.tag=lxml.etree._ProcessingInstruction-class.html#tag,Function lxml.tests.test_xpathevaluator.tag()=lxml.tests.test_xpathevaluator-module.html#tag,Variable xml.etree.ElementTree.Element.tag=xml.etree.ElementTree.Element-class.html#tag"><a title="lxml.etree._Comment.tag
lxml.etree._Element.tag
lxml.etree._Entity.tag
lxml.etree._ProcessingInstruction.tag
lxml.tests.test_xpathevaluator.tag
xml.etree.ElementTree.Element.tag" class="py-name" href="#" onclick="return doclink('link-84', 'tag', 'link-84');">tag</a></tt> <tt class="py-op">=</tt> <tt class="py-string">'img'</tt> </tt>
<a name="L262"></a><tt class="py-lineno">262</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-keyword">not</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-85" class="py-name"><a title="lxml.html.clean.Cleaner.comments" class="py-name" href="#" onclick="return doclink('link-85', 'comments', 'link-44');">comments</a></tt><tt class="py-op">:</tt> </tt>
<a name="L263"></a><tt class="py-lineno">263</tt>  <tt class="py-line">            <tt class="py-comment"># Of course, if we were going to kill comments anyway, we don't</tt> </tt>
<a name="L264"></a><tt class="py-lineno">264</tt>  <tt class="py-line">            <tt class="py-comment"># need to worry about this</tt> </tt>
<a name="L265"></a><tt class="py-lineno">265</tt>  <tt class="py-line">            <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-86" class="py-name" targets="Method lxml.html.clean.Cleaner.kill_conditional_comments()=lxml.html.clean.Cleaner-class.html#kill_conditional_comments"><a title="lxml.html.clean.Cleaner.kill_conditional_comments" class="py-name" href="#" onclick="return doclink('link-86', 'kill_conditional_comments', 'link-86');">kill_conditional_comments</a></tt><tt class="py-op">(</tt><tt class="py-name">doc</tt><tt class="py-op">)</tt> </tt>
<a name="L266"></a><tt class="py-lineno">266</tt>  <tt class="py-line"> </tt>
<a name="L267"></a><tt class="py-lineno">267</tt>  <tt class="py-line">        <tt id="link-87" class="py-name"><a title="lxml.html.clean.Cleaner.kill_tags" class="py-name" href="#" onclick="return doclink('link-87', 'kill_tags', 'link-57');">kill_tags</a></tt> <tt class="py-op">=</tt> <tt id="link-88" class="py-name"><a title="lxml.etree._Element.set
lxml.etree._XSLTProcessingInstruction.set
lxml.html.HtmlElement.set
lxml.html.HtmlMixin.set" class="py-name" href="#" onclick="return doclink('link-88', 'set', 'link-66');">set</a></tt><tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-89" class="py-name"><a title="lxml.html.clean.Cleaner.kill_tags" class="py-name" href="#" onclick="return doclink('link-89', 'kill_tags', 'link-57');">kill_tags</a></tt> <tt class="py-keyword">or</tt> <tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L268"></a><tt class="py-lineno">268</tt>  <tt class="py-line">        <tt id="link-90" class="py-name"><a title="lxml.html.clean.Cleaner.remove_tags" class="py-name" href="#" onclick="return doclink('link-90', 'remove_tags', 'link-55');">remove_tags</a></tt> <tt class="py-op">=</tt> <tt id="link-91" class="py-name"><a title="lxml.etree._Element.set
lxml.etree._XSLTProcessingInstruction.set
lxml.html.HtmlElement.set
lxml.html.HtmlMixin.set" class="py-name" href="#" onclick="return doclink('link-91', 'set', 'link-66');">set</a></tt><tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-92" class="py-name"><a title="lxml.html.clean.Cleaner.remove_tags" class="py-name" href="#" onclick="return doclink('link-92', 'remove_tags', 'link-55');">remove_tags</a></tt> <tt class="py-keyword">or</tt> <tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L269"></a><tt class="py-lineno">269</tt>  <tt class="py-line">        <tt id="link-93" class="py-name"><a title="lxml.html.clean.Cleaner.allow_tags" class="py-name" href="#" onclick="return doclink('link-93', 'allow_tags', 'link-56');">allow_tags</a></tt> <tt class="py-op">=</tt> <tt id="link-94" class="py-name"><a title="lxml.etree._Element.set
lxml.etree._XSLTProcessingInstruction.set
lxml.html.HtmlElement.set
lxml.html.HtmlMixin.set" class="py-name" href="#" onclick="return doclink('link-94', 'set', 'link-66');">set</a></tt><tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-95" class="py-name"><a title="lxml.html.clean.Cleaner.allow_tags" class="py-name" href="#" onclick="return doclink('link-95', 'allow_tags', 'link-56');">allow_tags</a></tt> <tt class="py-keyword">or</tt> <tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L270"></a><tt class="py-lineno">270</tt>  <tt class="py-line"> </tt>
<a name="L271"></a><tt class="py-lineno">271</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-96" class="py-name"><a title="lxml.html.clean.Cleaner.scripts" class="py-name" href="#" onclick="return doclink('link-96', 'scripts', 'link-42');">scripts</a></tt><tt class="py-op">:</tt> </tt>
<a name="L272"></a><tt class="py-lineno">272</tt>  <tt class="py-line">            <tt id="link-97" class="py-name"><a title="lxml.html.clean.Cleaner.kill_tags" class="py-name" href="#" onclick="return doclink('link-97', 'kill_tags', 'link-57');">kill_tags</a></tt><tt class="py-op">.</tt><tt id="link-98" class="py-name" targets="Method lxml.html.CheckboxValues.add()=lxml.html.CheckboxValues-class.html#add,Method lxml.html.Classes.add()=lxml.html.Classes-class.html#add,Method lxml.html.MultipleSelectOptions.add()=lxml.html.MultipleSelectOptions-class.html#add"><a title="lxml.html.CheckboxValues.add
lxml.html.Classes.add
lxml.html.MultipleSelectOptions.add" class="py-name" href="#" onclick="return doclink('link-98', 'add', 'link-98');">add</a></tt><tt class="py-op">(</tt><tt class="py-string">'script'</tt><tt class="py-op">)</tt> </tt>
<a name="L273"></a><tt class="py-lineno">273</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-99" class="py-name"><a title="lxml.html.clean.Cleaner.safe_attrs_only" class="py-name" href="#" onclick="return doclink('link-99', 'safe_attrs_only', 'link-59');">safe_attrs_only</a></tt><tt class="py-op">:</tt> </tt>
<a name="L274"></a><tt class="py-lineno">274</tt>  <tt class="py-line">            <tt id="link-100" class="py-name"><a title="lxml.html.clean.Cleaner.safe_attrs
lxml.html.defs.safe_attrs" class="py-name" href="#" onclick="return doclink('link-100', 'safe_attrs', 'link-60');">safe_attrs</a></tt> <tt class="py-op">=</tt> <tt id="link-101" class="py-name"><a title="lxml.etree._Element.set
lxml.etree._XSLTProcessingInstruction.set
lxml.html.HtmlElement.set
lxml.html.HtmlMixin.set" class="py-name" href="#" onclick="return doclink('link-101', 'set', 'link-66');">set</a></tt><tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-102" class="py-name"><a title="lxml.html.clean.Cleaner.safe_attrs
lxml.html.defs.safe_attrs" class="py-name" href="#" onclick="return doclink('link-102', 'safe_attrs', 'link-60');">safe_attrs</a></tt><tt class="py-op">)</tt> </tt>
<a name="L275"></a><tt class="py-lineno">275</tt>  <tt class="py-line">            <tt class="py-keyword">for</tt> <tt class="py-name">el</tt> <tt class="py-keyword">in</tt> <tt class="py-name">doc</tt><tt class="py-op">.</tt><tt id="link-103" class="py-name"><a title="lxml.etree._Element.iter
lxml.etree._ElementTree.iter" class="py-name" href="#" onclick="return doclink('link-103', 'iter', 'link-83');">iter</a></tt><tt class="py-op">(</tt><tt id="link-104" class="py-name"><a title="lxml.etree
lxml.sax.ElementTreeContentHandler.etree
lxml.tests.test_elementtree.CElementTreeTestCase.etree
lxml.tests.test_elementtree._ETreeTestCaseBase.etree
lxml.tests.test_elementtree._XMLPullParserTest.etree
lxml.tests.test_io._IOTestCaseBase.etree" class="py-name" href="#" onclick="return doclink('link-104', 'etree', 'link-3');">etree</a></tt><tt class="py-op">.</tt><tt id="link-105" class="py-name" targets="Function lxml.etree.Element()=lxml.etree-module.html#Element,Function lxml.objectify.Element()=lxml.objectify-module.html#Element,Method lxml.tests.test_pyclasslookup.PyClassLookupTestCase.Element()=lxml.tests.test_pyclasslookup.PyClassLookupTestCase-class.html#Element,Class xml.etree.ElementTree.Element=xml.etree.ElementTree.Element-class.html"><a title="lxml.etree.Element
lxml.objectify.Element
lxml.tests.test_pyclasslookup.PyClassLookupTestCase.Element
xml.etree.ElementTree.Element" class="py-name" href="#" onclick="return doclink('link-105', 'Element', 'link-105');">Element</a></tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L276"></a><tt class="py-lineno">276</tt>  <tt class="py-line">                <tt id="link-106" class="py-name" targets="Variable lxml.etree._Element.attrib=lxml.etree._Element-class.html#attrib,Variable lxml.etree._ProcessingInstruction.attrib=lxml.etree._ProcessingInstruction-class.html#attrib,Function lxml.tests.selftest2.attrib()=lxml.tests.selftest2-module.html#attrib,Variable xml.etree.ElementTree.Element.attrib=xml.etree.ElementTree.Element-class.html#attrib"><a title="lxml.etree._Element.attrib
lxml.etree._ProcessingInstruction.attrib
lxml.tests.selftest2.attrib
xml.etree.ElementTree.Element.attrib" class="py-name" href="#" onclick="return doclink('link-106', 'attrib', 'link-106');">attrib</a></tt> <tt class="py-op">=</tt> <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-107" class="py-name"><a title="lxml.etree._Element.attrib
lxml.etree._ProcessingInstruction.attrib
lxml.tests.selftest2.attrib
xml.etree.ElementTree.Element.attrib" class="py-name" href="#" onclick="return doclink('link-107', 'attrib', 'link-106');">attrib</a></tt> </tt>
<a name="L277"></a><tt class="py-lineno">277</tt>  <tt class="py-line">                <tt class="py-keyword">for</tt> <tt class="py-name">aname</tt> <tt class="py-keyword">in</tt> <tt id="link-108" class="py-name"><a title="lxml.etree._Element.attrib
lxml.etree._ProcessingInstruction.attrib
lxml.tests.selftest2.attrib
xml.etree.ElementTree.Element.attrib" class="py-name" href="#" onclick="return doclink('link-108', 'attrib', 'link-106');">attrib</a></tt><tt class="py-op">.</tt><tt id="link-109" class="py-name" targets="Method lxml.etree._Attrib.keys()=lxml.etree._Attrib-class.html#keys,Method lxml.etree._Element.keys()=lxml.etree._Element-class.html#keys,Method lxml.etree._IDDict.keys()=lxml.etree._IDDict-class.html#keys,Method lxml.html.FieldsDict.keys()=lxml.html.FieldsDict-class.html#keys,Method lxml.html.InputGetter.keys()=lxml.html.InputGetter-class.html#keys"><a title="lxml.etree._Attrib.keys
lxml.etree._Element.keys
lxml.etree._IDDict.keys
lxml.html.FieldsDict.keys
lxml.html.InputGetter.keys" class="py-name" href="#" onclick="return doclink('link-109', 'keys', 'link-109');">keys</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L278"></a><tt class="py-lineno">278</tt>  <tt class="py-line">                    <tt class="py-keyword">if</tt> <tt class="py-name">aname</tt> <tt class="py-keyword">not</tt> <tt class="py-keyword">in</tt> <tt id="link-110" class="py-name"><a title="lxml.html.clean.Cleaner.safe_attrs
lxml.html.defs.safe_attrs" class="py-name" href="#" onclick="return doclink('link-110', 'safe_attrs', 'link-60');">safe_attrs</a></tt><tt class="py-op">:</tt> </tt>
<a name="L279"></a><tt class="py-lineno">279</tt>  <tt class="py-line">                        <tt class="py-keyword">del</tt> <tt id="link-111" class="py-name"><a title="lxml.etree._Element.attrib
lxml.etree._ProcessingInstruction.attrib
lxml.tests.selftest2.attrib
xml.etree.ElementTree.Element.attrib" class="py-name" href="#" onclick="return doclink('link-111', 'attrib', 'link-106');">attrib</a></tt><tt class="py-op">[</tt><tt class="py-name">aname</tt><tt class="py-op">]</tt> </tt>
<a name="L280"></a><tt class="py-lineno">280</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-112" class="py-name"><a title="lxml.html.clean.Cleaner.javascript" class="py-name" href="#" onclick="return doclink('link-112', 'javascript', 'link-43');">javascript</a></tt><tt class="py-op">:</tt> </tt>
<a name="L281"></a><tt class="py-lineno">281</tt>  <tt class="py-line">            <tt class="py-keyword">if</tt> <tt class="py-keyword">not</tt> <tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-113" class="py-name"><a title="lxml.html.clean.Cleaner.safe_attrs_only" class="py-name" href="#" onclick="return doclink('link-113', 'safe_attrs_only', 'link-59');">safe_attrs_only</a></tt> <tt class="py-keyword">and</tt> </tt>
<a name="L282"></a><tt class="py-lineno">282</tt>  <tt class="py-line">                    <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-114" class="py-name"><a title="lxml.html.clean.Cleaner.safe_attrs
lxml.html.defs.safe_attrs" class="py-name" href="#" onclick="return doclink('link-114', 'safe_attrs', 'link-60');">safe_attrs</a></tt> <tt class="py-op">==</tt> <tt id="link-115" class="py-name"><a title="lxml.html.defs" class="py-name" href="#" onclick="return doclink('link-115', 'defs', 'link-6');">defs</a></tt><tt class="py-op">.</tt><tt id="link-116" class="py-name"><a title="lxml.html.clean.Cleaner.safe_attrs
lxml.html.defs.safe_attrs" class="py-name" href="#" onclick="return doclink('link-116', 'safe_attrs', 'link-60');">safe_attrs</a></tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L283"></a><tt class="py-lineno">283</tt>  <tt class="py-line">                <tt class="py-comment"># safe_attrs handles events attributes itself</tt> </tt>
<a name="L284"></a><tt class="py-lineno">284</tt>  <tt class="py-line">                <tt class="py-keyword">for</tt> <tt class="py-name">el</tt> <tt class="py-keyword">in</tt> <tt class="py-name">doc</tt><tt class="py-op">.</tt><tt id="link-117" class="py-name"><a title="lxml.etree._Element.iter
lxml.etree._ElementTree.iter" class="py-name" href="#" onclick="return doclink('link-117', 'iter', 'link-83');">iter</a></tt><tt class="py-op">(</tt><tt id="link-118" class="py-name"><a title="lxml.etree
lxml.sax.ElementTreeContentHandler.etree
lxml.tests.test_elementtree.CElementTreeTestCase.etree
lxml.tests.test_elementtree._ETreeTestCaseBase.etree
lxml.tests.test_elementtree._XMLPullParserTest.etree
lxml.tests.test_io._IOTestCaseBase.etree" class="py-name" href="#" onclick="return doclink('link-118', 'etree', 'link-3');">etree</a></tt><tt class="py-op">.</tt><tt id="link-119" class="py-name"><a title="lxml.etree.Element
lxml.objectify.Element
lxml.tests.test_pyclasslookup.PyClassLookupTestCase.Element
xml.etree.ElementTree.Element" class="py-name" href="#" onclick="return doclink('link-119', 'Element', 'link-105');">Element</a></tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L285"></a><tt class="py-lineno">285</tt>  <tt class="py-line">                    <tt id="link-120" class="py-name"><a title="lxml.etree._Element.attrib
lxml.etree._ProcessingInstruction.attrib
lxml.tests.selftest2.attrib
xml.etree.ElementTree.Element.attrib" class="py-name" href="#" onclick="return doclink('link-120', 'attrib', 'link-106');">attrib</a></tt> <tt class="py-op">=</tt> <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-121" class="py-name"><a title="lxml.etree._Element.attrib
lxml.etree._ProcessingInstruction.attrib
lxml.tests.selftest2.attrib
xml.etree.ElementTree.Element.attrib" class="py-name" href="#" onclick="return doclink('link-121', 'attrib', 'link-106');">attrib</a></tt> </tt>
<a name="L286"></a><tt class="py-lineno">286</tt>  <tt class="py-line">                    <tt class="py-keyword">for</tt> <tt class="py-name">aname</tt> <tt class="py-keyword">in</tt> <tt id="link-122" class="py-name"><a title="lxml.etree._Element.attrib
lxml.etree._ProcessingInstruction.attrib
lxml.tests.selftest2.attrib
xml.etree.ElementTree.Element.attrib" class="py-name" href="#" onclick="return doclink('link-122', 'attrib', 'link-106');">attrib</a></tt><tt class="py-op">.</tt><tt id="link-123" class="py-name"><a title="lxml.etree._Attrib.keys
lxml.etree._Element.keys
lxml.etree._IDDict.keys
lxml.html.FieldsDict.keys
lxml.html.InputGetter.keys" class="py-name" href="#" onclick="return doclink('link-123', 'keys', 'link-109');">keys</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L287"></a><tt class="py-lineno">287</tt>  <tt class="py-line">                        <tt class="py-keyword">if</tt> <tt class="py-name">aname</tt><tt class="py-op">.</tt><tt class="py-name">startswith</tt><tt class="py-op">(</tt><tt class="py-string">'on'</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L288"></a><tt class="py-lineno">288</tt>  <tt class="py-line">                            <tt class="py-keyword">del</tt> <tt id="link-124" class="py-name"><a title="lxml.etree._Element.attrib
lxml.etree._ProcessingInstruction.attrib
lxml.tests.selftest2.attrib
xml.etree.ElementTree.Element.attrib" class="py-name" href="#" onclick="return doclink('link-124', 'attrib', 'link-106');">attrib</a></tt><tt class="py-op">[</tt><tt class="py-name">aname</tt><tt class="py-op">]</tt> </tt>
<a name="L289"></a><tt class="py-lineno">289</tt>  <tt class="py-line">            <tt class="py-name">doc</tt><tt class="py-op">.</tt><tt id="link-125" class="py-name" targets="Method lxml.html.HtmlMixin.rewrite_links()=lxml.html.HtmlMixin-class.html#rewrite_links,Variable lxml.html.rewrite_links=lxml.html-module.html#rewrite_links"><a title="lxml.html.HtmlMixin.rewrite_links
lxml.html.rewrite_links" class="py-name" href="#" onclick="return doclink('link-125', 'rewrite_links', 'link-125');">rewrite_links</a></tt><tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-126" class="py-name" targets="Method lxml.html.clean.Cleaner._remove_javascript_link()=lxml.html.clean.Cleaner-class.html#_remove_javascript_link"><a title="lxml.html.clean.Cleaner._remove_javascript_link" class="py-name" href="#" onclick="return doclink('link-126', '_remove_javascript_link', 'link-126');">_remove_javascript_link</a></tt><tt class="py-op">,</tt> </tt>
<a name="L290"></a><tt class="py-lineno">290</tt>  <tt class="py-line">                              <tt id="link-127" class="py-name" targets="Method lxml.html.HtmlMixin.resolve_base_href()=lxml.html.HtmlMixin-class.html#resolve_base_href,Variable lxml.html.resolve_base_href=lxml.html-module.html#resolve_base_href"><a title="lxml.html.HtmlMixin.resolve_base_href
lxml.html.resolve_base_href" class="py-name" href="#" onclick="return doclink('link-127', 'resolve_base_href', 'link-127');">resolve_base_href</a></tt><tt class="py-op">=</tt><tt class="py-name">False</tt><tt class="py-op">)</tt> </tt>
<a name="L291"></a><tt class="py-lineno">291</tt>  <tt class="py-line">            <tt class="py-comment"># If we're deleting style then we don't have to remove JS links</tt> </tt>
<a name="L292"></a><tt class="py-lineno">292</tt>  <tt class="py-line">            <tt class="py-comment"># from styles, otherwise...</tt> </tt>
<a name="L293"></a><tt class="py-lineno">293</tt>  <tt class="py-line">            <tt class="py-keyword">if</tt> <tt class="py-keyword">not</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-128" class="py-name"><a title="lxml.html.clean.Cleaner.inline_style" class="py-name" href="#" onclick="return doclink('link-128', 'inline_style', 'link-46');">inline_style</a></tt><tt class="py-op">:</tt> </tt>
<a name="L294"></a><tt class="py-lineno">294</tt>  <tt class="py-line">                <tt class="py-keyword">for</tt> <tt class="py-name">el</tt> <tt class="py-keyword">in</tt> <tt id="link-129" class="py-name"><a title="lxml.html.clean._find_styled_elements" class="py-name" href="#" onclick="return doclink('link-129', '_find_styled_elements', 'link-35');">_find_styled_elements</a></tt><tt class="py-op">(</tt><tt class="py-name">doc</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L295"></a><tt class="py-lineno">295</tt>  <tt class="py-line">                    <tt class="py-name">old</tt> <tt class="py-op">=</tt> <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-130" class="py-name" targets="Method lxml.etree._Attrib.get()=lxml.etree._Attrib-class.html#get,Method lxml.etree._Element.get()=lxml.etree._Element-class.html#get,Method lxml.etree._IDDict.get()=lxml.etree._IDDict-class.html#get,Method lxml.etree._ProcessingInstruction.get()=lxml.etree._ProcessingInstruction-class.html#get"><a title="lxml.etree._Attrib.get
lxml.etree._Element.get
lxml.etree._IDDict.get
lxml.etree._ProcessingInstruction.get" class="py-name" href="#" onclick="return doclink('link-130', 'get', 'link-130');">get</a></tt><tt class="py-op">(</tt><tt class="py-string">'style'</tt><tt class="py-op">)</tt> </tt>
<a name="L296"></a><tt class="py-lineno">296</tt>  <tt class="py-line">                    <tt class="py-name">new</tt> <tt class="py-op">=</tt> <tt id="link-131" class="py-name"><a title="lxml.html.clean._css_javascript_re" class="py-name" href="#" onclick="return doclink('link-131', '_css_javascript_re', 'link-20');">_css_javascript_re</a></tt><tt class="py-op">.</tt><tt class="py-name">sub</tt><tt class="py-op">(</tt><tt class="py-string">''</tt><tt class="py-op">,</tt> <tt class="py-name">old</tt><tt class="py-op">)</tt> </tt>
<a name="L297"></a><tt class="py-lineno">297</tt>  <tt class="py-line">                    <tt class="py-name">new</tt> <tt class="py-op">=</tt> <tt id="link-132" class="py-name"><a title="lxml.html.clean._css_import_re" class="py-name" href="#" onclick="return doclink('link-132', '_css_import_re', 'link-23');">_css_import_re</a></tt><tt class="py-op">.</tt><tt class="py-name">sub</tt><tt class="py-op">(</tt><tt class="py-string">''</tt><tt class="py-op">,</tt> <tt class="py-name">new</tt><tt class="py-op">)</tt> </tt>
<a name="L298"></a><tt class="py-lineno">298</tt>  <tt class="py-line">                    <tt class="py-keyword">if</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-133" class="py-name" targets="Method lxml.html.clean.Cleaner._has_sneaky_javascript()=lxml.html.clean.Cleaner-class.html#_has_sneaky_javascript"><a title="lxml.html.clean.Cleaner._has_sneaky_javascript" class="py-name" href="#" onclick="return doclink('link-133', '_has_sneaky_javascript', 'link-133');">_has_sneaky_javascript</a></tt><tt class="py-op">(</tt><tt class="py-name">new</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L299"></a><tt class="py-lineno">299</tt>  <tt class="py-line">                        <tt class="py-comment"># Something tricky is going on...</tt> </tt>
<a name="L300"></a><tt class="py-lineno">300</tt>  <tt class="py-line">                        <tt class="py-keyword">del</tt> <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-134" class="py-name"><a title="lxml.etree._Element.attrib
lxml.etree._ProcessingInstruction.attrib
lxml.tests.selftest2.attrib
xml.etree.ElementTree.Element.attrib" class="py-name" href="#" onclick="return doclink('link-134', 'attrib', 'link-106');">attrib</a></tt><tt class="py-op">[</tt><tt class="py-string">'style'</tt><tt class="py-op">]</tt> </tt>
<a name="L301"></a><tt class="py-lineno">301</tt>  <tt class="py-line">                    <tt class="py-keyword">elif</tt> <tt class="py-name">new</tt> <tt class="py-op">!=</tt> <tt class="py-name">old</tt><tt class="py-op">:</tt> </tt>
<a name="L302"></a><tt class="py-lineno">302</tt>  <tt class="py-line">                        <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-135" class="py-name"><a title="lxml.etree._Element.set
lxml.etree._XSLTProcessingInstruction.set
lxml.html.HtmlElement.set
lxml.html.HtmlMixin.set" class="py-name" href="#" onclick="return doclink('link-135', 'set', 'link-66');">set</a></tt><tt class="py-op">(</tt><tt class="py-string">'style'</tt><tt class="py-op">,</tt> <tt class="py-name">new</tt><tt class="py-op">)</tt> </tt>
<a name="L303"></a><tt class="py-lineno">303</tt>  <tt class="py-line">            <tt class="py-keyword">if</tt> <tt class="py-keyword">not</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-136" class="py-name"><a title="lxml.html.clean.Cleaner.style" class="py-name" href="#" onclick="return doclink('link-136', 'style', 'link-45');">style</a></tt><tt class="py-op">:</tt> </tt>
<a name="L304"></a><tt class="py-lineno">304</tt>  <tt class="py-line">                <tt class="py-keyword">for</tt> <tt class="py-name">el</tt> <tt class="py-keyword">in</tt> <tt class="py-name">list</tt><tt class="py-op">(</tt><tt class="py-name">doc</tt><tt class="py-op">.</tt><tt id="link-137" class="py-name"><a title="lxml.etree._Element.iter
lxml.etree._ElementTree.iter" class="py-name" href="#" onclick="return doclink('link-137', 'iter', 'link-83');">iter</a></tt><tt class="py-op">(</tt><tt class="py-string">'style'</tt><tt class="py-op">)</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L305"></a><tt class="py-lineno">305</tt>  <tt class="py-line">                    <tt class="py-keyword">if</tt> <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-138" class="py-name"><a title="lxml.etree._Attrib.get
lxml.etree._Element.get
lxml.etree._IDDict.get
lxml.etree._ProcessingInstruction.get" class="py-name" href="#" onclick="return doclink('link-138', 'get', 'link-130');">get</a></tt><tt class="py-op">(</tt><tt class="py-string">'type'</tt><tt class="py-op">,</tt> <tt class="py-string">''</tt><tt class="py-op">)</tt><tt class="py-op">.</tt><tt class="py-name">lower</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">.</tt><tt id="link-139" class="py-name" targets="Function lxml.doctestcompare.strip()=lxml.doctestcompare-module.html#strip"><a title="lxml.doctestcompare.strip" class="py-name" href="#" onclick="return doclink('link-139', 'strip', 'link-139');">strip</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> <tt class="py-op">==</tt> <tt class="py-string">'text/javascript'</tt><tt class="py-op">:</tt> </tt>
<a name="L306"></a><tt class="py-lineno">306</tt>  <tt class="py-line">                        <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-140" class="py-name" targets="Method lxml.html.HtmlMixin.drop_tree()=lxml.html.HtmlMixin-class.html#drop_tree"><a title="lxml.html.HtmlMixin.drop_tree" class="py-name" href="#" onclick="return doclink('link-140', 'drop_tree', 'link-140');">drop_tree</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L307"></a><tt class="py-lineno">307</tt>  <tt class="py-line">                        <tt class="py-keyword">continue</tt> </tt>
<a name="L308"></a><tt class="py-lineno">308</tt>  <tt class="py-line">                    <tt class="py-name">old</tt> <tt class="py-op">=</tt> <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-141" class="py-name" targets="Variable lxml.etree.QName.text=lxml.etree.QName-class.html#text,Variable lxml.etree._Element.text=lxml.etree._Element-class.html#text,Variable lxml.etree._Entity.text=lxml.etree._Entity-class.html#text,Variable lxml.objectify.ObjectifiedElement.text=lxml.objectify.ObjectifiedElement-class.html#text,Variable xml.etree.ElementTree.Element.text=xml.etree.ElementTree.Element-class.html#text"><a title="lxml.etree.QName.text
lxml.etree._Element.text
lxml.etree._Entity.text
lxml.objectify.ObjectifiedElement.text
xml.etree.ElementTree.Element.text" class="py-name" href="#" onclick="return doclink('link-141', 'text', 'link-141');">text</a></tt> <tt class="py-keyword">or</tt> <tt class="py-string">''</tt> </tt>
<a name="L309"></a><tt class="py-lineno">309</tt>  <tt class="py-line">                    <tt class="py-name">new</tt> <tt class="py-op">=</tt> <tt id="link-142" class="py-name"><a title="lxml.html.clean._css_javascript_re" class="py-name" href="#" onclick="return doclink('link-142', '_css_javascript_re', 'link-20');">_css_javascript_re</a></tt><tt class="py-op">.</tt><tt class="py-name">sub</tt><tt class="py-op">(</tt><tt class="py-string">''</tt><tt class="py-op">,</tt> <tt class="py-name">old</tt><tt class="py-op">)</tt> </tt>
<a name="L310"></a><tt class="py-lineno">310</tt>  <tt class="py-line">                    <tt class="py-comment"># The imported CSS can do anything; we just can't allow it (strip from 'new' so the javascript removal above is kept):</tt> </tt>
<a name="L311"></a><tt class="py-lineno">311</tt>  <tt class="py-line">                    <tt class="py-name">new</tt> <tt class="py-op">=</tt> <tt id="link-143" class="py-name"><a title="lxml.html.clean._css_import_re" class="py-name" href="#" onclick="return doclink('link-143', '_css_import_re', 'link-23');">_css_import_re</a></tt><tt class="py-op">.</tt><tt class="py-name">sub</tt><tt class="py-op">(</tt><tt class="py-string">''</tt><tt class="py-op">,</tt> <tt class="py-name">new</tt><tt class="py-op">)</tt> </tt>
<a name="L312"></a><tt class="py-lineno">312</tt>  <tt class="py-line">                    <tt class="py-keyword">if</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-144" class="py-name"><a title="lxml.html.clean.Cleaner._has_sneaky_javascript" class="py-name" href="#" onclick="return doclink('link-144', '_has_sneaky_javascript', 'link-133');">_has_sneaky_javascript</a></tt><tt class="py-op">(</tt><tt class="py-name">new</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L313"></a><tt class="py-lineno">313</tt>  <tt class="py-line">                        <tt class="py-comment"># Something tricky is going on...</tt> </tt>
<a name="L314"></a><tt class="py-lineno">314</tt>  <tt class="py-line">                        <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-145" class="py-name"><a title="lxml.etree.QName.text
lxml.etree._Element.text
lxml.etree._Entity.text
lxml.objectify.ObjectifiedElement.text
xml.etree.ElementTree.Element.text" class="py-name" href="#" onclick="return doclink('link-145', 'text', 'link-141');">text</a></tt> <tt class="py-op">=</tt> <tt class="py-string">'/* deleted */'</tt> </tt>
<a name="L315"></a><tt class="py-lineno">315</tt>  <tt class="py-line">                    <tt class="py-keyword">elif</tt> <tt class="py-name">new</tt> <tt class="py-op">!=</tt> <tt class="py-name">old</tt><tt class="py-op">:</tt> </tt>
<a name="L316"></a><tt class="py-lineno">316</tt>  <tt class="py-line">                        <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-146" class="py-name"><a title="lxml.etree.QName.text
lxml.etree._Element.text
lxml.etree._Entity.text
lxml.objectify.ObjectifiedElement.text
xml.etree.ElementTree.Element.text" class="py-name" href="#" onclick="return doclink('link-146', 'text', 'link-141');">text</a></tt> <tt class="py-op">=</tt> <tt class="py-name">new</tt> </tt>
<a name="L317"></a><tt class="py-lineno">317</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-147" class="py-name"><a title="lxml.html.clean.Cleaner.comments" class="py-name" href="#" onclick="return doclink('link-147', 'comments', 'link-44');">comments</a></tt> <tt class="py-keyword">or</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-148" class="py-name"><a title="lxml.html.clean.Cleaner.processing_instructions" class="py-name" href="#" onclick="return doclink('link-148', 'processing_instructions', 'link-50');">processing_instructions</a></tt><tt class="py-op">:</tt> </tt>
<a name="L318"></a><tt class="py-lineno">318</tt>  <tt class="py-line">            <tt class="py-comment"># FIXME: why either?  I feel like there's some obscure reason</tt> </tt>
<a name="L319"></a><tt class="py-lineno">319</tt>  <tt class="py-line">            <tt class="py-comment"># because you can put PIs in comments...?  But I've already</tt> </tt>
<a name="L320"></a><tt class="py-lineno">320</tt>  <tt class="py-line">            <tt class="py-comment"># forgotten it</tt> </tt>
<a name="L321"></a><tt class="py-lineno">321</tt>  <tt class="py-line">            <tt id="link-149" class="py-name"><a title="lxml.html.clean.Cleaner.kill_tags" class="py-name" href="#" onclick="return doclink('link-149', 'kill_tags', 'link-57');">kill_tags</a></tt><tt class="py-op">.</tt><tt id="link-150" class="py-name"><a title="lxml.html.CheckboxValues.add
lxml.html.Classes.add
lxml.html.MultipleSelectOptions.add" class="py-name" href="#" onclick="return doclink('link-150', 'add', 'link-98');">add</a></tt><tt class="py-op">(</tt><tt id="link-151" class="py-name"><a title="lxml.etree
lxml.sax.ElementTreeContentHandler.etree
lxml.tests.test_elementtree.CElementTreeTestCase.etree
lxml.tests.test_elementtree._ETreeTestCaseBase.etree
lxml.tests.test_elementtree._XMLPullParserTest.etree
lxml.tests.test_io._IOTestCaseBase.etree" class="py-name" href="#" onclick="return doclink('link-151', 'etree', 'link-3');">etree</a></tt><tt class="py-op">.</tt><tt id="link-152" class="py-name" targets="Function lxml.etree.Comment()=lxml.etree-module.html#Comment"><a title="lxml.etree.Comment" class="py-name" href="#" onclick="return doclink('link-152', 'Comment', 'link-152');">Comment</a></tt><tt class="py-op">)</tt> </tt>
<a name="L322"></a><tt class="py-lineno">322</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-153" class="py-name"><a title="lxml.html.clean.Cleaner.processing_instructions" class="py-name" href="#" onclick="return doclink('link-153', 'processing_instructions', 'link-50');">processing_instructions</a></tt><tt class="py-op">:</tt> </tt>
<a name="L323"></a><tt class="py-lineno">323</tt>  <tt class="py-line">            <tt id="link-154" class="py-name"><a title="lxml.html.clean.Cleaner.kill_tags" class="py-name" href="#" onclick="return doclink('link-154', 'kill_tags', 'link-57');">kill_tags</a></tt><tt class="py-op">.</tt><tt id="link-155" class="py-name"><a title="lxml.html.CheckboxValues.add
lxml.html.Classes.add
lxml.html.MultipleSelectOptions.add" class="py-name" href="#" onclick="return doclink('link-155', 'add', 'link-98');">add</a></tt><tt class="py-op">(</tt><tt id="link-156" class="py-name"><a title="lxml.etree
lxml.sax.ElementTreeContentHandler.etree
lxml.tests.test_elementtree.CElementTreeTestCase.etree
lxml.tests.test_elementtree._ETreeTestCaseBase.etree
lxml.tests.test_elementtree._XMLPullParserTest.etree
lxml.tests.test_io._IOTestCaseBase.etree" class="py-name" href="#" onclick="return doclink('link-156', 'etree', 'link-3');">etree</a></tt><tt class="py-op">.</tt><tt class="py-name">ProcessingInstruction</tt><tt class="py-op">)</tt> </tt>
<a name="L324"></a><tt class="py-lineno">324</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-157" class="py-name"><a title="lxml.html.clean.Cleaner.style" class="py-name" href="#" onclick="return doclink('link-157', 'style', 'link-45');">style</a></tt><tt class="py-op">:</tt> </tt>
<a name="L325"></a><tt class="py-lineno">325</tt>  <tt class="py-line">            <tt id="link-158" class="py-name"><a title="lxml.html.clean.Cleaner.kill_tags" class="py-name" href="#" onclick="return doclink('link-158', 'kill_tags', 'link-57');">kill_tags</a></tt><tt class="py-op">.</tt><tt id="link-159" class="py-name"><a title="lxml.html.CheckboxValues.add
lxml.html.Classes.add
lxml.html.MultipleSelectOptions.add" class="py-name" href="#" onclick="return doclink('link-159', 'add', 'link-98');">add</a></tt><tt class="py-op">(</tt><tt class="py-string">'style'</tt><tt class="py-op">)</tt> </tt>
<a name="L326"></a><tt class="py-lineno">326</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-160" class="py-name"><a title="lxml.html.clean.Cleaner.inline_style" class="py-name" href="#" onclick="return doclink('link-160', 'inline_style', 'link-46');">inline_style</a></tt><tt class="py-op">:</tt> </tt>
<a name="L327"></a><tt class="py-lineno">327</tt>  <tt class="py-line">            <tt id="link-161" class="py-name"><a title="lxml.etree
lxml.sax.ElementTreeContentHandler.etree
lxml.tests.test_elementtree.CElementTreeTestCase.etree
lxml.tests.test_elementtree._ETreeTestCaseBase.etree
lxml.tests.test_elementtree._XMLPullParserTest.etree
lxml.tests.test_io._IOTestCaseBase.etree" class="py-name" href="#" onclick="return doclink('link-161', 'etree', 'link-3');">etree</a></tt><tt class="py-op">.</tt><tt id="link-162" class="py-name" targets="Function lxml.etree.strip_attributes()=lxml.etree-module.html#strip_attributes"><a title="lxml.etree.strip_attributes" class="py-name" href="#" onclick="return doclink('link-162', 'strip_attributes', 'link-162');">strip_attributes</a></tt><tt class="py-op">(</tt><tt class="py-name">doc</tt><tt class="py-op">,</tt> <tt class="py-string">'style'</tt><tt class="py-op">)</tt> </tt>
<a name="L328"></a><tt class="py-lineno">328</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-163" class="py-name"><a title="lxml.html.clean.Cleaner.links" class="py-name" href="#" onclick="return doclink('link-163', 'links', 'link-47');">links</a></tt><tt class="py-op">:</tt> </tt>
<a name="L329"></a><tt class="py-lineno">329</tt>  <tt class="py-line">            <tt id="link-164" class="py-name"><a title="lxml.html.clean.Cleaner.kill_tags" class="py-name" href="#" onclick="return doclink('link-164', 'kill_tags', 'link-57');">kill_tags</a></tt><tt class="py-op">.</tt><tt id="link-165" class="py-name"><a title="lxml.html.CheckboxValues.add
lxml.html.Classes.add
lxml.html.MultipleSelectOptions.add" class="py-name" href="#" onclick="return doclink('link-165', 'add', 'link-98');">add</a></tt><tt class="py-op">(</tt><tt class="py-string">'link'</tt><tt class="py-op">)</tt> </tt>
<a name="L330"></a><tt class="py-lineno">330</tt>  <tt class="py-line">        <tt class="py-keyword">elif</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-166" class="py-name"><a title="lxml.html.clean.Cleaner.style" class="py-name" href="#" onclick="return doclink('link-166', 'style', 'link-45');">style</a></tt> <tt class="py-keyword">or</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-167" class="py-name"><a title="lxml.html.clean.Cleaner.javascript" class="py-name" href="#" onclick="return doclink('link-167', 'javascript', 'link-43');">javascript</a></tt><tt class="py-op">:</tt> </tt>
<a name="L331"></a><tt class="py-lineno">331</tt>  <tt class="py-line">            <tt class="py-comment"># We must get rid of included stylesheets if Javascript is not</tt> </tt>
<a name="L332"></a><tt class="py-lineno">332</tt>  <tt class="py-line">            <tt class="py-comment"># allowed, as you can put Javascript in them</tt> </tt>
<a name="L333"></a><tt class="py-lineno">333</tt>  <tt class="py-line">            <tt class="py-keyword">for</tt> <tt class="py-name">el</tt> <tt class="py-keyword">in</tt> <tt class="py-name">list</tt><tt class="py-op">(</tt><tt class="py-name">doc</tt><tt class="py-op">.</tt><tt id="link-168" class="py-name"><a title="lxml.etree._Element.iter
lxml.etree._ElementTree.iter" class="py-name" href="#" onclick="return doclink('link-168', 'iter', 'link-83');">iter</a></tt><tt class="py-op">(</tt><tt class="py-string">'link'</tt><tt class="py-op">)</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L334"></a><tt class="py-lineno">334</tt>  <tt class="py-line">                <tt class="py-keyword">if</tt> <tt class="py-string">'stylesheet'</tt> <tt class="py-keyword">in</tt> <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-169" class="py-name"><a title="lxml.etree._Attrib.get
lxml.etree._Element.get
lxml.etree._IDDict.get
lxml.etree._ProcessingInstruction.get" class="py-name" href="#" onclick="return doclink('link-169', 'get', 'link-130');">get</a></tt><tt class="py-op">(</tt><tt class="py-string">'rel'</tt><tt class="py-op">,</tt> <tt class="py-string">''</tt><tt class="py-op">)</tt><tt class="py-op">.</tt><tt class="py-name">lower</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L335"></a><tt class="py-lineno">335</tt>  <tt class="py-line">                    <tt class="py-comment"># Note this kills alternate stylesheets as well</tt> </tt>
<a name="L336"></a><tt class="py-lineno">336</tt>  <tt class="py-line">                    <tt class="py-keyword">if</tt> <tt class="py-keyword">not</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-170" class="py-name" targets="Method lxml.html.clean.Cleaner.allow_element()=lxml.html.clean.Cleaner-class.html#allow_element"><a title="lxml.html.clean.Cleaner.allow_element" class="py-name" href="#" onclick="return doclink('link-170', 'allow_element', 'link-170');">allow_element</a></tt><tt class="py-op">(</tt><tt class="py-name">el</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L337"></a><tt class="py-lineno">337</tt>  <tt class="py-line">                        <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-171" class="py-name"><a title="lxml.html.HtmlMixin.drop_tree" class="py-name" href="#" onclick="return doclink('link-171', 'drop_tree', 'link-140');">drop_tree</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L338"></a><tt class="py-lineno">338</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-172" class="py-name"><a title="lxml.html.clean.Cleaner.meta" class="py-name" href="#" onclick="return doclink('link-172', 'meta', 'link-48');">meta</a></tt><tt class="py-op">:</tt> </tt>
<a name="L339"></a><tt class="py-lineno">339</tt>  <tt class="py-line">            <tt id="link-173" class="py-name"><a title="lxml.html.clean.Cleaner.kill_tags" class="py-name" href="#" onclick="return doclink('link-173', 'kill_tags', 'link-57');">kill_tags</a></tt><tt class="py-op">.</tt><tt id="link-174" class="py-name"><a title="lxml.html.CheckboxValues.add
lxml.html.Classes.add
lxml.html.MultipleSelectOptions.add" class="py-name" href="#" onclick="return doclink('link-174', 'add', 'link-98');">add</a></tt><tt class="py-op">(</tt><tt class="py-string">'meta'</tt><tt class="py-op">)</tt> </tt>
<a name="L340"></a><tt class="py-lineno">340</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-175" class="py-name"><a title="lxml.html.clean.Cleaner.page_structure" class="py-name" href="#" onclick="return doclink('link-175', 'page_structure', 'link-49');">page_structure</a></tt><tt class="py-op">:</tt> </tt>
<a name="L341"></a><tt class="py-lineno">341</tt>  <tt class="py-line">            <tt id="link-176" class="py-name"><a title="lxml.html.clean.Cleaner.remove_tags" class="py-name" href="#" onclick="return doclink('link-176', 'remove_tags', 'link-55');">remove_tags</a></tt><tt class="py-op">.</tt><tt id="link-177" class="py-name" targets="Method lxml.etree._Attrib.update()=lxml.etree._Attrib-class.html#update,Method lxml.html.Classes.update()=lxml.html.Classes-class.html#update"><a title="lxml.etree._Attrib.update
lxml.html.Classes.update" class="py-name" href="#" onclick="return doclink('link-177', 'update', 'link-177');">update</a></tt><tt class="py-op">(</tt><tt class="py-op">(</tt><tt class="py-string">'head'</tt><tt class="py-op">,</tt> <tt class="py-string">'html'</tt><tt class="py-op">,</tt> <tt class="py-string">'title'</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L342"></a><tt class="py-lineno">342</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-178" class="py-name"><a title="lxml.html.clean.Cleaner.embedded" class="py-name" href="#" onclick="return doclink('link-178', 'embedded', 'link-51');">embedded</a></tt><tt class="py-op">:</tt> </tt>
<a name="L343"></a><tt class="py-lineno">343</tt>  <tt class="py-line">            <tt class="py-comment"># FIXME: is &lt;layer&gt; really embedded?</tt> </tt>
<a name="L344"></a><tt class="py-lineno">344</tt>  <tt class="py-line">            <tt class="py-comment"># We should get rid of any &lt;param&gt; tags not inside &lt;applet&gt;;</tt> </tt>
<a name="L345"></a><tt class="py-lineno">345</tt>  <tt class="py-line">            <tt class="py-comment"># These are not really valid anyway.</tt> </tt>
<a name="L346"></a><tt class="py-lineno">346</tt>  <tt class="py-line">            <tt class="py-keyword">for</tt> <tt class="py-name">el</tt> <tt class="py-keyword">in</tt> <tt class="py-name">list</tt><tt class="py-op">(</tt><tt class="py-name">doc</tt><tt class="py-op">.</tt><tt id="link-179" class="py-name"><a title="lxml.etree._Element.iter
lxml.etree._ElementTree.iter" class="py-name" href="#" onclick="return doclink('link-179', 'iter', 'link-83');">iter</a></tt><tt class="py-op">(</tt><tt class="py-string">'param'</tt><tt class="py-op">)</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L347"></a><tt class="py-lineno">347</tt>  <tt class="py-line">                <tt class="py-name">found_parent</tt> <tt class="py-op">=</tt> <tt class="py-name">False</tt> </tt>
<a name="L348"></a><tt class="py-lineno">348</tt>  <tt class="py-line">                <tt class="py-name">parent</tt> <tt class="py-op">=</tt> <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-180" class="py-name" targets="Method lxml.etree._Element.getparent()=lxml.etree._Element-class.html#getparent,Method lxml.etree._ElementStringResult.getparent()=lxml.etree._ElementStringResult-class.html#getparent,Method lxml.etree._ElementUnicodeResult.getparent()=lxml.etree._ElementUnicodeResult-class.html#getparent"><a title="lxml.etree._Element.getparent
lxml.etree._ElementStringResult.getparent
lxml.etree._ElementUnicodeResult.getparent" class="py-name" href="#" onclick="return doclink('link-180', 'getparent', 'link-180');">getparent</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L349"></a><tt class="py-lineno">349</tt>  <tt class="py-line">                <tt class="py-keyword">while</tt> <tt class="py-name">parent</tt> <tt class="py-keyword">is</tt> <tt class="py-keyword">not</tt> <tt class="py-name">None</tt> <tt class="py-keyword">and</tt> <tt class="py-name">parent</tt><tt class="py-op">.</tt><tt id="link-181" class="py-name"><a title="lxml.etree._Comment.tag
lxml.etree._Element.tag
lxml.etree._Entity.tag
lxml.etree._ProcessingInstruction.tag
lxml.tests.test_xpathevaluator.tag
xml.etree.ElementTree.Element.tag" class="py-name" href="#" onclick="return doclink('link-181', 'tag', 'link-84');">tag</a></tt> <tt class="py-keyword">not</tt> <tt class="py-keyword">in</tt> <tt class="py-op">(</tt><tt class="py-string">'applet'</tt><tt class="py-op">,</tt> <tt class="py-string">'object'</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L350"></a><tt class="py-lineno">350</tt>  <tt class="py-line">                    <tt class="py-name">parent</tt> <tt class="py-op">=</tt> <tt class="py-name">parent</tt><tt class="py-op">.</tt><tt id="link-182" class="py-name"><a title="lxml.etree._Element.getparent
lxml.etree._ElementStringResult.getparent
lxml.etree._ElementUnicodeResult.getparent" class="py-name" href="#" onclick="return doclink('link-182', 'getparent', 'link-180');">getparent</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L351"></a><tt class="py-lineno">351</tt>  <tt class="py-line">                <tt class="py-keyword">if</tt> <tt class="py-name">parent</tt> <tt class="py-keyword">is</tt> <tt class="py-name">None</tt><tt class="py-op">:</tt> </tt>
<a name="L352"></a><tt class="py-lineno">352</tt>  <tt class="py-line">                    <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-183" class="py-name"><a title="lxml.html.HtmlMixin.drop_tree" class="py-name" href="#" onclick="return doclink('link-183', 'drop_tree', 'link-140');">drop_tree</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L353"></a><tt class="py-lineno">353</tt>  <tt class="py-line">            <tt id="link-184" class="py-name"><a title="lxml.html.clean.Cleaner.kill_tags" class="py-name" href="#" onclick="return doclink('link-184', 'kill_tags', 'link-57');">kill_tags</a></tt><tt class="py-op">.</tt><tt id="link-185" class="py-name"><a title="lxml.etree._Attrib.update
lxml.html.Classes.update" class="py-name" href="#" onclick="return doclink('link-185', 'update', 'link-177');">update</a></tt><tt class="py-op">(</tt><tt class="py-op">(</tt><tt class="py-string">'applet'</tt><tt class="py-op">,</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L354"></a><tt class="py-lineno">354</tt>  <tt class="py-line">            <tt class="py-comment"># The alternate contents that are in an iframe are a good fallback:</tt> </tt>
<a name="L355"></a><tt class="py-lineno">355</tt>  <tt class="py-line">            <tt id="link-186" class="py-name"><a title="lxml.html.clean.Cleaner.remove_tags" class="py-name" href="#" onclick="return doclink('link-186', 'remove_tags', 'link-55');">remove_tags</a></tt><tt class="py-op">.</tt><tt id="link-187" class="py-name"><a title="lxml.etree._Attrib.update
lxml.html.Classes.update" class="py-name" href="#" onclick="return doclink('link-187', 'update', 'link-177');">update</a></tt><tt class="py-op">(</tt><tt class="py-op">(</tt><tt class="py-string">'iframe'</tt><tt class="py-op">,</tt> <tt class="py-string">'embed'</tt><tt class="py-op">,</tt> <tt class="py-string">'layer'</tt><tt class="py-op">,</tt> <tt class="py-string">'object'</tt><tt class="py-op">,</tt> <tt class="py-string">'param'</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L356"></a><tt class="py-lineno">356</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-188" class="py-name"><a title="lxml.html.clean.Cleaner.frames" class="py-name" href="#" onclick="return doclink('link-188', 'frames', 'link-52');">frames</a></tt><tt class="py-op">:</tt> </tt>
<a name="L357"></a><tt class="py-lineno">357</tt>  <tt class="py-line">            <tt class="py-comment"># FIXME: ideally we should look at the frame links, but</tt> </tt>
<a name="L358"></a><tt class="py-lineno">358</tt>  <tt class="py-line">            <tt class="py-comment"># generally frames don't mix properly with an HTML</tt> </tt>
<a name="L359"></a><tt class="py-lineno">359</tt>  <tt class="py-line">            <tt class="py-comment"># fragment anyway.</tt> </tt>
<a name="L360"></a><tt class="py-lineno">360</tt>  <tt class="py-line">            <tt id="link-189" class="py-name"><a title="lxml.html.clean.Cleaner.kill_tags" class="py-name" href="#" onclick="return doclink('link-189', 'kill_tags', 'link-57');">kill_tags</a></tt><tt class="py-op">.</tt><tt id="link-190" class="py-name"><a title="lxml.etree._Attrib.update
lxml.html.Classes.update" class="py-name" href="#" onclick="return doclink('link-190', 'update', 'link-177');">update</a></tt><tt class="py-op">(</tt><tt id="link-191" class="py-name"><a title="lxml.html.defs" class="py-name" href="#" onclick="return doclink('link-191', 'defs', 'link-6');">defs</a></tt><tt class="py-op">.</tt><tt id="link-192" class="py-name" targets="Variable lxml.html.defs.frame_tags=lxml.html.defs-module.html#frame_tags"><a title="lxml.html.defs.frame_tags" class="py-name" href="#" onclick="return doclink('link-192', 'frame_tags', 'link-192');">frame_tags</a></tt><tt class="py-op">)</tt> </tt>
<a name="L361"></a><tt class="py-lineno">361</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-193" class="py-name"><a title="lxml.html.HtmlMixin.forms
lxml.html.clean.Cleaner.forms" class="py-name" href="#" onclick="return doclink('link-193', 'forms', 'link-53');">forms</a></tt><tt class="py-op">:</tt> </tt>
<a name="L362"></a><tt class="py-lineno">362</tt>  <tt class="py-line">            <tt id="link-194" class="py-name"><a title="lxml.html.clean.Cleaner.remove_tags" class="py-name" href="#" onclick="return doclink('link-194', 'remove_tags', 'link-55');">remove_tags</a></tt><tt class="py-op">.</tt><tt id="link-195" class="py-name"><a title="lxml.html.CheckboxValues.add
lxml.html.Classes.add
lxml.html.MultipleSelectOptions.add" class="py-name" href="#" onclick="return doclink('link-195', 'add', 'link-98');">add</a></tt><tt class="py-op">(</tt><tt class="py-string">'form'</tt><tt class="py-op">)</tt> </tt>
<a name="L363"></a><tt class="py-lineno">363</tt>  <tt class="py-line">            <tt id="link-196" class="py-name"><a title="lxml.html.clean.Cleaner.kill_tags" class="py-name" href="#" onclick="return doclink('link-196', 'kill_tags', 'link-57');">kill_tags</a></tt><tt class="py-op">.</tt><tt id="link-197" class="py-name"><a title="lxml.etree._Attrib.update
lxml.html.Classes.update" class="py-name" href="#" onclick="return doclink('link-197', 'update', 'link-177');">update</a></tt><tt class="py-op">(</tt><tt class="py-op">(</tt><tt class="py-string">'button'</tt><tt class="py-op">,</tt> <tt class="py-string">'input'</tt><tt class="py-op">,</tt> <tt class="py-string">'select'</tt><tt class="py-op">,</tt> <tt class="py-string">'textarea'</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L364"></a><tt class="py-lineno">364</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-198" class="py-name"><a title="lxml.html.clean.Cleaner.annoying_tags" class="py-name" href="#" onclick="return doclink('link-198', 'annoying_tags', 'link-54');">annoying_tags</a></tt><tt class="py-op">:</tt> </tt>
<a name="L365"></a><tt class="py-lineno">365</tt>  <tt class="py-line">            <tt id="link-199" class="py-name"><a title="lxml.html.clean.Cleaner.remove_tags" class="py-name" href="#" onclick="return doclink('link-199', 'remove_tags', 'link-55');">remove_tags</a></tt><tt class="py-op">.</tt><tt id="link-200" class="py-name"><a title="lxml.etree._Attrib.update
lxml.html.Classes.update" class="py-name" href="#" onclick="return doclink('link-200', 'update', 'link-177');">update</a></tt><tt class="py-op">(</tt><tt class="py-op">(</tt><tt class="py-string">'blink'</tt><tt class="py-op">,</tt> <tt class="py-string">'marquee'</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L366"></a><tt class="py-lineno">366</tt>  <tt class="py-line"> </tt>
<a name="L367"></a><tt class="py-lineno">367</tt>  <tt class="py-line">        <tt class="py-name">_remove</tt> <tt class="py-op">=</tt> <tt class="py-op">[</tt><tt class="py-op">]</tt> </tt>
<a name="L368"></a><tt class="py-lineno">368</tt>  <tt class="py-line">        <tt class="py-name">_kill</tt> <tt class="py-op">=</tt> <tt class="py-op">[</tt><tt class="py-op">]</tt> </tt>
<a name="L369"></a><tt class="py-lineno">369</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt class="py-name">el</tt> <tt class="py-keyword">in</tt> <tt class="py-name">doc</tt><tt class="py-op">.</tt><tt id="link-201" class="py-name"><a title="lxml.etree._Element.iter
lxml.etree._ElementTree.iter" class="py-name" href="#" onclick="return doclink('link-201', 'iter', 'link-83');">iter</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L370"></a><tt class="py-lineno">370</tt>  <tt class="py-line">            <tt class="py-keyword">if</tt> <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-202" class="py-name"><a title="lxml.etree._Comment.tag
lxml.etree._Element.tag
lxml.etree._Entity.tag
lxml.etree._ProcessingInstruction.tag
lxml.tests.test_xpathevaluator.tag
xml.etree.ElementTree.Element.tag" class="py-name" href="#" onclick="return doclink('link-202', 'tag', 'link-84');">tag</a></tt> <tt class="py-keyword">in</tt> <tt id="link-203" class="py-name"><a title="lxml.html.clean.Cleaner.kill_tags" class="py-name" href="#" onclick="return doclink('link-203', 'kill_tags', 'link-57');">kill_tags</a></tt><tt class="py-op">:</tt> </tt>
<a name="L371"></a><tt class="py-lineno">371</tt>  <tt class="py-line">                <tt class="py-keyword">if</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-204" class="py-name"><a title="lxml.html.clean.Cleaner.allow_element" class="py-name" href="#" onclick="return doclink('link-204', 'allow_element', 'link-170');">allow_element</a></tt><tt class="py-op">(</tt><tt class="py-name">el</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L372"></a><tt class="py-lineno">372</tt>  <tt class="py-line">                    <tt class="py-keyword">continue</tt> </tt>
<a name="L373"></a><tt class="py-lineno">373</tt>  <tt class="py-line">                <tt class="py-name">_kill</tt><tt class="py-op">.</tt><tt id="link-205" class="py-name" targets="Method lxml.etree._Element.append()=lxml.etree._Element-class.html#append"><a title="lxml.etree._Element.append" class="py-name" href="#" onclick="return doclink('link-205', 'append', 'link-205');">append</a></tt><tt class="py-op">(</tt><tt class="py-name">el</tt><tt class="py-op">)</tt> </tt>
<a name="L374"></a><tt class="py-lineno">374</tt>  <tt class="py-line">            <tt class="py-keyword">elif</tt> <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-206" class="py-name"><a title="lxml.etree._Comment.tag
lxml.etree._Element.tag
lxml.etree._Entity.tag
lxml.etree._ProcessingInstruction.tag
lxml.tests.test_xpathevaluator.tag
xml.etree.ElementTree.Element.tag" class="py-name" href="#" onclick="return doclink('link-206', 'tag', 'link-84');">tag</a></tt> <tt class="py-keyword">in</tt> <tt id="link-207" class="py-name"><a title="lxml.html.clean.Cleaner.remove_tags" class="py-name" href="#" onclick="return doclink('link-207', 'remove_tags', 'link-55');">remove_tags</a></tt><tt class="py-op">:</tt> </tt>
<a name="L375"></a><tt class="py-lineno">375</tt>  <tt class="py-line">                <tt class="py-keyword">if</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-208" class="py-name"><a title="lxml.html.clean.Cleaner.allow_element" class="py-name" href="#" onclick="return doclink('link-208', 'allow_element', 'link-170');">allow_element</a></tt><tt class="py-op">(</tt><tt class="py-name">el</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L376"></a><tt class="py-lineno">376</tt>  <tt class="py-line">                    <tt class="py-keyword">continue</tt> </tt>
<a name="L377"></a><tt class="py-lineno">377</tt>  <tt class="py-line">                <tt class="py-name">_remove</tt><tt class="py-op">.</tt><tt id="link-209" class="py-name"><a title="lxml.etree._Element.append" class="py-name" href="#" onclick="return doclink('link-209', 'append', 'link-205');">append</a></tt><tt class="py-op">(</tt><tt class="py-name">el</tt><tt class="py-op">)</tt> </tt>
<a name="L378"></a><tt class="py-lineno">378</tt>  <tt class="py-line"> </tt>
<a name="L379"></a><tt class="py-lineno">379</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">_remove</tt> <tt class="py-keyword">and</tt> <tt class="py-name">_remove</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">]</tt> <tt class="py-op">==</tt> <tt class="py-name">doc</tt><tt class="py-op">:</tt> </tt>
<a name="L380"></a><tt class="py-lineno">380</tt>  <tt class="py-line">            <tt class="py-comment"># We have to drop the parent-most tag, which we can't</tt> </tt>
<a name="L381"></a><tt class="py-lineno">381</tt>  <tt class="py-line">            <tt class="py-comment"># do.  Instead we'll rewrite it:</tt> </tt>
<a name="L382"></a><tt class="py-lineno">382</tt>  <tt class="py-line">            <tt class="py-name">el</tt> <tt class="py-op">=</tt> <tt class="py-name">_remove</tt><tt class="py-op">.</tt><tt id="link-210" class="py-name" targets="Method lxml.etree._Attrib.pop()=lxml.etree._Attrib-class.html#pop"><a title="lxml.etree._Attrib.pop" class="py-name" href="#" onclick="return doclink('link-210', 'pop', 'link-210');">pop</a></tt><tt class="py-op">(</tt><tt class="py-number">0</tt><tt class="py-op">)</tt> </tt>
<a name="L383"></a><tt class="py-lineno">383</tt>  <tt class="py-line">            <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-211" class="py-name"><a title="lxml.etree._Comment.tag
lxml.etree._Element.tag
lxml.etree._Entity.tag
lxml.etree._ProcessingInstruction.tag
lxml.tests.test_xpathevaluator.tag
xml.etree.ElementTree.Element.tag" class="py-name" href="#" onclick="return doclink('link-211', 'tag', 'link-84');">tag</a></tt> <tt class="py-op">=</tt> <tt class="py-string">'div'</tt> </tt>
<a name="L384"></a><tt class="py-lineno">384</tt>  <tt class="py-line">            <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-212" class="py-name"><a title="lxml.etree._Element.attrib
lxml.etree._ProcessingInstruction.attrib
lxml.tests.selftest2.attrib
xml.etree.ElementTree.Element.attrib" class="py-name" href="#" onclick="return doclink('link-212', 'attrib', 'link-106');">attrib</a></tt><tt class="py-op">.</tt><tt id="link-213" class="py-name" targets="Method lxml.etree.DocInfo.clear()=lxml.etree.DocInfo-class.html#clear,Method lxml.etree._Attrib.clear()=lxml.etree._Attrib-class.html#clear,Method lxml.etree._Element.clear()=lxml.etree._Element-class.html#clear,Method lxml.etree._ErrorLog.clear()=lxml.etree._ErrorLog-class.html#clear"><a title="lxml.etree.DocInfo.clear
lxml.etree._Attrib.clear
lxml.etree._Element.clear
lxml.etree._ErrorLog.clear" class="py-name" href="#" onclick="return doclink('link-213', 'clear', 'link-213');">clear</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L385"></a><tt class="py-lineno">385</tt>  <tt class="py-line">        <tt class="py-keyword">elif</tt> <tt class="py-name">_kill</tt> <tt class="py-keyword">and</tt> <tt class="py-name">_kill</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">]</tt> <tt class="py-op">==</tt> <tt class="py-name">doc</tt><tt class="py-op">:</tt> </tt>
<a name="L386"></a><tt class="py-lineno">386</tt>  <tt class="py-line">            <tt class="py-comment"># We have to drop the parent-most element, which we can't</tt> </tt>
<a name="L387"></a><tt class="py-lineno">387</tt>  <tt class="py-line">            <tt class="py-comment"># do.  Instead we'll clear it:</tt> </tt>
<a name="L388"></a><tt class="py-lineno">388</tt>  <tt class="py-line">            <tt class="py-name">el</tt> <tt class="py-op">=</tt> <tt class="py-name">_kill</tt><tt class="py-op">.</tt><tt id="link-214" class="py-name"><a title="lxml.etree._Attrib.pop" class="py-name" href="#" onclick="return doclink('link-214', 'pop', 'link-210');">pop</a></tt><tt class="py-op">(</tt><tt class="py-number">0</tt><tt class="py-op">)</tt> </tt>
<a name="L389"></a><tt class="py-lineno">389</tt>  <tt class="py-line">            <tt class="py-keyword">if</tt> <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-215" class="py-name"><a title="lxml.etree._Comment.tag
lxml.etree._Element.tag
lxml.etree._Entity.tag
lxml.etree._ProcessingInstruction.tag
lxml.tests.test_xpathevaluator.tag
xml.etree.ElementTree.Element.tag" class="py-name" href="#" onclick="return doclink('link-215', 'tag', 'link-84');">tag</a></tt> <tt class="py-op">!=</tt> <tt class="py-string">'html'</tt><tt class="py-op">:</tt> </tt>
<a name="L390"></a><tt class="py-lineno">390</tt>  <tt class="py-line">                <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-216" class="py-name"><a title="lxml.etree._Comment.tag
lxml.etree._Element.tag
lxml.etree._Entity.tag
lxml.etree._ProcessingInstruction.tag
lxml.tests.test_xpathevaluator.tag
xml.etree.ElementTree.Element.tag" class="py-name" href="#" onclick="return doclink('link-216', 'tag', 'link-84');">tag</a></tt> <tt class="py-op">=</tt> <tt class="py-string">'div'</tt> </tt>
<a name="L391"></a><tt class="py-lineno">391</tt>  <tt class="py-line">            <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-217" class="py-name"><a title="lxml.etree.DocInfo.clear
lxml.etree._Attrib.clear
lxml.etree._Element.clear
lxml.etree._ErrorLog.clear" class="py-name" href="#" onclick="return doclink('link-217', 'clear', 'link-213');">clear</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L392"></a><tt class="py-lineno">392</tt>  <tt class="py-line"> </tt>
<a name="L393"></a><tt class="py-lineno">393</tt>  <tt class="py-line">        <tt class="py-name">_kill</tt><tt class="py-op">.</tt><tt class="py-name">reverse</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> <tt class="py-comment"># start with innermost tags</tt> </tt>
<a name="L394"></a><tt class="py-lineno">394</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt class="py-name">el</tt> <tt class="py-keyword">in</tt> <tt class="py-name">_kill</tt><tt class="py-op">:</tt> </tt>
<a name="L395"></a><tt class="py-lineno">395</tt>  <tt class="py-line">            <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-218" class="py-name"><a title="lxml.html.HtmlMixin.drop_tree" class="py-name" href="#" onclick="return doclink('link-218', 'drop_tree', 'link-140');">drop_tree</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L396"></a><tt class="py-lineno">396</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt class="py-name">el</tt> <tt class="py-keyword">in</tt> <tt class="py-name">_remove</tt><tt class="py-op">:</tt> </tt>
<a name="L397"></a><tt class="py-lineno">397</tt>  <tt class="py-line">            <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-219" class="py-name" targets="Method lxml.html.HtmlMixin.drop_tag()=lxml.html.HtmlMixin-class.html#drop_tag"><a title="lxml.html.HtmlMixin.drop_tag" class="py-name" href="#" onclick="return doclink('link-219', 'drop_tag', 'link-219');">drop_tag</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L398"></a><tt class="py-lineno">398</tt>  <tt class="py-line"> </tt>
<a name="L399"></a><tt class="py-lineno">399</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-220" class="py-name"><a title="lxml.html.clean.Cleaner.remove_unknown_tags" class="py-name" href="#" onclick="return doclink('link-220', 'remove_unknown_tags', 'link-58');">remove_unknown_tags</a></tt><tt class="py-op">:</tt> </tt>
<a name="L400"></a><tt class="py-lineno">400</tt>  <tt class="py-line">            <tt class="py-keyword">if</tt> <tt id="link-221" class="py-name"><a title="lxml.html.clean.Cleaner.allow_tags" class="py-name" href="#" onclick="return doclink('link-221', 'allow_tags', 'link-56');">allow_tags</a></tt><tt class="py-op">:</tt> </tt>
<a name="L401"></a><tt class="py-lineno">401</tt>  <tt class="py-line">                <tt class="py-keyword">raise</tt> <tt class="py-name">ValueError</tt><tt class="py-op">(</tt> </tt>
<a name="L402"></a><tt class="py-lineno">402</tt>  <tt class="py-line">                    <tt class="py-string">"It does not make sense to pass in both allow_tags and remove_unknown_tags"</tt><tt class="py-op">)</tt> </tt>
<a name="L403"></a><tt class="py-lineno">403</tt>  <tt class="py-line">            <tt id="link-222" class="py-name"><a title="lxml.html.clean.Cleaner.allow_tags" class="py-name" href="#" onclick="return doclink('link-222', 'allow_tags', 'link-56');">allow_tags</a></tt> <tt class="py-op">=</tt> <tt id="link-223" class="py-name"><a title="lxml.etree._Element.set
lxml.etree._XSLTProcessingInstruction.set
lxml.html.HtmlElement.set
lxml.html.HtmlMixin.set" class="py-name" href="#" onclick="return doclink('link-223', 'set', 'link-66');">set</a></tt><tt class="py-op">(</tt><tt id="link-224" class="py-name"><a title="lxml.html.defs" class="py-name" href="#" onclick="return doclink('link-224', 'defs', 'link-6');">defs</a></tt><tt class="py-op">.</tt><tt id="link-225" class="py-name" targets="Variable lxml.html.defs.tags=lxml.html.defs-module.html#tags"><a title="lxml.html.defs.tags" class="py-name" href="#" onclick="return doclink('link-225', 'tags', 'link-225');">tags</a></tt><tt class="py-op">)</tt> </tt>
<a name="L404"></a><tt class="py-lineno">404</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt id="link-226" class="py-name"><a title="lxml.html.clean.Cleaner.allow_tags" class="py-name" href="#" onclick="return doclink('link-226', 'allow_tags', 'link-56');">allow_tags</a></tt><tt class="py-op">:</tt> </tt>
<a name="L405"></a><tt class="py-lineno">405</tt>  <tt class="py-line">            <tt class="py-name">bad</tt> <tt class="py-op">=</tt> <tt class="py-op">[</tt><tt class="py-op">]</tt> </tt>
<a name="L406"></a><tt class="py-lineno">406</tt>  <tt class="py-line">            <tt class="py-keyword">for</tt> <tt class="py-name">el</tt> <tt class="py-keyword">in</tt> <tt class="py-name">doc</tt><tt class="py-op">.</tt><tt id="link-227" class="py-name"><a title="lxml.etree._Element.iter
lxml.etree._ElementTree.iter" class="py-name" href="#" onclick="return doclink('link-227', 'iter', 'link-83');">iter</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L407"></a><tt class="py-lineno">407</tt>  <tt class="py-line">                <tt class="py-keyword">if</tt> <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-228" class="py-name"><a title="lxml.etree._Comment.tag
lxml.etree._Element.tag
lxml.etree._Entity.tag
lxml.etree._ProcessingInstruction.tag
lxml.tests.test_xpathevaluator.tag
xml.etree.ElementTree.Element.tag" class="py-name" href="#" onclick="return doclink('link-228', 'tag', 'link-84');">tag</a></tt> <tt class="py-keyword">not</tt> <tt class="py-keyword">in</tt> <tt id="link-229" class="py-name"><a title="lxml.html.clean.Cleaner.allow_tags" class="py-name" href="#" onclick="return doclink('link-229', 'allow_tags', 'link-56');">allow_tags</a></tt><tt class="py-op">:</tt> </tt>
<a name="L408"></a><tt class="py-lineno">408</tt>  <tt class="py-line">                    <tt class="py-name">bad</tt><tt class="py-op">.</tt><tt id="link-230" class="py-name"><a title="lxml.etree._Element.append" class="py-name" href="#" onclick="return doclink('link-230', 'append', 'link-205');">append</a></tt><tt class="py-op">(</tt><tt class="py-name">el</tt><tt class="py-op">)</tt> </tt>
<a name="L409"></a><tt class="py-lineno">409</tt>  <tt class="py-line">            <tt class="py-keyword">if</tt> <tt class="py-name">bad</tt><tt class="py-op">:</tt> </tt>
<a name="L410"></a><tt class="py-lineno">410</tt>  <tt class="py-line">                <tt class="py-keyword">if</tt> <tt class="py-name">bad</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">]</tt> <tt class="py-keyword">is</tt> <tt class="py-name">doc</tt><tt class="py-op">:</tt> </tt>
<a name="L411"></a><tt class="py-lineno">411</tt>  <tt class="py-line">                    <tt class="py-name">el</tt> <tt class="py-op">=</tt> <tt class="py-name">bad</tt><tt class="py-op">.</tt><tt id="link-231" class="py-name"><a title="lxml.etree._Attrib.pop" class="py-name" href="#" onclick="return doclink('link-231', 'pop', 'link-210');">pop</a></tt><tt class="py-op">(</tt><tt class="py-number">0</tt><tt class="py-op">)</tt> </tt>
<a name="L412"></a><tt class="py-lineno">412</tt>  <tt class="py-line">                    <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-232" class="py-name"><a title="lxml.etree._Comment.tag
lxml.etree._Element.tag
lxml.etree._Entity.tag
lxml.etree._ProcessingInstruction.tag
lxml.tests.test_xpathevaluator.tag
xml.etree.ElementTree.Element.tag" class="py-name" href="#" onclick="return doclink('link-232', 'tag', 'link-84');">tag</a></tt> <tt class="py-op">=</tt> <tt class="py-string">'div'</tt> </tt>
<a name="L413"></a><tt class="py-lineno">413</tt>  <tt class="py-line">                    <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-233" class="py-name"><a title="lxml.etree._Element.attrib
lxml.etree._ProcessingInstruction.attrib
lxml.tests.selftest2.attrib
xml.etree.ElementTree.Element.attrib" class="py-name" href="#" onclick="return doclink('link-233', 'attrib', 'link-106');">attrib</a></tt><tt class="py-op">.</tt><tt id="link-234" class="py-name"><a title="lxml.etree.DocInfo.clear
lxml.etree._Attrib.clear
lxml.etree._Element.clear
lxml.etree._ErrorLog.clear" class="py-name" href="#" onclick="return doclink('link-234', 'clear', 'link-213');">clear</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L414"></a><tt class="py-lineno">414</tt>  <tt class="py-line">                <tt class="py-keyword">for</tt> <tt class="py-name">el</tt> <tt class="py-keyword">in</tt> <tt class="py-name">bad</tt><tt class="py-op">:</tt> </tt>
<a name="L415"></a><tt class="py-lineno">415</tt>  <tt class="py-line">                    <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-235" class="py-name"><a title="lxml.html.HtmlMixin.drop_tag" class="py-name" href="#" onclick="return doclink('link-235', 'drop_tag', 'link-219');">drop_tag</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L416"></a><tt class="py-lineno">416</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-236" class="py-name"><a title="lxml.html.clean.Cleaner.add_nofollow" class="py-name" href="#" onclick="return doclink('link-236', 'add_nofollow', 'link-63');">add_nofollow</a></tt><tt class="py-op">:</tt> </tt>
<a name="L417"></a><tt class="py-lineno">417</tt>  <tt class="py-line">            <tt class="py-keyword">for</tt> <tt class="py-name">el</tt> <tt class="py-keyword">in</tt> <tt id="link-237" class="py-name"><a title="lxml.html.clean._find_external_links" class="py-name" href="#" onclick="return doclink('link-237', '_find_external_links', 'link-38');">_find_external_links</a></tt><tt class="py-op">(</tt><tt class="py-name">doc</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L418"></a><tt class="py-lineno">418</tt>  <tt class="py-line">                <tt class="py-keyword">if</tt> <tt class="py-keyword">not</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-238" class="py-name" targets="Method lxml.html.clean.Cleaner.allow_follow()=lxml.html.clean.Cleaner-class.html#allow_follow"><a title="lxml.html.clean.Cleaner.allow_follow" class="py-name" href="#" onclick="return doclink('link-238', 'allow_follow', 'link-238');">allow_follow</a></tt><tt class="py-op">(</tt><tt class="py-name">el</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L419"></a><tt class="py-lineno">419</tt>  <tt class="py-line">                    <tt class="py-name">rel</tt> <tt class="py-op">=</tt> <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-239" class="py-name"><a title="lxml.etree._Attrib.get
lxml.etree._Element.get
lxml.etree._IDDict.get
lxml.etree._ProcessingInstruction.get" class="py-name" href="#" onclick="return doclink('link-239', 'get', 'link-130');">get</a></tt><tt class="py-op">(</tt><tt class="py-string">'rel'</tt><tt class="py-op">)</tt> </tt>
<a name="L420"></a><tt class="py-lineno">420</tt>  <tt class="py-line">                    <tt class="py-keyword">if</tt> <tt class="py-name">rel</tt><tt class="py-op">:</tt> </tt>
<a name="L421"></a><tt class="py-lineno">421</tt>  <tt class="py-line">                        <tt class="py-keyword">if</tt> <tt class="py-op">(</tt><tt class="py-string">'nofollow'</tt> <tt class="py-keyword">in</tt> <tt class="py-name">rel</tt> </tt>
<a name="L422"></a><tt class="py-lineno">422</tt>  <tt class="py-line">                                <tt class="py-keyword">and</tt> <tt class="py-string">' nofollow '</tt> <tt class="py-keyword">in</tt> <tt class="py-op">(</tt><tt class="py-string">' %s '</tt> <tt class="py-op">%</tt> <tt class="py-name">rel</tt><tt class="py-op">)</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L423"></a><tt class="py-lineno">423</tt>  <tt class="py-line">                            <tt class="py-keyword">continue</tt> </tt>
<a name="L424"></a><tt class="py-lineno">424</tt>  <tt class="py-line">                        <tt class="py-name">rel</tt> <tt class="py-op">=</tt> <tt class="py-string">'%s nofollow'</tt> <tt class="py-op">%</tt> <tt class="py-name">rel</tt> </tt>
<a name="L425"></a><tt class="py-lineno">425</tt>  <tt class="py-line">                    <tt class="py-keyword">else</tt><tt class="py-op">:</tt> </tt>
<a name="L426"></a><tt class="py-lineno">426</tt>  <tt class="py-line">                        <tt class="py-name">rel</tt> <tt class="py-op">=</tt> <tt class="py-string">'nofollow'</tt> </tt>
<a name="L427"></a><tt class="py-lineno">427</tt>  <tt class="py-line">                    <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-240" class="py-name"><a title="lxml.etree._Element.set
lxml.etree._XSLTProcessingInstruction.set
lxml.html.HtmlElement.set
lxml.html.HtmlMixin.set" class="py-name" href="#" onclick="return doclink('link-240', 'set', 'link-66');">set</a></tt><tt class="py-op">(</tt><tt class="py-string">'rel'</tt><tt class="py-op">,</tt> <tt class="py-name">rel</tt><tt class="py-op">)</tt> </tt>
</div><a name="L428"></a><tt class="py-lineno">428</tt>  <tt class="py-line"> </tt>
<!-- Source listing for Cleaner.allow_follow (Python lines 429 to 434).
     Layout: a named anchor for the method; the "-def" div holding the def line and its
     toggle link; an empty, hidden "-collapsed" placeholder div carrying the pad and indent
     attributes; and the "-expanded" div holding the body (docstring plus "return False").
     The line after the closing div is the blank separator line 434.
     The call site earlier in this listing only adds rel="nofollow" when this method
     returns a false value, so the default shown here marks every external link.
     NOTE(review): toggle() is presumably defined in epydoc.js and looks the three divs up
     by id; confirm before renaming any id. The listing appears to be whitespace-sensitive
     (presumably inside a pre block; confirm), so this comment closes on the same line as
     the markup to avoid adding a visible line break. --><a name="Cleaner.allow_follow"></a><div id="Cleaner.allow_follow-def"><a name="L429"></a><tt class="py-lineno">429</tt> <a class="py-toggle" href="#" id="Cleaner.allow_follow-toggle" onclick="return toggle('Cleaner.allow_follow');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="lxml.html.clean.Cleaner-class.html#allow_follow">allow_follow</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">anchor</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="Cleaner.allow_follow-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="Cleaner.allow_follow-expanded"><a name="L430"></a><tt class="py-lineno">430</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L431"></a><tt class="py-lineno">431</tt>  <tt class="py-line"><tt class="py-docstring">        Override to suppress rel="nofollow" on some anchors.</tt> </tt>
<a name="L432"></a><tt class="py-lineno">432</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L433"></a><tt class="py-lineno">433</tt>  <tt class="py-line">        <tt class="py-keyword">return</tt> <tt class="py-name">False</tt> </tt>
</div><a name="L434"></a><tt class="py-lineno">434</tt>  <tt class="py-line"> </tt>
<!-- Source listing for Cleaner.allow_element (Python lines 435 to 452).
     Layout: named anchor, "-def" div with the def line and toggle link, hidden
     "-collapsed" placeholder div, then the "-expanded" div with the method body.
     The code shown returns False when el.tag is not a key of self._tag_link_attrs.
     Otherwise it reads the attribute name (or list/tuple of names) stored for that tag,
     fetches each URL with el.get(), and returns False as soon as a URL is missing or
     self.allow_embedded_url(el, url) rejects it. It returns True only if every listed
     attribute passes; for a single attribute name it returns the allow_embedded_url result.
     Each identifier wrapped in a tt with id "link-N" is a cross-reference link; a title
     attribute spanning several physical lines lists every candidate target for an
     ambiguous name such as tag or get, one per line, and must be kept intact.
     NOTE(review): doclink() and toggle() are presumably defined in epydoc.js; confirm
     before changing ids or onclick arguments. The listing appears to be
     whitespace-sensitive (presumably inside a pre block; confirm), so this comment closes
     on the same line as the markup to avoid adding a visible line break. --><a name="Cleaner.allow_element"></a><div id="Cleaner.allow_element-def"><a name="L435"></a><tt class="py-lineno">435</tt> <a class="py-toggle" href="#" id="Cleaner.allow_element-toggle" onclick="return toggle('Cleaner.allow_element');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="lxml.html.clean.Cleaner-class.html#allow_element">allow_element</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">el</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="Cleaner.allow_element-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="Cleaner.allow_element-expanded"><a name="L436"></a><tt class="py-lineno">436</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-241" class="py-name"><a title="lxml.etree._Comment.tag
lxml.etree._Element.tag
lxml.etree._Entity.tag
lxml.etree._ProcessingInstruction.tag
lxml.tests.test_xpathevaluator.tag
xml.etree.ElementTree.Element.tag" class="py-name" href="#" onclick="return doclink('link-241', 'tag', 'link-84');">tag</a></tt> <tt class="py-keyword">not</tt> <tt class="py-keyword">in</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-242" class="py-name"><a title="lxml.html.clean.Cleaner._tag_link_attrs" class="py-name" href="#" onclick="return doclink('link-242', '_tag_link_attrs', 'link-80');">_tag_link_attrs</a></tt><tt class="py-op">:</tt> </tt>
<a name="L437"></a><tt class="py-lineno">437</tt>  <tt class="py-line">            <tt class="py-keyword">return</tt> <tt class="py-name">False</tt> </tt>
<a name="L438"></a><tt class="py-lineno">438</tt>  <tt class="py-line">        <tt class="py-name">attr</tt> <tt class="py-op">=</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-243" class="py-name"><a title="lxml.html.clean.Cleaner._tag_link_attrs" class="py-name" href="#" onclick="return doclink('link-243', '_tag_link_attrs', 'link-80');">_tag_link_attrs</a></tt><tt class="py-op">[</tt><tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-244" class="py-name"><a title="lxml.etree._Comment.tag
lxml.etree._Element.tag
lxml.etree._Entity.tag
lxml.etree._ProcessingInstruction.tag
lxml.tests.test_xpathevaluator.tag
xml.etree.ElementTree.Element.tag" class="py-name" href="#" onclick="return doclink('link-244', 'tag', 'link-84');">tag</a></tt><tt class="py-op">]</tt> </tt>
<a name="L439"></a><tt class="py-lineno">439</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">isinstance</tt><tt class="py-op">(</tt><tt class="py-name">attr</tt><tt class="py-op">,</tt> <tt class="py-op">(</tt><tt class="py-name">list</tt><tt class="py-op">,</tt> <tt class="py-name">tuple</tt><tt class="py-op">)</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L440"></a><tt class="py-lineno">440</tt>  <tt class="py-line">            <tt class="py-keyword">for</tt> <tt class="py-name">one_attr</tt> <tt class="py-keyword">in</tt> <tt class="py-name">attr</tt><tt class="py-op">:</tt> </tt>
<a name="L441"></a><tt class="py-lineno">441</tt>  <tt class="py-line">                <tt class="py-name">url</tt> <tt class="py-op">=</tt> <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-245" class="py-name"><a title="lxml.etree._Attrib.get
lxml.etree._Element.get
lxml.etree._IDDict.get
lxml.etree._ProcessingInstruction.get" class="py-name" href="#" onclick="return doclink('link-245', 'get', 'link-130');">get</a></tt><tt class="py-op">(</tt><tt class="py-name">one_attr</tt><tt class="py-op">)</tt> </tt>
<a name="L442"></a><tt class="py-lineno">442</tt>  <tt class="py-line">                <tt class="py-keyword">if</tt> <tt class="py-keyword">not</tt> <tt class="py-name">url</tt><tt class="py-op">:</tt> </tt>
<a name="L443"></a><tt class="py-lineno">443</tt>  <tt class="py-line">                    <tt class="py-keyword">return</tt> <tt class="py-name">False</tt> </tt>
<a name="L444"></a><tt class="py-lineno">444</tt>  <tt class="py-line">                <tt class="py-keyword">if</tt> <tt class="py-keyword">not</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-246" class="py-name" targets="Method lxml.html.clean.Cleaner.allow_embedded_url()=lxml.html.clean.Cleaner-class.html#allow_embedded_url"><a title="lxml.html.clean.Cleaner.allow_embedded_url" class="py-name" href="#" onclick="return doclink('link-246', 'allow_embedded_url', 'link-246');">allow_embedded_url</a></tt><tt class="py-op">(</tt><tt class="py-name">el</tt><tt class="py-op">,</tt> <tt class="py-name">url</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L445"></a><tt class="py-lineno">445</tt>  <tt class="py-line">                    <tt class="py-keyword">return</tt> <tt class="py-name">False</tt> </tt>
<a name="L446"></a><tt class="py-lineno">446</tt>  <tt class="py-line">            <tt class="py-keyword">return</tt> <tt class="py-name">True</tt> </tt>
<a name="L447"></a><tt class="py-lineno">447</tt>  <tt class="py-line">        <tt class="py-keyword">else</tt><tt class="py-op">:</tt> </tt>
<a name="L448"></a><tt class="py-lineno">448</tt>  <tt class="py-line">            <tt class="py-name">url</tt> <tt class="py-op">=</tt> <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-247" class="py-name"><a title="lxml.etree._Attrib.get
lxml.etree._Element.get
lxml.etree._IDDict.get
lxml.etree._ProcessingInstruction.get" class="py-name" href="#" onclick="return doclink('link-247', 'get', 'link-130');">get</a></tt><tt class="py-op">(</tt><tt class="py-name">attr</tt><tt class="py-op">)</tt> </tt>
<a name="L449"></a><tt class="py-lineno">449</tt>  <tt class="py-line">            <tt class="py-keyword">if</tt> <tt class="py-keyword">not</tt> <tt class="py-name">url</tt><tt class="py-op">:</tt> </tt>
<a name="L450"></a><tt class="py-lineno">450</tt>  <tt class="py-line">                <tt class="py-keyword">return</tt> <tt class="py-name">False</tt> </tt>
<a name="L451"></a><tt class="py-lineno">451</tt>  <tt class="py-line">            <tt class="py-keyword">return</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-248" class="py-name"><a title="lxml.html.clean.Cleaner.allow_embedded_url" class="py-name" href="#" onclick="return doclink('link-248', 'allow_embedded_url', 'link-246');">allow_embedded_url</a></tt><tt class="py-op">(</tt><tt class="py-name">el</tt><tt class="py-op">,</tt> <tt class="py-name">url</tt><tt class="py-op">)</tt> </tt>
</div><a name="L452"></a><tt class="py-lineno">452</tt>  <tt class="py-line"> </tt>
<!-- Source listing for Cleaner.allow_embedded_url (Python lines 453 to 464).
     Layout: named anchor, "-def" div with the def line and toggle link, hidden
     "-collapsed" placeholder div, then the "-expanded" div with the method body.
     The code shown returns False when self.whitelist_tags is not None and el.tag is not
     in it. It then splits the URL with urlsplit(), lowercases the network location and
     drops everything from the first colon on, returns False for any scheme other than
     http or https, and returns True only when the remaining host is in
     self.host_whitelist.
     NOTE(review): the cross-reference on the local name "path" points at lxml.etree
     attributes of the same name (XPath.path, _LogEntry.path); this looks like name-based
     matching by the generator rather than a real reference, so do not rely on that link.
     NOTE(review): doclink() and toggle() are presumably defined in epydoc.js; confirm
     before changing ids or onclick arguments. The listing appears to be
     whitespace-sensitive (presumably inside a pre block; confirm), so this comment closes
     on the same line as the markup to avoid adding a visible line break. --><a name="Cleaner.allow_embedded_url"></a><div id="Cleaner.allow_embedded_url-def"><a name="L453"></a><tt class="py-lineno">453</tt> <a class="py-toggle" href="#" id="Cleaner.allow_embedded_url-toggle" onclick="return toggle('Cleaner.allow_embedded_url');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="lxml.html.clean.Cleaner-class.html#allow_embedded_url">allow_embedded_url</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">el</tt><tt class="py-op">,</tt> <tt class="py-param">url</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="Cleaner.allow_embedded_url-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="Cleaner.allow_embedded_url-expanded"><a name="L454"></a><tt class="py-lineno">454</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-249" class="py-name"><a title="lxml.html.clean.Cleaner.whitelist_tags" class="py-name" href="#" onclick="return doclink('link-249', 'whitelist_tags', 'link-65');">whitelist_tags</a></tt> <tt class="py-keyword">is</tt> <tt class="py-keyword">not</tt> <tt class="py-name">None</tt> </tt>
<a name="L455"></a><tt class="py-lineno">455</tt>  <tt class="py-line">            <tt class="py-keyword">and</tt> <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-250" class="py-name"><a title="lxml.etree._Comment.tag
lxml.etree._Element.tag
lxml.etree._Entity.tag
lxml.etree._ProcessingInstruction.tag
lxml.tests.test_xpathevaluator.tag
xml.etree.ElementTree.Element.tag" class="py-name" href="#" onclick="return doclink('link-250', 'tag', 'link-84');">tag</a></tt> <tt class="py-keyword">not</tt> <tt class="py-keyword">in</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-251" class="py-name"><a title="lxml.html.clean.Cleaner.whitelist_tags" class="py-name" href="#" onclick="return doclink('link-251', 'whitelist_tags', 'link-65');">whitelist_tags</a></tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L456"></a><tt class="py-lineno">456</tt>  <tt class="py-line">            <tt class="py-keyword">return</tt> <tt class="py-name">False</tt> </tt>
<a name="L457"></a><tt class="py-lineno">457</tt>  <tt class="py-line">        <tt class="py-name">scheme</tt><tt class="py-op">,</tt> <tt class="py-name">netloc</tt><tt class="py-op">,</tt> <tt id="link-252" class="py-name" targets="Variable lxml.etree.XPath.path=lxml.etree.XPath-class.html#path,Variable lxml.etree._LogEntry.path=lxml.etree._LogEntry-class.html#path"><a title="lxml.etree.XPath.path
lxml.etree._LogEntry.path" class="py-name" href="#" onclick="return doclink('link-252', 'path', 'link-252');">path</a></tt><tt class="py-op">,</tt> <tt class="py-name">query</tt><tt class="py-op">,</tt> <tt class="py-name">fragment</tt> <tt class="py-op">=</tt> <tt class="py-name">urlsplit</tt><tt class="py-op">(</tt><tt class="py-name">url</tt><tt class="py-op">)</tt> </tt>
<a name="L458"></a><tt class="py-lineno">458</tt>  <tt class="py-line">        <tt class="py-name">netloc</tt> <tt class="py-op">=</tt> <tt class="py-name">netloc</tt><tt class="py-op">.</tt><tt class="py-name">lower</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">.</tt><tt class="py-name">split</tt><tt class="py-op">(</tt><tt class="py-string">':'</tt><tt class="py-op">,</tt> <tt class="py-number">1</tt><tt class="py-op">)</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">]</tt> </tt>
<a name="L459"></a><tt class="py-lineno">459</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">scheme</tt> <tt class="py-keyword">not</tt> <tt class="py-keyword">in</tt> <tt class="py-op">(</tt><tt class="py-string">'http'</tt><tt class="py-op">,</tt> <tt class="py-string">'https'</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L460"></a><tt class="py-lineno">460</tt>  <tt class="py-line">            <tt class="py-keyword">return</tt> <tt class="py-name">False</tt> </tt>
<a name="L461"></a><tt class="py-lineno">461</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">netloc</tt> <tt class="py-keyword">in</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-253" class="py-name"><a title="lxml.html.clean.Cleaner.host_whitelist" class="py-name" href="#" onclick="return doclink('link-253', 'host_whitelist', 'link-64');">host_whitelist</a></tt><tt class="py-op">:</tt> </tt>
<a name="L462"></a><tt class="py-lineno">462</tt>  <tt class="py-line">            <tt class="py-keyword">return</tt> <tt class="py-name">True</tt> </tt>
<a name="L463"></a><tt class="py-lineno">463</tt>  <tt class="py-line">        <tt class="py-keyword">return</tt> <tt class="py-name">False</tt> </tt>
</div><a name="L464"></a><tt class="py-lineno">464</tt>  <tt class="py-line"> </tt>
<a name="Cleaner.kill_conditional_comments"></a><div id="Cleaner.kill_conditional_comments-def"><a name="L465"></a><tt class="py-lineno">465</tt> <a class="py-toggle" href="#" id="Cleaner.kill_conditional_comments-toggle" onclick="return toggle('Cleaner.kill_conditional_comments');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="lxml.html.clean.Cleaner-class.html#kill_conditional_comments">kill_conditional_comments</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">doc</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="Cleaner.kill_conditional_comments-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="Cleaner.kill_conditional_comments-expanded"><a name="L466"></a><tt class="py-lineno">466</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L467"></a><tt class="py-lineno">467</tt>  <tt class="py-line"><tt class="py-docstring">        IE conditional comments basically embed HTML that the parser</tt> </tt>
<a name="L468"></a><tt class="py-lineno">468</tt>  <tt class="py-line"><tt class="py-docstring">        doesn't normally see.  We can't allow anything like that, so</tt> </tt>
<a name="L469"></a><tt class="py-lineno">469</tt>  <tt class="py-line"><tt class="py-docstring">        we'll kill any comments that could be conditional.</tt> </tt>
<a name="L470"></a><tt class="py-lineno">470</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L471"></a><tt class="py-lineno">471</tt>  <tt class="py-line">        <tt class="py-name">bad</tt> <tt class="py-op">=</tt> <tt class="py-op">[</tt><tt class="py-op">]</tt> </tt>
<a name="L472"></a><tt class="py-lineno">472</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-254" class="py-name" targets="Method lxml.html.clean.Cleaner._kill_elements()=lxml.html.clean.Cleaner-class.html#_kill_elements"><a title="lxml.html.clean.Cleaner._kill_elements" class="py-name" href="#" onclick="return doclink('link-254', '_kill_elements', 'link-254');">_kill_elements</a></tt><tt class="py-op">(</tt> </tt>
<a name="L473"></a><tt class="py-lineno">473</tt>  <tt class="py-line">            <tt class="py-name">doc</tt><tt class="py-op">,</tt> <tt class="py-keyword">lambda</tt> <tt class="py-name">el</tt><tt class="py-op">:</tt> <tt id="link-255" class="py-name"><a title="lxml.html.clean._conditional_comment_re" class="py-name" href="#" onclick="return doclink('link-255', '_conditional_comment_re', 'link-32');">_conditional_comment_re</a></tt><tt class="py-op">.</tt><tt class="py-name">search</tt><tt class="py-op">(</tt><tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-256" class="py-name"><a title="lxml.etree.QName.text
lxml.etree._Element.text
lxml.etree._Entity.text
lxml.objectify.ObjectifiedElement.text
xml.etree.ElementTree.Element.text" class="py-name" href="#" onclick="return doclink('link-256', 'text', 'link-141');">text</a></tt><tt class="py-op">)</tt><tt class="py-op">,</tt> </tt>
<a name="L474"></a><tt class="py-lineno">474</tt>  <tt class="py-line">            <tt id="link-257" class="py-name"><a title="lxml.etree
lxml.sax.ElementTreeContentHandler.etree
lxml.tests.test_elementtree.CElementTreeTestCase.etree
lxml.tests.test_elementtree._ETreeTestCaseBase.etree
lxml.tests.test_elementtree._XMLPullParserTest.etree
lxml.tests.test_io._IOTestCaseBase.etree" class="py-name" href="#" onclick="return doclink('link-257', 'etree', 'link-3');">etree</a></tt><tt class="py-op">.</tt><tt id="link-258" class="py-name"><a title="lxml.etree.Comment" class="py-name" href="#" onclick="return doclink('link-258', 'Comment', 'link-152');">Comment</a></tt><tt class="py-op">)</tt>                 </tt>
</div><a name="L475"></a><tt class="py-lineno">475</tt>  <tt class="py-line"> </tt>
<a name="Cleaner._kill_elements"></a><div id="Cleaner._kill_elements-def"><a name="L476"></a><tt class="py-lineno">476</tt> <a class="py-toggle" href="#" id="Cleaner._kill_elements-toggle" onclick="return toggle('Cleaner._kill_elements');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="lxml.html.clean.Cleaner-class.html#_kill_elements">_kill_elements</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">doc</tt><tt class="py-op">,</tt> <tt class="py-param">condition</tt><tt class="py-op">,</tt> <tt class="py-param">iterate</tt><tt class="py-op">=</tt><tt class="py-name">None</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="Cleaner._kill_elements-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="Cleaner._kill_elements-expanded"><a name="L477"></a><tt class="py-lineno">477</tt>  <tt class="py-line">        <tt class="py-name">bad</tt> <tt class="py-op">=</tt> <tt class="py-op">[</tt><tt class="py-op">]</tt> </tt>
<a name="L478"></a><tt class="py-lineno">478</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt class="py-name">el</tt> <tt class="py-keyword">in</tt> <tt class="py-name">doc</tt><tt class="py-op">.</tt><tt id="link-259" class="py-name"><a title="lxml.etree._Element.iter
lxml.etree._ElementTree.iter" class="py-name" href="#" onclick="return doclink('link-259', 'iter', 'link-83');">iter</a></tt><tt class="py-op">(</tt><tt class="py-name">iterate</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L479"></a><tt class="py-lineno">479</tt>  <tt class="py-line">            <tt class="py-keyword">if</tt> <tt class="py-name">condition</tt><tt class="py-op">(</tt><tt class="py-name">el</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L480"></a><tt class="py-lineno">480</tt>  <tt class="py-line">                <tt class="py-name">bad</tt><tt class="py-op">.</tt><tt id="link-260" class="py-name"><a title="lxml.etree._Element.append" class="py-name" href="#" onclick="return doclink('link-260', 'append', 'link-205');">append</a></tt><tt class="py-op">(</tt><tt class="py-name">el</tt><tt class="py-op">)</tt> </tt>
<a name="L481"></a><tt class="py-lineno">481</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt class="py-name">el</tt> <tt class="py-keyword">in</tt> <tt class="py-name">bad</tt><tt class="py-op">:</tt> </tt>
<a name="L482"></a><tt class="py-lineno">482</tt>  <tt class="py-line">            <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-261" class="py-name"><a title="lxml.html.HtmlMixin.drop_tree" class="py-name" href="#" onclick="return doclink('link-261', 'drop_tree', 'link-140');">drop_tree</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
</div><a name="L483"></a><tt class="py-lineno">483</tt>  <tt class="py-line"> </tt>
<a name="Cleaner._remove_javascript_link"></a><div id="Cleaner._remove_javascript_link-def"><a name="L484"></a><tt class="py-lineno">484</tt> <a class="py-toggle" href="#" id="Cleaner._remove_javascript_link-toggle" onclick="return toggle('Cleaner._remove_javascript_link');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="lxml.html.clean.Cleaner-class.html#_remove_javascript_link">_remove_javascript_link</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">link</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="Cleaner._remove_javascript_link-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="Cleaner._remove_javascript_link-expanded"><a name="L485"></a><tt class="py-lineno">485</tt>  <tt class="py-line">        <tt class="py-comment"># links like "j a v a s c r i p t:" might be interpreted in IE</tt> </tt>
<a name="L486"></a><tt class="py-lineno">486</tt>  <tt class="py-line">        <tt class="py-name">new</tt> <tt class="py-op">=</tt> <tt id="link-262" class="py-name"><a title="lxml.html.clean._substitute_whitespace" class="py-name" href="#" onclick="return doclink('link-262', '_substitute_whitespace', 'link-31');">_substitute_whitespace</a></tt><tt class="py-op">(</tt><tt class="py-string">''</tt><tt class="py-op">,</tt> <tt class="py-name">unquote_plus</tt><tt class="py-op">(</tt><tt class="py-name">link</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L487"></a><tt class="py-lineno">487</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt id="link-263" class="py-name" targets="Function lxml.html.clean._is_javascript_scheme()=lxml.html.clean-module.html#_is_javascript_scheme"><a title="lxml.html.clean._is_javascript_scheme" class="py-name" href="#" onclick="return doclink('link-263', '_is_javascript_scheme', 'link-263');">_is_javascript_scheme</a></tt><tt class="py-op">(</tt><tt class="py-name">new</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L488"></a><tt class="py-lineno">488</tt>  <tt class="py-line">            <tt class="py-comment"># FIXME: should this be None to delete?</tt> </tt>
<a name="L489"></a><tt class="py-lineno">489</tt>  <tt class="py-line">            <tt class="py-keyword">return</tt> <tt class="py-string">''</tt> </tt>
<a name="L490"></a><tt class="py-lineno">490</tt>  <tt class="py-line">        <tt class="py-keyword">return</tt> <tt class="py-name">link</tt> </tt>
</div><a name="L491"></a><tt class="py-lineno">491</tt>  <tt class="py-line"> </tt>
<a name="L492"></a><tt class="py-lineno">492</tt>  <tt class="py-line">    <tt id="link-264" class="py-name" targets="Method lxml.html.clean.Cleaner._substitute_comments()=lxml.html.clean.Cleaner-class.html#_substitute_comments"><a title="lxml.html.clean.Cleaner._substitute_comments" class="py-name" href="#" onclick="return doclink('link-264', '_substitute_comments', 'link-264');">_substitute_comments</a></tt> <tt class="py-op">=</tt> <tt class="py-name">re</tt><tt class="py-op">.</tt><tt class="py-name">compile</tt><tt class="py-op">(</tt><tt class="py-string">r'/\*.*?\*/'</tt><tt class="py-op">,</tt> <tt class="py-name">re</tt><tt class="py-op">.</tt><tt id="link-265" class="py-name"><a title="lxml.html.builder.S" class="py-name" href="#" onclick="return doclink('link-265', 'S', 'link-21');">S</a></tt><tt class="py-op">)</tt><tt class="py-op">.</tt><tt class="py-name">sub</tt> </tt>
<a name="L493"></a><tt class="py-lineno">493</tt>  <tt class="py-line"> </tt>
<a name="Cleaner._has_sneaky_javascript"></a><div id="Cleaner._has_sneaky_javascript-def"><a name="L494"></a><tt class="py-lineno">494</tt> <a class="py-toggle" href="#" id="Cleaner._has_sneaky_javascript-toggle" onclick="return toggle('Cleaner._has_sneaky_javascript');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="lxml.html.clean.Cleaner-class.html#_has_sneaky_javascript">_has_sneaky_javascript</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">style</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="Cleaner._has_sneaky_javascript-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="Cleaner._has_sneaky_javascript-expanded"><a name="L495"></a><tt class="py-lineno">495</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L496"></a><tt class="py-lineno">496</tt>  <tt class="py-line"><tt class="py-docstring">        Depending on the browser, stuff like ``e x p r e s s i o n(...)``</tt> </tt>
<a name="L497"></a><tt class="py-lineno">497</tt>  <tt class="py-line"><tt class="py-docstring">        can get interpreted, or ``expre/* stuff */ssion(...)``.  This</tt> </tt>
<a name="L498"></a><tt class="py-lineno">498</tt>  <tt class="py-line"><tt class="py-docstring">        checks for attempts to do stuff like this.</tt> </tt>
<a name="L499"></a><tt class="py-lineno">499</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L500"></a><tt class="py-lineno">500</tt>  <tt class="py-line"><tt class="py-docstring">        Typically the response will be to kill the entire style; if you</tt> </tt>
<a name="L501"></a><tt class="py-lineno">501</tt>  <tt class="py-line"><tt class="py-docstring">        have just a bit of JavaScript in the style, another rule will catch</tt> </tt>
<a name="L502"></a><tt class="py-lineno">502</tt>  <tt class="py-line"><tt class="py-docstring">        that and remove only the JavaScript from the style; this catches</tt> </tt>
<a name="L503"></a><tt class="py-lineno">503</tt>  <tt class="py-line"><tt class="py-docstring">        more sneaky attempts.</tt> </tt>
<a name="L504"></a><tt class="py-lineno">504</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L505"></a><tt class="py-lineno">505</tt>  <tt class="py-line">        <tt id="link-266" class="py-name"><a title="lxml.html.clean.Cleaner.style" class="py-name" href="#" onclick="return doclink('link-266', 'style', 'link-45');">style</a></tt> <tt class="py-op">=</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-267" class="py-name"><a title="lxml.html.clean.Cleaner._substitute_comments" class="py-name" href="#" onclick="return doclink('link-267', '_substitute_comments', 'link-264');">_substitute_comments</a></tt><tt class="py-op">(</tt><tt class="py-string">''</tt><tt class="py-op">,</tt> <tt id="link-268" class="py-name"><a title="lxml.html.clean.Cleaner.style" class="py-name" href="#" onclick="return doclink('link-268', 'style', 'link-45');">style</a></tt><tt class="py-op">)</tt> </tt>
<a name="L506"></a><tt class="py-lineno">506</tt>  <tt class="py-line">        <tt id="link-269" class="py-name"><a title="lxml.html.clean.Cleaner.style" class="py-name" href="#" onclick="return doclink('link-269', 'style', 'link-45');">style</a></tt> <tt class="py-op">=</tt> <tt id="link-270" class="py-name"><a title="lxml.html.clean.Cleaner.style" class="py-name" href="#" onclick="return doclink('link-270', 'style', 'link-45');">style</a></tt><tt class="py-op">.</tt><tt id="link-271" class="py-name" targets="Method lxml.etree._Element.replace()=lxml.etree._Element-class.html#replace"><a title="lxml.etree._Element.replace" class="py-name" href="#" onclick="return doclink('link-271', 'replace', 'link-271');">replace</a></tt><tt class="py-op">(</tt><tt class="py-string">'\\'</tt><tt class="py-op">,</tt> <tt class="py-string">''</tt><tt class="py-op">)</tt> </tt>
<a name="L507"></a><tt class="py-lineno">507</tt>  <tt class="py-line">        <tt id="link-272" class="py-name"><a title="lxml.html.clean.Cleaner.style" class="py-name" href="#" onclick="return doclink('link-272', 'style', 'link-45');">style</a></tt> <tt class="py-op">=</tt> <tt id="link-273" class="py-name"><a title="lxml.html.clean._substitute_whitespace" class="py-name" href="#" onclick="return doclink('link-273', '_substitute_whitespace', 'link-31');">_substitute_whitespace</a></tt><tt class="py-op">(</tt><tt class="py-string">''</tt><tt class="py-op">,</tt> <tt id="link-274" class="py-name"><a title="lxml.html.clean.Cleaner.style" class="py-name" href="#" onclick="return doclink('link-274', 'style', 'link-45');">style</a></tt><tt class="py-op">)</tt> </tt>
<a name="L508"></a><tt class="py-lineno">508</tt>  <tt class="py-line">        <tt id="link-275" class="py-name"><a title="lxml.html.clean.Cleaner.style" class="py-name" href="#" onclick="return doclink('link-275', 'style', 'link-45');">style</a></tt> <tt class="py-op">=</tt> <tt id="link-276" class="py-name"><a title="lxml.html.clean.Cleaner.style" class="py-name" href="#" onclick="return doclink('link-276', 'style', 'link-45');">style</a></tt><tt class="py-op">.</tt><tt class="py-name">lower</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L509"></a><tt class="py-lineno">509</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-string">'javascript:'</tt> <tt class="py-keyword">in</tt> <tt id="link-277" class="py-name"><a title="lxml.html.clean.Cleaner.style" class="py-name" href="#" onclick="return doclink('link-277', 'style', 'link-45');">style</a></tt><tt class="py-op">:</tt> </tt>
<a name="L510"></a><tt class="py-lineno">510</tt>  <tt class="py-line">            <tt class="py-keyword">return</tt> <tt class="py-name">True</tt> </tt>
<a name="L511"></a><tt class="py-lineno">511</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-string">'expression('</tt> <tt class="py-keyword">in</tt> <tt id="link-278" class="py-name"><a title="lxml.html.clean.Cleaner.style" class="py-name" href="#" onclick="return doclink('link-278', 'style', 'link-45');">style</a></tt><tt class="py-op">:</tt> </tt>
<a name="L512"></a><tt class="py-lineno">512</tt>  <tt class="py-line">            <tt class="py-keyword">return</tt> <tt class="py-name">True</tt> </tt>
<a name="L513"></a><tt class="py-lineno">513</tt>  <tt class="py-line">        <tt class="py-keyword">return</tt> <tt class="py-name">False</tt> </tt>
</div><a name="L514"></a><tt class="py-lineno">514</tt>  <tt class="py-line"> </tt>
<a name="Cleaner.clean_html"></a><div id="Cleaner.clean_html-def"><a name="L515"></a><tt class="py-lineno">515</tt> <a class="py-toggle" href="#" id="Cleaner.clean_html-toggle" onclick="return toggle('Cleaner.clean_html');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="lxml.html.clean.Cleaner-class.html#clean_html">clean_html</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">html</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="Cleaner.clean_html-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="Cleaner.clean_html-expanded"><a name="L516"></a><tt class="py-lineno">516</tt>  <tt class="py-line">        <tt class="py-name">result_type</tt> <tt class="py-op">=</tt> <tt id="link-279" class="py-name" targets="Variable lxml.etree._LogEntry.type=lxml.etree._LogEntry-class.html#type,Variable lxml.html.InputElement.type=lxml.html.InputElement-class.html#type"><a title="lxml.etree._LogEntry.type
lxml.html.InputElement.type" class="py-name" href="#" onclick="return doclink('link-279', 'type', 'link-279');">type</a></tt><tt class="py-op">(</tt><tt id="link-280" class="py-name"><a title="lxml.html
lxml.html.diff.href_token.html
lxml.html.diff.tag_token.html
lxml.html.diff.token.html" class="py-name" href="#" onclick="return doclink('link-280', 'html', 'link-5');">html</a></tt><tt class="py-op">)</tt> </tt>
<a name="L517"></a><tt class="py-lineno">517</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">isinstance</tt><tt class="py-op">(</tt><tt id="link-281" class="py-name"><a title="lxml.html
lxml.html.diff.href_token.html
lxml.html.diff.tag_token.html
lxml.html.diff.token.html" class="py-name" href="#" onclick="return doclink('link-281', 'html', 'link-5');">html</a></tt><tt class="py-op">,</tt> <tt id="link-282" class="py-name"><a title="lxml.html.clean.basestring" class="py-name" href="#" onclick="return doclink('link-282', 'basestring', 'link-17');">basestring</a></tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L518"></a><tt class="py-lineno">518</tt>  <tt class="py-line">            <tt class="py-name">doc</tt> <tt class="py-op">=</tt> <tt id="link-283" class="py-name"><a title="lxml.etree.fromstring
lxml.html.html5parser.fromstring
lxml.html.soupparser.fromstring
lxml.objectify.fromstring" class="py-name" href="#" onclick="return doclink('link-283', 'fromstring', 'link-9');">fromstring</a></tt><tt class="py-op">(</tt><tt id="link-284" class="py-name"><a title="lxml.html
lxml.html.diff.href_token.html
lxml.html.diff.tag_token.html
lxml.html.diff.token.html" class="py-name" href="#" onclick="return doclink('link-284', 'html', 'link-5');">html</a></tt><tt class="py-op">)</tt> </tt>
<a name="L519"></a><tt class="py-lineno">519</tt>  <tt class="py-line">        <tt class="py-keyword">else</tt><tt class="py-op">:</tt> </tt>
<a name="L520"></a><tt class="py-lineno">520</tt>  <tt class="py-line">            <tt class="py-name">doc</tt> <tt class="py-op">=</tt> <tt id="link-285" class="py-name"><a title="lxml.etree.PyErrorLog.copy
lxml.etree._BaseErrorLog.copy
lxml.etree._ErrorLog.copy
lxml.etree._IDDict.copy
lxml.etree._ListErrorLog.copy
lxml.tests.selftest2.copy" class="py-name" href="#" onclick="return doclink('link-285', 'copy', 'link-0');">copy</a></tt><tt class="py-op">.</tt><tt class="py-name">deepcopy</tt><tt class="py-op">(</tt><tt id="link-286" class="py-name"><a title="lxml.html
lxml.html.diff.href_token.html
lxml.html.diff.tag_token.html
lxml.html.diff.token.html" class="py-name" href="#" onclick="return doclink('link-286', 'html', 'link-5');">html</a></tt><tt class="py-op">)</tt> </tt>
<a name="L521"></a><tt class="py-lineno">521</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">(</tt><tt class="py-name">doc</tt><tt class="py-op">)</tt> </tt>
<a name="L522"></a><tt class="py-lineno">522</tt>  <tt class="py-line">        <tt class="py-keyword">return</tt> <tt class="py-name">_transform_result</tt><tt class="py-op">(</tt><tt class="py-name">result_type</tt><tt class="py-op">,</tt> <tt class="py-name">doc</tt><tt class="py-op">)</tt> </tt>
</div></div><a name="L523"></a><tt class="py-lineno">523</tt>  <tt class="py-line"> </tt>
<a name="L524"></a><tt class="py-lineno">524</tt>  <tt class="py-line"><tt id="link-287" class="py-name" targets="Module lxml.html.clean=lxml.html.clean-module.html,Variable lxml.html.clean.clean=lxml.html.clean-module.html#clean"><a title="lxml.html.clean
lxml.html.clean.clean" class="py-name" href="#" onclick="return doclink('link-287', 'clean', 'link-287');">clean</a></tt> <tt class="py-op">=</tt> <tt id="link-288" class="py-name" targets="Class lxml.html.clean.Cleaner=lxml.html.clean.Cleaner-class.html"><a title="lxml.html.clean.Cleaner" class="py-name" href="#" onclick="return doclink('link-288', 'Cleaner', 'link-288');">Cleaner</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L525"></a><tt class="py-lineno">525</tt>  <tt class="py-line"><tt id="link-289" class="py-name" targets="Method lxml.html.clean.Cleaner.clean_html()=lxml.html.clean.Cleaner-class.html#clean_html,Function lxml.html.clean.clean_html()=lxml.html.clean-module.html#clean_html"><a title="lxml.html.clean.Cleaner.clean_html
lxml.html.clean.clean_html" class="py-name" href="#" onclick="return doclink('link-289', 'clean_html', 'link-289');">clean_html</a></tt> <tt class="py-op">=</tt> <tt id="link-290" class="py-name"><a title="lxml.html.clean
lxml.html.clean.clean" class="py-name" href="#" onclick="return doclink('link-290', 'clean', 'link-287');">clean</a></tt><tt class="py-op">.</tt><tt id="link-291" class="py-name"><a title="lxml.html.clean.Cleaner.clean_html
lxml.html.clean.clean_html" class="py-name" href="#" onclick="return doclink('link-291', 'clean_html', 'link-289');">clean_html</a></tt> </tt>
<a name="L526"></a><tt class="py-lineno">526</tt>  <tt class="py-line"> </tt>
<a name="L527"></a><tt class="py-lineno">527</tt>  <tt class="py-line"><tt class="py-comment">############################################################</tt> </tt>
<a name="L528"></a><tt class="py-lineno">528</tt>  <tt class="py-line"><tt class="py-comment">## Autolinking</tt> </tt>
<a name="L529"></a><tt class="py-lineno">529</tt>  <tt class="py-line"><tt class="py-comment">############################################################</tt> </tt>
<a name="L530"></a><tt class="py-lineno">530</tt>  <tt class="py-line"> </tt>
<a name="L531"></a><tt class="py-lineno">531</tt>  <tt class="py-line"><tt id="link-292" class="py-name" targets="Variable lxml.html.clean._link_regexes=lxml.html.clean-module.html#_link_regexes"><a title="lxml.html.clean._link_regexes" class="py-name" href="#" onclick="return doclink('link-292', '_link_regexes', 'link-292');">_link_regexes</a></tt> <tt class="py-op">=</tt> <tt class="py-op">[</tt> </tt>
<a name="L532"></a><tt class="py-lineno">532</tt>  <tt class="py-line">    <tt class="py-name">re</tt><tt class="py-op">.</tt><tt class="py-name">compile</tt><tt class="py-op">(</tt><tt class="py-string">r'(?P&lt;body&gt;https?://(?P&lt;host&gt;[a-z0-9._-]+)(?:/[/\-_.,a-z0-9%&amp;?;=~]*)?(?:\([/\-_.,a-z0-9%&amp;?;=~]*\))?)'</tt><tt class="py-op">,</tt> <tt class="py-name">re</tt><tt class="py-op">.</tt><tt id="link-293" class="py-name"><a title="lxml.html.builder.I" class="py-name" href="#" onclick="return doclink('link-293', 'I', 'link-22');">I</a></tt><tt class="py-op">)</tt><tt class="py-op">,</tt> </tt>
<a name="L533"></a><tt class="py-lineno">533</tt>  <tt class="py-line">    <tt class="py-comment"># This is conservative, but autolinking can be a bit conservative:</tt> </tt>
<a name="L534"></a><tt class="py-lineno">534</tt>  <tt class="py-line">    <tt class="py-name">re</tt><tt class="py-op">.</tt><tt class="py-name">compile</tt><tt class="py-op">(</tt><tt class="py-string">r'mailto:(?P&lt;body&gt;[a-z0-9._-]+@(?P&lt;host&gt;[a-z0-9_.-]+[a-z]))'</tt><tt class="py-op">,</tt> <tt class="py-name">re</tt><tt class="py-op">.</tt><tt id="link-294" class="py-name"><a title="lxml.html.builder.I" class="py-name" href="#" onclick="return doclink('link-294', 'I', 'link-22');">I</a></tt><tt class="py-op">)</tt><tt class="py-op">,</tt> </tt>
<a name="L535"></a><tt class="py-lineno">535</tt>  <tt class="py-line">    <tt class="py-op">]</tt> </tt>
<a name="L536"></a><tt class="py-lineno">536</tt>  <tt class="py-line"> </tt>
<a name="L537"></a><tt class="py-lineno">537</tt>  <tt class="py-line"><tt id="link-295" class="py-name" targets="Variable lxml.html.clean._avoid_elements=lxml.html.clean-module.html#_avoid_elements"><a title="lxml.html.clean._avoid_elements" class="py-name" href="#" onclick="return doclink('link-295', '_avoid_elements', 'link-295');">_avoid_elements</a></tt> <tt class="py-op">=</tt> <tt class="py-op">[</tt><tt class="py-string">'textarea'</tt><tt class="py-op">,</tt> <tt class="py-string">'pre'</tt><tt class="py-op">,</tt> <tt class="py-string">'code'</tt><tt class="py-op">,</tt> <tt class="py-string">'head'</tt><tt class="py-op">,</tt> <tt class="py-string">'select'</tt><tt class="py-op">,</tt> <tt class="py-string">'a'</tt><tt class="py-op">]</tt> </tt>
<a name="L538"></a><tt class="py-lineno">538</tt>  <tt class="py-line"> </tt>
<a name="L539"></a><tt class="py-lineno">539</tt>  <tt class="py-line"><tt id="link-296" class="py-name" targets="Variable lxml.html.clean._avoid_hosts=lxml.html.clean-module.html#_avoid_hosts"><a title="lxml.html.clean._avoid_hosts" class="py-name" href="#" onclick="return doclink('link-296', '_avoid_hosts', 'link-296');">_avoid_hosts</a></tt> <tt class="py-op">=</tt> <tt class="py-op">[</tt> </tt>
<a name="L540"></a><tt class="py-lineno">540</tt>  <tt class="py-line">    <tt class="py-name">re</tt><tt class="py-op">.</tt><tt class="py-name">compile</tt><tt class="py-op">(</tt><tt class="py-string">r'^localhost'</tt><tt class="py-op">,</tt> <tt class="py-name">re</tt><tt class="py-op">.</tt><tt id="link-297" class="py-name"><a title="lxml.html.builder.I" class="py-name" href="#" onclick="return doclink('link-297', 'I', 'link-22');">I</a></tt><tt class="py-op">)</tt><tt class="py-op">,</tt> </tt>
<a name="L541"></a><tt class="py-lineno">541</tt>  <tt class="py-line">    <tt class="py-name">re</tt><tt class="py-op">.</tt><tt class="py-name">compile</tt><tt class="py-op">(</tt><tt class="py-string">r'\bexample\.(?:com|org|net)$'</tt><tt class="py-op">,</tt> <tt class="py-name">re</tt><tt class="py-op">.</tt><tt id="link-298" class="py-name"><a title="lxml.html.builder.I" class="py-name" href="#" onclick="return doclink('link-298', 'I', 'link-22');">I</a></tt><tt class="py-op">)</tt><tt class="py-op">,</tt> </tt>
<a name="L542"></a><tt class="py-lineno">542</tt>  <tt class="py-line">    <tt class="py-name">re</tt><tt class="py-op">.</tt><tt class="py-name">compile</tt><tt class="py-op">(</tt><tt class="py-string">r'^127\.0\.0\.1$'</tt><tt class="py-op">)</tt><tt class="py-op">,</tt> </tt>
<a name="L543"></a><tt class="py-lineno">543</tt>  <tt class="py-line">    <tt class="py-op">]</tt> </tt>
<a name="L544"></a><tt class="py-lineno">544</tt>  <tt class="py-line"> </tt>
<a name="L545"></a><tt class="py-lineno">545</tt>  <tt class="py-line"><tt id="link-299" class="py-name" targets="Variable lxml.html.clean._avoid_classes=lxml.html.clean-module.html#_avoid_classes"><a title="lxml.html.clean._avoid_classes" class="py-name" href="#" onclick="return doclink('link-299', '_avoid_classes', 'link-299');">_avoid_classes</a></tt> <tt class="py-op">=</tt> <tt class="py-op">[</tt><tt class="py-string">'nolink'</tt><tt class="py-op">]</tt> </tt>
<a name="L546"></a><tt class="py-lineno">546</tt>  <tt class="py-line"> </tt>
<a name="autolink"></a><div id="autolink-def"><a name="L547"></a><tt class="py-lineno">547</tt> <a class="py-toggle" href="#" id="autolink-toggle" onclick="return toggle('autolink');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="lxml.html.clean-module.html#autolink">autolink</a><tt class="py-op">(</tt><tt class="py-param">el</tt><tt class="py-op">,</tt> <tt class="py-param">link_regexes</tt><tt class="py-op">=</tt><tt id="link-300" class="py-name"><a title="lxml.html.clean._link_regexes" class="py-name" href="#" onclick="return doclink('link-300', '_link_regexes', 'link-292');">_link_regexes</a></tt><tt class="py-op">,</tt> </tt>
<a name="L548"></a><tt class="py-lineno">548</tt>  <tt class="py-line">             <tt class="py-param">avoid_elements</tt><tt class="py-op">=</tt><tt id="link-301" class="py-name"><a title="lxml.html.clean._avoid_elements" class="py-name" href="#" onclick="return doclink('link-301', '_avoid_elements', 'link-295');">_avoid_elements</a></tt><tt class="py-op">,</tt> </tt>
<a name="L549"></a><tt class="py-lineno">549</tt>  <tt class="py-line">             <tt class="py-param">avoid_hosts</tt><tt class="py-op">=</tt><tt id="link-302" class="py-name"><a title="lxml.html.clean._avoid_hosts" class="py-name" href="#" onclick="return doclink('link-302', '_avoid_hosts', 'link-296');">_avoid_hosts</a></tt><tt class="py-op">,</tt> </tt>
<a name="L550"></a><tt class="py-lineno">550</tt>  <tt class="py-line">             <tt class="py-param">avoid_classes</tt><tt class="py-op">=</tt><tt id="link-303" class="py-name"><a title="lxml.html.clean._avoid_classes" class="py-name" href="#" onclick="return doclink('link-303', '_avoid_classes', 'link-299');">_avoid_classes</a></tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="autolink-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="autolink-expanded"><a name="L551"></a><tt class="py-lineno">551</tt>  <tt class="py-line">    <tt class="py-docstring">"""</tt> </tt>
<a name="L552"></a><tt class="py-lineno">552</tt>  <tt class="py-line"><tt class="py-docstring">    Turn any URLs into links.</tt> </tt>
<a name="L553"></a><tt class="py-lineno">553</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L554"></a><tt class="py-lineno">554</tt>  <tt class="py-line"><tt class="py-docstring">    It will search for links identified by the given regular</tt> </tt>
<a name="L555"></a><tt class="py-lineno">555</tt>  <tt class="py-line"><tt class="py-docstring">    expressions (by default mailto and http(s) links).</tt> </tt>
<a name="L556"></a><tt class="py-lineno">556</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L557"></a><tt class="py-lineno">557</tt>  <tt class="py-line"><tt class="py-docstring">    It won't link text in an element in avoid_elements, or an element</tt> </tt>
<a name="L558"></a><tt class="py-lineno">558</tt>  <tt class="py-line"><tt class="py-docstring">    with a class in avoid_classes.  It won't link to anything with a</tt> </tt>
<a name="L559"></a><tt class="py-lineno">559</tt>  <tt class="py-line"><tt class="py-docstring">    host that matches one of the regular expressions in avoid_hosts</tt> </tt>
<a name="L560"></a><tt class="py-lineno">560</tt>  <tt class="py-line"><tt class="py-docstring">    (default localhost and 127.0.0.1).</tt> </tt>
<a name="L561"></a><tt class="py-lineno">561</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L562"></a><tt class="py-lineno">562</tt>  <tt class="py-line"><tt class="py-docstring">    If you pass in an element, the element's tail will not be</tt> </tt>
<a name="L563"></a><tt class="py-lineno">563</tt>  <tt class="py-line"><tt class="py-docstring">    substituted, only the contents of the element.</tt> </tt>
<a name="L564"></a><tt class="py-lineno">564</tt>  <tt class="py-line"><tt class="py-docstring">    """</tt> </tt>
<a name="L565"></a><tt class="py-lineno">565</tt>  <tt class="py-line">    <tt class="py-keyword">if</tt> <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-304" class="py-name"><a title="lxml.etree._Comment.tag
lxml.etree._Element.tag
lxml.etree._Entity.tag
lxml.etree._ProcessingInstruction.tag
lxml.tests.test_xpathevaluator.tag
xml.etree.ElementTree.Element.tag" class="py-name" href="#" onclick="return doclink('link-304', 'tag', 'link-84');">tag</a></tt> <tt class="py-keyword">in</tt> <tt class="py-name">avoid_elements</tt><tt class="py-op">:</tt> </tt>
<a name="L566"></a><tt class="py-lineno">566</tt>  <tt class="py-line">        <tt class="py-keyword">return</tt> </tt>
<a name="L567"></a><tt class="py-lineno">567</tt>  <tt class="py-line">    <tt class="py-name">class_name</tt> <tt class="py-op">=</tt> <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-305" class="py-name"><a title="lxml.etree._Attrib.get
lxml.etree._Element.get
lxml.etree._IDDict.get
lxml.etree._ProcessingInstruction.get" class="py-name" href="#" onclick="return doclink('link-305', 'get', 'link-130');">get</a></tt><tt class="py-op">(</tt><tt class="py-string">'class'</tt><tt class="py-op">)</tt> </tt>
<a name="L568"></a><tt class="py-lineno">568</tt>  <tt class="py-line">    <tt class="py-keyword">if</tt> <tt class="py-name">class_name</tt><tt class="py-op">:</tt> </tt>
<a name="L569"></a><tt class="py-lineno">569</tt>  <tt class="py-line">        <tt class="py-name">class_name</tt> <tt class="py-op">=</tt> <tt class="py-name">class_name</tt><tt class="py-op">.</tt><tt class="py-name">split</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L570"></a><tt class="py-lineno">570</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt class="py-name">match_class</tt> <tt class="py-keyword">in</tt> <tt class="py-name">avoid_classes</tt><tt class="py-op">:</tt> </tt>
<a name="L571"></a><tt class="py-lineno">571</tt>  <tt class="py-line">            <tt class="py-keyword">if</tt> <tt class="py-name">match_class</tt> <tt class="py-keyword">in</tt> <tt class="py-name">class_name</tt><tt class="py-op">:</tt> </tt>
<a name="L572"></a><tt class="py-lineno">572</tt>  <tt class="py-line">                <tt class="py-keyword">return</tt> </tt>
<a name="L573"></a><tt class="py-lineno">573</tt>  <tt class="py-line">    <tt class="py-keyword">for</tt> <tt class="py-name">child</tt> <tt class="py-keyword">in</tt> <tt class="py-name">list</tt><tt class="py-op">(</tt><tt class="py-name">el</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L574"></a><tt class="py-lineno">574</tt>  <tt class="py-line">        <tt id="link-306" class="py-name" targets="Function lxml.html.clean.autolink()=lxml.html.clean-module.html#autolink"><a title="lxml.html.clean.autolink" class="py-name" href="#" onclick="return doclink('link-306', 'autolink', 'link-306');">autolink</a></tt><tt class="py-op">(</tt><tt class="py-name">child</tt><tt class="py-op">,</tt> <tt class="py-name">link_regexes</tt><tt class="py-op">=</tt><tt class="py-name">link_regexes</tt><tt class="py-op">,</tt> </tt>
<a name="L575"></a><tt class="py-lineno">575</tt>  <tt class="py-line">                 <tt class="py-name">avoid_elements</tt><tt class="py-op">=</tt><tt class="py-name">avoid_elements</tt><tt class="py-op">,</tt> </tt>
<a name="L576"></a><tt class="py-lineno">576</tt>  <tt class="py-line">                 <tt class="py-name">avoid_hosts</tt><tt class="py-op">=</tt><tt class="py-name">avoid_hosts</tt><tt class="py-op">,</tt> </tt>
<a name="L577"></a><tt class="py-lineno">577</tt>  <tt class="py-line">                 <tt class="py-name">avoid_classes</tt><tt class="py-op">=</tt><tt class="py-name">avoid_classes</tt><tt class="py-op">)</tt> </tt>
<a name="L578"></a><tt class="py-lineno">578</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">child</tt><tt class="py-op">.</tt><tt id="link-307" class="py-name" targets="Variable lxml.etree._Element.tail=lxml.etree._Element-class.html#tail,Variable xml.etree.ElementTree.Element.tail=xml.etree.ElementTree.Element-class.html#tail"><a title="lxml.etree._Element.tail
xml.etree.ElementTree.Element.tail" class="py-name" href="#" onclick="return doclink('link-307', 'tail', 'link-307');">tail</a></tt><tt class="py-op">:</tt> </tt>
<a name="L579"></a><tt class="py-lineno">579</tt>  <tt class="py-line">            <tt id="link-308" class="py-name"><a title="lxml.etree.QName.text
lxml.etree._Element.text
lxml.etree._Entity.text
lxml.objectify.ObjectifiedElement.text
xml.etree.ElementTree.Element.text" class="py-name" href="#" onclick="return doclink('link-308', 'text', 'link-141');">text</a></tt><tt class="py-op">,</tt> <tt class="py-name">tail_children</tt> <tt class="py-op">=</tt> <tt id="link-309" class="py-name" targets="Function lxml.html.clean._link_text()=lxml.html.clean-module.html#_link_text"><a title="lxml.html.clean._link_text" class="py-name" href="#" onclick="return doclink('link-309', '_link_text', 'link-309');">_link_text</a></tt><tt class="py-op">(</tt> </tt>
<a name="L580"></a><tt class="py-lineno">580</tt>  <tt class="py-line">                <tt class="py-name">child</tt><tt class="py-op">.</tt><tt id="link-310" class="py-name"><a title="lxml.etree._Element.tail
xml.etree.ElementTree.Element.tail" class="py-name" href="#" onclick="return doclink('link-310', 'tail', 'link-307');">tail</a></tt><tt class="py-op">,</tt> <tt class="py-name">link_regexes</tt><tt class="py-op">,</tt> <tt class="py-name">avoid_hosts</tt><tt class="py-op">,</tt> <tt class="py-name">factory</tt><tt class="py-op">=</tt><tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-311" class="py-name" targets="Method lxml.etree._Element.makeelement()=lxml.etree._Element-class.html#makeelement,Method lxml.etree.iterparse.makeelement()=lxml.etree.iterparse-class.html#makeelement,Function lxml.tests.selftest2.makeelement()=lxml.tests.selftest2-module.html#makeelement"><a title="lxml.etree._Element.makeelement
lxml.etree.iterparse.makeelement
lxml.tests.selftest2.makeelement" class="py-name" href="#" onclick="return doclink('link-311', 'makeelement', 'link-311');">makeelement</a></tt><tt class="py-op">)</tt> </tt>
<a name="L581"></a><tt class="py-lineno">581</tt>  <tt class="py-line">            <tt class="py-keyword">if</tt> <tt class="py-name">tail_children</tt><tt class="py-op">:</tt> </tt>
<a name="L582"></a><tt class="py-lineno">582</tt>  <tt class="py-line">                <tt class="py-name">child</tt><tt class="py-op">.</tt><tt id="link-312" class="py-name"><a title="lxml.etree._Element.tail
xml.etree.ElementTree.Element.tail" class="py-name" href="#" onclick="return doclink('link-312', 'tail', 'link-307');">tail</a></tt> <tt class="py-op">=</tt> <tt id="link-313" class="py-name"><a title="lxml.etree.QName.text
lxml.etree._Element.text
lxml.etree._Entity.text
lxml.objectify.ObjectifiedElement.text
xml.etree.ElementTree.Element.text" class="py-name" href="#" onclick="return doclink('link-313', 'text', 'link-141');">text</a></tt> </tt>
<a name="L583"></a><tt class="py-lineno">583</tt>  <tt class="py-line">                <tt id="link-314" class="py-name" targets="Method lxml.etree._Element.index()=lxml.etree._Element-class.html#index"><a title="lxml.etree._Element.index" class="py-name" href="#" onclick="return doclink('link-314', 'index', 'link-314');">index</a></tt> <tt class="py-op">=</tt> <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-315" class="py-name"><a title="lxml.etree._Element.index" class="py-name" href="#" onclick="return doclink('link-315', 'index', 'link-314');">index</a></tt><tt class="py-op">(</tt><tt class="py-name">child</tt><tt class="py-op">)</tt> </tt>
<a name="L584"></a><tt class="py-lineno">584</tt>  <tt class="py-line">                <tt class="py-name">el</tt><tt class="py-op">[</tt><tt id="link-316" class="py-name"><a title="lxml.etree._Element.index" class="py-name" href="#" onclick="return doclink('link-316', 'index', 'link-314');">index</a></tt><tt class="py-op">+</tt><tt class="py-number">1</tt><tt class="py-op">:</tt><tt id="link-317" class="py-name"><a title="lxml.etree._Element.index" class="py-name" href="#" onclick="return doclink('link-317', 'index', 'link-314');">index</a></tt><tt class="py-op">+</tt><tt class="py-number">1</tt><tt class="py-op">]</tt> <tt class="py-op">=</tt> <tt class="py-name">tail_children</tt> </tt>
<a name="L585"></a><tt class="py-lineno">585</tt>  <tt class="py-line">    <tt class="py-keyword">if</tt> <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-318" class="py-name"><a title="lxml.etree.QName.text
lxml.etree._Element.text
lxml.etree._Entity.text
lxml.objectify.ObjectifiedElement.text
xml.etree.ElementTree.Element.text" class="py-name" href="#" onclick="return doclink('link-318', 'text', 'link-141');">text</a></tt><tt class="py-op">:</tt> </tt>
<a name="L586"></a><tt class="py-lineno">586</tt>  <tt class="py-line">        <tt id="link-319" class="py-name"><a title="lxml.etree.QName.text
lxml.etree._Element.text
lxml.etree._Entity.text
lxml.objectify.ObjectifiedElement.text
xml.etree.ElementTree.Element.text" class="py-name" href="#" onclick="return doclink('link-319', 'text', 'link-141');">text</a></tt><tt class="py-op">,</tt> <tt class="py-name">pre_children</tt> <tt class="py-op">=</tt> <tt id="link-320" class="py-name"><a title="lxml.html.clean._link_text" class="py-name" href="#" onclick="return doclink('link-320', '_link_text', 'link-309');">_link_text</a></tt><tt class="py-op">(</tt> </tt>
<a name="L587"></a><tt class="py-lineno">587</tt>  <tt class="py-line">            <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-321" class="py-name"><a title="lxml.etree.QName.text
lxml.etree._Element.text
lxml.etree._Entity.text
lxml.objectify.ObjectifiedElement.text
xml.etree.ElementTree.Element.text" class="py-name" href="#" onclick="return doclink('link-321', 'text', 'link-141');">text</a></tt><tt class="py-op">,</tt> <tt class="py-name">link_regexes</tt><tt class="py-op">,</tt> <tt class="py-name">avoid_hosts</tt><tt class="py-op">,</tt> <tt class="py-name">factory</tt><tt class="py-op">=</tt><tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-322" class="py-name"><a title="lxml.etree._Element.makeelement
lxml.etree.iterparse.makeelement
lxml.tests.selftest2.makeelement" class="py-name" href="#" onclick="return doclink('link-322', 'makeelement', 'link-311');">makeelement</a></tt><tt class="py-op">)</tt> </tt>
<a name="L588"></a><tt class="py-lineno">588</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">pre_children</tt><tt class="py-op">:</tt> </tt>
<a name="L589"></a><tt class="py-lineno">589</tt>  <tt class="py-line">            <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-323" class="py-name"><a title="lxml.etree.QName.text
lxml.etree._Element.text
lxml.etree._Entity.text
lxml.objectify.ObjectifiedElement.text
xml.etree.ElementTree.Element.text" class="py-name" href="#" onclick="return doclink('link-323', 'text', 'link-141');">text</a></tt> <tt class="py-op">=</tt> <tt id="link-324" class="py-name"><a title="lxml.etree.QName.text
lxml.etree._Element.text
lxml.etree._Entity.text
lxml.objectify.ObjectifiedElement.text
xml.etree.ElementTree.Element.text" class="py-name" href="#" onclick="return doclink('link-324', 'text', 'link-141');">text</a></tt> </tt>
<a name="L590"></a><tt class="py-lineno">590</tt>  <tt class="py-line">            <tt class="py-name">el</tt><tt class="py-op">[</tt><tt class="py-op">:</tt><tt class="py-number">0</tt><tt class="py-op">]</tt> <tt class="py-op">=</tt> <tt class="py-name">pre_children</tt> </tt>
</div><a name="L591"></a><tt class="py-lineno">591</tt>  <tt class="py-line"> </tt>
<!-- Source listing for the function _link_text(text, link_regexes, avoid_hosts,
     factory): original source lines 592 to 648, plus the blank separator
     line 649 that follows the body.
     Layout: the "_link_text-def" block holds the signature line and its
     toggle link; "_link_text-collapsed" is an empty, initially hidden
     placeholder; "_link_text-expanded" holds the function body. Every listing
     line carries an <a name="Lnnn"> anchor and a py-lineno cell.
     The toggle() and doclink() handlers are not defined in this file;
     presumably they come from epydoc.js, which the page head loads
     (TODO confirm). Keep any further comments inline: the listing appears to
     be whitespace-sensitive, so extra line breaks between elements may show
     up as blank lines (TODO confirm against the enclosing element). --><a name="_link_text"></a><div id="_link_text-def"><a name="L592"></a><tt class="py-lineno">592</tt> <a class="py-toggle" href="#" id="_link_text-toggle" onclick="return toggle('_link_text');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="lxml.html.clean-module.html#_link_text">_link_text</a><tt class="py-op">(</tt><tt class="py-param">text</tt><tt class="py-op">,</tt> <tt class="py-param">link_regexes</tt><tt class="py-op">,</tt> <tt class="py-param">avoid_hosts</tt><tt class="py-op">,</tt> <tt class="py-param">factory</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="_link_text-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="_link_text-expanded"><a name="L593"></a><tt class="py-lineno">593</tt>  <tt class="py-line">    <tt class="py-name">leading_text</tt> <tt class="py-op">=</tt> <tt class="py-string">''</tt> </tt>
<a name="L594"></a><tt class="py-lineno">594</tt>  <tt class="py-line">    <tt id="link-325" class="py-name"><a title="lxml.html.clean.Cleaner.links" class="py-name" href="#" onclick="return doclink('link-325', 'links', 'link-47');">links</a></tt> <tt class="py-op">=</tt> <tt class="py-op">[</tt><tt class="py-op">]</tt> </tt>
<a name="L595"></a><tt class="py-lineno">595</tt>  <tt class="py-line">    <tt class="py-name">last_pos</tt> <tt class="py-op">=</tt> <tt class="py-number">0</tt> </tt>
<a name="L596"></a><tt class="py-lineno">596</tt>  <tt class="py-line">    <tt class="py-keyword">while</tt> <tt class="py-number">1</tt><tt class="py-op">:</tt> </tt>
<a name="L597"></a><tt class="py-lineno">597</tt>  <tt class="py-line">        <tt class="py-name">best_match</tt><tt class="py-op">,</tt> <tt class="py-name">best_pos</tt> <tt class="py-op">=</tt> <tt class="py-name">None</tt><tt class="py-op">,</tt> <tt class="py-name">None</tt> </tt>
<a name="L598"></a><tt class="py-lineno">598</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt class="py-name">regex</tt> <tt class="py-keyword">in</tt> <tt class="py-name">link_regexes</tt><tt class="py-op">:</tt> </tt>
<a name="L599"></a><tt class="py-lineno">599</tt>  <tt class="py-line">            <tt class="py-name">regex_pos</tt> <tt class="py-op">=</tt> <tt class="py-name">last_pos</tt> </tt>
<a name="L600"></a><tt class="py-lineno">600</tt>  <tt class="py-line">            <tt class="py-keyword">while</tt> <tt class="py-number">1</tt><tt class="py-op">:</tt> </tt>
<a name="L601"></a><tt class="py-lineno">601</tt>  <tt class="py-line">                <tt class="py-name">match</tt> <tt class="py-op">=</tt> <tt class="py-name">regex</tt><tt class="py-op">.</tt><tt class="py-name">search</tt><tt class="py-op">(</tt><tt id="link-326" class="py-name"><a title="lxml.etree.QName.text
lxml.etree._Element.text
lxml.etree._Entity.text
lxml.objectify.ObjectifiedElement.text
xml.etree.ElementTree.Element.text" class="py-name" href="#" onclick="return doclink('link-326', 'text', 'link-141');">text</a></tt><tt class="py-op">,</tt> <tt class="py-name">pos</tt><tt class="py-op">=</tt><tt class="py-name">regex_pos</tt><tt class="py-op">)</tt> </tt>
<a name="L602"></a><tt class="py-lineno">602</tt>  <tt class="py-line">                <tt class="py-keyword">if</tt> <tt class="py-name">match</tt> <tt class="py-keyword">is</tt> <tt class="py-name">None</tt><tt class="py-op">:</tt> </tt>
<a name="L603"></a><tt class="py-lineno">603</tt>  <tt class="py-line">                    <tt class="py-keyword">break</tt> </tt>
<a name="L604"></a><tt class="py-lineno">604</tt>  <tt class="py-line">                <tt class="py-name">host</tt> <tt class="py-op">=</tt> <tt class="py-name">match</tt><tt class="py-op">.</tt><tt class="py-name">group</tt><tt class="py-op">(</tt><tt class="py-string">'host'</tt><tt class="py-op">)</tt> </tt>
<a name="L605"></a><tt class="py-lineno">605</tt>  <tt class="py-line">                <tt class="py-keyword">for</tt> <tt class="py-name">host_regex</tt> <tt class="py-keyword">in</tt> <tt class="py-name">avoid_hosts</tt><tt class="py-op">:</tt> </tt>
<a name="L606"></a><tt class="py-lineno">606</tt>  <tt class="py-line">                    <tt class="py-keyword">if</tt> <tt class="py-name">host_regex</tt><tt class="py-op">.</tt><tt class="py-name">search</tt><tt class="py-op">(</tt><tt class="py-name">host</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L607"></a><tt class="py-lineno">607</tt>  <tt class="py-line">                        <tt class="py-name">regex_pos</tt> <tt class="py-op">=</tt> <tt class="py-name">match</tt><tt class="py-op">.</tt><tt id="link-327" class="py-name" targets="Method lxml.etree.TreeBuilder.end()=lxml.etree.TreeBuilder-class.html#end"><a title="lxml.etree.TreeBuilder.end" class="py-name" href="#" onclick="return doclink('link-327', 'end', 'link-327');">end</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L608"></a><tt class="py-lineno">608</tt>  <tt class="py-line">                        <tt class="py-keyword">break</tt> </tt>
<a name="L609"></a><tt class="py-lineno">609</tt>  <tt class="py-line">                <tt class="py-keyword">else</tt><tt class="py-op">:</tt> </tt>
<a name="L610"></a><tt class="py-lineno">610</tt>  <tt class="py-line">                    <tt class="py-keyword">break</tt> </tt>
<a name="L611"></a><tt class="py-lineno">611</tt>  <tt class="py-line">            <tt class="py-keyword">if</tt> <tt class="py-name">match</tt> <tt class="py-keyword">is</tt> <tt class="py-name">None</tt><tt class="py-op">:</tt> </tt>
<a name="L612"></a><tt class="py-lineno">612</tt>  <tt class="py-line">                <tt class="py-keyword">continue</tt> </tt>
<a name="L613"></a><tt class="py-lineno">613</tt>  <tt class="py-line">            <tt class="py-keyword">if</tt> <tt class="py-name">best_pos</tt> <tt class="py-keyword">is</tt> <tt class="py-name">None</tt> <tt class="py-keyword">or</tt> <tt class="py-name">match</tt><tt class="py-op">.</tt><tt id="link-328" class="py-name" targets="Method lxml.etree.TreeBuilder.start()=lxml.etree.TreeBuilder-class.html#start"><a title="lxml.etree.TreeBuilder.start" class="py-name" href="#" onclick="return doclink('link-328', 'start', 'link-328');">start</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> <tt class="py-op">&lt;</tt> <tt class="py-name">best_pos</tt><tt class="py-op">:</tt> </tt>
<a name="L614"></a><tt class="py-lineno">614</tt>  <tt class="py-line">                <tt class="py-name">best_match</tt> <tt class="py-op">=</tt> <tt class="py-name">match</tt> </tt>
<a name="L615"></a><tt class="py-lineno">615</tt>  <tt class="py-line">                <tt class="py-name">best_pos</tt> <tt class="py-op">=</tt> <tt class="py-name">match</tt><tt class="py-op">.</tt><tt id="link-329" class="py-name"><a title="lxml.etree.TreeBuilder.start" class="py-name" href="#" onclick="return doclink('link-329', 'start', 'link-328');">start</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L616"></a><tt class="py-lineno">616</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">best_match</tt> <tt class="py-keyword">is</tt> <tt class="py-name">None</tt><tt class="py-op">:</tt> </tt>
<a name="L617"></a><tt class="py-lineno">617</tt>  <tt class="py-line">            <tt class="py-comment"># No more matches</tt> </tt>
<a name="L618"></a><tt class="py-lineno">618</tt>  <tt class="py-line">            <tt class="py-keyword">if</tt> <tt id="link-330" class="py-name"><a title="lxml.html.clean.Cleaner.links" class="py-name" href="#" onclick="return doclink('link-330', 'links', 'link-47');">links</a></tt><tt class="py-op">:</tt> </tt>
<a name="L619"></a><tt class="py-lineno">619</tt>  <tt class="py-line">                <tt class="py-keyword">assert</tt> <tt class="py-keyword">not</tt> <tt id="link-331" class="py-name"><a title="lxml.html.clean.Cleaner.links" class="py-name" href="#" onclick="return doclink('link-331', 'links', 'link-47');">links</a></tt><tt class="py-op">[</tt><tt class="py-op">-</tt><tt class="py-number">1</tt><tt class="py-op">]</tt><tt class="py-op">.</tt><tt id="link-332" class="py-name"><a title="lxml.etree._Element.tail
xml.etree.ElementTree.Element.tail" class="py-name" href="#" onclick="return doclink('link-332', 'tail', 'link-307');">tail</a></tt> </tt>
<a name="L620"></a><tt class="py-lineno">620</tt>  <tt class="py-line">                <tt id="link-333" class="py-name"><a title="lxml.html.clean.Cleaner.links" class="py-name" href="#" onclick="return doclink('link-333', 'links', 'link-47');">links</a></tt><tt class="py-op">[</tt><tt class="py-op">-</tt><tt class="py-number">1</tt><tt class="py-op">]</tt><tt class="py-op">.</tt><tt id="link-334" class="py-name"><a title="lxml.etree._Element.tail
xml.etree.ElementTree.Element.tail" class="py-name" href="#" onclick="return doclink('link-334', 'tail', 'link-307');">tail</a></tt> <tt class="py-op">=</tt> <tt id="link-335" class="py-name"><a title="lxml.etree.QName.text
lxml.etree._Element.text
lxml.etree._Entity.text
lxml.objectify.ObjectifiedElement.text
xml.etree.ElementTree.Element.text" class="py-name" href="#" onclick="return doclink('link-335', 'text', 'link-141');">text</a></tt> </tt>
<a name="L621"></a><tt class="py-lineno">621</tt>  <tt class="py-line">            <tt class="py-keyword">else</tt><tt class="py-op">:</tt> </tt>
<a name="L622"></a><tt class="py-lineno">622</tt>  <tt class="py-line">                <tt class="py-keyword">assert</tt> <tt class="py-keyword">not</tt> <tt class="py-name">leading_text</tt> </tt>
<a name="L623"></a><tt class="py-lineno">623</tt>  <tt class="py-line">                <tt class="py-name">leading_text</tt> <tt class="py-op">=</tt> <tt id="link-336" class="py-name"><a title="lxml.etree.QName.text
lxml.etree._Element.text
lxml.etree._Entity.text
lxml.objectify.ObjectifiedElement.text
xml.etree.ElementTree.Element.text" class="py-name" href="#" onclick="return doclink('link-336', 'text', 'link-141');">text</a></tt> </tt>
<a name="L624"></a><tt class="py-lineno">624</tt>  <tt class="py-line">            <tt class="py-keyword">break</tt> </tt>
<a name="L625"></a><tt class="py-lineno">625</tt>  <tt class="py-line">        <tt class="py-name">link</tt> <tt class="py-op">=</tt> <tt class="py-name">best_match</tt><tt class="py-op">.</tt><tt class="py-name">group</tt><tt class="py-op">(</tt><tt class="py-number">0</tt><tt class="py-op">)</tt> </tt>
<a name="L626"></a><tt class="py-lineno">626</tt>  <tt class="py-line">        <tt id="link-337" class="py-name"><a title="lxml.etree.TreeBuilder.end" class="py-name" href="#" onclick="return doclink('link-337', 'end', 'link-327');">end</a></tt> <tt class="py-op">=</tt> <tt class="py-name">best_match</tt><tt class="py-op">.</tt><tt id="link-338" class="py-name"><a title="lxml.etree.TreeBuilder.end" class="py-name" href="#" onclick="return doclink('link-338', 'end', 'link-327');">end</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L627"></a><tt class="py-lineno">627</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">link</tt><tt class="py-op">.</tt><tt class="py-name">endswith</tt><tt class="py-op">(</tt><tt class="py-string">'.'</tt><tt class="py-op">)</tt> <tt class="py-keyword">or</tt> <tt class="py-name">link</tt><tt class="py-op">.</tt><tt class="py-name">endswith</tt><tt class="py-op">(</tt><tt class="py-string">','</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L628"></a><tt class="py-lineno">628</tt>  <tt class="py-line">            <tt class="py-comment"># These punctuation marks shouldn't end a link</tt> </tt>
<a name="L629"></a><tt class="py-lineno">629</tt>  <tt class="py-line">            <tt id="link-339" class="py-name"><a title="lxml.etree.TreeBuilder.end" class="py-name" href="#" onclick="return doclink('link-339', 'end', 'link-327');">end</a></tt> <tt class="py-op">-=</tt> <tt class="py-number">1</tt> </tt>
<a name="L630"></a><tt class="py-lineno">630</tt>  <tt class="py-line">            <tt class="py-name">link</tt> <tt class="py-op">=</tt> <tt class="py-name">link</tt><tt class="py-op">[</tt><tt class="py-op">:</tt><tt class="py-op">-</tt><tt class="py-number">1</tt><tt class="py-op">]</tt> </tt>
<a name="L631"></a><tt class="py-lineno">631</tt>  <tt class="py-line">        <tt class="py-name">prev_text</tt> <tt class="py-op">=</tt> <tt id="link-340" class="py-name"><a title="lxml.etree.QName.text
lxml.etree._Element.text
lxml.etree._Entity.text
lxml.objectify.ObjectifiedElement.text
xml.etree.ElementTree.Element.text" class="py-name" href="#" onclick="return doclink('link-340', 'text', 'link-141');">text</a></tt><tt class="py-op">[</tt><tt class="py-op">:</tt><tt class="py-name">best_match</tt><tt class="py-op">.</tt><tt id="link-341" class="py-name"><a title="lxml.etree.TreeBuilder.start" class="py-name" href="#" onclick="return doclink('link-341', 'start', 'link-328');">start</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">]</tt> </tt>
<a name="L632"></a><tt class="py-lineno">632</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt id="link-342" class="py-name"><a title="lxml.html.clean.Cleaner.links" class="py-name" href="#" onclick="return doclink('link-342', 'links', 'link-47');">links</a></tt><tt class="py-op">:</tt> </tt>
<a name="L633"></a><tt class="py-lineno">633</tt>  <tt class="py-line">            <tt class="py-keyword">assert</tt> <tt class="py-keyword">not</tt> <tt id="link-343" class="py-name"><a title="lxml.html.clean.Cleaner.links" class="py-name" href="#" onclick="return doclink('link-343', 'links', 'link-47');">links</a></tt><tt class="py-op">[</tt><tt class="py-op">-</tt><tt class="py-number">1</tt><tt class="py-op">]</tt><tt class="py-op">.</tt><tt id="link-344" class="py-name"><a title="lxml.etree._Element.tail
xml.etree.ElementTree.Element.tail" class="py-name" href="#" onclick="return doclink('link-344', 'tail', 'link-307');">tail</a></tt> </tt>
<a name="L634"></a><tt class="py-lineno">634</tt>  <tt class="py-line">            <tt id="link-345" class="py-name"><a title="lxml.html.clean.Cleaner.links" class="py-name" href="#" onclick="return doclink('link-345', 'links', 'link-47');">links</a></tt><tt class="py-op">[</tt><tt class="py-op">-</tt><tt class="py-number">1</tt><tt class="py-op">]</tt><tt class="py-op">.</tt><tt id="link-346" class="py-name"><a title="lxml.etree._Element.tail
xml.etree.ElementTree.Element.tail" class="py-name" href="#" onclick="return doclink('link-346', 'tail', 'link-307');">tail</a></tt> <tt class="py-op">=</tt> <tt class="py-name">prev_text</tt> </tt>
<a name="L635"></a><tt class="py-lineno">635</tt>  <tt class="py-line">        <tt class="py-keyword">else</tt><tt class="py-op">:</tt> </tt>
<a name="L636"></a><tt class="py-lineno">636</tt>  <tt class="py-line">            <tt class="py-keyword">assert</tt> <tt class="py-keyword">not</tt> <tt class="py-name">leading_text</tt> </tt>
<a name="L637"></a><tt class="py-lineno">637</tt>  <tt class="py-line">            <tt class="py-name">leading_text</tt> <tt class="py-op">=</tt> <tt class="py-name">prev_text</tt> </tt>
<a name="L638"></a><tt class="py-lineno">638</tt>  <tt class="py-line">        <tt class="py-name">anchor</tt> <tt class="py-op">=</tt> <tt class="py-name">factory</tt><tt class="py-op">(</tt><tt class="py-string">'a'</tt><tt class="py-op">)</tt> </tt>
<a name="L639"></a><tt class="py-lineno">639</tt>  <tt class="py-line">        <tt class="py-name">anchor</tt><tt class="py-op">.</tt><tt id="link-347" class="py-name"><a title="lxml.etree._Element.set
lxml.etree._XSLTProcessingInstruction.set
lxml.html.HtmlElement.set
lxml.html.HtmlMixin.set" class="py-name" href="#" onclick="return doclink('link-347', 'set', 'link-66');">set</a></tt><tt class="py-op">(</tt><tt class="py-string">'href'</tt><tt class="py-op">,</tt> <tt class="py-name">link</tt><tt class="py-op">)</tt> </tt>
<a name="L640"></a><tt class="py-lineno">640</tt>  <tt class="py-line">        <tt id="link-348" class="py-name" targets="Variable lxml.html.HtmlMixin.body=lxml.html.HtmlMixin-class.html#body"><a title="lxml.html.HtmlMixin.body" class="py-name" href="#" onclick="return doclink('link-348', 'body', 'link-348');">body</a></tt> <tt class="py-op">=</tt> <tt class="py-name">best_match</tt><tt class="py-op">.</tt><tt class="py-name">group</tt><tt class="py-op">(</tt><tt class="py-string">'body'</tt><tt class="py-op">)</tt> </tt>
<a name="L641"></a><tt class="py-lineno">641</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-keyword">not</tt> <tt id="link-349" class="py-name"><a title="lxml.html.HtmlMixin.body" class="py-name" href="#" onclick="return doclink('link-349', 'body', 'link-348');">body</a></tt><tt class="py-op">:</tt> </tt>
<a name="L642"></a><tt class="py-lineno">642</tt>  <tt class="py-line">            <tt id="link-350" class="py-name"><a title="lxml.html.HtmlMixin.body" class="py-name" href="#" onclick="return doclink('link-350', 'body', 'link-348');">body</a></tt> <tt class="py-op">=</tt> <tt class="py-name">link</tt> </tt>
<a name="L643"></a><tt class="py-lineno">643</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt id="link-351" class="py-name"><a title="lxml.html.HtmlMixin.body" class="py-name" href="#" onclick="return doclink('link-351', 'body', 'link-348');">body</a></tt><tt class="py-op">.</tt><tt class="py-name">endswith</tt><tt class="py-op">(</tt><tt class="py-string">'.'</tt><tt class="py-op">)</tt> <tt class="py-keyword">or</tt> <tt id="link-352" class="py-name"><a title="lxml.html.HtmlMixin.body" class="py-name" href="#" onclick="return doclink('link-352', 'body', 'link-348');">body</a></tt><tt class="py-op">.</tt><tt class="py-name">endswith</tt><tt class="py-op">(</tt><tt class="py-string">','</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L644"></a><tt class="py-lineno">644</tt>  <tt class="py-line">            <tt id="link-353" class="py-name"><a title="lxml.html.HtmlMixin.body" class="py-name" href="#" onclick="return doclink('link-353', 'body', 'link-348');">body</a></tt> <tt class="py-op">=</tt> <tt id="link-354" class="py-name"><a title="lxml.html.HtmlMixin.body" class="py-name" href="#" onclick="return doclink('link-354', 'body', 'link-348');">body</a></tt><tt class="py-op">[</tt><tt class="py-op">:</tt><tt class="py-op">-</tt><tt class="py-number">1</tt><tt class="py-op">]</tt> </tt>
<a name="L645"></a><tt class="py-lineno">645</tt>  <tt class="py-line">        <tt class="py-name">anchor</tt><tt class="py-op">.</tt><tt id="link-355" class="py-name"><a title="lxml.etree.QName.text
lxml.etree._Element.text
lxml.etree._Entity.text
lxml.objectify.ObjectifiedElement.text
xml.etree.ElementTree.Element.text" class="py-name" href="#" onclick="return doclink('link-355', 'text', 'link-141');">text</a></tt> <tt class="py-op">=</tt> <tt id="link-356" class="py-name"><a title="lxml.html.HtmlMixin.body" class="py-name" href="#" onclick="return doclink('link-356', 'body', 'link-348');">body</a></tt> </tt>
<a name="L646"></a><tt class="py-lineno">646</tt>  <tt class="py-line">        <tt id="link-357" class="py-name"><a title="lxml.html.clean.Cleaner.links" class="py-name" href="#" onclick="return doclink('link-357', 'links', 'link-47');">links</a></tt><tt class="py-op">.</tt><tt id="link-358" class="py-name"><a title="lxml.etree._Element.append" class="py-name" href="#" onclick="return doclink('link-358', 'append', 'link-205');">append</a></tt><tt class="py-op">(</tt><tt class="py-name">anchor</tt><tt class="py-op">)</tt> </tt>
<a name="L647"></a><tt class="py-lineno">647</tt>  <tt class="py-line">        <tt id="link-359" class="py-name"><a title="lxml.etree.QName.text
lxml.etree._Element.text
lxml.etree._Entity.text
lxml.objectify.ObjectifiedElement.text
xml.etree.ElementTree.Element.text" class="py-name" href="#" onclick="return doclink('link-359', 'text', 'link-141');">text</a></tt> <tt class="py-op">=</tt> <tt id="link-360" class="py-name"><a title="lxml.etree.QName.text
lxml.etree._Element.text
lxml.etree._Entity.text
lxml.objectify.ObjectifiedElement.text
xml.etree.ElementTree.Element.text" class="py-name" href="#" onclick="return doclink('link-360', 'text', 'link-141');">text</a></tt><tt class="py-op">[</tt><tt id="link-361" class="py-name"><a title="lxml.etree.TreeBuilder.end" class="py-name" href="#" onclick="return doclink('link-361', 'end', 'link-327');">end</a></tt><tt class="py-op">:</tt><tt class="py-op">]</tt> </tt>
<a name="L648"></a><tt class="py-lineno">648</tt>  <tt class="py-line">    <tt class="py-keyword">return</tt> <tt class="py-name">leading_text</tt><tt class="py-op">,</tt> <tt id="link-362" class="py-name"><a title="lxml.html.clean.Cleaner.links" class="py-name" href="#" onclick="return doclink('link-362', 'links', 'link-47');">links</a></tt> </tt>
</div><a name="L649"></a><tt class="py-lineno">649</tt>  <tt class="py-line">                 </tt>
<a name="autolink_html"></a><div id="autolink_html-def"><a name="L650"></a><tt class="py-lineno">650</tt> <a class="py-toggle" href="#" id="autolink_html-toggle" onclick="return toggle('autolink_html');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="lxml.html.clean-module.html#autolink_html">autolink_html</a><tt class="py-op">(</tt><tt class="py-param">html</tt><tt class="py-op">,</tt> <tt class="py-op">*</tt><tt class="py-param">args</tt><tt class="py-op">,</tt> <tt class="py-op">**</tt><tt class="py-param">kw</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="autolink_html-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="autolink_html-expanded"><a name="L651"></a><tt class="py-lineno">651</tt>  <tt class="py-line">    <tt class="py-name">result_type</tt> <tt class="py-op">=</tt> <tt id="link-363" class="py-name"><a title="lxml.etree._LogEntry.type
lxml.html.InputElement.type" class="py-name" href="#" onclick="return doclink('link-363', 'type', 'link-279');">type</a></tt><tt class="py-op">(</tt><tt id="link-364" class="py-name"><a title="lxml.html
lxml.html.diff.href_token.html
lxml.html.diff.tag_token.html
lxml.html.diff.token.html" class="py-name" href="#" onclick="return doclink('link-364', 'html', 'link-5');">html</a></tt><tt class="py-op">)</tt> </tt>
<a name="L652"></a><tt class="py-lineno">652</tt>  <tt class="py-line">    <tt class="py-keyword">if</tt> <tt class="py-name">isinstance</tt><tt class="py-op">(</tt><tt id="link-365" class="py-name"><a title="lxml.html
lxml.html.diff.href_token.html
lxml.html.diff.tag_token.html
lxml.html.diff.token.html" class="py-name" href="#" onclick="return doclink('link-365', 'html', 'link-5');">html</a></tt><tt class="py-op">,</tt> <tt id="link-366" class="py-name"><a title="lxml.html.clean.basestring" class="py-name" href="#" onclick="return doclink('link-366', 'basestring', 'link-17');">basestring</a></tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L653"></a><tt class="py-lineno">653</tt>  <tt class="py-line">        <tt class="py-name">doc</tt> <tt class="py-op">=</tt> <tt id="link-367" class="py-name"><a title="lxml.etree.fromstring
lxml.html.html5parser.fromstring
lxml.html.soupparser.fromstring
lxml.objectify.fromstring" class="py-name" href="#" onclick="return doclink('link-367', 'fromstring', 'link-9');">fromstring</a></tt><tt class="py-op">(</tt><tt id="link-368" class="py-name"><a title="lxml.html
lxml.html.diff.href_token.html
lxml.html.diff.tag_token.html
lxml.html.diff.token.html" class="py-name" href="#" onclick="return doclink('link-368', 'html', 'link-5');">html</a></tt><tt class="py-op">)</tt> </tt>
<a name="L654"></a><tt class="py-lineno">654</tt>  <tt class="py-line">    <tt class="py-keyword">else</tt><tt class="py-op">:</tt> </tt>
<a name="L655"></a><tt class="py-lineno">655</tt>  <tt class="py-line">        <tt class="py-name">doc</tt> <tt class="py-op">=</tt> <tt id="link-369" class="py-name"><a title="lxml.etree.PyErrorLog.copy
lxml.etree._BaseErrorLog.copy
lxml.etree._ErrorLog.copy
lxml.etree._IDDict.copy
lxml.etree._ListErrorLog.copy
lxml.tests.selftest2.copy" class="py-name" href="#" onclick="return doclink('link-369', 'copy', 'link-0');">copy</a></tt><tt class="py-op">.</tt><tt class="py-name">deepcopy</tt><tt class="py-op">(</tt><tt id="link-370" class="py-name"><a title="lxml.html
lxml.html.diff.href_token.html
lxml.html.diff.tag_token.html
lxml.html.diff.token.html" class="py-name" href="#" onclick="return doclink('link-370', 'html', 'link-5');">html</a></tt><tt class="py-op">)</tt> </tt>
<a name="L656"></a><tt class="py-lineno">656</tt>  <tt class="py-line">    <tt id="link-371" class="py-name"><a title="lxml.html.clean.autolink" class="py-name" href="#" onclick="return doclink('link-371', 'autolink', 'link-306');">autolink</a></tt><tt class="py-op">(</tt><tt class="py-name">doc</tt><tt class="py-op">,</tt> <tt class="py-op">*</tt><tt class="py-name">args</tt><tt class="py-op">,</tt> <tt class="py-op">**</tt><tt class="py-name">kw</tt><tt class="py-op">)</tt> </tt>
<a name="L657"></a><tt class="py-lineno">657</tt>  <tt class="py-line">    <tt class="py-keyword">return</tt> <tt class="py-name">_transform_result</tt><tt class="py-op">(</tt><tt class="py-name">result_type</tt><tt class="py-op">,</tt> <tt class="py-name">doc</tt><tt class="py-op">)</tt> </tt>
</div><a name="L658"></a><tt class="py-lineno">658</tt>  <tt class="py-line"> </tt>
<a name="L659"></a><tt class="py-lineno">659</tt>  <tt class="py-line"><tt id="link-372" class="py-name" targets="Function lxml.html.clean.autolink_html()=lxml.html.clean-module.html#autolink_html"><a title="lxml.html.clean.autolink_html" class="py-name" href="#" onclick="return doclink('link-372', 'autolink_html', 'link-372');">autolink_html</a></tt><tt class="py-op">.</tt><tt id="link-373" class="py-name" targets="Variable lxml.html.ElementSoup.__doc__=lxml.html.ElementSoup-module.html#__doc__"><a title="lxml.html.ElementSoup.__doc__" class="py-name" href="#" onclick="return doclink('link-373', '__doc__', 'link-373');">__doc__</a></tt> <tt class="py-op">=</tt> <tt id="link-374" class="py-name"><a title="lxml.html.clean.autolink" class="py-name" href="#" onclick="return doclink('link-374', 'autolink', 'link-306');">autolink</a></tt><tt class="py-op">.</tt><tt id="link-375" class="py-name"><a title="lxml.html.ElementSoup.__doc__" class="py-name" href="#" onclick="return doclink('link-375', '__doc__', 'link-373');">__doc__</a></tt> </tt>
<a name="L660"></a><tt class="py-lineno">660</tt>  <tt class="py-line"> </tt>
<a name="L661"></a><tt class="py-lineno">661</tt>  <tt class="py-line"><tt class="py-comment">############################################################</tt> </tt>
<a name="L662"></a><tt class="py-lineno">662</tt>  <tt class="py-line"><tt class="py-comment">## Word wrapping</tt> </tt>
<a name="L663"></a><tt class="py-lineno">663</tt>  <tt class="py-line"><tt class="py-comment">############################################################</tt> </tt>
<a name="L664"></a><tt class="py-lineno">664</tt>  <tt class="py-line"> </tt>
<a name="L665"></a><tt class="py-lineno">665</tt>  <tt class="py-line"><tt id="link-376" class="py-name" targets="Variable lxml.html.clean._avoid_word_break_elements=lxml.html.clean-module.html#_avoid_word_break_elements"><a title="lxml.html.clean._avoid_word_break_elements" class="py-name" href="#" onclick="return doclink('link-376', '_avoid_word_break_elements', 'link-376');">_avoid_word_break_elements</a></tt> <tt class="py-op">=</tt> <tt class="py-op">[</tt><tt class="py-string">'pre'</tt><tt class="py-op">,</tt> <tt class="py-string">'textarea'</tt><tt class="py-op">,</tt> <tt class="py-string">'code'</tt><tt class="py-op">]</tt> </tt>
<a name="L666"></a><tt class="py-lineno">666</tt>  <tt class="py-line"><tt id="link-377" class="py-name" targets="Variable lxml.html.clean._avoid_word_break_classes=lxml.html.clean-module.html#_avoid_word_break_classes"><a title="lxml.html.clean._avoid_word_break_classes" class="py-name" href="#" onclick="return doclink('link-377', '_avoid_word_break_classes', 'link-377');">_avoid_word_break_classes</a></tt> <tt class="py-op">=</tt> <tt class="py-op">[</tt><tt class="py-string">'nobreak'</tt><tt class="py-op">]</tt> </tt>
<a name="L667"></a><tt class="py-lineno">667</tt>  <tt class="py-line"> </tt>
<a name="word_break"></a><div id="word_break-def"><a name="L668"></a><tt class="py-lineno">668</tt> <a class="py-toggle" href="#" id="word_break-toggle" onclick="return toggle('word_break');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="lxml.html.clean-module.html#word_break">word_break</a><tt class="py-op">(</tt><tt class="py-param">el</tt><tt class="py-op">,</tt> <tt class="py-param">max_width</tt><tt class="py-op">=</tt><tt class="py-number">40</tt><tt class="py-op">,</tt> </tt>
<a name="L669"></a><tt class="py-lineno">669</tt>  <tt class="py-line">               <tt class="py-param">avoid_elements</tt><tt class="py-op">=</tt><tt id="link-378" class="py-name"><a title="lxml.html.clean._avoid_word_break_elements" class="py-name" href="#" onclick="return doclink('link-378', '_avoid_word_break_elements', 'link-376');">_avoid_word_break_elements</a></tt><tt class="py-op">,</tt> </tt>
<a name="L670"></a><tt class="py-lineno">670</tt>  <tt class="py-line">               <tt class="py-param">avoid_classes</tt><tt class="py-op">=</tt><tt id="link-379" class="py-name"><a title="lxml.html.clean._avoid_word_break_classes" class="py-name" href="#" onclick="return doclink('link-379', '_avoid_word_break_classes', 'link-377');">_avoid_word_break_classes</a></tt><tt class="py-op">,</tt> </tt>
<a name="L671"></a><tt class="py-lineno">671</tt>  <tt class="py-line">               <tt class="py-param">break_character</tt><tt class="py-op">=</tt><tt id="link-380" class="py-name"><a title="lxml.html.clean.unichr" class="py-name" href="#" onclick="return doclink('link-380', 'unichr', 'link-13');">unichr</a></tt><tt class="py-op">(</tt><tt class="py-number">0x200b</tt><tt class="py-op">)</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="word_break-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="word_break-expanded"><a name="L672"></a><tt class="py-lineno">672</tt>  <tt class="py-line">    <tt class="py-docstring">"""</tt> </tt>
<a name="L673"></a><tt class="py-lineno">673</tt>  <tt class="py-line"><tt class="py-docstring">    Breaks any long words found in the body of the text (not attributes).</tt> </tt>
<a name="L674"></a><tt class="py-lineno">674</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L675"></a><tt class="py-lineno">675</tt>  <tt class="py-line"><tt class="py-docstring">    Doesn't affect any of the tags in avoid_elements, by default</tt> </tt>
<a name="L676"></a><tt class="py-lineno">676</tt>  <tt class="py-line"><tt class="py-docstring">    ``&lt;textarea&gt;`` and ``&lt;pre&gt;``</tt> </tt>
<a name="L677"></a><tt class="py-lineno">677</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L678"></a><tt class="py-lineno">678</tt>  <tt class="py-line"><tt class="py-docstring">    Breaks words by inserting &amp;#8203;, which is a unicode character</tt> </tt>
<a name="L679"></a><tt class="py-lineno">679</tt>  <tt class="py-line"><tt class="py-docstring">    for Zero Width Space character.  This generally takes up no space</tt> </tt>
<a name="L680"></a><tt class="py-lineno">680</tt>  <tt class="py-line"><tt class="py-docstring">    in rendering, but does copy as a space, and in monospace contexts</tt> </tt>
<a name="L681"></a><tt class="py-lineno">681</tt>  <tt class="py-line"><tt class="py-docstring">    usually takes up space.</tt> </tt>
<a name="L682"></a><tt class="py-lineno">682</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L683"></a><tt class="py-lineno">683</tt>  <tt class="py-line"><tt class="py-docstring">    See http://www.cs.tut.fi/~jkorpela/html/nobr.html for a discussion</tt> </tt>
<a name="L684"></a><tt class="py-lineno">684</tt>  <tt class="py-line"><tt class="py-docstring">    """</tt> </tt>
<a name="L685"></a><tt class="py-lineno">685</tt>  <tt class="py-line">    <tt class="py-comment"># Character suggestion of &amp;#8203 comes from:</tt> </tt>
<a name="L686"></a><tt class="py-lineno">686</tt>  <tt class="py-line">    <tt class="py-comment">#   http://www.cs.tut.fi/~jkorpela/html/nobr.html</tt> </tt>
<a name="L687"></a><tt class="py-lineno">687</tt>  <tt class="py-line">    <tt class="py-keyword">if</tt> <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-381" class="py-name"><a title="lxml.etree._Comment.tag
lxml.etree._Element.tag
lxml.etree._Entity.tag
lxml.etree._ProcessingInstruction.tag
lxml.tests.test_xpathevaluator.tag
xml.etree.ElementTree.Element.tag" class="py-name" href="#" onclick="return doclink('link-381', 'tag', 'link-84');">tag</a></tt> <tt class="py-keyword">in</tt> <tt id="link-382" class="py-name"><a title="lxml.html.clean._avoid_word_break_elements" class="py-name" href="#" onclick="return doclink('link-382', '_avoid_word_break_elements', 'link-376');">_avoid_word_break_elements</a></tt><tt class="py-op">:</tt> </tt>
<a name="L688"></a><tt class="py-lineno">688</tt>  <tt class="py-line">        <tt class="py-keyword">return</tt> </tt>
<a name="L689"></a><tt class="py-lineno">689</tt>  <tt class="py-line">    <tt class="py-name">class_name</tt> <tt class="py-op">=</tt> <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-383" class="py-name"><a title="lxml.etree._Attrib.get
lxml.etree._Element.get
lxml.etree._IDDict.get
lxml.etree._ProcessingInstruction.get" class="py-name" href="#" onclick="return doclink('link-383', 'get', 'link-130');">get</a></tt><tt class="py-op">(</tt><tt class="py-string">'class'</tt><tt class="py-op">)</tt> </tt>
<a name="L690"></a><tt class="py-lineno">690</tt>  <tt class="py-line">    <tt class="py-keyword">if</tt> <tt class="py-name">class_name</tt><tt class="py-op">:</tt> </tt>
<a name="L691"></a><tt class="py-lineno">691</tt>  <tt class="py-line">        <tt class="py-name">dont_break</tt> <tt class="py-op">=</tt> <tt class="py-name">False</tt> </tt>
<a name="L692"></a><tt class="py-lineno">692</tt>  <tt class="py-line">        <tt class="py-name">class_name</tt> <tt class="py-op">=</tt> <tt class="py-name">class_name</tt><tt class="py-op">.</tt><tt class="py-name">split</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L693"></a><tt class="py-lineno">693</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt class="py-name">avoid</tt> <tt class="py-keyword">in</tt> <tt class="py-name">avoid_classes</tt><tt class="py-op">:</tt> </tt>
<a name="L694"></a><tt class="py-lineno">694</tt>  <tt class="py-line">            <tt class="py-keyword">if</tt> <tt class="py-name">avoid</tt> <tt class="py-keyword">in</tt> <tt class="py-name">class_name</tt><tt class="py-op">:</tt> </tt>
<a name="L695"></a><tt class="py-lineno">695</tt>  <tt class="py-line">                <tt class="py-name">dont_break</tt> <tt class="py-op">=</tt> <tt class="py-name">True</tt> </tt>
<a name="L696"></a><tt class="py-lineno">696</tt>  <tt class="py-line">                <tt class="py-keyword">break</tt> </tt>
<a name="L697"></a><tt class="py-lineno">697</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">dont_break</tt><tt class="py-op">:</tt> </tt>
<a name="L698"></a><tt class="py-lineno">698</tt>  <tt class="py-line">            <tt class="py-keyword">return</tt> </tt>
<a name="L699"></a><tt class="py-lineno">699</tt>  <tt class="py-line">    <tt class="py-keyword">if</tt> <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-384" class="py-name"><a title="lxml.etree.QName.text
lxml.etree._Element.text
lxml.etree._Entity.text
lxml.objectify.ObjectifiedElement.text
xml.etree.ElementTree.Element.text" class="py-name" href="#" onclick="return doclink('link-384', 'text', 'link-141');">text</a></tt><tt class="py-op">:</tt> </tt>
<a name="L700"></a><tt class="py-lineno">700</tt>  <tt class="py-line">        <tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-385" class="py-name"><a title="lxml.etree.QName.text
lxml.etree._Element.text
lxml.etree._Entity.text
lxml.objectify.ObjectifiedElement.text
xml.etree.ElementTree.Element.text" class="py-name" href="#" onclick="return doclink('link-385', 'text', 'link-141');">text</a></tt> <tt class="py-op">=</tt> <tt id="link-386" class="py-name" targets="Function lxml.html.clean._break_text()=lxml.html.clean-module.html#_break_text"><a title="lxml.html.clean._break_text" class="py-name" href="#" onclick="return doclink('link-386', '_break_text', 'link-386');">_break_text</a></tt><tt class="py-op">(</tt><tt class="py-name">el</tt><tt class="py-op">.</tt><tt id="link-387" class="py-name"><a title="lxml.etree.QName.text
lxml.etree._Element.text
lxml.etree._Entity.text
lxml.objectify.ObjectifiedElement.text
xml.etree.ElementTree.Element.text" class="py-name" href="#" onclick="return doclink('link-387', 'text', 'link-141');">text</a></tt><tt class="py-op">,</tt> <tt class="py-name">max_width</tt><tt class="py-op">,</tt> <tt class="py-name">break_character</tt><tt class="py-op">)</tt> </tt>
<a name="L701"></a><tt class="py-lineno">701</tt>  <tt class="py-line">    <tt class="py-keyword">for</tt> <tt class="py-name">child</tt> <tt class="py-keyword">in</tt> <tt class="py-name">el</tt><tt class="py-op">:</tt> </tt>
<a name="L702"></a><tt class="py-lineno">702</tt>  <tt class="py-line">        <tt id="link-388" class="py-name" targets="Function lxml.html.clean.word_break()=lxml.html.clean-module.html#word_break"><a title="lxml.html.clean.word_break" class="py-name" href="#" onclick="return doclink('link-388', 'word_break', 'link-388');">word_break</a></tt><tt class="py-op">(</tt><tt class="py-name">child</tt><tt class="py-op">,</tt> <tt class="py-name">max_width</tt><tt class="py-op">=</tt><tt class="py-name">max_width</tt><tt class="py-op">,</tt> </tt>
<a name="L703"></a><tt class="py-lineno">703</tt>  <tt class="py-line">                   <tt class="py-name">avoid_elements</tt><tt class="py-op">=</tt><tt class="py-name">avoid_elements</tt><tt class="py-op">,</tt> </tt>
<a name="L704"></a><tt class="py-lineno">704</tt>  <tt class="py-line">                   <tt class="py-name">avoid_classes</tt><tt class="py-op">=</tt><tt class="py-name">avoid_classes</tt><tt class="py-op">,</tt> </tt>
<a name="L705"></a><tt class="py-lineno">705</tt>  <tt class="py-line">                   <tt class="py-name">break_character</tt><tt class="py-op">=</tt><tt class="py-name">break_character</tt><tt class="py-op">)</tt> </tt>
<a name="L706"></a><tt class="py-lineno">706</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">child</tt><tt class="py-op">.</tt><tt id="link-389" class="py-name"><a title="lxml.etree._Element.tail
xml.etree.ElementTree.Element.tail" class="py-name" href="#" onclick="return doclink('link-389', 'tail', 'link-307');">tail</a></tt><tt class="py-op">:</tt> </tt>
<a name="L707"></a><tt class="py-lineno">707</tt>  <tt class="py-line">            <tt class="py-name">child</tt><tt class="py-op">.</tt><tt id="link-390" class="py-name"><a title="lxml.etree._Element.tail
xml.etree.ElementTree.Element.tail" class="py-name" href="#" onclick="return doclink('link-390', 'tail', 'link-307');">tail</a></tt> <tt class="py-op">=</tt> <tt id="link-391" class="py-name"><a title="lxml.html.clean._break_text" class="py-name" href="#" onclick="return doclink('link-391', '_break_text', 'link-386');">_break_text</a></tt><tt class="py-op">(</tt><tt class="py-name">child</tt><tt class="py-op">.</tt><tt id="link-392" class="py-name"><a title="lxml.etree._Element.tail
xml.etree.ElementTree.Element.tail" class="py-name" href="#" onclick="return doclink('link-392', 'tail', 'link-307');">tail</a></tt><tt class="py-op">,</tt> <tt class="py-name">max_width</tt><tt class="py-op">,</tt> <tt class="py-name">break_character</tt><tt class="py-op">)</tt> </tt>
</div><a name="L708"></a><tt class="py-lineno">708</tt>  <tt class="py-line"> </tt>
<a name="word_break_html"></a><div id="word_break_html-def"><a name="L709"></a><tt class="py-lineno">709</tt> <a class="py-toggle" href="#" id="word_break_html-toggle" onclick="return toggle('word_break_html');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="lxml.html.clean-module.html#word_break_html">word_break_html</a><tt class="py-op">(</tt><tt class="py-param">html</tt><tt class="py-op">,</tt> <tt class="py-op">*</tt><tt class="py-param">args</tt><tt class="py-op">,</tt> <tt class="py-op">**</tt><tt class="py-param">kw</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="word_break_html-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="word_break_html-expanded"><a name="L710"></a><tt class="py-lineno">710</tt>  <tt class="py-line">    <tt class="py-name">result_type</tt> <tt class="py-op">=</tt> <tt id="link-393" class="py-name"><a title="lxml.etree._LogEntry.type
lxml.html.InputElement.type" class="py-name" href="#" onclick="return doclink('link-393', 'type', 'link-279');">type</a></tt><tt class="py-op">(</tt><tt id="link-394" class="py-name"><a title="lxml.html
lxml.html.diff.href_token.html
lxml.html.diff.tag_token.html
lxml.html.diff.token.html" class="py-name" href="#" onclick="return doclink('link-394', 'html', 'link-5');">html</a></tt><tt class="py-op">)</tt> </tt>
<a name="L711"></a><tt class="py-lineno">711</tt>  <tt class="py-line">    <tt class="py-name">doc</tt> <tt class="py-op">=</tt> <tt id="link-395" class="py-name"><a title="lxml.etree.fromstring
lxml.html.html5parser.fromstring
lxml.html.soupparser.fromstring
lxml.objectify.fromstring" class="py-name" href="#" onclick="return doclink('link-395', 'fromstring', 'link-9');">fromstring</a></tt><tt class="py-op">(</tt><tt id="link-396" class="py-name"><a title="lxml.html
lxml.html.diff.href_token.html
lxml.html.diff.tag_token.html
lxml.html.diff.token.html" class="py-name" href="#" onclick="return doclink('link-396', 'html', 'link-5');">html</a></tt><tt class="py-op">)</tt> </tt>
<a name="L712"></a><tt class="py-lineno">712</tt>  <tt class="py-line">    <tt id="link-397" class="py-name"><a title="lxml.html.clean.word_break" class="py-name" href="#" onclick="return doclink('link-397', 'word_break', 'link-388');">word_break</a></tt><tt class="py-op">(</tt><tt class="py-name">doc</tt><tt class="py-op">,</tt> <tt class="py-op">*</tt><tt class="py-name">args</tt><tt class="py-op">,</tt> <tt class="py-op">**</tt><tt class="py-name">kw</tt><tt class="py-op">)</tt> </tt>
<a name="L713"></a><tt class="py-lineno">713</tt>  <tt class="py-line">    <tt class="py-keyword">return</tt> <tt class="py-name">_transform_result</tt><tt class="py-op">(</tt><tt class="py-name">result_type</tt><tt class="py-op">,</tt> <tt class="py-name">doc</tt><tt class="py-op">)</tt> </tt>
</div><a name="L714"></a><tt class="py-lineno">714</tt>  <tt class="py-line"> </tt>
<a name="_break_text"></a><div id="_break_text-def"><a name="L715"></a><tt class="py-lineno">715</tt> <a class="py-toggle" href="#" id="_break_text-toggle" onclick="return toggle('_break_text');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="lxml.html.clean-module.html#_break_text">_break_text</a><tt class="py-op">(</tt><tt class="py-param">text</tt><tt class="py-op">,</tt> <tt class="py-param">max_width</tt><tt class="py-op">,</tt> <tt class="py-param">break_character</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="_break_text-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="_break_text-expanded"><a name="L716"></a><tt class="py-lineno">716</tt>  <tt class="py-line">    <tt class="py-name">words</tt> <tt class="py-op">=</tt> <tt id="link-398" class="py-name"><a title="lxml.etree.QName.text
lxml.etree._Element.text
lxml.etree._Entity.text
lxml.objectify.ObjectifiedElement.text
xml.etree.ElementTree.Element.text" class="py-name" href="#" onclick="return doclink('link-398', 'text', 'link-141');">text</a></tt><tt class="py-op">.</tt><tt class="py-name">split</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L717"></a><tt class="py-lineno">717</tt>  <tt class="py-line">    <tt class="py-keyword">for</tt> <tt class="py-name">word</tt> <tt class="py-keyword">in</tt> <tt class="py-name">words</tt><tt class="py-op">:</tt> </tt>
<a name="L718"></a><tt class="py-lineno">718</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-name">word</tt><tt class="py-op">)</tt> <tt class="py-op">&gt;</tt> <tt class="py-name">max_width</tt><tt class="py-op">:</tt> </tt>
<a name="L719"></a><tt class="py-lineno">719</tt>  <tt class="py-line">            <tt class="py-name">replacement</tt> <tt class="py-op">=</tt> <tt id="link-399" class="py-name" targets="Function lxml.html.clean._insert_break()=lxml.html.clean-module.html#_insert_break"><a title="lxml.html.clean._insert_break" class="py-name" href="#" onclick="return doclink('link-399', '_insert_break', 'link-399');">_insert_break</a></tt><tt class="py-op">(</tt><tt class="py-name">word</tt><tt class="py-op">,</tt> <tt class="py-name">max_width</tt><tt class="py-op">,</tt> <tt class="py-name">break_character</tt><tt class="py-op">)</tt> </tt>
<a name="L720"></a><tt class="py-lineno">720</tt>  <tt class="py-line">            <tt id="link-400" class="py-name"><a title="lxml.etree.QName.text
lxml.etree._Element.text
lxml.etree._Entity.text
lxml.objectify.ObjectifiedElement.text
xml.etree.ElementTree.Element.text" class="py-name" href="#" onclick="return doclink('link-400', 'text', 'link-141');">text</a></tt> <tt class="py-op">=</tt> <tt id="link-401" class="py-name"><a title="lxml.etree.QName.text
lxml.etree._Element.text
lxml.etree._Entity.text
lxml.objectify.ObjectifiedElement.text
xml.etree.ElementTree.Element.text" class="py-name" href="#" onclick="return doclink('link-401', 'text', 'link-141');">text</a></tt><tt class="py-op">.</tt><tt id="link-402" class="py-name"><a title="lxml.etree._Element.replace" class="py-name" href="#" onclick="return doclink('link-402', 'replace', 'link-271');">replace</a></tt><tt class="py-op">(</tt><tt class="py-name">word</tt><tt class="py-op">,</tt> <tt class="py-name">replacement</tt><tt class="py-op">)</tt> </tt>
<a name="L721"></a><tt class="py-lineno">721</tt>  <tt class="py-line">    <tt class="py-keyword">return</tt> <tt id="link-403" class="py-name"><a title="lxml.etree.QName.text
lxml.etree._Element.text
lxml.etree._Entity.text
lxml.objectify.ObjectifiedElement.text
xml.etree.ElementTree.Element.text" class="py-name" href="#" onclick="return doclink('link-403', 'text', 'link-141');">text</a></tt> </tt>
</div><a name="L722"></a><tt class="py-lineno">722</tt>  <tt class="py-line"> </tt>
<a name="L723"></a><tt class="py-lineno">723</tt>  <tt class="py-line"><tt id="link-404" class="py-name" targets="Variable lxml.html.clean._break_prefer_re=lxml.html.clean-module.html#_break_prefer_re"><a title="lxml.html.clean._break_prefer_re" class="py-name" href="#" onclick="return doclink('link-404', '_break_prefer_re', 'link-404');">_break_prefer_re</a></tt> <tt class="py-op">=</tt> <tt class="py-name">re</tt><tt class="py-op">.</tt><tt class="py-name">compile</tt><tt class="py-op">(</tt><tt class="py-string">r'[^a-z]'</tt><tt class="py-op">,</tt> <tt class="py-name">re</tt><tt class="py-op">.</tt><tt id="link-405" class="py-name"><a title="lxml.html.builder.I" class="py-name" href="#" onclick="return doclink('link-405', 'I', 'link-22');">I</a></tt><tt class="py-op">)</tt> </tt>
<a name="L724"></a><tt class="py-lineno">724</tt>  <tt class="py-line"> </tt>
<a name="_insert_break"></a><div id="_insert_break-def"><a name="L725"></a><tt class="py-lineno">725</tt> <a class="py-toggle" href="#" id="_insert_break-toggle" onclick="return toggle('_insert_break');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="lxml.html.clean-module.html#_insert_break">_insert_break</a><tt class="py-op">(</tt><tt class="py-param">word</tt><tt class="py-op">,</tt> <tt class="py-param">width</tt><tt class="py-op">,</tt> <tt class="py-param">break_character</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="_insert_break-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="_insert_break-expanded"><a name="L726"></a><tt class="py-lineno">726</tt>  <tt class="py-line">    <tt class="py-name">orig_word</tt> <tt class="py-op">=</tt> <tt class="py-name">word</tt> </tt>
<a name="L727"></a><tt class="py-lineno">727</tt>  <tt class="py-line">    <tt class="py-name">result</tt> <tt class="py-op">=</tt> <tt class="py-string">''</tt> </tt>
<a name="L728"></a><tt class="py-lineno">728</tt>  <tt class="py-line">    <tt class="py-keyword">while</tt> <tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-name">word</tt><tt class="py-op">)</tt> <tt class="py-op">&gt;</tt> <tt class="py-name">width</tt><tt class="py-op">:</tt> </tt>
<a name="L729"></a><tt class="py-lineno">729</tt>  <tt class="py-line">        <tt id="link-406" class="py-name"><a title="lxml.etree.TreeBuilder.start" class="py-name" href="#" onclick="return doclink('link-406', 'start', 'link-328');">start</a></tt> <tt class="py-op">=</tt> <tt class="py-name">word</tt><tt class="py-op">[</tt><tt class="py-op">:</tt><tt class="py-name">width</tt><tt class="py-op">]</tt> </tt>
<a name="L730"></a><tt class="py-lineno">730</tt>  <tt class="py-line">        <tt class="py-name">breaks</tt> <tt class="py-op">=</tt> <tt class="py-name">list</tt><tt class="py-op">(</tt><tt id="link-407" class="py-name"><a title="lxml.html.clean._break_prefer_re" class="py-name" href="#" onclick="return doclink('link-407', '_break_prefer_re', 'link-404');">_break_prefer_re</a></tt><tt class="py-op">.</tt><tt class="py-name">finditer</tt><tt class="py-op">(</tt><tt id="link-408" class="py-name"><a title="lxml.etree.TreeBuilder.start" class="py-name" href="#" onclick="return doclink('link-408', 'start', 'link-328');">start</a></tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L731"></a><tt class="py-lineno">731</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">breaks</tt><tt class="py-op">:</tt> </tt>
<a name="L732"></a><tt class="py-lineno">732</tt>  <tt class="py-line">            <tt class="py-name">last_break</tt> <tt class="py-op">=</tt> <tt class="py-name">breaks</tt><tt class="py-op">[</tt><tt class="py-op">-</tt><tt class="py-number">1</tt><tt class="py-op">]</tt> </tt>
<a name="L733"></a><tt class="py-lineno">733</tt>  <tt class="py-line">            <tt class="py-comment"># Only walk back up to 10 characters to find a nice break:</tt> </tt>
<a name="L734"></a><tt class="py-lineno">734</tt>  <tt class="py-line">            <tt class="py-keyword">if</tt> <tt class="py-name">last_break</tt><tt class="py-op">.</tt><tt id="link-409" class="py-name"><a title="lxml.etree.TreeBuilder.end" class="py-name" href="#" onclick="return doclink('link-409', 'end', 'link-327');">end</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> <tt class="py-op">&gt;</tt> <tt class="py-name">width</tt><tt class="py-op">-</tt><tt class="py-number">10</tt><tt class="py-op">:</tt> </tt>
<a name="L735"></a><tt class="py-lineno">735</tt>  <tt class="py-line">                <tt class="py-comment"># FIXME: should the break character be at the end of the</tt> </tt>
<a name="L736"></a><tt class="py-lineno">736</tt>  <tt class="py-line">                <tt class="py-comment"># chunk, or the beginning of the next chunk?</tt> </tt>
<a name="L737"></a><tt class="py-lineno">737</tt>  <tt class="py-line">                <tt id="link-410" class="py-name"><a title="lxml.etree.TreeBuilder.start" class="py-name" href="#" onclick="return doclink('link-410', 'start', 'link-328');">start</a></tt> <tt class="py-op">=</tt> <tt class="py-name">word</tt><tt class="py-op">[</tt><tt class="py-op">:</tt><tt class="py-name">last_break</tt><tt class="py-op">.</tt><tt id="link-411" class="py-name"><a title="lxml.etree.TreeBuilder.end" class="py-name" href="#" onclick="return doclink('link-411', 'end', 'link-327');">end</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">]</tt> </tt>
<a name="L738"></a><tt class="py-lineno">738</tt>  <tt class="py-line">        <tt class="py-name">result</tt> <tt class="py-op">+=</tt> <tt id="link-412" class="py-name"><a title="lxml.etree.TreeBuilder.start" class="py-name" href="#" onclick="return doclink('link-412', 'start', 'link-328');">start</a></tt> <tt class="py-op">+</tt> <tt class="py-name">break_character</tt> </tt>
<a name="L739"></a><tt class="py-lineno">739</tt>  <tt class="py-line">        <tt class="py-name">word</tt> <tt class="py-op">=</tt> <tt class="py-name">word</tt><tt class="py-op">[</tt><tt class="py-name">len</tt><tt class="py-op">(</tt><tt id="link-413" class="py-name"><a title="lxml.etree.TreeBuilder.start" class="py-name" href="#" onclick="return doclink('link-413', 'start', 'link-328');">start</a></tt><tt class="py-op">)</tt><tt class="py-op">:</tt><tt class="py-op">]</tt> </tt>
<a name="L740"></a><tt class="py-lineno">740</tt>  <tt class="py-line">    <tt class="py-name">result</tt> <tt class="py-op">+=</tt> <tt class="py-name">word</tt> </tt>
<a name="L741"></a><tt class="py-lineno">741</tt>  <tt class="py-line">    <tt class="py-keyword">return</tt> <tt class="py-name">result</tt> </tt>
</div><a name="L742"></a><tt class="py-lineno">742</tt>  <tt class="py-line"> </tt><script type="text/javascript">
<!--
expandto(location.href);
// -->
</script>
</pre>
<br />
<!-- ==================== NAVIGATION BAR ==================== -->
<!-- Copy of the navigation strip placed after the source listing:
     four local page links, then a right-aligned project link. -->
<table class="navbar" border="0" width="100%" cellpadding="0"
       bgcolor="#a0c0ff" cellspacing="0">
  <tr valign="middle">
    <!-- Home link -->
    <th>&nbsp;&nbsp;&nbsp;<a href="lxml-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>

    <!-- Tree link -->
    <th>&nbsp;&nbsp;&nbsp;<a href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>

    <!-- Index link -->
    <th>&nbsp;&nbsp;&nbsp;<a href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>

    <!-- Help link -->
    <th>&nbsp;&nbsp;&nbsp;<a href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>

    <!-- Project homepage: this cell takes the remaining width and holds
         a nested one-cell table with the project link (target "_top"). -->
    <th class="navbar" align="right" width="100%">
      <table border="0" cellpadding="0" cellspacing="0">
        <tr>
          <th class="navbar" align="center"><a class="navbar" target="_top" href="/">lxml API</a></th>
        </tr>
      </table>
    </th>
  </tr>
</table>
<!-- Footer: generation stamp on the left, link to the Epydoc site on the right. -->
<table border="0" cellpadding="0" cellspacing="0" width="100%">
  <tr>
    <td align="left" class="footer">
    Generated by Epydoc 3.0.1
    on Sun Sep  9 15:24:18 2018
    </td>
    <td align="right" class="footer">
      <a target="mainFrame" href="http://epydoc.sourceforge.net"
        >http://epydoc.sourceforge.net</a>
    </td>
  </tr>
</table>

<script type="text/javascript">
  <!--
  // Final page-load step: apply the reader's saved show/hide preference
  // for private objects.
  //
  // Private objects are initially displayed (because if
  // javascript is turned off then we want them to be
  // visible); but by default, we want to hide them.  So hide
  // them unless we have a cookie that says to show them.
  //
  // NOTE(review): checkCookie() is not defined in this page; presumably it
  // is provided by epydoc.js, which the page head loads. Confirm before
  // renaming or removing that script.
  checkCookie();
  // -->
</script>
</body>
</html>