Sophie

Sophie

distrib > Mandriva > 2008.1 > x86_64 > by-pkgid > 98d6b53e11e983e268c3e19f14e091a0 > files > 329

kdepim-devel-doc-3.5.9-9mdv2008.1.x86_64.rpm

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en-US" xml:lang="en-US">

<head>
  <title>akregator/src/librss: feeddetector.cpp Source File (akregator/src/librss)</title>
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />

  <meta http-equiv="Content-Style-Type" content="text/css" />

  <meta http-equiv="pics-label" content='(pics-1.1 "http://www.icra.org/ratingsv02.html" comment "ICRAonline DE v2.0" l gen true for "http://www.kde.org"  r (nz 1 vz 1 lz 1 oz 1 cb 1) "http://www.rsac.org/ratingsv01.html" l gen true for "http://www.kde.org"  r (n 0 s 0 v 0 l 0))' />

  <meta name="trademark" content="KDE e.V." />
  <meta name="description" content="K Desktop Environment Homepage, KDE.org" />
  <meta name="MSSmartTagsPreventParsing" content="true" />
  <meta name="robots" content="all" />

  <link rel="shortcut icon" href="../../../../favicon.ico" />

<link rel="stylesheet" media="screen" type="text/css" title="APIDOX" href="doxygen.css" />



<style type="text/css">
/*
 * Page-local rules, declared after the doxygen.css stylesheet link.
 * They are deliberately NOT wrapped in an SGML comment: this document is
 * XHTML, and an XML parser is allowed to discard comment content, which
 * would silently drop every rule below. None of the rules contain
 * "<", "&" or "]]>", so no CDATA section is needed.
 */

/* Hide all horizontal rules. */
hr { display: none; }

/* Second-level headings inside the content area get no left margin. */
#content h2 { margin-left: 0px; }

/* Tables/cells with the md* classes: light grey framed box, padded rows. */
table.mdTable { background-color: #f8f8f8; border: .2em solid #d7d7d7; }
td.mdRow { padding: 8px 20px; }
td.md { font-weight: bold; }

/* Both name-cell variants share the same bold, dark-red look. */
td.mdname1,
td.mdname { font-weight: bold; color: #602020; }
</style>

</head>

<body>

<!--
  Top banner: a skip link to the main content, the home logo link, a second
  logo image and the page title.
  Elements with class "doNotDisplay" are presumably hidden by the stylesheet
  for visual browsers (the class is not defined in this file - confirm in
  doxygen.css).
-->
<div id="nav_header_top" align="right">
  <!-- Skip link (access key 2); targets the <a name="content"> anchor in the content column. -->
  <a href="#content" class="doNotDisplay" accesskey="2">Skip to main content ::</a>

  <!-- Home logo, linking four directory levels up. -->
  <a href="../../../.."><img id="nav_header_logo" alt="Home" align="left" src="../../../../kde_gear_64.png" border="0" /></a>
  <span class="doNotDisplay">::</span>
  <!-- Second logo: empty alt text, so it is treated as decorative. -->
  <img id="nav_header_logo_right" alt="" align="right" src="../../../../pimlogo.png" border="0" />

  <div id="nav_header_title" align="left">KDE PIM API Reference</div>


</div>

<!--
  Lower banner: a skip link to the link menu (access key 5, targets the
  <a name="navigation"> anchor in the left menu cell), followed by the
  breadcrumb trail: API Reference / akregator / src / librss.
  The last breadcrumb entry links to the current directory (".").
-->
<div id="nav_header_bottom" align="right">
  <span class="doNotDisplay">:: <a href="#navigation" accesskey="5">Skip to Link Menu</a><br/></span>
  <div id="nav_header_bottom_left" style="text-align: left;">
/ <a href="../../../../">API Reference</a>
 / <a href="../../../html/index.html">akregator</a> / <a href="../../html/index.html">src</a> / <a href=".">librss</a>
  </div>
</div>


<table id="main" border="0" cellpadding="0" cellspacing="0" width="100%">
<tr>
      <td valign="top" class="menuheader" height="0"></td>

  <td id="contentcolumn" valign="top" rowspan="2" >
    <div id="content" style="padding-top: 0px;"><div style="width:100%; margin: 0px; padding: 0px;">
    <h2><a name="content"></a>akregator/src/librss</h2>


<!-- Generated by Doxygen 1.5.5 -->
<h1>feeddetector.cpp</h1><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">/*</span>
<a name="l00002"></a>00002 <span class="comment">    This file is part of Akregator.</span>
<a name="l00003"></a>00003 <span class="comment"></span>
<a name="l00004"></a>00004 <span class="comment">    Copyright (C) 2004 Teemu Rytilahti &lt;tpr@d5k.net&gt;</span>
<a name="l00005"></a>00005 <span class="comment"></span>
<a name="l00006"></a>00006 <span class="comment">    This program is free software; you can redistribute it and/or modify</span>
<a name="l00007"></a>00007 <span class="comment">    it under the terms of the GNU General Public License as published by</span>
<a name="l00008"></a>00008 <span class="comment">    the Free Software Foundation; either version 2 of the License, or</span>
<a name="l00009"></a>00009 <span class="comment">    (at your option) any later version.</span>
<a name="l00010"></a>00010 <span class="comment"></span>
<a name="l00011"></a>00011 <span class="comment">    This program is distributed in the hope that it will be useful,</span>
<a name="l00012"></a>00012 <span class="comment">    but WITHOUT ANY WARRANTY; without even the implied warranty of</span>
<a name="l00013"></a>00013 <span class="comment">    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the</span>
<a name="l00014"></a>00014 <span class="comment">    GNU General Public License for more details.</span>
<a name="l00015"></a>00015 <span class="comment"></span>
<a name="l00016"></a>00016 <span class="comment">    You should have received a copy of the GNU General Public License</span>
<a name="l00017"></a>00017 <span class="comment">    along with this program; if not, write to the Free Software</span>
<a name="l00018"></a>00018 <span class="comment">    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.</span>
<a name="l00019"></a>00019 <span class="comment"></span>
<a name="l00020"></a>00020 <span class="comment">    As a special exception, permission is given to link this program</span>
<a name="l00021"></a>00021 <span class="comment">    with any edition of Qt, and distribute the resulting executable,</span>
<a name="l00022"></a>00022 <span class="comment">    without including the source code for Qt in the source distribution.</span>
<a name="l00023"></a>00023 <span class="comment">*/</span>
<a name="l00024"></a>00024  
<a name="l00025"></a>00025 <span class="preprocessor">#include &lt;qregexp.h&gt;</span>
<a name="l00026"></a>00026 <span class="preprocessor">#include &lt;qstring.h&gt;</span>
<a name="l00027"></a>00027 <span class="preprocessor">#include &lt;qstringlist.h&gt;</span>
<a name="l00028"></a>00028 <span class="preprocessor">#include &lt;qvaluelist.h&gt;</span>
<a name="l00029"></a>00029 <span class="preprocessor">#include &lt;kcharsets.h&gt;</span>
<a name="l00030"></a>00030 <span class="preprocessor">#include &lt;kurl.h&gt;</span>
<a name="l00031"></a>00031 
<a name="l00032"></a>00032 <span class="preprocessor">#include "feeddetector.h"</span>
<a name="l00033"></a>00033 
<a name="l00034"></a>00034 
<a name="l00035"></a>00035 <span class="keyword">using namespace </span>RSS;
<a name="l00036"></a>00036 
<a name="l00037"></a><a class="code" href="classRSS_1_1FeedDetector.html#24417ee5c9a1fb9abf2ee3dc483f28ac">00037</a> FeedDetectorEntryList FeedDetector::extractFromLinkTags(<span class="keyword">const</span> QString&amp; s)   
<a name="l00038"></a>00038 {
<a name="l00039"></a>00039     <span class="comment">//reduce all sequences of spaces, newlines etc. to one space:</span>
<a name="l00040"></a>00040     QString str = s.simplifyWhiteSpace();
<a name="l00041"></a>00041 
<a name="l00042"></a>00042     <span class="comment">// extracts &lt;link&gt; tags</span>
<a name="l00043"></a>00043     QRegExp reLinkTag(<span class="stringliteral">"&lt;[\\s]?LINK[^&gt;]*REL[\\s]?=[\\s]?\\\"[\\s]?(ALTERNATE|SERVICE\\.FEED)[\\s]?\\\"[^&gt;]*&gt;"</span>, <span class="keyword">false</span>);
<a name="l00044"></a>00044 
<a name="l00045"></a>00045     <span class="comment">// extracts the URL (href="url")</span>
<a name="l00046"></a>00046     QRegExp reHref(<span class="stringliteral">"HREF[\\s]?=[\\s]?\\\"([^\\\"]*)\\\""</span>, <span class="keyword">false</span>);
<a name="l00047"></a>00047     <span class="comment">// extracts type attribute</span>
<a name="l00048"></a>00048     QRegExp reType(<span class="stringliteral">"TYPE[\\s]?=[\\s]?\\\"([^\\\"]*)\\\""</span>, <span class="keyword">false</span>);
<a name="l00049"></a>00049     <span class="comment">// extracts the title (title="title")</span>
<a name="l00050"></a>00050     QRegExp reTitle(<span class="stringliteral">"TITLE[\\s]?=[\\s]?\\\"([^\\\"]*)\\\""</span>, <span class="keyword">false</span>);
<a name="l00051"></a>00051 
<a name="l00052"></a>00052     <span class="keywordtype">int</span> pos = 0;
<a name="l00053"></a>00053     <span class="keywordtype">int</span> matchpos = 0;
<a name="l00054"></a>00054 
<a name="l00055"></a>00055     <span class="comment">// get all &lt;link&gt; tags</span>
<a name="l00056"></a>00056     QStringList linkTags;
<a name="l00057"></a>00057     <span class="comment">//int strlength = str.length();</span>
<a name="l00058"></a>00058     <span class="keywordflow">while</span> ( matchpos != -1 )
<a name="l00059"></a>00059     {
<a name="l00060"></a>00060         matchpos = reLinkTag.search(str, pos);
<a name="l00061"></a>00061         <span class="keywordflow">if</span> (matchpos != -1)
<a name="l00062"></a>00062         {
<a name="l00063"></a>00063             linkTags.append( str.mid(matchpos, reLinkTag.matchedLength()) );
<a name="l00064"></a>00064             pos = matchpos + reLinkTag.matchedLength();
<a name="l00065"></a>00065         }
<a name="l00066"></a>00066     }
<a name="l00067"></a>00067 
<a name="l00068"></a>00068     FeedDetectorEntryList list;
<a name="l00069"></a>00069 
<a name="l00070"></a>00070     <span class="keywordflow">for</span> ( QStringList::Iterator it = linkTags.begin(); it != linkTags.end(); ++it )
<a name="l00071"></a>00071     {
<a name="l00072"></a>00072         QString type;
<a name="l00073"></a>00073         <span class="keywordtype">int</span> pos = reType.search(*it, 0);
<a name="l00074"></a>00074         <span class="keywordflow">if</span> (pos != -1)
<a name="l00075"></a>00075             type = reType.cap(1).lower();
<a name="l00076"></a>00076 
<a name="l00077"></a>00077         <span class="comment">// we accept only type attributes indicating a feed</span>
<a name="l00078"></a>00078         <span class="keywordflow">if</span> ( type != <span class="stringliteral">"application/rss+xml"</span> &amp;&amp; type != <span class="stringliteral">"application/rdf+xml"</span>
<a name="l00079"></a>00079           &amp;&amp; type != <span class="stringliteral">"application/atom+xml"</span> &amp;&amp; type != <span class="stringliteral">"text/xml"</span> )
<a name="l00080"></a>00080             <span class="keywordflow">continue</span>;
<a name="l00081"></a>00081                 
<a name="l00082"></a>00082         QString title;
<a name="l00083"></a>00083         pos = reTitle.search(*it, 0);
<a name="l00084"></a>00084         <span class="keywordflow">if</span> (pos != -1)
<a name="l00085"></a>00085         title = reTitle.cap(1);
<a name="l00086"></a>00086 
<a name="l00087"></a>00087         title = KCharsets::resolveEntities(title);
<a name="l00088"></a>00088 
<a name="l00089"></a>00089         QString url;
<a name="l00090"></a>00090         pos = reHref.search(*it, 0);
<a name="l00091"></a>00091         <span class="keywordflow">if</span> (pos != -1)
<a name="l00092"></a>00092             url = reHref.cap(1);
<a name="l00093"></a>00093 
<a name="l00094"></a>00094         url = KCharsets::resolveEntities(url);
<a name="l00095"></a>00095 
<a name="l00096"></a>00096         <span class="comment">// if feed has no title, use the url as preliminary title (until feed is parsed)</span>
<a name="l00097"></a>00097         <span class="keywordflow">if</span> ( title.isEmpty() )
<a name="l00098"></a>00098             title = url;
<a name="l00099"></a>00099 
<a name="l00100"></a>00100         <span class="keywordflow">if</span> ( !url.isEmpty() )
<a name="l00101"></a>00101             list.append(FeedDetectorEntry(url, title) );        
<a name="l00102"></a>00102     }
<a name="l00103"></a>00103 
<a name="l00104"></a>00104 
<a name="l00105"></a>00105     <span class="keywordflow">return</span> list;
<a name="l00106"></a>00106 }
<a name="l00107"></a>00107 
<a name="l00108"></a><a class="code" href="classRSS_1_1FeedDetector.html#66bc93d78f93d0b6f24947ea5441bb7b">00108</a> QStringList <a class="code" href="classRSS_1_1FeedDetector.html#66bc93d78f93d0b6f24947ea5441bb7b" title="searches an HTML page for slightly feed-like looking links and catches everything...">FeedDetector::extractBruteForce</a>(<span class="keyword">const</span> QString&amp; s)
<a name="l00109"></a>00109 {
<a name="l00110"></a>00110     QString str = s.simplifyWhiteSpace();
<a name="l00111"></a>00111     
<a name="l00112"></a>00112     QRegExp reAhrefTag(<span class="stringliteral">"&lt;[\\s]?A[^&gt;]?HREF=[\\s]?\\\"[^\\\"]*\\\"[^&gt;]*&gt;"</span>, <span class="keyword">false</span>);
<a name="l00113"></a>00113     
<a name="l00114"></a>00114     <span class="comment">// extracts the URL (href="url")</span>
<a name="l00115"></a>00115     QRegExp reHref(<span class="stringliteral">"HREF[\\s]?=[\\s]?\\\"([^\\\"]*)\\\""</span>, <span class="keyword">false</span>);
<a name="l00116"></a>00116 
<a name="l00117"></a>00117     QRegExp rssrdfxml(<span class="stringliteral">".*(RSS|RDF|XML)"</span>, <span class="keyword">false</span>);
<a name="l00118"></a>00118 
<a name="l00119"></a>00119     <span class="keywordtype">int</span> pos = 0;
<a name="l00120"></a>00120     <span class="keywordtype">int</span> matchpos = 0;
<a name="l00121"></a>00121     
<a name="l00122"></a>00122     <span class="comment">// get all &lt;a href&gt; tags and capture url</span>
<a name="l00123"></a>00123     QStringList list;
<a name="l00124"></a>00124     <span class="comment">//int strlength = str.length();</span>
<a name="l00125"></a>00125     <span class="keywordflow">while</span> ( matchpos != -1 )
<a name="l00126"></a>00126     {
<a name="l00127"></a>00127         matchpos = reAhrefTag.search(str, pos);
<a name="l00128"></a>00128         <span class="keywordflow">if</span> ( matchpos != -1 )
<a name="l00129"></a>00129         {
<a name="l00130"></a>00130             QString ahref = str.mid(matchpos, reAhrefTag.matchedLength());
<a name="l00131"></a>00131             <span class="keywordtype">int</span> hrefpos = reHref.search(ahref, 0);
<a name="l00132"></a>00132             <span class="keywordflow">if</span> ( hrefpos != -1 )
<a name="l00133"></a>00133             {
<a name="l00134"></a>00134                 QString url = reHref.cap(1);
<a name="l00135"></a>00135 
<a name="l00136"></a>00136                 url = KCharsets::resolveEntities(url);
<a name="l00137"></a>00137 
<a name="l00138"></a>00138                 <span class="keywordflow">if</span> ( rssrdfxml.exactMatch(url) )
<a name="l00139"></a>00139                     list.append(url);
<a name="l00140"></a>00140             }
<a name="l00141"></a>00141 
<a name="l00142"></a>00142             pos = matchpos + reAhrefTag.matchedLength();
<a name="l00143"></a>00143         }
<a name="l00144"></a>00144     }
<a name="l00145"></a>00145     
<a name="l00146"></a>00146     <span class="keywordflow">return</span> list;
<a name="l00147"></a>00147 }
<a name="l00148"></a>00148 
<a name="l00149"></a>00149 QString FeedDetector::fixRelativeURL(<span class="keyword">const</span> QString &amp;s, <span class="keyword">const</span> KURL &amp;baseurl)
<a name="l00150"></a>00150 {
<a name="l00151"></a>00151     QString s2=s;
<a name="l00152"></a>00152     KURL u;
<a name="l00153"></a>00153     <span class="keywordflow">if</span> (KURL::isRelativeURL(s2))
<a name="l00154"></a>00154     {
<a name="l00155"></a>00155         <span class="keywordflow">if</span> (s2.startsWith(<span class="stringliteral">"//"</span>))
<a name="l00156"></a>00156         {
<a name="l00157"></a>00157             s2=s2.prepend(baseurl.protocol()+<span class="stringliteral">":"</span>);
<a name="l00158"></a>00158             u=s2;
<a name="l00159"></a>00159         }
<a name="l00160"></a>00160         <span class="keywordflow">else</span> <span class="keywordflow">if</span> (s2.startsWith(<span class="stringliteral">"/"</span>))
<a name="l00161"></a>00161         {
<a name="l00162"></a>00162             KURL b2(baseurl);
<a name="l00163"></a>00163             b2.setPath(QString()); <span class="comment">// delete path and query, so that only protocol://host remains</span>
<a name="l00164"></a>00164             b2.setQuery(QString());
<a name="l00165"></a>00165             u = KURL(b2, s2.remove(0,1)); <span class="comment">// remove leading "/" </span>
<a name="l00166"></a>00166         }
<a name="l00167"></a>00167         <span class="keywordflow">else</span>
<a name="l00168"></a>00168         {
<a name="l00169"></a>00169             u = KURL(baseurl, s2);
<a name="l00170"></a>00170         }
<a name="l00171"></a>00171     }
<a name="l00172"></a>00172     <span class="keywordflow">else</span>
<a name="l00173"></a>00173         u=s2;
<a name="l00174"></a>00174 
<a name="l00175"></a>00175     u.cleanPath();
<a name="l00176"></a>00176     <span class="comment">//kdDebug() &lt;&lt; "AKREGATOR_PLUGIN_FIXURL: " &lt;&lt; "url=" &lt;&lt; s &lt;&lt; " baseurl=" &lt;&lt; baseurl.url() &lt;&lt; " fixed=" &lt;&lt; u.url() &lt;&lt; </span>
<a name="l00177"></a>00177     <span class="comment">//endl;</span>
<a name="l00178"></a>00178     <span class="keywordflow">return</span> u.url();
<a name="l00179"></a>00179 }
</pre></div></div>
    </div>


      </td>
  </tr>
  <tr>
    <td valign="top" id="leftmenu" width="25%">
      <a name="navigation"></a>
      <div class="menu_box"><h2>akregator/src/librss</h2>
<div class="nav_list">
<ul><li><a href="index.html">Main Page</a></li><li><a href="hierarchy.html">Class Hierarchy</a></li><li><a href="classes.html">Alphabetical List</a></li><li><a href="annotated.html">Class List</a></li><li><a href="files.html">File List</a></li><li><a href="functions.html">Class Members</a></li></ul>
<!--
<h2>Class Picker</h2>
<div style="text-align: center;">
<form name="guideform">
<select name="guidelinks" style="width:100%;" onChange="window.location=document.guideform.guidelinks.options[document.guideform.guidelinks.selectedIndex].value">
<option value="annotated.html">-- Choose --</option>
  <option value="classRSS_1_1Article.html">rss::article</option>,  <option value="classRSS_1_1DataRetriever.html">rss::dataretriever</option>,  <option value="classRSS_1_1Document.html">rss::document</option>,  <option value="classRSS_1_1FeedDetector.html">rss::feeddetector</option>,  <option value="classRSS_1_1FileRetriever.html">rss::fileretriever</option>,  <option value="classRSS_1_1Image.html">rss::image</option>,  <option value="classRSS_1_1Loader.html">rss::loader</option>,  <option value="classRSS_1_1OutputRetriever.html">rss::outputretriever</option>,  <option value="classRSS_1_1TextInput.html">rss::textinput</option>,
</select>
</form>
</div>
-->
</div></div>
<div class="menu_box"><h2>API Dox</h2>
<div class="nav_list">
<ul>
<li><a href="../../../../akregator/html/index.html">akregator</a></li><li>&nbsp;&nbsp;<a href="../../../../akregator/src/html/index.html">src</a></li><li>&nbsp;&nbsp;&nbsp;&nbsp;<a href="../../../../akregator/src/librss/html/index.html">librss</a></li><li><a href="../../../../certmanager/html/index.html">certmanager</a></li><li>&nbsp;&nbsp;<a href="../../../../certmanager/lib/html/index.html">lib</a></li><li><a href="../../../../kaddressbook/html/index.html">kaddressbook</a></li><li><a href="../../../../kalarm/html/index.html">kalarm</a></li><li>&nbsp;&nbsp;<a href="../../../../kalarm/lib/html/index.html">lib</a></li><li><a href="../../../../kandy/html/index.html">kandy</a></li><li><a href="../../../../karm/html/index.html">karm</a></li><li><a href="../../../../kdgantt/html/index.html">kdgantt</a></li><li><a href="../../../../kgantt/html/index.html">kgantt</a></li><li><a href="../../../../kioslaves/html/index.html">kioslaves</a></li><li>&nbsp;&nbsp;<a href="../../../../kioslaves/imap4/html/index.html">imap4</a></li><li>&nbsp;&nbsp;<a href="../../../../kioslaves/mbox/html/index.html">mbox</a></li><li><a href="../../../../kitchensync/html/index.html">kitchensync</a></li><li><a href="../../../../kmail/html/index.html">kmail</a></li><li><a href="../../../../knotes/html/index.html">knotes</a></li><li><a href="../../../../konsolekalendar/html/index.html">konsolekalendar</a></li><li><a href="../../../../kontact/html/index.html">kontact</a></li><li><a href="../../../../korganizer/html/index.html">korganizer</a></li><li><a href="../../../../kpilot/html/index.html">kpilot</a></li><li>&nbsp;&nbsp;<a href="../../../../kpilot/kpilot/html/index.html">kpilot</a></li><li>&nbsp;&nbsp;<a href="../../../../kpilot/lib/html/index.html">lib</a></li><li><a href="../../../../libemailfunctions/html/index.html">libemailfunctions</a></li><li><a href="../../../../libkcal/html/index.html">libkcal</a></li><li><a href="../../../../libkdepim/html/index.html">libkdepim</a></li><li><a 
href="../../../../libkholidays/html/index.html">libkholidays</a></li><li><a href="../../../../libkmime/html/index.html">libkmime</a></li><li><a href="../../../../libkpgp/html/index.html">libkpgp</a></li><li><a href="../../../../libkpimidentities/html/index.html">libkpimidentities</a></li>
</ul></div></div>


        </td>
</tr>
</table>

<!--
  Site-wide links bound to access keys 8, 9 and 0. The wrapper carries the
  "doNotDisplay" class, so it is presumably hidden visually by the
  stylesheet (class not defined in this file - confirm in doxygen.css).
-->
<span class="doNotDisplay">
  <a href="http://www.kde.org/" accesskey="8">KDE Home</a> |
  <a href="http://accessibility.kde.org/" accesskey="9">KDE Accessibility Home</a> |
  <a href="http://www.kde.org/media/accesskeys.php" accesskey="0">Description of Access Keys</a>
</span>


<div style="height: 8px"></div>

<!--
  Page footer: maintainer contacts, trademark notice and legal link.
  The mailto: URLs are written with numeric character references
  (presumably to make address harvesting harder). Every reference must be
  terminated with ";" for the document to stay well-formed XHTML.
-->
<div id="footer">
  <div id="footer_left">
    Maintained by <a href="&#109;&#97;&#105;&#108;&#116;&#111;&#58;groo&#116;&#64;kde&#46;or&#x67;">Adriaan de Groot</a>
and
<a href="&#109;a&#105;&#108;&#116;&#111;&#58;w&#105;nter&#64;kde&#46;or&#x67;">Allen Winter</a>.
<br/>
    KDE and K Desktop Environment are trademarks of <a href="http://www.kde.org/areas/kde-ev/" title="Homepage of the KDE non-profit Organization">KDE e.V.</a> |
    <a href="http://www.kde.org/contact/impressum.php">Legal</a>
  </div>
  <div id="footer_right"><img src="/media/images/footer_right.png" style="margin: 0px" alt="" /></div>
</div>

<!--
WARNING: DO NOT SEND MAIL TO THE FOLLOWING EMAIL ADDRESS! YOU WILL
BE BLOCKED INSTANTLY AND PERMANENTLY!
<a href="mailto:aaaatrap-425acc3b5374943f@kde.org">Block me</a>
WARNING END
-->

</body>
</html>