composer update
This commit is contained in:
parent
9ac51e0523
commit
623395064f
279 changed files with 4458 additions and 16328 deletions
|
@ -2,6 +2,7 @@
|
|||
|
||||
namespace PicoFeed\Client;
|
||||
|
||||
use DateTime;
|
||||
use LogicException;
|
||||
use PicoFeed\Logging\Logger;
|
||||
use PicoFeed\Config\Config;
|
||||
|
@ -55,6 +56,13 @@ abstract class Client
|
|||
*/
|
||||
protected $last_modified = '';
|
||||
|
||||
/**
|
||||
* Expiration DateTime
|
||||
*
|
||||
* @var DateTime
|
||||
*/
|
||||
protected $expiration = null;
|
||||
|
||||
/**
|
||||
* Proxy hostname.
|
||||
*
|
||||
|
@ -214,6 +222,9 @@ abstract class Client
|
|||
$this->handleErrorResponse($response);
|
||||
$this->handleNormalResponse($response);
|
||||
|
||||
$this->expiration = $this->parseExpiration($response['headers']);
|
||||
Logger::setMessage(get_called_class().' Expiration: '.$this->expiration->format(DATE_ISO8601));
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
|
@ -241,6 +252,9 @@ abstract class Client
|
|||
* Handle Http Error codes
|
||||
*
|
||||
* @param array $response Client response
|
||||
* @throws ForbiddenException
|
||||
* @throws InvalidUrlException
|
||||
* @throws UnauthorizedException
|
||||
*/
|
||||
protected function handleErrorResponse(array $response)
|
||||
{
|
||||
|
@ -402,13 +416,12 @@ abstract class Client
|
|||
/**
 * Store the URL on this client.
 *
 * @param $url
 *
 * @return \PicoFeed\Client\Client The same instance, so calls can be chained
 */
public function setUrl($url)
{
    $this->url = $url;

    return $this;
}
|
||||
|
||||
|
@ -670,4 +683,31 @@ abstract class Client
|
|||
{
|
||||
return $code == 301 || $code == 302 || $code == 303 || $code == 307;
|
||||
}
|
||||
|
||||
/**
 * Compute the expiration date of a response from its caching headers.
 *
 * Precedence: "Cache-Control: s-maxage", then "Cache-Control: max-age",
 * then "Expires". When no header is present, or the value cannot be parsed
 * as a date, the current date/time is returned.
 *
 * @param HttpHeaders $headers Response headers
 *
 * @return DateTime
 */
public function parseExpiration(HttpHeaders $headers)
{
    try {
        if (isset($headers['Cache-Control'])) {
            // s-maxage wins over max-age; directive names are matched case-insensitively.
            foreach (array('/s-maxage=(\d+)/i', '/max-age=(\d+)/i') as $pattern) {
                if (preg_match($pattern, $headers['Cache-Control'], $matches)) {
                    return new DateTime('+'.$matches[1].' seconds');
                }
            }
        }

        if (! empty($headers['Expires'])) {
            return new DateTime($headers['Expires']);
        }
    } catch (\Exception $e) {
        // DateTime throws on malformed input (e.g. "Expires: -1"); a bad caching
        // header must not make the whole request fail.
        Logger::setMessage(get_called_class().' Unable to parse expiration date: '.$e->getMessage());
    }

    return new DateTime();
}
|
||||
|
||||
/**
 * Return the stored expiration date.
 *
 * When no expiration has been stored, a DateTime for the current moment
 * is returned instead.
 *
 * @return DateTime
 */
public function getExpiration()
{
    if ($this->expiration) {
        return $this->expiration;
    }

    return new DateTime();
}
|
||||
}
|
||||
|
|
|
@ -11,6 +11,8 @@ use PicoFeed\Logging\Logger;
|
|||
*/
|
||||
class Curl extends Client
|
||||
{
|
||||
protected $nbRedirects = 0;
|
||||
|
||||
/**
|
||||
* HTTP response body.
|
||||
*
|
||||
|
@ -136,6 +138,7 @@ class Curl extends Client
|
|||
|
||||
if ($this->etag) {
|
||||
$headers[] = 'If-None-Match: '.$this->etag;
|
||||
$headers[] = 'A-IM: feed';
|
||||
}
|
||||
|
||||
if ($this->last_modified) {
|
||||
|
@ -199,6 +202,9 @@ class Curl extends Client
|
|||
*/
|
||||
private function prepareDownloadMode($ch)
|
||||
{
|
||||
$this->body = '';
|
||||
$this->response_headers = array();
|
||||
$this->response_headers_count = 0;
|
||||
$write_function = 'readBody';
|
||||
$header_function = 'readHeaders';
|
||||
|
||||
|
@ -304,12 +310,11 @@ class Curl extends Client
|
|||
* Handle HTTP redirects
|
||||
*
|
||||
* @param string $location Redirected URL
|
||||
*
|
||||
* @return array
|
||||
* @throws MaxRedirectException
|
||||
*/
|
||||
private function handleRedirection($location)
|
||||
{
|
||||
$nb_redirects = 0;
|
||||
$result = array();
|
||||
$this->url = Url::resolve($location, $this->url);
|
||||
$this->body = '';
|
||||
|
@ -318,9 +323,9 @@ class Curl extends Client
|
|||
$this->response_headers_count = 0;
|
||||
|
||||
while (true) {
|
||||
++$nb_redirects;
|
||||
$this->nbRedirects++;
|
||||
|
||||
if ($nb_redirects >= $this->max_redirects) {
|
||||
if ($this->nbRedirects >= $this->max_redirects) {
|
||||
throw new MaxRedirectException('Maximum number of redirections reached');
|
||||
}
|
||||
|
||||
|
|
|
@ -31,6 +31,7 @@ class Stream extends Client
|
|||
|
||||
if ($this->etag) {
|
||||
$headers[] = 'If-None-Match: '.$this->etag;
|
||||
$headers[] = 'A-IM: feed';
|
||||
}
|
||||
|
||||
if ($this->last_modified) {
|
||||
|
@ -104,6 +105,9 @@ class Stream extends Client
|
|||
* Do the HTTP request.
|
||||
*
|
||||
* @return array HTTP response ['body' => ..., 'status' => ..., 'headers' => ...]
|
||||
* @throws InvalidUrlException
|
||||
* @throws MaxSizeException
|
||||
* @throws TimeoutException
|
||||
*/
|
||||
public function doRequest()
|
||||
{
|
||||
|
|
|
@ -51,6 +51,7 @@ class Attribute
|
|||
'td' => array(),
|
||||
'tbody' => array(),
|
||||
'thead' => array(),
|
||||
'h1' => array(),
|
||||
'h2' => array(),
|
||||
'h3' => array(),
|
||||
'h4' => array(),
|
||||
|
|
|
@ -42,6 +42,7 @@ class Tag extends Base
|
|||
'td',
|
||||
'tbody',
|
||||
'thead',
|
||||
'h1',
|
||||
'h2',
|
||||
'h3',
|
||||
'h4',
|
||||
|
@ -67,6 +68,8 @@ class Tag extends Base
|
|||
'abbr',
|
||||
'iframe',
|
||||
'q',
|
||||
'sup',
|
||||
'sub',
|
||||
);
|
||||
|
||||
/**
|
||||
|
|
|
@ -9,6 +9,7 @@ use PicoFeed\Client\Url;
|
|||
/**
|
||||
* Atom parser.
|
||||
*
|
||||
* @package PicoFeed\Parser
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
class Atom extends Parser
|
||||
|
@ -154,30 +155,33 @@ class Atom extends Parser
|
|||
}
|
||||
|
||||
/**
|
||||
* Find the item date.
|
||||
* Find the item published date.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
public function findItemPublishedDate(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
{
|
||||
$published = XmlParser::getXPathResult($entry, 'atom:published', $this->namespaces)
|
||||
?: XmlParser::getXPathResult($entry, 'published');
|
||||
$date = XmlParser::getXPathResult($entry, 'atom:published', $this->namespaces)
|
||||
?: XmlParser::getXPathResult($entry, 'published');
|
||||
|
||||
$updated = XmlParser::getXPathResult($entry, 'atom:updated', $this->namespaces)
|
||||
?: XmlParser::getXPathResult($entry, 'updated');
|
||||
$item->setPublishedDate(!empty($date) ? $this->getDateParser()->getDateTime((string) current($date)) : null);
|
||||
}
|
||||
|
||||
$published = !empty($published) ? $this->getDateParser()->getDateTime((string) current($published)) : null;
|
||||
$updated = !empty($updated) ? $this->getDateParser()->getDateTime((string) current($updated)) : null;
|
||||
/**
|
||||
* Find the item updated date.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findItemUpdatedDate(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
{
|
||||
$date = XmlParser::getXPathResult($entry, 'atom:updated', $this->namespaces)
|
||||
?: XmlParser::getXPathResult($entry, 'updated');
|
||||
|
||||
if ($published === null && $updated === null) {
|
||||
$item->setDate($feed->getDate()); // We use the feed date if there is no date for the item
|
||||
} elseif ($published !== null && $updated !== null) {
|
||||
$item->setDate(max($published, $updated)); // We use the most recent date between published and updated
|
||||
} else {
|
||||
$item->setDate($updated ?: $published);
|
||||
}
|
||||
$item->setUpdatedDate(!empty($date) ? $this->getDateParser()->getDateTime((string) current($date)) : null);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -9,6 +9,7 @@ use PicoFeed\Base;
|
|||
/**
|
||||
* Date Parser.
|
||||
*
|
||||
* @package PicoFeed\Parser
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
class DateParser extends Base
|
||||
|
|
|
@ -5,6 +5,7 @@ namespace PicoFeed\Parser;
|
|||
/**
|
||||
* Feed.
|
||||
*
|
||||
* @package PicoFeed\Parser
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
class Feed
|
||||
|
@ -12,7 +13,7 @@ class Feed
|
|||
/**
|
||||
* Feed items.
|
||||
*
|
||||
* @var array
|
||||
* @var Item[]
|
||||
*/
|
||||
public $items = array();
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@ namespace PicoFeed\Parser;
|
|||
/**
|
||||
* Feed Item.
|
||||
*
|
||||
* @package PicoFeed\Parser
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
class Item
|
||||
|
@ -60,6 +61,20 @@ class Item
|
|||
*/
|
||||
public $date = null;
|
||||
|
||||
/**
|
||||
* Item published date.
|
||||
*
|
||||
* @var \DateTime
|
||||
*/
|
||||
public $publishedDate = null;
|
||||
|
||||
/**
|
||||
* Item updated date.
|
||||
*
|
||||
* @var \DateTime
|
||||
*/
|
||||
public $updatedDate = null;
|
||||
|
||||
/**
|
||||
* Item content.
|
||||
*
|
||||
|
@ -151,7 +166,12 @@ class Item
|
|||
$output .= 'Item::'.$property.' = '.$this->$property.PHP_EOL;
|
||||
}
|
||||
|
||||
$publishedDate = $this->publishedDate != null ? $this->publishedDate->format(DATE_RFC822) : null;
|
||||
$updatedDate = $this->updatedDate != null ? $this->updatedDate->format(DATE_RFC822) : null;
|
||||
|
||||
$output .= 'Item::date = '.$this->date->format(DATE_RFC822).PHP_EOL;
|
||||
$output .= 'Item::publishedDate = '.$publishedDate.PHP_EOL;
|
||||
$output .= 'Item::updatedDate = '.$updatedDate.PHP_EOL;
|
||||
$output .= 'Item::isRTL() = '.($this->isRTL() ? 'true' : 'false').PHP_EOL;
|
||||
$output .= 'Item::content = '.strlen($this->content).' bytes'.PHP_EOL;
|
||||
|
||||
|
@ -212,6 +232,26 @@ class Item
|
|||
return $this->date;
|
||||
}
|
||||
|
||||
/**
 * Return the value of the publishedDate property.
 *
 * @return \DateTime
 */
public function getPublishedDate()
{
    return $this->publishedDate;
}
|
||||
|
||||
/**
 * Return the value of the updatedDate property.
 *
 * @return \DateTime
 */
public function getUpdatedDate()
{
    return $this->updatedDate;
}
|
||||
|
||||
/**
|
||||
* Get content.
|
||||
*
|
||||
|
@ -333,6 +373,30 @@ class Item
|
|||
return $this;
|
||||
}
|
||||
|
||||
/**
 * Store the published date of the item.
 *
 * @param \DateTime $publishedDate
 *
 * @return Item The same instance, so calls can be chained
 */
public function setPublishedDate($publishedDate)
{
    $this->publishedDate = $publishedDate;

    return $this;
}
|
||||
|
||||
/**
 * Store the updated date of the item.
 *
 * @param \DateTime $updatedDate
 *
 * @return Item The same instance, so calls can be chained
 */
public function setUpdatedDate($updatedDate)
{
    $this->updatedDate = $updatedDate;

    return $this;
}
|
||||
|
||||
/**
|
||||
* Set enclosure url.
|
||||
*
|
||||
|
|
|
@ -5,6 +5,7 @@ namespace PicoFeed\Parser;
|
|||
/**
|
||||
* MalformedXmlException Exception.
|
||||
*
|
||||
* @package PicoFeed\Parser
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
class MalformedXmlException extends ParserException
|
||||
|
|
|
@ -15,9 +15,10 @@ use PicoFeed\Logging\Logger;
|
|||
/**
|
||||
* Base parser class.
|
||||
*
|
||||
* @package PicoFeed\Parser
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
abstract class Parser
|
||||
abstract class Parser implements ParserInterface
|
||||
{
|
||||
/**
|
||||
* Config object.
|
||||
|
@ -211,6 +212,32 @@ abstract class Parser
|
|||
$item->url = Url::resolve($item->getUrl(), $feed->getSiteUrl());
|
||||
}
|
||||
|
||||
/**
 * Resolve the published, updated and main dates of an item.
 *
 * The format-specific finders run first. A missing published date then
 * falls back to the updated date, or to the feed date; a missing updated
 * date falls back to the published date. The item date is the most recent
 * of the two.
 *
 * @param SimpleXMLElement      $entry Feed item
 * @param Item                  $item  Item object
 * @param \PicoFeed\Parser\Feed $feed  Feed object
 */
public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed)
{
    $this->findItemPublishedDate($entry, $item, $feed);
    $this->findItemUpdatedDate($entry, $item, $feed);

    if (null === $item->getPublishedDate()) {
        $fallback = $item->getUpdatedDate();

        if (! $fallback) {
            // Neither date is available on the item: use the feed date
            $fallback = $feed->getDate();
        }

        $item->setPublishedDate($fallback);
    }

    if (null === $item->getUpdatedDate()) {
        $item->setUpdatedDate($item->getPublishedDate());
    }

    $published = $item->getPublishedDate();
    $updated = $item->getUpdatedDate();

    // Keep the most recent of the two dates
    $item->setDate(max($published, $updated));
}
|
||||
|
||||
/**
|
||||
* Get Item Post Processor instance
|
||||
*
|
||||
|
@ -371,153 +398,5 @@ abstract class Parser
|
|||
return $xml;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed url.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
abstract public function findFeedUrl(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the site url.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
abstract public function findSiteUrl(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed title.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
abstract public function findFeedTitle(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed description.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
abstract public function findFeedDescription(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed language.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
abstract public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed id.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
abstract public function findFeedId(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed date.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
abstract public function findFeedDate(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed logo url.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
abstract public function findFeedLogo(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed icon.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
abstract public function findFeedIcon(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Get the path to the items XML tree.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
*
|
||||
* @return SimpleXMLElement
|
||||
*/
|
||||
abstract public function getItemsTree(SimpleXMLElement $xml);
|
||||
|
||||
/**
|
||||
* Find the item author.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
*/
|
||||
abstract public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item);
|
||||
|
||||
/**
|
||||
* Find the item URL.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
*/
|
||||
abstract public function findItemUrl(SimpleXMLElement $entry, Item $item);
|
||||
|
||||
/**
|
||||
* Find the item title.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
*/
|
||||
abstract public function findItemTitle(SimpleXMLElement $entry, Item $item);
|
||||
|
||||
/**
|
||||
* Generate the item id.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
abstract public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the item date.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
abstract public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the item content.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
*/
|
||||
abstract public function findItemContent(SimpleXMLElement $entry, Item $item);
|
||||
|
||||
/**
|
||||
* Find the item enclosure.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
abstract public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the item language.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
abstract public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed);
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@ use PicoFeed\PicoFeedException;
|
|||
/**
|
||||
* ParserException Exception.
|
||||
*
|
||||
* @package PicoFeed\Parser
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
abstract class ParserException extends PicoFeedException
|
||||
|
|
173
vendor/fguillot/picofeed/lib/PicoFeed/Parser/ParserInterface.php
vendored
Normal file
173
vendor/fguillot/picofeed/lib/PicoFeed/Parser/ParserInterface.php
vendored
Normal file
|
@ -0,0 +1,173 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Parser;
|
||||
|
||||
use SimpleXMLElement;
|
||||
|
||||
/**
|
||||
* Interface ParserInterface
|
||||
*
|
||||
* @package PicoFeed\Parser
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
interface ParserInterface
|
||||
{
|
||||
/**
|
||||
* Find the feed url.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedUrl(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the site url.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param Feed $feed Feed object
|
||||
*/
|
||||
public function findSiteUrl(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed title.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedTitle(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed description.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedDescription(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed language.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed id.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedId(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed date.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedDate(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed logo url.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedLogo(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed icon.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedIcon(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Get the path to the items XML tree.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
*
|
||||
* @return SimpleXMLElement
|
||||
*/
|
||||
public function getItemsTree(SimpleXMLElement $xml);
|
||||
|
||||
/**
|
||||
* Find the item author.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param Item $item Item object
|
||||
*/
|
||||
public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item);
|
||||
|
||||
/**
|
||||
* Find the item URL.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param Item $item Item object
|
||||
*/
|
||||
public function findItemUrl(SimpleXMLElement $entry, Item $item);
|
||||
|
||||
/**
|
||||
* Find the item title.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param Item $item Item object
|
||||
*/
|
||||
public function findItemTitle(SimpleXMLElement $entry, Item $item);
|
||||
|
||||
/**
|
||||
* Generate the item id.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param Item $item Item object
|
||||
* @param Feed $feed Feed object
|
||||
*/
|
||||
public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the item published date.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param Item $item Item object
|
||||
* @param Feed $feed Feed object
|
||||
*/
|
||||
public function findItemPublishedDate(SimpleXMLElement $entry, Item $item, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the item updated date.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param Item $item Item object
|
||||
* @param Feed $feed Feed object
|
||||
*/
|
||||
public function findItemUpdatedDate(SimpleXMLElement $entry, Item $item, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the item content.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param Item $item Item object
|
||||
*/
|
||||
public function findItemContent(SimpleXMLElement $entry, Item $item);
|
||||
|
||||
/**
|
||||
* Find the item enclosure.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param Item $item Item object
|
||||
* @param Feed $feed Feed object
|
||||
*/
|
||||
public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the item language.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param Item $item Item object
|
||||
* @param Feed $feed Feed object
|
||||
*/
|
||||
public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed);
|
||||
}
|
|
@ -8,6 +8,7 @@ use PicoFeed\Filter\Filter;
|
|||
/**
|
||||
* RSS 1.0 parser.
|
||||
*
|
||||
* @package PicoFeed\Parser
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
class Rss10 extends Parser
|
||||
|
@ -157,17 +158,32 @@ class Rss10 extends Parser
|
|||
}
|
||||
|
||||
/**
|
||||
* Find the item date.
|
||||
* Find the item published date.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
public function findItemPublishedDate(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
{
|
||||
$date = XmlParser::getXPathResult($entry, 'dc:date', $this->namespaces);
|
||||
|
||||
$item->setDate(empty($date) ? $feed->getDate() : $this->getDateParser()->getDateTime(XmlParser::getValue($date)));
|
||||
$item->setPublishedDate(!empty($date) ? $this->getDateParser()->getDateTime(XmlParser::getValue($date)) : null);
|
||||
}
|
||||
|
||||
/**
 * Set the item updated date.
 *
 * RSS 1.0 has no updated date, so the published date is reused. The
 * published date is looked up first when the item does not have one yet.
 *
 * @param SimpleXMLElement      $entry Feed item
 * @param Item                  $item  Item object
 * @param \PicoFeed\Parser\Feed $feed  Feed object
 */
public function findItemUpdatedDate(SimpleXMLElement $entry, Item $item, Feed $feed)
{
    if (null === $item->publishedDate) {
        $this->findItemPublishedDate($entry, $item, $feed);
    }

    // No updated date in RSS 1.0 specifications
    $published = $item->getPublishedDate();
    $item->setUpdatedDate($published);
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -9,6 +9,7 @@ use PicoFeed\Client\Url;
|
|||
/**
|
||||
* RSS 2.0 Parser.
|
||||
*
|
||||
* @package PicoFeed\Parser
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
class Rss20 extends Parser
|
||||
|
@ -152,17 +153,32 @@ class Rss20 extends Parser
|
|||
}
|
||||
|
||||
/**
|
||||
* Find the item date.
|
||||
* Find the item published date.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
public function findItemPublishedDate(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
{
|
||||
$date = XmlParser::getXPathResult($entry, 'pubDate');
|
||||
|
||||
$item->setDate(empty($date) ? $feed->getDate() : $this->getDateParser()->getDateTime(XmlParser::getValue($date)));
|
||||
$item->setPublishedDate(!empty($date) ? $this->getDateParser()->getDateTime(XmlParser::getValue($date)) : null);
|
||||
}
|
||||
|
||||
/**
 * Set the item updated date.
 *
 * RSS 2.0 has no updated date, so the published date is reused. The
 * published date is looked up first when the item does not have one yet.
 *
 * @param SimpleXMLElement      $entry Feed item
 * @param Item                  $item  Item object
 * @param \PicoFeed\Parser\Feed $feed  Feed object
 */
public function findItemUpdatedDate(SimpleXMLElement $entry, Item $item, Feed $feed)
{
    if (null === $item->publishedDate) {
        $this->findItemPublishedDate($entry, $item, $feed);
    }

    // No updated date in RSS 2.0 specifications
    $published = $item->getPublishedDate();
    $item->setUpdatedDate($published);
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -5,6 +5,7 @@ namespace PicoFeed\Parser;
|
|||
/**
|
||||
* RSS 0.91 Parser.
|
||||
*
|
||||
* @package PicoFeed\Parser
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
class Rss91 extends Rss20
|
||||
|
|
|
@ -5,6 +5,7 @@ namespace PicoFeed\Parser;
|
|||
/**
|
||||
* RSS 0.92 Parser.
|
||||
*
|
||||
* @package PicoFeed\Parser
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
class Rss92 extends Rss20
|
||||
|
|
|
@ -5,6 +5,7 @@ namespace PicoFeed\Parser;
|
|||
/**
|
||||
* XmlEntityException Exception.
|
||||
*
|
||||
* @package PicoFeed\Parser
|
||||
* @author Bernhard Posselt
|
||||
*/
|
||||
class XmlEntityException extends MalformedXmlException
|
||||
|
|
|
@ -2,9 +2,9 @@
|
|||
|
||||
namespace PicoFeed\Parser;
|
||||
|
||||
use DomDocument;
|
||||
use SimpleXmlElement;
|
||||
|
||||
use DOMDocument;
|
||||
use SimpleXMLElement;
|
||||
use ZendXml\Exception\RuntimeException;
|
||||
use ZendXml\Security;
|
||||
|
||||
/**
|
||||
|
@ -12,6 +12,7 @@ use ZendXml\Security;
|
|||
*
|
||||
* Checks for XML eXternal Entity (XXE) and XML Entity Expansion (XEE) attacks on XML documents
|
||||
*
|
||||
* @package PicoFeed\Parser
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
class XmlParser
|
||||
|
@ -33,7 +34,7 @@ class XmlParser
|
|||
*
|
||||
* @static
|
||||
* @param string $input XML content
|
||||
* @return \DOMDocument
|
||||
* @return DOMDocument
|
||||
*/
|
||||
public static function getDomDocument($input)
|
||||
{
|
||||
|
@ -52,18 +53,20 @@ class XmlParser
|
|||
}
|
||||
|
||||
/**
|
||||
* Small wrapper around ZendXml to turn their exceptions into picoFeed
|
||||
* exceptions
|
||||
* Small wrapper around ZendXml to turn their exceptions into PicoFeed exceptions
|
||||
*
|
||||
* @param $input the xml to load
|
||||
* @param $dom pass in a dom document or use null/omit if simpleXml should
|
||||
* be used
|
||||
* @static
|
||||
* @access private
|
||||
* @param string $input
|
||||
* @param DOMDocument $dom
|
||||
* @throws XmlEntityException
|
||||
* @return SimpleXMLElement|DomDocument|boolean
|
||||
*/
|
||||
private static function scan($input, $dom = null)
|
||||
{
|
||||
try {
|
||||
return Security::scan($input, $dom);
|
||||
} catch(\ZendXml\Exception\RuntimeException $e) {
|
||||
} catch(RuntimeException $e) {
|
||||
throw new XmlEntityException($e->getMessage());
|
||||
}
|
||||
}
|
||||
|
@ -72,8 +75,9 @@ class XmlParser
|
|||
* Load HTML document by using a DomDocument instance or return false on failure.
|
||||
*
|
||||
* @static
|
||||
* @param string $input XML content
|
||||
* @return \DOMDocument
|
||||
* @access public
|
||||
* @param string $input XML content
|
||||
* @return DOMDocument
|
||||
*/
|
||||
public static function getHtmlDocument($input)
|
||||
{
|
||||
|
@ -98,9 +102,8 @@ class XmlParser
|
|||
* Convert a HTML document to XML.
|
||||
*
|
||||
* @static
|
||||
*
|
||||
* @param string $html HTML document
|
||||
*
|
||||
* @access public
|
||||
* @param string $html HTML document
|
||||
* @return string
|
||||
*/
|
||||
public static function htmlToXml($html)
|
||||
|
@ -113,6 +116,7 @@ class XmlParser
|
|||
* Get XML parser errors.
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public static function getErrors()
|
||||
|
@ -135,7 +139,8 @@ class XmlParser
|
|||
* Get the encoding from a xml tag.
|
||||
*
|
||||
* @static
|
||||
* @param string $data Input data
|
||||
* @access public
|
||||
* @param string $data Input data
|
||||
* @return string
|
||||
*/
|
||||
public static function getEncodingFromXmlTag($data)
|
||||
|
@ -162,7 +167,8 @@ class XmlParser
|
|||
* Get the charset from a meta tag.
|
||||
*
|
||||
* @static
|
||||
* @param string $data Input data
|
||||
* @access public
|
||||
* @param string $data Input data
|
||||
* @return string
|
||||
*/
|
||||
public static function getEncodingFromMetaTag($data)
|
||||
|
@ -179,6 +185,8 @@ class XmlParser
|
|||
/**
|
||||
* Rewrite XPath query to use namespace-uri and local-name derived from prefix.
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @param string $query XPath query
|
||||
* @param array $ns Prefix to namespace URI mapping
|
||||
* @return string
|
||||
|
@ -199,10 +207,12 @@ class XmlParser
|
|||
/**
|
||||
* Get the result elements of a XPath query.
|
||||
*
|
||||
* @param \SimpleXMLElement $xml XML element
|
||||
* @param string $query XPath query
|
||||
* @param array $ns Prefix to namespace URI mapping
|
||||
* @return \SimpleXMLElement[]
|
||||
* @static
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml XML element
|
||||
* @param string $query XPath query
|
||||
* @param array $ns Prefix to namespace URI mapping
|
||||
* @return SimpleXMLElement[]
|
||||
*/
|
||||
public static function getXPathResult(SimpleXMLElement $xml, $query, array $ns = array())
|
||||
{
|
||||
|
|
24
vendor/fguillot/picofeed/lib/PicoFeed/Rules/adventuregamers.com.php
vendored
Normal file
24
vendor/fguillot/picofeed/lib/PicoFeed/Rules/adventuregamers.com.php
vendored
Normal file
|
@ -0,0 +1,24 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%^/news.*%' => array(
|
||||
'test_url' => 'http://www.adventuregamers.com/news/view/31079',
|
||||
'body' => array(
|
||||
'//div[@class="bodytext"]',
|
||||
)
|
||||
),
|
||||
'%^/videos.*%' => array(
|
||||
'test_url' => 'http://www.adventuregamers.com/videos/view/31056',
|
||||
'body' => array(
|
||||
'//iframe',
|
||||
)
|
||||
),
|
||||
'%^/articles.*%' => array(
|
||||
'test_url' => 'http://www.adventuregamers.com/articles/view/31049',
|
||||
'body' => array(
|
||||
'//div[@class="cleft"]',
|
||||
)
|
||||
)
|
||||
),
|
||||
);
|
31
vendor/fguillot/picofeed/lib/PicoFeed/Rules/bigpicture.ru.php
vendored
Executable file
31
vendor/fguillot/picofeed/lib/PicoFeed/Rules/bigpicture.ru.php
vendored
Executable file
|
@ -0,0 +1,31 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://bigpicture.ru/?p=556658',
|
||||
'body' => array(
|
||||
'//div[@class="article container"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//form',
|
||||
'//style',
|
||||
'//h1',
|
||||
'//*[@class="wp-smiley"]',
|
||||
'//div[@class="ipmd"]',
|
||||
'//div[@class="tags"]',
|
||||
'//div[@class="social-button"]',
|
||||
'//div[@class="bottom-share"]',
|
||||
'//div[@class="raccoonbox"]',
|
||||
'//div[@class="yndadvert"]',
|
||||
'//div[@class="we-recommend"]',
|
||||
'//div[@class="relap-bigpicture_ru-wrapper"]',
|
||||
'//div[@id="mmail"]',
|
||||
'//div[@id="mobile-ads-cut"]',
|
||||
'//div[@id="liquidstorm-alt-html"]',
|
||||
'//div[contains(@class, "post-tags")]',
|
||||
'//*[contains(text(),"Смотрите также")]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
22
vendor/fguillot/picofeed/lib/PicoFeed/Rules/e-w-e.ru.php
vendored
Executable file
22
vendor/fguillot/picofeed/lib/PicoFeed/Rules/e-w-e.ru.php
vendored
Executable file
|
@ -0,0 +1,22 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://e-w-e.ru/16-prekrasnyx-izobretenij-zhenshhin/',
|
||||
'body' => array(
|
||||
'//div[contains(@class, "post_text")]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//form',
|
||||
'//style',
|
||||
'//*[@class="views_post"]',
|
||||
'//*[@class="adman_mobile"]',
|
||||
'//*[@class="adman_desctop"]',
|
||||
'//*[contains(@rel, "nofollow")]',
|
||||
'//*[contains(@class, "wp-smiley")]',
|
||||
'//*[contains(text(),"Источник:")]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
27
vendor/fguillot/picofeed/lib/PicoFeed/Rules/factroom.ru.php
vendored
Executable file
27
vendor/fguillot/picofeed/lib/PicoFeed/Rules/factroom.ru.php
vendored
Executable file
|
@ -0,0 +1,27 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.factroom.ru/life/20-facts-about-oil',
|
||||
'body' => array(
|
||||
'//div[@class="post"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//form',
|
||||
'//style',
|
||||
'//h1',
|
||||
'//div[@id="yandex_ad2"]',
|
||||
'//*[@class="jp-relatedposts"]',
|
||||
'//div[contains(@class, "likely-desktop")]',
|
||||
'//div[contains(@class, "likely-mobile")]',
|
||||
'//p[last()]',
|
||||
'//div[contains(@class, "facebook")]',
|
||||
'//div[contains(@class, "desktop-underpost-direct")]',
|
||||
'//div[contains(@class, "source-box")]',
|
||||
'//div[contains(@class, "under-likely-desktop")]',
|
||||
'//div[contains(@class, "mobile-down-post")]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
19
vendor/fguillot/picofeed/lib/PicoFeed/Rules/fototelegraf.ru.php
vendored
Executable file
19
vendor/fguillot/picofeed/lib/PicoFeed/Rules/fototelegraf.ru.php
vendored
Executable file
|
@ -0,0 +1,19 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://fototelegraf.ru/?p=348232',
|
||||
'body' => array(
|
||||
'//div[@class="post-content"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//form',
|
||||
'//style',
|
||||
'//div[@class="imageButtonsBlock"]',
|
||||
'//div[@class="adOnPostBtwImg"]',
|
||||
'//div[contains(@class, "post-tags")]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
|
@ -6,7 +6,15 @@ return array(
|
|||
'test_url' => 'http://www.golem.de/news/breko-telekom-verzoegert-gezielt-den-vectoring-ausbau-1311-102974.html',
|
||||
'body' => array(
|
||||
'//header[@class="cluster-header"]',
|
||||
'//header[@class="paged-cluster-header"]',
|
||||
'//div[@class="formatted"]',
|
||||
),
|
||||
'next_page' => array(
|
||||
'//a[@id="atoc_next"]'
|
||||
),
|
||||
'strip' => array(
|
||||
'//header[@class="cluster-header"]/a',
|
||||
'//div[@id="iqadtile4"]',
|
||||
),
|
||||
),
|
||||
),
|
||||
|
|
19
vendor/fguillot/picofeed/lib/PicoFeed/Rules/gorabbit.ru.php
vendored
Executable file
19
vendor/fguillot/picofeed/lib/PicoFeed/Rules/gorabbit.ru.php
vendored
Executable file
|
@ -0,0 +1,19 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://gorabbit.ru/article/10-oshchushcheniy-za-rulem-kogda-tolko-poluchil-voditelskie-prava',
|
||||
'body' => array(
|
||||
'//div[@class="detail_text"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//form',
|
||||
'//style',
|
||||
'//div[@class="socials"]',
|
||||
'//div[@id="cr_1"]',
|
||||
'//div[@class="related_items"]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
12
vendor/fguillot/picofeed/lib/PicoFeed/Rules/hardware.fr.php
vendored
Normal file
12
vendor/fguillot/picofeed/lib/PicoFeed/Rules/hardware.fr.php
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%^/news.*%' => array(
|
||||
'test_url' => 'http://www.hardware.fr/news/14760/intel-lance-nouveaux-ssd-nand-3d.html',
|
||||
'body' => array(
|
||||
'//div[@class="content_actualite"]/div[@class="md"]',
|
||||
)
|
||||
),
|
||||
),
|
||||
);
|
23
vendor/fguillot/picofeed/lib/PicoFeed/Rules/hotshowlife.com.php
vendored
Executable file
23
vendor/fguillot/picofeed/lib/PicoFeed/Rules/hotshowlife.com.php
vendored
Executable file
|
@ -0,0 +1,23 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'https://hotshowlife.com/top-10-chempionov-produktov-po-szhiganiyu-kalorij/',
|
||||
'body' => array(
|
||||
'//div[@class="entry-content"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//form',
|
||||
'//style',
|
||||
'//div[@class="ads2"]',
|
||||
'//div[@class="mistape_caption"]',
|
||||
'//div[contains(@class, "et_social_media_hidden")]',
|
||||
'//div[contains(@class, "et_social_inline_bottom")]',
|
||||
'//div[contains(@class, "avatar")]',
|
||||
'//ul[contains(@class, "entry-tags")]',
|
||||
'//div[contains(@class, "entry-meta")]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
12
vendor/fguillot/picofeed/lib/PicoFeed/Rules/indiehaven.com.php
vendored
Normal file
12
vendor/fguillot/picofeed/lib/PicoFeed/Rules/indiehaven.com.php
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://indiehaven.com/no-mans-sky-is-a-solo-space-adventure-and-im-ok-with-that/',
|
||||
'body' => array(
|
||||
'//section[contains(@class, "entry-content")]',
|
||||
)
|
||||
),
|
||||
),
|
||||
);
|
19
vendor/fguillot/picofeed/lib/PicoFeed/Rules/justcoolidea.ru.php
vendored
Executable file
19
vendor/fguillot/picofeed/lib/PicoFeed/Rules/justcoolidea.ru.php
vendored
Executable file
|
@ -0,0 +1,19 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://justcoolidea.ru/idealnyj-sad-samodelnye-proekty-dlya-berezhlivogo-domovladeltsa/',
|
||||
'body' => array(
|
||||
'//section[@class="entry-content"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//form',
|
||||
'//style',
|
||||
'//*[contains(@class, "essb_links")]',
|
||||
'//*[contains(@rel, "nofollow")]',
|
||||
'//*[contains(@class, "ads")]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
23
vendor/fguillot/picofeed/lib/PicoFeed/Rules/legorafi.fr.php
vendored
Normal file
23
vendor/fguillot/picofeed/lib/PicoFeed/Rules/legorafi.fr.php
vendored
Normal file
|
@ -0,0 +1,23 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => array(
|
||||
'http://www.legorafi.fr/2016/12/16/gorafi-magazine-bravo-vous-avez-bientot-presque-survecu-a-2016/',
|
||||
'http://www.legorafi.fr/2016/12/15/manuel-valls-promet-quune-fois-elu-il-debarrassera-la-france-de-manuel-valls/',
|
||||
),
|
||||
'body' => array(
|
||||
'//section[@id="banner_magazine"]',
|
||||
'//figure[@class="main_picture"]',
|
||||
'//div[@class="content"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//figcaption',
|
||||
'//div[@class="sharebox"]',
|
||||
'//div[@class="tags"]',
|
||||
'//section[@class="taboola_article"]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
22
vendor/fguillot/picofeed/lib/PicoFeed/Rules/lifehacker.ru.php
vendored
Executable file
22
vendor/fguillot/picofeed/lib/PicoFeed/Rules/lifehacker.ru.php
vendored
Executable file
|
@ -0,0 +1,22 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://lifehacker.ru/2016/03/03/polymail/',
|
||||
'body' => array(
|
||||
'//div[@class="post-content"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//form',
|
||||
'//style',
|
||||
'//*[@class="wp-thumbnail-caption"]',
|
||||
'//*[contains(@class, "social-likes")]',
|
||||
'//*[@class="jp-relatedposts"]',
|
||||
'//*[contains(@class, "wpappbox")]',
|
||||
'//*[contains(@class, "icon__image")]',
|
||||
'//div[@id="hypercomments_widget"]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
|
@ -6,6 +6,7 @@ return array(
|
|||
'test_url' => 'https://medium.com/lessons-learned/917b8b63ae3e',
|
||||
'body' => array(
|
||||
'//div[contains(@class, "post-field body")]',
|
||||
'//div[contains(@class, "section-inner layoutSingleColumn")]',
|
||||
),
|
||||
'strip' => array(
|
||||
),
|
||||
|
|
14
vendor/fguillot/picofeed/lib/PicoFeed/Rules/monandroid.com.php
vendored
Normal file
14
vendor/fguillot/picofeed/lib/PicoFeed/Rules/monandroid.com.php
vendored
Normal file
|
@ -0,0 +1,14 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.monandroid.com/blog/tutoriel-avance-activer-le-stockage-fusionne-sur-android-6-marshamallow-t12.html',
|
||||
'body' => array(
|
||||
'//div[@class="blog-post-body"]',
|
||||
),
|
||||
'strip' => array(
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
|
@ -3,7 +3,7 @@
|
|||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.monwindowsphone.com/tout-savoir-sur-le-centre-d-action-de-windows-phone-8-1-t40574.html',
|
||||
'test_url' => 'http://www.monwindows.com/tout-savoir-sur-le-centre-d-action-de-windows-phone-8-1-t40574.html',
|
||||
'body' => array(
|
||||
'//div[@class="blog-post-body"]',
|
||||
),
|
21
vendor/fguillot/picofeed/lib/PicoFeed/Rules/moya-planeta.ru.php
vendored
Executable file
21
vendor/fguillot/picofeed/lib/PicoFeed/Rules/moya-planeta.ru.php
vendored
Executable file
|
@ -0,0 +1,21 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.moya-planeta.ru/travel/view/chto_yaponcu_horosho_russkomu_ne_ponyat_20432/',
|
||||
'body' => array(
|
||||
'//div[@class="full_object"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//div[@class="full_object_panel object_panel"]',
|
||||
'//div[@class="full_object_panel_geo object_panel"]',
|
||||
'//div[@class="full_object_title"]',
|
||||
'//div[@class="full_object_social_likes"]',
|
||||
'//div[@class="full_object_planeta_likes"]',
|
||||
'//div[@class="full_object_go2comments"]',
|
||||
'//div[@id="yandex_ad_R-163191-3"]',
|
||||
'//div[@class="full_object_shop_article_recommend"]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
11
vendor/fguillot/picofeed/lib/PicoFeed/Rules/nat-geo.ru.php
vendored
Executable file
11
vendor/fguillot/picofeed/lib/PicoFeed/Rules/nat-geo.ru.php
vendored
Executable file
|
@ -0,0 +1,11 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.nat-geo.ru/fact/868093-knidos-antichnyy-naukograd/',
|
||||
'body' => array(
|
||||
'//div[@class="article-inner-text"]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
19
vendor/fguillot/picofeed/lib/PicoFeed/Rules/nextinpact.com.php
vendored
Normal file
19
vendor/fguillot/picofeed/lib/PicoFeed/Rules/nextinpact.com.php
vendored
Normal file
|
@ -0,0 +1,19 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.nextinpact.com/news/101122-3d-nand-intel-lance-six-nouvelles-gammes-ssd-pour-tous-usages.htm',
|
||||
'body' => array(
|
||||
'//div[@class="container_article"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//div[@class="infos_article"]',
|
||||
'//div[@id="actu_auteur"]',
|
||||
'//div[@id="soutenir_journaliste"]',
|
||||
'//section[@id="bandeau_abonnez_vous"]',
|
||||
'//br'
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
24
vendor/fguillot/picofeed/lib/PicoFeed/Rules/publy.ru.php
vendored
Executable file
24
vendor/fguillot/picofeed/lib/PicoFeed/Rules/publy.ru.php
vendored
Executable file
|
@ -0,0 +1,24 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.publy.ru/post/19988',
|
||||
'body' => array(
|
||||
'//div[@class="singlepost"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//form',
|
||||
'//style',
|
||||
'//*[@class="featured"]',
|
||||
'//*[@class="toc_white no_bullets"]',
|
||||
'//*[@class="toc_title"]',
|
||||
'//*[@class="pba"]',
|
||||
'//*[@class="comments"]',
|
||||
'//*[contains(@class, "g-single")]',
|
||||
'//*[@class="ts-fab-wrapper"]',
|
||||
'//*[contains(@class, "wp_rp_wrap")]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
12
vendor/fguillot/picofeed/lib/PicoFeed/Rules/rockpapershotgun.com.php
vendored
Normal file
12
vendor/fguillot/picofeed/lib/PicoFeed/Rules/rockpapershotgun.com.php
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'https://www.rockpapershotgun.com/2016/08/26/the-divisions-expansions-delayed-to-improve-the-game/',
|
||||
'body' => array(
|
||||
'//div[@class="entry"]',
|
||||
)
|
||||
),
|
||||
),
|
||||
);
|
|
@ -3,15 +3,15 @@ return array(
|
|||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.rugbyrama.fr/rugby/top-14/2015-2016/top-14-hayman-coupe-du-monde-finale-2012-lutte.-voici-levan-chilachava-toulon_sto5283863/story.shtml',
|
||||
'body' => array(
|
||||
'//div[@class="story-simple-content"]',
|
||||
'body' => array(
|
||||
'//div[@class="storyfull__content"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//form',
|
||||
'//style',
|
||||
'//*[@class="share-buttons"]',
|
||||
'//*[@class="show-mobile-block"]',
|
||||
'//*[@class="ad"]',
|
||||
'//*[@class="hide-desktop"]',
|
||||
'//*[@id="tracking_img"]',
|
||||
)
|
||||
|
|
|
@ -1,9 +1,15 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'filter' => array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'%(<img.+)(\.png"/>)%' => '$1$2$1after$2',
|
||||
'test_url' => 'http://www.smbc-comics.com/comic/the-troll-toll',
|
||||
'body' => array(
|
||||
'//div[@id="cc-comicbody"]',
|
||||
'//div[@id="aftercomic"]',
|
||||
),
|
||||
'strip' => array(
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
||||
|
|
21
vendor/fguillot/picofeed/lib/PicoFeed/Rules/takprosto.cc.php
vendored
Executable file
21
vendor/fguillot/picofeed/lib/PicoFeed/Rules/takprosto.cc.php
vendored
Executable file
|
@ -0,0 +1,21 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://takprosto.cc/kokteyl-dlya-pohudeniya-v-domashnih-usloviyah/',
|
||||
'body' => array(
|
||||
'//div[contains(@class, "entry-contentt")]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//form',
|
||||
'//style',
|
||||
'//*[@class="views_post"]',
|
||||
'//*[contains(@class, "mailchimp-box")]',
|
||||
'//*[contains(@class, "essb_links")]',
|
||||
'//*[contains(@rel, "nofollow")]',
|
||||
'//*[contains(@class, "ads")]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
|
@ -2,20 +2,16 @@
|
|||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.thelocal.se/20151018/swedish-moderates-tighten-focus-on-begging-ban',
|
||||
'test_url' => 'www.thelocal.se/20161219/this-swede-can-memorize-hundreds-of-numbers-in-only-five-minutes',
|
||||
'body' => array(
|
||||
'//article',
|
||||
'//div[@id="article-photo"]',
|
||||
'//div[@id="article-description"]',
|
||||
'//div[@id="article-body"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//p[@id="mobile-signature"]',
|
||||
'//article/div[4]',
|
||||
'//article/ul[1]',
|
||||
'//div[@class="clr"]',
|
||||
'//p[@class="small"]',
|
||||
'//p[@style="font-weight: bold; font-size: 14px;"]',
|
||||
'//div[@class="author"]',
|
||||
'//div[@class="ad_container"]',
|
||||
'//div[@id="article-info-middle"]',
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@ return array(
|
|||
'%.*%' => array(
|
||||
'test_url' => 'http://www.geekculture.com/joyoftech/joyarchives/2180.html',
|
||||
'body' => array(
|
||||
'//p[contains(@class,"Maintext")][2]/img',
|
||||
'//p[contains(@class,"Maintext")][2]/a/img[contains(@src,"joyimages")]',
|
||||
),
|
||||
'strip' => array(),
|
||||
),
|
||||
|
|
|
@ -1,14 +0,0 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.pcinpact.com/news/85954-air-france-ne-vous-demande-plus-deteindre-vos-appareils-electroniques.htm?utm_source=PCi_RSS_Feed&utm_medium=news&utm_campaign=pcinpact',
|
||||
'body' => array(
|
||||
'//div[contains(@id, "actu_content")]',
|
||||
),
|
||||
'strip' => array(
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
|
@ -243,6 +243,16 @@ class CandidateParser implements ParserInterface
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find link for next page of the article.
|
||||
*
|
||||
* @return string|null Always null: this parser has no rule to locate a next page
|
||||
*/
|
||||
public function findNextLink()
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return false if the node should not be removed.
|
||||
*
|
||||
|
|
|
@ -10,4 +10,11 @@ interface ParserInterface
|
|||
* @return string
|
||||
*/
|
||||
public function execute();
|
||||
|
||||
/**
|
||||
* Find link for next page of the article.
|
||||
*
|
||||
* @return string|null Link to the next page, or null when there is none
|
||||
*/
|
||||
public function findNextLink();
|
||||
}
|
||||
|
|
|
@ -65,7 +65,6 @@ class RuleParser implements ParserInterface
|
|||
public function findContent()
|
||||
{
|
||||
$content = '';
|
||||
|
||||
if (isset($this->rules['body']) && is_array($this->rules['body'])) {
|
||||
foreach ($this->rules['body'] as $pattern) {
|
||||
$nodes = $this->xpath->query($pattern);
|
||||
|
@ -80,4 +79,24 @@ class RuleParser implements ParserInterface
|
|||
|
||||
return $content;
|
||||
}
|
||||
|
||||
/**
 * Fetch the link to the next page of the article based on XPath rules.
 *
 * Every pattern listed under the "next_page" rule is evaluated in order and
 * the first matched element carrying a non-empty "href" attribute wins.
 *
 * @return string|null Value of the href attribute, or null when no rule yields a usable link
 */
public function findNextLink()
{
    if (! isset($this->rules['next_page']) || ! is_array($this->rules['next_page'])) {
        return null;
    }

    foreach ($this->rules['next_page'] as $pattern) {
        $nodes = $this->xpath->query($pattern);

        // query() yields false for a malformed expression; try the next pattern.
        if ($nodes === false) {
            continue;
        }

        foreach ($nodes as $node) {
            // Only element nodes expose getAttribute(); a pattern selecting
            // attribute or text nodes must not trigger a fatal error here.
            if (! $node instanceof \DOMElement) {
                continue;
            }

            $href = trim($node->getAttribute('href'));

            // getAttribute() returns '' for a missing attribute. An empty
            // string is not null, so returning it would make the caller
            // treat it as a valid next link.
            if ($href !== '') {
                return $href;
            }
        }
    }

    return null;
}
|
||||
}
|
||||
|
|
|
@ -206,19 +206,31 @@ class Scraper extends Base
|
|||
/**
|
||||
* Execute the scraper.
|
||||
*/
|
||||
public function execute()
|
||||
public function execute($pageContent = '', $recursionDepth = 0)
|
||||
{
|
||||
$this->content = '';
|
||||
$this->html = '';
|
||||
$this->encoding = '';
|
||||
|
||||
$this->content = '';
|
||||
$this->download();
|
||||
$this->prepareHtml();
|
||||
|
||||
$parser = $this->getParser();
|
||||
|
||||
if ($parser !== null) {
|
||||
$this->content = $parser->execute();
|
||||
$maxRecursions = $this->config->getMaxRecursions();
|
||||
if(!isset($maxRecursions)){
|
||||
$maxRecursions = 25;
|
||||
}
|
||||
$pageContent .= $parser->execute();
|
||||
// check if there is a link to next page and recursively get content (at most $maxRecursions follow-up pages, 25 by default)
|
||||
if((($nextLink = $parser->findNextLink()) !== null) && $recursionDepth < $maxRecursions){
|
||||
$nextLink = Url::resolve($nextLink,$this->url);
|
||||
$this->setUrl($nextLink);
|
||||
$this->execute($pageContent,$recursionDepth+1);
|
||||
}
|
||||
else{
|
||||
$this->content = $pageContent;
|
||||
}
|
||||
Logger::setMessage(get_called_class().': Content length: '.strlen($this->content).' bytes');
|
||||
}
|
||||
}
|
||||
|
|
|
@ -56,7 +56,7 @@ abstract class FeedBuilder
|
|||
/**
|
||||
* @var ItemBuilder[]
|
||||
*/
|
||||
protected $items;
|
||||
protected $items = array();
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
|
|
|
@ -36,7 +36,7 @@ class Rss20Helper
|
|||
* @param DOMElement $element
|
||||
* @param string $tag
|
||||
* @param string $value
|
||||
* @return AtomHelper
|
||||
* @return $this
|
||||
*/
|
||||
public function buildNode(DOMElement $element, $tag, $value)
|
||||
{
|
||||
|
@ -52,7 +52,7 @@ class Rss20Helper
|
|||
* @access public
|
||||
* @param DOMElement $element
|
||||
* @param string $title
|
||||
* @return AtomHelper
|
||||
* @return $this
|
||||
*/
|
||||
public function buildTitle(DOMElement $element, $title)
|
||||
{
|
||||
|
@ -66,7 +66,7 @@ class Rss20Helper
|
|||
* @param DOMElement $element
|
||||
* @param DateTime $date
|
||||
* @param string $type
|
||||
* @return AtomHelper
|
||||
* @return $this
|
||||
*/
|
||||
public function buildDate(DOMElement $element, DateTime $date, $type = 'pubDate')
|
||||
{
|
||||
|
@ -79,7 +79,7 @@ class Rss20Helper
|
|||
* @access public
|
||||
* @param DOMElement $element
|
||||
* @param string $url
|
||||
* @return AtomHelper
|
||||
* @return $this
|
||||
*/
|
||||
public function buildLink(DOMElement $element, $url)
|
||||
{
|
||||
|
@ -94,7 +94,7 @@ class Rss20Helper
|
|||
* @param string $tag
|
||||
* @param string $authorName
|
||||
* @param string $authorEmail
|
||||
* @return AtomHelper
|
||||
* @return $this
|
||||
*/
|
||||
public function buildAuthor(DOMElement $element, $tag, $authorName, $authorEmail)
|
||||
{
|
||||
|
|
Reference in a new issue